summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Thomas Lively <tlively@google.com> 2023-09-19 11:08:58 -0700
committer GitHub <noreply@github.com> 2023-09-19 18:08:58 +0000
commit 0fe05b19eb01c65fedbaee8e102c4f7aa0e5fdda (patch)
tree 32e3616b9f8e476a4bea123fa2fdee6eeaee7b8d
parent a51bd6df919a5b79574f0996a760cc20cb05697e (diff)
download binaryen-0fe05b19eb01c65fedbaee8e102c4f7aa0e5fdda.tar.gz
binaryen-0fe05b19eb01c65fedbaee8e102c4f7aa0e5fdda.tar.bz2
binaryen-0fe05b19eb01c65fedbaee8e102c4f7aa0e5fdda.zip
[NFC] Split the new wat parser into multiple files (#5960)
And put the new files in a new source directory, "parser". This is a rough split and is not yet expected to dramatically improve compile times. The exact organization of the new files is subject to change, but this splitting should be enough to make further parser development more pleasant.
-rw-r--r-- CMakeLists.txt 8
-rw-r--r-- src/parser/CMakeLists.txt 9
-rw-r--r-- src/parser/common.h 31
-rw-r--r-- src/parser/context-decls.cpp 194
-rw-r--r-- src/parser/context-defs.cpp 98
-rw-r--r-- src/parser/contexts.h 1275
-rw-r--r-- src/parser/input-impl.h 273
-rw-r--r-- src/parser/input.h 75
-rw-r--r-- src/parser/lexer.cpp (renamed from src/wasm/wat-lexer.cpp) 4
-rw-r--r-- src/parser/lexer.h (renamed from src/wat-lexer.h) 8
-rw-r--r-- src/parser/parsers.h (renamed from src/wasm/wat-parser.cpp) 1923
-rw-r--r-- src/parser/wat-parser.cpp 172
-rw-r--r-- src/parser/wat-parser.h 32
-rw-r--r-- src/wasm/CMakeLists.txt 2
-rw-r--r-- test/gtest/wat-lexer.cpp 2
15 files changed, 2203 insertions, 1903 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f07bd8cc7..ac49355af 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -357,6 +357,7 @@ add_subdirectory(src/asmjs)
add_subdirectory(src/cfg)
add_subdirectory(src/emscripten-optimizer)
add_subdirectory(src/passes)
+add_subdirectory(src/parser)
add_subdirectory(src/support)
add_subdirectory(src/wasm)
add_subdirectory(src/analysis)
@@ -385,7 +386,8 @@ set(binaryen_objs
$<TARGET_OBJECTS:ir>
$<TARGET_OBJECTS:cfg>
$<TARGET_OBJECTS:support>
- $<TARGET_OBJECTS:analysis>)
+ $<TARGET_OBJECTS:analysis>
+ $<TARGET_OBJECTS:parser>)
if(BUILD_LLVM_DWARF)
SET(binaryen_objs ${binaryen_objs} $<TARGET_OBJECTS:llvm_dwarf>)
@@ -432,7 +434,7 @@ if(EMSCRIPTEN)
# binaryen.js WebAssembly variant
add_executable(binaryen_wasm
${binaryen_emscripten_SOURCES})
- target_link_libraries(binaryen_wasm wasm asmjs emscripten-optimizer passes ir cfg support analysis wasm)
+ target_link_libraries(binaryen_wasm wasm asmjs emscripten-optimizer passes ir cfg support analysis parser wasm)
target_link_libraries(binaryen_wasm "-sFILESYSTEM")
target_link_libraries(binaryen_wasm "-sEXPORT_NAME=Binaryen")
target_link_libraries(binaryen_wasm "-sNODERAWFS=0")
@@ -453,7 +455,7 @@ if(EMSCRIPTEN)
# binaryen.js JavaScript variant
add_executable(binaryen_js
${binaryen_emscripten_SOURCES})
- target_link_libraries(binaryen_js wasm asmjs emscripten-optimizer passes ir cfg support analysis wasm)
+ target_link_libraries(binaryen_js wasm asmjs emscripten-optimizer passes ir cfg support analysis parser wasm)
target_link_libraries(binaryen_js "-sWASM=0")
target_link_libraries(binaryen_js "-sWASM_ASYNC_COMPILATION=0")
if(${CMAKE_CXX_COMPILER_VERSION} STREQUAL "6.0.1")
diff --git a/src/parser/CMakeLists.txt b/src/parser/CMakeLists.txt
new file mode 100644
index 000000000..bae90379e
--- /dev/null
+++ b/src/parser/CMakeLists.txt
@@ -0,0 +1,9 @@
+FILE(GLOB parser_HEADERS *.h) # every header in this directory
+set(parser_SOURCES
+ context-decls.cpp
+ context-defs.cpp
+ lexer.cpp
+ wat-parser.cpp
+ ${parser_HEADERS}
+)
+add_library(parser OBJECT ${parser_SOURCES}) # object library; the root CMakeLists.txt adds $<TARGET_OBJECTS:parser> to binaryen_objs
diff --git a/src/parser/common.h b/src/parser/common.h
new file mode 100644
index 000000000..7adf2e5fa
--- /dev/null
+++ b/src/parser/common.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2023 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef parser_common_h
+#define parser_common_h
+
+#include "support/name.h"
+
+namespace wasm::WATParser {
+
+struct ImportNames { // The two names applied to an Importable module item.
+ Name mod; // copied into Importable::module by applyImportNames (context-decls.cpp)
+ Name nm; // copied into Importable::base by applyImportNames (context-decls.cpp)
+};
+
+} // namespace wasm::WATParser
+
+#endif // parser_common_h
diff --git a/src/parser/context-decls.cpp b/src/parser/context-decls.cpp
new file mode 100644
index 000000000..f668c67ae
--- /dev/null
+++ b/src/parser/context-decls.cpp
@@ -0,0 +1,194 @@
+/*
+ * Copyright 2023 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "contexts.h"
+
+namespace wasm::WATParser {
+
+namespace {
+
+// Copy the module and base names from `names` onto `item`. When `names` is
+// null, `item` is left unchanged.
+void applyImportNames(Importable& item, ImportNames* names) {
+  if (!names) {
+    return;
+  }
+  item.module = names->mod;
+  item.base = names->nm;
+}
+
+// Add one export of `item`, with the given `kind`, for every name in
+// `exports`. Stops with an error at the first name the module already
+// exports; exports added before that point stay in the module.
+Result<> addExports(ParseInput& in,
+                    Module& wasm,
+                    const Named* item,
+                    const std::vector<Name>& exports,
+                    ExternalKind kind) {
+  for (auto exportName : exports) {
+    const bool alreadyExported = wasm.getExportOrNull(exportName);
+    if (alreadyExported) {
+      // TODO: Fix error location
+      return in.err("repeated export name");
+    }
+    auto newExport = Builder(wasm).makeExport(exportName, item->name, kind);
+    wasm.addExport(std::move(newExport));
+  }
+  return Ok{};
+}
+
+} // anonymous namespace
+
+// Create a Function, give it a name, apply any import names, and add it to
+// the module. An explicit name must not already be used by another function.
+// An unnamed function gets a name generated from funcCounter, prefixed with
+// "fimport$" when `importNames` is non-null.
+Result<Function*>
+ParseDeclsCtx::addFuncDecl(Index pos, Name name, ImportNames* importNames) {
+  auto func = std::make_unique<Function>();
+  if (!name.is()) {
+    const char* prefix = importNames ? "fimport$" : "";
+    name = prefix + std::to_string(funcCounter++);
+    name = Names::getValidFunctionName(wasm, name);
+    func->name = name;
+  } else {
+    if (wasm.getFunctionOrNull(name)) {
+      // TODO: if the existing function is not explicitly named, fix its name
+      // and continue.
+      return in.err(pos, "repeated function name");
+    }
+    func->setExplicitName(name);
+  }
+  applyImportNames(*func, importNames);
+  return wasm.addFunction(std::move(func));
+}
+
+// Declare a function in the module, register its exports, and record its
+// definition position in funcDefs. Fails if this is an import and
+// hasNonImport is already set.
+Result<> ParseDeclsCtx::addFunc(Name name,
+                                const std::vector<Name>& exports,
+                                ImportNames* import,
+                                TypeUseT type,
+                                std::optional<LocalsT>,
+                                std::optional<InstrsT>,
+                                Index pos) {
+  const bool importTooLate = import && hasNonImport;
+  if (importTooLate) {
+    return in.err(pos, "import after non-import");
+  }
+  auto decl = addFuncDecl(pos, name, import);
+  CHECK_ERR(decl);
+  CHECK_ERR(addExports(in, wasm, *decl, exports, ExternalKind::Function));
+  // The recorded index is this definition's position within funcDefs.
+  const auto defIndex = Index(funcDefs.size());
+  funcDefs.push_back({name, pos, defIndex});
+  return Ok{};
+}
+
+// Create a Memory from the parsed `type`, give it a name, apply any import
+// names, and add it to the module. An explicit name must not already be used
+// by another memory. An unnamed memory gets a name generated from
+// memoryCounter, prefixed with "mimport$" when `importNames` is non-null.
+Result<Memory*> ParseDeclsCtx::addMemoryDecl(Index pos,
+                                             Name name,
+                                             ImportNames* importNames,
+                                             MemType type) {
+  auto memory = std::make_unique<Memory>();
+  const Limits& limits = type.limits;
+  memory->indexType = type.type;
+  memory->initial = limits.initial;
+  memory->max = limits.max;
+  memory->shared = type.shared;
+  if (name) {
+    // TODO: if the existing memory is not explicitly named, fix its name
+    // and continue.
+    if (wasm.getMemoryOrNull(name)) {
+      return in.err(pos, "repeated memory name");
+    }
+    memory->setExplicitName(name);
+  } else {
+    const char* prefix = importNames ? "mimport$" : "";
+    name = prefix + std::to_string(memoryCounter++);
+    name = Names::getValidMemoryName(wasm, name);
+    memory->name = name;
+  }
+  applyImportNames(*memory, importNames);
+  return wasm.addMemory(std::move(memory));
+}
+
+// Declare a memory in the module, register its exports, and record its
+// definition position in memoryDefs. Fails if this is an import and
+// hasNonImport is already set.
+Result<> ParseDeclsCtx::addMemory(Name name,
+                                  const std::vector<Name>& exports,
+                                  ImportNames* import,
+                                  MemType type,
+                                  Index pos) {
+  const bool importTooLate = import && hasNonImport;
+  if (importTooLate) {
+    return in.err(pos, "import after non-import");
+  }
+  auto decl = addMemoryDecl(pos, name, import, type);
+  CHECK_ERR(decl);
+  CHECK_ERR(addExports(in, wasm, *decl, exports, ExternalKind::Memory));
+  // The recorded index is this definition's position within memoryDefs.
+  const auto defIndex = Index(memoryDefs.size());
+  memoryDefs.push_back({name, pos, defIndex});
+  return Ok{};
+}
+
+// Add an active data segment holding `data` at offset 0 of the memory most
+// recently added to the module. The segment is named from the root
+// "implicit-data".
+Result<> ParseDeclsCtx::addImplicitData(DataStringT&& data) {
+  auto& memory = *wasm.memories.back();
+  auto segment = std::make_unique<DataSegment>();
+  segment->name = Names::getValidDataSegmentName(wasm, "implicit-data");
+  segment->memory = memory.name;
+  segment->isPassive = false;
+  segment->offset = Builder(wasm).makeConstPtr(0, memory.indexType);
+  segment->data = std::move(data);
+  wasm.addDataSegment(std::move(segment));
+  return Ok{};
+}
+
+// Create a Global, give it a name, apply any import names, and add it to the
+// module. An explicit name must not already be used by another global. An
+// unnamed global gets a name generated from globalCounter, prefixed with
+// "gimport$" when `importNames` is non-null.
+Result<Global*>
+ParseDeclsCtx::addGlobalDecl(Index pos, Name name, ImportNames* importNames) {
+  auto global = std::make_unique<Global>();
+  if (name) {
+    if (wasm.getGlobalOrNull(name)) {
+      // TODO: if the existing global is not explicitly named, fix its name
+      // and continue.
+      return in.err(pos, "repeated global name");
+    }
+    global->setExplicitName(name);
+  } else {
+    const char* prefix = importNames ? "gimport$" : "";
+    name = prefix + std::to_string(globalCounter++);
+    name = Names::getValidGlobalName(wasm, name);
+    global->name = name;
+  }
+  applyImportNames(*global, importNames);
+  return wasm.addGlobal(std::move(global));
+}
+
+// Declare a global in the module, register its exports, and record its
+// definition position in globalDefs. Fails if this is an import and
+// hasNonImport is already set. The type and initializer are ignored here.
+Result<> ParseDeclsCtx::addGlobal(Name name,
+                                  const std::vector<Name>& exports,
+                                  ImportNames* import,
+                                  GlobalTypeT,
+                                  std::optional<ExprT>,
+                                  Index pos) {
+  const bool importTooLate = import && hasNonImport;
+  if (importTooLate) {
+    return in.err(pos, "import after non-import");
+  }
+  auto decl = addGlobalDecl(pos, name, import);
+  CHECK_ERR(decl);
+  CHECK_ERR(addExports(in, wasm, *decl, exports, ExternalKind::Global));
+  // The recorded index is this definition's position within globalDefs.
+  const auto defIndex = Index(globalDefs.size());
+  globalDefs.push_back({name, pos, defIndex});
+  return Ok{};
+}
+
+// Create a data segment holding `data`, give it a name, record its definition
+// position in dataDefs, and add it to the module. An explicit name must not
+// already be used by another segment; an unnamed segment gets a name
+// generated from dataCounter. The memory index and offset are ignored here.
+Result<> ParseDeclsCtx::addData(Name name,
+                                MemoryIdxT*,
+                                std::optional<ExprT>,
+                                std::vector<char>&& data,
+                                Index pos) {
+  auto segment = std::make_unique<DataSegment>();
+  if (name) {
+    if (wasm.getDataSegmentOrNull(name)) {
+      // TODO: if the existing segment is not explicitly named, fix its name
+      // and continue.
+      return in.err(pos, "repeated data segment name");
+    }
+    segment->setExplicitName(name);
+  } else {
+    name = std::to_string(dataCounter++);
+    name = Names::getValidDataSegmentName(wasm, name);
+    segment->name = name;
+  }
+  segment->data = std::move(data);
+  // Take the index before the segment is added: it is the slot the segment
+  // is about to occupy in the module's segment list.
+  const auto segmentIndex = Index(wasm.dataSegments.size());
+  dataDefs.push_back({name, pos, segmentIndex});
+  wasm.addDataSegment(std::move(segment));
+  return Ok{};
+}
+
+} // namespace wasm::WATParser
diff --git a/src/parser/context-defs.cpp b/src/parser/context-defs.cpp
new file mode 100644
index 000000000..ca8f61ec3
--- /dev/null
+++ b/src/parser/context-defs.cpp
@@ -0,0 +1,98 @@
+/*
+ * Copyright 2023 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "contexts.h"
+
+namespace wasm::WATParser {
+
+// Resolve a type use to a heap type. With an explicit `type`, any inline
+// params/results must form exactly that type's signature. Without one, the
+// type is looked up in implicitTypes by `pos`, and an entry must exist.
+Result<typename ParseDefsCtx::TypeUseT>
+ParseDefsCtx::makeTypeUse(Index pos,
+                          std::optional<HeapTypeT> type,
+                          ParamsT* params,
+                          ResultsT* results) {
+  if (!type) {
+    auto found = implicitTypes.find(pos);
+    assert(found != implicitTypes.end());
+    return found->second;
+  }
+
+  const bool hasInlineSignature = params || results;
+  if (hasInlineSignature) {
+    std::vector<Type> paramTypes;
+    if (params) {
+      paramTypes = getUnnamedTypes(*params);
+    }
+
+    std::vector<Type> resultTypes;
+    if (results) {
+      resultTypes = *results;
+    }
+
+    auto inlineSig = Signature(Type(paramTypes), Type(resultTypes));
+
+    if (!type->isSignature() || type->getSignature() != inlineSig) {
+      return in.err(pos, "type does not match provided signature");
+    }
+  }
+
+  return *type;
+}
+
+// Finish the function being built by irBuilder and store the resulting
+// expression as the body of the function at `index`. Errors from the builder
+// are passed through withLoc together with `pos`.
+Result<> ParseDefsCtx::addFunc(Name,
+                               const std::vector<Name>&,
+                               ImportNames*,
+                               TypeUseT,
+                               std::optional<LocalsT>,
+                               std::optional<InstrsT>,
+                               Index pos) {
+  CHECK_ERR(withLoc(pos, irBuilder.visitEnd()));
+  auto built = irBuilder.build();
+  CHECK_ERR(withLoc(pos, built));
+  auto& func = wasm.functions[index];
+  func->body = *built;
+  return Ok{};
+}
+
+// Store the parsed initializer, when there is one, on the global at `index`.
+// A global without an initializer is left unchanged.
+Result<> ParseDefsCtx::addGlobal(Name,
+                                 const std::vector<Name>&,
+                                 ImportNames*,
+                                 GlobalTypeT,
+                                 std::optional<ExprT> exp,
+                                 Index) {
+  if (!exp) {
+    return Ok{};
+  }
+  wasm.globals[index]->init = *exp;
+  return Ok{};
+}
+
+// Finish the data segment at `index`. With no `offset` the segment is marked
+// passive. With an `offset` it becomes active in `*mem`, or in the module's
+// first memory when `mem` is null; having no memory at all is an error.
+Result<> ParseDefsCtx::addData(
+  Name, Name* mem, std::optional<ExprT> offset, DataStringT, Index pos) {
+  auto& segment = wasm.dataSegments[index];
+  if (!offset) {
+    segment->isPassive = true;
+    return Ok{};
+  }
+
+  segment->isPassive = false;
+  segment->offset = *offset;
+  if (mem) {
+    segment->memory = *mem;
+    return Ok{};
+  }
+  if (wasm.memories.empty()) {
+    return in.err(pos, "active segment with no memory");
+  }
+  segment->memory = wasm.memories[0]->name;
+  return Ok{};
+}
+
+} // namespace wasm::WATParser
diff --git a/src/parser/contexts.h b/src/parser/contexts.h
new file mode 100644
index 000000000..210945e8d
--- /dev/null
+++ b/src/parser/contexts.h
@@ -0,0 +1,1275 @@
+/*
+ * Copyright 2023 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef parser_context_h
+#define parser_context_h
+
+#include "common.h"
+#include "input.h"
+#include "ir/names.h"
+#include "support/name.h"
+#include "support/result.h"
+#include "wasm-builder.h"
+#include "wasm-ir-builder.h"
+#include "wasm.h"
+
+namespace wasm::WATParser {
+
+using IndexMap = std::unordered_map<Name, Index>;
+
+// Return just the `type` of every entry in `named`, in the same order.
+inline std::vector<Type> getUnnamedTypes(const std::vector<NameType>& named) {
+  std::vector<Type> result;
+  const size_t count = named.size();
+  result.reserve(count);
+  for (size_t i = 0; i < count; ++i) {
+    result.push_back(named[i].type);
+  }
+  return result;
+}
+
+struct Limits { // Initial/maximum pair; built by ParseDeclsCtx::makeLimits and getLimitsFromData.
+ uint64_t initial; // copied to Memory::initial by ParseDeclsCtx::addMemoryDecl
+ uint64_t max; // Memory::kUnlimitedSize when makeLimits is given no maximum
+};
+
+struct MemType { // Parsed memory type; consumed by ParseDeclsCtx::addMemoryDecl.
+ Type type; // copied to Memory::indexType
+ Limits limits; // copied to Memory::initial and Memory::max
+ bool shared; // copied to Memory::shared
+};
+
+struct Memarg { // NOTE(review): presumably the offset/alignment immediates of a memory instruction; its users are outside this chunk.
+ uint64_t offset;
+ uint32_t align;
+};
+
+// The location, possible name, and index in the respective module index space
+// of a module-level definition in the input.
+struct DefPos {
+ Name name; // may be empty, e.g. ParseDeclsCtx::finishDeftype records `{}`
+ Index pos; // the `pos` argument handed to the add*/finish* method that recorded it
+ Index index; // e.g. Index(funcDefs.size()) at the moment the entry is recorded
+};
+
+struct GlobalType { // Mutability plus value type; aliased as GlobalTypeT by ParseModuleTypesCtx.
+ Mutability mutability;
+ Type type;
+};
+
+// A signature type and parameter names (possibly empty), used for parsing
+// function types.
+struct TypeUse {
+ HeapType type; // the signature type
+ std::vector<Name> names; // the parameter names; may be empty
+};
+
+struct NullTypeParserCtx { // Type-parsing context that builds nothing: its factories return Ok{} and its append* methods do nothing.
+ using IndexT = Ok;
+ using HeapTypeT = Ok;
+ using TypeT = Ok;
+ using ParamsT = Ok;
+ using ResultsT = size_t;
+ using BlockTypeT = Ok;
+ using SignatureT = Ok;
+ using StorageT = Ok;
+ using FieldT = Ok;
+ using FieldsT = Ok;
+ using StructT = Ok;
+ using ArrayT = Ok;
+ using LimitsT = Ok;
+ using MemTypeT = Ok;
+ using GlobalTypeT = Ok;
+ using TypeUseT = Ok;
+ using LocalsT = Ok;
+ using DataStringT = Ok;
+ // Heap type factories.
+ HeapTypeT makeFunc() { return Ok{}; }
+ HeapTypeT makeAny() { return Ok{}; }
+ HeapTypeT makeExtern() { return Ok{}; }
+ HeapTypeT makeEq() { return Ok{}; }
+ HeapTypeT makeI31() { return Ok{}; }
+ HeapTypeT makeStructType() { return Ok{}; }
+ HeapTypeT makeArrayType() { return Ok{}; }
+ // Value type factories.
+ TypeT makeI32() { return Ok{}; }
+ TypeT makeI64() { return Ok{}; }
+ TypeT makeF32() { return Ok{}; }
+ TypeT makeF64() { return Ok{}; }
+ TypeT makeV128() { return Ok{}; }
+
+ TypeT makeRefType(HeapTypeT, Nullability) { return Ok{}; }
+
+ ParamsT makeParams() { return Ok{}; }
+ void appendParam(ParamsT&, Name, TypeT) {}
+
+ // We have to count results because whether or not a block introduces a
+ // typeuse that may implicitly define a type depends on how many results it
+ // has.
+ size_t makeResults() { return 0; }
+ void appendResult(size_t& results, TypeT) { ++results; }
+ size_t getResultsSize(size_t results) { return results; }
+
+ SignatureT makeFuncType(ParamsT*, ResultsT*) { return Ok{}; }
+
+ StorageT makeI8() { return Ok{}; }
+ StorageT makeI16() { return Ok{}; }
+ StorageT makeStorageType(TypeT) { return Ok{}; }
+
+ FieldT makeFieldType(StorageT, Mutability) { return Ok{}; }
+
+ FieldsT makeFields() { return Ok{}; }
+ void appendField(FieldsT&, Name, FieldT) {}
+
+ StructT makeStruct(FieldsT&) { return Ok{}; }
+ // Always yields an engaged optional, whatever the fields are.
+ std::optional<ArrayT> makeArray(FieldsT&) { return Ok{}; }
+
+ GlobalTypeT makeGlobalType(Mutability, TypeT) { return Ok{}; }
+
+ LocalsT makeLocals() { return Ok{}; }
+ void appendLocal(LocalsT&, Name, TypeT) {}
+ // getTypeIndex reports index 1 for every name; no lookup is performed.
+ Result<Index> getTypeIndex(Name) { return 1; }
+ Result<HeapTypeT> getHeapTypeFromIdx(Index) { return Ok{}; }
+
+ DataStringT makeDataString() { return Ok{}; }
+ void appendDataString(DataStringT&, std::string_view) {}
+
+ MemTypeT makeMemType(Type, LimitsT, bool) { return Ok{}; }
+
+ BlockTypeT getBlockTypeFromResult(size_t results) { return Ok{}; }
+
+ Result<> getBlockTypeFromTypeUse(Index, TypeUseT) { return Ok{}; }
+};
+
+// CRTP base for parser contexts that construct real types. `Ctx` is the
+// derived context. It must expose an `in` member with an `err` method, which
+// getTypeIndex uses to report unknown identifiers. makeFuncType builds tuple
+// types through self(), so a derived context can supply its own
+// makeTupleType.
+template<typename Ctx> struct TypeParserCtx {
+  using IndexT = Index;
+  using HeapTypeT = HeapType;
+  using TypeT = Type;
+  using ParamsT = std::vector<NameType>;
+  using ResultsT = std::vector<Type>;
+  using BlockTypeT = HeapType;
+  using SignatureT = Signature;
+  using StorageT = Field;
+  using FieldT = Field;
+  using FieldsT = std::pair<std::vector<Name>, std::vector<Field>>;
+  using StructT = std::pair<std::vector<Name>, Struct>;
+  using ArrayT = Array;
+  using LimitsT = Ok;
+  using MemTypeT = Ok;
+  using LocalsT = std::vector<NameType>;
+  using DataStringT = Ok;
+
+  // Map heap type names to their indices.
+  const IndexMap& typeIndices;
+
+  TypeParserCtx(const IndexMap& typeIndices) : typeIndices(typeIndices) {}
+
+  Ctx& self() { return *static_cast<Ctx*>(this); }
+
+  HeapTypeT makeFunc() { return HeapType::func; }
+  HeapTypeT makeAny() { return HeapType::any; }
+  HeapTypeT makeExtern() { return HeapType::ext; }
+  HeapTypeT makeEq() { return HeapType::eq; }
+  HeapTypeT makeI31() { return HeapType::i31; }
+  HeapTypeT makeStructType() { return HeapType::struct_; }
+  HeapTypeT makeArrayType() { return HeapType::array; }
+
+  TypeT makeI32() { return Type::i32; }
+  TypeT makeI64() { return Type::i64; }
+  TypeT makeF32() { return Type::f32; }
+  TypeT makeF64() { return Type::f64; }
+  TypeT makeV128() { return Type::v128; }
+
+  TypeT makeRefType(HeapTypeT ht, Nullability nullability) {
+    return Type(ht, nullability);
+  }
+
+  // Takes the list by const reference so the caller's vector is not copied.
+  TypeT makeTupleType(const std::vector<Type>& types) { return Tuple(types); }
+
+  ParamsT makeParams() { return {}; }
+  void appendParam(ParamsT& params, Name id, TypeT type) {
+    params.push_back({id, type});
+  }
+
+  ResultsT makeResults() { return {}; }
+  void appendResult(ResultsT& results, TypeT type) { results.push_back(type); }
+  size_t getResultsSize(const ResultsT& results) { return results.size(); }
+
+  // Build a signature from optional parameter and result lists. A null
+  // pointer stands for an empty list.
+  SignatureT makeFuncType(ParamsT* params, ResultsT* results) {
+    std::vector<Type> paramTypes;
+    if (params) {
+      paramTypes = getUnnamedTypes(*params);
+    }
+    std::vector<Type> empty;
+    const auto& resultTypes = results ? *results : empty;
+    return Signature(self().makeTupleType(paramTypes),
+                     self().makeTupleType(resultTypes));
+  }
+
+  StorageT makeI8() { return Field(Field::i8, Immutable); }
+  StorageT makeI16() { return Field(Field::i16, Immutable); }
+  StorageT makeStorageType(TypeT type) { return Field(type, Immutable); }
+
+  // Rebuild `field` with the requested mutability, keeping its packed type
+  // when it has one and its value type otherwise.
+  FieldT makeFieldType(FieldT field, Mutability mutability) {
+    if (field.packedType == Field::not_packed) {
+      return Field(field.type, mutability);
+    }
+    return Field(field.packedType, mutability);
+  }
+
+  FieldsT makeFields() { return {}; }
+  void appendField(FieldsT& fields, Name name, FieldT field) {
+    fields.first.push_back(name);
+    fields.second.push_back(field);
+  }
+
+  // Moves the names and fields out of `fields`.
+  StructT makeStruct(FieldsT& fields) {
+    return {std::move(fields.first), Struct(std::move(fields.second))};
+  }
+
+  // An array is produced only when exactly one field was parsed; otherwise
+  // the result is empty.
+  std::optional<ArrayT> makeArray(FieldsT& fields) {
+    if (fields.second.size() == 1) {
+      return Array(fields.second[0]);
+    }
+    return {};
+  }
+
+  LocalsT makeLocals() { return {}; }
+  void appendLocal(LocalsT& locals, Name id, TypeT type) {
+    locals.push_back({id, type});
+  }
+
+  // Look up the index recorded for the type named `id`, or report an error
+  // through the derived context's input.
+  Result<Index> getTypeIndex(Name id) {
+    auto it = typeIndices.find(id);
+    if (it == typeIndices.end()) {
+      return self().in.err("unknown type identifier");
+    }
+    return it->second;
+  }
+
+  DataStringT makeDataString() { return Ok{}; }
+  void appendDataString(DataStringT&, std::string_view) {}
+
+  LimitsT makeLimits(uint64_t, std::optional<uint64_t>) { return Ok{}; }
+  LimitsT getLimitsFromData(DataStringT) { return Ok{}; }
+
+  MemTypeT makeMemType(Type, LimitsT, bool) { return Ok{}; }
+
+  // Requires exactly one result type. Takes the list by const reference so
+  // the caller's vector is not copied.
+  HeapType getBlockTypeFromResult(const std::vector<Type>& results) {
+    assert(results.size() == 1);
+    return HeapType(Signature(Type::none, results[0]));
+  }
+};
+
+struct NullInstrParserCtx { // Instruction-parsing context that builds nothing: every make*/get* method returns Ok{}.
+ using InstrT = Ok;
+ using InstrsT = Ok;
+ using ExprT = Ok;
+ // Index result types; all are Ok in this context.
+ using FieldIdxT = Ok;
+ using LocalIdxT = Ok;
+ using GlobalIdxT = Ok;
+ using MemoryIdxT = Ok;
+ using DataIdxT = Ok;
+
+ using MemargT = Ok;
+
+ InstrsT makeInstrs() { return Ok{}; }
+ void appendInstr(InstrsT&, InstrT) {}
+ InstrsT finishInstrs(InstrsT&) { return Ok{}; }
+
+ ExprT makeExpr(InstrsT) { return Ok{}; }
+ Result<ExprT> instrToExpr(InstrT) { return Ok{}; }
+ // Index lookups by number or by name; none of them consults any state.
+ template<typename HeapTypeT> FieldIdxT getFieldFromIdx(HeapTypeT, uint32_t) {
+ return Ok{};
+ }
+ template<typename HeapTypeT> FieldIdxT getFieldFromName(HeapTypeT, Name) {
+ return Ok{};
+ }
+ LocalIdxT getLocalFromIdx(uint32_t) { return Ok{}; }
+ LocalIdxT getLocalFromName(Name) { return Ok{}; }
+ GlobalIdxT getGlobalFromIdx(uint32_t) { return Ok{}; }
+ GlobalIdxT getGlobalFromName(Name) { return Ok{}; }
+ MemoryIdxT getMemoryFromIdx(uint32_t) { return Ok{}; }
+ MemoryIdxT getMemoryFromName(Name) { return Ok{}; }
+ DataIdxT getDataFromIdx(uint32_t) { return Ok{}; }
+ DataIdxT getDataFromName(Name) { return Ok{}; }
+
+ MemargT getMemarg(uint64_t, uint32_t) { return Ok{}; }
+
+ template<typename BlockTypeT>
+ InstrT makeBlock(Index, std::optional<Name>, BlockTypeT) {
+ return Ok{};
+ }
+ InstrT finishBlock(Index, InstrsT) { return Ok{}; }
+ // Instruction factories; every argument is ignored.
+ InstrT makeUnreachable(Index) { return Ok{}; }
+ InstrT makeNop(Index) { return Ok{}; }
+ InstrT makeBinary(Index, BinaryOp) { return Ok{}; }
+ InstrT makeUnary(Index, UnaryOp) { return Ok{}; }
+ template<typename ResultsT> InstrT makeSelect(Index, ResultsT*) {
+ return Ok{};
+ }
+ InstrT makeDrop(Index) { return Ok{}; }
+ InstrT makeMemorySize(Index, MemoryIdxT*) { return Ok{}; }
+ InstrT makeMemoryGrow(Index, MemoryIdxT*) { return Ok{}; }
+ InstrT makeLocalGet(Index, LocalIdxT) { return Ok{}; }
+ InstrT makeLocalTee(Index, LocalIdxT) { return Ok{}; }
+ InstrT makeLocalSet(Index, LocalIdxT) { return Ok{}; }
+ InstrT makeGlobalGet(Index, GlobalIdxT) { return Ok{}; }
+ InstrT makeGlobalSet(Index, GlobalIdxT) { return Ok{}; }
+
+ InstrT makeI32Const(Index, uint32_t) { return Ok{}; }
+ InstrT makeI64Const(Index, uint64_t) { return Ok{}; }
+ InstrT makeF32Const(Index, float) { return Ok{}; }
+ InstrT makeF64Const(Index, double) { return Ok{}; }
+ InstrT makeLoad(Index, Type, bool, int, bool, MemoryIdxT*, MemargT) {
+ return Ok{};
+ }
+ InstrT makeStore(Index, Type, int, bool, MemoryIdxT*, MemargT) {
+ return Ok{};
+ }
+ InstrT makeAtomicRMW(Index, AtomicRMWOp, Type, int, MemoryIdxT*, MemargT) {
+ return Ok{};
+ }
+ InstrT makeAtomicCmpxchg(Index, Type, int, MemoryIdxT*, MemargT) {
+ return Ok{};
+ }
+ InstrT makeAtomicWait(Index, Type, MemoryIdxT*, MemargT) { return Ok{}; }
+ InstrT makeAtomicNotify(Index, MemoryIdxT*, MemargT) { return Ok{}; }
+ InstrT makeAtomicFence(Index) { return Ok{}; }
+ InstrT makeSIMDExtract(Index, SIMDExtractOp, uint8_t) { return Ok{}; }
+ InstrT makeSIMDReplace(Index, SIMDReplaceOp, uint8_t) { return Ok{}; }
+ InstrT makeSIMDShuffle(Index, const std::array<uint8_t, 16>&) { return Ok{}; }
+ InstrT makeSIMDTernary(Index, SIMDTernaryOp) { return Ok{}; }
+ InstrT makeSIMDShift(Index, SIMDShiftOp) { return Ok{}; }
+ InstrT makeSIMDLoad(Index, SIMDLoadOp, MemoryIdxT*, MemargT) { return Ok{}; }
+ InstrT makeSIMDLoadStoreLane(
+ Index, SIMDLoadStoreLaneOp, MemoryIdxT*, MemargT, uint8_t) {
+ return Ok{};
+ }
+ InstrT makeMemoryInit(Index, MemoryIdxT*, DataIdxT) { return Ok{}; }
+ InstrT makeDataDrop(Index, DataIdxT) { return Ok{}; }
+
+ InstrT makeMemoryCopy(Index, MemoryIdxT*, MemoryIdxT*) { return Ok{}; }
+ InstrT makeMemoryFill(Index, MemoryIdxT*) { return Ok{}; }
+
+ InstrT makeReturn(Index) { return Ok{}; }
+ template<typename HeapTypeT> InstrT makeRefNull(Index, HeapTypeT) {
+ return Ok{};
+ }
+ InstrT makeRefIsNull(Index) { return Ok{}; }
+
+ InstrT makeRefEq(Index) { return Ok{}; }
+
+ InstrT makeRefI31(Index) { return Ok{}; }
+ InstrT makeI31Get(Index, bool) { return Ok{}; }
+ // Struct and array instruction factories, templated on the heap type representation.
+ template<typename HeapTypeT> InstrT makeStructNew(Index, HeapTypeT) {
+ return Ok{};
+ }
+ template<typename HeapTypeT> InstrT makeStructNewDefault(Index, HeapTypeT) {
+ return Ok{};
+ }
+ template<typename HeapTypeT>
+ InstrT makeStructGet(Index, HeapTypeT, FieldIdxT, bool) {
+ return Ok{};
+ }
+ template<typename HeapTypeT>
+ InstrT makeStructSet(Index, HeapTypeT, FieldIdxT) {
+ return Ok{};
+ }
+ template<typename HeapTypeT> InstrT makeArrayNew(Index, HeapTypeT) {
+ return Ok{};
+ }
+ template<typename HeapTypeT> InstrT makeArrayNewDefault(Index, HeapTypeT) {
+ return Ok{};
+ }
+ template<typename HeapTypeT>
+ InstrT makeArrayNewData(Index, HeapTypeT, DataIdxT) {
+ return Ok{};
+ }
+ template<typename HeapTypeT>
+ InstrT makeArrayNewElem(Index, HeapTypeT, DataIdxT) {
+ return Ok{};
+ }
+ template<typename HeapTypeT> InstrT makeArrayGet(Index, HeapTypeT, bool) {
+ return Ok{};
+ }
+ template<typename HeapTypeT> InstrT makeArraySet(Index, HeapTypeT) {
+ return Ok{};
+ }
+ InstrT makeArrayLen(Index) { return Ok{}; }
+ template<typename HeapTypeT>
+ InstrT makeArrayCopy(Index, HeapTypeT, HeapTypeT) {
+ return Ok{};
+ }
+ template<typename HeapTypeT> InstrT makeArrayFill(Index, HeapTypeT) {
+ return Ok{};
+ }
+};
+
+// Phase 1: Parse definition spans for top-level module elements and determine
+// their indices and names. Type and instruction parsing is delegated to the
+// Null contexts; only data strings, limits, and memory types carry real
+// values here.
+struct ParseDeclsCtx : NullTypeParserCtx, NullInstrParserCtx {
+  using DataStringT = std::vector<char>;
+  using LimitsT = Limits;
+  using MemTypeT = MemType;
+
+  ParseInput in;
+
+  // At this stage we only look at types to find implicit type definitions,
+  // which are inserted directly into the context. We cannot materialize or
+  // validate any types because we don't know what types exist yet.
+  //
+  // Declared module elements are inserted into the module, but their bodies are
+  // not filled out until later parsing phases.
+  Module& wasm;
+
+  // The module element definitions we are parsing in this phase.
+  std::vector<DefPos> typeDefs;
+  std::vector<DefPos> subtypeDefs;
+  std::vector<DefPos> funcDefs;
+  std::vector<DefPos> memoryDefs;
+  std::vector<DefPos> globalDefs;
+  std::vector<DefPos> dataDefs;
+
+  // Positions of typeuses that might implicitly define new types.
+  std::vector<Index> implicitTypeDefs;
+
+  // Counters used for generating names for module elements.
+  int funcCounter = 0;
+  int memoryCounter = 0;
+  int globalCounter = 0;
+  int dataCounter = 0;
+
+  // Used to verify that all imports come before all non-imports.
+  bool hasNonImport = false;
+
+  ParseDeclsCtx(std::string_view in, Module& wasm) : in(in), wasm(wasm) {}
+
+  // Type definitions are only located in this phase, not built, so most of
+  // these hooks do nothing.
+  void addFuncType(SignatureT) {}
+  void addStructType(StructT) {}
+  void addArrayType(ArrayT) {}
+  void setOpen() {}
+  Result<> addSubtype(Index) { return Ok{}; }
+  void finishSubtype(Name name, Index pos) {
+    const auto next = Index(subtypeDefs.size());
+    subtypeDefs.push_back({name, pos, next});
+  }
+  size_t getRecGroupStartIndex() { return 0; }
+  void addRecGroup(Index, size_t) {}
+  void finishDeftype(Index pos) {
+    const auto next = Index(typeDefs.size());
+    typeDefs.push_back({{}, pos, next});
+  }
+
+  std::vector<char> makeDataString() { return {}; }
+  void appendDataString(std::vector<char>& data, std::string_view str) {
+    data.insert(data.end(), str.begin(), str.end());
+  }
+
+  // A missing maximum is recorded as Memory::kUnlimitedSize.
+  Limits makeLimits(uint64_t n, std::optional<uint64_t> m) {
+    if (m) {
+      return Limits{n, *m};
+    }
+    return Limits{n, Memory::kUnlimitedSize};
+  }
+  // Both limits are the data size in units of Memory::kPageSize, rounded up.
+  Limits getLimitsFromData(const std::vector<char>& data) {
+    const uint64_t pages =
+      (data.size() + Memory::kPageSize - 1) / Memory::kPageSize;
+    return Limits{pages, pages};
+  }
+
+  MemType makeMemType(Type type, Limits limits, bool shared) {
+    return MemType{type, limits, shared};
+  }
+
+  // Record the position of every type use that has no explicit type; the
+  // params and results are not inspected in this phase.
+  Result<TypeUseT>
+  makeTypeUse(Index pos, std::optional<HeapTypeT> type, ParamsT*, ResultsT*) {
+    if (type) {
+      return Ok{};
+    }
+    implicitTypeDefs.push_back(pos);
+    return Ok{};
+  }
+
+  Result<Function*> addFuncDecl(Index pos, Name name, ImportNames* importNames);
+  Result<> addFunc(Name name,
+                   const std::vector<Name>& exports,
+                   ImportNames* import,
+                   TypeUseT type,
+                   std::optional<LocalsT>,
+                   std::optional<InstrsT>,
+                   Index pos);
+
+  Result<Memory*>
+  addMemoryDecl(Index pos, Name name, ImportNames* importNames, MemType type);
+
+  Result<> addMemory(Name name,
+                     const std::vector<Name>& exports,
+                     ImportNames* import,
+                     MemType type,
+                     Index pos);
+
+  Result<> addImplicitData(DataStringT&& data);
+
+  Result<Global*> addGlobalDecl(Index pos, Name name, ImportNames* importNames);
+
+  Result<> addGlobal(Name name,
+                     const std::vector<Name>& exports,
+                     ImportNames* import,
+                     GlobalTypeT,
+                     std::optional<ExprT>,
+                     Index pos);
+
+  Result<> addData(Name name,
+                   MemoryIdxT*,
+                   std::optional<ExprT>,
+                   std::vector<char>&& data,
+                   Index pos);
+};
+
+// Phase 2: Parse type definitions into a TypeBuilder. Reference and tuple
+// types are created as the builder's temporary types, and each finished
+// subtype advances `index` to the next builder slot.
+struct ParseTypeDefsCtx : TypeParserCtx<ParseTypeDefsCtx> {
+  ParseInput in;
+
+  // We update slots in this builder as we parse type definitions.
+  TypeBuilder& builder;
+
+  // Parse the names of types and fields as we go.
+  std::vector<TypeNames> names;
+
+  // The index of the subtype definition we are parsing.
+  Index index = 0;
+
+  ParseTypeDefsCtx(std::string_view in,
+                   TypeBuilder& builder,
+                   const IndexMap& typeIndices)
+    : TypeParserCtx<ParseTypeDefsCtx>(typeIndices), in(in), builder(builder),
+      names(builder.size()) {}
+
+  TypeT makeRefType(HeapTypeT ht, Nullability nullability) {
+    return builder.getTempRefType(ht, nullability);
+  }
+
+  // Takes the list by const reference so the caller's vector is not copied.
+  TypeT makeTupleType(const std::vector<Type>& types) {
+    return builder.getTempTupleType(types);
+  }
+
+  // Return the builder slot for `idx`, or an error when it is out of range.
+  Result<HeapTypeT> getHeapTypeFromIdx(Index idx) {
+    if (idx >= builder.size()) {
+      return in.err("type index out of bounds");
+    }
+    return builder[idx];
+  }
+
+  void addFuncType(SignatureT& type) { builder[index] = type; }
+
+  // Store the struct in the current slot and remember the names of its named
+  // fields, keyed by field index.
+  void addStructType(StructT& type) {
+    auto& [fieldNames, str] = type;
+    builder[index] = str;
+    for (Index i = 0; i < fieldNames.size(); ++i) {
+      if (auto name = fieldNames[i]; name.is()) {
+        names[index].fieldNames[i] = name;
+      }
+    }
+  }
+
+  void addArrayType(ArrayT& type) { builder[index] = type; }
+
+  void setOpen() { builder[index].setOpen(); }
+
+  // Declare the current slot a subtype of slot `super`, or report an error
+  // when `super` is out of range.
+  Result<> addSubtype(Index super) {
+    if (super >= builder.size()) {
+      return in.err("supertype index out of bounds");
+    }
+    builder[index].subTypeOf(builder[super]);
+    return Ok{};
+  }
+
+  // Record the type's name and advance to the next slot. The position is not
+  // needed in this phase, so the parameter is left unnamed.
+  void finishSubtype(Name name, Index) { names[index++].name = name; }
+
+  size_t getRecGroupStartIndex() { return index; }
+
+  void addRecGroup(Index start, size_t len) {
+    builder.createRecGroup(start, len);
+  }
+
+  void finishDeftype(Index) {}
+};
+
+// Phase 3: Parse type uses to find implicitly defined types. Each type use is
+// mapped to the first known heap type with the same signature, and a new heap
+// type is appended to `types` when none exists yet.
+struct ParseImplicitTypeDefsCtx : TypeParserCtx<ParseImplicitTypeDefsCtx> {
+  using TypeUseT = Ok;
+
+  ParseInput in;
+
+  // Types parsed so far.
+  std::vector<HeapType>& types;
+
+  // Map typeuse positions without an explicit type to the correct type.
+  std::unordered_map<Index, HeapType>& implicitTypes;
+
+  // Map signatures to the first defined heap type they match.
+  std::unordered_map<Signature, HeapType> sigTypes;
+
+  ParseImplicitTypeDefsCtx(std::string_view in,
+                           std::vector<HeapType>& types,
+                           std::unordered_map<Index, HeapType>& implicitTypes,
+                           const IndexMap& typeIndices)
+    : TypeParserCtx<ParseImplicitTypeDefsCtx>(typeIndices), in(in),
+      types(types), implicitTypes(implicitTypes) {
+    // Seed the signature map with signature types that sit alone in their
+    // rec group. insert() keeps the first type seen for each signature.
+    for (auto heapType : types) {
+      if (!heapType.isSignature()) {
+        continue;
+      }
+      if (heapType.getRecGroup().size() != 1) {
+        continue;
+      }
+      sigTypes.insert({heapType.getSignature(), heapType});
+    }
+  }
+
+  // Return the known type at `idx`, or an error when it is out of range.
+  Result<HeapTypeT> getHeapTypeFromIdx(Index idx) {
+    if (idx >= types.size()) {
+      return in.err("type index out of bounds");
+    }
+    return types[idx];
+  }
+
+  // Record in implicitTypes the heap type matching the inline signature of
+  // the type use at `pos`, creating and registering that type first if no
+  // type with the signature is known yet.
+  Result<TypeUseT> makeTypeUse(Index pos,
+                               std::optional<HeapTypeT>,
+                               ParamsT* params,
+                               ResultsT* results) {
+    std::vector<Type> paramTypes;
+    if (params) {
+      paramTypes = getUnnamedTypes(*params);
+    }
+
+    std::vector<Type> resultTypes;
+    if (results) {
+      resultTypes = *results;
+    }
+
+    auto sig = Signature(Type(paramTypes), Type(resultTypes));
+    // HeapType::func is only a placeholder; it is overwritten below whenever
+    // the insertion actually happened.
+    auto insertion = sigTypes.insert({sig, HeapType::func});
+    auto& mapped = insertion.first->second;
+    if (insertion.second) {
+      auto created = HeapType(sig);
+      mapped = created;
+      types.push_back(created);
+    }
+    implicitTypes.insert({pos, mapped});
+
+    return Ok{};
+  }
+};
+
+// Phase 4: Parse and set the types of module elements.
+//
+// Functions and globals already exist in `wasm` when this phase runs; it only
+// fills in their types, parameter names and locals.
+struct ParseModuleTypesCtx : TypeParserCtx<ParseModuleTypesCtx>,
+                             NullInstrParserCtx {
+  // In this phase we have constructed all the types, so we can materialize and
+  // validate them when they are used.
+
+  using GlobalTypeT = GlobalType;
+  using TypeUseT = TypeUse;
+
+  ParseInput in;
+
+  Module& wasm;
+
+  const std::vector<HeapType>& types;
+  const std::unordered_map<Index, HeapType>& implicitTypes;
+
+  // Position of the element being processed; used below to index into
+  // `wasm.functions` and `wasm.globals`.
+  Index index = 0;
+
+  ParseModuleTypesCtx(std::string_view in,
+                      Module& wasm,
+                      const std::vector<HeapType>& types,
+                      const std::unordered_map<Index, HeapType>& implicitTypes,
+                      const IndexMap& typeIndices)
+    : TypeParserCtx<ParseModuleTypesCtx>(typeIndices), in(in), wasm(wasm),
+      types(types), implicitTypes(implicitTypes) {}
+
+  // Bounds-checked lookup into the finished list of types.
+  Result<HeapTypeT> getHeapTypeFromIdx(Index idx) {
+    if (idx < types.size()) {
+      return types[idx];
+    }
+    return in.err("type index out of bounds");
+  }
+
+  // Builds a TypeUse from the explicit type if there is one, and otherwise
+  // from the implicit type recorded for `pos`. The parameter names are
+  // collected in either case.
+  Result<TypeUseT> makeTypeUse(Index pos,
+                               std::optional<HeapTypeT> type,
+                               ParamsT* params,
+                               ResultsT* results) {
+    std::vector<Name> paramNames;
+    if (params) {
+      paramNames.reserve(params->size());
+      for (auto& param : *params) {
+        paramNames.push_back(param.name);
+      }
+    }
+
+    if (!type) {
+      // A type use without an explicit type must have been recorded already.
+      auto found = implicitTypes.find(pos);
+      assert(found != implicitTypes.end());
+      return TypeUse{found->second, paramNames};
+    }
+
+    return TypeUse{*type, paramNames};
+  }
+
+  // Block types with parameters are rejected; otherwise the type is returned
+  // unchanged.
+  Result<HeapType> getBlockTypeFromTypeUse(Index pos, TypeUse use) {
+    assert(use.type.isSignature());
+    const bool hasParams = use.type.getSignature().params != Type::none;
+    if (hasParams) {
+      return in.err(pos, "block parameters not yet supported");
+    }
+    // TODO: Once we support block parameters, return an error here if any of
+    // them are named.
+    return use.type;
+  }
+
+  GlobalTypeT makeGlobalType(Mutability mutability, TypeT type) {
+    return {mutability, type};
+  }
+
+  // Sets the signature of the function at `index`, names its parameters and
+  // adds its declared locals. Fails if the type use is not a signature.
+  Result<> addFunc(Name name,
+                   const std::vector<Name>&,
+                   ImportNames*,
+                   TypeUse type,
+                   std::optional<LocalsT> locals,
+                   std::optional<InstrsT>,
+                   Index pos) {
+    auto& target = wasm.functions[index];
+    if (!type.type.isSignature()) {
+      return in.err(pos, "expected signature type");
+    }
+    target->type = type.type;
+
+    // Only named parameters get a local name.
+    Index paramIdx = 0;
+    for (auto& paramName : type.names) {
+      if (paramName.is()) {
+        target->setLocalName(paramIdx, paramName);
+      }
+      ++paramIdx;
+    }
+
+    if (locals) {
+      for (auto& local : *locals) {
+        Builder::addVar(target.get(), local.name, local.type);
+      }
+    }
+    return Ok{};
+  }
+
+  // Memories need no work in this phase.
+  Result<>
+  addMemory(Name, const std::vector<Name>&, ImportNames*, MemTypeT, Index) {
+    return Ok{};
+  }
+
+  // Implicit data segments need no work in this phase.
+  Result<> addImplicitData(DataStringT&& data) { return Ok{}; }
+
+  // Sets the mutability and value type of the global at `index`.
+  Result<> addGlobal(Name,
+                     const std::vector<Name>&,
+                     ImportNames*,
+                     GlobalType type,
+                     std::optional<ExprT>,
+                     Index) {
+    auto& target = wasm.globals[index];
+    target->mutable_ = type.mutability;
+    target->type = type.type;
+    return Ok{};
+  }
+};
+
+// Phase 5: Parse module element definitions, including instructions.
+//
+// Instructions are not collected by the general parser in this phase. Each
+// make* method forwards to `irBuilder`, and any error it reports is re-issued
+// with the source position of the instruction (see `withLoc`).
+struct ParseDefsCtx : TypeParserCtx<ParseDefsCtx> {
+  using GlobalTypeT = Ok;
+  using TypeUseT = HeapType;
+
+  // Keep track of instructions internally rather than letting the general
+  // parser collect them.
+  using InstrT = Ok;
+  using InstrsT = Ok;
+  using ExprT = Expression*;
+
+  using FieldIdxT = Index;
+  using LocalIdxT = Index;
+  using GlobalIdxT = Name;
+  using MemoryIdxT = Name;
+  using DataIdxT = Name;
+
+  using MemargT = Memarg;
+
+  ParseInput in;
+
+  Module& wasm;
+  Builder builder;
+
+  const std::vector<HeapType>& types;
+  const std::unordered_map<Index, HeapType>& implicitTypes;
+
+  // The index of the current module element.
+  Index index = 0;
+
+  // The current function being parsed, used to create scratch locals, type
+  // local.get, etc.
+  Function* func = nullptr;
+
+  IRBuilder irBuilder;
+
+  // Keeps `func` and the IR builder's notion of the current function in sync.
+  void setFunction(Function* func) {
+    this->func = func;
+    irBuilder.setFunction(func);
+  }
+
+  ParseDefsCtx(std::string_view in,
+               Module& wasm,
+               const std::vector<HeapType>& types,
+               const std::unordered_map<Index, HeapType>& implicitTypes,
+               const IndexMap& typeIndices)
+    : TypeParserCtx(typeIndices), in(in), wasm(wasm), builder(wasm),
+      types(types), implicitTypes(implicitTypes), irBuilder(wasm) {}
+
+  // If `res` holds an error, re-issue it through `in.err` so that the message
+  // carries the location `pos`; otherwise pass `res` through unchanged.
+  template<typename T> Result<T> withLoc(Index pos, Result<T> res) {
+    if (auto err = res.getErr()) {
+      return in.err(pos, err->msg);
+    }
+    return res;
+  }
+
+  // As above, using the current input position.
+  template<typename T> Result<T> withLoc(Result<T> res) {
+    return withLoc(in.getPos(), res);
+  }
+
+  // Builds the block type for a single result type. The vector is taken by
+  // const reference to avoid copying it; exactly one result is expected.
+  HeapType getBlockTypeFromResult(const std::vector<Type>& results) {
+    assert(results.size() == 1);
+    return HeapType(Signature(Type::none, results[0]));
+  }
+
+  Result<HeapType> getBlockTypeFromTypeUse(Index pos, HeapType type) {
+    return type;
+  }
+
+  // The instruction-list hooks are no-ops because `irBuilder` tracks the
+  // instructions itself.
+  Ok makeInstrs() { return Ok{}; }
+
+  void appendInstr(Ok&, InstrT instr) {}
+
+  Result<InstrsT> finishInstrs(Ok&) { return Ok{}; }
+
+  Result<Expression*> instrToExpr(Ok&) { return irBuilder.build(); }
+
+  GlobalTypeT makeGlobalType(Mutability, TypeT) { return Ok{}; }
+
+  // Index and name resolution. Each lookup validates its argument and returns
+  // an error located at the current input position on failure.
+
+  Result<HeapTypeT> getHeapTypeFromIdx(Index idx) {
+    if (idx >= types.size()) {
+      return in.err("type index out of bounds");
+    }
+    return types[idx];
+  }
+
+  Result<Index> getFieldFromIdx(HeapType type, uint32_t idx) {
+    if (!type.isStruct()) {
+      return in.err("expected struct type");
+    }
+    if (idx >= type.getStruct().fields.size()) {
+      return in.err("struct index out of bounds");
+    }
+    return idx;
+  }
+
+  Result<Index> getFieldFromName(HeapType type, Name name) {
+    // TODO: Field names
+    return in.err("symbolic field names not yet supported");
+  }
+
+  Result<Index> getLocalFromIdx(uint32_t idx) {
+    if (!func) {
+      return in.err("cannot access locals outside of a function");
+    }
+    if (idx >= func->getNumLocals()) {
+      return in.err("local index out of bounds");
+    }
+    return idx;
+  }
+
+  Result<Index> getLocalFromName(Name name) {
+    if (!func) {
+      return in.err("cannot access locals outside of a function");
+    }
+    if (!func->hasLocalIndex(name)) {
+      return in.err("local $" + name.toString() + " does not exist");
+    }
+    return func->getLocalIndex(name);
+  }
+
+  Result<Name> getGlobalFromIdx(uint32_t idx) {
+    if (idx >= wasm.globals.size()) {
+      return in.err("global index out of bounds");
+    }
+    return wasm.globals[idx]->name;
+  }
+
+  Result<Name> getGlobalFromName(Name name) {
+    if (!wasm.getGlobalOrNull(name)) {
+      return in.err("global $" + name.toString() + " does not exist");
+    }
+    return name;
+  }
+
+  Result<Name> getMemoryFromIdx(uint32_t idx) {
+    if (idx >= wasm.memories.size()) {
+      return in.err("memory index out of bounds");
+    }
+    return wasm.memories[idx]->name;
+  }
+
+  Result<Name> getMemoryFromName(Name name) {
+    if (!wasm.getMemoryOrNull(name)) {
+      return in.err("memory $" + name.toString() + " does not exist");
+    }
+    return name;
+  }
+
+  Result<Name> getDataFromIdx(uint32_t idx) {
+    if (idx >= wasm.dataSegments.size()) {
+      return in.err("data index out of bounds");
+    }
+    return wasm.dataSegments[idx]->name;
+  }
+
+  Result<Name> getDataFromName(Name name) {
+    if (!wasm.getDataSegmentOrNull(name)) {
+      return in.err("data $" + name.toString() + " does not exist");
+    }
+    return name;
+  }
+
+  // The following members are only declared here; their definitions are not
+  // part of this struct body.
+  Result<TypeUseT> makeTypeUse(Index pos,
+                               std::optional<HeapTypeT> type,
+                               ParamsT* params,
+                               ResultsT* results);
+  Result<> addFunc(Name,
+                   const std::vector<Name>&,
+                   ImportNames*,
+                   TypeUseT,
+                   std::optional<LocalsT>,
+                   std::optional<InstrsT>,
+                   Index pos);
+
+  Result<> addGlobal(Name,
+                     const std::vector<Name>&,
+                     ImportNames*,
+                     GlobalTypeT,
+                     std::optional<ExprT> exp,
+                     Index);
+  Result<>
+  addData(Name, Name* mem, std::optional<ExprT> offset, DataStringT, Index pos);
+
+  // Adds a fresh local of the given type to the current function and returns
+  // its index. Fails if no function is being parsed.
+  Result<Index> addScratchLocal(Index pos, Type type) {
+    if (!func) {
+      return in.err(pos,
+                    "scratch local required, but there is no function context");
+    }
+    Name name = Names::getValidLocalName(*func, "scratch");
+    return Builder::addVar(func, name, type);
+  }
+
+  Result<Expression*> makeExpr(InstrsT& instrs) { return irBuilder.build(); }
+
+  Memarg getMemarg(uint64_t offset, uint32_t align) { return {offset, align}; }
+
+  // Resolves an optional memory name: an explicit name is returned as is, and
+  // otherwise the module's first memory is used. Fails if the module has no
+  // memory.
+  Result<Name> getMemory(Index pos, Name* mem) {
+    if (mem) {
+      return *mem;
+    }
+    if (wasm.memories.empty()) {
+      return in.err(pos, "memory required, but there is no memory");
+    }
+    return wasm.memories[0]->name;
+  }
+
+  // Instruction constructors. Each forwards to the matching IRBuilder method
+  // and attaches `pos` to any error. Those that operate on a memory resolve
+  // it with `getMemory` first.
+
+  Result<> makeBlock(Index pos, std::optional<Name> label, HeapType type) {
+    // TODO: validate labels?
+    // TODO: Move error on input types to here?
+    return withLoc(pos,
+                   irBuilder.makeBlock(label ? *label : Name{},
+                                       type.getSignature().results));
+  }
+
+  Result<> finishBlock(Index pos, InstrsT) {
+    return withLoc(pos, irBuilder.visitEnd());
+  }
+
+  Result<> makeUnreachable(Index pos) {
+    return withLoc(pos, irBuilder.makeUnreachable());
+  }
+
+  Result<> makeNop(Index pos) { return withLoc(pos, irBuilder.makeNop()); }
+
+  Result<> makeBinary(Index pos, BinaryOp op) {
+    return withLoc(pos, irBuilder.makeBinary(op));
+  }
+
+  Result<> makeUnary(Index pos, UnaryOp op) {
+    return withLoc(pos, irBuilder.makeUnary(op));
+  }
+
+  // A select may be annotated with at most one result type.
+  Result<> makeSelect(Index pos, std::vector<Type>* res) {
+    if (res && res->size()) {
+      if (res->size() > 1) {
+        return in.err(pos, "select may not have more than one result type");
+      }
+      return withLoc(pos, irBuilder.makeSelect((*res)[0]));
+    }
+    return withLoc(pos, irBuilder.makeSelect());
+  }
+
+  Result<> makeDrop(Index pos) { return withLoc(pos, irBuilder.makeDrop()); }
+
+  Result<> makeMemorySize(Index pos, Name* mem) {
+    auto m = getMemory(pos, mem);
+    CHECK_ERR(m);
+    return withLoc(pos, irBuilder.makeMemorySize(*m));
+  }
+
+  Result<> makeMemoryGrow(Index pos, Name* mem) {
+    auto m = getMemory(pos, mem);
+    CHECK_ERR(m);
+    return withLoc(pos, irBuilder.makeMemoryGrow(*m));
+  }
+
+  Result<> makeLocalGet(Index pos, Index local) {
+    return withLoc(pos, irBuilder.makeLocalGet(local));
+  }
+
+  Result<> makeLocalTee(Index pos, Index local) {
+    return withLoc(pos, irBuilder.makeLocalTee(local));
+  }
+
+  Result<> makeLocalSet(Index pos, Index local) {
+    return withLoc(pos, irBuilder.makeLocalSet(local));
+  }
+
+  Result<> makeGlobalGet(Index pos, Name global) {
+    return withLoc(pos, irBuilder.makeGlobalGet(global));
+  }
+
+  Result<> makeGlobalSet(Index pos, Name global) {
+    assert(wasm.getGlobalOrNull(global));
+    return withLoc(pos, irBuilder.makeGlobalSet(global));
+  }
+
+  Result<> makeI32Const(Index pos, uint32_t c) {
+    return withLoc(pos, irBuilder.makeConst(Literal(c)));
+  }
+
+  Result<> makeI64Const(Index pos, uint64_t c) {
+    return withLoc(pos, irBuilder.makeConst(Literal(c)));
+  }
+
+  Result<> makeF32Const(Index pos, float c) {
+    return withLoc(pos, irBuilder.makeConst(Literal(c)));
+  }
+
+  Result<> makeF64Const(Index pos, double c) {
+    return withLoc(pos, irBuilder.makeConst(Literal(c)));
+  }
+
+  // Note that the atomic variants of load and store do not pass
+  // `memarg.align` on to the IR builder.
+  Result<> makeLoad(Index pos,
+                    Type type,
+                    bool signed_,
+                    int bytes,
+                    bool isAtomic,
+                    Name* mem,
+                    Memarg memarg) {
+    auto m = getMemory(pos, mem);
+    CHECK_ERR(m);
+    if (isAtomic) {
+      return withLoc(pos,
+                     irBuilder.makeAtomicLoad(bytes, memarg.offset, type, *m));
+    }
+    return withLoc(pos,
+                   irBuilder.makeLoad(
+                     bytes, signed_, memarg.offset, memarg.align, type, *m));
+  }
+
+  Result<> makeStore(
+    Index pos, Type type, int bytes, bool isAtomic, Name* mem, Memarg memarg) {
+    auto m = getMemory(pos, mem);
+    CHECK_ERR(m);
+    if (isAtomic) {
+      return withLoc(pos,
+                     irBuilder.makeAtomicStore(bytes, memarg.offset, type, *m));
+    }
+    return withLoc(
+      pos, irBuilder.makeStore(bytes, memarg.offset, memarg.align, type, *m));
+  }
+
+  Result<> makeAtomicRMW(
+    Index pos, AtomicRMWOp op, Type type, int bytes, Name* mem, Memarg memarg) {
+    auto m = getMemory(pos, mem);
+    CHECK_ERR(m);
+    return withLoc(pos,
+                   irBuilder.makeAtomicRMW(op, bytes, memarg.offset, type, *m));
+  }
+
+  Result<>
+  makeAtomicCmpxchg(Index pos, Type type, int bytes, Name* mem, Memarg memarg) {
+    auto m = getMemory(pos, mem);
+    CHECK_ERR(m);
+    return withLoc(pos,
+                   irBuilder.makeAtomicCmpxchg(bytes, memarg.offset, type, *m));
+  }
+
+  Result<> makeAtomicWait(Index pos, Type type, Name* mem, Memarg memarg) {
+    auto m = getMemory(pos, mem);
+    CHECK_ERR(m);
+    return withLoc(pos, irBuilder.makeAtomicWait(type, memarg.offset, *m));
+  }
+
+  Result<> makeAtomicNotify(Index pos, Name* mem, Memarg memarg) {
+    auto m = getMemory(pos, mem);
+    CHECK_ERR(m);
+    return withLoc(pos, irBuilder.makeAtomicNotify(memarg.offset, *m));
+  }
+
+  Result<> makeAtomicFence(Index pos) {
+    return withLoc(pos, irBuilder.makeAtomicFence());
+  }
+
+  Result<> makeSIMDExtract(Index pos, SIMDExtractOp op, uint8_t lane) {
+    return withLoc(pos, irBuilder.makeSIMDExtract(op, lane));
+  }
+
+  Result<> makeSIMDReplace(Index pos, SIMDReplaceOp op, uint8_t lane) {
+    return withLoc(pos, irBuilder.makeSIMDReplace(op, lane));
+  }
+
+  Result<> makeSIMDShuffle(Index pos, const std::array<uint8_t, 16>& lanes) {
+    return withLoc(pos, irBuilder.makeSIMDShuffle(lanes));
+  }
+
+  Result<> makeSIMDTernary(Index pos, SIMDTernaryOp op) {
+    return withLoc(pos, irBuilder.makeSIMDTernary(op));
+  }
+
+  Result<> makeSIMDShift(Index pos, SIMDShiftOp op) {
+    return withLoc(pos, irBuilder.makeSIMDShift(op));
+  }
+
+  Result<> makeSIMDLoad(Index pos, SIMDLoadOp op, Name* mem, Memarg memarg) {
+    auto m = getMemory(pos, mem);
+    CHECK_ERR(m);
+    return withLoc(pos,
+                   irBuilder.makeSIMDLoad(op, memarg.offset, memarg.align, *m));
+  }
+
+  Result<> makeSIMDLoadStoreLane(
+    Index pos, SIMDLoadStoreLaneOp op, Name* mem, Memarg memarg, uint8_t lane) {
+    auto m = getMemory(pos, mem);
+    CHECK_ERR(m);
+    return withLoc(pos,
+                   irBuilder.makeSIMDLoadStoreLane(
+                     op, memarg.offset, memarg.align, lane, *m));
+  }
+
+  Result<> makeMemoryInit(Index pos, Name* mem, Name data) {
+    auto m = getMemory(pos, mem);
+    CHECK_ERR(m);
+    return withLoc(pos, irBuilder.makeMemoryInit(data, *m));
+  }
+
+  Result<> makeDataDrop(Index pos, Name data) {
+    return withLoc(pos, irBuilder.makeDataDrop(data));
+  }
+
+  Result<> makeMemoryCopy(Index pos, Name* destMem, Name* srcMem) {
+    auto destMemory = getMemory(pos, destMem);
+    CHECK_ERR(destMemory);
+    auto srcMemory = getMemory(pos, srcMem);
+    CHECK_ERR(srcMemory);
+    return withLoc(pos, irBuilder.makeMemoryCopy(*destMemory, *srcMemory));
+  }
+
+  Result<> makeMemoryFill(Index pos, Name* mem) {
+    auto m = getMemory(pos, mem);
+    CHECK_ERR(m);
+    return withLoc(pos, irBuilder.makeMemoryFill(*m));
+  }
+
+  Result<> makeReturn(Index pos) {
+    return withLoc(pos, irBuilder.makeReturn());
+  }
+
+  Result<> makeRefNull(Index pos, HeapType type) {
+    return withLoc(pos, irBuilder.makeRefNull(type));
+  }
+
+  Result<> makeRefIsNull(Index pos) {
+    return withLoc(pos, irBuilder.makeRefIsNull());
+  }
+
+  Result<> makeRefEq(Index pos) { return withLoc(pos, irBuilder.makeRefEq()); }
+
+  Result<> makeRefI31(Index pos) {
+    return withLoc(pos, irBuilder.makeRefI31());
+  }
+
+  Result<> makeI31Get(Index pos, bool signed_) {
+    return withLoc(pos, irBuilder.makeI31Get(signed_));
+  }
+
+  Result<> makeStructNew(Index pos, HeapType type) {
+    return withLoc(pos, irBuilder.makeStructNew(type));
+  }
+
+  Result<> makeStructNewDefault(Index pos, HeapType type) {
+    return withLoc(pos, irBuilder.makeStructNewDefault(type));
+  }
+
+  Result<> makeStructGet(Index pos, HeapType type, Index field, bool signed_) {
+    return withLoc(pos, irBuilder.makeStructGet(type, field, signed_));
+  }
+
+  Result<> makeStructSet(Index pos, HeapType type, Index field) {
+    return withLoc(pos, irBuilder.makeStructSet(type, field));
+  }
+
+  Result<> makeArrayNew(Index pos, HeapType type) {
+    return withLoc(pos, irBuilder.makeArrayNew(type));
+  }
+
+  Result<> makeArrayNewDefault(Index pos, HeapType type) {
+    return withLoc(pos, irBuilder.makeArrayNewDefault(type));
+  }
+
+  Result<> makeArrayNewData(Index pos, HeapType type, Name data) {
+    return withLoc(pos, irBuilder.makeArrayNewData(type, data));
+  }
+
+  Result<> makeArrayNewElem(Index pos, HeapType type, Name elem) {
+    return withLoc(pos, irBuilder.makeArrayNewElem(type, elem));
+  }
+
+  Result<> makeArrayGet(Index pos, HeapType type, bool signed_) {
+    return withLoc(pos, irBuilder.makeArrayGet(type, signed_));
+  }
+
+  Result<> makeArraySet(Index pos, HeapType type) {
+    return withLoc(pos, irBuilder.makeArraySet(type));
+  }
+
+  Result<> makeArrayLen(Index pos) {
+    return withLoc(pos, irBuilder.makeArrayLen());
+  }
+
+  Result<> makeArrayCopy(Index pos, HeapType destType, HeapType srcType) {
+    return withLoc(pos, irBuilder.makeArrayCopy(destType, srcType));
+  }
+
+  Result<> makeArrayFill(Index pos, HeapType type) {
+    return withLoc(pos, irBuilder.makeArrayFill(type));
+  }
+};
+
+} // namespace wasm::WATParser
+
+#endif // parser_context_h
diff --git a/src/parser/input-impl.h b/src/parser/input-impl.h
new file mode 100644
index 000000000..35a39b2f3
--- /dev/null
+++ b/src/parser/input-impl.h
@@ -0,0 +1,273 @@
+/*
+ * Copyright 2023 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "input.h"
+
+#ifndef parser_input_impl_h
+#define parser_input_impl_h
+
+// Returns the current token without consuming it, or nothing if the input is
+// exhausted.
+inline std::optional<Token> ParseInput::peek() {
+  if (empty()) {
+    return std::nullopt;
+  }
+  return *lexer;
+}
+
+// Consumes the next token if it is a left parenthesis and reports whether it
+// did so.
+inline bool ParseInput::takeLParen() {
+  if (auto tok = peek(); tok && tok->isLParen()) {
+    ++lexer;
+    return true;
+  }
+  return false;
+}
+
+// Consumes the next token if it is a right parenthesis and reports whether it
+// did so.
+inline bool ParseInput::takeRParen() {
+  if (auto tok = peek(); tok && tok->isRParen()) {
+    ++lexer;
+    return true;
+  }
+  return false;
+}
+
+// Skips tokens until the next one is a parenthesis of either kind, which is
+// left unconsumed. Returns false if the input ends first.
+inline bool ParseInput::takeUntilParen() {
+  while (auto tok = peek()) {
+    if (tok->isLParen() || tok->isRParen()) {
+      return true;
+    }
+    ++lexer;
+  }
+  return false;
+}
+
+// Consumes the next token if it is an identifier and returns it as a Name.
+inline std::optional<Name> ParseInput::takeID() {
+  auto tok = peek();
+  if (!tok) {
+    return {};
+  }
+  auto id = tok->getID();
+  if (!id) {
+    return {};
+  }
+  ++lexer;
+  // See comment on takeName.
+  return Name(std::string(*id));
+}
+
+// Consumes the next token if it is a keyword and returns its text.
+inline std::optional<std::string_view> ParseInput::takeKeyword() {
+  auto tok = peek();
+  if (!tok) {
+    return {};
+  }
+  auto word = tok->getKeyword();
+  if (!word) {
+    return {};
+  }
+  ++lexer;
+  return *word;
+}
+
+// Consumes the next token only if it is exactly the keyword `expected`.
+inline bool ParseInput::takeKeyword(std::string_view expected) {
+  auto tok = peek();
+  if (!tok) {
+    return false;
+  }
+  auto word = tok->getKeyword();
+  if (!word || !(*word == expected)) {
+    return false;
+  }
+  ++lexer;
+  return true;
+}
+
+// Consumes a keyword of the form `offset=N` and returns N. The text after the
+// `=` is lexed on its own and must consist of exactly one token that is
+// accepted by getU64(); otherwise nothing is consumed.
+inline std::optional<uint64_t> ParseInput::takeOffset() {
+  auto tok = peek();
+  if (!tok) {
+    return std::nullopt;
+  }
+  auto word = tok->getKeyword();
+  if (!word) {
+    return std::nullopt;
+  }
+  if (word->substr(0, 7) != "offset="sv) {
+    return {};
+  }
+  Lexer valueLexer(word->substr(7));
+  if (valueLexer == valueLexer.end()) {
+    return {};
+  }
+  auto value = valueLexer->getU64();
+  if (!value) {
+    return std::nullopt;
+  }
+  ++valueLexer;
+  // Reject trailing tokens after the number.
+  if (!(valueLexer == valueLexer.end())) {
+    return std::nullopt;
+  }
+  ++lexer;
+  return value;
+}
+
+// Consumes a keyword of the form `align=N` and returns N. The text after the
+// `=` is lexed on its own and must consist of exactly one token that is
+// accepted by getU32(); otherwise nothing is consumed.
+inline std::optional<uint32_t> ParseInput::takeAlign() {
+  auto tok = peek();
+  if (!tok) {
+    return {};
+  }
+  auto word = tok->getKeyword();
+  if (!word) {
+    return {};
+  }
+  if (word->substr(0, 6) != "align="sv) {
+    return {};
+  }
+  Lexer valueLexer(word->substr(6));
+  if (valueLexer == valueLexer.end()) {
+    return {};
+  }
+  auto value = valueLexer->getU32();
+  if (!value) {
+    return {};
+  }
+  ++valueLexer;
+  // Reject trailing tokens after the number.
+  if (!(valueLexer == valueLexer.end())) {
+    return {};
+  }
+  ++lexer;
+  return value;
+}
+
+// Consumes the next token if getU64() yields a value for it.
+inline std::optional<uint64_t> ParseInput::takeU64() {
+  auto tok = peek();
+  if (!tok) {
+    return std::nullopt;
+  }
+  auto value = tok->getU64();
+  if (!value) {
+    return std::nullopt;
+  }
+  ++lexer;
+  return value;
+}
+
+// Consumes the next token if getS64() yields a value for it.
+inline std::optional<int64_t> ParseInput::takeS64() {
+  auto tok = peek();
+  if (!tok) {
+    return {};
+  }
+  auto value = tok->getS64();
+  if (!value) {
+    return {};
+  }
+  ++lexer;
+  return value;
+}
+
+// Consumes the next token if getI64() yields a value for it.
+inline std::optional<int64_t> ParseInput::takeI64() {
+  auto tok = peek();
+  if (!tok) {
+    return {};
+  }
+  auto value = tok->getI64();
+  if (!value) {
+    return {};
+  }
+  ++lexer;
+  return value;
+}
+
+// Consumes the next token if getU32() yields a value for it.
+inline std::optional<uint32_t> ParseInput::takeU32() {
+  auto tok = peek();
+  if (!tok) {
+    return std::nullopt;
+  }
+  auto value = tok->getU32();
+  if (!value) {
+    return std::nullopt;
+  }
+  ++lexer;
+  return value;
+}
+
+// Consumes the next token if getS32() yields a value for it.
+inline std::optional<int32_t> ParseInput::takeS32() {
+  auto tok = peek();
+  if (!tok) {
+    return {};
+  }
+  auto value = tok->getS32();
+  if (!value) {
+    return {};
+  }
+  ++lexer;
+  return value;
+}
+
+// Consumes the next token if getI32() yields a value for it.
+inline std::optional<int32_t> ParseInput::takeI32() {
+  auto tok = peek();
+  if (!tok) {
+    return {};
+  }
+  auto value = tok->getI32();
+  if (!value) {
+    return {};
+  }
+  ++lexer;
+  return value;
+}
+
+// Consumes the next token if getU32() yields a value that also fits in a
+// uint8_t. A larger value is left unconsumed.
+inline std::optional<uint8_t> ParseInput::takeU8() {
+  auto tok = peek();
+  if (!tok) {
+    return {};
+  }
+  auto value = tok->getU32();
+  if (!value) {
+    return {};
+  }
+  if (*value > std::numeric_limits<uint8_t>::max()) {
+    return {};
+  }
+  ++lexer;
+  return uint8_t(*value);
+}
+
+// Consumes the next token if getF64() yields a value for it.
+inline std::optional<double> ParseInput::takeF64() {
+  auto tok = peek();
+  if (!tok) {
+    return std::nullopt;
+  }
+  auto value = tok->getF64();
+  if (!value) {
+    return std::nullopt;
+  }
+  ++lexer;
+  return value;
+}
+
+// Consumes the next token if getF32() yields a value for it.
+inline std::optional<float> ParseInput::takeF32() {
+  auto tok = peek();
+  if (!tok) {
+    return std::nullopt;
+  }
+  auto value = tok->getF32();
+  if (!value) {
+    return std::nullopt;
+  }
+  ++lexer;
+  return value;
+}
+
+// Consumes the next token if getString() yields a value for it.
+inline std::optional<std::string_view> ParseInput::takeString() {
+  auto tok = peek();
+  if (!tok) {
+    return {};
+  }
+  auto str = tok->getString();
+  if (!str) {
+    return {};
+  }
+  ++lexer;
+  return str;
+}
+
+// Consumes a string token and returns its contents as a Name.
+inline std::optional<Name> ParseInput::takeName() {
+  // TODO: Move this to lexer and validate UTF.
+  auto str = takeString();
+  if (!str) {
+    return {};
+  }
+  // Copy to a std::string to make sure we have a null terminator, otherwise
+  // the `Name` constructor won't work correctly.
+  // TODO: Update `Name` to use string_view instead of char* and/or to take
+  // rvalue strings to avoid this extra copy.
+  return Name(std::string(*str));
+}
+
+// Consumes `(` followed by the keyword `expected`. If either part is missing,
+// the lexer is restored to where it was on entry, so nothing is consumed.
+inline bool ParseInput::takeSExprStart(std::string_view expected) {
+  auto saved = lexer;
+  const bool matched = takeLParen() && takeKeyword(expected);
+  if (!matched) {
+    lexer = saved;
+  }
+  return matched;
+}
+
+// Returns the lexer index minus the length of the current token's span, or
+// just the lexer index when no token is left.
+inline Index ParseInput::getPos() {
+  auto tok = peek();
+  if (!tok) {
+    return lexer.getIndex();
+  }
+  return lexer.getIndex() - tok->span.size();
+}
+
+inline Err ParseInput::err(Index pos, std::string reason) {
+ std::stringstream msg;
+ msg << lexer.position(pos) << ": error: " << reason;
+ return Err{msg.str()};
+}
+
+#endif // parser_input_impl_h
diff --git a/src/parser/input.h b/src/parser/input.h
new file mode 100644
index 000000000..5c7c57d20
--- /dev/null
+++ b/src/parser/input.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2023 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef parser_input_h
+#define parser_input_h
+
+#include "lexer.h"
+#include "support/result.h"
+#include "wasm.h"
+
+namespace wasm::WATParser {
+
+using namespace std::string_view_literals;
+
+// Wraps a lexer and provides utilities for consuming tokens.
+//
+// Each take* method consumes the next token only when it matches what the
+// method is looking for, and reports failure through its return value.
+struct ParseInput {
+  Lexer lexer;
+
+  // Start lexing at the beginning of `in`.
+  explicit ParseInput(std::string_view in) : lexer(in) {}
+
+  // Start lexing `in` at offset `index`.
+  ParseInput(std::string_view in, size_t index) : ParseInput(in) {
+    lexer.setIndex(index);
+  }
+
+  // Copy the lexer of `other` and reposition the copy at `index`.
+  ParseInput(const ParseInput& other, size_t index) : lexer(other.lexer) {
+    lexer.setIndex(index);
+  }
+
+  bool empty() { return lexer.empty(); }
+
+  // Token consumers.
+  std::optional<Token> peek();
+  bool takeLParen();
+  bool takeRParen();
+  bool takeUntilParen();
+  std::optional<Name> takeID();
+  std::optional<std::string_view> takeKeyword();
+  bool takeKeyword(std::string_view expected);
+  std::optional<uint64_t> takeOffset();
+  std::optional<uint32_t> takeAlign();
+  std::optional<uint64_t> takeU64();
+  std::optional<int64_t> takeS64();
+  std::optional<int64_t> takeI64();
+  std::optional<uint32_t> takeU32();
+  std::optional<int32_t> takeS32();
+  std::optional<int32_t> takeI32();
+  std::optional<uint8_t> takeU8();
+  std::optional<double> takeF64();
+  std::optional<float> takeF32();
+  std::optional<std::string_view> takeString();
+  std::optional<Name> takeName();
+  bool takeSExprStart(std::string_view expected);
+
+  // Position reporting and error construction.
+  Index getPos();
+  [[nodiscard]] Err err(Index pos, std::string reason);
+  // Convenience overload that reports the error at the current position.
+  [[nodiscard]] Err err(std::string reason) {
+    const Index pos = getPos();
+    return err(pos, reason);
+  }
+};
+
+#include "input-impl.h"
+
+} // namespace wasm::WATParser
+
+#endif // parser_input_h
diff --git a/src/wasm/wat-lexer.cpp b/src/parser/lexer.cpp
index 264ffd40c..0796013fe 100644
--- a/src/wasm/wat-lexer.cpp
+++ b/src/parser/lexer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright 2022 WebAssembly Community Group participants
+ * Copyright 2023 WebAssembly Community Group participants
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -22,7 +22,7 @@
#include <sstream>
#include <variant>
-#include "wat-lexer.h"
+#include "lexer.h"
using namespace std::string_view_literals;
diff --git a/src/wat-lexer.h b/src/parser/lexer.h
index 7b6c93552..67d29b002 100644
--- a/src/wat-lexer.h
+++ b/src/parser/lexer.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2022 WebAssembly Community Group participants
+ * Copyright 2023 WebAssembly Community Group participants
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -23,8 +23,8 @@
#include <string_view>
#include <variant>
-#ifndef wasm_wat_lexer_h
-#define wasm_wat_lexer_h
+#ifndef parser_lexer_h
+#define parser_lexer_h
namespace wasm::WATParser {
@@ -224,4 +224,4 @@ private:
} // namespace wasm::WATParser
-#endif // wasm_wat_lexer_h
+#endif // parser_lexer_h
diff --git a/src/wasm/wat-parser.cpp b/src/parser/parsers.h
index b31019811..5f9f23a2a 100644
--- a/src/wasm/wat-parser.cpp
+++ b/src/parser/parsers.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2022 WebAssembly Community Group participants
+ * Copyright 2023 WebAssembly Community Group participants
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,1818 +14,15 @@
* limitations under the License.
*/
-#include "wat-parser.h"
-#include "ir/names.h"
-#include "support/name.h"
-#include "wasm-builder.h"
-#include "wasm-ir-builder.h"
-#include "wasm-type.h"
-#include "wasm.h"
-#include "wat-lexer.h"
-
-// The WebAssembly text format is recursive in the sense that elements may be
-// referred to before they are declared. Furthermore, elements may be referred
-// to by index or by name. As a result, we need to parse text modules in
-// multiple phases.
-//
-// In the first phase, we find all of the module element declarations and
-// record, but do not interpret, the input spans of their corresponding
-// definitions. This phase establishes the indices and names of each module
-// element so that subsequent phases can look them up.
-//
-// The second phase parses type definitions to construct the types used in the
-// module. This has to be its own phase because we have no way to refer to a
-// type before it has been built along with all the other types, unlike for
-// other module elements that can be referred to by name before their
-// definitions have been parsed.
-//
-// The third phase further parses and constructs types implicitly defined by
-// type uses in functions, blocks, and call_indirect instructions. These
-// implicitly defined types may be referred to by index elsewhere.
-//
-// The fourth phase parses and sets the types of globals, functions, and other
-// top-level module elements. These types need to be set before we parse
-// instructions because they determine the types of instructions such as
-// global.get and ref.func.
-//
-// The fifth and final phase parses the remaining contents of all module
-// elements, including instructions.
-//
-// Each phase of parsing gets its own context type that is passed to the
-// individual parsing functions. There is a parsing function for each element of
-// the grammar given in the spec. Parsing functions are templatized so that they
-// may be passed the appropriate context type and return the correct result type
-// for each phase.
+#ifndef parser_parsers_h
+#define parser_parsers_h
-using namespace std::string_view_literals;
+#include "common.h"
+#include "input.h"
namespace wasm::WATParser {
-namespace {
-
-// ============
-// Parser Input
-// ============
-
-// Wraps a lexer and provides utilities for consuming tokens.
-struct ParseInput {
- Lexer lexer;
-
- explicit ParseInput(std::string_view in) : lexer(in) {}
-
- ParseInput(std::string_view in, size_t index) : lexer(in) {
- lexer.setIndex(index);
- }
-
- ParseInput(const ParseInput& other, size_t index) : lexer(other.lexer) {
- lexer.setIndex(index);
- }
-
- bool empty() { return lexer.empty(); }
-
- std::optional<Token> peek() {
- if (!empty()) {
- return *lexer;
- }
- return {};
- }
-
- bool takeLParen() {
- auto t = peek();
- if (!t || !t->isLParen()) {
- return false;
- }
- ++lexer;
- return true;
- }
-
- bool takeRParen() {
- auto t = peek();
- if (!t || !t->isRParen()) {
- return false;
- }
- ++lexer;
- return true;
- }
-
- bool takeUntilParen() {
- while (true) {
- auto t = peek();
- if (!t) {
- return false;
- }
- if (t->isLParen() || t->isRParen()) {
- return true;
- }
- ++lexer;
- }
- }
-
- std::optional<Name> takeID() {
- if (auto t = peek()) {
- if (auto id = t->getID()) {
- ++lexer;
- // See comment on takeName.
- return Name(std::string(*id));
- }
- }
- return {};
- }
-
- std::optional<std::string_view> takeKeyword() {
- if (auto t = peek()) {
- if (auto keyword = t->getKeyword()) {
- ++lexer;
- return *keyword;
- }
- }
- return {};
- }
-
- bool takeKeyword(std::string_view expected) {
- if (auto t = peek()) {
- if (auto keyword = t->getKeyword()) {
- if (*keyword == expected) {
- ++lexer;
- return true;
- }
- }
- }
- return false;
- }
-
- std::optional<uint64_t> takeOffset() {
- if (auto t = peek()) {
- if (auto keyword = t->getKeyword()) {
- if (keyword->substr(0, 7) != "offset="sv) {
- return {};
- }
- Lexer subLexer(keyword->substr(7));
- if (subLexer == subLexer.end()) {
- return {};
- }
- if (auto o = subLexer->getU64()) {
- ++subLexer;
- if (subLexer == subLexer.end()) {
- ++lexer;
- return o;
- }
- }
- }
- }
- return std::nullopt;
- }
-
- std::optional<uint32_t> takeAlign() {
- if (auto t = peek()) {
- if (auto keyword = t->getKeyword()) {
- if (keyword->substr(0, 6) != "align="sv) {
- return {};
- }
- Lexer subLexer(keyword->substr(6));
- if (subLexer == subLexer.end()) {
- return {};
- }
- if (auto a = subLexer->getU32()) {
- ++subLexer;
- if (subLexer == subLexer.end()) {
- ++lexer;
- return a;
- }
- }
- }
- }
- return {};
- }
-
- std::optional<uint64_t> takeU64() {
- if (auto t = peek()) {
- if (auto n = t->getU64()) {
- ++lexer;
- return n;
- }
- }
- return std::nullopt;
- }
-
- std::optional<int64_t> takeS64() {
- if (auto t = peek()) {
- if (auto n = t->getS64()) {
- ++lexer;
- return n;
- }
- }
- return {};
- }
-
- std::optional<int64_t> takeI64() {
- if (auto t = peek()) {
- if (auto n = t->getI64()) {
- ++lexer;
- return n;
- }
- }
- return {};
- }
-
- std::optional<uint32_t> takeU32() {
- if (auto t = peek()) {
- if (auto n = t->getU32()) {
- ++lexer;
- return n;
- }
- }
- return std::nullopt;
- }
-
- std::optional<int32_t> takeS32() {
- if (auto t = peek()) {
- if (auto n = t->getS32()) {
- ++lexer;
- return n;
- }
- }
- return {};
- }
-
- std::optional<int32_t> takeI32() {
- if (auto t = peek()) {
- if (auto n = t->getI32()) {
- ++lexer;
- return n;
- }
- }
- return {};
- }
-
- std::optional<uint8_t> takeU8() {
- if (auto t = peek()) {
- if (auto n = t->getU32()) {
- if (n <= std::numeric_limits<uint8_t>::max()) {
- ++lexer;
- return uint8_t(*n);
- }
- }
- }
- return {};
- }
-
- std::optional<double> takeF64() {
- if (auto t = peek()) {
- if (auto d = t->getF64()) {
- ++lexer;
- return d;
- }
- }
- return std::nullopt;
- }
-
- std::optional<float> takeF32() {
- if (auto t = peek()) {
- if (auto f = t->getF32()) {
- ++lexer;
- return f;
- }
- }
- return std::nullopt;
- }
-
- std::optional<std::string_view> takeString() {
- if (auto t = peek()) {
- if (auto s = t->getString()) {
- ++lexer;
- return s;
- }
- }
- return {};
- }
-
- std::optional<Name> takeName() {
- // TODO: Move this to lexer and validate UTF.
- if (auto str = takeString()) {
- // Copy to a std::string to make sure we have a null terminator, otherwise
- // the `Name` constructor won't work correctly.
- // TODO: Update `Name` to use string_view instead of char* and/or to take
- // rvalue strings to avoid this extra copy.
- return Name(std::string(*str));
- }
- return {};
- }
-
- bool takeSExprStart(std::string_view expected) {
- auto original = lexer;
- if (takeLParen() && takeKeyword(expected)) {
- return true;
- }
- lexer = original;
- return false;
- }
-
- Index getPos() {
- if (auto t = peek()) {
- return lexer.getIndex() - t->span.size();
- }
- return lexer.getIndex();
- }
-
- [[nodiscard]] Err err(Index pos, std::string reason) {
- std::stringstream msg;
- msg << lexer.position(pos) << ": error: " << reason;
- return Err{msg.str()};
- }
-
- [[nodiscard]] Err err(std::string reason) { return err(getPos(), reason); }
-};
-
-// =========
-// Utilities
-// =========
-
-// The location, possible name, and index in the respective module index space
-// of a module-level definition in the input.
-struct DefPos {
- Name name;
- Index pos;
- Index index;
-};
-
-struct GlobalType {
- Mutability mutability;
- Type type;
-};
-
-// A signature type and parameter names (possibly empty), used for parsing
-// function types.
-struct TypeUse {
- HeapType type;
- std::vector<Name> names;
-};
-
-struct ImportNames {
- Name mod;
- Name nm;
-};
-
-struct Limits {
- uint64_t initial;
- uint64_t max;
-};
-
-struct MemType {
- Type type;
- Limits limits;
- bool shared;
-};
-
-struct Memarg {
- uint64_t offset;
- uint32_t align;
-};
-
-// RAII utility for temporarily changing the parsing position of a parsing
-// context.
-template<typename Ctx> struct WithPosition {
- Ctx& ctx;
- Index original;
-
- WithPosition(Ctx& ctx, Index pos) : ctx(ctx), original(ctx.in.getPos()) {
- ctx.in.lexer.setIndex(pos);
- }
-
- ~WithPosition() { ctx.in.lexer.setIndex(original); }
-};
-
-// Deduction guide to satisfy -Wctad-maybe-unsupported.
-template<typename Ctx> WithPosition(Ctx& ctx, Index) -> WithPosition<Ctx>;
-
-using IndexMap = std::unordered_map<Name, Index>;
-
-void applyImportNames(Importable& item, ImportNames* names) {
- if (names) {
- item.module = names->mod;
- item.base = names->nm;
- }
-}
-
-Result<> addExports(ParseInput& in,
- Module& wasm,
- const Named* item,
- const std::vector<Name>& exports,
- ExternalKind kind) {
- for (auto name : exports) {
- if (wasm.getExportOrNull(name)) {
- // TODO: Fix error location
- return in.err("repeated export name");
- }
- wasm.addExport(Builder(wasm).makeExport(name, item->name, kind));
- }
- return Ok{};
-}
-
-Result<IndexMap> createIndexMap(ParseInput& in,
- const std::vector<DefPos>& defs) {
- IndexMap indices;
- for (auto& def : defs) {
- if (def.name.is()) {
- if (!indices.insert({def.name, def.index}).second) {
- return in.err(def.pos, "duplicate element name");
- }
- }
- }
- return indices;
-}
-
-std::vector<Type> getUnnamedTypes(const std::vector<NameType>& named) {
- std::vector<Type> types;
- types.reserve(named.size());
- for (auto& t : named) {
- types.push_back(t.type);
- }
- return types;
-}
-
-template<typename Ctx>
-Result<> parseDefs(Ctx& ctx,
- const std::vector<DefPos>& defs,
- MaybeResult<> (*parser)(Ctx&)) {
- for (auto& def : defs) {
- ctx.index = def.index;
- WithPosition with(ctx, def.pos);
- auto parsed = parser(ctx);
- CHECK_ERR(parsed);
- assert(parsed);
- }
- return Ok{};
-}
-
-// ===============
-// Parser Contexts
-// ===============
-
-struct NullTypeParserCtx {
- using IndexT = Ok;
- using HeapTypeT = Ok;
- using TypeT = Ok;
- using ParamsT = Ok;
- using ResultsT = size_t;
- using BlockTypeT = Ok;
- using SignatureT = Ok;
- using StorageT = Ok;
- using FieldT = Ok;
- using FieldsT = Ok;
- using StructT = Ok;
- using ArrayT = Ok;
- using LimitsT = Ok;
- using MemTypeT = Ok;
- using GlobalTypeT = Ok;
- using TypeUseT = Ok;
- using LocalsT = Ok;
- using DataStringT = Ok;
-
- HeapTypeT makeFunc() { return Ok{}; }
- HeapTypeT makeAny() { return Ok{}; }
- HeapTypeT makeExtern() { return Ok{}; }
- HeapTypeT makeEq() { return Ok{}; }
- HeapTypeT makeI31() { return Ok{}; }
- HeapTypeT makeStructType() { return Ok{}; }
- HeapTypeT makeArrayType() { return Ok{}; }
-
- TypeT makeI32() { return Ok{}; }
- TypeT makeI64() { return Ok{}; }
- TypeT makeF32() { return Ok{}; }
- TypeT makeF64() { return Ok{}; }
- TypeT makeV128() { return Ok{}; }
-
- TypeT makeRefType(HeapTypeT, Nullability) { return Ok{}; }
-
- ParamsT makeParams() { return Ok{}; }
- void appendParam(ParamsT&, Name, TypeT) {}
-
- // We have to count results because whether or not a block introduces a
- // typeuse that may implicitly define a type depends on how many results it
- // has.
- size_t makeResults() { return 0; }
- void appendResult(size_t& results, TypeT) { ++results; }
- size_t getResultsSize(size_t results) { return results; }
-
- SignatureT makeFuncType(ParamsT*, ResultsT*) { return Ok{}; }
-
- StorageT makeI8() { return Ok{}; }
- StorageT makeI16() { return Ok{}; }
- StorageT makeStorageType(TypeT) { return Ok{}; }
-
- FieldT makeFieldType(StorageT, Mutability) { return Ok{}; }
-
- FieldsT makeFields() { return Ok{}; }
- void appendField(FieldsT&, Name, FieldT) {}
-
- StructT makeStruct(FieldsT&) { return Ok{}; }
-
- std::optional<ArrayT> makeArray(FieldsT&) { return Ok{}; }
-
- GlobalTypeT makeGlobalType(Mutability, TypeT) { return Ok{}; }
-
- LocalsT makeLocals() { return Ok{}; }
- void appendLocal(LocalsT&, Name, TypeT) {}
-
- Result<Index> getTypeIndex(Name) { return 1; }
- Result<HeapTypeT> getHeapTypeFromIdx(Index) { return Ok{}; }
-
- DataStringT makeDataString() { return Ok{}; }
- void appendDataString(DataStringT&, std::string_view) {}
-
- MemTypeT makeMemType(Type, LimitsT, bool) { return Ok{}; }
-
- BlockTypeT getBlockTypeFromResult(size_t results) { return Ok{}; }
-
- Result<> getBlockTypeFromTypeUse(Index, TypeUseT) { return Ok{}; }
-};
-
-template<typename Ctx> struct TypeParserCtx {
- using IndexT = Index;
- using HeapTypeT = HeapType;
- using TypeT = Type;
- using ParamsT = std::vector<NameType>;
- using ResultsT = std::vector<Type>;
- using BlockTypeT = HeapType;
- using SignatureT = Signature;
- using StorageT = Field;
- using FieldT = Field;
- using FieldsT = std::pair<std::vector<Name>, std::vector<Field>>;
- using StructT = std::pair<std::vector<Name>, Struct>;
- using ArrayT = Array;
- using LimitsT = Ok;
- using MemTypeT = Ok;
- using LocalsT = std::vector<NameType>;
- using DataStringT = Ok;
-
- // Map heap type names to their indices.
- const IndexMap& typeIndices;
-
- TypeParserCtx(const IndexMap& typeIndices) : typeIndices(typeIndices) {}
-
- Ctx& self() { return *static_cast<Ctx*>(this); }
-
- HeapTypeT makeFunc() { return HeapType::func; }
- HeapTypeT makeAny() { return HeapType::any; }
- HeapTypeT makeExtern() { return HeapType::ext; }
- HeapTypeT makeEq() { return HeapType::eq; }
- HeapTypeT makeI31() { return HeapType::i31; }
- HeapTypeT makeStructType() { return HeapType::struct_; }
- HeapTypeT makeArrayType() { return HeapType::array; }
-
- TypeT makeI32() { return Type::i32; }
- TypeT makeI64() { return Type::i64; }
- TypeT makeF32() { return Type::f32; }
- TypeT makeF64() { return Type::f64; }
- TypeT makeV128() { return Type::v128; }
-
- TypeT makeRefType(HeapTypeT ht, Nullability nullability) {
- return Type(ht, nullability);
- }
-
- TypeT makeTupleType(const std::vector<Type> types) { return Tuple(types); }
-
- ParamsT makeParams() { return {}; }
- void appendParam(ParamsT& params, Name id, TypeT type) {
- params.push_back({id, type});
- }
-
- ResultsT makeResults() { return {}; }
- void appendResult(ResultsT& results, TypeT type) { results.push_back(type); }
- size_t getResultsSize(const ResultsT& results) { return results.size(); }
-
- SignatureT makeFuncType(ParamsT* params, ResultsT* results) {
- std::vector<Type> empty;
- const auto& paramTypes = params ? getUnnamedTypes(*params) : empty;
- const auto& resultTypes = results ? *results : empty;
- return Signature(self().makeTupleType(paramTypes),
- self().makeTupleType(resultTypes));
- }
-
- StorageT makeI8() { return Field(Field::i8, Immutable); }
- StorageT makeI16() { return Field(Field::i16, Immutable); }
- StorageT makeStorageType(TypeT type) { return Field(type, Immutable); }
-
- FieldT makeFieldType(FieldT field, Mutability mutability) {
- if (field.packedType == Field::not_packed) {
- return Field(field.type, mutability);
- }
- return Field(field.packedType, mutability);
- }
-
- FieldsT makeFields() { return {}; }
- void appendField(FieldsT& fields, Name name, FieldT field) {
- fields.first.push_back(name);
- fields.second.push_back(field);
- }
-
- StructT makeStruct(FieldsT& fields) {
- return {std::move(fields.first), Struct(std::move(fields.second))};
- }
-
- std::optional<ArrayT> makeArray(FieldsT& fields) {
- if (fields.second.size() == 1) {
- return Array(fields.second[0]);
- }
- return {};
- }
-
- LocalsT makeLocals() { return {}; }
- void appendLocal(LocalsT& locals, Name id, TypeT type) {
- locals.push_back({id, type});
- }
-
- Result<Index> getTypeIndex(Name id) {
- auto it = typeIndices.find(id);
- if (it == typeIndices.end()) {
- return self().in.err("unknown type identifier");
- }
- return it->second;
- }
-
- DataStringT makeDataString() { return Ok{}; }
- void appendDataString(DataStringT&, std::string_view) {}
-
- LimitsT makeLimits(uint64_t, std::optional<uint64_t>) { return Ok{}; }
- LimitsT getLimitsFromData(DataStringT) { return Ok{}; }
-
- MemTypeT makeMemType(Type, LimitsT, bool) { return Ok{}; }
-
- HeapType getBlockTypeFromResult(const std::vector<Type> results) {
- assert(results.size() == 1);
- return HeapType(Signature(Type::none, results[0]));
- }
-};
-
-struct NullInstrParserCtx {
- using InstrT = Ok;
- using InstrsT = Ok;
- using ExprT = Ok;
-
- using FieldIdxT = Ok;
- using LocalIdxT = Ok;
- using GlobalIdxT = Ok;
- using MemoryIdxT = Ok;
- using DataIdxT = Ok;
-
- using MemargT = Ok;
-
- InstrsT makeInstrs() { return Ok{}; }
- void appendInstr(InstrsT&, InstrT) {}
- InstrsT finishInstrs(InstrsT&) { return Ok{}; }
-
- ExprT makeExpr(InstrsT) { return Ok{}; }
- Result<ExprT> instrToExpr(InstrT) { return Ok{}; }
-
- template<typename HeapTypeT> FieldIdxT getFieldFromIdx(HeapTypeT, uint32_t) {
- return Ok{};
- }
- template<typename HeapTypeT> FieldIdxT getFieldFromName(HeapTypeT, Name) {
- return Ok{};
- }
- LocalIdxT getLocalFromIdx(uint32_t) { return Ok{}; }
- LocalIdxT getLocalFromName(Name) { return Ok{}; }
- GlobalIdxT getGlobalFromIdx(uint32_t) { return Ok{}; }
- GlobalIdxT getGlobalFromName(Name) { return Ok{}; }
- MemoryIdxT getMemoryFromIdx(uint32_t) { return Ok{}; }
- MemoryIdxT getMemoryFromName(Name) { return Ok{}; }
- DataIdxT getDataFromIdx(uint32_t) { return Ok{}; }
- DataIdxT getDataFromName(Name) { return Ok{}; }
-
- MemargT getMemarg(uint64_t, uint32_t) { return Ok{}; }
-
- template<typename BlockTypeT>
- InstrT makeBlock(Index, std::optional<Name>, BlockTypeT) {
- return Ok{};
- }
- InstrT finishBlock(Index, InstrsT) { return Ok{}; }
-
- InstrT makeUnreachable(Index) { return Ok{}; }
- InstrT makeNop(Index) { return Ok{}; }
- InstrT makeBinary(Index, BinaryOp) { return Ok{}; }
- InstrT makeUnary(Index, UnaryOp) { return Ok{}; }
- template<typename ResultsT> InstrT makeSelect(Index, ResultsT*) {
- return Ok{};
- }
- InstrT makeDrop(Index) { return Ok{}; }
- InstrT makeMemorySize(Index, MemoryIdxT*) { return Ok{}; }
- InstrT makeMemoryGrow(Index, MemoryIdxT*) { return Ok{}; }
- InstrT makeLocalGet(Index, LocalIdxT) { return Ok{}; }
- InstrT makeLocalTee(Index, LocalIdxT) { return Ok{}; }
- InstrT makeLocalSet(Index, LocalIdxT) { return Ok{}; }
- InstrT makeGlobalGet(Index, GlobalIdxT) { return Ok{}; }
- InstrT makeGlobalSet(Index, GlobalIdxT) { return Ok{}; }
-
- InstrT makeI32Const(Index, uint32_t) { return Ok{}; }
- InstrT makeI64Const(Index, uint64_t) { return Ok{}; }
- InstrT makeF32Const(Index, float) { return Ok{}; }
- InstrT makeF64Const(Index, double) { return Ok{}; }
- InstrT makeLoad(Index, Type, bool, int, bool, MemoryIdxT*, MemargT) {
- return Ok{};
- }
- InstrT makeStore(Index, Type, int, bool, MemoryIdxT*, MemargT) {
- return Ok{};
- }
- InstrT makeAtomicRMW(Index, AtomicRMWOp, Type, int, MemoryIdxT*, MemargT) {
- return Ok{};
- }
- InstrT makeAtomicCmpxchg(Index, Type, int, MemoryIdxT*, MemargT) {
- return Ok{};
- }
- InstrT makeAtomicWait(Index, Type, MemoryIdxT*, MemargT) { return Ok{}; }
- InstrT makeAtomicNotify(Index, MemoryIdxT*, MemargT) { return Ok{}; }
- InstrT makeAtomicFence(Index) { return Ok{}; }
- InstrT makeSIMDExtract(Index, SIMDExtractOp, uint8_t) { return Ok{}; }
- InstrT makeSIMDReplace(Index, SIMDReplaceOp, uint8_t) { return Ok{}; }
- InstrT makeSIMDShuffle(Index, const std::array<uint8_t, 16>&) { return Ok{}; }
- InstrT makeSIMDTernary(Index, SIMDTernaryOp) { return Ok{}; }
- InstrT makeSIMDShift(Index, SIMDShiftOp) { return Ok{}; }
- InstrT makeSIMDLoad(Index, SIMDLoadOp, MemoryIdxT*, MemargT) { return Ok{}; }
- InstrT makeSIMDLoadStoreLane(
- Index, SIMDLoadStoreLaneOp, MemoryIdxT*, MemargT, uint8_t) {
- return Ok{};
- }
- InstrT makeMemoryInit(Index, MemoryIdxT*, DataIdxT) { return Ok{}; }
- InstrT makeDataDrop(Index, DataIdxT) { return Ok{}; }
-
- InstrT makeMemoryCopy(Index, MemoryIdxT*, MemoryIdxT*) { return Ok{}; }
- InstrT makeMemoryFill(Index, MemoryIdxT*) { return Ok{}; }
-
- InstrT makeReturn(Index) { return Ok{}; }
- template<typename HeapTypeT> InstrT makeRefNull(Index, HeapTypeT) {
- return Ok{};
- }
- InstrT makeRefIsNull(Index) { return Ok{}; }
-
- InstrT makeRefEq(Index) { return Ok{}; }
-
- InstrT makeRefI31(Index) { return Ok{}; }
- InstrT makeI31Get(Index, bool) { return Ok{}; }
-
- template<typename HeapTypeT> InstrT makeStructNew(Index, HeapTypeT) {
- return Ok{};
- }
- template<typename HeapTypeT> InstrT makeStructNewDefault(Index, HeapTypeT) {
- return Ok{};
- }
- template<typename HeapTypeT>
- InstrT makeStructGet(Index, HeapTypeT, FieldIdxT, bool) {
- return Ok{};
- }
- template<typename HeapTypeT>
- InstrT makeStructSet(Index, HeapTypeT, FieldIdxT) {
- return Ok{};
- }
- template<typename HeapTypeT> InstrT makeArrayNew(Index, HeapTypeT) {
- return Ok{};
- }
- template<typename HeapTypeT> InstrT makeArrayNewDefault(Index, HeapTypeT) {
- return Ok{};
- }
- template<typename HeapTypeT>
- InstrT makeArrayNewData(Index, HeapTypeT, DataIdxT) {
- return Ok{};
- }
- template<typename HeapTypeT>
- InstrT makeArrayNewElem(Index, HeapTypeT, DataIdxT) {
- return Ok{};
- }
- template<typename HeapTypeT> InstrT makeArrayGet(Index, HeapTypeT, bool) {
- return Ok{};
- }
- template<typename HeapTypeT> InstrT makeArraySet(Index, HeapTypeT) {
- return Ok{};
- }
- InstrT makeArrayLen(Index) { return Ok{}; }
- template<typename HeapTypeT>
- InstrT makeArrayCopy(Index, HeapTypeT, HeapTypeT) {
- return Ok{};
- }
- template<typename HeapTypeT> InstrT makeArrayFill(Index, HeapTypeT) {
- return Ok{};
- }
-};
-
-// Phase 1: Parse definition spans for top-level module elements and determine
-// their indices and names.
-struct ParseDeclsCtx : NullTypeParserCtx, NullInstrParserCtx {
- using DataStringT = std::vector<char>;
- using LimitsT = Limits;
- using MemTypeT = MemType;
-
- ParseInput in;
-
- // At this stage we only look at types to find implicit type definitions,
- // which are inserted directly into the context. We cannot materialize or
- // validate any types because we don't know what types exist yet.
- //
- // Declared module elements are inserted into the module, but their bodies are
- // not filled out until later parsing phases.
- Module& wasm;
-
- // The module element definitions we are parsing in this phase.
- std::vector<DefPos> typeDefs;
- std::vector<DefPos> subtypeDefs;
- std::vector<DefPos> funcDefs;
- std::vector<DefPos> memoryDefs;
- std::vector<DefPos> globalDefs;
- std::vector<DefPos> dataDefs;
-
- // Positions of typeuses that might implicitly define new types.
- std::vector<Index> implicitTypeDefs;
-
- // Counters used for generating names for module elements.
- int funcCounter = 0;
- int memoryCounter = 0;
- int globalCounter = 0;
- int dataCounter = 0;
-
- // Used to verify that all imports come before all non-imports.
- bool hasNonImport = false;
-
- ParseDeclsCtx(std::string_view in, Module& wasm) : in(in), wasm(wasm) {}
-
- void addFuncType(SignatureT) {}
- void addStructType(StructT) {}
- void addArrayType(ArrayT) {}
- void setOpen() {}
- Result<> addSubtype(Index) { return Ok{}; }
- void finishSubtype(Name name, Index pos) {
- subtypeDefs.push_back({name, pos, Index(subtypeDefs.size())});
- }
- size_t getRecGroupStartIndex() { return 0; }
- void addRecGroup(Index, size_t) {}
- void finishDeftype(Index pos) {
- typeDefs.push_back({{}, pos, Index(typeDefs.size())});
- }
-
- std::vector<char> makeDataString() { return {}; }
- void appendDataString(std::vector<char>& data, std::string_view str) {
- data.insert(data.end(), str.begin(), str.end());
- }
-
- Limits makeLimits(uint64_t n, std::optional<uint64_t> m) {
- return m ? Limits{n, *m} : Limits{n, Memory::kUnlimitedSize};
- }
- Limits getLimitsFromData(const std::vector<char>& data) {
- uint64_t size = (data.size() + Memory::kPageSize - 1) / Memory::kPageSize;
- return {size, size};
- }
-
- MemType makeMemType(Type type, Limits limits, bool shared) {
- return {type, limits, shared};
- }
-
- Result<TypeUseT>
- makeTypeUse(Index pos, std::optional<HeapTypeT> type, ParamsT*, ResultsT*) {
- if (!type) {
- implicitTypeDefs.push_back(pos);
- }
- return Ok{};
- }
-
- Result<Function*>
- addFuncDecl(Index pos, Name name, ImportNames* importNames) {
- auto f = std::make_unique<Function>();
- if (name.is()) {
- if (wasm.getFunctionOrNull(name)) {
- // TDOO: if the existing function is not explicitly named, fix its name
- // and continue.
- return in.err(pos, "repeated function name");
- }
- f->setExplicitName(name);
- } else {
- name = (importNames ? "fimport$" : "") + std::to_string(funcCounter++);
- name = Names::getValidFunctionName(wasm, name);
- f->name = name;
- }
- applyImportNames(*f, importNames);
- return wasm.addFunction(std::move(f));
- }
-
- Result<> addFunc(Name name,
- const std::vector<Name>& exports,
- ImportNames* import,
- TypeUseT type,
- std::optional<LocalsT>,
- std::optional<InstrsT>,
- Index pos) {
- if (import && hasNonImport) {
- return in.err(pos, "import after non-import");
- }
- auto f = addFuncDecl(pos, name, import);
- CHECK_ERR(f);
- CHECK_ERR(addExports(in, wasm, *f, exports, ExternalKind::Function));
- funcDefs.push_back({name, pos, Index(funcDefs.size())});
- return Ok{};
- }
-
- Result<Memory*>
- addMemoryDecl(Index pos, Name name, ImportNames* importNames, MemType type) {
- auto m = std::make_unique<Memory>();
- m->indexType = type.type;
- m->initial = type.limits.initial;
- m->max = type.limits.max;
- m->shared = type.shared;
- if (name) {
- // TODO: if the existing memory is not explicitly named, fix its name
- // and continue.
- if (wasm.getMemoryOrNull(name)) {
- return in.err(pos, "repeated memory name");
- }
- m->setExplicitName(name);
- } else {
- name = (importNames ? "mimport$" : "") + std::to_string(memoryCounter++);
- name = Names::getValidMemoryName(wasm, name);
- m->name = name;
- }
- applyImportNames(*m, importNames);
- return wasm.addMemory(std::move(m));
- }
-
- Result<> addMemory(Name name,
- const std::vector<Name>& exports,
- ImportNames* import,
- MemType type,
- Index pos) {
- if (import && hasNonImport) {
- return in.err(pos, "import after non-import");
- }
- auto m = addMemoryDecl(pos, name, import, type);
- CHECK_ERR(m);
- CHECK_ERR(addExports(in, wasm, *m, exports, ExternalKind::Memory));
- memoryDefs.push_back({name, pos, Index(memoryDefs.size())});
- return Ok{};
- }
-
- Result<> addImplicitData(DataStringT&& data) {
- auto& mem = *wasm.memories.back();
- auto d = std::make_unique<DataSegment>();
- d->memory = mem.name;
- d->isPassive = false;
- d->offset = Builder(wasm).makeConstPtr(0, mem.indexType);
- d->data = std::move(data);
- d->name = Names::getValidDataSegmentName(wasm, "implicit-data");
- wasm.addDataSegment(std::move(d));
- return Ok{};
- }
-
- Result<Global*>
- addGlobalDecl(Index pos, Name name, ImportNames* importNames) {
- auto g = std::make_unique<Global>();
- if (name) {
- if (wasm.getGlobalOrNull(name)) {
- // TODO: if the existing global is not explicitly named, fix its name
- // and continue.
- return in.err(pos, "repeated global name");
- }
- g->setExplicitName(name);
- } else {
- name = (importNames ? "gimport$" : "") + std::to_string(globalCounter++);
- name = Names::getValidGlobalName(wasm, name);
- g->name = name;
- }
- applyImportNames(*g, importNames);
- return wasm.addGlobal(std::move(g));
- }
-
- Result<> addGlobal(Name name,
- const std::vector<Name>& exports,
- ImportNames* import,
- GlobalTypeT,
- std::optional<ExprT>,
- Index pos) {
- if (import && hasNonImport) {
- return in.err(pos, "import after non-import");
- }
- auto g = addGlobalDecl(pos, name, import);
- CHECK_ERR(g);
- CHECK_ERR(addExports(in, wasm, *g, exports, ExternalKind::Global));
- globalDefs.push_back({name, pos, Index(globalDefs.size())});
- return Ok{};
- }
-
- Result<> addData(Name name,
- MemoryIdxT*,
- std::optional<ExprT>,
- std::vector<char>&& data,
- Index pos) {
- auto d = std::make_unique<DataSegment>();
- if (name) {
- if (wasm.getDataSegmentOrNull(name)) {
- // TODO: if the existing segment is not explicitly named, fix its name
- // and continue.
- return in.err(pos, "repeated data segment name");
- }
- d->setExplicitName(name);
- } else {
- name = std::to_string(dataCounter++);
- name = Names::getValidDataSegmentName(wasm, name);
- d->name = name;
- }
- d->data = std::move(data);
- dataDefs.push_back({name, pos, Index(wasm.dataSegments.size())});
- wasm.addDataSegment(std::move(d));
- return Ok{};
- }
-};
-
-// Phase 2: Parse type definitions into a TypeBuilder.
-struct ParseTypeDefsCtx : TypeParserCtx<ParseTypeDefsCtx> {
- ParseInput in;
-
- // We update slots in this builder as we parse type definitions.
- TypeBuilder& builder;
-
- // Parse the names of types and fields as we go.
- std::vector<TypeNames> names;
-
- // The index of the subtype definition we are parsing.
- Index index = 0;
-
- ParseTypeDefsCtx(std::string_view in,
- TypeBuilder& builder,
- const IndexMap& typeIndices)
- : TypeParserCtx<ParseTypeDefsCtx>(typeIndices), in(in), builder(builder),
- names(builder.size()) {}
-
- TypeT makeRefType(HeapTypeT ht, Nullability nullability) {
- return builder.getTempRefType(ht, nullability);
- }
-
- TypeT makeTupleType(const std::vector<Type> types) {
- return builder.getTempTupleType(types);
- }
-
- Result<HeapTypeT> getHeapTypeFromIdx(Index idx) {
- if (idx >= builder.size()) {
- return in.err("type index out of bounds");
- }
- return builder[idx];
- }
-
- void addFuncType(SignatureT& type) { builder[index] = type; }
-
- void addStructType(StructT& type) {
- auto& [fieldNames, str] = type;
- builder[index] = str;
- for (Index i = 0; i < fieldNames.size(); ++i) {
- if (auto name = fieldNames[i]; name.is()) {
- names[index].fieldNames[i] = name;
- }
- }
- }
-
- void addArrayType(ArrayT& type) { builder[index] = type; }
-
- void setOpen() { builder[index].setOpen(); }
-
- Result<> addSubtype(Index super) {
- if (super >= builder.size()) {
- return in.err("supertype index out of bounds");
- }
- builder[index].subTypeOf(builder[super]);
- return Ok{};
- }
-
- void finishSubtype(Name name, Index pos) { names[index++].name = name; }
-
- size_t getRecGroupStartIndex() { return index; }
-
- void addRecGroup(Index start, size_t len) {
- builder.createRecGroup(start, len);
- }
-
- void finishDeftype(Index) {}
-};
-
-// Phase 3: Parse type uses to find implicitly defined types.
-struct ParseImplicitTypeDefsCtx : TypeParserCtx<ParseImplicitTypeDefsCtx> {
- using TypeUseT = Ok;
-
- ParseInput in;
-
- // Types parsed so far.
- std::vector<HeapType>& types;
-
- // Map typeuse positions without an explicit type to the correct type.
- std::unordered_map<Index, HeapType>& implicitTypes;
-
- // Map signatures to the first defined heap type they match.
- std::unordered_map<Signature, HeapType> sigTypes;
-
- ParseImplicitTypeDefsCtx(std::string_view in,
- std::vector<HeapType>& types,
- std::unordered_map<Index, HeapType>& implicitTypes,
- const IndexMap& typeIndices)
- : TypeParserCtx<ParseImplicitTypeDefsCtx>(typeIndices), in(in),
- types(types), implicitTypes(implicitTypes) {
- for (auto type : types) {
- if (type.isSignature() && type.getRecGroup().size() == 1) {
- sigTypes.insert({type.getSignature(), type});
- }
- }
- }
-
- Result<HeapTypeT> getHeapTypeFromIdx(Index idx) {
- if (idx >= types.size()) {
- return in.err("type index out of bounds");
- }
- return types[idx];
- }
-
- Result<TypeUseT> makeTypeUse(Index pos,
- std::optional<HeapTypeT>,
- ParamsT* params,
- ResultsT* results) {
- std::vector<Type> paramTypes;
- if (params) {
- paramTypes = getUnnamedTypes(*params);
- }
-
- std::vector<Type> resultTypes;
- if (results) {
- resultTypes = *results;
- }
-
- auto sig = Signature(Type(paramTypes), Type(resultTypes));
- auto [it, inserted] = sigTypes.insert({sig, HeapType::func});
- if (inserted) {
- auto type = HeapType(sig);
- it->second = type;
- types.push_back(type);
- }
- implicitTypes.insert({pos, it->second});
-
- return Ok{};
- }
-};
-
-// Phase 4: Parse and set the types of module elements.
-struct ParseModuleTypesCtx : TypeParserCtx<ParseModuleTypesCtx>,
- NullInstrParserCtx {
- // In this phase we have constructed all the types, so we can materialize and
- // validate them when they are used.
-
- using GlobalTypeT = GlobalType;
- using TypeUseT = TypeUse;
-
- ParseInput in;
-
- Module& wasm;
-
- const std::vector<HeapType>& types;
- const std::unordered_map<Index, HeapType>& implicitTypes;
-
- // The index of the current type.
- Index index = 0;
-
- ParseModuleTypesCtx(std::string_view in,
- Module& wasm,
- const std::vector<HeapType>& types,
- const std::unordered_map<Index, HeapType>& implicitTypes,
- const IndexMap& typeIndices)
- : TypeParserCtx<ParseModuleTypesCtx>(typeIndices), in(in), wasm(wasm),
- types(types), implicitTypes(implicitTypes) {}
-
- Result<HeapTypeT> getHeapTypeFromIdx(Index idx) {
- if (idx >= types.size()) {
- return in.err("type index out of bounds");
- }
- return types[idx];
- }
-
- Result<TypeUseT> makeTypeUse(Index pos,
- std::optional<HeapTypeT> type,
- ParamsT* params,
- ResultsT* results) {
- std::vector<Name> ids;
- if (params) {
- ids.reserve(params->size());
- for (auto& p : *params) {
- ids.push_back(p.name);
- }
- }
-
- if (type) {
- return TypeUse{*type, ids};
- }
-
- auto it = implicitTypes.find(pos);
- assert(it != implicitTypes.end());
-
- return TypeUse{it->second, ids};
- }
-
- Result<HeapType> getBlockTypeFromTypeUse(Index pos, TypeUse use) {
- assert(use.type.isSignature());
- if (use.type.getSignature().params != Type::none) {
- return in.err(pos, "block parameters not yet supported");
- }
- // TODO: Once we support block parameters, return an error here if any of
- // them are named.
- return use.type;
- }
-
- GlobalTypeT makeGlobalType(Mutability mutability, TypeT type) {
- return {mutability, type};
- }
-
- Result<> addFunc(Name name,
- const std::vector<Name>&,
- ImportNames*,
- TypeUse type,
- std::optional<LocalsT> locals,
- std::optional<InstrsT>,
- Index pos) {
- auto& f = wasm.functions[index];
- if (!type.type.isSignature()) {
- return in.err(pos, "expected signature type");
- }
- f->type = type.type;
- for (Index i = 0; i < type.names.size(); ++i) {
- if (type.names[i].is()) {
- f->setLocalName(i, type.names[i]);
- }
- }
- if (locals) {
- for (auto& l : *locals) {
- Builder::addVar(f.get(), l.name, l.type);
- }
- }
- return Ok{};
- }
-
- Result<>
- addMemory(Name, const std::vector<Name>&, ImportNames*, MemTypeT, Index) {
- return Ok{};
- }
-
- Result<> addImplicitData(DataStringT&& data) { return Ok{}; }
-
- Result<> addGlobal(Name,
- const std::vector<Name>&,
- ImportNames*,
- GlobalType type,
- std::optional<ExprT>,
- Index) {
- auto& g = wasm.globals[index];
- g->mutable_ = type.mutability;
- g->type = type.type;
- return Ok{};
- }
-};
-
-// Phase 5: Parse module element definitions, including instructions.
-struct ParseDefsCtx : TypeParserCtx<ParseDefsCtx> {
- using GlobalTypeT = Ok;
- using TypeUseT = HeapType;
-
- // Keep track of instructions internally rather than letting the general
- // parser collect them.
- using InstrT = Ok;
- using InstrsT = Ok;
- using ExprT = Expression*;
-
- using FieldIdxT = Index;
- using LocalIdxT = Index;
- using GlobalIdxT = Name;
- using MemoryIdxT = Name;
- using DataIdxT = Name;
-
- using MemargT = Memarg;
-
- ParseInput in;
-
- Module& wasm;
- Builder builder;
-
- const std::vector<HeapType>& types;
- const std::unordered_map<Index, HeapType>& implicitTypes;
-
- // The index of the current module element.
- Index index = 0;
-
- // The current function being parsed, used to create scratch locals, type
- // local.get, etc.
- Function* func = nullptr;
-
- IRBuilder irBuilder;
-
- void setFunction(Function* func) {
- this->func = func;
- irBuilder.setFunction(func);
- }
-
- ParseDefsCtx(std::string_view in,
- Module& wasm,
- const std::vector<HeapType>& types,
- const std::unordered_map<Index, HeapType>& implicitTypes,
- const IndexMap& typeIndices)
- : TypeParserCtx(typeIndices), in(in), wasm(wasm), builder(wasm),
- types(types), implicitTypes(implicitTypes), irBuilder(wasm) {}
-
- template<typename T> Result<T> withLoc(Index pos, Result<T> res) {
- if (auto err = res.getErr()) {
- return in.err(pos, err->msg);
- }
- return res;
- }
-
- template<typename T> Result<T> withLoc(Result<T> res) {
- return withLoc(in.getPos(), res);
- }
-
- HeapType getBlockTypeFromResult(const std::vector<Type> results) {
- assert(results.size() == 1);
- return HeapType(Signature(Type::none, results[0]));
- }
-
- Result<HeapType> getBlockTypeFromTypeUse(Index pos, HeapType type) {
- return type;
- }
-
- Ok makeInstrs() { return Ok{}; }
-
- void appendInstr(Ok&, InstrT instr) {}
-
- Result<InstrsT> finishInstrs(Ok&) { return Ok{}; }
-
- Result<Expression*> instrToExpr(Ok&) { return irBuilder.build(); }
-
- GlobalTypeT makeGlobalType(Mutability, TypeT) { return Ok{}; }
-
- Result<HeapTypeT> getHeapTypeFromIdx(Index idx) {
- if (idx >= types.size()) {
- return in.err("type index out of bounds");
- }
- return types[idx];
- }
-
- Result<Index> getFieldFromIdx(HeapType type, uint32_t idx) {
- if (!type.isStruct()) {
- return in.err("expected struct type");
- }
- if (idx >= type.getStruct().fields.size()) {
- return in.err("struct index out of bounds");
- }
- return idx;
- }
-
- Result<Index> getFieldFromName(HeapType type, Name name) {
- // TODO: Field names
- return in.err("symbolic field names note yet supported");
- }
-
- Result<Index> getLocalFromIdx(uint32_t idx) {
- if (!func) {
- return in.err("cannot access locals outside of a function");
- }
- if (idx >= func->getNumLocals()) {
- return in.err("local index out of bounds");
- }
- return idx;
- }
-
- Result<Index> getLocalFromName(Name name) {
- if (!func) {
- return in.err("cannot access locals outside of a function");
- }
- if (!func->hasLocalIndex(name)) {
- return in.err("local $" + name.toString() + " does not exist");
- }
- return func->getLocalIndex(name);
- }
-
- Result<Name> getGlobalFromIdx(uint32_t idx) {
- if (idx >= wasm.globals.size()) {
- return in.err("global index out of bounds");
- }
- return wasm.globals[idx]->name;
- }
-
- Result<Name> getGlobalFromName(Name name) {
- if (!wasm.getGlobalOrNull(name)) {
- return in.err("global $" + name.toString() + " does not exist");
- }
- return name;
- }
-
- Result<Name> getMemoryFromIdx(uint32_t idx) {
- if (idx >= wasm.memories.size()) {
- return in.err("memory index out of bounds");
- }
- return wasm.memories[idx]->name;
- }
-
- Result<Name> getMemoryFromName(Name name) {
- if (!wasm.getMemoryOrNull(name)) {
- return in.err("memory $" + name.toString() + " does not exist");
- }
- return name;
- }
-
- Result<Name> getDataFromIdx(uint32_t idx) {
- if (idx >= wasm.dataSegments.size()) {
- return in.err("data index out of bounds");
- }
- return wasm.dataSegments[idx]->name;
- }
-
- Result<Name> getDataFromName(Name name) {
- if (!wasm.getDataSegmentOrNull(name)) {
- return in.err("data $" + name.toString() + " does not exist");
- }
- return name;
- }
-
- Result<TypeUseT> makeTypeUse(Index pos,
- std::optional<HeapTypeT> type,
- ParamsT* params,
- ResultsT* results) {
- if (type && (params || results)) {
- std::vector<Type> paramTypes;
- if (params) {
- paramTypes = getUnnamedTypes(*params);
- }
-
- std::vector<Type> resultTypes;
- if (results) {
- resultTypes = *results;
- }
-
- auto sig = Signature(Type(paramTypes), Type(resultTypes));
-
- if (!type->isSignature() || type->getSignature() != sig) {
- return in.err(pos, "type does not match provided signature");
- }
- }
-
- if (type) {
- return *type;
- }
-
- auto it = implicitTypes.find(pos);
- assert(it != implicitTypes.end());
- return it->second;
- }
-
- Result<> addFunc(Name,
- const std::vector<Name>&,
- ImportNames*,
- TypeUseT,
- std::optional<LocalsT>,
- std::optional<InstrsT>,
- Index pos) {
- CHECK_ERR(withLoc(pos, irBuilder.visitEnd()));
- auto body = irBuilder.build();
- CHECK_ERR(withLoc(pos, body));
- wasm.functions[index]->body = *body;
- return Ok{};
- }
-
- Result<> addGlobal(Name,
- const std::vector<Name>&,
- ImportNames*,
- GlobalTypeT,
- std::optional<ExprT> exp,
- Index) {
- if (exp) {
- wasm.globals[index]->init = *exp;
- }
- return Ok{};
- }
-
- Result<> addData(
- Name, Name* mem, std::optional<ExprT> offset, DataStringT, Index pos) {
- auto& d = wasm.dataSegments[index];
- if (offset) {
- d->isPassive = false;
- d->offset = *offset;
- if (mem) {
- d->memory = *mem;
- } else if (wasm.memories.size() > 0) {
- d->memory = wasm.memories[0]->name;
- } else {
- return in.err(pos, "active segment with no memory");
- }
- } else {
- d->isPassive = true;
- }
- return Ok{};
- }
-
- Result<Index> addScratchLocal(Index pos, Type type) {
- if (!func) {
- return in.err(pos,
- "scratch local required, but there is no function context");
- }
- Name name = Names::getValidLocalName(*func, "scratch");
- return Builder::addVar(func, name, type);
- }
-
- Result<Expression*> makeExpr(InstrsT& instrs) { return irBuilder.build(); }
-
- Memarg getMemarg(uint64_t offset, uint32_t align) { return {offset, align}; }
-
- Result<Name> getMemory(Index pos, Name* mem) {
- if (mem) {
- return *mem;
- }
- if (wasm.memories.empty()) {
- return in.err(pos, "memory required, but there is no memory");
- }
- return wasm.memories[0]->name;
- }
-
- Result<> makeBlock(Index pos, std::optional<Name> label, HeapType type) {
- // TODO: validate labels?
- // TODO: Move error on input types to here?
- return withLoc(pos,
- irBuilder.makeBlock(label ? *label : Name{},
- type.getSignature().results));
- }
-
- Result<> finishBlock(Index pos, InstrsT) {
- return withLoc(pos, irBuilder.visitEnd());
- }
-
- Result<> makeUnreachable(Index pos) {
- return withLoc(pos, irBuilder.makeUnreachable());
- }
-
- Result<> makeNop(Index pos) { return withLoc(pos, irBuilder.makeNop()); }
-
- Result<> makeBinary(Index pos, BinaryOp op) {
- return withLoc(pos, irBuilder.makeBinary(op));
- }
-
- Result<> makeUnary(Index pos, UnaryOp op) {
- return withLoc(pos, irBuilder.makeUnary(op));
- }
-
- Result<> makeSelect(Index pos, std::vector<Type>* res) {
- if (res && res->size()) {
- if (res->size() > 1) {
- return in.err(pos, "select may not have more than one result type");
- }
- return withLoc(pos, irBuilder.makeSelect((*res)[0]));
- }
- return withLoc(pos, irBuilder.makeSelect());
- }
-
- Result<> makeDrop(Index pos) { return withLoc(pos, irBuilder.makeDrop()); }
-
- Result<> makeMemorySize(Index pos, Name* mem) {
- auto m = getMemory(pos, mem);
- CHECK_ERR(m);
- return withLoc(pos, irBuilder.makeMemorySize(*m));
- }
-
- Result<> makeMemoryGrow(Index pos, Name* mem) {
- auto m = getMemory(pos, mem);
- CHECK_ERR(m);
- return withLoc(pos, irBuilder.makeMemoryGrow(*m));
- }
-
- Result<> makeLocalGet(Index pos, Index local) {
- return withLoc(pos, irBuilder.makeLocalGet(local));
- }
-
- Result<> makeLocalTee(Index pos, Index local) {
- return withLoc(pos, irBuilder.makeLocalTee(local));
- }
-
- Result<> makeLocalSet(Index pos, Index local) {
- return withLoc(pos, irBuilder.makeLocalSet(local));
- }
-
- Result<> makeGlobalGet(Index pos, Name global) {
- return withLoc(pos, irBuilder.makeGlobalGet(global));
- }
-
- Result<> makeGlobalSet(Index pos, Name global) {
- assert(wasm.getGlobalOrNull(global));
- return withLoc(pos, irBuilder.makeGlobalSet(global));
- }
-
- Result<> makeI32Const(Index pos, uint32_t c) {
- return withLoc(pos, irBuilder.makeConst(Literal(c)));
- }
-
- Result<> makeI64Const(Index pos, uint64_t c) {
- return withLoc(pos, irBuilder.makeConst(Literal(c)));
- }
-
- Result<> makeF32Const(Index pos, float c) {
- return withLoc(pos, irBuilder.makeConst(Literal(c)));
- }
-
- Result<> makeF64Const(Index pos, double c) {
- return withLoc(pos, irBuilder.makeConst(Literal(c)));
- }
-
- Result<> makeLoad(Index pos,
- Type type,
- bool signed_,
- int bytes,
- bool isAtomic,
- Name* mem,
- Memarg memarg) {
- auto m = getMemory(pos, mem);
- CHECK_ERR(m);
- if (isAtomic) {
- return withLoc(pos,
- irBuilder.makeAtomicLoad(bytes, memarg.offset, type, *m));
- }
- return withLoc(pos,
- irBuilder.makeLoad(
- bytes, signed_, memarg.offset, memarg.align, type, *m));
- }
-
- Result<> makeStore(
- Index pos, Type type, int bytes, bool isAtomic, Name* mem, Memarg memarg) {
- auto m = getMemory(pos, mem);
- CHECK_ERR(m);
- if (isAtomic) {
- return withLoc(pos,
- irBuilder.makeAtomicStore(bytes, memarg.offset, type, *m));
- }
- return withLoc(
- pos, irBuilder.makeStore(bytes, memarg.offset, memarg.align, type, *m));
- }
-
- Result<> makeAtomicRMW(
- Index pos, AtomicRMWOp op, Type type, int bytes, Name* mem, Memarg memarg) {
- auto m = getMemory(pos, mem);
- CHECK_ERR(m);
- return withLoc(pos,
- irBuilder.makeAtomicRMW(op, bytes, memarg.offset, type, *m));
- }
-
- Result<>
- makeAtomicCmpxchg(Index pos, Type type, int bytes, Name* mem, Memarg memarg) {
- auto m = getMemory(pos, mem);
- CHECK_ERR(m);
- return withLoc(pos,
- irBuilder.makeAtomicCmpxchg(bytes, memarg.offset, type, *m));
- }
-
- Result<> makeAtomicWait(Index pos, Type type, Name* mem, Memarg memarg) {
- auto m = getMemory(pos, mem);
- CHECK_ERR(m);
- return withLoc(pos, irBuilder.makeAtomicWait(type, memarg.offset, *m));
- }
-
- Result<> makeAtomicNotify(Index pos, Name* mem, Memarg memarg) {
- auto m = getMemory(pos, mem);
- CHECK_ERR(m);
- return withLoc(pos, irBuilder.makeAtomicNotify(memarg.offset, *m));
- }
-
- Result<> makeAtomicFence(Index pos) {
- return withLoc(pos, irBuilder.makeAtomicFence());
- }
-
- Result<> makeSIMDExtract(Index pos, SIMDExtractOp op, uint8_t lane) {
- return withLoc(pos, irBuilder.makeSIMDExtract(op, lane));
- }
-
- Result<> makeSIMDReplace(Index pos, SIMDReplaceOp op, uint8_t lane) {
- return withLoc(pos, irBuilder.makeSIMDReplace(op, lane));
- }
-
- Result<> makeSIMDShuffle(Index pos, const std::array<uint8_t, 16>& lanes) {
- return withLoc(pos, irBuilder.makeSIMDShuffle(lanes));
- }
-
- Result<> makeSIMDTernary(Index pos, SIMDTernaryOp op) {
- return withLoc(pos, irBuilder.makeSIMDTernary(op));
- }
-
- Result<> makeSIMDShift(Index pos, SIMDShiftOp op) {
- return withLoc(pos, irBuilder.makeSIMDShift(op));
- }
-
- Result<> makeSIMDLoad(Index pos, SIMDLoadOp op, Name* mem, Memarg memarg) {
- auto m = getMemory(pos, mem);
- CHECK_ERR(m);
- return withLoc(pos,
- irBuilder.makeSIMDLoad(op, memarg.offset, memarg.align, *m));
- }
-
- Result<> makeSIMDLoadStoreLane(
- Index pos, SIMDLoadStoreLaneOp op, Name* mem, Memarg memarg, uint8_t lane) {
- auto m = getMemory(pos, mem);
- CHECK_ERR(m);
- return withLoc(pos,
- irBuilder.makeSIMDLoadStoreLane(
- op, memarg.offset, memarg.align, lane, *m));
- }
-
- Result<> makeMemoryInit(Index pos, Name* mem, Name data) {
- auto m = getMemory(pos, mem);
- CHECK_ERR(m);
- return withLoc(pos, irBuilder.makeMemoryInit(data, *m));
- }
-
- Result<> makeDataDrop(Index pos, Name data) {
- return withLoc(pos, irBuilder.makeDataDrop(data));
- }
-
- Result<> makeMemoryCopy(Index pos, Name* destMem, Name* srcMem) {
- auto destMemory = getMemory(pos, destMem);
- CHECK_ERR(destMemory);
- auto srcMemory = getMemory(pos, srcMem);
- CHECK_ERR(srcMemory);
- return withLoc(pos, irBuilder.makeMemoryCopy(*destMemory, *srcMemory));
- }
-
- Result<> makeMemoryFill(Index pos, Name* mem) {
- auto m = getMemory(pos, mem);
- CHECK_ERR(m);
- return withLoc(pos, irBuilder.makeMemoryFill(*m));
- }
-
- Result<> makeReturn(Index pos) {
- return withLoc(pos, irBuilder.makeReturn());
- }
-
- Result<> makeRefNull(Index pos, HeapType type) {
- return withLoc(pos, irBuilder.makeRefNull(type));
- }
-
- Result<> makeRefIsNull(Index pos) {
- return withLoc(pos, irBuilder.makeRefIsNull());
- }
-
- Result<> makeRefEq(Index pos) { return withLoc(pos, irBuilder.makeRefEq()); }
-
- Result<> makeRefI31(Index pos) {
- return withLoc(pos, irBuilder.makeRefI31());
- }
-
- Result<> makeI31Get(Index pos, bool signed_) {
- return withLoc(pos, irBuilder.makeI31Get(signed_));
- }
-
- Result<> makeStructNew(Index pos, HeapType type) {
- return withLoc(pos, irBuilder.makeStructNew(type));
- }
-
- Result<> makeStructNewDefault(Index pos, HeapType type) {
- return withLoc(pos, irBuilder.makeStructNewDefault(type));
- }
-
- Result<> makeStructGet(Index pos, HeapType type, Index field, bool signed_) {
- return withLoc(pos, irBuilder.makeStructGet(type, field, signed_));
- }
-
- Result<> makeStructSet(Index pos, HeapType type, Index field) {
- return withLoc(pos, irBuilder.makeStructSet(type, field));
- }
-
- Result<> makeArrayNew(Index pos, HeapType type) {
- return withLoc(pos, irBuilder.makeArrayNew(type));
- }
-
- Result<> makeArrayNewDefault(Index pos, HeapType type) {
- return withLoc(pos, irBuilder.makeArrayNewDefault(type));
- }
-
- Result<> makeArrayNewData(Index pos, HeapType type, Name data) {
- return withLoc(pos, irBuilder.makeArrayNewData(type, data));
- }
-
- Result<> makeArrayNewElem(Index pos, HeapType type, Name elem) {
- return withLoc(pos, irBuilder.makeArrayNewElem(type, elem));
- }
-
- Result<> makeArrayGet(Index pos, HeapType type, bool signed_) {
- return withLoc(pos, irBuilder.makeArrayGet(type, signed_));
- }
-
- Result<> makeArraySet(Index pos, HeapType type) {
- return withLoc(pos, irBuilder.makeArraySet(type));
- }
-
- Result<> makeArrayLen(Index pos) {
- return withLoc(pos, irBuilder.makeArrayLen());
- }
-
- Result<> makeArrayCopy(Index pos, HeapType destType, HeapType srcType) {
- return withLoc(pos, irBuilder.makeArrayCopy(destType, srcType));
- }
-
- Result<> makeArrayFill(Index pos, HeapType type) {
- return withLoc(pos, irBuilder.makeArrayFill(type));
- }
-};
-
-// ================
-// Parser Functions
-// ================
+using namespace std::string_view_literals;
// Types
template<typename Ctx> Result<typename Ctx::HeapTypeT> heaptype(Ctx&);
@@ -2034,8 +231,28 @@ template<typename Ctx> MaybeResult<> memory(Ctx&);
template<typename Ctx> MaybeResult<> global(Ctx&);
template<typename Ctx> Result<typename Ctx::DataStringT> datastring(Ctx&);
template<typename Ctx> MaybeResult<> data(Ctx&);
-MaybeResult<> modulefield(ParseDeclsCtx&);
-Result<> module(ParseDeclsCtx&);
+template<typename Ctx> MaybeResult<> modulefield(Ctx&);
+template<typename Ctx> Result<> module(Ctx&);
+
+// =========
+// Utilities
+// =========
+
+// RAII utility for temporarily changing the parsing position of a parsing
+// context.
+template<typename Ctx> struct WithPosition {
+ Ctx& ctx;
+ Index original;
+
+ WithPosition(Ctx& ctx, Index pos) : ctx(ctx), original(ctx.in.getPos()) {
+ ctx.in.lexer.setIndex(pos);
+ }
+
+ ~WithPosition() { ctx.in.lexer.setIndex(original); }
+};
+
+// Deduction guide to satisfy -Wctad-maybe-unsupported.
+template<typename Ctx> WithPosition(Ctx& ctx, Index) -> WithPosition<Ctx>;
// =====
// Types
@@ -3765,7 +1982,7 @@ template<typename Ctx> MaybeResult<> data(Ctx& ctx) {
// | start
// | elem
// | data
-MaybeResult<> modulefield(ParseDeclsCtx& ctx) {
+template<typename Ctx> MaybeResult<> modulefield(Ctx& ctx) {
if (auto t = ctx.in.peek(); !t || t->isRParen()) {
return {};
}
@@ -3794,7 +2011,7 @@ MaybeResult<> modulefield(ParseDeclsCtx& ctx) {
// module ::= '(' 'module' id? (m:modulefield)* ')'
// | (m:modulefield)* eof
-Result<> module(ParseDeclsCtx& ctx) {
+template<typename Ctx> Result<> module(Ctx& ctx) {
bool outer = ctx.in.takeSExprStart("module"sv);
if (outer) {
@@ -3814,82 +2031,6 @@ Result<> module(ParseDeclsCtx& ctx) {
return Ok{};
}
-} // anonymous namespace
-
-Result<> parseModule(Module& wasm, std::string_view input) {
- // Parse module-level declarations.
- ParseDeclsCtx decls(input, wasm);
- CHECK_ERR(module(decls));
- if (!decls.in.empty()) {
- return decls.in.err("Unexpected tokens after module");
- }
-
- auto typeIndices = createIndexMap(decls.in, decls.subtypeDefs);
- CHECK_ERR(typeIndices);
-
- // Parse type definitions.
- std::vector<HeapType> types;
- {
- TypeBuilder builder(decls.subtypeDefs.size());
- ParseTypeDefsCtx ctx(input, builder, *typeIndices);
- for (auto& typeDef : decls.typeDefs) {
- WithPosition with(ctx, typeDef.pos);
- CHECK_ERR(deftype(ctx));
- }
- auto built = builder.build();
- if (auto* err = built.getError()) {
- std::stringstream msg;
- msg << "invalid type: " << err->reason;
- return ctx.in.err(decls.typeDefs[err->index].pos, msg.str());
- }
- types = *built;
- // Record type names on the module.
- for (size_t i = 0; i < types.size(); ++i) {
- auto& names = ctx.names[i];
- if (names.name.is() || names.fieldNames.size()) {
- wasm.typeNames.insert({types[i], names});
- }
- }
- }
-
- // Parse implicit type definitions and map typeuses without explicit types to
- // the correct types.
- std::unordered_map<Index, HeapType> implicitTypes;
- {
- ParseImplicitTypeDefsCtx ctx(input, types, implicitTypes, *typeIndices);
- for (Index pos : decls.implicitTypeDefs) {
- WithPosition with(ctx, pos);
- CHECK_ERR(typeuse(ctx));
- }
- }
-
- {
- // Parse module-level types.
- ParseModuleTypesCtx ctx(input, wasm, types, implicitTypes, *typeIndices);
- CHECK_ERR(parseDefs(ctx, decls.funcDefs, func));
- CHECK_ERR(parseDefs(ctx, decls.memoryDefs, memory));
- CHECK_ERR(parseDefs(ctx, decls.globalDefs, global));
- // TODO: Parse types of other module elements.
- }
- {
- // Parse definitions.
- // TODO: Parallelize this.
- ParseDefsCtx ctx(input, wasm, types, implicitTypes, *typeIndices);
- CHECK_ERR(parseDefs(ctx, decls.globalDefs, global));
- CHECK_ERR(parseDefs(ctx, decls.dataDefs, data));
-
- for (Index i = 0; i < decls.funcDefs.size(); ++i) {
- ctx.index = i;
- ctx.setFunction(wasm.functions[i].get());
- CHECK_ERR(ctx.irBuilder.makeBlock(Name{}, ctx.func->getResults()));
- WithPosition with(ctx, decls.funcDefs[i].pos);
- auto parsed = func(ctx);
- CHECK_ERR(parsed);
- assert(parsed);
- }
- }
-
- return Ok{};
-}
-
} // namespace wasm::WATParser
+
+#endif // parser_parsers_h
diff --git a/src/parser/wat-parser.cpp b/src/parser/wat-parser.cpp
new file mode 100644
index 000000000..7b58be4d5
--- /dev/null
+++ b/src/parser/wat-parser.cpp
@@ -0,0 +1,172 @@
+/*
+ * Copyright 2023 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "wat-parser.h"
+#include "contexts.h"
+#include "ir/names.h"
+#include "lexer.h"
+#include "parsers.h"
+#include "wasm-type.h"
+#include "wasm.h"
+
+// The WebAssembly text format is recursive in the sense that elements may be
+// referred to before they are declared. Furthermore, elements may be referred
+// to by index or by name. As a result, we need to parse text modules in
+// multiple phases.
+//
+// In the first phase, we find all of the module element declarations and
+// record, but do not interpret, the input spans of their corresponding
+// definitions. This phase establishes the indices and names of each module
+// element so that subsequent phases can look them up.
+//
+// The second phase parses type definitions to construct the types used in the
+// module. This has to be its own phase because we have no way to refer to a
+// type before it has been built along with all the other types, unlike for
+// other module elements that can be referred to by name before their
+// definitions have been parsed.
+//
+// The third phase further parses and constructs types implicitly defined by
+// type uses in functions, blocks, and call_indirect instructions. These
+// implicitly defined types may be referred to by index elsewhere.
+//
+// The fourth phase parses and sets the types of globals, functions, and other
+// top-level module elements. These types need to be set before we parse
+// instructions because they determine the types of instructions such as
+// global.get and ref.func.
+//
+// The fifth and final phase parses the remaining contents of all module
+// elements, including instructions.
+//
+// Each phase of parsing gets its own context type that is passed to the
+// individual parsing functions. There is a parsing function for each element of
+// the grammar given in the spec. Parsing functions are templatized so that they
+// may be passed the appropriate context type and return the correct result type
+// for each phase.
+
+namespace wasm::WATParser {
+
+namespace {
+
+Result<IndexMap> createIndexMap(ParseInput& in,
+ const std::vector<DefPos>& defs) {
+ IndexMap indices;
+ for (auto& def : defs) {
+ if (def.name.is()) {
+ if (!indices.insert({def.name, def.index}).second) {
+ return in.err(def.pos, "duplicate element name");
+ }
+ }
+ }
+ return indices;
+}
+
+template<typename Ctx>
+Result<> parseDefs(Ctx& ctx,
+ const std::vector<DefPos>& defs,
+ MaybeResult<> (*parser)(Ctx&)) {
+ for (auto& def : defs) {
+ ctx.index = def.index;
+ WithPosition with(ctx, def.pos);
+ auto parsed = parser(ctx);
+ CHECK_ERR(parsed);
+ assert(parsed);
+ }
+ return Ok{};
+}
+
+// ================
+// Parser Functions
+// ================
+
+} // anonymous namespace
+
+Result<> parseModule(Module& wasm, std::string_view input) {
+ // Parse module-level declarations.
+ ParseDeclsCtx decls(input, wasm);
+ CHECK_ERR(module(decls));
+ if (!decls.in.empty()) {
+ return decls.in.err("Unexpected tokens after module");
+ }
+
+ auto typeIndices = createIndexMap(decls.in, decls.subtypeDefs);
+ CHECK_ERR(typeIndices);
+
+ // Parse type definitions.
+ std::vector<HeapType> types;
+ {
+ TypeBuilder builder(decls.subtypeDefs.size());
+ ParseTypeDefsCtx ctx(input, builder, *typeIndices);
+ for (auto& typeDef : decls.typeDefs) {
+ WithPosition with(ctx, typeDef.pos);
+ CHECK_ERR(deftype(ctx));
+ }
+ auto built = builder.build();
+ if (auto* err = built.getError()) {
+ std::stringstream msg;
+ msg << "invalid type: " << err->reason;
+ return ctx.in.err(decls.typeDefs[err->index].pos, msg.str());
+ }
+ types = *built;
+ // Record type names on the module.
+ for (size_t i = 0; i < types.size(); ++i) {
+ auto& names = ctx.names[i];
+ if (names.name.is() || names.fieldNames.size()) {
+ wasm.typeNames.insert({types[i], names});
+ }
+ }
+ }
+
+ // Parse implicit type definitions and map typeuses without explicit types to
+ // the correct types.
+ std::unordered_map<Index, HeapType> implicitTypes;
+ {
+ ParseImplicitTypeDefsCtx ctx(input, types, implicitTypes, *typeIndices);
+ for (Index pos : decls.implicitTypeDefs) {
+ WithPosition with(ctx, pos);
+ CHECK_ERR(typeuse(ctx));
+ }
+ }
+
+ {
+ // Parse module-level types.
+ ParseModuleTypesCtx ctx(input, wasm, types, implicitTypes, *typeIndices);
+ CHECK_ERR(parseDefs(ctx, decls.funcDefs, func));
+ CHECK_ERR(parseDefs(ctx, decls.memoryDefs, memory));
+ CHECK_ERR(parseDefs(ctx, decls.globalDefs, global));
+ // TODO: Parse types of other module elements.
+ }
+ {
+ // Parse definitions.
+ // TODO: Parallelize this.
+ ParseDefsCtx ctx(input, wasm, types, implicitTypes, *typeIndices);
+ CHECK_ERR(parseDefs(ctx, decls.globalDefs, global));
+ CHECK_ERR(parseDefs(ctx, decls.dataDefs, data));
+
+ for (Index i = 0; i < decls.funcDefs.size(); ++i) {
+ ctx.index = i;
+ ctx.setFunction(wasm.functions[i].get());
+ CHECK_ERR(ctx.irBuilder.makeBlock(Name{}, ctx.func->getResults()));
+ WithPosition with(ctx, decls.funcDefs[i].pos);
+ auto parsed = func(ctx);
+ CHECK_ERR(parsed);
+ assert(parsed);
+ }
+ }
+
+ return Ok{};
+}
+
+} // namespace wasm::WATParser
diff --git a/src/parser/wat-parser.h b/src/parser/wat-parser.h
new file mode 100644
index 000000000..d3ad8d7f3
--- /dev/null
+++ b/src/parser/wat-parser.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2023 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef parser_wat_parser_h
+#define parser_wat_parser_h
+
+#include <string_view>
+
+#include "support/result.h"
+#include "wasm.h"
+
+namespace wasm::WATParser {
+
+// Parse a single WAT module.
+Result<> parseModule(Module& wasm, std::string_view in);
+
+} // namespace wasm::WATParser
+
+#endif // parser_wat_parser_h
diff --git a/src/wasm/CMakeLists.txt b/src/wasm/CMakeLists.txt
index 30f52f9ee..d5b4f6747 100644
--- a/src/wasm/CMakeLists.txt
+++ b/src/wasm/CMakeLists.txt
@@ -13,8 +13,6 @@ set(wasm_SOURCES
wasm-stack.cpp
wasm-type.cpp
wasm-validator.cpp
- wat-lexer.cpp
- wat-parser.cpp
${wasm_HEADERS}
)
# wasm-debug.cpp includes LLVM header using std::iterator (deprecated in C++17)
diff --git a/test/gtest/wat-lexer.cpp b/test/gtest/wat-lexer.cpp
index 83a365054..2ae29a59f 100644
--- a/test/gtest/wat-lexer.cpp
+++ b/test/gtest/wat-lexer.cpp
@@ -16,7 +16,7 @@
#include <cmath>
-#include "wat-lexer.h"
+#include "parser/lexer.h"
#include "gtest/gtest.h"
using namespace wasm::WATParser;