/*
 * Copyright 2024 WebAssembly Community Group participants
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <memory>
#include <sstream>
#include <string>
#include <string_view>
#include <utility>

#include "lexer.h"
#include "literal.h"
#include "wat-parser.h"

// Parser for WebAssembly test scripts: a sequence of modules, `register`
// commands, actions (`invoke` / `get`) and assertions. Every parsing function
// takes the lexer by reference and consumes tokens from it as it goes.
//
// NOTE(review): the template arguments of `Result` / `MaybeResult` and of
// `std::make_shared` were reconstructed from the values each function returns.
// `Action`, `ExpectedResult`, `WASTCommand` and `Module` are not otherwise
// visible in this file - confirm them against wat-parser.h.

namespace wasm::WATParser {

using namespace std::string_view_literals;

namespace {

// Parse a single constant.
//
// `(ref.extern n)` is handled here: the i32 payload `n` becomes an
// externalized, unshared i31 literal. Anything else is delegated to
// `parseConst`.
Result<Literal> const_(Lexer& in) {
  if (in.takeSExprStart("ref.extern"sv)) {
    auto n = in.takeI32();
    if (!n) {
      return in.err("expected host reference payload");
    }
    if (!in.takeRParen()) {
      return in.err("expected end of ref.extern");
    }
    // Represent host references as externalized i31s.
    return Literal::makeI31(*n, Unshared).externalize();
  }
  return parseConst(in);
}

// Parse constants until the next token is a right paren. The paren itself is
// only peeked at and is left for the caller to consume.
Result<Literals> consts(Lexer& in) {
  Literals lits;
  while (!in.peekRParen()) {
    auto l = const_(in);
    CHECK_ERR(l);
    lits.push_back(*l);
  }
  return lits;
}

// (invoke id? name const*)
// (get id? name)
//
// Returns an empty result, without reporting an error, when the input starts
// with neither `(invoke` nor `(get`.
MaybeResult<Action> maybeAction(Lexer& in) {
  if (in.takeSExprStart("invoke"sv)) {
    // The module id is optional, so its result is forwarded unchecked.
    auto id = in.takeID();
    auto name = in.takeName();
    if (!name) {
      return in.err("expected export name");
    }
    auto args = consts(in);
    CHECK_ERR(args);
    if (!in.takeRParen()) {
      return in.err("expected end of invoke action");
    }
    return InvokeAction{id, *name, *args};
  }
  if (in.takeSExprStart("get"sv)) {
    // The module id is optional here as well.
    auto id = in.takeID();
    auto name = in.takeName();
    if (!name) {
      return in.err("expected export name");
    }
    if (!in.takeRParen()) {
      return in.err("expected end of get action");
    }
    return GetAction{id, *name};
  }
  return {};
}

// Like `maybeAction`, but the absence of an action is an error.
Result<Action> action(Lexer& in) {
  if (auto a = maybeAction(in)) {
    CHECK_ERR(a);
    return *a;
  }
  return in.err("expected action");
}

// (module id? binary string*)
// (module id? quote string*)
// (module ...)
//
// When `maybeInvalid` is true, an inline `(module ...)` is not parsed. Its
// source text is captured and returned as a quoted text module instead, so
// that a module which cannot be parsed does not abort parsing of the script.
Result<WASTModule> wastModule(Lexer& in, bool maybeInvalid = false) {
  // Snapshot of the lexer at the start of the module, used both to re-parse an
  // inline module from its beginning and to compute the captured text span.
  Lexer reset = in;
  if (!in.takeSExprStart("module"sv)) {
    return in.err("expected module");
  }
  // TODO: use ID?
  [[maybe_unused]] auto id = in.takeID();
  QuotedModuleType type;
  if (in.takeKeyword("quote"sv)) {
    type = QuotedModuleType::Text;
  } else if (in.takeKeyword("binary")) {
    type = QuotedModuleType::Binary;
  } else if (maybeInvalid) {
    // This is not a quoted text or binary module, so it must be a normal inline
    // module, but we might not be able to parse it. Treat it as though it were
    // a quoted module instead.
    //
    // Track paren nesting depth; we are already inside the `(module`.
    int count = 1;
    while (count && in.takeUntilParen()) {
      if (in.takeLParen()) {
        ++count;
      } else if (in.takeRParen()) {
        --count;
      } else {
        return in.err("unexpected end of script");
      }
    }
    // Copy the text between the saved start position and the current position.
    std::string mod(reset.next().substr(0, in.getPos() - reset.getPos()));
    return QuotedModule{QuotedModuleType::Text, mod};
  } else {
    // This is a normal inline module that should be parseable. Reset to the
    // start and parse it normally.
    in = std::move(reset);
    auto wasm = std::make_shared<Module>();
    CHECK_ERR(parseModule(*wasm, in));
    return wasm;
  }

  // We have a quote or binary module. Collect its contents by concatenating
  // all of its string tokens.
  std::stringstream ss;
  while (auto s = in.takeString()) {
    ss << *s;
  }

  if (!in.takeRParen()) {
    return in.err("expected end of module");
  }

  return QuotedModule{type, ss.str()};
}

// Parse a NaN result pattern: `nan:canonical` or `nan:arithmetic`.
Result<NaNKind> nan(Lexer& in) {
  if (in.takeKeyword("nan:canonical"sv)) {
    return NaNKind::Canonical;
  }
  if (in.takeKeyword("nan:arithmetic"sv)) {
    return NaNKind::Arithmetic;
  }
  return in.err("expected NaN result pattern");
}

// Parse one expected result of an `assert_return`. This is one of:
//  - an ordinary constant,
//  - an f32 / f64 NaN pattern,
//  - a v128 pattern (f32x4 or f64x2) whose lanes are floats or NaN patterns,
//  - a reference pattern such as `(ref.func)` that names only a heap type.
Result<ExpectedResult> result(Lexer& in) {
  // Try a plain constant first on a copy of the lexer, so that a failed
  // attempt consumes nothing from `in`.
  Lexer constLexer = in;
  auto c = const_(constLexer);
  // TODO: Generating and discarding errors like this can lead to quadratic
  // behavior. Optimize this if necessary.
  if (!c.getErr()) {
    in = constLexer;
    return *c;
  }

  // If we failed to parse a constant, we must have either a nan pattern or a
  // reference.
  if (in.takeSExprStart("f32.const"sv)) {
    auto kind = nan(in);
    CHECK_ERR(kind);
    if (!in.takeRParen()) {
      return in.err("expected end of f32.const");
    }
    return NaNResult{*kind, Type::f32};
  }

  if (in.takeSExprStart("f64.const"sv)) {
    auto kind = nan(in);
    CHECK_ERR(kind);
    if (!in.takeRParen()) {
      return in.err("expected end of f64.const");
    }
    return NaNResult{*kind, Type::f64};
  }

  if (in.takeSExprStart("v128.const"sv)) {
    LaneResults lanes;
    if (in.takeKeyword("f32x4"sv)) {
      // Each of the four lanes is either a float literal or a NaN pattern.
      for (int i = 0; i < 4; ++i) {
        if (auto f = in.takeF32()) {
          lanes.push_back(Literal(*f));
        } else {
          auto kind = nan(in);
          CHECK_ERR(kind);
          lanes.push_back(NaNResult{*kind, Type::f32});
        }
      }
    } else if (in.takeKeyword("f64x2"sv)) {
      // Each of the two lanes is either a float literal or a NaN pattern.
      for (int i = 0; i < 2; ++i) {
        if (auto f = in.takeF64()) {
          lanes.push_back(Literal(*f));
        } else {
          auto kind = nan(in);
          CHECK_ERR(kind);
          lanes.push_back(NaNResult{*kind, Type::f64});
        }
      }
    } else {
      return in.err("unexpected vector shape");
    }
    if (!in.takeRParen()) {
      return in.err("expected end of v128.const");
    }
    return lanes;
  }

  // Reference patterns carry no payload. `(ref.extern n)` with a payload was
  // already tried as a constant above; only the bare form is handled here.
  if (in.takeSExprStart("ref.extern")) {
    if (!in.takeRParen()) {
      return in.err("expected end of ref.extern");
    }
    return RefResult{HeapType::ext};
  }

  if (in.takeSExprStart("ref.func")) {
    if (!in.takeRParen()) {
      return in.err("expected end of ref.func");
    }
    return RefResult{HeapType::func};
  }

  if (in.takeSExprStart("ref.struct")) {
    if (!in.takeRParen()) {
      return in.err("expected end of ref.struct");
    }
    return RefResult{HeapType::struct_};
  }

  if (in.takeSExprStart("ref.array")) {
    if (!in.takeRParen()) {
      return in.err("expected end of ref.array");
    }
    return RefResult{HeapType::array};
  }

  if (in.takeSExprStart("ref.eq")) {
    if (!in.takeRParen()) {
      return in.err("expected end of ref.eq");
    }
    return RefResult{HeapType::eq};
  }

  if (in.takeSExprStart("ref.i31")) {
    if (!in.takeRParen()) {
      return in.err("expected end of ref.i31");
    }
    return RefResult{HeapType::i31};
  }

  if (in.takeSExprStart("ref.i31_shared")) {
    if (!in.takeRParen()) {
      return in.err("expected end of ref.i31_shared");
    }
    return RefResult{HeapTypes::i31.getBasic(Shared)};
  }

  return in.err("unrecognized result");
}

// Parse expected results until the next token is a right paren, which is left
// for the caller to consume.
Result<ExpectedResults> results(Lexer& in) {
  ExpectedResults res;
  while (!in.peekRParen()) {
    auto r = result(in);
    CHECK_ERR(r);
    res.emplace_back(std::move(*r));
  }
  return res;
}

// (assert_return action result*)
MaybeResult<AssertReturn> assertReturn(Lexer& in) {
  if (!in.takeSExprStart("assert_return"sv)) {
    return {};
  }
  auto a = action(in);
  CHECK_ERR(a);
  auto expected = results(in);
  CHECK_ERR(expected);
  if (!in.takeRParen()) {
    return in.err("expected end of assert_return");
  }
  return AssertReturn{*a, *expected};
}

// (assert_exception action)
MaybeResult<AssertAction> assertException(Lexer& in) {
  if (!in.takeSExprStart("assert_exception"sv)) {
    return {};
  }
  auto a = action(in);
  CHECK_ERR(a);
  if (!in.takeRParen()) {
    return in.err("expected end of assert_exception");
  }
  return AssertAction{ActionAssertionType::Exception, *a};
}

// (assert_exhaustion action msg)
//
// The message string is required by the syntax but is not stored in the
// returned assertion.
MaybeResult<AssertAction> assertAction(Lexer& in) {
  ActionAssertionType type;
  if (in.takeSExprStart("assert_exhaustion"sv)) {
    type = ActionAssertionType::Exhaustion;
  } else {
    return {};
  }

  auto a = action(in);
  CHECK_ERR(a);
  auto msg = in.takeString();
  if (!msg) {
    return in.err("expected error message");
  }
  if (!in.takeRParen()) {
    return in.err("expected end of assertion");
  }
  return AssertAction{type, *a};
}

// (assert_malformed module msg)
// (assert_invalid module msg)
// (assert_unlinkable module msg)
//
// The message string is required by the syntax but is not stored in the
// returned assertion.
MaybeResult<AssertModule> assertModule(Lexer& in) {
  ModuleAssertionType type;
  if (in.takeSExprStart("assert_malformed"sv)) {
    type = ModuleAssertionType::Malformed;
  } else if (in.takeSExprStart("assert_invalid"sv)) {
    type = ModuleAssertionType::Invalid;
  } else if (in.takeSExprStart("assert_unlinkable"sv)) {
    type = ModuleAssertionType::Unlinkable;
  } else {
    return {};
  }

  // Only `assert_invalid` modules are captured as text rather than parsed.
  auto mod = wastModule(in, type == ModuleAssertionType::Invalid);
  CHECK_ERR(mod);
  auto msg = in.takeString();
  if (!msg) {
    return in.err("expected error message");
  }
  if (!in.takeRParen()) {
    return in.err("expected end of assertion");
  }
  return AssertModule{type, *mod};
}

// (assert_trap action msg)
// (assert_trap module msg)
//
// The message string is required by the syntax but is not stored in the
// returned assertion.
MaybeResult<Assertion> assertTrap(Lexer& in) {
  if (!in.takeSExprStart("assert_trap"sv)) {
    return {};
  }
  // Remember where the operand starts so that the "expected action or module"
  // error below is reported at that position.
  auto pos = in.getPos();
  if (auto a = maybeAction(in)) {
    CHECK_ERR(a);
    auto msg = in.takeString();
    if (!msg) {
      return in.err("expected error message");
    }
    if (!in.takeRParen()) {
      return in.err("expected end of assertion");
    }
    return Assertion{AssertAction{ActionAssertionType::Trap, *a}};
  }
  auto mod = wastModule(in);
  if (mod.getErr()) {
    return in.err(pos, "expected action or module");
  }
  auto msg = in.takeString();
  if (!msg) {
    return in.err("expected error message");
  }
  if (!in.takeRParen()) {
    return in.err("expected end of assertion");
  }
  return Assertion{AssertModule{ModuleAssertionType::Trap, *mod}};
}

// Try each kind of assertion in turn. Returns an empty result when the input
// does not start with any of them.
MaybeResult<Assertion> assertion(Lexer& in) {
  if (auto a = assertReturn(in)) {
    CHECK_ERR(a);
    return Assertion{*a};
  }
  if (auto a = assertException(in)) {
    CHECK_ERR(a);
    return Assertion{*a};
  }
  if (auto a = assertAction(in)) {
    CHECK_ERR(a);
    return Assertion{*a};
  }
  if (auto a = assertModule(in)) {
    CHECK_ERR(a);
    return Assertion{*a};
  }
  if (auto a = assertTrap(in)) {
    CHECK_ERR(a);
    return *a;
  }
  return {};
}

// (register name id?)
MaybeResult<Register> register_(Lexer& in) {
  if (!in.takeSExprStart("register"sv)) {
    return {};
  }
  auto name = in.takeName();
  if (!name) {
    return in.err("expected name");
  }

  // TODO: Do we need to use this optional id?
  in.takeID();

  if (!in.takeRParen()) {
    // TODO: handle optional module id.
    return in.err("expected end of register command");
  }
  return Register{*name};
}

// module | register | action | assertion
//
// The alternatives that can report "not present" are tried first; a module is
// the final fallback, so its error is the one reported when nothing matches.
Result<WASTCommand> command(Lexer& in) {
  if (auto cmd = register_(in)) {
    CHECK_ERR(cmd);
    return *cmd;
  }
  if (auto cmd = maybeAction(in)) {
    CHECK_ERR(cmd);
    return *cmd;
  }
  if (auto cmd = assertion(in)) {
    CHECK_ERR(cmd);
    return *cmd;
  }
  auto mod = wastModule(in);
  CHECK_ERR(mod);
  return *mod;
}

// -Wmaybe-uninitialized is suppressed for the function below.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"

// Parse commands until the input is exhausted, recording for each one the line
// on which it starts.
Result<WASTScript> wast(Lexer& in) {
  WASTScript cmds;
  while (!in.empty()) {
    size_t line = in.position().line;
    auto cmd = command(in);
    if (auto* err = cmd.getErr(); err && cmds.empty()) {
      // The entire script might be a single module comprising a sequence of
      // module fields with a top-level `(module ...)`.
      // NOTE(review): "without a top-level `(module ...)`" may have been
      // meant here - confirm against what parseModule accepts.
      //
      // The whole buffer is re-parsed as one module; this is attempted only
      // when the very first command fails.
      auto wasm = std::make_shared<Module>();
      auto parsed = parseModule(*wasm, in.buffer);
      if (parsed.getErr()) {
        // No, that wasn't the problem. Return the original error.
        return Err{err->msg};
      }
      cmds.push_back({WASTModule{std::move(wasm)}, line});
      return cmds;
    }
    CHECK_ERR(cmd);
    cmds.push_back(ScriptEntry{std::move(*cmd), line});
  }
  return cmds;
}

#pragma GCC diagnostic pop

} // anonymous namespace

// Parse the text `in` as a complete test script.
Result<WASTScript> parseScript(std::string_view in) {
  Lexer lexer(in);
  return wast(lexer);
}

} // namespace wasm::WATParser