From c183dc9ce6b6e14581078ba42ff1824f922234ca Mon Sep 17 00:00:00 2001
From: Thomas Lively
Date: Wed, 24 Apr 2024 11:07:28 -0700
Subject: [Parser] Use the new parser in wasm-shell and wasm-as (#6529)

Updating just one or the other of these tools would cause the tests
spec/import-after-*.fail.wast to fail, since only the updated tool would
correctly fail to parse its contents. To avoid this, update both tools at once.
(The tests erroneously pass before this change because check.py does not ensure
that .fail.wast tests fail, only that failing tests end in .fail.wast.)

In wasm-shell, to minimize the diff, only use the new parser to parse modules
and instructions. Continue using the legacy parsing based on s-expressions for
the other wast commands. Updating the parsing of the other commands to use
`Lexer` instead of `SExpressionParser` is left as future work. The boundary
between the two parsing styles is somewhat hacky, but it is worth it to enable
incremental development.

Update the tests to fix incorrect wast rejected by the new parser. Many of the
spec/old_* tests use non-standard forms from before Wasm MVP was standardized,
so fixing them would have been onerous. All of these tests have non-old_*
variants, so simply delete them.
--- src/parser/contexts.h | 12 ++-- src/parser/lexer.h | 3 +- src/parser/wat-parser.cpp | 29 +++++++--- src/parser/wat-parser.h | 7 +++ src/tools/wasm-as.cpp | 18 ++---- src/tools/wasm-shell.cpp | 141 +++++++++++++++++++++++++++++++--------------- 6 files changed, 136 insertions(+), 74 deletions(-) (limited to 'src') diff --git a/src/parser/contexts.h b/src/parser/contexts.h index 81537abaf..cead35f60 100644 --- a/src/parser/contexts.h +++ b/src/parser/contexts.h @@ -912,7 +912,7 @@ struct ParseDeclsCtx : NullTypeParserCtx, NullInstrParserCtx { return Ok{}; } - ParseDeclsCtx(std::string_view in, Module& wasm) : in(in), wasm(wasm) {} + ParseDeclsCtx(Lexer& in, Module& wasm) : in(in), wasm(wasm) {} void addFuncType(SignatureT) {} void addContType(ContinuationT) {} @@ -1049,9 +1049,7 @@ struct ParseTypeDefsCtx : TypeParserCtx { // The index of the subtype definition we are parsing. Index index = 0; - ParseTypeDefsCtx(std::string_view in, - TypeBuilder& builder, - const IndexMap& typeIndices) + ParseTypeDefsCtx(Lexer& in, TypeBuilder& builder, const IndexMap& typeIndices) : TypeParserCtx(typeIndices), in(in), builder(builder), names(builder.size()) {} @@ -1121,7 +1119,7 @@ struct ParseImplicitTypeDefsCtx : TypeParserCtx { // Map signatures to the first defined heap type they match. 
std::unordered_map sigTypes; - ParseImplicitTypeDefsCtx(std::string_view in, + ParseImplicitTypeDefsCtx(Lexer& in, std::vector& types, std::unordered_map& implicitTypes, const IndexMap& typeIndices) @@ -1192,7 +1190,7 @@ struct ParseModuleTypesCtx : TypeParserCtx, Index index = 0; ParseModuleTypesCtx( - std::string_view in, + Lexer& in, Module& wasm, const std::vector& types, const std::unordered_map& implicitTypes, @@ -1397,7 +1395,7 @@ struct ParseDefsCtx : TypeParserCtx { } ParseDefsCtx( - std::string_view in, + Lexer& in, Module& wasm, const std::vector& types, const std::unordered_map& implicitTypes, diff --git a/src/parser/lexer.h b/src/parser/lexer.h index 1a93d3e99..e601091db 100644 --- a/src/parser/lexer.h +++ b/src/parser/lexer.h @@ -157,12 +157,13 @@ extern Name srcAnnotationKind; struct Lexer { private: - std::string_view buffer; size_t index = 0; std::optional curr; std::vector annotations; public: + std::string_view buffer; + Lexer(std::string_view buffer) : buffer(buffer) { setIndex(0); } size_t getIndex() const { return index; } diff --git a/src/parser/wat-parser.cpp b/src/parser/wat-parser.cpp index 7f6dd2975..cc7d87540 100644 --- a/src/parser/wat-parser.cpp +++ b/src/parser/wat-parser.cpp @@ -99,17 +99,11 @@ void propagateDebugLocations(Module& wasm) { runner.run(); } -// ================ -// Parser Functions -// ================ - -} // anonymous namespace - -Result<> parseModule(Module& wasm, std::string_view input) { +Result<> doParseModule(Module& wasm, Lexer& input, bool allowExtra) { // Parse module-level declarations. 
ParseDeclsCtx decls(input, wasm); CHECK_ERR(module(decls)); - if (!decls.in.empty()) { + if (!allowExtra && !decls.in.empty()) { return decls.in.err("Unexpected tokens after module"); } @@ -222,8 +216,27 @@ Result<> parseModule(Module& wasm, std::string_view input) { } propagateDebugLocations(wasm); + input = decls.in; return Ok{}; } +} // anonymous namespace + +Result<> parseModule(Module& wasm, std::string_view in) { + Lexer lexer(in); + return doParseModule(wasm, lexer, false); +} + +Result<> parseModule(Module& wasm, Lexer& lexer) { + return doParseModule(wasm, lexer, true); +} + +Result parseExpression(Module& wasm, Lexer& lexer) { + ParseDefsCtx ctx(lexer, wasm, {}, {}, {}, {}, {}); + auto e = expr(ctx); + CHECK_ERR(e); + return *e; +} + } // namespace wasm::WATParser diff --git a/src/parser/wat-parser.h b/src/parser/wat-parser.h index b31523af9..3f7dd64c4 100644 --- a/src/parser/wat-parser.h +++ b/src/parser/wat-parser.h @@ -19,6 +19,7 @@ #include +#include "parser/lexer.h" #include "support/result.h" #include "wasm.h" @@ -27,6 +28,12 @@ namespace wasm::WATParser { // Parse a single WAT module. Result<> parseModule(Module& wasm, std::string_view in); +// Parse a single WAT module that may have other things after it, as in a wast +// file. +Result<> parseModule(Module& wasm, Lexer& lexer); + +Result parseExpression(Module& wasm, Lexer& lexer); + } // namespace wasm::WATParser #endif // parser_wat_parser_h diff --git a/src/tools/wasm-as.cpp b/src/tools/wasm-as.cpp index cc4f6fda2..311605326 100644 --- a/src/tools/wasm-as.cpp +++ b/src/tools/wasm-as.cpp @@ -18,10 +18,10 @@ // wasm2asm console tool // +#include "parser/wat-parser.h" #include "support/colors.h" #include "support/file.h" #include "wasm-io.h" -#include "wasm-s-parser.h" #include "wasm-validator.h" #include "tool-options.h" @@ -109,19 +109,9 @@ int main(int argc, const char* argv[]) { Module wasm; options.applyFeatures(wasm); - try { - if (options.debug) { - std::cerr << "s-parsing..." 
<< std::endl; - } - SExpressionParser parser(const_cast(input.c_str())); - Element& root = *parser.root; - if (options.debug) { - std::cerr << "w-parsing..." << std::endl; - } - SExpressionWasmBuilder builder(wasm, *root[0], options.profile); - } catch (ParseException& p) { - p.dump(std::cerr); - Fatal() << "error in parsing input"; + auto parsed = WATParser::parseModule(wasm, input); + if (auto* err = parsed.getErr()) { + Fatal() << err->msg; } if (options.extra["validate"] != "none") { diff --git a/src/tools/wasm-shell.cpp b/src/tools/wasm-shell.cpp index b282e1885..625914cbc 100644 --- a/src/tools/wasm-shell.cpp +++ b/src/tools/wasm-shell.cpp @@ -23,16 +23,21 @@ #include "execution-results.h" #include "ir/element-utils.h" +#include "parser/lexer.h" +#include "parser/wat-parser.h" #include "pass.h" #include "shell-interface.h" #include "support/command-line.h" #include "support/file.h" +#include "support/result.h" #include "wasm-interpreter.h" #include "wasm-s-parser.h" #include "wasm-validator.h" using namespace wasm; +using Lexer = WATParser::Lexer; + Name ASSERT_RETURN("assert_return"); Name ASSERT_TRAP("assert_trap"); Name ASSERT_EXCEPTION("assert_exception"); @@ -46,7 +51,6 @@ Name GET("get"); class Shell { protected: std::map> modules; - std::map> builders; std::map> interfaces; std::map> instances; // used for imports @@ -63,11 +67,57 @@ protected: instances[wasm->name].swap(tempInstance); } - void parse(Element& s) { + Result parseSExpr(Lexer& lexer) { + auto begin = lexer.getPos(); + + if (!lexer.takeLParen()) { + return lexer.err("expected s-expression"); + } + + size_t count = 1; + while (count != 0 && lexer.takeUntilParen()) { + if (lexer.takeLParen()) { + ++count; + } else if (lexer.takeRParen()) { + --count; + } else { + WASM_UNREACHABLE("unexpected token"); + } + } + + if (count != 0) { + return lexer.err("unexpected unterminated s-expression"); + } + + return std::string(lexer.buffer.substr(begin, lexer.getPos() - begin)); + } + + Expression* 
parseExpression(Module& wasm, Element& s) { + std::stringstream ss; + ss << s; + auto str = ss.str(); + Lexer lexer(str); + auto arg = WATParser::parseExpression(wasm, lexer); + if (auto* err = arg.getErr()) { + Fatal() << err->msg << '\n'; + } + return *arg; + } + + Result<> parse(Lexer& lexer) { + if (auto res = parseModule(lexer)) { + CHECK_ERR(res); + return Ok{}; + } + + auto pos = lexer.getPos(); + auto sexpr = parseSExpr(lexer); + CHECK_ERR(sexpr); + + SExpressionParser parser(sexpr->data()); + Element& s = *parser.root[0][0]; IString id = s[0]->str(); - if (id == MODULE) { - parseModule(s); - } else if (id == REGISTER) { + if (id == REGISTER) { parseRegister(s); } else if (id == INVOKE) { parseOperation(s); @@ -77,26 +127,28 @@ protected: parseAssertTrap(s); } else if (id == ASSERT_EXCEPTION) { parseAssertException(s); - } else if ((id == ASSERT_INVALID) || (id == ASSERT_MALFORMED)) { + } else if ((id == ASSERT_INVALID) || (id == ASSERT_MALFORMED) || + (id == ASSERT_UNLINKABLE)) { parseModuleAssertion(s); } else { - Fatal() << s.line << ": unknown command\n"; + return lexer.err(pos, "unrecognized command"); } + return Ok{}; } - Module* parseModule(Element& s) { - if (options.debug) { - std::cerr << "parsing s-expressions to wasm...\n"; + MaybeResult<> parseModule(Lexer& lexer) { + if (!lexer.peekSExprStart("module")) { + return {}; } Colors::green(std::cerr); - std::cerr << "BUILDING MODULE [line: " << s.line << "]\n"; + std::cerr << "BUILDING MODULE [line: " << lexer.position().line << "]\n"; Colors::normal(std::cerr); auto module = std::make_shared(); - auto builder = - std::make_shared(*module, s, IRProfile::Normal); + + CHECK_ERR(WATParser::parseModule(*module, lexer)); + auto moduleName = module->name; lastModule = module->name; - builders[moduleName] = builder; modules[moduleName].swap(module); modules[moduleName]->features = FeatureSet::All; bool valid = WasmValidator().validate(*modules[moduleName]); @@ -106,8 +158,7 @@ protected: } 
instantiate(modules[moduleName].get()); - - return modules[moduleName].get(); + return Ok{}; } void parseRegister(Element& s) { @@ -121,7 +172,6 @@ protected: // we copy pointers as a registered module's name might still be used // in an assertion or invoke command. modules[name] = modules[lastModule]; - builders[name] = builders[lastModule]; interfaces[name] = interfaces[lastModule]; instances[name] = instances[lastModule]; @@ -140,18 +190,21 @@ protected: ModuleRunner* instance = instances[moduleName].get(); assert(instance); - Name base = s[i++]->str(); + std::string baseStr = std::string("\"") + s[i++]->str().toString() + "\""; + auto base = Lexer(baseStr).takeString(); + if (!base) { + Fatal() << "expected string\n"; + } if (s[0]->str() == INVOKE) { Literals args; while (i < s.size()) { - Expression* argument = builders[moduleName]->parseExpression(*s[i++]); - args.push_back(getLiteralFromConstExpression(argument)); + auto* arg = parseExpression(*modules[moduleName], *s[i++]); + args.push_back(getLiteralFromConstExpression(arg)); } - - return instance->callExport(base, args); + return instance->callExport(*base, args); } else if (s[0]->str() == GET) { - return instance->getExport(base); + return instance->getExport(*base); } Fatal() << "Invalid operation " << s[0]->toString(); @@ -193,7 +246,7 @@ protected: Literals expected; if (s.size() >= 3) { expected = getLiteralsFromConstExpression( - builders[lastModule]->parseExpression(*s[2])); + parseExpression(*modules[lastModule], *s[2])); } [[maybe_unused]] bool trapped = false; try { @@ -340,29 +393,35 @@ protected: public: Shell(Options& options) : options(options) { buildSpectestModule(); } - bool parseAndRun(Element& root) { + MaybeResult<> parseAndRun(Lexer& lexer) { size_t i = 0; - while (i < root.size()) { - Element& curr = *root[i]; + while (!lexer.empty()) { + auto next = lexer.next(); + auto size = next.find('\n'); + if (size != std::string_view::npos) { + next = next.substr(0, size); + } else { + next = 
""; + } - if (curr[0]->str() != MODULE) { + if (!lexer.peekSExprStart("module")) { Colors::red(std::cerr); - std::cerr << i << '/' << (root.size() - 1); + std::cerr << i; Colors::green(std::cerr); std::cerr << " CHECKING: "; Colors::normal(std::cerr); - std::cerr << curr; + std::cerr << next; Colors::green(std::cerr); - std::cerr << " [line: " << curr.line << "]\n"; + std::cerr << " [line: " << lexer.position().line << "]\n"; Colors::normal(std::cerr); } - parse(curr); + CHECK_ERR(parse(lexer)); i += 1; } - return false; + return Ok{}; } }; @@ -380,21 +439,15 @@ int main(int argc, const char* argv[]) { options.parse(argc, argv); auto input = read_file(infile, Flags::Text); + Lexer lexer(input); - bool checked = false; - try { - if (options.debug) { - std::cerr << "parsing text to s-expressions...\n"; - } - SExpressionParser parser(input.data()); - Element& root = *parser.root; - checked = Shell(options).parseAndRun(root); - } catch (ParseException& p) { - p.dump(std::cerr); + auto result = Shell(options).parseAndRun(lexer); + if (auto* err = result.getErr()) { + std::cerr << err->msg; exit(1); } - if (checked) { + if (result) { Colors::green(std::cerr); Colors::bold(std::cerr); std::cerr << "all checks passed.\n"; -- cgit v1.2.3