summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Thomas Lively <tlively@google.com> 2024-04-24 11:07:28 -0700
committer GitHub <noreply@github.com> 2024-04-24 11:07:28 -0700
commit c183dc9ce6b6e14581078ba42ff1824f922234ca (patch)
tree 75efc6dde4de579fa3e347d7e8b2cc31224cd3ca /src
parent ff02ea0bbe7e3288a2463bb449eb74a2753dda10 (diff)
download binaryen-c183dc9ce6b6e14581078ba42ff1824f922234ca.tar.gz
binaryen-c183dc9ce6b6e14581078ba42ff1824f922234ca.tar.bz2
binaryen-c183dc9ce6b6e14581078ba42ff1824f922234ca.zip
[Parser] Use the new parser in wasm-shell and wasm-as (#6529)
Updating just one or the other of these tools would cause the tests spec/import-after-*.fail.wast to fail, since only the updated tool would correctly fail to parse their contents. To avoid this, update both tools at once. (The tests erroneously pass before this change because check.py does not ensure that .fail.wast tests fail, only that failing tests end in .fail.wast.) In wasm-shell, to minimize the diff, only use the new parser to parse modules and instructions. Continue using the legacy parsing based on s-expressions for the other wast commands. Updating the parsing of the other commands to use `Lexer` instead of `SExpressionParser` is left as future work. The boundary between the two parsing styles is somewhat hacky, but it is worth it to enable incremental development. Update the tests to fix incorrect wast rejected by the new parser. Many of the spec/old_* tests use non-standard forms from before Wasm MVP was standardized, so fixing them would have been onerous. All of these tests have non-old_* variants, so simply delete them.
Diffstat (limited to 'src')
-rw-r--r-- src/parser/contexts.h 12
-rw-r--r-- src/parser/lexer.h 3
-rw-r--r-- src/parser/wat-parser.cpp 29
-rw-r--r-- src/parser/wat-parser.h 7
-rw-r--r-- src/tools/wasm-as.cpp 18
-rw-r--r-- src/tools/wasm-shell.cpp 141
6 files changed, 136 insertions, 74 deletions
diff --git a/src/parser/contexts.h b/src/parser/contexts.h
index 81537abaf..cead35f60 100644
--- a/src/parser/contexts.h
+++ b/src/parser/contexts.h
@@ -912,7 +912,7 @@ struct ParseDeclsCtx : NullTypeParserCtx, NullInstrParserCtx {
return Ok{};
}
- ParseDeclsCtx(std::string_view in, Module& wasm) : in(in), wasm(wasm) {}
+ ParseDeclsCtx(Lexer& in, Module& wasm) : in(in), wasm(wasm) {}
void addFuncType(SignatureT) {}
void addContType(ContinuationT) {}
@@ -1049,9 +1049,7 @@ struct ParseTypeDefsCtx : TypeParserCtx<ParseTypeDefsCtx> {
// The index of the subtype definition we are parsing.
Index index = 0;
- ParseTypeDefsCtx(std::string_view in,
- TypeBuilder& builder,
- const IndexMap& typeIndices)
+ ParseTypeDefsCtx(Lexer& in, TypeBuilder& builder, const IndexMap& typeIndices)
: TypeParserCtx<ParseTypeDefsCtx>(typeIndices), in(in), builder(builder),
names(builder.size()) {}
@@ -1121,7 +1119,7 @@ struct ParseImplicitTypeDefsCtx : TypeParserCtx<ParseImplicitTypeDefsCtx> {
// Map signatures to the first defined heap type they match.
std::unordered_map<Signature, HeapType> sigTypes;
- ParseImplicitTypeDefsCtx(std::string_view in,
+ ParseImplicitTypeDefsCtx(Lexer& in,
std::vector<HeapType>& types,
std::unordered_map<Index, HeapType>& implicitTypes,
const IndexMap& typeIndices)
@@ -1192,7 +1190,7 @@ struct ParseModuleTypesCtx : TypeParserCtx<ParseModuleTypesCtx>,
Index index = 0;
ParseModuleTypesCtx(
- std::string_view in,
+ Lexer& in,
Module& wasm,
const std::vector<HeapType>& types,
const std::unordered_map<Index, HeapType>& implicitTypes,
@@ -1397,7 +1395,7 @@ struct ParseDefsCtx : TypeParserCtx<ParseDefsCtx> {
}
ParseDefsCtx(
- std::string_view in,
+ Lexer& in,
Module& wasm,
const std::vector<HeapType>& types,
const std::unordered_map<Index, HeapType>& implicitTypes,
diff --git a/src/parser/lexer.h b/src/parser/lexer.h
index 1a93d3e99..e601091db 100644
--- a/src/parser/lexer.h
+++ b/src/parser/lexer.h
@@ -157,12 +157,13 @@ extern Name srcAnnotationKind;
struct Lexer {
private:
- std::string_view buffer;
size_t index = 0;
std::optional<Token> curr;
std::vector<Annotation> annotations;
public:
+ std::string_view buffer;
+
Lexer(std::string_view buffer) : buffer(buffer) { setIndex(0); }
size_t getIndex() const { return index; }
diff --git a/src/parser/wat-parser.cpp b/src/parser/wat-parser.cpp
index 7f6dd2975..cc7d87540 100644
--- a/src/parser/wat-parser.cpp
+++ b/src/parser/wat-parser.cpp
@@ -99,17 +99,11 @@ void propagateDebugLocations(Module& wasm) {
runner.run();
}
-// ================
-// Parser Functions
-// ================
-
-} // anonymous namespace
-
-Result<> parseModule(Module& wasm, std::string_view input) {
+Result<> doParseModule(Module& wasm, Lexer& input, bool allowExtra) {
// Parse module-level declarations.
ParseDeclsCtx decls(input, wasm);
CHECK_ERR(module(decls));
- if (!decls.in.empty()) {
+ if (!allowExtra && !decls.in.empty()) {
return decls.in.err("Unexpected tokens after module");
}
@@ -222,8 +216,27 @@ Result<> parseModule(Module& wasm, std::string_view input) {
}
propagateDebugLocations(wasm);
+ input = decls.in;
return Ok{};
}
+} // anonymous namespace
+
+Result<> parseModule(Module& wasm, std::string_view in) {
+ Lexer lexer(in);
+ return doParseModule(wasm, lexer, false);
+}
+
+Result<> parseModule(Module& wasm, Lexer& lexer) {
+ return doParseModule(wasm, lexer, true);
+}
+
+Result<Expression*> parseExpression(Module& wasm, Lexer& lexer) {
+ ParseDefsCtx ctx(lexer, wasm, {}, {}, {}, {}, {});
+ auto e = expr(ctx);
+ CHECK_ERR(e);
+ return *e;
+}
+
} // namespace wasm::WATParser
diff --git a/src/parser/wat-parser.h b/src/parser/wat-parser.h
index b31523af9..3f7dd64c4 100644
--- a/src/parser/wat-parser.h
+++ b/src/parser/wat-parser.h
@@ -19,6 +19,7 @@
#include <string_view>
+#include "parser/lexer.h"
#include "support/result.h"
#include "wasm.h"
@@ -27,6 +28,12 @@ namespace wasm::WATParser {
// Parse a single WAT module.
Result<> parseModule(Module& wasm, std::string_view in);
+// Parse a single WAT module that may have other things after it, as in a wast
+// file.
+Result<> parseModule(Module& wasm, Lexer& lexer);
+
+Result<Expression*> parseExpression(Module& wasm, Lexer& lexer);
+
} // namespace wasm::WATParser
#endif // parser_wat_parser_h
diff --git a/src/tools/wasm-as.cpp b/src/tools/wasm-as.cpp
index cc4f6fda2..311605326 100644
--- a/src/tools/wasm-as.cpp
+++ b/src/tools/wasm-as.cpp
@@ -18,10 +18,10 @@
// wasm2asm console tool
//
+#include "parser/wat-parser.h"
#include "support/colors.h"
#include "support/file.h"
#include "wasm-io.h"
-#include "wasm-s-parser.h"
#include "wasm-validator.h"
#include "tool-options.h"
@@ -109,19 +109,9 @@ int main(int argc, const char* argv[]) {
Module wasm;
options.applyFeatures(wasm);
- try {
- if (options.debug) {
- std::cerr << "s-parsing..." << std::endl;
- }
- SExpressionParser parser(const_cast<char*>(input.c_str()));
- Element& root = *parser.root;
- if (options.debug) {
- std::cerr << "w-parsing..." << std::endl;
- }
- SExpressionWasmBuilder builder(wasm, *root[0], options.profile);
- } catch (ParseException& p) {
- p.dump(std::cerr);
- Fatal() << "error in parsing input";
+ auto parsed = WATParser::parseModule(wasm, input);
+ if (auto* err = parsed.getErr()) {
+ Fatal() << err->msg;
}
if (options.extra["validate"] != "none") {
diff --git a/src/tools/wasm-shell.cpp b/src/tools/wasm-shell.cpp
index b282e1885..625914cbc 100644
--- a/src/tools/wasm-shell.cpp
+++ b/src/tools/wasm-shell.cpp
@@ -23,16 +23,21 @@
#include "execution-results.h"
#include "ir/element-utils.h"
+#include "parser/lexer.h"
+#include "parser/wat-parser.h"
#include "pass.h"
#include "shell-interface.h"
#include "support/command-line.h"
#include "support/file.h"
+#include "support/result.h"
#include "wasm-interpreter.h"
#include "wasm-s-parser.h"
#include "wasm-validator.h"
using namespace wasm;
+using Lexer = WATParser::Lexer;
+
Name ASSERT_RETURN("assert_return");
Name ASSERT_TRAP("assert_trap");
Name ASSERT_EXCEPTION("assert_exception");
@@ -46,7 +51,6 @@ Name GET("get");
class Shell {
protected:
std::map<Name, std::shared_ptr<Module>> modules;
- std::map<Name, std::shared_ptr<SExpressionWasmBuilder>> builders;
std::map<Name, std::shared_ptr<ShellExternalInterface>> interfaces;
std::map<Name, std::shared_ptr<ModuleRunner>> instances;
// used for imports
@@ -63,11 +67,57 @@ protected:
instances[wasm->name].swap(tempInstance);
}
- void parse(Element& s) {
+ Result<std::string> parseSExpr(Lexer& lexer) {
+ auto begin = lexer.getPos();
+
+ if (!lexer.takeLParen()) {
+ return lexer.err("expected s-expression");
+ }
+
+ size_t count = 1;
+ while (count != 0 && lexer.takeUntilParen()) {
+ if (lexer.takeLParen()) {
+ ++count;
+ } else if (lexer.takeRParen()) {
+ --count;
+ } else {
+ WASM_UNREACHABLE("unexpected token");
+ }
+ }
+
+ if (count != 0) {
+ return lexer.err("unexpected unterminated s-expression");
+ }
+
+ return std::string(lexer.buffer.substr(begin, lexer.getPos() - begin));
+ }
+
+ Expression* parseExpression(Module& wasm, Element& s) {
+ std::stringstream ss;
+ ss << s;
+ auto str = ss.str();
+ Lexer lexer(str);
+ auto arg = WATParser::parseExpression(wasm, lexer);
+ if (auto* err = arg.getErr()) {
+ Fatal() << err->msg << '\n';
+ }
+ return *arg;
+ }
+
+ Result<> parse(Lexer& lexer) {
+ if (auto res = parseModule(lexer)) {
+ CHECK_ERR(res);
+ return Ok{};
+ }
+
+ auto pos = lexer.getPos();
+ auto sexpr = parseSExpr(lexer);
+ CHECK_ERR(sexpr);
+
+ SExpressionParser parser(sexpr->data());
+ Element& s = *parser.root[0][0];
IString id = s[0]->str();
- if (id == MODULE) {
- parseModule(s);
- } else if (id == REGISTER) {
+ if (id == REGISTER) {
parseRegister(s);
} else if (id == INVOKE) {
parseOperation(s);
@@ -77,26 +127,28 @@ protected:
parseAssertTrap(s);
} else if (id == ASSERT_EXCEPTION) {
parseAssertException(s);
- } else if ((id == ASSERT_INVALID) || (id == ASSERT_MALFORMED)) {
+ } else if ((id == ASSERT_INVALID) || (id == ASSERT_MALFORMED) ||
+ (id == ASSERT_UNLINKABLE)) {
parseModuleAssertion(s);
} else {
- Fatal() << s.line << ": unknown command\n";
+ return lexer.err(pos, "unrecognized command");
}
+ return Ok{};
}
- Module* parseModule(Element& s) {
- if (options.debug) {
- std::cerr << "parsing s-expressions to wasm...\n";
+ MaybeResult<> parseModule(Lexer& lexer) {
+ if (!lexer.peekSExprStart("module")) {
+ return {};
}
Colors::green(std::cerr);
- std::cerr << "BUILDING MODULE [line: " << s.line << "]\n";
+ std::cerr << "BUILDING MODULE [line: " << lexer.position().line << "]\n";
Colors::normal(std::cerr);
auto module = std::make_shared<Module>();
- auto builder =
- std::make_shared<SExpressionWasmBuilder>(*module, s, IRProfile::Normal);
+
+ CHECK_ERR(WATParser::parseModule(*module, lexer));
+
auto moduleName = module->name;
lastModule = module->name;
- builders[moduleName] = builder;
modules[moduleName].swap(module);
modules[moduleName]->features = FeatureSet::All;
bool valid = WasmValidator().validate(*modules[moduleName]);
@@ -106,8 +158,7 @@ protected:
}
instantiate(modules[moduleName].get());
-
- return modules[moduleName].get();
+ return Ok{};
}
void parseRegister(Element& s) {
@@ -121,7 +172,6 @@ protected:
// we copy pointers as a registered module's name might still be used
// in an assertion or invoke command.
modules[name] = modules[lastModule];
- builders[name] = builders[lastModule];
interfaces[name] = interfaces[lastModule];
instances[name] = instances[lastModule];
@@ -140,18 +190,21 @@ protected:
ModuleRunner* instance = instances[moduleName].get();
assert(instance);
- Name base = s[i++]->str();
+ std::string baseStr = std::string("\"") + s[i++]->str().toString() + "\"";
+ auto base = Lexer(baseStr).takeString();
+ if (!base) {
+ Fatal() << "expected string\n";
+ }
if (s[0]->str() == INVOKE) {
Literals args;
while (i < s.size()) {
- Expression* argument = builders[moduleName]->parseExpression(*s[i++]);
- args.push_back(getLiteralFromConstExpression(argument));
+ auto* arg = parseExpression(*modules[moduleName], *s[i++]);
+ args.push_back(getLiteralFromConstExpression(arg));
}
-
- return instance->callExport(base, args);
+ return instance->callExport(*base, args);
} else if (s[0]->str() == GET) {
- return instance->getExport(base);
+ return instance->getExport(*base);
}
Fatal() << "Invalid operation " << s[0]->toString();
@@ -193,7 +246,7 @@ protected:
Literals expected;
if (s.size() >= 3) {
expected = getLiteralsFromConstExpression(
- builders[lastModule]->parseExpression(*s[2]));
+ parseExpression(*modules[lastModule], *s[2]));
}
[[maybe_unused]] bool trapped = false;
try {
@@ -340,29 +393,35 @@ protected:
public:
Shell(Options& options) : options(options) { buildSpectestModule(); }
- bool parseAndRun(Element& root) {
+ MaybeResult<> parseAndRun(Lexer& lexer) {
size_t i = 0;
- while (i < root.size()) {
- Element& curr = *root[i];
+ while (!lexer.empty()) {
+ auto next = lexer.next();
+ auto size = next.find('\n');
+ if (size != std::string_view::npos) {
+ next = next.substr(0, size);
+ } else {
+ next = "";
+ }
- if (curr[0]->str() != MODULE) {
+ if (!lexer.peekSExprStart("module")) {
Colors::red(std::cerr);
- std::cerr << i << '/' << (root.size() - 1);
+ std::cerr << i;
Colors::green(std::cerr);
std::cerr << " CHECKING: ";
Colors::normal(std::cerr);
- std::cerr << curr;
+ std::cerr << next;
Colors::green(std::cerr);
- std::cerr << " [line: " << curr.line << "]\n";
+ std::cerr << " [line: " << lexer.position().line << "]\n";
Colors::normal(std::cerr);
}
- parse(curr);
+ CHECK_ERR(parse(lexer));
i += 1;
}
- return false;
+ return Ok{};
}
};
@@ -380,21 +439,15 @@ int main(int argc, const char* argv[]) {
options.parse(argc, argv);
auto input = read_file<std::string>(infile, Flags::Text);
+ Lexer lexer(input);
- bool checked = false;
- try {
- if (options.debug) {
- std::cerr << "parsing text to s-expressions...\n";
- }
- SExpressionParser parser(input.data());
- Element& root = *parser.root;
- checked = Shell(options).parseAndRun(root);
- } catch (ParseException& p) {
- p.dump(std::cerr);
+ auto result = Shell(options).parseAndRun(lexer);
+ if (auto* err = result.getErr()) {
+ std::cerr << err->msg;
exit(1);
}
- if (checked) {
+ if (result) {
Colors::green(std::cerr);
Colors::bold(std::cerr);
std::cerr << "all checks passed.\n";