/*
 * Copyright 2017 WebAssembly Community Group participants
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/wast-parser.h"

#include "src/binary-reader-ir.h"
#include "src/binary-reader.h"
#include "src/cast.h"
#include "src/expr-visitor.h"
#include "src/make-unique.h"
#include "src/utf8.h"

#define WABT_TRACING 0
#include "src/tracing.h"

#define EXPECT(token_type) CHECK_RESULT(Expect(TokenType::token_type))

namespace wabt {

namespace {

// Upper bound on how many characters of a token are echoed in an error
// message (passed to Token::to_string_clamp).
static const size_t kMaxErrorTokenLength = 80;

// Returns true iff |x| is non-zero and has exactly one bit set.
bool IsPowerOfTwo(uint32_t x) {
  return x && ((x & (x - 1)) == 0);
}

// Writes the contents of the quoted string |text| to |dest|, stripping the
// surrounding quotes and decoding backslash escapes (\n, \r, \t, \\, \', \"
// and two-digit hex sequences).
template <typename OutputIter>
void RemoveEscapes(string_view text, OutputIter dest) {
  // Remove surrounding quotes; if any. This may be empty if the string was
  // invalid (e.g. if it contained a bad escape sequence).
  if (text.size() <= 2) {
    return;
  }

  text = text.substr(1, text.size() - 2);

  const char* src = text.data();
  const char* end = text.data() + text.size();

  while (src < end) {
    if (*src == '\\') {
      src++;
      switch (*src) {
        case 'n':
          *dest++ = '\n';
          break;
        case 'r':
          *dest++ = '\r';
          break;
        case 't':
          *dest++ = '\t';
          break;
        case '\\':
          *dest++ = '\\';
          break;
        case '\'':
          *dest++ = '\'';
          break;
        case '\"':
          *dest++ = '\"';
          break;
        default: {
          // The string should be validated already, so we know this is a hex
          // sequence.
          uint32_t hi;
          uint32_t lo;
          if (Succeeded(ParseHexdigit(src[0], &hi)) &&
              Succeeded(ParseHexdigit(src[1], &lo))) {
            *dest++ = (hi << 4) | lo;
          } else {
            assert(0);
          }
          // Skip the first hex digit here; the shared increment below skips
          // the second one.
          src++;
          break;
        }
      }
      src++;
    } else {
      *dest++ = *src++;
    }
  }
}

typedef std::vector<std::string> TextVector;

// Decodes every quoted string in |texts|, in order, into |out|.
template <typename OutputIter>
void RemoveEscapes(const TextVector& texts, OutputIter out) {
  for (const std::string& text : texts)
    RemoveEscapes(text, out);
}

// Returns true if |token_type| begins a plain (non-block) instruction.
bool IsPlainInstr(TokenType token_type) {
  switch (token_type) {
    case TokenType::Unreachable:
    case TokenType::Nop:
    case TokenType::Drop:
    case TokenType::Select:
    case TokenType::Br:
    case TokenType::BrIf:
    case TokenType::BrOnExn:
    case TokenType::BrTable:
    case TokenType::Return:
    case TokenType::ReturnCall:
    case TokenType::ReturnCallIndirect:
    case TokenType::Call:
    case TokenType::CallIndirect:
    case TokenType::LocalGet:
    case TokenType::LocalSet:
    case TokenType::LocalTee:
    case TokenType::GlobalGet:
    case TokenType::GlobalSet:
    case TokenType::Load:
    case TokenType::Store:
    case TokenType::Const:
    case TokenType::Unary:
    case TokenType::Binary:
    case TokenType::Compare:
    case TokenType::Convert:
    case TokenType::MemoryCopy:
    case TokenType::DataDrop:
    case TokenType::MemoryFill:
    case TokenType::MemoryGrow:
    case TokenType::MemoryInit:
    case TokenType::MemorySize:
    case TokenType::TableCopy:
    case TokenType::ElemDrop:
    case TokenType::TableInit:
    case TokenType::TableGet:
    case TokenType::TableSet:
    case TokenType::TableGrow:
    case TokenType::TableSize:
    case TokenType::Throw:
    case TokenType::Rethrow:
    case TokenType::RefNull:
    case TokenType::RefIsNull:
    case TokenType::AtomicLoad:
    case TokenType::AtomicStore:
    case TokenType::AtomicRmw:
    case TokenType::AtomicRmwCmpxchg:
    case TokenType::AtomicNotify:
    case TokenType::AtomicWait:
    case TokenType::Ternary:
    case TokenType::SimdLaneOp:
    case TokenType::SimdShuffleOp:
      return true;
    default:
      return false;
  }
}

// Returns true if |token_type| begins a block-structured instruction.
bool IsBlockInstr(TokenType token_type) {
  switch (token_type) {
    case TokenType::Block:
    case TokenType::Loop:
    case TokenType::If:
    case TokenType::Try:
      return true;
    default:
      return false;
  }
}

bool IsPlainOrBlockInstr(TokenType token_type) {
  return IsPlainInstr(token_type) || IsBlockInstr(token_type);
}

// Returns true if |pair| is "(" followed by an instruction keyword, i.e. the
// start of a folded expression.
bool IsExpr(TokenTypePair pair) {
  return pair[0] == TokenType::Lpar && IsPlainOrBlockInstr(pair[1]);
}

// Returns true if |pair| starts either a flat instruction or a folded
// expression.
bool IsInstr(TokenTypePair pair) {
  return IsPlainOrBlockInstr(pair[0]) || IsExpr(pair);
}

// Returns true if |pair| is "(" followed by a module-field keyword.
bool IsModuleField(TokenTypePair pair) {
  if (pair[0] != TokenType::Lpar) {
    return false;
  }

  switch (pair[1]) {
    case TokenType::Data:
    case TokenType::Elem:
    case TokenType::Event:
    case TokenType::Export:
    case TokenType::Func:
    case TokenType::Type:
    case TokenType::Global:
    case TokenType::Import:
    case TokenType::Memory:
    case TokenType::Start:
    case TokenType::Table:
      return true;
    default:
      return false;
  }
}

// Returns true if |pair| is "(" followed by a script-command keyword.
bool IsCommand(TokenTypePair pair) {
  if (pair[0] != TokenType::Lpar) {
    return false;
  }

  switch (pair[1]) {
    case TokenType::AssertExhaustion:
    case TokenType::AssertInvalid:
    case TokenType::AssertMalformed:
    case TokenType::AssertReturn:
    case TokenType::AssertReturnArithmeticNan:
    case TokenType::AssertReturnCanonicalNan:
    case TokenType::AssertTrap:
    case TokenType::AssertUnlinkable:
    case TokenType::Get:
    case TokenType::Invoke:
    case TokenType::Module:
    case TokenType::Register:
      return true;
    default:
      return false;
  }
}

// Returns true if |sig| has neither parameters nor results.
bool IsEmptySignature(const FuncSignature& sig) {
  return sig.result_types.empty() && sig.param_types.empty();
}

void ResolveFuncTypeWithEmptySignature(const Module& module,
                                       FuncDeclaration* decl) {
  // Resolve func type variables where the signature was not specified
  // explicitly, e.g.: (func (type 1) ...)
  if (decl->has_func_type && IsEmptySignature(decl->sig)) {
    const FuncType* func_type = module.GetFuncType(decl->type_var);
    if (func_type) {
      decl->sig = func_type->sig;
    }
  }
}

void ResolveImplicitlyDefinedFunctionType(const Location& loc,
                                          Module* module,
                                          const FuncDeclaration& decl) {
  // Resolve implicitly defined function types, e.g.: (func (param i32) ...)
  if (!decl.has_func_type) {
    Index func_type_index = module->GetFuncTypeIndex(decl.sig);
    if (func_type_index == kInvalidIndex) {
      // No matching type exists yet, so append a new one to the module.
      auto func_type_field = MakeUnique<FuncTypeModuleField>(loc);
      func_type_field->func_type.sig = decl.sig;
      module->AppendField(std::move(func_type_field));
    }
  }
}

// Returns true if |sig| has no parameters and at most one result.
bool IsInlinableFuncSignature(const FuncSignature& sig) {
  return sig.GetNumParams() == 0 && sig.GetNumResults() <= 1;
}

// Expression visitor that resolves the function-type declarations attached
// to block-like expressions and indirect calls inside a function body.
class ResolveFuncTypesExprVisitorDelegate : public ExprVisitor::DelegateNop {
 public:
  explicit ResolveFuncTypesExprVisitorDelegate(Module* module)
      : module_(module) {}

  // Resolves |decl|; an implicit function type is only added to the module
  // when the signature is not inlinable.
  void ResolveBlockDeclaration(const Location& loc, BlockDeclaration* decl) {
    ResolveFuncTypeWithEmptySignature(*module_, decl);
    if (!IsInlinableFuncSignature(decl->sig)) {
      ResolveImplicitlyDefinedFunctionType(loc, module_, *decl);
    }
  }

  Result BeginBlockExpr(BlockExpr* expr) override {
    ResolveBlockDeclaration(expr->loc, &expr->block.decl);
    return Result::Ok;
  }

  Result BeginIfExpr(IfExpr* expr) override {
    ResolveBlockDeclaration(expr->loc, &expr->true_.decl);
    return Result::Ok;
  }

  Result BeginLoopExpr(LoopExpr* expr) override {
    ResolveBlockDeclaration(expr->loc, &expr->block.decl);
    return Result::Ok;
  }

  Result BeginTryExpr(TryExpr* expr) override {
    ResolveBlockDeclaration(expr->loc, &expr->block.decl);
    return Result::Ok;
  }

  Result OnCallIndirectExpr(CallIndirectExpr* expr) override {
    ResolveFuncTypeWithEmptySignature(*module_, &expr->decl);
    ResolveImplicitlyDefinedFunctionType(expr->loc, module_, expr->decl);
    return Result::Ok;
  }

  Result OnReturnCallIndirectExpr(ReturnCallIndirectExpr* expr) override {
    ResolveFuncTypeWithEmptySignature(*module_, &expr->decl);
    ResolveImplicitlyDefinedFunctionType(expr->loc, module_, expr->decl);
    return Result::Ok;
  }

 private:
  Module* module_;
};

// Resolves the function-type declarations of every function and event in
// |module| (defined or imported), and of the expressions inside each defined
// function body.
void ResolveFuncTypes(Module* module) {
  for (ModuleField& field : module->fields) {
    Func* func = nullptr;
    FuncDeclaration* decl = nullptr;
    if (auto* func_field = dyn_cast<FuncModuleField>(&field)) {
      func = &func_field->func;
      decl = &func->decl;
    } else if (auto* event_field = dyn_cast<EventModuleField>(&field)) {
      decl = &event_field->event.decl;
    } else if (auto* import_field = dyn_cast<ImportModuleField>(&field)) {
      if (auto* func_import =
              dyn_cast<FuncImport>(import_field->import.get())) {
        // Only check the declaration, not the function itself, since it is an
        // import.
        decl = &func_import->func.decl;
      } else if (auto* event_import =
                     dyn_cast<EventImport>(import_field->import.get())) {
        decl = &event_import->event.decl;
      } else {
        continue;
      }
    } else {
      continue;
    }

    if (decl) {
      ResolveFuncTypeWithEmptySignature(*module, decl);
      ResolveImplicitlyDefinedFunctionType(field.loc, module, *decl);
    }

    if (func) {
      ResolveFuncTypesExprVisitorDelegate delegate(module);
      ExprVisitor visitor(&delegate);
      visitor.VisitFunc(func);
    }
  }
}

// Points every export in |fields| at |index|, using the location of the
// module's last field, then appends |fields| to |module|.
// Reads module->fields.back(), so the module must already have a field.
void AppendInlineExportFields(Module* module,
                              ModuleFieldList* fields,
                              Index index) {
  Location last_field_loc = module->fields.back().loc;

  for (ModuleField& field : *fields) {
    auto* export_field = cast<ExportModuleField>(&field);
    export_field->export_.var = Var(index, last_field_loc);
  }

  module->AppendFields(fields);
}

}  // End of anonymous namespace

WastParser::WastParser(WastLexer* lexer,
                       Errors* errors,
                       WastParseOptions* options)
    : lexer_(lexer), errors_(errors), options_(options) {}

// Formats a printf-style message and records it as an error at |loc|.
void WastParser::Error(Location loc, const char* format, ...) {
  WABT_SNPRINTF_ALLOCA(buffer, length, format);
  errors_->emplace_back(ErrorLevel::Error, loc, buffer);
}

// Returns the next token without consuming it, lexing one if none is
// buffered.
Token WastParser::GetToken() {
  if (tokens_.empty()) {
    tokens_.push_back(lexer_->GetToken(this));
  }
  return tokens_.front();
}

Location WastParser::GetLocation() {
  return GetToken().loc;
}

// Returns the type of the token |n| positions ahead, lexing as many tokens
// as needed to fill the lookahead buffer.
TokenType WastParser::Peek(size_t n) {
  while (tokens_.size() <= n)
    tokens_.push_back(lexer_->GetToken(this));
  return tokens_.at(n).token_type();
}

TokenTypePair WastParser::PeekPair() {
  return TokenTypePair{{Peek(), Peek(1)}};
}

bool WastParser::PeekMatch(TokenType type) {
  return Peek() == type;
}

bool WastParser::PeekMatchLpar(TokenType type) {
  return Peek() == TokenType::Lpar && Peek(1) == type;
}

bool WastParser::PeekMatchExpr() {
  return IsExpr(PeekPair());
}

// Consumes the next token and returns true if it has type |type|; otherwise
// consumes nothing and returns false.
bool WastParser::Match(TokenType type) {
  if (PeekMatch(type)) {
    Consume();
    return true;
  }
  return false;
}

// Consumes "(" followed by |type| and returns true if both are next;
// otherwise consumes nothing and returns false.
bool WastParser::MatchLpar(TokenType type) {
  if (PeekMatchLpar(type)) {
    Consume();
    Consume();
    return true;
  }
  return false;
}

// Like Match, but on a mismatch consumes the offending token, reports an
// error and returns Result::Error.
Result WastParser::Expect(TokenType type) {
  if (!Match(type)) {
    Token token = Consume();
    Error(token.loc, "unexpected token %s, expected %s.",
          token.to_string_clamp(kMaxErrorTokenLength).c_str(),
          GetTokenTypeName(type));
    return Result::Error;
  }

  return Result::Ok;
}

// Removes and returns the front token. The lookahead buffer must not be
// empty, so callers peek first.
Token WastParser::Consume() {
  assert(!tokens_.empty());
  Token token = tokens_.front();
  tokens_.pop_front();
  return token;
}

// Error recovery: discards tokens until |func| accepts the upcoming token
// pair, giving up after kMaxConsumed tokens.
Result WastParser::Synchronize(SynchronizeFunc func) {
  static const int kMaxConsumed = 10;
  for (int i = 0; i < kMaxConsumed; ++i) {
    if (func(PeekPair())) {
      return Result::Ok;
    }

    Token token = Consume();
    if (token.token_type() == TokenType::Reserved) {
      Error(token.loc, "unexpected token %s.",
            token.to_string_clamp(kMaxErrorTokenLength).c_str());
    }
  }

  return Result::Error;
}

// Reports an error if the opcode carried by |token| is not enabled by the
// current feature set.
void WastParser::ErrorUnlessOpcodeEnabled(const Token& token) {
  Opcode opcode = token.opcode();
  if (!opcode.IsEnabled(options_->features)) {
    Error(token.loc, "opcode not allowed: %s", opcode.GetName());
  }
}

// Consumes the next token and reports it as unexpected. |expected| is
// rendered as "a, b or c"; |example|, if non-null, is appended as
// " (e.g. ...)". Always returns Result::Error.
Result WastParser::ErrorExpected(const std::vector<std::string>& expected,
                                 const char* example) {
  Token token = Consume();
  std::string expected_str;
  if (!expected.empty()) {
    expected_str = ", expected ";
    for (size_t i = 0; i < expected.size(); ++i) {
      if (i != 0) {
        if (i == expected.size() - 1) {
          expected_str += " or ";
        } else {
          expected_str += ", ";
        }
      }

      expected_str += expected[i];
    }

    if (example) {
      expected_str += " (e.g. ";
      expected_str += example;
      expected_str += ")";
    }
  }

  Error(token.loc, "unexpected token \"%s\"%s.",
        token.to_string_clamp(kMaxErrorTokenLength).c_str(),
        expected_str.c_str());
  return Result::Error;
}

// If the next token is "(", consumes it and reports the token after it as
// unexpected; otherwise does nothing and returns Result::Ok.
Result WastParser::ErrorIfLpar(const std::vector<std::string>& expected,
                               const char* example) {
  if (Match(TokenType::Lpar)) {
    // Make sure a token is buffered before ErrorExpected consumes it.
    GetToken();
    return ErrorExpected(expected, example);
  }
  return Result::Ok;
}

// If the next token is a Var, consumes it and stores its text in |name|;
// otherwise leaves |name| untouched.
void WastParser::ParseBindVarOpt(std::string* name) {
  WABT_TRACE(ParseBindVarOpt);
  if (PeekMatch(TokenType::Var)) {
    Token token = Consume();
    *name = token.text();
  }
}

// Parses a numeric index or a $name into |out_var|.
Result WastParser::ParseVar(Var* out_var) {
  WABT_TRACE(ParseVar);
  if (PeekMatch(TokenType::Nat)) {
    Token token = Consume();
    string_view sv = token.literal().text;
    uint64_t index = kInvalidIndex;
    if (Failed(ParseUint64(sv.begin(), sv.end(), &index))) {
      // Print an error, but don't fail parsing.
      Error(token.loc, "invalid int \"" PRIstringview "\"",
            WABT_PRINTF_STRING_VIEW_ARG(sv));
    }
    // NOTE(review): |index| is 64-bit here; if Var stores a narrower Index,
    // out-of-range values are narrowed silently -- confirm that is intended.
    *out_var = Var(index, token.loc);
    return Result::Ok;
  } else if (PeekMatch(TokenType::Var)) {
    Token token = Consume();
    *out_var = Var(token.text(), token.loc);
    return Result::Ok;
  } else {
    return ErrorExpected({"a numeric index", "a name"}, "12 or $foo");
  }
}

// Parses a var if one is next and returns true; otherwise stores
// |default_var| in |out_var| and returns false.
bool WastParser::ParseVarOpt(Var* out_var, Var default_var) {
  WABT_TRACE(ParseVarOpt);
  if (PeekMatch(TokenType::Nat) || PeekMatch(TokenType::Var)) {
    Result result = ParseVar(out_var);
    // Should always succeed, the only way it could fail is if the token
    // doesn't match.
    assert(Succeeded(result));
    WABT_USE(result);
    return true;
  } else {
    *out_var = default_var;
    return false;
  }
}

// Parses either "(offset <instrs>)" or a single folded expression into
// |out_expr_list|.
Result WastParser::ParseOffsetExpr(ExprList* out_expr_list) {
  WABT_TRACE(ParseOffsetExpr);
  if (MatchLpar(TokenType::Offset)) {
    CHECK_RESULT(ParseTerminatingInstrList(out_expr_list));
    EXPECT(Rpar);
  } else if (PeekMatchExpr()) {
    CHECK_RESULT(ParseExpr(out_expr_list));
  } else {
    return ErrorExpected({"an offset expr"}, "(i32.const 123)");
  }
  return Result::Ok;
}

// Like ParseTextListOpt, but returns Result::Error when no text token is
// present. No error message is recorded in that case.
Result WastParser::ParseTextList(std::vector<uint8_t>* out_data) {
  WABT_TRACE(ParseTextList);
  if (!ParseTextListOpt(out_data)) {
    return Result::Error;
  }

  return Result::Ok;
}

// Consumes consecutive Text tokens and appends their decoded bytes to
// |out_data|. Returns true if at least one token was consumed.
bool WastParser::ParseTextListOpt(std::vector<uint8_t>* out_data) {
  WABT_TRACE(ParseTextListOpt);
  TextVector texts;
  while (PeekMatch(TokenType::Text))
    texts.push_back(Consume().text());

  RemoveEscapes(texts, std::back_inserter(*out_data));
  return !texts.empty();
}

// Like ParseVarListOpt, but returns Result::Error when that call returns
// false.
Result WastParser::ParseVarList(VarVector* out_var_list) {
  WABT_TRACE(ParseVarList);
  if (!ParseVarListOpt(out_var_list)) {
    return Result::Error;
  }

  return Result::Ok;
}

// Appends consecutive vars to |out_var_list|. The result reflects whether
// the list is non-empty afterwards, which includes entries the caller put
// there beforehand.
bool WastParser::ParseVarListOpt(VarVector* out_var_list) {
  WABT_TRACE(ParseVarListOpt);
  Var var;
  while (ParseVarOpt(&var))
    out_var_list->push_back(var);

  return !out_var_list->empty();
}

// Parses one value type, rejecting v128 and anyref when the corresponding
// feature is disabled.
Result WastParser::ParseValueType(Type* out_type) {
  WABT_TRACE(ParseValueType);
  if (!PeekMatch(TokenType::ValueType)) {
    return ErrorExpected({"i32", "i64", "f32", "f64", "v128", "anyref"});
  }

  Token token = Consume();
  Type type = token.type();
  bool is_enabled;
  switch (type) {
    case Type::V128:
      is_enabled = options_->features.simd_enabled();
      break;
    case Type::Anyref:
      is_enabled = options_->features.reference_types_enabled();
      break;
    default:
      is_enabled = true;
      break;
  }

  if (!is_enabled) {
    Error(token.loc, "value type not allowed: %s", GetTypeName(type));
    return Result::Error;
  }

  *out_type = type;
  return Result::Ok;
}

// Appends consecutive value types to |out_type_list|. Unlike ParseValueType,
// this performs no feature checks.
Result WastParser::ParseValueTypeList(TypeVector* out_type_list) {
  WABT_TRACE(ParseValueTypeList);
  while (PeekMatch(TokenType::ValueType))
    out_type_list->push_back(Consume().type());

  return Result::Ok;
}

// Parses a reference type, rejecting anyref when reference types are
// disabled.
Result WastParser::ParseRefType(Type* out_type) {
  WABT_TRACE(ParseRefType);
  if (!PeekMatch(TokenType::ValueType)) {
    return ErrorExpected({"anyref", "funcref"});
  }

  Token token = Consume();
  Type type = token.type();
  if (type == Type::Anyref &&
      !options_->features.reference_types_enabled()) {
    Error(token.loc, "value type not allowed: %s", GetTypeName(type));
    return Result::Error;
  }

  *out_type = type;
  return Result::Ok;
}

// Parses a quoted string, appending its decoded contents to |text|. Invalid
// UTF-8 is reported but does not fail the parse.
Result WastParser::ParseQuotedText(std::string* text) {
  WABT_TRACE(ParseQuotedText);
  if (!PeekMatch(TokenType::Text)) {
    return ErrorExpected({"a quoted string"}, "\"foo\"");
  }

  Token token = Consume();
  RemoveEscapes(token.text(), std::back_inserter(*text));
  if (!IsValidUtf8(text->data(), text->length())) {
    Error(token.loc, "quoted string has an invalid utf-8 encoding");
  }
  return Result::Ok;
}

// Parses an optional OffsetEqNat token. Returns true if it was present;
// otherwise stores 0 in |out_offset| and returns false. Malformed or
// out-of-range values are reported but do not change the return value.
bool WastParser::ParseOffsetOpt(uint32_t* out_offset) {
  WABT_TRACE(ParseOffsetOpt);
  if (PeekMatch(TokenType::OffsetEqNat)) {
    Token token = Consume();
    // Zero-initialized so that a failed parse can never leave an
    // indeterminate value to be range-checked and stored below.
    uint64_t offset64 = 0;
    string_view sv = token.text();
    if (Failed(ParseInt64(sv.begin(), sv.end(), &offset64,
                          ParseIntType::SignedAndUnsigned))) {
      Error(token.loc, "invalid offset \"" PRIstringview "\"",
            WABT_PRINTF_STRING_VIEW_ARG(sv));
      // Discard anything the failed parse may have written.
      offset64 = 0;
    } else if (offset64 > UINT32_MAX) {
      Error(token.loc, "offset must be less than or equal to 0xffffffff");
    }
    *out_offset = static_cast<uint32_t>(offset64);
    return true;
  } else {
    *out_offset = 0;
    return false;
  }
}

// Parses an optional AlignEqNat token. Returns true if it was present;
// otherwise stores WABT_USE_NATURAL_ALIGNMENT in |out_align| and returns
// false. Malformed or non-power-of-two values are reported but do not change
// the return value.
bool WastParser::ParseAlignOpt(uint32_t* out_align) {
  WABT_TRACE(ParseAlignOpt);
  if (PeekMatch(TokenType::AlignEqNat)) {
    Token token = Consume();
    string_view sv = token.text();
    if (Failed(ParseInt32(sv.begin(), sv.end(), out_align,
                          ParseIntType::UnsignedOnly))) {
      Error(token.loc, "invalid alignment \"" PRIstringview "\"",
            WABT_PRINTF_STRING_VIEW_ARG(sv));
      // The failed parse may not have written |*out_align|; store a defined
      // value and skip the power-of-two check so it never reads garbage.
      *out_align = WABT_USE_NATURAL_ALIGNMENT;
    } else if (!IsPowerOfTwo(*out_align)) {
      Error(token.loc, "alignment must be power-of-two");
    }

    return true;
  } else {
    *out_align = WABT_USE_NATURAL_ALIGNMENT;
    return false;
  }
}

// Parses "<initial> [<max>] [shared]" into |out_limits|.
Result WastParser::ParseLimits(Limits* out_limits) {
  WABT_TRACE(ParseLimits);

  CHECK_RESULT(ParseNat(&out_limits->initial));
  if (PeekMatch(TokenType::Nat)) {
    CHECK_RESULT(ParseNat(&out_limits->max));
    out_limits->has_max = true;
  } else {
    out_limits->has_max = false;
  }

  if (Match(TokenType::Shared)) {
    out_limits->is_shared = true;
  }

  return Result::Ok;
}

// Parses a natural number into |out_nat|. A malformed literal is reported
// but does not fail the parse.
Result WastParser::ParseNat(uint64_t* out_nat) {
  WABT_TRACE(ParseNat);
  if (!PeekMatch(TokenType::Nat)) {
    return ErrorExpected({"a natural number"}, "123");
  }

  Token token = Consume();
  string_view sv = token.literal().text;
  if (Failed(ParseUint64(sv.begin(), sv.end(), out_nat))) {
    Error(token.loc, "invalid int \"" PRIstringview "\"",
          WABT_PRINTF_STRING_VIEW_ARG(sv));
  }

  return Result::Ok;
}

// Parses a whole module, either wrapped in "(module ...)" or given as a bare
// list of module fields. |out_module| is only set when no errors were
// recorded.
Result WastParser::ParseModule(std::unique_ptr<Module>* out_module) {
  WABT_TRACE(ParseModule);
  auto module = MakeUnique<Module>();

  if (PeekMatchLpar(TokenType::Module)) {
    // Starts with "(module". Allow text and binary modules, but no quoted
    // modules.
    CommandPtr command;
    CHECK_RESULT(ParseModuleCommand(nullptr, &command));
    auto module_command = cast<ModuleCommand>(std::move(command));
    *module = std::move(module_command->module);
  } else if (IsModuleField(PeekPair())) {
    // Parse an inline module (i.e. one with no surrounding (module)).
    CHECK_RESULT(ParseModuleFieldList(module.get()));
  } else {
    ConsumeIfLpar();
    ErrorExpected({"a module field", "a module"});
  }

  EXPECT(Eof);
  if (errors_->size() == 0) {
    *out_module = std::move(module);
    return Result::Ok;
  } else {
    return Result::Error;
  }
}

Result WastParser::ParseScript(std::unique_ptr