/* * Copyright 2015 WebAssembly Community Group participants * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ // // Parses WebAssembly code in S-Expression format, as in .wast files // such as are in the spec test suite. // #ifndef wasm_wasm_s_parser_h #define wasm_wasm_s_parser_h #include #include #include #include "wasm.h" #include "wasm-binary.h" #include "asmjs/shared-constants.h" #include "mixed_arena.h" #include "parsing.h" #include "asm_v_wasm.h" #include "ast_utils.h" #include "wasm-builder.h" namespace wasm { using namespace cashew; // Globals int unhex(char c) { if (c >= '0' && c <= '9') return c - '0'; if (c >= 'a' && c <= 'f') return c - 'a' + 10; if (c >= 'A' && c <= 'F') return c - 'A' + 10; abort(); } // // An element in an S-Expression: a list or a string // class Element { typedef ArenaVector List; bool isList_; List list_; IString str_; bool dollared_; public: Element(MixedArena& allocator) : isList_(true), list_(allocator), line(-1), col(-1) {} bool isList() { return isList_; } bool isStr() { return !isList_; } bool dollared() { return dollared_; } size_t line, col; // list methods List& list() { if (!isList()) throw ParseException("expected list", line, col); return list_; } Element* operator[](unsigned i) { return list()[i]; } size_t size() { return list().size(); } // string methods IString str() { assert(!isList_); return str_; } const char* c_str() { assert(!isList_); return str_.str; } Element* setString(IString str__, bool dollared__) { isList_ = false; str_ = str__; dollared_ = dollared__; return this; } Element* setMetadata(size_t line_, size_t col_) { line = line_; col = col_; return this; } // printing friend std::ostream& operator<<(std::ostream& o, Element& e) { if (e.isList_) { o << '('; for (auto item : e.list_) o << ' ' << *item; o << " )"; } else { o << e.str_.str; } return o; } void dump() { std::cout << "dumping " << this << " : " << *this << ".\n"; } }; // // Generic S-Expression parsing into lists // class SExpressionParser { char* input; size_t line; char* lineStart; MixedArena allocator; public: // Assumes control of and modifies the input. SExpressionParser(char* input) : input(input) { root = nullptr; line = 0; lineStart = input; while (!root) { // keep parsing until we pass an initial comment root = parse(); } } Element* root; private: Element* parse() { std::vector stack; Element *curr = allocator.alloc(); while (1) { skipWhitespace(); if (input[0] == 0) break; if (input[0] == '(') { input++; stack.push_back(curr); curr = allocator.alloc()->setMetadata(line, input - lineStart - 1); } else if (input[0] == ')') { input++; auto last = curr; curr = stack.back(); assert(stack.size()); stack.pop_back(); curr->list().push_back(last); } else { curr->list().push_back(parseString()); } } if (stack.size() != 0) throw ParseException("stack is not empty", curr->line, curr->col); return curr; } void skipWhitespace() { while (1) { while (isspace(input[0])) { if (input[0] == '\n') { line++; lineStart = input + 1; } input++; } if (input[0] == ';' && input[1] == ';') { while (input[0] && input[0] != '\n') input++; line++; lineStart = input; } else if (input[0] == '(' && input[1] == ';') { // Skip nested block comments. input += 2; int depth = 1; while (1) { if (input[0] == 0) { return; } if (input[0] == '(' && input[1] == ';') { input += 2; depth++; } else if (input[0] == ';' && input[1] == ')') { input += 2; --depth; if (depth == 0) { break; } } else { input++; } } } else { return; } } } Element* parseString() { bool dollared = false; if (input[0] == '$') { input++; dollared = true; } char *start = input; if (input[0] == '"') { // parse escaping \", but leave code escaped - we'll handle escaping in memory segments specifically input++; std::string str; while (1) { if (input[0] == '"') break; if (input[0] == '\\') { str += input[0]; str += input[1]; input += 2; continue; } str += input[0]; input++; } input++; return allocator.alloc()->setString(IString(str.c_str(), false), dollared)->setMetadata(line, start - lineStart); } while (input[0] && !isspace(input[0]) && input[0] != ')' && input[0] != '(' && input[0] != ';') input++; if (start == input) throw ParseException("expected string", line, input - lineStart); char temp = input[0]; input[0] = 0; auto ret = allocator.alloc()->setString(IString(start, false), dollared)->setMetadata(line, start - lineStart); input[0] = temp; return ret; } }; // // SExpressions => WebAssembly module // class SExpressionWasmBuilder { Module& wasm; MixedArena& allocator; std::vector functionNames; int functionCounter; int importCounter; std::map functionTypes; // we need to know function return types before we parse their contents public: // Assumes control of and modifies the input. SExpressionWasmBuilder(Module& wasm, Element& module) : wasm(wasm), allocator(wasm.allocator), importCounter(0) { assert(module[0]->str() == MODULE); if (module.size() > 1 && module[1]->isStr()) { // these s-expressions contain a binary module, actually std::vector data; size_t i = 1; while (i < module.size()) { auto str = module[i++]->c_str(); if (auto size = strlen(str)) { stringToBinary(str, size, data); } } WasmBinaryBuilder binaryBuilder(wasm, data, false); binaryBuilder.read(); return; } functionCounter = 0; for (unsigned i = 1; i < module.size(); i++) { preParseFunctionType(*module[i]); preParseImports(*module[i]); } functionCounter = 0; for (unsigned i = 1; i < module.size(); i++) { parseModuleElement(*module[i]); } } private: // pre-parse types and function definitions, so we know function return types before parsing their contents void preParseFunctionType(Element& s) { IString id = s[0]->str(); if (id == TYPE) return parseType(s); if (id != FUNC) return; size_t i = 1; Name name, exportName; i = parseFunctionNames(s, name, exportName); if (!name.is()) { // unnamed, use an index name = Name::fromInt(functionCounter); } functionNames.push_back(name); functionCounter++; for (;i < s.size(); i++) { Element& curr = *s[i]; IString id = curr[0]->str(); if (id == RESULT) { functionTypes[name] = stringToWasmType(curr[1]->str()); return; } else if (id == TYPE) { Name typeName = curr[1]->str(); if (!wasm.checkFunctionType(typeName)) throw ParseException("unknown function"); FunctionType* type = wasm.getFunctionType(typeName); functionTypes[name] = type->result; return; } } functionTypes[name] = none; } void preParseImports(Element& curr) { IString id = curr[0]->str(); if (id == IMPORT) parseImport(curr); } void parseModuleElement(Element& curr) { IString id = curr[0]->str(); if (id == START) return parseStart(curr); if (id == FUNC) return parseFunction(curr); if (id == MEMORY) return parseMemory(curr); if (id == EXPORT) return parseExport(curr); if (id == IMPORT) return; // already done if (id == TABLE) return parseTable(curr); if (id == TYPE) return; // already done std::cerr << "bad module element " << id.str << '\n'; throw ParseException("unknown module element"); } // function parsing state std::unique_ptr currFunction; std::map currLocalTypes; size_t localIndex; // params and vars size_t otherIndex; std::vector labelStack; bool brokeToAutoBlock; Name getPrefixedName(std::string prefix) { return IString((prefix + std::to_string(otherIndex++)).c_str(), false); } Name getFunctionName(Element& s) { if (s.dollared()) { return s.str(); } else { // index size_t offset = atoi(s.str().c_str()); if (offset >= functionNames.size()) throw ParseException("unknown function"); return functionNames[offset]; } } void parseStart(Element& s) { wasm.addStart(getFunctionName(*s[1])); } // returns the next index in s size_t parseFunctionNames(Element& s, Name& name, Name& exportName) { size_t i = 1; while (i < s.size() && s[i]->isStr()) { if (!s[i]->dollared()) { // an export name exportName = s[i]->str(); i++; } else { name = s[i]->str(); i++; } } return i; } void parseFunction(Element& s) { size_t i = 1; Name name, exportName; i = parseFunctionNames(s, name, exportName); if (!name.is()) { // unnamed, use an index name = Name::fromInt(functionCounter); } if (exportName.is()) { auto ex = make_unique(); ex->name = exportName; ex->value = name; wasm.addExport(ex.release()); } functionCounter++; Expression* body = nullptr; localIndex = 0; otherIndex = 0; brokeToAutoBlock = false; std::vector typeParams; // we may have both params and a type. store the type info here std::vector params; std::vector vars; WasmType result = none; Name type; Block* autoBlock = nullptr; // we may need to add a block for the very top level auto makeFunction = [&]() { currFunction = std::unique_ptr(Builder(wasm).makeFunction( name, std::move(params), result, std::move(vars) )); }; auto ensureAutoBlock = [&]() { if (!autoBlock) { autoBlock = allocator.alloc(); autoBlock->list.push_back(body); body = autoBlock; } }; for (;i < s.size(); i++) { Element& curr = *s[i]; IString id = curr[0]->str(); if (id == PARAM || id == LOCAL) { size_t j = 1; while (j < curr.size()) { IString name; WasmType type = none; if (!curr[j]->dollared()) { // dollared input symbols cannot be types type = stringToWasmType(curr[j]->str(), true); } if (type != none) { // a type, so an unnamed parameter name = Name::fromInt(localIndex); } else { name = curr[j]->str(); type = stringToWasmType(curr[j+1]->str()); j++; } j++; if (id == PARAM) { params.emplace_back(name, type); } else { vars.emplace_back(name, type); } localIndex++; currLocalTypes[name] = type; } } else if (id == RESULT) { result = stringToWasmType(curr[1]->str()); } else if (id == TYPE) { Name name = curr[1]->str(); type = name; if (!wasm.checkFunctionType(name)) throw ParseException("unknown function"); FunctionType* type = wasm.getFunctionType(name); result = type->result; for (size_t j = 0; j < type->params.size(); j++) { IString name = Name::fromInt(j); WasmType currType = type->params[j]; typeParams.emplace_back(name, currType); currLocalTypes[name] = currType; } } else { // body if (typeParams.size() > 0 && params.size() == 0) { params = typeParams; } if (!currFunction) makeFunction(); Expression* ex = parseExpression(curr); if (!body) { body = ex; } else { ensureAutoBlock(); autoBlock->list.push_back(ex); } } } if (brokeToAutoBlock) { ensureAutoBlock(); autoBlock->name = FAKE_RETURN; } if (autoBlock) { autoBlock->finalize(); } if (!currFunction) { makeFunction(); body = allocator.alloc(); } assert(currFunction->result == result); currFunction->body = body; currFunction->type = type; wasm.addFunction(currFunction.release()); currLocalTypes.clear(); labelStack.clear(); } WasmType stringToWasmType(IString str, bool allowError=false, bool prefix=false) { return stringToWasmType(str.str, allowError, prefix); } WasmType stringToWasmType(const char* str, bool allowError=false, bool prefix=false) { if (str[0] == 'i') { if (str[1] == '3' && str[2] == '2' && (prefix || str[3] == 0)) return i32; if (str[1] == '6' && str[2] == '4' && (prefix || str[3] == 0)) return i64; } if (str[0] == 'f') { if (str[1] == '3' && str[2] == '2' && (prefix || str[3] == 0)) return f32; if (str[1] == '6' && str[2] == '4' && (prefix || str[3] == 0)) return f64; } if (allowError) return none; throw ParseException("unknown type"); abort(); } public: Expression* parseExpression(Element* s) { return parseExpression(*s); } #define abort_on(str) { throw ParseException(std::string("abort_on ") + str); } Expression* parseExpression(Element& s) { IString id = s[0]->str(); const char *str = id.str; const char *dot = strchr(str, '.'); if (dot) { // type.operation (e.g. i32.add) WasmType type = stringToWasmType(str, false, true); // Local copy to index into op without bounds checking. enum { maxNameSize = 15 }; char op[maxNameSize + 1] = {'\0'}; strncpy(op, dot + 1, maxNameSize); #define BINARY_INT_OR_FLOAT(op) (type == i32 ? BinaryOp::op##Int32 : (type == i64 ? BinaryOp::op##Int64 : (type == f32 ? BinaryOp::op##Float32 : BinaryOp::op##Float64))) #define BINARY_INT(op) (type == i32 ? BinaryOp::op##Int32 : BinaryOp::op##Int64) #define BINARY_FLOAT(op) (type == f32 ? BinaryOp::op##Float32 : BinaryOp::op##Float64) switch (op[0]) { case 'a': { if (op[1] == 'b') return makeUnary(s, type == f32 ? UnaryOp::AbsFloat32 : UnaryOp::AbsFloat64, type); if (op[1] == 'd') return makeBinary(s, BINARY_INT_OR_FLOAT(Add), type); if (op[1] == 'n') return makeBinary(s, BINARY_INT(And), type); abort_on(op); } case 'c': { if (op[1] == 'e') return makeUnary(s, type == f32 ? UnaryOp::CeilFloat32 : UnaryOp::CeilFloat64, type); if (op[1] == 'l') return makeUnary(s, type == i32 ? UnaryOp::ClzInt32 : UnaryOp::ClzInt64, type); if (op[1] == 'o') { if (op[2] == 'p') return makeBinary(s, BINARY_FLOAT(CopySign), type); if (op[2] == 'n') { if (op[3] == 'v') { if (op[8] == 's') return makeUnary(s, op[11] == '3' ? (type == f32 ? UnaryOp::ConvertSInt32ToFloat32 : UnaryOp::ConvertSInt32ToFloat64) : (type == f32 ? UnaryOp::ConvertSInt64ToFloat32 : UnaryOp::ConvertSInt64ToFloat64), type); if (op[8] == 'u') return makeUnary(s, op[11] == '3' ? (type == f32 ? UnaryOp::ConvertUInt32ToFloat32 : UnaryOp::ConvertUInt32ToFloat64) : (type == f32 ? UnaryOp::ConvertUInt64ToFloat32 : UnaryOp::ConvertUInt64ToFloat64), type); } if (op[3] == 's') return makeConst(s, type); } } if (op[1] == 't') return makeUnary(s, type == i32 ? UnaryOp::CtzInt32 : UnaryOp::CtzInt64, type); abort_on(op); } case 'd': { if (op[1] == 'i') { if (op[3] == '_') return makeBinary(s, op[4] == 'u' ? BINARY_INT(DivU) : BINARY_INT(DivS), type); if (op[3] == 0) return makeBinary(s, BINARY_FLOAT(Div), type); } if (op[1] == 'e') return makeUnary(s, UnaryOp::DemoteFloat64, type); abort_on(op); } case 'e': { if (op[1] == 'q') { if (op[2] == 0) return makeBinary(s, BINARY_INT_OR_FLOAT(Eq), type); if (op[2] == 'z') return makeUnary(s, type == i32 ? UnaryOp::EqZInt32 : UnaryOp::EqZInt64, type); } if (op[1] == 'x') return makeUnary(s, op[7] == 'u' ? UnaryOp::ExtendUInt32 : UnaryOp::ExtendSInt32, type); abort_on(op); } case 'f': { if (op[1] == 'l') return makeUnary(s, type == f32 ? UnaryOp::FloorFloat32 : UnaryOp::FloorFloat64, type); abort_on(op); } case 'g': { if (op[1] == 't') { if (op[2] == '_') return makeBinary(s, op[3] == 'u' ? BINARY_INT(GtU) : BINARY_INT(GtS), type); if (op[2] == 0) return makeBinary(s, BINARY_FLOAT(Gt), type); } if (op[1] == 'e') { if (op[2] == '_') return makeBinary(s, op[3] == 'u' ? BINARY_INT(GeU) : BINARY_INT(GeS), type); if (op[2] == 0) return makeBinary(s, BINARY_FLOAT(Ge), type); } abort_on(op); } case 'l': { if (op[1] == 't') { if (op[2] == '_') return makeBinary(s, op[3] == 'u' ? BINARY_INT(LtU) : BINARY_INT(LtS), type); if (op[2] == 0) return makeBinary(s, BINARY_FLOAT(Lt), type); } if (op[1] == 'e') { if (op[2] == '_') return makeBinary(s, op[3] == 'u' ? BINARY_INT(LeU) : BINARY_INT(LeS), type); if (op[2] == 0) return makeBinary(s, BINARY_FLOAT(Le), type); } if (op[1] == 'o') return makeLoad(s, type); abort_on(op); } case 'm': { if (op[1] == 'i') return makeBinary(s, BINARY_FLOAT(Min), type); if (op[1] == 'a') return makeBinary(s, BINARY_FLOAT(Max), type); if (op[1] == 'u') return makeBinary(s, BINARY_INT_OR_FLOAT(Mul), type); abort_on(op); } case 'n': { if (op[1] == 'e') { if (op[2] == 0) return makeBinary(s, BINARY_INT_OR_FLOAT(Ne), type); if (op[2] == 'a') return makeUnary(s, type == f32 ? UnaryOp::NearestFloat32 : UnaryOp::NearestFloat64, type); if (op[2] == 'g') return makeUnary(s, type == f32 ? UnaryOp::NegFloat32 : UnaryOp::NegFloat64, type); } abort_on(op); } case 'o': { if (op[1] == 'r') return makeBinary(s, BINARY_INT(Or), type); abort_on(op); } case 'p': { if (op[1] == 'r') return makeUnary(s, UnaryOp::PromoteFloat32, type); if (op[1] == 'o') return makeUnary(s, type == i32 ? UnaryOp::PopcntInt32 : UnaryOp::PopcntInt64, type); abort_on(op); } case 'r': { if (op[1] == 'e') { if (op[2] == 'm') return makeBinary(s, op[4] == 'u' ? BINARY_INT(RemU) : BINARY_INT(RemS), type); if (op[2] == 'i') return makeUnary(s, isWasmTypeFloat(type) ? (type == f32 ? UnaryOp::ReinterpretInt32 : UnaryOp::ReinterpretInt64) : (type == i32 ? UnaryOp::ReinterpretFloat32 : UnaryOp::ReinterpretFloat64), type); } if (op[1] == 'o' && op[2] == 't') { return makeBinary(s, op[3] == 'l' ? BINARY_INT(RotL) : BINARY_INT(RotR), type); } abort_on(op); } case 's': { if (op[1] == 'h') { if (op[2] == 'l') return makeBinary(s, BINARY_INT(Shl), type); return makeBinary(s, op[4] == 'u' ? BINARY_INT(ShrU) : BINARY_INT(ShrS), type); } if (op[1] == 'u') return makeBinary(s, BINARY_INT_OR_FLOAT(Sub), type); if (op[1] == 'q') return makeUnary(s, type == f32 ? UnaryOp::SqrtFloat32 : UnaryOp::SqrtFloat64, type); if (op[1] == 't') return makeStore(s, type); abort_on(op); } case 't': { if (op[1] == 'r') { if (op[6] == 's') return makeUnary(s, op[9] == '3' ? (type == i32 ? UnaryOp::TruncSFloat32ToInt32 : UnaryOp::TruncSFloat32ToInt64) : (type == i32 ? UnaryOp::TruncSFloat64ToInt32 : UnaryOp::TruncSFloat64ToInt64), type); if (op[6] == 'u') return makeUnary(s, op[9] == '3' ? (type == i32 ? UnaryOp::TruncUFloat32ToInt32 : UnaryOp::TruncUFloat32ToInt64) : (type == i32 ? UnaryOp::TruncUFloat64ToInt32 : UnaryOp::TruncUFloat64ToInt64), type); if (op[2] == 'u') return makeUnary(s, type == f32 ? UnaryOp::TruncFloat32 : UnaryOp::TruncFloat64, type); } abort_on(op); } case 'w': { if (op[1] == 'r') return makeUnary(s, UnaryOp::WrapInt64, type); abort_on(op); } case 'x': { if (op[1] == 'o') return makeBinary(s, BINARY_INT(Xor), type); abort_on(op); } default: abort_on(op); } } else { // other expression switch (str[0]) { case 'b': { if (str[1] == 'l') return makeBlock(s); if (str[1] == 'r') { if (str[2] == '_' && str[3] == 't') return makeBreakTable(s); return makeBreak(s); } abort_on(str); } case 'c': { if (str[1] == 'a') { if (id == CALL) return makeCall(s); if (id == CALL_IMPORT) return makeCallImport(s); if (id == CALL_INDIRECT) return makeCallIndirect(s); } else if (str[1] == 'u') return makeHost(s, HostOp::CurrentMemory); abort_on(str); } case 'e': { if (str[1] == 'l') return makeThenOrElse(s); abort_on(str); } case 'g': { if (str[1] == 'e') return makeGetLocal(s); if (str[1] == 'r') return makeHost(s, HostOp::GrowMemory); abort_on(str); } case 'h': { if (str[1] == 'a') return makeHost(s, HostOp::HasFeature); abort_on(str); } case 'i': { if (str[1] == 'f') return makeIf(s); abort_on(str); } case 'l': { if (str[1] == 'o') return makeLoop(s); abort_on(str); } case 'n': { if (str[1] == 'o') return allocator.alloc(); abort_on(str); } case 'p': { if (str[1] == 'a') return makeHost(s, HostOp::PageSize); abort_on(str); } case 's': { if (str[1] == 'e' && str[2] == 't') return makeSetLocal(s); if (str[1] == 'e' && str[2] == 'l') return makeSelect(s); abort_on(str); } case 'r': { if (str[1] == 'e') return makeReturn(s); abort_on(str); } case 't': { if (str[1] == 'h') return makeThenOrElse(s); abort_on(str); } case 'u': { if (str[1] == 'n') return allocator.alloc(); abort_on(str); } default: abort_on(str); } } abort(); } private: Expression* makeBinary(Element& s, BinaryOp op, WasmType type) { auto ret = allocator.alloc(); ret->op = op; ret->left = parseExpression(s[1]); ret->right = parseExpression(s[2]); ret->finalize(); return ret; } Expression* makeUnary(Element& s, UnaryOp op, WasmType type) { auto ret = allocator.alloc(); ret->op = op; ret->value = parseExpression(s[1]); ret->finalize(); // type is the reported type, e.g. i64.ctz reports i64 (but has a return type of i32, in this case) // verify the reported type is correct switch (op) { case EqZInt32: case NegFloat32: case AbsFloat32: case CeilFloat32: case FloorFloat32: case TruncFloat32: case NearestFloat32: case SqrtFloat32: case ClzInt32: case CtzInt32: case PopcntInt32: case EqZInt64: case NegFloat64: case AbsFloat64: case CeilFloat64: case FloorFloat64: case TruncFloat64: case NearestFloat64: case SqrtFloat64: case ClzInt64: case CtzInt64: case PopcntInt64: { if (ret->value->type != unreachable && type != ret->value->type) throw ParseException(std::string("bad type for ") + getExpressionName(ret) + ": " + printWasmType(type) + " vs value type " + printWasmType(ret->value->type), s.line, s.col); break; } case ExtendSInt32: case ExtendUInt32: case WrapInt64: case PromoteFloat32: case DemoteFloat64: case TruncSFloat32ToInt32: case TruncUFloat32ToInt32: case TruncSFloat64ToInt32: case TruncUFloat64ToInt32: case ReinterpretFloat32: case TruncSFloat32ToInt64: case TruncUFloat32ToInt64: case TruncSFloat64ToInt64: case TruncUFloat64ToInt64: case ReinterpretFloat64: case ReinterpretInt32: case ConvertSInt32ToFloat32: case ConvertUInt32ToFloat32: case ConvertSInt64ToFloat32: case ConvertUInt64ToFloat32: case ReinterpretInt64: case ConvertSInt32ToFloat64: case ConvertUInt32ToFloat64: case ConvertSInt64ToFloat64: case ConvertUInt64ToFloat64: break; default: WASM_UNREACHABLE(); } return ret; } Expression* makeSelect(Element& s) { auto ret = allocator.alloc