// // Parses WebAssembly code in S-Expression format, as in .wast files // such as are in the spec test suite. // #include #include #include "wasm.h" #include "mixed_arena.h" namespace wasm { int debug = 0; // wasm::debug is set in main(), typically from an env var using namespace cashew; // Globals IString MODULE("module"), FUNC("func"), PARAM("param"), RESULT("result"), MEMORY("memory"), SEGMENT("segment"), EXPORT("export"), IMPORT("import"), TABLE("table"), LOCAL("local"), TYPE("type"), CALL("call"), CALL_IMPORT("call_import"), CALL_INDIRECT("call_indirect"), INFINITY_("infinity"), NEG_INFINITY("-infinity"), NAN_("nan"), NEG_NAN("-nan"), CASE("case"), BR("br"), FAKE_RETURN("fake_return_waka123"); int unhex(char c) { if (c >= '0' && c <= '9') return c - '0'; if (c >= 'a' && c <= 'f') return c - 'a' + 10; if (c >= 'A' && c <= 'F') return c - 'A' + 10; abort(); } // // An element in an S-Expression: a list or a string // class Element { typedef std::vector List; bool isList_; List list_; IString str_; bool dollared_; public: Element() : isList_(true) {} bool isList() { return isList_; } bool isStr() { return !isList_; } bool dollared() { return dollared_; } // list methods List& list() { assert(isList_); return list_; } Element* operator[](unsigned i) { return list()[i]; } size_t size() { return list().size(); } // string methods IString str() { assert(!isList_); return str_; } const char* c_str() { assert(!isList_); return str_.str; } Element* setString(IString str__, bool dollared__) { isList_ = false; str_ = str__; dollared_ = dollared__; return this; } // printing friend std::ostream& operator<<(std::ostream& o, Element& e) { if (e.isList_) { o << '('; for (auto item : e.list_) o << ' ' << *item; o << " )"; } else { o << e.str_.str; } return o; } }; // // Generic S-Expression parsing into lists // class SExpressionParser { char *beginning; char* input; MixedArena allocator; public: // Assumes control of and modifies the input. SExpressionParser(char* input) : beginning(input), input(input) { root = nullptr; while (!root) { // keep parsing until we pass an initial comment root = parseInnerList(); } } Element* root; private: // parses the internal part of a list, inside the parens. Element* parseInnerList() { if (input[0] == ';') { // comment input++; if (input[0] == ';') { while (input[0] != '\n') input++; return nullptr; } input = strstr(input, ";)"); assert(input); return nullptr; } auto ret = allocator.alloc(); while (1) { Element* curr = parse(); if (!curr) return ret; ret->list().push_back(curr); } } Element* parse() { skipWhitespace(); if (input[0] == 0 || input[0] == ')') return nullptr; if (input[0] == '(') { // a list input++; auto ret = parseInnerList(); skipWhitespace(); assert(input[0] == ')'); input++; return ret; } return parseString(); } void skipWhitespace() { while (1) { while (isspace(input[0])) input++; if (input[0] == ';' && input[1] == ';') { while (input[0] != '\n') input++; } else if (input[0] == '(' && input[1] == ';') { input = strstr(input, ";)") + 2; } else { return; } } } Element* parseString() { bool dollared = false; if (input[0] == '$') { input++; dollared = true; } char *start = input; if (input[0] == '"') { // parse escaping \", and \a7 into 0xa7 the character code input++; std::string str; while (1) { if (input[0] == '"') break; if (input[0] == '\\') { if (input[1] == '"') { str += '"'; input += 2; continue; } else if (input[1] == '\\') { str += '\\'; input += 2; } else { str += (char)(unhex(input[1])*16 + unhex(input[2])); input += 3; continue; } } str += input[0]; input++; } input++; return allocator.alloc()->setString(IString(str.c_str(), false), dollared); } while (input[0] && !isspace(input[0]) && input[0] != ')') input++; char temp = input[0]; input[0] = 0; auto ret = allocator.alloc()->setString(IString(start, false), dollared); // TODO: reuse the string here, carefully input[0] = temp; return ret; } }; // // SExpressions => WebAssembly module // class SExpressionWasmBuilder { Module& wasm; MixedArena allocator; std::function onError; int functionCounter; public: // Assumes control of and modifies the input. SExpressionWasmBuilder(Module& wasm, Element& module, std::function onError) : wasm(wasm), onError(onError), functionCounter(0) { assert(module[0]->str() == MODULE); for (unsigned i = 1; i < module.size(); i++) { parseModuleElement(*module[i]); } } private: void parseModuleElement(Element& curr) { IString id = curr[0]->str(); if (id == FUNC) return parseFunction(curr); if (id == MEMORY) return parseMemory(curr); if (id == EXPORT) return parseExport(curr); if (id == IMPORT) return parseImport(curr); if (id == TABLE) return parseTable(curr); if (id == TYPE) return parseType(curr); std::cerr << "bad module element " << id.str << '\n'; onError(); } // function parsing state Function *currFunction = nullptr; std::map currLocalTypes; size_t localIndex; // params and locals size_t otherIndex; std::vector labelStack; Name getPrefixedName(std::string prefix) { return IString((prefix + std::to_string(otherIndex++)).c_str(), false); } void parseFunction(Element& s) { auto func = currFunction = allocator.alloc(); size_t i = 1; if (s[i]->isStr()) { func->name = s[i]->str(); i++; } else { // unnamed, use an index func->name = Name::fromInt(functionCounter); } functionCounter++; func->body = nullptr; localIndex = 0; otherIndex = 0; std::vector typeParams; // we may have both params and a type. store the type info here for (;i < s.size(); i++) { Element& curr = *s[i]; IString id = curr[0]->str(); if (id == PARAM || id == LOCAL) { size_t j = 1; while (j < curr.size()) { IString name; WasmType type = none; if (!curr[j]->dollared()) { // dollared input symbols cannot be types type = stringToWasmType(curr[j]->str(), true); } if (type != none) { // a type, so an unnamed parameter name = Name::fromInt(localIndex); } else { name = curr[j]->str(); type = stringToWasmType(curr[j+1]->str()); j++; } j++; if (id == PARAM) { func->params.emplace_back(name, type); } else { func->locals.emplace_back(name, type); } localIndex++; currLocalTypes[name] = type; } } else if (id == RESULT) { func->result = stringToWasmType(curr[1]->str()); } else if (id == TYPE) { Name name = curr[1]->str(); func->type = name; if (wasm.functionTypesMap.find(name) == wasm.functionTypesMap.end()) onError(); FunctionType* type = wasm.functionTypesMap[name]; func->result = type->result; for (size_t j = 0; j < type->params.size(); j++) { IString name = Name::fromInt(j); WasmType currType = type->params[j]; typeParams.emplace_back(name, currType); currLocalTypes[name] = currType; } } else { // body if (typeParams.size() > 0 && func->params.size() == 0) { func->params = typeParams; } Expression* ex = parseExpression(curr); if (!func->body) { func->body = ex; } else { auto block = func->body->dyn_cast(); if (!block) { block = allocator.alloc(); block->list.push_back(func->body); func->body = block; } block->list.push_back(ex); } } } if (!func->body) func->body = allocator.alloc(); wasm.addFunction(func); currLocalTypes.clear(); labelStack.clear(); currFunction = nullptr; } WasmType stringToWasmType(IString str, bool allowError=false, bool prefix=false) { return stringToWasmType(str.str, allowError, prefix); } WasmType stringToWasmType(const char* str, bool allowError=false, bool prefix=false) { if (str[0] == 'i') { if (str[1] == '3' && str[2] == '2' && (prefix || str[3] == 0)) return i32; if (str[1] == '6' && str[2] == '4' && (prefix || str[3] == 0)) return i64; } if (str[0] == 'f') { if (str[1] == '3' && str[2] == '2' && (prefix || str[3] == 0)) return f32; if (str[1] == '6' && str[2] == '4' && (prefix || str[3] == 0)) return f64; } if (allowError) return none; onError(); abort(); } public: Expression* parseExpression(Element* s) { return parseExpression(*s); } #define abort_on(str) { std::cerr << "aborting on " << str << '\n'; onError(); } Expression* parseExpression(Element& s) { //std::cerr << "parse expression " << s << '\n'; IString id = s[0]->str(); const char *str = id.str; const char *dot = strchr(str, '.'); if (dot) { // type.operation (e.g. i32.add) WasmType type = stringToWasmType(str, false, true); const char *op = dot + 1; switch (op[0]) { case 'a': { if (op[1] == 'b') return makeUnary(s, UnaryOp::Abs, type); if (op[1] == 'd') return makeBinary(s, BinaryOp::Add, type); if (op[1] == 'n') return makeBinary(s, BinaryOp::And, type); abort_on(op); } case 'c': { if (op[1] == 'e') return makeUnary(s, UnaryOp::Ceil, type); if (op[1] == 'l') return makeUnary(s, UnaryOp::Clz, type); if (op[1] == 'o') { if (op[2] == 'p') return makeBinary(s, BinaryOp::CopySign, type); if (op[2] == 'n') { if (op[3] == 'v') { if (op[8] == 's') return makeConvert(s, op[11] == '3' ? ConvertOp::ConvertSInt32 : ConvertOp::ConvertSInt64, type); if (op[8] == 'u') return makeConvert(s, op[11] == '3' ? ConvertOp::ConvertUInt32 : ConvertOp::ConvertUInt64, type); } if (op[3] == 's') return makeConst(s, type); } } if (op[1] == 't') return makeUnary(s, UnaryOp::Ctz, type); abort_on(op); } case 'd': { if (op[1] == 'i') { if (op[3] == '_') return makeBinary(s, op[4] == 'u' ? BinaryOp::DivU : BinaryOp::DivS, type); if (op[3] == 0) return makeBinary(s, BinaryOp::Div, type); } if (op[1] == 'e') return makeConvert(s, ConvertOp::DemoteFloat64, type); abort_on(op); } case 'e': { if (op[1] == 'q') return makeCompare(s, RelationalOp::Eq, type); if (op[1] == 'x') return makeConvert(s, op[7] == 'u' ? ConvertOp::ExtendUInt32 : ConvertOp::ExtendSInt32, type); abort_on(op); } case 'f': { if (op[1] == 'l') return makeUnary(s, UnaryOp::Floor, type); abort_on(op); } case 'g': { if (op[1] == 't') { if (op[2] == '_') return makeCompare(s, op[3] == 'u' ? RelationalOp::GtU : RelationalOp::GtS, type); if (op[2] == 0) return makeCompare(s, RelationalOp::Gt, type); } if (op[1] == 'e') { if (op[2] == '_') return makeCompare(s, op[3] == 'u' ? RelationalOp::GeU : RelationalOp::GeS, type); if (op[2] == 0) return makeCompare(s, RelationalOp::Ge, type); } abort_on(op); } case 'l': { if (op[1] == 't') { if (op[2] == '_') return makeCompare(s, op[3] == 'u' ? RelationalOp::LtU : RelationalOp::LtS, type); if (op[2] == 0) return makeCompare(s, RelationalOp::Lt, type); } if (op[1] == 'e') { if (op[2] == '_') return makeCompare(s, op[3] == 'u' ? RelationalOp::LeU : RelationalOp::LeS, type); if (op[2] == 0) return makeCompare(s, RelationalOp::Le, type); } if (op[1] == 'o') return makeLoad(s, type); abort_on(op); } case 'm': { if (op[1] == 'i') return makeBinary(s, BinaryOp::Min, type); if (op[1] == 'a') return makeBinary(s, BinaryOp::Max, type); if (op[1] == 'u') return makeBinary(s, BinaryOp::Mul, type); abort_on(op); } case 'n': { if (op[1] == 'e') { if (op[2] == 0) return makeCompare(s, RelationalOp::Ne, type); if (op[2] == 'a') return makeUnary(s, UnaryOp::Nearest, type); if (op[2] == 'g') return makeUnary(s, UnaryOp::Neg, type); } abort_on(op); } case 'o': { if (op[1] == 'r') return makeBinary(s, BinaryOp::Or, type); abort_on(op); } case 'p': { if (op[1] == 'r') return makeConvert(s, ConvertOp::PromoteFloat32, type); if (op[1] == 'o') return makeUnary(s, UnaryOp::Popcnt, type); abort_on(op); } case 'r': { if (op[1] == 'e') { if (op[2] == 'm') return makeBinary(s, op[4] == 'u' ? BinaryOp::RemU : BinaryOp::RemS, type); if (op[2] == 'i') return makeConvert(s, isWasmTypeFloat(type) ? ConvertOp::ReinterpretInt : ConvertOp::ReinterpretFloat, type); } abort_on(op); } case 's': { if (op[1] == 'e') return makeSelect(s, type); if (op[1] == 'h') { if (op[2] == 'l') return makeBinary(s, BinaryOp::Shl, type); return makeBinary(s, op[4] == 'u' ? BinaryOp::ShrU : BinaryOp::ShrS, type); } if (op[1] == 'u') return makeBinary(s, BinaryOp::Sub, type); if (op[1] == 'q') return makeUnary(s, UnaryOp::Sqrt, type); if (op[1] == 't') return makeStore(s, type); abort_on(op); } case 't': { if (op[1] == 'r') { if (op[6] == 's') return makeConvert(s, op[9] == '3' ? ConvertOp::TruncSFloat32 : ConvertOp::TruncSFloat64, type); if (op[6] == 'u') return makeConvert(s, op[9] == '3' ? ConvertOp::TruncUFloat32 : ConvertOp::TruncUFloat64, type); if (op[2] == 'u') return makeUnary(s, UnaryOp::Trunc, type); } abort_on(op); } case 'w': { if (op[1] == 'r') return makeConvert(s, ConvertOp::WrapInt64, type); abort_on(op); } case 'x': { if (op[1] == 'o') return makeBinary(s, BinaryOp::Xor, type); abort_on(op); } default: abort_on(op); } } else { // other expression switch (str[0]) { case 'b': { if (str[1] == 'l') return makeBlock(s); if (str[1] == 'r') return makeBreak(s); abort_on(str); } case 'c': { if (str[1] == 'a') { if (id == CALL) return makeCall(s); if (id == CALL_IMPORT) return makeCallImport(s); if (id == CALL_INDIRECT) return makeCallIndirect(s); } abort_on(str); } case 'g': { if (str[1] == 'e') return makeGetLocal(s); if (str[1] == 'r') return makeHost(s, HostOp::GrowMemory); abort_on(str); } case 'h': { if (str[1] == 'a') return makeHost(s, HostOp::HasFeature); abort_on(str); } case 'i': { if (str[1] == 'f') return makeIf(s); abort_on(str); } case 'l': { if (str[1] == 'a') return makeLabel(s); if (str[1] == 'o') return makeLoop(s); abort_on(str); } case 'm': { if (str[1] == 'e') return makeHost(s, HostOp::MemorySize); abort_on(str); } case 'n': { if (str[1] == 'o') return allocator.alloc(); abort_on(str); } case 'p': { if (str[1] == 'a') return makeHost(s, HostOp::PageSize); abort_on(str); } case 's': { if (str[1] == 'e') return makeSetLocal(s); abort_on(str); } case 'r': { if (str[1] == 'e') return makeReturn(s); abort_on(str); } case 't': { if (str[1] == 'a') return makeSwitch(s); // aka tableswitch abort_on(str); } case 'u': { if (str[1] == 'n') return allocator.alloc(); abort_on(str); } default: abort_on(str); } } abort(); } private: Expression* makeBinary(Element& s, BinaryOp op, WasmType type) { auto ret = allocator.alloc(); ret->op = op; ret->left = parseExpression(s[1]); ret->right = parseExpression(s[2]); ret->type = type; return ret; } Expression* makeUnary(Element& s, UnaryOp op, WasmType type) { auto ret = allocator.alloc(); ret->op = op; ret->value = parseExpression(s[1]); ret->type = type; return ret; } Expression* makeCompare(Element& s, RelationalOp op, WasmType type) { auto ret = allocator.alloc(); ret->op = op; ret->left = parseExpression(s[1]); ret->right = parseExpression(s[2]); ret->inputType = type; return ret; } Expression* makeConvert(Element& s, ConvertOp op, WasmType type) { auto ret = allocator.alloc(); ret->op = op; ret->value = parseExpression(s[1]); ret->type = type; return ret; } Expression* makeSelect(Element& s, WasmType type) { auto ret = allocator.alloc