/* * Copyright 2015 WebAssembly Community Group participants * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ // // .s to WebAssembly translator. // #ifndef wasm_s2wasm_h #define wasm_s2wasm_h #include #include "wasm.h" #include "parsing.h" #include "pass.h" #include "asm_v_wasm.h" #include "wasm-builder.h" #include "wasm-linker.h" namespace wasm { // // S2WasmBuilder - parses a .s file into WebAssembly // class S2WasmBuilder { const char* inputStart; const char* s; bool debug; Module* wasm; MixedArena* allocator; LinkerObject* linkerObj; std::unique_ptr symbolInfo; std::unordered_map fileIndexMap; public: S2WasmBuilder(const char* input, bool debug) : inputStart(input), s(input), debug(debug), wasm(nullptr), allocator(nullptr), linkerObj(nullptr) {} void build(LinkerObject *obj) { // If getSymbolInfo has not already been called, populate the symbol // info now. if (!symbolInfo) symbolInfo.reset(getSymbolInfo()); linkerObj = obj; wasm = &obj->wasm; allocator = &wasm->allocator; s = inputStart; process(); } // getSymbolInfo scans the .s file to determine what symbols it defines // and references. LinkerObject::SymbolInfo* getSymbolInfo() { if (!symbolInfo) { symbolInfo = make_unique(); scan(symbolInfo.get()); } return symbolInfo.get(); } private: // utilities void skipWhitespace() { while (1) { while (*s && isspace(*s)) s++; if (*s != '#') break; while (*s != '\n') s++; } } void skipToEOL() { s = strchr(s, '\n'); assert(s); } bool skipComma() { skipWhitespace(); if (*s != ',') return false; s++; skipWhitespace(); return true; } bool skipEqual() { skipWhitespace(); if (*s != '=') return false; s++; skipWhitespace(); return true; } #define abort_on(why) { \ dump(why ":"); \ abort(); \ } bool peek(const char *pattern) { return strncmp(s, pattern, strlen(pattern)) == 0; } // match and skip the pattern, if matched bool match(const char *pattern) { size_t size = strlen(pattern); if (strncmp(s, pattern, size) == 0) { s += size; skipWhitespace(); return true; } return false; } void mustMatch(const char *pattern) { bool matched = match(pattern); if (!matched) { std::cerr << "<< " << pattern << " >>\n"; abort_on("bad mustMatch"); } } void dump(const char *text) { std::cerr << "[[" << text << "]]:\n==========\n"; for (size_t i = 0; i < 60; i++) { if (!s[i]) break; std::cerr << s[i]; } std::cerr << "\n==========\n"; } void unget(Name str) { s -= strlen(str.str); } Name getStr() { std::string str; // TODO: optimize this and the other get* methods while (*s && !isspace(*s)) { str += *s; s++; } return cashew::IString(str.c_str(), false); } void skipToSep() { while (*s && !isspace(*s) && *s != ',' && *s != '(' && *s != ')' && *s != ':' && *s != '+' && *s != '-') { s++; } } Name getStrToSep() { std::string str; while (*s && !isspace(*s) && *s != ',' && *s != '(' && *s != ')' && *s != ':' && *s != '+' && *s != '-' && *s != '=') { str += *s; s++; } return cashew::IString(str.c_str(), false); } Name getStrToColon() { std::string str; while (*s && !isspace(*s) && *s != ':') { str += *s; s++; } return cashew::IString(str.c_str(), false); } // get an int int32_t getInt() { const char* loc = s; uint32_t value = 0; bool neg = false; if (*loc == '-') { neg = true; loc++; } while (isdigit(*loc)) { uint32_t digit = *loc - '0'; if (value > std::numeric_limits::max() / 10) { abort_on("uint32_t overflow"); } value *= 10; if (value > std::numeric_limits::max() - digit) { abort_on("uint32_t overflow"); } value += digit; loc++; } if (neg) { uint32_t positive_int_min = (uint32_t) - (1 + std::numeric_limits::min()) + (uint32_t)1; if (value > positive_int_min) { abort_on("negative int32_t overflow"); } s = loc; return -value; } s = loc; return value; } // get an int from an arbitrary string, with our full error handling int32_t getInt(const char *from) { const char *before = s; s = from; auto ret = getInt(); s = before; return ret; } // gets a constant, which may be a relocation for later. // returns whether this is a relocation // TODO: Clean up this and the way relocs are created from parsed objects LinkerObject::Relocation* getRelocatableConst(uint32_t* target) { if (isdigit(*s) || *s == '-') { int32_t val = getInt(); memcpy(target, &val, sizeof(val)); return nullptr; } // a global constant, we need to fix it up later Name name = getStrToSep(); LinkerObject::Relocation::Kind kind = isFunctionName(name) ? LinkerObject::Relocation::kFunction : LinkerObject::Relocation::kData; int offset = 0; if (*s == '+') { s++; offset = getInt(); } else if (*s == '-') { s++; offset = -getInt(); } return new LinkerObject::Relocation( kind, target, fixEmLongjmp(cleanFunction(name)), offset); } Expression* relocationToGetGlobal(LinkerObject::Relocation* relocation) { if (!relocation) { return nullptr; } auto name = relocation->symbol; auto g = allocator->alloc(); g->name = name; g->type = i32; // Optimization: store any nonnegative addends in their natural place. // Only do this for positive addends because load/store offsets cannot be // negative. if (relocation->addend >= 0) { *relocation->data = relocation->addend; return g; } auto c = allocator->alloc(); c->type = i32; c->value = Literal(relocation->addend); auto add = allocator->alloc(); add->type = i32; add->op = AddInt32; add->left = c; add->right = g; return add; } Expression* getRelocatableExpression(uint32_t* target) { auto relocation = std::unique_ptr(getRelocatableConst(target)); if (!relocation) { return nullptr; } if (linkerObj->isObjectImplemented(relocation->symbol)) { linkerObj->addRelocation(relocation.release()); return nullptr; } return relocationToGetGlobal(relocation.get()); } int64_t getInt64() { const char* loc = s; uint64_t value = 0; bool neg = false; if (*loc == '-') { neg = true; loc++; } while (isdigit(*loc)) { uint64_t digit = *loc - '0'; if (value > std::numeric_limits::max() / 10) { abort_on("uint64_t overflow"); } value *= 10; if (value > std::numeric_limits::max() - digit) { abort_on("uint64_t overflow"); } value += digit; loc++; } if (neg) { uint64_t positive_int_min = (uint64_t) - (1 + std::numeric_limits::min()) + (uint64_t)1; if (value > positive_int_min) { abort_on("negative int64_t overflow"); } s = loc; return -value; } s = loc; return value; } Name getSeparated(char separator) { skipWhitespace(); std::string str; while (*s && *s != separator && *s != '\n') { str += *s; s++; } skipWhitespace(); return cashew::IString(str.c_str(), false); } Name getCommaSeparated() { return getSeparated(','); } Name getAtSeparated() { return getSeparated('@'); } Name getAssign() { skipWhitespace(); if (*s != '$') return Name(); const char *before = s; s++; std::string str; while (*s && *s != '=' && *s != '\n' && *s != ',') { str += *s; s++; } if (*s != '=') { // not an assign s = before; return Name(); } s++; skipComma(); return cashew::IString(str.c_str(), false); } std::vector getQuoted() { assert(*s == '"'); s++; std::vector str; while (*s && *s != '\"') { if (s[0] == '\\') { switch (s[1]) { case 'n': str.push_back('\n'); s += 2; continue; case 'r': str.push_back('\r'); s += 2; continue; case 't': str.push_back('\t'); s += 2; continue; case 'f': str.push_back('\f'); s += 2; continue; case 'b': str.push_back('\b'); s += 2; continue; case '\\': str.push_back('\\'); s += 2; continue; case '"': str.push_back('"'); s += 2; continue; default: { if (isdigit(s[1])) { int code = (s[1] - '0')*8*8 + (s[2] - '0')*8 + (s[3] - '0'); str.push_back(char(code)); s += 4; continue; } else abort_on("getQuoted-escape"); } } } str.push_back(*s); s++; } s++; skipWhitespace(); return str; } WasmType tryGetType() { if (match("i32")) return i32; if (match("i64")) return i64; if (match("f32")) return f32; if (match("f64")) return f64; return none; } WasmType tryGetTypeWithoutNewline() { const char* saved = s; WasmType type = tryGetType(); if (type != none && strchr(saved, '\n') > s) { s = saved; type = none; } return type; } WasmType getType() { WasmType t = tryGetType(); if (t != none) { return t; } abort_on("getType"); } // The LLVM backend emits function names as name@FUNCTION. bool isFunctionName(Name name) { return !!strstr(name.str, "@FUNCTION"); } // Drop the @ and after it. Name cleanFunction(Name name) { if (!strchr(name.str, '@')) return name; char *temp = strdup(name.str); *strchr(temp, '@') = 0; Name ret = cashew::IString(temp, false); free(temp); return ret; } // processors void scan(LinkerObject::SymbolInfo* info) { s = inputStart; while (*s) { skipWhitespace(); // add function definitions and aliases if (match(".type")) { Name name = getCommaSeparated(); skipComma(); if (!match("@function")) continue; if (match(".hidden")) mustMatch(name.str); mustMatch(name.str); if (match(":")) { info->implementedFunctions.insert(name); } else if (match("=")) { Name alias = getAtSeparated(); mustMatch("@FUNCTION"); auto ret = info->aliasedSymbols.insert({name, LinkerObject::SymbolAlias(alias, LinkerObject::Relocation::kFunction, 0)}); if (!ret.second) std::cerr << "Unsupported data alias redefinition: " << name << ", skipping...\n"; } else { abort_on("unknown directive"); } } else if (match(".import_global")) { Name name = getStr(); info->importedObjects.insert(name); s = strchr(s, '\n'); } else { // add data aliases Name lhs = getStrToSep(); // When the current line contains only one word, e.g.".text" if (match("\n")) continue; // When the current line contains more than one word if (!skipEqual()){ s = strchr(s, '\n'); if (!s) break; continue; } // get the original name Name rhs = getStrToSep(); assert(!isFunctionName(rhs)); Offset offset = 0; if (*s == '+') { s++; offset = getInt(); } // check if the rhs is already an alias const auto alias = symbolInfo->aliasedSymbols.find(rhs); if (alias != symbolInfo->aliasedSymbols.end() && alias->second.kind == LinkerObject::Relocation::kData) { offset += alias->second.offset; rhs = alias->second.symbol; } // add the new alias auto ret = symbolInfo->aliasedSymbols.insert({lhs, LinkerObject::SymbolAlias(rhs, LinkerObject::Relocation::kData, offset)}); if (!ret.second) std::cerr << "Unsupported function alias redefinition: " << lhs << ", skipping...\n"; } } } void process() { while (*s) { skipWhitespace(); if (debug) dump("process"); if (!*s) break; if (*s != '.') skipObjectAlias(false); s++; if (match("text")) parseText(); else if (match("type")) parseType(); else if (match("weak") || match("hidden") || match("protected") || match("internal")) getStr(); // contents are in the content that follows else if (match("imports")) skipImports(); else if (match("data")) {} else if (match("ident")) skipToEOL(); else if (match("section")) parseToplevelSection(); else if (match("file")) parseFile(); else if (match("align") || match("p2align")) skipToEOL(); else if (match("import_global")) { skipToEOL(); skipWhitespace(); if (match(".size")) { skipToEOL(); } } else if (match("globl")) parseGlobl(); else if (match("functype")) parseFuncType(); else skipObjectAlias(true); } } void skipObjectAlias(bool prefix) { if (debug) dump("object_alias"); // grab the dot that was consumed earlier if (prefix) s--; Name lhs = getStrToSep(); WASM_UNUSED(lhs); if (!skipEqual()) abort_on("object_alias"); Name rhs = getStr(); WASM_UNUSED(rhs); skipWhitespace(); // if no size attribute (e.g. weak symbol), skip if (!match(".size")) return; mustMatch(lhs.str); mustMatch(","); Name size = getStr(); WASM_UNUSED(size); skipWhitespace(); } void parseToplevelSection() { auto section = getCommaSeparated(); // Skipping .debug_ sections if (!strncmp(section.c_str(), ".debug_", strlen(".debug_"))) { const char *next = strstr(s, ".section"); s = !next ? s + strlen(s) : next; return; } // Initializers are anything in a section whose name begins with .init_array if (!strncmp(section.c_str(), ".init_array", strlen(".init_array") - 1)) { parseInitializer(); return; } s = strchr(s, '\n'); } void parseInitializer() { // Ignore the rest of the .section line skipToEOL(); skipWhitespace(); // The section may start with .p2align if (match(".p2align")) { skipToEOL(); skipWhitespace(); } mustMatch(".int32"); do { linkerObj->addInitializerFunction(cleanFunction(getStr())); skipWhitespace(); } while (match(".int32")); } void parseText() { while (*s) { skipWhitespace(); if (!*s) break; if (*s != '.') break; s++; if (parseVersionMin()); else if (match("file")) parseFile(); else if (match("globl")) parseGlobl(); else if (match("type")) parseType(); else { s--; break; } } } void parseFile() { if (debug) dump("file"); size_t fileId = 0; if (*s != '"') { fileId = getInt(); skipWhitespace(); } auto filename = getQuoted(); uint32_t index = wasm->debugInfoFileNames.size(); wasm->debugInfoFileNames.push_back(std::string(filename.begin(), filename.end())); fileIndexMap[fileId] = index; } void parseGlobl() { linkerObj->addGlobal(getStr()); skipWhitespace(); } void parseFuncType() { auto decl = make_unique(); Name rawName = getCommaSeparated(); skipComma(); if(match("void")) { decl->result = none; } else { decl->result = getType(); } while (*s && skipComma()) decl->params.push_back(getType()); std::string sig = getSig(decl.get()); decl->name = "FUNCSIG$" + sig; FunctionType *ty = wasm->getFunctionTypeOrNull(decl->name); Name name = fixEmEHSjLjNames(rawName, sig); if (!ty) { // The wasm module takes ownership of the FunctionType if we insert it. // Otherwise it's already in the module and ours is freed. ty = decl.release(); wasm->addFunctionType(ty); } linkerObj->addExternType(name, ty); } bool parseVersionMin() { if (match("watchos_version_min") || match("tvos_version_min") || match("ios_version_min") || match("macosx_version_min")) { s = strchr(s, '\n'); skipWhitespace(); return true; } else return false; } void parseFunction() { if (debug) dump("func"); Name name = getStrToSep(); if (match(" =")) { /* alias = */ getAtSeparated(); mustMatch("@FUNCTION"); return; } mustMatch(":"); Function::DebugLocation debugLocation = { 0, 0, 0 }; bool useDebugLocation = false; auto recordLoc = [&]() { if (debug) dump("loc"); size_t fileId = getInt(); skipWhitespace(); uint32_t row = getInt(); skipWhitespace(); uint32_t column = getInt(); auto iter = fileIndexMap.find(fileId); if (iter == fileIndexMap.end()) { abort_on("idx"); } useDebugLocation = true; debugLocation = { iter->second, row, column }; s = strchr(s, '\n'); }; auto recordLabel = [&]() { if (debug) dump("label"); Name label = getStrToSep(); // TODO: track and create map of labels and their ranges for our AST WASM_UNUSED(label); s = strchr(s, '\n'); }; unsigned nextId = 0; auto getNextId = [&nextId]() { return cashew::IString(std::to_string(nextId++).c_str(), false); }; wasm::Builder builder(*wasm); std::vector params; WasmType resultType = none; std::vector vars; std::map localTypes; // params and result while (1) { if (match(".param")) { while (1) { Name name = getNextId(); WasmType type = getType(); params.emplace_back(name, type); localTypes[name] = type; skipWhitespace(); if (!match(",")) break; } } else if (match(".result")) { resultType = getType(); } else if (match(".indidx")) { int64_t indirectIndex = getInt64(); skipWhitespace(); if (indirectIndex < 0) { abort_on("indidx"); } linkerObj->addIndirectIndex(name, indirectIndex); } else if (match(".local")) { while (1) { Name name = getNextId(); WasmType type = getType(); vars.emplace_back(name, type); localTypes[name] = type; skipWhitespace(); if (!match(",")) break; } } else if (match(".file")) { parseFile(); skipWhitespace(); } else if (match(".loc")) { recordLoc(); skipWhitespace(); } else if (peek(".Lfunc_begin")) { recordLabel(); skipWhitespace(); } else break; } Function* func = builder.makeFunction(name, std::move(params), resultType, std::move(vars)); // parse body func->body = allocator->alloc(); std::vector bstack; auto addToBlock = [&](Expression* curr) { if (useDebugLocation) { func->debugLocations[curr] = debugLocation; } Expression* last = bstack.back(); if (last->is()) { last = last->cast()->body; } last->cast()->list.push_back(curr); }; bstack.push_back(func->body); std::vector estack; auto push = [&](Expression* curr) { //std::cerr << "push " << curr << '\n'; estack.push_back(curr); }; auto pop = [&]() { assert(!estack.empty()); Expression* ret = estack.back(); assert(ret); estack.pop_back(); //std::cerr << "pop " << ret << '\n'; return ret; }; auto getNumInputs = [&]() { int ret = 1; const char *t = s; while (*t != '\n') { if (*t == ',') ret++; t++; } return ret; }; auto getInputs = [&](int num) { // we may have $pop, $0, $pop, $1 etc., which are getlocals // interleaved with stack pops, and the stack pops must be done in // *reverse* order, i.e., that input should turn into // lastpop, getlocal(0), firstpop, getlocal(1) std::vector inputs; // TODO: optimize (if .s format doesn't change) inputs.resize(num); for (int i = 0; i < num; i++) { if (match("$pop")) { skipToSep(); inputs[i] = nullptr; } else if (*s == '$') { s++; auto curr = allocator->alloc(); curr->index = func->getLocalIndex(getStrToSep()); curr->type = func->getLocalType(curr->index); inputs[i] = curr; } else { abort_on("bad input register"); } if (*s == ')') s++; // tolerate 0(argument) syntax, where we started at the 'a' if (*s == ':') { // tolerate :attribute=value syntax (see getAttributes) s++; skipToSep(); } if (i < num - 1) skipComma(); } for (int i = num-1; i >= 0; i--) { if (inputs[i] == nullptr) inputs[i] = pop(); } return inputs; }; auto getInput = [&]() { return getInputs(1)[0]; }; auto setOutput = [&](Expression* curr, Name assign) { if (assign.isNull() || assign.str[0] == 'd') { // drop auto* add = curr; if (isConcreteWasmType(curr->type)) { add = builder.makeDrop(curr); } addToBlock(add); } else if (assign.str[0] == 'p') { // push push(curr); } else { // set to a local auto set = allocator->alloc(); set->index = func->getLocalIndex(assign); set->value = curr; set->type = curr->type; set->setTee(false); addToBlock(set); } }; auto getAttributes = [&](int num) { const char *before = s; std::vector attributes; // TODO: optimize (if .s format doesn't change) attributes.resize(num); for (int i = 0; i < num; i++) { skipToSep(); if (*s == ')') s++; // tolerate 0(argument) syntax, where we started at the 'a' if (*s == ':') { attributes[i] = s + 1; } else { attributes[i] = nullptr; } if (i < num - 1) skipComma(); } s = before; return attributes; }; // auto makeBinary = [&](BinaryOp op, WasmType type) { Name assign = getAssign(); skipComma(); auto curr = allocator->alloc(); curr->op = op; auto inputs = getInputs(2); curr->left = inputs[0]; curr->right = inputs[1]; curr->finalize(); assert(curr->type == type); setOutput(curr, assign); }; auto makeUnary = [&](UnaryOp op, WasmType type) { Name assign = getAssign(); skipComma(); auto curr = allocator->alloc(); curr->op = op; curr->value = getInput(); curr->type = type; curr->finalize(); setOutput(curr, assign); }; auto makeHost = [&](HostOp op) { Name assign = getAssign(); auto curr = allocator->alloc(); curr->op = op; curr->finalize(); setOutput(curr, assign); }; auto makeHost1 = [&](HostOp op) { Name assign = getAssign(); auto curr = allocator->alloc(); curr->op = op; curr->operands.push_back(getInput()); curr->finalize(); setOutput(curr, assign); }; auto useRelocationExpression = [&](Expression *expr, Expression *reloc) { if (!reloc) { return expr; } // Optimization: if the given expr is (i32.const 0), ignore it if (expr->_id == Expression::ConstId && ((Const*)expr)->value.getInteger() == 0) { return reloc; } // Otherwise, need to add relocation expr to given expr auto add = allocator->alloc(); add->type = i32; add->op = AddInt32; add->left = expr; add->right = reloc; return (Expression*)add; }; auto makeLoad = [&](WasmType type) { skipComma(); auto curr = allocator->alloc(); curr->isAtomic = false; curr->type = type; int32_t bytes = getInt() / CHAR_BIT; curr->bytes = bytes > 0 ? bytes : getWasmTypeSize(type); curr->signed_ = match("_s"); match("_u"); Name assign = getAssign(); auto relocation = getRelocatableExpression(&curr->offset.addr); mustMatch("("); auto attributes = getAttributes(1); curr->ptr = useRelocationExpression(getInput(), relocation); curr->align = curr->bytes; if (attributes[0]) { assert(strncmp(attributes[0], "p2align=", 8) == 0); curr->align = 1U << getInt(attributes[0] + 8); } setOutput(curr, assign); }; auto makeStore = [&](WasmType type) { auto curr = allocator->alloc(); curr->isAtomic = false; curr->valueType = type; s += strlen("store"); if(!isspace(*s)) { curr->bytes = getInt() / CHAR_BIT; } else { curr->bytes = getWasmTypeSize(type); } skipWhitespace(); auto relocation = getRelocatableExpression(&curr->offset.addr); mustMatch("("); auto attributes = getAttributes(2); auto inputs = getInputs(2); curr->ptr = useRelocationExpression(inputs[0], relocation); curr->align = curr->bytes; if (attributes[0]) { assert(strncmp(attributes[0], "p2align=", 8) == 0); curr->align = 1U << getInt(attributes[0] + 8); } curr->value = inputs[1]; curr->finalize(); addToBlock(curr); }; auto makeSelect = [&](WasmType type) { Name assign = getAssign(); skipComma(); auto curr = allocator->alloc