/* * Copyright 2015 WebAssembly Community Group participants * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ // // .s to WebAssembly translator. // #ifndef wasm_s2wasm_h #define wasm_s2wasm_h #include #include "wasm.h" #include "parsing.h" #include "pass.h" #include "asm_v_wasm.h" #include "wasm-builder.h" #include "wasm-linker.h" namespace wasm { // // S2WasmBuilder - parses a .s file into WebAssembly // class S2WasmBuilder { const char* inputStart; const char* s; bool debug; Module* wasm; MixedArena* allocator; LinkerObject* linkerObj; public: S2WasmBuilder(const char* input, bool debug) : inputStart(input), s(input), debug(debug), wasm(nullptr), allocator(nullptr), linkerObj(nullptr) {} void build(LinkerObject *obj, LinkerObject::SymbolInfo* info) { if (!obj->isEmpty()) Fatal() << "Cannot construct an S2WasmBuilder in an non-empty LinkerObject"; if (!info) info = getSymbolInfo(); linkerObj = obj; wasm = &obj->wasm; allocator = &wasm->allocator; s = inputStart; process(); } LinkerObject::SymbolInfo* getSymbolInfo() { auto* info = new LinkerObject::SymbolInfo(); scan(info); return info; } private: // utilities void skipWhitespace() { while (1) { while (*s && isspace(*s)) s++; if (*s != '#') break; while (*s != '\n') s++; } } bool skipComma() { skipWhitespace(); if (*s != ',') return false; s++; skipWhitespace(); return true; } #define abort_on(why) { \ dump(why ":"); \ abort(); \ } // match and skip the pattern, if matched bool match(const char *pattern) { size_t size = strlen(pattern); if (strncmp(s, pattern, size) == 0) { s += size; skipWhitespace(); return true; } return false; } void mustMatch(const char *pattern) { bool matched = match(pattern); if (!matched) { std::cerr << "<< " << pattern << " >>\n"; abort_on("bad mustMatch"); } } void dump(const char *text) { std::cerr << "[[" << text << "]]:\n==========\n"; for (size_t i = 0; i < 60; i++) { if (!s[i]) break; std::cerr << s[i]; } std::cerr << "\n==========\n"; } void unget(Name str) { s -= strlen(str.str); } Name getStr() { std::string str; // TODO: optimize this and the other get* methods while (*s && !isspace(*s)) { str += *s; s++; } return cashew::IString(str.c_str(), false); } void skipToSep() { while (*s && !isspace(*s) && *s != ',' && *s != '(' && *s != ')' && *s != ':' && *s != '+' && *s != '-') { s++; } } Name getStrToSep() { std::string str; while (*s && !isspace(*s) && *s != ',' && *s != '(' && *s != ')' && *s != ':' && *s != '+' && *s != '-' && *s != '=') { str += *s; s++; } return cashew::IString(str.c_str(), false); } Name getStrToColon() { std::string str; while (*s && !isspace(*s) && *s != ':') { str += *s; s++; } return cashew::IString(str.c_str(), false); } // get an int int32_t getInt() { const char* loc = s; uint32_t value = 0; bool neg = false; if (*loc == '-') { neg = true; loc++; } while (isdigit(*loc)) { uint32_t digit = *loc - '0'; if (value > std::numeric_limits::max() / 10) { abort_on("uint32_t overflow"); } value *= 10; if (value > std::numeric_limits::max() - digit) { abort_on("uint32_t overflow"); } value += digit; loc++; } if (neg) { uint32_t positive_int_min = (uint32_t) - (1 + std::numeric_limits::min()) + (uint32_t)1; if (value > positive_int_min) { abort_on("negative int32_t overflow"); } s = loc; return -value; } s = loc; return value; } // get an int from an arbitrary string, with our full error handling int32_t getInt(const char *from) { const char *before = s; s = from; auto ret = getInt(); s = before; return ret; } // gets a constant, which may be a relocation for later. // returns whether this is a relocation bool getConst(uint32_t* target) { if (isdigit(*s) || *s == '-') { *target = getInt(); return false; } else { // a global constant, we need to fix it up later Name name = getStrToSep(); LinkerObject::Relocation::Kind kind = isFunctionName(name) ? LinkerObject::Relocation::kFunction : LinkerObject::Relocation::kData; int offset = 0; if (*s == '+') { s++; offset = getInt(); } else if (*s == '-') { s++; offset = -getInt(); } linkerObj->addRelocation(kind, target, cleanFunction(name), offset); return true; } } int64_t getInt64() { const char* loc = s; uint64_t value = 0; bool neg = false; if (*loc == '-') { neg = true; loc++; } while (isdigit(*loc)) { uint64_t digit = *loc - '0'; if (value > std::numeric_limits::max() / 10) { abort_on("uint64_t overflow"); } value *= 10; if (value > std::numeric_limits::max() - digit) { abort_on("uint64_t overflow"); } value += digit; loc++; } if (neg) { uint64_t positive_int_min = (uint64_t) - (1 + std::numeric_limits::min()) + (uint64_t)1; if (value > positive_int_min) { abort_on("negative int64_t overflow"); } s = loc; return -value; } s = loc; return value; } Name getSeparated(char separator) { skipWhitespace(); std::string str; while (*s && *s != separator && *s != '\n') { str += *s; s++; } skipWhitespace(); return cashew::IString(str.c_str(), false); } Name getCommaSeparated() { return getSeparated(','); } Name getAtSeparated() { return getSeparated('@'); } Name getAssign() { skipWhitespace(); if (*s != '$') return Name(); std::string str; const char *before = s; while (*s && *s != '=' && *s != '\n' && *s != ',') { str += *s; s++; } if (*s != '=') { // not an assign s = before; return Name(); } s++; skipComma(); return cashew::IString(str.c_str(), false); } std::vector getQuoted() { assert(*s == '"'); s++; std::vector str; while (*s && *s != '\"') { if (s[0] == '\\') { switch (s[1]) { case 'n': str.push_back('\n'); s += 2; continue; case 'r': str.push_back('\r'); s += 2; continue; case 't': str.push_back('\t'); s += 2; continue; case 'f': str.push_back('\f'); s += 2; continue; case 'b': str.push_back('\b'); s += 2; continue; case '\\': str.push_back('\\'); s += 2; continue; case '"': str.push_back('"'); s += 2; continue; default: { if (isdigit(s[1])) { int code = (s[1] - '0')*8*8 + (s[2] - '0')*8 + (s[3] - '0'); str.push_back(char(code)); s += 4; continue; } else abort_on("getQuoted-escape"); } } } str.push_back(*s); s++; } s++; skipWhitespace(); return str; } WasmType getType() { if (match("i32")) return i32; if (match("i64")) return i64; if (match("f32")) return f32; if (match("f64")) return f64; abort_on("getType"); } // The LLVM backend emits function names as name@FUNCTION. bool isFunctionName(Name name) { return strstr(name.str, "@FUNCTION"); } // Drop the @ and after it. Name cleanFunction(Name name) { if (!strchr(name.str, '@')) return name; char *temp = strdup(name.str); *strchr(temp, '@') = 0; Name ret = cashew::IString(temp, false); free(temp); return ret; } // processors void scan(LinkerObject::SymbolInfo* info) { s = inputStart; while (*s) { skipWhitespace(); s = strstr(s, ".type"); if (!s) break; mustMatch(".type"); Name name = getCommaSeparated(); skipComma(); if (!match("@function")) continue; if (match(".hidden")) mustMatch(name.str); mustMatch(name.str); if (match(":")) { info->implementedFunctions.insert(name); } else if (match("=")) { Name alias = getAtSeparated(); mustMatch("@FUNCTION"); info->aliasedFunctions.insert({name, alias}); } else { abort_on("unknown directive"); } } } void process() { while (*s) { skipWhitespace(); if (debug) dump("process"); if (!*s) break; if (*s != '.') break; s++; if (match("text")) parseText(); else if (match("type")) parseType(); else if (match("weak") || match("hidden") || match("protected") || match("internal")) getStr(); // contents are in the content that follows else if (match("imports")) skipImports(); else if (match("data")) {} else if (match("ident")) {} else if (match("section")) parseToplevelSection(); else if (match("align") || match("p2align")) s = strchr(s, '\n'); else if (match("Lfunc_end")) { // skip the next line, which has a .size we can ignore s = strstr(s, ".size"); s = strchr(s, '\n'); } else if (match("globl")) parseGlobl(); else abort_on("process"); } } void parseToplevelSection() { auto section = getCommaSeparated(); // Initializers are anything in a section whose name begins with .init_array if (!strncmp(section.c_str(), ".init_array", strlen(".init_array") - 1)) { parseInitializer(); return; } s = strchr(s, '\n'); } void parseInitializer() { // Ignore the rest of the .section line s = strchr(s, '\n'); skipWhitespace(); // The section may start with .p2align if (match(".p2align")) { s = strchr(s, '\n'); skipWhitespace(); } mustMatch(".int32"); do { linkerObj->addInitializerFunction(cleanFunction(getStr())); skipWhitespace(); } while (match(".int32")); } void parseText() { while (*s) { skipWhitespace(); if (!*s) break; if (*s != '.') break; s++; if (parseVersionMin()); else if (match("file")) parseFile(); else if (match("globl")) parseGlobl(); else if (match("type")) parseType(); else { s--; break; } } } void parseFile() { assert(*s == '"'); s++; std::string filename; while (*s != '"') { filename += *s; s++; } s++; // TODO: use the filename? } void parseGlobl() { linkerObj->addGlobal(getStr()); skipWhitespace(); } bool parseVersionMin() { if (match("watchos_version_min") || match("tvos_version_min") || match("ios_version_min") || match("macosx_version_min")) { s = strchr(s, '\n'); skipWhitespace(); return true; } else return false; } void parseFunction() { if (debug) dump("func"); Name name = getStrToSep(); if (match(" =")) { /* alias = */ getAtSeparated(); mustMatch("@FUNCTION"); return; } mustMatch(":"); unsigned nextId = 0; auto getNextId = [&nextId]() { return cashew::IString(('$' + std::to_string(nextId++)).c_str(), false); }; wasm::Builder builder(*wasm); std::vector params; WasmType resultType = none; std::vector vars; std::map localTypes; // params and result while (1) { if (match(".param")) { while (1) { Name name = getNextId(); WasmType type = getType(); params.emplace_back(name, type); localTypes[name] = type; skipWhitespace(); if (!match(",")) break; } } else if (match(".result")) { resultType = getType(); } else if (match(".local")) { while (1) { Name name = getNextId(); WasmType type = getType(); vars.emplace_back(name, type); localTypes[name] = type; skipWhitespace(); if (!match(",")) break; } } else break; } Function* func = builder.makeFunction(name, std::move(params), resultType, std::move(vars)); // parse body func->body = allocator->alloc(); std::vector bstack; auto addToBlock = [&bstack](Expression* curr) { Expression* last = bstack.back(); if (last->is()) { last = last->cast()->body; } last->cast()->list.push_back(curr); last->cast()->finalize(); }; bstack.push_back(func->body); std::vector estack; auto push = [&](Expression* curr) { //std::cerr << "push " << curr << '\n'; estack.push_back(curr); }; auto pop = [&]() { assert(!estack.empty()); Expression* ret = estack.back(); assert(ret); estack.pop_back(); //std::cerr << "pop " << ret << '\n'; return ret; }; auto getNumInputs = [&]() { int ret = 1; const char *t = s; while (*t != '\n') { if (*t == ',') ret++; t++; } return ret; }; auto getInputs = [&](int num) { // we may have $pop, $0, $pop, $1 etc., which are getlocals // interleaved with stack pops, and the stack pops must be done in // *reverse* order, i.e., that input should turn into // lastpop, getlocal(0), firstpop, getlocal(1) std::vector inputs; // TODO: optimize (if .s format doesn't change) inputs.resize(num); for (int i = 0; i < num; i++) { if (match("$pop")) { skipToSep(); inputs[i] = nullptr; } else { auto curr = allocator->alloc(); curr->index = func->getLocalIndex(getStrToSep()); curr->type = func->getLocalType(curr->index); inputs[i] = curr; } if (*s == ')') s++; // tolerate 0(argument) syntax, where we started at the 'a' if (*s == ':') { // tolerate :attribute=value syntax (see getAttributes) s++; skipToSep(); } if (i < num - 1) skipComma(); } for (int i = num-1; i >= 0; i--) { if (inputs[i] == nullptr) inputs[i] = pop(); } return inputs; }; auto getInput = [&]() { return getInputs(1)[0]; }; auto setOutput = [&](Expression* curr, Name assign) { if (assign.isNull() || assign.str[1] == 'd') { // discard addToBlock(curr); } else if (assign.str[1] == 'p') { // push push(curr); } else { // set to a local auto set = allocator->alloc(); set->index = func->getLocalIndex(assign); set->value = curr; set->type = curr->type; addToBlock(set); } }; auto getAttributes = [&](int num) { const char *before = s; std::vector attributes; // TODO: optimize (if .s format doesn't change) attributes.resize(num); for (int i = 0; i < num; i++) { skipToSep(); if (*s == ')') s++; // tolerate 0(argument) syntax, where we started at the 'a' if (*s == ':') { attributes[i] = s + 1; } else { attributes[i] = nullptr; } if (i < num - 1) skipComma(); } s = before; return attributes; }; // auto makeBinary = [&](BinaryOp op, WasmType type) { Name assign = getAssign(); skipComma(); auto curr = allocator->alloc(); curr->op = op; auto inputs = getInputs(2); curr->left = inputs[0]; curr->right = inputs[1]; curr->finalize(); assert(curr->type == type); setOutput(curr, assign); }; auto makeUnary = [&](UnaryOp op, WasmType type) { Name assign = getAssign(); skipComma(); auto curr = allocator->alloc(); curr->op = op; curr->value = getInput(); curr->type = type; setOutput(curr, assign); }; auto makeHost = [&](HostOp op) { Name assign = getAssign(); auto curr = allocator->alloc(); curr->op = op; setOutput(curr, assign); }; auto makeHost1 = [&](HostOp op) { Name assign = getAssign(); auto curr = allocator->alloc(); curr->op = op; curr->operands.push_back(getInput()); setOutput(curr, assign); }; auto makeLoad = [&](WasmType type) { skipComma(); auto curr = allocator->alloc(); curr->type = type; int32_t bytes = getInt() / CHAR_BIT; curr->bytes = bytes > 0 ? bytes : getWasmTypeSize(type); curr->signed_ = match("_s"); match("_u"); Name assign = getAssign(); getConst(&curr->offset); mustMatch("("); auto attributes = getAttributes(1); curr->ptr = getInput(); curr->align = curr->bytes; if (attributes[0]) { assert(strncmp(attributes[0], "p2align=", 8) == 0); curr->align = 1U << getInt(attributes[0] + 8); } setOutput(curr, assign); }; auto makeStore = [&](WasmType type) { skipComma(); auto curr = allocator->alloc(); curr->type = type; int32_t bytes = getInt() / CHAR_BIT; curr->bytes = bytes > 0 ? bytes : getWasmTypeSize(type); Name assign = getAssign(); getConst(&curr->offset); mustMatch("("); auto attributes = getAttributes(2); auto inputs = getInputs(2); curr->ptr = inputs[0]; curr->align = curr->bytes; if (attributes[0]) { assert(strncmp(attributes[0], "p2align=", 8) == 0); curr->align = 1U << getInt(attributes[0] + 8); } curr->value = inputs[1]; setOutput(curr, assign); }; auto makeSelect = [&](WasmType type) { Name assign = getAssign(); skipComma(); auto curr = allocator->alloc