// // .s to WebAssembly translator. // #include "wasm.h" #include "parsing.h" #include "asm_v_wasm.h" namespace wasm { extern int debug; // wasm::debug is set in main(), typically from an env var cashew::IString EMSCRIPTEN_ASM_CONST("emscripten_asm_const"); // // S2WasmBuilder - parses a .s file into WebAssembly // class S2WasmBuilder { AllocatingModule& wasm; MixedArena& allocator; char *s; public: S2WasmBuilder(AllocatingModule& wasm, char *input) : wasm(wasm), allocator(wasm.allocator) { s = input; scan(); s = input; process(); fix(); } private: // state size_t nextStatic = 1; // location of next static allocation, i.e., the data segment std::map staticAddresses; // name => address typedef std::pair Addressing; std::vector addressings; // we fix these up struct Relocation { uint32_t* data; Name value; int offset; Relocation(uint32_t* data, Name value, int offset) : data(data), value(value), offset(offset) {} }; std::vector relocations; std::set implementedFunctions; std::map addressSegments; // address => segment index // utilities void skipWhitespace() { while (1) { while (*s && isspace(*s)) s++; if (*s != '#') break; while (*s != '\n') s++; } } bool skipComma() { skipWhitespace(); if (*s != ',') return false; s++; skipWhitespace(); return true; } #define abort_on(why) { \ dump(why ":"); \ abort(); \ } // match and skip the pattern, if matched bool match(const char *pattern) { size_t size = strlen(pattern); if (strncmp(s, pattern, size) == 0) { s += size; skipWhitespace(); return true; } return false; } void mustMatch(const char *pattern) { bool matched = match(pattern); if (!matched) { std::cerr << "<< " << pattern << " >>\n"; abort_on("bad mustMatch"); } } void dump(const char *text) { std::cerr << "[[" << text << "]]:\n==========\n"; for (size_t i = 0; i < 60; i++) { if (!s[i]) break; std::cerr << s[i]; } std::cerr << "\n==========\n"; } void unget(Name str) { s -= strlen(str.str); } Name getStr() { std::string str; // TODO: optimize this and the other get* methods while (*s && !isspace(*s)) { str += *s; s++; } return cashew::IString(str.c_str(), false); } void skipToSep() { while (*s && !isspace(*s) && *s != ',' && *s != '(' && *s != ')' && *s != ':' && *s != '+') { s++; } } Name getStrToSep() { std::string str; while (*s && !isspace(*s) && *s != ',' && *s != '(' && *s != ')' && *s != ':' && *s != '+') { str += *s; s++; } return cashew::IString(str.c_str(), false); } Name getStrToColon() { std::string str; while (*s && !isspace(*s) && *s != ':') { str += *s; s++; } return cashew::IString(str.c_str(), false); } int32_t getInt() { int32_t ret = 0; bool neg = false; if (*s == '-') { neg = true; s++; } while (isdigit(*s)) { ret *= 10; ret += (*s - '0'); s++; } if (neg) ret = -ret; return ret; } void getConst(uint32_t* target) { if (isdigit(*s)) { *target = getInt(); } else { // a global constant, we need to fix it up later Name name = getStrToSep(); int offset = 0; if (*s == '+') { s++; offset = getInt(); } relocations.emplace_back(target, name, offset); } } int64_t getInt64() { int64_t ret = 0; bool neg = false; if (*s == '-') { neg = true; s++; } while (isdigit(*s)) { ret *= 10; ret += (*s - '0'); s++; } if (neg) ret = -ret; return ret; } Name getCommaSeparated() { skipWhitespace(); std::string str; while (*s && *s != ',' && *s != '\n') { str += *s; s++; } skipWhitespace(); return cashew::IString(str.c_str(), false); } Name getAssign() { skipWhitespace(); if (*s != '$') return Name(); std::string str; char *before = s; while (*s && *s != '=' && *s != '\n' && *s != ',') { str += *s; s++; } if (*s != '=') { // not an assign s = before; return Name(); } s++; skipComma(); return cashew::IString(str.c_str(), false); } std::vector getQuoted() { // TODO: support 0 in the middle, etc., use a raw buffer, etc. assert(*s == '"'); s++; std::vector str; while (*s && *s != '\"') { if (s[0] == '\\') { switch (s[1]) { case 'n': str.push_back('\n'); s += 2; continue; case 'r': str.push_back('\r'); s += 2; continue; case 't': str.push_back('\t'); s += 2; continue; case 'f': str.push_back('\f'); s += 2; continue; case 'b': str.push_back('\b'); s += 2; continue; case '\\': str.push_back('\\'); s += 2; continue; case '"': str.push_back('"'); s += 2; continue; default: { if (isdigit(s[1])) { int code = (s[1] - '0')*8*8 + (s[2] - '0')*8 + (s[3] - '0'); str.push_back(char(code)); s += 4; continue; } else abort_on("getQuoted-escape"); } } } str.push_back(*s); s++; } s++; skipWhitespace(); return str; } WasmType getType() { if (match("i32")) return i32; if (match("i64")) return i64; if (match("f32")) return f32; if (match("f64")) return f64; abort_on("getType"); } // processors void scan() { while (*s) { s = strstr(s, "\n .type "); if (!s) break; mustMatch("\n .type "); Name name = getCommaSeparated(); skipComma(); if (!match("@function")) continue; mustMatch(name.str); mustMatch(":"); implementedFunctions.insert(name); } } void process() { while (*s) { skipWhitespace(); if (!*s) break; if (*s != '.') break; s++; if (match("text")) parseText(); else if (match("type")) parseType(); else if (match("weak")) getStr(); // contents are in the type that follows else if (match("imports")) skipImports(); else if (match("data")) {} else if (match("ident")) {} else if (match("section")) s = strchr(s, '\n'); else abort_on("process"); } } void parseText() { while (*s) { skipWhitespace(); if (!*s) break; if (*s != '.') break; s++; if (match("file")) parseFile(); else if (match("globl")) parseGlobl(); else if (match("type")) parseType(); else { s--; break; } } } void parseFile() { assert(*s == '"'); s++; std::string filename; while (*s != '"') { filename += *s; s++; } s++; // TODO: use the filename? } void parseGlobl() { Name name = getStr(); skipWhitespace(); } void parseFunction() { if (debug) dump("func"); Name name = getStrToSep(); mustMatch(":"); unsigned nextId = 0; auto getNextId = [&nextId]() { return cashew::IString(('$' + std::to_string(nextId++)).c_str(), false); }; auto func = allocator.alloc(); func->name = name; std::map localTypes; // params and result while (1) { if (match(".param")) { while (1) { Name name = getNextId(); WasmType type = getType(); func->params.emplace_back(name, type); localTypes[name] = type; skipWhitespace(); if (!match(",")) break; } } else if (match(".result")) { func->result = getType(); } else if (match(".local")) { while (1) { Name name = getNextId(); WasmType type = getType(); func->locals.emplace_back(name, type); localTypes[name] = type; skipWhitespace(); if (!match(",")) break; } } else break; } // parse body func->body = allocator.alloc(); std::vector bstack; bstack.push_back(func->body->dyn_cast()); std::vector estack; auto push = [&](Expression* curr) { //std::cerr << "push " << curr << '\n'; estack.push_back(curr); }; auto pop = [&]() { assert(!estack.empty()); Expression* ret = estack.back(); assert(ret); estack.pop_back(); //std::cerr << "pop " << ret << '\n'; return ret; }; auto getNumInputs = [&]() { int ret = 1; char *t = s; while (*t != '\n') { if (*t == ',') ret++; t++; } return ret; }; auto getInputs = [&](int num) { // we may have $pop, $0, $pop, $1 etc., which are getlocals // interleaved with stack pops, and the stack pops must be done in // *reverse* order, i.e., that input should turn into // lastpop, getlocal(0), firstpop, getlocal(1) std::vector inputs; // TODO: optimize (if .s format doesn't change) inputs.resize(num); for (int i = 0; i < num; i++) { if (match("$pop")) { skipToSep(); inputs[i] = nullptr; } else { auto curr = allocator.alloc(); curr->name = getStrToSep(); curr->type = localTypes[curr->name]; inputs[i] = curr; } if (*s == ')') s++; // tolerate 0(argument) syntax, where we started at the 'a' if (i < num - 1) skipComma(); } for (int i = num-1; i >= 0; i--) { if (inputs[i] == nullptr) inputs[i] = pop(); } return inputs; }; auto getInput = [&]() { return getInputs(1)[0]; }; auto setOutput = [&](Expression* curr, Name assign) { if (assign.isNull() || assign.str[1] == 'd') { // discard bstack.back()->list.push_back(curr); } else if (assign.str[1] == 'p') { // push estack.push_back(curr); } else { // set to a local auto set = allocator.alloc(); set->name = assign; set->value = curr; set->type = curr->type; bstack.back()->list.push_back(set); } }; auto makeBinary = [&](BinaryOp op, WasmType type) { Name assign = getAssign(); skipComma(); auto curr = allocator.alloc(); curr->op = op; auto inputs = getInputs(2); curr->left = inputs[0]; curr->right = inputs[1]; curr->finalize(); assert(curr->type == type); setOutput(curr, assign); }; auto makeUnary = [&](UnaryOp op, WasmType type) { Name assign = getAssign(); skipComma(); auto curr = allocator.alloc(); curr->op = op; curr->value = getInput(); curr->type = type; setOutput(curr, assign); }; auto makeHost = [&](HostOp op) { Name assign = getAssign(); auto curr = allocator.alloc(); curr->op = MemorySize; setOutput(curr, assign); }; auto makeHost1 = [&](HostOp op) { Name assign = getAssign(); auto curr = allocator.alloc(); curr->op = MemorySize; curr->operands.push_back(getInput()); setOutput(curr, assign); }; auto makeLoad = [&](WasmType type) { skipComma(); auto curr = allocator.alloc(); curr->type = type; int32_t bytes = getInt(); curr->bytes = bytes > 0 ? bytes : getWasmTypeSize(type); curr->signed_ = match("_s"); match("_u"); Name assign = getAssign(); getConst(&curr->offset); curr->align = curr->bytes; // XXX mustMatch("("); curr->ptr = getInput(); setOutput(curr, assign); }; auto makeStore = [&](WasmType type) { skipComma(); auto curr = allocator.alloc(); curr->type = type; int32_t bytes = getInt(); curr->bytes = bytes > 0 ? bytes : getWasmTypeSize(type); curr->align = curr->bytes; // XXX Name assign = getAssign(); getConst(&curr->offset); mustMatch("("); auto inputs = getInputs(2); curr->ptr = inputs[0]; curr->value = inputs[1]; setOutput(curr, assign); }; auto makeSelect = [&](WasmType type) { Name assign = getAssign(); skipComma(); auto curr = allocator.alloc