// // .s to WebAssembly translator. // #include "wasm.h" #include "parsing.h" #include "asm_v_wasm.h" namespace wasm { extern int debug; // wasm::debug is set in main(), typically from an env var // // S2WasmBuilder - parses a .s file into WebAssembly // class S2WasmBuilder { AllocatingModule& wasm; MixedArena& allocator; char *s; public: S2WasmBuilder(AllocatingModule& wasm, char *s) : wasm(wasm), allocator(wasm.allocator), s(s) { process(); fix(); } private: // state size_t nextStatic = 0; // location of next static allocation, i.e., the data segment std::map staticAddresses; // name => address typedef std::pair Addressing; std::vector addressings; // we fix these up // utilities void skipWhitespace() { while (1) { while (*s && isspace(*s)) s++; if (*s != '#') break; while (*s != '\n') s++; } } bool skipComma() { skipWhitespace(); if (*s != ',') return false; s++; skipWhitespace(); return true; } // match and skip the pattern, if matched bool match(const char *pattern) { size_t size = strlen(pattern); if (strncmp(s, pattern, size) == 0) { s += size; skipWhitespace(); return true; } return false; } void mustMatch(const char *pattern) { bool matched = match(pattern); assert(matched); } void dump(const char *text) { std::cerr << "[[" << text << "]]:\n==========\n"; for (size_t i = 0; i < 60; i++) { if (!s[i]) break; std::cerr << s[i]; } std::cerr << "\n==========\n"; } #define abort_on(why) { \ dump(why ":"); \ abort(); \ } void unget(Name str) { s -= strlen(str.str); } Name getStr() { std::string str; // TODO: optimize this and the other get* methods while (*s && !isspace(*s)) { str += *s; s++; } return cashew::IString(str.c_str(), false); } Name getStrToSep() { std::string str; while (*s && !isspace(*s) && *s != ',' && *s != ')') { str += *s; s++; } return cashew::IString(str.c_str(), false); } Name getStrToColon() { std::string str; while (*s && !isspace(*s) && *s != ':') { str += *s; s++; } return cashew::IString(str.c_str(), false); } int32_t getInt() { int32_t ret = 0; while (isdigit(*s)) { ret *= 10; ret += (*s - '0'); s++; } return ret; } Name getCommaSeparated() { skipWhitespace(); std::string str; while (*s && *s != ',' && *s != '\n') { str += *s; s++; } skipWhitespace(); return cashew::IString(str.c_str(), false); } Name getAssign() { skipWhitespace(); if (*s != '$') return Name(); std::string str; char *before = s; while (*s && *s != '=' && *s != '\n' && *s != ',') { str += *s; s++; } if (*s != '=') { // not an assign s = before; return Name(); } s++; skipComma(); return cashew::IString(str.c_str(), false); } Name getQuoted() { // TODO: support 0 in the middle, etc., use a raw buffer, etc. assert(*s == '"'); s++; std::string str; while (*s && *s != '\"') { str += *s; s++; } s++; skipWhitespace(); return cashew::IString(str.c_str(), false); } WasmType getType() { if (match("i32")) return i32; if (match("i64")) return i64; if (match("f32")) return f32; if (match("f64")) return f64; abort_on("getType"); } // processors void process() { while (*s) { skipWhitespace(); if (!*s) break; if (*s != '.') break; s++; if (match("text")) parseText(); else if (match("type")) parseType(); else if (match("imports")) skipImports(); else abort_on("process"); } } void parseText() { while (*s) { skipWhitespace(); if (!*s) break; if (*s != '.') break; s++; if (match("file")) parseFile(); else if (match("globl")) parseGlobl(); else { s--; break; } } } void parseFile() { assert(*s == '"'); s++; std::string filename; while (*s != '"') { filename += *s; s++; } s++; // TODO: use the filename? } void parseGlobl() { unsigned nextId = 0; auto getNextId = [&nextId]() { return cashew::IString(('$' + std::to_string(nextId++)).c_str(), false); }; if (debug) dump("func"); Name name = getStr(); skipWhitespace(); mustMatch(".type"); mustMatch(name.str); mustMatch(",@function"); mustMatch(name.str); mustMatch(":"); auto func = allocator.alloc(); func->name = name; std::map localTypes; // params and result while (1) { if (match(".param")) { while (1) { Name name = getNextId(); WasmType type = getType(); func->params.emplace_back(name, type); localTypes[name] = type; skipWhitespace(); if (!match(",")) break; } } else if (match(".result")) { func->result = getType(); } else if (match(".local")) { Name name = getNextId(); WasmType type = getType(); func->locals.emplace_back(name, type); localTypes[name] = type; skipWhitespace(); } else break; } // parse body func->body = allocator.alloc(); std::vector bstack; bstack.push_back(func->body->dyn_cast()); std::vector estack; auto push = [&](Expression* curr) { //std::cerr << "push " << curr << '\n'; estack.push_back(curr); }; auto pop = [&]() { assert(!estack.empty()); Expression* ret = estack.back(); assert(ret); estack.pop_back(); //std::cerr << "pop " << ret << '\n'; return ret; }; auto getInput = [&]() { //dump("getinput"); if (match("$pop")) { while (isdigit(*s)) s++; return pop(); } else { auto curr = allocator.alloc(); curr->name = getStrToSep(); curr->type = localTypes[curr->name]; return (Expression*)curr; } }; auto setOutput = [&](Expression* curr, Name assign) { if (assign.isNull() || assign.str[1] == 'd') { // discard bstack.back()->list.push_back(curr); } else if (assign.str[1] == 'p') { // push estack.push_back(curr); } else { // set to a local auto set = allocator.alloc(); set->name = assign; set->value = curr; set->type = curr->type; bstack.back()->list.push_back(set); } }; auto makeBinary = [&](BinaryOp op, WasmType type) { Name assign = getAssign(); skipComma(); auto curr = allocator.alloc(); curr->op = op; curr->right = getInput(); skipComma(); curr->left = getInput(); curr->finalize(); assert(curr->type == type); setOutput(curr, assign); }; auto makeLoad = [&](WasmType type) { Name assign = getAssign(); skipComma(); auto curr = allocator.alloc(); curr->type = type; switch (type) { case i32: { curr->bytes = 4; curr->signed_ = false; // XXX } case i64: { curr->bytes = 8; curr->signed_ = false; // XXX } case f32: { curr->bytes = 4; } case f64: { curr->bytes = 8; } } curr->offset = getInt(); curr->align = curr->bytes; // XXX mustMatch("("); curr->ptr = getInput(); mustMatch(")"); setOutput(curr, assign); }; auto makeStore = [&](WasmType type) { Name assign = getAssign(); skipComma(); auto curr = allocator.alloc(); curr->type = type; switch (type) { case i32: { curr->bytes = 4; } case i64: { curr->bytes = 8; } case f32: { curr->bytes = 4; } case f64: { curr->bytes = 8; } } curr->offset = getInt(); curr->align = curr->bytes; // XXX mustMatch("("); curr->ptr = getInput(); mustMatch(")"); skipComma(); curr->value = getInput(); setOutput(curr, assign); }; auto handleTyped = [&](WasmType type) { switch (*s) { case 'a': { if (match("add")) makeBinary(BinaryOp::Add, type); else if (match("and")) makeBinary(BinaryOp::And, type); else abort_on("i32.a"); break; } case 'c': { if (match("const")) { Name assign = getAssign(); if (*s == '.') { // global address auto curr = allocator.alloc(); curr->type = i32; addressings.emplace_back(curr, getStr()); setOutput(curr, assign); } else { // constant setOutput(parseConst(getStr(), type, allocator), assign); } } else abort_on("i32.c"); break; } case 'e': { if (match("eq")) makeBinary(BinaryOp::Eq, i32); break; } case 'g': { if (match("gt_s")) makeBinary(BinaryOp::GtS, i32); else if (match("gt_u")) makeBinary(BinaryOp::GtU, i32); else if (match("ge_s")) makeBinary(BinaryOp::GeS, i32); else if (match("ge_u")) makeBinary(BinaryOp::GeU, i32); else if (match("gt")) makeBinary(BinaryOp::Gt, i32); else if (match("ge")) makeBinary(BinaryOp::Ge, i32); else abort_on("i32.g"); break; } case 'l': { if (match("lt_s")) makeBinary(BinaryOp::LtS, i32); else if (match("lt_u")) makeBinary(BinaryOp::LtU, i32); else if (match("le_s")) makeBinary(BinaryOp::LeS, i32); else if (match("le_u")) makeBinary(BinaryOp::LeU, i32); else if (match("load")) makeLoad(type); else if (match("lt")) makeBinary(BinaryOp::Lt, i32); else if (match("le")) makeBinary(BinaryOp::Le, i32); else abort_on("i32.g"); break; } case 'm': { if (match("mul")) makeBinary(BinaryOp::Mul, type); else abort_on("i32.m"); break; } case 'n': { if (match("ne")) makeBinary(BinaryOp::Ne, i32); else abort_on("i32.n"); break; } case 'o': { if (match("or")) makeBinary(BinaryOp::Or, type); else abort_on("i32.n"); break; } case 'r': { if (match("rem_s")) makeBinary(BinaryOp::RemS, type); else if (match("rem_u")) makeBinary(BinaryOp::RemU, type); else abort_on("i32.n"); break; } case 's': { if (match("shr_s")) makeBinary(BinaryOp::ShrS, type); else if (match("shr_u")) makeBinary(BinaryOp::ShrU, type); else if (match("sub")) makeBinary(BinaryOp::Sub, type); else if (match("store")) makeStore(type); else abort_on("i32.s"); break; } case 'x': { if (match("or")) makeBinary(BinaryOp::Xor, type); else abort_on("i32.n"); break; } default: abort_on("i32.?"); } }; // fixups std::vector loopBlocks; // we need to clear their names std::set seenLabels; // if we already used a label, we don't need it in a loop (there is a block above it, with that label) // main loop while (1) { skipWhitespace(); if (debug) dump("main function loop"); if (match("i32.")) { handleTyped(i32); } else if (match("i64.")) { handleTyped(i64); } else if (match("f32.")) { handleTyped(f32); } else if (match("f64.")) { handleTyped(f64); } else if (match("call")) { CallBase* curr; if (match("_import")) { curr = allocator.alloc(); } else if (match("_indirect")) { curr = allocator.alloc(); } else { curr = allocator.alloc(); } Name assign = getAssign(); if (curr->is()) { curr->dyn_cast()->target = getCommaSeparated(); } else if (curr->is()) { curr->dyn_cast()->target = getCommaSeparated(); } else { curr->dyn_cast()->target = getInput(); } while (1) { if (!skipComma()) break; curr->operands.push_back(getInput()); } std::reverse(curr->operands.begin(), curr->operands.end()); setOutput(curr, assign); if (curr->is()) { auto call = curr->dyn_cast(); auto typeName = cashew::IString((std::string("FUNCSIG_") + getSig(call)).c_str(), false); if (wasm.functionTypesMap.count(typeName) == 0) { auto type = allocator.alloc(); type->name = typeName; // TODO type->result for (auto operand : call->operands) { type->params.push_back(operand->type); } wasm.addFunctionType(type); call->fullType = type; } else { call->fullType = wasm.functionTypesMap[typeName]; } } } else if (match("block")) { auto curr = allocator.alloc(); curr->name = getStr(); bstack.back()->list.push_back(curr); bstack.push_back(curr); seenLabels.insert(curr->name); } else if (match("BB")) { s -= 2; Name name = getStrToColon(); s++; skipWhitespace(); // pop all blocks/loops that reach this target // pop all targets with this label while (!bstack.empty()) { auto curr = bstack.back(); if (curr->name == name) { bstack.pop_back(); continue; } break; } // this may also be a loop beginning if (*s == 'l') { auto curr = allocator.alloc(); bstack.back()->list.push_back(curr); curr->in = name; mustMatch("loop"); Name out = getStr(); if (seenLabels.count(out) == 0) { curr->out = out; } auto block = allocator.alloc(); block->name = out; // temporary, fake curr->body = block; loopBlocks.push_back(block); bstack.push_back(block); } } else if (match("br")) { auto curr = allocator.alloc(); if (*s == '_') { mustMatch("_if"); curr->condition = getInput(); skipComma(); } curr->name = getStr(); bstack.back()->list.push_back(curr); } else if (match("return")) { Block *temp; if (!(func->body && (temp = func->body->dyn_cast()) && temp->name == FAKE_RETURN)) { Expression* old = func->body; temp = allocator.alloc(); temp->name = FAKE_RETURN; if (old) temp->list.push_back(old); func->body = temp; } auto curr = allocator.alloc(); curr->name = FAKE_RETURN; if (*s == '$') { curr->value = getInput(); } bstack.back()->list.push_back(curr); } else if (match("unreachable")) { bstack.back()->list.push_back(allocator.alloc()); } else if (match("func_end")) { s = strchr(s, '\n'); s++; s = strchr(s, '\n'); break; // the function is done } else { abort_on("function element"); } } // finishing touches bstack.pop_back(); // remove the base block for the function body assert(bstack.empty()); assert(estack.empty()); for (auto block : loopBlocks) { block->name = Name(); } wasm.addFunction(func); } void parseType() { Name name = getStrToSep(); skipComma(); mustMatch("@object"); mustMatch(".data"); mustMatch(name.str); mustMatch(":"); mustMatch(".asciz"); Name buffer = getQuoted(); mustMatch(".size"); mustMatch(name.str); mustMatch(","); size_t size = atoi(getStr().str); // TODO: optimize assert(strlen(buffer.str) == size); const int ALIGN = 16; if (nextStatic == 0) nextStatic = ALIGN; // assign the address, add to memory, and increment for the next one staticAddresses[name] = nextStatic; wasm.memory.segments.emplace_back(nextStatic, buffer.str, size); nextStatic += size; nextStatic = (nextStatic + ALIGN - 1) & -ALIGN; } void skipImports() { while (1) { if (match(".import")) { s = strchr(s, '\n'); skipWhitespace(); continue; } break; } } void fix() { for (auto& pair : addressings) { Const* curr = pair.first; Name name = pair.second; curr->value = Literal(staticAddresses[name]); assert(curr->value.i32 > 0); curr->type = i32; } } }; } // namespace wasm