diff options
Diffstat (limited to 'src')
33 files changed, 1900 insertions, 1117 deletions
diff --git a/src/asm2wasm-main.cpp b/src/asm2wasm-main.cpp index aa914c168..56e8a417f 100644 --- a/src/asm2wasm-main.cpp +++ b/src/asm2wasm-main.cpp @@ -29,6 +29,9 @@ using namespace cashew; using namespace wasm; int main(int argc, const char *argv[]) { + bool opts = true; + bool imprecise = false; + Options options("asm2wasm", "Translate asm.js files to .wast files"); options .add("--output", "-o", "Output file (stdout if not specified)", @@ -41,6 +44,18 @@ int main(int argc, const char *argv[]) { [](Options *o, const std::string &argument) { o->extra["mapped globals"] = argument; }) + .add("--total-memory", "-m", "Total memory size", Options::Arguments::One, + [](Options *o, const std::string &argument) { + o->extra["total memory"] = argument; + }) + .add("--no-opts", "-n", "Disable optimization passes", Options::Arguments::Zero, + [&opts](Options *o, const std::string &) { + opts = false; + }) + .add("--imprecise", "-i", "Imprecise optimizations", Options::Arguments::Zero, + [&imprecise](Options *o, const std::string &) { + imprecise = true; + }) .add_positional("INFILE", Options::Arguments::One, [](Options *o, const std::string &argument) { o->extra["infile"] = argument; @@ -51,6 +66,15 @@ int main(int argc, const char *argv[]) { const char *mappedGlobals = mg_it == options.extra.end() ? nullptr : mg_it->second.c_str(); + const auto &tm_it = options.extra.find("total memory"); + size_t totalMemory = + tm_it == options.extra.end() ? 16 * 1024 * 1024 : atoi(tm_it->second.c_str()); + if (totalMemory & ~Memory::kPageMask) { + std::cerr << "Error: total memory size " << totalMemory << + " is not a multiple of the 64k wasm page size\n"; + exit(EXIT_FAILURE); + } + Asm2WasmPreProcessor pre; auto input( read_file<std::vector<char>>(options.extra["infile"], options.debug)); @@ -67,13 +91,14 @@ int main(int argc, const char *argv[]) { if (options.debug) std::cerr << "wasming..." << std::endl; AllocatingModule wasm; - wasm.memory.initial = wasm.memory.max = - 16 * 1024 * 1024; // we would normally receive this from the compiler - Asm2WasmBuilder asm2wasm(wasm, pre.memoryGrowth, options.debug); + wasm.memory.initial = wasm.memory.max = totalMemory / Memory::kPageSize; + Asm2WasmBuilder asm2wasm(wasm, pre.memoryGrowth, options.debug, imprecise); asm2wasm.processAsm(asmjs); - if (options.debug) std::cerr << "optimizing..." << std::endl; - asm2wasm.optimize(); + if (opts) { + if (options.debug) std::cerr << "optimizing..." << std::endl; + asm2wasm.optimize(); + } if (options.debug) std::cerr << "printing..." << std::endl; Output output(options.extra["output"], options.debug); diff --git a/src/asm2wasm.h b/src/asm2wasm.h index 353f80413..4c37b7705 100644 --- a/src/asm2wasm.h +++ b/src/asm2wasm.h @@ -28,6 +28,7 @@ #include "shared-constants.h" #include "asm_v_wasm.h" #include "pass.h" +#include "ast_utils.h" namespace wasm { @@ -149,7 +150,8 @@ class Asm2WasmBuilder { std::map<CallIndirect*, IString> callIndirects; // track these, as we need to fix them after we know the functionTableStarts. this maps call => its function table bool memoryGrowth; - int debug; + bool debug; + bool imprecise; public: std::map<IString, MappedGlobal> mappedGlobals; @@ -204,8 +206,9 @@ private: // uses, in the first pass std::map<IString, FunctionType> importedFunctionTypes; + std::map<IString, std::vector<CallImport*>> importedFunctionCalls; - void noteImportedFunctionCall(Ref ast, WasmType resultType, AsmData *asmData) { + void noteImportedFunctionCall(Ref ast, WasmType resultType, AsmData *asmData, CallImport* call) { assert(ast[0] == CALL && ast[1][0] == NAME); IString importName = ast[1][1]->getIString(); FunctionType type; @@ -242,6 +245,7 @@ private: } else { importedFunctionTypes[importName] = type; } + importedFunctionCalls[importName].push_back(call); } FunctionType* getFunctionType(Ref parent, ExpressionList& operands) { @@ -251,13 +255,14 @@ private: } public: - Asm2WasmBuilder(AllocatingModule& wasm, bool memoryGrowth, int debug) + Asm2WasmBuilder(AllocatingModule& wasm, bool memoryGrowth, bool debug, bool imprecise) : wasm(wasm), allocator(wasm.allocator), nextGlobal(8), maxGlobal(1000), memoryGrowth(memoryGrowth), - debug(debug) {} + debug(debug), + imprecise(imprecise) {} void processAsm(Ref ast); void optimize(); @@ -414,10 +419,12 @@ private: return nullptr; } + // ensure a nameless block Block* blockify(Expression* expression) { - if (expression->is<Block>()) return expression->dyn_cast<Block>(); + if (expression->is<Block>() && !expression->cast<Block>()->name.is()) return expression->dyn_cast<Block>(); auto ret = allocator.alloc<Block>(); ret->list.push_back(expression); + ret->finalize(); return ret; } @@ -635,11 +642,17 @@ void Asm2WasmBuilder::processAsm(Ref ast) { for (unsigned k = 0; k < contents->size(); k++) { Ref pair = contents[k]; IString key = pair[0]->getIString(); - Ref value = pair[1]; - assert(value[0] == NAME); + assert(pair[1][0] == NAME); + IString value = pair[1][1]->getIString(); + if (key == Name("_emscripten_replace_memory")) { + // asm.js memory growth provides this special non-asm function, which we don't need (we use grow_memory) + assert(wasm.functionsMap.find(value) == wasm.functionsMap.end()); + continue; + } + assert(wasm.functionsMap.find(value) != wasm.functionsMap.end()); auto export_ = allocator.alloc<Export>(); export_->name = key; - export_->value = value[1]->getIString(); + export_->value = value; wasm.addExport(export_); } } @@ -670,6 +683,20 @@ void Asm2WasmBuilder::processAsm(Ref ast) { wasm.removeImport(curr); } + // fill out call_import - add extra params as needed. asm tolerates ffi overloading, wasm does not + for (auto& pair : importedFunctionCalls) { + IString name = pair.first; + auto& list = pair.second; + auto type = importedFunctionTypes[name]; + for (auto* call : list) { + for (size_t i = call->operands.size(); i < type.params.size(); i++) { + auto val = allocator.alloc<Const>(); + val->type = val->value.type = type.params[i]; + call->operands.push_back(val); + } + } + } + // finalize indirect calls for (auto& pair : callIndirects) { @@ -703,6 +730,7 @@ void Asm2WasmBuilder::processAsm(Ref ast) { wasm.addExport(export_); } + wasm.memory.exportName = MEMORY; } Function* Asm2WasmBuilder::processFunction(Ref ast) { @@ -710,10 +738,8 @@ Function* Asm2WasmBuilder::processFunction(Ref ast) { if (debug) { std::cout << "\nfunc: " << ast[1]->getIString().str << '\n'; - if (debug >= 2) { - ast->stringify(std::cout); - std::cout << '\n'; - } + ast->stringify(std::cout); + std::cout << '\n'; } auto function = allocator.alloc<Function>(); @@ -783,7 +809,7 @@ Function* Asm2WasmBuilder::processFunction(Ref ast) { std::function<Expression* (Ref)> process = [&](Ref ast) -> Expression* { AstStackHelper astStackHelper(ast); // TODO: only create one when we need it? - if (debug >= 2) { + if (debug) { std::cout << "at: "; ast->stringify(std::cout); std::cout << '\n'; @@ -992,29 +1018,38 @@ Function* Asm2WasmBuilder::processFunction(Ref ast) { } else if (ast[1] == B_NOT) { // ~, might be ~~ as a coercion or just a not if (ast[2][0] == UNARY_PREFIX && ast[2][1] == B_NOT) { -#if 0 - auto ret = allocator.alloc<Unary>(); - ret->op = TruncSFloat64; // equivalent to U, except for error handling, which asm.js doesn't have anyhow - ret->value = process(ast[2][2]); - ret->type = WasmType::i32; - return ret; -#endif - // WebAssembly traps on float-to-int overflows, but asm.js wouldn't, so we must emulate that - CallImport *ret = allocator.alloc<CallImport>(); - ret->target = F64_TO_INT; - ret->operands.push_back(process(ast[2][2])); - ret->type = i32; - static bool addedImport = false; - if (!addedImport) { - addedImport = true; - auto import = allocator.alloc<Import>(); // f64-to-int = asm2wasm.f64-to-int; - import->name = F64_TO_INT; - import->module = ASM2WASM; - import->base = F64_TO_INT; - import->type = ensureFunctionType("id", &wasm, allocator); - wasm.addImport(import); + if (imprecise) { + auto ret = allocator.alloc<Unary>(); + ret->value = process(ast[2][2]); + ret->op = ret->value->type == f64 ? TruncSFloat64 : TruncSFloat32; // imprecise, because this wasm thing might trap, while asm.js never would + ret->type = WasmType::i32; + return ret; + } else { + // WebAssembly traps on float-to-int overflows, but asm.js wouldn't, so we must emulate that + CallImport *ret = allocator.alloc<CallImport>(); + ret->target = F64_TO_INT; + auto input = process(ast[2][2]); + if (input->type == f32) { + auto conv = allocator.alloc<Unary>(); + conv->op = PromoteFloat32; + conv->value = input; + conv->type = WasmType::f64; + input = conv; + } + ret->operands.push_back(input); + ret->type = i32; + static bool addedImport = false; + if (!addedImport) { + addedImport = true; + auto import = allocator.alloc<Import>(); // f64-to-int = asm2wasm.f64-to-int; + import->name = F64_TO_INT; + import->module = ASM2WASM; + import->base = F64_TO_INT; + import->type = ensureFunctionType("id", &wasm, allocator); + wasm.addImport(import); + } + return ret; } - return ret; } // no bitwise unary not, so do xor with -1 auto ret = allocator.alloc<Binary>(); @@ -1024,13 +1059,10 @@ Function* Asm2WasmBuilder::processFunction(Ref ast) { ret->type = WasmType::i32; return ret; } else if (ast[1] == L_NOT) { - // no logical unary not, so do == 0 - auto ret = allocator.alloc<Binary>(); - ret->op = Eq; - ret->left = process(ast[2]); - ret->right = allocator.alloc<Const>()->set(Literal(0)); - assert(ret->left->type == ret->right->type); - ret->finalize(); + auto ret = allocator.alloc<Unary>(); + ret->op = EqZ; + ret->value = process(ast[2]); + ret->type = i32; return ret; } abort_on("bad unary", ast); @@ -1119,7 +1151,7 @@ Function* Asm2WasmBuilder::processFunction(Ref ast) { select->condition = isNegative; select->type = i32; block->list.push_back(select); - block->type = i32; + block->finalize(); return block; } else if (value->type == f32 || value->type == f64) { auto ret = allocator.alloc<Unary>(); @@ -1148,8 +1180,9 @@ Function* Asm2WasmBuilder::processFunction(Ref ast) { if (wasm.importsMap.find(name) != wasm.importsMap.end()) { Ref parent = astStackHelper.getParent(); WasmType type = !!parent ? detectWasmType(parent, &asmData) : none; - ret = allocator.alloc<CallImport>(); - noteImportedFunctionCall(ast, type, &asmData); + auto specific = allocator.alloc<CallImport>(); + noteImportedFunctionCall(ast, type, &asmData, specific); + ret = specific; } else { ret = allocator.alloc<Call>(); } @@ -1201,6 +1234,7 @@ Function* Asm2WasmBuilder::processFunction(Ref ast) { block = allocator.alloc<Block>(); block->name = name; block->list.push_back(ret); + block->finalize(); ret = block; } } @@ -1243,6 +1277,7 @@ Function* Asm2WasmBuilder::processFunction(Ref ast) { auto body = allocator.alloc<Block>(); body->list.push_back(condition); body->list.push_back(process(ast[2])); + body->finalize(); ret->body = body; } // loops do not automatically loop, add a branch back @@ -1256,8 +1291,7 @@ Function* Asm2WasmBuilder::processFunction(Ref ast) { return ret; } else if (what == DO) { if (ast[1][0] == NUM && ast[1][1]->getNumber() == 0) { - // one-time loop - auto block = allocator.alloc<Block>(); + // one-time loop, unless there is a continue IString stop; if (!parentLabel.isNull()) { stop = getBreakLabelName(parentLabel); @@ -1265,13 +1299,28 @@ Function* Asm2WasmBuilder::processFunction(Ref ast) { } else { stop = getNextId("do-once"); } - block->name = stop; + IString more = getNextId("unlikely-continue"); breakStack.push_back(stop); - continueStack.push_back(IMPOSSIBLE_CONTINUE); - block->list.push_back(process(ast[2])); + continueStack.push_back(more); + auto child = process(ast[2]); continueStack.pop_back(); breakStack.pop_back(); - return block; + // if we never continued, we don't need a loop + BreakSeeker breakSeeker(more); + breakSeeker.walk(child); + if (breakSeeker.found == 0) { + auto block = allocator.alloc<Block>(); + block->list.push_back(child); + block->name = stop; + block->finalize(); + return block; + } else { + auto loop = allocator.alloc<Loop>(); + loop->body = child; + loop->out = stop; + loop->in = more; + return loop; + } } // general do-while loop auto ret = allocator.alloc<Loop>(); @@ -1327,6 +1376,7 @@ Function* Asm2WasmBuilder::processFunction(Ref ast) { body->list.push_back(condition); body->list.push_back(process(fbody)); body->list.push_back(process(finc)); + body->finalize(); ret->body = body; // loops do not automatically loop, add a branch back Block* block = blockify(ret->body); @@ -1340,6 +1390,7 @@ Function* Asm2WasmBuilder::processFunction(Ref ast) { // add an outer block for the init as well outer->list.push_back(process(finit)); outer->list.push_back(ret); + outer->finalize(); return outer; } else if (what == LABEL) { assert(parentLabel.isNull()); @@ -1356,10 +1407,10 @@ Function* Asm2WasmBuilder::processFunction(Ref ast) { auto ret = allocator.alloc<Block>(); ret->list.push_back(process(ast[1])); ret->list.push_back(process(ast[2])); - ret->type = ret->list[1]->type; + ret->finalize(); return ret; } else if (what == SWITCH) { - IString name; + IString name; // for breaking out of the entire switch if (!parentLabel.isNull()) { name = getBreakLabelName(parentLabel); parentLabel = IString(); @@ -1367,10 +1418,11 @@ Function* Asm2WasmBuilder::processFunction(Ref ast) { name = getNextId("switch"); } breakStack.push_back(name); - auto ret = allocator.alloc<Switch>(); - ret->name = name; - ret->value = process(ast[1]); - assert(ret->value->type == i32); + + auto br = allocator.alloc<Switch>(); + br->condition = process(ast[1]); + assert(br->condition->type == i32); + Ref cases = ast[2]; bool seen = false; int min = 0; // the lowest index we see; we will offset to it @@ -1390,18 +1442,23 @@ Function* Asm2WasmBuilder::processFunction(Ref ast) { } Binary* offsetor = allocator.alloc<Binary>(); offsetor->op = BinaryOp::Sub; - offsetor->left = ret->value; + offsetor->left = br->condition; offsetor->right = allocator.alloc<Const>()->set(Literal(min)); offsetor->type = i32; - ret->value = offsetor; + br->condition = offsetor; + + auto top = allocator.alloc<Block>(); + top->list.push_back(br); + top->finalize(); + for (unsigned i = 0; i < cases->size(); i++) { Ref curr = cases[i]; Ref condition = curr[0]; Ref body = curr[1]; - Switch::Case case_; - case_.body = processStatements(body, 0); + auto case_ = processStatements(body, 0); + Name name; if (condition->isNull()) { - case_.name = ret->default_ = getNextId("switch-default"); + name = br->default_ = getNextId("switch-default"); } else { assert(condition[0] == NUM || condition[0] == UNARY_PREFIX); int32_t index = getLiteral(condition).geti32(); @@ -1409,26 +1466,35 @@ Function* Asm2WasmBuilder::processFunction(Ref ast) { index -= min; assert(index >= 0); size_t index_s = index; - case_.name = getNextId("switch-case"); - if (ret->targets.size() <= index_s) { - ret->targets.resize(index_s+1); + name = getNextId("switch-case"); + if (br->targets.size() <= index_s) { + br->targets.resize(index_s+1); } - ret->targets[index_s] = case_.name; + br->targets[index_s] = name; } - ret->cases.push_back(case_); + auto next = allocator.alloc<Block>(); + top->name = name; + next->list.push_back(top); + next->list.push_back(case_); + next->finalize(); + top = next; } + // ensure a default - if (ret->default_.isNull()) { - Switch::Case defaultCase; - defaultCase.name = ret->default_ = getNextId("switch-default"); - defaultCase.body = allocator.alloc<Nop>(); // ok if others fall through to this - ret->cases.push_back(defaultCase); + if (br->default_.isNull()) { + br->default_ = getNextId("switch-default"); } - for (size_t i = 0; i < ret->targets.size(); i++) { - if (ret->targets[i].isNull()) ret->targets[i] = ret->default_; + for (size_t i = 0; i < br->targets.size(); i++) { + if (br->targets[i].isNull()) br->targets[i] = br->default_; } - // finalize + top->name = br->default_; + breakStack.pop_back(); + + // Create a topmost block for breaking out of the entire switch + auto ret = allocator.alloc<Block>(); + ret->name = name; + ret->list.push_back(top); return ret; } abort_on("confusing expression", ast); @@ -1461,6 +1527,7 @@ Function* Asm2WasmBuilder::processFunction(Ref ast) { for (unsigned i = from; i < ast->size(); i++) { block->list.push_back(process(ast[i])); } + block->finalize(); return block; }; // body @@ -1473,51 +1540,6 @@ Function* Asm2WasmBuilder::processFunction(Ref ast) { } void Asm2WasmBuilder::optimize() { - // Optimization passes. Note: no effort is made to free nodes that are no longer held on to. - - struct BlockBreakOptimizer : public WasmWalker<BlockBreakOptimizer> { - void visitBlock(Block *curr) { - // if the block ends in a break on this very block, then just put the value there - Break *last = curr->list[curr->list.size()-1]->dyn_cast<Break>(); - if (last && last->value && last->name == curr->name) { - curr->list[curr->list.size()-1] = last->value; - } - if (curr->list.size() > 1) return; // no hope to remove the block - // just one element; maybe we can return just the element - if (curr->name.isNull()) { - replaceCurrent(curr->list[0]); // cannot break into it - return; - } - // we might be broken to, but maybe there isn't a break (and we may have removed it, leading to this) - - struct BreakSeeker : public WasmWalker<BreakSeeker> { - IString target; // look for this one - size_t found; - - BreakSeeker(IString target) : target(target), found(false) {} - - void visitBreak(Break *curr) { - if (curr->name == target) found++; - } - }; - - // look for any breaks to this block - BreakSeeker breakSeeker(curr->name); - Expression *child = curr->list[0]; - breakSeeker.walk(child); - if (breakSeeker.found == 0) { - replaceCurrent(child); // no breaks to here, so eliminate the block - } - } - }; - - BlockBreakOptimizer blockBreakOptimizer; - for (auto pair : wasm.functionsMap) { - blockBreakOptimizer.startWalk(pair.second); - } - - // Standard passes - PassRunner passRunner(&allocator); passRunner.add("remove-unused-brs"); passRunner.add("remove-unused-names"); diff --git a/src/ast_utils.h b/src/ast_utils.h new file mode 100644 index 000000000..df2ffe578 --- /dev/null +++ b/src/ast_utils.h @@ -0,0 +1,43 @@ +/* + * Copyright 2016 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef wasm_ast_utils_h +#define wasm_ast_utils_h + +#include "wasm.h" + +namespace wasm { + +struct BreakSeeker : public WasmWalker<BreakSeeker> { + Name target; // look for this one + size_t found; + + BreakSeeker(Name target) : target(target), found(false) {} + + void visitBreak(Break *curr) { + if (curr->name == target) found++; + } + + static bool has(Expression* tree, Name target) { + BreakSeeker breakSeeker(target); + breakSeeker.walk(tree); + return breakSeeker.found > 0; + } +}; + +} // namespace wasm + +#endif // wasm_ast_utils_h diff --git a/src/binaryen-shell.cpp b/src/binaryen-shell.cpp index 61cab723c..93d4c65ce 100644 --- a/src/binaryen-shell.cpp +++ b/src/binaryen-shell.cpp @@ -66,8 +66,8 @@ struct ShellExternalInterface : ModuleInstance::ExternalInterface { std::vector<char> memory; template <typename T> static bool aligned(const char* address) { - static_assert(!(alignof(T) & (alignof(T) - 1)), "must be a power of 2"); - return 0 == (reinterpret_cast<uintptr_t>(address) & (alignof(T) - 1)); + static_assert(!(sizeof(T) & (sizeof(T) - 1)), "must be a power of 2"); + return 0 == (reinterpret_cast<uintptr_t>(address) & (sizeof(T) - 1)); } Memory(Memory&) = delete; Memory& operator=(const Memory&) = delete; @@ -111,10 +111,10 @@ struct ShellExternalInterface : ModuleInstance::ExternalInterface { ShellExternalInterface() : memory() {} void init(Module& wasm) override { - memory.resize(wasm.memory.initial); + memory.resize(wasm.memory.initial * wasm::Memory::kPageSize); // apply memory segments for (auto segment : wasm.memory.segments) { - assert(segment.offset + segment.size <= wasm.memory.initial); + assert(segment.offset + segment.size <= wasm.memory.initial * wasm::Memory::kPageSize); for (size_t i = 0; i != segment.size; ++i) { memory.set(segment.offset + i, segment.data[i]); } @@ -178,9 +178,9 @@ struct ShellExternalInterface : ModuleInstance::ExternalInterface { } case i64: { switch (store->bytes) { - case 1: memory.set<int8_t>(addr, value.geti64()); break; - case 2: memory.set<int16_t>(addr, value.geti64()); break; - case 4: memory.set<int32_t>(addr, value.geti64()); break; + case 1: memory.set<int8_t>(addr, (int8_t)value.geti64()); break; + case 2: memory.set<int16_t>(addr, (int16_t)value.geti64()); break; + case 4: memory.set<int32_t>(addr, (int32_t)value.geti64()); break; case 8: memory.set<int64_t>(addr, value.geti64()); break; default: abort(); } @@ -248,7 +248,7 @@ static void run_asserts(size_t* i, bool* checked, AllocatingModule* wasm, if (wasm) { interface = new ShellExternalInterface(); instance = new ModuleInstance(*wasm, interface); - if (entry.is() > 0) { + if (entry.is()) { Function* function = wasm->functionsMap[entry]; if (!function) { std::cerr << "Unknown entry " << entry << std::endl; @@ -259,7 +259,7 @@ static void run_asserts(size_t* i, bool* checked, AllocatingModule* wasm, } try { instance->callExport(entry, arguments); - } catch (ExitException& x) { + } catch (ExitException&) { } } } @@ -287,7 +287,7 @@ static void run_asserts(size_t* i, bool* checked, AllocatingModule* wasm, throw ParseException(); }) ); - } catch (const ParseException& e) { + } catch (const ParseException&) { invalid = true; } if (!invalid) { @@ -307,7 +307,7 @@ static void run_asserts(size_t* i, bool* checked, AllocatingModule* wasm, try { Invocation invocation(*curr[1], instance, *builder->get()); result = invocation.invoke(); - } catch (const TrapException& e) { + } catch (const TrapException&) { trapped = true; } if (id == ASSERT_RETURN) { diff --git a/src/compiler-support.h b/src/compiler-support.h index 54dd61bc8..9e298b278 100644 --- a/src/compiler-support.h +++ b/src/compiler-support.h @@ -32,6 +32,7 @@ #elif defined(_MSC_VER) # define WASM_UNREACHABLE() __assume(false) #else +# include <stdlib.h> # define WASM_UNREACHABLE() abort() #endif diff --git a/src/emscripten-optimizer/istring.h b/src/emscripten-optimizer/istring.h index abc1e327b..8e91d044d 100644 --- a/src/emscripten-optimizer/istring.h +++ b/src/emscripten-optimizer/istring.h @@ -72,13 +72,14 @@ struct IString { } else { auto existing = strings->find(s); if (existing == strings->end()) { - char *copy = (char*)malloc(strlen(s)+1); // XXX leaked - strcpy(copy, s); + size_t len = strlen(s) + 1; + char *copy = (char*)malloc(len); // XXX leaked + strncpy(copy, s, len); s = copy; + strings->insert(s); } else { s = *existing; } - strings->insert(s); str = s; } } @@ -147,9 +148,9 @@ class IStringSet : public std::unordered_set<IString> { public: IStringSet() {} IStringSet(const char *init) { // comma-delimited list - int size = strlen(init); - char *curr = new char[size+1]; // leaked! - strcpy(curr, init); + int size = strlen(init) + 1; + char *curr = new char[size]; // leaked! + strncpy(curr, init, size); while (1) { char *end = strchr(curr, ' '); if (end) *end = 0; diff --git a/src/emscripten-optimizer/simple_ast.h b/src/emscripten-optimizer/simple_ast.h index 73037815f..75b7987be 100644 --- a/src/emscripten-optimizer/simple_ast.h +++ b/src/emscripten-optimizer/simple_ast.h @@ -626,7 +626,7 @@ struct JSPrinter { maybeSpace(*s); int len = strlen(s); ensure(len+1); - strcpy(buffer + used, s); + strncpy(buffer + used, s, len+1); used += len; } diff --git a/src/js/wasm.js-post.js b/src/js/wasm.js-post.js index 75a91d4d4..9851fc777 100644 --- a/src/js/wasm.js-post.js +++ b/src/js/wasm.js-post.js @@ -16,17 +16,25 @@ function integrateWasmJS(Module) { // wasm.js has several methods for creating the compiled code module here: - // * 'wasm-s-parser': load s-expression code from a .wast and create wasm - // * 'asm2wasm': load asm.js code and translate to wasm - // * 'just-asm': no wasm, just load the asm.js code and use that (good for testing) + // * 'native-wasm' : use native WebAssembly support in the browser + // * 'interpret-s-expr': load s-expression code from a .wast and interpret + // * 'interpret-binary': load binary wasm and interpret + // * 'interpret-asm2wasm': load asm.js code, translate to wasm, and interpret + // * 'asmjs': no wasm, just load the asm.js code and use that (good for testing) // The method can be set at compile time (BINARYEN_METHOD), or runtime by setting Module['wasmJSMethod']. - var method = Module['wasmJSMethod'] || 'wasm-s-parser'; - assert(method == 'asm2wasm' || method == 'wasm-s-parser' || method == 'just-asm'); + // The method can be a comma-separated list, in which case, we will try the + // options one by one. Some of them can fail gracefully, and then we can try + // the next. - if (method == 'just-asm') { - eval(Module['read'](Module['asmjsCodeFile'])); - return; - } + // inputs + + var method = Module['wasmJSMethod'] || {{{ wasmJSMethod }}} || 'native-wasm,interpret-s-expr'; // by default, try native and then .wast + + var wasmTextFile = Module['wasmTextFile'] || {{{ wasmTextFile }}}; + var wasmBinaryFile = Module['wasmBinaryFile'] || {{{ wasmBinaryFile }}}; + var asmjsCodeFile = Module['asmjsCodeFile'] || {{{ asmjsCodeFile }}}; + + // utilities var asm2wasmImports = { // special asm2wasm imports "f64-rem": function(x, y) { @@ -40,48 +48,76 @@ function integrateWasmJS(Module) { }, }; - function flatten(obj) { - var ret = {}; - for (var x in obj) { - for (var y in obj[x]) { - if (ret[y]) Module['printErr']('warning: flatten dupe: ' + y); - if (typeof obj[x][y] === 'function') { - ret[y] = obj[x][y]; - } - } + var info = { + 'global': null, + 'env': null, + 'asm2wasm': asm2wasmImports, + 'parent': Module // Module inside wasm-js.cpp refers to wasm-js.cpp; this allows access to the outside program. + }; + + var exports = null; + + function lookupImport(mod, base) { + var lookup = info; + if (mod.indexOf('.') < 0) { + lookup = (lookup || {})[mod]; + } else { + var parts = mod.split('.'); + lookup = (lookup || {})[parts[0]]; + lookup = (lookup || {})[parts[1]]; } - return ret; + if (base) { + lookup = (lookup || {})[base]; + } + if (lookup === undefined) { + abort('bad lookupImport to (' + mod + ').' + base); + } + return lookup; } function mergeMemory(newBuffer) { // The wasm instance creates its memory. But static init code might have written to - // buffer already, and we must copy it over in a proper merge. + // buffer already, including the mem init file, and we must copy it over in a proper merge. // TODO: avoid this copy, by avoiding such static init writes // TODO: in shorter term, just copy up to the last static init write var oldBuffer = Module['buffer']; - assert(newBuffer.byteLength >= oldBuffer.byteLength, 'we might fail if we allocated more than TOTAL_MEMORY'); - // the wasm module does write out the memory initialization, in range STATIC_BASE..STATIC_BUMP, so avoid that - (new Int8Array(newBuffer).subarray(0, STATIC_BASE)).set(new Int8Array(oldBuffer).subarray(0, STATIC_BASE)); - (new Int8Array(newBuffer).subarray(STATIC_BASE + STATIC_BUMP)).set(new Int8Array(oldBuffer).subarray(STATIC_BASE + STATIC_BUMP)); + if (newBuffer.byteLength < oldBuffer.byteLength) { + Module['printErr']('the new buffer in mergeMemory is smaller than the previous one. in native wasm, we should grow memory here'); + } + var oldView = new Int8Array(oldBuffer); + var newView = new Int8Array(newBuffer); + if ({{{ WASM_BACKEND }}}) { + // memory segments arrived in the wast, do not trample them + oldView.set(newView.subarray(STATIC_BASE, STATIC_BASE + STATIC_BUMP), STATIC_BASE); + } + newView.set(oldView); updateGlobalBuffer(newBuffer); updateGlobalBufferViews(); Module['reallocBuffer'] = function(size) { var old = Module['buffer']; - wasmJS['asmExports']['__growWasmMemory'](size); // tiny wasm method that just does grow_memory + exports['__growWasmMemory'](size); // tiny wasm method that just does grow_memory return Module['buffer'] !== old ? Module['buffer'] : null; // if it was reallocated, it changed }; } + var WasmTypes = { + none: 0, + i32: 1, + i64: 2, + f32: 3, + f64: 4 + }; + // wasm lacks globals, so asm2wasm maps them into locations in memory. that information cannot // be present in the wasm output of asm2wasm, so we store it in a side file. If we load asm2wasm // output, either generated ahead of time or on the client, we need to apply those mapped // globals after loading the module. - function applyMappedGlobals() { - var mappedGlobals = JSON.parse(Module['read'](Module['wasmCodeFile'] + '.mappedGlobals')); + function applyMappedGlobals(globalsFileBase) { + var mappedGlobals = JSON.parse(Module['read'](globalsFileBase + '.mappedGlobals')); for (var name in mappedGlobals) { var global = mappedGlobals[name]; if (!global.import) continue; // non-imports are initialized to zero in the typed array anyhow, so nothing to do here - var value = wasmJS['lookupImport'](global.module, global.base); + var value = lookupImport(global.module, global.base); var address = global.address; switch (global.type) { case WasmTypes.i32: Module['HEAP32'][address >> 2] = value; break; @@ -92,113 +128,171 @@ function integrateWasmJS(Module) { } } - if (typeof WASM === 'object' || typeof wasmEval === 'function') { + function fixImports(imports) { + if (!{{{ WASM_BACKEND }}}) return imports; + var ret = {}; + for (var i in imports) { + var fixed = i; + if (fixed[0] == '_') fixed = fixed.substr(1); + ret[fixed] = imports[i]; + } + return ret; + } + + function getBinary() { + var binary; + if (ENVIRONMENT_IS_WEB || ENVIRONMENT_IS_WORKER) { + binary = Module['wasmBinary']; + assert(binary, "on the web, we need the wasm binary to be preloaded and set on Module['wasmBinary']. emcc.py will do that for you when generating HTML (but not JS)"); + binary = new Uint8Array(binary); + } else { + binary = Module['readBinary'](wasmBinaryFile); + } + return binary; + } + + // do-method functions + + function doJustAsm() { + if (typeof Module['asm'] !== 'function') { + // you can load the .asm.js file before this, to avoid this sync xhr and eval + eval(Module['read'](asmjsCodeFile)); + } + if (typeof Module['asm'] !== 'function') { + // evalling the asm.js file should have set this + Module['printErr']('asm evalling did not set the module properly'); + return false; + } + return true; + } + + function doNativeWasm() { + if (typeof Wasm !== 'object') { + Module['printErr']('no native wasm support detected'); + return false; + } + // Provide an "asm.js function" for the application, called to "link" the asm.js module. We instantiate // the wasm module at that time, and it receives imports and provides exports and so forth, the app // doesn't need to care that it is wasm and not asm. Module['asm'] = function(global, env, providedBuffer) { - // Load the wasm module - var binary = Module['readBinary'](Module['wasmCodeFile']); - // Create an instance of the module using native support in the JS engine. - var importObj = { - "global.Math": global.Math, - "env": env, - "asm2wasm": asm2wasmImports + global = fixImports(global); + env = fixImports(env); + + // Load the wasm module and create an instance of using native support in the JS engine. + info['global'] = { + 'NaN': NaN, + 'Infinity': Infinity }; + info['global.Math'] = global.Math; + info['env'] = env; var instance; - if (typeof WASM === 'object') { - instance = WASM.instantiateModule(binary, flatten(importObj)); - } else if (typeof wasmEval === 'function') { - instance = wasmEval(binary.buffer, importObj); - } else { - throw 'how to wasm?'; - } - mergeMemory(instance.memory); + instance = Wasm.instantiateModule(getBinary(), info); + exports = instance.exports; + mergeMemory(exports.memory); - applyMappedGlobals(); + applyMappedGlobals(wasmBinaryFile); - return instance; + return exports; }; - return; + Module["usingWasm"] = true; + + return true; } - var WasmTypes = { - none: 0, - i32: 1, - i64: 2, - f32: 3, - f64: 4 - }; + function doWasmPolyfill(method) { + if (typeof WasmJS !== 'function') { + Module['printErr']('WasmJS not detected - polyfill not bundled?'); + return false; + } - // Use wasm.js to polyfill and execute code in a wasm interpreter. - var wasmJS = WasmJS({}); + // Use wasm.js to polyfill and execute code in a wasm interpreter. + var wasmJS = WasmJS({}); - // XXX don't be confused. Module here is in the outside program. wasmJS is the inner wasm-js.cpp. - wasmJS['outside'] = Module; // Inside wasm-js.cpp, Module['outside'] reaches the outside module. + // XXX don't be confused. Module here is in the outside program. wasmJS is the inner wasm-js.cpp. + wasmJS['outside'] = Module; // Inside wasm-js.cpp, Module['outside'] reaches the outside module. - // Information for the instance of the module. - var info = wasmJS['info'] = { - global: null, - env: null, - asm2wasm: asm2wasmImports, - parent: Module // Module inside wasm-js.cpp refers to wasm-js.cpp; this allows access to the outside program. - }; + // Information for the instance of the module. + wasmJS['info'] = info; - wasmJS['lookupImport'] = function(mod, base) { - var lookup = info; - if (mod.indexOf('.') < 0) { - lookup = (lookup || {})[mod]; - } else { - var parts = mod.split('.'); - lookup = (lookup || {})[parts[0]]; - lookup = (lookup || {})[parts[1]]; - } - lookup = (lookup || {})[base]; - if (lookup === undefined) { - abort('bad lookupImport to (' + mod + ').' + base); - } - return lookup; - } + wasmJS['lookupImport'] = lookupImport; + + // The asm.js function, called to "link" the asm.js module. At that time, we are provided imports + // and respond with exports, and so forth. + Module['asm'] = function(global, env, providedBuffer) { + global = fixImports(global); + env = fixImports(env); - // The asm.js function, called to "link" the asm.js module. At that time, we are provided imports - // and respond with exports, and so forth. - Module['asm'] = function(global, env, providedBuffer) { - assert(providedBuffer === Module['buffer']); // we should not even need to pass it as a 3rd arg for wasm, but that's the asm.js way. + assert(providedBuffer === Module['buffer']); // we should not even need to pass it as a 3rd arg for wasm, but that's the asm.js way. - info.global = global; - info.env = env; + info.global = global; + info.env = env; - Module['reallocBuffer'] = function(size) { - var old = Module['buffer']; - wasmJS['asmExports']['__growWasmMemory'](size); // tiny wasm method that just does grow_memory - return Module['buffer'] !== old ? Module['buffer'] : null; // if it was reallocated, it changed - }; + wasmJS['providedTotalMemory'] = Module['buffer'].byteLength; - // Prepare to generate wasm, using either asm2wasm or wasm-s-parser - var code = Module['read'](method == 'asm2wasm' ? Module['asmjsCodeFile'] : Module['wasmCodeFile']); - var temp = wasmJS['_malloc'](code.length + 1); - wasmJS['writeAsciiToMemory'](code, temp); - if (method == 'asm2wasm') { - wasmJS['_load_asm2wasm'](temp); - } else { - wasmJS['_load_s_expr2wasm'](temp); - } - wasmJS['_free'](temp); + // Prepare to generate wasm, using either asm2wasm or s-exprs + var code; + if (method === 'interpret-binary') { + code = getBinary(); + } else { + code = Module['read'](method == 'interpret-asm2wasm' ? asmjsCodeFile : wasmTextFile); + } + var temp; + if (method == 'interpret-asm2wasm') { + temp = wasmJS['_malloc'](code.length + 1); + wasmJS['writeAsciiToMemory'](code, temp); + wasmJS['_load_asm2wasm'](temp); + } else if (method === 'interpret-s-expr') { + temp = wasmJS['_malloc'](code.length + 1); + wasmJS['writeAsciiToMemory'](code, temp); + wasmJS['_load_s_expr2wasm'](temp); + } else if (method === 'interpret-binary') { + temp = wasmJS['_malloc'](code.length); + wasmJS['HEAPU8'].set(code, temp); + wasmJS['_load_binary2wasm'](temp, code.length); + } else { + throw 'what? ' + method; + } + wasmJS['_free'](temp); - wasmJS['providedTotalMemory'] = Module['buffer'].byteLength; + wasmJS['_instantiate'](temp); - wasmJS['_instantiate'](temp); + if (Module['newBuffer']) { + mergeMemory(Module['newBuffer']); + Module['newBuffer'] = null; + } - if (Module['newBuffer']) { - mergeMemory(Module['newBuffer']); - Module['newBuffer'] = null; - } + if (method == 'interpret-s-expr') { + applyMappedGlobals(wasmTextFile); + } else if (method == 'interpret-binary') { + applyMappedGlobals(wasmBinaryFile); + } - if (method == 'wasm-s-parser') { - applyMappedGlobals(); - } + exports = wasmJS['asmExports']; - return wasmJS['asmExports']; - }; + return exports; + }; + + return true; + } + + // use the right method + + var methods = method.split(','); + for (var i = 0; i < methods.length; i++) { + var curr = methods[i]; + //Module['printErr']('using wasm/js method: ' + curr); + if (curr === 'native-wasm') { + if (doNativeWasm()) return; + } else if (curr === 'asmjs') { + if (doJustAsm()) return; + } else if (curr === 'interpret-asm2wasm' || curr === 'interpret-s-expr' || curr === 'interpret-binary') { + if (doWasmPolyfill(curr)) return; + } else { + throw 'bad method: ' + curr; + } + } + throw 'no wasm method succeeded'; } + diff --git a/src/pass.h b/src/pass.h index 53b486e41..41ef30b90 100644 --- a/src/pass.h +++ b/src/pass.h @@ -156,6 +156,7 @@ private: // Prints out a module class Printer : public Pass { +protected: std::ostream& o; public: diff --git a/src/passes/LowerCase.cpp b/src/passes/LowerCase.cpp deleted file mode 100644 index f890de411..000000000 --- a/src/passes/LowerCase.cpp +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright 2016 WebAssembly Community Group participants - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// -// Lowers switch cases out into blocks, -// -/* - (tableswitch $switch$0 - (i32.sub - (get_local $x) - (i32.const 1) - ) - (table (case $switch-case$1) (case $switch-case$2)) (case $switch-default$3) - (case $switch-case$1 - (return - (i32.const 1) - ) - ) - (case $switch-case$2 - (return - (i32.const 2) - ) - ) - (case $switch-default$3 - (nop) - ) - ) - - into - - (block $forswitch$switch$0 - (block $switch-case$3 - (block $switch-case$2 - (block $switch-case$1 - (tableswitch $switch$0 - (i32.sub - (get_local $x) - (i32.const 1) - ) - (table (br $switch-case$1) (br $switch-case$2)) (br $switch-default$3) - ) - ) - (return - (i32.const 1) - ) - ) - (return - (i32.const 2) - ) - ) - (nop) - ) -*/ - - -#include <memory> - -#include <wasm.h> -#include <pass.h> - -namespace wasm { - -struct LowerCase : public WalkerPass<WasmWalker<LowerCase, void>> { - MixedArena* allocator; - - void prepare(PassRunner* runner, Module *module) override { - allocator = runner->allocator; - } - - void visitSwitch(Switch *curr) { - if (curr->cases.size() == 0) return; - auto top = allocator->alloc<Block>(); - top->list.push_back(curr); - for (auto& c : curr->cases) { - top->name = c.name; - auto next = allocator->alloc<Block>(); - next->list.push_back(top); - next->list.push_back(c.body); - top = next; - } - curr->cases.clear(); - top->name = curr->name; - curr->name = Name(); - replaceCurrent(top); - } -}; - -static RegisterPass<LowerCase> registerPass("lower-case", "lowers cases into blocks"); - -} // namespace wasm diff --git a/src/passes/LowerIfElse.cpp b/src/passes/LowerIfElse.cpp index fa575d87d..922a294d2 100644 --- a/src/passes/LowerIfElse.cpp +++ b/src/passes/LowerIfElse.cpp @@ -49,7 +49,7 @@ struct LowerIfElse : public WalkerPass<WasmWalker<LowerIfElse, void>> { block->name = name; block->list.push_back(curr); block->list.push_back(curr->ifFalse); - block->type = curr->type; + block->finalize(); curr->ifFalse = nullptr; auto break_ = allocator->alloc<Break>(); break_->name = name; diff --git a/src/passes/LowerInt64.cpp b/src/passes/LowerInt64.cpp index 58e56cba6..7af7443e5 100644 --- a/src/passes/LowerInt64.cpp +++ b/src/passes/LowerInt64.cpp @@ -112,7 +112,7 @@ struct LowerInt64 : public Pass { ret->list.push_back(curr); ret->list.push_back(set); ret->list.push_back(low); // so the block returns the low bits - ret->type = i32; + ret->finalize(); fixes[ret] = high; replaceCurrent(ret); } @@ -132,6 +132,7 @@ struct LowerInt64 : public Pass { set->type = value->type; ret->list.push_back(set); } + ret->finalize(); return ret; } diff --git a/src/passes/NameManager.cpp b/src/passes/NameManager.cpp index 8ed29c478..8519d70eb 100644 --- a/src/passes/NameManager.cpp +++ b/src/passes/NameManager.cpp @@ -44,7 +44,6 @@ void NameManager::visitBreak(Break* curr) { names.insert(curr->name); } void NameManager::visitSwitch(Switch* curr) { - names.insert(curr->name); names.insert(curr->default_); for (auto& target : curr->targets) { names.insert(target); diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp index 3e886074d..77dcb6154 100644 --- a/src/passes/Print.cpp +++ b/src/passes/Print.cpp @@ -26,33 +26,87 @@ namespace wasm { struct PrintSExpression : public WasmVisitor<PrintSExpression, void> { std::ostream& o; unsigned indent; + bool minify; + const char *maybeSpace; + const char *maybeNewLine; - PrintSExpression(std::ostream& o) : o(o), indent(0) {} + PrintSExpression(std::ostream& o, bool minify = false) + : o(o), indent(0), minify(minify) { + maybeSpace = minify ? "" : " "; + maybeNewLine = minify ? "" : "\n"; + } + void incIndent() { + if (minify) return; + o << '\n'; + indent++; + } + void decIndent() { + if (!minify) { + indent--; + doIndent(o, indent); + } + o << ')'; + } void printFullLine(Expression *expression) { - doIndent(o, indent); + !minify && doIndent(o, indent); visit(expression); - o << '\n'; + o << maybeNewLine; } - void visitBlock(Block *curr) { - printOpening(o, "block"); - if (curr->name.is()) { - o << ' ' << curr->name; + // special-case Block, because Block nesting (in their first element) can be incredibly deep + std::vector<Block*> stack; + while (1) { + if (stack.size() > 0) doIndent(o, indent); + stack.push_back(curr); + printOpening(o, "block"); + if (curr->name.is()) { + o << ' ' << curr->name; + } + incIndent(); + if (curr->list.size() > 0 && curr->list[0]->is<Block>()) { + // recurse into the first element + curr = curr->list[0]->cast<Block>(); + continue; + } else { + break; // that's all we can recurse, start to unwind + } } - incIndent(o, indent); - for (auto expression : curr->list) { - printFullLine(expression); + auto* top = stack.back(); + while (stack.size() > 0) { + curr = stack.back(); + stack.pop_back(); + auto& list = curr->list; + for (size_t i = 0; i < list.size(); i++) { + if (curr != top && i == 0) { + // one of the block recursions we already handled + decIndent(); + o << '\n'; + continue; + } + printFullLine(list[i]); + } } - decIndent(o, indent); + decIndent(); } void visitIf(If *curr) { - printOpening(o, curr->ifFalse ? "if_else" : "if"); - incIndent(o, indent); + printOpening(o, "if"); + incIndent(); printFullLine(curr->condition); - printFullLine(curr->ifTrue); - if (curr->ifFalse) printFullLine(curr->ifFalse); - decIndent(o, indent); + // ifTrue and False have implict blocks, avoid printing them if possible + if (curr->ifTrue->is<Block>() && curr->ifTrue->dyn_cast<Block>()->name.isNull() && curr->ifTrue->dyn_cast<Block>()->list.size() == 1) { + printFullLine(curr->ifTrue->dyn_cast<Block>()->list.back()); + } else { + printFullLine(curr->ifTrue); + } + if (curr->ifFalse) { + if (curr->ifFalse->is<Block>() && curr->ifFalse->dyn_cast<Block>()->name.isNull() && curr->ifFalse->dyn_cast<Block>()->list.size() == 1) { + printFullLine(curr->ifFalse->dyn_cast<Block>()->list.back()); + } else { + printFullLine(curr->ifFalse); + } + } + decIndent(); } void visitLoop(Loop *curr) { printOpening(o, "loop"); @@ -63,7 +117,7 @@ struct PrintSExpression : public WasmVisitor<PrintSExpression, void> { if (curr->in.is()) { o << ' ' << curr->in; } - incIndent(o, indent); + incIndent(); auto block = curr->body->dyn_cast<Block>(); if (block && block->name.isNull()) { // wasm spec has loops containing children directly, while our ast @@ -74,12 +128,12 @@ struct PrintSExpression : public WasmVisitor<PrintSExpression, void> { } else { printFullLine(curr->body); } - decIndent(o, indent); + decIndent(); } void visitBreak(Break *curr) { if (curr->condition) { printOpening(o, "br_if ") << curr->name; - incIndent(o, indent); + incIndent(); } else { printOpening(o, "br ") << curr->name; if (!curr->value || curr->value->is<Nop>()) { @@ -87,48 +141,36 @@ struct PrintSExpression : public WasmVisitor<PrintSExpression, void> { o << ")"; return; } - incIndent(o, indent); + incIndent(); } if (curr->value && !curr->value->is<Nop>()) printFullLine(curr->value); if (curr->condition) { printFullLine(curr->condition); } - decIndent(o, indent); + decIndent(); } void visitSwitch(Switch *curr) { - printOpening(o, "tableswitch "); - if (curr->name.is()) o << curr->name; - incIndent(o, indent); - printFullLine(curr->value); - doIndent(o, indent) << "(table"; - std::set<Name> caseNames; - for (auto& c : curr->cases) { - caseNames.insert(c.name); - } + printOpening(o, "br_table"); for (auto& t : curr->targets) { - o << " (" << (caseNames.count(t) == 0 ? "br" : "case") << " " << (t.is() ? t : curr->default_) << ")"; + o << " " << t; } - o << ")"; - if (curr->default_.is()) o << " (" << (caseNames.count(curr->default_) == 0 ? "br" : "case") << " " << curr->default_ << ")"; - o << "\n"; - for (auto& c : curr->cases) { - doIndent(o, indent); - printMinorOpening(o, "case ") << c.name; - incIndent(o, indent); - printFullLine(c.body); - decIndent(o, indent) << '\n'; + o << " " << curr->default_; + incIndent(); + if (curr->value) { + printFullLine(curr->value); } - decIndent(o, indent); + printFullLine(curr->condition); + decIndent(); } void printCallBody(Call* curr) { o << curr->target; if (curr->operands.size() > 0) { - incIndent(o, indent); + incIndent(); for (auto operand : curr->operands) { printFullLine(operand); } - decIndent(o, indent); + decIndent(); } else { o << ')'; } @@ -144,21 +186,21 @@ struct PrintSExpression : public WasmVisitor<PrintSExpression, void> { } void visitCallIndirect(CallIndirect *curr) { printOpening(o, "call_indirect ") << curr->fullType->name; - incIndent(o, indent); + incIndent(); printFullLine(curr->target); for (auto operand : curr->operands) { printFullLine(operand); } - decIndent(o, indent); + decIndent(); } void visitGetLocal(GetLocal *curr) { printOpening(o, "get_local ") << curr->name << ')'; } void visitSetLocal(SetLocal *curr) { printOpening(o, "set_local ") << curr->name; - incIndent(o, indent); + incIndent(); printFullLine(curr->value); - decIndent(o, indent); + decIndent(); } void visitLoad(Load *curr) { o << '('; @@ -182,9 +224,9 @@ struct PrintSExpression : public WasmVisitor<PrintSExpression, void> { if (curr->align != curr->bytes) { o << " align=" << curr->align; } - incIndent(o, indent); + incIndent(); printFullLine(curr->ptr); - decIndent(o, indent); + decIndent(); } void visitStore(Store *curr) { o << '('; @@ -207,21 +249,22 @@ struct PrintSExpression : public WasmVisitor<PrintSExpression, void> { if (curr->align != curr->bytes) { o << " align=" << curr->align; } - incIndent(o, indent); + incIndent(); printFullLine(curr->ptr); printFullLine(curr->value); - decIndent(o, indent); + decIndent(); } void visitConst(Const *curr) { o << curr->value; } void visitUnary(Unary *curr) { o << '('; - prepareColor(o) << printWasmType(curr->type) << '.'; + prepareColor(o) << printWasmType(curr->isRelational() ? curr->value->type : curr->type) << '.'; switch (curr->op) { case Clz: o << "clz"; break; case Ctz: o << "ctz"; break; case Popcnt: o << "popcnt"; break; + case EqZ: o << "eqz"; break; case Neg: o << "neg"; break; case Abs: o << "abs"; break; case Ceil: o << "ceil"; break; @@ -246,9 +289,9 @@ struct PrintSExpression : public WasmVisitor<PrintSExpression, void> { case ReinterpretInt: o << "reinterpret/" << (curr->type == f64 ? "i64" : "i32"); break; default: abort(); } - incIndent(o, indent); + incIndent(); printFullLine(curr->value); - decIndent(o, indent); + decIndent(); } void visitBinary(Binary *curr) { o << '('; @@ -267,6 +310,8 @@ struct PrintSExpression : public WasmVisitor<PrintSExpression, void> { case Shl: o << "shl"; break; case ShrU: o << "shr_u"; break; case ShrS: o << "shr_s"; break; + case RotL: o << "rotl"; break; + case RotR: o << "rotr"; break; case Div: o << "div"; break; case CopySign: o << "copysign"; break; case Min: o << "min"; break; @@ -288,19 +333,19 @@ struct PrintSExpression : public WasmVisitor<PrintSExpression, void> { default: abort(); } restoreNormalColor(o); - incIndent(o, indent); + incIndent(); printFullLine(curr->left); printFullLine(curr->right); - decIndent(o, indent); + decIndent(); } void visitSelect(Select *curr) { o << '('; - prepareColor(o) << printWasmType(curr->type) << ".select"; - incIndent(o, indent); + prepareColor(o) << "select"; + incIndent(); printFullLine(curr->ifTrue); printFullLine(curr->ifFalse); printFullLine(curr->condition); - decIndent(o, indent); + decIndent(); } void visitReturn(Return *curr) { printOpening(o, "return"); @@ -309,9 +354,9 @@ struct PrintSExpression : public WasmVisitor<PrintSExpression, void> { o << ")"; return; } - incIndent(o, indent); + incIndent(); printFullLine(curr->value); - decIndent(o, indent); + decIndent(); } void visitHost(Host *curr) { switch (curr->op) { @@ -319,9 +364,9 @@ struct PrintSExpression : public WasmVisitor<PrintSExpression, void> { case MemorySize: printOpening(o, "memory_size") << ')'; break; case GrowMemory: { printOpening(o, "grow_memory"); - incIndent(o, indent); + incIndent(); printFullLine(curr->operands[0]); - decIndent(o, indent); + decIndent(); break; } case HasFeature: printOpening(o, "hasfeature ") << curr->nameOperand << ')'; break; @@ -340,7 +385,7 @@ struct PrintSExpression : public WasmVisitor<PrintSExpression, void> { printOpening(o, "type") << ' ' << curr->name << " (func"; } if (curr->params.size() > 0) { - o << ' '; + o << maybeSpace; printMinorOpening(o, "param"); for (auto& param : curr->params) { o << ' ' << printWasmType(param); @@ -348,11 +393,11 @@ struct PrintSExpression : public WasmVisitor<PrintSExpression, void> { o << ')'; } if (curr->result != none) { - o << ' '; + o << maybeSpace; printMinorOpening(o, "result ") << printWasmType(curr->result) << ')'; } if (full) { - o << "))";; + o << "))"; } } void visitImport(Import *curr) { @@ -369,22 +414,23 @@ struct PrintSExpression : public WasmVisitor<PrintSExpression, void> { void visitFunction(Function *curr) { printOpening(o, "func ", true) << curr->name; if (curr->type.is()) { - o << " (type " << curr->type << ')'; + o << maybeSpace << "(type " << curr->type << ')'; } if (curr->params.size() > 0) { for (auto& param : curr->params) { - o << ' '; + o << maybeSpace; printMinorOpening(o, "param ") << param.name << ' ' << printWasmType(param.type) << ")"; } } if (curr->result != none) { - o << ' '; + o << maybeSpace; printMinorOpening(o, "result ") << printWasmType(curr->result) << ")"; } - incIndent(o, indent); + incIndent(); for (auto& local : curr->locals) { doIndent(o, indent); - printMinorOpening(o, "local ") << local.name << ' ' << printWasmType(local.type) << ")\n"; + printMinorOpening(o, "local ") << local.name << ' ' << printWasmType(local.type) << ")"; + o << maybeNewLine; } // It is ok to emit a block here, as a function can directly contain a list, even if our // ast avoids that for simplicity. We can just do that optimization here.. @@ -396,7 +442,7 @@ struct PrintSExpression : public WasmVisitor<PrintSExpression, void> { } else { printFullLine(curr->body); } - decIndent(o, indent); + decIndent(); } void visitTable(Table *curr) { printOpening(o, "table"); @@ -407,12 +453,13 @@ struct PrintSExpression : public WasmVisitor<PrintSExpression, void> { } void visitModule(Module *curr) { printOpening(o, "module", true); - incIndent(o, indent); + incIndent(); doIndent(o, indent); printOpening(o, "memory") << " " << curr->memory.initial; if (curr->memory.max && curr->memory.max != (uint32_t)-1) o << " " << curr->memory.max; for (auto segment : curr->memory.segments) { - o << "\n (segment " << segment.offset << " \""; + o << maybeNewLine; + o << (minify ? "" : " ") << "(segment " << segment.offset << " \""; for (size_t i = 0; i < segment.size; i++) { unsigned char c = segment.data[i]; switch (c) { @@ -435,38 +482,46 @@ struct PrintSExpression : public WasmVisitor<PrintSExpression, void> { } o << "\")"; } - o << (curr->memory.segments.size() > 0 ? "\n " : "") << ")\n"; + o << ((curr->memory.segments.size() > 0 && !minify) ? "\n " : "") << ")"; + o << maybeNewLine; + if (curr->memory.exportName.is()) { + doIndent(o, indent); + printOpening(o, "export "); + printText(o, curr->memory.exportName.str) << " memory)"; + o << maybeNewLine; + } if (curr->start.is()) { doIndent(o, indent); - printOpening(o, "start") << " " << curr->start << ")\n"; + printOpening(o, "start") << " " << curr->start << ")"; + o << maybeNewLine; } for (auto& child : curr->functionTypes) { doIndent(o, indent); visitFunctionType(child, true); - o << '\n'; + o << maybeNewLine; } for (auto& child : curr->imports) { doIndent(o, indent); visitImport(child); - o << '\n'; + o << maybeNewLine; } for (auto& child : curr->exports) { doIndent(o, indent); visitExport(child); - o << '\n'; + o << maybeNewLine; } if (curr->table.names.size() > 0) { doIndent(o, indent); visitTable(&curr->table); - o << '\n'; + o << maybeNewLine; } for (auto& child : curr->functions) { doIndent(o, indent); visitFunction(child); - o << '\n'; + o << maybeNewLine; } - decIndent(o, indent); - o << '\n'; + decIndent(); + o << maybeNewLine; } }; @@ -479,10 +534,27 @@ void Printer::run(PassRunner* runner, Module* module) { static RegisterPass<Printer> registerPass("print", "print in s-expression format"); +// Prints out a minified module +class MinifiedPrinter : public Printer { + public: + MinifiedPrinter() : Printer() {} + MinifiedPrinter(std::ostream& o) : Printer(o) {} + + void run(PassRunner* runner, Module* module) override; +}; + +void MinifiedPrinter::run(PassRunner* runner, Module* module) { + PrintSExpression print(o, true); + print.visitModule(module); +} + + +static RegisterPass<MinifiedPrinter> registerMinifyPass("print-minified", "print in minified s-expression format"); + // Print individual expressions -std::ostream& printWasm(Expression* expression, std::ostream& o) { - PrintSExpression print(o); +std::ostream& printWasm(Expression* expression, std::ostream& o, bool minify = false) { + PrintSExpression print(o, minify); print.visit(expression); return o; } diff --git a/src/passes/RemoveUnusedNames.cpp b/src/passes/RemoveUnusedNames.cpp index 6929aa671..a8beaa15e 100644 --- a/src/passes/RemoveUnusedNames.cpp +++ b/src/passes/RemoveUnusedNames.cpp @@ -38,6 +38,13 @@ struct RemoveUnusedNames : public WalkerPass<WasmWalker<RemoveUnusedNames>> { } } + void visitSwitch(Switch *curr) { + for (auto name : curr->targets) { + branchesSeen.insert(name); + } + branchesSeen.insert(curr->default_); + } + void visitFunction(Function *curr) { branchesSeen.clear(); } diff --git a/src/pretty_printing.h b/src/pretty_printing.h index f0a233d69..9dec22ca3 100644 --- a/src/pretty_printing.h +++ b/src/pretty_printing.h @@ -32,18 +32,6 @@ inline std::ostream &doIndent(std::ostream &o, unsigned indent) { return o; } -inline std::ostream &incIndent(std::ostream &o, unsigned& indent) { - o << '\n'; - indent++; - return o; -} - -inline std::ostream &decIndent(std::ostream &o, unsigned& indent) { - indent--; - doIndent(o, indent); - return o << ')'; -} - inline std::ostream &prepareMajorColor(std::ostream &o) { Colors::red(o); Colors::bold(o); diff --git a/src/s2wasm-main.cpp b/src/s2wasm-main.cpp index d7a18f547..40009cbc9 100644 --- a/src/s2wasm-main.cpp +++ b/src/s2wasm-main.cpp @@ -58,6 +58,16 @@ int main(int argc, const char *argv[]) { [](Options *o, const std::string &argument) { o->extra["stack-allocation"] = argument; }) + .add("--initial-memory", "-i", "Initial size of the linear memory", + Options::Arguments::One, + [](Options *o, const std::string &argument) { + o->extra["initial-memory"] = argument; + }) + .add("--max-memory", "-m", "Maximum size of the linear memory", + Options::Arguments::One, + [](Options *o, const std::string &argument) { + o->extra["max-memory"] = argument; + }) .add_positional("INFILE", Options::Arguments::One, [](Options *o, const std::string &argument) { o->extra["infile"] = argument; @@ -68,16 +78,25 @@ int main(int argc, const char *argv[]) { if (options.debug) std::cerr << "Parsing and wasming..." << std::endl; AllocatingModule wasm; - size_t globalBase = options.extra.find("global-base") != options.extra.end() + uint64_t globalBase = options.extra.find("global-base") != options.extra.end() ? std::stoull(options.extra["global-base"]) : 1; - size_t stackAllocation = + uint64_t stackAllocation = options.extra.find("stack-allocation") != options.extra.end() ? std::stoull(options.extra["stack-allocation"]) : 0; + uint64_t initialMem = + options.extra.find("initial-memory") != options.extra.end() + ? std::stoull(options.extra["initial-memory"]) + : 0; + uint64_t maxMem = + options.extra.find("max-memory") != options.extra.end() + ? std::stoull(options.extra["max-memory"]) + : 0; if (options.debug) std::cerr << "Global base " << globalBase << '\n'; S2WasmBuilder s2wasm(wasm, input.c_str(), options.debug, globalBase, - stackAllocation, ignoreUnknownSymbols, startFunction); + stackAllocation, initialMem, maxMem, ignoreUnknownSymbols, + startFunction); if (options.debug) std::cerr << "Emscripten gluing..." << std::endl; std::stringstream meta; diff --git a/src/s2wasm.h b/src/s2wasm.h index 8db41a339..1fa624dd0 100644 --- a/src/s2wasm.h +++ b/src/s2wasm.h @@ -44,10 +44,12 @@ class S2WasmBuilder { bool debug; bool ignoreUnknownSymbols; Name startFunction; + std::vector<Name> globls; public: S2WasmBuilder(AllocatingModule& wasm, const char* input, bool debug, size_t globalBase, size_t stackAllocation, + size_t userInitialMemory, size_t userMaxMemory, bool ignoreUnknownSymbols, Name startFunction) : wasm(wasm), allocator(wasm.allocator), @@ -55,7 +57,20 @@ class S2WasmBuilder { ignoreUnknownSymbols(ignoreUnknownSymbols), startFunction(startFunction), globalBase(globalBase), - nextStatic(globalBase) { + nextStatic(globalBase), + minInitialMemory(0), + userInitialMemory(userInitialMemory), + userMaxMemory(userMaxMemory) { + if (userMaxMemory && userMaxMemory < userInitialMemory) { + Fatal() << "Specified max memory " << userMaxMemory << + " is < specified initial memory " << userInitialMemory; + } + if (roundUpToPageSize(userMaxMemory) != userMaxMemory) { + Fatal() << "Specified max memory " << userMaxMemory << " is not a multiple of 64k"; + } + if (roundUpToPageSize(userInitialMemory) != userInitialMemory) { + Fatal() << "Specified initial memory " << userInitialMemory << " is not a multiple of 64k"; + } s = input; scan(); s = input; @@ -75,6 +90,10 @@ class S2WasmBuilder { size_t globalBase, // where globals can start to be statically allocated, i.e., the data segment nextStatic; // location of next static allocation std::map<Name, int32_t> staticAddresses; // name => address + size_t minInitialMemory; // Minimum initial size (in bytes) of memory. + size_t userInitialMemory; // Initial memory size (in bytes) specified by the user. + size_t userMaxMemory; // Max memory size (in bytes) specified by the user. + //(after linking, this is rounded and set on the wasm object in pages) struct Relocation { uint32_t* data; @@ -93,6 +112,23 @@ class S2WasmBuilder { // utilities + // For fatal errors which could arise from input (i.e. not assertion failures) + class Fatal { + public: + Fatal() { + std::cerr << "Fatal: "; + } + template<typename T> + Fatal &operator<<(T arg) { + std::cerr << arg; + return *this; + } + ~Fatal() { + std::cerr << "\n"; + exit(1); + } + }; + void skipWhitespace() { while (1) { while (*s && isspace(*s)) s++; @@ -396,7 +432,7 @@ class S2WasmBuilder { addressSegments[nextStatic] = wasm.memory.segments.size(); wasm.memory.segments.emplace_back( nextStatic, reinterpret_cast<char*>(raw), pointerSize); - wasm.memory.initial = nextStatic + pointerSize; + minInitialMemory = nextStatic + pointerSize; } nextStatic += pointerSize; } @@ -407,7 +443,7 @@ class S2WasmBuilder { nextStatic = (nextStatic + 15) & static_cast<size_t>(-16); staticAddresses[".stack"] = nextStatic; nextStatic += stackAllocation; - wasm.memory.initial = nextStatic; + minInitialMemory = nextStatic; } void process() { @@ -463,7 +499,8 @@ class S2WasmBuilder { } void parseGlobl() { - (void)getStr(); + auto str = getStr(); + globls.push_back(str); skipWhitespace(); } @@ -528,6 +565,7 @@ class S2WasmBuilder { last = last->cast<Loop>()->body; } last->cast<Block>()->list.push_back(curr); + last->cast<Block>()->finalize(); }; bstack.push_back(func->body); std::vector<Expression*> estack; @@ -665,7 +703,7 @@ class S2WasmBuilder { curr->align = curr->bytes; if (attributes[0]) { assert(strncmp(attributes[0], "p2align=", 8) == 0); - curr->align = pow(2, getInt(attributes[0] + 8)); + curr->align = 1U << getInt(attributes[0] + 8); } setOutput(curr, assign); }; @@ -684,7 +722,7 @@ class S2WasmBuilder { curr->align = curr->bytes; if (attributes[0]) { assert(strncmp(attributes[0], "p2align=", 8) == 0); - curr->align = pow(2, getInt(attributes[0] + 8)); + curr->align = 1U << getInt(attributes[0] + 8); } curr->value = inputs[1]; setOutput(curr, assign); @@ -714,16 +752,7 @@ class S2WasmBuilder { indirect->operands.push_back(inputs[i]); } setOutput(indirect, assign); - auto typeName = cashew::IString((std::string("FUNCSIG_") + getSig(indirect)).c_str(), false); - if (wasm.functionTypesMap.count(typeName) == 0) { - auto type = allocator.alloc<FunctionType>(); - *type = sigToFunctionType(getSig(indirect)); - type->name = typeName; - wasm.addFunctionType(type); - indirect->fullType = type; - } else { - indirect->fullType = wasm.functionTypesMap[typeName]; - } + indirect->fullType = wasm.functionTypesMap[ensureFunctionType(getSig(indirect), &wasm, allocator)->name]; } else { // non-indirect call CallBase* curr; @@ -807,7 +836,8 @@ class S2WasmBuilder { break; } case 'e': { - if (match("eq")) makeBinary(BinaryOp::Eq, i32); + if (match("eqz")) makeUnary(UnaryOp::EqZ, i32); + else if (match("eq")) makeBinary(BinaryOp::Eq, i32); else if (match("extend_s/i32")) makeUnary(UnaryOp::ExtendSInt32, type); else if (match("extend_u/i32")) makeUnary(UnaryOp::ExtendUInt32, type); else abort_on("type.e"); @@ -869,6 +899,8 @@ class S2WasmBuilder { else if (match("rem_u")) makeBinary(BinaryOp::RemU, type); else if (match("reinterpret/i32") || match("reinterpret/i64")) makeUnary(UnaryOp::ReinterpretInt, type); else if (match("reinterpret/f32") || match("reinterpret/f64")) makeUnary(UnaryOp::ReinterpretFloat, type); + else if (match("rotl")) makeBinary(BinaryOp::RotL, type); + else if (match("rotr")) makeBinary(BinaryOp::RotR, type); else abort_on("type.r"); break; } @@ -956,6 +988,16 @@ class S2WasmBuilder { } else if (match("end_loop")) { bstack.pop_back(); bstack.pop_back(); + } else if (match("br_table")) { + auto curr = allocator.alloc<Switch>(); + curr->condition = getInput(); + while (skipComma()) { + curr->targets.push_back(getBranchLabel(getInt())); + } + assert(curr->targets.size() > 0); + curr->default_ = curr->targets.back(); + curr->targets.pop_back(); + addToBlock(curr); } else if (match("br")) { auto curr = allocator.alloc<Break>(); bool hasCondition = false; @@ -990,15 +1032,6 @@ class S2WasmBuilder { curr->value = getInput(); } addToBlock(curr); - } else if (match("tableswitch")) { - auto curr = allocator.alloc<Switch>(); - curr->value = getInput(); - skipComma(); - curr->default_ = getBranchLabel(getInt()); - while (skipComma()) { - curr->targets.push_back(getBranchLabel(getInt())); - } - addToBlock(curr); } else if (match("unreachable")) { addToBlock(allocator.alloc<Unreachable>()); } else if (match("memory_size")) { @@ -1023,11 +1056,8 @@ class S2WasmBuilder { for (auto block : loopBlocks) { block->name = Name(); } + func->body->dyn_cast<Block>()->finalize(); wasm.addFunction(func); - // XXX for now, export all functions - auto exp = allocator.alloc<Export>(); - exp->name = exp->value = func->name; - wasm.addExport(exp); } void parseType() { @@ -1062,7 +1092,7 @@ class S2WasmBuilder { align = getInt(); skipWhitespace(); } - align = pow(2, align); // convert from power to actual bytes + align = (size_t)1 << align; // convert from power to actual bytes if (match(".lcomm")) { parseLcomm(name, align); return; @@ -1151,7 +1181,7 @@ class S2WasmBuilder { wasm.memory.segments.emplace_back(nextStatic, (const char*)&(*raw)[0], size); } nextStatic += size; - wasm.memory.initial = nextStatic; + minInitialMemory = nextStatic; } void parseLcomm(Name name, size_t align=1) { @@ -1165,7 +1195,7 @@ class S2WasmBuilder { while (nextStatic % align) nextStatic++; staticAddresses[name] = nextStatic; nextStatic += size; - wasm.memory.initial = nextStatic; + minInitialMemory = nextStatic; } void skipImports() { @@ -1179,7 +1209,36 @@ class S2WasmBuilder { } } + static size_t roundUpToPageSize(size_t size) { + return (size + Memory::kPageSize - 1) & Memory::kPageMask; + } + void fix() { + // Round the memory size up to a page, and update the page-increment versions + // of initial and max + size_t initialMem = roundUpToPageSize(minInitialMemory); + if (userInitialMemory) { + if (initialMem > userInitialMemory) { + Fatal() << "Specified initial memory size " << userInitialMemory << + " is smaller than required size " << initialMem; + } + wasm.memory.initial = userInitialMemory / Memory::kPageSize; + } else { + wasm.memory.initial = initialMem / Memory::kPageSize; + } + + if (userMaxMemory) wasm.memory.max = userMaxMemory / Memory::kPageSize; + wasm.memory.exportName = MEMORY; + + // XXX For now, export all functions marked .globl. + for (Name name : globls) { + if (wasm.functionsMap.count(name)) { + auto exp = allocator.alloc<Export>(); + exp->name = exp->value = name; + wasm.addExport(exp); + } + } + auto ensureFunctionIndex = [&](Name name) { if (functionIndexes.count(name) == 0) { functionIndexes[name] = wasm.table.names.size(); @@ -1243,8 +1302,15 @@ class S2WasmBuilder { call->operands.push_back(param); } block->list.push_back(call); + block->finalize(); } } + + // ensure an explicit function type for indirect call targets + for (auto& name : wasm.table.names) { + auto* func = wasm.functionsMap[name]; + func->type = ensureFunctionType(getSig(func), &wasm, allocator)->name; + } } template<class C> @@ -1294,7 +1360,7 @@ public: } std::string sig = getSig(curr); sigsForCode[code].insert(sig); - std::string fixedTarget = std::string("_") + EMSCRIPTEN_ASM_CONST.str + '_' + sig; + std::string fixedTarget = EMSCRIPTEN_ASM_CONST.str + std::string("_") + sig; curr->target = cashew::IString(fixedTarget.c_str(), false); arg->value = Literal(id); // add import, if necessary diff --git a/src/shared-constants.h b/src/shared-constants.h index 053aecc83..47fa60947 100644 --- a/src/shared-constants.h +++ b/src/shared-constants.h @@ -67,7 +67,10 @@ cashew::IString GLOBAL("global"), CALL("call"), CALL_IMPORT("call_import"), CALL_INDIRECT("call_indirect"), + BLOCK("block"), BR_IF("br_if"), + THEN("then"), + ELSE("else"), NEG_INFINITY("-infinity"), NEG_NAN("-nan"), CASE("case"), diff --git a/src/support/bits.cpp b/src/support/bits.cpp index c1de4da8b..3d03b100c 100644 --- a/src/support/bits.cpp +++ b/src/support/bits.cpp @@ -15,6 +15,7 @@ */ #define wasm_support_bits_definitions +#include "../compiler-support.h" #include "support/bits.h" namespace wasm { @@ -99,4 +100,28 @@ int CountLeadingZeroes<uint64_t>(uint64_t v) { : 32 + CountLeadingZeroes((uint32_t)v); } +uint32_t Log2(uint32_t v) { + switch (v) { + default: WASM_UNREACHABLE(); + case 1: return 0; + case 2: return 1; + case 4: return 2; + case 8: return 3; + case 16: return 4; + case 32: return 5; + } +} + +uint32_t Pow2(uint32_t v) { + switch (v) { + default: WASM_UNREACHABLE(); + case 0: return 1; + case 1: return 2; + case 2: return 4; + case 3: return 8; + case 4: return 16; + case 5: return 32; + } +} + } // namespace wasm diff --git a/src/support/bits.h b/src/support/bits.h index bbafb29d4..5d3502b81 100644 --- a/src/support/bits.h +++ b/src/support/bits.h @@ -17,6 +17,7 @@ #ifndef wasm_support_bits_h #define wasm_support_bits_h +#include <climits> #include <cstdint> #include <type_traits> @@ -65,6 +66,22 @@ int CountLeadingZeroes(T v) { return CountLeadingZeroes(typename std::make_unsigned<T>::type(v)); } +template <typename T> +inline static T RotateLeft(T val, T count) { + T mask = sizeof(T) * CHAR_BIT - 1; + count &= mask; + return (val << count) | (val >> (-count & mask)); +} +template <typename T> +inline static T RotateRight(T val, T count) { + T mask = sizeof(T) * CHAR_BIT - 1; + count &= mask; + return (val >> count) | (val << (-count & mask)); +} + +extern uint32_t Log2(uint32_t v); +extern uint32_t Pow2(uint32_t v); + } // namespace wasm #endif // wasm_support_bits_h diff --git a/src/support/command-line.cpp b/src/support/command-line.cpp index b4f8a4d62..71cb33c2a 100644 --- a/src/support/command-line.cpp +++ b/src/support/command-line.cpp @@ -19,7 +19,7 @@ using namespace wasm; Options::Options(const std::string &command, const std::string &description) - : debug(0), positional(Arguments::Zero) { + : debug(false), positional(Arguments::Zero) { add("--help", "-h", "Show this help message and exit", Arguments::Zero, [this, command, description](Options *o, const std::string &) { std::cerr << command; @@ -41,10 +41,8 @@ Options::Options(const std::string &command, const std::string &description) std::cerr << '\n'; exit(EXIT_SUCCESS); }); - add("--debug", "-d", "Print debug information to stderr", Arguments::Optional, - [](Options *o, const std::string &arguments) { - o->debug = arguments.size() ? std::stoi(arguments) : 1; - }); + add("--debug", "-d", "Print debug information to stderr", Arguments::Zero, + [&](Options *o, const std::string &arguments) { debug = true; }); } Options::~Options() {} diff --git a/src/support/command-line.h b/src/support/command-line.h index 6e0af846e..7c3ae528f 100644 --- a/src/support/command-line.h +++ b/src/support/command-line.h @@ -37,7 +37,7 @@ class Options { typedef std::function<void(Options *, const std::string &)> Action; enum class Arguments { Zero, One, N, Optional }; - int debug; + bool debug; std::map<std::string, std::string> extra; Options(const std::string &command, const std::string &description); diff --git a/src/support/file.cpp b/src/support/file.cpp index 8813750d4..c93086990 100644 --- a/src/support/file.cpp +++ b/src/support/file.cpp @@ -17,6 +17,7 @@ #include "support/file.h" #include <cstdlib> +#include <limits> template <typename T> T wasm::read_file(const std::string &filename, bool debug) { @@ -27,8 +28,13 @@ T wasm::read_file(const std::string &filename, bool debug) { exit(EXIT_FAILURE); } infile.seekg(0, std::ios::end); - size_t insize = infile.tellg(); - T input(insize + 1, '\0'); + std::streampos insize = infile.tellg(); + if (size_t(insize) >= std::numeric_limits<size_t>::max()) { + // Building a 32-bit executable where size_t == 32 bits, we are not able to create strings larger than 2^32 bytes in length, so must abort here. + std::cerr << "Failed opening '" << filename << "': Input file too large: " << insize << " bytes. Try rebuilding in 64-bit mode." << std::endl; + exit(EXIT_FAILURE); + } + T input(size_t(insize) + 1, '\0'); infile.seekg(0); infile.read(&input[0], insize); return input; diff --git a/src/support/safe_integer.cpp b/src/support/safe_integer.cpp index dbe62ca52..bce2cbc39 100644 --- a/src/support/safe_integer.cpp +++ b/src/support/safe_integer.cpp @@ -25,7 +25,8 @@ using namespace wasm; bool wasm::isInteger(double x) { return fmod(x, 1) == 0; } bool wasm::isUInteger32(double x) { - return isInteger(x) && x >= 0 && x <= std::numeric_limits<uint32_t>::max(); + return !std::signbit(x) && isInteger(x) && + x <= std::numeric_limits<uint32_t>::max(); } bool wasm::isSInteger32(double x) { @@ -34,23 +35,22 @@ bool wasm::isSInteger32(double x) { } uint32_t wasm::toUInteger32(double x) { - assert(isUInteger32(x)); - return x < std::numeric_limits<uint32_t>::max() - ? x - : std::numeric_limits<uint32_t>::max(); + return std::signbit(x) ? 0 : (x < std::numeric_limits<uint32_t>::max() + ? x + : std::numeric_limits<uint32_t>::max()); } int32_t wasm::toSInteger32(double x) { - assert(isSInteger32(x)); - return x > std::numeric_limits<int32_t>::min() && - x < std::numeric_limits<int32_t>::max() + return (x > std::numeric_limits<int32_t>::min() && + x < std::numeric_limits<int32_t>::max()) ? x - : (x < 0 ? std::numeric_limits<int32_t>::min() - : std::numeric_limits<int32_t>::max()); + : (std::signbit(x) ? std::numeric_limits<int32_t>::min() + : std::numeric_limits<int32_t>::max()); } bool wasm::isUInteger64(double x) { - return isInteger(x) && x >= 0 && x <= std::numeric_limits<uint64_t>::max(); + return !std::signbit(x) && isInteger(x) && + x <= std::numeric_limits<uint64_t>::max(); } bool wasm::isSInteger64(double x) { @@ -59,17 +59,15 @@ bool wasm::isSInteger64(double x) { } uint64_t wasm::toUInteger64(double x) { - assert(isUInteger64(x)); - return x < (double)std::numeric_limits<uint64_t>::max() - ? (uint64_t)x - : std::numeric_limits<uint64_t>::max(); + return std::signbit(x) ? 0 : (x < (double)std::numeric_limits<uint64_t>::max() + ? (uint64_t)x + : std::numeric_limits<uint64_t>::max()); } int64_t wasm::toSInteger64(double x) { - assert(isSInteger64(x)); - return x > (double)std::numeric_limits<int64_t>::min() && - x < (double)std::numeric_limits<int64_t>::max() + return (x > (double)std::numeric_limits<int64_t>::min() && + x < (double)std::numeric_limits<int64_t>::max()) ? (int64_t)x - : (x < 0 ? std::numeric_limits<int64_t>::min() - : std::numeric_limits<int64_t>::max()); + : (std::signbit(x) ? std::numeric_limits<int64_t>::min() + : std::numeric_limits<int64_t>::max()); } diff --git a/src/wasm-binary.h b/src/wasm-binary.h index b1db70a97..edc904787 100644 --- a/src/wasm-binary.h +++ b/src/wasm-binary.h @@ -30,36 +30,79 @@ namespace wasm { -struct LEB128 { - uint32_t value; +template<typename T, typename MiniT> +struct LEB { + T value; - LEB128() {} - LEB128(uint32_t value) : value(value) {} + LEB() {} + LEB(T value) : value(value) {} + + bool isSigned() { + return int(MiniT(-1)) < 0; + } + + bool hasMore(T temp, MiniT byte) { + // for signed, we must ensure the last bit has the right sign, as it will zero extend + return isSigned() ? (temp != 0 && int32_t(temp) != -1) || (value >= 0 && (byte & 64)) || (value < 0 && !(byte & 64)): (temp != 0); + } void write(std::vector<uint8_t>* out) { - uint32_t temp = value; + T temp = value; + bool more; do { uint8_t byte = temp & 127; temp >>= 7; - if (temp) { + more = hasMore(temp, byte); + if (more) { byte = byte | 128; } out->push_back(byte); - } while (temp); + } while (more); + } + + void writeAt(std::vector<uint8_t>* out, size_t at, size_t minimum = 0) { + T temp = value; + size_t offset = 0; + bool more; + do { + uint8_t byte = temp & 127; + temp >>= 7; + more = hasMore(temp, byte) || offset + 1 < minimum; + if (more) { + byte = byte | 128; + } + (*out)[at + offset] = byte; + offset++; + } while (more); } - void read(std::function<uint8_t ()> get) { + void read(std::function<MiniT ()> get) { value = 0; - uint32_t shift = 0; + T shift = 0; + MiniT byte; while (1) { - uint8_t byte = get(); - value |= ((byte & 127) << shift); + byte = get(); + value |= ((T(byte & 127)) << shift); if (!(byte & 128)) break; shift += 7; } + // if signed LEB, then we might need to sign-extend. (compile should optimize this out if not needed) + if (isSigned()) { + shift += 7; + if (byte & 64 && size_t(shift) < 8*sizeof(T)) { + // the highest bit we received was a 1, sign-extend all the rest + value = value | (T(-1) << shift); + assert(value < 0); + } + } } }; +typedef LEB<uint32_t, uint8_t> U32LEB; +typedef LEB<uint64_t, uint8_t> U64LEB; +typedef LEB<int32_t, int8_t> S32LEB; +typedef LEB<int64_t, int8_t> S64LEB; + // // We mostly stream into a buffer as we create the binary format, however, // sometimes we need to backtrack and write to a location behind us - wasm @@ -102,8 +145,23 @@ public: push_back(x & 0xff); return *this; } - BufferWithRandomAccess& operator<<(LEB128 x) { - if (debug) std::cerr << "writeLEB128: " << x.value << " (at " << size() << ")" << std::endl; + BufferWithRandomAccess& operator<<(U32LEB x) { + if (debug) std::cerr << "writeU32LEB: " << x.value << " (at " << size() << ")" << std::endl; + x.write(this); + return *this; + } + BufferWithRandomAccess& operator<<(U64LEB x) { + if (debug) std::cerr << "writeU64LEB: " << x.value << " (at " << size() << ")" << std::endl; + x.write(this); + return *this; + } + BufferWithRandomAccess& operator<<(S32LEB x) { + if (debug) std::cerr << "writeS32LEB: " << x.value << " (at " << size() << ")" << std::endl; + x.write(this); + return *this; + } + BufferWithRandomAccess& operator<<(S64LEB x) { + if (debug) std::cerr << "writeS64LEB: " << x.value << " (at " << size() << ")" << std::endl; x.write(this); return *this; } @@ -131,15 +189,21 @@ public: } void writeAt(size_t i, uint16_t x) { + if (debug) std::cerr << "backpatchInt16: " << x << " (at " << i << ")" << std::endl; (*this)[i] = x & 0xff; (*this)[i+1] = x >> 8; } void writeAt(size_t i, uint32_t x) { + if (debug) std::cerr << "backpatchInt32: " << x << " (at " << i << ")" << std::endl; (*this)[i] = x & 0xff; x >>= 8; (*this)[i+1] = x & 0xff; x >>= 8; (*this)[i+2] = x & 0xff; x >>= 8; (*this)[i+3] = x & 0xff; } + void writeAt(size_t i, U32LEB x) { + if (debug) std::cerr << "backpatchU32LEB: " << x.value << " (at " << i << ")" << std::endl; + x.writeAt(this, i, 5); // fill all 5 bytes, we have to do this when backpatching + } template <typename T> void writeTo(T& o) { @@ -149,14 +213,18 @@ public: namespace BinaryConsts { -enum Section { - Memory = 0, - Signatures = 1, - Functions = 2, - DataSegments = 4, - FunctionTable = 5, - End = 6, - Start = 7 +namespace Section { + auto Memory = "memory"; + auto Signatures = "signatures"; + auto ImportTable = "import_table"; + auto FunctionSignatures = "function_signatures"; + auto Functions = "functions"; + auto ExportTable = "export_table"; + auto DataSegments = "data_segments"; + auto FunctionTable = "function_table"; + auto Names = "names"; + auto End = "end"; + auto Start = "start_function"; }; enum FunctionEntry { @@ -195,6 +263,7 @@ enum ASTNodes { I32Clz = 0x57, I32Ctz = 0x58, I32Popcnt = 0x59, + I32EqZ = 0xc0, // XXX BoolNot = 0x5a, I64Add = 0x5b, I64Sub = 0x5c, @@ -222,6 +291,7 @@ enum ASTNodes { I64Clz = 0x72, I64Ctz = 0x73, I64Popcnt = 0x74, + I64EqZ = 0xc1, // XXX F32Add = 0x75, F32Sub = 0x76, F32Mul = 0x77, @@ -287,6 +357,10 @@ enum ASTNodes { F64ConvertF32 = 0xb2, F64ReinterpretI64 = 0xb3, I64ReinterpretF64 = 0xb5, + I32RotR = 0xb6, + I32RotL = 0xb7, + I64RotR = 0xb8, + I64RotL = 0xb9, I32ReinterpretF32 = 0xfe, // XXX not in v8 spec doc I32LoadMem8S = 0x20, @@ -313,7 +387,6 @@ enum ASTNodes { F32StoreMem = 0x35, F64StoreMem = 0x36, - I8Const = 0x09, I32Const = 0x0a, I64Const = 0x0b, F64Const = 0x0c, @@ -324,6 +397,7 @@ enum ASTNodes { StoreGlobal = 0x11, CallFunction = 0x12, CallIndirect = 0x13, + CallImport = 0x1f, Nop = 0x00, Block = 0x01, @@ -335,7 +409,8 @@ enum ASTNodes { BrIf = 0x07, TableSwitch = 0x08, Return = 0x14, - Unreachable = 0x15 + Unreachable = 0x15, + EndMarker = 0xff }; enum MemoryAccess { @@ -381,55 +456,81 @@ public: } void write() { - writeStart(); + writeHeader(); writeMemory(); writeSignatures(); + writeImports(); + writeFunctionSignatures(); writeFunctions(); + writeStart(); + writeExports(); writeDataSegments(); writeFunctionTable(); + writeNames(); writeEnd(); finishUp(); } + void writeHeader() { + if (debug) std::cerr << "== writeHeader" << std::endl; + o << int32_t(0x6d736100); // magic number \0asm + o << int32_t(10); // version number + } + + int32_t writeU32LEBPlaceholder() { + int32_t ret = o.size(); + o << int32_t(0); + o << int8_t(0); + return ret; + } + + int32_t startSection(const char* name) { + // emit 5 bytes of 0, which we'll fill with LEB later + auto ret = writeU32LEBPlaceholder(); + writeInlineString(name); + return ret; + } + + void finishSection(int32_t start) { + int32_t size = o.size() - start - 5; // section size does not include the 5 bytes of the size field itself + o.writeAt(start, U32LEB(size)); + } + void writeStart() { if (!wasm->start.is()) return; if (debug) std::cerr << "== writeStart" << std::endl; - o << int8_t(BinaryConsts::Start); - emitString(wasm->start.str); + auto start = startSection(BinaryConsts::Section::Start); + o << U32LEB(getFunctionIndex(wasm->start.str)); + finishSection(start); } void writeMemory() { if (wasm->memory.max == 0) return; if (debug) std::cerr << "== writeMemory" << std::endl; - o << int8_t(BinaryConsts::Memory); - if (wasm->memory.initial == 0) { // XXX diverge from v8, 0 means 0, 1 and above are powers of 2 starting at 0 - o << int8_t(0); - } else { - o << int8_t(std::min(ceil(log2(wasm->memory.initial)), 31.0) + 1); // up to 31 bits, don't let ceil get us to UINT_MAX which can overflow - } - if (wasm->memory.max == 0) { - o << int8_t(0); - } else { - o << int8_t(std::min(ceil(log2(wasm->memory.max)), 31.0) + 1); - } - o << int8_t(1); // export memory + auto start = startSection(BinaryConsts::Section::Memory); + o << U32LEB(wasm->memory.initial) + << U32LEB(wasm->memory.max) + << int8_t(1); // export memory + finishSection(start); } void writeSignatures() { if (wasm->functionTypes.size() == 0) return; if (debug) std::cerr << "== writeSignatures" << std::endl; - o << int8_t(BinaryConsts::Signatures) << LEB128(wasm->functionTypes.size()); + auto start = startSection(BinaryConsts::Section::Signatures); + o << U32LEB(wasm->functionTypes.size()); for (auto* type : wasm->functionTypes) { if (debug) std::cerr << "write one" << std::endl; - o << int8_t(type->params.size()); + o << U32LEB(type->params.size()); o << binaryWasmType(type->result); for (auto param : type->params) { o << binaryWasmType(param); } } + finishSection(start); } - int16_t getFunctionTypeIndex(Name type) { + int32_t getFunctionTypeIndex(Name type) { // TODO: optimize for (size_t i = 0; i < wasm->functionTypes.size(); i++) { if (wasm->functionTypes[i]->name == type) return i; @@ -437,6 +538,20 @@ public: abort(); } + void writeImports() { + if (wasm->imports.size() == 0) return; + if (debug) std::cerr << "== writeImports" << std::endl; + auto start = startSection(BinaryConsts::Section::ImportTable); + o << U32LEB(wasm->imports.size()); + for (auto* import : wasm->imports) { + if (debug) std::cerr << "write one" << std::endl; + o << U32LEB(getFunctionTypeIndex(import->type->name)); + writeInlineString(import->module.str); + writeInlineString(import->base.str); + } + finishSection(start); + } + std::map<Name, size_t> mappedLocals; // local name => index in compact form of [all int32s][all int64s]etc std::map<WasmType, size_t> numLocalsByType; // type => number of locals of that type in the compact form @@ -477,62 +592,66 @@ public: } } + void writeFunctionSignatures() { + if (wasm->functions.size() == 0) return; + if (debug) std::cerr << "== writeFunctionSignatures" << std::endl; + auto start = startSection(BinaryConsts::Section::FunctionSignatures); + o << U32LEB(wasm->functions.size()); + for (auto* curr : wasm->functions) { + if (debug) std::cerr << "write one" << std::endl; + o << U32LEB(getFunctionTypeIndex(curr->type)); + } + finishSection(start); + } + void writeFunctions() { - if (wasm->functions.size() + wasm->imports.size() == 0) return; + if (wasm->functions.size() == 0) return; if (debug) std::cerr << "== writeFunctions" << std::endl; - size_t total = wasm->imports.size() + wasm->functions.size(); - o << int8_t(BinaryConsts::Functions) << LEB128(total); - std::map<Name, Name> exportedFunctions; - for (auto* e : wasm->exports) { - exportedFunctions[e->value] = e->name; - } + auto start = startSection(BinaryConsts::Section::Functions); + size_t total = wasm->functions.size(); + o << U32LEB(total); for (size_t i = 0; i < total; i++) { if (debug) std::cerr << "write one at" << o.size() << std::endl; - Import* import = i < wasm->imports.size() ? wasm->imports[i] : nullptr; - Function* function = i >= wasm->imports.size() ? wasm->functions[i - wasm->imports.size()] : nullptr; - Name name, type; - if (import) { - name = import->name; - type = import->type->name; - } else { - name = function->name; - type = function->type; - mappedLocals.clear(); - numLocalsByType.clear(); - } - if (debug) std::cerr << "writing" << name << std::endl; - bool export_ = exportedFunctions.count(name) > 0; - o << int8_t(BinaryConsts::Named | - (BinaryConsts::Import * !!import) | - (BinaryConsts::Locals * (function && function->locals.size() > 0)) | - (BinaryConsts::Export * export_)); - o << getFunctionTypeIndex(type); - emitString(name.str); - if (export_) emitString(exportedFunctions[name].str); // XXX addition to v8 binary format - if (function) { - mapLocals(function); - if (function->locals.size() > 0) { - o << uint16_t(numLocalsByType[i32]) - << uint16_t(numLocalsByType[i64]) - << uint16_t(numLocalsByType[f32]) - << uint16_t(numLocalsByType[f64]); - } - size_t sizePos = o.size(); - o << (uint32_t)0; // placeholder, we fill in the size later when we have it // XXX int32, diverge from v8 format, to get more code to compile - size_t start = o.size(); - depth = 0; - recurse(function->body); - assert(depth == 0); - size_t size = o.size() - start; - assert(size <= std::numeric_limits<uint16_t>::max()); - if (debug) std::cerr << "body size: " << size << ", writing at " << sizePos << ", next starts at " << o.size() << std::endl; - o.writeAt(sizePos, uint32_t(size)); // XXX int32, diverge from v8 format, to get more code to compile - } else { - // import - emitString(import->module.str); // XXX diverge - emitString(import->base.str); // from v8 - } + size_t sizePos = writeU32LEBPlaceholder(); + size_t start = o.size(); + Function* function = wasm->functions[i]; + mappedLocals.clear(); + numLocalsByType.clear(); + if (debug) std::cerr << "writing" << function->name << std::endl; + mapLocals(function); + o << U32LEB( + (numLocalsByType[i32] ? 1 : 0) + + (numLocalsByType[i64] ? 1 : 0) + + (numLocalsByType[f32] ? 1 : 0) + + (numLocalsByType[f64] ? 1 : 0) + ); + if (numLocalsByType[i32]) o << U32LEB(numLocalsByType[i32]) << binaryWasmType(i32); + if (numLocalsByType[i64]) o << U32LEB(numLocalsByType[i64]) << binaryWasmType(i64); + if (numLocalsByType[f32]) o << U32LEB(numLocalsByType[f32]) << binaryWasmType(f32); + if (numLocalsByType[f64]) o << U32LEB(numLocalsByType[f64]) << binaryWasmType(f64); + depth = 0; + recurse(function->body); + o << int8_t(BinaryConsts::EndMarker); + assert(depth == 0); + size_t size = o.size() - start; + assert(size <= std::numeric_limits<uint32_t>::max()); + if (debug) std::cerr << "body size: " << size << ", writing at " << sizePos << ", next starts at " << o.size() << std::endl; + o.writeAt(sizePos, U32LEB(size)); + } + finishSection(start); + } + + void writeExports() { + if (wasm->exports.size() == 0) return; + if (debug) std::cerr << "== writeexports" << std::endl; + auto start = startSection(BinaryConsts::Section::ExportTable); + o << U32LEB(wasm->exports.size()); + for (auto* curr : wasm->exports) { + if (debug) std::cerr << "write one" << std::endl; + o << U32LEB(getFunctionIndex(curr->value)); + writeInlineString(curr->name.str); } + finishSection(start); } void writeDataSegments() { @@ -541,42 +660,87 @@ public: for (auto& segment : wasm->memory.segments) { if (segment.size > 0) num++; } - o << int8_t(BinaryConsts::DataSegments) << LEB128(num); + auto start = startSection(BinaryConsts::Section::DataSegments); + o << U32LEB(num); for (auto& segment : wasm->memory.segments) { if (segment.size == 0) continue; - o << int32_t(segment.offset); - emitBuffer(segment.data, segment.size); - o << int32_t(segment.size); - o << int8_t(1); // load at program start + o << U32LEB(segment.offset); + writeInlineBuffer(segment.data, segment.size); } + finishSection(start); } - uint16_t getFunctionIndex(Name name) { - // TODO: optimize - for (size_t i = 0; i < wasm->imports.size(); i++) { - if (wasm->imports[i]->name == name) return i; + std::map<Name, uint32_t> mappedImports; // name of the Import => index + uint32_t getImportIndex(Name name) { + if (!mappedImports.size()) { + // Create name => index mapping. + for (size_t i = 0; i < wasm->imports.size(); i++) { + assert(mappedImports.count(wasm->imports[i]->name) == 0); + mappedImports[wasm->imports[i]->name] = i; + } } - for (size_t i = 0; i < wasm->functions.size(); i++) { - if (wasm->functions[i]->name == name) return wasm->imports.size() + i; + assert(mappedImports.count(name)); + return mappedImports[name]; + } + + std::map<Name, uint32_t> mappedFunctions; // name of the Function => index + uint32_t getFunctionIndex(Name name) { + if (!mappedFunctions.size()) { + // Create name => index mapping. + for (size_t i = 0; i < wasm->functions.size(); i++) { + assert(mappedFunctions.count(wasm->functions[i]->name) == 0); + mappedFunctions[wasm->functions[i]->name] = i; + } } - abort(); + assert(mappedFunctions.count(name)); + return mappedFunctions[name]; } void writeFunctionTable() { if (wasm->table.names.size() == 0) return; if (debug) std::cerr << "== writeFunctionTable" << std::endl; - o << int8_t(BinaryConsts::FunctionTable) << LEB128(wasm->table.names.size()); + auto start = startSection(BinaryConsts::Section::FunctionTable); + o << U32LEB(wasm->table.names.size()); for (auto name : wasm->table.names) { - o << getFunctionIndex(name); + o << U32LEB(getFunctionIndex(name)); + } + finishSection(start); + } + + void writeNames() { + if (wasm->functions.size() == 0) return; + if (debug) std::cerr << "== writeNames" << std::endl; + auto start = startSection(BinaryConsts::Section::Names); + o << U32LEB(wasm->functions.size()); + for (auto* curr : wasm->functions) { + writeInlineString(curr->name.str); + o << U32LEB(0); // TODO: locals } + finishSection(start); } void writeEnd() { - o << int8_t(BinaryConsts::End); + auto start = startSection(BinaryConsts::Section::End); + finishSection(start); } // helpers + void writeInlineString(const char* name) { + int32_t size = strlen(name); + o << U32LEB(size); + for (int32_t i = 0; i < size; i++) { + o << int8_t(name[i]); + } + } + + void writeInlineBuffer(const char* data, size_t size) { + o << U32LEB(size); + for (size_t i = 0; i < size; i++) { + o << int8_t(data[i]); + } + } + struct Buffer { const char* data; size_t size; @@ -623,7 +787,7 @@ public: void visitBlock(Block *curr) { if (debug) std::cerr << "zz node: Block" << std::endl; - o << int8_t(BinaryConsts::Block) << LEB128(curr->list.size()); // XXX larger block size, divergence from v8 to get more code to build + o << int8_t(BinaryConsts::Block); breakStack.push_back(curr->name); size_t i = 0; for (auto* child : curr->list) { @@ -631,13 +795,14 @@ public: recurse(child); } breakStack.pop_back(); + o << int8_t(BinaryConsts::EndMarker); } void visitIf(If *curr) { if (debug) std::cerr << "zz node: If" << std::endl; - o << int8_t(curr->ifFalse ? BinaryConsts::IfElse : BinaryConsts::If); recurse(curr->condition); recurse(curr->ifTrue); if (curr->ifFalse) recurse(curr->ifFalse); + o << int8_t(curr->ifFalse ? BinaryConsts::IfElse : BinaryConsts::If); } void visitLoop(Loop *curr) { if (debug) std::cerr << "zz node: Loop" << std::endl; @@ -648,98 +813,86 @@ public: recurse(curr->body); breakStack.pop_back(); breakStack.pop_back(); + o << int8_t(BinaryConsts::EndMarker); } - int getBreakIndex(Name name) { // -1 if not found + int32_t getBreakIndex(Name name) { // -1 if not found for (int i = breakStack.size() - 1; i >= 0; i--) { if (breakStack[i] == name) { return breakStack.size() - 1 - i; } } -printf("bad!\n"); std::cerr << "bad break: " << name << std::endl; - - - -assert(0); - - abort(); } void visitBreak(Break *curr) { if (debug) std::cerr << "zz node: Break" << std::endl; - o << int8_t(curr->condition ? BinaryConsts::BrIf : BinaryConsts::Br); - int offset = getBreakIndex(curr->name); - o << int8_t(offset); if (curr->value) { recurse(curr->value); } else { visitNop(nullptr); } if (curr->condition) recurse(curr->condition); + o << int8_t(curr->condition ? BinaryConsts::BrIf : BinaryConsts::Br) + << U32LEB(getBreakIndex(curr->name)); } void visitSwitch(Switch *curr) { if (debug) std::cerr << "zz node: Switch" << std::endl; - o << int8_t(BinaryConsts::TableSwitch) << int16_t(curr->cases.size()) - << int16_t(curr->targets.size() + 1); - for (size_t i = 0; i < curr->cases.size(); i++) { - breakStack.push_back(curr->cases[i].name); - } - breakStack.push_back(curr->name); + o << int8_t(BinaryConsts::TableSwitch) << U32LEB(curr->targets.size()); for (auto target : curr->targets) { - o << (int16_t)getBreakIndex(target); - } - o << (int16_t)getBreakIndex(curr->default_); - recurse(curr->value); - for (auto& c : curr->cases) { - recurse(c.body); + o << U32LEB(getBreakIndex(target)); } - breakStack.pop_back(); // name - for (size_t i = 0; i < curr->cases.size(); i++) { - breakStack.pop_back(); // case + o << U32LEB(getBreakIndex(curr->default_)); + recurse(curr->condition); + o << int8_t(BinaryConsts::EndMarker); + if (curr->value) { + recurse(curr->value); + } else { + visitNop(nullptr); } + o << int8_t(BinaryConsts::EndMarker); } void visitCall(Call *curr) { if (debug) std::cerr << "zz node: Call" << std::endl; - o << int8_t(BinaryConsts::CallFunction) << LEB128(getFunctionIndex(curr->target)); for (auto* operand : curr->operands) { recurse(operand); } + o << int8_t(BinaryConsts::CallFunction) << U32LEB(getFunctionIndex(curr->target)); } void visitCallImport(CallImport *curr) { if (debug) std::cerr << "zz node: CallImport" << std::endl; - o << int8_t(BinaryConsts::CallFunction) << LEB128(getFunctionIndex(curr->target)); for (auto* operand : curr->operands) { recurse(operand); } + o << int8_t(BinaryConsts::CallImport) << U32LEB(getImportIndex(curr->target)); } void visitCallIndirect(CallIndirect *curr) { if (debug) std::cerr << "zz node: CallIndirect" << std::endl; - o << int8_t(BinaryConsts::CallIndirect) << LEB128(getFunctionTypeIndex(curr->fullType->name)); recurse(curr->target); for (auto* operand : curr->operands) { recurse(operand); } + o << int8_t(BinaryConsts::CallIndirect) << U32LEB(getFunctionTypeIndex(curr->fullType->name)); } void visitGetLocal(GetLocal *curr) { if (debug) std::cerr << "zz node: GetLocal " << (o.size() + 1) << std::endl; - o << int8_t(BinaryConsts::GetLocal) << LEB128(mappedLocals[curr->name]); + o << int8_t(BinaryConsts::GetLocal) << U32LEB(mappedLocals[curr->name]); } void visitSetLocal(SetLocal *curr) { if (debug) std::cerr << "zz node: SetLocal" << std::endl; - o << int8_t(BinaryConsts::SetLocal) << LEB128(mappedLocals[curr->name]); recurse(curr->value); + o << int8_t(BinaryConsts::SetLocal) << U32LEB(mappedLocals[curr->name]); } void emitMemoryAccess(size_t alignment, size_t bytes, uint32_t offset) { - o << int8_t( ((alignment == bytes || alignment == 0) ? BinaryConsts::NaturalAlignment : BinaryConsts::Alignment) | - (offset ? BinaryConsts::Offset : 0) ); - if (offset) o << LEB128(offset); + o << U32LEB(Log2(alignment ? alignment : bytes)); + o << U32LEB(offset); } void visitLoad(Load *curr) { if (debug) std::cerr << "zz node: Load" << std::endl; + recurse(curr->ptr); switch (curr->type) { case i32: { switch (curr->bytes) { @@ -765,10 +918,11 @@ assert(0); default: abort(); } emitMemoryAccess(curr->align, curr->bytes, curr->offset); - recurse(curr->ptr); } void visitStore(Store *curr) { if (debug) std::cerr << "zz node: Store" << std::endl; + recurse(curr->ptr); + recurse(curr->value); switch (curr->type) { case i32: { switch (curr->bytes) { @@ -794,23 +948,16 @@ assert(0); default: abort(); } emitMemoryAccess(curr->align, curr->bytes, curr->offset); - recurse(curr->ptr); - recurse(curr->value); } void visitConst(Const *curr) { if (debug) std::cerr << "zz node: Const" << curr << " : " << curr->type << std::endl; switch (curr->type) { case i32: { - uint32_t value = curr->value.geti32(); - if (value <= 255) { - o << int8_t(BinaryConsts::I8Const) << uint8_t(value); - break; - } - o << int8_t(BinaryConsts::I32Const) << value; + o << int8_t(BinaryConsts::I32Const) << S32LEB(curr->value.geti32()); break; } case i64: { - o << int8_t(BinaryConsts::I64Const) << curr->value.geti64(); + o << int8_t(BinaryConsts::I64Const) << S64LEB(curr->value.geti64()); break; } case f32: { @@ -827,10 +974,12 @@ assert(0); } void visitUnary(Unary *curr) { if (debug) std::cerr << "zz node: Unary" << std::endl; + recurse(curr->value); switch (curr->op) { case Clz: o << int8_t(curr->type == i32 ? BinaryConsts::I32Clz : BinaryConsts::I64Clz); break; case Ctz: o << int8_t(curr->type == i32 ? BinaryConsts::I32Ctz : BinaryConsts::I64Ctz); break; case Popcnt: o << int8_t(curr->type == i32 ? BinaryConsts::I32Popcnt : BinaryConsts::I64Popcnt); break; + case EqZ: o << int8_t(curr->type == i32 ? BinaryConsts::I32EqZ : BinaryConsts::I64EqZ); break; case Neg: o << int8_t(curr->type == f32 ? BinaryConsts::F32Neg : BinaryConsts::F64Neg); break; case Abs: o << int8_t(curr->type == f32 ? BinaryConsts::F32Abs : BinaryConsts::F64Abs); break; case Ceil: o << int8_t(curr->type == f32 ? BinaryConsts::F32Ceil : BinaryConsts::F64Ceil); break; @@ -855,10 +1004,11 @@ assert(0); case ReinterpretInt: o << int8_t(curr->type == i32 ? BinaryConsts::I32ReinterpretF32 : BinaryConsts::I64ReinterpretF64); break; default: abort(); } - recurse(curr->value); } void visitBinary(Binary *curr) { if (debug) std::cerr << "zz node: Binary" << std::endl; + recurse(curr->left); + recurse(curr->right); #define TYPED_CODE(code) { \ switch (getReachableWasmType(curr->left->type, curr->right->type)) { \ case i32: o << int8_t(BinaryConsts::I32##code); break; \ @@ -900,6 +1050,8 @@ assert(0); case Shl: INT_TYPED_CODE(Shl);; case ShrU: INT_TYPED_CODE(ShrU); case ShrS: INT_TYPED_CODE(ShrS); + case RotL: INT_TYPED_CODE(RotL); + case RotR: INT_TYPED_CODE(RotR); case Div: FLOAT_TYPED_CODE(Div); case CopySign: FLOAT_TYPED_CODE(CopySign); case Min: FLOAT_TYPED_CODE(Min); @@ -920,27 +1072,25 @@ assert(0); case Ge: FLOAT_TYPED_CODE(Ge); default: abort(); } - recurse(curr->left); - recurse(curr->right); #undef TYPED_CODE #undef INT_TYPED_CODE #undef FLOAT_TYPED_CODE } void visitSelect(Select *curr) { if (debug) std::cerr << "zz node: Select" << std::endl; - o << int8_t(BinaryConsts::Select); recurse(curr->ifTrue); recurse(curr->ifFalse); recurse(curr->condition); + o << int8_t(BinaryConsts::Select); } void visitReturn(Return *curr) { if (debug) std::cerr << "zz node: Return" << std::endl; - o << int8_t(BinaryConsts::Return); if (curr->value) { recurse(curr->value); } else { visitNop(nullptr); } + o << int8_t(BinaryConsts::Return); } void visitHost(Host *curr) { if (debug) std::cerr << "zz node: Host" << std::endl; @@ -950,8 +1100,8 @@ assert(0); break; } case GrowMemory: { - o << int8_t(BinaryConsts::GrowMemory); recurse(curr->operands[0]); + o << int8_t(BinaryConsts::GrowMemory); break; } default: abort(); @@ -973,37 +1123,58 @@ class WasmBinaryBuilder { std::vector<char>& input; bool debug; - size_t pos; + size_t pos = 0; + int32_t startIndex = -1; public: - WasmBinaryBuilder(AllocatingModule& wasm, std::vector<char>& input, bool debug) : wasm(wasm), allocator(wasm.allocator), input(input), debug(debug), pos(0) {} + WasmBinaryBuilder(AllocatingModule& wasm, std::vector<char>& input, bool debug) : wasm(wasm), allocator(wasm.allocator), input(input), debug(debug) {} void read() { - // read sections until the end - while (1) { - int8_t section = getInt8(); - if (section == BinaryConsts::End) { + readHeader(); + + // read sections until the end + while (more()) { + auto sectionSize = getU32LEB(); + assert(sectionSize < pos + input.size()); + auto nameSize = getU32LEB(); + auto match = [&](const char* name) { + for (size_t i = 0; i < nameSize; i++) { + if (pos + i >= input.size()) return false; + if (name[i] == 0) return false; + if (input[pos + i] != name[i]) return false; + } + if (strlen(name) != nameSize) return false; + pos += nameSize; + return true; + }; + if (match(BinaryConsts::Section::Start)) readStart(); + else if (match(BinaryConsts::Section::Memory)) readMemory(); + else if (match(BinaryConsts::Section::Signatures)) readSignatures(); + else if (match(BinaryConsts::Section::ImportTable)) readImports(); + else if (match(BinaryConsts::Section::FunctionSignatures)) readFunctionSignatures(); + else if (match(BinaryConsts::Section::Functions)) readFunctions(); + else if (match(BinaryConsts::Section::ExportTable)) readExports(); + else if (match(BinaryConsts::Section::DataSegments)) readDataSegments(); + else if (match(BinaryConsts::Section::FunctionTable)) readFunctionTable(); + else if (match(BinaryConsts::Section::Names)) readNames(); + else if (match(BinaryConsts::Section::End)) { if (debug) std::cerr << "== readEnd" << std::endl; break; - } - - switch (section) { - case BinaryConsts::Start: readStart(); break; - case BinaryConsts::Memory: readMemory(); break; - case BinaryConsts::Signatures: readSignatures(); break; - case BinaryConsts::Functions: readFunctions(); break; - case BinaryConsts::DataSegments: readDataSegments(); break; - case BinaryConsts::FunctionTable: readFunctionTable(); break; - default: abort(); + } else { + abort(); } } processFunctions(); } + bool more() { + return pos < input.size(); + } + uint8_t getInt8() { - assert(pos < input.size()); + assert(more()); if (debug) std::cerr << "getInt8: " << (int)(uint8_t)input[pos] << " (at " << pos << ")" << std::endl; return input[pos++]; } @@ -1038,13 +1209,40 @@ public: return ret; } - uint32_t getLEB128() { + uint32_t getU32LEB() { + if (debug) std::cerr << "<==" << std::endl; + U32LEB ret; + ret.read([&]() { + return getInt8(); + }); + if (debug) std::cerr << "getU32LEB: " << ret.value << " ==>" << std::endl; + return ret.value; + } + uint64_t getU64LEB() { if (debug) std::cerr << "<==" << std::endl; - LEB128 ret; + U64LEB ret; ret.read([&]() { return getInt8(); }); - if (debug) std::cerr << "getLEB128: " << ret.value << " ==>" << std::endl; + if (debug) std::cerr << "getU64LEB: " << ret.value << " ==>" << std::endl; + return ret.value; + } + int32_t getS32LEB() { + if (debug) std::cerr << "<==" << std::endl; + S32LEB ret; + ret.read([&]() { + return (int8_t)getInt8(); + }); + if (debug) std::cerr << "getU32LEB: " << ret.value << " ==>" << std::endl; + return ret.value; + } + int64_t getS64LEB() { + if (debug) std::cerr << "<==" << std::endl; + S64LEB ret; + ret.read([&]() { + return (int8_t)getInt8(); + }); + if (debug) std::cerr << "getU64LEB: " << ret.value << " ==>" << std::endl; return ret.value; } WasmType getWasmType() { @@ -1067,6 +1265,17 @@ public: return ret; } + Name getInlineString() { + if (debug) std::cerr << "<==" << std::endl; + auto len = getU32LEB(); + std::string str; + for (size_t i = 0; i < len; i++) { + str = str + char(getInt8()); + } + if (debug) std::cerr << "getInlineString: " << str << " ==>" << std::endl; + return Name(str); + } + void verifyInt8(int8_t x) { int8_t y = getInt8(); assert(x == y); @@ -1092,28 +1301,38 @@ public: assert(x == y); } + void ungetInt8() { + assert(pos > 0); + if (debug) std::cerr << "ungetInt8 (at " << pos << ")" << std::endl; + pos--; + } + + void readHeader() { + if (debug) std::cerr << "== readHeader" << std::endl; + verifyInt32(0x6d736100); + verifyInt32(10); + } + void readStart() { if (debug) std::cerr << "== readStart" << std::endl; - wasm.start = getString(); + startIndex = getU32LEB(); } void readMemory() { if (debug) std::cerr << "== readMemory" << std::endl; - size_t initial = getInt8(); - wasm.memory.initial = initial == 0 ? 0 : std::pow<size_t>(2, initial - 1); - size_t max = getInt8(); - wasm.memory.max = max == 0 ? 0 : std::pow<size_t>(2, max - 1); + wasm.memory.initial = getU32LEB(); + wasm.memory.max = getU32LEB(); verifyInt8(1); // export memory } void readSignatures() { if (debug) std::cerr << "== readSignatures" << std::endl; - size_t numTypes = getLEB128(); + size_t numTypes = getU32LEB(); if (debug) std::cerr << "num: " << numTypes << std::endl; for (size_t i = 0; i < numTypes; i++) { if (debug) std::cerr << "read one" << std::endl; auto curr = allocator.alloc<FunctionType>(); - size_t numParams = getInt8(); + size_t numParams = getU32LEB(); if (debug) std::cerr << "num params: " << numParams << std::endl; curr->result = getWasmType(); for (size_t j = 0; j < numParams; j++) { @@ -1123,7 +1342,37 @@ public: } } - std::vector<Name> mappedFunctions; // index => name of the Import or Function + void readImports() { + if (debug) std::cerr << "== readImports" << std::endl; + size_t num = getU32LEB(); + if (debug) std::cerr << "num: " << num << std::endl; + for (size_t i = 0; i < num; i++) { + if (debug) std::cerr << "read one" << std::endl; + auto curr = allocator.alloc<Import>(); + curr->name = Name(std::string("import$") + std::to_string(i)); + auto index = getU32LEB(); + assert(index < wasm.functionTypes.size()); + curr->type = wasm.functionTypes[index]; + assert(curr->type->name.is()); + curr->module = getInlineString(); + curr->base = getInlineString(); + wasm.addImport(curr); + } + } + + std::vector<FunctionType*> functionTypes; + + void readFunctionSignatures() { + if (debug) std::cerr << "== readFunctionSignatures" << std::endl; + size_t num = getU32LEB(); + if (debug) std::cerr << "num: " << num << std::endl; + for (size_t i = 0; i < num; i++) { + if (debug) std::cerr << "read one" << std::endl; + auto index = getU32LEB(); + assert(index < wasm.functionTypes.size()); + functionTypes.push_back(wasm.functionTypes[index]); + } + } size_t nextLabel; @@ -1131,134 +1380,174 @@ public: return cashew::IString(("label$" + std::to_string(nextLabel++)).c_str(), false); } + // We read functions before we know their names, so we need to backpatch the names later + + std::vector<Function*> functions; // we store functions here before wasm.addFunction after we know their names + std::map<size_t, std::vector<Call*>> functionCalls; // at index i we have all calls to i + void readFunctions() { if (debug) std::cerr << "== readFunctions" << std::endl; - size_t total = getLEB128(); // imports and functions + size_t total = getU32LEB(); for (size_t i = 0; i < total; i++) { if (debug) std::cerr << "read one at " << pos << std::endl; - auto data = getInt8(); - auto type = wasm.functionTypes[getInt16()]; - bool named = data & BinaryConsts::Named; - assert(named); - bool import = data & BinaryConsts::Import; - bool locals = data & BinaryConsts::Locals; - bool export_ = data & BinaryConsts::Export; - Name name = getString(); - if (export_) { // XXX addition to v8 binary format - Name exportName = getString(); - auto e = allocator.alloc<Export>(); - e->name = exportName; - e->value = name; - wasm.addExport(e); + size_t size = getU32LEB(); + assert(size > 0); // we could also check it matches the seen size + auto type = functionTypes[i]; + if (debug) std::cerr << "reading" << i << std::endl; + auto func = allocator.alloc<Function>(); + func->type = type->name; + func->result = type->result; + size_t nextVar = 0; + auto addVar = [&]() { + Name name = cashew::IString(("var$" + std::to_string(nextVar++)).c_str(), false); + return name; + }; + for (size_t j = 0; j < type->params.size(); j++) { + func->params.emplace_back(addVar(), type->params[j]); } - if (debug) std::cerr << "reading" << name << std::endl; - mappedFunctions.push_back(name); - if (import) { - auto imp = allocator.alloc<Import>(); - imp->name = name; - imp->module = getString(); // XXX diverge - imp->base = getString(); // from v8 - imp->type = type; - wasm.addImport(imp); - } else { - auto func = allocator.alloc<Function>(); - func->name = name; - func->type = type->name; - func->result = type->result; - size_t nextVar = 0; - auto addVar = [&]() { - Name name = cashew::IString(("var$" + std::to_string(nextVar++)).c_str(), false); - return name; - }; - for (size_t j = 0; j < type->params.size(); j++) { - func->params.emplace_back(addVar(), type->params[j]); + size_t numLocalTypes = getU32LEB(); + for (size_t t = 0; t < numLocalTypes; t++) { + auto num = getU32LEB(); + auto type = getWasmType(); + while (num > 0) { + func->locals.emplace_back(addVar(), type); + num--; } - if (locals) { - auto addLocals = [&](WasmType type) { - int16_t num = getInt16(); - while (num > 0) { - func->locals.emplace_back(addVar(), type); - num--; - } - }; - addLocals(i32); - addLocals(i64); - addLocals(f32); - addLocals(f64); + } + { + // process the function body + if (debug) std::cerr << "processing function: " << i << std::endl; + nextLabel = 0; + // prepare locals + mappedLocals.clear(); + localTypes.clear(); + for (size_t i = 0; i < func->params.size(); i++) { + mappedLocals.push_back(func->params[i].name); + localTypes[func->params[i].name] = func->params[i].type; + } + for (size_t i = 0; i < func->locals.size(); i++) { + mappedLocals.push_back(func->locals[i].name); + localTypes[func->locals[i].name] = func->locals[i].type; } - size_t size = getInt32(); // XXX int32, diverge from v8 format, to get more code to compile - // we can't read the function yet - it might call other functions that are defined later, - // and we do depend on the function type, as well as the mappedFunctions table. - functions.emplace_back(func, pos, size); - pos += size; - func->body = nullptr; // will be filled later. but we do have the name and the type already. - wasm.addFunction(func); + // process body + assert(breakStack.empty()); + assert(expressionStack.empty()); + depth = 0; + processExpressions(); + assert(expressionStack.size() == 1); + func->body = popExpression(); + assert(depth == 0); + assert(breakStack.empty()); + assert(expressionStack.empty()); } + functions.push_back(func); } } - struct FunctionData { - Function* func; - size_t pos, size; - FunctionData(Function* func, size_t pos, size_t size) : func(func), pos(pos), size(size) {} - }; + std::map<Export*, size_t> exportIndexes; - std::vector<FunctionData> functions; + void readExports() { + if (debug) std::cerr << "== readExports" << std::endl; + size_t num = getU32LEB(); + if (debug) std::cerr << "num: " << num << std::endl; + for (size_t i = 0; i < num; i++) { + if (debug) std::cerr << "read one" << std::endl; + auto curr = allocator.alloc<Export>(); + auto index = getU32LEB(); + assert(index < functionTypes.size()); + curr->name = getInlineString(); + exportIndexes[curr] = index; + } + } std::vector<Name> mappedLocals; // index => local name std::map<Name, WasmType> localTypes; // TODO: optimize std::vector<Name> breakStack; + std::vector<Expression*> expressionStack; + + void processExpressions() { // until an end marker + while (1) { + Expression* curr; + readExpression(curr); + if (!curr) break; // end marker, done with this function/block/etc + expressionStack.push_back(curr); + } + } + + Expression* popExpression() { + assert(expressionStack.size() > 0); + auto ret = expressionStack.back(); + expressionStack.pop_back(); + return ret; + } + void processFunctions() { for (auto& func : functions) { - Function* curr = func.func; - if (debug) std::cerr << "processing function: " << curr->name << std::endl; - pos = func.pos; - nextLabel = 0; - // prepare locals - mappedLocals.clear(); - localTypes.clear(); - for (size_t i = 0; i < curr->params.size(); i++) { - mappedLocals.push_back(curr->params[i].name); - localTypes[curr->params[i].name] = curr->params[i].type; - } - for (size_t i = 0; i < curr->locals.size(); i++) { - mappedLocals.push_back(curr->locals[i].name); - localTypes[curr->locals[i].name] = curr->locals[i].type; + wasm.addFunction(func); + } + // now that we have names for each function, apply things + + if (startIndex >= 0) { + wasm.start = wasm.functions[startIndex]->name; + } + + for (auto& iter : exportIndexes) { + Export* curr = iter.first; + curr->value = wasm.functions[iter.second]->name; + wasm.addExport(curr); + } + + for (auto& iter : functionCalls) { + size_t index = iter.first; + auto& calls = iter.second; + for (auto* call : calls) { + call->target = wasm.functions[index]->name; } - // process body - assert(breakStack.empty()); - depth = 0; - readExpression(curr->body); - assert(depth == 0); - assert(breakStack.empty()); - assert(pos == func.pos + func.size); + } + + for (size_t index : functionTable) { + assert(index < wasm.functions.size()); + wasm.table.names.push_back(wasm.functions[index]->name); } } void readDataSegments() { if (debug) std::cerr << "== readDataSegments" << std::endl; - auto num = getLEB128(); + auto num = getU32LEB(); for (size_t i = 0; i < num; i++) { Memory::Segment curr; - curr.offset = getInt32(); - auto start = getInt32(); - auto size = getInt32(); - auto buffer = malloc(size); - memcpy(buffer, &input[start], size); + curr.offset = getU32LEB(); + auto size = getU32LEB(); + auto buffer = (char*)malloc(size); + for (size_t j = 0; j < size; j++) { + buffer[j] = char(getInt8()); + } curr.data = (const char*)buffer; curr.size = size; wasm.memory.segments.push_back(curr); - verifyInt8(1); // load at program start } } + std::vector<size_t> functionTable; + void readFunctionTable() { if (debug) std::cerr << "== readFunctionTable" << std::endl; - auto num = getLEB128(); + auto num = getU32LEB(); for (size_t i = 0; i < num; i++) { - wasm.table.names.push_back(mappedFunctions[getInt16()]); + auto index = getU32LEB(); + functionTable.push_back(index); + } + } + + void readNames() { + if (debug) std::cerr << "== readNames" << std::endl; + auto num = getU32LEB(); + for (size_t i = 0; i < num; i++) { + functions[i]->name = getInlineString(); + auto numLocals = getU32LEB(); + assert(numLocals == 0); // TODO } } @@ -1278,19 +1567,8 @@ public: case BinaryConsts::Br: case BinaryConsts::BrIf: visitBreak((curr = allocator.alloc<Break>())->cast<Break>(), code); break; // code distinguishes br from br_if case BinaryConsts::TableSwitch: visitSwitch((curr = allocator.alloc<Switch>())->cast<Switch>()); break; - case BinaryConsts::CallFunction: { - // might be an import or not. we have to check here. - Name target = mappedFunctions[getLEB128()]; - assert(target.is()); - if (debug) std::cerr << "call(import?) target: " << target << std::endl; - if (wasm.importsMap.find(target) == wasm.importsMap.end()) { - assert(wasm.functionsMap.find(target) != wasm.functionsMap.end()); - visitCall((curr = allocator.alloc<Call>())->cast<Call>(), target); - } else { - visitCallImport((curr = allocator.alloc<CallImport>())->cast<CallImport>(), target); - } - break; - } + case BinaryConsts::CallFunction: visitCall((curr = allocator.alloc<Call>())->cast<Call>()); break; + case BinaryConsts::CallImport: visitCallImport((curr = allocator.alloc<CallImport>())->cast<CallImport>()); break; case BinaryConsts::CallIndirect: visitCallIndirect((curr = allocator.alloc<CallIndirect>())->cast<CallIndirect>()); break; case BinaryConsts::GetLocal: visitGetLocal((curr = allocator.alloc<GetLocal>())->cast<GetLocal>()); break; case BinaryConsts::SetLocal: visitSetLocal((curr = allocator.alloc<SetLocal>())->cast<SetLocal>()); break; @@ -1298,6 +1576,7 @@ public: case BinaryConsts::Return: visitReturn((curr = allocator.alloc<Return>())->cast<Return>()); break; case BinaryConsts::Nop: visitNop((curr = allocator.alloc<Nop>())->cast<Nop>()); break; case BinaryConsts::Unreachable: visitUnreachable((curr = allocator.alloc<Unreachable>())->cast<Unreachable>()); break; + case BinaryConsts::EndMarker: curr = nullptr; break; default: { // otherwise, the code is a subcode TODO: optimize if (maybeVisit<Binary>(curr, code)) break; @@ -1327,26 +1606,53 @@ public: void visitBlock(Block *curr) { if (debug) std::cerr << "zz node: Block" << std::endl; - auto num = getLEB128(); // XXX larger block size, divergence from v8 to get more code to build - curr->name = getNextLabel(); - breakStack.push_back(curr->name); - for (uint32_t i = 0; i < num; i++) { - if (debug) std::cerr << " " << size_t(curr) << "\n zz Block element " << i << std::endl; - Expression* child; - readExpression(child); - curr->list.push_back(child); + // special-case Block and de-recurse nested blocks in their first position, as that is + // a common pattern that can be very highly nested. + std::vector<Block*> stack; + while (1) { + curr->name = getNextLabel(); + breakStack.push_back(curr->name); + stack.push_back(curr); + if (getInt8() == BinaryConsts::Block) { + // a recursion + curr = allocator.alloc<Block>(); + continue; + } else { + // end of recursion + ungetInt8(); + break; + } } - if (num > 0) { - curr->type = curr->list.back()->type; + Block* last = nullptr; + while (stack.size() > 0) { + curr = stack.back(); + stack.pop_back(); + size_t start = expressionStack.size(); // everything after this, that is left when we see the marker, is ours + if (last) { + // the previous block is our first-position element + expressionStack.push_back(last); + } + last = curr; + processExpressions(); + size_t end = expressionStack.size(); + assert(end >= start); + for (size_t i = start; i < end; i++) { + if (debug) std::cerr << " " << size_t(expressionStack[i]) << "\n zz Block element " << curr->list.size() << std::endl; + curr->list.push_back(expressionStack[i]); + } + expressionStack.resize(start); + curr->finalize(); + breakStack.pop_back(); } - breakStack.pop_back(); } void visitIf(If *curr, uint8_t code) { if (debug) std::cerr << "zz node: If" << std::endl; - readExpression(curr->condition); - readExpression(curr->ifTrue); if (code == BinaryConsts::IfElse) { - readExpression(curr->ifFalse); + curr->ifFalse = popExpression(); + } + curr->ifTrue = popExpression(); + curr->condition = popExpression(); + if (code == BinaryConsts::IfElse) { curr->finalize(); } } @@ -1357,106 +1663,91 @@ public: curr->in = getNextLabel(); breakStack.push_back(curr->out); breakStack.push_back(curr->in); - readExpression(curr->body); + processExpressions(); + curr->body = popExpression(); breakStack.pop_back(); breakStack.pop_back(); curr->finalize(); } - Name getBreakName(int offset) { + Name getBreakName(int32_t offset) { + assert(breakStack.size() - 1 - offset < breakStack.size()); return breakStack[breakStack.size() - 1 - offset]; } void visitBreak(Break *curr, uint8_t code) { if (debug) std::cerr << "zz node: Break" << std::endl; - auto offset = getInt8(); - curr->name = getBreakName(offset); - readExpression(curr->value); - if (code == BinaryConsts::BrIf) readExpression(curr->condition); + curr->name = getBreakName(getU32LEB()); + if (code == BinaryConsts::BrIf) curr->condition = popExpression(); + curr->value = popExpression(); } void visitSwitch(Switch *curr) { if (debug) std::cerr << "zz node: Switch" << std::endl; - auto numCases = getInt16(); - auto numTargets = getInt16(); - for (auto i = 0; i < numCases; i++) { - Switch::Case c; - c.name = getNextLabel(); - curr->cases.push_back(c); - breakStack.push_back(c.name); - } - curr->name = getNextLabel(); - breakStack.push_back(curr->name); - for (auto i = 0; i < numTargets - 1; i++) { - curr->targets.push_back(getBreakName(getInt16())); - } - curr->default_ = getBreakName(getInt16()); - readExpression(curr->value); - for (auto i = 0; i < numCases; i++) { - readExpression(curr->cases[i].body); - } - breakStack.pop_back(); // name - for (size_t i = 0; i < curr->cases.size(); i++) { - breakStack.pop_back(); // case + auto numTargets = getU32LEB(); + for (size_t i = 0; i < numTargets; i++) { + curr->targets.push_back(getBreakName(getU32LEB())); } + curr->default_ = getBreakName(getU32LEB()); + processExpressions(); + curr->condition = popExpression(); + processExpressions(); + curr->value = popExpression(); + if (curr->value->is<Nop>()) curr->value = nullptr; } - void visitCall(Call *curr, Name target) { + void visitCall(Call *curr) { if (debug) std::cerr << "zz node: Call" << std::endl; - curr->target = target; - auto type = wasm.functionTypesMap[wasm.functionsMap[curr->target]->type]; + auto index = getU32LEB(); + auto type = functionTypes[index]; auto num = type->params.size(); + curr->operands.resize(num); for (size_t i = 0; i < num; i++) { - Expression* operand; - readExpression(operand); - curr->operands.push_back(operand); + curr->operands[num - i - 1] = popExpression(); } curr->type = type->result; + functionCalls[index].push_back(curr); } - void visitCallImport(CallImport *curr, Name target) { + void visitCallImport(CallImport *curr) { if (debug) std::cerr << "zz node: CallImport" << std::endl; - curr->target = target; + curr->target = wasm.imports[getU32LEB()]->name; + assert(wasm.importsMap.find(curr->target) != wasm.importsMap.end()); auto type = wasm.importsMap[curr->target]->type; + assert(type); auto num = type->params.size(); + if (debug) std::cerr << "zz node: CallImport " << curr->target << " with type " << type->name << " and " << num << " params\n"; + curr->operands.resize(num); for (size_t i = 0; i < num; i++) { - Expression* operand; - readExpression(operand); - curr->operands.push_back(operand); + curr->operands[num - i - 1] = popExpression(); } curr->type = type->result; } void visitCallIndirect(CallIndirect *curr) { if (debug) std::cerr << "zz node: CallIndirect" << std::endl; - curr->fullType = wasm.functionTypes[getLEB128()]; - readExpression(curr->target); + curr->fullType = wasm.functionTypes[getU32LEB()]; auto num = curr->fullType->params.size(); + curr->operands.resize(num); for (size_t i = 0; i < num; i++) { - Expression* operand; - readExpression(operand); - curr->operands.push_back(operand); + curr->operands[num - i - 1] = popExpression(); } + curr->target = popExpression(); curr->type = curr->fullType->result; } void visitGetLocal(GetLocal *curr) { if (debug) std::cerr << "zz node: GetLocal " << pos << std::endl; - curr->name = mappedLocals[getLEB128()]; + curr->name = mappedLocals[getU32LEB()]; assert(curr->name.is()); curr->type = localTypes[curr->name]; } void visitSetLocal(SetLocal *curr) { if (debug) std::cerr << "zz node: SetLocal" << std::endl; - curr->name = mappedLocals[getLEB128()]; + curr->name = mappedLocals[getU32LEB()]; assert(curr->name.is()); - readExpression(curr->value); + curr->value = popExpression(); curr->type = curr->value->type; } void readMemoryAccess(uint32_t& alignment, size_t bytes, uint32_t& offset) { - auto value = getInt8(); - alignment = value & BinaryConsts::Alignment ? 1 : bytes; - if (value & BinaryConsts::Offset) { - offset = getLEB128(); - } else { - offset = 0; - } + alignment = Pow2(getU32LEB()); + offset = getU32LEB(); } bool maybeVisitImpl(Load *curr, uint8_t code) { @@ -1479,7 +1770,7 @@ public: } if (debug) std::cerr << "zz node: Load" << std::endl; readMemoryAccess(curr->align, curr->bytes, curr->offset); - readExpression(curr->ptr); + curr->ptr = popExpression(); return true; } bool maybeVisitImpl(Store *curr, uint8_t code) { @@ -1497,15 +1788,14 @@ public: } if (debug) std::cerr << "zz node: Store" << std::endl; readMemoryAccess(curr->align, curr->bytes, curr->offset); - readExpression(curr->ptr); - readExpression(curr->value); + curr->value = popExpression(); + curr->ptr = popExpression(); return true; } bool maybeVisitImpl(Const *curr, uint8_t code) { switch (code) { - case BinaryConsts::I8Const: curr->value = Literal(int32_t(getInt8())); break; - case BinaryConsts::I32Const: curr->value = Literal(getInt32()); break; - case BinaryConsts::I64Const: curr->value = Literal(getInt64()); break; + case BinaryConsts::I32Const: curr->value = Literal(getS32LEB()); break; + case BinaryConsts::I64Const: curr->value = Literal(getS64LEB()); break; case BinaryConsts::F32Const: curr->value = Literal(getFloat32()); break; case BinaryConsts::F64Const: curr->value = Literal(getFloat64()); break; default: return false; @@ -1522,6 +1812,8 @@ public: case BinaryConsts::I64Ctz: curr->op = Ctz; curr->type = i64; break; case BinaryConsts::I32Popcnt: curr->op = Popcnt; curr->type = i32; break; case BinaryConsts::I64Popcnt: curr->op = Popcnt; curr->type = i64; break; + case BinaryConsts::I32EqZ: curr->op = EqZ; curr->type = i32; break; + case BinaryConsts::I64EqZ: curr->op = EqZ; curr->type = i64; break; case BinaryConsts::F32Neg: curr->op = Neg; curr->type = f32; break; case BinaryConsts::F64Neg: curr->op = Neg; curr->type = f64; break; case BinaryConsts::F32Abs: curr->op = Abs; curr->type = f32; break; @@ -1569,7 +1861,7 @@ public: default: return false; } if (debug) std::cerr << "zz node: Unary" << std::endl; - readExpression(curr->value); + curr->value = popExpression(); return true; } bool maybeVisitImpl(Binary *curr, uint8_t code) { @@ -1601,6 +1893,8 @@ public: INT_TYPED_CODE(Shl); INT_TYPED_CODE(ShrU); INT_TYPED_CODE(ShrS); + INT_TYPED_CODE(RotL); + INT_TYPED_CODE(RotR); FLOAT_TYPED_CODE(Div); FLOAT_TYPED_CODE(CopySign); FLOAT_TYPED_CODE(Min); @@ -1622,8 +1916,9 @@ public: default: return false; } if (debug) std::cerr << "zz node: Binary" << std::endl; - readExpression(curr->left); - readExpression(curr->right); + curr->right = popExpression(); + curr->left = popExpression(); + curr->finalize(); return true; #undef TYPED_CODE #undef INT_TYPED_CODE @@ -1631,14 +1926,14 @@ public: } void visitSelect(Select *curr) { if (debug) std::cerr << "zz node: Select" << std::endl; - readExpression(curr->ifTrue); - readExpression(curr->ifFalse); - readExpression(curr->condition); + curr->condition = popExpression(); + curr->ifFalse = popExpression(); + curr->ifTrue = popExpression(); curr->finalize(); } void visitReturn(Return *curr) { if (debug) std::cerr << "zz node: Return" << std::endl; - readExpression(curr->value); + curr->value = popExpression(); } bool maybeVisitImpl(Host *curr, uint8_t code) { switch (code) { @@ -1650,12 +1945,13 @@ public: case BinaryConsts::GrowMemory: { curr->op = GrowMemory; curr->operands.resize(1); - readExpression(curr->operands[0]); + curr->operands[0] = popExpression(); break; } default: return false; } if (debug) std::cerr << "zz node: Host" << std::endl; + curr->finalize(); return true; } void visitNop(Nop *curr) { @@ -1669,4 +1965,3 @@ public: } // namespace wasm #endif // wasm_wasm_binary_h - diff --git a/src/wasm-interpreter.h b/src/wasm-interpreter.h index 2a841f6fd..e7a5d6d14 100644 --- a/src/wasm-interpreter.h +++ b/src/wasm-interpreter.h @@ -29,6 +29,10 @@ #include "support/bits.h" #include "wasm.h" +#ifdef WASM_INTERPRETER_DEBUG +#include "wasm-printing.h" +#endif + namespace wasm { using namespace cashew; @@ -39,7 +43,6 @@ IString WASM("wasm"), RETURN_FLOW("*return:)*"); enum { - pageSize = 64*1024, maxCallDepth = 250 }; @@ -112,6 +115,15 @@ public: return callFunction(export_->value, arguments); } + std::string printFunctionStack() { + std::string ret = "/== (binaryen interpreter stack trace)\n"; + for (int i = int(functionStack.size()) - 1; i >= 0; i--) { + ret += std::string("|: ") + functionStack[i].str + "\n"; + } + ret += std::string("\\==\n"); + return ret; + } + private: size_t callDepth = 0; @@ -120,6 +132,10 @@ private: int indent = 0; #endif + // Function stack. We maintain this explicitly to allow printing of + // stack traces. + std::vector<Name> functionStack; + // // Calls a function. This can be used both internally (calls from // the interpreter to another method), or when you want to call into @@ -166,7 +182,7 @@ private: indent++; #if WASM_INTERPRETER_DEBUG == 2 doIndent(std::cout, indent); - expression->print(std::cout, indent) << '\n'; + std::cout << "\n" << expression << '\n'; indent++; #endif } @@ -200,12 +216,33 @@ private: Flow visitBlock(Block *curr) { NOTE_ENTER("Block"); + // special-case Block, because Block nesting (in their first element) can be incredibly deep + std::vector<Block*> stack; + stack.push_back(curr); + while (curr->list.size() > 0 && curr->list[0]->is<Block>()) { + curr = curr->list[0]->cast<Block>(); + stack.push_back(curr); + } Flow flow; - for (auto expression : curr->list) { - flow = visit(expression); + auto* top = stack.back(); + while (stack.size() > 0) { + curr = stack.back(); + stack.pop_back(); if (flow.breaking()) { flow.clearIf(curr->name); - return flow; + continue; + } + auto& list = curr->list; + for (size_t i = 0; i < list.size(); i++) { + if (curr != top && i == 0) { + // one of the block recursions we already handled + continue; + } + flow = visit(list[i]); + if (flow.breaking()) { + flow.clearIf(curr->name); + break; + } } } return flow; @@ -246,46 +283,30 @@ private: if (curr->condition) { Flow conditionFlow = visit(curr->condition); if (conditionFlow.breaking()) return conditionFlow; - condition = conditionFlow.value.getInteger(); + condition = conditionFlow.value.getInteger() != 0; } return condition ? flow : Flow(); } Flow visitSwitch(Switch *curr) { NOTE_ENTER("Switch"); - Flow flow = visit(curr->value); - if (flow.breaking()) { - flow.clearIf(curr->name); - return flow; - } + Flow flow = visit(curr->condition); + if (flow.breaking()) return flow; NOTE_EVAL1(flow.value); int64_t index = flow.value.getInteger(); + + if (curr->value) { + flow = visit(curr->value); + if (flow.breaking()) return flow; + NOTE_EVAL1(flow.value); + } else { + flow = Flow(); + } + Name target = curr->default_; if (index >= 0 && (size_t)index < curr->targets.size()) { - target = curr->targets[index]; - } - // This is obviously very inefficient. This should be a cached data structure - std::map<Name, size_t> caseMap; // name => index in cases - for (size_t i = 0; i < curr->cases.size(); i++) { - caseMap[curr->cases[i].name] = i; - } - auto iter = caseMap.find(target); - if (iter == caseMap.end()) { - // not in the cases, so this is a break - Flow flow(target); - flow.clearIf(curr->name); - return flow; - } - size_t caseIndex = iter->second; - assert(caseIndex < curr->cases.size()); - while (caseIndex < curr->cases.size()) { - Switch::Case& c = curr->cases[caseIndex]; - flow = visit(c.body); - if (flow.breaking()) { - flow.clearIf(curr->name); - break; - } - caseIndex++; + target = curr->targets[(size_t)index]; } + flow.breakTo = target; return flow; } @@ -382,6 +403,7 @@ private: case Clz: return value.countLeadingZeroes(); case Ctz: return value.countTrailingZeroes(); case Popcnt: return value.popCount(); + case EqZ: return Literal(int32_t(value == Literal(int32_t(0)))); case ReinterpretInt: return value.castToF32(); case ExtendSInt32: return value.extendToSI64(); case ExtendUInt32: return value.extendToUI64(); @@ -395,6 +417,7 @@ private: case Clz: return value.countLeadingZeroes(); case Ctz: return value.countTrailingZeroes(); case Popcnt: return value.popCount(); + case EqZ: return Literal(int32_t(value == Literal(int64_t(0)))); case WrapInt64: return value.truncateToI32(); case ReinterpretInt: return value.castToF64(); case ConvertUInt64: return curr->type == f32 ? value.convertUToF32() : value.convertUToF64(); @@ -445,8 +468,8 @@ private: if (flow.breaking()) return flow; Literal right = flow.value; NOTE_EVAL2(left, right); - assert(left.type == curr->left->type); - assert(right.type == curr->right->type); + assert(isConcreteWasmType(curr->left->type) ? left.type == curr->left->type : true); + assert(isConcreteWasmType(curr->right->type) ? right.type == curr->right->type : true); if (left.type == i32) { switch (curr->op) { case Add: return left.add(right); @@ -476,6 +499,8 @@ private: case Shl: return left.shl(right.and_(Literal(int32_t(31)))); case ShrU: return left.shrU(right.and_(Literal(int32_t(31)))); case ShrS: return left.shrS(right.and_(Literal(int32_t(31)))); + case RotL: return left.rotL(right); + case RotR: return left.rotR(right); case Eq: return left.eq(right); case Ne: return left.ne(right); case LtS: return left.ltS(right); @@ -517,6 +542,8 @@ private: case Shl: return left.shl(right.and_(Literal(int64_t(63)))); case ShrU: return left.shrU(right.and_(Literal(int64_t(63)))); case ShrS: return left.shrS(right.and_(Literal(int64_t(63)))); + case RotL: return left.rotL(right); + case RotR: return left.rotR(right); case Eq: return left.eq(right); case Ne: return left.ne(right); case LtS: return left.ltS(right); @@ -574,21 +601,20 @@ private: Flow visitHost(Host *curr) { NOTE_ENTER("Host"); switch (curr->op) { - case PageSize: return Literal((int32_t)pageSize); - case MemorySize: return Literal((int32_t)instance.memorySize); + case PageSize: return Literal((int32_t)Memory::kPageSize); + case MemorySize: return Literal(int32_t(instance.memorySize * Memory::kPageSize)); case GrowMemory: { Flow flow = visit(curr->operands[0]); if (flow.breaking()) return flow; int32_t ret = instance.memorySize; uint32_t delta = flow.value.geti32(); - if (delta % pageSize != 0) trap("growMemory: delta not multiple"); - if (delta > uint32_t(-1) - pageSize) trap("growMemory: delta relatively too big"); + if (delta > uint32_t(-1) /Memory::kPageSize) trap("growMemory: delta relatively too big"); if (instance.memorySize >= uint32_t(-1) - delta) trap("growMemory: delta objectively too big"); uint32_t newSize = instance.memorySize + delta; if (newSize > instance.wasm.memory.max) trap("growMemory: exceeds max"); - instance.externalInterface->growMemory(instance.memorySize, newSize); + instance.externalInterface->growMemory(instance.memorySize * Memory::kPageSize, newSize * Memory::kPageSize); instance.memorySize = newSize; - return Literal(ret); + return Literal(int32_t(ret * Memory::kPageSize)); } case HasFeature: { IString id = curr->nameOperand; @@ -615,8 +641,8 @@ private: if (val > (double)std::numeric_limits<int32_t>::max() || val < (double)std::numeric_limits<int32_t>::min()) trap("i32.truncSFloat overflow"); return Literal(int32_t(val)); } else { - int64_t converted = val; - if ((val >= 1 && converted <= 0) || val < (double)LLONG_MIN) trap("i32.truncSFloat overflow"); + int64_t converted = (int64_t)val; + if ((val >= 1 && converted <= 0) || val < (double)LLONG_MIN) trap("i64.truncSFloat overflow"); return Literal(converted); } } @@ -625,10 +651,10 @@ private: double val = value.getFloat(); if (isnan(val)) trap("truncUFloat of nan"); if (curr->type == i32) { - if (val > (double)std::numeric_limits<uint32_t>::max() || val <= (double)-1) trap("i64.truncUFloat overflow"); + if (val > (double)std::numeric_limits<uint32_t>::max() || val <= (double)-1) trap("i32.truncUFloat overflow"); return Literal(uint32_t(val)); } else { - uint64_t converted = val; + uint64_t converted = (uint64_t)val; if (converted < val - 1 || val <= (double)-1) trap("i64.truncUFloat overflow"); return Literal(converted); } @@ -641,6 +667,7 @@ private: if (callDepth > maxCallDepth) externalInterface->trap("stack limit"); callDepth++; + functionStack.push_back(name); Function *function = wasm.functionsMap[name]; assert(function); @@ -656,29 +683,32 @@ private: if (function->result == none) ret = Literal(); assert(function->result == ret.type); callDepth--; + assert(functionStack.back() == name); + functionStack.pop_back(); #ifdef WASM_INTERPRETER_DEBUG std::cout << "exiting " << function->name << " with " << ret << '\n'; #endif return ret; } - size_t memorySize; + size_t memorySize; // in pages template <class LS> size_t getFinalAddress(LS* curr, Literal ptr) { - auto trapIfGt = [this](size_t lhs, size_t rhs, const char* msg) { + auto trapIfGt = [this](uint64_t lhs, uint64_t rhs, const char* msg) { if (lhs > rhs) { std::stringstream ss; ss << msg << ": " << lhs << " > " << rhs; externalInterface->trap(ss.str().c_str()); } }; + uint32_t memorySizeBytes = memorySize * Memory::kPageSize; uint64_t addr = ptr.type == i32 ? ptr.geti32() : ptr.geti64(); - trapIfGt(curr->offset, memorySize, "offset > memory"); - trapIfGt(addr, memorySize - curr->offset, "final > memory"); + trapIfGt(curr->offset, memorySizeBytes, "offset > memory"); + trapIfGt(addr, memorySizeBytes - curr->offset, "final > memory"); addr += curr->offset; - trapIfGt(curr->bytes, memorySize, "bytes > memory"); - trapIfGt(addr, memorySize - curr->bytes, "highest > memory"); + trapIfGt(curr->bytes, memorySizeBytes, "bytes > memory"); + trapIfGt(addr, memorySizeBytes - curr->bytes, "highest > memory"); return addr; } diff --git a/src/wasm-js.cpp b/src/wasm-js.cpp index 9c9096ad8..d978e4a2a 100644 --- a/src/wasm-js.cpp +++ b/src/wasm-js.cpp @@ -26,6 +26,7 @@ #include "asm2wasm.h" #include "wasm-interpreter.h" #include "wasm-s-parser.h" +#include "wasm-binary.h" #include "wasm-printing.h" using namespace cashew; @@ -67,13 +68,18 @@ extern "C" void EMSCRIPTEN_KEEPALIVE load_asm2wasm(char *input) { Ref asmjs = builder.parseToplevel(input); module = new AllocatingModule(); - module->memory.initial = EM_ASM_INT_V({ + uint32_t providedMemory = EM_ASM_INT_V({ return Module['providedTotalMemory']; // we receive the size of memory from emscripten }); + if (providedMemory & ~Memory::kPageMask) { + std::cerr << "Error: provided memory is not a multiple of the 64k wasm page size\n"; + exit(EXIT_FAILURE); + } + module->memory.initial = providedMemory / Memory::kPageSize; module->memory.max = pre.memoryGrowth ? -1 : module->memory.initial; if (wasmJSDebug) std::cerr << "wasming...\n"; - asm2wasm = new Asm2WasmBuilder(*module, pre.memoryGrowth, debug); + asm2wasm = new Asm2WasmBuilder(*module, pre.memoryGrowth, debug, false /* TODO: support imprecise? */); asm2wasm->processAsm(asmjs); if (wasmJSDebug) std::cerr << "optimizing...\n"; @@ -95,8 +101,22 @@ extern "C" void EMSCRIPTEN_KEEPALIVE load_asm2wasm(char *input) { } } +void finalizeModule() { + uint32_t providedMemory = EM_ASM_INT_V({ + return Module['providedTotalMemory']; // we receive the size of memory from emscripten + }); + if (providedMemory & ~Memory::kPageMask) { + std::cerr << "Error: provided memory is not a multiple of the 64k wasm page size\n"; + exit(EXIT_FAILURE); + } + module->memory.initial = providedMemory / Memory::kPageSize; + module->memory.max = (module->exportsMap.find(GROW_WASM_MEMORY) != module->exportsMap.end()) ? -1 : module->memory.initial; + + // global mapping is done in js in post.js +} + // loads wasm code in s-expression format -extern "C" void EMSCRIPTEN_KEEPALIVE load_s_expr2wasm(char *input, char *mappedGlobals) { +extern "C" void EMSCRIPTEN_KEEPALIVE load_s_expr2wasm(char *input) { prepare2wasm(); if (wasmJSDebug) std::cerr << "wasm-s-expression parsing...\n"; @@ -114,12 +134,25 @@ extern "C" void EMSCRIPTEN_KEEPALIVE load_s_expr2wasm(char *input, char *mappedG abort(); }); - module->memory.initial = EM_ASM_INT_V({ - return Module['providedTotalMemory']; // we receive the size of memory from emscripten - }); - module->memory.max = (module->exportsMap.find(GROW_WASM_MEMORY) != module->exportsMap.end()) ? -1 : module->memory.initial; + finalizeModule(); +} - // global mapping is done in js in post.js +// loads wasm code in binary format +extern "C" void EMSCRIPTEN_KEEPALIVE load_binary2wasm(char *raw, int32_t size) { + prepare2wasm(); + + if (wasmJSDebug) std::cerr << "wasm-binary parsing...\n"; + + module = new AllocatingModule(); + std::vector<char> input; + input.resize(size); + for (int32_t i = 0; i < size; i++) { + input[i] = raw[i]; + } + WasmBinaryBuilder parser(*module, input, debug); + parser.read(); + + finalizeModule(); } // instantiates the loaded wasm (which might be from asm2wasm, or @@ -148,19 +181,16 @@ extern "C" void EMSCRIPTEN_KEEPALIVE instantiate() { struct JSExternalInterface : ModuleInstance::ExternalInterface { void init(Module& wasm) override { - // if we have memory segments, create a new buffer here, just like native wasm support would. - // otherwise, no need to. - if (wasm.memory.segments.size() > 0) { + // create a new buffer here, just like native wasm support would. + EM_ASM_({ + Module['outside']['newBuffer'] = new ArrayBuffer($0); + }, wasm.memory.initial * Memory::kPageSize); + for (auto segment : wasm.memory.segments) { EM_ASM_({ - Module['outside']['newBuffer'] = new ArrayBuffer($0); - }, wasm.memory.initial); - for (auto segment : wasm.memory.segments) { - EM_ASM_({ - var source = Module['HEAP8'].subarray($1, $1 + $2); - var target = new Int8Array(Module['outside']['newBuffer']); - target.set(source, $0); - }, segment.offset, segment.data, segment.size); - } + var source = Module['HEAP8'].subarray($1, $1 + $2); + var target = new Int8Array(Module['outside']['newBuffer']); + target.set(source, $0); + }, segment.offset, segment.data, segment.size); } } @@ -352,6 +382,18 @@ extern "C" void EMSCRIPTEN_KEEPALIVE instantiate() { }; instance = new ModuleInstance(*module, new JSExternalInterface()); + + // stack trace hooks + EM_ASM({ + Module['outside']['extraStackTrace'] = function() { + return Pointer_stringify(Module['_interpreter_stack_trace']()); + }; + }); +} + +extern "C" int EMSCRIPTEN_KEEPALIVE interpreter_stack_trace() { + std::string stack = instance->printFunctionStack(); + return (int)strdup(stack.c_str()); // XXX leak } // Does a call from js into an export of the module. diff --git a/src/wasm-printing.h b/src/wasm-printing.h index 6d8ed7c8a..5b47a38bb 100644 --- a/src/wasm-printing.h +++ b/src/wasm-printing.h @@ -29,7 +29,7 @@ inline std::ostream& printWasm(Module* module, std::ostream& o) { return o; } -extern std::ostream& printWasm(Expression* expression, std::ostream& o); +extern std::ostream& printWasm(Expression* expression, std::ostream& o, bool minify = false); } diff --git a/src/wasm-s-parser.h b/src/wasm-s-parser.h index d0206bcc2..95f8a280d 100644 --- a/src/wasm-s-parser.h +++ b/src/wasm-s-parser.h @@ -23,12 +23,15 @@ #define wasm_wasm_s_parser_h #include <cmath> +#include <cctype> +#include <limits> #include "wasm.h" #include "mixed_arena.h" #include "shared-constants.h" #include "parsing.h" #include "asm_v_wasm.h" +#include "ast_utils.h" namespace wasm { @@ -124,47 +127,37 @@ public: SExpressionParser(char* input) : input(input) { root = nullptr; while (!root) { // keep parsing until we pass an initial comment - root = parseInnerList(); + root = parse(); } } Element* root; private: - // parses the internal part of a list, inside the parens. - Element* parseInnerList() { - if (input[0] == ';') { - // comment - input++; - if (input[0] == ';') { - while (input[0] != '\n') input++; - return nullptr; - } - input = strstr(input, ";)"); - assert(input); - return nullptr; - } - auto ret = allocator.alloc<Element>(); - while (1) { - Element* curr = parse(); - if (!curr) return ret; - ret->list().push_back(curr); - } - } - Element* parse() { - skipWhitespace(); - if (input[0] == 0 || input[0] == ')') return nullptr; - if (input[0] == '(') { - // a list - input++; - auto ret = parseInnerList(); + std::vector<Element *> stack; + Element *curr = allocator.alloc<Element>(); + while (1) { skipWhitespace(); - assert(input[0] == ')'); - input++; - return ret; + if (input[0] == 0) + break; + if (input[0] == '(') { + input++; + stack.push_back(curr); + curr = allocator.alloc<Element>(); + } else if (input[0] == ')') { + input++; + auto last = curr; + curr = stack.back(); + assert(stack.size()); + stack.pop_back(); + curr->list().push_back(last); + } else { + curr->list().push_back(parseString()); + } } - return parseString(); + assert(stack.size() == 0); + return curr; } void skipWhitespace() { @@ -173,7 +166,26 @@ private: if (input[0] == ';' && input[1] == ';') { while (input[0] && input[0] != '\n') input++; } else if (input[0] == '(' && input[1] == ';') { - input = strstr(input, ";)") + 2; + // Skip nested block comments. + input += 2; + int depth = 1; + while (1) { + if (input[0] == 0) { + return; + } + if (input[0] == '(' && input[1] == ';') { + input += 2; + depth++; + } else if (input[0] == ';' && input[1] == ')') { + input += 2; + --depth; + if (depth == 0) { + break; + } + } else { + input++; + } + } } else { return; } @@ -307,7 +319,20 @@ private: return IString((prefix + std::to_string(otherIndex++)).c_str(), false); } - void parseStart(Element& s) { wasm.addStart(s[1]->str()); } + Name getFunctionName(Element& s) { + if (s.dollared()) { + return s.str(); + } else { + // index + size_t offset = atoi(s.str().c_str()); + if (offset >= functionNames.size()) onError(); + return functionNames[offset]; + } + } + + void parseStart(Element& s) { + wasm.addStart(getFunctionName(*s[1])); + } void parseFunction(Element& s) { auto func = currFunction = allocator.alloc<Function>(); @@ -380,6 +405,7 @@ private: if (!autoBlock) { autoBlock = allocator.alloc<Block>(); autoBlock->list.push_back(func->body); + autoBlock->finalize(); func->body = autoBlock; } autoBlock->list.push_back(ex); @@ -427,8 +453,8 @@ public: // type.operation (e.g. i32.add) WasmType type = stringToWasmType(str, false, true); // Local copy to index into op without bounds checking. - constexpr size_t maxNameSize = 15; - char op[maxNameSize + 1] = { '\0' }; + enum { maxNameSize = 15 }; + char op[maxNameSize + 1] = {'\0'}; strncpy(op, dot + 1, maxNameSize); switch (op[0]) { case 'a': { @@ -462,7 +488,10 @@ public: abort_on(op); } case 'e': { - if (op[1] == 'q') return makeBinary(s, BinaryOp::Eq, type); + if (op[1] == 'q') { + if (op[2] == 0) return makeBinary(s, BinaryOp::Eq, type); + if (op[2] == 'z') return makeUnary(s, UnaryOp::EqZ, i32); + } if (op[1] == 'x') return makeUnary(s, op[7] == 'u' ? UnaryOp::ExtendUInt32 : UnaryOp::ExtendSInt32, type); abort_on(op); } @@ -521,10 +550,12 @@ public: if (op[2] == 'm') return makeBinary(s, op[4] == 'u' ? BinaryOp::RemU : BinaryOp::RemS, type); if (op[2] == 'i') return makeUnary(s, isWasmTypeFloat(type) ? UnaryOp::ReinterpretInt : UnaryOp::ReinterpretFloat, type); } + if (op[1] == 'o' && op[2] == 't') { + return makeBinary(s, op[3] == 'l' ? BinaryOp::RotL : BinaryOp::RotR, type); + } abort_on(op); } case 's': { - if (op[1] == 'e') return makeSelect(s, type); if (op[1] == 'h') { if (op[2] == 'l') return makeBinary(s, BinaryOp::Shl, type); return makeBinary(s, op[4] == 'u' ? BinaryOp::ShrU : BinaryOp::ShrS, type); @@ -557,7 +588,10 @@ public: switch (str[0]) { case 'b': { if (str[1] == 'l') return makeBlock(s); - if (str[1] == 'r') return makeBreak(s); + if (str[1] == 'r') { + if (str[2] == '_' && str[3] == 't') return makeBreakTable(s); + return makeBreak(s); + } abort_on(str); } case 'c': { @@ -568,6 +602,10 @@ public: } abort_on(str); } + case 'e': { + if (str[1] == 'l') return makeThenOrElse(s); + abort_on(str); + } case 'g': { if (str[1] == 'e') return makeGetLocal(s); if (str[1] == 'r') return makeHost(s, HostOp::GrowMemory); @@ -598,7 +636,8 @@ public: abort_on(str); } case 's': { - if (str[1] == 'e') return makeSetLocal(s); + if (str[1] == 'e' && str[2] == 't') return makeSetLocal(s); + if (str[1] == 'e' && str[2] == 'l') return makeSelect(s); abort_on(str); } case 'r': { @@ -606,7 +645,7 @@ public: abort_on(str); } case 't': { - if (str[1] == 'a') return makeSwitch(s); // aka tableswitch + if (str[1] == 'h') return makeThenOrElse(s); abort_on(str); } case 'u': { @@ -637,12 +676,12 @@ private: return ret; } - Expression* makeSelect(Element& s, WasmType type) { + Expression* makeSelect(Element& s) { auto ret = allocator.alloc<Select>(); ret->ifTrue = parseExpression(s[1]); ret->ifFalse = parseExpression(s[2]); ret->condition = parseExpression(s[3]); - ret->type = type; + ret->finalize(); return ret; } @@ -686,20 +725,68 @@ private: } Expression* makeBlock(Element& s) { + // special-case Block, because Block nesting (in their first element) can be incredibly deep + auto curr = allocator.alloc<Block>(); + auto* sp = &s; + std::vector<std::pair<Element*, Block*>> stack; + while (1) { + stack.emplace_back(sp, curr); + auto& s = *sp; + size_t i = 1; + if (i < s.size() && s[i]->isStr()) { + curr->name = s[i]->str(); + i++; + } else { + curr->name = getPrefixedName("block"); + } + labelStack.push_back(curr->name); + if (i >= s.size()) break; // labeled empty block + auto& first = *s[i]; + if (first[0]->str() == BLOCK) { + // recurse + curr = allocator.alloc<Block>(); + sp = &first; + continue; + } + break; + } + // we now have a stack of Blocks, with their labels, but no contents yet + for (int t = int(stack.size()) - 1; t >= 0; t--) { + auto* sp = stack[t].first; + auto* curr = stack[t].second; + auto& s = *sp; + size_t i = 1; + if (i < s.size()) { + if (s[i]->isStr()) { + i++; + } + if (t < int(stack.size()) - 1) { + // first child is one of our recursions + curr->list.push_back(stack[t + 1].second); + i++; + } + for (; i < s.size(); i++) { + curr->list.push_back(parseExpression(s[i])); + } + } + assert(labelStack.back() == curr->name); + labelStack.pop_back(); + curr->finalize(); + } + return stack[0].second; + } + + // Similar to block, but the label is handled by the enclosing if (since there might not be a then or else, ick) + Expression* makeThenOrElse(Element& s) { auto ret = allocator.alloc<Block>(); size_t i = 1; if (s[1]->isStr()) { - ret->name = s[1]->str(); i++; - } else { - ret->name = getPrefixedName("block"); } - labelStack.push_back(ret->name); for (; i < s.size(); i++) { ret->list.push_back(parseExpression(s[i])); } - labelStack.pop_back(); - if (ret->list.size() > 0) ret->type = ret->list.back()->type; + ret->finalize(); return ret; } @@ -739,8 +826,8 @@ private: ret->align = atoi(eq); } else if (str[0] == 'o') { uint64_t offset = atoll(eq); - if (offset > 0xffffffff) onError(); - ret->offset = offset; + if (offset > std::numeric_limits<uint32_t>::max()) onError(); + ret->offset = (uint32_t)offset; } else onError(); i++; } @@ -788,9 +875,38 @@ private: Expression* makeIf(Element& s) { auto ret = allocator.alloc<If>(); ret->condition = parseExpression(s[1]); - ret->ifTrue = parseExpression(s[2]); + + // ifTrue and ifFalse may get implicit blocks + auto handle = [&](const char* title, Element& s) { + Name name = getPrefixedName(title); + bool explicitThenElse = false; + if (s[0]->str() == THEN || s[0]->str() == ELSE) { + explicitThenElse = true; + if (s[1]->dollared()) { + name = s[1]->str(); + } + } + labelStack.push_back(name); + auto* ret = parseExpression(&s); + labelStack.pop_back(); + if (explicitThenElse) { + ret->dyn_cast<Block>()->name = name; + } else { + // add a block if we must + if (BreakSeeker::has(ret, name)) { + auto* block = allocator.alloc<Block>(); + block->name = name; + block->list.push_back(ret); + block->finalize(); + ret = block; + } + } + return ret; + }; + + ret->ifTrue = handle("if-true", *s[2]); if (s.size() == 4) { - ret->ifFalse = parseExpression(s[3]); + ret->ifFalse = handle("if-else", *s[3]); ret->finalize(); } return ret; @@ -802,9 +918,7 @@ private: for (; i < s.size() && i < stopAt; i++) { ret->list.push_back(parseExpression(s[i])); } - if (ret->list.size() > 0) { - ret->type = ret->list.back()->type; - } + ret->finalize(); // Note that we do not name these implicit/synthetic blocks. They // are the effects of syntactic sugar, and nothing can branch to // them anyhow. @@ -899,6 +1013,22 @@ private: return ret; } + Expression* makeBreakTable(Element& s) { + auto ret = allocator.alloc<Switch>(); + size_t i = 1; + while (!s[i]->isList()) { + ret->targets.push_back(getLabel(*s[i++])); + } + ret->default_ = ret->targets.back(); + ret->targets.pop_back(); + ret->condition = parseExpression(s[i++]); + if (i < s.size()) { + ret->value = ret->condition; + ret->condition = parseExpression(s[i++]); + } + return ret; + } + Expression* makeReturn(Element& s) { auto ret = allocator.alloc<Return>(); if (s.size() >= 2) { @@ -907,39 +1037,11 @@ private: return ret; } - Expression* makeSwitch(Element& s) { - auto ret = allocator.alloc<Switch>(); - size_t i = 1; - if (s[i]->isStr()) { - ret->name = s[i]->str(); - i++; - } else { - ret->name = getPrefixedName("switch"); - } - labelStack.push_back(ret->name); - ret->value = parseExpression(s[i]); - i++; - Element& table = *s[i]; - i++; - for (size_t j = 1; j < table.size(); j++) { - Element& curr = *table[j]; - ret->targets.push_back(getLabel(*curr[1])); - } - Element& curr = *s[i]; - ret->default_ = getLabel(*curr[1]); - i++; - for (; i < s.size(); i++) { - Element& curr = *s[i]; - assert(curr[0]->str() == CASE); - if (curr.size() < 2) onError(); - ret->cases.emplace_back(curr[1]->str(), makeMaybeBlock(curr, 2, curr.size())); - } - ret->type = ret->cases.size() > 0 ? ret->cases[0].body->type : none; - labelStack.pop_back(); - return ret; - } + bool hasMemory = false; void parseMemory(Element& s) { + hasMemory = true; + wasm.memory.initial = atoi(s[1]->c_str()); if (s.size() == 2) return; size_t i = 2; @@ -991,6 +1093,12 @@ private: } void parseExport(Element& s) { + if (!s[2]->dollared() && !std::isdigit(s[2]->str()[0])) { + assert(s[2]->str() == MEMORY); + if (!hasMemory) onError(); + wasm.memory.exportName = s[1]->str(); + return; + } auto ex = allocator.alloc<Export>(); ex->name = s[1]->str(); ex->value = s[2]->str(); @@ -1038,12 +1146,7 @@ private: void parseTable(Element& s) { for (size_t i = 1; i < s.size(); i++) { - Name name = s[i]->str(); - if (!s[i]->dollared()) { - // index, we haven't - name = functionNames[atoi(name.str)]; - } - wasm.table.names.push_back(name); + wasm.table.names.push_back(getFunctionName(*s[i])); } } diff --git a/src/wasm-validator.h b/src/wasm-validator.h index 04493bb5b..2918c918d 100644 --- a/src/wasm-validator.h +++ b/src/wasm-validator.h @@ -76,16 +76,6 @@ public: validateAlignment(curr->align); } void visitSwitch(Switch *curr) { - std::set<Name> inTable; - for (auto target : curr->targets) { - if (target.is()) { - inTable.insert(target); - } - } - for (auto& c : curr->cases) { - shouldBeFalse(c.name.is() && inTable.find(c.name) == inTable.end()); - } - shouldBeFalse(curr->default_.is() && inTable.find(curr->default_) == inTable.end()); } void visitUnary(Unary *curr) { shouldBeTrue(curr->value->type == curr->type); diff --git a/src/wasm.h b/src/wasm.h index ec2cd469e..7422dd85d 100644 --- a/src/wasm.h +++ b/src/wasm.h @@ -503,6 +503,20 @@ public: default: WASM_UNREACHABLE(); } } + Literal rotL(const Literal& other) const { + switch (type) { + case WasmType::i32: return Literal(RotateLeft(uint32_t(i32), uint32_t(other.i32))); + case WasmType::i64: return Literal(RotateLeft(uint64_t(i64), uint64_t(other.i64))); + default: WASM_UNREACHABLE(); + } + } + Literal rotR(const Literal& other) const { + switch (type) { + case WasmType::i32: return Literal(RotateRight(uint32_t(i32), uint32_t(other.i32))); + case WasmType::i64: return Literal(RotateRight(uint64_t(i64), uint64_t(other.i64))); + default: WASM_UNREACHABLE(); + } + } Literal eq(const Literal& other) const { switch (type) { @@ -669,6 +683,8 @@ public: enum UnaryOp { Clz, Ctz, Popcnt, // int Neg, Abs, Ceil, Floor, Trunc, Nearest, Sqrt, // float + // relational + EqZ, // conversions ExtendSInt32, ExtendUInt32, WrapInt64, TruncSFloat32, TruncUFloat32, TruncSFloat64, TruncUFloat64, ReinterpretFloat, // int ConvertSInt32, ConvertUInt32, ConvertSInt64, ConvertUInt64, PromoteFloat32, DemoteFloat64, ReinterpretInt // float @@ -676,7 +692,7 @@ enum UnaryOp { enum BinaryOp { Add, Sub, Mul, // int or float - DivS, DivU, RemS, RemU, And, Or, Xor, Shl, ShrU, ShrS, // int + DivS, DivU, RemS, RemU, And, Or, Xor, Shl, ShrU, ShrS, RotL, RotR, // int Div, CopySign, Min, Max, // float // relational ops Eq, Ne, // int or float @@ -794,6 +810,12 @@ public: Name name; ExpressionList list; + + void finalize() { + if (list.size() > 0) { + type = list.back()->type; + } + } }; class If : public Expression { @@ -836,20 +858,14 @@ public: class Switch : public Expression { public: - Switch() : Expression(SwitchId) {} - - struct Case { - Name name; - Expression *body; - Case() {} - Case(Name name, Expression *body) : name(name), body(body) {} - }; + Switch() : Expression(SwitchId), condition(nullptr), value(nullptr) { + type = unreachable; + } - Name name; - Expression *value; std::vector<Name> targets; Name default_; - std::vector<Case> cases; + Expression *condition; + Expression *value; }; class CallBase : public Expression { @@ -958,6 +974,11 @@ public: UnaryOp op; Expression *value; + + // the type is always the type of the operands, + // except for relationals + + bool isRelational() { return op == EqZ; } }; class Binary : public Expression { @@ -1074,6 +1095,8 @@ public: class Memory { public: + static const size_t kPageSize = 64 * 1024; + static const size_t kPageMask = ~(kPageSize - 1); struct Segment { size_t offset; const char* data; @@ -1082,8 +1105,9 @@ public: Segment(size_t offset, const char *data, size_t size) : offset(offset), data(data), size(size) {} }; - size_t initial, max; + size_t initial, max; // sizes are in pages std::vector<Segment> segments; + Name exportName; Memory() : initial(0), max((uint32_t)-1) {} }; @@ -1108,7 +1132,7 @@ public: Module() : functionTypeIndex(0), importIndex(0), exportIndex(0), functionIndex(0) {} void addFunctionType(FunctionType* curr) { - Name numericName = Name::fromInt(functionTypeIndex); + Name numericName = Name::fromInt(functionTypeIndex); // TODO: remove all these, assert on names already existing, do numeric stuff in wasm-s-parser etc. if (curr->name.isNull()) { curr->name = numericName; } @@ -1282,10 +1306,8 @@ struct ChildWalker : public WasmWalkerBase<ChildWalker<ParentType>> { parent.walk(curr->value); } void visitSwitch(Switch *curr) { - parent.walk(curr->value); - for (auto& case_ : curr->cases) { - parent.walk(case_.body); - } + parent.walk(curr->condition); + if (curr->value) parent.walk(curr->value); } void visitCall(Call *curr) { ExpressionList& list = curr->operands; @@ -1394,7 +1416,37 @@ struct WasmWalker : public WasmWalkerBase<SubType, ReturnType> { void walk(Expression*& curr) override { if (!curr) return; - ChildWalker<WasmWalker<SubType, ReturnType>>(*this).visit(curr); + // special-case Block, because Block nesting (in their first element) can be incredibly deep + if (curr->is<Block>()) { + auto* block = curr->dyn_cast<Block>(); + std::vector<Block*> stack; + stack.push_back(block); + while (block->list.size() > 0 && block->list[0]->is<Block>()) { + block = block->list[0]->cast<Block>(); + stack.push_back(block); + } + // walk all the children + for (int i = int(stack.size()) - 1; i >= 0; i--) { + auto* block = stack[i]; + auto& children = block->list; + for (size_t j = 0; j < children.size(); j++) { + if (i < int(stack.size()) - 1 && j == 0) { + // this is one of the stacked blocks, no need to walk its children, we are doing that ourselves + this->visit(children[0]); + if (replace) { + children[0] = replace; + replace = nullptr; + } + } else { + this->walk(children[j]); + } + } + } + // we walked all the children, and can rejoin later below to visit this node itself + } else { + // generic child-walking + ChildWalker<WasmWalker<SubType, ReturnType>>(*this).visit(curr); + } this->visit(curr); diff --git a/src/wasm2asm.h b/src/wasm2asm.h index d8ac6376e..310ba4224 100644 --- a/src/wasm2asm.h +++ b/src/wasm2asm.h @@ -652,36 +652,24 @@ Ref Wasm2AsmBuilder::processFunctionBody(Expression* curr, IString result) { } Expression *defaultBody = nullptr; // default must be last in asm.js Ref visitSwitch(Switch *curr) { - Ref ret = ValueBuilder::makeLabel(fromName(curr->name), ValueBuilder::makeBlock()); - Ref value; - if (isStatement(curr->value)) { + assert(!curr->value); + Ref ret = ValueBuilder::makeBlock(); + Ref condition; + if (isStatement(curr->condition)) { ScopedTemp temp(i32, parent); - flattenAppend(ret[2], visit(curr->value, temp)); - value = temp.getAstName(); + flattenAppend(ret[2], visit(curr->condition, temp)); + condition = temp.getAstName(); } else { - value = visit(curr->value, EXPRESSION_RESULT); + condition = visit(curr->condition, EXPRESSION_RESULT); } - Ref theSwitch = ValueBuilder::makeSwitch(value); + Ref theSwitch = ValueBuilder::makeSwitch(condition); ret[2][1]->push_back(theSwitch); - for (auto& c : curr->cases) { - if (c.name == curr->default_) { - defaultBody = c.body; - continue; - } - bool added = false; - for (size_t i = 0; i < curr->targets.size(); i++) { - if (curr->targets[i] == c.name) { - ValueBuilder::appendCaseToSwitch(theSwitch, ValueBuilder::makeNum(i)); - added = true; - } - } - assert(added); - ValueBuilder::appendCodeToSwitch(theSwitch, blockify(visit(c.body, NO_RESULT)), false); - } - if (defaultBody) { - ValueBuilder::appendDefaultToSwitch(theSwitch); - ValueBuilder::appendCodeToSwitch(theSwitch, blockify(visit(defaultBody, NO_RESULT)), false); + for (size_t i = 0; i < curr->targets.size(); i++) { + ValueBuilder::appendCaseToSwitch(theSwitch, ValueBuilder::makeNum(i)); + ValueBuilder::appendCodeToSwitch(theSwitch, blockify(ValueBuilder::makeBreak(fromName(curr->targets[i]))), false); } + ValueBuilder::appendDefaultToSwitch(theSwitch); + ValueBuilder::appendCodeToSwitch(theSwitch, blockify(ValueBuilder::makeBreak(fromName(curr->default_))), false); return ret; } |