diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/asm2wasm-main.cpp | 19 | ||||
-rw-r--r-- | src/asm2wasm.h | 103 | ||||
-rw-r--r-- | src/compiler-support.h | 1 | ||||
-rw-r--r-- | src/js/wasm.js-post.js | 49 | ||||
-rw-r--r-- | src/passes/Print.cpp | 3 | ||||
-rw-r--r-- | src/s2wasm-main.cpp | 21 | ||||
-rw-r--r-- | src/s2wasm.h | 66 | ||||
-rw-r--r-- | src/support/bits.cpp | 25 | ||||
-rw-r--r-- | src/support/bits.h | 2 | ||||
-rw-r--r-- | src/support/command-line.cpp | 8 | ||||
-rw-r--r-- | src/support/command-line.h | 2 | ||||
-rw-r--r-- | src/wasm-binary.h | 485 | ||||
-rw-r--r-- | src/wasm-interpreter.h | 6 | ||||
-rw-r--r-- | src/wasm-js.cpp | 2 | ||||
-rw-r--r-- | src/wasm-s-parser.h | 12 | ||||
-rw-r--r-- | src/wasm.h | 9 |
16 files changed, 515 insertions, 298 deletions
diff --git a/src/asm2wasm-main.cpp b/src/asm2wasm-main.cpp index 73f6853d8..56e8a417f 100644 --- a/src/asm2wasm-main.cpp +++ b/src/asm2wasm-main.cpp @@ -29,6 +29,9 @@ using namespace cashew; using namespace wasm; int main(int argc, const char *argv[]) { + bool opts = true; + bool imprecise = false; + Options options("asm2wasm", "Translate asm.js files to .wast files"); options .add("--output", "-o", "Output file (stdout if not specified)", @@ -45,6 +48,14 @@ int main(int argc, const char *argv[]) { [](Options *o, const std::string &argument) { o->extra["total memory"] = argument; }) + .add("--no-opts", "-n", "Disable optimization passes", Options::Arguments::Zero, + [&opts](Options *o, const std::string &) { + opts = false; + }) + .add("--imprecise", "-i", "Imprecise optimizations", Options::Arguments::Zero, + [&imprecise](Options *o, const std::string &) { + imprecise = true; + }) .add_positional("INFILE", Options::Arguments::One, [](Options *o, const std::string &argument) { o->extra["infile"] = argument; @@ -81,11 +92,13 @@ int main(int argc, const char *argv[]) { if (options.debug) std::cerr << "wasming..." << std::endl; AllocatingModule wasm; wasm.memory.initial = wasm.memory.max = totalMemory / Memory::kPageSize; - Asm2WasmBuilder asm2wasm(wasm, pre.memoryGrowth, options.debug); + Asm2WasmBuilder asm2wasm(wasm, pre.memoryGrowth, options.debug, imprecise); asm2wasm.processAsm(asmjs); - if (options.debug) std::cerr << "optimizing..." << std::endl; - asm2wasm.optimize(); + if (opts) { + if (options.debug) std::cerr << "optimizing..." << std::endl; + asm2wasm.optimize(); + } if (options.debug) std::cerr << "printing..." << std::endl; Output output(options.extra["output"], options.debug); diff --git a/src/asm2wasm.h b/src/asm2wasm.h index c4f73b57a..4c37b7705 100644 --- a/src/asm2wasm.h +++ b/src/asm2wasm.h @@ -150,7 +150,8 @@ class Asm2WasmBuilder { std::map<CallIndirect*, IString> callIndirects; // track these, as we need to fix them after we know the functionTableStarts. this maps call => its function table bool memoryGrowth; - int debug; + bool debug; + bool imprecise; public: std::map<IString, MappedGlobal> mappedGlobals; @@ -254,13 +255,14 @@ private: } public: - Asm2WasmBuilder(AllocatingModule& wasm, bool memoryGrowth, int debug) + Asm2WasmBuilder(AllocatingModule& wasm, bool memoryGrowth, bool debug, bool imprecise) : wasm(wasm), allocator(wasm.allocator), nextGlobal(8), maxGlobal(1000), memoryGrowth(memoryGrowth), - debug(debug) {} + debug(debug), + imprecise(imprecise) {} void processAsm(Ref ast); void optimize(); @@ -417,8 +419,9 @@ private: return nullptr; } + // ensure a nameless block Block* blockify(Expression* expression) { - if (expression->is<Block>()) return expression->dyn_cast<Block>(); + if (expression->is<Block>() && !expression->cast<Block>()->name.is()) return expression->dyn_cast<Block>(); auto ret = allocator.alloc<Block>(); ret->list.push_back(expression); ret->finalize(); @@ -639,11 +642,17 @@ void Asm2WasmBuilder::processAsm(Ref ast) { for (unsigned k = 0; k < contents->size(); k++) { Ref pair = contents[k]; IString key = pair[0]->getIString(); - Ref value = pair[1]; - assert(value[0] == NAME); + assert(pair[1][0] == NAME); + IString value = pair[1][1]->getIString(); + if (key == Name("_emscripten_replace_memory")) { + // asm.js memory growth provides this special non-asm function, which we don't need (we use grow_memory) + assert(wasm.functionsMap.find(value) == wasm.functionsMap.end()); + continue; + } + assert(wasm.functionsMap.find(value) != wasm.functionsMap.end()); auto export_ = allocator.alloc<Export>(); export_->name = key; - export_->value = value[1]->getIString(); + export_->value = value; wasm.addExport(export_); } } @@ -729,10 +738,8 @@ Function* Asm2WasmBuilder::processFunction(Ref ast) { if (debug) { std::cout << "\nfunc: " << ast[1]->getIString().str << '\n'; - if (debug >= 2) { - ast->stringify(std::cout); - std::cout << '\n'; - } + ast->stringify(std::cout); + std::cout << '\n'; } auto function = allocator.alloc<Function>(); @@ -802,7 +809,7 @@ Function* Asm2WasmBuilder::processFunction(Ref ast) { std::function<Expression* (Ref)> process = [&](Ref ast) -> Expression* { AstStackHelper astStackHelper(ast); // TODO: only create one when we need it? - if (debug >= 2) { + if (debug) { std::cout << "at: "; ast->stringify(std::cout); std::cout << '\n'; @@ -1011,37 +1018,38 @@ Function* Asm2WasmBuilder::processFunction(Ref ast) { } else if (ast[1] == B_NOT) { // ~, might be ~~ as a coercion or just a not if (ast[2][0] == UNARY_PREFIX && ast[2][1] == B_NOT) { -#if 0 - auto ret = allocator.alloc<Unary>(); - ret->op = TruncSFloat64; // equivalent to U, except for error handling, which asm.js doesn't have anyhow - ret->value = process(ast[2][2]); - ret->type = WasmType::i32; - return ret; -#endif - // WebAssembly traps on float-to-int overflows, but asm.js wouldn't, so we must emulate that - CallImport *ret = allocator.alloc<CallImport>(); - ret->target = F64_TO_INT; - auto input = process(ast[2][2]); - if (input->type == f32) { - auto conv = allocator.alloc<Unary>(); - conv->op = PromoteFloat32; - conv->value = input; - conv->type = WasmType::f64; - input = conv; - } - ret->operands.push_back(input); - ret->type = i32; - static bool addedImport = false; - if (!addedImport) { - addedImport = true; - auto import = allocator.alloc<Import>(); // f64-to-int = asm2wasm.f64-to-int; - import->name = F64_TO_INT; - import->module = ASM2WASM; - import->base = F64_TO_INT; - import->type = ensureFunctionType("id", &wasm, allocator); - wasm.addImport(import); + if (imprecise) { + auto ret = allocator.alloc<Unary>(); + ret->value = process(ast[2][2]); + ret->op = ret->value->type == f64 ? TruncSFloat64 : TruncSFloat32; // imprecise, because this wasm thing might trap, while asm.js never would + ret->type = WasmType::i32; + return ret; + } else { + // WebAssembly traps on float-to-int overflows, but asm.js wouldn't, so we must emulate that + CallImport *ret = allocator.alloc<CallImport>(); + ret->target = F64_TO_INT; + auto input = process(ast[2][2]); + if (input->type == f32) { + auto conv = allocator.alloc<Unary>(); + conv->op = PromoteFloat32; + conv->value = input; + conv->type = WasmType::f64; + input = conv; + } + ret->operands.push_back(input); + ret->type = i32; + static bool addedImport = false; + if (!addedImport) { + addedImport = true; + auto import = allocator.alloc<Import>(); // f64-to-int = asm2wasm.f64-to-int; + import->name = F64_TO_INT; + import->module = ASM2WASM; + import->base = F64_TO_INT; + import->type = ensureFunctionType("id", &wasm, allocator); + wasm.addImport(import); + } + return ret; } - return ret; } // no bitwise unary not, so do xor with -1 auto ret = allocator.alloc<Binary>(); @@ -1051,13 +1059,10 @@ Function* Asm2WasmBuilder::processFunction(Ref ast) { ret->type = WasmType::i32; return ret; } else if (ast[1] == L_NOT) { - // no logical unary not, so do == 0 - auto ret = allocator.alloc<Binary>(); - ret->op = Eq; - ret->left = process(ast[2]); - ret->right = allocator.alloc<Const>()->set(Literal(0)); - assert(ret->left->type == ret->right->type); - ret->finalize(); + auto ret = allocator.alloc<Unary>(); + ret->op = EqZ; + ret->value = process(ast[2]); + ret->type = i32; return ret; } abort_on("bad unary", ast); diff --git a/src/compiler-support.h b/src/compiler-support.h index 54dd61bc8..9e298b278 100644 --- a/src/compiler-support.h +++ b/src/compiler-support.h @@ -32,6 +32,7 @@ #elif defined(_MSC_VER) # define WASM_UNREACHABLE() __assume(false) #else +# include <stdlib.h> # define WASM_UNREACHABLE() abort() #endif diff --git a/src/js/wasm.js-post.js b/src/js/wasm.js-post.js index 12e9e5315..91ca0c4e8 100644 --- a/src/js/wasm.js-post.js +++ b/src/js/wasm.js-post.js @@ -17,10 +17,10 @@ function integrateWasmJS(Module) { // wasm.js has several methods for creating the compiled code module here: // * 'native-wasm' : use native WebAssembly support in the browser - // * 'wasm-s-parser': load s-expression code from a .wast and interpret - // * 'wasm-binary': load binary wasm and interpret - // * 'asm2wasm': load asm.js code, translate to wasm, and interpret - // * 'just-asm': no wasm, just load the asm.js code and use that (good for testing) + // * 'interpret-s-expr': load s-expression code from a .wast and interpret + // * 'interpret-binary': load binary wasm and interpret + // * 'interpret-asm2wasm': load asm.js code, translate to wasm, and interpret + // * 'asmjs': no wasm, just load the asm.js code and use that (good for testing) // The method can be set at compile time (BINARYEN_METHOD), or runtime by setting Module['wasmJSMethod']. // The method can be a comma-separated list, in which case, we will try the // options one by one. Some of them can fail gracefully, and then we can try @@ -28,7 +28,7 @@ function integrateWasmJS(Module) { // inputs - var method = Module['wasmJSMethod'] || {{{ wasmJSMethod }}} || 'native-wasm,wasm-s-parser'; // by default, try native and then .wast + var method = Module['wasmJSMethod'] || {{{ wasmJSMethod }}} || 'native-wasm,interpret-s-expr'; // by default, try native and then .wast var wasmTextFile = Module['wasmTextFile'] || {{{ wasmTextFile }}}; var wasmBinaryFile = Module['wasmBinaryFile'] || {{{ wasmBinaryFile }}}; @@ -55,6 +55,8 @@ function integrateWasmJS(Module) { parent: Module // Module inside wasm-js.cpp refers to wasm-js.cpp; this allows access to the outside program. }; + var exports = null; + function lookupImport(mod, base) { var lookup = info; if (mod.indexOf('.') < 0) { @@ -93,7 +95,7 @@ function integrateWasmJS(Module) { updateGlobalBufferViews(); Module['reallocBuffer'] = function(size) { var old = Module['buffer']; - wasmJS['asmExports']['__growWasmMemory'](size); // tiny wasm method that just does grow_memory + exports['__growWasmMemory'](size); // tiny wasm method that just does grow_memory return Module['buffer'] !== old ? Module['buffer'] : null; // if it was reallocated, it changed }; } @@ -183,11 +185,12 @@ function integrateWasmJS(Module) { info['env'] = env; var instance; instance = Wasm.instantiateModule(getBinary(), info); - mergeMemory(instance.exports.memory); + exports = instance.exports; + mergeMemory(exports.memory); applyMappedGlobals(wasmBinaryFile); - return instance.exports; + return exports; }; return true; @@ -221,31 +224,25 @@ function integrateWasmJS(Module) { info.global = global; info.env = env; - Module['reallocBuffer'] = function(size) { - var old = Module['buffer']; - wasmJS['asmExports']['__growWasmMemory'](size); // tiny wasm method that just does grow_memory - return Module['buffer'] !== old ? Module['buffer'] : null; // if it was reallocated, it changed - }; - wasmJS['providedTotalMemory'] = Module['buffer'].byteLength; - // Prepare to generate wasm, using either asm2wasm or wasm-s-parser + // Prepare to generate wasm, using either asm2wasm or s-exprs var code; - if (method === 'wasm-binary') { + if (method === 'interpret-binary') { code = getBinary(); } else { - code = Module['read'](method == 'asm2wasm' ? asmjsCodeFile : wasmTextFile); + code = Module['read'](method == 'interpret-asm2wasm' ? asmjsCodeFile : wasmTextFile); } var temp; - if (method == 'asm2wasm') { + if (method == 'interpret-asm2wasm') { temp = wasmJS['_malloc'](code.length + 1); wasmJS['writeAsciiToMemory'](code, temp); wasmJS['_load_asm2wasm'](temp); - } else if (method === 'wasm-s-parser') { + } else if (method === 'interpret-s-expr') { temp = wasmJS['_malloc'](code.length + 1); wasmJS['writeAsciiToMemory'](code, temp); wasmJS['_load_s_expr2wasm'](temp); - } else if (method === 'wasm-binary') { + } else if (method === 'interpret-binary') { temp = wasmJS['_malloc'](code.length); wasmJS['HEAPU8'].set(code, temp); wasmJS['_load_binary2wasm'](temp, code.length); @@ -261,13 +258,15 @@ function integrateWasmJS(Module) { Module['newBuffer'] = null; } - if (method == 'wasm-s-parser') { + if (method == 'interpret-s-expr') { applyMappedGlobals(wasmTextFile); - } else if (method == 'wasm-binary') { + } else if (method == 'interpret-binary') { applyMappedGlobals(wasmBinaryFile); } - return wasmJS['asmExports']; + exports = wasmJS['asmExports']; + + return exports; }; return true; @@ -281,9 +280,9 @@ function integrateWasmJS(Module) { //Module['printErr']('using wasm/js method: ' + curr); if (curr === 'native-wasm') { if (doNativeWasm()) return; - } else if (curr === 'just-asm') { + } else if (curr === 'asmjs') { if (doJustAsm()) return; - } else if (curr === 'asm2wasm' || curr === 'wasm-s-parser' || curr === 'wasm-binary') { + } else if (curr === 'interpret-asm2wasm' || curr === 'interpret-s-expr' || curr === 'interpret-binary') { if (doWasmPolyfill(curr)) return; } else { throw 'bad method: ' + curr; diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp index e3663e164..77dcb6154 100644 --- a/src/passes/Print.cpp +++ b/src/passes/Print.cpp @@ -259,11 +259,12 @@ struct PrintSExpression : public WasmVisitor<PrintSExpression, void> { } void visitUnary(Unary *curr) { o << '('; - prepareColor(o) << printWasmType(curr->type) << '.'; + prepareColor(o) << printWasmType(curr->isRelational() ? curr->value->type : curr->type) << '.'; switch (curr->op) { case Clz: o << "clz"; break; case Ctz: o << "ctz"; break; case Popcnt: o << "popcnt"; break; + case EqZ: o << "eqz"; break; case Neg: o << "neg"; break; case Abs: o << "abs"; break; case Ceil: o << "ceil"; break; diff --git a/src/s2wasm-main.cpp b/src/s2wasm-main.cpp index d7a18f547..752494aac 100644 --- a/src/s2wasm-main.cpp +++ b/src/s2wasm-main.cpp @@ -58,6 +58,16 @@ int main(int argc, const char *argv[]) { [](Options *o, const std::string &argument) { o->extra["stack-allocation"] = argument; }) + .add("--initial-memory", "-i", "Initial size of the linear memory", + Options::Arguments::One, + [](Options *o, const std::string &argument) { + o->extra["initial-memory"] = argument; + }) + .add("--max-memory", "-m", "Maximum size of the linear memory", + Options::Arguments::One, + [](Options *o, const std::string &argument) { + o->extra["max-memory"] = argument; + }) .add_positional("INFILE", Options::Arguments::One, [](Options *o, const std::string &argument) { o->extra["infile"] = argument; @@ -75,9 +85,18 @@ int main(int argc, const char *argv[]) { options.extra.find("stack-allocation") != options.extra.end() ? std::stoull(options.extra["stack-allocation"]) : 0; + size_t initialMem = + options.extra.find("initial-memory") != options.extra.end() + ? std::stoull(options.extra["initial-memory"]) + : 0; + size_t maxMem = + options.extra.find("max-memory") != options.extra.end() + ? std::stoull(options.extra["max-memory"]) + : 0; if (options.debug) std::cerr << "Global base " << globalBase << '\n'; S2WasmBuilder s2wasm(wasm, input.c_str(), options.debug, globalBase, - stackAllocation, ignoreUnknownSymbols, startFunction); + stackAllocation, initialMem, maxMem, ignoreUnknownSymbols, + startFunction); if (options.debug) std::cerr << "Emscripten gluing..." << std::endl; std::stringstream meta; diff --git a/src/s2wasm.h b/src/s2wasm.h index eeff5ba04..a12c5cb53 100644 --- a/src/s2wasm.h +++ b/src/s2wasm.h @@ -49,6 +49,7 @@ class S2WasmBuilder { public: S2WasmBuilder(AllocatingModule& wasm, const char* input, bool debug, size_t globalBase, size_t stackAllocation, + size_t userInitialMemory, size_t userMaxMemory, bool ignoreUnknownSymbols, Name startFunction) : wasm(wasm), allocator(wasm.allocator), @@ -57,7 +58,19 @@ class S2WasmBuilder { startFunction(startFunction), globalBase(globalBase), nextStatic(globalBase), - initialMemory(0) { + minInitialMemory(0), + userInitialMemory(userInitialMemory), + userMaxMemory(userMaxMemory) { + if (userMaxMemory && userMaxMemory < userInitialMemory) { + Fatal() << "Specified max memory " << userMaxMemory << + " is < specified initial memory " << userInitialMemory; + } + if (roundUpToPageSize(userMaxMemory) != userMaxMemory) { + Fatal() << "Specified max memory " << userMaxMemory << " is not a multiple of 64k"; + } + if (roundUpToPageSize(userInitialMemory) != userInitialMemory) { + Fatal() << "Specified initial memory " << userInitialMemory << " is not a multiple of 64k"; + } s = input; scan(); s = input; @@ -77,7 +90,10 @@ class S2WasmBuilder { size_t globalBase, // where globals can start to be statically allocated, i.e., the data segment nextStatic; // location of next static allocation std::map<Name, int32_t> staticAddresses; // name => address - size_t initialMemory; // Initial size (in bytes) of memory (after linking, this is rounded and set on the wasm object in pages) + size_t minInitialMemory; // Minimum initial size (in bytes) of memory. + size_t userInitialMemory; // Initial memory size (in bytes) specified by the user. + size_t userMaxMemory; // Max memory size (in bytes) specified by the user. + //(after linking, this is rounded and set on the wasm object in pages) struct Relocation { uint32_t* data; @@ -96,6 +112,23 @@ class S2WasmBuilder { // utilities + // For fatal errors which could arise from input (i.e. not assertion failures) + class Fatal { + public: + Fatal() { + std::cerr << "Fatal: "; + } + template<typename T> + Fatal &operator<<(T arg) { + std::cerr << arg; + return *this; + } + ~Fatal() { + std::cerr << "\n"; + exit(1); + } + }; + void skipWhitespace() { while (1) { while (*s && isspace(*s)) s++; @@ -399,7 +432,7 @@ class S2WasmBuilder { addressSegments[nextStatic] = wasm.memory.segments.size(); wasm.memory.segments.emplace_back( nextStatic, reinterpret_cast<char*>(raw), pointerSize); - initialMemory = nextStatic + pointerSize; + minInitialMemory = nextStatic + pointerSize; } nextStatic += pointerSize; } @@ -410,7 +443,7 @@ class S2WasmBuilder { nextStatic = (nextStatic + 15) & static_cast<size_t>(-16); staticAddresses[".stack"] = nextStatic; nextStatic += stackAllocation; - initialMemory = nextStatic; + minInitialMemory = nextStatic; } void process() { @@ -812,7 +845,8 @@ class S2WasmBuilder { break; } case 'e': { - if (match("eq")) makeBinary(BinaryOp::Eq, i32); + if (match("eqz")) makeUnary(UnaryOp::EqZ, i32); + else if (match("eq")) makeBinary(BinaryOp::Eq, i32); else if (match("extend_s/i32")) makeUnary(UnaryOp::ExtendSInt32, type); else if (match("extend_u/i32")) makeUnary(UnaryOp::ExtendUInt32, type); else abort_on("type.e"); @@ -1154,7 +1188,7 @@ class S2WasmBuilder { wasm.memory.segments.emplace_back(nextStatic, (const char*)&(*raw)[0], size); } nextStatic += size; - initialMemory = nextStatic; + minInitialMemory = nextStatic; } void parseLcomm(Name name, size_t align=1) { @@ -1168,7 +1202,7 @@ class S2WasmBuilder { while (nextStatic % align) nextStatic++; staticAddresses[name] = nextStatic; nextStatic += size; - initialMemory = nextStatic; + minInitialMemory = nextStatic; } void skipImports() { @@ -1182,11 +1216,25 @@ class S2WasmBuilder { } } + static size_t roundUpToPageSize(size_t size) { + return (size + Memory::kPageSize - 1) & Memory::kPageMask; + } + void fix() { // Round the memory size up to a page, and update the page-increment versions // of initial and max - wasm.memory.initial = ((initialMemory + Memory::kPageSize - 1) & Memory::kPageMask) / - Memory::kPageSize; + size_t initialMem = roundUpToPageSize(minInitialMemory); + if (userInitialMemory) { + if (initialMem > userInitialMemory) { + Fatal() << "Specified initial memory size " << userInitialMemory << + " is smaller than required size " << initialMem; + } + wasm.memory.initial = userInitialMemory / Memory::kPageSize; + } else { + wasm.memory.initial = initialMem / Memory::kPageSize; + } + + if (userMaxMemory) wasm.memory.max = userMaxMemory / Memory::kPageSize; wasm.memory.exportName = MEMORY; // XXX For now, export all functions marked .globl. diff --git a/src/support/bits.cpp b/src/support/bits.cpp index c1de4da8b..3d03b100c 100644 --- a/src/support/bits.cpp +++ b/src/support/bits.cpp @@ -15,6 +15,7 @@ */ #define wasm_support_bits_definitions +#include "../compiler-support.h" #include "support/bits.h" namespace wasm { @@ -99,4 +100,28 @@ int CountLeadingZeroes<uint64_t>(uint64_t v) { : 32 + CountLeadingZeroes((uint32_t)v); } +uint32_t Log2(uint32_t v) { + switch (v) { + default: WASM_UNREACHABLE(); + case 1: return 0; + case 2: return 1; + case 4: return 2; + case 8: return 3; + case 16: return 4; + case 32: return 5; + } +} + +uint32_t Pow2(uint32_t v) { + switch (v) { + default: WASM_UNREACHABLE(); + case 0: return 1; + case 1: return 2; + case 2: return 4; + case 3: return 8; + case 4: return 16; + case 5: return 32; + } +} + } // namespace wasm diff --git a/src/support/bits.h b/src/support/bits.h index 6c9fbad94..5d3502b81 100644 --- a/src/support/bits.h +++ b/src/support/bits.h @@ -79,6 +79,8 @@ inline static T RotateRight(T val, T count) { return (val >> count) | (val << (-count & mask)); } +extern uint32_t Log2(uint32_t v); +extern uint32_t Pow2(uint32_t v); } // namespace wasm diff --git a/src/support/command-line.cpp b/src/support/command-line.cpp index b4f8a4d62..71cb33c2a 100644 --- a/src/support/command-line.cpp +++ b/src/support/command-line.cpp @@ -19,7 +19,7 @@ using namespace wasm; Options::Options(const std::string &command, const std::string &description) - : debug(0), positional(Arguments::Zero) { + : debug(false), positional(Arguments::Zero) { add("--help", "-h", "Show this help message and exit", Arguments::Zero, [this, command, description](Options *o, const std::string &) { std::cerr << command; @@ -41,10 +41,8 @@ Options::Options(const std::string &command, const std::string &description) std::cerr << '\n'; exit(EXIT_SUCCESS); }); - add("--debug", "-d", "Print debug information to stderr", Arguments::Optional, - [](Options *o, const std::string &arguments) { - o->debug = arguments.size() ? std::stoi(arguments) : 1; - }); + add("--debug", "-d", "Print debug information to stderr", Arguments::Zero, + [&](Options *o, const std::string &arguments) { debug = true; }); } Options::~Options() {} diff --git a/src/support/command-line.h b/src/support/command-line.h index 6e0af846e..7c3ae528f 100644 --- a/src/support/command-line.h +++ b/src/support/command-line.h @@ -37,7 +37,7 @@ class Options { typedef std::function<void(Options *, const std::string &)> Action; enum class Arguments { Zero, One, N, Optional }; - int debug; + bool debug; std::map<std::string, std::string> extra; Options(const std::string &command, const std::string &description); diff --git a/src/wasm-binary.h b/src/wasm-binary.h index 0f404390b..6782eec21 100644 --- a/src/wasm-binary.h +++ b/src/wasm-binary.h @@ -30,32 +30,44 @@ namespace wasm { -struct LEB128 { - uint32_t value; +template<typename T, typename MiniT> +struct LEB { + T value; - LEB128() {} - LEB128(uint32_t value) : value(value) {} + LEB() {} + LEB(T value) : value(value) {} + + bool isSigned() { + return int(MiniT(-1)) < 0; + } + + bool hasMore(T temp, MiniT byte) { + // for signed, we must ensure the last bit has the right sign, as it will zero extend + return isSigned() ? (temp != 0 && int32_t(temp) != -1) || (value >= 0 && (byte & 64)) || (value < 0 && !(byte & 64)): temp; + } void write(std::vector<uint8_t>* out) { - uint32_t temp = value; + T temp = value; + bool more; do { uint8_t byte = temp & 127; temp >>= 7; - if (temp) { + more = hasMore(temp, byte); + if (more) { byte = byte | 128; } out->push_back(byte); - } while (temp); + } while (more); } void writeAt(std::vector<uint8_t>* out, size_t at, size_t minimum = 0) { - uint32_t temp = value; + T temp = value; size_t offset = 0; bool more; do { uint8_t byte = temp & 127; temp >>= 7; - more = temp || offset + 1 < minimum; + more = hasMore(temp, byte) || offset + 1 < minimum; if (more) { byte = byte | 128; } @@ -64,18 +76,33 @@ struct LEB128 { } while (more); } - void read(std::function<uint8_t ()> get) { + void read(std::function<MiniT ()> get) { value = 0; - uint32_t shift = 0; + T shift = 0; + MiniT byte; while (1) { - uint8_t byte = get(); - value |= ((byte & 127) << shift); + byte = get(); + value |= ((T(byte & 127)) << shift); if (!(byte & 128)) break; shift += 7; } + // if signed LEB, then we might need to sign-extend. (compile should optimize this out if not needed) + if (isSigned()) { + shift += 7; + if (byte & 64 && size_t(shift) < 8*sizeof(T)) { + // the highest bit we received was a 1, sign-extend all the rest + value = value | (T(-1) << shift); + assert(value < 0); + } + } } }; +typedef LEB<uint32_t, uint8_t> U32LEB; +typedef LEB<uint64_t, uint8_t> U64LEB; +typedef LEB<int32_t, int8_t> S32LEB; +typedef LEB<int64_t, int8_t> S64LEB; + // // We mostly stream into a buffer as we create the binary format, however, // sometimes we need to backtrack and write to a location behind us - wasm @@ -118,8 +145,23 @@ public: push_back(x & 0xff); return *this; } - BufferWithRandomAccess& operator<<(LEB128 x) { - if (debug) std::cerr << "writeLEB128: " << x.value << " (at " << size() << ")" << std::endl; + BufferWithRandomAccess& operator<<(U32LEB x) { + if (debug) std::cerr << "writeU32LEB: " << x.value << " (at " << size() << ")" << std::endl; + x.write(this); + return *this; + } + BufferWithRandomAccess& operator<<(U64LEB x) { + if (debug) std::cerr << "writeU64LEB: " << x.value << " (at " << size() << ")" << std::endl; + x.write(this); + return *this; + } + BufferWithRandomAccess& operator<<(S32LEB x) { + if (debug) std::cerr << "writeS32LEB: " << x.value << " (at " << size() << ")" << std::endl; + x.write(this); + return *this; + } + BufferWithRandomAccess& operator<<(S64LEB x) { + if (debug) std::cerr << "writeS64LEB: " << x.value << " (at " << size() << ")" << std::endl; x.write(this); return *this; } @@ -158,9 +200,9 @@ public: (*this)[i+2] = x & 0xff; x >>= 8; (*this)[i+3] = x & 0xff; } - void writeAt(size_t i, LEB128 x, size_t minimum = 0) { - if (debug) std::cerr << "backpatchLEB128: " << x.value << " (at " << i << "), minimum " << minimum << std::endl; - x.writeAt(this, i, minimum); + void writeAt(size_t i, U32LEB x) { + if (debug) std::cerr << "backpatchU32LEB: " << x.value << " (at " << i << ")" << std::endl; + x.writeAt(this, i, 5); // fill all 5 bytes, we have to do this when backpatching } template <typename T> @@ -180,6 +222,7 @@ namespace Section { auto ExportTable = "export_table"; auto DataSegments = "data_segments"; auto FunctionTable = "function_table"; + auto Names = "names"; auto End = "end"; auto Start = "start_function"; }; @@ -220,6 +263,7 @@ enum ASTNodes { I32Clz = 0x57, I32Ctz = 0x58, I32Popcnt = 0x59, + I32EqZ = 0xc0, // XXX BoolNot = 0x5a, I64Add = 0x5b, I64Sub = 0x5c, @@ -247,6 +291,7 @@ enum ASTNodes { I64Clz = 0x72, I64Ctz = 0x73, I64Popcnt = 0x74, + I64EqZ = 0xc1, // XXX F32Add = 0x75, F32Sub = 0x76, F32Mul = 0x77, @@ -342,7 +387,6 @@ enum ASTNodes { F32StoreMem = 0x35, F64StoreMem = 0x36, - I8Const = 0x09, I32Const = 0x0a, I64Const = 0x0b, F64Const = 0x0c, @@ -422,6 +466,7 @@ public: writeExports(); writeDataSegments(); writeFunctionTable(); + writeNames(); writeEnd(); finishUp(); } @@ -432,25 +477,30 @@ public: o << int32_t(10); // version number } - int32_t startSection(const char* name) { - // emit 5 bytes of 0, which we'll fill with LEB later + int32_t writeU32LEBPlaceholder() { int32_t ret = o.size(); o << int32_t(0); o << int8_t(0); + return ret; + } + + int32_t startSection(const char* name) { + // emit 5 bytes of 0, which we'll fill with LEB later + auto ret = writeU32LEBPlaceholder(); writeInlineString(name); return ret; } void finishSection(int32_t start) { int32_t size = o.size() - start - 5; // section size does not include the 5 bytes of the size field itself - o.writeAt(start, LEB128(size), 5); + o.writeAt(start, U32LEB(size)); } void writeStart() { if (!wasm->start.is()) return; if (debug) std::cerr << "== writeStart" << std::endl; auto start = startSection(BinaryConsts::Section::Start); - o << LEB128(getFunctionIndex(wasm->start.str)); + o << U32LEB(getFunctionIndex(wasm->start.str)); finishSection(start); } @@ -458,8 +508,8 @@ public: if (wasm->memory.max == 0) return; if (debug) std::cerr << "== writeMemory" << std::endl; auto start = startSection(BinaryConsts::Section::Memory); - o << LEB128(wasm->memory.initial) - << LEB128(wasm->memory.max) + o << U32LEB(wasm->memory.initial) + << U32LEB(wasm->memory.max) << int8_t(1); // export memory finishSection(start); } @@ -468,10 +518,10 @@ public: if (wasm->functionTypes.size() == 0) return; if (debug) std::cerr << "== writeSignatures" << std::endl; auto start = startSection(BinaryConsts::Section::Signatures); - o << LEB128(wasm->functionTypes.size()); + o << U32LEB(wasm->functionTypes.size()); for (auto* type : wasm->functionTypes) { if (debug) std::cerr << "write one" << std::endl; - o << LEB128(type->params.size()); + o << U32LEB(type->params.size()); o << binaryWasmType(type->result); for (auto param : type->params) { o << binaryWasmType(param); @@ -492,10 +542,10 @@ public: if (wasm->imports.size() == 0) return; if (debug) std::cerr << "== writeImports" << std::endl; auto start = startSection(BinaryConsts::Section::ImportTable); - o << LEB128(wasm->imports.size()); + o << U32LEB(wasm->imports.size()); for (auto* import : wasm->imports) { if (debug) std::cerr << "write one" << std::endl; - o << LEB128(getFunctionTypeIndex(import->type->name)); + o << U32LEB(getFunctionTypeIndex(import->type->name)); writeInlineString(import->module.str); writeInlineString(import->base.str); } @@ -546,10 +596,10 @@ public: if (wasm->functions.size() == 0) return; if (debug) std::cerr << "== writeFunctionSignatures" << std::endl; auto start = startSection(BinaryConsts::Section::FunctionSignatures); - o << LEB128(wasm->functions.size()); + o << U32LEB(wasm->functions.size()); for (auto* curr : wasm->functions) { if (debug) std::cerr << "write one" << std::endl; - o << LEB128(getFunctionTypeIndex(curr->type)); + o << U32LEB(getFunctionTypeIndex(curr->type)); } finishSection(start); } @@ -559,29 +609,26 @@ public: if (debug) std::cerr << "== writeFunctions" << std::endl; auto start = startSection(BinaryConsts::Section::Functions); size_t total = wasm->functions.size(); - o << LEB128(total); + o << U32LEB(total); for (size_t i = 0; i < total; i++) { if (debug) std::cerr << "write one at" << o.size() << std::endl; + size_t sizePos = writeU32LEBPlaceholder(); + size_t start = o.size(); Function* function = wasm->functions[i]; - Name name, type; - name = function->name; - type = function->type; mappedLocals.clear(); numLocalsByType.clear(); - if (debug) std::cerr << "writing" << name << std::endl; - o << int8_t(BinaryConsts::Named | - (BinaryConsts::Locals * (function && function->locals.size() > 0))); - emitString(name.str); + if (debug) std::cerr << "writing" << function->name << std::endl; mapLocals(function); - if (function->locals.size() > 0) { - o << uint16_t(numLocalsByType[i32]) - << uint16_t(numLocalsByType[i64]) - << uint16_t(numLocalsByType[f32]) - << uint16_t(numLocalsByType[f64]); - } - size_t sizePos = o.size(); - o << (uint32_t)0; // placeholder, we fill in the size later when we have it // XXX int32, diverge from v8 format, to get more code to compile - size_t start = o.size(); + o << U32LEB( + (numLocalsByType[i32] ? 1 : 0) + + (numLocalsByType[i64] ? 1 : 0) + + (numLocalsByType[f32] ? 1 : 0) + + (numLocalsByType[f64] ? 1 : 0) + ); + if (numLocalsByType[i32]) o << U32LEB(numLocalsByType[i32]) << binaryWasmType(i32); + if (numLocalsByType[i64]) o << U32LEB(numLocalsByType[i64]) << binaryWasmType(i64); + if (numLocalsByType[f32]) o << U32LEB(numLocalsByType[f32]) << binaryWasmType(f32); + if (numLocalsByType[f64]) o << U32LEB(numLocalsByType[f64]) << binaryWasmType(f64); depth = 0; recurse(function->body); o << int8_t(BinaryConsts::EndMarker); @@ -589,7 +636,7 @@ public: size_t size = o.size() - start; assert(size <= std::numeric_limits<uint32_t>::max()); if (debug) std::cerr << "body size: " << size << ", writing at " << sizePos << ", next starts at " << o.size() << std::endl; - o.writeAt(sizePos, uint32_t(size)); // XXX int32, diverge from v8 format, to get more code to compile + o.writeAt(sizePos, U32LEB(size)); } finishSection(start); } @@ -598,10 +645,10 @@ public: if (wasm->exports.size() == 0) return; if (debug) std::cerr << "== writeexports" << std::endl; auto start = startSection(BinaryConsts::Section::ExportTable); - o << LEB128(wasm->exports.size()); + o << U32LEB(wasm->exports.size()); for (auto* curr : wasm->exports) { if (debug) std::cerr << "write one" << std::endl; - o << LEB128(getFunctionIndex(curr->value)); + o << U32LEB(getFunctionIndex(curr->value)); writeInlineString(curr->name.str); } finishSection(start); @@ -614,10 +661,10 @@ public: if (segment.size > 0) num++; } auto start = startSection(BinaryConsts::Section::DataSegments); - o << LEB128(num); + o << U32LEB(num); for (auto& segment : wasm->memory.segments) { if (segment.size == 0) continue; - o << LEB128(segment.offset); + o << U32LEB(segment.offset); writeInlineBuffer(segment.data, segment.size); } finishSection(start); @@ -653,9 +700,21 @@ public: if (wasm->table.names.size() == 0) return; if (debug) std::cerr << "== writeFunctionTable" << std::endl; auto start = startSection(BinaryConsts::Section::FunctionTable); - o << LEB128(wasm->table.names.size()); + o << U32LEB(wasm->table.names.size()); for (auto name : wasm->table.names) { - o << LEB128(getFunctionIndex(name)); + o << U32LEB(getFunctionIndex(name)); + } + finishSection(start); + } + + void writeNames() { + if (wasm->functions.size() == 0) return; + if (debug) std::cerr << "== writeNames" << std::endl; + auto start = startSection(BinaryConsts::Section::Names); + o << U32LEB(wasm->functions.size()); + for (auto* curr : wasm->functions) { + writeInlineString(curr->name.str); + o << U32LEB(0); // TODO: locals } finishSection(start); } @@ -669,14 +728,14 @@ public: void writeInlineString(const char* name) { int32_t size = strlen(name); - o << LEB128(size); + o << U32LEB(size); for (int32_t i = 0; i < size; i++) { o << int8_t(name[i]); } } void writeInlineBuffer(const char* data, size_t size) { - o << LEB128(size); + o << U32LEB(size); for (size_t i = 0; i < size; i++) { o << int8_t(data[i]); } @@ -776,35 +835,37 @@ public: } if (curr->condition) recurse(curr->condition); o << int8_t(curr->condition ? BinaryConsts::BrIf : BinaryConsts::Br) - << int32_t(getBreakIndex(curr->name)); + << U32LEB(getBreakIndex(curr->name)); } void visitSwitch(Switch *curr) { if (debug) std::cerr << "zz node: Switch" << std::endl; - o << int8_t(BinaryConsts::TableSwitch) << int16_t(curr->targets.size() + 1) << int8_t(curr->value != nullptr); + o << int8_t(BinaryConsts::TableSwitch) << U32LEB(curr->targets.size()); for (auto target : curr->targets) { - o << (int32_t)getBreakIndex(target); + o << U32LEB(getBreakIndex(target)); } - o << (int32_t)getBreakIndex(curr->default_); + o << U32LEB(getBreakIndex(curr->default_)); recurse(curr->condition); o << int8_t(BinaryConsts::EndMarker); if (curr->value) { recurse(curr->value); - o << int8_t(BinaryConsts::EndMarker); + } else { + visitNop(nullptr); } + o << int8_t(BinaryConsts::EndMarker); } void visitCall(Call *curr) { if (debug) std::cerr << "zz node: Call" << std::endl; for (auto* operand : curr->operands) { recurse(operand); } - o << int8_t(BinaryConsts::CallFunction) << LEB128(getFunctionIndex(curr->target)); + o << int8_t(BinaryConsts::CallFunction) << U32LEB(getFunctionIndex(curr->target)); } void visitCallImport(CallImport *curr) { if (debug) std::cerr << "zz node: CallImport" << std::endl; for (auto* operand : curr->operands) { recurse(operand); } - o << int8_t(BinaryConsts::CallImport) << LEB128(getImportIndex(curr->target)); + o << int8_t(BinaryConsts::CallImport) << U32LEB(getImportIndex(curr->target)); } void visitCallIndirect(CallIndirect *curr) { if (debug) std::cerr << "zz node: CallIndirect" << std::endl; @@ -812,22 +873,21 @@ public: for (auto* operand : curr->operands) { recurse(operand); } - o << int8_t(BinaryConsts::CallIndirect) << LEB128(getFunctionTypeIndex(curr->fullType->name)); + o << int8_t(BinaryConsts::CallIndirect) << U32LEB(getFunctionTypeIndex(curr->fullType->name)); } void visitGetLocal(GetLocal *curr) { if (debug) std::cerr << "zz node: GetLocal " << (o.size() + 1) << std::endl; - o << int8_t(BinaryConsts::GetLocal) << LEB128(mappedLocals[curr->name]); + o << int8_t(BinaryConsts::GetLocal) << U32LEB(mappedLocals[curr->name]); } void visitSetLocal(SetLocal *curr) { if (debug) std::cerr << "zz node: SetLocal" << std::endl; recurse(curr->value); - o << int8_t(BinaryConsts::SetLocal) << LEB128(mappedLocals[curr->name]); + o << int8_t(BinaryConsts::SetLocal) << U32LEB(mappedLocals[curr->name]); } void emitMemoryAccess(size_t alignment, size_t bytes, uint32_t offset) { - o << int8_t( ((alignment == bytes || alignment == 0) ? BinaryConsts::NaturalAlignment : BinaryConsts::Alignment) | - (offset ? BinaryConsts::Offset : 0) ); - if (offset) o << LEB128(offset); + o << U32LEB(Log2(alignment ? alignment : bytes)); + o << U32LEB(offset); } void visitLoad(Load *curr) { @@ -893,16 +953,11 @@ public: if (debug) std::cerr << "zz node: Const" << curr << " : " << curr->type << std::endl; switch (curr->type) { case i32: { - uint32_t value = curr->value.geti32(); - if (value <= 255) { - o << int8_t(BinaryConsts::I8Const) << uint8_t(value); - break; - } - o << int8_t(BinaryConsts::I32Const) << value; + o << int8_t(BinaryConsts::I32Const) << S32LEB(curr->value.geti32()); break; } case i64: { - o << int8_t(BinaryConsts::I64Const) << curr->value.geti64(); + o << int8_t(BinaryConsts::I64Const) << S64LEB(curr->value.geti64()); break; } case f32: { @@ -924,6 +979,7 @@ public: case Clz: o << int8_t(curr->type == i32 ? BinaryConsts::I32Clz : BinaryConsts::I64Clz); break; case Ctz: o << int8_t(curr->type == i32 ? BinaryConsts::I32Ctz : BinaryConsts::I64Ctz); break; case Popcnt: o << int8_t(curr->type == i32 ? BinaryConsts::I32Popcnt : BinaryConsts::I64Popcnt); break; + case EqZ: o << int8_t(curr->type == i32 ? BinaryConsts::I32EqZ : BinaryConsts::I64EqZ); break; case Neg: o << int8_t(curr->type == f32 ? BinaryConsts::F32Neg : BinaryConsts::F64Neg); break; case Abs: o << int8_t(curr->type == f32 ? BinaryConsts::F32Abs : BinaryConsts::F64Abs); break; case Ceil: o << int8_t(curr->type == f32 ? BinaryConsts::F32Ceil : BinaryConsts::F64Ceil); break; @@ -1067,10 +1123,11 @@ class WasmBinaryBuilder { std::vector<char>& input; bool debug; - size_t pos; + size_t pos = 0; + int32_t startIndex = -1; public: - WasmBinaryBuilder(AllocatingModule& wasm, std::vector<char>& input, bool debug) : wasm(wasm), allocator(wasm.allocator), input(input), debug(debug), pos(0) {} + WasmBinaryBuilder(AllocatingModule& wasm, std::vector<char>& input, bool debug) : wasm(wasm), allocator(wasm.allocator), input(input), debug(debug) {} void read() { @@ -1078,9 +1135,9 @@ public: // read sections until the end while (more()) { - auto sectionSize = getLEB128(); + auto sectionSize = getU32LEB(); assert(sectionSize < pos + input.size()); - auto nameSize = getLEB128(); + auto nameSize = getU32LEB(); auto match = [&](const char* name) { for (size_t i = 0; i < nameSize; i++) { if (pos + i >= input.size()) return false; @@ -1100,6 +1157,7 @@ public: else if (match(BinaryConsts::Section::ExportTable)) readExports(); else if (match(BinaryConsts::Section::DataSegments)) readDataSegments(); else if (match(BinaryConsts::Section::FunctionTable)) readFunctionTable(); + else if (match(BinaryConsts::Section::Names)) readNames(); else if (match(BinaryConsts::Section::End)) { if (debug) std::cerr << "== readEnd" << std::endl; break; @@ -1151,13 +1209,40 @@ public: return ret; } - uint32_t getLEB128() { + uint32_t getU32LEB() { + if (debug) std::cerr << "<==" << std::endl; + U32LEB ret; + ret.read([&]() { + return getInt8(); + }); + if (debug) std::cerr << "getU32LEB: " << ret.value << " ==>" << std::endl; + return ret.value; + } + uint64_t getU64LEB() { if (debug) std::cerr << "<==" << std::endl; - LEB128 ret; + U64LEB ret; ret.read([&]() { return getInt8(); }); - if (debug) std::cerr << "getLEB128: " << ret.value << " ==>" << std::endl; + if (debug) std::cerr << "getU64LEB: " << ret.value << " ==>" << std::endl; + return ret.value; + } + int32_t getS32LEB() { + if (debug) std::cerr << "<==" << std::endl; + S32LEB ret; + ret.read([&]() { + return (int8_t)getInt8(); + }); + if (debug) std::cerr << "getU32LEB: " << ret.value << " ==>" << std::endl; + return ret.value; + } + int64_t getS64LEB() { + if (debug) std::cerr << "<==" << std::endl; + S64LEB ret; + ret.read([&]() { + return (int8_t)getInt8(); + }); + if (debug) std::cerr << "getU64LEB: " << ret.value << " ==>" << std::endl; return ret.value; } WasmType getWasmType() { @@ -1182,7 +1267,7 @@ public: Name getInlineString() { if (debug) std::cerr << "<==" << std::endl; - auto len = getLEB128(); + auto len = getU32LEB(); std::string str; for (size_t i = 0; i < len; i++) { str = str + char(getInt8()); @@ -1230,24 +1315,24 @@ public: void readStart() { if (debug) std::cerr << "== readStart" << std::endl; - wasm.start = wasm.functions[getLEB128()]->name; + startIndex = getU32LEB(); } void readMemory() { if (debug) std::cerr << "== readMemory" << std::endl; - wasm.memory.initial = getLEB128(); - wasm.memory.max = getLEB128(); + wasm.memory.initial = getU32LEB(); + wasm.memory.max = getU32LEB(); verifyInt8(1); // export memory } void readSignatures() { if (debug) std::cerr << "== readSignatures" << std::endl; - size_t numTypes = getLEB128(); + size_t numTypes = getU32LEB(); if (debug) std::cerr << "num: " << numTypes << std::endl; for (size_t i = 0; i < numTypes; i++) { if (debug) std::cerr << "read one" << std::endl; auto curr = allocator.alloc<FunctionType>(); - size_t numParams = getLEB128(); + size_t numParams = getU32LEB(); if (debug) std::cerr << "num params: " << numParams << std::endl; curr->result = getWasmType(); for (size_t j = 0; j < numParams; j++) { @@ -1259,13 +1344,13 @@ public: void readImports() { if (debug) std::cerr << "== readImports" << std::endl; - size_t num = getLEB128(); + size_t num = getU32LEB(); if (debug) std::cerr << "num: " << num << std::endl; for (size_t i = 0; i < num; i++) { if (debug) std::cerr << "read one" << std::endl; auto curr = allocator.alloc<Import>(); curr->name = Name(std::string("import$") + std::to_string(i)); - auto index = getLEB128(); + auto index = getU32LEB(); assert(index < wasm.functionTypes.size()); curr->type = wasm.functionTypes[index]; assert(curr->type->name.is()); @@ -1279,11 +1364,11 @@ public: void readFunctionSignatures() { if (debug) std::cerr << "== readFunctionSignatures" << std::endl; - size_t num = getLEB128(); + size_t num = getU32LEB(); if (debug) std::cerr << "num: " << num << std::endl; for (size_t i = 0; i < num; i++) { if (debug) std::cerr << "read one" << std::endl; - auto index = getLEB128(); + auto index = getU32LEB(); assert(index < wasm.functionTypes.size()); functionTypes.push_back(wasm.functionTypes[index]); } @@ -1295,20 +1380,21 @@ public: return cashew::IString(("label$" + std::to_string(nextLabel++)).c_str(), false); } + // We read functions before we know their names, so we need to backpatch the names later + + std::vector<Function*> functions; // we store functions here before wasm.addFunction after we know their names + std::map<size_t, std::vector<Call*>> functionCalls; // at index i we have all calls to i + void readFunctions() { if (debug) std::cerr << "== readFunctions" << std::endl; - size_t total = getLEB128(); + size_t total = getU32LEB(); for (size_t i = 0; i < total; i++) { if (debug) std::cerr << "read one at " << pos << std::endl; - auto data = getInt8(); + size_t size = getU32LEB(); + assert(size > 0); // we could also check it matches the seen size auto type = functionTypes[i]; - bool named = data & BinaryConsts::Named; - assert(named); - bool locals = data & BinaryConsts::Locals; - Name name = getString(); - if (debug) std::cerr << "reading" << name << std::endl; + if (debug) std::cerr << "reading" << i << std::endl; auto func = allocator.alloc<Function>(); - func->name = name; func->type = type->name; func->result = type->result; size_t nextVar = 0; @@ -1319,53 +1405,61 @@ public: for (size_t j = 0; j < type->params.size(); j++) { func->params.emplace_back(addVar(), type->params[j]); } - if (locals) { - auto addLocals = [&](WasmType type) { - int16_t num = getInt16(); - while (num > 0) { - func->locals.emplace_back(addVar(), type); - num--; - } - }; - addLocals(i32); - addLocals(i64); - addLocals(f32); - addLocals(f64); + size_t numLocalTypes = getU32LEB(); + for (size_t t = 0; t < numLocalTypes; t++) { + auto num = getU32LEB(); + auto type = getWasmType(); + while (num > 0) { + func->locals.emplace_back(addVar(), type); + num--; + } } - size_t size = getInt32(); // XXX int32, diverge from v8 format, to get more code to compile - // we can't read the function yet - it might call other functions that are defined later, - // and we do depend on the function type. - functions.emplace_back(func, pos, size); - pos += size; - func->body = nullptr; // will be filled later. but we do have the name and the type already. - wasm.addFunction(func); + { + // process the function body + if (debug) std::cerr << "processing function: " << i << std::endl; + nextLabel = 0; + // prepare locals + mappedLocals.clear(); + localTypes.clear(); + for (size_t i = 0; i < func->params.size(); i++) { + mappedLocals.push_back(func->params[i].name); + localTypes[func->params[i].name] = func->params[i].type; + } + for (size_t i = 0; i < func->locals.size(); i++) { + mappedLocals.push_back(func->locals[i].name); + localTypes[func->locals[i].name] = func->locals[i].type; + } + // process body + assert(breakStack.empty()); + assert(expressionStack.empty()); + depth = 0; + processExpressions(); + assert(expressionStack.size() == 1); + func->body = popExpression(); + assert(depth == 0); + assert(breakStack.empty()); + assert(expressionStack.empty()); + } + functions.push_back(func); } } + std::map<Export*, size_t> exportIndexes; + void readExports() { if (debug) std::cerr << "== readExports" << std::endl; - size_t num = getLEB128(); + size_t num = getU32LEB(); if (debug) std::cerr << "num: " << num << std::endl; for (size_t i = 0; i < num; i++) { if (debug) std::cerr << "read one" << std::endl; auto curr = allocator.alloc<Export>(); - auto index = getLEB128(); - assert(index < wasm.functions.size()); - curr->value = wasm.functions[index]->name; - assert(curr->value.is()); + auto index = getU32LEB(); + assert(index < functionTypes.size()); curr->name = getInlineString(); - wasm.addExport(curr); + exportIndexes[curr] = index; } } - struct FunctionData { - Function* func; - size_t pos, size; - FunctionData(Function* func, size_t pos, size_t size) : func(func), pos(pos), size(size) {} - }; - - std::vector<FunctionData> functions; - std::vector<Name> mappedLocals; // index => local name std::map<Name, WasmType> localTypes; // TODO: optimize @@ -1391,42 +1485,41 @@ public: void processFunctions() { for (auto& func : functions) { - Function* curr = func.func; - if (debug) std::cerr << "processing function: " << curr->name << std::endl; - pos = func.pos; - nextLabel = 0; - // prepare locals - mappedLocals.clear(); - localTypes.clear(); - for (size_t i = 0; i < curr->params.size(); i++) { - mappedLocals.push_back(curr->params[i].name); - localTypes[curr->params[i].name] = curr->params[i].type; - } - for (size_t i = 0; i < curr->locals.size(); i++) { - mappedLocals.push_back(curr->locals[i].name); - localTypes[curr->locals[i].name] = curr->locals[i].type; + wasm.addFunction(func); + } + // now that we have names for each function, apply things + + if (startIndex >= 0) { + wasm.start = wasm.functions[startIndex]->name; + } + + for (auto& iter : exportIndexes) { + Export* curr = iter.first; + curr->value = wasm.functions[iter.second]->name; + wasm.addExport(curr); + } + + for (auto& iter : functionCalls) { + size_t index = iter.first; + auto& calls = iter.second; + for (auto* call : calls) { + call->target = wasm.functions[index]->name; } - // process body - assert(breakStack.empty()); - assert(expressionStack.empty()); - depth = 0; - processExpressions(); - assert(expressionStack.size() == 1); - curr->body = popExpression(); - assert(depth == 0); - assert(breakStack.empty()); - assert(expressionStack.empty()); - assert(pos == func.pos + func.size); + } + + for (size_t index : functionTable) { + assert(index < wasm.functions.size()); + wasm.table.names.push_back(wasm.functions[index]->name); } } void readDataSegments() { if (debug) std::cerr << "== readDataSegments" << std::endl; - auto num = getLEB128(); + auto num = getU32LEB(); for (size_t i = 0; i < num; i++) { Memory::Segment curr; - curr.offset = getLEB128(); - auto size = getLEB128(); + curr.offset = getU32LEB(); + auto size = getU32LEB(); auto buffer = (char*)malloc(size); for (size_t j = 0; j < size; j++) { buffer[j] = char(getInt8()); @@ -1437,13 +1530,24 @@ public: } } + std::vector<size_t> functionTable; + void readFunctionTable() { if (debug) std::cerr << "== readFunctionTable" << std::endl; - auto num = getLEB128(); + auto num = getU32LEB(); for (size_t i = 0; i < num; i++) { - auto index = getLEB128(); - assert(index < wasm.functions.size()); - wasm.table.names.push_back(wasm.functions[index]->name); + auto index = getU32LEB(); + functionTable.push_back(index); + } + } + + void readNames() { + if (debug) std::cerr << "== readNames" << std::endl; + auto num = getU32LEB(); + for (size_t i = 0; i < num; i++) { + functions[i]->name = getInlineString(); + auto numLocals = getU32LEB(); + assert(numLocals == 0); // TODO } } @@ -1573,39 +1677,38 @@ public: void visitBreak(Break *curr, uint8_t code) { if (debug) std::cerr << "zz node: Break" << std::endl; - curr->name = getBreakName(getInt32()); + curr->name = getBreakName(getU32LEB()); if (code == BinaryConsts::BrIf) curr->condition = popExpression(); curr->value = popExpression(); } void visitSwitch(Switch *curr) { if (debug) std::cerr << "zz node: Switch" << std::endl; - auto numTargets = getInt16(); - auto hasValue = getInt8(); - for (auto i = 0; i < numTargets - 1; i++) { - curr->targets.push_back(getBreakName(getInt32())); + auto numTargets = getU32LEB(); + for (size_t i = 0; i < numTargets; i++) { + curr->targets.push_back(getBreakName(getU32LEB())); } - curr->default_ = getBreakName(getInt32()); + curr->default_ = getBreakName(getU32LEB()); processExpressions(); curr->condition = popExpression(); - if (hasValue) { - processExpressions(); - curr->value = popExpression(); - } + processExpressions(); + curr->value = popExpression(); + if (curr->value->is<Nop>()) curr->value = nullptr; } void visitCall(Call *curr) { if (debug) std::cerr << "zz node: Call" << std::endl; - curr->target = wasm.functions[getLEB128()]->name; - auto type = wasm.functionTypesMap[wasm.functionsMap[curr->target]->type]; + auto index = getU32LEB(); + auto type = functionTypes[index]; auto num = type->params.size(); curr->operands.resize(num); for (size_t i = 0; i < num; i++) { curr->operands[num - i - 1] = popExpression(); } curr->type = type->result; + functionCalls[index].push_back(curr); } void visitCallImport(CallImport *curr) { if (debug) std::cerr << "zz node: CallImport" << std::endl; - curr->target = wasm.imports[getLEB128()]->name; + curr->target = wasm.imports[getU32LEB()]->name; assert(wasm.importsMap.find(curr->target) != wasm.importsMap.end()); auto type = wasm.importsMap[curr->target]->type; assert(type); @@ -1619,7 +1722,7 @@ public: } void visitCallIndirect(CallIndirect *curr) { if (debug) std::cerr << "zz node: CallIndirect" << std::endl; - curr->fullType = wasm.functionTypes[getLEB128()]; + curr->fullType = wasm.functionTypes[getU32LEB()]; auto num = curr->fullType->params.size(); curr->operands.resize(num); for (size_t i = 0; i < num; i++) { @@ -1630,26 +1733,21 @@ public: } void visitGetLocal(GetLocal *curr) { if (debug) std::cerr << "zz node: GetLocal " << pos << std::endl; - curr->name = mappedLocals[getLEB128()]; + curr->name = mappedLocals[getU32LEB()]; assert(curr->name.is()); curr->type = localTypes[curr->name]; } void visitSetLocal(SetLocal *curr) { if (debug) std::cerr << "zz node: SetLocal" << std::endl; - curr->name = mappedLocals[getLEB128()]; + curr->name = mappedLocals[getU32LEB()]; assert(curr->name.is()); curr->value = popExpression(); curr->type = curr->value->type; } void readMemoryAccess(uint32_t& alignment, size_t bytes, uint32_t& offset) { - auto value = getInt8(); - alignment = value & BinaryConsts::Alignment ? 1 : bytes; - if (value & BinaryConsts::Offset) { - offset = getLEB128(); - } else { - offset = 0; - } + alignment = Pow2(getU32LEB()); + offset = getU32LEB(); } bool maybeVisitImpl(Load *curr, uint8_t code) { @@ -1696,9 +1794,8 @@ public: } bool maybeVisitImpl(Const *curr, uint8_t code) { switch (code) { - case BinaryConsts::I8Const: curr->value = Literal(int32_t(getInt8())); break; - case BinaryConsts::I32Const: curr->value = Literal(getInt32()); break; - case BinaryConsts::I64Const: curr->value = Literal(getInt64()); break; + case BinaryConsts::I32Const: curr->value = Literal(getS32LEB()); break; + case BinaryConsts::I64Const: curr->value = Literal(getS64LEB()); break; case BinaryConsts::F32Const: curr->value = Literal(getFloat32()); break; case BinaryConsts::F64Const: curr->value = Literal(getFloat64()); break; default: return false; @@ -1715,6 +1812,8 @@ public: case BinaryConsts::I64Ctz: curr->op = Ctz; curr->type = i64; break; case BinaryConsts::I32Popcnt: curr->op = Popcnt; curr->type = i32; break; case BinaryConsts::I64Popcnt: curr->op = Popcnt; curr->type = i64; break; + case BinaryConsts::I32EqZ: curr->op = EqZ; curr->type = i32; break; + case BinaryConsts::I64EqZ: curr->op = EqZ; curr->type = i64; break; case BinaryConsts::F32Neg: curr->op = Neg; curr->type = f32; break; case BinaryConsts::F64Neg: curr->op = Neg; curr->type = f64; break; case BinaryConsts::F32Abs: curr->op = Abs; curr->type = f32; break; diff --git a/src/wasm-interpreter.h b/src/wasm-interpreter.h index fb860627c..e9ced6b6f 100644 --- a/src/wasm-interpreter.h +++ b/src/wasm-interpreter.h @@ -391,6 +391,7 @@ private: case Clz: return value.countLeadingZeroes(); case Ctz: return value.countTrailingZeroes(); case Popcnt: return value.popCount(); + case EqZ: return Literal(int32_t(value == Literal(int32_t(0)))); case ReinterpretInt: return value.castToF32(); case ExtendSInt32: return value.extendToSI64(); case ExtendUInt32: return value.extendToUI64(); @@ -404,6 +405,7 @@ private: case Clz: return value.countLeadingZeroes(); case Ctz: return value.countTrailingZeroes(); case Popcnt: return value.popCount(); + case EqZ: return Literal(int32_t(value == Literal(int64_t(0)))); case WrapInt64: return value.truncateToI32(); case ReinterpretInt: return value.castToF64(); case ConvertUInt64: return curr->type == f32 ? value.convertUToF32() : value.convertUToF64(); @@ -629,7 +631,7 @@ private: return Literal(int32_t(val)); } else { int64_t converted = val; - if ((val >= 1 && converted <= 0) || val < (double)LLONG_MIN) trap("i32.truncSFloat overflow"); + if ((val >= 1 && converted <= 0) || val < (double)LLONG_MIN) trap("i64.truncSFloat overflow"); return Literal(converted); } } @@ -638,7 +640,7 @@ private: double val = value.getFloat(); if (isnan(val)) trap("truncUFloat of nan"); if (curr->type == i32) { - if (val > (double)std::numeric_limits<uint32_t>::max() || val <= (double)-1) trap("i64.truncUFloat overflow"); + if (val > (double)std::numeric_limits<uint32_t>::max() || val <= (double)-1) trap("i32.truncUFloat overflow"); return Literal(uint32_t(val)); } else { uint64_t converted = val; diff --git a/src/wasm-js.cpp b/src/wasm-js.cpp index 3083b3b9e..03eddd031 100644 --- a/src/wasm-js.cpp +++ b/src/wasm-js.cpp @@ -79,7 +79,7 @@ extern "C" void EMSCRIPTEN_KEEPALIVE load_asm2wasm(char *input) { module->memory.max = pre.memoryGrowth ? -1 : module->memory.initial; if (wasmJSDebug) std::cerr << "wasming...\n"; - asm2wasm = new Asm2WasmBuilder(*module, pre.memoryGrowth, debug); + asm2wasm = new Asm2WasmBuilder(*module, pre.memoryGrowth, debug, false /* TODO: support imprecise? */); asm2wasm->processAsm(asmjs); if (wasmJSDebug) std::cerr << "optimizing...\n"; diff --git a/src/wasm-s-parser.h b/src/wasm-s-parser.h index 47cc4e588..965336857 100644 --- a/src/wasm-s-parser.h +++ b/src/wasm-s-parser.h @@ -487,7 +487,10 @@ public: abort_on(op); } case 'e': { - if (op[1] == 'q') return makeBinary(s, BinaryOp::Eq, type); + if (op[1] == 'q') { + if (op[2] == 0) return makeBinary(s, BinaryOp::Eq, type); + if (op[2] == 'z') return makeUnary(s, UnaryOp::EqZ, i32); + } if (op[1] == 'x') return makeUnary(s, op[7] == 'u' ? UnaryOp::ExtendUInt32 : UnaryOp::ExtendSInt32, type); abort_on(op); } @@ -1142,12 +1145,7 @@ private: void parseTable(Element& s) { for (size_t i = 1; i < s.size(); i++) { - Name name = s[i]->str(); - if (!s[i]->dollared()) { - // index, we haven't - name = functionNames[atoi(name.str)]; - } - wasm.table.names.push_back(name); + wasm.table.names.push_back(getFunctionName(*s[i])); } } diff --git a/src/wasm.h b/src/wasm.h index 50aeef94b..7422dd85d 100644 --- a/src/wasm.h +++ b/src/wasm.h @@ -683,6 +683,8 @@ public: enum UnaryOp { Clz, Ctz, Popcnt, // int Neg, Abs, Ceil, Floor, Trunc, Nearest, Sqrt, // float + // relational + EqZ, // conversions ExtendSInt32, ExtendUInt32, WrapInt64, TruncSFloat32, TruncUFloat32, TruncSFloat64, TruncUFloat64, ReinterpretFloat, // int ConvertSInt32, ConvertUInt32, ConvertSInt64, ConvertUInt64, PromoteFloat32, DemoteFloat64, ReinterpretInt // float @@ -972,6 +974,11 @@ public: UnaryOp op; Expression *value; + + // the type is always the type of the operands, + // except for relationals + + bool isRelational() { return op == EqZ; } }; class Binary : public Expression { @@ -1125,7 +1132,7 @@ public: Module() : functionTypeIndex(0), importIndex(0), exportIndex(0), functionIndex(0) {} void addFunctionType(FunctionType* curr) { - Name numericName = Name::fromInt(functionTypeIndex); + Name numericName = Name::fromInt(functionTypeIndex); // TODO: remove all these, assert on names already existing, do numeric stuff in wasm-s-parser etc. if (curr->name.isNull()) { curr->name = numericName; } |