diff options
Diffstat (limited to 'src/wasm-binary.h')
-rw-r--r-- | src/wasm-binary.h | 1041 |
1 files changed, 668 insertions, 373 deletions
diff --git a/src/wasm-binary.h b/src/wasm-binary.h index b1db70a97..edc904787 100644 --- a/src/wasm-binary.h +++ b/src/wasm-binary.h @@ -30,36 +30,79 @@ namespace wasm { -struct LEB128 { - uint32_t value; +template<typename T, typename MiniT> +struct LEB { + T value; - LEB128() {} - LEB128(uint32_t value) : value(value) {} + LEB() {} + LEB(T value) : value(value) {} + + bool isSigned() { + return int(MiniT(-1)) < 0; + } + + bool hasMore(T temp, MiniT byte) { + // for signed, we must ensure the last bit has the right sign, as it will zero extend + return isSigned() ? (temp != 0 && int32_t(temp) != -1) || (value >= 0 && (byte & 64)) || (value < 0 && !(byte & 64)): (temp != 0); + } void write(std::vector<uint8_t>* out) { - uint32_t temp = value; + T temp = value; + bool more; do { uint8_t byte = temp & 127; temp >>= 7; - if (temp) { + more = hasMore(temp, byte); + if (more) { byte = byte | 128; } out->push_back(byte); - } while (temp); + } while (more); + } + + void writeAt(std::vector<uint8_t>* out, size_t at, size_t minimum = 0) { + T temp = value; + size_t offset = 0; + bool more; + do { + uint8_t byte = temp & 127; + temp >>= 7; + more = hasMore(temp, byte) || offset + 1 < minimum; + if (more) { + byte = byte | 128; + } + (*out)[at + offset] = byte; + offset++; + } while (more); } - void read(std::function<uint8_t ()> get) { + void read(std::function<MiniT ()> get) { value = 0; - uint32_t shift = 0; + T shift = 0; + MiniT byte; while (1) { - uint8_t byte = get(); - value |= ((byte & 127) << shift); + byte = get(); + value |= ((T(byte & 127)) << shift); if (!(byte & 128)) break; shift += 7; } + // if signed LEB, then we might need to sign-extend. (compile should optimize this out if not needed) + if (isSigned()) { + shift += 7; + if (byte & 64 && size_t(shift) < 8*sizeof(T)) { + // the highest bit we received was a 1, sign-extend all the rest + value = value | (T(-1) << shift); + assert(value < 0); + } + } } }; +typedef LEB<uint32_t, uint8_t> U32LEB; +typedef LEB<uint64_t, uint8_t> U64LEB; +typedef LEB<int32_t, int8_t> S32LEB; +typedef LEB<int64_t, int8_t> S64LEB; + // // We mostly stream into a buffer as we create the binary format, however, // sometimes we need to backtrack and write to a location behind us - wasm @@ -102,8 +145,23 @@ public: push_back(x & 0xff); return *this; } - BufferWithRandomAccess& operator<<(LEB128 x) { - if (debug) std::cerr << "writeLEB128: " << x.value << " (at " << size() << ")" << std::endl; + BufferWithRandomAccess& operator<<(U32LEB x) { + if (debug) std::cerr << "writeU32LEB: " << x.value << " (at " << size() << ")" << std::endl; + x.write(this); + return *this; + } + BufferWithRandomAccess& operator<<(U64LEB x) { + if (debug) std::cerr << "writeU64LEB: " << x.value << " (at " << size() << ")" << std::endl; + x.write(this); + return *this; + } + BufferWithRandomAccess& operator<<(S32LEB x) { + if (debug) std::cerr << "writeS32LEB: " << x.value << " (at " << size() << ")" << std::endl; + x.write(this); + return *this; + } + BufferWithRandomAccess& operator<<(S64LEB x) { + if (debug) std::cerr << "writeS64LEB: " << x.value << " (at " << size() << ")" << std::endl; x.write(this); return *this; } @@ -131,15 +189,21 @@ public: } void writeAt(size_t i, uint16_t x) { + if (debug) std::cerr << "backpatchInt16: " << x << " (at " << i << ")" << std::endl; (*this)[i] = x & 0xff; (*this)[i+1] = x >> 8; } void writeAt(size_t i, uint32_t x) { + if (debug) std::cerr << "backpatchInt32: " << x << " (at " << i << ")" << std::endl; (*this)[i] = x & 0xff; x >>= 8; (*this)[i+1] = x & 0xff; x >>= 8; (*this)[i+2] = x & 0xff; x >>= 8; (*this)[i+3] = x & 0xff; } + void writeAt(size_t i, U32LEB x) { + if (debug) std::cerr << "backpatchU32LEB: " << x.value << " (at " << i << ")" << std::endl; + x.writeAt(this, i, 5); // fill all 5 bytes, we have to do this when backpatching + } template <typename T> void writeTo(T& o) { @@ -149,14 +213,18 @@ public: namespace BinaryConsts { -enum Section { - Memory = 0, - Signatures = 1, - Functions = 2, - DataSegments = 4, - FunctionTable = 5, - End = 6, - Start = 7 +namespace Section { + auto Memory = "memory"; + auto Signatures = "signatures"; + auto ImportTable = "import_table"; + auto FunctionSignatures = "function_signatures"; + auto Functions = "functions"; + auto ExportTable = "export_table"; + auto DataSegments = "data_segments"; + auto FunctionTable = "function_table"; + auto Names = "names"; + auto End = "end"; + auto Start = "start_function"; }; enum FunctionEntry { @@ -195,6 +263,7 @@ enum ASTNodes { I32Clz = 0x57, I32Ctz = 0x58, I32Popcnt = 0x59, + I32EqZ = 0xc0, // XXX BoolNot = 0x5a, I64Add = 0x5b, I64Sub = 0x5c, @@ -222,6 +291,7 @@ enum ASTNodes { I64Clz = 0x72, I64Ctz = 0x73, I64Popcnt = 0x74, + I64EqZ = 0xc1, // XXX F32Add = 0x75, F32Sub = 0x76, F32Mul = 0x77, @@ -287,6 +357,10 @@ enum ASTNodes { F64ConvertF32 = 0xb2, F64ReinterpretI64 = 0xb3, I64ReinterpretF64 = 0xb5, + I32RotR = 0xb6, + I32RotL = 0xb7, + I64RotR = 0xb8, + I64RotL = 0xb9, I32ReinterpretF32 = 0xfe, // XXX not in v8 spec doc I32LoadMem8S = 0x20, @@ -313,7 +387,6 @@ enum ASTNodes { F32StoreMem = 0x35, F64StoreMem = 0x36, - I8Const = 0x09, I32Const = 0x0a, I64Const = 0x0b, F64Const = 0x0c, @@ -324,6 +397,7 @@ enum ASTNodes { StoreGlobal = 0x11, CallFunction = 0x12, CallIndirect = 0x13, + CallImport = 0x1f, Nop = 0x00, Block = 0x01, @@ -335,7 +409,8 @@ enum ASTNodes { BrIf = 0x07, TableSwitch = 0x08, Return = 0x14, - Unreachable = 0x15 + Unreachable = 0x15, + EndMarker = 0xff }; enum MemoryAccess { @@ -381,55 +456,81 @@ public: } void write() { - writeStart(); + writeHeader(); writeMemory(); writeSignatures(); + writeImports(); + writeFunctionSignatures(); writeFunctions(); + writeStart(); + writeExports(); writeDataSegments(); writeFunctionTable(); + writeNames(); writeEnd(); finishUp(); } + void writeHeader() { + if (debug) std::cerr << "== writeHeader" << std::endl; + o << int32_t(0x6d736100); // magic number \0asm + o << int32_t(10); // version number + } + + int32_t writeU32LEBPlaceholder() { + int32_t ret = o.size(); + o << int32_t(0); + o << int8_t(0); + return ret; + } + + int32_t startSection(const char* name) { + // emit 5 bytes of 0, which we'll fill with LEB later + auto ret = writeU32LEBPlaceholder(); + writeInlineString(name); + return ret; + } + + void finishSection(int32_t start) { + int32_t size = o.size() - start - 5; // section size does not include the 5 bytes of the size field itself + o.writeAt(start, U32LEB(size)); + } + void writeStart() { if (!wasm->start.is()) return; if (debug) std::cerr << "== writeStart" << std::endl; - o << int8_t(BinaryConsts::Start); - emitString(wasm->start.str); + auto start = startSection(BinaryConsts::Section::Start); + o << U32LEB(getFunctionIndex(wasm->start.str)); + finishSection(start); } void writeMemory() { if (wasm->memory.max == 0) return; if (debug) std::cerr << "== writeMemory" << std::endl; - o << int8_t(BinaryConsts::Memory); - if (wasm->memory.initial == 0) { // XXX diverge from v8, 0 means 0, 1 and above are powers of 2 starting at 0 - o << int8_t(0); - } else { - o << int8_t(std::min(ceil(log2(wasm->memory.initial)), 31.0) + 1); // up to 31 bits, don't let ceil get us to UINT_MAX which can overflow - } - if (wasm->memory.max == 0) { - o << int8_t(0); - } else { - o << int8_t(std::min(ceil(log2(wasm->memory.max)), 31.0) + 1); - } - o << int8_t(1); // export memory + auto start = startSection(BinaryConsts::Section::Memory); + o << U32LEB(wasm->memory.initial) + << U32LEB(wasm->memory.max) + << int8_t(1); // export memory + finishSection(start); } void writeSignatures() { if (wasm->functionTypes.size() == 0) return; if (debug) std::cerr << "== writeSignatures" << std::endl; - o << int8_t(BinaryConsts::Signatures) << LEB128(wasm->functionTypes.size()); + auto start = startSection(BinaryConsts::Section::Signatures); + o << U32LEB(wasm->functionTypes.size()); for (auto* type : wasm->functionTypes) { if (debug) std::cerr << "write one" << std::endl; - o << int8_t(type->params.size()); + o << U32LEB(type->params.size()); o << binaryWasmType(type->result); for (auto param : type->params) { o << binaryWasmType(param); } } + finishSection(start); } - int16_t getFunctionTypeIndex(Name type) { + int32_t getFunctionTypeIndex(Name type) { // TODO: optimize for (size_t i = 0; i < wasm->functionTypes.size(); i++) { if (wasm->functionTypes[i]->name == type) return i; @@ -437,6 +538,20 @@ public: abort(); } + void writeImports() { + if (wasm->imports.size() == 0) return; + if (debug) std::cerr << "== writeImports" << std::endl; + auto start = startSection(BinaryConsts::Section::ImportTable); + o << U32LEB(wasm->imports.size()); + for (auto* import : wasm->imports) { + if (debug) std::cerr << "write one" << std::endl; + o << U32LEB(getFunctionTypeIndex(import->type->name)); + writeInlineString(import->module.str); + writeInlineString(import->base.str); + } + finishSection(start); + } + std::map<Name, size_t> mappedLocals; // local name => index in compact form of [all int32s][all int64s]etc std::map<WasmType, size_t> numLocalsByType; // type => number of locals of that type in the compact form @@ -477,62 +592,66 @@ public: } } + void writeFunctionSignatures() { + if (wasm->functions.size() == 0) return; + if (debug) std::cerr << "== writeFunctionSignatures" << std::endl; + auto start = startSection(BinaryConsts::Section::FunctionSignatures); + o << U32LEB(wasm->functions.size()); + for (auto* curr : wasm->functions) { + if (debug) std::cerr << "write one" << std::endl; + o << U32LEB(getFunctionTypeIndex(curr->type)); + } + finishSection(start); + } + void writeFunctions() { - if (wasm->functions.size() + wasm->imports.size() == 0) return; + if (wasm->functions.size() == 0) return; if (debug) std::cerr << "== writeFunctions" << std::endl; - size_t total = wasm->imports.size() + wasm->functions.size(); - o << int8_t(BinaryConsts::Functions) << LEB128(total); - std::map<Name, Name> exportedFunctions; - for (auto* e : wasm->exports) { - exportedFunctions[e->value] = e->name; - } + auto start = startSection(BinaryConsts::Section::Functions); + size_t total = wasm->functions.size(); + o << U32LEB(total); for (size_t i = 0; i < total; i++) { if (debug) std::cerr << "write one at" << o.size() << std::endl; - Import* import = i < wasm->imports.size() ? wasm->imports[i] : nullptr; - Function* function = i >= wasm->imports.size() ? wasm->functions[i - wasm->imports.size()] : nullptr; - Name name, type; - if (import) { - name = import->name; - type = import->type->name; - } else { - name = function->name; - type = function->type; - mappedLocals.clear(); - numLocalsByType.clear(); - } - if (debug) std::cerr << "writing" << name << std::endl; - bool export_ = exportedFunctions.count(name) > 0; - o << int8_t(BinaryConsts::Named | - (BinaryConsts::Import * !!import) | - (BinaryConsts::Locals * (function && function->locals.size() > 0)) | - (BinaryConsts::Export * export_)); - o << getFunctionTypeIndex(type); - emitString(name.str); - if (export_) emitString(exportedFunctions[name].str); // XXX addition to v8 binary format - if (function) { - mapLocals(function); - if (function->locals.size() > 0) { - o << uint16_t(numLocalsByType[i32]) - << uint16_t(numLocalsByType[i64]) - << uint16_t(numLocalsByType[f32]) - << uint16_t(numLocalsByType[f64]); - } - size_t sizePos = o.size(); - o << (uint32_t)0; // placeholder, we fill in the size later when we have it // XXX int32, diverge from v8 format, to get more code to compile - size_t start = o.size(); - depth = 0; - recurse(function->body); - assert(depth == 0); - size_t size = o.size() - start; - assert(size <= std::numeric_limits<uint16_t>::max()); - if (debug) std::cerr << "body size: " << size << ", writing at " << sizePos << ", next starts at " << o.size() << std::endl; - o.writeAt(sizePos, uint32_t(size)); // XXX int32, diverge from v8 format, to get more code to compile - } else { - // import - emitString(import->module.str); // XXX diverge - emitString(import->base.str); // from v8 - } + size_t sizePos = writeU32LEBPlaceholder(); + size_t start = o.size(); + Function* function = wasm->functions[i]; + mappedLocals.clear(); + numLocalsByType.clear(); + if (debug) std::cerr << "writing" << function->name << std::endl; + mapLocals(function); + o << U32LEB( + (numLocalsByType[i32] ? 1 : 0) + + (numLocalsByType[i64] ? 1 : 0) + + (numLocalsByType[f32] ? 1 : 0) + + (numLocalsByType[f64] ? 1 : 0) + ); + if (numLocalsByType[i32]) o << U32LEB(numLocalsByType[i32]) << binaryWasmType(i32); + if (numLocalsByType[i64]) o << U32LEB(numLocalsByType[i64]) << binaryWasmType(i64); + if (numLocalsByType[f32]) o << U32LEB(numLocalsByType[f32]) << binaryWasmType(f32); + if (numLocalsByType[f64]) o << U32LEB(numLocalsByType[f64]) << binaryWasmType(f64); + depth = 0; + recurse(function->body); + o << int8_t(BinaryConsts::EndMarker); + assert(depth == 0); + size_t size = o.size() - start; + assert(size <= std::numeric_limits<uint32_t>::max()); + if (debug) std::cerr << "body size: " << size << ", writing at " << sizePos << ", next starts at " << o.size() << std::endl; + o.writeAt(sizePos, U32LEB(size)); + } + finishSection(start); + } + + void writeExports() { + if (wasm->exports.size() == 0) return; + if (debug) std::cerr << "== writeexports" << std::endl; + auto start = startSection(BinaryConsts::Section::ExportTable); + o << U32LEB(wasm->exports.size()); + for (auto* curr : wasm->exports) { + if (debug) std::cerr << "write one" << std::endl; + o << U32LEB(getFunctionIndex(curr->value)); + writeInlineString(curr->name.str); } + finishSection(start); } void writeDataSegments() { @@ -541,42 +660,87 @@ public: for (auto& segment : wasm->memory.segments) { if (segment.size > 0) num++; } - o << int8_t(BinaryConsts::DataSegments) << LEB128(num); + auto start = startSection(BinaryConsts::Section::DataSegments); + o << U32LEB(num); for (auto& segment : wasm->memory.segments) { if (segment.size == 0) continue; - o << int32_t(segment.offset); - emitBuffer(segment.data, segment.size); - o << int32_t(segment.size); - o << int8_t(1); // load at program start + o << U32LEB(segment.offset); + writeInlineBuffer(segment.data, segment.size); } + finishSection(start); } - uint16_t getFunctionIndex(Name name) { - // TODO: optimize - for (size_t i = 0; i < wasm->imports.size(); i++) { - if (wasm->imports[i]->name == name) return i; + std::map<Name, uint32_t> mappedImports; // name of the Import => index + uint32_t getImportIndex(Name name) { + if (!mappedImports.size()) { + // Create name => index mapping. + for (size_t i = 0; i < wasm->imports.size(); i++) { + assert(mappedImports.count(wasm->imports[i]->name) == 0); + mappedImports[wasm->imports[i]->name] = i; + } } - for (size_t i = 0; i < wasm->functions.size(); i++) { - if (wasm->functions[i]->name == name) return wasm->imports.size() + i; + assert(mappedImports.count(name)); + return mappedImports[name]; + } + + std::map<Name, uint32_t> mappedFunctions; // name of the Function => index + uint32_t getFunctionIndex(Name name) { + if (!mappedFunctions.size()) { + // Create name => index mapping. + for (size_t i = 0; i < wasm->functions.size(); i++) { + assert(mappedFunctions.count(wasm->functions[i]->name) == 0); + mappedFunctions[wasm->functions[i]->name] = i; + } } - abort(); + assert(mappedFunctions.count(name)); + return mappedFunctions[name]; } void writeFunctionTable() { if (wasm->table.names.size() == 0) return; if (debug) std::cerr << "== writeFunctionTable" << std::endl; - o << int8_t(BinaryConsts::FunctionTable) << LEB128(wasm->table.names.size()); + auto start = startSection(BinaryConsts::Section::FunctionTable); + o << U32LEB(wasm->table.names.size()); for (auto name : wasm->table.names) { - o << getFunctionIndex(name); + o << U32LEB(getFunctionIndex(name)); + } + finishSection(start); + } + + void writeNames() { + if (wasm->functions.size() == 0) return; + if (debug) std::cerr << "== writeNames" << std::endl; + auto start = startSection(BinaryConsts::Section::Names); + o << U32LEB(wasm->functions.size()); + for (auto* curr : wasm->functions) { + writeInlineString(curr->name.str); + o << U32LEB(0); // TODO: locals } + finishSection(start); } void writeEnd() { - o << int8_t(BinaryConsts::End); + auto start = startSection(BinaryConsts::Section::End); + finishSection(start); } // helpers + void writeInlineString(const char* name) { + int32_t size = strlen(name); + o << U32LEB(size); + for (int32_t i = 0; i < size; i++) { + o << int8_t(name[i]); + } + } + + void writeInlineBuffer(const char* data, size_t size) { + o << U32LEB(size); + for (size_t i = 0; i < size; i++) { + o << int8_t(data[i]); + } + } + struct Buffer { const char* data; size_t size; @@ -623,7 +787,7 @@ public: void visitBlock(Block *curr) { if (debug) std::cerr << "zz node: Block" << std::endl; - o << int8_t(BinaryConsts::Block) << LEB128(curr->list.size()); // XXX larger block size, divergence from v8 to get more code to build + o << int8_t(BinaryConsts::Block); breakStack.push_back(curr->name); size_t i = 0; for (auto* child : curr->list) { @@ -631,13 +795,14 @@ public: recurse(child); } breakStack.pop_back(); + o << int8_t(BinaryConsts::EndMarker); } void visitIf(If *curr) { if (debug) std::cerr << "zz node: If" << std::endl; - o << int8_t(curr->ifFalse ? BinaryConsts::IfElse : BinaryConsts::If); recurse(curr->condition); recurse(curr->ifTrue); if (curr->ifFalse) recurse(curr->ifFalse); + o << int8_t(curr->ifFalse ? BinaryConsts::IfElse : BinaryConsts::If); } void visitLoop(Loop *curr) { if (debug) std::cerr << "zz node: Loop" << std::endl; @@ -648,98 +813,86 @@ public: recurse(curr->body); breakStack.pop_back(); breakStack.pop_back(); + o << int8_t(BinaryConsts::EndMarker); } - int getBreakIndex(Name name) { // -1 if not found + int32_t getBreakIndex(Name name) { // -1 if not found for (int i = breakStack.size() - 1; i >= 0; i--) { if (breakStack[i] == name) { return breakStack.size() - 1 - i; } } -printf("bad!\n"); std::cerr << "bad break: " << name << std::endl; - - - -assert(0); - - abort(); } void visitBreak(Break *curr) { if (debug) std::cerr << "zz node: Break" << std::endl; - o << int8_t(curr->condition ? BinaryConsts::BrIf : BinaryConsts::Br); - int offset = getBreakIndex(curr->name); - o << int8_t(offset); if (curr->value) { recurse(curr->value); } else { visitNop(nullptr); } if (curr->condition) recurse(curr->condition); + o << int8_t(curr->condition ? BinaryConsts::BrIf : BinaryConsts::Br) + << U32LEB(getBreakIndex(curr->name)); } void visitSwitch(Switch *curr) { if (debug) std::cerr << "zz node: Switch" << std::endl; - o << int8_t(BinaryConsts::TableSwitch) << int16_t(curr->cases.size()) - << int16_t(curr->targets.size() + 1); - for (size_t i = 0; i < curr->cases.size(); i++) { - breakStack.push_back(curr->cases[i].name); - } - breakStack.push_back(curr->name); + o << int8_t(BinaryConsts::TableSwitch) << U32LEB(curr->targets.size()); for (auto target : curr->targets) { - o << (int16_t)getBreakIndex(target); - } - o << (int16_t)getBreakIndex(curr->default_); - recurse(curr->value); - for (auto& c : curr->cases) { - recurse(c.body); + o << U32LEB(getBreakIndex(target)); } - breakStack.pop_back(); // name - for (size_t i = 0; i < curr->cases.size(); i++) { - breakStack.pop_back(); // case + o << U32LEB(getBreakIndex(curr->default_)); + recurse(curr->condition); + o << int8_t(BinaryConsts::EndMarker); + if (curr->value) { + recurse(curr->value); + } else { + visitNop(nullptr); } + o << int8_t(BinaryConsts::EndMarker); } void visitCall(Call *curr) { if (debug) std::cerr << "zz node: Call" << std::endl; - o << int8_t(BinaryConsts::CallFunction) << LEB128(getFunctionIndex(curr->target)); for (auto* operand : curr->operands) { recurse(operand); } + o << int8_t(BinaryConsts::CallFunction) << U32LEB(getFunctionIndex(curr->target)); } void visitCallImport(CallImport *curr) { if (debug) std::cerr << "zz node: CallImport" << std::endl; - o << int8_t(BinaryConsts::CallFunction) << LEB128(getFunctionIndex(curr->target)); for (auto* operand : curr->operands) { recurse(operand); } + o << int8_t(BinaryConsts::CallImport) << U32LEB(getImportIndex(curr->target)); } void visitCallIndirect(CallIndirect *curr) { if (debug) std::cerr << "zz node: CallIndirect" << std::endl; - o << int8_t(BinaryConsts::CallIndirect) << LEB128(getFunctionTypeIndex(curr->fullType->name)); recurse(curr->target); for (auto* operand : curr->operands) { recurse(operand); } + o << int8_t(BinaryConsts::CallIndirect) << U32LEB(getFunctionTypeIndex(curr->fullType->name)); } void visitGetLocal(GetLocal *curr) { if (debug) std::cerr << "zz node: GetLocal " << (o.size() + 1) << std::endl; - o << int8_t(BinaryConsts::GetLocal) << LEB128(mappedLocals[curr->name]); + o << int8_t(BinaryConsts::GetLocal) << U32LEB(mappedLocals[curr->name]); } void visitSetLocal(SetLocal *curr) { if (debug) std::cerr << "zz node: SetLocal" << std::endl; - o << int8_t(BinaryConsts::SetLocal) << LEB128(mappedLocals[curr->name]); recurse(curr->value); + o << int8_t(BinaryConsts::SetLocal) << U32LEB(mappedLocals[curr->name]); } void emitMemoryAccess(size_t alignment, size_t bytes, uint32_t offset) { - o << int8_t( ((alignment == bytes || alignment == 0) ? BinaryConsts::NaturalAlignment : BinaryConsts::Alignment) | - (offset ? BinaryConsts::Offset : 0) ); - if (offset) o << LEB128(offset); + o << U32LEB(Log2(alignment ? alignment : bytes)); + o << U32LEB(offset); } void visitLoad(Load *curr) { if (debug) std::cerr << "zz node: Load" << std::endl; + recurse(curr->ptr); switch (curr->type) { case i32: { switch (curr->bytes) { @@ -765,10 +918,11 @@ assert(0); default: abort(); } emitMemoryAccess(curr->align, curr->bytes, curr->offset); - recurse(curr->ptr); } void visitStore(Store *curr) { if (debug) std::cerr << "zz node: Store" << std::endl; + recurse(curr->ptr); + recurse(curr->value); switch (curr->type) { case i32: { switch (curr->bytes) { @@ -794,23 +948,16 @@ assert(0); default: abort(); } emitMemoryAccess(curr->align, curr->bytes, curr->offset); - recurse(curr->ptr); - recurse(curr->value); } void visitConst(Const *curr) { if (debug) std::cerr << "zz node: Const" << curr << " : " << curr->type << std::endl; switch (curr->type) { case i32: { - uint32_t value = curr->value.geti32(); - if (value <= 255) { - o << int8_t(BinaryConsts::I8Const) << uint8_t(value); - break; - } - o << int8_t(BinaryConsts::I32Const) << value; + o << int8_t(BinaryConsts::I32Const) << S32LEB(curr->value.geti32()); break; } case i64: { - o << int8_t(BinaryConsts::I64Const) << curr->value.geti64(); + o << int8_t(BinaryConsts::I64Const) << S64LEB(curr->value.geti64()); break; } case f32: { @@ -827,10 +974,12 @@ assert(0); } void visitUnary(Unary *curr) { if (debug) std::cerr << "zz node: Unary" << std::endl; + recurse(curr->value); switch (curr->op) { case Clz: o << int8_t(curr->type == i32 ? BinaryConsts::I32Clz : BinaryConsts::I64Clz); break; case Ctz: o << int8_t(curr->type == i32 ? BinaryConsts::I32Ctz : BinaryConsts::I64Ctz); break; case Popcnt: o << int8_t(curr->type == i32 ? BinaryConsts::I32Popcnt : BinaryConsts::I64Popcnt); break; + case EqZ: o << int8_t(curr->type == i32 ? BinaryConsts::I32EqZ : BinaryConsts::I64EqZ); break; case Neg: o << int8_t(curr->type == f32 ? BinaryConsts::F32Neg : BinaryConsts::F64Neg); break; case Abs: o << int8_t(curr->type == f32 ? BinaryConsts::F32Abs : BinaryConsts::F64Abs); break; case Ceil: o << int8_t(curr->type == f32 ? BinaryConsts::F32Ceil : BinaryConsts::F64Ceil); break; @@ -855,10 +1004,11 @@ assert(0); case ReinterpretInt: o << int8_t(curr->type == i32 ? BinaryConsts::I32ReinterpretF32 : BinaryConsts::I64ReinterpretF64); break; default: abort(); } - recurse(curr->value); } void visitBinary(Binary *curr) { if (debug) std::cerr << "zz node: Binary" << std::endl; + recurse(curr->left); + recurse(curr->right); #define TYPED_CODE(code) { \ switch (getReachableWasmType(curr->left->type, curr->right->type)) { \ case i32: o << int8_t(BinaryConsts::I32##code); break; \ @@ -900,6 +1050,8 @@ assert(0); case Shl: INT_TYPED_CODE(Shl);; case ShrU: INT_TYPED_CODE(ShrU); case ShrS: INT_TYPED_CODE(ShrS); + case RotL: INT_TYPED_CODE(RotL); + case RotR: INT_TYPED_CODE(RotR); case Div: FLOAT_TYPED_CODE(Div); case CopySign: FLOAT_TYPED_CODE(CopySign); case Min: FLOAT_TYPED_CODE(Min); @@ -920,27 +1072,25 @@ assert(0); case Ge: FLOAT_TYPED_CODE(Ge); default: abort(); } - recurse(curr->left); - recurse(curr->right); #undef TYPED_CODE #undef INT_TYPED_CODE #undef FLOAT_TYPED_CODE } void visitSelect(Select *curr) { if (debug) std::cerr << "zz node: Select" << std::endl; - o << int8_t(BinaryConsts::Select); recurse(curr->ifTrue); recurse(curr->ifFalse); recurse(curr->condition); + o << int8_t(BinaryConsts::Select); } void visitReturn(Return *curr) { if (debug) std::cerr << "zz node: Return" << std::endl; - o << int8_t(BinaryConsts::Return); if (curr->value) { recurse(curr->value); } else { visitNop(nullptr); } + o << int8_t(BinaryConsts::Return); } void visitHost(Host *curr) { if (debug) std::cerr << "zz node: Host" << std::endl; @@ -950,8 +1100,8 @@ assert(0); break; } case GrowMemory: { - o << int8_t(BinaryConsts::GrowMemory); recurse(curr->operands[0]); + o << int8_t(BinaryConsts::GrowMemory); break; } default: abort(); @@ -973,37 +1123,58 @@ class WasmBinaryBuilder { std::vector<char>& input; bool debug; - size_t pos; + size_t pos = 0; + int32_t startIndex = -1; public: - WasmBinaryBuilder(AllocatingModule& wasm, std::vector<char>& input, bool debug) : wasm(wasm), allocator(wasm.allocator), input(input), debug(debug), pos(0) {} + WasmBinaryBuilder(AllocatingModule& wasm, std::vector<char>& input, bool debug) : wasm(wasm), allocator(wasm.allocator), input(input), debug(debug) {} void read() { - // read sections until the end - while (1) { - int8_t section = getInt8(); - if (section == BinaryConsts::End) { + readHeader(); + + // read sections until the end + while (more()) { + auto sectionSize = getU32LEB(); + assert(sectionSize < pos + input.size()); + auto nameSize = getU32LEB(); + auto match = [&](const char* name) { + for (size_t i = 0; i < nameSize; i++) { + if (pos + i >= input.size()) return false; + if (name[i] == 0) return false; + if (input[pos + i] != name[i]) return false; + } + if (strlen(name) != nameSize) return false; + pos += nameSize; + return true; + }; + if (match(BinaryConsts::Section::Start)) readStart(); + else if (match(BinaryConsts::Section::Memory)) readMemory(); + else if (match(BinaryConsts::Section::Signatures)) readSignatures(); + else if (match(BinaryConsts::Section::ImportTable)) readImports(); + else if (match(BinaryConsts::Section::FunctionSignatures)) readFunctionSignatures(); + else if (match(BinaryConsts::Section::Functions)) readFunctions(); + else if (match(BinaryConsts::Section::ExportTable)) readExports(); + else if (match(BinaryConsts::Section::DataSegments)) readDataSegments(); + else if (match(BinaryConsts::Section::FunctionTable)) readFunctionTable(); + else if (match(BinaryConsts::Section::Names)) readNames(); + else if (match(BinaryConsts::Section::End)) { if (debug) std::cerr << "== readEnd" << std::endl; break; - } - - switch (section) { - case BinaryConsts::Start: readStart(); break; - case BinaryConsts::Memory: readMemory(); break; - case BinaryConsts::Signatures: readSignatures(); break; - case BinaryConsts::Functions: readFunctions(); break; - case BinaryConsts::DataSegments: readDataSegments(); break; - case BinaryConsts::FunctionTable: readFunctionTable(); break; - default: abort(); + } else { + abort(); } } processFunctions(); } + bool more() { + return pos < input.size(); + } + uint8_t getInt8() { - assert(pos < input.size()); + assert(more()); if (debug) std::cerr << "getInt8: " << (int)(uint8_t)input[pos] << " (at " << pos << ")" << std::endl; return input[pos++]; } @@ -1038,13 +1209,40 @@ public: return ret; } - uint32_t getLEB128() { + uint32_t getU32LEB() { + if (debug) std::cerr << "<==" << std::endl; + U32LEB ret; + ret.read([&]() { + return getInt8(); + }); + if (debug) std::cerr << "getU32LEB: " << ret.value << " ==>" << std::endl; + return ret.value; + } + uint64_t getU64LEB() { if (debug) std::cerr << "<==" << std::endl; - LEB128 ret; + U64LEB ret; ret.read([&]() { return getInt8(); }); - if (debug) std::cerr << "getLEB128: " << ret.value << " ==>" << std::endl; + if (debug) std::cerr << "getU64LEB: " << ret.value << " ==>" << std::endl; + return ret.value; + } + int32_t getS32LEB() { + if (debug) std::cerr << "<==" << std::endl; + S32LEB ret; + ret.read([&]() { + return (int8_t)getInt8(); + }); + if (debug) std::cerr << "getU32LEB: " << ret.value << " ==>" << std::endl; + return ret.value; + } + int64_t getS64LEB() { + if (debug) std::cerr << "<==" << std::endl; + S64LEB ret; + ret.read([&]() { + return (int8_t)getInt8(); + }); + if (debug) std::cerr << "getU64LEB: " << ret.value << " ==>" << std::endl; return ret.value; } WasmType getWasmType() { @@ -1067,6 +1265,17 @@ public: return ret; } + Name getInlineString() { + if (debug) std::cerr << "<==" << std::endl; + auto len = getU32LEB(); + std::string str; + for (size_t i = 0; i < len; i++) { + str = str + char(getInt8()); + } + if (debug) std::cerr << "getInlineString: " << str << " ==>" << std::endl; + return Name(str); + } + void verifyInt8(int8_t x) { int8_t y = getInt8(); assert(x == y); @@ -1092,28 +1301,38 @@ public: assert(x == y); } + void ungetInt8() { + assert(pos > 0); + if (debug) std::cerr << "ungetInt8 (at " << pos << ")" << std::endl; + pos--; + } + + void readHeader() { + if (debug) std::cerr << "== readHeader" << std::endl; + verifyInt32(0x6d736100); + verifyInt32(10); + } + void readStart() { if (debug) std::cerr << "== readStart" << std::endl; - wasm.start = getString(); + startIndex = getU32LEB(); } void readMemory() { if (debug) std::cerr << "== readMemory" << std::endl; - size_t initial = getInt8(); - wasm.memory.initial = initial == 0 ? 0 : std::pow<size_t>(2, initial - 1); - size_t max = getInt8(); - wasm.memory.max = max == 0 ? 0 : std::pow<size_t>(2, max - 1); + wasm.memory.initial = getU32LEB(); + wasm.memory.max = getU32LEB(); verifyInt8(1); // export memory } void readSignatures() { if (debug) std::cerr << "== readSignatures" << std::endl; - size_t numTypes = getLEB128(); + size_t numTypes = getU32LEB(); if (debug) std::cerr << "num: " << numTypes << std::endl; for (size_t i = 0; i < numTypes; i++) { if (debug) std::cerr << "read one" << std::endl; auto curr = allocator.alloc<FunctionType>(); - size_t numParams = getInt8(); + size_t numParams = getU32LEB(); if (debug) std::cerr << "num params: " << numParams << std::endl; curr->result = getWasmType(); for (size_t j = 0; j < numParams; j++) { @@ -1123,7 +1342,37 @@ public: } } - std::vector<Name> mappedFunctions; // index => name of the Import or Function + void readImports() { + if (debug) std::cerr << "== readImports" << std::endl; + size_t num = getU32LEB(); + if (debug) std::cerr << "num: " << num << std::endl; + for (size_t i = 0; i < num; i++) { + if (debug) std::cerr << "read one" << std::endl; + auto curr = allocator.alloc<Import>(); + curr->name = Name(std::string("import$") + std::to_string(i)); + auto index = getU32LEB(); + assert(index < wasm.functionTypes.size()); + curr->type = wasm.functionTypes[index]; + assert(curr->type->name.is()); + curr->module = getInlineString(); + curr->base = getInlineString(); + wasm.addImport(curr); + } + } + + std::vector<FunctionType*> functionTypes; + + void readFunctionSignatures() { + if (debug) std::cerr << "== readFunctionSignatures" << std::endl; + size_t num = getU32LEB(); + if (debug) std::cerr << "num: " << num << std::endl; + for (size_t i = 0; i < num; i++) { + if (debug) std::cerr << "read one" << std::endl; + auto index = getU32LEB(); + assert(index < wasm.functionTypes.size()); + functionTypes.push_back(wasm.functionTypes[index]); + } + } size_t nextLabel; @@ -1131,134 +1380,174 @@ public: return cashew::IString(("label$" + std::to_string(nextLabel++)).c_str(), false); } + // We read functions before we know their names, so we need to backpatch the names later + + std::vector<Function*> functions; // we store functions here before wasm.addFunction after we know their names + std::map<size_t, std::vector<Call*>> functionCalls; // at index i we have all calls to i + void readFunctions() { if (debug) std::cerr << "== readFunctions" << std::endl; - size_t total = getLEB128(); // imports and functions + size_t total = getU32LEB(); for (size_t i = 0; i < total; i++) { if (debug) std::cerr << "read one at " << pos << std::endl; - auto data = getInt8(); - auto type = wasm.functionTypes[getInt16()]; - bool named = data & BinaryConsts::Named; - assert(named); - bool import = data & BinaryConsts::Import; - bool locals = data & BinaryConsts::Locals; - bool export_ = data & BinaryConsts::Export; - Name name = getString(); - if (export_) { // XXX addition to v8 binary format - Name exportName = getString(); - auto e = allocator.alloc<Export>(); - e->name = exportName; - e->value = name; - wasm.addExport(e); + size_t size = getU32LEB(); + assert(size > 0); // we could also check it matches the seen size + auto type = functionTypes[i]; + if (debug) std::cerr << "reading" << i << std::endl; + auto func = allocator.alloc<Function>(); + func->type = type->name; + func->result = type->result; + size_t nextVar = 0; + auto addVar = [&]() { + Name name = cashew::IString(("var$" + std::to_string(nextVar++)).c_str(), false); + return name; + }; + for (size_t j = 0; j < type->params.size(); j++) { + func->params.emplace_back(addVar(), type->params[j]); } - if (debug) std::cerr << "reading" << name << std::endl; - mappedFunctions.push_back(name); - if (import) { - auto imp = allocator.alloc<Import>(); - imp->name = name; - imp->module = getString(); // XXX diverge - imp->base = getString(); // from v8 - imp->type = type; - wasm.addImport(imp); - } else { - auto func = allocator.alloc<Function>(); - func->name = name; - func->type = type->name; - func->result = type->result; - size_t nextVar = 0; - auto addVar = [&]() { - Name name = cashew::IString(("var$" + std::to_string(nextVar++)).c_str(), false); - return name; - }; - for (size_t j = 0; j < type->params.size(); j++) { - func->params.emplace_back(addVar(), type->params[j]); + size_t numLocalTypes = getU32LEB(); + for (size_t t = 0; t < numLocalTypes; t++) { + auto num = getU32LEB(); + auto type = getWasmType(); + while (num > 0) { + func->locals.emplace_back(addVar(), type); + num--; } - if (locals) { - auto addLocals = [&](WasmType type) { - int16_t num = getInt16(); - while (num > 0) { - func->locals.emplace_back(addVar(), type); - num--; - } - }; - addLocals(i32); - addLocals(i64); - addLocals(f32); - addLocals(f64); + } + { + // process the function body + if (debug) std::cerr << "processing function: " << i << std::endl; + nextLabel = 0; + // prepare locals + mappedLocals.clear(); + localTypes.clear(); + for (size_t i = 0; i < func->params.size(); i++) { + mappedLocals.push_back(func->params[i].name); + localTypes[func->params[i].name] = func->params[i].type; + } + for (size_t i = 0; i < func->locals.size(); i++) { + mappedLocals.push_back(func->locals[i].name); + localTypes[func->locals[i].name] = func->locals[i].type; } - size_t size = getInt32(); // XXX int32, diverge from v8 format, to get more code to compile - // we can't read the function yet - it might call other functions that are defined later, - // and we do depend on the function type, as well as the mappedFunctions table. - functions.emplace_back(func, pos, size); - pos += size; - func->body = nullptr; // will be filled later. but we do have the name and the type already. - wasm.addFunction(func); + // process body + assert(breakStack.empty()); + assert(expressionStack.empty()); + depth = 0; + processExpressions(); + assert(expressionStack.size() == 1); + func->body = popExpression(); + assert(depth == 0); + assert(breakStack.empty()); + assert(expressionStack.empty()); } + functions.push_back(func); } } - struct FunctionData { - Function* func; - size_t pos, size; - FunctionData(Function* func, size_t pos, size_t size) : func(func), pos(pos), size(size) {} - }; + std::map<Export*, size_t> exportIndexes; - std::vector<FunctionData> functions; + void readExports() { + if (debug) std::cerr << "== readExports" << std::endl; + size_t num = getU32LEB(); + if (debug) std::cerr << "num: " << num << std::endl; + for (size_t i = 0; i < num; i++) { + if (debug) std::cerr << "read one" << std::endl; + auto curr = allocator.alloc<Export>(); + auto index = getU32LEB(); + assert(index < functionTypes.size()); + curr->name = getInlineString(); + exportIndexes[curr] = index; + } + } std::vector<Name> mappedLocals; // index => local name std::map<Name, WasmType> localTypes; // TODO: optimize std::vector<Name> breakStack; + std::vector<Expression*> expressionStack; + + void processExpressions() { // until an end marker + while (1) { + Expression* curr; + readExpression(curr); + if (!curr) break; // end marker, done with this function/block/etc + expressionStack.push_back(curr); + } + } + + Expression* popExpression() { + assert(expressionStack.size() > 0); + auto ret = expressionStack.back(); + expressionStack.pop_back(); + return ret; + } + void processFunctions() { for (auto& func : functions) { - Function* curr = func.func; - if (debug) std::cerr << "processing function: " << curr->name << std::endl; - pos = func.pos; - nextLabel = 0; - // prepare locals - mappedLocals.clear(); - localTypes.clear(); - for (size_t i = 0; i < curr->params.size(); i++) { - mappedLocals.push_back(curr->params[i].name); - localTypes[curr->params[i].name] = curr->params[i].type; - } - for (size_t i = 0; i < curr->locals.size(); i++) { - mappedLocals.push_back(curr->locals[i].name); - localTypes[curr->locals[i].name] = curr->locals[i].type; + wasm.addFunction(func); + } + // now that we have names for each function, apply things + + if (startIndex >= 0) { + wasm.start = wasm.functions[startIndex]->name; + } + + for (auto& iter : exportIndexes) { + Export* curr = iter.first; + curr->value = wasm.functions[iter.second]->name; + wasm.addExport(curr); + } + + for (auto& iter : functionCalls) { + size_t index = iter.first; + auto& calls = iter.second; + for (auto* call : calls) { + call->target = wasm.functions[index]->name; } - // process body - assert(breakStack.empty()); - depth = 0; - readExpression(curr->body); - assert(depth == 0); - assert(breakStack.empty()); - assert(pos == func.pos + func.size); + } + + for (size_t index : functionTable) { + assert(index < wasm.functions.size()); + wasm.table.names.push_back(wasm.functions[index]->name); } } void readDataSegments() { if (debug) std::cerr << "== readDataSegments" << std::endl; - auto num = getLEB128(); + auto num = getU32LEB(); for (size_t i = 0; i < num; i++) { Memory::Segment curr; - curr.offset = getInt32(); - auto start = getInt32(); - auto size = getInt32(); - auto buffer = malloc(size); - memcpy(buffer, &input[start], size); + curr.offset = getU32LEB(); + auto size = getU32LEB(); + auto buffer = (char*)malloc(size); + for (size_t j = 0; j < size; j++) { + buffer[j] = char(getInt8()); + } curr.data = (const char*)buffer; curr.size = size; wasm.memory.segments.push_back(curr); - verifyInt8(1); // load at program start } } + std::vector<size_t> functionTable; + void readFunctionTable() { if (debug) std::cerr << "== readFunctionTable" << std::endl; - auto num = getLEB128(); + auto num = getU32LEB(); for (size_t i = 0; i < num; i++) { - wasm.table.names.push_back(mappedFunctions[getInt16()]); + auto index = getU32LEB(); + functionTable.push_back(index); + } + } + + void readNames() { + if (debug) std::cerr << "== readNames" << std::endl; + auto num = getU32LEB(); + for (size_t i = 0; i < num; i++) { + functions[i]->name = getInlineString(); + auto numLocals = getU32LEB(); + assert(numLocals == 0); // TODO } } @@ -1278,19 +1567,8 @@ public: case BinaryConsts::Br: case BinaryConsts::BrIf: visitBreak((curr = allocator.alloc<Break>())->cast<Break>(), code); break; // code distinguishes br from br_if case BinaryConsts::TableSwitch: visitSwitch((curr = allocator.alloc<Switch>())->cast<Switch>()); break; - case BinaryConsts::CallFunction: { - // might be an import or not. we have to check here. - Name target = mappedFunctions[getLEB128()]; - assert(target.is()); - if (debug) std::cerr << "call(import?) target: " << target << std::endl; - if (wasm.importsMap.find(target) == wasm.importsMap.end()) { - assert(wasm.functionsMap.find(target) != wasm.functionsMap.end()); - visitCall((curr = allocator.alloc<Call>())->cast<Call>(), target); - } else { - visitCallImport((curr = allocator.alloc<CallImport>())->cast<CallImport>(), target); - } - break; - } + case BinaryConsts::CallFunction: visitCall((curr = allocator.alloc<Call>())->cast<Call>()); break; + case BinaryConsts::CallImport: visitCallImport((curr = allocator.alloc<CallImport>())->cast<CallImport>()); break; case BinaryConsts::CallIndirect: visitCallIndirect((curr = allocator.alloc<CallIndirect>())->cast<CallIndirect>()); break; case BinaryConsts::GetLocal: visitGetLocal((curr = allocator.alloc<GetLocal>())->cast<GetLocal>()); break; case BinaryConsts::SetLocal: visitSetLocal((curr = allocator.alloc<SetLocal>())->cast<SetLocal>()); break; @@ -1298,6 +1576,7 @@ public: case BinaryConsts::Return: visitReturn((curr = allocator.alloc<Return>())->cast<Return>()); break; case BinaryConsts::Nop: visitNop((curr = allocator.alloc<Nop>())->cast<Nop>()); break; case BinaryConsts::Unreachable: visitUnreachable((curr = allocator.alloc<Unreachable>())->cast<Unreachable>()); break; + case BinaryConsts::EndMarker: curr = nullptr; break; default: { // otherwise, the code is a subcode TODO: optimize if (maybeVisit<Binary>(curr, code)) break; @@ -1327,26 +1606,53 @@ public: void visitBlock(Block *curr) { if (debug) std::cerr << "zz node: Block" << std::endl; - auto num = getLEB128(); // XXX larger block size, divergence from v8 to get more code to build - curr->name = getNextLabel(); - breakStack.push_back(curr->name); - for (uint32_t i = 0; i < num; i++) { - if (debug) std::cerr << " " << size_t(curr) << "\n zz Block element " << i << std::endl; - Expression* child; - readExpression(child); - curr->list.push_back(child); + // special-case Block and de-recurse nested blocks in their first position, as that is + // a common pattern that can be very highly nested. + std::vector<Block*> stack; + while (1) { + curr->name = getNextLabel(); + breakStack.push_back(curr->name); + stack.push_back(curr); + if (getInt8() == BinaryConsts::Block) { + // a recursion + curr = allocator.alloc<Block>(); + continue; + } else { + // end of recursion + ungetInt8(); + break; + } } - if (num > 0) { - curr->type = curr->list.back()->type; + Block* last = nullptr; + while (stack.size() > 0) { + curr = stack.back(); + stack.pop_back(); + size_t start = expressionStack.size(); // everything after this, that is left when we see the marker, is ours + if (last) { + // the previous block is our first-position element + expressionStack.push_back(last); + } + last = curr; + processExpressions(); + size_t end = expressionStack.size(); + assert(end >= start); + for (size_t i = start; i < end; i++) { + if (debug) std::cerr << " " << size_t(expressionStack[i]) << "\n zz Block element " << curr->list.size() << std::endl; + curr->list.push_back(expressionStack[i]); + } + expressionStack.resize(start); + curr->finalize(); + breakStack.pop_back(); } - breakStack.pop_back(); } void visitIf(If *curr, uint8_t code) { if (debug) std::cerr << "zz node: If" << std::endl; - readExpression(curr->condition); - readExpression(curr->ifTrue); if (code == BinaryConsts::IfElse) { - readExpression(curr->ifFalse); + curr->ifFalse = popExpression(); + } + curr->ifTrue = popExpression(); + curr->condition = popExpression(); + if (code == BinaryConsts::IfElse) { curr->finalize(); } } @@ -1357,106 +1663,91 @@ public: curr->in = getNextLabel(); breakStack.push_back(curr->out); breakStack.push_back(curr->in); - readExpression(curr->body); + processExpressions(); + curr->body = popExpression(); breakStack.pop_back(); breakStack.pop_back(); curr->finalize(); } - Name getBreakName(int offset) { + Name getBreakName(int32_t offset) { + assert(breakStack.size() - 1 - offset < breakStack.size()); return breakStack[breakStack.size() - 1 - offset]; } void visitBreak(Break *curr, uint8_t code) { if (debug) std::cerr << "zz node: Break" << std::endl; - auto offset = getInt8(); - curr->name = getBreakName(offset); - readExpression(curr->value); - if (code == BinaryConsts::BrIf) readExpression(curr->condition); + curr->name = getBreakName(getU32LEB()); + if (code == BinaryConsts::BrIf) curr->condition = popExpression(); + curr->value = popExpression(); } void visitSwitch(Switch *curr) { if (debug) std::cerr << "zz node: Switch" << std::endl; - auto numCases = getInt16(); - auto numTargets = getInt16(); - for (auto i = 0; i < numCases; i++) { - Switch::Case c; - c.name = getNextLabel(); - curr->cases.push_back(c); - breakStack.push_back(c.name); - } - curr->name = getNextLabel(); - breakStack.push_back(curr->name); - for (auto i = 0; i < numTargets - 1; i++) { - curr->targets.push_back(getBreakName(getInt16())); - } - curr->default_ = getBreakName(getInt16()); - readExpression(curr->value); - for (auto i = 0; i < numCases; i++) { - readExpression(curr->cases[i].body); - } - breakStack.pop_back(); // name - for (size_t i = 0; i < curr->cases.size(); i++) { - breakStack.pop_back(); // case + auto numTargets = getU32LEB(); + for (size_t i = 0; i < numTargets; i++) { + curr->targets.push_back(getBreakName(getU32LEB())); } + curr->default_ = getBreakName(getU32LEB()); + processExpressions(); + curr->condition = popExpression(); + processExpressions(); + curr->value = popExpression(); + if (curr->value->is<Nop>()) curr->value = nullptr; } - void visitCall(Call *curr, Name target) { + void visitCall(Call *curr) { if (debug) std::cerr << "zz node: Call" << std::endl; - curr->target = target; - auto type = wasm.functionTypesMap[wasm.functionsMap[curr->target]->type]; + auto index = getU32LEB(); + auto type = functionTypes[index]; auto num = type->params.size(); + curr->operands.resize(num); for (size_t i = 0; i < num; i++) { - Expression* operand; - readExpression(operand); - curr->operands.push_back(operand); + curr->operands[num - i - 1] = popExpression(); } curr->type = type->result; + functionCalls[index].push_back(curr); } - void visitCallImport(CallImport *curr, Name target) { + void visitCallImport(CallImport *curr) { if (debug) std::cerr << "zz node: CallImport" << std::endl; - curr->target = target; + curr->target = wasm.imports[getU32LEB()]->name; + assert(wasm.importsMap.find(curr->target) != wasm.importsMap.end()); auto type = wasm.importsMap[curr->target]->type; + assert(type); auto num = type->params.size(); + if (debug) std::cerr << "zz node: CallImport " << curr->target << " with type " << type->name << " and " << num << " params\n"; + curr->operands.resize(num); for (size_t i = 0; i < num; i++) { - Expression* operand; - readExpression(operand); - curr->operands.push_back(operand); + curr->operands[num - i - 1] = popExpression(); } curr->type = type->result; } void visitCallIndirect(CallIndirect *curr) { if (debug) std::cerr << "zz node: CallIndirect" << std::endl; - curr->fullType = wasm.functionTypes[getLEB128()]; - readExpression(curr->target); + curr->fullType = wasm.functionTypes[getU32LEB()]; auto num = curr->fullType->params.size(); + curr->operands.resize(num); for (size_t i = 0; i < num; i++) { - Expression* operand; - readExpression(operand); - curr->operands.push_back(operand); + curr->operands[num - i - 1] = popExpression(); } + curr->target = popExpression(); curr->type = curr->fullType->result; } void visitGetLocal(GetLocal *curr) { if (debug) std::cerr << "zz node: GetLocal " << pos << std::endl; - curr->name = mappedLocals[getLEB128()]; + curr->name = mappedLocals[getU32LEB()]; assert(curr->name.is()); curr->type = localTypes[curr->name]; } void visitSetLocal(SetLocal *curr) { if (debug) std::cerr << "zz node: SetLocal" << std::endl; - curr->name = mappedLocals[getLEB128()]; + curr->name = mappedLocals[getU32LEB()]; assert(curr->name.is()); - readExpression(curr->value); + curr->value = popExpression(); curr->type = curr->value->type; } void readMemoryAccess(uint32_t& alignment, size_t bytes, uint32_t& offset) { - auto value = getInt8(); - alignment = value & BinaryConsts::Alignment ? 1 : bytes; - if (value & BinaryConsts::Offset) { - offset = getLEB128(); - } else { - offset = 0; - } + alignment = Pow2(getU32LEB()); + offset = getU32LEB(); } bool maybeVisitImpl(Load *curr, uint8_t code) { @@ -1479,7 +1770,7 @@ public: } if (debug) std::cerr << "zz node: Load" << std::endl; readMemoryAccess(curr->align, curr->bytes, curr->offset); - readExpression(curr->ptr); + curr->ptr = popExpression(); return true; } bool maybeVisitImpl(Store *curr, uint8_t code) { @@ -1497,15 +1788,14 @@ public: } if (debug) std::cerr << "zz node: Store" << std::endl; readMemoryAccess(curr->align, curr->bytes, curr->offset); - readExpression(curr->ptr); - readExpression(curr->value); + curr->value = popExpression(); + curr->ptr = popExpression(); return true; } bool maybeVisitImpl(Const *curr, uint8_t code) { switch (code) { - case BinaryConsts::I8Const: curr->value = Literal(int32_t(getInt8())); break; - case BinaryConsts::I32Const: curr->value = Literal(getInt32()); break; - case BinaryConsts::I64Const: curr->value = Literal(getInt64()); break; + case BinaryConsts::I32Const: curr->value = Literal(getS32LEB()); break; + case BinaryConsts::I64Const: curr->value = Literal(getS64LEB()); break; case BinaryConsts::F32Const: curr->value = Literal(getFloat32()); break; case BinaryConsts::F64Const: curr->value = Literal(getFloat64()); break; default: return false; @@ -1522,6 +1812,8 @@ public: case BinaryConsts::I64Ctz: curr->op = Ctz; curr->type = i64; break; case BinaryConsts::I32Popcnt: curr->op = Popcnt; curr->type = i32; break; case BinaryConsts::I64Popcnt: curr->op = Popcnt; curr->type = i64; break; + case BinaryConsts::I32EqZ: curr->op = EqZ; curr->type = i32; break; + case BinaryConsts::I64EqZ: curr->op = EqZ; curr->type = i64; break; case BinaryConsts::F32Neg: curr->op = Neg; curr->type = f32; break; case BinaryConsts::F64Neg: curr->op = Neg; curr->type = f64; break; case BinaryConsts::F32Abs: curr->op = Abs; curr->type = f32; break; @@ -1569,7 +1861,7 @@ public: default: return false; } if (debug) std::cerr << "zz node: Unary" << std::endl; - readExpression(curr->value); + curr->value = popExpression(); return true; } bool maybeVisitImpl(Binary *curr, uint8_t code) { @@ -1601,6 +1893,8 @@ public: INT_TYPED_CODE(Shl); INT_TYPED_CODE(ShrU); INT_TYPED_CODE(ShrS); + INT_TYPED_CODE(RotL); + INT_TYPED_CODE(RotR); FLOAT_TYPED_CODE(Div); FLOAT_TYPED_CODE(CopySign); FLOAT_TYPED_CODE(Min); @@ -1622,8 +1916,9 @@ public: default: return false; } if (debug) std::cerr << "zz node: Binary" << std::endl; - readExpression(curr->left); - readExpression(curr->right); + curr->right = popExpression(); + curr->left = popExpression(); + curr->finalize(); return true; #undef TYPED_CODE #undef INT_TYPED_CODE @@ -1631,14 +1926,14 @@ public: } void visitSelect(Select *curr) { if (debug) std::cerr << "zz node: Select" << std::endl; - readExpression(curr->ifTrue); - readExpression(curr->ifFalse); - readExpression(curr->condition); + curr->condition = popExpression(); + curr->ifFalse = popExpression(); + curr->ifTrue = popExpression(); curr->finalize(); } void visitReturn(Return *curr) { if (debug) std::cerr << "zz node: Return" << std::endl; - readExpression(curr->value); + curr->value = popExpression(); } bool maybeVisitImpl(Host *curr, uint8_t code) { switch (code) { @@ -1650,12 +1945,13 @@ public: case BinaryConsts::GrowMemory: { curr->op = GrowMemory; curr->operands.resize(1); - readExpression(curr->operands[0]); + curr->operands[0] = popExpression(); break; } default: return false; } if (debug) std::cerr << "zz node: Host" << std::endl; + curr->finalize(); return true; } void visitNop(Nop *curr) { @@ -1669,4 +1965,3 @@ public: } // namespace wasm #endif // wasm_wasm_binary_h - |