diff options
Diffstat (limited to 'src/wasm-binary.h')
-rw-r--r-- | src/wasm-binary.h | 485 |
1 files changed, 292 insertions, 193 deletions
diff --git a/src/wasm-binary.h b/src/wasm-binary.h index 0f404390b..6782eec21 100644 --- a/src/wasm-binary.h +++ b/src/wasm-binary.h @@ -30,32 +30,44 @@ namespace wasm { -struct LEB128 { - uint32_t value; +template<typename T, typename MiniT> +struct LEB { + T value; - LEB128() {} - LEB128(uint32_t value) : value(value) {} + LEB() {} + LEB(T value) : value(value) {} + + bool isSigned() { + return int(MiniT(-1)) < 0; + } + + bool hasMore(T temp, MiniT byte) { + // for signed, we must ensure the last bit has the right sign, as it will zero extend + return isSigned() ? (temp != 0 && int32_t(temp) != -1) || (value >= 0 && (byte & 64)) || (value < 0 && !(byte & 64)): temp; + } void write(std::vector<uint8_t>* out) { - uint32_t temp = value; + T temp = value; + bool more; do { uint8_t byte = temp & 127; temp >>= 7; - if (temp) { + more = hasMore(temp, byte); + if (more) { byte = byte | 128; } out->push_back(byte); - } while (temp); + } while (more); } void writeAt(std::vector<uint8_t>* out, size_t at, size_t minimum = 0) { - uint32_t temp = value; + T temp = value; size_t offset = 0; bool more; do { uint8_t byte = temp & 127; temp >>= 7; - more = temp || offset + 1 < minimum; + more = hasMore(temp, byte) || offset + 1 < minimum; if (more) { byte = byte | 128; } @@ -64,18 +76,33 @@ struct LEB128 { } while (more); } - void read(std::function<uint8_t ()> get) { + void read(std::function<MiniT ()> get) { value = 0; - uint32_t shift = 0; + T shift = 0; + MiniT byte; while (1) { - uint8_t byte = get(); - value |= ((byte & 127) << shift); + byte = get(); + value |= ((T(byte & 127)) << shift); if (!(byte & 128)) break; shift += 7; } + // if signed LEB, then we might need to sign-extend. (compile should optimize this out if not needed) + if (isSigned()) { + shift += 7; + if (byte & 64 && size_t(shift) < 8*sizeof(T)) { + // the highest bit we received was a 1, sign-extend all the rest + value = value | (T(-1) << shift); + assert(value < 0); + } + } } }; +typedef LEB<uint32_t, uint8_t> U32LEB; +typedef LEB<uint64_t, uint8_t> U64LEB; +typedef LEB<int32_t, int8_t> S32LEB; +typedef LEB<int64_t, int8_t> S64LEB; + // // We mostly stream into a buffer as we create the binary format, however, // sometimes we need to backtrack and write to a location behind us - wasm @@ -118,8 +145,23 @@ public: push_back(x & 0xff); return *this; } - BufferWithRandomAccess& operator<<(LEB128 x) { - if (debug) std::cerr << "writeLEB128: " << x.value << " (at " << size() << ")" << std::endl; + BufferWithRandomAccess& operator<<(U32LEB x) { + if (debug) std::cerr << "writeU32LEB: " << x.value << " (at " << size() << ")" << std::endl; + x.write(this); + return *this; + } + BufferWithRandomAccess& operator<<(U64LEB x) { + if (debug) std::cerr << "writeU64LEB: " << x.value << " (at " << size() << ")" << std::endl; + x.write(this); + return *this; + } + BufferWithRandomAccess& operator<<(S32LEB x) { + if (debug) std::cerr << "writeS32LEB: " << x.value << " (at " << size() << ")" << std::endl; + x.write(this); + return *this; + } + BufferWithRandomAccess& operator<<(S64LEB x) { + if (debug) std::cerr << "writeS64LEB: " << x.value << " (at " << size() << ")" << std::endl; x.write(this); return *this; } @@ -158,9 +200,9 @@ public: (*this)[i+2] = x & 0xff; x >>= 8; (*this)[i+3] = x & 0xff; } - void writeAt(size_t i, LEB128 x, size_t minimum = 0) { - if (debug) std::cerr << "backpatchLEB128: " << x.value << " (at " << i << "), minimum " << minimum << std::endl; - x.writeAt(this, i, minimum); + void writeAt(size_t i, U32LEB x) { + if (debug) std::cerr << "backpatchU32LEB: " << x.value << " (at " << i << ")" << std::endl; + x.writeAt(this, i, 5); // fill all 5 bytes, we have to do this when backpatching } template <typename T> @@ -180,6 +222,7 @@ namespace Section { auto ExportTable = "export_table"; auto DataSegments = "data_segments"; auto FunctionTable = "function_table"; + auto Names = "names"; auto End = "end"; auto Start = "start_function"; }; @@ -220,6 +263,7 @@ enum ASTNodes { I32Clz = 0x57, I32Ctz = 0x58, I32Popcnt = 0x59, + I32EqZ = 0xc0, // XXX BoolNot = 0x5a, I64Add = 0x5b, I64Sub = 0x5c, @@ -247,6 +291,7 @@ enum ASTNodes { I64Clz = 0x72, I64Ctz = 0x73, I64Popcnt = 0x74, + I64EqZ = 0xc1, // XXX F32Add = 0x75, F32Sub = 0x76, F32Mul = 0x77, @@ -342,7 +387,6 @@ enum ASTNodes { F32StoreMem = 0x35, F64StoreMem = 0x36, - I8Const = 0x09, I32Const = 0x0a, I64Const = 0x0b, F64Const = 0x0c, @@ -422,6 +466,7 @@ public: writeExports(); writeDataSegments(); writeFunctionTable(); + writeNames(); writeEnd(); finishUp(); } @@ -432,25 +477,30 @@ public: o << int32_t(10); // version number } - int32_t startSection(const char* name) { - // emit 5 bytes of 0, which we'll fill with LEB later + int32_t writeU32LEBPlaceholder() { int32_t ret = o.size(); o << int32_t(0); o << int8_t(0); + return ret; + } + + int32_t startSection(const char* name) { + // emit 5 bytes of 0, which we'll fill with LEB later + auto ret = writeU32LEBPlaceholder(); writeInlineString(name); return ret; } void finishSection(int32_t start) { int32_t size = o.size() - start - 5; // section size does not include the 5 bytes of the size field itself - o.writeAt(start, LEB128(size), 5); + o.writeAt(start, U32LEB(size)); } void writeStart() { if (!wasm->start.is()) return; if (debug) std::cerr << "== writeStart" << std::endl; auto start = startSection(BinaryConsts::Section::Start); - o << LEB128(getFunctionIndex(wasm->start.str)); + o << U32LEB(getFunctionIndex(wasm->start.str)); finishSection(start); } @@ -458,8 +508,8 @@ public: if (wasm->memory.max == 0) return; if (debug) std::cerr << "== writeMemory" << std::endl; auto start = startSection(BinaryConsts::Section::Memory); - o << LEB128(wasm->memory.initial) - << LEB128(wasm->memory.max) + o << U32LEB(wasm->memory.initial) + << U32LEB(wasm->memory.max) << int8_t(1); // export memory finishSection(start); } @@ -468,10 +518,10 @@ public: if (wasm->functionTypes.size() == 0) return; if (debug) std::cerr << "== writeSignatures" << std::endl; auto start = startSection(BinaryConsts::Section::Signatures); - o << LEB128(wasm->functionTypes.size()); + o << U32LEB(wasm->functionTypes.size()); for (auto* type : wasm->functionTypes) { if (debug) std::cerr << "write one" << std::endl; - o << LEB128(type->params.size()); + o << U32LEB(type->params.size()); o << binaryWasmType(type->result); for (auto param : type->params) { o << binaryWasmType(param); @@ -492,10 +542,10 @@ public: if (wasm->imports.size() == 0) return; if (debug) std::cerr << "== writeImports" << std::endl; auto start = startSection(BinaryConsts::Section::ImportTable); - o << LEB128(wasm->imports.size()); + o << U32LEB(wasm->imports.size()); for (auto* import : wasm->imports) { if (debug) std::cerr << "write one" << std::endl; - o << LEB128(getFunctionTypeIndex(import->type->name)); + o << U32LEB(getFunctionTypeIndex(import->type->name)); writeInlineString(import->module.str); writeInlineString(import->base.str); } @@ -546,10 +596,10 @@ public: if (wasm->functions.size() == 0) return; if (debug) std::cerr << "== writeFunctionSignatures" << std::endl; auto start = startSection(BinaryConsts::Section::FunctionSignatures); - o << LEB128(wasm->functions.size()); + o << U32LEB(wasm->functions.size()); for (auto* curr : wasm->functions) { if (debug) std::cerr << "write one" << std::endl; - o << LEB128(getFunctionTypeIndex(curr->type)); + o << U32LEB(getFunctionTypeIndex(curr->type)); } finishSection(start); } @@ -559,29 +609,26 @@ public: if (debug) std::cerr << "== writeFunctions" << std::endl; auto start = startSection(BinaryConsts::Section::Functions); size_t total = wasm->functions.size(); - o << LEB128(total); + o << U32LEB(total); for (size_t i = 0; i < total; i++) { if (debug) std::cerr << "write one at" << o.size() << std::endl; + size_t sizePos = writeU32LEBPlaceholder(); + size_t start = o.size(); Function* function = wasm->functions[i]; - Name name, type; - name = function->name; - type = function->type; mappedLocals.clear(); numLocalsByType.clear(); - if (debug) std::cerr << "writing" << name << std::endl; - o << int8_t(BinaryConsts::Named | - (BinaryConsts::Locals * (function && function->locals.size() > 0))); - emitString(name.str); + if (debug) std::cerr << "writing" << function->name << std::endl; mapLocals(function); - if (function->locals.size() > 0) { - o << uint16_t(numLocalsByType[i32]) - << uint16_t(numLocalsByType[i64]) - << uint16_t(numLocalsByType[f32]) - << uint16_t(numLocalsByType[f64]); - } - size_t sizePos = o.size(); - o << (uint32_t)0; // placeholder, we fill in the size later when we have it // XXX int32, diverge from v8 format, to get more code to compile - size_t start = o.size(); + o << U32LEB( + (numLocalsByType[i32] ? 1 : 0) + + (numLocalsByType[i64] ? 1 : 0) + + (numLocalsByType[f32] ? 1 : 0) + + (numLocalsByType[f64] ? 1 : 0) + ); + if (numLocalsByType[i32]) o << U32LEB(numLocalsByType[i32]) << binaryWasmType(i32); + if (numLocalsByType[i64]) o << U32LEB(numLocalsByType[i64]) << binaryWasmType(i64); + if (numLocalsByType[f32]) o << U32LEB(numLocalsByType[f32]) << binaryWasmType(f32); + if (numLocalsByType[f64]) o << U32LEB(numLocalsByType[f64]) << binaryWasmType(f64); depth = 0; recurse(function->body); o << int8_t(BinaryConsts::EndMarker); @@ -589,7 +636,7 @@ public: size_t size = o.size() - start; assert(size <= std::numeric_limits<uint32_t>::max()); if (debug) std::cerr << "body size: " << size << ", writing at " << sizePos << ", next starts at " << o.size() << std::endl; - o.writeAt(sizePos, uint32_t(size)); // XXX int32, diverge from v8 format, to get more code to compile + o.writeAt(sizePos, U32LEB(size)); } finishSection(start); } @@ -598,10 +645,10 @@ public: if (wasm->exports.size() == 0) return; if (debug) std::cerr << "== writeexports" << std::endl; auto start = startSection(BinaryConsts::Section::ExportTable); - o << LEB128(wasm->exports.size()); + o << U32LEB(wasm->exports.size()); for (auto* curr : wasm->exports) { if (debug) std::cerr << "write one" << std::endl; - o << LEB128(getFunctionIndex(curr->value)); + o << U32LEB(getFunctionIndex(curr->value)); writeInlineString(curr->name.str); } finishSection(start); @@ -614,10 +661,10 @@ public: if (segment.size > 0) num++; } auto start = startSection(BinaryConsts::Section::DataSegments); - o << LEB128(num); + o << U32LEB(num); for (auto& segment : wasm->memory.segments) { if (segment.size == 0) continue; - o << LEB128(segment.offset); + o << U32LEB(segment.offset); writeInlineBuffer(segment.data, segment.size); } finishSection(start); @@ -653,9 +700,21 @@ public: if (wasm->table.names.size() == 0) return; if (debug) std::cerr << "== writeFunctionTable" << std::endl; auto start = startSection(BinaryConsts::Section::FunctionTable); - o << LEB128(wasm->table.names.size()); + o << U32LEB(wasm->table.names.size()); for (auto name : wasm->table.names) { - o << LEB128(getFunctionIndex(name)); + o << U32LEB(getFunctionIndex(name)); + } + finishSection(start); + } + + void writeNames() { + if (wasm->functions.size() == 0) return; + if (debug) std::cerr << "== writeNames" << std::endl; + auto start = startSection(BinaryConsts::Section::Names); + o << U32LEB(wasm->functions.size()); + for (auto* curr : wasm->functions) { + writeInlineString(curr->name.str); + o << U32LEB(0); // TODO: locals } finishSection(start); } @@ -669,14 +728,14 @@ public: void writeInlineString(const char* name) { int32_t size = strlen(name); - o << LEB128(size); + o << U32LEB(size); for (int32_t i = 0; i < size; i++) { o << int8_t(name[i]); } } void writeInlineBuffer(const char* data, size_t size) { - o << LEB128(size); + o << U32LEB(size); for (size_t i = 0; i < size; i++) { o << int8_t(data[i]); } @@ -776,35 +835,37 @@ public: } if (curr->condition) recurse(curr->condition); o << int8_t(curr->condition ? BinaryConsts::BrIf : BinaryConsts::Br) - << int32_t(getBreakIndex(curr->name)); + << U32LEB(getBreakIndex(curr->name)); } void visitSwitch(Switch *curr) { if (debug) std::cerr << "zz node: Switch" << std::endl; - o << int8_t(BinaryConsts::TableSwitch) << int16_t(curr->targets.size() + 1) << int8_t(curr->value != nullptr); + o << int8_t(BinaryConsts::TableSwitch) << U32LEB(curr->targets.size()); for (auto target : curr->targets) { - o << (int32_t)getBreakIndex(target); + o << U32LEB(getBreakIndex(target)); } - o << (int32_t)getBreakIndex(curr->default_); + o << U32LEB(getBreakIndex(curr->default_)); recurse(curr->condition); o << int8_t(BinaryConsts::EndMarker); if (curr->value) { recurse(curr->value); - o << int8_t(BinaryConsts::EndMarker); + } else { + visitNop(nullptr); } + o << int8_t(BinaryConsts::EndMarker); } void visitCall(Call *curr) { if (debug) std::cerr << "zz node: Call" << std::endl; for (auto* operand : curr->operands) { recurse(operand); } - o << int8_t(BinaryConsts::CallFunction) << LEB128(getFunctionIndex(curr->target)); + o << int8_t(BinaryConsts::CallFunction) << U32LEB(getFunctionIndex(curr->target)); } void visitCallImport(CallImport *curr) { if (debug) std::cerr << "zz node: CallImport" << std::endl; for (auto* operand : curr->operands) { recurse(operand); } - o << int8_t(BinaryConsts::CallImport) << LEB128(getImportIndex(curr->target)); + o << int8_t(BinaryConsts::CallImport) << U32LEB(getImportIndex(curr->target)); } void visitCallIndirect(CallIndirect *curr) { if (debug) std::cerr << "zz node: CallIndirect" << std::endl; @@ -812,22 +873,21 @@ public: for (auto* operand : curr->operands) { recurse(operand); } - o << int8_t(BinaryConsts::CallIndirect) << LEB128(getFunctionTypeIndex(curr->fullType->name)); + o << int8_t(BinaryConsts::CallIndirect) << U32LEB(getFunctionTypeIndex(curr->fullType->name)); } void visitGetLocal(GetLocal *curr) { if (debug) std::cerr << "zz node: GetLocal " << (o.size() + 1) << std::endl; - o << int8_t(BinaryConsts::GetLocal) << LEB128(mappedLocals[curr->name]); + o << int8_t(BinaryConsts::GetLocal) << U32LEB(mappedLocals[curr->name]); } void visitSetLocal(SetLocal *curr) { if (debug) std::cerr << "zz node: SetLocal" << std::endl; recurse(curr->value); - o << int8_t(BinaryConsts::SetLocal) << LEB128(mappedLocals[curr->name]); + o << int8_t(BinaryConsts::SetLocal) << U32LEB(mappedLocals[curr->name]); } void emitMemoryAccess(size_t alignment, size_t bytes, uint32_t offset) { - o << int8_t( ((alignment == bytes || alignment == 0) ? BinaryConsts::NaturalAlignment : BinaryConsts::Alignment) | - (offset ? BinaryConsts::Offset : 0) ); - if (offset) o << LEB128(offset); + o << U32LEB(Log2(alignment ? alignment : bytes)); + o << U32LEB(offset); } void visitLoad(Load *curr) { @@ -893,16 +953,11 @@ public: if (debug) std::cerr << "zz node: Const" << curr << " : " << curr->type << std::endl; switch (curr->type) { case i32: { - uint32_t value = curr->value.geti32(); - if (value <= 255) { - o << int8_t(BinaryConsts::I8Const) << uint8_t(value); - break; - } - o << int8_t(BinaryConsts::I32Const) << value; + o << int8_t(BinaryConsts::I32Const) << S32LEB(curr->value.geti32()); break; } case i64: { - o << int8_t(BinaryConsts::I64Const) << curr->value.geti64(); + o << int8_t(BinaryConsts::I64Const) << S64LEB(curr->value.geti64()); break; } case f32: { @@ -924,6 +979,7 @@ public: case Clz: o << int8_t(curr->type == i32 ? BinaryConsts::I32Clz : BinaryConsts::I64Clz); break; case Ctz: o << int8_t(curr->type == i32 ? BinaryConsts::I32Ctz : BinaryConsts::I64Ctz); break; case Popcnt: o << int8_t(curr->type == i32 ? BinaryConsts::I32Popcnt : BinaryConsts::I64Popcnt); break; + case EqZ: o << int8_t(curr->type == i32 ? BinaryConsts::I32EqZ : BinaryConsts::I64EqZ); break; case Neg: o << int8_t(curr->type == f32 ? BinaryConsts::F32Neg : BinaryConsts::F64Neg); break; case Abs: o << int8_t(curr->type == f32 ? BinaryConsts::F32Abs : BinaryConsts::F64Abs); break; case Ceil: o << int8_t(curr->type == f32 ? BinaryConsts::F32Ceil : BinaryConsts::F64Ceil); break; @@ -1067,10 +1123,11 @@ class WasmBinaryBuilder { std::vector<char>& input; bool debug; - size_t pos; + size_t pos = 0; + int32_t startIndex = -1; public: - WasmBinaryBuilder(AllocatingModule& wasm, std::vector<char>& input, bool debug) : wasm(wasm), allocator(wasm.allocator), input(input), debug(debug), pos(0) {} + WasmBinaryBuilder(AllocatingModule& wasm, std::vector<char>& input, bool debug) : wasm(wasm), allocator(wasm.allocator), input(input), debug(debug) {} void read() { @@ -1078,9 +1135,9 @@ public: // read sections until the end while (more()) { - auto sectionSize = getLEB128(); + auto sectionSize = getU32LEB(); assert(sectionSize < pos + input.size()); - auto nameSize = getLEB128(); + auto nameSize = getU32LEB(); auto match = [&](const char* name) { for (size_t i = 0; i < nameSize; i++) { if (pos + i >= input.size()) return false; @@ -1100,6 +1157,7 @@ public: else if (match(BinaryConsts::Section::ExportTable)) readExports(); else if (match(BinaryConsts::Section::DataSegments)) readDataSegments(); else if (match(BinaryConsts::Section::FunctionTable)) readFunctionTable(); + else if (match(BinaryConsts::Section::Names)) readNames(); else if (match(BinaryConsts::Section::End)) { if (debug) std::cerr << "== readEnd" << std::endl; break; @@ -1151,13 +1209,40 @@ public: return ret; } - uint32_t getLEB128() { + uint32_t getU32LEB() { + if (debug) std::cerr << "<==" << std::endl; + U32LEB ret; + ret.read([&]() { + return getInt8(); + }); + if (debug) std::cerr << "getU32LEB: " << ret.value << " ==>" << std::endl; + return ret.value; + } + uint64_t getU64LEB() { if (debug) std::cerr << "<==" << std::endl; - LEB128 ret; + U64LEB ret; ret.read([&]() { return getInt8(); }); - if (debug) std::cerr << "getLEB128: " << ret.value << " ==>" << std::endl; + if (debug) std::cerr << "getU64LEB: " << ret.value << " ==>" << std::endl; + return ret.value; + } + int32_t getS32LEB() { + if (debug) std::cerr << "<==" << std::endl; + S32LEB ret; + ret.read([&]() { + return (int8_t)getInt8(); + }); + if (debug) std::cerr << "getU32LEB: " << ret.value << " ==>" << std::endl; + return ret.value; + } + int64_t getS64LEB() { + if (debug) std::cerr << "<==" << std::endl; + S64LEB ret; + ret.read([&]() { + return (int8_t)getInt8(); + }); + if (debug) std::cerr << "getU64LEB: " << ret.value << " ==>" << std::endl; return ret.value; } WasmType getWasmType() { @@ -1182,7 +1267,7 @@ public: Name getInlineString() { if (debug) std::cerr << "<==" << std::endl; - auto len = getLEB128(); + auto len = getU32LEB(); std::string str; for (size_t i = 0; i < len; i++) { str = str + char(getInt8()); @@ -1230,24 +1315,24 @@ public: void readStart() { if (debug) std::cerr << "== readStart" << std::endl; - wasm.start = wasm.functions[getLEB128()]->name; + startIndex = getU32LEB(); } void readMemory() { if (debug) std::cerr << "== readMemory" << std::endl; - wasm.memory.initial = getLEB128(); - wasm.memory.max = getLEB128(); + wasm.memory.initial = getU32LEB(); + wasm.memory.max = getU32LEB(); verifyInt8(1); // export memory } void readSignatures() { if (debug) std::cerr << "== readSignatures" << std::endl; - size_t numTypes = getLEB128(); + size_t numTypes = getU32LEB(); if (debug) std::cerr << "num: " << numTypes << std::endl; for (size_t i = 0; i < numTypes; i++) { if (debug) std::cerr << "read one" << std::endl; auto curr = allocator.alloc<FunctionType>(); - size_t numParams = getLEB128(); + size_t numParams = getU32LEB(); if (debug) std::cerr << "num params: " << numParams << std::endl; curr->result = getWasmType(); for (size_t j = 0; j < numParams; j++) { @@ -1259,13 +1344,13 @@ public: void readImports() { if (debug) std::cerr << "== readImports" << std::endl; - size_t num = getLEB128(); + size_t num = getU32LEB(); if (debug) std::cerr << "num: " << num << std::endl; for (size_t i = 0; i < num; i++) { if (debug) std::cerr << "read one" << std::endl; auto curr = allocator.alloc<Import>(); curr->name = Name(std::string("import$") + std::to_string(i)); - auto index = getLEB128(); + auto index = getU32LEB(); assert(index < wasm.functionTypes.size()); curr->type = wasm.functionTypes[index]; assert(curr->type->name.is()); @@ -1279,11 +1364,11 @@ public: void readFunctionSignatures() { if (debug) std::cerr << "== readFunctionSignatures" << std::endl; - size_t num = getLEB128(); + size_t num = getU32LEB(); if (debug) std::cerr << "num: " << num << std::endl; for (size_t i = 0; i < num; i++) { if (debug) std::cerr << "read one" << std::endl; - auto index = getLEB128(); + auto index = getU32LEB(); assert(index < wasm.functionTypes.size()); functionTypes.push_back(wasm.functionTypes[index]); } @@ -1295,20 +1380,21 @@ public: return cashew::IString(("label$" + std::to_string(nextLabel++)).c_str(), false); } + // We read functions before we know their names, so we need to backpatch the names later + + std::vector<Function*> functions; // we store functions here before wasm.addFunction after we know their names + std::map<size_t, std::vector<Call*>> functionCalls; // at index i we have all calls to i + void readFunctions() { if (debug) std::cerr << "== readFunctions" << std::endl; - size_t total = getLEB128(); + size_t total = getU32LEB(); for (size_t i = 0; i < total; i++) { if (debug) std::cerr << "read one at " << pos << std::endl; - auto data = getInt8(); + size_t size = getU32LEB(); + assert(size > 0); // we could also check it matches the seen size auto type = functionTypes[i]; - bool named = data & BinaryConsts::Named; - assert(named); - bool locals = data & BinaryConsts::Locals; - Name name = getString(); - if (debug) std::cerr << "reading" << name << std::endl; + if (debug) std::cerr << "reading" << i << std::endl; auto func = allocator.alloc<Function>(); - func->name = name; func->type = type->name; func->result = type->result; size_t nextVar = 0; @@ -1319,53 +1405,61 @@ public: for (size_t j = 0; j < type->params.size(); j++) { func->params.emplace_back(addVar(), type->params[j]); } - if (locals) { - auto addLocals = [&](WasmType type) { - int16_t num = getInt16(); - while (num > 0) { - func->locals.emplace_back(addVar(), type); - num--; - } - }; - addLocals(i32); - addLocals(i64); - addLocals(f32); - addLocals(f64); + size_t numLocalTypes = getU32LEB(); + for (size_t t = 0; t < numLocalTypes; t++) { + auto num = getU32LEB(); + auto type = getWasmType(); + while (num > 0) { + func->locals.emplace_back(addVar(), type); + num--; + } } - size_t size = getInt32(); // XXX int32, diverge from v8 format, to get more code to compile - // we can't read the function yet - it might call other functions that are defined later, - // and we do depend on the function type. - functions.emplace_back(func, pos, size); - pos += size; - func->body = nullptr; // will be filled later. but we do have the name and the type already. - wasm.addFunction(func); + { + // process the function body + if (debug) std::cerr << "processing function: " << i << std::endl; + nextLabel = 0; + // prepare locals + mappedLocals.clear(); + localTypes.clear(); + for (size_t i = 0; i < func->params.size(); i++) { + mappedLocals.push_back(func->params[i].name); + localTypes[func->params[i].name] = func->params[i].type; + } + for (size_t i = 0; i < func->locals.size(); i++) { + mappedLocals.push_back(func->locals[i].name); + localTypes[func->locals[i].name] = func->locals[i].type; + } + // process body + assert(breakStack.empty()); + assert(expressionStack.empty()); + depth = 0; + processExpressions(); + assert(expressionStack.size() == 1); + func->body = popExpression(); + assert(depth == 0); + assert(breakStack.empty()); + assert(expressionStack.empty()); + } + functions.push_back(func); } } + std::map<Export*, size_t> exportIndexes; + void readExports() { if (debug) std::cerr << "== readExports" << std::endl; - size_t num = getLEB128(); + size_t num = getU32LEB(); if (debug) std::cerr << "num: " << num << std::endl; for (size_t i = 0; i < num; i++) { if (debug) std::cerr << "read one" << std::endl; auto curr = allocator.alloc<Export>(); - auto index = getLEB128(); - assert(index < wasm.functions.size()); - curr->value = wasm.functions[index]->name; - assert(curr->value.is()); + auto index = getU32LEB(); + assert(index < functionTypes.size()); curr->name = getInlineString(); - wasm.addExport(curr); + exportIndexes[curr] = index; } } - struct FunctionData { - Function* func; - size_t pos, size; - FunctionData(Function* func, size_t pos, size_t size) : func(func), pos(pos), size(size) {} - }; - - std::vector<FunctionData> functions; - std::vector<Name> mappedLocals; // index => local name std::map<Name, WasmType> localTypes; // TODO: optimize @@ -1391,42 +1485,41 @@ public: void processFunctions() { for (auto& func : functions) { - Function* curr = func.func; - if (debug) std::cerr << "processing function: " << curr->name << std::endl; - pos = func.pos; - nextLabel = 0; - // prepare locals - mappedLocals.clear(); - localTypes.clear(); - for (size_t i = 0; i < curr->params.size(); i++) { - mappedLocals.push_back(curr->params[i].name); - localTypes[curr->params[i].name] = curr->params[i].type; - } - for (size_t i = 0; i < curr->locals.size(); i++) { - mappedLocals.push_back(curr->locals[i].name); - localTypes[curr->locals[i].name] = curr->locals[i].type; + wasm.addFunction(func); + } + // now that we have names for each function, apply things + + if (startIndex >= 0) { + wasm.start = wasm.functions[startIndex]->name; + } + + for (auto& iter : exportIndexes) { + Export* curr = iter.first; + curr->value = wasm.functions[iter.second]->name; + wasm.addExport(curr); + } + + for (auto& iter : functionCalls) { + size_t index = iter.first; + auto& calls = iter.second; + for (auto* call : calls) { + call->target = wasm.functions[index]->name; } - // process body - assert(breakStack.empty()); - assert(expressionStack.empty()); - depth = 0; - processExpressions(); - assert(expressionStack.size() == 1); - curr->body = popExpression(); - assert(depth == 0); - assert(breakStack.empty()); - assert(expressionStack.empty()); - assert(pos == func.pos + func.size); + } + + for (size_t index : functionTable) { + assert(index < wasm.functions.size()); + wasm.table.names.push_back(wasm.functions[index]->name); } } void readDataSegments() { if (debug) std::cerr << "== readDataSegments" << std::endl; - auto num = getLEB128(); + auto num = getU32LEB(); for (size_t i = 0; i < num; i++) { Memory::Segment curr; - curr.offset = getLEB128(); - auto size = getLEB128(); + curr.offset = getU32LEB(); + auto size = getU32LEB(); auto buffer = (char*)malloc(size); for (size_t j = 0; j < size; j++) { buffer[j] = char(getInt8()); @@ -1437,13 +1530,24 @@ public: } } + std::vector<size_t> functionTable; + void readFunctionTable() { if (debug) std::cerr << "== readFunctionTable" << std::endl; - auto num = getLEB128(); + auto num = getU32LEB(); for (size_t i = 0; i < num; i++) { - auto index = getLEB128(); - assert(index < wasm.functions.size()); - wasm.table.names.push_back(wasm.functions[index]->name); + auto index = getU32LEB(); + functionTable.push_back(index); + } + } + + void readNames() { + if (debug) std::cerr << "== readNames" << std::endl; + auto num = getU32LEB(); + for (size_t i = 0; i < num; i++) { + functions[i]->name = getInlineString(); + auto numLocals = getU32LEB(); + assert(numLocals == 0); // TODO } } @@ -1573,39 +1677,38 @@ public: void visitBreak(Break *curr, uint8_t code) { if (debug) std::cerr << "zz node: Break" << std::endl; - curr->name = getBreakName(getInt32()); + curr->name = getBreakName(getU32LEB()); if (code == BinaryConsts::BrIf) curr->condition = popExpression(); curr->value = popExpression(); } void visitSwitch(Switch *curr) { if (debug) std::cerr << "zz node: Switch" << std::endl; - auto numTargets = getInt16(); - auto hasValue = getInt8(); - for (auto i = 0; i < numTargets - 1; i++) { - curr->targets.push_back(getBreakName(getInt32())); + auto numTargets = getU32LEB(); + for (size_t i = 0; i < numTargets; i++) { + curr->targets.push_back(getBreakName(getU32LEB())); } - curr->default_ = getBreakName(getInt32()); + curr->default_ = getBreakName(getU32LEB()); processExpressions(); curr->condition = popExpression(); - if (hasValue) { - processExpressions(); - curr->value = popExpression(); - } + processExpressions(); + curr->value = popExpression(); + if (curr->value->is<Nop>()) curr->value = nullptr; } void visitCall(Call *curr) { if (debug) std::cerr << "zz node: Call" << std::endl; - curr->target = wasm.functions[getLEB128()]->name; - auto type = wasm.functionTypesMap[wasm.functionsMap[curr->target]->type]; + auto index = getU32LEB(); + auto type = functionTypes[index]; auto num = type->params.size(); curr->operands.resize(num); for (size_t i = 0; i < num; i++) { curr->operands[num - i - 1] = popExpression(); } curr->type = type->result; + functionCalls[index].push_back(curr); } void visitCallImport(CallImport *curr) { if (debug) std::cerr << "zz node: CallImport" << std::endl; - curr->target = wasm.imports[getLEB128()]->name; + curr->target = wasm.imports[getU32LEB()]->name; assert(wasm.importsMap.find(curr->target) != wasm.importsMap.end()); auto type = wasm.importsMap[curr->target]->type; assert(type); @@ -1619,7 +1722,7 @@ public: } void visitCallIndirect(CallIndirect *curr) { if (debug) std::cerr << "zz node: CallIndirect" << std::endl; - curr->fullType = wasm.functionTypes[getLEB128()]; + curr->fullType = wasm.functionTypes[getU32LEB()]; auto num = curr->fullType->params.size(); curr->operands.resize(num); for (size_t i = 0; i < num; i++) { @@ -1630,26 +1733,21 @@ public: } void visitGetLocal(GetLocal *curr) { if (debug) std::cerr << "zz node: GetLocal " << pos << std::endl; - curr->name = mappedLocals[getLEB128()]; + curr->name = mappedLocals[getU32LEB()]; assert(curr->name.is()); curr->type = localTypes[curr->name]; } void visitSetLocal(SetLocal *curr) { if (debug) std::cerr << "zz node: SetLocal" << std::endl; - curr->name = mappedLocals[getLEB128()]; + curr->name = mappedLocals[getU32LEB()]; assert(curr->name.is()); curr->value = popExpression(); curr->type = curr->value->type; } void readMemoryAccess(uint32_t& alignment, size_t bytes, uint32_t& offset) { - auto value = getInt8(); - alignment = value & BinaryConsts::Alignment ? 1 : bytes; - if (value & BinaryConsts::Offset) { - offset = getLEB128(); - } else { - offset = 0; - } + alignment = Pow2(getU32LEB()); + offset = getU32LEB(); } bool maybeVisitImpl(Load *curr, uint8_t code) { @@ -1696,9 +1794,8 @@ public: } bool maybeVisitImpl(Const *curr, uint8_t code) { switch (code) { - case BinaryConsts::I8Const: curr->value = Literal(int32_t(getInt8())); break; - case BinaryConsts::I32Const: curr->value = Literal(getInt32()); break; - case BinaryConsts::I64Const: curr->value = Literal(getInt64()); break; + case BinaryConsts::I32Const: curr->value = Literal(getS32LEB()); break; + case BinaryConsts::I64Const: curr->value = Literal(getS64LEB()); break; case BinaryConsts::F32Const: curr->value = Literal(getFloat32()); break; case BinaryConsts::F64Const: curr->value = Literal(getFloat64()); break; default: return false; @@ -1715,6 +1812,8 @@ public: case BinaryConsts::I64Ctz: curr->op = Ctz; curr->type = i64; break; case BinaryConsts::I32Popcnt: curr->op = Popcnt; curr->type = i32; break; case BinaryConsts::I64Popcnt: curr->op = Popcnt; curr->type = i64; break; + case BinaryConsts::I32EqZ: curr->op = EqZ; curr->type = i32; break; + case BinaryConsts::I64EqZ: curr->op = EqZ; curr->type = i64; break; case BinaryConsts::F32Neg: curr->op = Neg; curr->type = f32; break; case BinaryConsts::F64Neg: curr->op = Neg; curr->type = f64; break; case BinaryConsts::F32Abs: curr->op = Abs; curr->type = f32; break; |