summary | refs | log | tree | commit | diff
path: root/src/wasm-binary.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/wasm-binary.h')
-rw-r--r-- src/wasm-binary.h 485
1 files changed, 292 insertions, 193 deletions
diff --git a/src/wasm-binary.h b/src/wasm-binary.h
index 0f404390b..6782eec21 100644
--- a/src/wasm-binary.h
+++ b/src/wasm-binary.h
@@ -30,32 +30,44 @@
namespace wasm {
-struct LEB128 {
- uint32_t value;
+template<typename T, typename MiniT>
+struct LEB {
+ T value;
- LEB128() {}
- LEB128(uint32_t value) : value(value) {}
+ LEB() {}
+ LEB(T value) : value(value) {}
+
+ bool isSigned() {
+ return int(MiniT(-1)) < 0;
+ }
+
+ bool hasMore(T temp, MiniT byte) {
+ // for signed, we must ensure the last bit has the right sign, as it will zero extend
+ return isSigned() ? (temp != 0 && int32_t(temp) != -1) || (value >= 0 && (byte & 64)) || (value < 0 && !(byte & 64)): temp;
+ }
void write(std::vector<uint8_t>* out) {
- uint32_t temp = value;
+ T temp = value;
+ bool more;
do {
uint8_t byte = temp & 127;
temp >>= 7;
- if (temp) {
+ more = hasMore(temp, byte);
+ if (more) {
byte = byte | 128;
}
out->push_back(byte);
- } while (temp);
+ } while (more);
}
void writeAt(std::vector<uint8_t>* out, size_t at, size_t minimum = 0) {
- uint32_t temp = value;
+ T temp = value;
size_t offset = 0;
bool more;
do {
uint8_t byte = temp & 127;
temp >>= 7;
- more = temp || offset + 1 < minimum;
+ more = hasMore(temp, byte) || offset + 1 < minimum;
if (more) {
byte = byte | 128;
}
@@ -64,18 +76,33 @@ struct LEB128 {
} while (more);
}
- void read(std::function<uint8_t ()> get) {
+ void read(std::function<MiniT ()> get) {
value = 0;
- uint32_t shift = 0;
+ T shift = 0;
+ MiniT byte;
while (1) {
- uint8_t byte = get();
- value |= ((byte & 127) << shift);
+ byte = get();
+ value |= ((T(byte & 127)) << shift);
if (!(byte & 128)) break;
shift += 7;
}
+ // if signed LEB, then we might need to sign-extend. (compiler should optimize this out if not needed)
+ if (isSigned()) {
+ shift += 7;
+ if (byte & 64 && size_t(shift) < 8*sizeof(T)) {
+ // the highest bit we received was a 1, sign-extend all the rest
+ value = value | (T(-1) << shift);
+ assert(value < 0);
+ }
+ }
}
};
+typedef LEB<uint32_t, uint8_t> U32LEB;
+typedef LEB<uint64_t, uint8_t> U64LEB;
+typedef LEB<int32_t, int8_t> S32LEB;
+typedef LEB<int64_t, int8_t> S64LEB;
+
//
// We mostly stream into a buffer as we create the binary format, however,
// sometimes we need to backtrack and write to a location behind us - wasm
@@ -118,8 +145,23 @@ public:
push_back(x & 0xff);
return *this;
}
- BufferWithRandomAccess& operator<<(LEB128 x) {
- if (debug) std::cerr << "writeLEB128: " << x.value << " (at " << size() << ")" << std::endl;
+ BufferWithRandomAccess& operator<<(U32LEB x) {
+ if (debug) std::cerr << "writeU32LEB: " << x.value << " (at " << size() << ")" << std::endl;
+ x.write(this);
+ return *this;
+ }
+ BufferWithRandomAccess& operator<<(U64LEB x) {
+ if (debug) std::cerr << "writeU64LEB: " << x.value << " (at " << size() << ")" << std::endl;
+ x.write(this);
+ return *this;
+ }
+ BufferWithRandomAccess& operator<<(S32LEB x) {
+ if (debug) std::cerr << "writeS32LEB: " << x.value << " (at " << size() << ")" << std::endl;
+ x.write(this);
+ return *this;
+ }
+ BufferWithRandomAccess& operator<<(S64LEB x) {
+ if (debug) std::cerr << "writeS64LEB: " << x.value << " (at " << size() << ")" << std::endl;
x.write(this);
return *this;
}
@@ -158,9 +200,9 @@ public:
(*this)[i+2] = x & 0xff; x >>= 8;
(*this)[i+3] = x & 0xff;
}
- void writeAt(size_t i, LEB128 x, size_t minimum = 0) {
- if (debug) std::cerr << "backpatchLEB128: " << x.value << " (at " << i << "), minimum " << minimum << std::endl;
- x.writeAt(this, i, minimum);
+ void writeAt(size_t i, U32LEB x) {
+ if (debug) std::cerr << "backpatchU32LEB: " << x.value << " (at " << i << ")" << std::endl;
+ x.writeAt(this, i, 5); // fill all 5 bytes, we have to do this when backpatching
}
template <typename T>
@@ -180,6 +222,7 @@ namespace Section {
auto ExportTable = "export_table";
auto DataSegments = "data_segments";
auto FunctionTable = "function_table";
+ auto Names = "names";
auto End = "end";
auto Start = "start_function";
};
@@ -220,6 +263,7 @@ enum ASTNodes {
I32Clz = 0x57,
I32Ctz = 0x58,
I32Popcnt = 0x59,
+ I32EqZ = 0xc0, // XXX
BoolNot = 0x5a,
I64Add = 0x5b,
I64Sub = 0x5c,
@@ -247,6 +291,7 @@ enum ASTNodes {
I64Clz = 0x72,
I64Ctz = 0x73,
I64Popcnt = 0x74,
+ I64EqZ = 0xc1, // XXX
F32Add = 0x75,
F32Sub = 0x76,
F32Mul = 0x77,
@@ -342,7 +387,6 @@ enum ASTNodes {
F32StoreMem = 0x35,
F64StoreMem = 0x36,
- I8Const = 0x09,
I32Const = 0x0a,
I64Const = 0x0b,
F64Const = 0x0c,
@@ -422,6 +466,7 @@ public:
writeExports();
writeDataSegments();
writeFunctionTable();
+ writeNames();
writeEnd();
finishUp();
}
@@ -432,25 +477,30 @@ public:
o << int32_t(10); // version number
}
- int32_t startSection(const char* name) {
- // emit 5 bytes of 0, which we'll fill with LEB later
+ int32_t writeU32LEBPlaceholder() {
int32_t ret = o.size();
o << int32_t(0);
o << int8_t(0);
+ return ret;
+ }
+
+ int32_t startSection(const char* name) {
+ // emit 5 bytes of 0, which we'll fill with LEB later
+ auto ret = writeU32LEBPlaceholder();
writeInlineString(name);
return ret;
}
void finishSection(int32_t start) {
int32_t size = o.size() - start - 5; // section size does not include the 5 bytes of the size field itself
- o.writeAt(start, LEB128(size), 5);
+ o.writeAt(start, U32LEB(size));
}
void writeStart() {
if (!wasm->start.is()) return;
if (debug) std::cerr << "== writeStart" << std::endl;
auto start = startSection(BinaryConsts::Section::Start);
- o << LEB128(getFunctionIndex(wasm->start.str));
+ o << U32LEB(getFunctionIndex(wasm->start.str));
finishSection(start);
}
@@ -458,8 +508,8 @@ public:
if (wasm->memory.max == 0) return;
if (debug) std::cerr << "== writeMemory" << std::endl;
auto start = startSection(BinaryConsts::Section::Memory);
- o << LEB128(wasm->memory.initial)
- << LEB128(wasm->memory.max)
+ o << U32LEB(wasm->memory.initial)
+ << U32LEB(wasm->memory.max)
<< int8_t(1); // export memory
finishSection(start);
}
@@ -468,10 +518,10 @@ public:
if (wasm->functionTypes.size() == 0) return;
if (debug) std::cerr << "== writeSignatures" << std::endl;
auto start = startSection(BinaryConsts::Section::Signatures);
- o << LEB128(wasm->functionTypes.size());
+ o << U32LEB(wasm->functionTypes.size());
for (auto* type : wasm->functionTypes) {
if (debug) std::cerr << "write one" << std::endl;
- o << LEB128(type->params.size());
+ o << U32LEB(type->params.size());
o << binaryWasmType(type->result);
for (auto param : type->params) {
o << binaryWasmType(param);
@@ -492,10 +542,10 @@ public:
if (wasm->imports.size() == 0) return;
if (debug) std::cerr << "== writeImports" << std::endl;
auto start = startSection(BinaryConsts::Section::ImportTable);
- o << LEB128(wasm->imports.size());
+ o << U32LEB(wasm->imports.size());
for (auto* import : wasm->imports) {
if (debug) std::cerr << "write one" << std::endl;
- o << LEB128(getFunctionTypeIndex(import->type->name));
+ o << U32LEB(getFunctionTypeIndex(import->type->name));
writeInlineString(import->module.str);
writeInlineString(import->base.str);
}
@@ -546,10 +596,10 @@ public:
if (wasm->functions.size() == 0) return;
if (debug) std::cerr << "== writeFunctionSignatures" << std::endl;
auto start = startSection(BinaryConsts::Section::FunctionSignatures);
- o << LEB128(wasm->functions.size());
+ o << U32LEB(wasm->functions.size());
for (auto* curr : wasm->functions) {
if (debug) std::cerr << "write one" << std::endl;
- o << LEB128(getFunctionTypeIndex(curr->type));
+ o << U32LEB(getFunctionTypeIndex(curr->type));
}
finishSection(start);
}
@@ -559,29 +609,26 @@ public:
if (debug) std::cerr << "== writeFunctions" << std::endl;
auto start = startSection(BinaryConsts::Section::Functions);
size_t total = wasm->functions.size();
- o << LEB128(total);
+ o << U32LEB(total);
for (size_t i = 0; i < total; i++) {
if (debug) std::cerr << "write one at" << o.size() << std::endl;
+ size_t sizePos = writeU32LEBPlaceholder();
+ size_t start = o.size();
Function* function = wasm->functions[i];
- Name name, type;
- name = function->name;
- type = function->type;
mappedLocals.clear();
numLocalsByType.clear();
- if (debug) std::cerr << "writing" << name << std::endl;
- o << int8_t(BinaryConsts::Named |
- (BinaryConsts::Locals * (function && function->locals.size() > 0)));
- emitString(name.str);
+ if (debug) std::cerr << "writing" << function->name << std::endl;
mapLocals(function);
- if (function->locals.size() > 0) {
- o << uint16_t(numLocalsByType[i32])
- << uint16_t(numLocalsByType[i64])
- << uint16_t(numLocalsByType[f32])
- << uint16_t(numLocalsByType[f64]);
- }
- size_t sizePos = o.size();
- o << (uint32_t)0; // placeholder, we fill in the size later when we have it // XXX int32, diverge from v8 format, to get more code to compile
- size_t start = o.size();
+ o << U32LEB(
+ (numLocalsByType[i32] ? 1 : 0) +
+ (numLocalsByType[i64] ? 1 : 0) +
+ (numLocalsByType[f32] ? 1 : 0) +
+ (numLocalsByType[f64] ? 1 : 0)
+ );
+ if (numLocalsByType[i32]) o << U32LEB(numLocalsByType[i32]) << binaryWasmType(i32);
+ if (numLocalsByType[i64]) o << U32LEB(numLocalsByType[i64]) << binaryWasmType(i64);
+ if (numLocalsByType[f32]) o << U32LEB(numLocalsByType[f32]) << binaryWasmType(f32);
+ if (numLocalsByType[f64]) o << U32LEB(numLocalsByType[f64]) << binaryWasmType(f64);
depth = 0;
recurse(function->body);
o << int8_t(BinaryConsts::EndMarker);
@@ -589,7 +636,7 @@ public:
size_t size = o.size() - start;
assert(size <= std::numeric_limits<uint32_t>::max());
if (debug) std::cerr << "body size: " << size << ", writing at " << sizePos << ", next starts at " << o.size() << std::endl;
- o.writeAt(sizePos, uint32_t(size)); // XXX int32, diverge from v8 format, to get more code to compile
+ o.writeAt(sizePos, U32LEB(size));
}
finishSection(start);
}
@@ -598,10 +645,10 @@ public:
if (wasm->exports.size() == 0) return;
if (debug) std::cerr << "== writeexports" << std::endl;
auto start = startSection(BinaryConsts::Section::ExportTable);
- o << LEB128(wasm->exports.size());
+ o << U32LEB(wasm->exports.size());
for (auto* curr : wasm->exports) {
if (debug) std::cerr << "write one" << std::endl;
- o << LEB128(getFunctionIndex(curr->value));
+ o << U32LEB(getFunctionIndex(curr->value));
writeInlineString(curr->name.str);
}
finishSection(start);
@@ -614,10 +661,10 @@ public:
if (segment.size > 0) num++;
}
auto start = startSection(BinaryConsts::Section::DataSegments);
- o << LEB128(num);
+ o << U32LEB(num);
for (auto& segment : wasm->memory.segments) {
if (segment.size == 0) continue;
- o << LEB128(segment.offset);
+ o << U32LEB(segment.offset);
writeInlineBuffer(segment.data, segment.size);
}
finishSection(start);
@@ -653,9 +700,21 @@ public:
if (wasm->table.names.size() == 0) return;
if (debug) std::cerr << "== writeFunctionTable" << std::endl;
auto start = startSection(BinaryConsts::Section::FunctionTable);
- o << LEB128(wasm->table.names.size());
+ o << U32LEB(wasm->table.names.size());
for (auto name : wasm->table.names) {
- o << LEB128(getFunctionIndex(name));
+ o << U32LEB(getFunctionIndex(name));
+ }
+ finishSection(start);
+ }
+
+ void writeNames() {
+ if (wasm->functions.size() == 0) return;
+ if (debug) std::cerr << "== writeNames" << std::endl;
+ auto start = startSection(BinaryConsts::Section::Names);
+ o << U32LEB(wasm->functions.size());
+ for (auto* curr : wasm->functions) {
+ writeInlineString(curr->name.str);
+ o << U32LEB(0); // TODO: locals
}
finishSection(start);
}
@@ -669,14 +728,14 @@ public:
void writeInlineString(const char* name) {
int32_t size = strlen(name);
- o << LEB128(size);
+ o << U32LEB(size);
for (int32_t i = 0; i < size; i++) {
o << int8_t(name[i]);
}
}
void writeInlineBuffer(const char* data, size_t size) {
- o << LEB128(size);
+ o << U32LEB(size);
for (size_t i = 0; i < size; i++) {
o << int8_t(data[i]);
}
@@ -776,35 +835,37 @@ public:
}
if (curr->condition) recurse(curr->condition);
o << int8_t(curr->condition ? BinaryConsts::BrIf : BinaryConsts::Br)
- << int32_t(getBreakIndex(curr->name));
+ << U32LEB(getBreakIndex(curr->name));
}
void visitSwitch(Switch *curr) {
if (debug) std::cerr << "zz node: Switch" << std::endl;
- o << int8_t(BinaryConsts::TableSwitch) << int16_t(curr->targets.size() + 1) << int8_t(curr->value != nullptr);
+ o << int8_t(BinaryConsts::TableSwitch) << U32LEB(curr->targets.size());
for (auto target : curr->targets) {
- o << (int32_t)getBreakIndex(target);
+ o << U32LEB(getBreakIndex(target));
}
- o << (int32_t)getBreakIndex(curr->default_);
+ o << U32LEB(getBreakIndex(curr->default_));
recurse(curr->condition);
o << int8_t(BinaryConsts::EndMarker);
if (curr->value) {
recurse(curr->value);
- o << int8_t(BinaryConsts::EndMarker);
+ } else {
+ visitNop(nullptr);
}
+ o << int8_t(BinaryConsts::EndMarker);
}
void visitCall(Call *curr) {
if (debug) std::cerr << "zz node: Call" << std::endl;
for (auto* operand : curr->operands) {
recurse(operand);
}
- o << int8_t(BinaryConsts::CallFunction) << LEB128(getFunctionIndex(curr->target));
+ o << int8_t(BinaryConsts::CallFunction) << U32LEB(getFunctionIndex(curr->target));
}
void visitCallImport(CallImport *curr) {
if (debug) std::cerr << "zz node: CallImport" << std::endl;
for (auto* operand : curr->operands) {
recurse(operand);
}
- o << int8_t(BinaryConsts::CallImport) << LEB128(getImportIndex(curr->target));
+ o << int8_t(BinaryConsts::CallImport) << U32LEB(getImportIndex(curr->target));
}
void visitCallIndirect(CallIndirect *curr) {
if (debug) std::cerr << "zz node: CallIndirect" << std::endl;
@@ -812,22 +873,21 @@ public:
for (auto* operand : curr->operands) {
recurse(operand);
}
- o << int8_t(BinaryConsts::CallIndirect) << LEB128(getFunctionTypeIndex(curr->fullType->name));
+ o << int8_t(BinaryConsts::CallIndirect) << U32LEB(getFunctionTypeIndex(curr->fullType->name));
}
void visitGetLocal(GetLocal *curr) {
if (debug) std::cerr << "zz node: GetLocal " << (o.size() + 1) << std::endl;
- o << int8_t(BinaryConsts::GetLocal) << LEB128(mappedLocals[curr->name]);
+ o << int8_t(BinaryConsts::GetLocal) << U32LEB(mappedLocals[curr->name]);
}
void visitSetLocal(SetLocal *curr) {
if (debug) std::cerr << "zz node: SetLocal" << std::endl;
recurse(curr->value);
- o << int8_t(BinaryConsts::SetLocal) << LEB128(mappedLocals[curr->name]);
+ o << int8_t(BinaryConsts::SetLocal) << U32LEB(mappedLocals[curr->name]);
}
void emitMemoryAccess(size_t alignment, size_t bytes, uint32_t offset) {
- o << int8_t( ((alignment == bytes || alignment == 0) ? BinaryConsts::NaturalAlignment : BinaryConsts::Alignment) |
- (offset ? BinaryConsts::Offset : 0) );
- if (offset) o << LEB128(offset);
+ o << U32LEB(Log2(alignment ? alignment : bytes));
+ o << U32LEB(offset);
}
void visitLoad(Load *curr) {
@@ -893,16 +953,11 @@ public:
if (debug) std::cerr << "zz node: Const" << curr << " : " << curr->type << std::endl;
switch (curr->type) {
case i32: {
- uint32_t value = curr->value.geti32();
- if (value <= 255) {
- o << int8_t(BinaryConsts::I8Const) << uint8_t(value);
- break;
- }
- o << int8_t(BinaryConsts::I32Const) << value;
+ o << int8_t(BinaryConsts::I32Const) << S32LEB(curr->value.geti32());
break;
}
case i64: {
- o << int8_t(BinaryConsts::I64Const) << curr->value.geti64();
+ o << int8_t(BinaryConsts::I64Const) << S64LEB(curr->value.geti64());
break;
}
case f32: {
@@ -924,6 +979,7 @@ public:
case Clz: o << int8_t(curr->type == i32 ? BinaryConsts::I32Clz : BinaryConsts::I64Clz); break;
case Ctz: o << int8_t(curr->type == i32 ? BinaryConsts::I32Ctz : BinaryConsts::I64Ctz); break;
case Popcnt: o << int8_t(curr->type == i32 ? BinaryConsts::I32Popcnt : BinaryConsts::I64Popcnt); break;
+ case EqZ: o << int8_t(curr->type == i32 ? BinaryConsts::I32EqZ : BinaryConsts::I64EqZ); break;
case Neg: o << int8_t(curr->type == f32 ? BinaryConsts::F32Neg : BinaryConsts::F64Neg); break;
case Abs: o << int8_t(curr->type == f32 ? BinaryConsts::F32Abs : BinaryConsts::F64Abs); break;
case Ceil: o << int8_t(curr->type == f32 ? BinaryConsts::F32Ceil : BinaryConsts::F64Ceil); break;
@@ -1067,10 +1123,11 @@ class WasmBinaryBuilder {
std::vector<char>& input;
bool debug;
- size_t pos;
+ size_t pos = 0;
+ int32_t startIndex = -1;
public:
- WasmBinaryBuilder(AllocatingModule& wasm, std::vector<char>& input, bool debug) : wasm(wasm), allocator(wasm.allocator), input(input), debug(debug), pos(0) {}
+ WasmBinaryBuilder(AllocatingModule& wasm, std::vector<char>& input, bool debug) : wasm(wasm), allocator(wasm.allocator), input(input), debug(debug) {}
void read() {
@@ -1078,9 +1135,9 @@ public:
// read sections until the end
while (more()) {
- auto sectionSize = getLEB128();
+ auto sectionSize = getU32LEB();
assert(sectionSize < pos + input.size());
- auto nameSize = getLEB128();
+ auto nameSize = getU32LEB();
auto match = [&](const char* name) {
for (size_t i = 0; i < nameSize; i++) {
if (pos + i >= input.size()) return false;
@@ -1100,6 +1157,7 @@ public:
else if (match(BinaryConsts::Section::ExportTable)) readExports();
else if (match(BinaryConsts::Section::DataSegments)) readDataSegments();
else if (match(BinaryConsts::Section::FunctionTable)) readFunctionTable();
+ else if (match(BinaryConsts::Section::Names)) readNames();
else if (match(BinaryConsts::Section::End)) {
if (debug) std::cerr << "== readEnd" << std::endl;
break;
@@ -1151,13 +1209,40 @@ public:
return ret;
}
- uint32_t getLEB128() {
+ uint32_t getU32LEB() {
+ if (debug) std::cerr << "<==" << std::endl;
+ U32LEB ret;
+ ret.read([&]() {
+ return getInt8();
+ });
+ if (debug) std::cerr << "getU32LEB: " << ret.value << " ==>" << std::endl;
+ return ret.value;
+ }
+ uint64_t getU64LEB() {
if (debug) std::cerr << "<==" << std::endl;
- LEB128 ret;
+ U64LEB ret;
ret.read([&]() {
return getInt8();
});
- if (debug) std::cerr << "getLEB128: " << ret.value << " ==>" << std::endl;
+ if (debug) std::cerr << "getU64LEB: " << ret.value << " ==>" << std::endl;
+ return ret.value;
+ }
+ int32_t getS32LEB() {
+ if (debug) std::cerr << "<==" << std::endl;
+ S32LEB ret;
+ ret.read([&]() {
+ return (int8_t)getInt8();
+ });
+ if (debug) std::cerr << "getU32LEB: " << ret.value << " ==>" << std::endl;
+ return ret.value;
+ }
+ int64_t getS64LEB() {
+ if (debug) std::cerr << "<==" << std::endl;
+ S64LEB ret;
+ ret.read([&]() {
+ return (int8_t)getInt8();
+ });
+ if (debug) std::cerr << "getU64LEB: " << ret.value << " ==>" << std::endl;
return ret.value;
}
WasmType getWasmType() {
@@ -1182,7 +1267,7 @@ public:
Name getInlineString() {
if (debug) std::cerr << "<==" << std::endl;
- auto len = getLEB128();
+ auto len = getU32LEB();
std::string str;
for (size_t i = 0; i < len; i++) {
str = str + char(getInt8());
@@ -1230,24 +1315,24 @@ public:
void readStart() {
if (debug) std::cerr << "== readStart" << std::endl;
- wasm.start = wasm.functions[getLEB128()]->name;
+ startIndex = getU32LEB();
}
void readMemory() {
if (debug) std::cerr << "== readMemory" << std::endl;
- wasm.memory.initial = getLEB128();
- wasm.memory.max = getLEB128();
+ wasm.memory.initial = getU32LEB();
+ wasm.memory.max = getU32LEB();
verifyInt8(1); // export memory
}
void readSignatures() {
if (debug) std::cerr << "== readSignatures" << std::endl;
- size_t numTypes = getLEB128();
+ size_t numTypes = getU32LEB();
if (debug) std::cerr << "num: " << numTypes << std::endl;
for (size_t i = 0; i < numTypes; i++) {
if (debug) std::cerr << "read one" << std::endl;
auto curr = allocator.alloc<FunctionType>();
- size_t numParams = getLEB128();
+ size_t numParams = getU32LEB();
if (debug) std::cerr << "num params: " << numParams << std::endl;
curr->result = getWasmType();
for (size_t j = 0; j < numParams; j++) {
@@ -1259,13 +1344,13 @@ public:
void readImports() {
if (debug) std::cerr << "== readImports" << std::endl;
- size_t num = getLEB128();
+ size_t num = getU32LEB();
if (debug) std::cerr << "num: " << num << std::endl;
for (size_t i = 0; i < num; i++) {
if (debug) std::cerr << "read one" << std::endl;
auto curr = allocator.alloc<Import>();
curr->name = Name(std::string("import$") + std::to_string(i));
- auto index = getLEB128();
+ auto index = getU32LEB();
assert(index < wasm.functionTypes.size());
curr->type = wasm.functionTypes[index];
assert(curr->type->name.is());
@@ -1279,11 +1364,11 @@ public:
void readFunctionSignatures() {
if (debug) std::cerr << "== readFunctionSignatures" << std::endl;
- size_t num = getLEB128();
+ size_t num = getU32LEB();
if (debug) std::cerr << "num: " << num << std::endl;
for (size_t i = 0; i < num; i++) {
if (debug) std::cerr << "read one" << std::endl;
- auto index = getLEB128();
+ auto index = getU32LEB();
assert(index < wasm.functionTypes.size());
functionTypes.push_back(wasm.functionTypes[index]);
}
@@ -1295,20 +1380,21 @@ public:
return cashew::IString(("label$" + std::to_string(nextLabel++)).c_str(), false);
}
+ // We read functions before we know their names, so we need to backpatch the names later
+
+ std::vector<Function*> functions; // functions are held here until their names are known, then passed to wasm.addFunction
+ std::map<size_t, std::vector<Call*>> functionCalls; // at index i we have all calls to i
+
void readFunctions() {
if (debug) std::cerr << "== readFunctions" << std::endl;
- size_t total = getLEB128();
+ size_t total = getU32LEB();
for (size_t i = 0; i < total; i++) {
if (debug) std::cerr << "read one at " << pos << std::endl;
- auto data = getInt8();
+ size_t size = getU32LEB();
+ assert(size > 0); // we could also check it matches the seen size
auto type = functionTypes[i];
- bool named = data & BinaryConsts::Named;
- assert(named);
- bool locals = data & BinaryConsts::Locals;
- Name name = getString();
- if (debug) std::cerr << "reading" << name << std::endl;
+ if (debug) std::cerr << "reading" << i << std::endl;
auto func = allocator.alloc<Function>();
- func->name = name;
func->type = type->name;
func->result = type->result;
size_t nextVar = 0;
@@ -1319,53 +1405,61 @@ public:
for (size_t j = 0; j < type->params.size(); j++) {
func->params.emplace_back(addVar(), type->params[j]);
}
- if (locals) {
- auto addLocals = [&](WasmType type) {
- int16_t num = getInt16();
- while (num > 0) {
- func->locals.emplace_back(addVar(), type);
- num--;
- }
- };
- addLocals(i32);
- addLocals(i64);
- addLocals(f32);
- addLocals(f64);
+ size_t numLocalTypes = getU32LEB();
+ for (size_t t = 0; t < numLocalTypes; t++) {
+ auto num = getU32LEB();
+ auto type = getWasmType();
+ while (num > 0) {
+ func->locals.emplace_back(addVar(), type);
+ num--;
+ }
}
- size_t size = getInt32(); // XXX int32, diverge from v8 format, to get more code to compile
- // we can't read the function yet - it might call other functions that are defined later,
- // and we do depend on the function type.
- functions.emplace_back(func, pos, size);
- pos += size;
- func->body = nullptr; // will be filled later. but we do have the name and the type already.
- wasm.addFunction(func);
+ {
+ // process the function body
+ if (debug) std::cerr << "processing function: " << i << std::endl;
+ nextLabel = 0;
+ // prepare locals
+ mappedLocals.clear();
+ localTypes.clear();
+ for (size_t i = 0; i < func->params.size(); i++) {
+ mappedLocals.push_back(func->params[i].name);
+ localTypes[func->params[i].name] = func->params[i].type;
+ }
+ for (size_t i = 0; i < func->locals.size(); i++) {
+ mappedLocals.push_back(func->locals[i].name);
+ localTypes[func->locals[i].name] = func->locals[i].type;
+ }
+ // process body
+ assert(breakStack.empty());
+ assert(expressionStack.empty());
+ depth = 0;
+ processExpressions();
+ assert(expressionStack.size() == 1);
+ func->body = popExpression();
+ assert(depth == 0);
+ assert(breakStack.empty());
+ assert(expressionStack.empty());
+ }
+ functions.push_back(func);
}
}
+ std::map<Export*, size_t> exportIndexes;
+
void readExports() {
if (debug) std::cerr << "== readExports" << std::endl;
- size_t num = getLEB128();
+ size_t num = getU32LEB();
if (debug) std::cerr << "num: " << num << std::endl;
for (size_t i = 0; i < num; i++) {
if (debug) std::cerr << "read one" << std::endl;
auto curr = allocator.alloc<Export>();
- auto index = getLEB128();
- assert(index < wasm.functions.size());
- curr->value = wasm.functions[index]->name;
- assert(curr->value.is());
+ auto index = getU32LEB();
+ assert(index < functionTypes.size());
curr->name = getInlineString();
- wasm.addExport(curr);
+ exportIndexes[curr] = index;
}
}
- struct FunctionData {
- Function* func;
- size_t pos, size;
- FunctionData(Function* func, size_t pos, size_t size) : func(func), pos(pos), size(size) {}
- };
-
- std::vector<FunctionData> functions;
-
std::vector<Name> mappedLocals; // index => local name
std::map<Name, WasmType> localTypes; // TODO: optimize
@@ -1391,42 +1485,41 @@ public:
void processFunctions() {
for (auto& func : functions) {
- Function* curr = func.func;
- if (debug) std::cerr << "processing function: " << curr->name << std::endl;
- pos = func.pos;
- nextLabel = 0;
- // prepare locals
- mappedLocals.clear();
- localTypes.clear();
- for (size_t i = 0; i < curr->params.size(); i++) {
- mappedLocals.push_back(curr->params[i].name);
- localTypes[curr->params[i].name] = curr->params[i].type;
- }
- for (size_t i = 0; i < curr->locals.size(); i++) {
- mappedLocals.push_back(curr->locals[i].name);
- localTypes[curr->locals[i].name] = curr->locals[i].type;
+ wasm.addFunction(func);
+ }
+ // now that we have names for each function, apply things
+
+ if (startIndex >= 0) {
+ wasm.start = wasm.functions[startIndex]->name;
+ }
+
+ for (auto& iter : exportIndexes) {
+ Export* curr = iter.first;
+ curr->value = wasm.functions[iter.second]->name;
+ wasm.addExport(curr);
+ }
+
+ for (auto& iter : functionCalls) {
+ size_t index = iter.first;
+ auto& calls = iter.second;
+ for (auto* call : calls) {
+ call->target = wasm.functions[index]->name;
}
- // process body
- assert(breakStack.empty());
- assert(expressionStack.empty());
- depth = 0;
- processExpressions();
- assert(expressionStack.size() == 1);
- curr->body = popExpression();
- assert(depth == 0);
- assert(breakStack.empty());
- assert(expressionStack.empty());
- assert(pos == func.pos + func.size);
+ }
+
+ for (size_t index : functionTable) {
+ assert(index < wasm.functions.size());
+ wasm.table.names.push_back(wasm.functions[index]->name);
}
}
void readDataSegments() {
if (debug) std::cerr << "== readDataSegments" << std::endl;
- auto num = getLEB128();
+ auto num = getU32LEB();
for (size_t i = 0; i < num; i++) {
Memory::Segment curr;
- curr.offset = getLEB128();
- auto size = getLEB128();
+ curr.offset = getU32LEB();
+ auto size = getU32LEB();
auto buffer = (char*)malloc(size);
for (size_t j = 0; j < size; j++) {
buffer[j] = char(getInt8());
@@ -1437,13 +1530,24 @@ public:
}
}
+ std::vector<size_t> functionTable;
+
void readFunctionTable() {
if (debug) std::cerr << "== readFunctionTable" << std::endl;
- auto num = getLEB128();
+ auto num = getU32LEB();
for (size_t i = 0; i < num; i++) {
- auto index = getLEB128();
- assert(index < wasm.functions.size());
- wasm.table.names.push_back(wasm.functions[index]->name);
+ auto index = getU32LEB();
+ functionTable.push_back(index);
+ }
+ }
+
+ void readNames() {
+ if (debug) std::cerr << "== readNames" << std::endl;
+ auto num = getU32LEB();
+ for (size_t i = 0; i < num; i++) {
+ functions[i]->name = getInlineString();
+ auto numLocals = getU32LEB();
+ assert(numLocals == 0); // TODO
}
}
@@ -1573,39 +1677,38 @@ public:
void visitBreak(Break *curr, uint8_t code) {
if (debug) std::cerr << "zz node: Break" << std::endl;
- curr->name = getBreakName(getInt32());
+ curr->name = getBreakName(getU32LEB());
if (code == BinaryConsts::BrIf) curr->condition = popExpression();
curr->value = popExpression();
}
void visitSwitch(Switch *curr) {
if (debug) std::cerr << "zz node: Switch" << std::endl;
- auto numTargets = getInt16();
- auto hasValue = getInt8();
- for (auto i = 0; i < numTargets - 1; i++) {
- curr->targets.push_back(getBreakName(getInt32()));
+ auto numTargets = getU32LEB();
+ for (size_t i = 0; i < numTargets; i++) {
+ curr->targets.push_back(getBreakName(getU32LEB()));
}
- curr->default_ = getBreakName(getInt32());
+ curr->default_ = getBreakName(getU32LEB());
processExpressions();
curr->condition = popExpression();
- if (hasValue) {
- processExpressions();
- curr->value = popExpression();
- }
+ processExpressions();
+ curr->value = popExpression();
+ if (curr->value->is<Nop>()) curr->value = nullptr;
}
void visitCall(Call *curr) {
if (debug) std::cerr << "zz node: Call" << std::endl;
- curr->target = wasm.functions[getLEB128()]->name;
- auto type = wasm.functionTypesMap[wasm.functionsMap[curr->target]->type];
+ auto index = getU32LEB();
+ auto type = functionTypes[index];
auto num = type->params.size();
curr->operands.resize(num);
for (size_t i = 0; i < num; i++) {
curr->operands[num - i - 1] = popExpression();
}
curr->type = type->result;
+ functionCalls[index].push_back(curr);
}
void visitCallImport(CallImport *curr) {
if (debug) std::cerr << "zz node: CallImport" << std::endl;
- curr->target = wasm.imports[getLEB128()]->name;
+ curr->target = wasm.imports[getU32LEB()]->name;
assert(wasm.importsMap.find(curr->target) != wasm.importsMap.end());
auto type = wasm.importsMap[curr->target]->type;
assert(type);
@@ -1619,7 +1722,7 @@ public:
}
void visitCallIndirect(CallIndirect *curr) {
if (debug) std::cerr << "zz node: CallIndirect" << std::endl;
- curr->fullType = wasm.functionTypes[getLEB128()];
+ curr->fullType = wasm.functionTypes[getU32LEB()];
auto num = curr->fullType->params.size();
curr->operands.resize(num);
for (size_t i = 0; i < num; i++) {
@@ -1630,26 +1733,21 @@ public:
}
void visitGetLocal(GetLocal *curr) {
if (debug) std::cerr << "zz node: GetLocal " << pos << std::endl;
- curr->name = mappedLocals[getLEB128()];
+ curr->name = mappedLocals[getU32LEB()];
assert(curr->name.is());
curr->type = localTypes[curr->name];
}
void visitSetLocal(SetLocal *curr) {
if (debug) std::cerr << "zz node: SetLocal" << std::endl;
- curr->name = mappedLocals[getLEB128()];
+ curr->name = mappedLocals[getU32LEB()];
assert(curr->name.is());
curr->value = popExpression();
curr->type = curr->value->type;
}
void readMemoryAccess(uint32_t& alignment, size_t bytes, uint32_t& offset) {
- auto value = getInt8();
- alignment = value & BinaryConsts::Alignment ? 1 : bytes;
- if (value & BinaryConsts::Offset) {
- offset = getLEB128();
- } else {
- offset = 0;
- }
+ alignment = Pow2(getU32LEB());
+ offset = getU32LEB();
}
bool maybeVisitImpl(Load *curr, uint8_t code) {
@@ -1696,9 +1794,8 @@ public:
}
bool maybeVisitImpl(Const *curr, uint8_t code) {
switch (code) {
- case BinaryConsts::I8Const: curr->value = Literal(int32_t(getInt8())); break;
- case BinaryConsts::I32Const: curr->value = Literal(getInt32()); break;
- case BinaryConsts::I64Const: curr->value = Literal(getInt64()); break;
+ case BinaryConsts::I32Const: curr->value = Literal(getS32LEB()); break;
+ case BinaryConsts::I64Const: curr->value = Literal(getS64LEB()); break;
case BinaryConsts::F32Const: curr->value = Literal(getFloat32()); break;
case BinaryConsts::F64Const: curr->value = Literal(getFloat64()); break;
default: return false;
@@ -1715,6 +1812,8 @@ public:
case BinaryConsts::I64Ctz: curr->op = Ctz; curr->type = i64; break;
case BinaryConsts::I32Popcnt: curr->op = Popcnt; curr->type = i32; break;
case BinaryConsts::I64Popcnt: curr->op = Popcnt; curr->type = i64; break;
+ case BinaryConsts::I32EqZ: curr->op = EqZ; curr->type = i32; break;
+ case BinaryConsts::I64EqZ: curr->op = EqZ; curr->type = i64; break;
case BinaryConsts::F32Neg: curr->op = Neg; curr->type = f32; break;
case BinaryConsts::F64Neg: curr->op = Neg; curr->type = f64; break;
case BinaryConsts::F32Abs: curr->op = Abs; curr->type = f32; break;