summaryrefslogtreecommitdiff
path: root/src/wasm-binary.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/wasm-binary.h')
-rw-r--r--src/wasm-binary.h1041
1 files changed, 668 insertions, 373 deletions
diff --git a/src/wasm-binary.h b/src/wasm-binary.h
index b1db70a97..edc904787 100644
--- a/src/wasm-binary.h
+++ b/src/wasm-binary.h
@@ -30,36 +30,79 @@
namespace wasm {
-struct LEB128 {
- uint32_t value;
+template<typename T, typename MiniT>
+struct LEB {
+ T value;
- LEB128() {}
- LEB128(uint32_t value) : value(value) {}
+ LEB() {}
+ LEB(T value) : value(value) {}
+
+ bool isSigned() {
+ return int(MiniT(-1)) < 0;
+ }
+
+ bool hasMore(T temp, MiniT byte) {
+ // for signed, we must ensure the last bit has the right sign, as it will zero extend
+ return isSigned() ? (temp != 0 && int32_t(temp) != -1) || (value >= 0 && (byte & 64)) || (value < 0 && !(byte & 64)): (temp != 0);
+ }
void write(std::vector<uint8_t>* out) {
- uint32_t temp = value;
+ T temp = value;
+ bool more;
do {
uint8_t byte = temp & 127;
temp >>= 7;
- if (temp) {
+ more = hasMore(temp, byte);
+ if (more) {
byte = byte | 128;
}
out->push_back(byte);
- } while (temp);
+ } while (more);
+ }
+
+ void writeAt(std::vector<uint8_t>* out, size_t at, size_t minimum = 0) {
+ T temp = value;
+ size_t offset = 0;
+ bool more;
+ do {
+ uint8_t byte = temp & 127;
+ temp >>= 7;
+ more = hasMore(temp, byte) || offset + 1 < minimum;
+ if (more) {
+ byte = byte | 128;
+ }
+ (*out)[at + offset] = byte;
+ offset++;
+ } while (more);
}
- void read(std::function<uint8_t ()> get) {
+ void read(std::function<MiniT ()> get) {
value = 0;
- uint32_t shift = 0;
+ T shift = 0;
+ MiniT byte;
while (1) {
- uint8_t byte = get();
- value |= ((byte & 127) << shift);
+ byte = get();
+ value |= ((T(byte & 127)) << shift);
if (!(byte & 128)) break;
shift += 7;
}
+ // if signed LEB, then we might need to sign-extend. (compile should optimize this out if not needed)
+ if (isSigned()) {
+ shift += 7;
+ if (byte & 64 && size_t(shift) < 8*sizeof(T)) {
+ // the highest bit we received was a 1, sign-extend all the rest
+ value = value | (T(-1) << shift);
+ assert(value < 0);
+ }
+ }
}
};
+typedef LEB<uint32_t, uint8_t> U32LEB;
+typedef LEB<uint64_t, uint8_t> U64LEB;
+typedef LEB<int32_t, int8_t> S32LEB;
+typedef LEB<int64_t, int8_t> S64LEB;
+
//
// We mostly stream into a buffer as we create the binary format, however,
// sometimes we need to backtrack and write to a location behind us - wasm
@@ -102,8 +145,23 @@ public:
push_back(x & 0xff);
return *this;
}
- BufferWithRandomAccess& operator<<(LEB128 x) {
- if (debug) std::cerr << "writeLEB128: " << x.value << " (at " << size() << ")" << std::endl;
+ BufferWithRandomAccess& operator<<(U32LEB x) {
+ if (debug) std::cerr << "writeU32LEB: " << x.value << " (at " << size() << ")" << std::endl;
+ x.write(this);
+ return *this;
+ }
+ BufferWithRandomAccess& operator<<(U64LEB x) {
+ if (debug) std::cerr << "writeU64LEB: " << x.value << " (at " << size() << ")" << std::endl;
+ x.write(this);
+ return *this;
+ }
+ BufferWithRandomAccess& operator<<(S32LEB x) {
+ if (debug) std::cerr << "writeS32LEB: " << x.value << " (at " << size() << ")" << std::endl;
+ x.write(this);
+ return *this;
+ }
+ BufferWithRandomAccess& operator<<(S64LEB x) {
+ if (debug) std::cerr << "writeS64LEB: " << x.value << " (at " << size() << ")" << std::endl;
x.write(this);
return *this;
}
@@ -131,15 +189,21 @@ public:
}
void writeAt(size_t i, uint16_t x) {
+ if (debug) std::cerr << "backpatchInt16: " << x << " (at " << i << ")" << std::endl;
(*this)[i] = x & 0xff;
(*this)[i+1] = x >> 8;
}
void writeAt(size_t i, uint32_t x) {
+ if (debug) std::cerr << "backpatchInt32: " << x << " (at " << i << ")" << std::endl;
(*this)[i] = x & 0xff; x >>= 8;
(*this)[i+1] = x & 0xff; x >>= 8;
(*this)[i+2] = x & 0xff; x >>= 8;
(*this)[i+3] = x & 0xff;
}
+ void writeAt(size_t i, U32LEB x) {
+ if (debug) std::cerr << "backpatchU32LEB: " << x.value << " (at " << i << ")" << std::endl;
+ x.writeAt(this, i, 5); // fill all 5 bytes, we have to do this when backpatching
+ }
template <typename T>
void writeTo(T& o) {
@@ -149,14 +213,18 @@ public:
namespace BinaryConsts {
-enum Section {
- Memory = 0,
- Signatures = 1,
- Functions = 2,
- DataSegments = 4,
- FunctionTable = 5,
- End = 6,
- Start = 7
+namespace Section {
+ auto Memory = "memory";
+ auto Signatures = "signatures";
+ auto ImportTable = "import_table";
+ auto FunctionSignatures = "function_signatures";
+ auto Functions = "functions";
+ auto ExportTable = "export_table";
+ auto DataSegments = "data_segments";
+ auto FunctionTable = "function_table";
+ auto Names = "names";
+ auto End = "end";
+ auto Start = "start_function";
};
enum FunctionEntry {
@@ -195,6 +263,7 @@ enum ASTNodes {
I32Clz = 0x57,
I32Ctz = 0x58,
I32Popcnt = 0x59,
+ I32EqZ = 0xc0, // XXX
BoolNot = 0x5a,
I64Add = 0x5b,
I64Sub = 0x5c,
@@ -222,6 +291,7 @@ enum ASTNodes {
I64Clz = 0x72,
I64Ctz = 0x73,
I64Popcnt = 0x74,
+ I64EqZ = 0xc1, // XXX
F32Add = 0x75,
F32Sub = 0x76,
F32Mul = 0x77,
@@ -287,6 +357,10 @@ enum ASTNodes {
F64ConvertF32 = 0xb2,
F64ReinterpretI64 = 0xb3,
I64ReinterpretF64 = 0xb5,
+ I32RotR = 0xb6,
+ I32RotL = 0xb7,
+ I64RotR = 0xb8,
+ I64RotL = 0xb9,
I32ReinterpretF32 = 0xfe, // XXX not in v8 spec doc
I32LoadMem8S = 0x20,
@@ -313,7 +387,6 @@ enum ASTNodes {
F32StoreMem = 0x35,
F64StoreMem = 0x36,
- I8Const = 0x09,
I32Const = 0x0a,
I64Const = 0x0b,
F64Const = 0x0c,
@@ -324,6 +397,7 @@ enum ASTNodes {
StoreGlobal = 0x11,
CallFunction = 0x12,
CallIndirect = 0x13,
+ CallImport = 0x1f,
Nop = 0x00,
Block = 0x01,
@@ -335,7 +409,8 @@ enum ASTNodes {
BrIf = 0x07,
TableSwitch = 0x08,
Return = 0x14,
- Unreachable = 0x15
+ Unreachable = 0x15,
+ EndMarker = 0xff
};
enum MemoryAccess {
@@ -381,55 +456,81 @@ public:
}
void write() {
- writeStart();
+ writeHeader();
writeMemory();
writeSignatures();
+ writeImports();
+ writeFunctionSignatures();
writeFunctions();
+ writeStart();
+ writeExports();
writeDataSegments();
writeFunctionTable();
+ writeNames();
writeEnd();
finishUp();
}
+ void writeHeader() {
+ if (debug) std::cerr << "== writeHeader" << std::endl;
+ o << int32_t(0x6d736100); // magic number \0asm
+ o << int32_t(10); // version number
+ }
+
+ int32_t writeU32LEBPlaceholder() {
+ int32_t ret = o.size();
+ o << int32_t(0);
+ o << int8_t(0);
+ return ret;
+ }
+
+ int32_t startSection(const char* name) {
+ // emit 5 bytes of 0, which we'll fill with LEB later
+ auto ret = writeU32LEBPlaceholder();
+ writeInlineString(name);
+ return ret;
+ }
+
+ void finishSection(int32_t start) {
+ int32_t size = o.size() - start - 5; // section size does not include the 5 bytes of the size field itself
+ o.writeAt(start, U32LEB(size));
+ }
+
void writeStart() {
if (!wasm->start.is()) return;
if (debug) std::cerr << "== writeStart" << std::endl;
- o << int8_t(BinaryConsts::Start);
- emitString(wasm->start.str);
+ auto start = startSection(BinaryConsts::Section::Start);
+ o << U32LEB(getFunctionIndex(wasm->start.str));
+ finishSection(start);
}
void writeMemory() {
if (wasm->memory.max == 0) return;
if (debug) std::cerr << "== writeMemory" << std::endl;
- o << int8_t(BinaryConsts::Memory);
- if (wasm->memory.initial == 0) { // XXX diverge from v8, 0 means 0, 1 and above are powers of 2 starting at 0
- o << int8_t(0);
- } else {
- o << int8_t(std::min(ceil(log2(wasm->memory.initial)), 31.0) + 1); // up to 31 bits, don't let ceil get us to UINT_MAX which can overflow
- }
- if (wasm->memory.max == 0) {
- o << int8_t(0);
- } else {
- o << int8_t(std::min(ceil(log2(wasm->memory.max)), 31.0) + 1);
- }
- o << int8_t(1); // export memory
+ auto start = startSection(BinaryConsts::Section::Memory);
+ o << U32LEB(wasm->memory.initial)
+ << U32LEB(wasm->memory.max)
+ << int8_t(1); // export memory
+ finishSection(start);
}
void writeSignatures() {
if (wasm->functionTypes.size() == 0) return;
if (debug) std::cerr << "== writeSignatures" << std::endl;
- o << int8_t(BinaryConsts::Signatures) << LEB128(wasm->functionTypes.size());
+ auto start = startSection(BinaryConsts::Section::Signatures);
+ o << U32LEB(wasm->functionTypes.size());
for (auto* type : wasm->functionTypes) {
if (debug) std::cerr << "write one" << std::endl;
- o << int8_t(type->params.size());
+ o << U32LEB(type->params.size());
o << binaryWasmType(type->result);
for (auto param : type->params) {
o << binaryWasmType(param);
}
}
+ finishSection(start);
}
- int16_t getFunctionTypeIndex(Name type) {
+ int32_t getFunctionTypeIndex(Name type) {
// TODO: optimize
for (size_t i = 0; i < wasm->functionTypes.size(); i++) {
if (wasm->functionTypes[i]->name == type) return i;
@@ -437,6 +538,20 @@ public:
abort();
}
+ void writeImports() {
+ if (wasm->imports.size() == 0) return;
+ if (debug) std::cerr << "== writeImports" << std::endl;
+ auto start = startSection(BinaryConsts::Section::ImportTable);
+ o << U32LEB(wasm->imports.size());
+ for (auto* import : wasm->imports) {
+ if (debug) std::cerr << "write one" << std::endl;
+ o << U32LEB(getFunctionTypeIndex(import->type->name));
+ writeInlineString(import->module.str);
+ writeInlineString(import->base.str);
+ }
+ finishSection(start);
+ }
+
std::map<Name, size_t> mappedLocals; // local name => index in compact form of [all int32s][all int64s]etc
std::map<WasmType, size_t> numLocalsByType; // type => number of locals of that type in the compact form
@@ -477,62 +592,66 @@ public:
}
}
+ void writeFunctionSignatures() {
+ if (wasm->functions.size() == 0) return;
+ if (debug) std::cerr << "== writeFunctionSignatures" << std::endl;
+ auto start = startSection(BinaryConsts::Section::FunctionSignatures);
+ o << U32LEB(wasm->functions.size());
+ for (auto* curr : wasm->functions) {
+ if (debug) std::cerr << "write one" << std::endl;
+ o << U32LEB(getFunctionTypeIndex(curr->type));
+ }
+ finishSection(start);
+ }
+
void writeFunctions() {
- if (wasm->functions.size() + wasm->imports.size() == 0) return;
+ if (wasm->functions.size() == 0) return;
if (debug) std::cerr << "== writeFunctions" << std::endl;
- size_t total = wasm->imports.size() + wasm->functions.size();
- o << int8_t(BinaryConsts::Functions) << LEB128(total);
- std::map<Name, Name> exportedFunctions;
- for (auto* e : wasm->exports) {
- exportedFunctions[e->value] = e->name;
- }
+ auto start = startSection(BinaryConsts::Section::Functions);
+ size_t total = wasm->functions.size();
+ o << U32LEB(total);
for (size_t i = 0; i < total; i++) {
if (debug) std::cerr << "write one at" << o.size() << std::endl;
- Import* import = i < wasm->imports.size() ? wasm->imports[i] : nullptr;
- Function* function = i >= wasm->imports.size() ? wasm->functions[i - wasm->imports.size()] : nullptr;
- Name name, type;
- if (import) {
- name = import->name;
- type = import->type->name;
- } else {
- name = function->name;
- type = function->type;
- mappedLocals.clear();
- numLocalsByType.clear();
- }
- if (debug) std::cerr << "writing" << name << std::endl;
- bool export_ = exportedFunctions.count(name) > 0;
- o << int8_t(BinaryConsts::Named |
- (BinaryConsts::Import * !!import) |
- (BinaryConsts::Locals * (function && function->locals.size() > 0)) |
- (BinaryConsts::Export * export_));
- o << getFunctionTypeIndex(type);
- emitString(name.str);
- if (export_) emitString(exportedFunctions[name].str); // XXX addition to v8 binary format
- if (function) {
- mapLocals(function);
- if (function->locals.size() > 0) {
- o << uint16_t(numLocalsByType[i32])
- << uint16_t(numLocalsByType[i64])
- << uint16_t(numLocalsByType[f32])
- << uint16_t(numLocalsByType[f64]);
- }
- size_t sizePos = o.size();
- o << (uint32_t)0; // placeholder, we fill in the size later when we have it // XXX int32, diverge from v8 format, to get more code to compile
- size_t start = o.size();
- depth = 0;
- recurse(function->body);
- assert(depth == 0);
- size_t size = o.size() - start;
- assert(size <= std::numeric_limits<uint16_t>::max());
- if (debug) std::cerr << "body size: " << size << ", writing at " << sizePos << ", next starts at " << o.size() << std::endl;
- o.writeAt(sizePos, uint32_t(size)); // XXX int32, diverge from v8 format, to get more code to compile
- } else {
- // import
- emitString(import->module.str); // XXX diverge
- emitString(import->base.str); // from v8
- }
+ size_t sizePos = writeU32LEBPlaceholder();
+ size_t start = o.size();
+ Function* function = wasm->functions[i];
+ mappedLocals.clear();
+ numLocalsByType.clear();
+ if (debug) std::cerr << "writing" << function->name << std::endl;
+ mapLocals(function);
+ o << U32LEB(
+ (numLocalsByType[i32] ? 1 : 0) +
+ (numLocalsByType[i64] ? 1 : 0) +
+ (numLocalsByType[f32] ? 1 : 0) +
+ (numLocalsByType[f64] ? 1 : 0)
+ );
+ if (numLocalsByType[i32]) o << U32LEB(numLocalsByType[i32]) << binaryWasmType(i32);
+ if (numLocalsByType[i64]) o << U32LEB(numLocalsByType[i64]) << binaryWasmType(i64);
+ if (numLocalsByType[f32]) o << U32LEB(numLocalsByType[f32]) << binaryWasmType(f32);
+ if (numLocalsByType[f64]) o << U32LEB(numLocalsByType[f64]) << binaryWasmType(f64);
+ depth = 0;
+ recurse(function->body);
+ o << int8_t(BinaryConsts::EndMarker);
+ assert(depth == 0);
+ size_t size = o.size() - start;
+ assert(size <= std::numeric_limits<uint32_t>::max());
+ if (debug) std::cerr << "body size: " << size << ", writing at " << sizePos << ", next starts at " << o.size() << std::endl;
+ o.writeAt(sizePos, U32LEB(size));
+ }
+ finishSection(start);
+ }
+
+ void writeExports() {
+ if (wasm->exports.size() == 0) return;
+ if (debug) std::cerr << "== writeexports" << std::endl;
+ auto start = startSection(BinaryConsts::Section::ExportTable);
+ o << U32LEB(wasm->exports.size());
+ for (auto* curr : wasm->exports) {
+ if (debug) std::cerr << "write one" << std::endl;
+ o << U32LEB(getFunctionIndex(curr->value));
+ writeInlineString(curr->name.str);
}
+ finishSection(start);
}
void writeDataSegments() {
@@ -541,42 +660,87 @@ public:
for (auto& segment : wasm->memory.segments) {
if (segment.size > 0) num++;
}
- o << int8_t(BinaryConsts::DataSegments) << LEB128(num);
+ auto start = startSection(BinaryConsts::Section::DataSegments);
+ o << U32LEB(num);
for (auto& segment : wasm->memory.segments) {
if (segment.size == 0) continue;
- o << int32_t(segment.offset);
- emitBuffer(segment.data, segment.size);
- o << int32_t(segment.size);
- o << int8_t(1); // load at program start
+ o << U32LEB(segment.offset);
+ writeInlineBuffer(segment.data, segment.size);
}
+ finishSection(start);
}
- uint16_t getFunctionIndex(Name name) {
- // TODO: optimize
- for (size_t i = 0; i < wasm->imports.size(); i++) {
- if (wasm->imports[i]->name == name) return i;
+ std::map<Name, uint32_t> mappedImports; // name of the Import => index
+ uint32_t getImportIndex(Name name) {
+ if (!mappedImports.size()) {
+ // Create name => index mapping.
+ for (size_t i = 0; i < wasm->imports.size(); i++) {
+ assert(mappedImports.count(wasm->imports[i]->name) == 0);
+ mappedImports[wasm->imports[i]->name] = i;
+ }
}
- for (size_t i = 0; i < wasm->functions.size(); i++) {
- if (wasm->functions[i]->name == name) return wasm->imports.size() + i;
+ assert(mappedImports.count(name));
+ return mappedImports[name];
+ }
+
+ std::map<Name, uint32_t> mappedFunctions; // name of the Function => index
+ uint32_t getFunctionIndex(Name name) {
+ if (!mappedFunctions.size()) {
+ // Create name => index mapping.
+ for (size_t i = 0; i < wasm->functions.size(); i++) {
+ assert(mappedFunctions.count(wasm->functions[i]->name) == 0);
+ mappedFunctions[wasm->functions[i]->name] = i;
+ }
}
- abort();
+ assert(mappedFunctions.count(name));
+ return mappedFunctions[name];
}
void writeFunctionTable() {
if (wasm->table.names.size() == 0) return;
if (debug) std::cerr << "== writeFunctionTable" << std::endl;
- o << int8_t(BinaryConsts::FunctionTable) << LEB128(wasm->table.names.size());
+ auto start = startSection(BinaryConsts::Section::FunctionTable);
+ o << U32LEB(wasm->table.names.size());
for (auto name : wasm->table.names) {
- o << getFunctionIndex(name);
+ o << U32LEB(getFunctionIndex(name));
+ }
+ finishSection(start);
+ }
+
+ void writeNames() {
+ if (wasm->functions.size() == 0) return;
+ if (debug) std::cerr << "== writeNames" << std::endl;
+ auto start = startSection(BinaryConsts::Section::Names);
+ o << U32LEB(wasm->functions.size());
+ for (auto* curr : wasm->functions) {
+ writeInlineString(curr->name.str);
+ o << U32LEB(0); // TODO: locals
}
+ finishSection(start);
}
void writeEnd() {
- o << int8_t(BinaryConsts::End);
+ auto start = startSection(BinaryConsts::Section::End);
+ finishSection(start);
}
// helpers
+ void writeInlineString(const char* name) {
+ int32_t size = strlen(name);
+ o << U32LEB(size);
+ for (int32_t i = 0; i < size; i++) {
+ o << int8_t(name[i]);
+ }
+ }
+
+ void writeInlineBuffer(const char* data, size_t size) {
+ o << U32LEB(size);
+ for (size_t i = 0; i < size; i++) {
+ o << int8_t(data[i]);
+ }
+ }
+
struct Buffer {
const char* data;
size_t size;
@@ -623,7 +787,7 @@ public:
void visitBlock(Block *curr) {
if (debug) std::cerr << "zz node: Block" << std::endl;
- o << int8_t(BinaryConsts::Block) << LEB128(curr->list.size()); // XXX larger block size, divergence from v8 to get more code to build
+ o << int8_t(BinaryConsts::Block);
breakStack.push_back(curr->name);
size_t i = 0;
for (auto* child : curr->list) {
@@ -631,13 +795,14 @@ public:
recurse(child);
}
breakStack.pop_back();
+ o << int8_t(BinaryConsts::EndMarker);
}
void visitIf(If *curr) {
if (debug) std::cerr << "zz node: If" << std::endl;
- o << int8_t(curr->ifFalse ? BinaryConsts::IfElse : BinaryConsts::If);
recurse(curr->condition);
recurse(curr->ifTrue);
if (curr->ifFalse) recurse(curr->ifFalse);
+ o << int8_t(curr->ifFalse ? BinaryConsts::IfElse : BinaryConsts::If);
}
void visitLoop(Loop *curr) {
if (debug) std::cerr << "zz node: Loop" << std::endl;
@@ -648,98 +813,86 @@ public:
recurse(curr->body);
breakStack.pop_back();
breakStack.pop_back();
+ o << int8_t(BinaryConsts::EndMarker);
}
- int getBreakIndex(Name name) { // -1 if not found
+ int32_t getBreakIndex(Name name) { // -1 if not found
for (int i = breakStack.size() - 1; i >= 0; i--) {
if (breakStack[i] == name) {
return breakStack.size() - 1 - i;
}
}
-printf("bad!\n");
std::cerr << "bad break: " << name << std::endl;
-
-
-
-assert(0);
-
-
abort();
}
void visitBreak(Break *curr) {
if (debug) std::cerr << "zz node: Break" << std::endl;
- o << int8_t(curr->condition ? BinaryConsts::BrIf : BinaryConsts::Br);
- int offset = getBreakIndex(curr->name);
- o << int8_t(offset);
if (curr->value) {
recurse(curr->value);
} else {
visitNop(nullptr);
}
if (curr->condition) recurse(curr->condition);
+ o << int8_t(curr->condition ? BinaryConsts::BrIf : BinaryConsts::Br)
+ << U32LEB(getBreakIndex(curr->name));
}
void visitSwitch(Switch *curr) {
if (debug) std::cerr << "zz node: Switch" << std::endl;
- o << int8_t(BinaryConsts::TableSwitch) << int16_t(curr->cases.size())
- << int16_t(curr->targets.size() + 1);
- for (size_t i = 0; i < curr->cases.size(); i++) {
- breakStack.push_back(curr->cases[i].name);
- }
- breakStack.push_back(curr->name);
+ o << int8_t(BinaryConsts::TableSwitch) << U32LEB(curr->targets.size());
for (auto target : curr->targets) {
- o << (int16_t)getBreakIndex(target);
- }
- o << (int16_t)getBreakIndex(curr->default_);
- recurse(curr->value);
- for (auto& c : curr->cases) {
- recurse(c.body);
+ o << U32LEB(getBreakIndex(target));
}
- breakStack.pop_back(); // name
- for (size_t i = 0; i < curr->cases.size(); i++) {
- breakStack.pop_back(); // case
+ o << U32LEB(getBreakIndex(curr->default_));
+ recurse(curr->condition);
+ o << int8_t(BinaryConsts::EndMarker);
+ if (curr->value) {
+ recurse(curr->value);
+ } else {
+ visitNop(nullptr);
}
+ o << int8_t(BinaryConsts::EndMarker);
}
void visitCall(Call *curr) {
if (debug) std::cerr << "zz node: Call" << std::endl;
- o << int8_t(BinaryConsts::CallFunction) << LEB128(getFunctionIndex(curr->target));
for (auto* operand : curr->operands) {
recurse(operand);
}
+ o << int8_t(BinaryConsts::CallFunction) << U32LEB(getFunctionIndex(curr->target));
}
void visitCallImport(CallImport *curr) {
if (debug) std::cerr << "zz node: CallImport" << std::endl;
- o << int8_t(BinaryConsts::CallFunction) << LEB128(getFunctionIndex(curr->target));
for (auto* operand : curr->operands) {
recurse(operand);
}
+ o << int8_t(BinaryConsts::CallImport) << U32LEB(getImportIndex(curr->target));
}
void visitCallIndirect(CallIndirect *curr) {
if (debug) std::cerr << "zz node: CallIndirect" << std::endl;
- o << int8_t(BinaryConsts::CallIndirect) << LEB128(getFunctionTypeIndex(curr->fullType->name));
recurse(curr->target);
for (auto* operand : curr->operands) {
recurse(operand);
}
+ o << int8_t(BinaryConsts::CallIndirect) << U32LEB(getFunctionTypeIndex(curr->fullType->name));
}
void visitGetLocal(GetLocal *curr) {
if (debug) std::cerr << "zz node: GetLocal " << (o.size() + 1) << std::endl;
- o << int8_t(BinaryConsts::GetLocal) << LEB128(mappedLocals[curr->name]);
+ o << int8_t(BinaryConsts::GetLocal) << U32LEB(mappedLocals[curr->name]);
}
void visitSetLocal(SetLocal *curr) {
if (debug) std::cerr << "zz node: SetLocal" << std::endl;
- o << int8_t(BinaryConsts::SetLocal) << LEB128(mappedLocals[curr->name]);
recurse(curr->value);
+ o << int8_t(BinaryConsts::SetLocal) << U32LEB(mappedLocals[curr->name]);
}
void emitMemoryAccess(size_t alignment, size_t bytes, uint32_t offset) {
- o << int8_t( ((alignment == bytes || alignment == 0) ? BinaryConsts::NaturalAlignment : BinaryConsts::Alignment) |
- (offset ? BinaryConsts::Offset : 0) );
- if (offset) o << LEB128(offset);
+ o << U32LEB(Log2(alignment ? alignment : bytes));
+ o << U32LEB(offset);
}
void visitLoad(Load *curr) {
if (debug) std::cerr << "zz node: Load" << std::endl;
+ recurse(curr->ptr);
switch (curr->type) {
case i32: {
switch (curr->bytes) {
@@ -765,10 +918,11 @@ assert(0);
default: abort();
}
emitMemoryAccess(curr->align, curr->bytes, curr->offset);
- recurse(curr->ptr);
}
void visitStore(Store *curr) {
if (debug) std::cerr << "zz node: Store" << std::endl;
+ recurse(curr->ptr);
+ recurse(curr->value);
switch (curr->type) {
case i32: {
switch (curr->bytes) {
@@ -794,23 +948,16 @@ assert(0);
default: abort();
}
emitMemoryAccess(curr->align, curr->bytes, curr->offset);
- recurse(curr->ptr);
- recurse(curr->value);
}
void visitConst(Const *curr) {
if (debug) std::cerr << "zz node: Const" << curr << " : " << curr->type << std::endl;
switch (curr->type) {
case i32: {
- uint32_t value = curr->value.geti32();
- if (value <= 255) {
- o << int8_t(BinaryConsts::I8Const) << uint8_t(value);
- break;
- }
- o << int8_t(BinaryConsts::I32Const) << value;
+ o << int8_t(BinaryConsts::I32Const) << S32LEB(curr->value.geti32());
break;
}
case i64: {
- o << int8_t(BinaryConsts::I64Const) << curr->value.geti64();
+ o << int8_t(BinaryConsts::I64Const) << S64LEB(curr->value.geti64());
break;
}
case f32: {
@@ -827,10 +974,12 @@ assert(0);
}
void visitUnary(Unary *curr) {
if (debug) std::cerr << "zz node: Unary" << std::endl;
+ recurse(curr->value);
switch (curr->op) {
case Clz: o << int8_t(curr->type == i32 ? BinaryConsts::I32Clz : BinaryConsts::I64Clz); break;
case Ctz: o << int8_t(curr->type == i32 ? BinaryConsts::I32Ctz : BinaryConsts::I64Ctz); break;
case Popcnt: o << int8_t(curr->type == i32 ? BinaryConsts::I32Popcnt : BinaryConsts::I64Popcnt); break;
+ case EqZ: o << int8_t(curr->type == i32 ? BinaryConsts::I32EqZ : BinaryConsts::I64EqZ); break;
case Neg: o << int8_t(curr->type == f32 ? BinaryConsts::F32Neg : BinaryConsts::F64Neg); break;
case Abs: o << int8_t(curr->type == f32 ? BinaryConsts::F32Abs : BinaryConsts::F64Abs); break;
case Ceil: o << int8_t(curr->type == f32 ? BinaryConsts::F32Ceil : BinaryConsts::F64Ceil); break;
@@ -855,10 +1004,11 @@ assert(0);
case ReinterpretInt: o << int8_t(curr->type == i32 ? BinaryConsts::I32ReinterpretF32 : BinaryConsts::I64ReinterpretF64); break;
default: abort();
}
- recurse(curr->value);
}
void visitBinary(Binary *curr) {
if (debug) std::cerr << "zz node: Binary" << std::endl;
+ recurse(curr->left);
+ recurse(curr->right);
#define TYPED_CODE(code) { \
switch (getReachableWasmType(curr->left->type, curr->right->type)) { \
case i32: o << int8_t(BinaryConsts::I32##code); break; \
@@ -900,6 +1050,8 @@ assert(0);
case Shl: INT_TYPED_CODE(Shl);;
case ShrU: INT_TYPED_CODE(ShrU);
case ShrS: INT_TYPED_CODE(ShrS);
+ case RotL: INT_TYPED_CODE(RotL);
+ case RotR: INT_TYPED_CODE(RotR);
case Div: FLOAT_TYPED_CODE(Div);
case CopySign: FLOAT_TYPED_CODE(CopySign);
case Min: FLOAT_TYPED_CODE(Min);
@@ -920,27 +1072,25 @@ assert(0);
case Ge: FLOAT_TYPED_CODE(Ge);
default: abort();
}
- recurse(curr->left);
- recurse(curr->right);
#undef TYPED_CODE
#undef INT_TYPED_CODE
#undef FLOAT_TYPED_CODE
}
void visitSelect(Select *curr) {
if (debug) std::cerr << "zz node: Select" << std::endl;
- o << int8_t(BinaryConsts::Select);
recurse(curr->ifTrue);
recurse(curr->ifFalse);
recurse(curr->condition);
+ o << int8_t(BinaryConsts::Select);
}
void visitReturn(Return *curr) {
if (debug) std::cerr << "zz node: Return" << std::endl;
- o << int8_t(BinaryConsts::Return);
if (curr->value) {
recurse(curr->value);
} else {
visitNop(nullptr);
}
+ o << int8_t(BinaryConsts::Return);
}
void visitHost(Host *curr) {
if (debug) std::cerr << "zz node: Host" << std::endl;
@@ -950,8 +1100,8 @@ assert(0);
break;
}
case GrowMemory: {
- o << int8_t(BinaryConsts::GrowMemory);
recurse(curr->operands[0]);
+ o << int8_t(BinaryConsts::GrowMemory);
break;
}
default: abort();
@@ -973,37 +1123,58 @@ class WasmBinaryBuilder {
std::vector<char>& input;
bool debug;
- size_t pos;
+ size_t pos = 0;
+ int32_t startIndex = -1;
public:
- WasmBinaryBuilder(AllocatingModule& wasm, std::vector<char>& input, bool debug) : wasm(wasm), allocator(wasm.allocator), input(input), debug(debug), pos(0) {}
+ WasmBinaryBuilder(AllocatingModule& wasm, std::vector<char>& input, bool debug) : wasm(wasm), allocator(wasm.allocator), input(input), debug(debug) {}
void read() {
- // read sections until the end
- while (1) {
- int8_t section = getInt8();
- if (section == BinaryConsts::End) {
+ readHeader();
+
+ // read sections until the end
+ while (more()) {
+ auto sectionSize = getU32LEB();
+ assert(sectionSize < pos + input.size());
+ auto nameSize = getU32LEB();
+ auto match = [&](const char* name) {
+ for (size_t i = 0; i < nameSize; i++) {
+ if (pos + i >= input.size()) return false;
+ if (name[i] == 0) return false;
+ if (input[pos + i] != name[i]) return false;
+ }
+ if (strlen(name) != nameSize) return false;
+ pos += nameSize;
+ return true;
+ };
+ if (match(BinaryConsts::Section::Start)) readStart();
+ else if (match(BinaryConsts::Section::Memory)) readMemory();
+ else if (match(BinaryConsts::Section::Signatures)) readSignatures();
+ else if (match(BinaryConsts::Section::ImportTable)) readImports();
+ else if (match(BinaryConsts::Section::FunctionSignatures)) readFunctionSignatures();
+ else if (match(BinaryConsts::Section::Functions)) readFunctions();
+ else if (match(BinaryConsts::Section::ExportTable)) readExports();
+ else if (match(BinaryConsts::Section::DataSegments)) readDataSegments();
+ else if (match(BinaryConsts::Section::FunctionTable)) readFunctionTable();
+ else if (match(BinaryConsts::Section::Names)) readNames();
+ else if (match(BinaryConsts::Section::End)) {
if (debug) std::cerr << "== readEnd" << std::endl;
break;
- }
-
- switch (section) {
- case BinaryConsts::Start: readStart(); break;
- case BinaryConsts::Memory: readMemory(); break;
- case BinaryConsts::Signatures: readSignatures(); break;
- case BinaryConsts::Functions: readFunctions(); break;
- case BinaryConsts::DataSegments: readDataSegments(); break;
- case BinaryConsts::FunctionTable: readFunctionTable(); break;
- default: abort();
+ } else {
+ abort();
}
}
processFunctions();
}
+ bool more() {
+ return pos < input.size();
+ }
+
uint8_t getInt8() {
- assert(pos < input.size());
+ assert(more());
if (debug) std::cerr << "getInt8: " << (int)(uint8_t)input[pos] << " (at " << pos << ")" << std::endl;
return input[pos++];
}
@@ -1038,13 +1209,40 @@ public:
return ret;
}
- uint32_t getLEB128() {
+ uint32_t getU32LEB() {
+ if (debug) std::cerr << "<==" << std::endl;
+ U32LEB ret;
+ ret.read([&]() {
+ return getInt8();
+ });
+ if (debug) std::cerr << "getU32LEB: " << ret.value << " ==>" << std::endl;
+ return ret.value;
+ }
+ uint64_t getU64LEB() {
if (debug) std::cerr << "<==" << std::endl;
- LEB128 ret;
+ U64LEB ret;
ret.read([&]() {
return getInt8();
});
- if (debug) std::cerr << "getLEB128: " << ret.value << " ==>" << std::endl;
+ if (debug) std::cerr << "getU64LEB: " << ret.value << " ==>" << std::endl;
+ return ret.value;
+ }
+ int32_t getS32LEB() {
+ if (debug) std::cerr << "<==" << std::endl;
+ S32LEB ret;
+ ret.read([&]() {
+ return (int8_t)getInt8();
+ });
+ if (debug) std::cerr << "getU32LEB: " << ret.value << " ==>" << std::endl;
+ return ret.value;
+ }
+ int64_t getS64LEB() {
+ if (debug) std::cerr << "<==" << std::endl;
+ S64LEB ret;
+ ret.read([&]() {
+ return (int8_t)getInt8();
+ });
+ if (debug) std::cerr << "getU64LEB: " << ret.value << " ==>" << std::endl;
return ret.value;
}
WasmType getWasmType() {
@@ -1067,6 +1265,17 @@ public:
return ret;
}
+ Name getInlineString() {
+ if (debug) std::cerr << "<==" << std::endl;
+ auto len = getU32LEB();
+ std::string str;
+ for (size_t i = 0; i < len; i++) {
+ str = str + char(getInt8());
+ }
+ if (debug) std::cerr << "getInlineString: " << str << " ==>" << std::endl;
+ return Name(str);
+ }
+
void verifyInt8(int8_t x) {
int8_t y = getInt8();
assert(x == y);
@@ -1092,28 +1301,38 @@ public:
assert(x == y);
}
+ void ungetInt8() {
+ assert(pos > 0);
+ if (debug) std::cerr << "ungetInt8 (at " << pos << ")" << std::endl;
+ pos--;
+ }
+
+ void readHeader() {
+ if (debug) std::cerr << "== readHeader" << std::endl;
+ verifyInt32(0x6d736100);
+ verifyInt32(10);
+ }
+
void readStart() {
if (debug) std::cerr << "== readStart" << std::endl;
- wasm.start = getString();
+ startIndex = getU32LEB();
}
void readMemory() {
if (debug) std::cerr << "== readMemory" << std::endl;
- size_t initial = getInt8();
- wasm.memory.initial = initial == 0 ? 0 : std::pow<size_t>(2, initial - 1);
- size_t max = getInt8();
- wasm.memory.max = max == 0 ? 0 : std::pow<size_t>(2, max - 1);
+ wasm.memory.initial = getU32LEB();
+ wasm.memory.max = getU32LEB();
verifyInt8(1); // export memory
}
void readSignatures() {
if (debug) std::cerr << "== readSignatures" << std::endl;
- size_t numTypes = getLEB128();
+ size_t numTypes = getU32LEB();
if (debug) std::cerr << "num: " << numTypes << std::endl;
for (size_t i = 0; i < numTypes; i++) {
if (debug) std::cerr << "read one" << std::endl;
auto curr = allocator.alloc<FunctionType>();
- size_t numParams = getInt8();
+ size_t numParams = getU32LEB();
if (debug) std::cerr << "num params: " << numParams << std::endl;
curr->result = getWasmType();
for (size_t j = 0; j < numParams; j++) {
@@ -1123,7 +1342,37 @@ public:
}
}
- std::vector<Name> mappedFunctions; // index => name of the Import or Function
+ void readImports() {
+ if (debug) std::cerr << "== readImports" << std::endl;
+ size_t num = getU32LEB();
+ if (debug) std::cerr << "num: " << num << std::endl;
+ for (size_t i = 0; i < num; i++) {
+ if (debug) std::cerr << "read one" << std::endl;
+ auto curr = allocator.alloc<Import>();
+ curr->name = Name(std::string("import$") + std::to_string(i));
+ auto index = getU32LEB();
+ assert(index < wasm.functionTypes.size());
+ curr->type = wasm.functionTypes[index];
+ assert(curr->type->name.is());
+ curr->module = getInlineString();
+ curr->base = getInlineString();
+ wasm.addImport(curr);
+ }
+ }
+
+ std::vector<FunctionType*> functionTypes;
+
+ void readFunctionSignatures() {
+ if (debug) std::cerr << "== readFunctionSignatures" << std::endl;
+ size_t num = getU32LEB();
+ if (debug) std::cerr << "num: " << num << std::endl;
+ for (size_t i = 0; i < num; i++) {
+ if (debug) std::cerr << "read one" << std::endl;
+ auto index = getU32LEB();
+ assert(index < wasm.functionTypes.size());
+ functionTypes.push_back(wasm.functionTypes[index]);
+ }
+ }
size_t nextLabel;
@@ -1131,134 +1380,174 @@ public:
return cashew::IString(("label$" + std::to_string(nextLabel++)).c_str(), false);
}
+ // We read functions before we know their names, so we need to backpatch the names later
+
+ std::vector<Function*> functions; // we store functions here before wasm.addFunction after we know their names
+ std::map<size_t, std::vector<Call*>> functionCalls; // at index i we have all calls to i
+
void readFunctions() {
if (debug) std::cerr << "== readFunctions" << std::endl;
- size_t total = getLEB128(); // imports and functions
+ size_t total = getU32LEB();
for (size_t i = 0; i < total; i++) {
if (debug) std::cerr << "read one at " << pos << std::endl;
- auto data = getInt8();
- auto type = wasm.functionTypes[getInt16()];
- bool named = data & BinaryConsts::Named;
- assert(named);
- bool import = data & BinaryConsts::Import;
- bool locals = data & BinaryConsts::Locals;
- bool export_ = data & BinaryConsts::Export;
- Name name = getString();
- if (export_) { // XXX addition to v8 binary format
- Name exportName = getString();
- auto e = allocator.alloc<Export>();
- e->name = exportName;
- e->value = name;
- wasm.addExport(e);
+ size_t size = getU32LEB();
+ assert(size > 0); // we could also check it matches the seen size
+ auto type = functionTypes[i];
+ if (debug) std::cerr << "reading" << i << std::endl;
+ auto func = allocator.alloc<Function>();
+ func->type = type->name;
+ func->result = type->result;
+ size_t nextVar = 0;
+ auto addVar = [&]() {
+ Name name = cashew::IString(("var$" + std::to_string(nextVar++)).c_str(), false);
+ return name;
+ };
+ for (size_t j = 0; j < type->params.size(); j++) {
+ func->params.emplace_back(addVar(), type->params[j]);
}
- if (debug) std::cerr << "reading" << name << std::endl;
- mappedFunctions.push_back(name);
- if (import) {
- auto imp = allocator.alloc<Import>();
- imp->name = name;
- imp->module = getString(); // XXX diverge
- imp->base = getString(); // from v8
- imp->type = type;
- wasm.addImport(imp);
- } else {
- auto func = allocator.alloc<Function>();
- func->name = name;
- func->type = type->name;
- func->result = type->result;
- size_t nextVar = 0;
- auto addVar = [&]() {
- Name name = cashew::IString(("var$" + std::to_string(nextVar++)).c_str(), false);
- return name;
- };
- for (size_t j = 0; j < type->params.size(); j++) {
- func->params.emplace_back(addVar(), type->params[j]);
+ size_t numLocalTypes = getU32LEB();
+ for (size_t t = 0; t < numLocalTypes; t++) {
+ auto num = getU32LEB();
+ auto type = getWasmType();
+ while (num > 0) {
+ func->locals.emplace_back(addVar(), type);
+ num--;
}
- if (locals) {
- auto addLocals = [&](WasmType type) {
- int16_t num = getInt16();
- while (num > 0) {
- func->locals.emplace_back(addVar(), type);
- num--;
- }
- };
- addLocals(i32);
- addLocals(i64);
- addLocals(f32);
- addLocals(f64);
+ }
+ {
+ // process the function body
+ if (debug) std::cerr << "processing function: " << i << std::endl;
+ nextLabel = 0;
+ // prepare locals
+ mappedLocals.clear();
+ localTypes.clear();
+ for (size_t i = 0; i < func->params.size(); i++) {
+ mappedLocals.push_back(func->params[i].name);
+ localTypes[func->params[i].name] = func->params[i].type;
+ }
+ for (size_t i = 0; i < func->locals.size(); i++) {
+ mappedLocals.push_back(func->locals[i].name);
+ localTypes[func->locals[i].name] = func->locals[i].type;
}
- size_t size = getInt32(); // XXX int32, diverge from v8 format, to get more code to compile
- // we can't read the function yet - it might call other functions that are defined later,
- // and we do depend on the function type, as well as the mappedFunctions table.
- functions.emplace_back(func, pos, size);
- pos += size;
- func->body = nullptr; // will be filled later. but we do have the name and the type already.
- wasm.addFunction(func);
+ // process body
+ assert(breakStack.empty());
+ assert(expressionStack.empty());
+ depth = 0;
+ processExpressions();
+ assert(expressionStack.size() == 1);
+ func->body = popExpression();
+ assert(depth == 0);
+ assert(breakStack.empty());
+ assert(expressionStack.empty());
}
+ functions.push_back(func);
}
}
- struct FunctionData {
- Function* func;
- size_t pos, size;
- FunctionData(Function* func, size_t pos, size_t size) : func(func), pos(pos), size(size) {}
- };
+ std::map<Export*, size_t> exportIndexes;
- std::vector<FunctionData> functions;
+ void readExports() {
+ if (debug) std::cerr << "== readExports" << std::endl;
+ size_t num = getU32LEB();
+ if (debug) std::cerr << "num: " << num << std::endl;
+ for (size_t i = 0; i < num; i++) {
+ if (debug) std::cerr << "read one" << std::endl;
+ auto curr = allocator.alloc<Export>();
+ auto index = getU32LEB();
+ assert(index < functionTypes.size());
+ curr->name = getInlineString();
+ exportIndexes[curr] = index;
+ }
+ }
std::vector<Name> mappedLocals; // index => local name
std::map<Name, WasmType> localTypes; // TODO: optimize
std::vector<Name> breakStack;
+ std::vector<Expression*> expressionStack;
+
+ void processExpressions() { // until an end marker
+ while (1) {
+ Expression* curr;
+ readExpression(curr);
+ if (!curr) break; // end marker, done with this function/block/etc
+ expressionStack.push_back(curr);
+ }
+ }
+
+ Expression* popExpression() {
+ assert(expressionStack.size() > 0);
+ auto ret = expressionStack.back();
+ expressionStack.pop_back();
+ return ret;
+ }
+
void processFunctions() {
for (auto& func : functions) {
- Function* curr = func.func;
- if (debug) std::cerr << "processing function: " << curr->name << std::endl;
- pos = func.pos;
- nextLabel = 0;
- // prepare locals
- mappedLocals.clear();
- localTypes.clear();
- for (size_t i = 0; i < curr->params.size(); i++) {
- mappedLocals.push_back(curr->params[i].name);
- localTypes[curr->params[i].name] = curr->params[i].type;
- }
- for (size_t i = 0; i < curr->locals.size(); i++) {
- mappedLocals.push_back(curr->locals[i].name);
- localTypes[curr->locals[i].name] = curr->locals[i].type;
+ wasm.addFunction(func);
+ }
+ // now that we have names for each function, apply things
+
+ if (startIndex >= 0) {
+ wasm.start = wasm.functions[startIndex]->name;
+ }
+
+ for (auto& iter : exportIndexes) {
+ Export* curr = iter.first;
+ curr->value = wasm.functions[iter.second]->name;
+ wasm.addExport(curr);
+ }
+
+ for (auto& iter : functionCalls) {
+ size_t index = iter.first;
+ auto& calls = iter.second;
+ for (auto* call : calls) {
+ call->target = wasm.functions[index]->name;
}
- // process body
- assert(breakStack.empty());
- depth = 0;
- readExpression(curr->body);
- assert(depth == 0);
- assert(breakStack.empty());
- assert(pos == func.pos + func.size);
+ }
+
+ for (size_t index : functionTable) {
+ assert(index < wasm.functions.size());
+ wasm.table.names.push_back(wasm.functions[index]->name);
}
}
void readDataSegments() {
if (debug) std::cerr << "== readDataSegments" << std::endl;
- auto num = getLEB128();
+ auto num = getU32LEB();
for (size_t i = 0; i < num; i++) {
Memory::Segment curr;
- curr.offset = getInt32();
- auto start = getInt32();
- auto size = getInt32();
- auto buffer = malloc(size);
- memcpy(buffer, &input[start], size);
+ curr.offset = getU32LEB();
+ auto size = getU32LEB();
+ auto buffer = (char*)malloc(size);
+ for (size_t j = 0; j < size; j++) {
+ buffer[j] = char(getInt8());
+ }
curr.data = (const char*)buffer;
curr.size = size;
wasm.memory.segments.push_back(curr);
- verifyInt8(1); // load at program start
}
}
+ std::vector<size_t> functionTable;
+
void readFunctionTable() {
if (debug) std::cerr << "== readFunctionTable" << std::endl;
- auto num = getLEB128();
+ auto num = getU32LEB();
for (size_t i = 0; i < num; i++) {
- wasm.table.names.push_back(mappedFunctions[getInt16()]);
+ auto index = getU32LEB();
+ functionTable.push_back(index);
+ }
+ }
+
+ void readNames() {
+ if (debug) std::cerr << "== readNames" << std::endl;
+ auto num = getU32LEB();
+ for (size_t i = 0; i < num; i++) {
+ functions[i]->name = getInlineString();
+ auto numLocals = getU32LEB();
+ assert(numLocals == 0); // TODO
}
}
@@ -1278,19 +1567,8 @@ public:
case BinaryConsts::Br:
case BinaryConsts::BrIf: visitBreak((curr = allocator.alloc<Break>())->cast<Break>(), code); break; // code distinguishes br from br_if
case BinaryConsts::TableSwitch: visitSwitch((curr = allocator.alloc<Switch>())->cast<Switch>()); break;
- case BinaryConsts::CallFunction: {
- // might be an import or not. we have to check here.
- Name target = mappedFunctions[getLEB128()];
- assert(target.is());
- if (debug) std::cerr << "call(import?) target: " << target << std::endl;
- if (wasm.importsMap.find(target) == wasm.importsMap.end()) {
- assert(wasm.functionsMap.find(target) != wasm.functionsMap.end());
- visitCall((curr = allocator.alloc<Call>())->cast<Call>(), target);
- } else {
- visitCallImport((curr = allocator.alloc<CallImport>())->cast<CallImport>(), target);
- }
- break;
- }
+ case BinaryConsts::CallFunction: visitCall((curr = allocator.alloc<Call>())->cast<Call>()); break;
+ case BinaryConsts::CallImport: visitCallImport((curr = allocator.alloc<CallImport>())->cast<CallImport>()); break;
case BinaryConsts::CallIndirect: visitCallIndirect((curr = allocator.alloc<CallIndirect>())->cast<CallIndirect>()); break;
case BinaryConsts::GetLocal: visitGetLocal((curr = allocator.alloc<GetLocal>())->cast<GetLocal>()); break;
case BinaryConsts::SetLocal: visitSetLocal((curr = allocator.alloc<SetLocal>())->cast<SetLocal>()); break;
@@ -1298,6 +1576,7 @@ public:
case BinaryConsts::Return: visitReturn((curr = allocator.alloc<Return>())->cast<Return>()); break;
case BinaryConsts::Nop: visitNop((curr = allocator.alloc<Nop>())->cast<Nop>()); break;
case BinaryConsts::Unreachable: visitUnreachable((curr = allocator.alloc<Unreachable>())->cast<Unreachable>()); break;
+ case BinaryConsts::EndMarker: curr = nullptr; break;
default: {
// otherwise, the code is a subcode TODO: optimize
if (maybeVisit<Binary>(curr, code)) break;
@@ -1327,26 +1606,53 @@ public:
void visitBlock(Block *curr) {
if (debug) std::cerr << "zz node: Block" << std::endl;
- auto num = getLEB128(); // XXX larger block size, divergence from v8 to get more code to build
- curr->name = getNextLabel();
- breakStack.push_back(curr->name);
- for (uint32_t i = 0; i < num; i++) {
- if (debug) std::cerr << " " << size_t(curr) << "\n zz Block element " << i << std::endl;
- Expression* child;
- readExpression(child);
- curr->list.push_back(child);
+ // special-case Block and de-recurse nested blocks in their first position, as that is
+ // a common pattern that can be very highly nested.
+ std::vector<Block*> stack;
+ while (1) {
+ curr->name = getNextLabel();
+ breakStack.push_back(curr->name);
+ stack.push_back(curr);
+ if (getInt8() == BinaryConsts::Block) {
+ // a recursion
+ curr = allocator.alloc<Block>();
+ continue;
+ } else {
+ // end of recursion
+ ungetInt8();
+ break;
+ }
}
- if (num > 0) {
- curr->type = curr->list.back()->type;
+ Block* last = nullptr;
+ while (stack.size() > 0) {
+ curr = stack.back();
+ stack.pop_back();
+ size_t start = expressionStack.size(); // everything after this, that is left when we see the marker, is ours
+ if (last) {
+ // the previous block is our first-position element
+ expressionStack.push_back(last);
+ }
+ last = curr;
+ processExpressions();
+ size_t end = expressionStack.size();
+ assert(end >= start);
+ for (size_t i = start; i < end; i++) {
+ if (debug) std::cerr << " " << size_t(expressionStack[i]) << "\n zz Block element " << curr->list.size() << std::endl;
+ curr->list.push_back(expressionStack[i]);
+ }
+ expressionStack.resize(start);
+ curr->finalize();
+ breakStack.pop_back();
}
- breakStack.pop_back();
}
void visitIf(If *curr, uint8_t code) {
if (debug) std::cerr << "zz node: If" << std::endl;
- readExpression(curr->condition);
- readExpression(curr->ifTrue);
if (code == BinaryConsts::IfElse) {
- readExpression(curr->ifFalse);
+ curr->ifFalse = popExpression();
+ }
+ curr->ifTrue = popExpression();
+ curr->condition = popExpression();
+ if (code == BinaryConsts::IfElse) {
curr->finalize();
}
}
@@ -1357,106 +1663,91 @@ public:
curr->in = getNextLabel();
breakStack.push_back(curr->out);
breakStack.push_back(curr->in);
- readExpression(curr->body);
+ processExpressions();
+ curr->body = popExpression();
breakStack.pop_back();
breakStack.pop_back();
curr->finalize();
}
- Name getBreakName(int offset) {
+ Name getBreakName(int32_t offset) {
+ assert(breakStack.size() - 1 - offset < breakStack.size());
return breakStack[breakStack.size() - 1 - offset];
}
void visitBreak(Break *curr, uint8_t code) {
if (debug) std::cerr << "zz node: Break" << std::endl;
- auto offset = getInt8();
- curr->name = getBreakName(offset);
- readExpression(curr->value);
- if (code == BinaryConsts::BrIf) readExpression(curr->condition);
+ curr->name = getBreakName(getU32LEB());
+ if (code == BinaryConsts::BrIf) curr->condition = popExpression();
+ curr->value = popExpression();
}
void visitSwitch(Switch *curr) {
if (debug) std::cerr << "zz node: Switch" << std::endl;
- auto numCases = getInt16();
- auto numTargets = getInt16();
- for (auto i = 0; i < numCases; i++) {
- Switch::Case c;
- c.name = getNextLabel();
- curr->cases.push_back(c);
- breakStack.push_back(c.name);
- }
- curr->name = getNextLabel();
- breakStack.push_back(curr->name);
- for (auto i = 0; i < numTargets - 1; i++) {
- curr->targets.push_back(getBreakName(getInt16()));
- }
- curr->default_ = getBreakName(getInt16());
- readExpression(curr->value);
- for (auto i = 0; i < numCases; i++) {
- readExpression(curr->cases[i].body);
- }
- breakStack.pop_back(); // name
- for (size_t i = 0; i < curr->cases.size(); i++) {
- breakStack.pop_back(); // case
+ auto numTargets = getU32LEB();
+ for (size_t i = 0; i < numTargets; i++) {
+ curr->targets.push_back(getBreakName(getU32LEB()));
}
+ curr->default_ = getBreakName(getU32LEB());
+ processExpressions();
+ curr->condition = popExpression();
+ processExpressions();
+ curr->value = popExpression();
+ if (curr->value->is<Nop>()) curr->value = nullptr;
}
- void visitCall(Call *curr, Name target) {
+ void visitCall(Call *curr) {
if (debug) std::cerr << "zz node: Call" << std::endl;
- curr->target = target;
- auto type = wasm.functionTypesMap[wasm.functionsMap[curr->target]->type];
+ auto index = getU32LEB();
+ auto type = functionTypes[index];
auto num = type->params.size();
+ curr->operands.resize(num);
for (size_t i = 0; i < num; i++) {
- Expression* operand;
- readExpression(operand);
- curr->operands.push_back(operand);
+ curr->operands[num - i - 1] = popExpression();
}
curr->type = type->result;
+ functionCalls[index].push_back(curr);
}
- void visitCallImport(CallImport *curr, Name target) {
+ void visitCallImport(CallImport *curr) {
if (debug) std::cerr << "zz node: CallImport" << std::endl;
- curr->target = target;
+ curr->target = wasm.imports[getU32LEB()]->name;
+ assert(wasm.importsMap.find(curr->target) != wasm.importsMap.end());
auto type = wasm.importsMap[curr->target]->type;
+ assert(type);
auto num = type->params.size();
+ if (debug) std::cerr << "zz node: CallImport " << curr->target << " with type " << type->name << " and " << num << " params\n";
+ curr->operands.resize(num);
for (size_t i = 0; i < num; i++) {
- Expression* operand;
- readExpression(operand);
- curr->operands.push_back(operand);
+ curr->operands[num - i - 1] = popExpression();
}
curr->type = type->result;
}
void visitCallIndirect(CallIndirect *curr) {
if (debug) std::cerr << "zz node: CallIndirect" << std::endl;
- curr->fullType = wasm.functionTypes[getLEB128()];
- readExpression(curr->target);
+ curr->fullType = wasm.functionTypes[getU32LEB()];
auto num = curr->fullType->params.size();
+ curr->operands.resize(num);
for (size_t i = 0; i < num; i++) {
- Expression* operand;
- readExpression(operand);
- curr->operands.push_back(operand);
+ curr->operands[num - i - 1] = popExpression();
}
+ curr->target = popExpression();
curr->type = curr->fullType->result;
}
void visitGetLocal(GetLocal *curr) {
if (debug) std::cerr << "zz node: GetLocal " << pos << std::endl;
- curr->name = mappedLocals[getLEB128()];
+ curr->name = mappedLocals[getU32LEB()];
assert(curr->name.is());
curr->type = localTypes[curr->name];
}
void visitSetLocal(SetLocal *curr) {
if (debug) std::cerr << "zz node: SetLocal" << std::endl;
- curr->name = mappedLocals[getLEB128()];
+ curr->name = mappedLocals[getU32LEB()];
assert(curr->name.is());
- readExpression(curr->value);
+ curr->value = popExpression();
curr->type = curr->value->type;
}
void readMemoryAccess(uint32_t& alignment, size_t bytes, uint32_t& offset) {
- auto value = getInt8();
- alignment = value & BinaryConsts::Alignment ? 1 : bytes;
- if (value & BinaryConsts::Offset) {
- offset = getLEB128();
- } else {
- offset = 0;
- }
+ alignment = Pow2(getU32LEB());
+ offset = getU32LEB();
}
bool maybeVisitImpl(Load *curr, uint8_t code) {
@@ -1479,7 +1770,7 @@ public:
}
if (debug) std::cerr << "zz node: Load" << std::endl;
readMemoryAccess(curr->align, curr->bytes, curr->offset);
- readExpression(curr->ptr);
+ curr->ptr = popExpression();
return true;
}
bool maybeVisitImpl(Store *curr, uint8_t code) {
@@ -1497,15 +1788,14 @@ public:
}
if (debug) std::cerr << "zz node: Store" << std::endl;
readMemoryAccess(curr->align, curr->bytes, curr->offset);
- readExpression(curr->ptr);
- readExpression(curr->value);
+ curr->value = popExpression();
+ curr->ptr = popExpression();
return true;
}
bool maybeVisitImpl(Const *curr, uint8_t code) {
switch (code) {
- case BinaryConsts::I8Const: curr->value = Literal(int32_t(getInt8())); break;
- case BinaryConsts::I32Const: curr->value = Literal(getInt32()); break;
- case BinaryConsts::I64Const: curr->value = Literal(getInt64()); break;
+ case BinaryConsts::I32Const: curr->value = Literal(getS32LEB()); break;
+ case BinaryConsts::I64Const: curr->value = Literal(getS64LEB()); break;
case BinaryConsts::F32Const: curr->value = Literal(getFloat32()); break;
case BinaryConsts::F64Const: curr->value = Literal(getFloat64()); break;
default: return false;
@@ -1522,6 +1812,8 @@ public:
case BinaryConsts::I64Ctz: curr->op = Ctz; curr->type = i64; break;
case BinaryConsts::I32Popcnt: curr->op = Popcnt; curr->type = i32; break;
case BinaryConsts::I64Popcnt: curr->op = Popcnt; curr->type = i64; break;
+ case BinaryConsts::I32EqZ: curr->op = EqZ; curr->type = i32; break;
+ case BinaryConsts::I64EqZ: curr->op = EqZ; curr->type = i64; break;
case BinaryConsts::F32Neg: curr->op = Neg; curr->type = f32; break;
case BinaryConsts::F64Neg: curr->op = Neg; curr->type = f64; break;
case BinaryConsts::F32Abs: curr->op = Abs; curr->type = f32; break;
@@ -1569,7 +1861,7 @@ public:
default: return false;
}
if (debug) std::cerr << "zz node: Unary" << std::endl;
- readExpression(curr->value);
+ curr->value = popExpression();
return true;
}
bool maybeVisitImpl(Binary *curr, uint8_t code) {
@@ -1601,6 +1893,8 @@ public:
INT_TYPED_CODE(Shl);
INT_TYPED_CODE(ShrU);
INT_TYPED_CODE(ShrS);
+ INT_TYPED_CODE(RotL);
+ INT_TYPED_CODE(RotR);
FLOAT_TYPED_CODE(Div);
FLOAT_TYPED_CODE(CopySign);
FLOAT_TYPED_CODE(Min);
@@ -1622,8 +1916,9 @@ public:
default: return false;
}
if (debug) std::cerr << "zz node: Binary" << std::endl;
- readExpression(curr->left);
- readExpression(curr->right);
+ curr->right = popExpression();
+ curr->left = popExpression();
+ curr->finalize();
return true;
#undef TYPED_CODE
#undef INT_TYPED_CODE
@@ -1631,14 +1926,14 @@ public:
}
void visitSelect(Select *curr) {
if (debug) std::cerr << "zz node: Select" << std::endl;
- readExpression(curr->ifTrue);
- readExpression(curr->ifFalse);
- readExpression(curr->condition);
+ curr->condition = popExpression();
+ curr->ifFalse = popExpression();
+ curr->ifTrue = popExpression();
curr->finalize();
}
void visitReturn(Return *curr) {
if (debug) std::cerr << "zz node: Return" << std::endl;
- readExpression(curr->value);
+ curr->value = popExpression();
}
bool maybeVisitImpl(Host *curr, uint8_t code) {
switch (code) {
@@ -1650,12 +1945,13 @@ public:
case BinaryConsts::GrowMemory: {
curr->op = GrowMemory;
curr->operands.resize(1);
- readExpression(curr->operands[0]);
+ curr->operands[0] = popExpression();
break;
}
default: return false;
}
if (debug) std::cerr << "zz node: Host" << std::endl;
+ curr->finalize();
return true;
}
void visitNop(Nop *curr) {
@@ -1669,4 +1965,3 @@ public:
} // namespace wasm
#endif // wasm_wasm_binary_h
-