summaryrefslogtreecommitdiff
path: root/src/wasm-binary.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/wasm-binary.h')
-rw-r--r--src/wasm-binary.h317
1 files changed, 180 insertions, 137 deletions
diff --git a/src/wasm-binary.h b/src/wasm-binary.h
index 4b7e1a2a4..3ce76e247 100644
--- a/src/wasm-binary.h
+++ b/src/wasm-binary.h
@@ -30,14 +30,15 @@
namespace wasm {
-struct LEB128 {
- uint32_t value;
+template<typename T>
+struct LEB {
+ T value;
- LEB128() {}
- LEB128(uint32_t value) : value(value) {}
+ LEB() {}
+ LEB(T value) : value(value) {}
void write(std::vector<uint8_t>* out) {
- uint32_t temp = value;
+ T temp = value;
do {
uint8_t byte = temp & 127;
temp >>= 7;
@@ -49,7 +50,7 @@ struct LEB128 {
}
void writeAt(std::vector<uint8_t>* out, size_t at, size_t minimum = 0) {
- uint32_t temp = value;
+ T temp = value;
size_t offset = 0;
bool more;
do {
@@ -66,16 +67,19 @@ struct LEB128 {
void read(std::function<uint8_t ()> get) {
value = 0;
- uint32_t shift = 0;
+ T shift = 0;
while (1) {
uint8_t byte = get();
- value |= ((byte & 127) << shift);
+ value |= ((T(byte & 127)) << shift);
if (!(byte & 128)) break;
shift += 7;
}
}
};
+typedef LEB<uint32_t> LEB128;
+typedef LEB<uint64_t> LEB256;
+
//
// We mostly stream into a buffer as we create the binary format, however,
// sometimes we need to backtrack and write to a location behind us - wasm
@@ -123,6 +127,11 @@ public:
x.write(this);
return *this;
}
+ BufferWithRandomAccess& operator<<(LEB256 x) {
+ if (debug) std::cerr << "writeLEB256: " << x.value << " (at " << size() << ")" << std::endl;
+ x.write(this);
+ return *this;
+ }
BufferWithRandomAccess& operator<<(uint8_t x) {
return *this << (int8_t)x;
@@ -158,9 +167,9 @@ public:
(*this)[i+2] = x & 0xff; x >>= 8;
(*this)[i+3] = x & 0xff;
}
- void writeAt(size_t i, LEB128 x, size_t minimum = 0) {
- if (debug) std::cerr << "backpatchLEB128: " << x.value << " (at " << i << "), minimum " << minimum << std::endl;
- x.writeAt(this, i, minimum);
+ void writeAt(size_t i, LEB128 x) {
+ if (debug) std::cerr << "backpatchLEB128: " << x.value << " (at " << i << ")" << std::endl;
+ x.writeAt(this, i, 5); // fill all 5 bytes, we have to do this when backpatching
}
template <typename T>
@@ -180,6 +189,7 @@ namespace Section {
auto ExportTable = "export_table";
auto DataSegments = "data_segments";
auto FunctionTable = "function_table";
+ auto Names = "names";
auto End = "end";
auto Start = "start_function";
};
@@ -342,7 +352,6 @@ enum ASTNodes {
F32StoreMem = 0x35,
F64StoreMem = 0x36,
- I8Const = 0x09,
I32Const = 0x0a,
I64Const = 0x0b,
F64Const = 0x0c,
@@ -422,6 +431,7 @@ public:
writeExports();
writeDataSegments();
writeFunctionTable();
+ writeNames();
writeEnd();
finishUp();
}
@@ -432,18 +442,23 @@ public:
o << int32_t(10); // version number
}
- int32_t startSection(const char* name) {
- // emit 5 bytes of 0, which we'll fill with LEB later
+ int32_t writeLEB128Placeholder() {
int32_t ret = o.size();
o << int32_t(0);
o << int8_t(0);
+ return ret;
+ }
+
+ int32_t startSection(const char* name) {
+ // emit 5 bytes of 0, which we'll fill with LEB later
+ auto ret = writeLEB128Placeholder();
writeInlineString(name);
return ret;
}
void finishSection(int32_t start) {
int32_t size = o.size() - start - 5; // section size does not include the 5 bytes of the size field itself
- o.writeAt(start, LEB128(size), 5);
+ o.writeAt(start, LEB128(size));
}
void writeStart() {
@@ -562,26 +577,23 @@ public:
o << LEB128(total);
for (size_t i = 0; i < total; i++) {
if (debug) std::cerr << "write one at" << o.size() << std::endl;
+ size_t sizePos = writeLEB128Placeholder();
+ size_t start = o.size();
Function* function = wasm->functions[i];
- Name name, type;
- name = function->name;
- type = function->type;
mappedLocals.clear();
numLocalsByType.clear();
- if (debug) std::cerr << "writing" << name << std::endl;
- o << int8_t(BinaryConsts::Named |
- (BinaryConsts::Locals * (function && function->locals.size() > 0)));
- emitString(name.str);
+ if (debug) std::cerr << "writing" << function->name << std::endl;
mapLocals(function);
- if (function->locals.size() > 0) {
- o << uint16_t(numLocalsByType[i32])
- << uint16_t(numLocalsByType[i64])
- << uint16_t(numLocalsByType[f32])
- << uint16_t(numLocalsByType[f64]);
- }
- size_t sizePos = o.size();
- o << (uint32_t)0; // placeholder, we fill in the size later when we have it // XXX int32, diverge from v8 format, to get more code to compile
- size_t start = o.size();
+ o << LEB128(
+ (numLocalsByType[i32] ? 1 : 0) +
+ (numLocalsByType[i64] ? 1 : 0) +
+ (numLocalsByType[f32] ? 1 : 0) +
+ (numLocalsByType[f64] ? 1 : 0)
+ );
+ if (numLocalsByType[i32]) o << LEB128(numLocalsByType[i32]) << binaryWasmType(i32);
+ if (numLocalsByType[i64]) o << LEB128(numLocalsByType[i64]) << binaryWasmType(i64);
+ if (numLocalsByType[f32]) o << LEB128(numLocalsByType[f32]) << binaryWasmType(f32);
+ if (numLocalsByType[f64]) o << LEB128(numLocalsByType[f64]) << binaryWasmType(f64);
depth = 0;
recurse(function->body);
o << int8_t(BinaryConsts::EndMarker);
@@ -589,7 +601,7 @@ public:
size_t size = o.size() - start;
assert(size <= std::numeric_limits<uint32_t>::max());
if (debug) std::cerr << "body size: " << size << ", writing at " << sizePos << ", next starts at " << o.size() << std::endl;
- o.writeAt(sizePos, uint32_t(size)); // XXX int32, diverge from v8 format, to get more code to compile
+ o.writeAt(sizePos, LEB128(size));
}
finishSection(start);
}
@@ -649,6 +661,18 @@ public:
finishSection(start);
}
+ void writeNames() {
+ if (wasm->functions.size() == 0) return;
+ if (debug) std::cerr << "== writeNames" << std::endl;
+ auto start = startSection(BinaryConsts::Section::Names);
+ o << LEB128(wasm->functions.size());
+ for (auto* curr : wasm->functions) {
+ writeInlineString(curr->name.str);
+ o << LEB128(0); // TODO: locals
+ }
+ finishSection(start);
+ }
+
void writeEnd() {
auto start = startSection(BinaryConsts::Section::End);
finishSection(start);
@@ -765,21 +789,23 @@ public:
}
if (curr->condition) recurse(curr->condition);
o << int8_t(curr->condition ? BinaryConsts::BrIf : BinaryConsts::Br)
- << int32_t(getBreakIndex(curr->name));
+ << LEB128(getBreakIndex(curr->name));
}
void visitSwitch(Switch *curr) {
if (debug) std::cerr << "zz node: Switch" << std::endl;
- o << int8_t(BinaryConsts::TableSwitch) << int16_t(curr->targets.size() + 1) << int8_t(curr->value != nullptr);
+ o << int8_t(BinaryConsts::TableSwitch) << LEB128(curr->targets.size());
for (auto target : curr->targets) {
- o << (int32_t)getBreakIndex(target);
+ o << LEB128(getBreakIndex(target));
}
- o << (int32_t)getBreakIndex(curr->default_);
+ o << LEB128(getBreakIndex(curr->default_));
recurse(curr->condition);
o << int8_t(BinaryConsts::EndMarker);
if (curr->value) {
recurse(curr->value);
- o << int8_t(BinaryConsts::EndMarker);
+ } else {
+ visitNop(nullptr);
}
+ o << int8_t(BinaryConsts::EndMarker);
}
void visitCall(Call *curr) {
if (debug) std::cerr << "zz node: Call" << std::endl;
@@ -814,9 +840,8 @@ public:
}
void emitMemoryAccess(size_t alignment, size_t bytes, uint32_t offset) {
- o << int8_t( ((alignment == bytes || alignment == 0) ? BinaryConsts::NaturalAlignment : BinaryConsts::Alignment) |
- (offset ? BinaryConsts::Offset : 0) );
- if (offset) o << LEB128(offset);
+ o << LEB128(Log2(alignment ? alignment : bytes));
+ o << LEB128(offset);
}
void visitLoad(Load *curr) {
@@ -882,16 +907,11 @@ public:
if (debug) std::cerr << "zz node: Const" << curr << " : " << curr->type << std::endl;
switch (curr->type) {
case i32: {
- uint32_t value = curr->value.geti32();
- if (value <= 255) {
- o << int8_t(BinaryConsts::I8Const) << uint8_t(value);
- break;
- }
- o << int8_t(BinaryConsts::I32Const) << value;
+ o << int8_t(BinaryConsts::I32Const) << LEB128(curr->value.geti32());
break;
}
case i64: {
- o << int8_t(BinaryConsts::I64Const) << curr->value.geti64();
+ o << int8_t(BinaryConsts::I64Const) << LEB256(curr->value.geti64());
break;
}
case f32: {
@@ -1056,10 +1076,11 @@ class WasmBinaryBuilder {
std::vector<char>& input;
bool debug;
- size_t pos;
+ size_t pos = 0;
+ int32_t startIndex = -1;
public:
- WasmBinaryBuilder(AllocatingModule& wasm, std::vector<char>& input, bool debug) : wasm(wasm), allocator(wasm.allocator), input(input), debug(debug), pos(0) {}
+ WasmBinaryBuilder(AllocatingModule& wasm, std::vector<char>& input, bool debug) : wasm(wasm), allocator(wasm.allocator), input(input), debug(debug) {}
void read() {
@@ -1089,6 +1110,7 @@ public:
else if (match(BinaryConsts::Section::ExportTable)) readExports();
else if (match(BinaryConsts::Section::DataSegments)) readDataSegments();
else if (match(BinaryConsts::Section::FunctionTable)) readFunctionTable();
+ else if (match(BinaryConsts::Section::Names)) readNames();
else if (match(BinaryConsts::Section::End)) {
if (debug) std::cerr << "== readEnd" << std::endl;
break;
@@ -1149,6 +1171,15 @@ public:
if (debug) std::cerr << "getLEB128: " << ret.value << " ==>" << std::endl;
return ret.value;
}
+ uint64_t getLEB256() {
+ if (debug) std::cerr << "<==" << std::endl;
+ LEB256 ret;
+ ret.read([&]() {
+ return getInt8();
+ });
+ if (debug) std::cerr << "getLEB256: " << ret.value << " ==>" << std::endl;
+ return ret.value;
+ }
WasmType getWasmType() {
int8_t type = getInt8();
switch (type) {
@@ -1219,7 +1250,7 @@ public:
void readStart() {
if (debug) std::cerr << "== readStart" << std::endl;
- wasm.start = wasm.functions[getLEB128()]->name;
+ startIndex = getLEB128();
}
void readMemory() {
@@ -1284,20 +1315,21 @@ public:
return cashew::IString(("label$" + std::to_string(nextLabel++)).c_str(), false);
}
+ // We read functions before we know their names, so we need to backpatch the names later
+
+ std::vector<Function*> functions; // we store functions here before wasm.addFunction after we know their names
+ std::map<size_t, std::vector<Call*>> functionCalls; // at index i we have all calls to i
+
void readFunctions() {
if (debug) std::cerr << "== readFunctions" << std::endl;
size_t total = getLEB128();
for (size_t i = 0; i < total; i++) {
if (debug) std::cerr << "read one at " << pos << std::endl;
- auto data = getInt8();
+ size_t size = getLEB128();
+ assert(size > 0); // we could also check it matches the seen size
auto type = functionTypes[i];
- bool named = data & BinaryConsts::Named;
- assert(named);
- bool locals = data & BinaryConsts::Locals;
- Name name = getString();
- if (debug) std::cerr << "reading" << name << std::endl;
+ if (debug) std::cerr << "reading" << i << std::endl;
auto func = allocator.alloc<Function>();
- func->name = name;
func->type = type->name;
func->result = type->result;
size_t nextVar = 0;
@@ -1308,29 +1340,47 @@ public:
for (size_t j = 0; j < type->params.size(); j++) {
func->params.emplace_back(addVar(), type->params[j]);
}
- if (locals) {
- auto addLocals = [&](WasmType type) {
- int16_t num = getInt16();
- while (num > 0) {
- func->locals.emplace_back(addVar(), type);
- num--;
- }
- };
- addLocals(i32);
- addLocals(i64);
- addLocals(f32);
- addLocals(f64);
+ size_t numLocalTypes = getLEB128();
+ for (size_t t = 0; t < numLocalTypes; t++) {
+ auto num = getLEB128();
+ auto type = getWasmType();
+ while (num > 0) {
+ func->locals.emplace_back(addVar(), type);
+ num--;
+ }
}
- size_t size = getInt32(); // XXX int32, diverge from v8 format, to get more code to compile
- // we can't read the function yet - it might call other functions that are defined later,
- // and we do depend on the function type.
- functions.emplace_back(func, pos, size);
- pos += size;
- func->body = nullptr; // will be filled later. but we do have the name and the type already.
- wasm.addFunction(func);
+ {
+ // process the function body
+ if (debug) std::cerr << "processing function: " << i << std::endl;
+ nextLabel = 0;
+ // prepare locals
+ mappedLocals.clear();
+ localTypes.clear();
+ for (size_t i = 0; i < func->params.size(); i++) {
+ mappedLocals.push_back(func->params[i].name);
+ localTypes[func->params[i].name] = func->params[i].type;
+ }
+ for (size_t i = 0; i < func->locals.size(); i++) {
+ mappedLocals.push_back(func->locals[i].name);
+ localTypes[func->locals[i].name] = func->locals[i].type;
+ }
+ // process body
+ assert(breakStack.empty());
+ assert(expressionStack.empty());
+ depth = 0;
+ processExpressions();
+ assert(expressionStack.size() == 1);
+ func->body = popExpression();
+ assert(depth == 0);
+ assert(breakStack.empty());
+ assert(expressionStack.empty());
+ }
+ functions.push_back(func);
}
}
+ std::map<Export*, size_t> exportIndexes;
+
void readExports() {
if (debug) std::cerr << "== readExports" << std::endl;
size_t num = getLEB128();
@@ -1339,22 +1389,12 @@ public:
if (debug) std::cerr << "read one" << std::endl;
auto curr = allocator.alloc<Export>();
auto index = getLEB128();
- assert(index < wasm.functions.size());
- curr->value = wasm.functions[index]->name;
- assert(curr->value.is());
+ assert(index < functionTypes.size());
curr->name = getInlineString();
- wasm.addExport(curr);
+ exportIndexes[curr] = index;
}
}
- struct FunctionData {
- Function* func;
- size_t pos, size;
- FunctionData(Function* func, size_t pos, size_t size) : func(func), pos(pos), size(size) {}
- };
-
- std::vector<FunctionData> functions;
-
std::vector<Name> mappedLocals; // index => local name
std::map<Name, WasmType> localTypes; // TODO: optimize
@@ -1380,32 +1420,31 @@ public:
void processFunctions() {
for (auto& func : functions) {
- Function* curr = func.func;
- if (debug) std::cerr << "processing function: " << curr->name << std::endl;
- pos = func.pos;
- nextLabel = 0;
- // prepare locals
- mappedLocals.clear();
- localTypes.clear();
- for (size_t i = 0; i < curr->params.size(); i++) {
- mappedLocals.push_back(curr->params[i].name);
- localTypes[curr->params[i].name] = curr->params[i].type;
- }
- for (size_t i = 0; i < curr->locals.size(); i++) {
- mappedLocals.push_back(curr->locals[i].name);
- localTypes[curr->locals[i].name] = curr->locals[i].type;
+ wasm.addFunction(func);
+ }
+ // now that we have names for each function, apply things
+
+ if (startIndex >= 0) {
+ wasm.start = wasm.functions[startIndex]->name;
+ }
+
+ for (auto& iter : exportIndexes) {
+ Export* curr = iter.first;
+ curr->value = wasm.functions[iter.second]->name;
+ wasm.addExport(curr);
+ }
+
+ for (auto& iter : functionCalls) {
+ size_t index = iter.first;
+ auto& calls = iter.second;
+ for (auto* call : calls) {
+ call->target = wasm.functions[index]->name;
}
- // process body
- assert(breakStack.empty());
- assert(expressionStack.empty());
- depth = 0;
- processExpressions();
- assert(expressionStack.size() == 1);
- curr->body = popExpression();
- assert(depth == 0);
- assert(breakStack.empty());
- assert(expressionStack.empty());
- assert(pos == func.pos + func.size);
+ }
+
+ for (size_t index : functionTable) {
+ assert(index < wasm.functions.size());
+ wasm.table.names.push_back(wasm.functions[index]->name);
}
}
@@ -1426,13 +1465,24 @@ public:
}
}
+ std::vector<size_t> functionTable;
+
void readFunctionTable() {
if (debug) std::cerr << "== readFunctionTable" << std::endl;
auto num = getLEB128();
for (size_t i = 0; i < num; i++) {
auto index = getLEB128();
- assert(index < wasm.functions.size());
- wasm.table.names.push_back(wasm.functions[index]->name);
+ functionTable.push_back(index);
+ }
+ }
+
+ void readNames() {
+ if (debug) std::cerr << "== readNames" << std::endl;
+ auto num = getLEB128();
+ for (size_t i = 0; i < num; i++) {
+ functions[i]->name = getInlineString();
+ auto numLocals = getLEB128();
+ assert(numLocals == 0); // TODO
}
}
@@ -1562,35 +1612,34 @@ public:
void visitBreak(Break *curr, uint8_t code) {
if (debug) std::cerr << "zz node: Break" << std::endl;
- curr->name = getBreakName(getInt32());
+ curr->name = getBreakName(getLEB128());
if (code == BinaryConsts::BrIf) curr->condition = popExpression();
curr->value = popExpression();
}
void visitSwitch(Switch *curr) {
if (debug) std::cerr << "zz node: Switch" << std::endl;
- auto numTargets = getInt16();
- auto hasValue = getInt8();
- for (auto i = 0; i < numTargets - 1; i++) {
- curr->targets.push_back(getBreakName(getInt32()));
+ auto numTargets = getLEB128();
+ for (size_t i = 0; i < numTargets; i++) {
+ curr->targets.push_back(getBreakName(getLEB128()));
}
- curr->default_ = getBreakName(getInt32());
+ curr->default_ = getBreakName(getLEB128());
processExpressions();
curr->condition = popExpression();
- if (hasValue) {
- processExpressions();
- curr->value = popExpression();
- }
+ processExpressions();
+ curr->value = popExpression();
+ if (curr->value->is<Nop>()) curr->value = nullptr;
}
void visitCall(Call *curr) {
if (debug) std::cerr << "zz node: Call" << std::endl;
- curr->target = wasm.functions[getLEB128()]->name;
- auto type = wasm.functionTypesMap[wasm.functionsMap[curr->target]->type];
+ auto index = getLEB128();
+ auto type = functionTypes[index];
auto num = type->params.size();
curr->operands.resize(num);
for (size_t i = 0; i < num; i++) {
curr->operands[num - i - 1] = popExpression();
}
curr->type = type->result;
+ functionCalls[index].push_back(curr);
}
void visitCallImport(CallImport *curr) {
if (debug) std::cerr << "zz node: CallImport" << std::endl;
@@ -1632,13 +1681,8 @@ public:
}
void readMemoryAccess(uint32_t& alignment, size_t bytes, uint32_t& offset) {
- auto value = getInt8();
- alignment = value & BinaryConsts::Alignment ? 1 : bytes;
- if (value & BinaryConsts::Offset) {
- offset = getLEB128();
- } else {
- offset = 0;
- }
+ alignment = Pow2(getLEB128());
+ offset = getLEB128();
}
bool maybeVisitImpl(Load *curr, uint8_t code) {
@@ -1685,9 +1729,8 @@ public:
}
bool maybeVisitImpl(Const *curr, uint8_t code) {
switch (code) {
- case BinaryConsts::I8Const: curr->value = Literal(int32_t(getInt8())); break;
- case BinaryConsts::I32Const: curr->value = Literal(getInt32()); break;
- case BinaryConsts::I64Const: curr->value = Literal(getInt64()); break;
+ case BinaryConsts::I32Const: curr->value = Literal(getLEB128()); break;
+ case BinaryConsts::I64Const: curr->value = Literal(getLEB256()); break;
case BinaryConsts::F32Const: curr->value = Literal(getFloat32()); break;
case BinaryConsts::F64Const: curr->value = Literal(getFloat64()); break;
default: return false;