From d52213c3f5e96bb3450721d96aa68d3c5e0865b6 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 13 Mar 2018 09:29:38 -0700 Subject: Function pointer cast emulation (#1468) This adds a pass that implements "function pointer cast emulation" - allows indirect calls to go through even if the number of arguments or their types is incorrect. That is undefined behavior in C/C++ but in practice somehow works in native archs. It is even relied upon in e.g. Python. Emscripten already has such emulation for asm.js, which also worked for asm2wasm. This implements something like it in binaryen which also allows the wasm backend to use it. As a result, Python should now be portable using the wasm backend. The mechanism used for the emulation is to make all indirect calls use a fixed number of arguments, all of type i64, and a return type that is also i64. Thunks are then placed in the table which translate the arguments properly for the target, basically by reinterpreting to i64 and back. As a result, receiving an i64 when an i32 is sent will have the upper bits all zero, and the reverse would truncate the upper bits, etc. (Note that this is different than emscripten's existing emulation, which converts (as signed) to a double. That makes sense for JS where doubles can contain all numeric values, but in wasm we have i64s. Also, bitwise conversion may be more like what native archs do anyhow. It is enough for Python.) Also adds validation for a function's type matching the function's actual params and result (surprised we didn't have that before, but we didn't, and there was even a place in the test suite where that was wrong). Also simplifies the build script by moving two cpp files into the wasm/ subdir, so they can be built once and shared between the various tools. 
--- src/passes/CMakeLists.txt | 1 + src/passes/FuncCastEmulation.cpp | 235 ++++++++++++++ src/passes/pass.cpp | 1 + src/passes/passes.h | 1 + src/wasm-builder.h | 16 +- src/wasm-emscripten.cpp | 664 --------------------------------------- src/wasm-interpreter.cpp | 24 -- src/wasm-linker.cpp | 8 +- src/wasm/CMakeLists.txt | 2 + src/wasm/wasm-binary.cpp | 10 +- src/wasm/wasm-emscripten.cpp | 664 +++++++++++++++++++++++++++++++++++++++ src/wasm/wasm-interpreter.cpp | 24 ++ src/wasm/wasm-validator.cpp | 6 + 13 files changed, 960 insertions(+), 696 deletions(-) create mode 100644 src/passes/FuncCastEmulation.cpp delete mode 100644 src/wasm-emscripten.cpp delete mode 100644 src/wasm-interpreter.cpp create mode 100644 src/wasm/wasm-emscripten.cpp create mode 100644 src/wasm/wasm-interpreter.cpp (limited to 'src') diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt index e5334fff1..d4fff78df 100644 --- a/src/passes/CMakeLists.txt +++ b/src/passes/CMakeLists.txt @@ -8,6 +8,7 @@ SET(passes_SOURCES DuplicateFunctionElimination.cpp ExtractFunction.cpp Flatten.cpp + FuncCastEmulation.cpp Inlining.cpp LegalizeJSInterface.cpp LocalCSE.cpp diff --git a/src/passes/FuncCastEmulation.cpp b/src/passes/FuncCastEmulation.cpp new file mode 100644 index 000000000..59a2588da --- /dev/null +++ b/src/passes/FuncCastEmulation.cpp @@ -0,0 +1,235 @@ +/* + * Copyright 2017 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// +// Instruments all indirect calls so that they work even if a function +// pointer was cast incorrectly. For example, if you cast an int (int, float) +// to an int (int, float, int) and call it natively, on most archs it will +// happen to work, ignoring the extra param, whereas in wasm it will trap. +// When porting code that relies on such casts working (like e.g. Python), +// this pass may be useful. It sets a new "ABI" for indirect calls, in which +// they all return an i64 and they have a fixed number of i64 params, and +// the pass converts everything to go through that. +// +// This should work even with dynamic linking, however, the number of +// params must be identical, i.e., the "ABI" must match. + +#include +#include +#include +#include +#include +#include + +namespace wasm { + +// This should be enough for everybody. (As described above, we need this +// to match when dynamically linking, and also dynamic linking is why we +// can't just detect this automatically in the module we see.) +static const int NUM_PARAMS = 15; + +// Converts a value to the ABI type of i64. 
+static Expression* toABI(Expression* value, Module* module) { + Builder builder(*module); + switch (value->type) { + case i32: { + value = builder.makeUnary(ExtendUInt32, value); + break; + } + case i64: { + // already good + break; + } + case f32: { + value = builder.makeUnary( + ExtendUInt32, + builder.makeUnary(ReinterpretFloat32, value) + ); + break; + } + case f64: { + value = builder.makeUnary(ReinterpretFloat64, value); + break; + } + case none: { + // the value is none, but we need a value here + value = builder.makeSequence( + value, + LiteralUtils::makeZero(i64, *module) + ); + break; + } + case unreachable: { + // can leave it, the call isn't taken anyhow + break; + } + default: { + // SIMD may be interesting some day + WASM_UNREACHABLE(); + } + } + return value; +} + +// Converts a value from the ABI type of i64 to the expected type +static Expression* fromABI(Expression* value, Type type, Module* module) { + Builder builder(*module); + switch (type) { + case i32: { + value = builder.makeUnary(WrapInt64, value); + break; + } + case i64: { + // already good + break; + } + case f32: { + value = builder.makeUnary( + ReinterpretInt32, + builder.makeUnary(WrapInt64, value) + ); + break; + } + case f64: { + value = builder.makeUnary(ReinterpretInt64, value); + break; + } + case none: { + value = builder.makeDrop(value); + } + case unreachable: { + // can leave it, the call isn't taken anyhow + break; + } + default: { + // SIMD may be interesting some day + WASM_UNREACHABLE(); + } + } + return value; +} + +struct ParallelFuncCastEmulation : public WalkerPass> { + bool isFunctionParallel() override { return true; } + + Pass* create() override { return new ParallelFuncCastEmulation(ABIType); } + + ParallelFuncCastEmulation(Name ABIType) : ABIType(ABIType) {} + + void visitCallIndirect(CallIndirect* curr) { + if (curr->operands.size() > NUM_PARAMS) { + Fatal() << "FuncCastEmulation::NUM_PARAMS needs to be at least " << + curr->operands.size(); + } + for 
(Expression*& operand : curr->operands) { + operand = toABI(operand, getModule()); + } + // Add extra operands as needed. + while (curr->operands.size() < NUM_PARAMS) { + curr->operands.push_back(LiteralUtils::makeZero(i64, *getModule())); + } + // Set the new types + auto oldType = curr->type; + curr->type = i64; + curr->fullType = ABIType; + // Fix up return value + replaceCurrent(fromABI(curr, oldType, getModule())); + } + +private: + // the name of a type for a call with the right params and return + Name ABIType; +}; + +struct FuncCastEmulation : public Pass { + void run(PassRunner* runner, Module* module) override { + // we just need the one ABI function type for all indirect calls + std::string sig = "j"; + for (Index i = 0; i < NUM_PARAMS; i++) { + sig += 'j'; + } + ABIType = ensureFunctionType(sig, module)->name; + // Add a way for JS to call into the table (as our i64 ABI means an i64 + // is returned when there is a return value, which JS engines will fail on), + // using dynCalls + EmscriptenGlueGenerator generator(*module); + generator.generateDynCallThunks(); + // Add a thunk for each function in the table, and do the call through it. + std::unordered_map funcThunks; + for (auto& segment : module->table.segments) { + for (auto& name : segment.data) { + auto iter = funcThunks.find(name); + if (iter == funcThunks.end()) { + auto thunk = makeThunk(name, module); + funcThunks[name] = thunk; + name = thunk; + } else { + name = iter->second; + } + } + } + // update call_indirects + PassRunner subRunner(module, runner->options); + subRunner.setIsNested(true); + subRunner.add(ABIType); + subRunner.run(); + } + +private: + // the name of a type for a call with the right params and return + Name ABIType; + + // Creates a thunk for a function, casting args and return value as needed. 
+ Name makeThunk(Name name, Module* module) { + Name thunk = std::string("byn$fpcast-emu$") + name.str; + if (module->getFunctionOrNull(thunk)) { + Fatal() << "FuncCastEmulation::makeThunk seems a thunk name already in use. Was the pass already run on this code?"; + } + // The item in the table may be a function or a function import. + auto* func = module->getFunctionOrNull(name); + Import* imp = nullptr; + if (!func) imp = module->getImport(name); + std::vector& params = func ? func->params : module->getFunctionType(imp->functionType)->params; + Type type = func ? func->result : module->getFunctionType(imp->functionType)->result; + Builder builder(*module); + std::vector callOperands; + for (Index i = 0; i < params.size(); i++) { + callOperands.push_back(fromABI(builder.makeGetLocal(i, i64), params[i], module)); + } + Expression* call = func ? (Expression*)builder.makeCall(name, callOperands, type) + : (Expression*)builder.makeCallImport(name, callOperands, type); + std::vector thunkParams; + for (Index i = 0; i < NUM_PARAMS; i++) { + thunkParams.push_back(i64); + } + auto* thunkFunc = builder.makeFunction( + thunk, + std::move(thunkParams), + i64, + {}, // no vars + toABI(call, module) + ); + thunkFunc->type = ABIType; + module->addFunction(thunkFunc); + return thunk; + } +}; + +Pass* createFuncCastEmulationPass() { + return new FuncCastEmulation(); +} + +} // namespace wasm diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp index 37c59f570..6518a6f3d 100644 --- a/src/passes/pass.cpp +++ b/src/passes/pass.cpp @@ -74,6 +74,7 @@ void PassRegistry::registerPasses() { registerPass("duplicate-function-elimination", "removes duplicate functions", createDuplicateFunctionEliminationPass); registerPass("extract-function", "leaves just one function (useful for debugging)", createExtractFunctionPass); registerPass("flatten", "flattens out code, removing nesting", createFlattenPass); + registerPass("fpcast-emu", "emulates function pointer casts, allowing incorrect 
indirect calls to (sometimes) work", createFuncCastEmulationPass); registerPass("func-metrics", "reports function metrics", createFunctionMetricsPass); registerPass("inlining", "inline functions (you probably want inlining-optimizing)", createInliningPass); registerPass("inlining-optimizing", "inline functions and optimizes where we inlined", createInliningOptimizingPass); diff --git a/src/passes/passes.h b/src/passes/passes.h index 230cdfd86..2eaf1049e 100644 --- a/src/passes/passes.h +++ b/src/passes/passes.h @@ -31,6 +31,7 @@ Pass* createDeadCodeEliminationPass(); Pass* createDuplicateFunctionEliminationPass(); Pass* createExtractFunctionPass(); Pass* createFlattenPass(); +Pass* createFuncCastEmulationPass(); Pass* createFullPrinterPass(); Pass* createFunctionMetricsPass(); Pass* createI64ToI32LoweringPass(); diff --git a/src/wasm-builder.h b/src/wasm-builder.h index 94699df7e..02304acec 100644 --- a/src/wasm-builder.h +++ b/src/wasm-builder.h @@ -42,6 +42,20 @@ public: // make* functions, create nodes + Function* makeFunction(Name name, + std::vector&& params, + Type resultType, + std::vector&& vars, + Expression* body = nullptr) { + auto* func = new Function; + func->name = name; + func->result = resultType; + func->body = body; + func->params.swap(params); + func->vars.swap(vars); + return func; + } + Function* makeFunction(Name name, std::vector&& params, Type resultType, @@ -51,7 +65,6 @@ public: func->name = name; func->result = resultType; func->body = body; - for (auto& param : params) { func->params.push_back(param.type); Index index = func->localNames.size(); @@ -64,7 +77,6 @@ public: func->localIndices[var.name] = index; func->localNames[index] = var.name; } - return func; } diff --git a/src/wasm-emscripten.cpp b/src/wasm-emscripten.cpp deleted file mode 100644 index 9a393db43..000000000 --- a/src/wasm-emscripten.cpp +++ /dev/null @@ -1,664 +0,0 @@ -/* - * Copyright 2016 WebAssembly Community Group participants - * - * Licensed under the Apache 
License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "wasm-emscripten.h" - -#include - -#include "asm_v_wasm.h" -#include "asmjs/shared-constants.h" -#include "shared-constants.h" -#include "wasm-builder.h" -#include "wasm-linker.h" -#include "wasm-traversal.h" -#include "wasm.h" - -namespace wasm { - -cashew::IString EMSCRIPTEN_ASM_CONST("emscripten_asm_const"); -cashew::IString EM_JS_PREFIX("__em_js__"); - -static constexpr const char* dummyFunction = "__wasm_nullptr"; - -void addExportedFunction(Module& wasm, Function* function) { - wasm.addFunction(function); - auto export_ = new Export; - export_->name = export_->value = function->name; - export_->kind = ExternalKind::Function; - wasm.addExport(export_); -} - -Global* EmscriptenGlueGenerator::getStackPointerGlobal() { - // Assumption: first global is __stack_pointer - return wasm.globals[0].get(); -} - -Expression* EmscriptenGlueGenerator::generateLoadStackPointer() { - if (!useStackPointerGlobal) { - return builder.makeLoad( - /* bytes =*/ 4, - /* signed =*/ false, - /* offset =*/ stackPointerOffset, - /* align =*/ 4, - /* ptr =*/ builder.makeConst(Literal(0)), - /* type =*/ i32 - ); - } - Global* stackPointer = getStackPointerGlobal(); - return builder.makeGetGlobal(stackPointer->name, i32); -} - -Expression* EmscriptenGlueGenerator::generateStoreStackPointer(Expression* value) { - if (!useStackPointerGlobal) { - return builder.makeStore( - /* bytes =*/ 4, - /* offset =*/ stackPointerOffset, - /* align =*/ 4, - /* ptr =*/ 
builder.makeConst(Literal(0)), - /* value =*/ value, - /* type =*/ i32 - ); - } - Global* stackPointer = getStackPointerGlobal(); - return builder.makeSetGlobal(stackPointer->name, value); -} - -void EmscriptenGlueGenerator::generateStackSaveFunction() { - Name name("stackSave"); - std::vector params { }; - Function* function = builder.makeFunction( - name, std::move(params), i32, {} - ); - - function->body = generateLoadStackPointer(); - - addExportedFunction(wasm, function); -} - -void EmscriptenGlueGenerator::generateStackAllocFunction() { - Name name("stackAlloc"); - std::vector params { { "0", i32 } }; - Function* function = builder.makeFunction( - name, std::move(params), i32, { { "1", i32 } } - ); - Expression* loadStack = generateLoadStackPointer(); - GetLocal* getSizeArg = builder.makeGetLocal(0, i32); - Binary* sub = builder.makeBinary(SubInt32, loadStack, getSizeArg); - const static uint32_t bitAlignment = 16; - const static uint32_t bitMask = bitAlignment - 1; - Const* subConst = builder.makeConst(Literal(~bitMask)); - Binary* maskedSub = builder.makeBinary(AndInt32, sub, subConst); - SetLocal* teeStackLocal = builder.makeTeeLocal(1, maskedSub); - Expression* storeStack = generateStoreStackPointer(teeStackLocal); - - Block* block = builder.makeBlock(); - block->list.push_back(storeStack); - GetLocal* getStackLocal2 = builder.makeGetLocal(1, i32); - block->list.push_back(getStackLocal2); - block->type = i32; - function->body = block; - - addExportedFunction(wasm, function); -} - -void EmscriptenGlueGenerator::generateStackRestoreFunction() { - Name name("stackRestore"); - std::vector params { { "0", i32 } }; - Function* function = builder.makeFunction( - name, std::move(params), none, {} - ); - GetLocal* getArg = builder.makeGetLocal(0, i32); - Expression* store = generateStoreStackPointer(getArg); - - function->body = store; - - addExportedFunction(wasm, function); -} - -void EmscriptenGlueGenerator::generateRuntimeFunctions() { - 
generateStackSaveFunction(); - generateStackAllocFunction(); - generateStackRestoreFunction(); -} - -Function* EmscriptenGlueGenerator::generateMemoryGrowthFunction() { - Name name(GROW_WASM_MEMORY); - std::vector params { { NEW_SIZE, i32 } }; - Function* growFunction = builder.makeFunction( - name, std::move(params), i32, {} - ); - growFunction->body = builder.makeHost( - GrowMemory, - Name(), - { builder.makeGetLocal(0, i32) } - ); - - addExportedFunction(wasm, growFunction); - - return growFunction; -} - -static bool hasI64ResultOrParam(FunctionType* ft) { - if (ft->result == i64) return true; - for (auto ty : ft->params) { - if (ty == i64) return true; - } - return false; -} - -void EmscriptenGlueGenerator::generateDynCallThunks() { - std::unordered_set sigs; - Builder builder(wasm); - std::vector tableSegmentData; - if (wasm.table.segments.size() > 0) { - tableSegmentData = wasm.table.segments[0].data; - } - for (const auto& indirectFunc : tableSegmentData) { - if (indirectFunc == dummyFunction) { - continue; - } - std::string sig; - if (auto import = wasm.getImportOrNull(indirectFunc)) { - sig = getSig(wasm.getFunctionType(import->functionType)); - } else { - sig = getSig(wasm.getFunction(indirectFunc)); - } - auto* funcType = ensureFunctionType(sig, &wasm); - if (hasI64ResultOrParam(funcType)) continue; // Can't export i64s on the web. 
- if (!sigs.insert(sig).second) continue; // Sig is already in the set - std::vector params; - params.emplace_back("fptr", i32); // function pointer param - int p = 0; - for (const auto& ty : funcType->params) params.emplace_back(std::to_string(p++), ty); - Function* f = builder.makeFunction(std::string("dynCall_") + sig, std::move(params), funcType->result, {}); - Expression* fptr = builder.makeGetLocal(0, i32); - std::vector args; - for (unsigned i = 0; i < funcType->params.size(); ++i) { - args.push_back(builder.makeGetLocal(i + 1, funcType->params[i])); - } - Expression* call = builder.makeCallIndirect(funcType, fptr, args); - f->body = call; - - wasm.addFunction(f); - exportFunction(wasm, f->name, true); - } -} - -struct JSCallWalker : public PostWalker { - Module &wasm; - JSCallWalker(Module &_wasm) : wasm(_wasm) { - if (wasm.table.segments.size() == 0) { - auto emptySegment = - wasm.allocator.alloc()->set(Literal(uint32_t(0))); - wasm.table.segments.emplace_back(emptySegment); - } - const auto& tableSegmentData = wasm.table.segments[0].data; - - // Check if jsCalls have already been created - for (Index i = 0; i < tableSegmentData.size(); ++i) { - if (tableSegmentData[i].startsWith("jsCall_")) { - jsCallStartIndex = i; - return; - } - } - jsCallStartIndex = - wasm.table.segments[0].offset->cast()->value.getInteger() + - tableSegmentData.size(); - } - - // Gather all function signatures used in call_indirect, because any of them - // can be used to call function pointers created by emscripten's addFunction. - void visitCallIndirect(CallIndirect *curr) { - // dynCall thunks are generated in binaryen and call_indirect instructions - // within them cannot be used to call function pointers returned by - // emscripten's addFunction. 
- if (!getFunction()->name.startsWith("dynCall_")) { - indirectlyCallableSigs.insert( - getSig(wasm.getFunctionType(curr->fullType))); - } - } - - bool createJSCallThunks; - Index jsCallStartIndex; - // Function type signatures used in call_indirect instructions - std::set indirectlyCallableSigs; -}; - -JSCallWalker getJSCallWalker(Module& wasm) { - JSCallWalker walker(wasm); - walker.walkModule(&wasm); - return walker; -} - -void EmscriptenGlueGenerator::generateJSCallThunks( - unsigned numReservedFunctionPointers) { - if (numReservedFunctionPointers == 0) - return; - - JSCallWalker walker = getJSCallWalker(wasm); - auto& tableSegmentData = wasm.table.segments[0].data; - for (std::string sig : walker.indirectlyCallableSigs) { - // Add imports for jsCall_sig (e.g. jsCall_vi). - // Imported jsCall_sig functions have their first parameter as an index to - // the function table, so we should prepend an 'i' to parameters' signature - // (e.g. If the signature of the callee is 'vi', the imported jsCall_vi - // function would have signature 'vii'.) - std::string importSig = std::string(1, sig[0]) + 'i' + sig.substr(1); - FunctionType *importType = ensureFunctionType(importSig, &wasm); - auto import = new Import; - import->name = import->base = "jsCall_" + sig; - import->module = ENV; - import->functionType = importType->name; - import->kind = ExternalKind::Function; - wasm.addImport(import); - FunctionType *funcType = ensureFunctionType(sig, &wasm); - - // Create jsCall_sig_index thunks (e.g. jsCall_vi_0, jsCall_vi_1, ...) - // e.g. If # of reserved function pointers (given by a command line - // argument) is 3 and there are two possible signature 'vi' and 'ii', the - // genereated thunks will be jsCall_vi_0, jsCall_vi_1, jsCall_vi_2, - // jsCall_ii_0, jsCall_ii_1, and jsCall_ii_2. 
- for (unsigned fp = 0; fp < numReservedFunctionPointers; ++fp) { - std::vector params; - int p = 0; - for (const auto& ty : funcType->params) { - params.emplace_back(std::to_string(p++), ty); - } - Function* f = builder.makeFunction( - std::string("jsCall_") + sig + "_" + std::to_string(fp), - std::move(params), funcType->result, {}); - std::vector args; - args.push_back(builder.makeConst(Literal(fp))); - for (unsigned i = 0; i < funcType->params.size(); ++i) { - args.push_back(builder.makeGetLocal(i, funcType->params[i])); - } - Expression* call = - builder.makeCallImport(import->name, args, funcType->result); - f->body = call; - wasm.addFunction(f); - tableSegmentData.push_back(f->name); - } - } - wasm.table.initial = wasm.table.max = - wasm.table.segments[0].offset->cast()->value.getInteger() + - tableSegmentData.size(); -} - -std::vector
getSegmentOffsets(Module& wasm) { - std::vector
segmentOffsets; - for (unsigned i = 0; i < wasm.memory.segments.size(); ++i) { - Const* addrConst = wasm.memory.segments[i].offset->cast(); - auto address = addrConst->value.geti32(); - segmentOffsets.push_back(address); - } - return segmentOffsets; -} - -std::string escape(const char *input) { - std::string code = input; - // replace newlines quotes with escaped newlines - size_t curr = 0; - while ((curr = code.find("\\n", curr)) != std::string::npos) { - code = code.replace(curr, 2, "\\\\n"); - curr += 3; // skip this one - } - // replace double quotes with escaped single quotes - curr = 0; - while ((curr = code.find('"', curr)) != std::string::npos) { - if (curr == 0 || code[curr-1] != '\\') { - code = code.replace(curr, 1, "\\" "\""); - curr += 2; // skip this one - } else { // already escaped, escape the slash as well - code = code.replace(curr, 1, "\\" "\\" "\""); - curr += 3; // skip this one - } - } - return code; -} - -const char* stringAtAddr(Module& wasm, - std::vector
const& segmentOffsets, - Address address) { - for (unsigned i = 0; i < wasm.memory.segments.size(); ++i) { - Memory::Segment& segment = wasm.memory.segments[i]; - Address offset = segmentOffsets[i]; - if (address >= offset && address < offset + segment.data.size()) { - return &segment.data[address - offset]; - } - } - return nullptr; -} - -std::string codeForConstAddr(Module& wasm, - std::vector
const& segmentOffsets, - Const* addrConst) { - auto address = addrConst->value.geti32(); - const char* str = stringAtAddr(wasm, segmentOffsets, address); - if (!str) { - // If we can't find the segment corresponding with the address, then we - // omitted the segment and the address points to an empty string. - return escape(""); - } - return escape(str); -} - -struct AsmConstWalker : public PostWalker { - Module& wasm; - std::vector
segmentOffsets; // segment index => address offset - - std::map> sigsForCode; - std::map ids; - std::set allSigs; - - AsmConstWalker(Module& _wasm) - : wasm(_wasm), - segmentOffsets(getSegmentOffsets(wasm)) { } - - void visitCallImport(CallImport* curr); - -private: - Literal idLiteralForCode(std::string code); - std::string asmConstSig(std::string baseSig); - Name nameForImportWithSig(std::string sig); - void addImport(Name importName, std::string baseSig); -}; - -void AsmConstWalker::visitCallImport(CallImport* curr) { - Import* import = wasm.getImport(curr->target); - if (import->base.hasSubstring(EMSCRIPTEN_ASM_CONST)) { - auto arg = curr->operands[0]->cast(); - auto code = codeForConstAddr(wasm, segmentOffsets, arg); - arg->value = idLiteralForCode(code); - auto baseSig = getSig(curr); - auto sig = asmConstSig(baseSig); - sigsForCode[code].insert(sig); - auto importName = nameForImportWithSig(sig); - curr->target = importName; - - if (allSigs.count(sig) == 0) { - allSigs.insert(sig); - addImport(importName, baseSig); - } - } -} - -Literal AsmConstWalker::idLiteralForCode(std::string code) { - int32_t id; - if (ids.count(code) == 0) { - id = ids.size(); - ids[code] = id; - } else { - id = ids[code]; - } - return Literal(id); -} - -std::string AsmConstWalker::asmConstSig(std::string baseSig) { - std::string sig = ""; - for (size_t i = 0; i < baseSig.size(); ++i) { - // Omit the signature of the "code" parameter, taken as a string, as the first argument - if (i != 1) { - sig += baseSig[i]; - } - } - return sig; -} - -Name AsmConstWalker::nameForImportWithSig(std::string sig) { - std::string fixedTarget = EMSCRIPTEN_ASM_CONST.str + std::string("_") + sig; - return Name(fixedTarget.c_str()); -} - -void AsmConstWalker::addImport(Name importName, std::string baseSig) { - auto import = new Import; - import->name = import->base = importName; - import->module = ENV; - import->functionType = ensureFunctionType(baseSig, &wasm)->name; - import->kind = 
ExternalKind::Function; - wasm.addImport(import); -} - -AsmConstWalker fixEmAsmConstsAndReturnWalker(Module& wasm) { - // Collect imports to remove - // This would find our generated functions if we ran it later - std::vector toRemove; - for (auto& import : wasm.imports) { - if (import->base.hasSubstring(EMSCRIPTEN_ASM_CONST)) { - toRemove.push_back(import->name); - } - } - - // Walk the module, generate _sig versions of EM_ASM functions - AsmConstWalker walker(wasm); - walker.walkModule(&wasm); - - // Remove the base functions that we didn't generate - for (auto importName : toRemove) { - wasm.removeImport(importName); - } - return walker; -} - -struct EmJsWalker : public PostWalker { - Module& wasm; - std::vector
segmentOffsets; // segment index => address offset - - std::map codeByName; - - EmJsWalker(Module& _wasm) - : wasm(_wasm), - segmentOffsets(getSegmentOffsets(wasm)) { } - - void visitFunction(Function* curr) { - if (!curr->name.startsWith(EM_JS_PREFIX.str)) { - return; - } - auto funcName = std::string(curr->name.stripPrefix(EM_JS_PREFIX.str)); - auto addrConst = curr->body->dynCast(); - if (addrConst == nullptr) { - auto block = curr->body->dynCast(); - Expression* first = nullptr; - if (block && block->list.size() > 0) { - first = block->list[0]; - } - if (first) { - addrConst = first->dynCast(); - } - } - if (addrConst == nullptr) { - Fatal() << "Unexpected generated __em_js__ function body: " << curr; - } - auto code = codeForConstAddr(wasm, segmentOffsets, addrConst); - codeByName[funcName] = code; - } -}; - -EmJsWalker fixEmJsFuncsAndReturnWalker(Module& wasm) { - EmJsWalker walker(wasm); - walker.walkModule(&wasm); - - std::vector toRemove; - for (auto& func : wasm.functions) { - if (func->name.startsWith(EM_JS_PREFIX.str)) { - toRemove.push_back(func->name); - } - } - for (auto funcName : toRemove) { - wasm.removeFunction(funcName); - wasm.removeExport(funcName); - } - return walker; -} - -void EmscriptenGlueGenerator::fixEmAsmConsts() { - fixEmAsmConstsAndReturnWalker(wasm); - fixEmJsFuncsAndReturnWalker(wasm); -} - -template -void printSet(std::ostream& o, C& c) { - o << "["; - bool first = true; - for (auto& item : c) { - if (first) first = false; - else o << ","; - o << '"' << item << '"'; - } - o << "]"; -} - -std::string EmscriptenGlueGenerator::generateEmscriptenMetadata( - Address staticBump, std::vector const& initializerFunctions, - unsigned numReservedFunctionPointers) { - bool commaFirst; - auto maybeComma = [&commaFirst]() { - if (commaFirst) { - commaFirst = false; - return ""; - } else { - return ","; - } - }; - - std::stringstream meta; - meta << "{ "; - - AsmConstWalker emAsmWalker = fixEmAsmConstsAndReturnWalker(wasm); - - // print - 
commaFirst = true; - meta << "\"asmConsts\": {"; - for (auto& pair : emAsmWalker.sigsForCode) { - auto& code = pair.first; - auto& sigs = pair.second; - meta << maybeComma(); - meta << '"' << emAsmWalker.ids[code] << "\": [\"" << code << "\", "; - printSet(meta, sigs); - meta << "]"; - } - meta << "},"; - - EmJsWalker emJsWalker = fixEmJsFuncsAndReturnWalker(wasm); - if (emJsWalker.codeByName.size() > 0) { - meta << "\"emJsFuncs\": {"; - commaFirst = true; - for (auto& pair : emJsWalker.codeByName) { - auto& name = pair.first; - auto& code = pair.second; - meta << maybeComma(); - meta << '"' << name << "\": \"" << code << '"'; - } - meta << "},"; - } - - meta << "\"staticBump\": " << staticBump << ", "; - - meta << "\"initializers\": ["; - commaFirst = true; - for (const auto& func : initializerFunctions) { - meta << maybeComma(); - meta << "\"" << func.c_str() << "\""; - } - meta << "]"; - - if (numReservedFunctionPointers) { - JSCallWalker jsCallWalker = getJSCallWalker(wasm); - meta << ", "; - meta << "\"jsCallStartIndex\": " << jsCallWalker.jsCallStartIndex << ", "; - meta << "\"jsCallFuncType\": ["; - commaFirst = true; - for (std::string sig : jsCallWalker.indirectlyCallableSigs) { - meta << maybeComma(); - meta << "\"" << sig << "\""; - } - meta << "]"; - } - - meta << ", \"declares\": ["; - commaFirst = true; - for (const auto& import : wasm.imports) { - if (import->kind == ExternalKind::Function && - (emJsWalker.codeByName.count(import->name.str) == 0) && - !import->name.startsWith(EMSCRIPTEN_ASM_CONST.str) && - !import->name.startsWith("invoke_") && - !import->name.startsWith("jsCall_")) { - meta << maybeComma() << '"' << import->name.str << '"'; - } - } - meta << "]"; - - meta << ", \"externs\": ["; - commaFirst = true; - for (const auto& import : wasm.imports) { - if (import->kind == ExternalKind::Global) { - meta << maybeComma() << "\"_" << import->name.str << '"'; - } - } - meta << "]"; - - meta << ", \"implementedFunctions\": ["; - commaFirst = true; 
- for (const auto& func : wasm.functions) { - meta << maybeComma() << "\"_" << func->name.str << '"'; - } - meta << "]"; - - meta << ", \"exports\": ["; - commaFirst = true; - for (const auto& ex : wasm.exports) { - meta << maybeComma() << '"' << ex->name.str << '"'; - } - meta << "]"; - - meta << ", \"invokeFuncs\": ["; - commaFirst = true; - for (const auto& import : wasm.imports) { - if (import->name.startsWith("invoke_")) { - meta << maybeComma() << '"' << import->name.str << '"'; - } - } - meta << "]"; - - meta << " }\n"; - - return meta.str(); -} - -std::string emscriptenGlue( - Module& wasm, - bool allowMemoryGrowth, - Address stackPointer, - Address staticBump, - std::vector const& initializerFunctions, - unsigned numReservedFunctionPointers) { - EmscriptenGlueGenerator generator(wasm, stackPointer); - generator.generateRuntimeFunctions(); - - if (allowMemoryGrowth) { - generator.generateMemoryGrowthFunction(); - } - - generator.generateDynCallThunks(); - - if (numReservedFunctionPointers) { - generator.generateJSCallThunks(numReservedFunctionPointers); - } - - return generator.generateEmscriptenMetadata(staticBump, initializerFunctions, - numReservedFunctionPointers); -} - -} // namespace wasm diff --git a/src/wasm-interpreter.cpp b/src/wasm-interpreter.cpp deleted file mode 100644 index e7df785ac..000000000 --- a/src/wasm-interpreter.cpp +++ /dev/null @@ -1,24 +0,0 @@ -#include "wasm-interpreter.h" - -namespace wasm { - -#ifdef WASM_INTERPRETER_DEBUG -int Indenter::indentLevel = 0; - -Indenter::Indenter(const char* entry) : entryName(entry) { - ++indentLevel; -} -Indenter::~Indenter() { - print(); - std::cout << "exit " << entryName << '\n'; - --indentLevel; -} -void Indenter::print() { - std::cout << indentLevel << ':'; - for (int i = 0; i <= indentLevel; ++i) { - std::cout << ' '; - } -} -#endif // WASM_INTERPRETER_DEBUG - -} // namespace wasm diff --git a/src/wasm-linker.cpp b/src/wasm-linker.cpp index c284de81f..df51d85c6 100644 --- 
a/src/wasm-linker.cpp +++ b/src/wasm-linker.cpp @@ -382,7 +382,13 @@ void Linker::makeDummyFunction() { if (!create) return; wasm::Builder wasmBuilder(out.wasm); Expression *unreachable = wasmBuilder.makeUnreachable(); - Function *dummy = wasmBuilder.makeFunction(Name(dummyFunction), {}, Type::none, {}, unreachable); + Function *dummy = wasmBuilder.makeFunction( + Name(dummyFunction), + std::vector{}, + Type::none, + std::vector{}, + unreachable + ); out.wasm.addFunction(dummy); getFunctionIndex(dummy->name); } diff --git a/src/wasm/CMakeLists.txt b/src/wasm/CMakeLists.txt index 1a8a9b8ba..da876b56f 100644 --- a/src/wasm/CMakeLists.txt +++ b/src/wasm/CMakeLists.txt @@ -2,6 +2,8 @@ SET(wasm_SOURCES literal.cpp wasm.cpp wasm-binary.cpp + wasm-emscripten.cpp + wasm-interpreter.cpp wasm-io.cpp wasm-s-parser.cpp wasm-type.cpp diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 56b35b712..69e939c44 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -1730,11 +1730,11 @@ void WasmBinaryBuilder::readFunctions() { } } auto func = Builder(wasm).makeFunction( - Name::fromInt(i), - std::move(params), - type->result, - std::move(vars) - ); + Name::fromInt(i), + std::move(params), + type->result, + std::move(vars) + ); func->type = type->name; currFunction = func; { diff --git a/src/wasm/wasm-emscripten.cpp b/src/wasm/wasm-emscripten.cpp new file mode 100644 index 000000000..9a393db43 --- /dev/null +++ b/src/wasm/wasm-emscripten.cpp @@ -0,0 +1,664 @@ +/* + * Copyright 2016 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "wasm-emscripten.h" + +#include + +#include "asm_v_wasm.h" +#include "asmjs/shared-constants.h" +#include "shared-constants.h" +#include "wasm-builder.h" +#include "wasm-linker.h" +#include "wasm-traversal.h" +#include "wasm.h" + +namespace wasm { + +cashew::IString EMSCRIPTEN_ASM_CONST("emscripten_asm_const"); +cashew::IString EM_JS_PREFIX("__em_js__"); + +static constexpr const char* dummyFunction = "__wasm_nullptr"; + +void addExportedFunction(Module& wasm, Function* function) { + wasm.addFunction(function); + auto export_ = new Export; + export_->name = export_->value = function->name; + export_->kind = ExternalKind::Function; + wasm.addExport(export_); +} + +Global* EmscriptenGlueGenerator::getStackPointerGlobal() { + // Assumption: first global is __stack_pointer + return wasm.globals[0].get(); +} + +Expression* EmscriptenGlueGenerator::generateLoadStackPointer() { + if (!useStackPointerGlobal) { + return builder.makeLoad( + /* bytes =*/ 4, + /* signed =*/ false, + /* offset =*/ stackPointerOffset, + /* align =*/ 4, + /* ptr =*/ builder.makeConst(Literal(0)), + /* type =*/ i32 + ); + } + Global* stackPointer = getStackPointerGlobal(); + return builder.makeGetGlobal(stackPointer->name, i32); +} + +Expression* EmscriptenGlueGenerator::generateStoreStackPointer(Expression* value) { + if (!useStackPointerGlobal) { + return builder.makeStore( + /* bytes =*/ 4, + /* offset =*/ stackPointerOffset, + /* align =*/ 4, + /* ptr =*/ builder.makeConst(Literal(0)), + /* value =*/ value, + /* type =*/ i32 + ); + } + Global* stackPointer = 
getStackPointerGlobal(); + return builder.makeSetGlobal(stackPointer->name, value); +} + +void EmscriptenGlueGenerator::generateStackSaveFunction() { + Name name("stackSave"); + std::vector params { }; + Function* function = builder.makeFunction( + name, std::move(params), i32, {} + ); + + function->body = generateLoadStackPointer(); + + addExportedFunction(wasm, function); +} + +void EmscriptenGlueGenerator::generateStackAllocFunction() { + Name name("stackAlloc"); + std::vector params { { "0", i32 } }; + Function* function = builder.makeFunction( + name, std::move(params), i32, { { "1", i32 } } + ); + Expression* loadStack = generateLoadStackPointer(); + GetLocal* getSizeArg = builder.makeGetLocal(0, i32); + Binary* sub = builder.makeBinary(SubInt32, loadStack, getSizeArg); + const static uint32_t bitAlignment = 16; + const static uint32_t bitMask = bitAlignment - 1; + Const* subConst = builder.makeConst(Literal(~bitMask)); + Binary* maskedSub = builder.makeBinary(AndInt32, sub, subConst); + SetLocal* teeStackLocal = builder.makeTeeLocal(1, maskedSub); + Expression* storeStack = generateStoreStackPointer(teeStackLocal); + + Block* block = builder.makeBlock(); + block->list.push_back(storeStack); + GetLocal* getStackLocal2 = builder.makeGetLocal(1, i32); + block->list.push_back(getStackLocal2); + block->type = i32; + function->body = block; + + addExportedFunction(wasm, function); +} + +void EmscriptenGlueGenerator::generateStackRestoreFunction() { + Name name("stackRestore"); + std::vector params { { "0", i32 } }; + Function* function = builder.makeFunction( + name, std::move(params), none, {} + ); + GetLocal* getArg = builder.makeGetLocal(0, i32); + Expression* store = generateStoreStackPointer(getArg); + + function->body = store; + + addExportedFunction(wasm, function); +} + +void EmscriptenGlueGenerator::generateRuntimeFunctions() { + generateStackSaveFunction(); + generateStackAllocFunction(); + generateStackRestoreFunction(); +} + +Function* 
EmscriptenGlueGenerator::generateMemoryGrowthFunction() { + Name name(GROW_WASM_MEMORY); + std::vector params { { NEW_SIZE, i32 } }; + Function* growFunction = builder.makeFunction( + name, std::move(params), i32, {} + ); + growFunction->body = builder.makeHost( + GrowMemory, + Name(), + { builder.makeGetLocal(0, i32) } + ); + + addExportedFunction(wasm, growFunction); + + return growFunction; +} + +static bool hasI64ResultOrParam(FunctionType* ft) { + if (ft->result == i64) return true; + for (auto ty : ft->params) { + if (ty == i64) return true; + } + return false; +} + +void EmscriptenGlueGenerator::generateDynCallThunks() { + std::unordered_set sigs; + Builder builder(wasm); + std::vector tableSegmentData; + if (wasm.table.segments.size() > 0) { + tableSegmentData = wasm.table.segments[0].data; + } + for (const auto& indirectFunc : tableSegmentData) { + if (indirectFunc == dummyFunction) { + continue; + } + std::string sig; + if (auto import = wasm.getImportOrNull(indirectFunc)) { + sig = getSig(wasm.getFunctionType(import->functionType)); + } else { + sig = getSig(wasm.getFunction(indirectFunc)); + } + auto* funcType = ensureFunctionType(sig, &wasm); + if (hasI64ResultOrParam(funcType)) continue; // Can't export i64s on the web. 
+ if (!sigs.insert(sig).second) continue; // Sig is already in the set + std::vector params; + params.emplace_back("fptr", i32); // function pointer param + int p = 0; + for (const auto& ty : funcType->params) params.emplace_back(std::to_string(p++), ty); + Function* f = builder.makeFunction(std::string("dynCall_") + sig, std::move(params), funcType->result, {}); + Expression* fptr = builder.makeGetLocal(0, i32); + std::vector args; + for (unsigned i = 0; i < funcType->params.size(); ++i) { + args.push_back(builder.makeGetLocal(i + 1, funcType->params[i])); + } + Expression* call = builder.makeCallIndirect(funcType, fptr, args); + f->body = call; + + wasm.addFunction(f); + exportFunction(wasm, f->name, true); + } +} + +struct JSCallWalker : public PostWalker { + Module &wasm; + JSCallWalker(Module &_wasm) : wasm(_wasm) { + if (wasm.table.segments.size() == 0) { + auto emptySegment = + wasm.allocator.alloc()->set(Literal(uint32_t(0))); + wasm.table.segments.emplace_back(emptySegment); + } + const auto& tableSegmentData = wasm.table.segments[0].data; + + // Check if jsCalls have already been created + for (Index i = 0; i < tableSegmentData.size(); ++i) { + if (tableSegmentData[i].startsWith("jsCall_")) { + jsCallStartIndex = i; + return; + } + } + jsCallStartIndex = + wasm.table.segments[0].offset->cast()->value.getInteger() + + tableSegmentData.size(); + } + + // Gather all function signatures used in call_indirect, because any of them + // can be used to call function pointers created by emscripten's addFunction. + void visitCallIndirect(CallIndirect *curr) { + // dynCall thunks are generated in binaryen and call_indirect instructions + // within them cannot be used to call function pointers returned by + // emscripten's addFunction. 
+ if (!getFunction()->name.startsWith("dynCall_")) { + indirectlyCallableSigs.insert( + getSig(wasm.getFunctionType(curr->fullType))); + } + } + + bool createJSCallThunks; + Index jsCallStartIndex; + // Function type signatures used in call_indirect instructions + std::set indirectlyCallableSigs; +}; + +JSCallWalker getJSCallWalker(Module& wasm) { + JSCallWalker walker(wasm); + walker.walkModule(&wasm); + return walker; +} + +void EmscriptenGlueGenerator::generateJSCallThunks( + unsigned numReservedFunctionPointers) { + if (numReservedFunctionPointers == 0) + return; + + JSCallWalker walker = getJSCallWalker(wasm); + auto& tableSegmentData = wasm.table.segments[0].data; + for (std::string sig : walker.indirectlyCallableSigs) { + // Add imports for jsCall_sig (e.g. jsCall_vi). + // Imported jsCall_sig functions have their first parameter as an index to + // the function table, so we should prepend an 'i' to parameters' signature + // (e.g. If the signature of the callee is 'vi', the imported jsCall_vi + // function would have signature 'vii'.) + std::string importSig = std::string(1, sig[0]) + 'i' + sig.substr(1); + FunctionType *importType = ensureFunctionType(importSig, &wasm); + auto import = new Import; + import->name = import->base = "jsCall_" + sig; + import->module = ENV; + import->functionType = importType->name; + import->kind = ExternalKind::Function; + wasm.addImport(import); + FunctionType *funcType = ensureFunctionType(sig, &wasm); + + // Create jsCall_sig_index thunks (e.g. jsCall_vi_0, jsCall_vi_1, ...) + // e.g. If # of reserved function pointers (given by a command line + // argument) is 3 and there are two possible signature 'vi' and 'ii', the + // genereated thunks will be jsCall_vi_0, jsCall_vi_1, jsCall_vi_2, + // jsCall_ii_0, jsCall_ii_1, and jsCall_ii_2. 
+ for (unsigned fp = 0; fp < numReservedFunctionPointers; ++fp) { + std::vector params; + int p = 0; + for (const auto& ty : funcType->params) { + params.emplace_back(std::to_string(p++), ty); + } + Function* f = builder.makeFunction( + std::string("jsCall_") + sig + "_" + std::to_string(fp), + std::move(params), funcType->result, {}); + std::vector args; + args.push_back(builder.makeConst(Literal(fp))); + for (unsigned i = 0; i < funcType->params.size(); ++i) { + args.push_back(builder.makeGetLocal(i, funcType->params[i])); + } + Expression* call = + builder.makeCallImport(import->name, args, funcType->result); + f->body = call; + wasm.addFunction(f); + tableSegmentData.push_back(f->name); + } + } + wasm.table.initial = wasm.table.max = + wasm.table.segments[0].offset->cast()->value.getInteger() + + tableSegmentData.size(); +} + +std::vector
getSegmentOffsets(Module& wasm) { + std::vector
segmentOffsets; + for (unsigned i = 0; i < wasm.memory.segments.size(); ++i) { + Const* addrConst = wasm.memory.segments[i].offset->cast(); + auto address = addrConst->value.geti32(); + segmentOffsets.push_back(address); + } + return segmentOffsets; +} + +std::string escape(const char *input) { + std::string code = input; + // replace newlines quotes with escaped newlines + size_t curr = 0; + while ((curr = code.find("\\n", curr)) != std::string::npos) { + code = code.replace(curr, 2, "\\\\n"); + curr += 3; // skip this one + } + // replace double quotes with escaped single quotes + curr = 0; + while ((curr = code.find('"', curr)) != std::string::npos) { + if (curr == 0 || code[curr-1] != '\\') { + code = code.replace(curr, 1, "\\" "\""); + curr += 2; // skip this one + } else { // already escaped, escape the slash as well + code = code.replace(curr, 1, "\\" "\\" "\""); + curr += 3; // skip this one + } + } + return code; +} + +const char* stringAtAddr(Module& wasm, + std::vector
const& segmentOffsets, + Address address) { + for (unsigned i = 0; i < wasm.memory.segments.size(); ++i) { + Memory::Segment& segment = wasm.memory.segments[i]; + Address offset = segmentOffsets[i]; + if (address >= offset && address < offset + segment.data.size()) { + return &segment.data[address - offset]; + } + } + return nullptr; +} + +std::string codeForConstAddr(Module& wasm, + std::vector
const& segmentOffsets, + Const* addrConst) { + auto address = addrConst->value.geti32(); + const char* str = stringAtAddr(wasm, segmentOffsets, address); + if (!str) { + // If we can't find the segment corresponding with the address, then we + // omitted the segment and the address points to an empty string. + return escape(""); + } + return escape(str); +} + +struct AsmConstWalker : public PostWalker { + Module& wasm; + std::vector
segmentOffsets; // segment index => address offset + + std::map> sigsForCode; + std::map ids; + std::set allSigs; + + AsmConstWalker(Module& _wasm) + : wasm(_wasm), + segmentOffsets(getSegmentOffsets(wasm)) { } + + void visitCallImport(CallImport* curr); + +private: + Literal idLiteralForCode(std::string code); + std::string asmConstSig(std::string baseSig); + Name nameForImportWithSig(std::string sig); + void addImport(Name importName, std::string baseSig); +}; + +void AsmConstWalker::visitCallImport(CallImport* curr) { + Import* import = wasm.getImport(curr->target); + if (import->base.hasSubstring(EMSCRIPTEN_ASM_CONST)) { + auto arg = curr->operands[0]->cast(); + auto code = codeForConstAddr(wasm, segmentOffsets, arg); + arg->value = idLiteralForCode(code); + auto baseSig = getSig(curr); + auto sig = asmConstSig(baseSig); + sigsForCode[code].insert(sig); + auto importName = nameForImportWithSig(sig); + curr->target = importName; + + if (allSigs.count(sig) == 0) { + allSigs.insert(sig); + addImport(importName, baseSig); + } + } +} + +Literal AsmConstWalker::idLiteralForCode(std::string code) { + int32_t id; + if (ids.count(code) == 0) { + id = ids.size(); + ids[code] = id; + } else { + id = ids[code]; + } + return Literal(id); +} + +std::string AsmConstWalker::asmConstSig(std::string baseSig) { + std::string sig = ""; + for (size_t i = 0; i < baseSig.size(); ++i) { + // Omit the signature of the "code" parameter, taken as a string, as the first argument + if (i != 1) { + sig += baseSig[i]; + } + } + return sig; +} + +Name AsmConstWalker::nameForImportWithSig(std::string sig) { + std::string fixedTarget = EMSCRIPTEN_ASM_CONST.str + std::string("_") + sig; + return Name(fixedTarget.c_str()); +} + +void AsmConstWalker::addImport(Name importName, std::string baseSig) { + auto import = new Import; + import->name = import->base = importName; + import->module = ENV; + import->functionType = ensureFunctionType(baseSig, &wasm)->name; + import->kind = 
ExternalKind::Function; + wasm.addImport(import); +} + +AsmConstWalker fixEmAsmConstsAndReturnWalker(Module& wasm) { + // Collect imports to remove + // This would find our generated functions if we ran it later + std::vector toRemove; + for (auto& import : wasm.imports) { + if (import->base.hasSubstring(EMSCRIPTEN_ASM_CONST)) { + toRemove.push_back(import->name); + } + } + + // Walk the module, generate _sig versions of EM_ASM functions + AsmConstWalker walker(wasm); + walker.walkModule(&wasm); + + // Remove the base functions that we didn't generate + for (auto importName : toRemove) { + wasm.removeImport(importName); + } + return walker; +} + +struct EmJsWalker : public PostWalker { + Module& wasm; + std::vector
segmentOffsets; // segment index => address offset + + std::map codeByName; + + EmJsWalker(Module& _wasm) + : wasm(_wasm), + segmentOffsets(getSegmentOffsets(wasm)) { } + + void visitFunction(Function* curr) { + if (!curr->name.startsWith(EM_JS_PREFIX.str)) { + return; + } + auto funcName = std::string(curr->name.stripPrefix(EM_JS_PREFIX.str)); + auto addrConst = curr->body->dynCast(); + if (addrConst == nullptr) { + auto block = curr->body->dynCast(); + Expression* first = nullptr; + if (block && block->list.size() > 0) { + first = block->list[0]; + } + if (first) { + addrConst = first->dynCast(); + } + } + if (addrConst == nullptr) { + Fatal() << "Unexpected generated __em_js__ function body: " << curr; + } + auto code = codeForConstAddr(wasm, segmentOffsets, addrConst); + codeByName[funcName] = code; + } +}; + +EmJsWalker fixEmJsFuncsAndReturnWalker(Module& wasm) { + EmJsWalker walker(wasm); + walker.walkModule(&wasm); + + std::vector toRemove; + for (auto& func : wasm.functions) { + if (func->name.startsWith(EM_JS_PREFIX.str)) { + toRemove.push_back(func->name); + } + } + for (auto funcName : toRemove) { + wasm.removeFunction(funcName); + wasm.removeExport(funcName); + } + return walker; +} + +void EmscriptenGlueGenerator::fixEmAsmConsts() { + fixEmAsmConstsAndReturnWalker(wasm); + fixEmJsFuncsAndReturnWalker(wasm); +} + +template +void printSet(std::ostream& o, C& c) { + o << "["; + bool first = true; + for (auto& item : c) { + if (first) first = false; + else o << ","; + o << '"' << item << '"'; + } + o << "]"; +} + +std::string EmscriptenGlueGenerator::generateEmscriptenMetadata( + Address staticBump, std::vector const& initializerFunctions, + unsigned numReservedFunctionPointers) { + bool commaFirst; + auto maybeComma = [&commaFirst]() { + if (commaFirst) { + commaFirst = false; + return ""; + } else { + return ","; + } + }; + + std::stringstream meta; + meta << "{ "; + + AsmConstWalker emAsmWalker = fixEmAsmConstsAndReturnWalker(wasm); + + // print + 
commaFirst = true; + meta << "\"asmConsts\": {"; + for (auto& pair : emAsmWalker.sigsForCode) { + auto& code = pair.first; + auto& sigs = pair.second; + meta << maybeComma(); + meta << '"' << emAsmWalker.ids[code] << "\": [\"" << code << "\", "; + printSet(meta, sigs); + meta << "]"; + } + meta << "},"; + + EmJsWalker emJsWalker = fixEmJsFuncsAndReturnWalker(wasm); + if (emJsWalker.codeByName.size() > 0) { + meta << "\"emJsFuncs\": {"; + commaFirst = true; + for (auto& pair : emJsWalker.codeByName) { + auto& name = pair.first; + auto& code = pair.second; + meta << maybeComma(); + meta << '"' << name << "\": \"" << code << '"'; + } + meta << "},"; + } + + meta << "\"staticBump\": " << staticBump << ", "; + + meta << "\"initializers\": ["; + commaFirst = true; + for (const auto& func : initializerFunctions) { + meta << maybeComma(); + meta << "\"" << func.c_str() << "\""; + } + meta << "]"; + + if (numReservedFunctionPointers) { + JSCallWalker jsCallWalker = getJSCallWalker(wasm); + meta << ", "; + meta << "\"jsCallStartIndex\": " << jsCallWalker.jsCallStartIndex << ", "; + meta << "\"jsCallFuncType\": ["; + commaFirst = true; + for (std::string sig : jsCallWalker.indirectlyCallableSigs) { + meta << maybeComma(); + meta << "\"" << sig << "\""; + } + meta << "]"; + } + + meta << ", \"declares\": ["; + commaFirst = true; + for (const auto& import : wasm.imports) { + if (import->kind == ExternalKind::Function && + (emJsWalker.codeByName.count(import->name.str) == 0) && + !import->name.startsWith(EMSCRIPTEN_ASM_CONST.str) && + !import->name.startsWith("invoke_") && + !import->name.startsWith("jsCall_")) { + meta << maybeComma() << '"' << import->name.str << '"'; + } + } + meta << "]"; + + meta << ", \"externs\": ["; + commaFirst = true; + for (const auto& import : wasm.imports) { + if (import->kind == ExternalKind::Global) { + meta << maybeComma() << "\"_" << import->name.str << '"'; + } + } + meta << "]"; + + meta << ", \"implementedFunctions\": ["; + commaFirst = true; 
+ for (const auto& func : wasm.functions) { + meta << maybeComma() << "\"_" << func->name.str << '"'; + } + meta << "]"; + + meta << ", \"exports\": ["; + commaFirst = true; + for (const auto& ex : wasm.exports) { + meta << maybeComma() << '"' << ex->name.str << '"'; + } + meta << "]"; + + meta << ", \"invokeFuncs\": ["; + commaFirst = true; + for (const auto& import : wasm.imports) { + if (import->name.startsWith("invoke_")) { + meta << maybeComma() << '"' << import->name.str << '"'; + } + } + meta << "]"; + + meta << " }\n"; + + return meta.str(); +} + +std::string emscriptenGlue( + Module& wasm, + bool allowMemoryGrowth, + Address stackPointer, + Address staticBump, + std::vector const& initializerFunctions, + unsigned numReservedFunctionPointers) { + EmscriptenGlueGenerator generator(wasm, stackPointer); + generator.generateRuntimeFunctions(); + + if (allowMemoryGrowth) { + generator.generateMemoryGrowthFunction(); + } + + generator.generateDynCallThunks(); + + if (numReservedFunctionPointers) { + generator.generateJSCallThunks(numReservedFunctionPointers); + } + + return generator.generateEmscriptenMetadata(staticBump, initializerFunctions, + numReservedFunctionPointers); +} + +} // namespace wasm diff --git a/src/wasm/wasm-interpreter.cpp b/src/wasm/wasm-interpreter.cpp new file mode 100644 index 000000000..e7df785ac --- /dev/null +++ b/src/wasm/wasm-interpreter.cpp @@ -0,0 +1,24 @@ +#include "wasm-interpreter.h" + +namespace wasm { + +#ifdef WASM_INTERPRETER_DEBUG +int Indenter::indentLevel = 0; + +Indenter::Indenter(const char* entry) : entryName(entry) { + ++indentLevel; +} +Indenter::~Indenter() { + print(); + std::cout << "exit " << entryName << '\n'; + --indentLevel; +} +void Indenter::print() { + std::cout << indentLevel << ':'; + for (int i = 0; i <= indentLevel; ++i) { + std::cout << ' '; + } +} +#endif // WASM_INTERPRETER_DEBUG + +} // namespace wasm diff --git a/src/wasm/wasm-validator.cpp b/src/wasm/wasm-validator.cpp index 8ff2fb9b8..d6c6e7bd0 
100644 --- a/src/wasm/wasm-validator.cpp +++ b/src/wasm/wasm-validator.cpp @@ -806,6 +806,12 @@ void FunctionValidator::visitFunction(Function* curr) { shouldBeTrue(breakTargets.empty(), curr->body, "all named break targets must exist"); returnType = unreachable; labelNames.clear(); + // if function has a named type, it must match up with the function's params and result + if (info.validateGlobally && curr->type.is()) { + auto* ft = getModule()->getFunctionType(curr->type); + shouldBeTrue(ft->params == curr->params, curr->name, "function params must match its declared type"); + shouldBeTrue(ft->result == curr->result, curr->name, "function result must match its declared type"); + } // expressions must not be seen more than once struct Walker : public PostWalker> { std::unordered_set& seen; -- cgit v1.2.3