diff options
author | Alon Zakai <alonzakai@gmail.com> | 2018-03-13 09:29:38 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-03-13 09:29:38 -0700 |
commit | d52213c3f5e96bb3450721d96aa68d3c5e0865b6 (patch) | |
tree | c585377fb842da0c4b87657f5047272196af9e6c /src/passes/FuncCastEmulation.cpp | |
parent | 0008b795d11d65d0759c3c0a71ee74b536f1ecf8 (diff) | |
download | binaryen-d52213c3f5e96bb3450721d96aa68d3c5e0865b6.tar.gz binaryen-d52213c3f5e96bb3450721d96aa68d3c5e0865b6.tar.bz2 binaryen-d52213c3f5e96bb3450721d96aa68d3c5e0865b6.zip |
Function pointer cast emulation (#1468)
This adds a pass that implements "function pointer cast emulation" - allows indirect calls to go through even if the number of arguments or their types is incorrect. That is undefined behavior in C/C++ but in practice somehow works in native archs. It is even relied upon in e.g. Python.
Emscripten already has such emulation for asm.js, which also worked for asm2wasm. This implements something like it in binaryen which also allows the wasm backend to use it. As a result, Python should now be portable using the wasm backend.
The mechanism used for the emulation is to make all indirect calls use a fixed number of arguments, all of type i64, and a return type that is also i64. Thunks are then placed in the table which translate the arguments properly for the target, basically by reinterpreting to i64 and back. As a result, receiving an i64 when an i32 is sent will have the upper bits all zero, and the reverse would truncate the upper bits, etc. (Note that this is different than emscripten's existing emulation, which converts (as signed) to a double. That makes sense for JS, where doubles can contain all numeric values, but in wasm we have i64s. Also, bitwise conversion may be more like what native archs do anyhow. It is enough for Python.)
Also adds validation for a function's type matching the function's actual params and result (surprised we didn't have that before, but we didn't, and there was even a place in the test suite where that was wrong).
Also simplifies the build script by moving two cpp files into the wasm/ subdir, so they can be built once and shared between the various tools.
Diffstat (limited to 'src/passes/FuncCastEmulation.cpp')
-rw-r--r-- | src/passes/FuncCastEmulation.cpp | 235 |
1 files changed, 235 insertions, 0 deletions
diff --git a/src/passes/FuncCastEmulation.cpp b/src/passes/FuncCastEmulation.cpp new file mode 100644 index 000000000..59a2588da --- /dev/null +++ b/src/passes/FuncCastEmulation.cpp @@ -0,0 +1,235 @@ +/* + * Copyright 2017 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// +// Instruments all indirect calls so that they work even if a function +// pointer was cast incorrectly. For example, if you cast an int (int, float) +// to an int (int, float, int) and call it natively, on most archs it will +// happen to work, ignoring the extra param, whereas in wasm it will trap. +// When porting code that relies on such casts working (like e.g. Python), +// this pass may be useful. It sets a new "ABI" for indirect calls, in which +// they all return an i64 and they have a fixed number of i64 params, and +// the pass converts everything to go through that. +// +// This should work even with dynamic linking, however, the number of +// params must be identical, i.e., the "ABI" must match. + +#include <wasm.h> +#include <wasm-builder.h> +#include <asm_v_wasm.h> +#include <pass.h> +#include <wasm-emscripten.h> +#include <ir/literal-utils.h> + +namespace wasm { + +// This should be enough for everybody. (As described above, we need this +// to match when dynamically linking, and also dynamic linking is why we +// can't just detect this automatically in the module we see.) 
+static const int NUM_PARAMS = 15; + +// Converts a value to the ABI type of i64. +static Expression* toABI(Expression* value, Module* module) { + Builder builder(*module); + switch (value->type) { + case i32: { + value = builder.makeUnary(ExtendUInt32, value); + break; + } + case i64: { + // already good + break; + } + case f32: { + value = builder.makeUnary( + ExtendUInt32, + builder.makeUnary(ReinterpretFloat32, value) + ); + break; + } + case f64: { + value = builder.makeUnary(ReinterpretFloat64, value); + break; + } + case none: { + // the value is none, but we need a value here + value = builder.makeSequence( + value, + LiteralUtils::makeZero(i64, *module) + ); + break; + } + case unreachable: { + // can leave it, the call isn't taken anyhow + break; + } + default: { + // SIMD may be interesting some day + WASM_UNREACHABLE(); + } + } + return value; +} + +// Converts a value from the ABI type of i64 to the expected type +static Expression* fromABI(Expression* value, Type type, Module* module) { + Builder builder(*module); + switch (type) { + case i32: { + value = builder.makeUnary(WrapInt64, value); + break; + } + case i64: { + // already good + break; + } + case f32: { + value = builder.makeUnary( + ReinterpretInt32, + builder.makeUnary(WrapInt64, value) + ); + break; + } + case f64: { + value = builder.makeUnary(ReinterpretInt64, value); + break; + } + case none: { + value = builder.makeDrop(value); + } + case unreachable: { + // can leave it, the call isn't taken anyhow + break; + } + default: { + // SIMD may be interesting some day + WASM_UNREACHABLE(); + } + } + return value; +} + +struct ParallelFuncCastEmulation : public WalkerPass<PostWalker<ParallelFuncCastEmulation>> { + bool isFunctionParallel() override { return true; } + + Pass* create() override { return new ParallelFuncCastEmulation(ABIType); } + + ParallelFuncCastEmulation(Name ABIType) : ABIType(ABIType) {} + + void visitCallIndirect(CallIndirect* curr) { + if (curr->operands.size() > 
NUM_PARAMS) { + Fatal() << "FuncCastEmulation::NUM_PARAMS needs to be at least " << + curr->operands.size(); + } + for (Expression*& operand : curr->operands) { + operand = toABI(operand, getModule()); + } + // Add extra operands as needed. + while (curr->operands.size() < NUM_PARAMS) { + curr->operands.push_back(LiteralUtils::makeZero(i64, *getModule())); + } + // Set the new types + auto oldType = curr->type; + curr->type = i64; + curr->fullType = ABIType; + // Fix up return value + replaceCurrent(fromABI(curr, oldType, getModule())); + } + +private: + // the name of a type for a call with the right params and return + Name ABIType; +}; + +struct FuncCastEmulation : public Pass { + void run(PassRunner* runner, Module* module) override { + // we just need the one ABI function type for all indirect calls + std::string sig = "j"; + for (Index i = 0; i < NUM_PARAMS; i++) { + sig += 'j'; + } + ABIType = ensureFunctionType(sig, module)->name; + // Add a way for JS to call into the table (as our i64 ABI means an i64 + // is returned when there is a return value, which JS engines will fail on), + // using dynCalls + EmscriptenGlueGenerator generator(*module); + generator.generateDynCallThunks(); + // Add a thunk for each function in the table, and do the call through it. + std::unordered_map<Name, Name> funcThunks; + for (auto& segment : module->table.segments) { + for (auto& name : segment.data) { + auto iter = funcThunks.find(name); + if (iter == funcThunks.end()) { + auto thunk = makeThunk(name, module); + funcThunks[name] = thunk; + name = thunk; + } else { + name = iter->second; + } + } + } + // update call_indirects + PassRunner subRunner(module, runner->options); + subRunner.setIsNested(true); + subRunner.add<ParallelFuncCastEmulation>(ABIType); + subRunner.run(); + } + +private: + // the name of a type for a call with the right params and return + Name ABIType; + + // Creates a thunk for a function, casting args and return value as needed. 
+ Name makeThunk(Name name, Module* module) { + Name thunk = std::string("byn$fpcast-emu$") + name.str; + if (module->getFunctionOrNull(thunk)) { + Fatal() << "FuncCastEmulation::makeThunk seems a thunk name already in use. Was the pass already run on this code?"; + } + // The item in the table may be a function or a function import. + auto* func = module->getFunctionOrNull(name); + Import* imp = nullptr; + if (!func) imp = module->getImport(name); + std::vector<Type>& params = func ? func->params : module->getFunctionType(imp->functionType)->params; + Type type = func ? func->result : module->getFunctionType(imp->functionType)->result; + Builder builder(*module); + std::vector<Expression*> callOperands; + for (Index i = 0; i < params.size(); i++) { + callOperands.push_back(fromABI(builder.makeGetLocal(i, i64), params[i], module)); + } + Expression* call = func ? (Expression*)builder.makeCall(name, callOperands, type) + : (Expression*)builder.makeCallImport(name, callOperands, type); + std::vector<Type> thunkParams; + for (Index i = 0; i < NUM_PARAMS; i++) { + thunkParams.push_back(i64); + } + auto* thunkFunc = builder.makeFunction( + thunk, + std::move(thunkParams), + i64, + {}, // no vars + toABI(call, module) + ); + thunkFunc->type = ABIType; + module->addFunction(thunkFunc); + return thunk; + } +}; + +Pass* createFuncCastEmulationPass() { + return new FuncCastEmulation(); +} + +} // namespace wasm |