diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/asmjs/shared-constants.cpp | 7 | ||||
-rw-r--r-- | src/asmjs/shared-constants.h | 7 | ||||
-rw-r--r-- | src/ir/module-utils.h | 13 | ||||
-rw-r--r-- | src/passes/CMakeLists.txt | 6 | ||||
-rw-r--r-- | src/passes/I64ToI32Lowering.cpp | 360 | ||||
-rw-r--r-- | src/passes/RemoveNonJSOps.cpp | 406 | ||||
-rw-r--r-- | src/passes/intrinsics-module.h | 27 | ||||
-rw-r--r-- | src/passes/wasm-intrinsics.wast | 1113 | ||||
-rw-r--r-- | src/wasm2asm.h | 53 |
9 files changed, 1307 insertions, 685 deletions
diff --git a/src/asmjs/shared-constants.cpp b/src/asmjs/shared-constants.cpp index 87c3574de..32ddb6c2c 100644 --- a/src/asmjs/shared-constants.cpp +++ b/src/asmjs/shared-constants.cpp @@ -94,5 +94,10 @@ cashew::IString GLOBAL("global"), WASM_NEAREST_F32("__wasm_nearest_f32"), WASM_NEAREST_F64("__wasm_nearest_f64"), WASM_TRUNC_F32("__wasm_trunc_f32"), - WASM_TRUNC_F64("__wasm_trunc_f64"); + WASM_TRUNC_F64("__wasm_trunc_f64"), + WASM_I64_MUL("__wasm_i64_mul"), + WASM_I64_SDIV("__wasm_i64_sdiv"), + WASM_I64_UDIV("__wasm_i64_udiv"), + WASM_I64_SREM("__wasm_i64_srem"), + WASM_I64_UREM("__wasm_i64_urem"); } diff --git a/src/asmjs/shared-constants.h b/src/asmjs/shared-constants.h index da2f7aad8..c57b03ea5 100644 --- a/src/asmjs/shared-constants.h +++ b/src/asmjs/shared-constants.h @@ -97,7 +97,12 @@ extern cashew::IString GLOBAL, WASM_NEAREST_F32, WASM_NEAREST_F64, WASM_TRUNC_F32, - WASM_TRUNC_F64; + WASM_TRUNC_F64, + WASM_I64_MUL, + WASM_I64_SDIV, + WASM_I64_UDIV, + WASM_I64_SREM, + WASM_I64_UREM; } #endif // wasm_asmjs_shared_constants_h diff --git a/src/ir/module-utils.h b/src/ir/module-utils.h index 5bcf2ea99..83625809f 100644 --- a/src/ir/module-utils.h +++ b/src/ir/module-utils.h @@ -85,6 +85,19 @@ inline void copyModule(Module& in, Module& out) { out.debugInfoFileNames = in.debugInfoFileNames; } +inline Function* copyFunction(Module& in, Module& out, Name name) { + Function *ret = out.getFunctionOrNull(name); + if (ret != nullptr) { + return ret; + } + auto* curr = in.getFunction(name); + auto* func = new Function(*curr); + func->body = ExpressionManipulator::copy(func->body, out); + func->type = Name(); + out.addFunction(func); + return func; +} + } // namespace ModuleUtils } // namespace wasm diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt index 2c48e3d6e..05ed3821b 100644 --- a/src/passes/CMakeLists.txt +++ b/src/passes/CMakeLists.txt @@ -1,3 +1,8 @@ +add_custom_command( + OUTPUT WasmIntrinsics.cpp + COMMAND python ../../scripts/embedwast.py 
${CMAKE_CURRENT_BINARY_DIR}/WasmIntrinsics.cpp + DEPENDS ../../scripts/embedwast.py wasm-intrinsics.wast) + SET(passes_SOURCES pass.cpp CoalesceLocals.cpp @@ -45,5 +50,6 @@ SET(passes_SOURCES SSAify.cpp Untee.cpp Vacuum.cpp + ${CMAKE_CURRENT_BINARY_DIR}/WasmIntrinsics.cpp ) ADD_LIBRARY(passes STATIC ${passes_SOURCES}) diff --git a/src/passes/I64ToI32Lowering.cpp b/src/passes/I64ToI32Lowering.cpp index 2f6fdf122..f14f33027 100644 --- a/src/passes/I64ToI32Lowering.cpp +++ b/src/passes/I64ToI32Lowering.cpp @@ -118,149 +118,6 @@ struct I64ToI32Lowering : public WalkerPass<PostWalker<I64ToI32Lowering>> { highBits->mutable_ = true; module->addGlobal(highBits); PostWalker<I64ToI32Lowering>::doWalkModule(module); - - if (needRotl64) { - module->addFunction(createRot64(true)); - } - if (needRotr64) { - module->addFunction(createRot64(false)); - } - } - - Function* createRot64(bool leftShift) { - Name name = leftShift ? WASM_ROTL64 : WASM_ROTR64; - - // if rotate == 32 - // hi = leftLow - // lo = leftHigh - // if rotate > 32 - // rotate = rotate - 32 - // hi = (leftLow << rotate) | (leftHigh >> (32 - rotate)) - // lo = (leftHigh << rotate) | (leftLow >> (32 - rotate)) - // else - // hi = (leftHigh << rotate) | (leftLow >> (32 - rotate)) - // lo = (leftLow << rotate) | (leftHigh >> (32 - rotate)) - Index lowBits = 0; - Index highBits = 1; - Index rotate = 2; - Index widthLessRotate = 3; - Binary* is32Rotate = builder->makeBinary( - EqInt32, - builder->makeGetLocal(rotate, i32), - builder->makeConst(Literal(int32_t(32))) - ); - Binary* isLargeRotate = builder->makeBinary( - GeUInt32, - builder->makeGetLocal(rotate, i32), - builder->makeConst(Literal(int32_t(32))) - ); - BinaryOp firstOp = leftShift ? ShlInt32 : ShrUInt32; - BinaryOp secondOp = leftShift ? 
ShrUInt32 : ShlInt32; - Block* equalRotateBlock = builder->blockify( - builder->makeSetGlobal( - INT64_TO_32_HIGH_BITS, - builder->makeGetLocal(lowBits, i32) - ), - builder->makeGetLocal(highBits, i32) - ); - Block* largeRotateBlock = builder->blockify( - builder->makeSetLocal( - rotate, - builder->makeBinary( - SubInt32, - builder->makeGetLocal(rotate, i32), - builder->makeConst(Literal(int32_t(32))) - ) - ), - builder->makeSetLocal( - widthLessRotate, - builder->makeBinary( - SubInt32, - builder->makeConst(Literal(int32_t(32))), - builder->makeGetLocal(rotate, i32) - ) - ), - builder->makeSetGlobal( - INT64_TO_32_HIGH_BITS, - builder->makeBinary( - OrInt32, - builder->makeBinary( - firstOp, - builder->makeGetLocal(lowBits, i32), - builder->makeGetLocal(rotate, i32) - ), - builder->makeBinary( - secondOp, - builder->makeGetLocal(highBits, i32), - builder->makeGetLocal(widthLessRotate, i32) - ) - ) - ), - builder->makeBinary( - OrInt32, - builder->makeBinary( - firstOp, - builder->makeGetLocal(highBits, i32), - builder->makeGetLocal(rotate, i32) - ), - builder->makeBinary( - secondOp, - builder->makeGetLocal(lowBits, i32), - builder->makeGetLocal(widthLessRotate, i32) - ) - ) - ); - Block* smallRotateBlock = builder->blockify( - builder->makeSetLocal( - widthLessRotate, - builder->makeBinary( - SubInt32, - builder->makeConst(Literal(int32_t(32))), - builder->makeGetLocal(rotate, i32) - ) - ), - builder->makeSetGlobal( - INT64_TO_32_HIGH_BITS, - builder->makeBinary( - OrInt32, - builder->makeBinary( - firstOp, - builder->makeGetLocal(highBits, i32), - builder->makeGetLocal(rotate, i32) - ), - builder->makeBinary( - secondOp, - builder->makeGetLocal(lowBits, i32), - builder->makeGetLocal(widthLessRotate, i32) - ) - ) - ), - builder->makeBinary( - OrInt32, - builder->makeBinary( - firstOp, - builder->makeGetLocal(lowBits, i32), - builder->makeGetLocal(rotate, i32) - ), - builder->makeBinary( - secondOp, - builder->makeGetLocal(highBits, i32), - 
builder->makeGetLocal(widthLessRotate, i32) - ) - ) - ); - If* condition = builder->makeIf( - is32Rotate, - equalRotateBlock, - builder->makeIf( - isLargeRotate, - largeRotateBlock, - smallRotateBlock - ) - ); - std::vector<Type> params = {i32, i32, i32}; - std::vector<Type> vars = {i32}; - return builder->makeFunction(name, std::move(params), i32, std::move(vars), condition); } void visitFunctionType(FunctionType* curr) { @@ -937,31 +794,6 @@ struct I64ToI32Lowering : public WalkerPass<PostWalker<I64ToI32Lowering>> { replaceCurrent(result); } - void lowerPopcnt64(Unary* curr) { - TempVar highBits = fetchOutParam(curr->value); - TempVar lowBits = getTemp(); - TempVar highResult = getTemp(); - - SetLocal* setLow = builder->makeSetLocal(lowBits, curr->value); - SetLocal* setHigh = builder->makeSetLocal( - highResult, - builder->makeConst(Literal(int32_t(0))) - ); - - Block* result = builder->blockify( - setLow, - setHigh, - builder->makeBinary( - AddInt32, - builder->makeUnary(PopcntInt32, builder->makeGetLocal(highBits, i32)), - builder->makeUnary(PopcntInt32, builder->makeGetLocal(lowBits, i32)) - ) - ); - - setOutParam(result, std::move(highResult)); - replaceCurrent(result); - } - void lowerCountZeros(Unary* curr) { auto lower = [&](Block* result, UnaryOp op32, TempVar&& first, TempVar&& second) { TempVar highResult = getTemp(); @@ -1014,7 +846,8 @@ struct I64ToI32Lowering : public WalkerPass<PostWalker<I64ToI32Lowering>> { lower(result, ClzInt32, std::move(highBits), std::move(lowBits)); break; case CtzInt64: - lower(result, CtzInt32, std::move(lowBits), std::move(highBits)); + std::cerr << "i64.ctz should be removed already" << std::endl; + WASM_UNREACHABLE(); break; default: abort(); @@ -1055,7 +888,6 @@ struct I64ToI32Lowering : public WalkerPass<PostWalker<I64ToI32Lowering>> { switch (curr->op) { case ClzInt64: case CtzInt64: lowerCountZeros(curr); break; - case PopcntInt64: lowerPopcnt64(curr); break; case EqZInt64: lowerEqZInt64(curr); break; case 
ExtendSInt32: lowerExtendSInt32(curr); break; case ExtendUInt32: lowerExtendUInt32(curr); break; @@ -1070,6 +902,9 @@ struct I64ToI32Lowering : public WalkerPass<PostWalker<I64ToI32Lowering>> { case ConvertSInt64ToFloat64: case ConvertUInt64ToFloat32: case ConvertUInt64ToFloat64: lowerConvertIntToFloat(curr); break; + case PopcntInt64: + std::cerr << "i64.popcnt should already be removed" << std::endl; + WASM_UNREACHABLE(); default: std::cerr << "Unhandled unary operator: " << curr->op << std::endl; abort(); @@ -1161,122 +996,6 @@ struct I64ToI32Lowering : public WalkerPass<PostWalker<I64ToI32Lowering>> { return result; } - Block* lowerMul(Block* result, TempVar&& leftLow, TempVar&& leftHigh, - TempVar&& rightLow, TempVar&& rightHigh) { - // high bits = ll*rh + lh*rl + ll1*rl1 + (ll0*rl1)>>16 + (ll1*rl0)>>16 - // low bits = ll*rl - TempVar leftLow0 = getTemp(); - TempVar leftLow1 = getTemp(); - TempVar rightLow0 = getTemp(); - TempVar rightLow1 = getTemp(); - SetLocal* setLL0 = builder->makeSetLocal( - leftLow0, - builder->makeBinary( - AndInt32, - builder->makeGetLocal(leftLow, i32), - builder->makeConst(Literal(int32_t(0xffff))) - ) - ); - SetLocal* setLL1 = builder->makeSetLocal( - leftLow1, - builder->makeBinary( - ShrUInt32, - builder->makeGetLocal(leftLow, i32), - builder->makeConst(Literal(int32_t(16))) - ) - ); - SetLocal* setRL0 = builder->makeSetLocal( - rightLow0, - builder->makeBinary( - AndInt32, - builder->makeGetLocal(rightLow, i32), - builder->makeConst(Literal(int32_t(0xffff))) - ) - ); - SetLocal* setRL1 = builder->makeSetLocal( - rightLow1, - builder->makeBinary( - ShrUInt32, - builder->makeGetLocal(rightLow, i32), - builder->makeConst(Literal(int32_t(16))) - ) - ); - SetLocal* setLLRH = builder->makeSetLocal( - rightHigh, - builder->makeBinary( - MulInt32, - builder->makeGetLocal(leftLow, i32), - builder->makeGetLocal(rightHigh, i32) - ) - ); - auto addToHighBits = [&](Expression* expr) -> SetLocal* { - return builder->makeSetLocal( - rightHigh, 
- builder->makeBinary( - AddInt32, - builder->makeGetLocal(rightHigh, i32), - expr - ) - ); - }; - SetLocal* addLHRL = addToHighBits( - builder->makeBinary( - MulInt32, - builder->makeGetLocal(leftHigh, i32), - builder->makeGetLocal(rightLow, i32) - ) - ); - SetLocal* addLL1RL1 = addToHighBits( - builder->makeBinary( - MulInt32, - builder->makeGetLocal(leftLow1, i32), - builder->makeGetLocal(rightLow1, i32) - ) - ); - SetLocal* addLL0RL1 = addToHighBits( - builder->makeBinary( - ShrUInt32, - builder->makeBinary( - MulInt32, - builder->makeGetLocal(leftLow0, i32), - builder->makeGetLocal(rightLow1, i32) - ), - builder->makeConst(Literal(int32_t(16))) - ) - ); - SetLocal* addLL1RL0 = addToHighBits( - builder->makeBinary( - ShrUInt32, - builder->makeBinary( - MulInt32, - builder->makeGetLocal(leftLow1, i32), - builder->makeGetLocal(rightLow0, i32) - ), - builder->makeConst(Literal(int32_t(16))) - ) - ); - Binary* getLow = builder->makeBinary( - MulInt32, - builder->makeGetLocal(leftLow, i32), - builder->makeGetLocal(rightLow, i32) - ); - result = builder->blockify( - result, - setLL0, - setLL1, - setRL0, - setRL1, - setLLRH, - addLHRL, - addLL1RL1, - addLL0RL1, - addLL1RL0, - getLow - ); - setOutParam(result, std::move(rightHigh)); - return result; - } - Block* lowerBitwise(BinaryOp op, Block* result, TempVar&& leftLow, TempVar&& leftHigh, TempVar&& rightLow, TempVar&& rightHigh) { @@ -1530,46 +1249,6 @@ struct I64ToI32Lowering : public WalkerPass<PostWalker<I64ToI32Lowering>> { return result; } - Block* lowerRotate(BinaryOp op, Block* result, TempVar&& leftLow, - TempVar&& leftHigh, TempVar&& rightLow, TempVar&& rightHigh) { - assert(op == RotLInt64 || op == RotRInt64); - Name name; - if (op == RotLInt64) { - needRotl64 = true; - name = WASM_ROTL64; - } else { - needRotr64 = true; - name = WASM_ROTR64; - } - TempVar lowResult = getTemp(); - result = builder->blockify( - result, - builder->makeSetLocal( - lowResult, - builder->makeCall( - name, - { - 
builder->makeGetLocal(leftLow, i32), - builder->makeGetLocal(leftHigh, i32), - builder->makeBinary( - AndInt32, - builder->makeGetLocal(rightLow, i32), - builder->makeConst(Literal(int32_t(64 - 1))) - ) - }, - i32 - ) - ), - builder->makeSetLocal( - rightHigh, - builder->makeGetGlobal(INT64_TO_32_HIGH_BITS, i32) - ), - builder->makeGetLocal(lowResult, i32) - ); - setOutParam(result, std::move(rightHigh)); - return result; - } - Block* lowerEq(Block* result, TempVar&& leftLow, TempVar&& leftHigh, TempVar&& rightLow, TempVar&& rightHigh) { return builder->blockify( @@ -1762,17 +1441,16 @@ struct I64ToI32Lowering : public WalkerPass<PostWalker<I64ToI32Lowering>> { std::move(rightLow), std::move(rightHigh))); break; } - case MulInt64: { - replaceCurrent( - lowerMul(result, std::move(leftLow), std::move(leftHigh), - std::move(rightLow), std::move(rightHigh)) - ); - break; - } + case MulInt64: case DivSInt64: case DivUInt64: case RemSInt64: - case RemUInt64: goto err; + case RemUInt64: + case RotLInt64: + case RotRInt64: + std::cerr << "should have been removed by now " << curr->op << std::endl; + WASM_UNREACHABLE(); + case AndInt64: case OrInt64: case XorInt64: { @@ -1792,14 +1470,6 @@ struct I64ToI32Lowering : public WalkerPass<PostWalker<I64ToI32Lowering>> { ); break; } - case RotLInt64: - case RotRInt64: { - replaceCurrent( - lowerRotate(curr->op, result, std::move(leftLow), std::move(leftHigh), - std::move(rightLow), std::move(rightHigh)) - ); - break; - } case EqInt64: { replaceCurrent( lowerEq(result, std::move(leftLow), std::move(leftHigh), @@ -1833,7 +1503,7 @@ struct I64ToI32Lowering : public WalkerPass<PostWalker<I64ToI32Lowering>> { ); break; } - err: default: { + default: { std::cerr << "Unhandled binary op " << curr->op << std::endl; abort(); } @@ -1867,13 +1537,11 @@ struct I64ToI32Lowering : public WalkerPass<PostWalker<I64ToI32Lowering>> { private: std::unique_ptr<Builder> builder; std::unordered_map<Index, Index> indexMap; + std::unordered_map<int, 
std::vector<Index>> freeTemps; std::unordered_map<Expression*, TempVar> highBitVars; std::unordered_map<Name, TempVar> labelHighBitVars; - std::unordered_map<int, std::vector<Index>> freeTemps; std::unordered_map<Index, Type> tempTypes; Index nextTemp; - bool needRotl64 = false; - bool needRotr64 = false; TempVar getTemp(Type ty = i32) { Index ret; diff --git a/src/passes/RemoveNonJSOps.cpp b/src/passes/RemoveNonJSOps.cpp index 77809d6b3..4bd40a6c6 100644 --- a/src/passes/RemoveNonJSOps.cpp +++ b/src/passes/RemoveNonJSOps.cpp @@ -15,341 +15,97 @@ */ // -// // Removes all operations in a wasm module that aren't inherently implementable -// in JS. This includes things like `f32.nearest` and -// `f64.copysign`. Most operations are lowered to a call to an injected +// in JS. This includes things like 64-bit division, `f32.nearest`, +// `f64.copysign`, etc. Most operations are lowered to a call to an injected // intrinsic implementation. Intrinsics don't use themselves to implement // themselves. // +// You'll find a large wast blob in `wasm-intrinsics.wast` next to this file +// which contains all of the injected intrinsics. We manually copy over any +// needed intrinsics from this module into the module that we're optimizing +// after walking the current module. 
+// #include <wasm.h> #include <pass.h> #include "asmjs/shared-constants.h" #include "wasm-builder.h" +#include "wasm-s-parser.h" +#include "ir/module-utils.h" +#include "ir/find_all.h" +#include "passes/intrinsics-module.h" namespace wasm { struct RemoveNonJSOpsPass : public WalkerPass<PostWalker<RemoveNonJSOpsPass>> { - bool needNearestF32 = false; - bool needNearestF64 = false; - bool needTruncF32 = false; - bool needTruncF64 = false; - bool needCtzInt32 = false; - bool needPopcntInt32 = false; - bool needRotLInt32 = false; - bool needRotRInt32 = false; + std::unique_ptr<Builder> builder; + std::unordered_set<Name> neededIntrinsics; bool isFunctionParallel() override { return false; } Pass* create() override { return new RemoveNonJSOpsPass; } void doWalkModule(Module* module) { + // Discover all of the intrinsics that we need to inject, lowering all + // operations to intrinsic calls while we're at it. if (!builder) builder = make_unique<Builder>(*module); PostWalker<RemoveNonJSOpsPass>::doWalkModule(module); - if (needNearestF32) { - module->addFunction(createNearest(f32)); - } - if (needNearestF64) { - module->addFunction(createNearest(f64)); - } - if (needTruncF32) { - module->addFunction(createTrunc(f32)); - } - if (needTruncF64) { - module->addFunction(createTrunc(f64)); - } - if (needCtzInt32) { - module->addFunction(createCtz()); - } - if (needPopcntInt32) { - module->addFunction(createPopcnt()); + if (neededIntrinsics.size() == 0) { + return; } - if (needRotLInt32) { - module->addFunction(createRot(RotLInt32)); - } - if (needRotRInt32) { - module->addFunction(createRot(RotRInt32)); - } - } - - Function *createNearest(Type f) { - // fn nearest(f: float) -> float { - // let ceil = ceil(f); - // let floor = floor(f); - // let fract = f - floor; - // if fract < 0.5 { - // floor - // } else if fract > 0.5 { - // ceil - // } else { - // let rem = floor / 2.0; - // if rem - floor(rem) == 0.0 { - // floor - // } else { - // ceil - // } - // } - // } - Index arg 
= 0; - Index ceil = 1; - Index floor = 2; - Index fract = 3; - Index rem = 4; - - UnaryOp ceilOp = CeilFloat32; - UnaryOp floorOp = FloorFloat32; - BinaryOp subOp = SubFloat32; - BinaryOp ltOp = LtFloat32; - BinaryOp gtOp = GtFloat32; - BinaryOp divOp = DivFloat32; - BinaryOp eqOp = EqFloat32; - Literal litHalf((float) 0.5); - Literal litOne((float) 1.0); - Literal litZero((float) 0.0); - Literal litTwo((float) 2.0); - if (f == f64) { - ceilOp = CeilFloat64; - floorOp = FloorFloat64; - subOp = SubFloat64; - ltOp = LtFloat64; - gtOp = GtFloat64; - divOp = DivFloat64; - eqOp = EqFloat64; - litHalf = Literal((double) 0.5); - litOne = Literal((double) 1.0); - litZero = Literal((double) 0.0); - litTwo = Literal((double) 2.0); + // Parse the intrinsics wast blob embedded as `IntrinsicsModuleWast`. + // + // TODO: only do this once per invocation of wasm2asm + Module intrinsicsModule; + std::string input(IntrinsicsModuleWast); + SExpressionParser parser(const_cast<char*>(input.c_str())); + Element& root = *parser.root; + SExpressionWasmBuilder builder(intrinsicsModule, *root[0]); + + std::set<Name> neededFunctions; + + // Iteratively link intrinsics from `intrinsicsModule` into our destination + // module, as needed. + // + // Note that intrinsics often use one another. For example the 64-bit + // division intrinsic ends up using the 32-bit ctz intrinsic, but does so + // via a native instruction. The loop here is used to continuously reprocess + // injected intrinsics to ensure that they never contain non-js ops when + // we're done. + while (neededIntrinsics.size() > 0) { + // Recursively probe all needed intrinsics for transitively used + // functions. This is building up a set of functions we'll link into our + // module. + for (auto &name : neededIntrinsics) { + addNeededFunctions(intrinsicsModule, name, neededFunctions); + } + neededIntrinsics.clear(); + + // Link in everything that wasn't already linked in. 
After we've done the + // copy we then walk the function to rewrite any non-js operations it has + // as well. + for (auto &name : neededFunctions) { + doWalkFunction(ModuleUtils::copyFunction(intrinsicsModule, *module, name)); + } + neededFunctions.clear(); } - - Expression *body = builder->blockify( - builder->makeSetLocal( - ceil, - builder->makeUnary(ceilOp, builder->makeGetLocal(arg, f)) - ), - builder->makeSetLocal( - floor, - builder->makeUnary(floorOp, builder->makeGetLocal(arg, f)) - ), - builder->makeSetLocal( - fract, - builder->makeBinary( - subOp, - builder->makeGetLocal(arg, f), - builder->makeGetLocal(floor, f) - ) - ), - builder->makeIf( - builder->makeBinary( - ltOp, - builder->makeGetLocal(fract, f), - builder->makeConst(litHalf) - ), - builder->makeGetLocal(floor, f), - builder->makeIf( - builder->makeBinary( - gtOp, - builder->makeGetLocal(fract, f), - builder->makeConst(litHalf) - ), - builder->makeGetLocal(ceil, f), - builder->blockify( - builder->makeSetLocal( - rem, - builder->makeBinary( - divOp, - builder->makeGetLocal(floor, f), - builder->makeConst(litTwo) - ) - ), - builder->makeIf( - builder->makeBinary( - eqOp, - builder->makeBinary( - subOp, - builder->makeGetLocal(rem, f), - builder->makeUnary( - floorOp, - builder->makeGetLocal(rem, f) - ) - ), - builder->makeConst(litZero) - ), - builder->makeGetLocal(floor, f), - builder->makeGetLocal(ceil, f) - ) - ) - ) - ) - ); - std::vector<Type> params = {f}; - std::vector<Type> vars = {f, f, f, f, f}; - Name name = f == f32 ? 
WASM_NEAREST_F32 : WASM_NEAREST_F64; - return builder->makeFunction(name, std::move(params), f, std::move(vars), body); } - Function *createTrunc(Type f) { - // fn trunc(f: float) -> float { - // if f < 0.0 { - // ceil(f) - // } else { - // floor(f) - // } - // } - - Index arg = 0; - - UnaryOp ceilOp = CeilFloat32; - UnaryOp floorOp = FloorFloat32; - BinaryOp ltOp = LtFloat32; - Literal litZero((float) 0.0); - if (f == f64) { - ceilOp = CeilFloat64; - floorOp = FloorFloat64; - ltOp = LtFloat64; - litZero = Literal((double) 0.0); + void addNeededFunctions(Module &m, Name name, std::set<Name> &needed) { + if (needed.count(name)) { + return; } + needed.insert(name); - Expression *body = builder->makeIf( - builder->makeBinary( - ltOp, - builder->makeGetLocal(arg, f), - builder->makeConst(litZero) - ), - builder->makeUnary(ceilOp, builder->makeGetLocal(arg, f)), - builder->makeUnary(floorOp, builder->makeGetLocal(arg, f)) - ); - std::vector<Type> params = {f}; - std::vector<Type> vars = {}; - Name name = f == f32 ? 
WASM_TRUNC_F32 : WASM_TRUNC_F64; - return builder->makeFunction(name, std::move(params), f, std::move(vars), body); - } - - Function* createCtz() { - // if eqz(x) then 32 else (32 - clz(x ^ (x - 1))) - Binary* xorExp = builder->makeBinary( - XorInt32, - builder->makeGetLocal(0, i32), - builder->makeBinary( - SubInt32, - builder->makeGetLocal(0, i32), - builder->makeConst(Literal(int32_t(1))) - ) - ); - Binary* subExp = builder->makeBinary( - SubInt32, - builder->makeConst(Literal(int32_t(32 - 1))), - builder->makeUnary(ClzInt32, xorExp) - ); - If* body = builder->makeIf( - builder->makeUnary( - EqZInt32, - builder->makeGetLocal(0, i32) - ), - builder->makeConst(Literal(int32_t(32))), - subExp - ); - return builder->makeFunction( - WASM_CTZ32, - std::vector<NameType>{NameType("x", i32)}, - i32, - std::vector<NameType>{}, - body - ); - } - - Function* createPopcnt() { - // popcnt implemented as: - // int c; for (c = 0; x != 0; c++) { x = x & (x - 1) }; return c - Name loopName("l"); - Name blockName("b"); - Break* brIf = builder->makeBreak( - blockName, - builder->makeGetLocal(1, i32), - builder->makeUnary( - EqZInt32, - builder->makeGetLocal(0, i32) - ) - ); - SetLocal* update = builder->makeSetLocal( - 0, - builder->makeBinary( - AndInt32, - builder->makeGetLocal(0, i32), - builder->makeBinary( - SubInt32, - builder->makeGetLocal(0, i32), - builder->makeConst(Literal(int32_t(1))) - ) - ) - ); - SetLocal* inc = builder->makeSetLocal( - 1, - builder->makeBinary( - AddInt32, - builder->makeGetLocal(1, i32), - builder->makeConst(Literal(1)) - ) - ); - Break* cont = builder->makeBreak(loopName); - Loop* loop = builder->makeLoop( - loopName, - builder->blockify(builder->makeDrop(brIf), update, inc, cont) - ); - Block* loopBlock = builder->blockifyWithName(loop, blockName); - // TODO: not sure why this is necessary... 
- loopBlock->type = i32; - SetLocal* initCount = builder->makeSetLocal(1, builder->makeConst(Literal(0))); - return builder->makeFunction( - WASM_POPCNT32, - std::vector<NameType>{NameType("x", i32)}, - i32, - std::vector<NameType>{NameType("count", i32)}, - builder->blockify(initCount, loopBlock) - ); - } - - Function* createRot(BinaryOp op) { - // left rotate is: - // (((((~0) >>> k) & x) << k) | ((((~0) << (w - k)) & x) >>> (w - k))) - // where k is shift modulo w. reverse shifts for right rotate - bool isLRot = op == RotLInt32; - BinaryOp lshift = isLRot ? ShlInt32 : ShrUInt32; - BinaryOp rshift = isLRot ? ShrUInt32 : ShlInt32; - Literal widthMask(int32_t(32 - 1)); - Literal width(int32_t(32)); - auto shiftVal = [&]() { - return builder->makeBinary( - AndInt32, - builder->makeGetLocal(1, i32), - builder->makeConst(widthMask) - ); - }; - auto widthSub = [&]() { - return builder->makeBinary(SubInt32, builder->makeConst(width), shiftVal()); - }; - auto fullMask = [&]() { - return builder->makeConst(Literal(~int32_t(0))); - }; - Binary* maskRShift = builder->makeBinary(rshift, fullMask(), shiftVal()); - Binary* lowMask = builder->makeBinary(AndInt32, maskRShift, builder->makeGetLocal(0, i32)); - Binary* lowShift = builder->makeBinary(lshift, lowMask, shiftVal()); - Binary* maskLShift = builder->makeBinary(lshift, fullMask(), widthSub()); - Binary* highMask = - builder->makeBinary(AndInt32, maskLShift, builder->makeGetLocal(0, i32)); - Binary* highShift = builder->makeBinary(rshift, highMask, widthSub()); - Binary* body = builder->makeBinary(OrInt32, lowShift, highShift); - return builder->makeFunction( - isLRot ? 
WASM_ROTL32 : WASM_ROTR32, - std::vector<NameType>{NameType("x", i32), - NameType("k", i32)}, - i32, - std::vector<NameType>{}, - body - ); + auto function = m.getFunction(name); + FindAll<Call> calls(function->body); + for (auto &call : calls.list) { + this->addNeededFunctions(m, call->target, needed); + } } void doWalkFunction(Function* func) { @@ -366,16 +122,36 @@ struct RemoveNonJSOpsPass : public WalkerPass<PostWalker<RemoveNonJSOpsPass>> { return; case RotLInt32: - needRotLInt32 = true; name = WASM_ROTL32; break; case RotRInt32: - needRotRInt32 = true; name = WASM_ROTR32; break; + case RotLInt64: + name = WASM_ROTL64; + break; + case RotRInt64: + name = WASM_ROTR64; + break; + case MulInt64: + name = WASM_I64_MUL; + break; + case DivSInt64: + name = WASM_I64_SDIV; + break; + case DivUInt64: + name = WASM_I64_UDIV; + break; + case RemSInt64: + name = WASM_I64_SREM; + break; + case RemUInt64: + name = WASM_I64_UREM; + break; default: return; } + neededIntrinsics.insert(name); replaceCurrent(builder->makeCall(name, {curr->left, curr->right}, curr->type)); } @@ -435,40 +211,38 @@ struct RemoveNonJSOpsPass : public WalkerPass<PostWalker<RemoveNonJSOpsPass>> { Name functionCall; switch (curr->op) { case NearestFloat32: - needNearestF32 = true; functionCall = WASM_NEAREST_F32; break; case NearestFloat64: - needNearestF64 = true; functionCall = WASM_NEAREST_F64; break; case TruncFloat32: - needTruncF32 = true; functionCall = WASM_TRUNC_F32; break; case TruncFloat64: - needTruncF64 = true; functionCall = WASM_TRUNC_F64; break; + case PopcntInt64: + functionCall = WASM_POPCNT64; + break; case PopcntInt32: - needPopcntInt32 = true; functionCall = WASM_POPCNT32; break; + case CtzInt64: + functionCall = WASM_CTZ64; + break; case CtzInt32: - needCtzInt32 = true; functionCall = WASM_CTZ32; break; default: return; } + neededIntrinsics.insert(functionCall); replaceCurrent(builder->makeCall(functionCall, {curr->value}, curr->type)); } - -private: - std::unique_ptr<Builder> 
builder; }; Pass *createRemoveNonJSOpsPass() { diff --git a/src/passes/intrinsics-module.h b/src/passes/intrinsics-module.h new file mode 100644 index 000000000..c9a757dc0 --- /dev/null +++ b/src/passes/intrinsics-module.h @@ -0,0 +1,27 @@ +/* + * Copyright 2018 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef passes_intrinsics_module_h +#define passes_intrinsics_module_h + +namespace wasm { + +extern const char* IntrinsicsModuleWast; + +} // namespace wasm + +#endif // passes_intrinsics_module_h + diff --git a/src/passes/wasm-intrinsics.wast b/src/passes/wasm-intrinsics.wast new file mode 100644 index 000000000..8cd14d51d --- /dev/null +++ b/src/passes/wasm-intrinsics.wast @@ -0,0 +1,1113 @@ +;; A large WAST blob which contains the implementations of all the intrinsics +;; that we inject as part of this module. This blob was generated from a Rust +;; program [1] which uses the Rust compiler-builtins project. It's not +;; necessarily perfect but gets the job done! 
The idea here is that we inject +;; these pretty early so they can continue to be optimized by further passes +;; (aka inlining and whatnot) +;; +;; [1]: https://gist.github.com/alexcrichton/e7ea67bcdd17ce4b6254e66f77165690 + +(module + (type $0 (func (param i64 i64) (result i64))) + (type $1 (func (param f32) (result f32))) + (type $2 (func (param f64) (result f64))) + (type $3 (func (param i32) (result i32))) + (type $4 (func (param i32 i32) (result i32))) + (import "env" "memory" (memory $0 17)) + (export "__wasm_i64_sdiv" (func $__wasm_i64_sdiv)) + (export "__wasm_i64_udiv" (func $__wasm_i64_udiv)) + (export "__wasm_i64_srem" (func $__wasm_i64_srem)) + (export "__wasm_i64_urem" (func $__wasm_i64_urem)) + (export "__wasm_i64_mul" (func $__wasm_i64_mul)) + (export "__wasm_trunc_f32" (func $__wasm_trunc_f32)) + (export "__wasm_trunc_f64" (func $__wasm_trunc_f64)) + (export "__wasm_ctz_i32" (func $__wasm_ctz_i32)) + (export "__wasm_ctz_i64" (func $__wasm_ctz_i64)) + (export "__wasm_rotl_i32" (func $__wasm_rotl_i32)) + (export "__wasm_rotr_i32" (func $__wasm_rotr_i32)) + (export "__wasm_rotl_i64" (func $__wasm_rotl_i64)) + (export "__wasm_rotr_i64" (func $__wasm_rotr_i64)) + (export "__wasm_nearest_f32" (func $__wasm_nearest_f32)) + (export "__wasm_nearest_f64" (func $__wasm_nearest_f64)) + (export "__wasm_popcnt_i32" (func $__wasm_popcnt_i32)) + (export "__wasm_popcnt_i64" (func $__wasm_popcnt_i64)) + + ;; lowering of the i32.popcnt instruction, counts the number of bits set in the + ;; input and returns the result + (func $__wasm_popcnt_i32 (param $var$0 i32) (result i32) + (local $var$1 i32) + (block $label$1 (result i32) + (loop $label$2 + (drop + (br_if $label$1 + (get_local $var$1) + (i32.eqz + (get_local $var$0) + ) + ) + ) + (set_local $var$0 + (i32.and + (get_local $var$0) + (i32.sub + (get_local $var$0) + (i32.const 1) + ) + ) + ) + (set_local $var$1 + (i32.add + (get_local $var$1) + (i32.const 1) + ) + ) + (br $label$2) + ) + ) + ) + ;; lowering of the 
i64.popcnt instruction, counts the number of bits set in the + ;; input and returns the result + (func $__wasm_popcnt_i64 (param $var$0 i64) (result i64) + (local $var$1 i64) + (block $label$1 (result i64) + (loop $label$2 + (drop + (br_if $label$1 + (get_local $var$1) + (i64.eqz + (get_local $var$0) + ) + ) + ) + (set_local $var$0 + (i64.and + (get_local $var$0) + (i64.sub + (get_local $var$0) + (i64.const 1) + ) + ) + ) + (set_local $var$1 + (i64.add + (get_local $var$1) + (i64.const 1) + ) + ) + (br $label$2) + ) + ) + ) + ;; lowering of the i64.div_s instruction, return $var$0 / $var$1 + (func $__wasm_i64_sdiv (; 0 ;) (type $0) (param $var$0 i64) (param $var$1 i64) (result i64) + (call $_ZN17compiler_builtins3int4sdiv3Div3div17he78fc483e41d7ec7E + (get_local $var$0) + (get_local $var$1) + ) + ) + ;; lowering of the i64.div_u instruction, return $var$0 / $var$1 + (func $__wasm_i64_udiv (; 1 ;) (type $0) (param $var$0 i64) (param $var$1 i64) (result i64) + (call $_ZN17compiler_builtins3int4udiv10divmod_u6417h6026910b5ed08e40E + (get_local $var$0) + (get_local $var$1) + ) + ) + ;; lowering of the i64.rem_s instruction, return $var$0 % $var$1 + (func $__wasm_i64_srem (; 2 ;) (type $0) (param $var$0 i64) (param $var$1 i64) (result i64) + (call $_ZN17compiler_builtins3int4sdiv3Mod4mod_17h2cbb7bbf36e41d68E + (get_local $var$0) + (get_local $var$1) + ) + ) + ;; lowering of the i64.rem_u instruction, return $var$0 % $var$1 + (func $__wasm_i64_urem (; 3 ;) (type $0) (param $var$0 i64) (param $var$1 i64) (result i64) + (drop + (call $_ZN17compiler_builtins3int4udiv10divmod_u6417h6026910b5ed08e40E + (get_local $var$0) + (get_local $var$1) + ) + ) + (i64.load + (i32.const 1024) + ) + ) + ;; lowering of the i64.mul instruction, return $var$0 * $var$1 + (func $__wasm_i64_mul (; 4 ;) (type $0) (param $var$0 i64) (param $var$1 i64) (result i64) + (call $_ZN17compiler_builtins3int3mul3Mul3mul17h070e9a1c69faec5bE + (get_local $var$0) + (get_local $var$1) + ) + ) + ;; lowering of the 
f32.trunc instruction, rounds to the nearest integer, + ;; towards zero + (func $__wasm_trunc_f32 (; 5 ;) (type $1) (param $var$0 f32) (result f32) + (select + (f32.ceil + (get_local $var$0) + ) + (f32.floor + (get_local $var$0) + ) + (f32.lt + (get_local $var$0) + (f32.const 0) + ) + ) + ) + ;; lowering of the f64.trunc instruction, rounds to the nearest integer, + ;; towards zero + (func $__wasm_trunc_f64 (; 6 ;) (type $2) (param $var$0 f64) (result f64) + (select + (f64.ceil + (get_local $var$0) + ) + (f64.floor + (get_local $var$0) + ) + (f64.lt + (get_local $var$0) + (f64.const 0) + ) + ) + ) + ;; lowering of the i32.ctz instruction, counting the number of trailing zeros in $var$0 (32 when $var$0 is zero) + (func $__wasm_ctz_i32 (; 7 ;) (type $3) (param $var$0 i32) (result i32) + (if + (get_local $var$0) + (return + (i32.sub + (i32.const 31) + (i32.clz + (i32.xor + (i32.add + (get_local $var$0) + (i32.const -1) + ) + (get_local $var$0) + ) + ) + ) + ) + ) + (i32.const 32) + ) + ;; lowering of the i64.ctz instruction, counting the number of trailing zeros in $var$0 (64 when $var$0 is zero) + (func $__wasm_ctz_i64 (; 8 ;) (param $var$0 i64) (result i64) + (if + (i32.eqz + (i64.eqz + (get_local $var$0) + ) + ) + (return + (i64.sub + (i64.const 63) + (i64.clz + (i64.xor + (i64.add + (get_local $var$0) + (i64.const -1) + ) + (get_local $var$0) + ) + ) + ) + ) + ) + (i64.const 64) + ) + ;; lowering of the i32.rotl instruction, rotating the first argument, with + ;; wraparound, by the second argument + (func $__wasm_rotl_i32 (; 8 ;) (type $4) (param $var$0 i32) (param $var$1 i32) (result i32) + (local $var$2 i32) + (i32.or + (i32.shl + (i32.and + (i32.shr_u + (i32.const -1) + (tee_local $var$2 + (i32.and + (get_local $var$1) + (i32.const 31) + ) + ) + ) + (get_local $var$0) + ) + (get_local $var$2) + ) + (i32.shr_u + (i32.and + (i32.shl + (i32.const -1) + (tee_local $var$1 + (i32.and + (i32.sub + (i32.const 0) + (get_local $var$1) + ) + (i32.const 31) + ) + ) + ) + (get_local $var$0) + ) + (get_local $var$1) + ) + ) + ) +
;; lowering of the i32.rotr instruction, rotating the first argument, with + ;; wraparound, by the second argument + (func $__wasm_rotr_i32 (; 9 ;) (type $4) (param $var$0 i32) (param $var$1 i32) (result i32) + (local $var$2 i32) + (i32.or + (i32.shr_u + (i32.and + (i32.shl + (i32.const -1) + (tee_local $var$2 + (i32.and + (get_local $var$1) + (i32.const 31) + ) + ) + ) + (get_local $var$0) + ) + (get_local $var$2) + ) + (i32.shl + (i32.and + (i32.shr_u + (i32.const -1) + (tee_local $var$1 + (i32.and + (i32.sub + (i32.const 0) + (get_local $var$1) + ) + (i32.const 31) + ) + ) + ) + (get_local $var$0) + ) + (get_local $var$1) + ) + ) + ) + ;; lowering of the i64.rotl instruction, rotating the first argument, with + ;; wraparound, by the second argument + (func $__wasm_rotl_i64 (; 10 ;) (type $0) (param $var$0 i64) (param $var$1 i64) (result i64) + (local $var$2 i64) + (i64.or + (i64.shl + (i64.and + (i64.shr_u + (i64.const -1) + (tee_local $var$2 + (i64.and + (get_local $var$1) + (i64.const 63) + ) + ) + ) + (get_local $var$0) + ) + (get_local $var$2) + ) + (i64.shr_u + (i64.and + (i64.shl + (i64.const -1) + (tee_local $var$1 + (i64.and + (i64.sub + (i64.const 0) + (get_local $var$1) + ) + (i64.const 63) + ) + ) + ) + (get_local $var$0) + ) + (get_local $var$1) + ) + ) + ) + ;; lowering of the i64.rotr instruction, rotating the first argument, with + ;; wraparound, by the second argument + (func $__wasm_rotr_i64 (; 11 ;) (type $0) (param $var$0 i64) (param $var$1 i64) (result i64) + (local $var$2 i64) + (i64.or + (i64.shr_u + (i64.and + (i64.shl + (i64.const -1) + (tee_local $var$2 + (i64.and + (get_local $var$1) + (i64.const 63) + ) + ) + ) + (get_local $var$0) + ) + (get_local $var$2) + ) + (i64.shl + (i64.and + (i64.shr_u + (i64.const -1) + (tee_local $var$1 + (i64.and + (i64.sub + (i64.const 0) + (get_local $var$1) + ) + (i64.const 63) + ) + ) + ) + (get_local $var$0) + ) + (get_local $var$1) + ) + ) + ) + ;; lowering of the f32.nearest instruction, rounding the 
input to the nearest + ;; integer while breaking ties by rounding to even + (func $__wasm_nearest_f32 (; 12 ;) (type $1) (param $var$0 f32) (result f32) + (local $var$1 f32) + (local $var$2 f32) + (if + (i32.eqz + (f32.lt + (tee_local $var$2 + (f32.sub + (get_local $var$0) + (tee_local $var$1 + (f32.floor + (get_local $var$0) + ) + ) + ) + ) + (f32.const 0.5) + ) + ) + (block + (set_local $var$0 + (f32.ceil + (get_local $var$0) + ) + ) + (if + (f32.gt + (get_local $var$2) + (f32.const 0.5) + ) + (return + (get_local $var$0) + ) + ) + (set_local $var$1 + (select + (get_local $var$1) + (get_local $var$0) + (f32.eq + (f32.sub + (tee_local $var$2 + (f32.mul + (get_local $var$1) + (f32.const 0.5) + ) + ) + (f32.floor + (get_local $var$2) + ) + ) + (f32.const 0) + ) + ) + ) + ) + ) + (get_local $var$1) + ) + ;; lowering of the f64.nearest instruction, rounding the input to the nearest + ;; integer while breaking ties by rounding to even + (func $__wasm_nearest_f64 (; 13 ;) (type $2) (param $var$0 f64) (result f64) + (local $var$1 f64) + (local $var$2 f64) + (if + (i32.eqz + (f64.lt + (tee_local $var$2 + (f64.sub + (get_local $var$0) + (tee_local $var$1 + (f64.floor + (get_local $var$0) + ) + ) + ) + ) + (f64.const 0.5) + ) + ) + (block + (set_local $var$0 + (f64.ceil + (get_local $var$0) + ) + ) + (if + (f64.gt + (get_local $var$2) + (f64.const 0.5) + ) + (return + (get_local $var$0) + ) + ) + (set_local $var$1 + (select + (get_local $var$1) + (get_local $var$0) + (f64.eq + (f64.sub + (tee_local $var$2 + (f64.mul + (get_local $var$1) + (f64.const 0.5) + ) + ) + (f64.floor + (get_local $var$2) + ) + ) + (f64.const 0) + ) + ) + ) + ) + ) + (get_local $var$1) + ) + (func $_ZN17compiler_builtins3int4udiv10divmod_u6417h6026910b5ed08e40E (; 14 ;) (type $0) (param $var$0 i64) (param $var$1 i64) (result i64) + (local $var$2 i32) + (local $var$3 i32) + (local $var$4 i32) + (local $var$5 i64) + (local $var$6 i64) + (local $var$7 i64) + (local $var$8 i64) + (block $label$1 + (block 
$label$2 + (block $label$3 + (block $label$4 + (block $label$5 + (block $label$6 + (block $label$7 + (block $label$8 + (block $label$9 + (block $label$10 + (block $label$11 + (if + (tee_local $var$2 + (i32.wrap/i64 + (i64.shr_u + (get_local $var$0) + (i64.const 32) + ) + ) + ) + (block + (br_if $label$11 + (i32.eqz + (tee_local $var$3 + (i32.wrap/i64 + (get_local $var$1) + ) + ) + ) + ) + (br_if $label$9 + (i32.eqz + (tee_local $var$4 + (i32.wrap/i64 + (i64.shr_u + (get_local $var$1) + (i64.const 32) + ) + ) + ) + ) + ) + (br_if $label$8 + (i32.le_u + (tee_local $var$2 + (i32.sub + (i32.clz + (get_local $var$4) + ) + (i32.clz + (get_local $var$2) + ) + ) + ) + (i32.const 31) + ) + ) + (br $label$2) + ) + ) + (br_if $label$2 + (i64.ge_u + (get_local $var$1) + (i64.const 4294967296) + ) + ) + (i64.store + (i32.const 1024) + (i64.extend_u/i32 + (i32.sub + (tee_local $var$2 + (i32.wrap/i64 + (get_local $var$0) + ) + ) + (i32.mul + (tee_local $var$2 + (i32.div_u + (get_local $var$2) + (tee_local $var$3 + (i32.wrap/i64 + (get_local $var$1) + ) + ) + ) + ) + (get_local $var$3) + ) + ) + ) + ) + (return + (i64.extend_u/i32 + (get_local $var$2) + ) + ) + ) + (set_local $var$3 + (i32.wrap/i64 + (i64.shr_u + (get_local $var$1) + (i64.const 32) + ) + ) + ) + (br_if $label$7 + (i32.eqz + (i32.wrap/i64 + (get_local $var$0) + ) + ) + ) + (br_if $label$6 + (i32.eqz + (get_local $var$3) + ) + ) + (br_if $label$6 + (i32.and + (tee_local $var$4 + (i32.add + (get_local $var$3) + (i32.const -1) + ) + ) + (get_local $var$3) + ) + ) + (i64.store + (i32.const 1024) + (i64.or + (i64.shl + (i64.extend_u/i32 + (i32.and + (get_local $var$4) + (get_local $var$2) + ) + ) + (i64.const 32) + ) + (i64.and + (get_local $var$0) + (i64.const 4294967295) + ) + ) + ) + (return + (i64.extend_u/i32 + (i32.shr_u + (get_local $var$2) + (i32.and + (i32.ctz + (get_local $var$3) + ) + (i32.const 31) + ) + ) + ) + ) + ) + (unreachable) + ) + (br_if $label$5 + (i32.eqz + (i32.and + (tee_local $var$4 + (i32.add 
+ (get_local $var$3) + (i32.const -1) + ) + ) + (get_local $var$3) + ) + ) + ) + (set_local $var$3 + (i32.sub + (i32.const 0) + (tee_local $var$2 + (i32.sub + (i32.add + (i32.clz + (get_local $var$3) + ) + (i32.const 33) + ) + (i32.clz + (get_local $var$2) + ) + ) + ) + ) + ) + (br $label$3) + ) + (set_local $var$3 + (i32.sub + (i32.const 63) + (get_local $var$2) + ) + ) + (set_local $var$2 + (i32.add + (get_local $var$2) + (i32.const 1) + ) + ) + (br $label$3) + ) + (i64.store + (i32.const 1024) + (i64.shl + (i64.extend_u/i32 + (i32.sub + (get_local $var$2) + (i32.mul + (tee_local $var$4 + (i32.div_u + (get_local $var$2) + (get_local $var$3) + ) + ) + (get_local $var$3) + ) + ) + ) + (i64.const 32) + ) + ) + (return + (i64.extend_u/i32 + (get_local $var$4) + ) + ) + ) + (br_if $label$4 + (i32.lt_u + (tee_local $var$2 + (i32.sub + (i32.clz + (get_local $var$3) + ) + (i32.clz + (get_local $var$2) + ) + ) + ) + (i32.const 31) + ) + ) + (br $label$2) + ) + (i64.store + (i32.const 1024) + (i64.extend_u/i32 + (i32.and + (get_local $var$4) + (i32.wrap/i64 + (get_local $var$0) + ) + ) + ) + ) + (br_if $label$1 + (i32.eq + (get_local $var$3) + (i32.const 1) + ) + ) + (return + (i64.shr_u + (get_local $var$0) + (i64.extend_u/i32 + (i32.ctz + (get_local $var$3) + ) + ) + ) + ) + ) + (set_local $var$3 + (i32.sub + (i32.const 63) + (get_local $var$2) + ) + ) + (set_local $var$2 + (i32.add + (get_local $var$2) + (i32.const 1) + ) + ) + ) + (set_local $var$5 + (i64.shr_u + (get_local $var$0) + (i64.extend_u/i32 + (i32.and + (get_local $var$2) + (i32.const 63) + ) + ) + ) + ) + (set_local $var$0 + (i64.shl + (get_local $var$0) + (i64.extend_u/i32 + (i32.and + (get_local $var$3) + (i32.const 63) + ) + ) + ) + ) + (block $label$13 + (if + (get_local $var$2) + (block + (set_local $var$8 + (i64.add + (get_local $var$1) + (i64.const -1) + ) + ) + (loop $label$15 + (set_local $var$5 + (i64.sub + (tee_local $var$5 + (i64.or + (i64.shl + (get_local $var$5) + (i64.const 1) + ) + 
(i64.shr_u + (get_local $var$0) + (i64.const 63) + ) + ) + ) + (i64.and + (tee_local $var$6 + (i64.shr_s + (i64.sub + (get_local $var$8) + (get_local $var$5) + ) + (i64.const 63) + ) + ) + (get_local $var$1) + ) + ) + ) + (set_local $var$0 + (i64.or + (i64.shl + (get_local $var$0) + (i64.const 1) + ) + (get_local $var$7) + ) + ) + (set_local $var$7 + (tee_local $var$6 + (i64.and + (get_local $var$6) + (i64.const 1) + ) + ) + ) + (br_if $label$15 + (tee_local $var$2 + (i32.add + (get_local $var$2) + (i32.const -1) + ) + ) + ) + ) + (br $label$13) + ) + ) + ) + (i64.store + (i32.const 1024) + (get_local $var$5) + ) + (return + (i64.or + (i64.shl + (get_local $var$0) + (i64.const 1) + ) + (get_local $var$6) + ) + ) + ) + (i64.store + (i32.const 1024) + (get_local $var$0) + ) + (set_local $var$0 + (i64.const 0) + ) + ) + (get_local $var$0) + ) + (func $_ZN17compiler_builtins3int3mul3Mul3mul17h070e9a1c69faec5bE (; 15 ;) (type $0) (param $var$0 i64) (param $var$1 i64) (result i64) + (local $var$2 i32) + (local $var$3 i32) + (local $var$4 i32) + (local $var$5 i32) + (local $var$6 i32) + (i64.or + (i64.shl + (i64.extend_u/i32 + (i32.add + (i32.add + (i32.add + (i32.add + (i32.mul + (tee_local $var$4 + (i32.shr_u + (tee_local $var$2 + (i32.wrap/i64 + (get_local $var$1) + ) + ) + (i32.const 16) + ) + ) + (tee_local $var$5 + (i32.shr_u + (tee_local $var$3 + (i32.wrap/i64 + (get_local $var$0) + ) + ) + (i32.const 16) + ) + ) + ) + (i32.mul + (get_local $var$2) + (i32.wrap/i64 + (i64.shr_u + (get_local $var$0) + (i64.const 32) + ) + ) + ) + ) + (i32.mul + (i32.wrap/i64 + (i64.shr_u + (get_local $var$1) + (i64.const 32) + ) + ) + (get_local $var$3) + ) + ) + (i32.shr_u + (tee_local $var$2 + (i32.add + (i32.shr_u + (tee_local $var$6 + (i32.mul + (tee_local $var$2 + (i32.and + (get_local $var$2) + (i32.const 65535) + ) + ) + (tee_local $var$3 + (i32.and + (get_local $var$3) + (i32.const 65535) + ) + ) + ) + ) + (i32.const 16) + ) + (i32.mul + (get_local $var$2) + (get_local 
$var$5) + ) + ) + ) + (i32.const 16) + ) + ) + (i32.shr_u + (tee_local $var$2 + (i32.add + (i32.and + (get_local $var$2) + (i32.const 65535) + ) + (i32.mul + (get_local $var$4) + (get_local $var$3) + ) + ) + ) + (i32.const 16) + ) + ) + ) + (i64.const 32) + ) + (i64.extend_u/i32 + (i32.or + (i32.shl + (get_local $var$2) + (i32.const 16) + ) + (i32.and + (get_local $var$6) + (i32.const 65535) + ) + ) + ) + ) + ) + (func $_ZN17compiler_builtins3int4sdiv3Div3div17he78fc483e41d7ec7E (; 16 ;) (type $0) (param $var$0 i64) (param $var$1 i64) (result i64) + (local $var$2 i64) + (i64.sub + (i64.xor + (i64.div_u + (i64.sub + (i64.xor + (tee_local $var$2 + (i64.shr_s + (get_local $var$0) + (i64.const 63) + ) + ) + (get_local $var$0) + ) + (get_local $var$2) + ) + (i64.sub + (i64.xor + (tee_local $var$2 + (i64.shr_s + (get_local $var$1) + (i64.const 63) + ) + ) + (get_local $var$1) + ) + (get_local $var$2) + ) + ) + (tee_local $var$0 + (i64.shr_s + (i64.xor + (get_local $var$1) + (get_local $var$0) + ) + (i64.const 63) + ) + ) + ) + (get_local $var$0) + ) + ) + (func $_ZN17compiler_builtins3int4sdiv3Mod4mod_17h2cbb7bbf36e41d68E (; 17 ;) (type $0) (param $var$0 i64) (param $var$1 i64) (result i64) + (local $var$2 i64) + (i64.sub + (i64.xor + (i64.rem_u + (i64.sub + (i64.xor + (tee_local $var$2 + (i64.shr_s + (get_local $var$0) + (i64.const 63) + ) + ) + (get_local $var$0) + ) + (get_local $var$2) + ) + (i64.sub + (i64.xor + (tee_local $var$0 + (i64.shr_s + (get_local $var$1) + (i64.const 63) + ) + ) + (get_local $var$1) + ) + (get_local $var$0) + ) + ) + (get_local $var$2) + ) + (get_local $var$2) + ) + ) + ;; custom section "linking", size 3 +) diff --git a/src/wasm2asm.h b/src/wasm2asm.h index 465262d5b..956ded5e8 100644 --- a/src/wasm2asm.h +++ b/src/wasm2asm.h @@ -29,6 +29,8 @@ #include "asmjs/asmangle.h" #include "wasm.h" #include "wasm-builder.h" +#include "wasm-io.h" +#include "wasm-validator.h" #include "emscripten-optimizer/optimizer.h" #include "mixed_arena.h" 
#include "asm_v_wasm.h" @@ -228,7 +230,11 @@ private: Ref Wasm2AsmBuilder::processWasm(Module* wasm) { PassRunner runner(wasm); runner.add<AutoDrop>(); - runner.add("remove-non-js-ops"); // must be before i64-to-i32 + // First up remove as many non-JS operations as we can, including things like + // 64-bit integer multiplication/division, `f32.nearest` instructions, etc. + // This may inject intrinsics which use i64 so it needs to be run before the + // i64-to-i32 lowering pass. + runner.add("remove-non-js-ops"); // Currently the i64-to-32 lowering pass requires that `flatten` be run before // it produce correct code. For some more details about this see #1480 runner.add("flatten"); @@ -239,6 +245,16 @@ Ref Wasm2AsmBuilder::processWasm(Module* wasm) { runner.add("vacuum"); runner.setDebug(flags.debug); runner.run(); + + // Make sure the passes above didn't corrupt the module. This check is only + // compiled in when NDEBUG is not defined (assertion-enabled builds) +#ifndef NDEBUG + if (!WasmValidator().validate(*wasm)) { + WasmPrinter::printModule(wasm); + Fatal() << "error in validating input"; + } +#endif + Ref ret = ValueBuilder::makeToplevel(); Ref asmFunc = ValueBuilder::makeFunction(ASM_FUNC); ret[1]->push_back(asmFunc); @@ -486,6 +502,17 @@ void Wasm2AsmBuilder::addGlobal(Ref ast, Global* global) { } } +// True if `e` is a Return or a Block whose final child, recursively, is one. +// An empty Block has no final child, so it does not end in a return. +static bool expressionEndsInReturn(Expression *e) { + if (!e->is<Block>()) { + return e->is<Return>(); + } + ExpressionList& stats = static_cast<Block*>(e)->list; + // Guard the index below: an empty list has no element at size()-1. + return stats.size() > 0 && expressionEndsInReturn(stats[stats.size()-1]); +} + Ref Wasm2AsmBuilder::processFunction(Function* func) { if (flags.debug) { static int fns = 0; @@ -530,12 +557,7 @@ Ref Wasm2AsmBuilder::processFunction(Function* func) { ); }; scanFunctionBody(func->body); - bool isBodyBlock = func->body->is<Block>(); - ExpressionList* stats = isBodyBlock ?
- &static_cast<Block*>(func->body)->list : nullptr; - bool endsInReturn = - (isBodyBlock && ((*stats)[stats->size()-1]->is<Return>())) || - func->body->is<Return>(); + bool endsInReturn = expressionEndsInReturn(func->body); if (endsInReturn) { // return already taken care of flattenAppend(ret, processFunctionBody(func, NO_RESULT)); @@ -1244,21 +1266,10 @@ Ref Wasm2AsmBuilder::processFunctionBody(Function* func, IString result) { visit(curr->value, EXPRESSION_RESULT) ); case CtzInt32: - return makeSigning( - ValueBuilder::makeCall( - WASM_CTZ32, - visit(curr->value, EXPRESSION_RESULT) - ), - ASM_SIGNED - ); case PopcntInt32: - return makeSigning( - ValueBuilder::makeCall( - WASM_POPCNT32, - visit(curr->value, EXPRESSION_RESULT) - ), - ASM_SIGNED - ); + std::cerr << "i32 unary should have been removed: " << curr + << std::endl; + WASM_UNREACHABLE(); case EqZInt32: return ValueBuilder::makeBinary( makeAsmCoercion(visit(curr->value, |