diff options
author | Alex Crichton <alex@alexcrichton.com> | 2018-05-25 10:54:05 -0500 |
---|---|---|
committer | Alon Zakai <alonzakai@gmail.com> | 2018-05-25 08:54:05 -0700 |
commit | becfa3ee4214bb4e6fbe1dbdbf6c3756c548d82b (patch) | |
tree | 1108b2519d6ac1f97ae12b0d32792aa15a4e9237 /src | |
parent | 6e56ef195d01936c0c7f7a6d1c0f5d1c7e2c2e52 (diff) | |
download | binaryen-becfa3ee4214bb4e6fbe1dbdbf6c3756c548d82b.tar.gz binaryen-becfa3ee4214bb4e6fbe1dbdbf6c3756c548d82b.tar.bz2 binaryen-becfa3ee4214bb4e6fbe1dbdbf6c3756c548d82b.zip |
wasm2asm: Finish i64 lowering operations (#1563)
* wasm2asm: Finish i64 lowering operations
This commit finishes lowering i64 operations to JS by adding implementations of
64-bit division and remainder. The primary change here is to have these compiled
from Rust to wasm and then have them "linked in" via intrinsics. The
`RemoveNonJSOps` pass has been updated to include some of what
`I64ToI32Lowering` was previously doing, basically replacing some instructions
with calls to intrinsics. The intrinsics are now all tracked in one location.
Hopefully the intrinsics don't need to be regenerated too often, but for
posterity the source currently [lives in a gist][gist], although I suspect that
gist won't continue to compile and work as-is forever.
[gist]: https://gist.github.com/alexcrichton/e7ea67bcdd17ce4b6254e66f77165690
Diffstat (limited to 'src')
-rw-r--r-- | src/asmjs/shared-constants.cpp | 7 | ||||
-rw-r--r-- | src/asmjs/shared-constants.h | 7 | ||||
-rw-r--r-- | src/ir/module-utils.h | 13 | ||||
-rw-r--r-- | src/passes/CMakeLists.txt | 6 | ||||
-rw-r--r-- | src/passes/I64ToI32Lowering.cpp | 360 | ||||
-rw-r--r-- | src/passes/RemoveNonJSOps.cpp | 406 | ||||
-rw-r--r-- | src/passes/intrinsics-module.h | 27 | ||||
-rw-r--r-- | src/passes/wasm-intrinsics.wast | 1113 | ||||
-rw-r--r-- | src/wasm2asm.h | 53 |
9 files changed, 1307 insertions, 685 deletions
diff --git a/src/asmjs/shared-constants.cpp b/src/asmjs/shared-constants.cpp index 87c3574de..32ddb6c2c 100644 --- a/src/asmjs/shared-constants.cpp +++ b/src/asmjs/shared-constants.cpp @@ -94,5 +94,10 @@ cashew::IString GLOBAL("global"), WASM_NEAREST_F32("__wasm_nearest_f32"), WASM_NEAREST_F64("__wasm_nearest_f64"), WASM_TRUNC_F32("__wasm_trunc_f32"), - WASM_TRUNC_F64("__wasm_trunc_f64"); + WASM_TRUNC_F64("__wasm_trunc_f64"), + WASM_I64_MUL("__wasm_i64_mul"), + WASM_I64_SDIV("__wasm_i64_sdiv"), + WASM_I64_UDIV("__wasm_i64_udiv"), + WASM_I64_SREM("__wasm_i64_srem"), + WASM_I64_UREM("__wasm_i64_urem"); } diff --git a/src/asmjs/shared-constants.h b/src/asmjs/shared-constants.h index da2f7aad8..c57b03ea5 100644 --- a/src/asmjs/shared-constants.h +++ b/src/asmjs/shared-constants.h @@ -97,7 +97,12 @@ extern cashew::IString GLOBAL, WASM_NEAREST_F32, WASM_NEAREST_F64, WASM_TRUNC_F32, - WASM_TRUNC_F64; + WASM_TRUNC_F64, + WASM_I64_MUL, + WASM_I64_SDIV, + WASM_I64_UDIV, + WASM_I64_SREM, + WASM_I64_UREM; } #endif // wasm_asmjs_shared_constants_h diff --git a/src/ir/module-utils.h b/src/ir/module-utils.h index 5bcf2ea99..83625809f 100644 --- a/src/ir/module-utils.h +++ b/src/ir/module-utils.h @@ -85,6 +85,19 @@ inline void copyModule(Module& in, Module& out) { out.debugInfoFileNames = in.debugInfoFileNames; } +inline Function* copyFunction(Module& in, Module& out, Name name) { + Function *ret = out.getFunctionOrNull(name); + if (ret != nullptr) { + return ret; + } + auto* curr = in.getFunction(name); + auto* func = new Function(*curr); + func->body = ExpressionManipulator::copy(func->body, out); + func->type = Name(); + out.addFunction(func); + return func; +} + } // namespace ModuleUtils } // namespace wasm diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt index 2c48e3d6e..05ed3821b 100644 --- a/src/passes/CMakeLists.txt +++ b/src/passes/CMakeLists.txt @@ -1,3 +1,8 @@ +add_custom_command( + OUTPUT WasmIntrinsics.cpp + COMMAND python ../../scripts/embedwast.py 
${CMAKE_CURRENT_BINARY_DIR}/WasmIntrinsics.cpp + DEPENDS ../../scripts/embedwast.py wasm-intrinsics.wast) + SET(passes_SOURCES pass.cpp CoalesceLocals.cpp @@ -45,5 +50,6 @@ SET(passes_SOURCES SSAify.cpp Untee.cpp Vacuum.cpp + ${CMAKE_CURRENT_BINARY_DIR}/WasmIntrinsics.cpp ) ADD_LIBRARY(passes STATIC ${passes_SOURCES}) diff --git a/src/passes/I64ToI32Lowering.cpp b/src/passes/I64ToI32Lowering.cpp index 2f6fdf122..f14f33027 100644 --- a/src/passes/I64ToI32Lowering.cpp +++ b/src/passes/I64ToI32Lowering.cpp @@ -118,149 +118,6 @@ struct I64ToI32Lowering : public WalkerPass<PostWalker<I64ToI32Lowering>> { highBits->mutable_ = true; module->addGlobal(highBits); PostWalker<I64ToI32Lowering>::doWalkModule(module); - - if (needRotl64) { - module->addFunction(createRot64(true)); - } - if (needRotr64) { - module->addFunction(createRot64(false)); - } - } - - Function* createRot64(bool leftShift) { - Name name = leftShift ? WASM_ROTL64 : WASM_ROTR64; - - // if rotate == 32 - // hi = leftLow - // lo = leftHigh - // if rotate > 32 - // rotate = rotate - 32 - // hi = (leftLow << rotate) | (leftHigh >> (32 - rotate)) - // lo = (leftHigh << rotate) | (leftLow >> (32 - rotate)) - // else - // hi = (leftHigh << rotate) | (leftLow >> (32 - rotate)) - // lo = (leftLow << rotate) | (leftHigh >> (32 - rotate)) - Index lowBits = 0; - Index highBits = 1; - Index rotate = 2; - Index widthLessRotate = 3; - Binary* is32Rotate = builder->makeBinary( - EqInt32, - builder->makeGetLocal(rotate, i32), - builder->makeConst(Literal(int32_t(32))) - ); - Binary* isLargeRotate = builder->makeBinary( - GeUInt32, - builder->makeGetLocal(rotate, i32), - builder->makeConst(Literal(int32_t(32))) - ); - BinaryOp firstOp = leftShift ? ShlInt32 : ShrUInt32; - BinaryOp secondOp = leftShift ? 
ShrUInt32 : ShlInt32; - Block* equalRotateBlock = builder->blockify( - builder->makeSetGlobal( - INT64_TO_32_HIGH_BITS, - builder->makeGetLocal(lowBits, i32) - ), - builder->makeGetLocal(highBits, i32) - ); - Block* largeRotateBlock = builder->blockify( - builder->makeSetLocal( - rotate, - builder->makeBinary( - SubInt32, - builder->makeGetLocal(rotate, i32), - builder->makeConst(Literal(int32_t(32))) - ) - ), - builder->makeSetLocal( - widthLessRotate, - builder->makeBinary( - SubInt32, - builder->makeConst(Literal(int32_t(32))), - builder->makeGetLocal(rotate, i32) - ) - ), - builder->makeSetGlobal( - INT64_TO_32_HIGH_BITS, - builder->makeBinary( - OrInt32, - builder->makeBinary( - firstOp, - builder->makeGetLocal(lowBits, i32), - builder->makeGetLocal(rotate, i32) - ), - builder->makeBinary( - secondOp, - builder->makeGetLocal(highBits, i32), - builder->makeGetLocal(widthLessRotate, i32) - ) - ) - ), - builder->makeBinary( - OrInt32, - builder->makeBinary( - firstOp, - builder->makeGetLocal(highBits, i32), - builder->makeGetLocal(rotate, i32) - ), - builder->makeBinary( - secondOp, - builder->makeGetLocal(lowBits, i32), - builder->makeGetLocal(widthLessRotate, i32) - ) - ) - ); - Block* smallRotateBlock = builder->blockify( - builder->makeSetLocal( - widthLessRotate, - builder->makeBinary( - SubInt32, - builder->makeConst(Literal(int32_t(32))), - builder->makeGetLocal(rotate, i32) - ) - ), - builder->makeSetGlobal( - INT64_TO_32_HIGH_BITS, - builder->makeBinary( - OrInt32, - builder->makeBinary( - firstOp, - builder->makeGetLocal(highBits, i32), - builder->makeGetLocal(rotate, i32) - ), - builder->makeBinary( - secondOp, - builder->makeGetLocal(lowBits, i32), - builder->makeGetLocal(widthLessRotate, i32) - ) - ) - ), - builder->makeBinary( - OrInt32, - builder->makeBinary( - firstOp, - builder->makeGetLocal(lowBits, i32), - builder->makeGetLocal(rotate, i32) - ), - builder->makeBinary( - secondOp, - builder->makeGetLocal(highBits, i32), - 
builder->makeGetLocal(widthLessRotate, i32) - ) - ) - ); - If* condition = builder->makeIf( - is32Rotate, - equalRotateBlock, - builder->makeIf( - isLargeRotate, - largeRotateBlock, - smallRotateBlock - ) - ); - std::vector<Type> params = {i32, i32, i32}; - std::vector<Type> vars = {i32}; - return builder->makeFunction(name, std::move(params), i32, std::move(vars), condition); } void visitFunctionType(FunctionType* curr) { @@ -937,31 +794,6 @@ struct I64ToI32Lowering : public WalkerPass<PostWalker<I64ToI32Lowering>> { replaceCurrent(result); } - void lowerPopcnt64(Unary* curr) { - TempVar highBits = fetchOutParam(curr->value); - TempVar lowBits = getTemp(); - TempVar highResult = getTemp(); - - SetLocal* setLow = builder->makeSetLocal(lowBits, curr->value); - SetLocal* setHigh = builder->makeSetLocal( - highResult, - builder->makeConst(Literal(int32_t(0))) - ); - - Block* result = builder->blockify( - setLow, - setHigh, - builder->makeBinary( - AddInt32, - builder->makeUnary(PopcntInt32, builder->makeGetLocal(highBits, i32)), - builder->makeUnary(PopcntInt32, builder->makeGetLocal(lowBits, i32)) - ) - ); - - setOutParam(result, std::move(highResult)); - replaceCurrent(result); - } - void lowerCountZeros(Unary* curr) { auto lower = [&](Block* result, UnaryOp op32, TempVar&& first, TempVar&& second) { TempVar highResult = getTemp(); @@ -1014,7 +846,8 @@ struct I64ToI32Lowering : public WalkerPass<PostWalker<I64ToI32Lowering>> { lower(result, ClzInt32, std::move(highBits), std::move(lowBits)); break; case CtzInt64: - lower(result, CtzInt32, std::move(lowBits), std::move(highBits)); + std::cerr << "i64.ctz should be removed already" << std::endl; + WASM_UNREACHABLE(); break; default: abort(); @@ -1055,7 +888,6 @@ struct I64ToI32Lowering : public WalkerPass<PostWalker<I64ToI32Lowering>> { switch (curr->op) { case ClzInt64: case CtzInt64: lowerCountZeros(curr); break; - case PopcntInt64: lowerPopcnt64(curr); break; case EqZInt64: lowerEqZInt64(curr); break; case 
ExtendSInt32: lowerExtendSInt32(curr); break; case ExtendUInt32: lowerExtendUInt32(curr); break; @@ -1070,6 +902,9 @@ struct I64ToI32Lowering : public WalkerPass<PostWalker<I64ToI32Lowering>> { case ConvertSInt64ToFloat64: case ConvertUInt64ToFloat32: case ConvertUInt64ToFloat64: lowerConvertIntToFloat(curr); break; + case PopcntInt64: + std::cerr << "i64.popcnt should already be removed" << std::endl; + WASM_UNREACHABLE(); default: std::cerr << "Unhandled unary operator: " << curr->op << std::endl; abort(); @@ -1161,122 +996,6 @@ struct I64ToI32Lowering : public WalkerPass<PostWalker<I64ToI32Lowering>> { return result; } - Block* lowerMul(Block* result, TempVar&& leftLow, TempVar&& leftHigh, - TempVar&& rightLow, TempVar&& rightHigh) { - // high bits = ll*rh + lh*rl + ll1*rl1 + (ll0*rl1)>>16 + (ll1*rl0)>>16 - // low bits = ll*rl - TempVar leftLow0 = getTemp(); - TempVar leftLow1 = getTemp(); - TempVar rightLow0 = getTemp(); - TempVar rightLow1 = getTemp(); - SetLocal* setLL0 = builder->makeSetLocal( - leftLow0, - builder->makeBinary( - AndInt32, - builder->makeGetLocal(leftLow, i32), - builder->makeConst(Literal(int32_t(0xffff))) - ) - ); - SetLocal* setLL1 = builder->makeSetLocal( - leftLow1, - builder->makeBinary( - ShrUInt32, - builder->makeGetLocal(leftLow, i32), - builder->makeConst(Literal(int32_t(16))) - ) - ); - SetLocal* setRL0 = builder->makeSetLocal( - rightLow0, - builder->makeBinary( - AndInt32, - builder->makeGetLocal(rightLow, i32), - builder->makeConst(Literal(int32_t(0xffff))) - ) - ); - SetLocal* setRL1 = builder->makeSetLocal( - rightLow1, - builder->makeBinary( - ShrUInt32, - builder->makeGetLocal(rightLow, i32), - builder->makeConst(Literal(int32_t(16))) - ) - ); - SetLocal* setLLRH = builder->makeSetLocal( - rightHigh, - builder->makeBinary( - MulInt32, - builder->makeGetLocal(leftLow, i32), - builder->makeGetLocal(rightHigh, i32) - ) - ); - auto addToHighBits = [&](Expression* expr) -> SetLocal* { - return builder->makeSetLocal( - rightHigh, 
- builder->makeBinary( - AddInt32, - builder->makeGetLocal(rightHigh, i32), - expr - ) - ); - }; - SetLocal* addLHRL = addToHighBits( - builder->makeBinary( - MulInt32, - builder->makeGetLocal(leftHigh, i32), - builder->makeGetLocal(rightLow, i32) - ) - ); - SetLocal* addLL1RL1 = addToHighBits( - builder->makeBinary( - MulInt32, - builder->makeGetLocal(leftLow1, i32), - builder->makeGetLocal(rightLow1, i32) - ) - ); - SetLocal* addLL0RL1 = addToHighBits( - builder->makeBinary( - ShrUInt32, - builder->makeBinary( - MulInt32, - builder->makeGetLocal(leftLow0, i32), - builder->makeGetLocal(rightLow1, i32) - ), - builder->makeConst(Literal(int32_t(16))) - ) - ); - SetLocal* addLL1RL0 = addToHighBits( - builder->makeBinary( - ShrUInt32, - builder->makeBinary( - MulInt32, - builder->makeGetLocal(leftLow1, i32), - builder->makeGetLocal(rightLow0, i32) - ), - builder->makeConst(Literal(int32_t(16))) - ) - ); - Binary* getLow = builder->makeBinary( - MulInt32, - builder->makeGetLocal(leftLow, i32), - builder->makeGetLocal(rightLow, i32) - ); - result = builder->blockify( - result, - setLL0, - setLL1, - setRL0, - setRL1, - setLLRH, - addLHRL, - addLL1RL1, - addLL0RL1, - addLL1RL0, - getLow - ); - setOutParam(result, std::move(rightHigh)); - return result; - } - Block* lowerBitwise(BinaryOp op, Block* result, TempVar&& leftLow, TempVar&& leftHigh, TempVar&& rightLow, TempVar&& rightHigh) { @@ -1530,46 +1249,6 @@ struct I64ToI32Lowering : public WalkerPass<PostWalker<I64ToI32Lowering>> { return result; } - Block* lowerRotate(BinaryOp op, Block* result, TempVar&& leftLow, - TempVar&& leftHigh, TempVar&& rightLow, TempVar&& rightHigh) { - assert(op == RotLInt64 || op == RotRInt64); - Name name; - if (op == RotLInt64) { - needRotl64 = true; - name = WASM_ROTL64; - } else { - needRotr64 = true; - name = WASM_ROTR64; - } - TempVar lowResult = getTemp(); - result = builder->blockify( - result, - builder->makeSetLocal( - lowResult, - builder->makeCall( - name, - { - 
builder->makeGetLocal(leftLow, i32), - builder->makeGetLocal(leftHigh, i32), - builder->makeBinary( - AndInt32, - builder->makeGetLocal(rightLow, i32), - builder->makeConst(Literal(int32_t(64 - 1))) - ) - }, - i32 - ) - ), - builder->makeSetLocal( - rightHigh, - builder->makeGetGlobal(INT64_TO_32_HIGH_BITS, i32) - ), - builder->makeGetLocal(lowResult, i32) - ); - setOutParam(result, std::move(rightHigh)); - return result; - } - Block* lowerEq(Block* result, TempVar&& leftLow, TempVar&& leftHigh, TempVar&& rightLow, TempVar&& rightHigh) { return builder->blockify( @@ -1762,17 +1441,16 @@ struct I64ToI32Lowering : public WalkerPass<PostWalker<I64ToI32Lowering>> { std::move(rightLow), std::move(rightHigh))); break; } - case MulInt64: { - replaceCurrent( - lowerMul(result, std::move(leftLow), std::move(leftHigh), - std::move(rightLow), std::move(rightHigh)) - ); - break; - } + case MulInt64: case DivSInt64: case DivUInt64: case RemSInt64: - case RemUInt64: goto err; + case RemUInt64: + case RotLInt64: + case RotRInt64: + std::cerr << "should have been removed by now " << curr->op << std::endl; + WASM_UNREACHABLE(); + case AndInt64: case OrInt64: case XorInt64: { @@ -1792,14 +1470,6 @@ struct I64ToI32Lowering : public WalkerPass<PostWalker<I64ToI32Lowering>> { ); break; } - case RotLInt64: - case RotRInt64: { - replaceCurrent( - lowerRotate(curr->op, result, std::move(leftLow), std::move(leftHigh), - std::move(rightLow), std::move(rightHigh)) - ); - break; - } case EqInt64: { replaceCurrent( lowerEq(result, std::move(leftLow), std::move(leftHigh), @@ -1833,7 +1503,7 @@ struct I64ToI32Lowering : public WalkerPass<PostWalker<I64ToI32Lowering>> { ); break; } - err: default: { + default: { std::cerr << "Unhandled binary op " << curr->op << std::endl; abort(); } @@ -1867,13 +1537,11 @@ struct I64ToI32Lowering : public WalkerPass<PostWalker<I64ToI32Lowering>> { private: std::unique_ptr<Builder> builder; std::unordered_map<Index, Index> indexMap; + std::unordered_map<int, 
std::vector<Index>> freeTemps; std::unordered_map<Expression*, TempVar> highBitVars; std::unordered_map<Name, TempVar> labelHighBitVars; - std::unordered_map<int, std::vector<Index>> freeTemps; std::unordered_map<Index, Type> tempTypes; Index nextTemp; - bool needRotl64 = false; - bool needRotr64 = false; TempVar getTemp(Type ty = i32) { Index ret; diff --git a/src/passes/RemoveNonJSOps.cpp b/src/passes/RemoveNonJSOps.cpp index 77809d6b3..4bd40a6c6 100644 --- a/src/passes/RemoveNonJSOps.cpp +++ b/src/passes/RemoveNonJSOps.cpp @@ -15,341 +15,97 @@ */ // -// // Removes all operations in a wasm module that aren't inherently implementable -// in JS. This includes things like `f32.nearest` and -// `f64.copysign`. Most operations are lowered to a call to an injected +// in JS. This includes things like 64-bit division, `f32.nearest`, +// `f64.copysign`, etc. Most operations are lowered to a call to an injected // intrinsic implementation. Intrinsics don't use themselves to implement // themselves. // +// You'll find a large wast blob in `wasm-intrinsics.wast` next to this file +// which contains all of the injected intrinsics. We manually copy over any +// needed intrinsics from this module into the module that we're optimizing +// after walking the current module. 
+// #include <wasm.h> #include <pass.h> #include "asmjs/shared-constants.h" #include "wasm-builder.h" +#include "wasm-s-parser.h" +#include "ir/module-utils.h" +#include "ir/find_all.h" +#include "passes/intrinsics-module.h" namespace wasm { struct RemoveNonJSOpsPass : public WalkerPass<PostWalker<RemoveNonJSOpsPass>> { - bool needNearestF32 = false; - bool needNearestF64 = false; - bool needTruncF32 = false; - bool needTruncF64 = false; - bool needCtzInt32 = false; - bool needPopcntInt32 = false; - bool needRotLInt32 = false; - bool needRotRInt32 = false; + std::unique_ptr<Builder> builder; + std::unordered_set<Name> neededIntrinsics; bool isFunctionParallel() override { return false; } Pass* create() override { return new RemoveNonJSOpsPass; } void doWalkModule(Module* module) { + // Discover all of the intrinsics that we need to inject, lowering all + // operations to intrinsic calls while we're at it. if (!builder) builder = make_unique<Builder>(*module); PostWalker<RemoveNonJSOpsPass>::doWalkModule(module); - if (needNearestF32) { - module->addFunction(createNearest(f32)); - } - if (needNearestF64) { - module->addFunction(createNearest(f64)); - } - if (needTruncF32) { - module->addFunction(createTrunc(f32)); - } - if (needTruncF64) { - module->addFunction(createTrunc(f64)); - } - if (needCtzInt32) { - module->addFunction(createCtz()); - } - if (needPopcntInt32) { - module->addFunction(createPopcnt()); + if (neededIntrinsics.size() == 0) { + return; } - if (needRotLInt32) { - module->addFunction(createRot(RotLInt32)); - } - if (needRotRInt32) { - module->addFunction(createRot(RotRInt32)); - } - } - - Function *createNearest(Type f) { - // fn nearest(f: float) -> float { - // let ceil = ceil(f); - // let floor = floor(f); - // let fract = f - floor; - // if fract < 0.5 { - // floor - // } else if fract > 0.5 { - // ceil - // } else { - // let rem = floor / 2.0; - // if rem - floor(rem) == 0.0 { - // floor - // } else { - // ceil - // } - // } - // } - Index arg 
= 0; - Index ceil = 1; - Index floor = 2; - Index fract = 3; - Index rem = 4; - - UnaryOp ceilOp = CeilFloat32; - UnaryOp floorOp = FloorFloat32; - BinaryOp subOp = SubFloat32; - BinaryOp ltOp = LtFloat32; - BinaryOp gtOp = GtFloat32; - BinaryOp divOp = DivFloat32; - BinaryOp eqOp = EqFloat32; - Literal litHalf((float) 0.5); - Literal litOne((float) 1.0); - Literal litZero((float) 0.0); - Literal litTwo((float) 2.0); - if (f == f64) { - ceilOp = CeilFloat64; - floorOp = FloorFloat64; - subOp = SubFloat64; - ltOp = LtFloat64; - gtOp = GtFloat64; - divOp = DivFloat64; - eqOp = EqFloat64; - litHalf = Literal((double) 0.5); - litOne = Literal((double) 1.0); - litZero = Literal((double) 0.0); - litTwo = Literal((double) 2.0); + // Parse the embedded wast blob (`IntrinsicsModuleWast`, generated from + // `wasm-intrinsics.wast`). + // + // TODO: only do this once per invocation of wasm2asm + Module intrinsicsModule; + std::string input(IntrinsicsModuleWast); + SExpressionParser parser(const_cast<char*>(input.c_str())); + Element& root = *parser.root; + SExpressionWasmBuilder builder(intrinsicsModule, *root[0]); + + std::set<Name> neededFunctions; + + // Iteratively link intrinsics from `intrinsicsModule` into our destination + // module, as needed. + // + // Note that intrinsics often use one another. For example the 64-bit + // division intrinsic ends up using the 32-bit ctz intrinsic, but does so + // via a native instruction. The loop here is used to continuously reprocess + // injected intrinsics to ensure that they never contain non-js ops when + // we're done. + while (neededIntrinsics.size() > 0) { + // Recursively probe all needed intrinsics for transitively used + // functions. This is building up a set of functions we'll link into our + // module. + for (auto &name : neededIntrinsics) { + addNeededFunctions(intrinsicsModule, name, neededFunctions); + } + neededIntrinsics.clear(); + + // Link in everything that wasn't already linked in. 
After we've done the + // copy we then walk the function to rewrite any non-js operations it has + // as well. + for (auto &name : neededFunctions) { + doWalkFunction(ModuleUtils::copyFunction(intrinsicsModule, *module, name)); + } + neededFunctions.clear(); } - - Expression *body = builder->blockify( - builder->makeSetLocal( - ceil, - builder->makeUnary(ceilOp, builder->makeGetLocal(arg, f)) - ), - builder->makeSetLocal( - floor, - builder->makeUnary(floorOp, builder->makeGetLocal(arg, f)) - ), - builder->makeSetLocal( - fract, - builder->makeBinary( - subOp, - builder->makeGetLocal(arg, f), - builder->makeGetLocal(floor, f) - ) - ), - builder->makeIf( - builder->makeBinary( - ltOp, - builder->makeGetLocal(fract, f), - builder->makeConst(litHalf) - ), - builder->makeGetLocal(floor, f), - builder->makeIf( - builder->makeBinary( - gtOp, - builder->makeGetLocal(fract, f), - builder->makeConst(litHalf) - ), - builder->makeGetLocal(ceil, f), - builder->blockify( - builder->makeSetLocal( - rem, - builder->makeBinary( - divOp, - builder->makeGetLocal(floor, f), - builder->makeConst(litTwo) - ) - ), - builder->makeIf( - builder->makeBinary( - eqOp, - builder->makeBinary( - subOp, - builder->makeGetLocal(rem, f), - builder->makeUnary( - floorOp, - builder->makeGetLocal(rem, f) - ) - ), - builder->makeConst(litZero) - ), - builder->makeGetLocal(floor, f), - builder->makeGetLocal(ceil, f) - ) - ) - ) - ) - ); - std::vector<Type> params = {f}; - std::vector<Type> vars = {f, f, f, f, f}; - Name name = f == f32 ? 
WASM_NEAREST_F32 : WASM_NEAREST_F64; - return builder->makeFunction(name, std::move(params), f, std::move(vars), body); } - Function *createTrunc(Type f) { - // fn trunc(f: float) -> float { - // if f < 0.0 { - // ceil(f) - // } else { - // floor(f) - // } - // } - - Index arg = 0; - - UnaryOp ceilOp = CeilFloat32; - UnaryOp floorOp = FloorFloat32; - BinaryOp ltOp = LtFloat32; - Literal litZero((float) 0.0); - if (f == f64) { - ceilOp = CeilFloat64; - floorOp = FloorFloat64; - ltOp = LtFloat64; - litZero = Literal((double) 0.0); + void addNeededFunctions(Module &m, Name name, std::set<Name> &needed) { + if (needed.count(name)) { + return; } + needed.insert(name); - Expression *body = builder->makeIf( - builder->makeBinary( - ltOp, - builder->makeGetLocal(arg, f), - builder->makeConst(litZero) - ), - builder->makeUnary(ceilOp, builder->makeGetLocal(arg, f)), - builder->makeUnary(floorOp, builder->makeGetLocal(arg, f)) - ); - std::vector<Type> params = {f}; - std::vector<Type> vars = {}; - Name name = f == f32 ? 
WASM_TRUNC_F32 : WASM_TRUNC_F64; - return builder->makeFunction(name, std::move(params), f, std::move(vars), body); - } - - Function* createCtz() { - // if eqz(x) then 32 else (32 - clz(x ^ (x - 1))) - Binary* xorExp = builder->makeBinary( - XorInt32, - builder->makeGetLocal(0, i32), - builder->makeBinary( - SubInt32, - builder->makeGetLocal(0, i32), - builder->makeConst(Literal(int32_t(1))) - ) - ); - Binary* subExp = builder->makeBinary( - SubInt32, - builder->makeConst(Literal(int32_t(32 - 1))), - builder->makeUnary(ClzInt32, xorExp) - ); - If* body = builder->makeIf( - builder->makeUnary( - EqZInt32, - builder->makeGetLocal(0, i32) - ), - builder->makeConst(Literal(int32_t(32))), - subExp - ); - return builder->makeFunction( - WASM_CTZ32, - std::vector<NameType>{NameType("x", i32)}, - i32, - std::vector<NameType>{}, - body - ); - } - - Function* createPopcnt() { - // popcnt implemented as: - // int c; for (c = 0; x != 0; c++) { x = x & (x - 1) }; return c - Name loopName("l"); - Name blockName("b"); - Break* brIf = builder->makeBreak( - blockName, - builder->makeGetLocal(1, i32), - builder->makeUnary( - EqZInt32, - builder->makeGetLocal(0, i32) - ) - ); - SetLocal* update = builder->makeSetLocal( - 0, - builder->makeBinary( - AndInt32, - builder->makeGetLocal(0, i32), - builder->makeBinary( - SubInt32, - builder->makeGetLocal(0, i32), - builder->makeConst(Literal(int32_t(1))) - ) - ) - ); - SetLocal* inc = builder->makeSetLocal( - 1, - builder->makeBinary( - AddInt32, - builder->makeGetLocal(1, i32), - builder->makeConst(Literal(1)) - ) - ); - Break* cont = builder->makeBreak(loopName); - Loop* loop = builder->makeLoop( - loopName, - builder->blockify(builder->makeDrop(brIf), update, inc, cont) - ); - Block* loopBlock = builder->blockifyWithName(loop, blockName); - // TODO: not sure why this is necessary... 
- loopBlock->type = i32; - SetLocal* initCount = builder->makeSetLocal(1, builder->makeConst(Literal(0))); - return builder->makeFunction( - WASM_POPCNT32, - std::vector<NameType>{NameType("x", i32)}, - i32, - std::vector<NameType>{NameType("count", i32)}, - builder->blockify(initCount, loopBlock) - ); - } - - Function* createRot(BinaryOp op) { - // left rotate is: - // (((((~0) >>> k) & x) << k) | ((((~0) << (w - k)) & x) >>> (w - k))) - // where k is shift modulo w. reverse shifts for right rotate - bool isLRot = op == RotLInt32; - BinaryOp lshift = isLRot ? ShlInt32 : ShrUInt32; - BinaryOp rshift = isLRot ? ShrUInt32 : ShlInt32; - Literal widthMask(int32_t(32 - 1)); - Literal width(int32_t(32)); - auto shiftVal = [&]() { - return builder->makeBinary( - AndInt32, - builder->makeGetLocal(1, i32), - builder->makeConst(widthMask) - ); - }; - auto widthSub = [&]() { - return builder->makeBinary(SubInt32, builder->makeConst(width), shiftVal()); - }; - auto fullMask = [&]() { - return builder->makeConst(Literal(~int32_t(0))); - }; - Binary* maskRShift = builder->makeBinary(rshift, fullMask(), shiftVal()); - Binary* lowMask = builder->makeBinary(AndInt32, maskRShift, builder->makeGetLocal(0, i32)); - Binary* lowShift = builder->makeBinary(lshift, lowMask, shiftVal()); - Binary* maskLShift = builder->makeBinary(lshift, fullMask(), widthSub()); - Binary* highMask = - builder->makeBinary(AndInt32, maskLShift, builder->makeGetLocal(0, i32)); - Binary* highShift = builder->makeBinary(rshift, highMask, widthSub()); - Binary* body = builder->makeBinary(OrInt32, lowShift, highShift); - return builder->makeFunction( - isLRot ? 
WASM_ROTL32 : WASM_ROTR32, - std::vector<NameType>{NameType("x", i32), - NameType("k", i32)}, - i32, - std::vector<NameType>{}, - body - ); + auto function = m.getFunction(name); + FindAll<Call> calls(function->body); + for (auto &call : calls.list) { + this->addNeededFunctions(m, call->target, needed); + } } void doWalkFunction(Function* func) { @@ -366,16 +122,36 @@ struct RemoveNonJSOpsPass : public WalkerPass<PostWalker<RemoveNonJSOpsPass>> { return; case RotLInt32: - needRotLInt32 = true; name = WASM_ROTL32; break; case RotRInt32: - needRotRInt32 = true; name = WASM_ROTR32; break; + case RotLInt64: + name = WASM_ROTL64; + break; + case RotRInt64: + name = WASM_ROTR64; + break; + case MulInt64: + name = WASM_I64_MUL; + break; + case DivSInt64: + name = WASM_I64_SDIV; + break; + case DivUInt64: + name = WASM_I64_UDIV; + break; + case RemSInt64: + name = WASM_I64_SREM; + break; + case RemUInt64: + name = WASM_I64_UREM; + break; default: return; } + neededIntrinsics.insert(name); replaceCurrent(builder->makeCall(name, {curr->left, curr->right}, curr->type)); } @@ -435,40 +211,38 @@ struct RemoveNonJSOpsPass : public WalkerPass<PostWalker<RemoveNonJSOpsPass>> { Name functionCall; switch (curr->op) { case NearestFloat32: - needNearestF32 = true; functionCall = WASM_NEAREST_F32; break; case NearestFloat64: - needNearestF64 = true; functionCall = WASM_NEAREST_F64; break; case TruncFloat32: - needTruncF32 = true; functionCall = WASM_TRUNC_F32; break; case TruncFloat64: - needTruncF64 = true; functionCall = WASM_TRUNC_F64; break; + case PopcntInt64: + functionCall = WASM_POPCNT64; + break; case PopcntInt32: - needPopcntInt32 = true; functionCall = WASM_POPCNT32; break; + case CtzInt64: + functionCall = WASM_CTZ64; + break; case CtzInt32: - needCtzInt32 = true; functionCall = WASM_CTZ32; break; default: return; } + neededIntrinsics.insert(functionCall); replaceCurrent(builder->makeCall(functionCall, {curr->value}, curr->type)); } - -private: - std::unique_ptr<Builder> 
builder; }; Pass *createRemoveNonJSOpsPass() { diff --git a/src/passes/intrinsics-module.h b/src/passes/intrinsics-module.h new file mode 100644 index 000000000..c9a757dc0 --- /dev/null +++ b/src/passes/intrinsics-module.h @@ -0,0 +1,27 @@ +/* + * Copyright 2018 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef passes_intrinsics_module_h +#define passes_intrinsics_module_h + +namespace wasm { + +extern const char* IntrinsicsModuleWast; + +} // namespace wasm + +#endif // passes_intrinsics_module_h + diff --git a/src/passes/wasm-intrinsics.wast b/src/passes/wasm-intrinsics.wast new file mode 100644 index 000000000..8cd14d51d --- /dev/null +++ b/src/passes/wasm-intrinsics.wast @@ -0,0 +1,1113 @@ +;; A large WAST blob which contains the implementations of all the intrinsics +;; that we inject as part of this module. This blob was generated from a Rust +;; program [1] which uses the Rust compiler-builtins project. It's not +;; necessarily perfect but gets the job done! 
The idea here is that we inject +;; these pretty early so they can continue to be optimized by further passes +;; (aka inlining and whatnot) +;; +;; [1]: https://gist.github.com/alexcrichton/e7ea67bcdd17ce4b6254e66f77165690 + +(module + (type $0 (func (param i64 i64) (result i64))) + (type $1 (func (param f32) (result f32))) + (type $2 (func (param f64) (result f64))) + (type $3 (func (param i32) (result i32))) + (type $4 (func (param i32 i32) (result i32))) + (import "env" "memory" (memory $0 17)) + (export "__wasm_i64_sdiv" (func $__wasm_i64_sdiv)) + (export "__wasm_i64_udiv" (func $__wasm_i64_udiv)) + (export "__wasm_i64_srem" (func $__wasm_i64_srem)) + (export "__wasm_i64_urem" (func $__wasm_i64_urem)) + (export "__wasm_i64_mul" (func $__wasm_i64_mul)) + (export "__wasm_trunc_f32" (func $__wasm_trunc_f32)) + (export "__wasm_trunc_f64" (func $__wasm_trunc_f64)) + (export "__wasm_ctz_i32" (func $__wasm_ctz_i32)) + (export "__wasm_ctz_i64" (func $__wasm_ctz_i64)) + (export "__wasm_rotl_i32" (func $__wasm_rotl_i32)) + (export "__wasm_rotr_i32" (func $__wasm_rotr_i32)) + (export "__wasm_rotl_i64" (func $__wasm_rotl_i64)) + (export "__wasm_rotr_i64" (func $__wasm_rotr_i64)) + (export "__wasm_nearest_f32" (func $__wasm_nearest_f32)) + (export "__wasm_nearest_f64" (func $__wasm_nearest_f64)) + (export "__wasm_popcnt_i32" (func $__wasm_popcnt_i32)) + (export "__wasm_popcnt_i64" (func $__wasm_popcnt_i64)) + + ;; lowering of the i32.popcnt instruction, counts the number of bits set in the + ;; input and returns the result + (func $__wasm_popcnt_i32 (param $var$0 i32) (result i32) + (local $var$1 i32) + (block $label$1 (result i32) + (loop $label$2 + (drop + (br_if $label$1 + (get_local $var$1) + (i32.eqz + (get_local $var$0) + ) + ) + ) + (set_local $var$0 + (i32.and + (get_local $var$0) + (i32.sub + (get_local $var$0) + (i32.const 1) + ) + ) + ) + (set_local $var$1 + (i32.add + (get_local $var$1) + (i32.const 1) + ) + ) + (br $label$2) + ) + ) + ) + ;; lowering of the 
i64.popcnt instruction, counts the number of bits set in the + ;; input and returns the result + (func $__wasm_popcnt_i64 (param $var$0 i64) (result i64) + (local $var$1 i64) + (block $label$1 (result i64) + (loop $label$2 + (drop + (br_if $label$1 + (get_local $var$1) + (i64.eqz + (get_local $var$0) + ) + ) + ) + (set_local $var$0 + (i64.and + (get_local $var$0) + (i64.sub + (get_local $var$0) + (i64.const 1) + ) + ) + ) + (set_local $var$1 + (i64.add + (get_local $var$1) + (i64.const 1) + ) + ) + (br $label$2) + ) + ) + ) + ;; lowering of the i64.div_s instruction, return $var$0 / $var$1 + (func $__wasm_i64_sdiv (; 0 ;) (type $0) (param $var$0 i64) (param $var$1 i64) (result i64) + (call $_ZN17compiler_builtins3int4sdiv3Div3div17he78fc483e41d7ec7E + (get_local $var$0) + (get_local $var$1) + ) + ) + ;; lowering of the i64.div_u instruction, return $var$0 / $var$1 + (func $__wasm_i64_udiv (; 1 ;) (type $0) (param $var$0 i64) (param $var$1 i64) (result i64) + (call $_ZN17compiler_builtins3int4udiv10divmod_u6417h6026910b5ed08e40E + (get_local $var$0) + (get_local $var$1) + ) + ) + ;; lowering of the i64.rem_s instruction, return $var$0 % $var$1 + (func $__wasm_i64_srem (; 2 ;) (type $0) (param $var$0 i64) (param $var$1 i64) (result i64) + (call $_ZN17compiler_builtins3int4sdiv3Mod4mod_17h2cbb7bbf36e41d68E + (get_local $var$0) + (get_local $var$1) + ) + ) + ;; lowering of the i64.rem_u instruction, return $var$0 % $var$1 + (func $__wasm_i64_urem (; 3 ;) (type $0) (param $var$0 i64) (param $var$1 i64) (result i64) + (drop + (call $_ZN17compiler_builtins3int4udiv10divmod_u6417h6026910b5ed08e40E + (get_local $var$0) + (get_local $var$1) + ) + ) + (i64.load + (i32.const 1024) + ) + ) + ;; lowering of the i64.mul instruction, return $var$0 * $var$1 + (func $__wasm_i64_mul (; 4 ;) (type $0) (param $var$0 i64) (param $var$1 i64) (result i64) + (call $_ZN17compiler_builtins3int3mul3Mul3mul17h070e9a1c69faec5bE + (get_local $var$0) + (get_local $var$1) + ) + ) + ;; lowering of the 
f32.trunc instruction, rounds to the nearest integer, + ;; towards zero + (func $__wasm_trunc_f32 (; 5 ;) (type $1) (param $var$0 f32) (result f32) + (select + (f32.ceil + (get_local $var$0) + ) + (f32.floor + (get_local $var$0) + ) + (f32.lt + (get_local $var$0) + (f32.const 0) + ) + ) + ) + ;; lowering of the f64.trunc instruction, rounds to the nearest integer, + ;; towards zero + (func $__wasm_trunc_f64 (; 6 ;) (type $2) (param $var$0 f64) (result f64) + (select + (f64.ceil + (get_local $var$0) + ) + (f64.floor + (get_local $var$0) + ) + (f64.lt + (get_local $var$0) + (f64.const 0) + ) + ) + ) + ;; lowering of the i32.ctz instruction, counting the number of trailing zero bits in $var$0 + (func $__wasm_ctz_i32 (; 7 ;) (type $3) (param $var$0 i32) (result i32) + (if + (get_local $var$0) + (return + (i32.sub + (i32.const 31) + (i32.clz + (i32.xor + (i32.add + (get_local $var$0) + (i32.const -1) + ) + (get_local $var$0) + ) + ) + ) + ) + ) + (i32.const 32) + ) + ;; lowering of the i64.ctz instruction, counting the number of trailing zero bits in $var$0 + (func $__wasm_ctz_i64 (; 8 ;) (type $4) (param $var$0 i64) (result i64) + (if + (i32.eqz + (i64.eqz + (get_local $var$0) + ) + ) + (return + (i64.sub + (i64.const 63) + (i64.clz + (i64.xor + (i64.add + (get_local $var$0) + (i64.const -1) + ) + (get_local $var$0) + ) + ) + ) + ) + ) + (i64.const 64) + ) + ;; lowering of the i32.rotl instruction, rotating the first argument, with + ;; wraparound, by the second argument + (func $__wasm_rotl_i32 (; 8 ;) (type $4) (param $var$0 i32) (param $var$1 i32) (result i32) + (local $var$2 i32) + (i32.or + (i32.shl + (i32.and + (i32.shr_u + (i32.const -1) + (tee_local $var$2 + (i32.and + (get_local $var$1) + (i32.const 31) + ) + ) + ) + (get_local $var$0) + ) + (get_local $var$2) + ) + (i32.shr_u + (i32.and + (i32.shl + (i32.const -1) + (tee_local $var$1 + (i32.and + (i32.sub + (i32.const 0) + (get_local $var$1) + ) + (i32.const 31) + ) + ) + ) + (get_local $var$0) + ) + (get_local $var$1) + ) + ) + ) + 
;; lowering of the i32.rotr instruction, rotating the first argument, with + ;; wraparound, by the second argument + (func $__wasm_rotr_i32 (; 9 ;) (type $4) (param $var$0 i32) (param $var$1 i32) (result i32) + (local $var$2 i32) + (i32.or + (i32.shr_u + (i32.and + (i32.shl + (i32.const -1) + (tee_local $var$2 + (i32.and + (get_local $var$1) + (i32.const 31) + ) + ) + ) + (get_local $var$0) + ) + (get_local $var$2) + ) + (i32.shl + (i32.and + (i32.shr_u + (i32.const -1) + (tee_local $var$1 + (i32.and + (i32.sub + (i32.const 0) + (get_local $var$1) + ) + (i32.const 31) + ) + ) + ) + (get_local $var$0) + ) + (get_local $var$1) + ) + ) + ) + ;; lowering of the i64.rotl instruction, rotating the first argument, with + ;; wraparound, by the second argument + (func $__wasm_rotl_i64 (; 10 ;) (type $0) (param $var$0 i64) (param $var$1 i64) (result i64) + (local $var$2 i64) + (i64.or + (i64.shl + (i64.and + (i64.shr_u + (i64.const -1) + (tee_local $var$2 + (i64.and + (get_local $var$1) + (i64.const 63) + ) + ) + ) + (get_local $var$0) + ) + (get_local $var$2) + ) + (i64.shr_u + (i64.and + (i64.shl + (i64.const -1) + (tee_local $var$1 + (i64.and + (i64.sub + (i64.const 0) + (get_local $var$1) + ) + (i64.const 63) + ) + ) + ) + (get_local $var$0) + ) + (get_local $var$1) + ) + ) + ) + ;; lowering of the i64.rotr instruction, rotating the first argument, with + ;; wraparound, by the second argument + (func $__wasm_rotr_i64 (; 11 ;) (type $0) (param $var$0 i64) (param $var$1 i64) (result i64) + (local $var$2 i64) + (i64.or + (i64.shr_u + (i64.and + (i64.shl + (i64.const -1) + (tee_local $var$2 + (i64.and + (get_local $var$1) + (i64.const 63) + ) + ) + ) + (get_local $var$0) + ) + (get_local $var$2) + ) + (i64.shl + (i64.and + (i64.shr_u + (i64.const -1) + (tee_local $var$1 + (i64.and + (i64.sub + (i64.const 0) + (get_local $var$1) + ) + (i64.const 63) + ) + ) + ) + (get_local $var$0) + ) + (get_local $var$1) + ) + ) + ) + ;; lowering of the f32.nearest instruction, rounding the 
input to the nearest + ;; integer while breaking ties by rounding to even + (func $__wasm_nearest_f32 (; 12 ;) (type $1) (param $var$0 f32) (result f32) + (local $var$1 f32) + (local $var$2 f32) + (if + (i32.eqz + (f32.lt + (tee_local $var$2 + (f32.sub + (get_local $var$0) + (tee_local $var$1 + (f32.floor + (get_local $var$0) + ) + ) + ) + ) + (f32.const 0.5) + ) + ) + (block + (set_local $var$0 + (f32.ceil + (get_local $var$0) + ) + ) + (if + (f32.gt + (get_local $var$2) + (f32.const 0.5) + ) + (return + (get_local $var$0) + ) + ) + (set_local $var$1 + (select + (get_local $var$1) + (get_local $var$0) + (f32.eq + (f32.sub + (tee_local $var$2 + (f32.mul + (get_local $var$1) + (f32.const 0.5) + ) + ) + (f32.floor + (get_local $var$2) + ) + ) + (f32.const 0) + ) + ) + ) + ) + ) + (get_local $var$1) + ) + ;; lowering of the f64.nearest instruction, rounding the input to the nearest + ;; integer while breaking ties by rounding to even + (func $__wasm_nearest_f64 (; 13 ;) (type $2) (param $var$0 f64) (result f64) + (local $var$1 f64) + (local $var$2 f64) + (if + (i32.eqz + (f64.lt + (tee_local $var$2 + (f64.sub + (get_local $var$0) + (tee_local $var$1 + (f64.floor + (get_local $var$0) + ) + ) + ) + ) + (f64.const 0.5) + ) + ) + (block + (set_local $var$0 + (f64.ceil + (get_local $var$0) + ) + ) + (if + (f64.gt + (get_local $var$2) + (f64.const 0.5) + ) + (return + (get_local $var$0) + ) + ) + (set_local $var$1 + (select + (get_local $var$1) + (get_local $var$0) + (f64.eq + (f64.sub + (tee_local $var$2 + (f64.mul + (get_local $var$1) + (f64.const 0.5) + ) + ) + (f64.floor + (get_local $var$2) + ) + ) + (f64.const 0) + ) + ) + ) + ) + ) + (get_local $var$1) + ) + (func $_ZN17compiler_builtins3int4udiv10divmod_u6417h6026910b5ed08e40E (; 14 ;) (type $0) (param $var$0 i64) (param $var$1 i64) (result i64) + (local $var$2 i32) + (local $var$3 i32) + (local $var$4 i32) + (local $var$5 i64) + (local $var$6 i64) + (local $var$7 i64) + (local $var$8 i64) + (block $label$1 + (block 
$label$2 + (block $label$3 + (block $label$4 + (block $label$5 + (block $label$6 + (block $label$7 + (block $label$8 + (block $label$9 + (block $label$10 + (block $label$11 + (if + (tee_local $var$2 + (i32.wrap/i64 + (i64.shr_u + (get_local $var$0) + (i64.const 32) + ) + ) + ) + (block + (br_if $label$11 + (i32.eqz + (tee_local $var$3 + (i32.wrap/i64 + (get_local $var$1) + ) + ) + ) + ) + (br_if $label$9 + (i32.eqz + (tee_local $var$4 + (i32.wrap/i64 + (i64.shr_u + (get_local $var$1) + (i64.const 32) + ) + ) + ) + ) + ) + (br_if $label$8 + (i32.le_u + (tee_local $var$2 + (i32.sub + (i32.clz + (get_local $var$4) + ) + (i32.clz + (get_local $var$2) + ) + ) + ) + (i32.const 31) + ) + ) + (br $label$2) + ) + ) + (br_if $label$2 + (i64.ge_u + (get_local $var$1) + (i64.const 4294967296) + ) + ) + (i64.store + (i32.const 1024) + (i64.extend_u/i32 + (i32.sub + (tee_local $var$2 + (i32.wrap/i64 + (get_local $var$0) + ) + ) + (i32.mul + (tee_local $var$2 + (i32.div_u + (get_local $var$2) + (tee_local $var$3 + (i32.wrap/i64 + (get_local $var$1) + ) + ) + ) + ) + (get_local $var$3) + ) + ) + ) + ) + (return + (i64.extend_u/i32 + (get_local $var$2) + ) + ) + ) + (set_local $var$3 + (i32.wrap/i64 + (i64.shr_u + (get_local $var$1) + (i64.const 32) + ) + ) + ) + (br_if $label$7 + (i32.eqz + (i32.wrap/i64 + (get_local $var$0) + ) + ) + ) + (br_if $label$6 + (i32.eqz + (get_local $var$3) + ) + ) + (br_if $label$6 + (i32.and + (tee_local $var$4 + (i32.add + (get_local $var$3) + (i32.const -1) + ) + ) + (get_local $var$3) + ) + ) + (i64.store + (i32.const 1024) + (i64.or + (i64.shl + (i64.extend_u/i32 + (i32.and + (get_local $var$4) + (get_local $var$2) + ) + ) + (i64.const 32) + ) + (i64.and + (get_local $var$0) + (i64.const 4294967295) + ) + ) + ) + (return + (i64.extend_u/i32 + (i32.shr_u + (get_local $var$2) + (i32.and + (i32.ctz + (get_local $var$3) + ) + (i32.const 31) + ) + ) + ) + ) + ) + (unreachable) + ) + (br_if $label$5 + (i32.eqz + (i32.and + (tee_local $var$4 + (i32.add 
+ (get_local $var$3) + (i32.const -1) + ) + ) + (get_local $var$3) + ) + ) + ) + (set_local $var$3 + (i32.sub + (i32.const 0) + (tee_local $var$2 + (i32.sub + (i32.add + (i32.clz + (get_local $var$3) + ) + (i32.const 33) + ) + (i32.clz + (get_local $var$2) + ) + ) + ) + ) + ) + (br $label$3) + ) + (set_local $var$3 + (i32.sub + (i32.const 63) + (get_local $var$2) + ) + ) + (set_local $var$2 + (i32.add + (get_local $var$2) + (i32.const 1) + ) + ) + (br $label$3) + ) + (i64.store + (i32.const 1024) + (i64.shl + (i64.extend_u/i32 + (i32.sub + (get_local $var$2) + (i32.mul + (tee_local $var$4 + (i32.div_u + (get_local $var$2) + (get_local $var$3) + ) + ) + (get_local $var$3) + ) + ) + ) + (i64.const 32) + ) + ) + (return + (i64.extend_u/i32 + (get_local $var$4) + ) + ) + ) + (br_if $label$4 + (i32.lt_u + (tee_local $var$2 + (i32.sub + (i32.clz + (get_local $var$3) + ) + (i32.clz + (get_local $var$2) + ) + ) + ) + (i32.const 31) + ) + ) + (br $label$2) + ) + (i64.store + (i32.const 1024) + (i64.extend_u/i32 + (i32.and + (get_local $var$4) + (i32.wrap/i64 + (get_local $var$0) + ) + ) + ) + ) + (br_if $label$1 + (i32.eq + (get_local $var$3) + (i32.const 1) + ) + ) + (return + (i64.shr_u + (get_local $var$0) + (i64.extend_u/i32 + (i32.ctz + (get_local $var$3) + ) + ) + ) + ) + ) + (set_local $var$3 + (i32.sub + (i32.const 63) + (get_local $var$2) + ) + ) + (set_local $var$2 + (i32.add + (get_local $var$2) + (i32.const 1) + ) + ) + ) + (set_local $var$5 + (i64.shr_u + (get_local $var$0) + (i64.extend_u/i32 + (i32.and + (get_local $var$2) + (i32.const 63) + ) + ) + ) + ) + (set_local $var$0 + (i64.shl + (get_local $var$0) + (i64.extend_u/i32 + (i32.and + (get_local $var$3) + (i32.const 63) + ) + ) + ) + ) + (block $label$13 + (if + (get_local $var$2) + (block + (set_local $var$8 + (i64.add + (get_local $var$1) + (i64.const -1) + ) + ) + (loop $label$15 + (set_local $var$5 + (i64.sub + (tee_local $var$5 + (i64.or + (i64.shl + (get_local $var$5) + (i64.const 1) + ) + 
(i64.shr_u + (get_local $var$0) + (i64.const 63) + ) + ) + ) + (i64.and + (tee_local $var$6 + (i64.shr_s + (i64.sub + (get_local $var$8) + (get_local $var$5) + ) + (i64.const 63) + ) + ) + (get_local $var$1) + ) + ) + ) + (set_local $var$0 + (i64.or + (i64.shl + (get_local $var$0) + (i64.const 1) + ) + (get_local $var$7) + ) + ) + (set_local $var$7 + (tee_local $var$6 + (i64.and + (get_local $var$6) + (i64.const 1) + ) + ) + ) + (br_if $label$15 + (tee_local $var$2 + (i32.add + (get_local $var$2) + (i32.const -1) + ) + ) + ) + ) + (br $label$13) + ) + ) + ) + (i64.store + (i32.const 1024) + (get_local $var$5) + ) + (return + (i64.or + (i64.shl + (get_local $var$0) + (i64.const 1) + ) + (get_local $var$6) + ) + ) + ) + (i64.store + (i32.const 1024) + (get_local $var$0) + ) + (set_local $var$0 + (i64.const 0) + ) + ) + (get_local $var$0) + ) + (func $_ZN17compiler_builtins3int3mul3Mul3mul17h070e9a1c69faec5bE (; 15 ;) (type $0) (param $var$0 i64) (param $var$1 i64) (result i64) + (local $var$2 i32) + (local $var$3 i32) + (local $var$4 i32) + (local $var$5 i32) + (local $var$6 i32) + (i64.or + (i64.shl + (i64.extend_u/i32 + (i32.add + (i32.add + (i32.add + (i32.add + (i32.mul + (tee_local $var$4 + (i32.shr_u + (tee_local $var$2 + (i32.wrap/i64 + (get_local $var$1) + ) + ) + (i32.const 16) + ) + ) + (tee_local $var$5 + (i32.shr_u + (tee_local $var$3 + (i32.wrap/i64 + (get_local $var$0) + ) + ) + (i32.const 16) + ) + ) + ) + (i32.mul + (get_local $var$2) + (i32.wrap/i64 + (i64.shr_u + (get_local $var$0) + (i64.const 32) + ) + ) + ) + ) + (i32.mul + (i32.wrap/i64 + (i64.shr_u + (get_local $var$1) + (i64.const 32) + ) + ) + (get_local $var$3) + ) + ) + (i32.shr_u + (tee_local $var$2 + (i32.add + (i32.shr_u + (tee_local $var$6 + (i32.mul + (tee_local $var$2 + (i32.and + (get_local $var$2) + (i32.const 65535) + ) + ) + (tee_local $var$3 + (i32.and + (get_local $var$3) + (i32.const 65535) + ) + ) + ) + ) + (i32.const 16) + ) + (i32.mul + (get_local $var$2) + (get_local 
$var$5) + ) + ) + ) + (i32.const 16) + ) + ) + (i32.shr_u + (tee_local $var$2 + (i32.add + (i32.and + (get_local $var$2) + (i32.const 65535) + ) + (i32.mul + (get_local $var$4) + (get_local $var$3) + ) + ) + ) + (i32.const 16) + ) + ) + ) + (i64.const 32) + ) + (i64.extend_u/i32 + (i32.or + (i32.shl + (get_local $var$2) + (i32.const 16) + ) + (i32.and + (get_local $var$6) + (i32.const 65535) + ) + ) + ) + ) + ) + (func $_ZN17compiler_builtins3int4sdiv3Div3div17he78fc483e41d7ec7E (; 16 ;) (type $0) (param $var$0 i64) (param $var$1 i64) (result i64) + (local $var$2 i64) + (i64.sub + (i64.xor + (i64.div_u + (i64.sub + (i64.xor + (tee_local $var$2 + (i64.shr_s + (get_local $var$0) + (i64.const 63) + ) + ) + (get_local $var$0) + ) + (get_local $var$2) + ) + (i64.sub + (i64.xor + (tee_local $var$2 + (i64.shr_s + (get_local $var$1) + (i64.const 63) + ) + ) + (get_local $var$1) + ) + (get_local $var$2) + ) + ) + (tee_local $var$0 + (i64.shr_s + (i64.xor + (get_local $var$1) + (get_local $var$0) + ) + (i64.const 63) + ) + ) + ) + (get_local $var$0) + ) + ) + (func $_ZN17compiler_builtins3int4sdiv3Mod4mod_17h2cbb7bbf36e41d68E (; 17 ;) (type $0) (param $var$0 i64) (param $var$1 i64) (result i64) + (local $var$2 i64) + (i64.sub + (i64.xor + (i64.rem_u + (i64.sub + (i64.xor + (tee_local $var$2 + (i64.shr_s + (get_local $var$0) + (i64.const 63) + ) + ) + (get_local $var$0) + ) + (get_local $var$2) + ) + (i64.sub + (i64.xor + (tee_local $var$0 + (i64.shr_s + (get_local $var$1) + (i64.const 63) + ) + ) + (get_local $var$1) + ) + (get_local $var$0) + ) + ) + (get_local $var$2) + ) + (get_local $var$2) + ) + ) + ;; custom section "linking", size 3 +) diff --git a/src/wasm2asm.h b/src/wasm2asm.h index 465262d5b..956ded5e8 100644 --- a/src/wasm2asm.h +++ b/src/wasm2asm.h @@ -29,6 +29,8 @@ #include "asmjs/asmangle.h" #include "wasm.h" #include "wasm-builder.h" +#include "wasm-io.h" +#include "wasm-validator.h" #include "emscripten-optimizer/optimizer.h" #include "mixed_arena.h" 
#include "asm_v_wasm.h" @@ -228,7 +230,11 @@ private: Ref Wasm2AsmBuilder::processWasm(Module* wasm) { PassRunner runner(wasm); runner.add<AutoDrop>(); - runner.add("remove-non-js-ops"); // must be before i64-to-i32 + // First up remove as many non-JS operations as we can, including things like + // 64-bit integer multiplication/division, `f32.nearest` instructions, etc. + // This may inject intrinsics which use i64 so it needs to be run before the + // i64-to-i32 lowering pass. + runner.add("remove-non-js-ops"); // Currently the i64-to-i32 lowering pass requires that `flatten` be run before // it can produce correct code. For some more details about this see #1480 runner.add("flatten"); @@ -239,6 +245,16 @@ Ref Wasm2AsmBuilder::processWasm(Module* wasm) { runner.add("vacuum"); runner.setDebug(flags.debug); runner.run(); + + // Make sure we didn't corrupt anything if we're in --allow-asserts mode (aka + // tests) +#ifndef NDEBUG + if (!WasmValidator().validate(*wasm)) { + WasmPrinter::printModule(wasm); + Fatal() << "error in validating input"; + } +#endif + Ref ret = ValueBuilder::makeToplevel(); Ref asmFunc = ValueBuilder::makeFunction(ASM_FUNC); ret[1]->push_back(asmFunc); @@ -486,6 +502,17 @@ void Wasm2AsmBuilder::addGlobal(Ref ast, Global* global) { } } +static bool expressionEndsInReturn(Expression *e) { + if (e->is<Return>()) { + return true; + } + if (!e->is<Block>()) { + return false; + } + ExpressionList* stats = &static_cast<Block*>(e)->list; + return expressionEndsInReturn((*stats)[stats->size()-1]); +} + Ref Wasm2AsmBuilder::processFunction(Function* func) { if (flags.debug) { static int fns = 0; @@ -530,12 +557,7 @@ Ref Wasm2AsmBuilder::processFunction(Function* func) { ); }; scanFunctionBody(func->body); - bool isBodyBlock = func->body->is<Block>(); - ExpressionList* stats = isBodyBlock ? 
- &static_cast<Block*>(func->body)->list : nullptr; - bool endsInReturn = - (isBodyBlock && ((*stats)[stats->size()-1]->is<Return>())) || - func->body->is<Return>(); + bool endsInReturn = expressionEndsInReturn(func->body); if (endsInReturn) { // return already taken care of flattenAppend(ret, processFunctionBody(func, NO_RESULT)); @@ -1244,21 +1266,10 @@ Ref Wasm2AsmBuilder::processFunctionBody(Function* func, IString result) { visit(curr->value, EXPRESSION_RESULT) ); case CtzInt32: - return makeSigning( - ValueBuilder::makeCall( - WASM_CTZ32, - visit(curr->value, EXPRESSION_RESULT) - ), - ASM_SIGNED - ); case PopcntInt32: - return makeSigning( - ValueBuilder::makeCall( - WASM_POPCNT32, - visit(curr->value, EXPRESSION_RESULT) - ), - ASM_SIGNED - ); + std::cerr << "i32 unary should have been removed: " << curr + << std::endl; + WASM_UNREACHABLE(); case EqZInt32: return ValueBuilder::makeBinary( makeAsmCoercion(visit(curr->value, |