author     Alon Zakai <azakai@google.com>             2019-05-02 11:41:34 -0700
committer  GitHub <noreply@github.com>                2019-05-02 11:41:34 -0700
commit     01a4bfdb5c28d54fd480d603cba2d35c943a0bf5 (patch)
tree       0bdeeb9096c8c7dec33a990022f920ebadfbd252 /src
parent     3b4d9013c6c2dd6cfa90e02e2307a758a0f91140 (diff)
Add a pass to lower unaligned loads and stores (#2078)
This replaces the wasm2js code that lowered unaligned operations pessimistically to 1-byte-aligned loads and stores. The new pass does the optimal thing, keeping 2-byte alignment where possible.
This is also nicer as a standalone pass: it has the simple property that after it runs, all loads and stores are aligned, rather than leaving that logic scattered inside wasm2js.
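For intuition, here is what the lowering amounts to on the load side, written as a minimal C++ sketch rather than as Binaryen IR (the function name and byte math are illustrative assumptions, not part of the patch): an i32 load at 2-byte alignment becomes two aligned 16-bit loads recombined with a shift and an OR, the same shape visitLoad below builds with OrInt32/ShlInt32.

    #include <cstdint>

    // Illustrative sketch only: read a 32-bit little-endian value (wasm
    // memory is little-endian) from an address that is 2-byte aligned but
    // possibly not 4-byte aligned, using two aligned 16-bit loads plus a
    // shift and an OR - the recombination the pass emits in wasm.
    uint32_t loadU32Align2(const uint16_t* p) {
      uint32_t lo = p[0];      // aligned load of the low half
      uint32_t hi = p[1];      // aligned load of the high half
      return lo | (hi << 16);  // recombine, as OrInt32(lo, ShlInt32(hi, 16))
    }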
Diffstat (limited to 'src')
-rw-r--r--  src/passes/AlignmentLowering.cpp  214
-rw-r--r--  src/passes/CMakeLists.txt           1
-rw-r--r--  src/passes/pass.cpp                 3
-rw-r--r--  src/passes/passes.h                 1
-rw-r--r--  src/wasm2js.h                     102
5 files changed, 224 insertions, 97 deletions
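The store side is symmetric; as a hedged C++ sketch (again illustrative, not the pass's own code), an i32 store at 2-byte alignment splits the value into two aligned 16-bit stores, mirroring the ShrUInt32 + makeStore sequence in visitStore in the diff below.

    #include <cstdint>

    // Illustrative sketch only: write a 32-bit little-endian value to an
    // address that is 2-byte aligned but possibly not 4-byte aligned, as
    // two aligned 16-bit stores.
    void storeU32Align2(uint16_t* p, uint32_t v) {
      p[0] = static_cast<uint16_t>(v);        // low half at offset 0
      p[1] = static_cast<uint16_t>(v >> 16);  // high half at offset + 2
    }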
diff --git a/src/passes/AlignmentLowering.cpp b/src/passes/AlignmentLowering.cpp
new file mode 100644
index 000000000..fc03a8b74
--- /dev/null
+++ b/src/passes/AlignmentLowering.cpp
@@ -0,0 +1,214 @@
+/*
+ * Copyright 2017 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// Lowers unaligned loads and stores into aligned loads and stores
+// that are smaller. This leaves only aligned operations.
+//
+
+#include "ir/bits.h"
+#include "pass.h"
+#include "wasm-builder.h"
+#include "wasm.h"
+
+namespace wasm {
+
+struct AlignmentLowering : public WalkerPass<PostWalker<AlignmentLowering>> {
+  void visitLoad(Load* curr) {
+    if (curr->align == 0 || curr->align == curr->bytes) {
+      return;
+    }
+    Builder builder(*getModule());
+    if (curr->type == unreachable) {
+      replaceCurrent(curr->ptr);
+      return;
+    }
+    assert(curr->type == i32); // TODO: i64, f32, f64
+    auto temp = builder.addVar(getFunction(), i32);
+    Expression* ret;
+    if (curr->bytes == 2) {
+      ret = builder.makeBinary(
+        OrInt32,
+        builder.makeLoad(
+          1, false, curr->offset, 1, builder.makeGetLocal(temp, i32), i32),
+        builder.makeBinary(ShlInt32,
+                           builder.makeLoad(1,
+                                            false,
+                                            curr->offset + 1,
+                                            1,
+                                            builder.makeGetLocal(temp, i32),
+                                            i32),
+                           builder.makeConst(Literal(int32_t(8)))));
+      if (curr->signed_) {
+        ret = Bits::makeSignExt(ret, 2, *getModule());
+      }
+    } else if (curr->bytes == 4) {
+      if (curr->align == 1) {
+        ret = builder.makeBinary(
+          OrInt32,
+          builder.makeBinary(
+            OrInt32,
+            builder.makeLoad(
+              1, false, curr->offset, 1, builder.makeGetLocal(temp, i32), i32),
+            builder.makeBinary(ShlInt32,
+                               builder.makeLoad(1,
+                                                false,
+                                                curr->offset + 1,
+                                                1,
+                                                builder.makeGetLocal(temp, i32),
+                                                i32),
+                               builder.makeConst(Literal(int32_t(8))))),
+          builder.makeBinary(
+            OrInt32,
+            builder.makeBinary(ShlInt32,
+                               builder.makeLoad(1,
+                                                false,
+                                                curr->offset + 2,
+                                                1,
+                                                builder.makeGetLocal(temp, i32),
+                                                i32),
+                               builder.makeConst(Literal(int32_t(16)))),
+            builder.makeBinary(ShlInt32,
+                               builder.makeLoad(1,
+                                                false,
+                                                curr->offset + 3,
+                                                1,
+                                                builder.makeGetLocal(temp, i32),
+                                                i32),
+                               builder.makeConst(Literal(int32_t(24))))));
+      } else if (curr->align == 2) {
+        ret = builder.makeBinary(
+          OrInt32,
+          builder.makeLoad(
+            2, false, curr->offset, 2, builder.makeGetLocal(temp, i32), i32),
+          builder.makeBinary(ShlInt32,
+                             builder.makeLoad(2,
+                                              false,
+                                              curr->offset + 2,
+                                              2,
+                                              builder.makeGetLocal(temp, i32),
+                                              i32),
+                             builder.makeConst(Literal(int32_t(16)))));
+      } else {
+        WASM_UNREACHABLE();
+      }
+    } else {
+      WASM_UNREACHABLE();
+    }
+    replaceCurrent(
+      builder.makeBlock({builder.makeSetLocal(temp, curr->ptr), ret}));
+  }
+
+  void visitStore(Store* curr) {
+    if (curr->align == 0 || curr->align == curr->bytes) {
+      return;
+    }
+    Builder builder(*getModule());
+    if (curr->type == unreachable) {
+      replaceCurrent(builder.makeBlock(
+        {builder.makeDrop(curr->ptr), builder.makeDrop(curr->value)}));
+      return;
+    }
+    assert(curr->value->type == i32); // TODO: i64, f32, f64
+    auto tempPtr = builder.addVar(getFunction(), i32);
+    auto tempValue = builder.addVar(getFunction(), i32);
+    auto* block =
+      builder.makeBlock({builder.makeSetLocal(tempPtr, curr->ptr),
+                         builder.makeSetLocal(tempValue, curr->value)});
+    if (curr->bytes == 2) {
+      block->list.push_back(
+        builder.makeStore(1,
+                          curr->offset,
+                          1,
+                          builder.makeGetLocal(tempPtr, i32),
+                          builder.makeGetLocal(tempValue, i32),
+                          i32));
+      block->list.push_back(builder.makeStore(
+        1,
+        curr->offset + 1,
+        1,
+        builder.makeGetLocal(tempPtr, i32),
+        builder.makeBinary(ShrUInt32,
+                           builder.makeGetLocal(tempValue, i32),
+                           builder.makeConst(Literal(int32_t(8)))),
+        i32));
+    } else if (curr->bytes == 4) {
+      if (curr->align == 1) {
+        block->list.push_back(
+          builder.makeStore(1,
+                            curr->offset,
+                            1,
+                            builder.makeGetLocal(tempPtr, i32),
+                            builder.makeGetLocal(tempValue, i32),
+                            i32));
+        block->list.push_back(builder.makeStore(
+          1,
+          curr->offset + 1,
+          1,
+          builder.makeGetLocal(tempPtr, i32),
+          builder.makeBinary(ShrUInt32,
+                             builder.makeGetLocal(tempValue, i32),
+                             builder.makeConst(Literal(int32_t(8)))),
+          i32));
+        block->list.push_back(builder.makeStore(
+          1,
+          curr->offset + 2,
+          1,
+          builder.makeGetLocal(tempPtr, i32),
+          builder.makeBinary(ShrUInt32,
+                             builder.makeGetLocal(tempValue, i32),
+                             builder.makeConst(Literal(int32_t(16)))),
+          i32));
+        block->list.push_back(builder.makeStore(
+          1,
+          curr->offset + 3,
+          1,
+          builder.makeGetLocal(tempPtr, i32),
+          builder.makeBinary(ShrUInt32,
+                             builder.makeGetLocal(tempValue, i32),
+                             builder.makeConst(Literal(int32_t(24)))),
+          i32));
+      } else if (curr->align == 2) {
+        block->list.push_back(
+          builder.makeStore(2,
+                            curr->offset,
+                            2,
+                            builder.makeGetLocal(tempPtr, i32),
+                            builder.makeGetLocal(tempValue, i32),
+                            i32));
+        block->list.push_back(builder.makeStore(
+          2,
+          curr->offset + 2,
+          2,
+          builder.makeGetLocal(tempPtr, i32),
+          builder.makeBinary(ShrUInt32,
+                             builder.makeGetLocal(tempValue, i32),
+                             builder.makeConst(Literal(int32_t(16)))),
+          i32));
+      } else {
+        WASM_UNREACHABLE();
+      }
+    } else {
+      WASM_UNREACHABLE();
+    }
+    block->finalize();
+    replaceCurrent(block);
+  }
+};
+
+Pass* createAlignmentLoweringPass() { return new AlignmentLowering(); }
+
+} // namespace wasm
diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt
index 8a4b04de8..935c3bec8 100644
--- a/src/passes/CMakeLists.txt
+++ b/src/passes/CMakeLists.txt
@@ -5,6 +5,7 @@ add_custom_command(
 
 SET(passes_SOURCES
   pass.cpp
+  AlignmentLowering.cpp
   CoalesceLocals.cpp
   CodePushing.cpp
   CodeFolding.cpp
diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp
index 7cfb21dfe..e4fbc5343 100644
--- a/src/passes/pass.cpp
+++ b/src/passes/pass.cpp
@@ -69,6 +69,9 @@ std::string PassRegistry::getPassDescription(std::string name) {
 void PassRegistry::registerPasses() {
   registerPass(
     "dae", "removes arguments to calls in an lto-like manner", createDAEPass);
+  registerPass("alignment-lowering",
+               "lower unaligned loads and stores to smaller aligned ones",
+               createAlignmentLoweringPass);
   registerPass("dae-optimizing",
                "removes arguments to calls in an lto-like manner, and "
                "optimizes where we removed",
diff --git a/src/passes/passes.h b/src/passes/passes.h
index fc01c1cd5..e562f4a42 100644
--- a/src/passes/passes.h
+++ b/src/passes/passes.h
@@ -22,6 +22,7 @@ namespace wasm {
 class Pass;
 
 // All passes:
+Pass* createAlignmentLoweringPass();
 Pass* createCoalesceLocalsPass();
 Pass* createCoalesceLocalsWithLearningPass();
 Pass* createCodeFoldingPass();
diff --git a/src/wasm2js.h b/src/wasm2js.h
index 1a5050af4..f15e8f9b7 100644
--- a/src/wasm2js.h
+++ b/src/wasm2js.h
@@ -284,6 +284,7 @@ Ref Wasm2JSBuilder::processWasm(Module* wasm, Name funcName) {
   // #1480
   runner.add("flatten");
   runner.add("i64-to-i32-lowering");
+  runner.add("alignment-lowering");
   // Next, optimize that as best we can. This should not generate
   // non-JS-friendly things.
   if (options.optimizeLevel > 0) {
@@ -982,41 +983,8 @@ Ref Wasm2JSBuilder::processFunctionBody(Module* m,
   }
 
   Ref visitLoad(Load* curr) {
-    if (curr->align != 0 && curr->align < curr->bytes) {
-      // set the pointer to a local
-      ScopedTemp temp(i32, parent, func);
-      SetLocal set(allocator);
-      set.index = func->getLocalIndex(temp.getName());
-      set.value = curr->ptr;
-      Ref ptrSet = visit(&set, NO_RESULT);
-      GetLocal get(allocator);
-      get.index = func->getLocalIndex(temp.getName());
-      // fake loads
-      Load load = *curr;
-      load.ptr = &get;
-      load.bytes = 1; // do the worst
-      load.signed_ = false;
-      Ref rest;
-      switch (curr->type) {
-        case i32: {
-          rest = makeAsmCoercion(visit(&load, EXPRESSION_RESULT), ASM_INT);
-          for (size_t i = 1; i < curr->bytes; i++) {
-            ++load.offset;
-            Ref add =
-              makeAsmCoercion(visit(&load, EXPRESSION_RESULT), ASM_INT);
-            add = ValueBuilder::makeBinary(
-              add, LSHIFT, ValueBuilder::makeNum(8 * i));
-            rest = ValueBuilder::makeBinary(rest, OR, add);
-          }
-          break;
-        }
-        default: {
-          std::cerr << "Unhandled type in load: " << curr->type << std::endl;
-          abort();
-        }
-      }
-      return ValueBuilder::makeSeq(ptrSet, rest);
-    }
+    // Unaligned loads and stores must have been fixed up already.
+    assert(curr->align == 0 || curr->align == curr->bytes);
     // normal load
     Ref ptr = makePointer(curr->ptr, curr->offset);
     Ref ret;
@@ -1110,68 +1078,8 @@ Ref Wasm2JSBuilder::processFunctionBody(Module* m,
     }
     // FIXME if memory growth, store ptr cannot contain a function call
     //       also other stores to memory, check them, all makeSub's
-    if (curr->align != 0 && curr->align < curr->bytes) {
-      // set the pointer to a local
-      ScopedTemp temp(i32, parent, func);
-      SetLocal set(allocator);
-      set.index = func->getLocalIndex(temp.getName());
-      set.value = curr->ptr;
-      Ref ptrSet = visit(&set, NO_RESULT);
-      GetLocal get(allocator);
-      get.index = func->getLocalIndex(temp.getName());
-      // set the value to a local
-      ScopedTemp tempValue(curr->value->type, parent, func);
-      SetLocal setValue(allocator);
-      setValue.index = func->getLocalIndex(tempValue.getName());
-      setValue.value = curr->value;
-      Ref valueSet = visit(&setValue, NO_RESULT);
-      GetLocal getValue(allocator);
-      getValue.index = func->getLocalIndex(tempValue.getName());
-      // fake stores
-      Store store = *curr;
-      store.ptr = &get;
-      store.bytes = 1; // do the worst
-      Ref rest;
-      switch (curr->valueType) {
-        case i32: {
-          Const _255(allocator);
-          _255.value = Literal(int32_t(255));
-          _255.type = i32;
-          for (size_t i = 0; i < curr->bytes; i++) {
-            Const shift(allocator);
-            shift.value = Literal(int32_t(8 * i));
-            shift.type = i32;
-            Binary shifted(allocator);
-            shifted.op = ShrUInt32;
-            shifted.left = &getValue;
-            shifted.right = &shift;
-            shifted.type = i32;
-            Binary anded(allocator);
-            anded.op = AndInt32;
-            anded.left = i > 0 ? static_cast<Expression*>(&shifted)
-                               : static_cast<Expression*>(&getValue);
-            anded.right = &_255;
-            anded.type = i32;
-            store.value = &anded;
-            Ref part = visit(&store, NO_RESULT);
-            if (i == 0) {
-              rest = part;
-            } else {
-              rest = ValueBuilder::makeSeq(rest, part);
-            }
-            ++store.offset;
-          }
-          break;
-        }
-        default: {
-          std::cerr << "Unhandled type in store: " << curr->valueType
-                    << std::endl;
-          abort();
-        }
-      }
-      return ValueBuilder::makeSeq(ValueBuilder::makeSeq(ptrSet, valueSet),
-                                   rest);
-    }
+    // Unaligned loads and stores must have been fixed up already.
+    assert(curr->align == 0 || curr->align == curr->bytes);
     // normal store
     Ref ptr = makePointer(curr->ptr, curr->offset);
     Ref value = visit(curr->value, EXPRESSION_RESULT);
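Since the pass is registered under the name "alignment-lowering" in pass.cpp, it is not tied to wasm2js. A minimal sketch of running it programmatically, assuming an already-constructed wasm::Module (module setup elided; the function name is illustrative):

    #include "pass.h"
    #include "wasm.h"

    // Run the new pass over a module, the same way wasm2js.h now schedules
    // it on its PassRunner.
    void lowerUnalignedAccesses(wasm::Module& module) {
      wasm::PassRunner runner(&module);
      runner.add("alignment-lowering"); // name registered in pass.cpp above
      runner.run();
    }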