author    Max Graey <maxgraey@gmail.com>  2021-09-14 19:24:34 +0300
committer GitHub <noreply@github.com>     2021-09-14 16:24:34 +0000
commit    9e2f13a2d14284cbc3e6dd4d5f891fa5df895436 (patch)
tree      de0a114f1d3bf1affee5ff42b2a78fc5a803c19b /src
parent    181b0e7037bc2999a24cb45a02523f3da04b254d (diff)
[OptimizeInstructions] Optimize memory.fill with constant arguments (#4130)
This is a reland of #3071.

Do similar optimizations as in #3038, but for memory.fill:

`memory.fill(d, v, 0)` ==> `{ drop(d), drop(v) }` (only with `ignoreImplicitTraps` or `trapsNeverHappen`)
`memory.fill(d, v, 1)` ==> `store8(d, v)`

Further simplifications can be done only if `v` is constant, because otherwise binary size would increase:

`memory.fill(d, C, 1)` ==> `store8(d, (C & 0xFF))`
`memory.fill(d, C, 2)` ==> `store16(d, (C & 0xFF) * 0x0101)`
`memory.fill(d, C, 4)` ==> `store32(d, (C & 0xFF) * 0x01010101)`
`memory.fill(d, C, 8)` ==> `store64(d, (C & 0xFF) * 0x0101010101010101)`
`memory.fill(d, C, 16)` ==> `store128(d, i8x16.splat(C & 0xFF))`
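The multiplications above are all the same byte-splat trick: multiplying the masked low byte by a constant of the form 0x01...01 copies that byte into every byte of the result. A minimal standalone C++ check of the identity (illustrative only, not part of the patch):

```cpp
#include <cassert>
#include <cstdint>

// Byte-splat identity behind the constant multiplications above:
// (c & 0xFF) * 0x01...01 replicates the low byte of c across every
// byte of the result. (Standalone illustration, not Binaryen code.)
int main() {
  uint32_t c = 0x1AB; // only the low byte (0xAB) is ever stored
  uint32_t b = c & 0xFF;
  assert(b * 0x0101U == 0xABABU);                             // store16
  assert(b * 0x01010101U == 0xABABABABU);                     // store32
  assert(b * 0x0101010101010101ULL == 0xABABABABABABABABULL); // store64
  return 0;
}
```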
Diffstat (limited to 'src')
-rw-r--r--  src/passes/OptimizeInstructions.cpp | 127
1 file changed, 126 insertions(+), 1 deletion(-)
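One semantic note before the patch body: because every byte of the splatted constant is identical, a single wide store writes exactly the bytes the fill would have written, so byte order never matters. A host-side sketch of that equivalence for the 8-byte case (illustrative, not Binaryen code):

```cpp
#include <cassert>
#include <cstdint>
#include <cstring>

// memory.fill(d, C, 8) writes the byte (C & 0xFF) into 8 consecutive
// locations; the lowering instead performs one i64.store of the
// splatted constant. Since all bytes of that constant are equal,
// both leave the same bytes in memory on any host.
int main() {
  unsigned char filled[8], stored[8];
  uint32_t c = 0x5A;
  std::memset(filled, c & 0xFF, sizeof filled);        // what memory.fill does
  uint64_t splat = (c & 0xFF) * 0x0101010101010101ULL; // store64 constant
  std::memcpy(stored, &splat, sizeof stored);          // what i64.store does
  assert(std::memcmp(filled, stored, sizeof filled) == 0);
  return 0;
}
```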
diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp
index 768ea5702..217312501 100644
--- a/src/passes/OptimizeInstructions.cpp
+++ b/src/passes/OptimizeInstructions.cpp
@@ -1158,6 +1158,16 @@ struct OptimizeInstructions
}
}
+ void visitMemoryFill(MemoryFill* curr) {
+ if (curr->type == Type::unreachable) {
+ return;
+ }
+ assert(getModule()->features.hasBulkMemory());
+ if (auto* ret = optimizeMemoryFill(curr)) {
+ return replaceCurrent(ret);
+ }
+ }
+
void visitCallRef(CallRef* curr) {
if (curr->target->type == Type::unreachable) {
// The call_ref is not reached; leave this for DCE.
@@ -2863,7 +2873,7 @@ private:
// memory.copy(dst, src, C) ==> store(dst, load(src))
if (auto* csize = memCopy->size->dynCast<Const>()) {
- auto bytes = csize->value.geti32();
+ auto bytes = csize->value.getInteger();
Builder builder(*getModule());
switch (bytes) {
@@ -2919,6 +2929,121 @@ private:
return nullptr;
}
+ Expression* optimizeMemoryFill(MemoryFill* memFill) {
+ if (!memFill->size->is<Const>()) {
+ return nullptr;
+ }
+
+ PassOptions options = getPassOptions();
+ Builder builder(*getModule());
+
+ auto* csize = memFill->size->cast<Const>();
+ auto bytes = csize->value.getInteger();
+
+ if (bytes == 0LL &&
+ (options.ignoreImplicitTraps || options.trapsNeverHappen)) {
+ // memory.fill(d, v, 0) ==> { drop(d), drop(v) }
+ return builder.makeBlock(
+ {builder.makeDrop(memFill->dest), builder.makeDrop(memFill->value)});
+ }
+
+ const uint32_t offset = 0, align = 1;
+
+ if (auto* cvalue = memFill->value->dynCast<Const>()) {
+ uint32_t value = cvalue->value.geti32() & 0xFF;
+ // memory.fill(d, C1, C2) ==>
+ // store(d, (C1 & 0xFF) replicated into each of the C2 bytes)
+ switch (bytes) {
+ case 1: {
+ return builder.makeStore(1, // bytes
+ offset,
+ align,
+ memFill->dest,
+ builder.makeConst<uint32_t>(value),
+ Type::i32);
+ }
+ case 2: {
+ return builder.makeStore(2,
+ offset,
+ align,
+ memFill->dest,
+ builder.makeConst<uint32_t>(value * 0x0101U),
+ Type::i32);
+ }
+ case 4: {
+ // Transform only when the value or shrinkLevel is zero, since
+ // otherwise this could increase code size by several bytes
+ if (value == 0 || options.shrinkLevel == 0) {
+ return builder.makeStore(
+ 4,
+ offset,
+ align,
+ memFill->dest,
+ builder.makeConst<uint32_t>(value * 0x01010101U),
+ Type::i32);
+ }
+ break;
+ }
+ case 8: {
+ // Transform only when the value or shrinkLevel is zero, since
+ // otherwise this could increase code size by several bytes
+ if (value == 0 || options.shrinkLevel == 0) {
+ return builder.makeStore(
+ 8,
+ offset,
+ align,
+ memFill->dest,
+ builder.makeConst<uint64_t>(value * 0x0101010101010101ULL),
+ Type::i64);
+ }
+ break;
+ }
+ case 16: {
+ if (options.shrinkLevel == 0) {
+ if (getModule()->features.hasSIMD()) {
+ uint8_t values[16];
+ std::fill_n(values, 16, (uint8_t)value);
+ return builder.makeStore(16,
+ offset,
+ align,
+ memFill->dest,
+ builder.makeConst<uint8_t[16]>(values),
+ Type::v128);
+ } else {
+ // { i64.store(d, C', offset=0), i64.store(d, C', offset=8) }
+ return builder.makeBlock({
+ builder.makeStore(
+ 8,
+ offset,
+ align,
+ memFill->dest,
+ builder.makeConst<uint64_t>(value * 0x0101010101010101ULL),
+ Type::i64),
+ builder.makeStore(
+ 8,
+ offset + 8,
+ align,
+ memFill->dest,
+ builder.makeConst<uint64_t>(value * 0x0101010101010101ULL),
+ Type::i64),
+ });
+ }
+ }
+ break;
+ }
+ default: {
+ }
+ }
+ }
+ // memory.fill(d, v, 1) ==> store8(d, v)
+ if (bytes == 1LL) {
+ return builder.makeStore(
+ 1, offset, align, memFill->dest, memFill->value, Type::i32);
+ }
+
+ return nullptr;
+ }
+
// given a binary expression with equal children and no side effects in
// either, we can fold various things
Expression* optimizeBinaryWithEqualEffectlessChildren(Binary* binary) {