diff options
-rw-r--r-- | src/passes/OptimizeInstructions.cpp | 68 | ||||
-rw-r--r-- | test/passes/optimize-instructions_all-features.txt | 67 | ||||
-rw-r--r-- | test/passes/optimize-instructions_all-features.wast | 73 |
3 files changed, 208 insertions, 0 deletions
diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp
index ad111433d..b8f038dac 100644
--- a/src/passes/OptimizeInstructions.cpp
+++ b/src/passes/OptimizeInstructions.cpp
@@ -710,6 +710,11 @@ struct OptimizeInstructions
           store->value = unary->value;
         }
       }
+    } else if (auto* memCopy = curr->dynCast<MemoryCopy>()) {
+      assert(features.hasBulkMemory());
+      if (auto* ret = optimizeMemoryCopy(memCopy)) {
+        return ret;
+      }
     }
     return nullptr;
   }
@@ -1393,6 +1398,69 @@ private:
     return binary;
   }
 
+  // Tries to replace a memory.copy with cheaper code: a nop when dest and
+  // source are the same expression, or a single load/store pair when the
+  // size is a small constant. Returns the replacement expression, or nullptr
+  // when no rewrite applies.
+  Expression* optimizeMemoryCopy(MemoryCopy* memCopy) {
+    FeatureSet features = getModule()->features;
+    auto hasEffects = [&](Expression* expr) {
+      return EffectAnalyzer(getPassOptions(), features, expr).hasSideEffects();
+    };
+
+    // memory.copy(x, x, sz)  ==>  nop
+    // The whole instruction is dropped, so the size operand must be free of
+    // side effects too (dest equals source, so checking dest covers both).
+    // NOTE(review): this and the zero-size case below also remove any trap
+    // the memory.copy itself could raise - confirm that is acceptable here.
+    if (ExpressionAnalyzer::equal(memCopy->dest, memCopy->source) &&
+        !hasEffects(memCopy->dest) && !hasEffects(memCopy->size)) {
+      return ExpressionManipulator::nop(memCopy);
+    }
+
+    // memory.copy(dst, src, C)  ==>  store(dst, load(src))
+    auto* csize = memCopy->size->dynCast<Const>();
+    if (!csize) {
+      return nullptr;
+    }
+    // Read as i32; values not listed in the switch below are left alone.
+    auto bytes = csize->value.geti32();
+    Builder builder(*getModule());
+    // Builds an unaligned (align=1), zero-offset load/store pair of `type`.
+    auto copyAs = [&](Type type) -> Expression* {
+      return builder.makeStore(
+        bytes,
+        0, // offset
+        1, // align
+        memCopy->dest,
+        builder.makeLoad(bytes, false, 0, 1, memCopy->source, type),
+        type);
+    };
+
+    switch (bytes) {
+      case 0:
+        // Nothing is copied; keep the operands only for their side effects.
+        return builder.makeBlock({builder.makeDrop(memCopy->dest),
+                                  builder.makeDrop(memCopy->source)});
+      case 1:
+      case 2:
+      case 4:
+        return copyAs(Type::i32);
+      case 8:
+        return copyAs(Type::i64);
+      case 16:
+        // The v128 form is 2 bytes larger than memory.copy, so use it only
+        // when shrinkLevel is 0, and only if SIMD is available.
+        if (getPassOptions().shrinkLevel == 0 && features.hasSIMD()) {
+          return copyAs(Type::v128);
+        }
+        break;
+      default:
+        break;
+    }
+    return nullptr;
+  }
+
   // given
a binary expression with equal children and no side effects in // either, we can fold various things // TODO: trinaries, things like (x & (y & x)) ? diff --git a/test/passes/optimize-instructions_all-features.txt b/test/passes/optimize-instructions_all-features.txt index e08702b91..b97106f2d 100644 --- a/test/passes/optimize-instructions_all-features.txt +++ b/test/passes/optimize-instructions_all-features.txt @@ -10,6 +10,7 @@ (type $i64_=>_i64 (func (param i64) (result i64))) (type $i32_i64_f32_=>_none (func (param i32 i64 f32))) (type $i32_i64_f32_f64_=>_none (func (param i32 i64 f32 f64))) + (type $i32_i32_i32_=>_none (func (param i32 i32 i32))) (type $i32_i32_f64_f64_=>_none (func (param i32 i32 f64 f64))) (type $i32_i64_f64_i32_=>_none (func (param i32 i64 f64 i32))) (type $none_=>_f64 (func (result f64))) @@ -3727,6 +3728,72 @@ ) ) ) + (func $optimize-bulk-memory-copy (param $dst i32) (param $src i32) (param $sz i32) + (nop) + (block + (drop + (local.get $dst) + ) + (drop + (local.get $src) + ) + ) + (i32.store8 + (local.get $dst) + (i32.load8_u + (local.get $src) + ) + ) + (i32.store16 align=1 + (local.get $dst) + (i32.load16_u align=1 + (local.get $src) + ) + ) + (memory.copy + (local.get $dst) + (local.get $src) + (i32.const 3) + ) + (i32.store align=1 + (local.get $dst) + (i32.load align=1 + (local.get $src) + ) + ) + (memory.copy + (local.get $dst) + (local.get $src) + (i32.const 5) + ) + (memory.copy + (local.get $dst) + (local.get $src) + (i32.const 6) + ) + (memory.copy + (local.get $dst) + (local.get $src) + (i32.const 7) + ) + (i64.store align=1 + (local.get $dst) + (i64.load align=1 + (local.get $src) + ) + ) + (v128.store align=1 + (local.get $dst) + (v128.load align=1 + (local.get $src) + ) + ) + (memory.copy + (local.get $dst) + (local.get $src) + (local.get $sz) + ) + ) ) (module (type $none_=>_none (func)) diff --git a/test/passes/optimize-instructions_all-features.wast b/test/passes/optimize-instructions_all-features.wast index 
2790cc9ea..b62ecdcfd 100644 --- a/test/passes/optimize-instructions_all-features.wast +++ b/test/passes/optimize-instructions_all-features.wast @@ -4233,6 +4233,79 @@ ) )) ) + (func $optimize-bulk-memory-copy (param $dst i32) (param $src i32) (param $sz i32) + (memory.copy ;; nop + (local.get $dst) + (local.get $dst) + (local.get $sz) + ) + + (memory.copy ;; nop + (local.get $dst) + (local.get $src) + (i32.const 0) + ) + + (memory.copy + (local.get $dst) + (local.get $src) + (i32.const 1) + ) + + (memory.copy + (local.get $dst) + (local.get $src) + (i32.const 2) + ) + + (memory.copy + (local.get $dst) + (local.get $src) + (i32.const 3) + ) + + (memory.copy + (local.get $dst) + (local.get $src) + (i32.const 4) + ) + + (memory.copy + (local.get $dst) + (local.get $src) + (i32.const 5) + ) + + (memory.copy + (local.get $dst) + (local.get $src) + (i32.const 6) + ) + + (memory.copy + (local.get $dst) + (local.get $src) + (i32.const 7) + ) + + (memory.copy + (local.get $dst) + (local.get $src) + (i32.const 8) + ) + + (memory.copy + (local.get $dst) + (local.get $src) + (i32.const 16) + ) + + (memory.copy ;; skip + (local.get $dst) + (local.get $src) + (local.get $sz) + ) + ) ) (module (import "env" "memory" (memory $0 (shared 256 256))) |