summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/passes/OptimizeInstructions.cpp68
-rw-r--r--test/passes/optimize-instructions_all-features.txt67
-rw-r--r--test/passes/optimize-instructions_all-features.wast73
3 files changed, 208 insertions, 0 deletions
diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp
index ad111433d..b8f038dac 100644
--- a/src/passes/OptimizeInstructions.cpp
+++ b/src/passes/OptimizeInstructions.cpp
@@ -710,6 +710,11 @@ struct OptimizeInstructions
store->value = unary->value;
}
}
+ } else if (auto* memCopy = curr->dynCast<MemoryCopy>()) {
+ assert(features.hasBulkMemory());
+ if (auto* ret = optimizeMemoryCopy(memCopy)) {
+ return ret;
+ }
}
return nullptr;
}
@@ -1393,6 +1398,69 @@ private:
return binary;
}
+  // Tries to replace a memory.copy with a cheaper equivalent. Returns the
+  // replacement expression, or nullptr when no rewrite applies.
+  //
+  //  * memory.copy(x, x, sz), with x and sz free of side effects  ==>  nop
+  //  * memory.copy(dst, src, 0)  ==>  { drop(dst), drop(src) }
+  //  * memory.copy(dst, src, C) for C in {1, 2, 4, 8}  ==>  an align=1
+  //    store(dst, load(src)) of width C. C == 16 is handled the same way
+  //    with v128, but only when SIMD is enabled and shrinkLevel is 0.
+  //
+  // NOTE(review): the nop and size-0 rewrites discard the copy entirely. If
+  // memory.copy is expected to trap on out-of-bounds addresses even in
+  // those cases, they presumably need to be limited to builds that ignore
+  // implicit traps - confirm against the pass's trap policy.
+  Expression* optimizeMemoryCopy(MemoryCopy* memCopy) {
+    FeatureSet features = getModule()->features;
+    auto hasSideEffects = [&](Expression* expr) {
+      return EffectAnalyzer(getPassOptions(), features, expr)
+        .hasSideEffects();
+    };
+
+    // memory.copy(x, x, sz)  ==>  nop
+    // Becoming a nop deletes all three operands, so the size has to be free
+    // of side effects as well (dest == source already covers the source).
+    if (!hasSideEffects(memCopy->dest) && !hasSideEffects(memCopy->size) &&
+        ExpressionAnalyzer::equal(memCopy->dest, memCopy->source)) {
+      return ExpressionManipulator::nop(memCopy);
+    }
+    // memory.copy(dst, src, C)  ==>  store(dst, load(src))
+    if (auto* csize = memCopy->size->dynCast<Const>()) {
+      auto bytes = csize->value.geti32();
+      Builder builder(*getModule());
+
+      switch (bytes) {
+        case 0: {
+          // Nothing is copied; keep the operands only for their effects.
+          return builder.makeBlock({builder.makeDrop(memCopy->dest),
+                                    builder.makeDrop(memCopy->source)});
+        }
+        case 1:
+        case 2:
+        case 4: {
+          return builder.makeStore(
+            bytes, // bytes
+            0,     // offset
+            1,     // align
+            memCopy->dest,
+            builder.makeLoad(bytes, false, 0, 1, memCopy->source, Type::i32),
+            Type::i32);
+        }
+        case 8: {
+          return builder.makeStore(
+            bytes, // bytes
+            0,     // offset
+            1,     // align
+            memCopy->dest,
+            builder.makeLoad(bytes, false, 0, 1, memCopy->source, Type::i64),
+            Type::i64);
+        }
+        case 16: {
+          if (getPassOptions().shrinkLevel == 0) {
+            // This adds an extra 2 bytes so apply it only for
+            // minimal shrink level
+            if (features.hasSIMD()) {
+              return builder.makeStore(
+                bytes, // bytes
+                0,     // offset
+                1,     // align
+                memCopy->dest,
+                builder.makeLoad(
+                  bytes, false, 0, 1, memCopy->source, Type::v128),
+                Type::v128);
+            }
+          }
+          // Not profitable or not possible here: leave the copy untouched.
+          break;
+        }
+        default: {
+          // Any other constant size has no single load/store equivalent.
+          break;
+        }
+      }
+    }
+    return nullptr;
+  }
+
// given a binary expression with equal children and no side effects in
// either, we can fold various things
// TODO: trinaries, things like (x & (y & x)) ?
diff --git a/test/passes/optimize-instructions_all-features.txt b/test/passes/optimize-instructions_all-features.txt
index e08702b91..b97106f2d 100644
--- a/test/passes/optimize-instructions_all-features.txt
+++ b/test/passes/optimize-instructions_all-features.txt
@@ -10,6 +10,7 @@
(type $i64_=>_i64 (func (param i64) (result i64)))
(type $i32_i64_f32_=>_none (func (param i32 i64 f32)))
(type $i32_i64_f32_f64_=>_none (func (param i32 i64 f32 f64)))
+ (type $i32_i32_i32_=>_none (func (param i32 i32 i32)))
(type $i32_i32_f64_f64_=>_none (func (param i32 i32 f64 f64)))
(type $i32_i64_f64_i32_=>_none (func (param i32 i64 f64 i32)))
(type $none_=>_f64 (func (result f64)))
@@ -3727,6 +3728,72 @@
)
)
)
+ (func $optimize-bulk-memory-copy (param $dst i32) (param $src i32) (param $sz i32)
+ (nop)
+ (block
+ (drop
+ (local.get $dst)
+ )
+ (drop
+ (local.get $src)
+ )
+ )
+ (i32.store8
+ (local.get $dst)
+ (i32.load8_u
+ (local.get $src)
+ )
+ )
+ (i32.store16 align=1
+ (local.get $dst)
+ (i32.load16_u align=1
+ (local.get $src)
+ )
+ )
+ (memory.copy
+ (local.get $dst)
+ (local.get $src)
+ (i32.const 3)
+ )
+ (i32.store align=1
+ (local.get $dst)
+ (i32.load align=1
+ (local.get $src)
+ )
+ )
+ (memory.copy
+ (local.get $dst)
+ (local.get $src)
+ (i32.const 5)
+ )
+ (memory.copy
+ (local.get $dst)
+ (local.get $src)
+ (i32.const 6)
+ )
+ (memory.copy
+ (local.get $dst)
+ (local.get $src)
+ (i32.const 7)
+ )
+ (i64.store align=1
+ (local.get $dst)
+ (i64.load align=1
+ (local.get $src)
+ )
+ )
+ (v128.store align=1
+ (local.get $dst)
+ (v128.load align=1
+ (local.get $src)
+ )
+ )
+ (memory.copy
+ (local.get $dst)
+ (local.get $src)
+ (local.get $sz)
+ )
+ )
)
(module
(type $none_=>_none (func))
diff --git a/test/passes/optimize-instructions_all-features.wast b/test/passes/optimize-instructions_all-features.wast
index 2790cc9ea..b62ecdcfd 100644
--- a/test/passes/optimize-instructions_all-features.wast
+++ b/test/passes/optimize-instructions_all-features.wast
@@ -4233,6 +4233,79 @@
)
))
)
+ ;; Feeds memory.copy to the optimizer with identical dest/source, with each
+ ;; constant size from 0 to 8 plus 16, and with a non-constant size. The
+ ;; per-case comments state the result shown in the expected output.
+ (func $optimize-bulk-memory-copy (param $dst i32) (param $src i32) (param $sz i32)
+ (memory.copy ;; dest == source: becomes nop
+ (local.get $dst)
+ (local.get $dst)
+ (local.get $sz)
+ )
+
+ (memory.copy ;; size 0: becomes a block that drops dst and src
+ (local.get $dst)
+ (local.get $src)
+ (i32.const 0)
+ )
+
+ (memory.copy ;; size 1: becomes i32.store8 of i32.load8_u
+ (local.get $dst)
+ (local.get $src)
+ (i32.const 1)
+ )
+
+ (memory.copy ;; size 2: becomes i32.store16 of i32.load16_u, align=1
+ (local.get $dst)
+ (local.get $src)
+ (i32.const 2)
+ )
+
+ (memory.copy ;; size 3: left unchanged
+ (local.get $dst)
+ (local.get $src)
+ (i32.const 3)
+ )
+
+ (memory.copy ;; size 4: becomes i32.store of i32.load, align=1
+ (local.get $dst)
+ (local.get $src)
+ (i32.const 4)
+ )
+
+ (memory.copy ;; size 5: left unchanged
+ (local.get $dst)
+ (local.get $src)
+ (i32.const 5)
+ )
+
+ (memory.copy ;; size 6: left unchanged
+ (local.get $dst)
+ (local.get $src)
+ (i32.const 6)
+ )
+
+ (memory.copy ;; size 7: left unchanged
+ (local.get $dst)
+ (local.get $src)
+ (i32.const 7)
+ )
+
+ (memory.copy ;; size 8: becomes i64.store of i64.load, align=1
+ (local.get $dst)
+ (local.get $src)
+ (i32.const 8)
+ )
+
+ (memory.copy ;; size 16: becomes v128.store of v128.load, align=1
+ (local.get $dst)
+ (local.get $src)
+ (i32.const 16)
+ )
+
+ (memory.copy ;; skip: non-constant size is left unchanged
+ (local.get $dst)
+ (local.get $src)
+ (local.get $sz)
+ )
+ )
)
(module
(import "env" "memory" (memory $0 (shared 256 256)))