diff options
-rw-r--r-- | src/passes/OptimizeInstructions.cpp | 40 | ||||
-rw-r--r-- | test/emcc_hello_world.fromasm | 22 | ||||
-rw-r--r-- | test/emcc_hello_world.fromasm.imprecise | 22 | ||||
-rw-r--r-- | test/memorygrowth.fromasm | 295 | ||||
-rw-r--r-- | test/memorygrowth.fromasm.imprecise | 295 | ||||
-rw-r--r-- | test/passes/optimize-instructions.txt | 73 | ||||
-rw-r--r-- | test/passes/optimize-instructions.wast | 101 |
7 files changed, 516 insertions, 332 deletions
diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index 1970cf08f..37207ffc7 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -326,7 +326,10 @@ struct OptimizeInstructions : public WalkerPass<PostWalker<OptimizeInstructions, if (br->condition) { br->condition = optimizeBoolean(br->condition); } + } else if (auto* load = curr->dynCast<Load>()) { + optimizeMemoryAccess(load->ptr, load->offset); } else if (auto* store = curr->dynCast<Store>()) { + optimizeMemoryAccess(store->ptr, store->offset); // stores of fewer bits truncates anyhow if (auto* binary = store->value->dynCast<Binary>()) { if (binary->op == AndInt32) { @@ -430,6 +433,43 @@ private: return builder.makeIf(left, right, builder.makeConst(Literal(int32_t(0)))); } } + + // fold constant factors into the offset + void optimizeMemoryAccess(Expression*& ptr, Address& offset) { + while (1) { + auto* add = ptr->dynCast<Binary>(); + if (!add) break; + if (add->op != AddInt32) break; + auto* left = add->left->dynCast<Const>(); + auto* right = add->right->dynCast<Const>(); + // note: in optimized code, we shouldn't see an add of two constants, so don't worry about that much + // (precompute would optimize that) + if (left) { + auto value = left->value.geti32(); + if (value >= 0) { + offset = offset + value; + ptr = add->right; + continue; + } + } + if (right) { + auto value = right->value.geti32(); + if (value >= 0) { + offset = offset + value; + ptr = add->left; + continue; + } + } + break; + } + // finally, ptr may be a const, but it isn't worth folding that in (we still have a const); in fact, + // it's better to do the opposite for gzip purposes as well as for readability. + auto* last = ptr->dynCast<Const>(); + if (last) { + last->value = Literal(int32_t(last->value.geti32() + offset)); + offset = 0; + } + } }; Pass *createOptimizeInstructionsPass() { diff --git a/test/emcc_hello_world.fromasm b/test/emcc_hello_world.fromasm index 06f88befe..afb161718 100644 --- a/test/emcc_hello_world.fromasm +++ b/test/emcc_hello_world.fromasm @@ -4357,14 +4357,11 @@ (i32.store8 (get_local $5) (i32.or - (i32.load8_u - (i32.add - (tee_local $7 - (call $f64-to-int - (get_local $15) - ) + (i32.load8_u offset=4075 + (tee_local $7 + (call $f64-to-int + (get_local $15) ) - (i32.const 4075) ) ) (get_local $13) @@ -6599,13 +6596,10 @@ ) ) (i32.or - (i32.load8_u - (i32.add - (i32.and - (get_local $5) - (i32.const 15) - ) - (i32.const 4075) + (i32.load8_u offset=4075 + (i32.and + (get_local $5) + (i32.const 15) ) ) (get_local $9) diff --git a/test/emcc_hello_world.fromasm.imprecise b/test/emcc_hello_world.fromasm.imprecise index 4363dc349..adadfe086 100644 --- a/test/emcc_hello_world.fromasm.imprecise +++ b/test/emcc_hello_world.fromasm.imprecise @@ -4349,14 +4349,11 @@ (i32.store8 (get_local $5) (i32.or - (i32.load8_u - (i32.add - (tee_local $7 - (i32.trunc_s/f64 - (get_local $15) - ) + (i32.load8_u offset=4075 + (tee_local $7 + (i32.trunc_s/f64 + (get_local $15) ) - (i32.const 4075) ) ) (get_local $13) @@ -6585,13 +6582,10 @@ ) ) (i32.or - (i32.load8_u - (i32.add - (i32.and - (get_local $5) - (i32.const 15) - ) - (i32.const 4075) + (i32.load8_u offset=4075 + (i32.and + (get_local $5) + (i32.const 15) ) ) (get_local $9) diff --git a/test/memorygrowth.fromasm b/test/memorygrowth.fromasm index 1422a65a9..f61e92441 100644 --- a/test/memorygrowth.fromasm +++ b/test/memorygrowth.fromasm @@ -723,83 +723,80 @@ (i32.and (i32.load offset=4 (tee_local $14 - (i32.load - (i32.add - (i32.shl - (i32.add + (i32.load offset=1512 + (i32.shl + (i32.add + (i32.or (i32.or (i32.or (i32.or - (i32.or - (tee_local $8 - (i32.and - (i32.shr_u - (tee_local $4 - (i32.shr_u - (get_local $8) - (get_local $12) - ) - ) - (i32.const 5) - ) - (i32.const 8) - ) - ) - (get_local $12) - ) - (tee_local $4 + (tee_local $8 (i32.and (i32.shr_u - (tee_local $1 + (tee_local $4 (i32.shr_u - (get_local $4) (get_local $8) + (get_local $12) ) ) - (i32.const 2) + (i32.const 5) ) - (i32.const 4) + (i32.const 8) ) ) + (get_local $12) ) - (tee_local $1 + (tee_local $4 (i32.and (i32.shr_u - (tee_local $0 + (tee_local $1 (i32.shr_u - (get_local $1) (get_local $4) + (get_local $8) ) ) - (i32.const 1) + (i32.const 2) ) - (i32.const 2) + (i32.const 4) ) ) ) - (tee_local $0 + (tee_local $1 (i32.and (i32.shr_u - (tee_local $5 + (tee_local $0 (i32.shr_u - (get_local $0) (get_local $1) + (get_local $4) ) ) (i32.const 1) ) - (i32.const 1) + (i32.const 2) ) ) ) - (i32.shr_u - (get_local $5) - (get_local $0) + (tee_local $0 + (i32.and + (i32.shr_u + (tee_local $5 + (i32.shr_u + (get_local $0) + (get_local $1) + ) + ) + (i32.const 1) + ) + (i32.const 1) + ) ) ) - (i32.const 2) + (i32.shr_u + (get_local $5) + (get_local $0) + ) ) - (i32.const 1512) + (i32.const 2) ) ) ) @@ -1466,109 +1463,106 @@ (block $label$break$a (if (tee_local $12 - (i32.load - (i32.add - (i32.shl - (tee_local $27 + (i32.load offset=1512 + (i32.shl + (tee_local $27 + (if i32 + (tee_local $7 + (i32.shr_u + (get_local $1) + (i32.const 8) + ) + ) (if i32 - (tee_local $7 - (i32.shr_u - (get_local $1) - (i32.const 8) - ) + (i32.gt_u + (get_local $0) + (i32.const 16777215) ) - (if i32 - (i32.gt_u - (get_local $0) - (i32.const 16777215) - ) - (i32.const 31) - (i32.or - (i32.and - (i32.shr_u - (get_local $0) - (i32.add - (tee_local $12 - (i32.add - (i32.sub - (i32.const 14) + (i32.const 31) + (i32.or + (i32.and + (i32.shr_u + (get_local $0) + (i32.add + (tee_local $12 + (i32.add + (i32.sub + (i32.const 14) + (i32.or (i32.or - (i32.or - (tee_local $7 - (i32.and - (i32.shr_u - (i32.add - (tee_local $16 - (i32.shl - (get_local $7) - (tee_local $1 - (i32.and - (i32.shr_u - (i32.add - (get_local $7) - (i32.const 1048320) - ) - (i32.const 16) + (tee_local $7 + (i32.and + (i32.shr_u + (i32.add + (tee_local $16 + (i32.shl + (get_local $7) + (tee_local $1 + (i32.and + (i32.shr_u + (i32.add + (get_local $7) + (i32.const 1048320) ) - (i32.const 8) + (i32.const 16) ) + (i32.const 8) ) ) ) - (i32.const 520192) ) - (i32.const 16) + (i32.const 520192) ) - (i32.const 4) + (i32.const 16) ) + (i32.const 4) ) - (get_local $1) ) - (tee_local $16 - (i32.and - (i32.shr_u - (i32.add - (tee_local $14 - (i32.shl - (get_local $16) - (get_local $7) - ) + (get_local $1) + ) + (tee_local $16 + (i32.and + (i32.shr_u + (i32.add + (tee_local $14 + (i32.shl + (get_local $16) + (get_local $7) ) - (i32.const 245760) ) - (i32.const 16) + (i32.const 245760) ) - (i32.const 2) + (i32.const 16) ) + (i32.const 2) ) ) ) - (i32.shr_u - (i32.shl - (get_local $14) - (get_local $16) - ) - (i32.const 15) + ) + (i32.shr_u + (i32.shl + (get_local $14) + (get_local $16) ) + (i32.const 15) ) ) - (i32.const 7) ) + (i32.const 7) ) - (i32.const 1) - ) - (i32.shl - (get_local $12) - (i32.const 1) ) + (i32.const 1) + ) + (i32.shl + (get_local $12) + (i32.const 1) ) ) - (i32.const 0) ) + (i32.const 0) ) - (i32.const 2) ) - (i32.const 1512) + (i32.const 2) ) ) ) @@ -1808,83 +1802,80 @@ (i32.const 16) ) ) - (i32.load - (i32.add - (i32.shl - (i32.add + (i32.load offset=1512 + (i32.shl + (i32.add + (i32.or (i32.or (i32.or (i32.or - (i32.or - (tee_local $12 - (i32.and - (i32.shr_u - (tee_local $3 - (i32.shr_u - (get_local $12) - (get_local $4) - ) - ) - (i32.const 5) - ) - (i32.const 8) - ) - ) - (get_local $4) - ) - (tee_local $3 + (tee_local $12 (i32.and (i32.shr_u - (tee_local $5 + (tee_local $3 (i32.shr_u - (get_local $3) (get_local $12) + (get_local $4) ) ) - (i32.const 2) + (i32.const 5) ) - (i32.const 4) + (i32.const 8) ) ) + (get_local $4) ) - (tee_local $5 + (tee_local $3 (i32.and (i32.shr_u - (tee_local $8 + (tee_local $5 (i32.shr_u - (get_local $5) (get_local $3) + (get_local $12) ) ) - (i32.const 1) + (i32.const 2) ) - (i32.const 2) + (i32.const 4) ) ) ) - (tee_local $8 + (tee_local $5 (i32.and (i32.shr_u - (tee_local $1 + (tee_local $8 (i32.shr_u - (get_local $8) (get_local $5) + (get_local $3) ) ) (i32.const 1) ) - (i32.const 1) + (i32.const 2) ) ) ) - (i32.shr_u - (get_local $1) - (get_local $8) + (tee_local $8 + (i32.and + (i32.shr_u + (tee_local $1 + (i32.shr_u + (get_local $8) + (get_local $5) + ) + ) + (i32.const 1) + ) + (i32.const 1) + ) ) ) - (i32.const 2) + (i32.shr_u + (get_local $1) + (get_local $8) + ) ) - (i32.const 1512) + (i32.const 2) ) ) ) diff --git a/test/memorygrowth.fromasm.imprecise b/test/memorygrowth.fromasm.imprecise index 4301d1105..3d244ccad 100644 --- a/test/memorygrowth.fromasm.imprecise +++ b/test/memorygrowth.fromasm.imprecise @@ -721,83 +721,80 @@ (i32.and (i32.load offset=4 (tee_local $14 - (i32.load - (i32.add - (i32.shl - (i32.add + (i32.load offset=1512 + (i32.shl + (i32.add + (i32.or (i32.or (i32.or (i32.or - (i32.or - (tee_local $8 - (i32.and - (i32.shr_u - (tee_local $4 - (i32.shr_u - (get_local $8) - (get_local $12) - ) - ) - (i32.const 5) - ) - (i32.const 8) - ) - ) - (get_local $12) - ) - (tee_local $4 + (tee_local $8 (i32.and (i32.shr_u - (tee_local $1 + (tee_local $4 (i32.shr_u - (get_local $4) (get_local $8) + (get_local $12) ) ) - (i32.const 2) + (i32.const 5) ) - (i32.const 4) + (i32.const 8) ) ) + (get_local $12) ) - (tee_local $1 + (tee_local $4 (i32.and (i32.shr_u - (tee_local $0 + (tee_local $1 (i32.shr_u - (get_local $1) (get_local $4) + (get_local $8) ) ) - (i32.const 1) + (i32.const 2) ) - (i32.const 2) + (i32.const 4) ) ) ) - (tee_local $0 + (tee_local $1 (i32.and (i32.shr_u - (tee_local $5 + (tee_local $0 (i32.shr_u - (get_local $0) (get_local $1) + (get_local $4) ) ) (i32.const 1) ) - (i32.const 1) + (i32.const 2) ) ) ) - (i32.shr_u - (get_local $5) - (get_local $0) + (tee_local $0 + (i32.and + (i32.shr_u + (tee_local $5 + (i32.shr_u + (get_local $0) + (get_local $1) + ) + ) + (i32.const 1) + ) + (i32.const 1) + ) ) ) - (i32.const 2) + (i32.shr_u + (get_local $5) + (get_local $0) + ) ) - (i32.const 1512) + (i32.const 2) ) ) ) @@ -1464,109 +1461,106 @@ (block $label$break$a (if (tee_local $12 - (i32.load - (i32.add - (i32.shl - (tee_local $27 + (i32.load offset=1512 + (i32.shl + (tee_local $27 + (if i32 + (tee_local $7 + (i32.shr_u + (get_local $1) + (i32.const 8) + ) + ) (if i32 - (tee_local $7 - (i32.shr_u - (get_local $1) - (i32.const 8) - ) + (i32.gt_u + (get_local $0) + (i32.const 16777215) ) - (if i32 - (i32.gt_u - (get_local $0) - (i32.const 16777215) - ) - (i32.const 31) - (i32.or - (i32.and - (i32.shr_u - (get_local $0) - (i32.add - (tee_local $12 - (i32.add - (i32.sub - (i32.const 14) + (i32.const 31) + (i32.or + (i32.and + (i32.shr_u + (get_local $0) + (i32.add + (tee_local $12 + (i32.add + (i32.sub + (i32.const 14) + (i32.or (i32.or - (i32.or - (tee_local $7 - (i32.and - (i32.shr_u - (i32.add - (tee_local $16 - (i32.shl - (get_local $7) - (tee_local $1 - (i32.and - (i32.shr_u - (i32.add - (get_local $7) - (i32.const 1048320) - ) - (i32.const 16) + (tee_local $7 + (i32.and + (i32.shr_u + (i32.add + (tee_local $16 + (i32.shl + (get_local $7) + (tee_local $1 + (i32.and + (i32.shr_u + (i32.add + (get_local $7) + (i32.const 1048320) ) - (i32.const 8) + (i32.const 16) ) + (i32.const 8) ) ) ) - (i32.const 520192) ) - (i32.const 16) + (i32.const 520192) ) - (i32.const 4) + (i32.const 16) ) + (i32.const 4) ) - (get_local $1) ) - (tee_local $16 - (i32.and - (i32.shr_u - (i32.add - (tee_local $14 - (i32.shl - (get_local $16) - (get_local $7) - ) + (get_local $1) + ) + (tee_local $16 + (i32.and + (i32.shr_u + (i32.add + (tee_local $14 + (i32.shl + (get_local $16) + (get_local $7) ) - (i32.const 245760) ) - (i32.const 16) + (i32.const 245760) ) - (i32.const 2) + (i32.const 16) ) + (i32.const 2) ) ) ) - (i32.shr_u - (i32.shl - (get_local $14) - (get_local $16) - ) - (i32.const 15) + ) + (i32.shr_u + (i32.shl + (get_local $14) + (get_local $16) ) + (i32.const 15) ) ) - (i32.const 7) ) + (i32.const 7) ) - (i32.const 1) - ) - (i32.shl - (get_local $12) - (i32.const 1) ) + (i32.const 1) + ) + (i32.shl + (get_local $12) + (i32.const 1) ) ) - (i32.const 0) ) + (i32.const 0) ) - (i32.const 2) ) - (i32.const 1512) + (i32.const 2) ) ) ) @@ -1806,83 +1800,80 @@ (i32.const 16) ) ) - (i32.load - (i32.add - (i32.shl - (i32.add + (i32.load offset=1512 + (i32.shl + (i32.add + (i32.or (i32.or (i32.or (i32.or - (i32.or - (tee_local $12 - (i32.and - (i32.shr_u - (tee_local $3 - (i32.shr_u - (get_local $12) - (get_local $4) - ) - ) - (i32.const 5) - ) - (i32.const 8) - ) - ) - (get_local $4) - ) - (tee_local $3 + (tee_local $12 (i32.and (i32.shr_u - (tee_local $5 + (tee_local $3 (i32.shr_u - (get_local $3) (get_local $12) + (get_local $4) ) ) - (i32.const 2) + (i32.const 5) ) - (i32.const 4) + (i32.const 8) ) ) + (get_local $4) ) - (tee_local $5 + (tee_local $3 (i32.and (i32.shr_u - (tee_local $8 + (tee_local $5 (i32.shr_u - (get_local $5) (get_local $3) + (get_local $12) ) ) - (i32.const 1) + (i32.const 2) ) - (i32.const 2) + (i32.const 4) ) ) ) - (tee_local $8 + (tee_local $5 (i32.and (i32.shr_u - (tee_local $1 + (tee_local $8 (i32.shr_u - (get_local $8) (get_local $5) + (get_local $3) ) ) (i32.const 1) ) - (i32.const 1) + (i32.const 2) ) ) ) - (i32.shr_u - (get_local $1) - (get_local $8) + (tee_local $8 + (i32.and + (i32.shr_u + (tee_local $1 + (i32.shr_u + (get_local $8) + (get_local $5) + ) + ) + (i32.const 1) + ) + (i32.const 1) + ) ) ) - (i32.const 2) + (i32.shr_u + (get_local $1) + (get_local $8) + ) ) - (i32.const 1512) + (i32.const 2) ) ) ) diff --git a/test/passes/optimize-instructions.txt b/test/passes/optimize-instructions.txt index cb043f155..a8cf68dbd 100644 --- a/test/passes/optimize-instructions.txt +++ b/test/passes/optimize-instructions.txt @@ -2,7 +2,9 @@ (type $0 (func (param i32 i64))) (type $1 (func)) (type $2 (func (result i32))) + (type $3 (func (param i32) (result i32))) (memory $0 0) + (export "load-off-2" (func $load-off-2)) (func $f (type $0) (param $i1 i32) (param $i2 i64) (if (i32.eqz @@ -412,4 +414,75 @@ (nop) ) ) + (func $load-off-2 (type $3) (param $0 i32) (result i32) + (i32.store + (i32.const 6) + (get_local $0) + ) + (i32.store + (i32.const 6) + (get_local $0) + ) + (i32.store offset=7 + (get_local $0) + (get_local $0) + ) + (i32.store offset=9 + (get_local $0) + (get_local $0) + ) + (i32.store offset=2 + (i32.add + (get_local $0) + (i32.const -11) + ) + (get_local $0) + ) + (i32.store offset=2 + (i32.add + (get_local $0) + (i32.const -13) + ) + (get_local $0) + ) + (i32.store + (i32.const 4) + (get_local $0) + ) + (i32.store + (i32.const 0) + (get_local $0) + ) + (i32.store + (i32.const 25) + (get_local $0) + ) + (i32.store + (i32.const -23) + (get_local $0) + ) + (drop + (i32.load + (i32.const 8) + ) + ) + (drop + (i32.load + (i32.const 8) + ) + ) + (drop + (i32.load offset=8 + (get_local $0) + ) + ) + (drop + (i32.load + (i32.const 10) + ) + ) + (i32.load offset=12 + (get_local $0) + ) + ) ) diff --git a/test/passes/optimize-instructions.wast b/test/passes/optimize-instructions.wast index 3cf756548..41ca48951 100644 --- a/test/passes/optimize-instructions.wast +++ b/test/passes/optimize-instructions.wast @@ -329,4 +329,105 @@ (nop) ) ) + (func $load-off-2 "load-off-2" (param $0 i32) (result i32) + (i32.store offset=2 + (i32.add + (i32.const 1) + (i32.const 3) + ) + (get_local $0) + ) + (i32.store offset=2 + (i32.add + (i32.const 3) + (i32.const 1) + ) + (get_local $0) + ) + (i32.store offset=2 + (i32.add + (get_local $0) + (i32.const 5) + ) + (get_local $0) + ) + (i32.store offset=2 + (i32.add + (i32.const 7) + (get_local $0) + ) + (get_local $0) + ) + (i32.store offset=2 + (i32.add + (i32.const -11) ;; do not fold this! + (get_local $0) + ) + (get_local $0) + ) + (i32.store offset=2 + (i32.add + (get_local $0) + (i32.const -13) ;; do not fold this! + ) + (get_local $0) + ) + (i32.store offset=2 + (i32.add + (i32.const -15) + (i32.const 17) + ) + (get_local $0) + ) + (i32.store offset=2 + (i32.add + (i32.const -21) + (i32.const 19) + ) + (get_local $0) + ) + (i32.store offset=2 + (i32.const 23) + (get_local $0) + ) + (i32.store offset=2 + (i32.const -25) + (get_local $0) + ) + (drop + (i32.load offset=2 + (i32.add + (i32.const 2) + (i32.const 4) + ) + ) + ) + (drop + (i32.load offset=2 + (i32.add + (i32.const 4) + (i32.const 2) + ) + ) + ) + (drop + (i32.load offset=2 + (i32.add + (get_local $0) + (i32.const 6) + ) + ) + ) + (drop + (i32.load offset=2 + (i32.const 8) + ) + ) + (i32.load offset=2 + (i32.add + (i32.const 10) + (get_local $0) + ) + ) + ) ) |