diff options
author | Laurent Mazare <laurent.mazare@gmail.com> | 2024-03-17 10:49:13 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-03-17 10:49:13 +0100 |
commit | ce9fbc368211815ef2dddff01575ca1f9d4eccd5 (patch) | |
tree | e260edd957ab716d1789da05059c9a79696b0730 /candle-core/tests/conv_tests.rs | |
parent | db8b24ae92419377283821ee0a65fb224a4f3c4d (diff) | |
download | candle-ce9fbc368211815ef2dddff01575ca1f9d4eccd5.tar.gz candle-ce9fbc368211815ef2dddff01575ca1f9d4eccd5.tar.bz2 candle-ce9fbc368211815ef2dddff01575ca1f9d4eccd5.zip |
Optimize the cat operation on contiguous tensors (#1855)
* Add a specialized kernel for copy2d.
* Move the cat operations.
* Avoid transpositions in cat.
* Bugfix.
* Bugfix for the cuda kernel.
* Add a benchmark.
* Add more testing.
* Test fix.
* Faster kernel.
* Add the missing kernel.
* Tweak the test.
* Add a metal kernel.
* Fix for the metal kernel.
* Get the tests to pass on metal.
* Also use this opportunity to fix the metal kernel for ELU.
* Add some bf16 kernels.
* Clippy fixes.
Diffstat (limited to 'candle-core/tests/conv_tests.rs')
-rw-r--r-- | candle-core/tests/conv_tests.rs | 128 |
1 files changed, 77 insertions, 51 deletions
diff --git a/candle-core/tests/conv_tests.rs b/candle-core/tests/conv_tests.rs index f0f1b7f2..ba60b778 100644 --- a/candle-core/tests/conv_tests.rs +++ b/candle-core/tests/conv_tests.rs @@ -53,6 +53,12 @@ fn conv1d(dev: &Device) -> Result<()> { test_utils::to_vec1_round(&res.flatten_all()?, 4)?, [2.4509, 2.6357, -1.3336, 4.1393, 0.5657, 1.8091, -1.1784, 3.5675, 0.5069, 3.3352] ); + + // conv-transposes are not implemented for metal. + if dev.is_metal() { + return Ok(()); + } + let w = w.transpose(0, 1)?; // The CPU kernels applied in the contiguous and non contiguous cases are different. for w in [w.clone(), w.contiguous()?] { @@ -162,31 +168,33 @@ fn conv2d(dev: &Device) -> Result<()> { 10.389, 3.6023, -4.2808, 0.2672, 5.3646, -5.2023, -2.1955, -9.4075 ] ); - let res = t.conv_transpose2d(&w.transpose(0, 1)?, 0, 0, 1, 1)?; - assert_eq!(res.dims(), [1, 2, 7, 7]); - assert_eq!( - test_utils::to_vec3_round(&res.i(0)?, 4)?, - [ - [ - [-1.9918, 2.6797, -0.4599, -1.6037, 1.4131, -2.4012, 2.9277], - [1.8016, -3.5361, 1.0757, 3.5395, -8.2168, -3.2023, 0.5375], - [0.8243, 1.8675, 7.8929, -4.0746, -6.4415, 5.1139, 1.6889], - [0.2722, 8.9679, 3.3477, 1.8514, -4.2896, -3.8228, -7.5632], - [-8.5412, -5.8142, -7.1587, -1.6095, 0.4651, 0.2748, -2.0985], - [2.0833, -0.6482, -12.1692, -4.1284, -2.9765, -0.0656, -4.5114], - [5.307, 2.6957, 2.3087, 1.0478, 0.7808, -1.1519, -0.9579] - ], + if !dev.is_metal() { + let res = t.conv_transpose2d(&w.transpose(0, 1)?, 0, 0, 1, 1)?; + assert_eq!(res.dims(), [1, 2, 7, 7]); + assert_eq!( + test_utils::to_vec3_round(&res.i(0)?, 4)?, [ - [1.089, 0.1872, -0.6408, -0.9897, 0.8503, 1.1019, -0.9211], - [-0.1741, -0.2915, 4.2472, 1.9417, 1.65, 0.6303, -4.7131], - [1.6555, 2.4026, -2.9293, 2.9953, 0.5328, 3.5873, -0.9621], - [-1.4289, -3.2787, 4.1747, -6.0341, -4.6341, -5.7945, 4.142], - [7.5973, 6.4431, 5.9872, 2.1639, -8.6566, 3.3143, -3.4059], - [-0.8775, -3.048, 11.6543, 0.6442, 2.3218, -0.4765, 1.1516], - [-5.5423, -2.5188, 1.0754, -0.0563, -2.9386, -1.1504, 1.0171] + [ + [-1.9918, 2.6797, -0.4599, -1.6037, 1.4131, -2.4012, 2.9277], + [1.8016, -3.5361, 1.0757, 3.5395, -8.2168, -3.2023, 0.5375], + [0.8243, 1.8675, 7.8929, -4.0746, -6.4415, 5.1139, 1.6889], + [0.2722, 8.9679, 3.3477, 1.8514, -4.2896, -3.8228, -7.5632], + [-8.5412, -5.8142, -7.1587, -1.6095, 0.4651, 0.2748, -2.0985], + [2.0833, -0.6482, -12.1692, -4.1284, -2.9765, -0.0656, -4.5114], + [5.307, 2.6957, 2.3087, 1.0478, 0.7808, -1.1519, -0.9579] + ], + [ + [1.089, 0.1872, -0.6408, -0.9897, 0.8503, 1.1019, -0.9211], + [-0.1741, -0.2915, 4.2472, 1.9417, 1.65, 0.6303, -4.7131], + [1.6555, 2.4026, -2.9293, 2.9953, 0.5328, 3.5873, -0.9621], + [-1.4289, -3.2787, 4.1747, -6.0341, -4.6341, -5.7945, 4.142], + [7.5973, 6.4431, 5.9872, 2.1639, -8.6566, 3.3143, -3.4059], + [-0.8775, -3.048, 11.6543, 0.6442, 2.3218, -0.4765, 1.1516], + [-5.5423, -2.5188, 1.0754, -0.0563, -2.9386, -1.1504, 1.0171] + ] ] - ] - ); + ); + } // Dilations. let res = t.conv2d(&w, 0, 1, 2, 1)?; assert_eq!(res.dims(), [1, 2, 1, 1]); @@ -195,36 +203,44 @@ fn conv2d(dev: &Device) -> Result<()> { [2.45, -2.3504], ); - // Transpose and dilations. - let res = t.conv_transpose2d(&w.transpose(0, 1)?, 0, 0, 1, 2)?; - assert_eq!(res.dims(), [1, 2, 9, 9]); - assert_eq!( - test_utils::to_vec3_round(&res.i(0)?, 4)?, - [ - [ - [-1.9918, 3.1652, -0.6778, -4.3442, 4.4351, 0.6652, -3.0124, -0.6031, 2.9277], - [2.7036, -1.7156, -0.3969, 1.0516, 1.6381, -2.8886, -0.205, 2.4682, -1.0499], - [-0.9459, 3.1631, 3.707, -4.8369, -8.5166, -1.4496, -2.7559, -3.2698, 1.4376], - [-0.2157, 3.7786, -2.0252, -4.2633, 3.6731, -1.5142, 5.9391, -0.2622, -0.141], - [-6.8121, -3.1744, 1.5945, 3.0637, -9.6088, 1.4446, 2.9489, -3.0082, -7.3822], - [0.2371, 3.3303, 0.3861, 2.2646, -4.6784, 4.1235, -0.0109, 0.3176, -0.03], - [-2.5339, -2.9564, -3.4518, -4.4594, -9.1873, -1.9709, -0.4676, 0.51, -3.5024], - [4.007, 0.3067, -2.2954, 1.1105, -0.1992, 1.6372, -2.9268, 0.2807, -1.2787], - [5.307, 1.1317, 1.3518, 0.9049, 3.8116, -0.4075, -0.8874, -0.2241, -0.9579] - ], + if !dev.is_metal() { + // Transpose and dilations. + let res = t.conv_transpose2d(&w.transpose(0, 1)?, 0, 0, 1, 2)?; + assert_eq!(res.dims(), [1, 2, 9, 9]); + assert_eq!( + test_utils::to_vec3_round(&res.i(0)?, 4)?, [ - [1.089, -0.6483, 0.0726, -0.4752, -1.3283, 1.7103, 1.0703, 0.1076, -0.9211], - [-0.8629, 0.1376, 0.3202, 2.0955, 0.9696, 2.8988, -1.0012, 1.5049, -0.1278], - [1.9286, -1.5255, -2.9563, 2.4589, 3.3611, -0.6951, 0.3525, -1.7724, -5.9861], - [1.1226, 2.1561, 3.6417, 4.7546, -0.692, 4.4126, -5.1902, 6.0805, 2.3185], - [1.0111, 0.3604, 0.6432, -3.6605, 7.9517, -9.2955, -5.2988, -3.7803, -2.0642], - [3.3172, -1.7967, -3.6576, -2.0942, 1.3158, 0.112, -1.7405, 2.9167, 0.7957], - [5.1001, 1.8995, -1.8639, 1.1262, 9.9629, 2.683, -3.6319, -1.1607, 0.5856], - [-4.8445, -0.5642, 4.2317, 0.0856, 1.2267, -0.5712, 1.736, 1.0997, 0.6908], - [-5.5423, -1.1831, -1.2176, 0.0843, 0.0446, -0.7545, -2.4798, -0.0827, 1.0171] + [ + [-1.9918, 3.1652, -0.6778, -4.3442, 4.4351, 0.6652, -3.0124, -0.6031, 2.9277], + [2.7036, -1.7156, -0.3969, 1.0516, 1.6381, -2.8886, -0.205, 2.4682, -1.0499], + [-0.9459, 3.1631, 3.707, -4.8369, -8.5166, -1.4496, -2.7559, -3.2698, 1.4376], + [-0.2157, 3.7786, -2.0252, -4.2633, 3.6731, -1.5142, 5.9391, -0.2622, -0.141], + [-6.8121, -3.1744, 1.5945, 3.0637, -9.6088, 1.4446, 2.9489, -3.0082, -7.3822], + [0.2371, 3.3303, 0.3861, 2.2646, -4.6784, 4.1235, -0.0109, 0.3176, -0.03], + [ + -2.5339, -2.9564, -3.4518, -4.4594, -9.1873, -1.9709, -0.4676, 0.51, + -3.5024 + ], + [4.007, 0.3067, -2.2954, 1.1105, -0.1992, 1.6372, -2.9268, 0.2807, -1.2787], + [5.307, 1.1317, 1.3518, 0.9049, 3.8116, -0.4075, -0.8874, -0.2241, -0.9579] + ], + [ + [1.089, -0.6483, 0.0726, -0.4752, -1.3283, 1.7103, 1.0703, 0.1076, -0.9211], + [-0.8629, 0.1376, 0.3202, 2.0955, 0.9696, 2.8988, -1.0012, 1.5049, -0.1278], + [1.9286, -1.5255, -2.9563, 2.4589, 3.3611, -0.6951, 0.3525, -1.7724, -5.9861], + [1.1226, 2.1561, 3.6417, 4.7546, -0.692, 4.4126, -5.1902, 6.0805, 2.3185], + [1.0111, 0.3604, 0.6432, -3.6605, 7.9517, -9.2955, -5.2988, -3.7803, -2.0642], + [3.3172, -1.7967, -3.6576, -2.0942, 1.3158, 0.112, -1.7405, 2.9167, 0.7957], + [5.1001, 1.8995, -1.8639, 1.1262, 9.9629, 2.683, -3.6319, -1.1607, 0.5856], + [-4.8445, -0.5642, 4.2317, 0.0856, 1.2267, -0.5712, 1.736, 1.0997, 0.6908], + [ + -5.5423, -1.1831, -1.2176, 0.0843, 0.0446, -0.7545, -2.4798, -0.0827, + 1.0171 + ] + ] ] - ] - ); + ); + } Ok(()) } @@ -278,6 +294,12 @@ fn conv2d_small(dev: &Device) -> Result<()> { 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000 ] ); + + // conv-transposes are not implemented for metal + if dev.is_metal() { + return Ok(()); + } + let res = t.conv_transpose2d(&w.transpose(0, 1)?, 0, 0, 1, 1)?; assert_eq!(res.dims(), [1, 1, 3, 3]); assert_eq!( @@ -379,6 +401,10 @@ print(w.grad.shape) print(w.grad[0]) */ fn conv2d_grad(dev: &Device) -> Result<()> { + // conv-transposes are not implemented for metal + if dev.is_metal() { + return Ok(()); + } use candle_core::Var; let t = Var::from_slice( &[ |