Skip to content

Commit 7c392c2

Browse files
expect n_per_wmma and m_per_wmma to be 32 for gfx950
1 parent 8143f20 commit 7c392c2

File tree

1 file changed

+5
-3
lines changed

1 file changed

+5
-3
lines changed

experimental/builder/include/ck_tile/builder/conv_algorithm_limits.hpp

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,9 @@ concept IsValidWmmaConvolutionTileing =
49 49
(thread_block.tile_size.m % (gridwise_gemm.m_per_wmma * gridwise_gemm.m_wmma_per_wave) == 0) &&
50 50
(thread_block.tile_size.n >= gridwise_gemm.m_per_wmma * gridwise_gemm.n_wmma_per_wave) &&
51 51
(thread_block.tile_size.n % (gridwise_gemm.m_per_wmma * gridwise_gemm.n_wmma_per_wave) == 0) &&
52-
(gridwise_gemm.m_per_wmma == 16) && // Check if this holds on MI400 and add switch if it dosen't
53-
(gridwise_gemm.n_per_wmma == 16); // Check if this holds on MI400 and add switch if it dosen't
54-
52+
#if defined(__gfx950__)
53+
(gridwise_gemm.m_per_wmma == 32) && (gridwise_gemm.n_per_wmma == 32);
54+
#else
55+
(gridwise_gemm.m_per_wmma == 16) && (gridwise_gemm.n_per_wmma == 16);
56+
#endif
55 57
} // namespace ck_tile::builder

0 commit comments

Comments
 (0)