Skip to content

Commit a32c730

Browse files
authored
Optimize Flash Dynamic Mask Attention Kernel Configurations
2 parents 6bf2371 + 683a7cb commit a32c730

File tree

1 file changed

+0
-1
lines changed

1 file changed

+0
-1
lines changed

csrc/src/flash_bwd_launch_template.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,6 @@ void run_mha_bwd_hdim96(Flash_bwd_params &params, cudaStream_t stream) {
     if (status_ != cudaSuccess) {
         C10_CUDA_CHECK(status_);
     }
-    // printf("max_smem_per_block = %d\n", max_smem_per_block);
     if (max_smem_per_block >= 116 * 1024) { // H100 and A100
         // 116KB, 1 CTAs in A100, 1 CTAs in H100.
         run_flash_bwd<Flash_bwd_kernel_traits<Headdim, 64, 128, 8, 2, 4, 4, false, false, T>, Is_causal>(params, stream);

0 commit comments

Comments (0)