From 9cbda109ea0967e7af1fa56581af9d38bd0c1876 Mon Sep 17 00:00:00 2001
From: Gregory Shtrasberg <156009573+gshtras@users.noreply.github.com>
Date: Mon, 16 Dec 2024 17:41:51 -0500
Subject: [PATCH] Fix regression from #246

---
 vllm/attention/ops/triton_flash_attention.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/vllm/attention/ops/triton_flash_attention.py b/vllm/attention/ops/triton_flash_attention.py
index 3671c2f91e3b7..2e647a8e52278 100644
--- a/vllm/attention/ops/triton_flash_attention.py
+++ b/vllm/attention/ops/triton_flash_attention.py
@@ -394,7 +394,6 @@ def get_autotune_configs():
 @triton.autotune(
     configs=autotune_configs,
     key=autotune_keys,
-    use_cuda_graph=True,
 )
 @triton.jit
 def attn_fwd(