Skip to content

Commit 1f947b5

Browse files
committed
disable triton FA by default
1 parent 22f9066 commit 1f947b5

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

vllm/envs.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@
14 14   VLLM_ROCM_PREFER_TORCH: bool = False
15 15   VLLM_ROCM_PREFER_TRITON: bool = True
16 16   VLLM_USE_SDPA_ATTENTION: bool = False
17    - VLLM_USE_TRITON_FLASH_ATTN: bool = True
   17 + VLLM_USE_TRITON_FLASH_ATTN: bool = False
18 18   VLLM_USE_ROCM_SKINNY_GEMM: bool = True
19 19   VLLM_USE_ROCM_CUSTOM_PAGED_ATTN: bool = True
20 20   VLLM_USE_ROCM_CUSTOM_PAGED_ATTN_FP8_OUT: bool = True
@@ -227,7 +227,7 @@ def get_default_config_root():
227 227
228 228   # flag to control if vllm should use triton flash attention
229 229   "VLLM_USE_TRITON_FLASH_ATTN":
230     - lambda: (os.environ.get("VLLM_USE_TRITON_FLASH_ATTN", "True").lower() in
    230 + lambda: (os.environ.get("VLLM_USE_TRITON_FLASH_ATTN", "False").lower() in
231 231   ("true", "1")),
232 232
233 233   # Internal flag to enable Dynamo fullgraph capture

0 commit comments

Comments
 (0)