
Commit baee28c

Reorder kv dtype check to avoid nvcc not found error on AMD platform (vllm-project#3104)
1 parent: 29e70e3

File tree

1 file changed (+3 −4 lines)


vllm/config.py

Lines changed: 3 additions & 4 deletions
@@ -330,15 +330,14 @@ def _verify_cache_dtype(self) -> None:
         if self.cache_dtype == "auto":
             pass
         elif self.cache_dtype == "fp8_e5m2":
+            if is_hip():
+                raise NotImplementedError(
+                    "FP8_E5M2 KV Cache on AMD GPU has not been supported yet.")
             nvcc_cuda_version = get_nvcc_cuda_version()
             if nvcc_cuda_version and nvcc_cuda_version < Version("11.8"):
                 raise ValueError(
                     "FP8 is not supported when cuda version is lower than 11.8."
                 )
-            device_name = torch.cuda.get_device_name()
-            if "AMD" in device_name:
-                raise NotImplementedError(
-                    "FP8_E5M2 KV Cache on AMD GPU has not been supported yet.")
             logger.info(
                 "Using fp8_e5m2 data type to store kv cache. It reduces "
                 "the GPU memory footprint and boosts the performance. "

0 commit comments