
Commit baee28c

Reorder kv dtype check to avoid nvcc not found error on AMD platform (vllm-project#3104)
1 parent: 29e70e3

File tree

1 file changed (+3 −4 lines)


vllm/config.py

Lines changed: 3 additions & 4 deletions
@@ -330,15 +330,14 @@ def _verify_cache_dtype(self) -> None:
         if self.cache_dtype == "auto":
             pass
         elif self.cache_dtype == "fp8_e5m2":
+            if is_hip():
+                raise NotImplementedError(
+                    "FP8_E5M2 KV Cache on AMD GPU has not been supported yet.")
             nvcc_cuda_version = get_nvcc_cuda_version()
             if nvcc_cuda_version and nvcc_cuda_version < Version("11.8"):
                 raise ValueError(
                     "FP8 is not supported when cuda version is lower than 11.8."
                 )
-            device_name = torch.cuda.get_device_name()
-            if "AMD" in device_name:
-                raise NotImplementedError(
-                    "FP8_E5M2 KV Cache on AMD GPU has not been supported yet.")
             logger.info(
                 "Using fp8_e5m2 data type to store kv cache. It reduces "
                 "the GPU memory footprint and boosts the performance. "

0 commit comments