1 file changed: +3 -4 lines changed.
Original file line number | Diff line number | Diff line change
@@ -330,15 +330,14 @@ def _verify_cache_dtype(self) -> None:
330
330
if self .cache_dtype == "auto" :
331
331
pass
332
332
elif self .cache_dtype == "fp8_e5m2" :
333
+ if is_hip ():
334
+ raise NotImplementedError (
335
+ "FP8_E5M2 KV Cache on AMD GPU has not been supported yet." )
333
336
nvcc_cuda_version = get_nvcc_cuda_version ()
334
337
if nvcc_cuda_version and nvcc_cuda_version < Version ("11.8" ):
335
338
raise ValueError (
336
339
"FP8 is not supported when cuda version is lower than 11.8."
337
340
)
338
- device_name = torch .cuda .get_device_name ()
339
- if "AMD" in device_name :
340
- raise NotImplementedError (
341
- "FP8_E5M2 KV Cache on AMD GPU has not been supported yet." )
342
341
logger .info (
343
342
"Using fp8_e5m2 data type to store kv cache. It reduces "
344
343
"the GPU memory footprint and boosts the performance. "
You can’t perform that action at this time.
0 commit comments