openvinotoolkit · ilya-lavrenov · Feb 6, 2025 · Feb 8, 2025 · Feb 9, 2025
diff --git a/src/cpp/src/continuous_batching_impl.cpp b/src/cpp/src/continuous_batching_impl.cpp
@@ -53,11 +53,6 @@ void apply_kv_cache_precision(const std::shared_ptr<ov::Model>& model, const std
             // x86 and ARM have different default kv cache type, take this information from the plugin
             m_kv_cache_type = core.get_property(device, ov::hint::kv_cache_precision);
         }
-
-        // TEMP WA: currently FP16 / BF16 KV cache is faster than U8 for PagedAttention
-        if (m_kv_cache_type == ov::element::u8) {
-            m_kv_cache_type = inference_precision == ov::element::bf16 ? ov::element::bf16 : ov::element::f16;
-        }
     } else if (device.find("GPU") != std::string::npos) {
         if (accuracy_mode) {
             inference_precision = ov::element::f32;