Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,23 @@ def _get_lib_file_extension() -> str:
elif platform_name == 'Darwin':
return "dylib"

def get_gpu_llm_inference_config() -> dict:
    """Return OpenVINO properties tuned for LLM inference on NVIDIA GPUs.

    The configuration is aimed at stateful models: it requests the latency
    performance mode, hints FP16 inference precision, and disables profiling.

    Returns:
        A dict mapping OpenVINO property keys to the chosen values.
    """
    # Imported at call time, like the other function-local ``openvino``
    # imports in this module. The property submodules are imported explicitly
    # so the lookup does not rely on them being attached to the top-level
    # package as attributes.
    import openvino as ov
    import openvino.properties as props
    import openvino.properties.hint as hints

    return {
        hints.performance_mode: hints.PerformanceMode.LATENCY,
        # Hint FP16 precision to utilize NVIDIA Tensor Cores for LLM inference.
        # The element-type enum is exposed as ``ov.Type``; the ``properties``
        # namespace does not provide a ``Type`` for element types.
        hints.inference_precision: ov.Type.f16,
        # Disable profiling to reduce overhead and potentially prevent
        # interference with stateful models.
        props.enable_profiling: False,
    }


def _register_nvidia_plugin(force=False):
import openvino
Expand Down Expand Up @@ -58,4 +75,4 @@ def __getattr__(name):
_register_nvidia_plugin()


# Package version string; assigned once (the duplicated assignment was redundant).
__version__ = "2025.3.0"