Skip to content

Commit 364420a

Browse files
FindHao authored and
facebook-github-bot committed
Fix pynvml (#2553)
Summary: Fix #2552. Require `pynvml>=12.0.0`. Pull Request resolved: #2553. Reviewed By: xuzhao9. Differential Revision: D66792733. Pulled By: FindHao. fbshipit-source-id: 1e7ed65db13bd592cbbd70dcd38cca740d3eaf26
1 parent 7340ede commit 364420a

File tree

2 files changed

+5
-19
lines changed

2 files changed

+5
-19
lines changed

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ pyyaml
1919
numpy
2020
opencv-python
2121
submitit
22-
pynvml
22+
pynvml>=12.0.0
2323
pandas
2424
scipy
2525
numba

torchbenchmark/_components/model_analyzer/dcgm/nvml_monitor.py

Lines changed: 4 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@
22

33
import pynvml
44

5-
from packaging import version
6-
75
from ..tb_dcgm_types.gpu_free_memory import GPUFreeMemory
86
from ..tb_dcgm_types.gpu_peak_memory import GPUPeakMemory
97
from ..tb_dcgm_types.gpu_power_usage import GPUPowerUsage
@@ -14,9 +12,7 @@
1412

1513

1614
class NVMLMonitor(Monitor):
17-
"""
18-
Use NVML to monitor GPU metrics
19-
"""
15+
"""Use NVML to monitor GPU metrics."""
2016

2117
# Mapping between the NVML Fields and Model Analyzer Records
2218
# For more explanations, please refer to https://docs.nvidia.com/deploy/nvml-api/group__nvmlDeviceQueries.html
@@ -28,7 +24,8 @@ class NVMLMonitor(Monitor):
2824
}
2925

3026
def __init__(self, gpus, frequency, metrics):
31-
"""
27+
"""Initialize the NVML monitor.
28+
3229
Parameters
3330
----------
3431
gpus : list of GPUDevice
@@ -48,24 +45,13 @@ def __init__(self, gpus, frequency, metrics):
4845
self._gpus = gpus
4946
# gpu handles: {gpu: handle}
5047
self._gpu_handles = {}
51-
self._nvmlDeviceGetHandleByUUID = None
52-
self.check_nvml_compatibility()
48+
self._nvmlDeviceGetHandleByUUID = self._nvml.nvmlDeviceGetHandleByUUID
5349
for gpu in self._gpus:
5450
self._gpu_handles[gpu] = self._nvmlDeviceGetHandleByUUID(gpu.device_uuid())
5551
self._records[gpu] = {}
5652
for metric in self._metrics:
5753
self._records[gpu][metric] = []
5854

59-
def check_nvml_compatibility(self):
60-
# check pynvml version, if it is less than 11.5.0, convert uuid to bytes
61-
current_version = version.parse(pynvml.__version__)
62-
if current_version < version.parse("11.5.0"):
63-
self._nvmlDeviceGetHandleByUUID = (
64-
self._nvmlDeviceGetHandleByUUID_for_older_pynvml
65-
)
66-
else:
67-
self._nvmlDeviceGetHandleByUUID = self._nvml.nvmlDeviceGetHandleByUUID
68-
6955
def _nvmlDeviceGetHandleByUUID_for_older_pynvml(self, uuid):
7056
return self._nvml.nvmlDeviceGetHandleByUUID(uuid.encode("ascii"))
7157

0 commit comments

Comments
 (0)