NVIDIA-NeMo · clumsy · Nov 19, 2025 · Nov 20, 2025
@@ -115,6 +115,8 @@ def is_using_tf32() -> bool:
     ("NVIDIA A100 80GB PCIe", torch.float32): 312 / 2 if is_using_tf32() else 19.5,
     ("NVIDIA H100 80GB HBM3", torch.bfloat16): 1979 / 2,
     ("NVIDIA H100 80GB HBM3", torch.float32): 989 / 2 if is_using_tf32() else 67.0,
+    ("NVIDIA H200", torch.bfloat16): 1979 / 2,
+    ("NVIDIA H200", torch.float32): 989 / 2 if is_using_tf32() else 67.0,
     ("NVIDIA B200", torch.bfloat16): 4500 / 2,
     ("NVIDIA B200", torch.float32): 2200 / 2 if is_using_tf32() else 80.0,
     ("NVIDIA B300", torch.bfloat16): 4500 / 2,

@@ -0,0 +1,42 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+
+import torch
+
+from nemo_rl.utils.flops_tracker import get_theoretical_tflops, is_using_tf32
+
+
+@pytest.mark.parametrize(
+    "device_name, model_dtype, tflops",
+    [
+        ("NVIDIA A100 80GB PCIe", torch.bfloat16, 624 / 2),
+        ("NVIDIA A100 80GB PCIe", torch.float32, 312 / 2 if is_using_tf32() else 19.5),
+        ("NVIDIA H100 80GB HBM3", torch.bfloat16, 1979 / 2),
+        ("NVIDIA H100 80GB HBM3", torch.float32, 989 / 2 if is_using_tf32() else 67.0),
+        ("NVIDIA H200", torch.bfloat16, 1979 / 2),
+        ("NVIDIA H200", torch.float32, 989 / 2 if is_using_tf32() else 67.0),
+        ("NVIDIA B200", torch.bfloat16, 4500 / 2),
+        ("NVIDIA B200", torch.float32, 2200 / 2 if is_using_tf32() else 80.0),
+        ("NVIDIA B300", torch.bfloat16, 4500 / 2),
+        ("NVIDIA B300", torch.float32, 2200 / 2 if is_using_tf32() else 80.0),
+        ("NVIDIA GB200", torch.bfloat16, 4900 / 2),
+        ("NVIDIA GB200", torch.float32, 2500 / 2 if is_using_tf32() else 80.0),
+        ("NVIDIA GB300", torch.bfloat16, 4900 / 2),
+        ("NVIDIA GB300", torch.float32, 2500 / 2 if is_using_tf32() else 80.0),
+    ],
+)
+def test_theoretical_tflops(device_name, model_dtype, tflops):
+    assert get_theoretical_tflops(device_name, model_dtype) == pytest.approx(tflops)