DataDog · Kyle-Neale · Nov 27, 2024 · Oct 31, 2024 · Oct 31, 2024 · Oct 31, 2024
@@ -710,6 +710,10 @@ coverage:
         target: 75
         flags:
         - kyverno
+      nvidia_nim:
+        target: 75
+        flags:
+        - nvidia_nim
       tibco_ems:
         target: 75
         flags:
@@ -1289,6 +1293,11 @@ flags:
     paths:
     - nginx_ingress_controller/datadog_checks/nginx_ingress_controller
     - nginx_ingress_controller/tests
+  nvidia_nim:
+    carryforward: true
+    paths:
+    - nvidia_nim/datadog_checks/nvidia_nim
+    - nvidia_nim/tests
   nvidia_triton:
     carryforward: true
     paths:

@@ -365,6 +365,8 @@ integration/ntp:
 - ntp/**/*
 integration/nvidia_jetson:
 - nvidia_jetson/**/*
+integration/nvidia_nim:
+- nvidia_nim/**/*
 integration/nvidia_triton:
 - nvidia_triton/**/*
 integration/oke:

@@ -2654,6 +2654,26 @@ jobs:
       minimum-base-package: ${{ inputs.minimum-base-package }}
       pytest-args: ${{ inputs.pytest-args }}
     secrets: inherit
+  jb705691:
+    uses: ./.github/workflows/test-target.yml
+    with:
+      job-name: nvidia_nim
+      target: nvidia_nim
+      platform: linux
+      runner: '["ubuntu-22.04"]'
+      repo: "${{ inputs.repo }}"
+      python-version: "${{ inputs.python-version }}"
+      standard: ${{ inputs.standard }}
+      latest: ${{ inputs.latest }}
+      agent-image: "${{ inputs.agent-image }}"
+      agent-image-py2: "${{ inputs.agent-image-py2 }}"
+      agent-image-windows: "${{ inputs.agent-image-windows }}"
+      agent-image-windows-py2: "${{ inputs.agent-image-windows-py2 }}"
+      test-py2: ${{ inputs.test-py2 }}
+      test-py3: ${{ inputs.test-py3 }}
+      minimum-base-package: ${{ inputs.minimum-base-package }}
+      pytest-args: ${{ inputs.pytest-args }}
+    secrets: inherit
   j74dc677:
     uses: ./.github/workflows/test-target.yml
     with:

@@ -0,0 +1,4 @@
+# CHANGELOG - nvidia_nim
+
+<!-- towncrier release notes start -->
+
@@ -0,0 +1,62 @@
+# Agent Check: NVIDIA NIM
+
+## Overview
+
+This check monitors [NVIDIA NIM][1] through the Datadog Agent.
+
+## Setup
+
+Follow the instructions below to install and configure this check for an Agent running on a host. For containerized environments, see the [Autodiscovery Integration Templates][3] for guidance on applying these instructions.
+
+**Requirements**:
+- This check requires Agent v7.61.0+
+- This check uses [OpenMetrics][10] for metric collection, which requires Python 3.
+
+### Installation
+
+The NVIDIA NIM check is included in the [Datadog Agent][2] package. No additional installation is needed on your server.
+
+### Configuration
+
+NVIDIA NIM provides Prometheus [metrics][1] indicating request statistics. By default, these metrics are available at `http://localhost:8000/metrics`. The Datadog Agent can collect the exposed metrics using this integration. Follow the instructions below to configure data collection.
+
+To start collecting your NVIDIA NIM performance data:
+1. Edit the `nvidia_nim.d/conf.yaml` file, in the `conf.d/` folder at the root of your Agent's configuration directory. See the [sample nvidia_nim.d/conf.yaml][4] for all available configuration options.
+
+2. [Restart the Agent][5].
+
+### Validation
+
+[Run the Agent's status subcommand][6] and look for `nvidia_nim` under the Checks section.
+
+## Data Collected
+
+### Metrics
+
+See [metadata.csv][7] for a list of metrics provided by this integration.
+
+### Events
+
+The NVIDIA NIM integration does not include any events.
+
+### Service Checks
+
+See [service_checks.json][8] for the list of service checks, if any, provided by this integration.
+
+## Troubleshooting
+
+Need help? Contact [Datadog support][9].
+
+
+[1]: https://docs.nvidia.com/nim/large-language-models/latest/observability.html
+[2]: https://app.datadoghq.com/account/settings/agent/latest
+[3]: https://docs.datadoghq.com/agent/kubernetes/integrations/
+[4]: https://github.com/DataDog/integrations-core/blob/master/nvidia_nim/datadog_checks/nvidia_nim/data/conf.yaml.example
+[5]: https://docs.datadoghq.com/agent/guide/agent-commands/#start-stop-and-restart-the-agent
+[6]: https://docs.datadoghq.com/agent/guide/agent-commands/#agent-status-and-information
+[7]: https://github.com/DataDog/integrations-core/blob/master/nvidia_nim/metadata.csv
+[8]: https://github.com/DataDog/integrations-core/blob/master/nvidia_nim/assets/service_checks.json
+[9]: https://docs.datadoghq.com/help/
+[10]: https://docs.datadoghq.com/integrations/openmetrics/
@@ -0,0 +1,16 @@
+name: nvidia_nim
+files:
+- name: nvidia_nim.yaml
+  options:
+  - template: init_config
+    options:
+    - template: init_config/openmetrics
+  - template: instances
+    options:
+    - template: instances/openmetrics
+      overrides:
+        openmetrics_endpoint.required: true
+        openmetrics_endpoint.value.example: http://localhost:8000/metrics
+        openmetrics_endpoint.description: |
+          Endpoint exposing the NVIDIA NIM's Prometheus metrics. For more information refer to:
+          https://docs.nvidia.com/nim/large-language-models/latest/observability.html
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,4 @@
		# CHANGELOG - nvidia_nim

		<!-- towncrier release notes start -->