60 changes: 58 additions & 2 deletions docs/en/learn/llm-connections.mdx
@@ -14,7 +14,64 @@ CrewAI uses LiteLLM to connect to a wide variety of Language Models (LLMs). This
You can easily configure your agents to use a different model or provider as described in this guide.
</Note>

## Supported Providers
## Native NVIDIA Provider

CrewAI includes native support for NVIDIA NIM (NVIDIA Inference Microservices), providing direct access to 180+ high-performance models, including Qwen, Llama, DeepSeek R1, and Mistral.

<Note>
**Auto-Detection**: When `NVIDIA_API_KEY` is set, any model whose name contains "/" (e.g., `qwen/qwen3-next-80b-a3b-instruct`) is checked against NVIDIA's catalog and, if found there, automatically routed to the native NVIDIA provider. No configuration is needed beyond setting the key.
</Note>

### Quick Start

<Steps>
<Step title="Get API Key">
Visit [NVIDIA Build](https://build.nvidia.com/) to get a free API key (format: `nvapi-...`)
</Step>
<Step title="Set Environment Variable">
```bash
export NVIDIA_API_KEY="nvapi-your-key-here"
```
</Step>
<Step title="Use NVIDIA Models">
```python
from crewai import Agent, LLM

# "/" in model name triggers NVIDIA provider automatically
llm = LLM(model="qwen/qwen3-next-80b-a3b-instruct", temperature=0.7)

agent = Agent(
role="Research Analyst",
goal="Analyze data and provide insights",
backstory="Expert in data analysis",
llm=llm
)
```
</Step>
</Steps>

### Key Features

- **180+ Models**: Chat, code, reasoning, vision, and safety models
- **Auto-Detection**: Automatic routing for NVIDIA-catalog models with "/" in the name
- **Streaming Support**: Real-time response streaming (see the sketch after this list)
- **Vision Models**: Llama 3.2 Vision (11B/90B), Phi-4 Vision
- **Reasoning Models**: DeepSeek R1, QwQ-32B with chain-of-thought
- **Built-in Security**: Input validation and resource management

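If you want partial output as it is generated, a streaming flag on the `LLM` constructor is the usual route. The following is a minimal sketch that assumes your CrewAI version exposes a `stream` parameter; check your release before relying on it:

```python
from crewai import LLM

# Minimal streaming sketch -- assumes the LLM constructor accepts a `stream` flag.
# With streaming enabled, responses arrive as incremental chunks rather than a
# single completed message.
streaming_llm = LLM(
    model="qwen/qwen3-next-80b-a3b-instruct",
    temperature=0.7,
    stream=True,
)
```

Reasoning models such as `deepseek-ai/deepseek-r1` follow the same pattern; only the model string changes.
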
### Popular Models

| Category | Model | Best For |
|----------|-------|----------|
| **Chat** | `qwen/qwen3-next-80b-a3b-instruct` | General conversation & analysis |
| **Chat** | `meta/llama-3.1-70b-instruct` | High-quality responses |
| **Code** | `qwen/qwen2.5-coder-32b-instruct` | Code generation & debugging |
| **Reasoning** | `deepseek-ai/deepseek-r1` | Complex problem solving |
| **Vision** | `meta/llama-3.2-90b-vision-instruct` | Image analysis & understanding |

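As an illustration of how the table maps to practice, the sketch below pairs each agent with a model suited to its task. The model IDs come from the rows above and the agent fields follow the Quick Start pattern; verify the IDs against your NVIDIA catalog before relying on them.

```python
from crewai import Agent, LLM

# Illustrative sketch: one agent per specialty, each backed by a model from the
# table above.
coder = Agent(
    role="Software Engineer",
    goal="Write and debug Python utilities",
    backstory="Seasoned backend developer",
    llm=LLM(model="qwen/qwen2.5-coder-32b-instruct", temperature=0.2),
)

analyst = Agent(
    role="Strategy Analyst",
    goal="Break complex problems into clear, actionable steps",
    backstory="Specialist in structured reasoning",
    llm=LLM(model="deepseek-ai/deepseek-r1", temperature=0.6),
)
```
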
See [NVIDIA Build](https://build.nvidia.com/) for the complete model catalog.

## LiteLLM Providers

LiteLLM supports a wide range of providers, including but not limited to:

@@ -36,7 +93,6 @@ LiteLLM supports a wide range of providers, including but not limited to:
- Groq
- SambaNova
- Nebius AI Studio
- [NVIDIA NIMs](https://docs.api.nvidia.com/nim/reference/models-1)
- And many more!

For a complete and up-to-date list of supported providers, please refer to the [LiteLLM Providers documentation](https://docs.litellm.ai/docs/providers).
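
For example, to route a request through LiteLLM rather than a native provider, prefix the model name with the provider's LiteLLM identifier and set that provider's API key. The snippet below is a sketch: the Groq model ID is illustrative, and each provider documents its own key variable.

```python
import os
from crewai import LLM

# Hedged sketch of LiteLLM routing via a provider prefix. The model ID below is
# illustrative -- check Groq's current catalog for exact names.
os.environ["GROQ_API_KEY"] = "your-groq-key"

groq_llm = LLM(model="groq/llama-3.1-70b-versatile", temperature=0.7)
```
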
185 changes: 156 additions & 29 deletions lib/crewai/src/crewai/llm.py
@@ -9,6 +9,7 @@
import os
import sys
import threading
import time
from typing import (
TYPE_CHECKING,
Any,
@@ -24,6 +25,12 @@
from pydantic import BaseModel, Field
from typing_extensions import Self

# Cache for NVIDIA model list to avoid repeated API calls
_nvidia_models_cache: set[str] | None = None
_nvidia_cache_timestamp: float | None = None
_NVIDIA_CACHE_TTL = 3600 # 1 hour cache expiration
_nvidia_cache_lock = threading.Lock()

from crewai.events.event_bus import crewai_event_bus
from crewai.events.types.llm_events import (
LLMCallCompletedEvent,
@@ -316,6 +323,7 @@ def writable(self) -> bool:
"gemini",
"bedrock",
"aws",
"nvidia",
]


@@ -339,6 +347,75 @@ class AccumulatedToolArgs(BaseModel):
function: FunctionArgs = Field(default_factory=FunctionArgs)


def _get_nvidia_models() -> set[str]:
    """Fetch and cache the list of models available from NVIDIA NIM API.

    Returns:
        Set of model IDs available in NVIDIA's catalog
    """
    global _nvidia_models_cache, _nvidia_cache_timestamp

    # Check if cache exists and hasn't expired
    if _nvidia_models_cache is not None and _nvidia_cache_timestamp is not None:
        if time.time() - _nvidia_cache_timestamp < _NVIDIA_CACHE_TTL:
            return _nvidia_models_cache
        # Cache expired - will refresh below

    # Thread-safe cache initialization
    with _nvidia_cache_lock:
        # Double-check after acquiring lock (with TTL check)
        if _nvidia_models_cache is not None and _nvidia_cache_timestamp is not None:
            if time.time() - _nvidia_cache_timestamp < _NVIDIA_CACHE_TTL:
                return _nvidia_models_cache
            # Cache expired - proceed with refresh

        # Accept both NVIDIA_API_KEY (build.nvidia.com) and NVIDIA_NIM_API_KEY (cloud endpoints)
        api_key = os.getenv("NVIDIA_API_KEY") or os.getenv("NVIDIA_NIM_API_KEY")
        if not api_key:
            _nvidia_models_cache = set()
            _nvidia_cache_timestamp = time.time()
            return _nvidia_models_cache

        try:
            # Use httpx instead of requests for better security and async support
            # All HTTP logic inside lock to prevent race conditions
            with httpx.Client(timeout=5.0) as client:
                response = client.get(
                    "https://integrate.api.nvidia.com/v1/models",
                    headers={"Authorization": f"Bearer {api_key}"},
                )

            if response.status_code == 200:
                models = response.json().get("data", [])
                # Dedupe model IDs (NVIDIA API has some duplicates)
                _nvidia_models_cache = set([m["id"] for m in models])
                _nvidia_cache_timestamp = time.time()
            else:
                logging.warning(
                    f"NVIDIA API returned status {response.status_code}"
                )
                _nvidia_models_cache = set()
                _nvidia_cache_timestamp = time.time()
        except httpx.TimeoutException:
            logging.warning("NVIDIA API request timed out")
            _nvidia_models_cache = set()
            _nvidia_cache_timestamp = time.time()
        except httpx.HTTPError as e:
            # Sanitize error message to avoid leaking API keys
            error_msg = str(e).replace(api_key, "***")
            logging.warning(f"NVIDIA API request failed: {error_msg}")
            _nvidia_models_cache = set()
            _nvidia_cache_timestamp = time.time()
        except Exception as e:
            # Catch-all for unexpected errors, with API key sanitization
            error_msg = str(e).replace(api_key, "***") if api_key else str(e)
            logging.warning(f"Failed to fetch NVIDIA models: {error_msg}")
            _nvidia_models_cache = set()
            _nvidia_cache_timestamp = time.time()

    return _nvidia_models_cache


class LLM(BaseLLM):
completion_cost: float | None = None

@@ -363,32 +440,75 @@ def __new__(cls, model: str, is_litellm: bool = False, **kwargs: Any) -> LLM:
use_native = True
model_string = model
elif "/" in model:
prefix, _, model_part = model.partition("/")

provider_mapping = {
"openai": "openai",
"anthropic": "anthropic",
"claude": "anthropic",
"azure": "azure",
"azure_openai": "azure",
"google": "gemini",
"gemini": "gemini",
"bedrock": "bedrock",
"aws": "bedrock",
}

canonical_provider = provider_mapping.get(prefix.lower())

if canonical_provider and cls._validate_model_in_constants(
model_part, canonical_provider
):
provider = canonical_provider
use_native = True
model_string = model_part
# If NVIDIA API key is set, check if model is in NVIDIA's catalog FIRST
# This is the most accurate way: route to NVIDIA if they have it
# Accept both NVIDIA_API_KEY and NVIDIA_NIM_API_KEY
if os.getenv("NVIDIA_API_KEY") or os.getenv("NVIDIA_NIM_API_KEY"):
nvidia_models = _get_nvidia_models()

if model in nvidia_models:
# Model is in NVIDIA's catalog - use NVIDIA
provider = "nvidia"
use_native = True
model_string = model
else:
# Model NOT in NVIDIA catalog - fall back to standard routing
prefix, _, model_part = model.partition("/")

provider_mapping = {
"openai": "openai",
"anthropic": "anthropic",
"claude": "anthropic",
"azure": "azure",
"azure_openai": "azure",
"google": "gemini",
"gemini": "gemini",
"bedrock": "bedrock",
"aws": "bedrock",
}

canonical_provider = provider_mapping.get(prefix.lower())

if canonical_provider and cls._validate_model_in_constants(
model_part, canonical_provider
):
provider = canonical_provider
use_native = True
model_string = model_part
else:
# Not in NVIDIA and not recognized - try litellm
provider = prefix
use_native = False
model_string = model_part
else:
provider = prefix
use_native = False
model_string = model_part
prefix, _, model_part = model.partition("/")

provider_mapping = {
"openai": "openai",
"anthropic": "anthropic",
"claude": "anthropic",
"azure": "azure",
"azure_openai": "azure",
"google": "gemini",
"gemini": "gemini",
"bedrock": "bedrock",
"aws": "bedrock",
}

canonical_provider = provider_mapping.get(prefix.lower())

if canonical_provider and cls._validate_model_in_constants(
model_part, canonical_provider
):
provider = canonical_provider
use_native = True
model_string = model_part
else:
# Unknown provider - fall back to LiteLLM
# (NVIDIA models are handled by catalog check above when API key is set)
provider = prefix
use_native = False
model_string = model_part
else:
provider = cls._infer_provider_from_model(model)
use_native = True
Expand Down Expand Up @@ -446,10 +566,9 @@ def _matches_provider_pattern(cls, model: str, provider: str) -> bool:
)

if provider == "gemini" or provider == "google":
return any(
model_lower.startswith(prefix)
for prefix in ["gemini-", "gemma-", "learnlm-"]
)
# Only match Gemini-specific models, not open models like Gemma
# Gemma can be hosted on NVIDIA/other providers
return model_lower.startswith("gemini-") or model_lower.startswith("learnlm-")

if provider == "bedrock":
return "." in model_lower
@@ -460,6 +579,9 @@ def _matches_provider_pattern(cls, model: str, provider: str) -> bool:
for prefix in ["gpt-", "gpt-35-", "o1", "o3", "o4", "azure-"]
)

# NVIDIA routing is handled by dynamic catalog check in __new__
# No static pattern matching needed - always use catalog lookup

return False

@classmethod
@@ -559,6 +681,11 @@ def _get_native_provider(cls, provider: str) -> type | None:

return BedrockCompletion

if provider == "nvidia":
from crewai.llms.providers.nvidia.completion import NvidiaCompletion

return NvidiaCompletion

return None

def __init__(
9 changes: 9 additions & 0 deletions lib/crewai/src/crewai/llms/constants.py
@@ -566,3 +566,12 @@
"qwen.qwen3-coder-30b-a3b-v1:0",
"twelvelabs.pegasus-1-2-v1:0",
]


# NVIDIA models (Jan 2026) - routing for "/" model names is handled dynamically
# via the catalog check in LLM.__new__; this list is a representative sample.
NVIDIA_MODELS = [
    "qwen/qwen3-next-80b-a3b-instruct",  # Latest Qwen3-Next, excellent tool calling (Jan 2026)
    "qwen/qwen2.5-7b-instruct",  # Efficient general-purpose model
    "deepseek-ai/deepseek-r1-distill-qwen-14b",  # Reasoning with Qwen base (Jan 2026)
    "nvidia/cosmos-reason2-8b",  # Vision + reasoning model (Jan 2026)
]
5 changes: 5 additions & 0 deletions lib/crewai/src/crewai/llms/providers/nvidia/__init__.py
@@ -0,0 +1,5 @@
"""NVIDIA provider for CrewAI."""

from crewai.llms.providers.nvidia.completion import NvidiaCompletion

__all__ = ["NvidiaCompletion"]