60 changes: 58 additions & 2 deletions docs/en/learn/llm-connections.mdx
@@ -14,7 +14,64 @@ CrewAI uses LiteLLM to connect to a wide variety of Language Models (LLMs). This
You can easily configure your agents to use a different model or provider as described in this guide.
</Note>

## Supported Providers
## Native NVIDIA Provider

CrewAI includes native support for NVIDIA NIM (NVIDIA Inference Microservices), providing direct access to 180+ high-performance models, including Qwen, Llama, DeepSeek R1, and Mistral.

<Note>
**Auto-Detection**: When `NVIDIA_API_KEY` is set, any model whose name contains "/" (e.g., `qwen/qwen3-next-80b-a3b-instruct`) is checked against NVIDIA's catalog and, if found there, automatically routed to the native NVIDIA provider. No configuration is needed beyond setting the key.
</Note>

### Quick Start

<Steps>
<Step title="Get API Key">
Visit [NVIDIA Build](https://build.nvidia.com/) to get a free API key (format: `nvapi-...`)
</Step>
<Step title="Set Environment Variable">
```bash
export NVIDIA_API_KEY="nvapi-your-key-here"
```
</Step>
<Step title="Use NVIDIA Models">
```python
from crewai import Agent, LLM

# "/" in model name triggers NVIDIA provider automatically
llm = LLM(model="qwen/qwen3-next-80b-a3b-instruct", temperature=0.7)

agent = Agent(
role="Research Analyst",
goal="Analyze data and provide insights",
backstory="Expert in data analysis",
llm=llm
)
```
</Step>
</Steps>

### Key Features

- **180+ Models**: Chat, code, reasoning, vision, and safety models
- **Auto-Detection**: Automatic routing for NVIDIA-catalog models with "/" in the name
- **Streaming Support**: Real-time response streaming (see the sketch after this list)
- **Vision Models**: Llama 3.2 Vision (11B/90B), Phi-4 Vision
- **Reasoning Models**: DeepSeek R1, QwQ-32B with chain-of-thought
- **Built-in Security**: Input validation and resource management

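If you want partial output as it is generated, a streaming flag on the `LLM` constructor is the usual route. The following is a minimal sketch that assumes your CrewAI version exposes a `stream` parameter; check your release before relying on it:

```python
from crewai import LLM

# Minimal streaming sketch -- assumes the LLM constructor accepts a `stream` flag.
# With streaming enabled, responses arrive as incremental chunks rather than a
# single completed message.
streaming_llm = LLM(
    model="qwen/qwen3-next-80b-a3b-instruct",
    temperature=0.7,
    stream=True,
)
```

Reasoning models such as `deepseek-ai/deepseek-r1` follow the same pattern; only the model string changes.
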
### Popular Models

| Category | Model | Best For |
|----------|-------|----------|
| **Chat** | `qwen/qwen3-next-80b-a3b-instruct` | General conversation & analysis |
| **Chat** | `meta/llama-3.1-70b-instruct` | High-quality responses |
| **Code** | `qwen/qwen2.5-coder-32b-instruct` | Code generation & debugging |
| **Reasoning** | `deepseek-ai/deepseek-r1` | Complex problem solving |
| **Vision** | `meta/llama-3.2-90b-vision-instruct` | Image analysis & understanding |

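As an illustration of how the table maps to practice, the sketch below pairs each agent with a model suited to its task. The model IDs come from the rows above and the agent fields follow the Quick Start pattern; verify the IDs against your NVIDIA catalog before relying on them.

```python
from crewai import Agent, LLM

# Illustrative sketch: one agent per specialty, each backed by a model from the
# table above.
coder = Agent(
    role="Software Engineer",
    goal="Write and debug Python utilities",
    backstory="Seasoned backend developer",
    llm=LLM(model="qwen/qwen2.5-coder-32b-instruct", temperature=0.2),
)

analyst = Agent(
    role="Strategy Analyst",
    goal="Break complex problems into clear, actionable steps",
    backstory="Specialist in structured reasoning",
    llm=LLM(model="deepseek-ai/deepseek-r1", temperature=0.6),
)
```
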
See [NVIDIA Build](https://build.nvidia.com/) for the complete model catalog.

## LiteLLM Providers

LiteLLM supports a wide range of providers, including but not limited to:

@@ -36,7 +93,6 @@ LiteLLM supports a wide range of providers, including but not limited to:
- Groq
- SambaNova
- Nebius AI Studio
- [NVIDIA NIMs](https://docs.api.nvidia.com/nim/reference/models-1)
- And many more!

For a complete and up-to-date list of supported providers, please refer to the [LiteLLM Providers documentation](https://docs.litellm.ai/docs/providers).
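
For example, to route a request through LiteLLM rather than a native provider, prefix the model name with the provider's LiteLLM identifier and set that provider's API key. The snippet below is a sketch: the Groq model ID is illustrative, and each provider documents its own key variable.

```python
import os
from crewai import LLM

# Hedged sketch of LiteLLM routing via a provider prefix. The model ID below is
# illustrative -- check Groq's current catalog for exact names.
os.environ["GROQ_API_KEY"] = "your-groq-key"

groq_llm = LLM(model="groq/llama-3.1-70b-versatile", temperature=0.7)
```
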
185 changes: 156 additions & 29 deletions lib/crewai/src/crewai/llm.py
@@ -9,6 +9,7 @@
import os
import sys
import threading
import time
from typing import (
TYPE_CHECKING,
Any,
@@ -24,6 +25,12 @@
from pydantic import BaseModel, Field
from typing_extensions import Self

# Cache for NVIDIA model list to avoid repeated API calls
_nvidia_models_cache: set[str] | None = None
_nvidia_cache_timestamp: float | None = None
_NVIDIA_CACHE_TTL = 3600 # 1 hour cache expiration
_nvidia_cache_lock = threading.Lock()

from crewai.events.event_bus import crewai_event_bus
from crewai.events.types.llm_events import (
LLMCallCompletedEvent,
@@ -316,6 +323,7 @@ def writable(self) -> bool:
"gemini",
"bedrock",
"aws",
"nvidia",
]


@@ -339,6 +347,75 @@ class AccumulatedToolArgs(BaseModel):
function: FunctionArgs = Field(default_factory=FunctionArgs)


def _get_nvidia_models() -> set[str]:
    """Fetch and cache the list of models available from NVIDIA NIM API.

    Returns:
        Set of model IDs available in NVIDIA's catalog
    """
    global _nvidia_models_cache, _nvidia_cache_timestamp

    # Check if cache exists and hasn't expired
    if _nvidia_models_cache is not None and _nvidia_cache_timestamp is not None:
        if time.time() - _nvidia_cache_timestamp < _NVIDIA_CACHE_TTL:
            return _nvidia_models_cache
        # Cache expired - will refresh below

    # Thread-safe cache initialization
    with _nvidia_cache_lock:
        # Double-check after acquiring lock (with TTL check)
        if _nvidia_models_cache is not None and _nvidia_cache_timestamp is not None:
            if time.time() - _nvidia_cache_timestamp < _NVIDIA_CACHE_TTL:
                return _nvidia_models_cache
            # Cache expired - proceed with refresh

        # Accept both NVIDIA_API_KEY (build.nvidia.com) and NVIDIA_NIM_API_KEY (cloud endpoints)
        api_key = os.getenv("NVIDIA_API_KEY") or os.getenv("NVIDIA_NIM_API_KEY")
        if not api_key:
            _nvidia_models_cache = set()
            _nvidia_cache_timestamp = time.time()
            return _nvidia_models_cache

        try:
            # Use httpx instead of requests for better security and async support
            # All HTTP logic inside lock to prevent race conditions
            with httpx.Client(timeout=5.0) as client:
                response = client.get(
                    "https://integrate.api.nvidia.com/v1/models",
                    headers={"Authorization": f"Bearer {api_key}"},
                )

            if response.status_code == 200:
                models = response.json().get("data", [])
                # Dedupe model IDs (NVIDIA API has some duplicates)
                _nvidia_models_cache = set([m["id"] for m in models])
                _nvidia_cache_timestamp = time.time()
            else:
                logging.warning(
                    f"NVIDIA API returned status {response.status_code}"
                )
                _nvidia_models_cache = set()
                _nvidia_cache_timestamp = time.time()
        except httpx.TimeoutException:
            logging.warning("NVIDIA API request timed out")
            _nvidia_models_cache = set()
            _nvidia_cache_timestamp = time.time()
        except httpx.HTTPError as e:
            # Sanitize error message to avoid leaking API keys
            error_msg = str(e).replace(api_key, "***")
            logging.warning(f"NVIDIA API request failed: {error_msg}")
            _nvidia_models_cache = set()
            _nvidia_cache_timestamp = time.time()
        except Exception as e:
            # Catch-all for unexpected errors, with API key sanitization
            error_msg = str(e).replace(api_key, "***") if api_key else str(e)
            logging.warning(f"Failed to fetch NVIDIA models: {error_msg}")
            _nvidia_models_cache = set()
            _nvidia_cache_timestamp = time.time()

    return _nvidia_models_cache


class LLM(BaseLLM):
completion_cost: float | None = None

@@ -363,32 +440,75 @@ def __new__(cls, model: str, is_litellm: bool = False, **kwargs: Any) -> LLM:
use_native = True
model_string = model
elif "/" in model:
prefix, _, model_part = model.partition("/")

provider_mapping = {
"openai": "openai",
"anthropic": "anthropic",
"claude": "anthropic",
"azure": "azure",
"azure_openai": "azure",
"google": "gemini",
"gemini": "gemini",
"bedrock": "bedrock",
"aws": "bedrock",
}

canonical_provider = provider_mapping.get(prefix.lower())

if canonical_provider and cls._validate_model_in_constants(
model_part, canonical_provider
):
provider = canonical_provider
use_native = True
model_string = model_part
# If NVIDIA API key is set, check if model is in NVIDIA's catalog FIRST
# This is the most accurate way: route to NVIDIA if they have it
# Accept both NVIDIA_API_KEY and NVIDIA_NIM_API_KEY
if os.getenv("NVIDIA_API_KEY") or os.getenv("NVIDIA_NIM_API_KEY"):
nvidia_models = _get_nvidia_models()

if model in nvidia_models:
# Model is in NVIDIA's catalog - use NVIDIA
provider = "nvidia"
use_native = True
model_string = model
else:
# Model NOT in NVIDIA catalog - fall back to standard routing
prefix, _, model_part = model.partition("/")

provider_mapping = {
"openai": "openai",
"anthropic": "anthropic",
"claude": "anthropic",
"azure": "azure",
"azure_openai": "azure",
"google": "gemini",
"gemini": "gemini",
"bedrock": "bedrock",
"aws": "bedrock",
}

canonical_provider = provider_mapping.get(prefix.lower())

if canonical_provider and cls._validate_model_in_constants(
model_part, canonical_provider
):
provider = canonical_provider
use_native = True
model_string = model_part
else:
# Not in NVIDIA and not recognized - try litellm
provider = prefix
use_native = False
model_string = model_part
else:
provider = prefix
use_native = False
model_string = model_part
prefix, _, model_part = model.partition("/")

provider_mapping = {
"openai": "openai",
"anthropic": "anthropic",
"claude": "anthropic",
"azure": "azure",
"azure_openai": "azure",
"google": "gemini",
"gemini": "gemini",
"bedrock": "bedrock",
"aws": "bedrock",
}

canonical_provider = provider_mapping.get(prefix.lower())

if canonical_provider and cls._validate_model_in_constants(
model_part, canonical_provider
):
provider = canonical_provider
use_native = True
model_string = model_part
else:
# Unknown provider - fall back to LiteLLM
# (NVIDIA models are handled by catalog check above when API key is set)
provider = prefix
use_native = False
model_string = model_part
else:
provider = cls._infer_provider_from_model(model)
use_native = True
Expand Down Expand Up @@ -446,10 +566,9 @@ def _matches_provider_pattern(cls, model: str, provider: str) -> bool:
)

if provider == "gemini" or provider == "google":
return any(
model_lower.startswith(prefix)
for prefix in ["gemini-", "gemma-", "learnlm-"]
)
# Only match Gemini-specific models, not open models like Gemma
# Gemma can be hosted on NVIDIA/other providers
return model_lower.startswith("gemini-") or model_lower.startswith("learnlm-")

if provider == "bedrock":
return "." in model_lower
@@ -460,6 +579,9 @@ def _matches_provider_pattern(cls, model: str, provider: str) -> bool:
for prefix in ["gpt-", "gpt-35-", "o1", "o3", "o4", "azure-"]
)

# NVIDIA routing is handled by dynamic catalog check in __new__
# No static pattern matching needed - always use catalog lookup

return False

@classmethod
@@ -559,6 +681,11 @@ def _get_native_provider(cls, provider: str) -> type | None:

return BedrockCompletion

if provider == "nvidia":
from crewai.llms.providers.nvidia.completion import NvidiaCompletion

return NvidiaCompletion

return None

def __init__(
9 changes: 9 additions & 0 deletions lib/crewai/src/crewai/llms/constants.py
@@ -566,3 +566,12 @@
"qwen.qwen3-coder-30b-a3b-v1:0",
"twelvelabs.pegasus-1-2-v1:0",
]


# NVIDIA models (Jan 2026) - routing for "/" model names is handled dynamically
# via the catalog check in LLM.__new__; this list is a representative sample.
NVIDIA_MODELS = [
    "qwen/qwen3-next-80b-a3b-instruct",  # Latest Qwen3-Next, excellent tool calling (Jan 2026)
    "qwen/qwen2.5-7b-instruct",  # Efficient general-purpose model
    "deepseek-ai/deepseek-r1-distill-qwen-14b",  # Reasoning with Qwen base (Jan 2026)
    "nvidia/cosmos-reason2-8b",  # Vision + reasoning model (Jan 2026)
]
5 changes: 5 additions & 0 deletions lib/crewai/src/crewai/llms/providers/nvidia/__init__.py
@@ -0,0 +1,5 @@
"""NVIDIA provider for CrewAI."""

from crewai.llms.providers.nvidia.completion import NvidiaCompletion

__all__ = ["NvidiaCompletion"]