diff --git a/docs/my-website/docs/providers/azure/videos.md b/docs/my-website/docs/providers/azure/videos.md
index 188713d63351..62f8d0df182d 100644
--- a/docs/my-website/docs/providers/azure/videos.md
+++ b/docs/my-website/docs/providers/azure/videos.md
@@ -25,7 +25,6 @@ LiteLLM supports Azure OpenAI's video generation models including Sora with full
import os
os.environ["AZURE_OPENAI_API_KEY"] = "your-azure-api-key"
os.environ["AZURE_OPENAI_API_BASE"] = "https://your-resource.openai.azure.com/"
-os.environ["AZURE_OPENAI_API_VERSION"] = "2024-02-15-preview"
```
### Basic Usage
@@ -37,7 +36,6 @@ import time
os.environ["AZURE_OPENAI_API_KEY"] = "your-azure-api-key"
os.environ["AZURE_OPENAI_API_BASE"] = "https://your-resource.openai.azure.com/"
-os.environ["AZURE_OPENAI_API_VERSION"] = "2024-02-15-preview"
# Generate video
response = video_generation(
@@ -53,8 +51,7 @@ print(f"Initial Status: {response.status}")
# Check status until video is ready
while True:
status_response = video_status(
- video_id=response.id,
- custom_llm_provider="azure"
+ video_id=response.id
)
print(f"Current Status: {status_response.status}")
@@ -69,8 +66,7 @@ while True:
# Download video content when ready
video_bytes = video_content(
- video_id=response.id,
- custom_llm_provider="azure"
+ video_id=response.id
)
# Save to file
@@ -87,7 +83,6 @@ Here's how to call Azure video generation models with the LiteLLM Proxy Server
```bash
export AZURE_OPENAI_API_KEY="your-azure-api-key"
export AZURE_OPENAI_API_BASE="https://your-resource.openai.azure.com/"
-export AZURE_OPENAI_API_VERSION="2024-02-15-preview"
```
### 2. Start the proxy
@@ -102,7 +97,6 @@ model_list:
model: azure/sora-2
api_key: os.environ/AZURE_OPENAI_API_KEY
api_base: os.environ/AZURE_OPENAI_API_BASE
- api_version: "2024-02-15-preview"
```
@@ -211,8 +205,7 @@ general_settings:
```python
# Download video content
video_bytes = video_content(
- video_id="video_1234567890",
- model="azure/sora-2"
+ video_id="video_1234567890"
)
# Save to file
@@ -243,8 +236,7 @@ def generate_and_download_video(prompt):
# Step 3: Download video
video_bytes = litellm.video_content(
- video_id=video_id,
- custom_llm_provider="azure"
+ video_id=video_id
)
# Step 4: Save to file
@@ -264,9 +256,9 @@ video_file = generate_and_download_video(
```python
# Video editing with reference image
response = litellm.video_remix(
+ video_id="video_456",
prompt="Make the cat jump higher",
input_reference=open("path/to/image.jpg", "rb"), # Reference image as file object
- custom_llm_provider="azure"
seconds="8"
)
diff --git a/docs/my-website/docs/providers/gemini.md b/docs/my-website/docs/providers/gemini.md
index 40d646565286..31d3a491f405 100644
--- a/docs/my-website/docs/providers/gemini.md
+++ b/docs/my-website/docs/providers/gemini.md
@@ -10,7 +10,7 @@ import TabItem from '@theme/TabItem';
| Provider Route on LiteLLM | `gemini/` |
| Provider Doc | [Google AI Studio ↗](https://aistudio.google.com/) |
| API Endpoint for Provider | https://generativelanguage.googleapis.com |
-| Supported OpenAI Endpoints | `/chat/completions`, [`/embeddings`](../embedding/supported_embedding#gemini-ai-embedding-models), `/completions` |
+| Supported OpenAI Endpoints | `/chat/completions`, [`/embeddings`](../embedding/supported_embedding#gemini-ai-embedding-models), `/completions`, [`/videos`](./gemini/videos.md) |
| Pass-through Endpoint | [Supported](../pass_through/google_ai_studio.md) |
diff --git a/docs/my-website/docs/providers/gemini/videos.md b/docs/my-website/docs/providers/gemini/videos.md
new file mode 100644
index 000000000000..5b5d5a8a6369
--- /dev/null
+++ b/docs/my-website/docs/providers/gemini/videos.md
@@ -0,0 +1,409 @@
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+# Gemini Video Generation (Veo)
+
+LiteLLM supports Google's Veo video generation models through a unified API interface.
+
+| Property | Details |
+|-------|-------|
+| Description | Google's Veo AI video generation models |
+| Provider Route on LiteLLM | `gemini/` |
+| Supported Models | `veo-3.0-generate-preview`, `veo-3.1-generate-preview` |
+| Cost Tracking | ✅ Duration-based pricing |
+| Logging Support | ✅ Full request/response logging |
+| Proxy Server Support | ✅ Full proxy integration with virtual keys |
+| Spend Management | ✅ Budget tracking and rate limiting |
+| Link to Provider Doc | [Google Veo Documentation ↗](https://ai.google.dev/gemini-api/docs/video) |
+
+## Quick Start
+
+### Required API Keys
+
+```python
+import os
+os.environ["GEMINI_API_KEY"] = "your-google-api-key"
+# OR
+os.environ["GOOGLE_API_KEY"] = "your-google-api-key"
+```
+
+### Basic Usage
+
+```python
+from litellm import video_generation, video_status, video_content
+import os
+import time
+
+os.environ["GEMINI_API_KEY"] = "your-google-api-key"
+
+# Step 1: Generate video
+response = video_generation(
+ model="gemini/veo-3.0-generate-preview",
+ prompt="A cat playing with a ball of yarn in a sunny garden"
+)
+
+print(f"Video ID: {response.id}")
+print(f"Initial Status: {response.status}") # "processing"
+
+# Step 2: Poll for completion
+while True:
+ status_response = video_status(
+ video_id=response.id
+ )
+
+ print(f"Current Status: {status_response.status}")
+
+ if status_response.status == "completed":
+ break
+ elif status_response.status == "failed":
+ print("Video generation failed")
+ break
+
+ time.sleep(10) # Wait 10 seconds before checking again
+
+# Step 3: Download video content
+video_bytes = video_content(
+ video_id=response.id
+)
+
+# Save to file
+with open("generated_video.mp4", "wb") as f:
+ f.write(video_bytes)
+
+print("Video downloaded successfully!")
+```
+
+## Supported Models
+
+| Model Name | Description | Max Duration | Status |
+|------------|-------------|--------------|--------|
+| veo-3.0-generate-preview | Veo 3.0 video generation | 8 seconds | Preview |
+| veo-3.1-generate-preview | Veo 3.1 video generation | 8 seconds | Preview |
+
+## Video Generation Parameters
+
+LiteLLM automatically maps OpenAI-style parameters to Veo's format:
+
+| OpenAI Parameter | Veo Parameter | Description | Example |
+|------------------|---------------|-------------|---------|
+| `prompt` | `prompt` | Text description of the video | "A cat playing" |
+| `size` | `aspectRatio` | Video dimensions → aspect ratio | "1280x720" → "16:9" |
+| `seconds` | `durationSeconds` | Duration in seconds | "8" → 8 |
+| `input_reference` | `image` | Reference image to animate | File object or path |
+| `model` | `model` | Model to use | "gemini/veo-3.0-generate-preview" |
+
+### Size to Aspect Ratio Mapping
+
+LiteLLM automatically converts size dimensions to Veo's aspect ratio format:
+- `"1280x720"`, `"1920x1080"` → `"16:9"` (landscape)
+- `"720x1280"`, `"1080x1920"` → `"9:16"` (portrait)
+
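+The conversion is equivalent to this small sketch (a hypothetical standalone helper mirroring the mapping above; unrecognized sizes fall back to `"16:9"`):
+
+```python
+def size_to_aspect_ratio(size: str) -> str:
+    """Map OpenAI-style pixel dimensions to Veo's aspectRatio format."""
+    aspect_ratio_map = {
+        "1280x720": "16:9",
+        "1920x1080": "16:9",
+        "720x1280": "9:16",
+        "1080x1920": "9:16",
+    }
+    # Anything unrecognized defaults to landscape
+    return aspect_ratio_map.get(size, "16:9")
+
+print(size_to_aspect_ratio("720x1280"))  # 9:16
+```
+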
+### Supported Veo Parameters
+
+Based on Veo's API:
+- **prompt** (required): Text description with optional audio cues
+- **aspectRatio**: `"16:9"` (default) or `"9:16"`
+- **resolution**: `"720p"` (default) or `"1080p"` (Veo 3.1 only, 16:9 aspect ratio only)
+- **durationSeconds**: Video length (max 8 seconds for most models)
+- **image**: Reference image for animation
+- **negativePrompt**: What to exclude from the video (Veo 3.1)
+- **referenceImages**: Style and content references (Veo 3.1 only)
+
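+Parameters outside the OpenAI surface are passed through to Veo unmapped. As a sketch (assuming extra keyword arguments reach the provider request body, as the pass-through behavior above suggests), a Veo 3.1 `negativePrompt` might look like:
+
+```python
+from litellm import video_generation
+
+# negativePrompt is a Veo-specific parameter, not an OpenAI one;
+# LiteLLM passes unmapped parameters through to the provider.
+response = video_generation(
+    model="gemini/veo-3.1-generate-preview",
+    prompt="A lighthouse on a cliff at dawn",
+    negativePrompt="blurry, low quality",  # hypothetical pass-through usage
+)
+```
+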
+## Complete Workflow Example
+
+```python
+import litellm
+import time
+
+def generate_and_download_veo_video(
+ prompt: str,
+ output_file: str = "video.mp4",
+ size: str = "1280x720",
+ seconds: str = "8"
+):
+ """
+ Complete workflow for Veo video generation.
+
+ Args:
+ prompt: Text description of the video
+ output_file: Where to save the video
+ size: Video dimensions (e.g., "1280x720" for 16:9)
+ seconds: Duration in seconds
+
+ Returns:
+ bool: True if successful
+ """
+ print(f"🎬 Generating video: {prompt}")
+
+ # Step 1: Initiate generation
+ response = litellm.video_generation(
+ model="gemini/veo-3.0-generate-preview",
+ prompt=prompt,
+ size=size, # Maps to aspectRatio
+ seconds=seconds # Maps to durationSeconds
+ )
+
+ video_id = response.id
+ print(f"✓ Video generation started (ID: {video_id})")
+
+ # Step 2: Wait for completion
+ max_wait_time = 600 # 10 minutes
+ start_time = time.time()
+
+ while time.time() - start_time < max_wait_time:
+ status_response = litellm.video_status(video_id=video_id)
+
+ if status_response.status == "completed":
+ print("✓ Video generation completed!")
+ break
+ elif status_response.status == "failed":
+ print("✗ Video generation failed")
+ return False
+
+ print(f"⏳ Status: {status_response.status}")
+ time.sleep(10)
+ else:
+ print("✗ Timeout waiting for video generation")
+ return False
+
+ # Step 3: Download video
+ print("⬇️ Downloading video...")
+ video_bytes = litellm.video_content(video_id=video_id)
+
+ with open(output_file, "wb") as f:
+ f.write(video_bytes)
+
+ print(f"✓ Video saved to {output_file}")
+ return True
+
+# Use it
+generate_and_download_veo_video(
+ prompt="A serene lake at sunset with mountains in the background",
+ output_file="sunset_lake.mp4"
+)
+```
+
+## Async Usage
+
+```python
+from litellm import avideo_generation, avideo_status, avideo_content
+import asyncio
+
+async def async_video_workflow():
+ # Generate video
+ response = await avideo_generation(
+ model="gemini/veo-3.0-generate-preview",
+ prompt="A cat playing with a ball of yarn"
+ )
+
+ # Poll for completion
+ while True:
+ status = await avideo_status(video_id=response.id)
+ if status.status == "completed":
+ break
+ await asyncio.sleep(10)
+
+ # Download content
+ video_bytes = await avideo_content(video_id=response.id)
+
+ with open("video.mp4", "wb") as f:
+ f.write(video_bytes)
+
+# Run it
+asyncio.run(async_video_workflow())
+```
+
+## LiteLLM Proxy Usage
+
+### Configuration
+
+Add Veo models to your `config.yaml`:
+
+```yaml
+model_list:
+ - model_name: veo-3
+ litellm_params:
+ model: gemini/veo-3.0-generate-preview
+ api_key: os.environ/GEMINI_API_KEY
+```
+
+Start the proxy:
+
+```bash
+litellm --config config.yaml
+# Server running on http://0.0.0.0:4000
+```
+
+### Making Requests
+
+<Tabs>
+<TabItem value="curl" label="cURL">
+
+```bash
+# Step 1: Generate video
+curl --location 'http://0.0.0.0:4000/v1/videos' \
+--header 'Content-Type: application/json' \
+--header 'Authorization: Bearer sk-1234' \
+--data '{
+ "model": "veo-3",
+ "prompt": "A cat playing with a ball of yarn in a sunny garden"
+}'
+
+# Response: {"id": "gemini::operations/generate_12345::...", "status": "processing", ...}
+
+# Step 2: Check status
+curl --location 'http://localhost:4000/v1/videos/{video_id}' \
+--header 'x-litellm-api-key: sk-1234'
+
+# Step 3: Download video (when status is "completed")
+curl --location 'http://localhost:4000/v1/videos/{video_id}/content' \
+--header 'x-litellm-api-key: sk-1234' \
+--output video.mp4
+```
+
+</TabItem>
+<TabItem value="python" label="LiteLLM Python SDK">
+
+```python
+import litellm
+
+litellm.api_base = "http://0.0.0.0:4000"
+litellm.api_key = "sk-1234"
+
+# Generate video
+response = litellm.video_generation(
+ model="veo-3",
+ prompt="A cat playing with a ball of yarn in a sunny garden"
+)
+
+# Check status
+import time
+while True:
+ status = litellm.video_status(video_id=response.id)
+ if status.status == "completed":
+ break
+ time.sleep(10)
+
+# Download video
+video_bytes = litellm.video_content(video_id=response.id)
+with open("video.mp4", "wb") as f:
+ f.write(video_bytes)
+```
+
+</TabItem>
+</Tabs>
+
+## Cost Tracking
+
+LiteLLM automatically tracks costs for Veo video generation:
+
+```python
+response = litellm.video_generation(
+ model="gemini/veo-3.0-generate-preview",
+ prompt="A beautiful sunset"
+)
+
+# Cost is calculated based on video duration
+# Veo pricing: ~$0.10 per second (estimated)
+# Default video duration: 8 seconds (DEFAULT_GOOGLE_VIDEO_DURATION_SECONDS)
+# Estimated cost: ~$0.80
+```
+
+## Differences from OpenAI Video API
+
+| Feature | OpenAI (Sora) | Gemini (Veo) |
+|---------|---------------|--------------|
+| Prompt-based Generation | ✅ Supported | ✅ Supported |
+| Reference Images | ✅ Supported | ✅ Via `input_reference` → `image` |
+| Size Control | ✅ Supported | ✅ Mapped to aspect ratio (`16:9` / `9:16`) |
+| Duration Control | ✅ Supported | ✅ Via `seconds` → `durationSeconds` |
+| Video Remix/Edit | ✅ Supported | ❌ Not supported |
+| Video List | ✅ Supported | ❌ Not supported |
+| Async Operations | ✅ Supported | ✅ Supported |
+
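+Calling an unsupported operation raises an error rather than silently failing. A quick sketch:
+
+```python
+import litellm
+
+# Video remix is not implemented for Veo, so LiteLLM raises an error.
+try:
+    litellm.video_remix(
+        video_id="gemini::operations/generate_12345::...",  # illustrative ID
+        prompt="Make it snow",
+    )
+except Exception as e:
+    print(f"Remix not available: {e}")
+```
+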
+## Error Handling
+
+```python
+from litellm import video_generation, video_status, video_content
+from litellm.exceptions import APIError, Timeout
+import time
+
+try:
+ response = video_generation(
+ model="gemini/veo-3.0-generate-preview",
+ prompt="A beautiful landscape"
+ )
+
+ # Poll with timeout
+ max_attempts = 60 # 10 minutes (60 * 10s)
+ for attempt in range(max_attempts):
+ status = video_status(video_id=response.id)
+
+ if status.status == "completed":
+ video_bytes = video_content(video_id=response.id)
+ with open("video.mp4", "wb") as f:
+ f.write(video_bytes)
+ break
+ elif status.status == "failed":
+ raise APIError("Video generation failed")
+
+ time.sleep(10)
+ else:
+ raise Timeout("Video generation timed out")
+
+except APIError as e:
+ print(f"API Error: {e}")
+except Timeout as e:
+ print(f"Timeout: {e}")
+except Exception as e:
+ print(f"Unexpected error: {e}")
+```
+
+## Best Practices
+
+1. **Always poll for completion**: Veo video generation is asynchronous and can take several minutes
+2. **Set reasonable timeouts**: Allow at least 5-10 minutes for video generation
+3. **Handle failures gracefully**: Check for `failed` status and implement retry logic
+4. **Use descriptive prompts**: More detailed prompts generally produce better results
+5. **Store video IDs**: Save the operation ID/video ID so you can resume polling if your application restarts (see the sketch below)
+
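+A minimal sketch for resuming after a restart, assuming the ID from `response.id` was persisted somewhere durable:
+
+```python
+import time
+from litellm import video_status, video_content
+
+# Hypothetical ID loaded from a database or file, saved from response.id
+saved_video_id = "gemini::operations/generate_12345::..."
+
+status = video_status(video_id=saved_video_id)
+while status.status not in ("completed", "failed"):
+    time.sleep(10)
+    status = video_status(video_id=saved_video_id)
+
+if status.status == "completed":
+    with open("video.mp4", "wb") as f:
+        f.write(video_content(video_id=saved_video_id))
+```
+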
+## Troubleshooting
+
+### Video generation times out
+
+```python
+# Increase polling timeout
+max_wait_time = 900 # 15 minutes instead of 10
+```
+
+### Video not found when downloading
+
+```python
+# Make sure video is completed before downloading
+status = video_status(video_id=video_id)
+if status.status != "completed":
+ print("Video not ready yet!")
+```
+
+### API key errors
+
+```python
+# Verify your API key is set
+import os
+print(os.environ.get("GEMINI_API_KEY"))
+
+# Or pass it explicitly
+response = video_generation(
+ model="gemini/veo-3.0-generate-preview",
+ prompt="...",
+ api_key="your-api-key-here"
+)
+```
+
+## See Also
+
+- [OpenAI Video Generation](../openai/videos.md)
+- [Azure Video Generation](../azure/videos.md)
+- [Vertex AI Video Generation](../vertex_ai/videos.md)
+- [Video Generation API Reference](/docs/videos)
+- [Veo Pass-through Endpoints](/docs/pass_through/google_ai_studio#example-4-video-generation-with-veo)
+
diff --git a/docs/my-website/docs/providers/openai/videos.md b/docs/my-website/docs/providers/openai/videos.md
index 72eb3f43a020..202c79c2446e 100644
--- a/docs/my-website/docs/providers/openai/videos.md
+++ b/docs/my-website/docs/providers/openai/videos.md
@@ -36,7 +36,6 @@ print(f"Status: {response.status}")
# Download video content when ready
video_bytes = video_content(
video_id=response.id,
- model="sora-2"
)
# Save to file
@@ -171,8 +170,7 @@ curl http://localhost:4000/v1/videos \
```python
# Download video content
video_bytes = video_content(
- video_id="video_1234567890",
- custom_llm_provider="openai" # Or use model="sora-2"
+ video_id="video_1234567890"
)
# Save to file
@@ -203,8 +201,7 @@ def generate_and_download_video(prompt):
# Step 3: Download video
video_bytes = litellm.video_content(
- video_id=video_id,
- custom_llm_provider="openai"
+ video_id=video_id
)
# Step 4: Save to file
@@ -241,8 +238,7 @@ from litellm.exceptions import BadRequestError, AuthenticationError
try:
response = video_generation(
- prompt="A cat playing with a ball of yarn",
- model="sora-2"
+ prompt="A cat playing with a ball of yarn"
)
except AuthenticationError as e:
print(f"Authentication failed: {e}")
diff --git a/docs/my-website/docs/providers/vertex_ai/videos.md b/docs/my-website/docs/providers/vertex_ai/videos.md
new file mode 100644
index 000000000000..4aaf74354b1e
--- /dev/null
+++ b/docs/my-website/docs/providers/vertex_ai/videos.md
@@ -0,0 +1,268 @@
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+# Vertex AI Video Generation (Veo)
+
+LiteLLM supports Vertex AI's Veo video generation models using the unified OpenAI video API surface.
+
+| Property | Details |
+|-------|-------|
+| Description | Google Cloud Vertex AI Veo video generation models |
+| Provider Route on LiteLLM | `vertex_ai/` |
+| Supported Models | `veo-2.0-generate-001`, `veo-3.0-generate-preview`, `veo-3.0-fast-generate-preview`, `veo-3.1-generate-preview`, `veo-3.1-fast-generate-preview` |
+| Cost Tracking | ✅ Duration-based pricing |
+| Logging Support | ✅ Full request/response logging |
+| Proxy Server Support | ✅ Full proxy integration with virtual keys |
+| Spend Management | ✅ Budget tracking and rate limiting |
+| Link to Provider Doc | [Vertex AI Veo Documentation ↗](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/veo-video-generation) |
+
+## Quick Start
+
+### Required Environment Setup
+
+```python
+import json
+import os
+
+os.environ["VERTEXAI_PROJECT"] = "your-gcp-project-id"
+os.environ["VERTEXAI_LOCATION"] = "us-central1"
+
+# Option 1: Point to a service account file
+os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/path/to/service_account.json"
+
+# Option 2: Store the service account JSON directly
+with open("/path/to/service_account.json", "r", encoding="utf-8") as f:
+ os.environ["VERTEXAI_CREDENTIALS"] = f.read()
+```
+
+### Basic Usage
+
+```python
+from litellm import video_generation, video_status, video_content
+import json
+import os
+import time
+
+with open("/path/to/service_account.json", "r", encoding="utf-8") as f:
+ vertex_credentials = f.read()
+
+response = video_generation(
+ model="vertex_ai/veo-3.0-generate-preview",
+ prompt="A cat playing with a ball of yarn in a sunny garden",
+ vertex_project="your-gcp-project-id",
+ vertex_location="us-central1",
+ vertex_credentials=vertex_credentials,
+ seconds="8",
+ size="1280x720",
+)
+
+print(f"Video ID: {response.id}")
+print(f"Initial Status: {response.status}")
+
+# Poll for completion
+while True:
+ status = video_status(
+ video_id=response.id,
+ vertex_project="your-gcp-project-id",
+ vertex_location="us-central1",
+ vertex_credentials=vertex_credentials,
+ )
+
+ print(f"Current Status: {status.status}")
+
+ if status.status == "completed":
+ break
+ if status.status == "failed":
+ raise RuntimeError("Video generation failed")
+
+ time.sleep(10)
+
+# Download the rendered video
+video_bytes = video_content(
+ video_id=response.id,
+ vertex_project="your-gcp-project-id",
+ vertex_location="us-central1",
+ vertex_credentials=vertex_credentials,
+)
+
+with open("generated_video.mp4", "wb") as f:
+ f.write(video_bytes)
+```
+
+## Supported Models
+
+| Model Name | Description | Max Duration | Status |
+|------------|-------------|--------------|--------|
+| veo-2.0-generate-001 | Veo 2.0 video generation | 5 seconds | GA |
+| veo-3.0-generate-preview | Veo 3.0 high quality | 8 seconds | Preview |
+| veo-3.0-fast-generate-preview | Veo 3.0 fast generation | 8 seconds | Preview |
+| veo-3.1-generate-preview | Veo 3.1 high quality | 10 seconds | Preview |
+| veo-3.1-fast-generate-preview | Veo 3.1 fast | 10 seconds | Preview |
+
+## Video Generation Parameters
+
+LiteLLM converts OpenAI-style parameters to Veo's API shape automatically:
+
+| OpenAI Parameter | Vertex AI Parameter | Description | Example |
+|------------------|---------------------|-------------|---------|
+| `prompt` | `instances[].prompt` | Text description of the video | "A cat playing" |
+| `size` | `parameters.aspectRatio` | Converted to `16:9` or `9:16` | "1280x720" → `16:9` |
+| `seconds` | `parameters.durationSeconds` | Clip length in seconds | "8" → `8` |
+| `input_reference` | `instances[].image` | Reference image for animation | `open("image.jpg", "rb")` |
+| Provider-specific params | `extra_body` | Forwarded to Vertex API | `{"negativePrompt": "blurry"}` |
+
+### Size to Aspect Ratio Mapping
+
+- `1280x720`, `1920x1080` → `16:9`
+- `720x1280`, `1080x1920` → `9:16`
+- Unknown sizes default to `16:9`
+
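+Provider-specific Veo parameters can be forwarded via `extra_body`, as in this sketch using the `negativePrompt` example from the table above:
+
+```python
+from litellm import video_generation
+
+with open("/path/to/service_account.json", "r", encoding="utf-8") as f:
+    vertex_credentials = f.read()
+
+response = video_generation(
+    model="vertex_ai/veo-3.0-generate-preview",
+    prompt="A hummingbird hovering over a flower",
+    size="720x1280",  # mapped to parameters.aspectRatio "9:16"
+    extra_body={"negativePrompt": "blurry"},  # forwarded to the Vertex API
+    vertex_project="your-gcp-project-id",
+    vertex_location="us-central1",
+    vertex_credentials=vertex_credentials,
+)
+```
+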
+## Async Usage
+
+```python
+from litellm import avideo_generation, avideo_status, avideo_content
+import asyncio
+import json
+
+with open("/path/to/service_account.json", "r", encoding="utf-8") as f:
+ vertex_credentials = f.read()
+
+
+async def workflow():
+ response = await avideo_generation(
+ model="vertex_ai/veo-3.1-generate-preview",
+ prompt="Slow motion water droplets splashing into a pool",
+ seconds="10",
+ vertex_project="your-gcp-project-id",
+ vertex_location="us-central1",
+ vertex_credentials=vertex_credentials,
+ )
+
+ while True:
+ status = await avideo_status(
+ video_id=response.id,
+ vertex_project="your-gcp-project-id",
+ vertex_location="us-central1",
+ vertex_credentials=vertex_credentials,
+ )
+
+ if status.status == "completed":
+ break
+ if status.status == "failed":
+ raise RuntimeError("Video generation failed")
+
+ await asyncio.sleep(10)
+
+ video_bytes = await avideo_content(
+ video_id=response.id,
+ vertex_project="your-gcp-project-id",
+ vertex_location="us-central1",
+ vertex_credentials=vertex_credentials,
+ )
+
+ with open("veo_water.mp4", "wb") as f:
+ f.write(video_bytes)
+
+asyncio.run(workflow())
+```
+
+## LiteLLM Proxy Usage
+
+Add Veo models to your `config.yaml`:
+
+```yaml
+model_list:
+ - model_name: veo-3
+ litellm_params:
+ model: vertex_ai/veo-3.0-generate-preview
+ vertex_project: os.environ/VERTEXAI_PROJECT
+ vertex_location: os.environ/VERTEXAI_LOCATION
+ vertex_credentials: os.environ/VERTEXAI_CREDENTIALS
+```
+
+Start the proxy and make requests:
+
+<Tabs>
+<TabItem value="curl" label="cURL">
+
+```bash
+# Step 1: Generate video
+curl --location 'http://0.0.0.0:4000/v1/videos' \
+--header 'Content-Type: application/json' \
+--header 'Authorization: Bearer sk-1234' \
+--data '{
+ "model": "veo-3",
+ "prompt": "Aerial shot over a futuristic city at sunrise",
+ "seconds": "8"
+}'
+
+# Step 2: Poll status
+curl --location 'http://localhost:4000/v1/videos/{video_id}' \
+--header 'x-litellm-api-key: sk-1234'
+
+# Step 3: Download video
+curl --location 'http://localhost:4000/v1/videos/{video_id}/content' \
+--header 'x-litellm-api-key: sk-1234' \
+--output video.mp4
+```
+
+</TabItem>
+<TabItem value="python" label="LiteLLM Python SDK">
+
+```python
+import litellm
+import time
+
+litellm.api_base = "http://0.0.0.0:4000"
+litellm.api_key = "sk-1234"
+
+response = litellm.video_generation(
+ model="veo-3",
+ prompt="Aerial shot over a futuristic city at sunrise",
+)
+
+status = litellm.video_status(video_id=response.id)
+while status.status not in ["completed", "failed"]:
+    time.sleep(10)  # avoid hammering the API while polling
+    status = litellm.video_status(video_id=response.id)
+
+if status.status == "completed":
+ content = litellm.video_content(video_id=response.id)
+ with open("veo_city.mp4", "wb") as f:
+ f.write(content)
+```
+
+</TabItem>
+</Tabs>
+
+## Cost Tracking
+
+LiteLLM records the duration returned by Veo so you can apply duration-based pricing.
+
+```python
+with open("/path/to/service_account.json", "r", encoding="utf-8") as f:
+ vertex_credentials = f.read()
+
+response = video_generation(
+ model="vertex_ai/veo-2.0-generate-001",
+ prompt="Flowers blooming in fast forward",
+ seconds="5",
+ vertex_project="your-gcp-project-id",
+ vertex_location="us-central1",
+ vertex_credentials=vertex_credentials,
+)
+
+print(response.usage) # {"duration_seconds": 5.0}
+```
+
+## Troubleshooting
+
+- **`vertex_project is required`**: set `VERTEXAI_PROJECT` env var or pass `vertex_project` in the request.
+- **`Permission denied`**: ensure the service account has the `Vertex AI User` role and the correct region enabled.
+- **Video stuck in `processing`**: Veo operations are long-running. Continue polling every 10–15 seconds for up to ~10 minutes (see the sketch below).
+
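+A bounded polling loop for long-running operations, as a sketch (assumes `video_id` and `vertex_credentials` from the earlier examples):
+
+```python
+import time
+from litellm import video_status
+
+deadline = time.time() + 600  # give up after ~10 minutes
+
+while time.time() < deadline:
+    status = video_status(
+        video_id=video_id,
+        vertex_project="your-gcp-project-id",
+        vertex_location="us-central1",
+        vertex_credentials=vertex_credentials,
+    )
+    if status.status in ("completed", "failed"):
+        break
+    time.sleep(15)  # poll every 15 seconds
+```
+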
+## See Also
+
+- [OpenAI Video Generation](../openai/videos.md)
+- [Azure Video Generation](../azure/videos.md)
+- [Gemini Video Generation](../gemini/videos.md)
+- [Video Generation API Reference](/docs/videos)
+
diff --git a/docs/my-website/docs/proxy/config_settings.md b/docs/my-website/docs/proxy/config_settings.md
index 869bab3d827b..fbdbe6ea7f3e 100644
--- a/docs/my-website/docs/proxy/config_settings.md
+++ b/docs/my-website/docs/proxy/config_settings.md
@@ -519,6 +519,7 @@ router_settings:
| DEFAULT_SLACK_ALERTING_THRESHOLD | Default threshold for Slack alerting. Default is 300
| DEFAULT_SOFT_BUDGET | Default soft budget for LiteLLM proxy keys. Default is 50.0
| DEFAULT_TRIM_RATIO | Default ratio of tokens to trim from prompt end. Default is 0.75
+| DEFAULT_GOOGLE_VIDEO_DURATION_SECONDS | Default duration (in seconds) for Google Veo video generation. Default is 8
| DIRECT_URL | Direct URL for service endpoint
| DISABLE_ADMIN_UI | Toggle to disable the admin UI
| DISABLE_AIOHTTP_TRANSPORT | Flag to disable aiohttp transport. When this is set to True, litellm will use httpx instead of aiohttp. **Default is False**
diff --git a/docs/my-website/docs/videos.md b/docs/my-website/docs/videos.md
index 96ff4c8190a4..cc9f1bc9ceac 100644
--- a/docs/my-website/docs/videos.md
+++ b/docs/my-website/docs/videos.md
@@ -9,7 +9,7 @@ Fallbacks | ✅ (Between supported models) |
| Guardrails Support | ✅ Content moderation and safety checks |
| Proxy Server Support | ✅ Full proxy integration with virtual keys |
| Spend Management | ✅ Budget tracking and rate limiting |
-| Supported Providers | `openai`, `azure` |
+| Supported Providers | `openai`, `azure`, `gemini`, `vertex_ai` |
:::tip
@@ -41,8 +41,7 @@ print(f"Initial Status: {response.status}")
# Check status until video is ready
while True:
status_response = video_status(
- video_id=response.id,
- custom_llm_provider="openai"
+ video_id=response.id
)
print(f"Current Status: {status_response.status}")
@@ -57,8 +56,7 @@ while True:
# Download video content when ready
video_bytes = video_content(
- video_id=response.id,
- custom_llm_provider="openai"
+ video_id=response.id
)
# Save to file
@@ -88,8 +86,7 @@ async def test_async_video():
# Check status until video is ready
while True:
status_response = await avideo_status(
- video_id=response.id,
- custom_llm_provider="openai"
+ video_id=response.id
)
print(f"Current Status: {status_response.status}")
@@ -104,8 +101,7 @@ async def test_async_video():
# Download video content when ready
video_bytes = await avideo_content(
- video_id=response.id,
- custom_llm_provider="openai"
+ video_id=response.id
)
# Save to file
@@ -120,21 +116,27 @@ asyncio.run(test_async_video())
```python
from litellm import video_status
-# Check the status of a video generation
status_response = video_status(
- video_id="video_1234567890",
- custom_llm_provider="openai"
+ video_id="video_1234567890"
)
print(f"Video Status: {status_response.status}")
print(f"Created At: {status_response.created_at}")
print(f"Model: {status_response.model}")
+```
+
+### List Videos
+
+Listing videos requires specifying the provider explicitly, since there is no video ID from which LiteLLM can infer it:
+
+```python
+from litellm import video_list
+
+# List videos from OpenAI
+videos = video_list(custom_llm_provider="openai")
-# Possible status values:
-# - "queued": Video is in the queue
-# - "processing": Video is being generated
-# - "completed": Video is ready for download
-# - "failed": Video generation failed
+
+for video in videos.get("data", []):
+    print(f"Video ID: {video['id']}")
```
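+
+Other video operations do not need the provider, because LiteLLM encodes it into the IDs it returns (e.g., `gemini::operations/generate_12345::...`), letting follow-up calls infer the provider from the ID alone:
+
+```python
+from litellm import video_status
+
+# The provider is decoded from the encoded video ID (illustrative value)
+status = video_status(video_id="gemini::operations/generate_12345::...")
+```
+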
### Video Generation with Reference Image
@@ -207,7 +209,7 @@ print(f"Video ID: {response.id}")
LiteLLM provides OpenAI API compatible video endpoints for complete video generation workflow:
-- `/videos/generations` - Generate new videos
+- `/videos` - Generate new videos
- `/videos/remix` - Edit existing videos with reference images
- `/videos/status` - Check video generation status
- `/videos/retrieval` - Download completed videos
@@ -227,7 +229,6 @@ model_list:
model: azure/sora-2
api_key: os.environ/AZURE_OPENAI_API_KEY
api_base: os.environ/AZURE_OPENAI_API_BASE
- api_version: "2024-02-15-preview"
```
Start litellm
@@ -253,31 +254,14 @@ curl --location 'http://localhost:4000/v1/videos' \
Test video status request
```bash
-# Using custom-llm-provider header
-curl --location 'http://localhost:4000/v1/videos/video_id' \
---header 'Accept: application/json' \
---header 'x-litellm-api-key: sk-1234' \
---header 'custom-llm-provider: azure'
-
-# Or using query parameter
-curl --location 'http://localhost:4000/v1/videos/video_id?custom_llm_provider=azure' \
---header 'Accept: application/json' \
+curl --location 'http://localhost:4000/v1/videos/{video_id}' \
--header 'x-litellm-api-key: sk-1234'
```
Test video retrieval request
```bash
-# Using custom-llm-provider header
-curl --location 'http://localhost:4000/v1/videos/video_id/content' \
---header 'Accept: application/json' \
---header 'x-litellm-api-key: sk-1234' \
---header 'custom-llm-provider: openai' \
---output video.mp4
-
-# Or using query parameter
-curl --location 'http://localhost:4000/v1/videos/video_id/content?custom_llm_provider=openai' \
---header 'Accept: application/json' \
+curl --location 'http://localhost:4000/v1/videos/{video_id}/content' \
--header 'x-litellm-api-key: sk-1234' \
--output video.mp4
```
@@ -285,25 +269,25 @@ curl --location 'http://localhost:4000/v1/videos/video_id/content?custom_llm_pro
Test video remix request
```bash
-# Using custom_llm_provider in request body
-curl --location --request POST 'http://localhost:4000/v1/videos/video_id/remix' \
---header 'Accept: application/json' \
+curl --location --request POST 'http://localhost:4000/v1/videos/{video_id}/remix' \
--header 'Content-Type: application/json' \
--header 'x-litellm-api-key: sk-1234' \
--data '{
- "prompt": "New remix instructions",
- "custom_llm_provider": "azure"
+ "prompt": "New remix instructions"
}'
+```
-# Or using custom-llm-provider header
-curl --location --request POST 'http://localhost:4000/v1/videos/video_id/remix' \
---header 'Accept: application/json' \
---header 'Content-Type: application/json' \
+Test video list request (requires custom_llm_provider)
+
+```bash
+# Note: video_list requires custom_llm_provider since there is no video ID to infer the provider from
+curl --location 'http://localhost:4000/v1/videos?custom_llm_provider=openai' \
+--header 'x-litellm-api-key: sk-1234'
+
+# Or using header
+curl --location 'http://localhost:4000/v1/videos' \
--header 'x-litellm-api-key: sk-1234' \
---header 'custom-llm-provider: azure' \
---data '{
- "prompt": "New remix instructions"
-}'
+--header 'custom-llm-provider: openai'
```
Test Azure video generation request
@@ -618,4 +602,6 @@ The response follows OpenAI's video generation format with the following structu
| Provider | Link to Usage |
|-------------|--------------------|
| OpenAI | [Usage](providers/openai/videos) |
-| Azure | [Usage](providers/azure/videos) |
\ No newline at end of file
+| Azure | [Usage](providers/azure/videos) |
+| Gemini | [Usage](providers/gemini/videos) |
+| Vertex AI | [Usage](providers/vertex_ai/videos) |
diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js
index 7c1d5f196b5f..1c62a8507f27 100644
--- a/docs/my-website/sidebars.js
+++ b/docs/my-website/sidebars.js
@@ -478,6 +478,7 @@ const sidebars = {
label: "Vertex AI",
items: [
"providers/vertex",
+ "providers/vertex_ai/videos",
"providers/vertex_partner",
"providers/vertex_self_deployed",
"providers/vertex_image",
@@ -490,6 +491,7 @@ const sidebars = {
label: "Google AI Studio",
items: [
"providers/gemini",
+ "providers/gemini/videos",
"providers/google_ai_studio/files",
"providers/google_ai_studio/image_gen",
"providers/google_ai_studio/realtime",
diff --git a/litellm/constants.py b/litellm/constants.py
index 17de0e0bd192..43fc37ad1c74 100644
--- a/litellm/constants.py
+++ b/litellm/constants.py
@@ -280,6 +280,8 @@
DEFAULT_IMAGE_ENDPOINT_MODEL = "dall-e-2"
DEFAULT_VIDEO_ENDPOINT_MODEL = "sora-2"
+DEFAULT_GOOGLE_VIDEO_DURATION_SECONDS = int(
+    os.getenv("DEFAULT_GOOGLE_VIDEO_DURATION_SECONDS", 8)
+)
+
### DATAFORSEO CONSTANTS ###
DEFAULT_DATAFORSEO_LOCATION_CODE = int(
os.getenv("DEFAULT_DATAFORSEO_LOCATION_CODE", 2250)
diff --git a/litellm/litellm_core_utils/health_check_helpers.py b/litellm/litellm_core_utils/health_check_helpers.py
index 9cbee7fc70d1..cc3916af0693 100644
--- a/litellm/litellm_core_utils/health_check_helpers.py
+++ b/litellm/litellm_core_utils/health_check_helpers.py
@@ -97,6 +97,7 @@ def get_mode_handlers(
"audio_speech",
"audio_transcription",
"image_generation",
+ "video_generation",
"rerank",
"realtime",
"batch",
@@ -159,6 +160,10 @@ def get_mode_handlers(
**_filter_model_params(model_params=model_params),
prompt=prompt,
),
+ "video_generation": lambda: litellm.avideo_generation(
+ **_filter_model_params(model_params=model_params),
+ prompt=prompt or "test video generation",
+ ),
"rerank": lambda: litellm.arerank(
**_filter_model_params(model_params=model_params),
query=prompt or "",
diff --git a/litellm/llms/base_llm/videos/transformation.py b/litellm/llms/base_llm/videos/transformation.py
index 7234093778c5..16341932fe83 100644
--- a/litellm/llms/base_llm/videos/transformation.py
+++ b/litellm/llms/base_llm/videos/transformation.py
@@ -92,10 +92,11 @@ def transform_video_create_request(
self,
model: str,
prompt: str,
+ api_base: str,
video_create_optional_request_params: Dict,
litellm_params: GenericLiteLLMParams,
headers: dict,
- ) -> Tuple[Dict, RequestFiles]:
+ ) -> Tuple[Dict, RequestFiles, str]:
pass
@abstractmethod
@@ -104,6 +105,8 @@ def transform_video_create_response(
model: str,
raw_response: httpx.Response,
logging_obj: LiteLLMLoggingObj,
+ custom_llm_provider: Optional[str] = None,
+ request_data: Optional[Dict] = None,
) -> VideoObject:
pass
@@ -154,6 +157,7 @@ def transform_video_remix_response(
self,
raw_response: httpx.Response,
logging_obj: LiteLLMLoggingObj,
+ custom_llm_provider: Optional[str] = None,
) -> VideoObject:
pass
@@ -181,6 +185,7 @@ def transform_video_list_response(
self,
raw_response: httpx.Response,
logging_obj: LiteLLMLoggingObj,
+ custom_llm_provider: Optional[str] = None,
) -> Dict[str,str]:
pass
@@ -229,6 +234,7 @@ def transform_video_status_retrieve_response(
self,
raw_response: httpx.Response,
logging_obj: LiteLLMLoggingObj,
+ custom_llm_provider: Optional[str] = None,
) -> VideoObject:
pass
diff --git a/litellm/llms/custom_httpx/llm_http_handler.py b/litellm/llms/custom_httpx/llm_http_handler.py
index fb3498fb257e..883f2de44df7 100644
--- a/litellm/llms/custom_httpx/llm_http_handler.py
+++ b/litellm/llms/custom_httpx/llm_http_handler.py
@@ -4099,7 +4099,7 @@ def video_generation_handler(
or {},
model=model,
)
-
+
if extra_headers:
headers.update(extra_headers)
@@ -4109,12 +4109,13 @@ def video_generation_handler(
litellm_params=dict(litellm_params),
)
- data, files = video_generation_provider_config.transform_video_create_request(
+ data, files, api_base = video_generation_provider_config.transform_video_create_request(
model=model,
prompt=prompt,
video_create_optional_request_params=video_generation_optional_request_params,
litellm_params=litellm_params,
headers=headers,
+ api_base=api_base,
)
## LOGGING
@@ -4140,8 +4141,8 @@ def video_generation_handler(
timeout=timeout,
)
- # --- END MOCK VIDEO RESPONSE ---
else:
+ # Use JSON content type for POST requests without files
response = sync_httpx_client.post(
url=api_base,
headers=headers,
@@ -4159,6 +4160,8 @@ def video_generation_handler(
model=model,
raw_response=response,
logging_obj=logging_obj,
+ custom_llm_provider=custom_llm_provider,
+ request_data=data,
)
async def async_video_generation_handler(
@@ -4206,9 +4209,10 @@ async def async_video_generation_handler(
litellm_params=dict(litellm_params),
)
- data, files = video_generation_provider_config.transform_video_create_request(
+ data, files, api_base = video_generation_provider_config.transform_video_create_request(
model=model,
prompt=prompt,
+ api_base=api_base,
video_create_optional_request_params=video_generation_optional_request_params,
litellm_params=litellm_params,
headers=headers,
@@ -4226,7 +4230,7 @@ async def async_video_generation_handler(
)
try:
        # Use JSON when no files, otherwise use form data with files
if files is None or len(files) == 0:
response = await async_httpx_client.post(
url=api_base,
@@ -4253,6 +4257,8 @@ async def async_video_generation_handler(
model=model,
raw_response=response,
logging_obj=logging_obj,
+ custom_llm_provider=custom_llm_provider,
+ request_data=data,
)
###### VIDEO CONTENT HANDLER ######
@@ -4308,7 +4314,7 @@ def video_content_handler(
)
# Transform the request using the provider config
- url, params = video_content_provider_config.transform_video_content_request(
+ url, data = video_content_provider_config.transform_video_content_request(
video_id=video_id,
api_base=api_base,
litellm_params=litellm_params,
@@ -4316,12 +4322,21 @@ def video_content_handler(
)
try:
- # Make the GET request to download content
- response = sync_httpx_client.get(
- url=url,
- headers=headers,
- params=params,
- )
+        # Use POST when the transform returned a request body (e.g., Vertex AI fetchPredictOperation);
+        # otherwise use GET with query params (e.g., OpenAI video content download)
+        if data:
+            response = sync_httpx_client.post(
+                url=url,
+                headers=headers,
+                json=data,
+            )
+        else:
+            response = sync_httpx_client.get(
+                url=url,
+                headers=headers,
+                params=data,
+            )
# Transform the response using the provider config
return video_content_provider_config.transform_video_content_response(
@@ -4374,7 +4389,7 @@ async def async_video_content_handler(
)
# Transform the request using the provider config
- url, params = video_content_provider_config.transform_video_content_request(
+ url, data = video_content_provider_config.transform_video_content_request(
video_id=video_id,
api_base=api_base,
litellm_params=litellm_params,
@@ -4382,12 +4397,21 @@ async def async_video_content_handler(
)
try:
- # Make the GET request to download content
- response = await async_httpx_client.get(
- url=url,
- headers=headers,
- params=params,
- )
+        # Use POST when the transform returned a request body (e.g., Vertex AI fetchPredictOperation);
+        # otherwise use GET with query params (e.g., OpenAI video content download)
+        if data:
+            response = await async_httpx_client.post(
+                url=url,
+                headers=headers,
+                json=data,
+            )
+        else:
+            response = await async_httpx_client.get(
+                url=url,
+                headers=headers,
+                params=data,
+            )
# Transform the response using the provider config
return video_content_provider_config.transform_video_content_response(
@@ -4492,6 +4516,7 @@ def video_remix_handler(
return video_remix_provider_config.transform_video_remix_response(
raw_response=response,
logging_obj=logging_obj,
+ custom_llm_provider=custom_llm_provider,
)
except Exception as e:
@@ -4573,6 +4598,7 @@ async def async_video_remix_handler(
return video_remix_provider_config.transform_video_remix_response(
raw_response=response,
logging_obj=logging_obj,
+ custom_llm_provider=custom_llm_provider,
)
except Exception as e:
@@ -4708,6 +4734,7 @@ async def async_video_list_handler(
return video_list_provider_config.transform_video_list_response(
raw_response=response,
logging_obj=logging_obj,
+ custom_llm_provider=custom_llm_provider,
)
except Exception as e:
@@ -4863,17 +4890,29 @@ def video_status_handler(
"api_base": url,
"headers": headers,
"video_id": video_id,
+ "data": data,
},
)
try:
- response = sync_httpx_client.get(
- url=url,
- headers=headers,
- )
+ # Use POST if data is provided (e.g., Vertex AI fetchPredictOperation)
+ # Otherwise use GET (e.g., OpenAI video status)
+ if data:
+ response = sync_httpx_client.post(
+ url=url,
+ headers=headers,
+ json=data,
+ )
+ else:
+ response = sync_httpx_client.get(
+ url=url,
+ headers=headers,
+ )
+
return video_status_provider_config.transform_video_status_retrieve_response(
raw_response=response,
logging_obj=logging_obj,
+ custom_llm_provider=custom_llm_provider,
)
except Exception as e:
@@ -4937,17 +4976,28 @@ async def async_video_status_handler(
"api_base": url,
"headers": headers,
"video_id": video_id,
+ "data": data,
},
)
try:
- response = await async_httpx_client.get(
- url=url,
- headers=headers,
- )
+ # Use POST if data is provided (e.g., Vertex AI fetchPredictOperation)
+ # Otherwise use GET (e.g., OpenAI video status)
+ if data:
+ response = await async_httpx_client.post(
+ url=url,
+ headers=headers,
+ json=data,
+ )
+ else:
+ response = await async_httpx_client.get(
+ url=url,
+ headers=headers,
+ )
return video_status_provider_config.transform_video_status_retrieve_response(
raw_response=response,
logging_obj=logging_obj,
+ custom_llm_provider=custom_llm_provider,
)
except Exception as e:
diff --git a/litellm/llms/gemini/videos/__init__.py b/litellm/llms/gemini/videos/__init__.py
new file mode 100644
index 000000000000..c5aed2db2d02
--- /dev/null
+++ b/litellm/llms/gemini/videos/__init__.py
@@ -0,0 +1,5 @@
+# Gemini Video Generation Support
+from .transformation import GeminiVideoConfig
+
+__all__ = ["GeminiVideoConfig"]
+
diff --git a/litellm/llms/gemini/videos/transformation.py b/litellm/llms/gemini/videos/transformation.py
new file mode 100644
index 000000000000..d1ae47af269f
--- /dev/null
+++ b/litellm/llms/gemini/videos/transformation.py
@@ -0,0 +1,523 @@
+from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union
+import base64
+
+import httpx
+from httpx._types import RequestFiles
+
+from litellm.types.videos.main import VideoCreateOptionalRequestParams, VideoObject
+from litellm.types.router import GenericLiteLLMParams
+from litellm.secret_managers.main import get_secret_str
+from litellm.types.videos.utils import (
+ encode_video_id_with_provider,
+ extract_original_video_id,
+)
+from litellm.images.utils import ImageEditRequestUtils
+import litellm
+from litellm.types.llms.gemini import (
+    GeminiLongRunningOperationResponse,
+    GeminiVideoGenerationInstance,
+    GeminiVideoGenerationParameters,
+    GeminiVideoGenerationRequest,
+)
+from litellm.constants import DEFAULT_GOOGLE_VIDEO_DURATION_SECONDS
+
+if TYPE_CHECKING:
+ from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj
+ from ...base_llm.videos.transformation import BaseVideoConfig as _BaseVideoConfig
+ from ...base_llm.chat.transformation import BaseLLMException as _BaseLLMException
+
+ LiteLLMLoggingObj = _LiteLLMLoggingObj
+ BaseVideoConfig = _BaseVideoConfig
+ BaseLLMException = _BaseLLMException
+else:
+ LiteLLMLoggingObj = Any
+ BaseVideoConfig = Any
+ BaseLLMException = Any
+
+
+def _convert_image_to_gemini_format(image_file) -> Dict[str, str]:
+ """
+ Convert image file to Gemini format with base64 encoding and MIME type.
+
+ Args:
+ image_file: File-like object opened in binary mode (e.g., open("path", "rb"))
+
+ Returns:
+ Dict with bytesBase64Encoded and mimeType
+ """
+ mime_type = ImageEditRequestUtils.get_image_content_type(image_file)
+
+ if hasattr(image_file, 'seek'):
+ image_file.seek(0)
+ image_bytes = image_file.read()
+ base64_encoded = base64.b64encode(image_bytes).decode("utf-8")
+
+ return {
+ "bytesBase64Encoded": base64_encoded,
+ "mimeType": mime_type
+ }
+
+
+class GeminiVideoConfig(BaseVideoConfig):
+ """
+ Configuration class for Gemini (Veo) video generation.
+
+ Veo uses a long-running operation model:
+ 1. POST to :predictLongRunning returns operation name
+ 2. Poll operation until done=true
+ 3. Extract video URI from response
+ 4. Download video using file API
+ """
+
+ def __init__(self):
+ super().__init__()
+
+ def get_supported_openai_params(self, model: str) -> list:
+ """
+ Get the list of supported OpenAI parameters for Veo video generation.
+ Veo supports minimal parameters compared to OpenAI.
+ """
+ return [
+ "model",
+ "prompt",
+ "input_reference",
+ "seconds",
+ "size"
+ ]
+
+ def map_openai_params(
+ self,
+ video_create_optional_params: VideoCreateOptionalRequestParams,
+ model: str,
+ drop_params: bool,
+ ) -> Dict[str, Any]:
+ """
+ Map OpenAI-style parameters to Veo format.
+
+ Mappings:
+ - prompt → prompt
+ - input_reference → image
+ - size → aspectRatio (e.g., "1280x720" → "16:9")
+        - seconds → durationSeconds (no default applied here; cost tracking falls
+          back to DEFAULT_GOOGLE_VIDEO_DURATION_SECONDS when unset)
+
+ All other params are passed through as-is to support Gemini-specific parameters.
+ """
+ mapped_params: Dict[str, Any] = {}
+
+ # Get supported OpenAI params (exclude "model" and "prompt" which are handled separately)
+ supported_openai_params = self.get_supported_openai_params(model)
+ openai_params_to_map = {
+ param for param in supported_openai_params
+ if param not in {"model", "prompt"}
+ }
+
+ # Map input_reference to image
+ if "input_reference" in video_create_optional_params:
+ mapped_params["image"] = video_create_optional_params["input_reference"]
+
+ # Map size to aspectRatio
+ if "size" in video_create_optional_params:
+ size = video_create_optional_params["size"]
+ if size is not None:
+ aspect_ratio = self._convert_size_to_aspect_ratio(size)
+ if aspect_ratio:
+ mapped_params["aspectRatio"] = aspect_ratio
+
+        # Map seconds to durationSeconds (coerce string values to int)
+ if "seconds" in video_create_optional_params:
+ seconds = video_create_optional_params["seconds"]
+ try:
+ duration = int(seconds) if isinstance(seconds, str) else seconds
+ if duration is not None:
+ mapped_params["durationSeconds"] = duration
+ except (ValueError, TypeError):
+ # If conversion fails, use default
+ pass
+
+ # Pass through any other params that weren't mapped (Gemini-specific params)
+ for key, value in video_create_optional_params.items():
+ if key not in openai_params_to_map and key not in mapped_params:
+ mapped_params[key] = value
+
+ return mapped_params
+
+ def _convert_size_to_aspect_ratio(self, size: str) -> Optional[str]:
+ """
+ Convert OpenAI size format to Veo aspectRatio format.
+
+ https://cloud.google.com/vertex-ai/generative-ai/docs/image/generate-videos
+
+ Supported aspect ratios: 9:16 (portrait), 16:9 (landscape)
+ """
+ if not size:
+ return None
+
+ aspect_ratio_map = {
+ "1280x720": "16:9",
+ "1920x1080": "16:9",
+ "720x1280": "9:16",
+ "1080x1920": "9:16",
+ }
+
+ return aspect_ratio_map.get(size, "16:9")
+
+ def validate_environment(
+ self,
+ headers: dict,
+ model: str,
+ api_key: Optional[str] = None,
+ ) -> dict:
+ """
+ Validate environment and add Gemini API key to headers.
+ Gemini uses x-goog-api-key header for authentication.
+ """
+ api_key = (
+ api_key
+ or litellm.api_key
+ or get_secret_str("GOOGLE_API_KEY")
+ or get_secret_str("GEMINI_API_KEY")
+ )
+
+ if not api_key:
+ raise ValueError(
+ "GEMINI_API_KEY or GOOGLE_API_KEY is required for Veo video generation. "
+ "Set it via environment variable or pass it as api_key parameter."
+ )
+
+ headers.update({
+ "x-goog-api-key": api_key,
+ "Content-Type": "application/json",
+ })
+ return headers
+
+ def get_complete_url(
+ self,
+ model: str,
+ api_base: Optional[str],
+ litellm_params: dict,
+ ) -> str:
+ """
+ Get the complete URL for Veo video generation.
+ For video creation: returns full URL with :predictLongRunning
+ For status/delete: returns base URL only
+ """
+ if api_base is None:
+ api_base = get_secret_str("GEMINI_API_BASE") or "https://generativelanguage.googleapis.com"
+
+ if not model or model == "":
+ return api_base.rstrip('/')
+
+ model_name = model.replace("gemini/", "")
+ url = f"{api_base.rstrip('/')}/v1beta/models/{model_name}:predictLongRunning"
+
+ return url
+
+ def transform_video_create_request(
+ self,
+ model: str,
+ prompt: str,
+ api_base: str,
+ video_create_optional_request_params: Dict,
+ litellm_params: GenericLiteLLMParams,
+ headers: dict,
+ ) -> Tuple[Dict, RequestFiles, str]:
+ """
+ Transform the video creation request for Veo API.
+
+ Veo expects:
+ {
+ "instances": [
+ {
+ "prompt": "A cat playing with a ball of yarn"
+ }
+ ],
+ "parameters": {
+ "aspectRatio": "16:9",
+ "durationSeconds": 8,
+ "resolution": "720p"
+ }
+ }
+ """
+ instance = GeminiVideoGenerationInstance(prompt=prompt)
+
+ params_copy = video_create_optional_request_params.copy()
+
+ if "image" in params_copy and params_copy["image"] is not None:
+ image_data = _convert_image_to_gemini_format(params_copy["image"])
+ params_copy["image"] = image_data
+
+ parameters = GeminiVideoGenerationParameters(**params_copy)
+
+ request_body_obj = GeminiVideoGenerationRequest(
+ instances=[instance],
+ parameters=parameters
+ )
+
+ request_data = request_body_obj.model_dump(exclude_none=True)
+
+ return request_data, [], api_base
+
+ def transform_video_create_response(
+ self,
+ model: str,
+ raw_response: httpx.Response,
+ logging_obj: LiteLLMLoggingObj,
+ custom_llm_provider: Optional[str] = None,
+ request_data: Optional[Dict] = None,
+ ) -> VideoObject:
+ """
+ Transform the Veo video creation response.
+
+ Veo returns:
+ {
+ "name": "operations/generate_1234567890",
+ "metadata": {...},
+ "done": false,
+ "error": {...}
+ }
+
+ We return this as a VideoObject with:
+ - id: operation name (used for polling)
+ - status: "processing"
+ - usage: includes duration_seconds for cost calculation
+ """
+ response_data = raw_response.json()
+
+ # Parse response using Pydantic model for type safety
+ try:
+ operation_response = GeminiLongRunningOperationResponse(**response_data)
+ except Exception as e:
+ raise ValueError(f"Failed to parse operation response: {e}")
+
+ operation_name = operation_response.name
+ if not operation_name:
+ raise ValueError(f"No operation name in Veo response: {response_data}")
+
+ if custom_llm_provider:
+ video_id = encode_video_id_with_provider(operation_name, custom_llm_provider, model)
+ else:
+ video_id = operation_name
+
+ video_obj = VideoObject(
+ id=video_id,
+ object="video",
+ status="processing",
+ model=model,
+ )
+
+        usage_data = {}
+        if request_data:
+            parameters = request_data.get("parameters", {})
+            # Fall back to the default duration when the request didn't set one
+            duration = parameters.get("durationSeconds") or DEFAULT_GOOGLE_VIDEO_DURATION_SECONDS
+            try:
+                usage_data["duration_seconds"] = float(duration)
+            except (ValueError, TypeError):
+                pass
+
+ video_obj.usage = usage_data
+ return video_obj
+
+ def transform_video_status_retrieve_request(
+ self,
+ video_id: str,
+ api_base: str,
+ litellm_params: GenericLiteLLMParams,
+ headers: dict,
+ ) -> Tuple[str, Dict]:
+ """
+ Transform the video status retrieve request for Veo API.
+
+ Veo polls operations at:
+ GET https://generativelanguage.googleapis.com/v1beta/{operation_name}
+ """
+ operation_name = extract_original_video_id(video_id)
+ url = f"{api_base.rstrip('/')}/v1beta/{operation_name}"
+ params: Dict[str, Any] = {}
+
+ return url, params
+
+ def transform_video_status_retrieve_response(
+ self,
+ raw_response: httpx.Response,
+ logging_obj: LiteLLMLoggingObj,
+ custom_llm_provider: Optional[str] = None,
+ ) -> VideoObject:
+ """
+ Transform the Veo operation status response.
+
+ Veo returns:
+ {
+ "name": "operations/generate_1234567890",
+ "done": false # or true when complete
+ }
+
+ When done=true:
+ {
+ "name": "operations/generate_1234567890",
+ "done": true,
+ "response": {
+ "generateVideoResponse": {
+ "generatedSamples": [
+ {
+ "video": {
+ "uri": "files/abc123..."
+ }
+ }
+ ]
+ }
+ }
+ }
+ """
+ response_data = raw_response.json()
+ # Parse response using Pydantic model for type safety
+ operation_response = GeminiLongRunningOperationResponse(**response_data)
+
+ operation_name = operation_response.name
+ is_done = operation_response.done
+
+ if custom_llm_provider:
+ video_id = encode_video_id_with_provider(operation_name, custom_llm_provider, None)
+ else:
+ video_id = operation_name
+
+ video_obj = VideoObject(
+ id=video_id,
+ object="video",
+ status="processing" if not is_done else "completed"
+ )
+ return video_obj
+
+ def transform_video_content_request(
+ self,
+ video_id: str,
+ api_base: str,
+ litellm_params: GenericLiteLLMParams,
+ headers: dict,
+ ) -> Tuple[str, Dict]:
+ """
+ Transform the video content request for Veo API.
+
+ For Veo, we need to:
+ 1. Get operation status to extract video URI
+ 2. Return download URL for the video
+ """
+ operation_name = extract_original_video_id(video_id)
+
+ status_url = f"{api_base.rstrip('/')}/v1beta/{operation_name}"
+ client = litellm.module_level_client
+ status_response = client.get(url=status_url, headers=headers)
+ status_response.raise_for_status()
+ response_data = status_response.json()
+
+ operation_response = GeminiLongRunningOperationResponse(**response_data)
+
+ if not operation_response.done:
+ raise ValueError(
+ "Video generation is not complete yet. "
+ "Please check status with video_status() before downloading."
+ )
+
+ if not operation_response.response:
+ raise ValueError("No response data in completed operation")
+
+        generated_samples = operation_response.response.generateVideoResponse.generatedSamples
+        if not generated_samples:
+            raise ValueError("No generated video samples in completed operation")
+        download_url = generated_samples[0].video.uri
+
+ params: Dict[str, Any] = {}
+
+ return download_url, params
+
+ def transform_video_content_response(
+ self,
+ raw_response: httpx.Response,
+ logging_obj: LiteLLMLoggingObj,
+ ) -> bytes:
+ """
+ Transform the Veo video content download response.
+ Returns the video bytes directly.
+ """
+ return raw_response.content
+
+ def transform_video_remix_request(
+ self,
+ video_id: str,
+ prompt: str,
+ api_base: str,
+ litellm_params: GenericLiteLLMParams,
+ headers: dict,
+ extra_body: Optional[Dict[str, Any]] = None,
+ ) -> Tuple[str, Dict]:
+ """
+ Video remix is not supported by Veo API.
+ """
+ raise NotImplementedError(
+ "Video remix is not supported by Google Veo. "
+ "Please use video_generation() to create new videos."
+ )
+
+ def transform_video_remix_response(
+ self,
+ raw_response: httpx.Response,
+ logging_obj: LiteLLMLoggingObj,
+ custom_llm_provider: Optional[str] = None,
+ ) -> VideoObject:
+ """Video remix is not supported."""
+ raise NotImplementedError("Video remix is not supported by Google Veo.")
+
+ def transform_video_list_request(
+ self,
+ api_base: str,
+ litellm_params: GenericLiteLLMParams,
+ headers: dict,
+ after: Optional[str] = None,
+ limit: Optional[int] = None,
+ order: Optional[str] = None,
+ extra_query: Optional[Dict[str, Any]] = None,
+ ) -> Tuple[str, Dict]:
+ """
+ Video list is not supported by Veo API.
+ """
+ raise NotImplementedError(
+ "Video list is not supported by Google Veo. "
+ "Use the operations endpoint directly if you need to list operations."
+ )
+
+ def transform_video_list_response(
+ self,
+ raw_response: httpx.Response,
+ logging_obj: LiteLLMLoggingObj,
+ custom_llm_provider: Optional[str] = None,
+ ) -> Dict[str, str]:
+ """Video list is not supported."""
+ raise NotImplementedError("Video list is not supported by Google Veo.")
+
+ def transform_video_delete_request(
+ self,
+ video_id: str,
+ api_base: str,
+ litellm_params: GenericLiteLLMParams,
+ headers: dict,
+ ) -> Tuple[str, Dict]:
+ """
+ Video delete is not supported by Veo API.
+ """
+ raise NotImplementedError(
+ "Video delete is not supported by Google Veo. "
+ "Videos are automatically cleaned up by Google."
+ )
+
+ def transform_video_delete_response(
+ self,
+ raw_response: httpx.Response,
+ logging_obj: LiteLLMLoggingObj,
+ ) -> VideoObject:
+ """Video delete is not supported."""
+ raise NotImplementedError("Video delete is not supported by Google Veo.")
+
+ def get_error_class(
+ self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
+ ) -> BaseLLMException:
+ from ..common_utils import GeminiError
+
+ return GeminiError(
+ status_code=status_code,
+ message=error_message,
+ headers=headers,
+ )
+
diff --git a/litellm/llms/openai/videos/transformation.py b/litellm/llms/openai/videos/transformation.py
index c573f3b59b0d..9848477f32de 100644
--- a/litellm/llms/openai/videos/transformation.py
+++ b/litellm/llms/openai/videos/transformation.py
@@ -9,6 +9,7 @@
from litellm.types.router import GenericLiteLLMParams
from litellm.secret_managers.main import get_secret_str
from litellm.types.videos.main import VideoObject
+from litellm.types.videos.utils import encode_video_id_with_provider, extract_original_video_id
import litellm
from litellm.llms.openai.image_edit.transformation import ImageEditRequestUtils
if TYPE_CHECKING:
@@ -94,17 +95,18 @@ def transform_video_create_request(
self,
model: str,
prompt: str,
+ api_base: str,
video_create_optional_request_params: Dict,
litellm_params: GenericLiteLLMParams,
headers: dict,
- ) -> Tuple[Dict, RequestFiles]:
+ ) -> Tuple[Dict, RequestFiles, str]:
"""
Transform the video creation request for OpenAI API.
"""
# Remove model and extra_headers from optional params as they're handled separately
video_create_optional_request_params = {
k: v for k, v in video_create_optional_request_params.items()
- if k not in ["model", "extra_headers"]
+ if k not in ["model", "extra_headers", "prompt"]
}
# Create the request data
@@ -129,26 +131,24 @@ def transform_video_create_request(
image=_input_reference,
field_name="input_reference",
)
- # Convert to dict for JSON serialization
- return data_without_files, files_list
+ return data_without_files, files_list, api_base
def transform_video_create_response(
self,
model: str,
raw_response: httpx.Response,
logging_obj: LiteLLMLoggingObj,
+ custom_llm_provider: Optional[str] = None,
+ request_data: Optional[Dict] = None,
) -> VideoObject:
- """
- Transform the OpenAI video creation response.
- """
+ """Transform the OpenAI video creation response."""
response_data = raw_response.json()
-
- # Transform the response data
video_obj = VideoObject(**response_data) # type: ignore[arg-type]
- # Create usage object with duration information for cost calculation
- # Video generation API doesn't provide usage, so we create one with duration
+ if custom_llm_provider and video_obj.id:
+ video_obj.id = encode_video_id_with_provider(video_obj.id, custom_llm_provider, model)
+
usage_data = {}
if video_obj:
if hasattr(video_obj, 'seconds') and video_obj.seconds:
@@ -156,9 +156,7 @@ def transform_video_create_response(
usage_data["duration_seconds"] = float(video_obj.seconds)
except (ValueError, TypeError):
pass
- # Create the response
video_obj.usage = usage_data
-
return video_obj
@@ -175,11 +173,13 @@ def transform_video_content_request(
OpenAI API expects the following request:
- GET /v1/videos/{video_id}/content
"""
+ original_video_id = extract_original_video_id(video_id)
+
# Construct the URL for video content download
- url = f"{api_base.rstrip('/')}/{video_id}/content"
+ url = f"{api_base.rstrip('/')}/{original_video_id}/content"
# Add video_id as query parameter
- params = {"video_id": video_id}
+ params = {"video_id": original_video_id}
return url, params
@@ -198,8 +198,10 @@ def transform_video_remix_request(
OpenAI API expects the following request:
- POST /v1/videos/{video_id}/remix
"""
+ original_video_id = extract_original_video_id(video_id)
+
# Construct the URL for video remix
- url = f"{api_base.rstrip('/')}/{video_id}/remix"
+ url = f"{api_base.rstrip('/')}/{original_video_id}/remix"
# Prepare the request data
data = {"prompt": prompt}
@@ -215,17 +217,14 @@ def transform_video_content_response(
raw_response: httpx.Response,
logging_obj: LiteLLMLoggingObj,
) -> bytes:
- """
- Transform the OpenAI video content download response.
- Returns raw video content as bytes.
- """
- # For video content download, return the raw content as bytes
+ """Transform the OpenAI video content download response."""
return raw_response.content
def transform_video_remix_response(
self,
raw_response: httpx.Response,
logging_obj: LiteLLMLoggingObj,
+ custom_llm_provider: Optional[str] = None,
) -> VideoObject:
"""
Transform the OpenAI video remix response.
@@ -235,6 +234,9 @@ def transform_video_remix_response(
# Transform the response data
video_obj = VideoObject(**response_data) # type: ignore[arg-type]
+ if custom_llm_provider and video_obj.id:
+ video_obj.id = encode_video_id_with_provider(video_obj.id, custom_llm_provider, None)
+
# Create usage object with duration information for cost calculation
# Video remix API doesn't provide usage, so we create one with duration
usage_data = {}
@@ -287,8 +289,20 @@ def transform_video_list_response(
self,
raw_response: httpx.Response,
logging_obj: LiteLLMLoggingObj,
+ custom_llm_provider: Optional[str] = None,
) -> Dict[str,str]:
- return raw_response.json()
+ response_data = raw_response.json()
+
+ if custom_llm_provider and "data" in response_data:
+ for video_obj in response_data.get("data", []):
+ if isinstance(video_obj, dict) and "id" in video_obj:
+ video_obj["id"] = encode_video_id_with_provider(
+ video_obj["id"],
+ custom_llm_provider,
+ video_obj.get("model")
+ )
+
+ return response_data
def transform_video_delete_request(
self,
@@ -303,8 +317,10 @@ def transform_video_delete_request(
OpenAI API expects the following request:
- DELETE /v1/videos/{video_id}
"""
+ original_video_id = extract_original_video_id(video_id)
+
# Construct the URL for video delete
- url = f"{api_base.rstrip('/')}/{video_id}"
+ url = f"{api_base.rstrip('/')}/{original_video_id}"
# No data needed for DELETE request
data: Dict[str, Any] = {}
@@ -336,8 +352,11 @@ def transform_video_status_retrieve_request(
"""
Transform the OpenAI video retrieve request.
"""
+ # Extract the original video_id (remove provider encoding if present)
+ original_video_id = extract_original_video_id(video_id)
+
# For video retrieve, we just need to construct the URL
- url = f"{api_base.rstrip('/')}/{video_id}"
+ url = f"{api_base.rstrip('/')}/{original_video_id}"
# No additional data needed for GET request
data: Dict[str, Any] = {}
@@ -348,6 +367,7 @@ def transform_video_status_retrieve_response(
self,
raw_response: httpx.Response,
logging_obj: LiteLLMLoggingObj,
+ custom_llm_provider: Optional[str] = None,
) -> VideoObject:
"""
Transform the OpenAI video retrieve response.
@@ -355,6 +375,9 @@ def transform_video_status_retrieve_response(
response_data = raw_response.json()
# Transform the response data
video_obj = VideoObject(**response_data) # type: ignore[arg-type]
+
+ if custom_llm_provider and video_obj.id:
+ video_obj.id = encode_video_id_with_provider(video_obj.id, custom_llm_provider, None)
return video_obj
diff --git a/litellm/llms/vertex_ai/videos/__init__.py b/litellm/llms/vertex_ai/videos/__init__.py
new file mode 100644
index 000000000000..1dcdbdf4ded2
--- /dev/null
+++ b/litellm/llms/vertex_ai/videos/__init__.py
@@ -0,0 +1,10 @@
+"""
+Vertex AI Video Generation Module
+
+This module provides support for Vertex AI's Veo video generation API.
+"""
+
+from .transformation import VertexAIVideoConfig
+
+__all__ = ["VertexAIVideoConfig"]
+
diff --git a/litellm/llms/vertex_ai/videos/transformation.py b/litellm/llms/vertex_ai/videos/transformation.py
new file mode 100644
index 000000000000..2b6d43dd708a
--- /dev/null
+++ b/litellm/llms/vertex_ai/videos/transformation.py
@@ -0,0 +1,597 @@
+"""
+Vertex AI Video Generation Transformation
+
+Handles transformation of requests/responses for Vertex AI's Veo video generation API.
+Based on: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/veo-video-generation
+"""
+
+import base64
+import time
+from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union
+
+import httpx
+from httpx._types import RequestFiles
+
+from litellm.llms.base_llm.videos.transformation import BaseVideoConfig
+from litellm.llms.vertex_ai.common_utils import (
+ _convert_vertex_datetime_to_openai_datetime,
+)
+from litellm.llms.vertex_ai.vertex_llm_base import VertexBase
+from litellm.types.router import GenericLiteLLMParams
+from litellm.types.videos.main import VideoCreateOptionalRequestParams, VideoObject
+from litellm.types.videos.utils import (
+ encode_video_id_with_provider,
+ extract_original_video_id,
+)
+from litellm.images.utils import ImageEditRequestUtils
+from litellm.constants import DEFAULT_GOOGLE_VIDEO_DURATION_SECONDS
+
+if TYPE_CHECKING:
+ from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj
+ from litellm.llms.base_llm.chat.transformation import (
+ BaseLLMException as _BaseLLMException,
+ )
+
+ LiteLLMLoggingObj = _LiteLLMLoggingObj
+ BaseLLMException = _BaseLLMException
+else:
+ LiteLLMLoggingObj = Any
+ BaseLLMException = Any
+
+
+def _convert_image_to_vertex_format(image_file) -> Dict[str, str]:
+ """
+ Convert image file to Vertex AI format with base64 encoding and MIME type.
+
+ Args:
+ image_file: File-like object opened in binary mode (e.g., open("path", "rb"))
+
+ Returns:
+ Dict with bytesBase64Encoded and mimeType
+ """
+ mime_type = ImageEditRequestUtils.get_image_content_type(image_file)
+
+ if hasattr(image_file, "seek"):
+ image_file.seek(0)
+ image_bytes = image_file.read()
+ base64_encoded = base64.b64encode(image_bytes).decode("utf-8")
+
+ return {"bytesBase64Encoded": base64_encoded, "mimeType": mime_type}
+
+
+class VertexAIVideoConfig(BaseVideoConfig, VertexBase):
+ """
+ Configuration class for Vertex AI (Veo) video generation.
+
+ Veo uses a long-running operation model:
+ 1. POST to :predictLongRunning returns operation name
+ 2. Poll operation using :fetchPredictOperation until done=true
+ 3. Extract video data (base64) from response
+ """
+
+ def __init__(self):
+ BaseVideoConfig.__init__(self)
+ VertexBase.__init__(self)
+
+ @staticmethod
+ def extract_model_from_operation_name(operation_name: str) -> Optional[str]:
+ """
+ Extract the model name from a Vertex AI operation name.
+
+ Args:
+ operation_name: Operation name in format:
+ projects/PROJECT/locations/LOCATION/publishers/google/models/MODEL/operations/OPERATION_ID
+
+ Returns:
+ Model name (e.g., "veo-2.0-generate-001") or None if extraction fails
+ """
+ parts = operation_name.split("/")
+ # Model is at index 7 in the operation name format
+ if len(parts) >= 8:
+ return parts[7]
+ return None
+
+ def get_supported_openai_params(self, model: str) -> list:
+ """
+ Get the list of supported OpenAI parameters for Veo video generation.
+ Veo supports minimal parameters compared to OpenAI.
+ """
+ return ["model", "prompt", "input_reference", "seconds", "size"]
+
+ def map_openai_params(
+ self,
+ video_create_optional_params: VideoCreateOptionalRequestParams,
+ model: str,
+ drop_params: bool,
+ ) -> Dict[str, Any]:
+ """
+ Map OpenAI-style parameters to Veo format.
+
+ Mappings:
+ - prompt → prompt (in instances)
+ - input_reference → image (in instances)
+ - size → aspectRatio (e.g., "1280x720" → "16:9")
+        - seconds → durationSeconds (passed through when provided; cost tracking falls back to DEFAULT_GOOGLE_VIDEO_DURATION_SECONDS)
+ """
+ mapped_params: Dict[str, Any] = {}
+
+ # Map input_reference to image (will be processed in transform_video_create_request)
+ if "input_reference" in video_create_optional_params:
+ mapped_params["image"] = video_create_optional_params["input_reference"]
+
+ # Map size to aspectRatio
+ if "size" in video_create_optional_params:
+ size = video_create_optional_params["size"]
+ if size is not None:
+ aspect_ratio = self._convert_size_to_aspect_ratio(size)
+ if aspect_ratio:
+ mapped_params["aspectRatio"] = aspect_ratio
+
+        # Map seconds to durationSeconds when provided (no default is applied here;
+        # cost tracking falls back to DEFAULT_GOOGLE_VIDEO_DURATION_SECONDS)
+ if "seconds" in video_create_optional_params:
+ seconds = video_create_optional_params["seconds"]
+ try:
+ duration = int(seconds) if isinstance(seconds, str) else seconds
+ if duration is not None:
+ mapped_params["durationSeconds"] = duration
+ except (ValueError, TypeError):
+                # If conversion fails, omit durationSeconds
+ pass
+
+ return mapped_params
+
+ def _convert_size_to_aspect_ratio(self, size: str) -> Optional[str]:
+ """
+ Convert OpenAI size format to Veo aspectRatio format.
+
+ Supported aspect ratios: 9:16 (portrait), 16:9 (landscape)
+ """
+ if not size:
+ return None
+
+ aspect_ratio_map = {
+ "1280x720": "16:9",
+ "1920x1080": "16:9",
+ "720x1280": "9:16",
+ "1080x1920": "9:16",
+ }
+
+ return aspect_ratio_map.get(size, "16:9")
+
+ def validate_environment(
+ self,
+ headers: Dict,
+ model: str,
+ api_key: Optional[str] = None,
+ api_base: Optional[str] = None,
+ litellm_params: Optional[dict] = None,
+ **kwargs,
+ ) -> Dict:
+ """
+        Validate environment and return headers for Vertex AI video generation.
+
+ Vertex AI uses Bearer token authentication with access token from credentials.
+ """
+ # Extract Vertex AI parameters using safe helpers from VertexBase
+ # Use safe_get_* methods that don't mutate litellm_params dict
+ litellm_params = litellm_params or {}
+
+ vertex_project = VertexBase.safe_get_vertex_ai_project(litellm_params=litellm_params)
+ vertex_credentials = VertexBase.safe_get_vertex_ai_credentials(litellm_params=litellm_params)
+
+ # Get access token from Vertex credentials
+ access_token, project_id = self.get_access_token(
+ credentials=vertex_credentials,
+ project_id=vertex_project,
+ )
+
+ headers = {
+ "Authorization": f"Bearer {access_token}",
+ "Content-Type": "application/json",
+ **headers,
+ }
+
+ return headers
+
+ def get_complete_url(
+ self,
+ model: str,
+ api_base: Optional[str],
+ litellm_params: dict,
+ ) -> str:
+ """
+ Get the complete URL for Veo video generation.
+
+ Returns URL for :predictLongRunning endpoint:
+ https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT/locations/LOCATION/publishers/google/models/MODEL:predictLongRunning
+ """
+ vertex_project = VertexBase.safe_get_vertex_ai_project(litellm_params)
+ vertex_location = VertexBase.safe_get_vertex_ai_location(litellm_params)
+
+ if not vertex_project:
+ raise ValueError(
+ "vertex_project is required for Vertex AI video generation. "
+ "Set it via environment variable VERTEXAI_PROJECT or pass as parameter."
+ )
+
+ # Default to us-central1 if no location specified
+ vertex_location = vertex_location or "us-central1"
+
+ # Extract model name (remove vertex_ai/ prefix if present)
+ model_name = model.replace("vertex_ai/", "")
+
+ # Construct the URL
+ if api_base:
+ base_url = api_base.rstrip("/")
+ else:
+ base_url = f"https://{vertex_location}-aiplatform.googleapis.com"
+
+ url = f"{base_url}/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model_name}"
+
+ return url
+
+ def transform_video_create_request(
+ self,
+ model: str,
+ prompt: str,
+ api_base: str,
+ video_create_optional_request_params: Dict,
+ litellm_params: GenericLiteLLMParams,
+ headers: dict,
+ ) -> Tuple[Dict, RequestFiles, str]:
+ """
+ Transform the video creation request for Veo API.
+
+ Veo expects:
+ {
+ "instances": [
+ {
+ "prompt": "A cat playing with a ball of yarn",
+ "image": {
+ "bytesBase64Encoded": "...",
+ "mimeType": "image/jpeg"
+ }
+ }
+ ],
+ "parameters": {
+ "aspectRatio": "16:9",
+ "durationSeconds": 8
+ }
+ }
+ """
+ # Build instance with prompt
+ instance_dict: Dict[str, Any] = {"prompt": prompt}
+ params_copy = video_create_optional_request_params.copy()
+
+ # Check if user wants to provide full instance dict
+ if "instances" in params_copy and isinstance(params_copy["instances"], dict):
+ # Replace/merge with user-provided instance
+ instance_dict.update(params_copy["instances"])
+ params_copy.pop("instances")
+ elif "image" in params_copy and params_copy["image"] is not None:
+ image_data = _convert_image_to_vertex_format(params_copy["image"])
+ instance_dict["image"] = image_data
+ params_copy.pop("image")
+
+ # Build request data directly (TypedDict doesn't have model_dump)
+ request_data: Dict[str, Any] = {"instances": [instance_dict]}
+
+ # Only add parameters if there are any
+ if params_copy:
+ request_data["parameters"] = params_copy
+
+ # Append :predictLongRunning endpoint to api_base
+ url = f"{api_base}:predictLongRunning"
+
+ # No files needed - everything is in JSON
+ return request_data, [], url
+
+ def transform_video_create_response(
+ self,
+ model: str,
+ raw_response: httpx.Response,
+ logging_obj: LiteLLMLoggingObj,
+ custom_llm_provider: Optional[str] = None,
+ request_data: Optional[Dict] = None,
+ ) -> VideoObject:
+ """
+ Transform the Veo video creation response.
+
+ Veo returns:
+ {
+ "name": "projects/PROJECT_ID/locations/LOCATION/publishers/google/models/MODEL/operations/OPERATION_ID"
+ }
+
+ We return this as a VideoObject with:
+ - id: operation name (used for polling)
+ - status: "processing"
+ - usage: includes duration_seconds for cost calculation
+ """
+ response_data = raw_response.json()
+
+ operation_name = response_data.get("name")
+ if not operation_name:
+ raise ValueError(f"No operation name in Veo response: {response_data}")
+
+ if custom_llm_provider:
+ video_id = encode_video_id_with_provider(
+ operation_name, custom_llm_provider, model
+ )
+ else:
+ video_id = operation_name
+
+ video_obj = VideoObject(
+ id=video_id,
+ object="video",
+ status="processing",
+ model=model
+ )
+
+ usage_data = {}
+ if request_data:
+ parameters = request_data.get("parameters", {})
+ duration = parameters.get("durationSeconds") or DEFAULT_GOOGLE_VIDEO_DURATION_SECONDS
+ if duration is not None:
+ try:
+ usage_data["duration_seconds"] = float(duration)
+ except (ValueError, TypeError):
+ pass
+
+ video_obj.usage = usage_data
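+        # Illustrative result (hypothetical values): VideoObject(id="video_<b64>",
+        # status="processing", usage={"duration_seconds": 8.0}) when durationSeconds=8.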
+ return video_obj
+
+ def transform_video_status_retrieve_request(
+ self,
+ video_id: str,
+ api_base: str,
+ litellm_params: GenericLiteLLMParams,
+ headers: dict,
+ ) -> Tuple[str, Dict]:
+ """
+ Transform the video status retrieve request for Veo API.
+
+ Veo polls operations using :fetchPredictOperation endpoint with POST request.
+ """
+ operation_name = extract_original_video_id(video_id)
+ model = self.extract_model_from_operation_name(operation_name)
+
+ if not model:
+ raise ValueError(
+ f"Invalid operation name format: {operation_name}. "
+ "Expected format: projects/PROJECT/locations/LOCATION/publishers/google/models/MODEL/operations/OPERATION_ID"
+ )
+
+ # Construct the full URL including model ID
+ # URL format: https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT/locations/LOCATION/publishers/google/models/MODEL:fetchPredictOperation
+ # Strip trailing slashes from api_base and append model
+ url = f"{api_base.rstrip('/')}/{model}:fetchPredictOperation"
+
+ # Request body contains the operation name
+ params = {"operationName": operation_name}
+
+ return url, params
+
+ def transform_video_status_retrieve_response(
+ self,
+ raw_response: httpx.Response,
+ logging_obj: LiteLLMLoggingObj,
+ custom_llm_provider: Optional[str] = None,
+ ) -> VideoObject:
+ """
+ Transform the Veo operation status response.
+
+ Veo returns:
+ {
+ "name": "projects/.../operations/OPERATION_ID",
+ "done": false # or true when complete
+ }
+
+ When done=true:
+ {
+ "name": "projects/.../operations/OPERATION_ID",
+ "done": true,
+ "response": {
+ "@type": "type.googleapis.com/cloud.ai.large_models.vision.GenerateVideoResponse",
+ "raiMediaFilteredCount": 0,
+ "videos": [
+ {
+ "bytesBase64Encoded": "...",
+ "mimeType": "video/mp4"
+ }
+ ]
+ }
+ }
+ """
+ response_data = raw_response.json()
+
+ operation_name = response_data.get("name", "")
+ is_done = response_data.get("done", False)
+ error_data = response_data.get("error")
+
+ # Extract model from operation name
+ model = self.extract_model_from_operation_name(operation_name)
+
+ if custom_llm_provider:
+ video_id = encode_video_id_with_provider(
+ operation_name, custom_llm_provider, model
+ )
+ else:
+ video_id = operation_name
+
+ # Convert createTime to Unix timestamp
+ create_time_str = response_data.get("metadata", {}).get("createTime")
+ if create_time_str:
+ try:
+ created_at = _convert_vertex_datetime_to_openai_datetime(
+ create_time_str
+ )
+ except Exception:
+ created_at = int(time.time())
+ else:
+ created_at = int(time.time())
+
+ if error_data:
+ status = "failed"
+ elif is_done:
+ status = "completed"
+ else:
+ status = "processing"
+
+ video_obj = VideoObject(
+ id=video_id,
+ object="video",
+ status=status,
+ model=model,
+ created_at=created_at,
+ error=error_data,
+ )
+ return video_obj
+
+ def transform_video_content_request(
+ self,
+ video_id: str,
+ api_base: str,
+ litellm_params: GenericLiteLLMParams,
+ headers: dict,
+ ) -> Tuple[str, Dict]:
+ """
+ Transform the video content request for Veo API.
+
+ For Veo, we need to:
+ 1. Poll the operation status to ensure it's complete
+ 2. Extract the base64 video data from the response
+ 3. Return it for decoding
+
+        Veo has no separate content-download endpoint, so this reuses the same
+        :fetchPredictOperation request as status retrieval; the response transform
+        then extracts and decodes the base64 video bytes.
+ """
+ return self.transform_video_status_retrieve_request(video_id, api_base, litellm_params, headers)
+
+ def transform_video_content_response(
+ self,
+ raw_response: httpx.Response,
+ logging_obj: LiteLLMLoggingObj,
+ ) -> bytes:
+ """
+ Transform the Veo video content download response.
+
+ Extracts the base64 encoded video from the response and decodes it to bytes.
+ """
+ response_data = raw_response.json()
+
+ if not response_data.get("done", False):
+ raise ValueError(
+ "Video generation is not complete yet. "
+ "Please check status with video_status() before downloading."
+ )
+
+ try:
+ video_response = response_data.get("response", {})
+ videos = video_response.get("videos", [])
+
+            if not videos:
+ raise ValueError("No video data found in completed operation")
+
+ # Get the first video
+ video_data = videos[0]
+ base64_encoded = video_data.get("bytesBase64Encoded")
+
+ if not base64_encoded:
+ raise ValueError("No base64 encoded video data found")
+
+ # Decode base64 to bytes
+ video_bytes = base64.b64decode(base64_encoded)
+ return video_bytes
+
+        except (KeyError, IndexError) as e:
+            raise ValueError(f"Failed to extract video data: {e}") from e
+
+ def transform_video_remix_request(
+ self,
+ video_id: str,
+ prompt: str,
+ api_base: str,
+ litellm_params: GenericLiteLLMParams,
+ headers: dict,
+ extra_body: Optional[Dict[str, Any]] = None,
+ ) -> Tuple[str, Dict]:
+ """
+ Video remix is not supported by Veo API.
+ """
+ raise NotImplementedError(
+ "Video remix is not supported by Vertex AI Veo. "
+ "Please use video_generation() to create new videos."
+ )
+
+ def transform_video_remix_response(
+ self,
+ raw_response: httpx.Response,
+ logging_obj: LiteLLMLoggingObj,
+ custom_llm_provider: Optional[str] = None,
+ ) -> VideoObject:
+ """Video remix is not supported."""
+ raise NotImplementedError("Video remix is not supported by Vertex AI Veo.")
+
+ def transform_video_list_request(
+ self,
+ api_base: str,
+ litellm_params: GenericLiteLLMParams,
+ headers: dict,
+ after: Optional[str] = None,
+ limit: Optional[int] = None,
+ order: Optional[str] = None,
+ extra_query: Optional[Dict[str, Any]] = None,
+ ) -> Tuple[str, Dict]:
+ """
+ Video list is not supported by Veo API.
+ """
+ raise NotImplementedError(
+ "Video list is not supported by Vertex AI Veo. "
+ "Use the operations endpoint directly if you need to list operations."
+ )
+
+ def transform_video_list_response(
+ self,
+ raw_response: httpx.Response,
+ logging_obj: LiteLLMLoggingObj,
+ custom_llm_provider: Optional[str] = None,
+ ) -> Dict[str, str]:
+ """Video list is not supported."""
+ raise NotImplementedError("Video list is not supported by Vertex AI Veo.")
+
+ def transform_video_delete_request(
+ self,
+ video_id: str,
+ api_base: str,
+ litellm_params: GenericLiteLLMParams,
+ headers: dict,
+ ) -> Tuple[str, Dict]:
+ """
+ Video delete is not supported by Veo API.
+ """
+ raise NotImplementedError(
+ "Video delete is not supported by Vertex AI Veo. "
+ "Videos are automatically cleaned up by Google."
+ )
+
+ def transform_video_delete_response(
+ self,
+ raw_response: httpx.Response,
+ logging_obj: LiteLLMLoggingObj,
+ ) -> VideoObject:
+ """Video delete is not supported."""
+ raise NotImplementedError("Video delete is not supported by Vertex AI Veo.")
+
+ def get_error_class(
+ self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
+ ) -> BaseLLMException:
+ from litellm.llms.vertex_ai.common_utils import VertexAIError
+
+ return VertexAIError(
+ status_code=status_code,
+ message=error_message,
+ headers=headers,
+ )
+
diff --git a/litellm/main.py b/litellm/main.py
index b7af4e8d39c5..85779d8fa87f 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -6001,6 +6001,7 @@ async def ahealth_check(
"audio_speech",
"audio_transcription",
"image_generation",
+ "video_generation",
"batch",
"rerank",
"realtime",
diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index 9f020fc1ebf9..f571dfb52433 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -12668,6 +12668,34 @@
"video"
]
},
+ "gemini/veo-3.1-fast-generate-preview": {
+ "litellm_provider": "gemini",
+ "max_input_tokens": 1024,
+ "max_tokens": 1024,
+ "mode": "video_generation",
+ "output_cost_per_second": 0.15,
+ "source": "https://ai.google.dev/gemini-api/docs/video",
+ "supported_modalities": [
+ "text"
+ ],
+ "supported_output_modalities": [
+ "video"
+ ]
+ },
+ "gemini/veo-3.1-generate-preview": {
+ "litellm_provider": "gemini",
+ "max_input_tokens": 1024,
+ "max_tokens": 1024,
+ "mode": "video_generation",
+ "output_cost_per_second": 0.40,
+ "source": "https://ai.google.dev/gemini-api/docs/video",
+ "supported_modalities": [
+ "text"
+ ],
+ "supported_output_modalities": [
+ "video"
+ ]
+ },
"google_pse/search": {
"input_cost_per_query": 0.005,
"litellm_provider": "google_pse",
@@ -23374,6 +23402,34 @@
"video"
]
},
+ "vertex_ai/veo-3.1-generate-preview": {
+ "litellm_provider": "vertex_ai-video-models",
+ "max_input_tokens": 1024,
+ "max_tokens": 1024,
+ "mode": "video_generation",
+ "output_cost_per_second": 0.4,
+ "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/veo",
+ "supported_modalities": [
+ "text"
+ ],
+ "supported_output_modalities": [
+ "video"
+ ]
+ },
+ "vertex_ai/veo-3.1-fast-generate-preview": {
+ "litellm_provider": "vertex_ai-video-models",
+ "max_input_tokens": 1024,
+ "max_tokens": 1024,
+ "mode": "video_generation",
+ "output_cost_per_second": 0.15,
+ "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/veo",
+ "supported_modalities": [
+ "text"
+ ],
+ "supported_output_modalities": [
+ "video"
+ ]
+ },
"voyage/rerank-2": {
"input_cost_per_query": 5e-08,
"input_cost_per_token": 5e-08,
diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py
index e8f89baeffb2..b0aa93c8c5d9 100644
--- a/litellm/proxy/_types.py
+++ b/litellm/proxy/_types.py
@@ -235,6 +235,7 @@ class LiteLLMRoutes(enum.Enum):
"completion",
"embeddings",
"image_generation",
+ "video_generation",
"audio_transcriptions",
"moderations",
"model_list", # OpenAI /v1/models route
diff --git a/litellm/proxy/health_endpoints/_health_endpoints.py b/litellm/proxy/health_endpoints/_health_endpoints.py
index b42f800734ac..1af6c9cf2874 100644
--- a/litellm/proxy/health_endpoints/_health_endpoints.py
+++ b/litellm/proxy/health_endpoints/_health_endpoints.py
@@ -947,6 +947,7 @@ async def test_model_connection(
"audio_speech",
"audio_transcription",
"image_generation",
+ "video_generation",
"batch",
"rerank",
"realtime",
diff --git a/litellm/proxy/video_endpoints/endpoints.py b/litellm/proxy/video_endpoints/endpoints.py
index 9cd4a698035d..25114921dbc6 100644
--- a/litellm/proxy/video_endpoints/endpoints.py
+++ b/litellm/proxy/video_endpoints/endpoints.py
@@ -15,6 +15,7 @@
get_custom_llm_provider_from_request_headers,
get_custom_llm_provider_from_request_query,
)
+from litellm.types.videos.utils import decode_video_id_with_provider
router = APIRouter()
@@ -237,13 +238,15 @@ async def video_status(
# Create data with video_id
data: Dict[str, Any] = {"video_id": video_id}
- # Extract custom_llm_provider from headers, query params, or body
+ decoded = decode_video_id_with_provider(video_id)
+ provider_from_id = decoded.get("custom_llm_provider")
+
custom_llm_provider = (
get_custom_llm_provider_from_request_headers(request=request)
or get_custom_llm_provider_from_request_query(request=request)
or await get_custom_llm_provider_from_request_body(request=request)
+ or provider_from_id
or "openai"
-
)
if custom_llm_provider:
data["custom_llm_provider"] = custom_llm_provider
@@ -304,7 +307,7 @@ async def video_content(
Example:
```bash
- curl -X GET "http://localhost:4000/v1/videos/video_123/content" \
+ curl -X GET "http://localhost:4000/v1/videos/{video_id}/content" \
-H "Authorization: Bearer sk-1234" \
--output video.mp4
```
@@ -326,11 +329,14 @@ async def video_content(
# Create data with video_id
data: Dict[str, Any] = {"video_id": video_id}
- # Extract custom_llm_provider from headers, query params, or body
+ decoded = decode_video_id_with_provider(video_id)
+ provider_from_id = decoded.get("custom_llm_provider")
+
custom_llm_provider = (
get_custom_llm_provider_from_request_headers(request=request)
or get_custom_llm_provider_from_request_query(request=request)
or await get_custom_llm_provider_from_request_body(request=request)
+ or provider_from_id
)
if custom_llm_provider:
data["custom_llm_provider"] = custom_llm_provider
@@ -428,11 +434,14 @@ async def video_remix(
data = orjson.loads(body)
data["video_id"] = video_id
- # Extract custom_llm_provider from headers, query params, or body
+ decoded = decode_video_id_with_provider(video_id)
+ provider_from_id = decoded.get("custom_llm_provider")
+
custom_llm_provider = (
get_custom_llm_provider_from_request_headers(request=request)
or get_custom_llm_provider_from_request_query(request=request)
or data.get("custom_llm_provider")
+ or provider_from_id
)
if custom_llm_provider:
data["custom_llm_provider"] = custom_llm_provider
diff --git a/litellm/types/llms/gemini.py b/litellm/types/llms/gemini.py
index cfc13cc44a82..e29a2cc19a04 100644
--- a/litellm/types/llms/gemini.py
+++ b/litellm/types/llms/gemini.py
@@ -221,3 +221,125 @@ class GeminiImageGenerationPrediction(TypedDict):
class GeminiImageGenerationResponse(TypedDict):
"""Complete response body from Gemini image generation API"""
predictions: List[GeminiImageGenerationPrediction]
+
+# Video Generation Types
+class GeminiVideoGenerationInstance(TypedDict):
+ """Instance data for Gemini video generation request"""
+ prompt: str
+
+
+class GeminiVideoGenerationParameters(BaseModel):
+ """
+ Parameters for Gemini video generation request.
+
+    See: https://ai.google.dev/gemini-api/docs/video (Veo 3 / 3.1 parameter guide).
+ """
+ aspectRatio: Optional[str] = None
+ """Aspect ratio for generated video (e.g., '16:9', '9:16')."""
+
+ durationSeconds: Optional[int] = None
+ """
+ Length of the generated video in seconds (e.g., 4, 5, 6, 8).
+ Must be 8 when using extension/interpolation or referenceImages.
+ """
+
+ resolution: Optional[str] = None
+ """
+ Video resolution (e.g., '720p', '1080p').
+ '1080p' only supports 8s duration; extension only supports '720p'.
+ """
+
+ negativePrompt: Optional[str] = None
+ """Text describing what not to include in the video."""
+
+ image: Optional[Any] = None
+ """
+ An initial image to animate (Image object).
+ """
+
+ lastFrame: Optional[Any] = None
+ """
+    The final frame that an interpolation video should transition to.
+ Should be used with the 'image' parameter.
+ """
+
+ referenceImages: Optional[list] = None
+ """
+ Up to three images to be used as style/content references.
+ Only supported in Veo 3.1 (list of VideoGenerationReferenceImage objects).
+ """
+
+ video: Optional[Any] = None
+ """
+ Video to be used for video extension (Video object).
+ Only supported in Veo 3.1 & Veo 3 Fast.
+ """
+
+ personGeneration: Optional[str] = None
+ """
+ Controls the generation of people.
+ Text-to-video & Extension: "allow_all" only
+ Image-to-video, Interpolation, & Reference images (Veo 3.x): "allow_adult" only
+ See documentation for region restrictions & more.
+ """
+
+
+class GeminiVideoGenerationRequest(BaseModel):
+ """Complete request body for Gemini video generation"""
+ instances: List[GeminiVideoGenerationInstance]
+ parameters: Optional[GeminiVideoGenerationParameters] = None
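+
+# Illustrative request body (hypothetical values) built from the models above:
+#   {"instances": [{"prompt": "A sunset over the ocean"}],
+#    "parameters": {"aspectRatio": "16:9", "durationSeconds": 8, "resolution": "1080p"}}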
+
+
+# Video Generation Operation Response Types
+class GeminiVideoUri(BaseModel):
+ """Video URI in the generated sample"""
+ uri: str
+ """File URI of the generated video (e.g., 'files/abc123...')"""
+
+
+class GeminiGeneratedVideoSample(BaseModel):
+ """Individual generated video sample"""
+ video: GeminiVideoUri
+ """Video object containing the URI"""
+
+
+class GeminiGenerateVideoResponse(BaseModel):
+ """Generate video response containing the samples"""
+ generatedSamples: List[GeminiGeneratedVideoSample]
+ """List of generated video samples"""
+
+
+class GeminiOperationResponse(BaseModel):
+ """Response object in the operation when done"""
+ generateVideoResponse: GeminiGenerateVideoResponse
+ """Video generation response"""
+
+
+class GeminiOperationMetadata(BaseModel):
+ """Metadata for the operation"""
+ createTime: Optional[str] = None
+ """Creation timestamp"""
+ model: Optional[str] = None
+ """Model used for generation"""
+
+
+class GeminiLongRunningOperationResponse(BaseModel):
+ """
+ Complete response for a long-running operation.
+
+ Used when polling operation status and extracting results.
+ """
+ name: str
+ """Operation name (e.g., 'operations/generate_1234567890')"""
+
+ done: bool = False
+ """Whether the operation is complete"""
+
+ metadata: Optional[GeminiOperationMetadata] = None
+ """Operation metadata"""
+
+ response: Optional[GeminiOperationResponse] = None
+ """Response object when operation is complete"""
+
+ error: Optional[Dict[str, Any]] = None
+ """Error details if operation failed"""
diff --git a/litellm/types/llms/vertex_ai.py b/litellm/types/llms/vertex_ai.py
index cb2075981a87..768818a61076 100644
--- a/litellm/types/llms/vertex_ai.py
+++ b/litellm/types/llms/vertex_ai.py
@@ -638,6 +638,52 @@ class VertexBatchPredictionResponse(TypedDict, total=False):
modelVersionId: str
+class VertexVideoImage(TypedDict, total=False):
+ """Image input for video generation"""
+
+ bytesBase64Encoded: str
+ mimeType: str
+
+
+class VertexVideoGenerationInstance(TypedDict, total=False):
+ """Instance object for Vertex AI video generation request"""
+
+ prompt: Required[str]
+ image: VertexVideoImage
+
+
+class VertexVideoGenerationParameters(TypedDict, total=False):
+ """Parameters for Vertex AI video generation"""
+
+ aspectRatio: Literal["9:16", "16:9"]
+ durationSeconds: int
+
+
+class VertexVideoGenerationRequest(TypedDict):
+ """Complete request body for Vertex AI video generation"""
+
+ instances: Required[List[VertexVideoGenerationInstance]]
+ parameters: VertexVideoGenerationParameters
+
+
+class VertexVideoOutput(TypedDict, total=False):
+ """Video output in response"""
+
+ bytesBase64Encoded: str
+ mimeType: str
+ gcsUri: str
+
+
+class VertexVideoGenerationResponse(TypedDict, total=False):
+ """Response body for Vertex AI video generation"""
+
+ name: str
+ done: bool
+ response: Dict[str, Any]
+ metadata: Dict[str, Any]
+ error: Dict[str, Any]
+
+
VERTEX_CREDENTIALS_TYPES = Union[str, Dict[str, str]]
diff --git a/litellm/types/utils.py b/litellm/types/utils.py
index 396db3addb78..e608df675404 100644
--- a/litellm/types/utils.py
+++ b/litellm/types/utils.py
@@ -2788,6 +2788,10 @@ class SpecialEnums(Enum):
LITELLM_MANAGED_GENERIC_RESPONSE_COMPLETE_STR = "litellm_proxy;model_id:{};generic_response_id:{}" # generic implementation of 'managed batches' - used for finetuning and any future work.
+ LITELLM_MANAGED_VIDEO_COMPLETE_STR = (
+ "litellm:custom_llm_provider:{};model_id:{};video_id:{}"
+ )
+
class ServiceTier(Enum):
"""Enum for service tier types used in cost calculations."""
diff --git a/litellm/types/videos/main.py b/litellm/types/videos/main.py
index f33e1d5a4411..65c4cfe0e003 100644
--- a/litellm/types/videos/main.py
+++ b/litellm/types/videos/main.py
@@ -10,7 +10,7 @@ class VideoObject(BaseModel):
id: str
object: Literal["video"]
status: str
- created_at: int
+ created_at: Optional[int] = None
completed_at: Optional[int] = None
expires_at: Optional[int] = None
error: Optional[Dict[str, Any]] = None
@@ -87,3 +87,10 @@ class VideoCreateRequestParams(VideoCreateOptionalRequestParams, total=False):
Params here: https://platform.openai.com/docs/api-reference/videos/create
"""
prompt: str
+
+class DecodedVideoId(TypedDict, total=False):
+ """Structure representing a decoded video ID"""
+
+ custom_llm_provider: Optional[str]
+ model_id: Optional[str]
+ video_id: str
\ No newline at end of file
diff --git a/litellm/types/videos/utils.py b/litellm/types/videos/utils.py
new file mode 100644
index 000000000000..329aea645c5c
--- /dev/null
+++ b/litellm/types/videos/utils.py
@@ -0,0 +1,100 @@
+"""
+Utility functions for video ID encoding/decoding with provider information.
+
+Follows the pattern used in responses/utils.py for consistency.
+Format: video_{base64_encoded_string}
+"""
+import base64
+from typing import Optional
+from litellm.types.utils import SpecialEnums
+from litellm.types.videos.main import DecodedVideoId
+from litellm._logging import verbose_logger
+
+
+VIDEO_ID_PREFIX = "video_"
+
+
+def encode_video_id_with_provider(
+ video_id: str,
+ provider: str,
+ model_id: Optional[str] = None
+) -> str:
+ """Encode provider and model_id into video_id using base64."""
+ if not provider or not video_id:
+ return video_id
+
+ if video_id.startswith(VIDEO_ID_PREFIX):
+ return video_id
+
+ assembled_id = str(
+ SpecialEnums.LITELLM_MANAGED_VIDEO_COMPLETE_STR.value
+ ).format(provider, model_id or "", video_id)
+
+ base64_encoded_id: str = base64.b64encode(assembled_id.encode("utf-8")).decode("utf-8")
+
+ return f"{VIDEO_ID_PREFIX}{base64_encoded_id}"
+
+
+def decode_video_id_with_provider(encoded_video_id: str) -> DecodedVideoId:
+ """Decode provider and model_id from encoded video_id."""
+ if not encoded_video_id:
+ return DecodedVideoId(
+ custom_llm_provider=None,
+ model_id=None,
+ video_id=encoded_video_id,
+ )
+
+ if not encoded_video_id.startswith(VIDEO_ID_PREFIX):
+ return DecodedVideoId(
+ custom_llm_provider=None,
+ model_id=None,
+ video_id=encoded_video_id,
+ )
+
+ try:
+        cleaned_id = encoded_video_id[len(VIDEO_ID_PREFIX):]  # strip only the leading prefix
+ decoded_id = base64.b64decode(cleaned_id.encode("utf-8")).decode("utf-8")
+
+ if ";" not in decoded_id:
+ return DecodedVideoId(
+ custom_llm_provider=None,
+ model_id=None,
+ video_id=encoded_video_id,
+ )
+
+ parts = decoded_id.split(";")
+
+ custom_llm_provider = None
+ model_id = None
+ decoded_video_id = encoded_video_id
+
+ if len(parts) >= 3:
+ custom_llm_provider_part = parts[0]
+ model_id_part = parts[1]
+ video_id_part = parts[2]
+
+ custom_llm_provider = custom_llm_provider_part.replace(
+ "litellm:custom_llm_provider:", ""
+ )
+ model_id = model_id_part.replace("model_id:", "")
+ decoded_video_id = video_id_part.replace("video_id:", "")
+
+ return DecodedVideoId(
+ custom_llm_provider=custom_llm_provider,
+ model_id=model_id,
+ video_id=decoded_video_id,
+ )
+ except Exception as e:
+ verbose_logger.debug(f"Error decoding video_id '{encoded_video_id}': {e}")
+ return DecodedVideoId(
+ custom_llm_provider=None,
+ model_id=None,
+ video_id=encoded_video_id,
+ )
+
+
+def extract_original_video_id(encoded_video_id: str) -> str:
+ """Extract original video ID without encoding."""
+ decoded = decode_video_id_with_provider(encoded_video_id)
+ return decoded.get("video_id", encoded_video_id)
diff --git a/litellm/utils.py b/litellm/utils.py
index e12387899bba..1ac27428f802 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -7648,6 +7648,16 @@ def get_provider_video_config(
from litellm.llms.azure.videos.transformation import AzureVideoConfig
return AzureVideoConfig()
+ elif LlmProviders.GEMINI == provider:
+ from litellm.llms.gemini.videos.transformation import GeminiVideoConfig
+
+ return GeminiVideoConfig()
+ elif LlmProviders.VERTEX_AI == provider:
+ from litellm.llms.vertex_ai.videos.transformation import (
+ VertexAIVideoConfig,
+ )
+
+ return VertexAIVideoConfig()
return None
@staticmethod
diff --git a/litellm/videos/main.py b/litellm/videos/main.py
index cbc59169a7ac..be95b4ab9b22 100644
--- a/litellm/videos/main.py
+++ b/litellm/videos/main.py
@@ -19,6 +19,7 @@
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.llms.base_llm.videos.transformation import BaseVideoConfig
from litellm.llms.custom_httpx.llm_http_handler import BaseLLMHTTPHandler
+from litellm.types.videos.utils import decode_video_id_with_provider
#################### Initialize provider clients ####################
llm_http_handler: BaseLLMHTTPHandler = BaseLLMHTTPHandler()
@@ -303,13 +304,10 @@ def video_content(
```python
import litellm
- # Download video content
video_bytes = litellm.video_content(
- video_id="video_123",
- custom_llm_provider="openai"
+ video_id="video_123"
)
- # Save to file
with open("video.mp4", "wb") as f:
f.write(video_bytes)
```
@@ -320,9 +318,10 @@ def video_content(
litellm_call_id: Optional[str] = kwargs.get("litellm_call_id", None)
_is_async = kwargs.pop("async_call", False) is True
- # Ensure custom_llm_provider is not None - default to openai if not provided
+ # Try to decode provider from video_id if not explicitly provided
if custom_llm_provider is None:
- custom_llm_provider = "openai"
+ decoded = decode_video_id_with_provider(video_id)
+ custom_llm_provider = decoded.get("custom_llm_provider") or "openai"
# get llm provider logic
litellm_params = GenericLiteLLMParams(**kwargs)
@@ -594,9 +593,10 @@ def video_remix( # noqa: PLR0915
response = VideoObject(**mock_response)
return response
- # Ensure custom_llm_provider is not None - default to openai if not provided
+ # Try to decode provider from video_id if not explicitly provided
if custom_llm_provider is None:
- custom_llm_provider = "openai"
+ decoded = decode_video_id_with_provider(video_id)
+ custom_llm_provider = decoded.get("custom_llm_provider") or "openai"
# get llm provider logic
litellm_params = GenericLiteLLMParams(**kwargs)
@@ -907,7 +907,7 @@ async def avideo_status(
Returns:
- `response` (VideoObject): The response returned by the `video_status` function.
-"""
+ """
local_vars = locals()
try:
loop = asyncio.get_event_loop()
@@ -1015,8 +1015,7 @@ def video_status( # noqa: PLR0915
# Get video status
video_status = litellm.video_status(
- video_id="video_123",
- custom_llm_provider="openai"
+ video_id="video_123"
)
print(f"Video status: {video_status.status}")
@@ -1038,9 +1037,10 @@ def video_status( # noqa: PLR0915
response = VideoObject(**mock_response)
return response
- # Ensure custom_llm_provider is not None - default to openai if not provided
+ # Try to decode provider from video_id if not explicitly provided
if custom_llm_provider is None:
- custom_llm_provider = "openai"
+ decoded = decode_video_id_with_provider(video_id)
+ custom_llm_provider = decoded.get("custom_llm_provider") or "openai"
# get llm provider logic
litellm_params = GenericLiteLLMParams(**kwargs)
diff --git a/litellm/videos/utils.py b/litellm/videos/utils.py
index 7cfccab6720d..e04ab9fe180c 100644
--- a/litellm/videos/utils.py
+++ b/litellm/videos/utils.py
@@ -1,8 +1,9 @@
-from typing import Any, Dict, cast, get_type_hints
+from typing import Any, Dict, cast
import litellm
from litellm.llms.base_llm.videos.transformation import BaseVideoConfig
from litellm.types.videos.main import VideoCreateOptionalRequestParams
+from litellm.utils import filter_out_litellm_params
class VideoGenerationRequestUtils:
@@ -25,25 +26,6 @@ def get_optional_params_video_generation(
Returns:
A dictionary of supported parameters for the video generation API
"""
- # Get supported parameters for the model
- supported_params = video_generation_provider_config.get_supported_openai_params(model)
-
- # Check for unsupported parameters
- unsupported_params = [
- param
- for param in video_generation_optional_params
- if param not in supported_params
- ]
-
- if unsupported_params:
- raise litellm.UnsupportedParamsError(
- model=model,
- message=(
- f"The following parameters are not supported for model {model}: "
- f"{', '.join(unsupported_params)}"
- ),
- )
-
# Map parameters to provider-specific format
mapped_params = video_generation_provider_config.map_openai_params(
video_create_optional_params=video_generation_optional_params,
@@ -51,6 +33,15 @@ def get_optional_params_video_generation(
drop_params=litellm.drop_params,
)
+ # Merge extra_body params if present (for provider-specific parameters)
+ if "extra_body" in video_generation_optional_params:
+ extra_body = video_generation_optional_params["extra_body"]
+ if extra_body and isinstance(extra_body, dict):
+ # extra_body params override mapped params
+ mapped_params.update(extra_body)
+ # Remove extra_body from mapped_params since it's not sent to the API
+ mapped_params.pop("extra_body", None)
+
return mapped_params
@staticmethod
@@ -66,9 +57,44 @@ def get_requested_video_generation_optional_param(
Returns:
VideoCreateOptionalRequestParams instance with only the valid parameters
"""
- valid_keys = get_type_hints(VideoCreateOptionalRequestParams).keys()
- filtered_params = {
- k: v for k, v in params.items() if k in valid_keys and v is not None
+ params = dict(params or {})
+
+ raw_kwargs = params.get("kwargs", {})
+ if not isinstance(raw_kwargs, dict):
+ raw_kwargs = {}
+
+ kwargs_extra_body = raw_kwargs.pop("extra_body", None)
+ top_level_extra_body = params.get("extra_body")
+
+ base_params_raw = {
+ key: value
+ for key, value in params.items()
+ if key not in {"kwargs", "extra_body", "prompt", "model"} and value is not None
+ }
+ base_params = filter_out_litellm_params(kwargs=base_params_raw)
+
+ cleaned_kwargs = filter_out_litellm_params(
+ kwargs={k: v for k, v in raw_kwargs.items() if v is not None}
+ )
+
+ optional_params: Dict[str, Any] = {
+ **base_params,
+ **cleaned_kwargs,
}
- return cast(VideoCreateOptionalRequestParams, filtered_params)
+ merged_extra_body: Dict[str, Any] = {}
+ for extra_body_candidate in (top_level_extra_body, kwargs_extra_body):
+ if isinstance(extra_body_candidate, dict):
+ for key, value in extra_body_candidate.items():
+ if value is not None:
+ merged_extra_body[key] = value
+
+ if merged_extra_body:
+ merged_extra_body = filter_out_litellm_params(kwargs=merged_extra_body)
+ if merged_extra_body:
+ optional_params["extra_body"] = merged_extra_body
+ optional_params.update(merged_extra_body)
+
+ optional_params.pop("timeout", None)
+
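+    # Illustrative merge (hypothetical params):
+    #   {"seconds": "4", "extra_body": {"resolution": "1080p"}}
+    #   -> {"seconds": "4", "extra_body": {"resolution": "1080p"}, "resolution": "1080p"}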
+ return cast(VideoCreateOptionalRequestParams, optional_params)
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index 9f020fc1ebf9..f571dfb52433 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -12668,6 +12668,34 @@
"video"
]
},
+ "gemini/veo-3.1-fast-generate-preview": {
+ "litellm_provider": "gemini",
+ "max_input_tokens": 1024,
+ "max_tokens": 1024,
+ "mode": "video_generation",
+ "output_cost_per_second": 0.15,
+ "source": "https://ai.google.dev/gemini-api/docs/video",
+ "supported_modalities": [
+ "text"
+ ],
+ "supported_output_modalities": [
+ "video"
+ ]
+ },
+ "gemini/veo-3.1-generate-preview": {
+ "litellm_provider": "gemini",
+ "max_input_tokens": 1024,
+ "max_tokens": 1024,
+ "mode": "video_generation",
+ "output_cost_per_second": 0.40,
+ "source": "https://ai.google.dev/gemini-api/docs/video",
+ "supported_modalities": [
+ "text"
+ ],
+ "supported_output_modalities": [
+ "video"
+ ]
+ },
"google_pse/search": {
"input_cost_per_query": 0.005,
"litellm_provider": "google_pse",
@@ -23374,6 +23402,34 @@
"video"
]
},
+ "vertex_ai/veo-3.1-generate-preview": {
+ "litellm_provider": "vertex_ai-video-models",
+ "max_input_tokens": 1024,
+ "max_tokens": 1024,
+ "mode": "video_generation",
+ "output_cost_per_second": 0.4,
+ "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/veo",
+ "supported_modalities": [
+ "text"
+ ],
+ "supported_output_modalities": [
+ "video"
+ ]
+ },
+ "vertex_ai/veo-3.1-fast-generate-preview": {
+ "litellm_provider": "vertex_ai-video-models",
+ "max_input_tokens": 1024,
+ "max_tokens": 1024,
+ "mode": "video_generation",
+ "output_cost_per_second": 0.15,
+ "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/veo",
+ "supported_modalities": [
+ "text"
+ ],
+ "supported_output_modalities": [
+ "video"
+ ]
+ },
"voyage/rerank-2": {
"input_cost_per_query": 5e-08,
"input_cost_per_token": 5e-08,
diff --git a/tests/test_litellm/llms/azure/videos/test_azure_video_transformation.py b/tests/test_litellm/llms/azure/videos/test_azure_video_transformation.py
index 14bf0064e614..640933179a68 100644
--- a/tests/test_litellm/llms/azure/videos/test_azure_video_transformation.py
+++ b/tests/test_litellm/llms/azure/videos/test_azure_video_transformation.py
@@ -129,10 +129,12 @@ def test_transform_video_create_request(self):
)
headers = {"Authorization": f"Bearer {self.api_key}"}
+ api_base = f"{self.api_base}/openai/v1/videos"
- data, files = self.config.transform_video_create_request(
+ data, files, url = self.config.transform_video_create_request(
model=self.model,
prompt="A cinematic shot of a city at night",
+ api_base=api_base,
video_create_optional_request_params=video_params,
litellm_params=litellm_params,
headers=headers
@@ -142,6 +144,8 @@ def test_transform_video_create_request(self):
assert data["seconds"] == 8
assert data["size"] == "720x1280"
assert data["model"] == self.model
+ # URL should be returned as-is for Azure
+ assert url == api_base
def test_transform_video_create_response(self):
"""Test video creation response transformation."""
@@ -275,13 +279,15 @@ def test_video_create_with_file_upload(self):
)
headers = {"Authorization": f"Bearer {self.api_key}"}
+ api_base = f"{self.api_base}/openai/v1/videos"
# Mock file existence
with patch('os.path.exists', return_value=True):
with patch('builtins.open', mock_open(read_data=b"fake image data")):
- data, files = self.config.transform_video_create_request(
+ data, files, url = self.config.transform_video_create_request(
model=self.model,
prompt="A video with reference image",
+ api_base=api_base,
video_create_optional_request_params=video_params,
litellm_params=litellm_params,
headers=headers
@@ -291,6 +297,7 @@ def test_video_create_with_file_upload(self):
assert data["seconds"] == 10
assert len(files) == 1
assert files[0][0] == "input_reference"
+ assert url == api_base
def test_error_handling_in_response_transformation(self):
"""Test error handling in response transformation methods."""
diff --git a/tests/test_litellm/llms/gemini/videos/__init__.py b/tests/test_litellm/llms/gemini/videos/__init__.py
new file mode 100644
index 000000000000..7156c063be7f
--- /dev/null
+++ b/tests/test_litellm/llms/gemini/videos/__init__.py
@@ -0,0 +1,2 @@
+# Gemini Video Generation Tests
+
diff --git a/tests/test_litellm/llms/gemini/videos/test_gemini_video_transformation.py b/tests/test_litellm/llms/gemini/videos/test_gemini_video_transformation.py
new file mode 100644
index 000000000000..660974181f9e
--- /dev/null
+++ b/tests/test_litellm/llms/gemini/videos/test_gemini_video_transformation.py
@@ -0,0 +1,680 @@
+"""
+Tests for Gemini (Veo) video generation transformation.
+"""
+import json
+import os
+from unittest.mock import MagicMock, Mock, patch
+
+import httpx
+import pytest
+
+from litellm.llms.gemini.videos.transformation import GeminiVideoConfig
+from litellm.llms.openai.cost_calculation import video_generation_cost
+from litellm.types.router import GenericLiteLLMParams
+from litellm.types.videos.main import VideoObject
+
+
+class TestGeminiVideoConfig:
+ """Test GeminiVideoConfig transformation class."""
+
+ def setup_method(self):
+ """Setup test fixtures."""
+ self.config = GeminiVideoConfig()
+ self.mock_logging_obj = Mock()
+
+ def test_get_supported_openai_params(self):
+ """Test that correct params are supported."""
+ params = self.config.get_supported_openai_params("veo-3.0-generate-preview")
+
+ assert "model" in params
+ assert "prompt" in params
+ assert "input_reference" in params
+ assert "seconds" in params
+ assert "size" in params
+
+ def test_validate_environment_with_api_key(self):
+ """Test environment validation with API key."""
+ headers = {}
+ result = self.config.validate_environment(
+ headers=headers,
+ model="veo-3.0-generate-preview",
+ api_key="test-api-key-123"
+ )
+
+ assert "x-goog-api-key" in result
+ assert result["x-goog-api-key"] == "test-api-key-123"
+ assert "Content-Type" in result
+ assert result["Content-Type"] == "application/json"
+
+ @patch.dict('os.environ', {}, clear=True)
+ def test_validate_environment_missing_api_key(self):
+ """Test that missing API key raises error."""
+ headers = {}
+
+ with pytest.raises(ValueError, match="GEMINI_API_KEY or GOOGLE_API_KEY is required"):
+ self.config.validate_environment(
+ headers=headers,
+ model="veo-3.0-generate-preview",
+ api_key=None
+ )
+
+ def test_get_complete_url(self):
+ """Test URL construction for video generation."""
+ url = self.config.get_complete_url(
+ model="gemini/veo-3.0-generate-preview",
+ api_base="https://generativelanguage.googleapis.com",
+ litellm_params={}
+ )
+
+ expected = "https://generativelanguage.googleapis.com/v1beta/models/veo-3.0-generate-preview:predictLongRunning"
+ assert url == expected
+
+ def test_get_complete_url_default_api_base(self):
+ """Test URL construction with default API base."""
+ url = self.config.get_complete_url(
+ model="gemini/veo-3.0-generate-preview",
+ api_base=None,
+ litellm_params={}
+ )
+
+ assert url.startswith("https://generativelanguage.googleapis.com")
+ assert "veo-3.0-generate-preview:predictLongRunning" in url
+
+ def test_transform_video_create_request(self):
+ """Test transformation of video creation request."""
+ prompt = "A cat playing with a ball of yarn"
+ api_base = "https://generativelanguage.googleapis.com/v1beta/models/veo-3.0-generate-preview:predictLongRunning"
+
+ data, files, url = self.config.transform_video_create_request(
+ model="veo-3.0-generate-preview",
+ prompt=prompt,
+ api_base=api_base,
+ video_create_optional_request_params={},
+ litellm_params=GenericLiteLLMParams(),
+ headers={}
+ )
+
+ # Check Veo format
+ assert "instances" in data
+ assert len(data["instances"]) == 1
+ assert data["instances"][0]["prompt"] == prompt
+
+ # Check no files are uploaded
+ assert files == []
+
+ # URL should be returned as-is for Gemini
+ assert url == api_base
+
+ def test_transform_video_create_request_with_params(self):
+ """Test transformation with optional parameters."""
+ prompt = "A cat playing with a ball of yarn"
+ api_base = "https://generativelanguage.googleapis.com/v1beta/models/veo-3.0-generate-preview:predictLongRunning"
+
+ data, files, url = self.config.transform_video_create_request(
+ model="veo-3.0-generate-preview",
+ prompt=prompt,
+ api_base=api_base,
+ video_create_optional_request_params={
+ "aspectRatio": "16:9",
+ "durationSeconds": 8,
+ "resolution": "1080p"
+ },
+ litellm_params=GenericLiteLLMParams(),
+ headers={}
+ )
+
+ # Check Veo format with instances and parameters separated
+ instance = data["instances"][0]
+ assert instance["prompt"] == prompt
+
+ # Parameters should be in a separate object
+ assert "parameters" in data
+ assert data["parameters"]["aspectRatio"] == "16:9"
+ assert data["parameters"]["durationSeconds"] == 8
+ assert data["parameters"]["resolution"] == "1080p"
+
+ def test_map_openai_params(self):
+ """Test parameter mapping from OpenAI format to Veo format."""
+ openai_params = {
+ "size": "1280x720",
+ "seconds": "8",
+ "input_reference": "test_image.jpg"
+ }
+
+ mapped = self.config.map_openai_params(
+ video_create_optional_params=openai_params,
+ model="veo-3.0-generate-preview",
+ drop_params=False
+ )
+
+ # Check mappings (prompt is not mapped, it's passed separately)
+ assert mapped["aspectRatio"] == "16:9" # 1280x720 is landscape
+ assert mapped["durationSeconds"] == 8
+ assert mapped["image"] == "test_image.jpg"
+
+ def test_map_openai_params_default_duration(self):
+ """Test that durationSeconds is omitted when not provided."""
+ openai_params = {
+ "size": "1280x720",
+ }
+
+ mapped = self.config.map_openai_params(
+ video_create_optional_params=openai_params,
+ model="veo-3.0-generate-preview",
+ drop_params=False
+ )
+
+ assert mapped["aspectRatio"] == "16:9"
+ assert "durationSeconds" not in mapped
+
+ def test_map_openai_params_with_gemini_specific_params(self):
+ """Test that Gemini-specific params are passed through correctly."""
+ params_with_gemini_specific = {
+ "size": "1280x720",
+ "seconds": "8",
+ "video": {"bytesBase64Encoded": "abc123", "mimeType": "video/mp4"},
+ "negativePrompt": "no people",
+ "referenceImages": [{"bytesBase64Encoded": "xyz789"}],
+ "personGeneration": "allow"
+ }
+
+ mapped = self.config.map_openai_params(
+ video_create_optional_params=params_with_gemini_specific,
+ model="veo-3.1-generate-preview",
+ drop_params=False
+ )
+
+ # Check OpenAI params are mapped
+ assert mapped["aspectRatio"] == "16:9"
+ assert mapped["durationSeconds"] == 8
+
+ # Check Gemini-specific params are passed through
+ assert "video" in mapped
+ assert mapped["video"]["bytesBase64Encoded"] == "abc123"
+ assert mapped["negativePrompt"] == "no people"
+ assert mapped["referenceImages"] == [{"bytesBase64Encoded": "xyz789"}]
+ assert mapped["personGeneration"] == "allow"
+
+ def test_map_openai_params_with_extra_body(self):
+ """Test that extra_body params are merged and extra_body is removed."""
+ from litellm.videos.utils import VideoGenerationRequestUtils
+
+ params_with_extra_body = {
+ "seconds": "4",
+ "extra_body": {
+ "negativePrompt": "no people",
+ "personGeneration": "allow",
+ "resolution": "1080p"
+ }
+ }
+
+ mapped = VideoGenerationRequestUtils.get_optional_params_video_generation(
+ model="veo-3.0-generate-preview",
+ video_generation_provider_config=self.config,
+ video_generation_optional_params=params_with_extra_body
+ )
+
+ # Check OpenAI params are mapped
+ assert mapped["durationSeconds"] == 4
+
+ # Check extra_body params are merged
+ assert mapped["negativePrompt"] == "no people"
+ assert mapped["personGeneration"] == "allow"
+ assert mapped["resolution"] == "1080p"
+
+ # Check extra_body itself is removed
+ assert "extra_body" not in mapped
+
+ def test_convert_size_to_aspect_ratio(self):
+ """Test size to aspect ratio conversion."""
+ # Landscape
+ assert self.config._convert_size_to_aspect_ratio("1280x720") == "16:9"
+ assert self.config._convert_size_to_aspect_ratio("1920x1080") == "16:9"
+
+ # Portrait
+ assert self.config._convert_size_to_aspect_ratio("720x1280") == "9:16"
+ assert self.config._convert_size_to_aspect_ratio("1080x1920") == "9:16"
+
+ # Invalid (defaults to 16:9)
+ assert self.config._convert_size_to_aspect_ratio("invalid") == "16:9"
+ # Empty string returns None (no size specified)
+ assert self.config._convert_size_to_aspect_ratio("") is None
+
+ def test_transform_video_create_response(self):
+ """Test transformation of video creation response."""
+ # Mock response
+ mock_response = Mock(spec=httpx.Response)
+ mock_response.json.return_value = {
+ "name": "operations/generate_1234567890",
+ "metadata": {
+ "createTime": "2024-11-04T10:00:00.123456Z"
+ }
+ }
+
+ result = self.config.transform_video_create_response(
+ model="veo-3.0-generate-preview",
+ raw_response=mock_response,
+ logging_obj=self.mock_logging_obj,
+ custom_llm_provider="gemini"
+ )
+
+ assert isinstance(result, VideoObject)
+ # ID is base64 encoded with provider info
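+        # (the encoded payload is assumed to be "provider::operation::model",
+        # the same layout the status/content tests below pass in as video_id)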
+ assert result.id.startswith("video_")
+ assert result.status == "processing"
+ assert result.object == "video"
+
+ def test_transform_video_create_response_with_cost_tracking(self):
+ """Test that duration is captured for cost tracking."""
+ # Mock response
+ mock_response = Mock(spec=httpx.Response)
+ mock_response.json.return_value = {
+ "name": "operations/generate_1234567890",
+ }
+
+ # Request data with durationSeconds in parameters
+ request_data = {
+ "instances": [{"prompt": "A test video"}],
+ "parameters": {
+ "durationSeconds": 5,
+ "aspectRatio": "16:9"
+ }
+ }
+
+ result = self.config.transform_video_create_response(
+ model="gemini/veo-3.0-generate-preview",
+ raw_response=mock_response,
+ logging_obj=self.mock_logging_obj,
+ custom_llm_provider="gemini",
+ request_data=request_data
+ )
+
+ assert isinstance(result, VideoObject)
+ assert result.usage is not None, "Usage should be set"
+ assert "duration_seconds" in result.usage, "duration_seconds should be in usage"
+ assert result.usage["duration_seconds"] == 5.0, f"Expected 5.0, got {result.usage['duration_seconds']}"
+
+ def test_transform_video_create_response_cost_tracking_with_different_durations(self):
+ """Test cost tracking with different duration values."""
+ # Mock response
+ mock_response = Mock(spec=httpx.Response)
+ mock_response.json.return_value = {
+ "name": "operations/generate_1234567890",
+ }
+
+ # Test with 8 seconds
+ request_data_8s = {
+ "instances": [{"prompt": "Test"}],
+ "parameters": {"durationSeconds": 8}
+ }
+
+ result_8s = self.config.transform_video_create_response(
+ model="gemini/veo-3.1-generate-preview",
+ raw_response=mock_response,
+ logging_obj=self.mock_logging_obj,
+ custom_llm_provider="gemini",
+ request_data=request_data_8s
+ )
+
+ assert result_8s.usage["duration_seconds"] == 8.0
+
+ # Test with 4 seconds
+ request_data_4s = {
+ "instances": [{"prompt": "Test"}],
+ "parameters": {"durationSeconds": 4}
+ }
+
+ result_4s = self.config.transform_video_create_response(
+ model="gemini/veo-3.1-fast-generate-preview",
+ raw_response=mock_response,
+ logging_obj=self.mock_logging_obj,
+ custom_llm_provider="gemini",
+ request_data=request_data_4s
+ )
+
+ assert result_4s.usage["duration_seconds"] == 4.0
+
+ def test_transform_video_create_response_cost_tracking_no_duration(self):
+ """Test that usage defaults to 8 seconds when no duration in request."""
+ # Mock response
+ mock_response = Mock(spec=httpx.Response)
+ mock_response.json.return_value = {
+ "name": "operations/generate_1234567890",
+ }
+
+ # Request data without durationSeconds (should default to 8 seconds for Google Veo)
+ request_data = {
+ "instances": [{"prompt": "A test video"}],
+ "parameters": {
+ "aspectRatio": "16:9"
+ }
+ }
+
+ result = self.config.transform_video_create_response(
+ model="gemini/veo-3.0-generate-preview",
+ raw_response=mock_response,
+ logging_obj=self.mock_logging_obj,
+ custom_llm_provider="gemini",
+ request_data=request_data
+ )
+
+ assert isinstance(result, VideoObject)
+ # When no duration is provided, it defaults to 8 seconds (Google Veo default)
+ assert result.usage is not None
+ assert "duration_seconds" in result.usage
+ assert result.usage["duration_seconds"] == 8.0, "Should default to 8 seconds when not provided (Google Veo default)"
+
+ def test_transform_video_status_retrieve_request(self):
+ """Test transformation of status retrieve request."""
+ video_id = "gemini::operations/generate_1234567890::veo-3.0"
+
+ url, params = self.config.transform_video_status_retrieve_request(
+ video_id=video_id,
+ api_base="https://generativelanguage.googleapis.com",
+ litellm_params=GenericLiteLLMParams(),
+ headers={}
+ )
+
+ assert "operations/generate_1234567890" in url
+ assert "v1beta" in url
+ assert params == {}
+
+ def test_transform_video_status_retrieve_response_processing(self):
+ """Test transformation of status response when still processing."""
+ mock_response = Mock(spec=httpx.Response)
+ mock_response.json.return_value = {
+ "name": "operations/generate_1234567890",
+ "done": False,
+ "metadata": {
+ "createTime": "2024-11-04T10:00:00.123456Z"
+ }
+ }
+
+ result = self.config.transform_video_status_retrieve_response(
+ raw_response=mock_response,
+ logging_obj=self.mock_logging_obj,
+ custom_llm_provider="gemini"
+ )
+
+ assert isinstance(result, VideoObject)
+ assert result.status == "processing"
+
+ def test_transform_video_status_retrieve_response_completed(self):
+ """Test transformation of status response when completed."""
+ mock_response = Mock(spec=httpx.Response)
+ mock_response.json.return_value = {
+ "name": "operations/generate_1234567890",
+ "done": True,
+ "metadata": {
+ "createTime": "2024-11-04T10:00:00.123456Z"
+ },
+ "response": {
+ "generateVideoResponse": {
+ "generatedSamples": [
+ {
+ "video": {
+ "uri": "files/abc123xyz"
+ }
+ }
+ ]
+ }
+ }
+ }
+
+ result = self.config.transform_video_status_retrieve_response(
+ raw_response=mock_response,
+ logging_obj=self.mock_logging_obj,
+ custom_llm_provider="gemini"
+ )
+
+ assert isinstance(result, VideoObject)
+ assert result.status == "completed"
+
+ @patch('litellm.module_level_client')
+ def test_transform_video_content_request(self, mock_client):
+ """Test transformation of content download request."""
+ video_id = "gemini::operations/generate_1234567890::veo-3.0"
+
+ # Mock the status response
+ mock_status_response = Mock(spec=httpx.Response)
+ mock_status_response.json.return_value = {
+ "name": "operations/generate_1234567890",
+ "done": True,
+ "response": {
+ "generateVideoResponse": {
+ "generatedSamples": [
+ {
+ "video": {
+ "uri": "files/abc123xyz"
+ }
+ }
+ ]
+ }
+ }
+ }
+ mock_status_response.raise_for_status = Mock()
+ mock_client.get.return_value = mock_status_response
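+        # transform_video_content_request is expected to hit the operations
+        # endpoint via the module-level client to resolve the file URI first.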
+
+ url, params = self.config.transform_video_content_request(
+ video_id=video_id,
+ api_base="https://generativelanguage.googleapis.com",
+ litellm_params=GenericLiteLLMParams(),
+ headers={}
+ )
+
+ # Should return download URL (may or may not include :download suffix)
+ assert "files/abc123xyz" in url
+ # Params are empty for Gemini file URIs
+ assert params == {}
+
+ def test_transform_video_content_response_bytes(self):
+ """Test transformation of content response (returns bytes directly)."""
+ mock_response = Mock(spec=httpx.Response)
+ mock_response.headers = httpx.Headers({
+ "content-type": "video/mp4"
+ })
+ mock_response.content = b"fake_video_data"
+
+ result = self.config.transform_video_content_response(
+ raw_response=mock_response,
+ logging_obj=self.mock_logging_obj
+ )
+
+ assert result == b"fake_video_data"
+
+ def test_video_remix_not_supported(self):
+ """Test that video remix raises NotImplementedError."""
+ with pytest.raises(NotImplementedError, match="Video remix is not supported"):
+ self.config.transform_video_remix_request(
+ video_id="test_id",
+ prompt="test prompt",
+ api_base="https://test.com",
+ litellm_params=GenericLiteLLMParams(),
+ headers={}
+ )
+
+ def test_video_list_not_supported(self):
+ """Test that video list raises NotImplementedError."""
+ with pytest.raises(NotImplementedError, match="Video list is not supported"):
+ self.config.transform_video_list_request(
+ api_base="https://test.com",
+ litellm_params=GenericLiteLLMParams(),
+ headers={}
+ )
+
+ def test_video_delete_not_supported(self):
+ """Test that video delete raises NotImplementedError."""
+ with pytest.raises(NotImplementedError, match="Video delete is not supported"):
+ self.config.transform_video_delete_request(
+ video_id="test_id",
+ api_base="https://test.com",
+ litellm_params=GenericLiteLLMParams(),
+ headers={}
+ )
+
+
+class TestGeminiVideoIntegration:
+ """Integration tests for Gemini video generation workflow."""
+
+ def test_full_workflow_mock(self):
+ """Test full workflow with mocked responses."""
+ config = GeminiVideoConfig()
+ mock_logging_obj = Mock()
+
+ # Step 1: Create request with parameters
+ prompt = "A beautiful sunset over mountains"
+ api_base = "https://generativelanguage.googleapis.com/v1beta/models/veo-3.0-generate-preview:predictLongRunning"
+ data, files, url = config.transform_video_create_request(
+ model="veo-3.0-generate-preview",
+ prompt=prompt,
+ api_base=api_base,
+ video_create_optional_request_params={
+ "aspectRatio": "16:9",
+ "durationSeconds": 8
+ },
+ litellm_params=GenericLiteLLMParams(),
+ headers={}
+ )
+
+ # Verify instances and parameters structure
+ assert data["instances"][0]["prompt"] == prompt
+ assert data["parameters"]["aspectRatio"] == "16:9"
+ assert data["parameters"]["durationSeconds"] == 8
+
+ # Step 2: Parse create response
+ mock_create_response = Mock(spec=httpx.Response)
+ mock_create_response.json.return_value = {
+ "name": "operations/generate_abc123",
+ "metadata": {
+ "createTime": "2024-11-04T10:00:00.123456Z"
+ }
+ }
+
+ video_obj = config.transform_video_create_response(
+ model="veo-3.0-generate-preview",
+ raw_response=mock_create_response,
+ logging_obj=mock_logging_obj,
+ custom_llm_provider="gemini"
+ )
+
+ assert video_obj.status == "processing"
+ assert video_obj.id.startswith("video_")
+
+ # Step 3: Check status (completed)
+ mock_status_response = Mock(spec=httpx.Response)
+ mock_status_response.json.return_value = {
+ "name": "operations/generate_abc123",
+ "done": True,
+ "metadata": {
+ "createTime": "2024-11-04T10:00:00.123456Z"
+ },
+ "response": {
+ "generateVideoResponse": {
+ "generatedSamples": [
+ {
+ "video": {
+ "uri": "files/video123"
+ }
+ }
+ ]
+ }
+ }
+ }
+
+ status_obj = config.transform_video_status_retrieve_response(
+ raw_response=mock_status_response,
+ logging_obj=mock_logging_obj,
+ custom_llm_provider="gemini"
+ )
+
+ assert status_obj.status == "completed"
+
+
+class TestGeminiVideoCostTracking:
+ """Test cost tracking for Gemini video generation."""
+
+ def test_cost_calculation_with_duration(self):
+ """Test that cost is calculated correctly using duration from usage."""
+ # Test VEO 2.0 ($0.35/second)
+ cost_veo2 = video_generation_cost(
+ model="gemini/veo-2.0-generate-001",
+ duration_seconds=5.0,
+ custom_llm_provider="gemini"
+ )
+ expected_veo2 = 0.35 * 5.0 # $1.75
+ assert abs(cost_veo2 - expected_veo2) < 0.001, f"Expected ${expected_veo2}, got ${cost_veo2}"
+
+ # Test VEO 3.0 ($0.75/second)
+ cost_veo3 = video_generation_cost(
+ model="gemini/veo-3.0-generate-preview",
+ duration_seconds=8.0,
+ custom_llm_provider="gemini"
+ )
+ expected_veo3 = 0.75 * 8.0 # $6.00
+ assert abs(cost_veo3 - expected_veo3) < 0.001, f"Expected ${expected_veo3}, got ${cost_veo3}"
+
+ # Test VEO 3.1 Standard ($0.40/second)
+ cost_veo31 = video_generation_cost(
+ model="gemini/veo-3.1-generate-preview",
+ duration_seconds=10.0,
+ custom_llm_provider="gemini"
+ )
+ expected_veo31 = 0.40 * 10.0 # $4.00
+ assert abs(cost_veo31 - expected_veo31) < 0.001, f"Expected ${expected_veo31}, got ${cost_veo31}"
+
+ # Test VEO 3.1 Fast ($0.15/second)
+ cost_veo31_fast = video_generation_cost(
+ model="gemini/veo-3.1-fast-generate-preview",
+ duration_seconds=6.0,
+ custom_llm_provider="gemini"
+ )
+ expected_veo31_fast = 0.15 * 6.0 # $0.90
+ assert abs(cost_veo31_fast - expected_veo31_fast) < 0.001, f"Expected ${expected_veo31_fast}, got ${cost_veo31_fast}"
+
+ def test_cost_calculation_end_to_end(self):
+ """Test complete cost tracking flow: request -> response -> cost calculation."""
+ config = GeminiVideoConfig()
+ mock_logging_obj = Mock()
+
+ # Create request with duration
+ request_data = {
+ "instances": [{"prompt": "A beautiful sunset"}],
+ "parameters": {"durationSeconds": 5}
+ }
+
+ # Mock response
+ mock_response = Mock(spec=httpx.Response)
+ mock_response.json.return_value = {
+ "name": "operations/generate_test123",
+ }
+
+ # Transform response
+ video_obj = config.transform_video_create_response(
+ model="gemini/veo-3.0-generate-preview",
+ raw_response=mock_response,
+ logging_obj=mock_logging_obj,
+ custom_llm_provider="gemini",
+ request_data=request_data
+ )
+
+ # Verify usage has duration
+ assert video_obj.usage is not None
+ assert "duration_seconds" in video_obj.usage
+ duration = video_obj.usage["duration_seconds"]
+
+ # Calculate cost using the duration from usage
+ cost = video_generation_cost(
+ model="gemini/veo-3.0-generate-preview",
+ duration_seconds=duration,
+ custom_llm_provider="gemini"
+ )
+
+ # Verify cost calculation (VEO 3.0 is $0.75/second)
+ expected_cost = 0.75 * 5.0 # $3.75
+ assert abs(cost - expected_cost) < 0.001, f"Expected ${expected_cost}, got ${cost}"
+
+
+if __name__ == "__main__":
+ pytest.main([__file__, "-v"])
+
diff --git a/tests/test_litellm/llms/vertex_ai/videos/__init__.py b/tests/test_litellm/llms/vertex_ai/videos/__init__.py
new file mode 100644
index 000000000000..ab1481fbd4b9
--- /dev/null
+++ b/tests/test_litellm/llms/vertex_ai/videos/__init__.py
@@ -0,0 +1,4 @@
+"""
+Tests for Vertex AI video generation.
+"""
+
diff --git a/tests/test_litellm/llms/vertex_ai/videos/test_vertex_video_transformation.py b/tests/test_litellm/llms/vertex_ai/videos/test_vertex_video_transformation.py
new file mode 100644
index 000000000000..7ae344e4999f
--- /dev/null
+++ b/tests/test_litellm/llms/vertex_ai/videos/test_vertex_video_transformation.py
@@ -0,0 +1,550 @@
+"""
+Tests for Vertex AI (Veo) video generation transformation.
+"""
+import base64
+
+import httpx
+import pytest
+from unittest.mock import Mock, patch
+
+from litellm.llms.vertex_ai.videos.transformation import (
+ VertexAIVideoConfig,
+ _convert_image_to_vertex_format,
+)
+from litellm.types.videos.main import VideoObject
+from litellm.types.router import GenericLiteLLMParams
+
+
+class TestVertexAIVideoConfig:
+ """Test VertexAIVideoConfig transformation class."""
+
+ def setup_method(self):
+ """Setup test fixtures."""
+ self.config = VertexAIVideoConfig()
+ self.mock_logging_obj = Mock()
+
+ def test_get_supported_openai_params(self):
+ """Test that correct params are supported."""
+ params = self.config.get_supported_openai_params("veo-002")
+
+ assert "model" in params
+ assert "prompt" in params
+ assert "input_reference" in params
+ assert "seconds" in params
+ assert "size" in params
+
+ @patch.object(VertexAIVideoConfig, 'get_access_token')
+ def test_validate_environment(self, mock_get_access_token):
+ """Test environment validation for Vertex AI."""
+ # Mock the authentication
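+        # (real requests would resolve credentials via Google application
+        # default credentials or a service account; only headers are checked here)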
+ mock_get_access_token.return_value = ("mock-access-token", "test-project")
+
+ headers = {}
+ litellm_params = {"vertex_project": "test-project"}
+
+ result = self.config.validate_environment(
+ headers=headers,
+ model="veo-002",
+ api_key=None,
+ litellm_params=litellm_params
+ )
+
+ # Should add Authorization header
+ assert "Authorization" in result
+ assert result["Authorization"] == "Bearer mock-access-token"
+ assert "Content-Type" in result
+
+ def test_get_complete_url(self):
+ """Test URL construction for Vertex AI video generation."""
+ litellm_params = {
+ "vertex_project": "test-project",
+ "vertex_location": "us-central1",
+ }
+
+ url = self.config.get_complete_url(
+ model="vertex_ai/veo-002", api_base=None, litellm_params=litellm_params
+ )
+
+ expected = "https://us-central1-aiplatform.googleapis.com/v1/projects/test-project/locations/us-central1/publishers/google/models/veo-002"
+ assert url == expected
+ # Should NOT include endpoint - that's added by transform methods
+ assert not url.endswith(":predictLongRunning")
+
+ def test_get_complete_url_with_custom_api_base(self):
+ """Test URL construction with custom API base."""
+ litellm_params = {
+ "vertex_project": "test-project",
+ "vertex_location": "us-west1",
+ }
+
+ url = self.config.get_complete_url(
+ model="veo-002",
+ api_base="https://custom-endpoint.example.com",
+ litellm_params=litellm_params,
+ )
+
+ assert url.startswith("https://custom-endpoint.example.com")
+ assert "test-project" in url
+ assert "us-west1" in url
+ assert "veo-002" in url
+ # Should NOT include endpoint
+ assert not url.endswith(":predictLongRunning")
+
+ def test_get_complete_url_missing_project(self):
+ """Test that missing vertex_project raises error."""
+ litellm_params = {}
+
+ # Note: The method might not raise if vertex_project can be fetched from env
+ # This test verifies the behavior when completely missing
+ try:
+ url = self.config.get_complete_url(
+ model="veo-002", api_base=None, litellm_params=litellm_params
+ )
+ # If no error is raised, vertex_project was obtained from environment
+ # In that case, just verify a URL was returned
+ assert url is not None
+ except ValueError as e:
+ # Expected behavior when vertex_project is truly missing
+ assert "vertex_project is required" in str(e)
+
+ def test_get_complete_url_default_location(self):
+ """Test URL construction with default location."""
+ litellm_params = {"vertex_project": "test-project"}
+
+ url = self.config.get_complete_url(
+ model="veo-002", api_base=None, litellm_params=litellm_params
+ )
+
+ # Should default to us-central1
+ assert "us-central1" in url
+ # Should NOT include endpoint
+ assert not url.endswith(":predictLongRunning")
+
+ def test_transform_video_create_request(self):
+ """Test transformation of video creation request."""
+ prompt = "A cat playing with a ball of yarn"
+ api_base = "https://us-central1-aiplatform.googleapis.com/v1/projects/test-project/locations/us-central1/publishers/google/models/veo-002"
+
+ data, files, url = self.config.transform_video_create_request(
+ model="veo-002",
+ prompt=prompt,
+ api_base=api_base,
+ video_create_optional_request_params={},
+ litellm_params=GenericLiteLLMParams(),
+ headers={},
+ )
+
+ # Check Vertex AI format
+ assert "instances" in data
+ assert len(data["instances"]) == 1
+ assert data["instances"][0]["prompt"] == prompt
+
+ # Parameters should not be present when empty
+ assert "parameters" not in data or data["parameters"] == {}
+
+ # Check URL has :predictLongRunning appended
+ assert url.endswith(":predictLongRunning")
+ assert api_base in url
+
+ # Check no files are uploaded
+ assert files == []
+
+ def test_transform_video_create_request_with_parameters(self):
+ """Test video creation request with aspect ratio and duration."""
+ prompt = "A dog running in a park"
+ api_base = "https://us-central1-aiplatform.googleapis.com/v1/projects/test-project/locations/us-central1/publishers/google/models/veo-002"
+
+ data, files, url = self.config.transform_video_create_request(
+ model="veo-002",
+ prompt=prompt,
+ api_base=api_base,
+ video_create_optional_request_params={
+ "aspectRatio": "16:9",
+ "durationSeconds": 8,
+ },
+ litellm_params=GenericLiteLLMParams(),
+ headers={},
+ )
+
+ assert data["instances"][0]["prompt"] == prompt
+ assert data["parameters"]["aspectRatio"] == "16:9"
+ assert data["parameters"]["durationSeconds"] == 8
+ assert url.endswith(":predictLongRunning")
+
+ def test_transform_video_create_request_with_image(self):
+ """Test video creation request with image input."""
+ prompt = "Extend this image with animation"
+ api_base = "https://us-central1-aiplatform.googleapis.com/v1/projects/test-project/locations/us-central1/publishers/google/models/veo-002"
+
+ # Create a mock image file
+ mock_image = Mock()
+ mock_image.read.return_value = b"fake_image_data"
+ mock_image.seek = Mock()
+
+ with patch(
+ "litellm.llms.vertex_ai.videos.transformation.ImageEditRequestUtils.get_image_content_type",
+ return_value="image/jpeg",
+ ):
+ data, files, url = self.config.transform_video_create_request(
+ model="veo-002",
+ prompt=prompt,
+ api_base=api_base,
+ video_create_optional_request_params={"image": mock_image},
+ litellm_params=GenericLiteLLMParams(),
+ headers={},
+ )
+
+ # Check image was converted to base64
+ assert "image" in data["instances"][0]
+ assert "bytesBase64Encoded" in data["instances"][0]["image"]
+ assert "mimeType" in data["instances"][0]["image"]
+ assert data["instances"][0]["image"]["mimeType"] == "image/jpeg"
+ assert url.endswith(":predictLongRunning")
+
+ def test_map_openai_params(self):
+ """Test parameter mapping from OpenAI to Vertex AI format."""
+ openai_params = {"seconds": "8", "size": "1280x720"}
+
+ mapped = self.config.map_openai_params(
+ video_create_optional_params=openai_params,
+ model="veo-002",
+ drop_params=False,
+ )
+
+ assert mapped["durationSeconds"] == 8
+ assert mapped["aspectRatio"] == "16:9"
+
+ def test_map_openai_params_default_duration(self):
+ """Test that durationSeconds is omitted when not provided."""
+ openai_params = {"size": "1280x720"}
+
+ mapped = self.config.map_openai_params(
+ video_create_optional_params=openai_params,
+ model="veo-002",
+ drop_params=False,
+ )
+
+ assert mapped["aspectRatio"] == "16:9"
+ assert "durationSeconds" not in mapped
+
+ def test_map_openai_params_size_conversions(self):
+ """Test size to aspect ratio conversions."""
+ test_cases = [
+ ("1280x720", "16:9"),
+ ("1920x1080", "16:9"),
+ ("720x1280", "9:16"),
+ ("1080x1920", "9:16"),
+ ("unknown", "16:9"), # Default
+ ]
+
+ for size, expected_ratio in test_cases:
+ mapped = self.config.map_openai_params(
+ video_create_optional_params={"size": size},
+ model="veo-002",
+ drop_params=False,
+ )
+ assert mapped["aspectRatio"] == expected_ratio
+
+ def test_transform_video_create_response(self):
+ """Test transformation of video creation response."""
+ # Mock response with operation name
+ mock_response = Mock(spec=httpx.Response)
+ mock_response.json.return_value = {
+ "name": "projects/test-project/locations/us-central1/publishers/google/models/veo-002/operations/12345",
+ "metadata": {"createTime": "2024-01-15T10:30:00.000Z"},
+ }
+
+ video_obj = self.config.transform_video_create_response(
+ model="vertex_ai/veo-002",
+ raw_response=mock_response,
+ logging_obj=self.mock_logging_obj,
+ custom_llm_provider="vertex_ai",
+ )
+
+ assert isinstance(video_obj, VideoObject)
+ assert video_obj.status == "processing"
+ assert video_obj.object == "video"
+ # Video ID is encoded with provider info, so just check it's not empty
+ assert video_obj.id
+ assert len(video_obj.id) > 0
+
+ def test_transform_video_create_response_missing_operation_name(self):
+ """Test that missing operation name raises error."""
+ mock_response = Mock(spec=httpx.Response)
+ mock_response.json.return_value = {}
+
+ with pytest.raises(ValueError, match="No operation name in Veo response"):
+ self.config.transform_video_create_response(
+ model="veo-002",
+ raw_response=mock_response,
+ logging_obj=self.mock_logging_obj,
+ )
+
+ def test_transform_video_status_retrieve_request(self):
+ """Test transformation of video status retrieve request."""
+ operation_name = "projects/test-project/locations/us-central1/publishers/google/models/veo-002/operations/12345"
+
+ # Provide an api_base that would be returned from get_complete_url
+ api_base = "https://us-central1-aiplatform.googleapis.com/v1/projects/test-project/locations/us-central1/publishers/google/models/veo-002"
+
+ url, params = self.config.transform_video_status_retrieve_request(
+ video_id=operation_name,
+ api_base=api_base,
+ litellm_params=GenericLiteLLMParams(),
+ headers={},
+ )
+
+ # Check URL contains fetchPredictOperation endpoint
+ assert "fetchPredictOperation" in url
+ assert "test-project" in url
+ assert "us-central1" in url
+ assert "veo-002" in url
+
+ # Check params contain operation name
+ assert params["operationName"] == operation_name
+
+ def test_transform_video_status_retrieve_request_invalid_format(self):
+ """Test that invalid operation name format raises error."""
+ invalid_operation_name = "invalid/operation/name"
+
+ with pytest.raises(ValueError, match="Invalid operation name format"):
+ self.config.transform_video_status_retrieve_request(
+ video_id=invalid_operation_name,
+ api_base=None,
+ litellm_params=GenericLiteLLMParams(),
+ headers={},
+ )
+
+ def test_transform_video_status_retrieve_response_processing(self):
+ """Test transformation of status response while processing."""
+ mock_response = Mock(spec=httpx.Response)
+ mock_response.json.return_value = {
+ "name": "projects/test-project/locations/us-central1/publishers/google/models/veo-002/operations/12345",
+ "done": False,
+ "metadata": {"createTime": "2024-01-15T10:30:00.000Z"},
+ }
+
+ video_obj = self.config.transform_video_status_retrieve_response(
+ raw_response=mock_response,
+ logging_obj=self.mock_logging_obj,
+ custom_llm_provider="vertex_ai",
+ )
+
+ assert isinstance(video_obj, VideoObject)
+ assert video_obj.status == "processing"
+
+ def test_transform_video_status_retrieve_response_completed(self):
+ """Test transformation of status response when completed."""
+ mock_response = Mock(spec=httpx.Response)
+ mock_response.json.return_value = {
+ "name": "projects/test-project/locations/us-central1/publishers/google/models/veo-002/operations/12345",
+ "done": True,
+ "metadata": {"createTime": "2024-01-15T10:30:00.000Z"},
+ "response": {
+ "@type": "type.googleapis.com/cloud.ai.large_models.vision.GenerateVideoResponse",
+ "raiMediaFilteredCount": 0,
+ "videos": [
+ {
+ "bytesBase64Encoded": base64.b64encode(
+ b"fake_video_data"
+ ).decode(),
+ "mimeType": "video/mp4",
+ }
+ ],
+ },
+ }
+
+ video_obj = self.config.transform_video_status_retrieve_response(
+ raw_response=mock_response,
+ logging_obj=self.mock_logging_obj,
+ custom_llm_provider="vertex_ai",
+ )
+
+ assert isinstance(video_obj, VideoObject)
+ assert video_obj.status == "completed"
+
+ def test_transform_video_status_retrieve_response_error(self):
+ """Test transformation of status response when an error is returned."""
+ mock_response = Mock(spec=httpx.Response)
+ mock_response.json.return_value = {
+ "name": "projects/test-project/locations/us-central1/publishers/google/models/veo-002/operations/12345",
+ "done": True,
+ "metadata": {"createTime": "2024-01-15T10:30:00.000Z"},
+ "error": {
+ "code": 3,
+ "message": "Unsupported output video duration 3 seconds, supported durations are [8,5,6,7] for feature text_to_video.",
+ },
+ }
+
+ video_obj = self.config.transform_video_status_retrieve_response(
+ raw_response=mock_response,
+ logging_obj=self.mock_logging_obj,
+ custom_llm_provider="vertex_ai",
+ )
+
+ assert isinstance(video_obj, VideoObject)
+ assert video_obj.status == "failed"
+ assert video_obj.error == mock_response.json.return_value["error"]
+
+ def test_transform_video_content_request(self):
+ """Test transformation of video content request."""
+ operation_name = "projects/test-project/locations/us-central1/publishers/google/models/veo-002/operations/12345"
+ api_base = "https://us-central1-aiplatform.googleapis.com/v1/projects/test-project/locations/us-central1/publishers/google/models/veo-002"
+
+ url, params = self.config.transform_video_content_request(
+ video_id=operation_name,
+ api_base=api_base,
+ litellm_params=GenericLiteLLMParams(),
+ headers={},
+ )
+
+ # Should use same fetchPredictOperation endpoint
+ assert "fetchPredictOperation" in url
+ assert params["operationName"] == operation_name
+
+ def test_transform_video_content_response(self):
+ """Test transformation of video content response."""
+ fake_video_bytes = b"fake_video_data_12345"
+ encoded_video = base64.b64encode(fake_video_bytes).decode()
+
+ mock_response = Mock(spec=httpx.Response)
+ mock_response.json.return_value = {
+ "name": "projects/test-project/locations/us-central1/publishers/google/models/veo-002/operations/12345",
+ "done": True,
+ "response": {
+ "@type": "type.googleapis.com/cloud.ai.large_models.vision.GenerateVideoResponse",
+ "videos": [
+ {"bytesBase64Encoded": encoded_video, "mimeType": "video/mp4"}
+ ],
+ },
+ }
+
+ video_bytes = self.config.transform_video_content_response(
+ raw_response=mock_response, logging_obj=self.mock_logging_obj
+ )
+
+ assert isinstance(video_bytes, bytes)
+ assert video_bytes == fake_video_bytes
+
+ def test_transform_video_content_response_not_complete(self):
+ """Test that incomplete video raises error."""
+ mock_response = Mock(spec=httpx.Response)
+ mock_response.json.return_value = {
+ "name": "projects/test-project/locations/us-central1/publishers/google/models/veo-002/operations/12345",
+ "done": False,
+ }
+
+ with pytest.raises(
+ ValueError, match="Video generation is not complete yet"
+ ):
+ self.config.transform_video_content_response(
+ raw_response=mock_response, logging_obj=self.mock_logging_obj
+ )
+
+ def test_transform_video_content_response_missing_video_data(self):
+ """Test that missing video data raises error."""
+ mock_response = Mock(spec=httpx.Response)
+ mock_response.json.return_value = {
+ "name": "projects/test-project/locations/us-central1/publishers/google/models/veo-002/operations/12345",
+ "done": True,
+ "response": {"videos": []},
+ }
+
+ with pytest.raises(ValueError, match="No video data found"):
+ self.config.transform_video_content_response(
+ raw_response=mock_response, logging_obj=self.mock_logging_obj
+ )
+
+ def test_transform_video_remix_request_not_supported(self):
+ """Test that video remix raises NotImplementedError."""
+ with pytest.raises(
+ NotImplementedError, match="Video remix is not supported"
+ ):
+ self.config.transform_video_remix_request(
+ video_id="test-video-id",
+ prompt="new prompt",
+ api_base="https://example.com",
+ litellm_params=GenericLiteLLMParams(),
+ headers={},
+ )
+
+ def test_transform_video_list_request_not_supported(self):
+ """Test that video list raises NotImplementedError."""
+ with pytest.raises(NotImplementedError, match="Video list is not supported"):
+ self.config.transform_video_list_request(
+ api_base="https://example.com",
+ litellm_params=GenericLiteLLMParams(),
+ headers={},
+ )
+
+ def test_transform_video_delete_request_not_supported(self):
+ """Test that video delete raises NotImplementedError."""
+ with pytest.raises(
+ NotImplementedError, match="Video delete is not supported"
+ ):
+ self.config.transform_video_delete_request(
+ video_id="test-video-id",
+ api_base="https://example.com",
+ litellm_params=GenericLiteLLMParams(),
+ headers={},
+ )
+
+ def test_get_error_class(self):
+ """Test error class generation."""
+ error = self.config.get_error_class(
+ error_message="Test error", status_code=500, headers={}
+ )
+
+ # Should return VertexAIError
+ from litellm.llms.vertex_ai.common_utils import VertexAIError
+
+ assert isinstance(error, VertexAIError)
+ assert error.status_code == 500
+ assert "Test error" in str(error)
+
+
+class TestConvertImageToVertexFormat:
+ """Test the _convert_image_to_vertex_format helper function."""
+
+ def test_convert_image_to_vertex_format(self):
+ """Test image conversion to Vertex AI format."""
+ fake_image_data = b"fake_jpeg_image_data"
+ mock_image = Mock()
+ mock_image.read.return_value = fake_image_data
+ mock_image.seek = Mock()
+
+ with patch(
+ "litellm.llms.vertex_ai.videos.transformation.ImageEditRequestUtils.get_image_content_type",
+ return_value="image/jpeg",
+ ):
+ result = _convert_image_to_vertex_format(mock_image)
+
+ assert "bytesBase64Encoded" in result
+ assert "mimeType" in result
+ assert result["mimeType"] == "image/jpeg"
+
+ # Verify base64 encoding
+ decoded = base64.b64decode(result["bytesBase64Encoded"])
+ assert decoded == fake_image_data
+
+ def test_convert_image_to_vertex_format_with_seek(self):
+ """Test image conversion with seek support."""
+ fake_image_data = b"fake_png_image_data"
+ mock_image = Mock()
+ mock_image.read.return_value = fake_image_data
+ mock_image.seek = Mock()
+
+ with patch(
+ "litellm.llms.vertex_ai.videos.transformation.ImageEditRequestUtils.get_image_content_type",
+ return_value="image/png",
+ ):
+ result = _convert_image_to_vertex_format(mock_image)
+
+ # Verify seek was called
+ mock_image.seek.assert_called_once_with(0)
+
+ assert result["mimeType"] == "image/png"
+ decoded = base64.b64decode(result["bytesBase64Encoded"])
+ assert decoded == fake_image_data
+
diff --git a/tests/test_litellm/test_video_generation.py b/tests/test_litellm/test_video_generation.py
index 6007201a676c..b11e38b32bb6 100644
--- a/tests/test_litellm/test_video_generation.py
+++ b/tests/test_litellm/test_video_generation.py
@@ -150,9 +150,10 @@ def test_video_generation_request_transformation(self):
config = OpenAIVideoConfig()
# Test request transformation
- data, files = config.transform_video_create_request(
+ data, files, returned_api_base = config.transform_video_create_request(
model="sora-2",
prompt="Test video prompt",
+ api_base="https://api.openai.com/v1/videos",
video_create_optional_request_params={
"seconds": "8",
"size": "720x1280"
@@ -166,6 +167,7 @@ def test_video_generation_request_transformation(self):
assert data["seconds"] == "8"
assert data["size"] == "720x1280"
assert files == []
+ assert returned_api_base == "https://api.openai.com/v1/videos"
def test_video_generation_response_transformation(self):
"""Test video generation response transformation."""
@@ -228,9 +230,10 @@ def test_video_generation_with_files(self):
mock_file = MagicMock()
mock_file.read.return_value = b"fake_image_data"
- data, files = config.transform_video_create_request(
+ data, files, returned_api_base = config.transform_video_create_request(
model="sora-2",
prompt="Test video with image",
+ api_base="https://api.openai.com/v1/videos",
video_create_optional_request_params={
"input_reference": mock_file,
"seconds": "8",
@@ -291,42 +294,29 @@ def test_video_generation_parameter_mapping(self):
assert mapped_params["user"] == "test-user"
def test_video_generation_unsupported_parameters(self):
- """Test video generation with unsupported parameters."""
+ """Test video generation with provider-specific parameters via extra_body."""
from litellm.videos.utils import VideoGenerationRequestUtils
- # Test unsupported parameter detection
- with pytest.raises(litellm.UnsupportedParamsError):
- VideoGenerationRequestUtils.get_optional_params_video_generation(
- model="sora-2",
- video_generation_provider_config=OpenAIVideoConfig(),
- video_generation_optional_params={
- "unsupported_param": "value"
+ # Test that provider-specific parameters can be passed via extra_body
+ # This allows support for Vertex AI and Gemini specific parameters
+ result = VideoGenerationRequestUtils.get_optional_params_video_generation(
+ model="sora-2",
+ video_generation_provider_config=OpenAIVideoConfig(),
+ video_generation_optional_params={
+ "seconds": "8",
+ "extra_body": {
+ "vertex_ai_param": "value",
+ "gemini_param": "value2"
}
- )
-
- def test_video_generation_request_utils(self):
- """Test video generation request utilities."""
- from litellm.videos.utils import VideoGenerationRequestUtils
-
- # Test parameter filtering
- params = {
- "prompt": "Test video",
- "model": "sora-2",
- "seconds": "8",
- "size": "720x1280",
- "user": "test-user",
- "invalid_param": "should_be_filtered"
- }
-
- filtered_params = VideoGenerationRequestUtils.get_requested_video_generation_optional_param(params)
+ }
+ )
- # Should only contain valid parameters
- assert "prompt" not in filtered_params # prompt is required, not optional
- assert "seconds" in filtered_params
- assert "size" in filtered_params
- assert "user" in filtered_params
- assert "invalid_param" not in filtered_params
- # Note: model is included in the filtered params as it's part of the TypedDict
+ # extra_body params should be merged into the result
+ assert result["seconds"] == "8"
+ assert result["vertex_ai_param"] == "value"
+ assert result["gemini_param"] == "value2"
+ # extra_body itself should be removed from the result
+ assert "extra_body" not in result
def test_video_generation_types(self):
"""Test video generation type definitions."""
diff --git a/ui/litellm-dashboard/package-lock.json b/ui/litellm-dashboard/package-lock.json
index 7205738a21cf..6e21fc2f5f7c 100644
--- a/ui/litellm-dashboard/package-lock.json
+++ b/ui/litellm-dashboard/package-lock.json
@@ -23233,4 +23233,4 @@
}
}
}
-}
+}
\ No newline at end of file
diff --git a/ui/litellm-dashboard/src/components/add_model/add_model_modes.tsx b/ui/litellm-dashboard/src/components/add_model/add_model_modes.tsx
index b6c2410a0277..30deb79469ca 100644
--- a/ui/litellm-dashboard/src/components/add_model/add_model_modes.tsx
+++ b/ui/litellm-dashboard/src/components/add_model/add_model_modes.tsx
@@ -6,6 +6,7 @@ export const TEST_MODES = [
{ value: "audio_speech", label: "Audio Speech - /audio/speech" },
{ value: "audio_transcription", label: "Audio Transcription - /audio/transcriptions" },
{ value: "image_generation", label: "Image Generation - /images/generations" },
+ { value: "video_generation", label: "Video Generation - /videos" },
{ value: "rerank", label: "Rerank - /rerank" },
{ value: "realtime", label: "Realtime - /realtime" },
{ value: "batch", label: "Batch - /batch" },
diff --git a/ui/litellm-dashboard/src/components/chat_ui/mode_endpoint_mapping.tsx b/ui/litellm-dashboard/src/components/chat_ui/mode_endpoint_mapping.tsx
index 6c1fbcd504a0..2aad64fb6198 100644
--- a/ui/litellm-dashboard/src/components/chat_ui/mode_endpoint_mapping.tsx
+++ b/ui/litellm-dashboard/src/components/chat_ui/mode_endpoint_mapping.tsx
@@ -5,6 +5,7 @@ export enum ModelMode {
AUDIO_SPEECH = "audio_speech",
AUDIO_TRANSCRIPTION = "audio_transcription",
IMAGE_GENERATION = "image_generation",
+ VIDEO_GENERATION = "video_generation",
CHAT = "chat",
RESPONSES = "responses",
IMAGE_EDITS = "image_edits",
@@ -15,6 +16,7 @@ export enum ModelMode {
// Define an enum for the endpoint types your UI calls
export enum EndpointType {
IMAGE = "image",
+ VIDEO = "video",
CHAT = "chat",
RESPONSES = "responses",
IMAGE_EDITS = "image_edits",
@@ -28,6 +30,7 @@ export enum EndpointType {
// Create a mapping between the model mode and the corresponding endpoint type
export const litellmModeMapping: Record<ModelMode, EndpointType> = {
[ModelMode.IMAGE_GENERATION]: EndpointType.IMAGE,
+ [ModelMode.VIDEO_GENERATION]: EndpointType.VIDEO,
[ModelMode.CHAT]: EndpointType.CHAT,
[ModelMode.RESPONSES]: EndpointType.RESPONSES,
[ModelMode.IMAGE_EDITS]: EndpointType.IMAGE_EDITS,