diff --git a/docs/my-website/docs/providers/azure/videos.md b/docs/my-website/docs/providers/azure/videos.md index 188713d63351..62f8d0df182d 100644 --- a/docs/my-website/docs/providers/azure/videos.md +++ b/docs/my-website/docs/providers/azure/videos.md @@ -25,7 +25,6 @@ LiteLLM supports Azure OpenAI's video generation models including Sora with full import os os.environ["AZURE_OPENAI_API_KEY"] = "your-azure-api-key" os.environ["AZURE_OPENAI_API_BASE"] = "https://your-resource.openai.azure.com/" -os.environ["AZURE_OPENAI_API_VERSION"] = "2024-02-15-preview" ``` ### Basic Usage @@ -37,7 +36,6 @@ import time os.environ["AZURE_OPENAI_API_KEY"] = "your-azure-api-key" os.environ["AZURE_OPENAI_API_BASE"] = "https://your-resource.openai.azure.com/" -os.environ["AZURE_OPENAI_API_VERSION"] = "2024-02-15-preview" # Generate video response = video_generation( @@ -53,8 +51,7 @@ print(f"Initial Status: {response.status}") # Check status until video is ready while True: status_response = video_status( - video_id=response.id, - custom_llm_provider="azure" + video_id=response.id ) print(f"Current Status: {status_response.status}") @@ -69,8 +66,7 @@ while True: # Download video content when ready video_bytes = video_content( - video_id=response.id, - custom_llm_provider="azure" + video_id=response.id ) # Save to file @@ -87,7 +83,6 @@ Here's how to call Azure video generation models with the LiteLLM Proxy Server ```bash export AZURE_OPENAI_API_KEY="your-azure-api-key" export AZURE_OPENAI_API_BASE="https://your-resource.openai.azure.com/" -export AZURE_OPENAI_API_VERSION="2024-02-15-preview" ``` ### 2. Start the proxy @@ -102,7 +97,6 @@ model_list: model: azure/sora-2 api_key: os.environ/AZURE_OPENAI_API_KEY api_base: os.environ/AZURE_OPENAI_API_BASE - api_version: "2024-02-15-preview" ``` @@ -211,8 +205,7 @@ general_settings: ```python # Download video content video_bytes = video_content( - video_id="video_1234567890", - model="azure/sora-2" + video_id="video_1234567890" ) # Save to file @@ -243,8 +236,7 @@ def generate_and_download_video(prompt): # Step 3: Download video video_bytes = litellm.video_content( - video_id=video_id, - custom_llm_provider="azure" + video_id=video_id ) # Step 4: Save to file @@ -264,9 +256,9 @@ video_file = generate_and_download_video( ```python # Video editing with reference image response = litellm.video_remix( + video_id="video_456", prompt="Make the cat jump higher", input_reference=open("path/to/image.jpg", "rb"), # Reference image as file object - custom_llm_provider="azure" seconds="8" ) diff --git a/docs/my-website/docs/providers/gemini.md b/docs/my-website/docs/providers/gemini.md index 40d646565286..31d3a491f405 100644 --- a/docs/my-website/docs/providers/gemini.md +++ b/docs/my-website/docs/providers/gemini.md @@ -10,7 +10,7 @@ import TabItem from '@theme/TabItem'; | Provider Route on LiteLLM | `gemini/` | | Provider Doc | [Google AI Studio ↗](https://aistudio.google.com/) | | API Endpoint for Provider | https://generativelanguage.googleapis.com | -| Supported OpenAI Endpoints | `/chat/completions`, [`/embeddings`](../embedding/supported_embedding#gemini-ai-embedding-models), `/completions` | +| Supported OpenAI Endpoints | `/chat/completions`, [`/embeddings`](../embedding/supported_embedding#gemini-ai-embedding-models), `/completions`, [`/videos`](./gemini/videos.md) | | Pass-through Endpoint | [Supported](../pass_through/google_ai_studio.md) |
diff --git a/docs/my-website/docs/providers/gemini/videos.md b/docs/my-website/docs/providers/gemini/videos.md new file mode 100644 index 000000000000..5b5d5a8a6369 --- /dev/null +++ b/docs/my-website/docs/providers/gemini/videos.md @@ -0,0 +1,409 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# Gemini Video Generation (Veo) + +LiteLLM supports Google's Veo video generation models through a unified API interface. + +| Property | Details | +|-------|-------| +| Description | Google's Veo AI video generation models | +| Provider Route on LiteLLM | `gemini/` | +| Supported Models | `veo-3.0-generate-preview`, `veo-3.1-generate-preview` | +| Cost Tracking | ✅ Duration-based pricing | +| Logging Support | ✅ Full request/response logging | +| Proxy Server Support | ✅ Full proxy integration with virtual keys | +| Spend Management | ✅ Budget tracking and rate limiting | +| Link to Provider Doc | [Google Veo Documentation ↗](https://ai.google.dev/gemini-api/docs/video) | + +## Quick Start + +### Required API Keys + +```python +import os +os.environ["GEMINI_API_KEY"] = "your-google-api-key" +# OR +os.environ["GOOGLE_API_KEY"] = "your-google-api-key" +``` + +### Basic Usage + +```python +from litellm import video_generation, video_status, video_content +import os +import time + +os.environ["GEMINI_API_KEY"] = "your-google-api-key" + +# Step 1: Generate video +response = video_generation( + model="gemini/veo-3.0-generate-preview", + prompt="A cat playing with a ball of yarn in a sunny garden" +) + +print(f"Video ID: {response.id}") +print(f"Initial Status: {response.status}") # "processing" + +# Step 2: Poll for completion +while True: + status_response = video_status( + video_id=response.id + ) + + print(f"Current Status: {status_response.status}") + + if status_response.status == "completed": + break + elif status_response.status == "failed": + print("Video generation failed") + break + + time.sleep(10) # Wait 10 seconds before checking again + +# Step 3: Download video content +video_bytes = video_content( + video_id=response.id +) + +# Save to file +with open("generated_video.mp4", "wb") as f: + f.write(video_bytes) + +print("Video downloaded successfully!") +``` + +## Supported Models + +| Model Name | Description | Max Duration | Status | +|------------|-------------|--------------|--------| +| veo-3.0-generate-preview | Veo 3.0 video generation | 8 seconds | Preview | +| veo-3.1-generate-preview | Veo 3.1 video generation | 8 seconds | Preview | + +## Video Generation Parameters + +LiteLLM automatically maps OpenAI-style parameters to Veo's format: + +| OpenAI Parameter | Veo Parameter | Description | Example | +|------------------|---------------|-------------|---------| +| `prompt` | `prompt` | Text description of the video | "A cat playing" | +| `size` | `aspectRatio` | Video dimensions → aspect ratio | "1280x720" → "16:9" | +| `seconds` | `durationSeconds` | Duration in seconds | "8" → 8 | +| `input_reference` | `image` | Reference image to animate | File object or path | +| `model` | `model` | Model to use | "gemini/veo-3.0-generate-preview" | + +### Size to Aspect Ratio Mapping + +LiteLLM automatically converts size dimensions to Veo's aspect ratio format: +- `"1280x720"`, `"1920x1080"` → `"16:9"` (landscape) +- `"720x1280"`, `"1080x1920"` → `"9:16"` (portrait) + +### Supported Veo Parameters + +Based on Veo's API: +- **prompt** (required): Text description with optional audio cues +- **aspectRatio**: `"16:9"` (default) or `"9:16"` +- **resolution**: 
`"720p"` (default) or `"1080p"` (Veo 3.1 only, 16:9 aspect ratio only) +- **durationSeconds**: Video length (max 8 seconds for most models) +- **image**: Reference image for animation +- **negativePrompt**: What to exclude from the video (Veo 3.1) +- **referenceImages**: Style and content references (Veo 3.1 only) + +## Complete Workflow Example + +```python +import litellm +import time + +def generate_and_download_veo_video( + prompt: str, + output_file: str = "video.mp4", + size: str = "1280x720", + seconds: str = "8" +): + """ + Complete workflow for Veo video generation. + + Args: + prompt: Text description of the video + output_file: Where to save the video + size: Video dimensions (e.g., "1280x720" for 16:9) + seconds: Duration in seconds + + Returns: + bool: True if successful + """ + print(f"🎬 Generating video: {prompt}") + + # Step 1: Initiate generation + response = litellm.video_generation( + model="gemini/veo-3.0-generate-preview", + prompt=prompt, + size=size, # Maps to aspectRatio + seconds=seconds # Maps to durationSeconds + ) + + video_id = response.id + print(f"✓ Video generation started (ID: {video_id})") + + # Step 2: Wait for completion + max_wait_time = 600 # 10 minutes + start_time = time.time() + + while time.time() - start_time < max_wait_time: + status_response = litellm.video_status(video_id=video_id) + + if status_response.status == "completed": + print("✓ Video generation completed!") + break + elif status_response.status == "failed": + print("✗ Video generation failed") + return False + + print(f"⏳ Status: {status_response.status}") + time.sleep(10) + else: + print("✗ Timeout waiting for video generation") + return False + + # Step 3: Download video + print("⬇️ Downloading video...") + video_bytes = litellm.video_content(video_id=video_id) + + with open(output_file, "wb") as f: + f.write(video_bytes) + + print(f"✓ Video saved to {output_file}") + return True + +# Use it +generate_and_download_veo_video( + prompt="A serene lake at sunset with mountains in the background", + output_file="sunset_lake.mp4" +) +``` + +## Async Usage + +```python +from litellm import avideo_generation, avideo_status, avideo_content +import asyncio + +async def async_video_workflow(): + # Generate video + response = await avideo_generation( + model="gemini/veo-3.0-generate-preview", + prompt="A cat playing with a ball of yarn" + ) + + # Poll for completion + while True: + status = await avideo_status(video_id=response.id) + if status.status == "completed": + break + await asyncio.sleep(10) + + # Download content + video_bytes = await avideo_content(video_id=response.id) + + with open("video.mp4", "wb") as f: + f.write(video_bytes) + +# Run it +asyncio.run(async_video_workflow()) +``` + +## LiteLLM Proxy Usage + +### Configuration + +Add Veo models to your `config.yaml`: + +```yaml +model_list: + - model_name: veo-3 + litellm_params: + model: gemini/veo-3.0-generate-preview + api_key: os.environ/GEMINI_API_KEY +``` + +Start the proxy: + +```bash +litellm --config config.yaml +# Server running on http://0.0.0.0:4000 +``` + +### Making Requests + + + + +```bash +# Step 1: Generate video +curl --location 'http://0.0.0.0:4000/v1/videos' \ +--header 'Content-Type: application/json' \ +--header 'Authorization: Bearer sk-1234' \ +--data '{ + "model": "veo-3", + "prompt": "A cat playing with a ball of yarn in a sunny garden" +}' + +# Response: {"id": "gemini::operations/generate_12345::...", "status": "processing", ...} + +# Step 2: Check status +curl --location 
'http://localhost:4000/v1/videos/{video_id}' \
--header 'x-litellm-api-key: sk-1234'

# Step 3: Download video (when status is "completed")
curl --location 'http://localhost:4000/v1/videos/{video_id}/content' \
--header 'x-litellm-api-key: sk-1234' \
--output video.mp4
```

```python
import time

import litellm

litellm.api_base = "http://0.0.0.0:4000"
litellm.api_key = "sk-1234"

# Generate video
response = litellm.video_generation(
    model="veo-3",
    prompt="A cat playing with a ball of yarn in a sunny garden"
)

# Check status
while True:
    status = litellm.video_status(video_id=response.id)
    if status.status == "completed":
        break
    time.sleep(10)

# Download video
video_bytes = litellm.video_content(video_id=response.id)
with open("video.mp4", "wb") as f:
    f.write(video_bytes)
```

## Cost Tracking

LiteLLM automatically tracks costs for Veo video generation:

```python
response = litellm.video_generation(
    model="gemini/veo-3.0-generate-preview",
    prompt="A beautiful sunset"
)

# Cost is calculated based on video duration
# Veo pricing: ~$0.10 per second (estimated)
# Default video duration: 8 seconds (DEFAULT_GOOGLE_VIDEO_DURATION_SECONDS)
# Estimated cost: ~$0.80
```

## Differences from OpenAI Video API

| Feature | OpenAI (Sora) | Gemini (Veo) |
|---------|---------------|--------------|
| Reference Images | ✅ Supported | ✅ Supported (`input_reference` → `image`) |
| Size Control | ✅ Supported | ✅ Mapped to aspect ratio (`16:9` / `9:16`) |
| Duration Control | ✅ Supported | ✅ Supported (`seconds` → `durationSeconds`) |
| Video Remix/Edit | ✅ Supported | ❌ Not supported |
| Video List | ✅ Supported | ❌ Not supported |
| Video Delete | ✅ Supported | ❌ Not supported |
| Prompt-based Generation | ✅ Supported | ✅ Supported |
| Async Operations | ✅ Supported | ✅ Supported |

## Error Handling

```python
import time

from litellm import video_generation, video_status, video_content
from litellm.exceptions import APIError, Timeout

try:
    response = video_generation(
        model="gemini/veo-3.0-generate-preview",
        prompt="A beautiful landscape"
    )

    # Poll with timeout
    max_attempts = 60  # 10 minutes (60 * 10s)
    for attempt in range(max_attempts):
        status = video_status(video_id=response.id)

        if status.status == "completed":
            video_bytes = video_content(video_id=response.id)
            with open("video.mp4", "wb") as f:
                f.write(video_bytes)
            break
        elif status.status == "failed":
            raise RuntimeError("Video generation failed")

        time.sleep(10)
    else:
        raise RuntimeError("Timed out waiting for video generation")

except APIError as e:  # raised by LiteLLM when the underlying API call fails
    print(f"API Error: {e}")
except Timeout as e:  # raised by LiteLLM on request timeouts
    print(f"Timeout: {e}")
except Exception as e:
    print(f"Unexpected error: {e}")
```

## Best Practices

1. **Always poll for completion**: Veo video generation is asynchronous and can take several minutes
2. **Set reasonable timeouts**: Allow at least 5-10 minutes for video generation
3. **Handle failures gracefully**: Check for `failed` status and implement retry logic
4. **Use descriptive prompts**: More detailed prompts generally produce better results
5. **Store video IDs**: Save the operation ID/video ID so you can resume polling if your application restarts — see the sketch after this list
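
As a concrete example of practice 5, here is a minimal persistence sketch. The `pending_video_id.txt` path and the model name are illustrative choices, not requirements:

```python
import os
import time

import litellm

ID_FILE = "pending_video_id.txt"  # illustrative location for the saved ID

if os.path.exists(ID_FILE):
    # A generation was already started before a restart -- resume polling it
    with open(ID_FILE) as f:
        video_id = f.read().strip()
else:
    response = litellm.video_generation(
        model="gemini/veo-3.0-generate-preview",
        prompt="A cat playing with a ball of yarn",
    )
    video_id = response.id
    with open(ID_FILE, "w") as f:
        f.write(video_id)  # persist immediately so a crash loses nothing

# Poll until the operation leaves the "processing" state
while litellm.video_status(video_id=video_id).status not in ("completed", "failed"):
    time.sleep(10)
```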

## Troubleshooting

### Video generation times out

```python
# Increase polling timeout
max_wait_time = 900  # 15 minutes instead of 10
```

### Video not found when downloading

```python
# Make sure video is completed before downloading
status = video_status(video_id=video_id)
if status.status != "completed":
    print("Video not ready yet!")
```

### API key errors

```python
# Verify your API key is set
import os
print(os.environ.get("GEMINI_API_KEY"))

# Or pass it explicitly
response = video_generation(
    model="gemini/veo-3.0-generate-preview",
    prompt="...",
    api_key="your-api-key-here"
)
```

## See Also

- [OpenAI Video Generation](../openai/videos.md)
- [Azure Video Generation](../azure/videos.md)
- [Vertex AI Video Generation](../vertex_ai/videos.md)
- [Video Generation API Reference](/docs/videos)
- [Veo Pass-through Endpoints](/docs/pass_through/google_ai_studio#example-4-video-generation-with-veo)

diff --git a/docs/my-website/docs/providers/openai/videos.md b/docs/my-website/docs/providers/openai/videos.md
index 72eb3f43a020..202c79c2446e 100644
--- a/docs/my-website/docs/providers/openai/videos.md
+++ b/docs/my-website/docs/providers/openai/videos.md
@@ -36,7 +36,6 @@ print(f"Status: {response.status}")
 # Download video content when ready
 video_bytes = video_content(
     video_id=response.id,
-    model="sora-2"
 )

 # Save to file
@@ -171,8 +170,7 @@ curl http://localhost:4000/v1/videos \
 ```python
 # Download video content
 video_bytes = video_content(
-    video_id="video_1234567890",
-    custom_llm_provider="openai"  # Or use model="sora-2"
+    video_id="video_1234567890"
 )

 # Save to file
@@ -203,8 +201,7 @@ def generate_and_download_video(prompt):

     # Step 3: Download video
     video_bytes = litellm.video_content(
-        video_id=video_id,
-        custom_llm_provider="openai"
+        video_id=video_id
     )

     # Step 4: Save to file
@@ -241,8 +238,7 @@ from litellm.exceptions import BadRequestError, AuthenticationError

 try:
     response = video_generation(
-        prompt="A cat playing with a ball of yarn",
-        model="sora-2"
+        prompt="A cat playing with a ball of yarn"
     )
 except AuthenticationError as e:
     print(f"Authentication failed: {e}")
diff --git a/docs/my-website/docs/providers/vertex_ai/videos.md b/docs/my-website/docs/providers/vertex_ai/videos.md
new file mode 100644
index 000000000000..4aaf74354b1e
--- /dev/null
+++ b/docs/my-website/docs/providers/vertex_ai/videos.md
@@ -0,0 +1,268 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# Vertex AI Video Generation (Veo)

LiteLLM supports Vertex AI's Veo video generation models using the unified OpenAI video API surface.
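
As a rough sketch, here is the shape of the `predictLongRunning` request body that LiteLLM assembles from an OpenAI-style call. Field values below are illustrative placeholders derived from the parameter mapping table later in this page, not a verbatim capture of the wire format:

```python
# Illustrative Veo request body -- assembled from OpenAI-style parameters.
veo_request_body = {
    "instances": [
        {"prompt": "A cat playing with a ball of yarn"}  # from `prompt`
    ],
    "parameters": {
        "aspectRatio": "16:9",   # derived from `size` (e.g. "1280x720")
        "durationSeconds": 8,    # derived from `seconds`
    },
}
```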
+ +| Property | Details | +|-------|-------| +| Description | Google Cloud Vertex AI Veo video generation models | +| Provider Route on LiteLLM | `vertex_ai/` | +| Supported Models | `veo-2.0-generate-001`, `veo-3.0-generate-preview`, `veo-3.0-fast-generate-preview`, `veo-3.1-generate-preview`, `veo-3.1-fast-generate-preview` | +| Cost Tracking | ✅ Duration-based pricing | +| Logging Support | ✅ Full request/response logging | +| Proxy Server Support | ✅ Full proxy integration with virtual keys | +| Spend Management | ✅ Budget tracking and rate limiting | +| Link to Provider Doc | [Vertex AI Veo Documentation ↗](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/veo-video-generation) | + +## Quick Start + +### Required Environment Setup + +```python +import json +import os + +os.environ["VERTEXAI_PROJECT"] = "your-gcp-project-id" +os.environ["VERTEXAI_LOCATION"] = "us-central1" + +# Option 1: Point to a service account file +os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/path/to/service_account.json" + +# Option 2: Store the service account JSON directly +with open("/path/to/service_account.json", "r", encoding="utf-8") as f: + os.environ["VERTEXAI_CREDENTIALS"] = f.read() +``` + +### Basic Usage + +```python +from litellm import video_generation, video_status, video_content +import json +import os +import time + +with open("/path/to/service_account.json", "r", encoding="utf-8") as f: + vertex_credentials = f.read() + +response = video_generation( + model="vertex_ai/veo-3.0-generate-preview", + prompt="A cat playing with a ball of yarn in a sunny garden", + vertex_project="your-gcp-project-id", + vertex_location="us-central1", + vertex_credentials=vertex_credentials, + seconds="8", + size="1280x720", +) + +print(f"Video ID: {response.id}") +print(f"Initial Status: {response.status}") + +# Poll for completion +while True: + status = video_status( + video_id=response.id, + vertex_project="your-gcp-project-id", + vertex_location="us-central1", + vertex_credentials=vertex_credentials, + ) + + print(f"Current Status: {status.status}") + + if status.status == "completed": + break + if status.status == "failed": + raise RuntimeError("Video generation failed") + + time.sleep(10) + +# Download the rendered video +video_bytes = video_content( + video_id=response.id, + vertex_project="your-gcp-project-id", + vertex_location="us-central1", + vertex_credentials=vertex_credentials, +) + +with open("generated_video.mp4", "wb") as f: + f.write(video_bytes) +``` + +## Supported Models + +| Model Name | Description | Max Duration | Status | +|------------|-------------|--------------|--------| +| veo-2.0-generate-001 | Veo 2.0 video generation | 5 seconds | GA | +| veo-3.0-generate-preview | Veo 3.0 high quality | 8 seconds | Preview | +| veo-3.0-fast-generate-preview | Veo 3.0 fast generation | 8 seconds | Preview | +| veo-3.1-generate-preview | Veo 3.1 high quality | 10 seconds | Preview | +| veo-3.1-fast-generate-preview | Veo 3.1 fast | 10 seconds | Preview | + +## Video Generation Parameters + +LiteLLM converts OpenAI-style parameters to Veo's API shape automatically: + +| OpenAI Parameter | Vertex AI Parameter | Description | Example | +|------------------|---------------------|-------------|---------| +| `prompt` | `instances[].prompt` | Text description of the video | "A cat playing" | +| `size` | `parameters.aspectRatio` | Converted to `16:9` or `9:16` | "1280x720" → `16:9` | +| `seconds` | `parameters.durationSeconds` | Clip length in seconds | "8" → `8` | +| `input_reference` 
| `instances[].image` | Reference image for animation | `open("image.jpg", "rb")` |
| Provider-specific params | `extra_body` | Forwarded to Vertex API | `{"negativePrompt": "blurry"}` |

### Size to Aspect Ratio Mapping

- `1280x720`, `1920x1080` → `16:9`
- `720x1280`, `1080x1920` → `9:16`
- Unknown sizes default to `16:9`

## Async Usage

```python
from litellm import avideo_generation, avideo_status, avideo_content
import asyncio

with open("/path/to/service_account.json", "r", encoding="utf-8") as f:
    vertex_credentials = f.read()


async def workflow():
    response = await avideo_generation(
        model="vertex_ai/veo-3.1-generate-preview",
        prompt="Slow motion water droplets splashing into a pool",
        seconds="10",
        vertex_project="your-gcp-project-id",
        vertex_location="us-central1",
        vertex_credentials=vertex_credentials,
    )

    while True:
        status = await avideo_status(
            video_id=response.id,
            vertex_project="your-gcp-project-id",
            vertex_location="us-central1",
            vertex_credentials=vertex_credentials,
        )

        if status.status == "completed":
            break
        if status.status == "failed":
            raise RuntimeError("Video generation failed")

        await asyncio.sleep(10)

    video_bytes = await avideo_content(
        video_id=response.id,
        vertex_project="your-gcp-project-id",
        vertex_location="us-central1",
        vertex_credentials=vertex_credentials,
    )

    with open("veo_water.mp4", "wb") as f:
        f.write(video_bytes)

asyncio.run(workflow())
```

## LiteLLM Proxy Usage

Add Veo models to your `config.yaml`:

```yaml
model_list:
  - model_name: veo-3
    litellm_params:
      model: vertex_ai/veo-3.0-generate-preview
      vertex_project: os.environ/VERTEXAI_PROJECT
      vertex_location: os.environ/VERTEXAI_LOCATION
      vertex_credentials: os.environ/VERTEXAI_CREDENTIALS
```

Start the proxy and make requests:

```bash
# Step 1: Generate video
curl --location 'http://0.0.0.0:4000/videos' \
--header 'Content-Type: application/json' \
--header 'Authorization: Bearer sk-1234' \
--data '{
    "model": "veo-3",
    "prompt": "Aerial shot over a futuristic city at sunrise",
    "seconds": "8"
}'

# Step 2: Poll status
curl --location 'http://localhost:4000/v1/videos/{video_id}' \
--header 'x-litellm-api-key: sk-1234'

# Step 3: Download video
curl --location 'http://localhost:4000/v1/videos/{video_id}/content' \
--header 'x-litellm-api-key: sk-1234' \
--output video.mp4
```

```python
import time

import litellm

litellm.api_base = "http://0.0.0.0:4000"
litellm.api_key = "sk-1234"

response = litellm.video_generation(
    model="veo-3",
    prompt="Aerial shot over a futuristic city at sunrise",
)

status = litellm.video_status(video_id=response.id)
while status.status not in ["completed", "failed"]:
    time.sleep(10)  # avoid hammering the proxy with back-to-back status calls
    status = litellm.video_status(video_id=response.id)

if status.status == "completed":
    content = litellm.video_content(video_id=response.id)
    with open("veo_city.mp4", "wb") as f:
        f.write(content)
```

## Cost Tracking

LiteLLM records the duration returned by Veo so you can apply duration-based pricing.
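
As a small illustration, a helper that turns the recorded usage into a dollar estimate. The `0.35` per-second rate is an assumed placeholder, not Google's published price — check current Vertex AI pricing before relying on it:

```python
# Hypothetical per-second rate -- replace with the current published Veo price.
ASSUMED_PRICE_PER_SECOND = 0.35


def estimate_veo_cost(usage: dict) -> float:
    """Estimate spend from the usage dict LiteLLM attaches to video responses."""
    return float(usage.get("duration_seconds", 0.0)) * ASSUMED_PRICE_PER_SECOND
```

The provider-level example below shows where that `usage` dict comes from: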
```python
from litellm import video_generation

with open("/path/to/service_account.json", "r", encoding="utf-8") as f:
    vertex_credentials = f.read()

response = video_generation(
    model="vertex_ai/veo-2.0-generate-001",
    prompt="Flowers blooming in fast forward",
    seconds="5",
    vertex_project="your-gcp-project-id",
    vertex_location="us-central1",
    vertex_credentials=vertex_credentials,
)

print(response.usage)  # {"duration_seconds": 5.0}
```

## Troubleshooting

- **`vertex_project is required`**: Set the `VERTEXAI_PROJECT` env var or pass `vertex_project` in the request.
- **`Permission denied`**: Ensure the service account has the `Vertex AI User` role and the correct region enabled.
- **Video stuck in `processing`**: Veo operations are long-running. Continue polling every 10–15 seconds for up to ~10 minutes.

## See Also

- [OpenAI Video Generation](../openai/videos.md)
- [Azure Video Generation](../azure/videos.md)
- [Gemini Video Generation](../gemini/videos.md)
- [Video Generation API Reference](/docs/videos)

diff --git a/docs/my-website/docs/proxy/config_settings.md b/docs/my-website/docs/proxy/config_settings.md
index 869bab3d827b..fbdbe6ea7f3e 100644
--- a/docs/my-website/docs/proxy/config_settings.md
+++ b/docs/my-website/docs/proxy/config_settings.md
@@ -519,6 +519,7 @@ router_settings:
| DEFAULT_SLACK_ALERTING_THRESHOLD | Default threshold for Slack alerting. Default is 300
| DEFAULT_SOFT_BUDGET | Default soft budget for LiteLLM proxy keys. Default is 50.0
| DEFAULT_TRIM_RATIO | Default ratio of tokens to trim from prompt end. Default is 0.75
+| DEFAULT_GOOGLE_VIDEO_DURATION_SECONDS | Default duration (in seconds) assumed for Google Veo video generation. Default is 8
| DIRECT_URL | Direct URL for service endpoint
| DISABLE_ADMIN_UI | Toggle to disable the admin UI
| DISABLE_AIOHTTP_TRANSPORT | Flag to disable aiohttp transport. When this is set to True, litellm will use httpx instead of aiohttp. 
**Default is False** diff --git a/docs/my-website/docs/videos.md b/docs/my-website/docs/videos.md index 96ff4c8190a4..cc9f1bc9ceac 100644 --- a/docs/my-website/docs/videos.md +++ b/docs/my-website/docs/videos.md @@ -9,7 +9,7 @@ Fallbacks | ✅ (Between supported models) | | Guardrails Support | ✅ Content moderation and safety checks | | Proxy Server Support | ✅ Full proxy integration with virtual keys | | Spend Management | ✅ Budget tracking and rate limiting | -| Supported Providers | `openai`, `azure` | +| Supported Providers | `openai`, `azure`, `gemini`, `vertex_ai` | :::tip @@ -41,8 +41,7 @@ print(f"Initial Status: {response.status}") # Check status until video is ready while True: status_response = video_status( - video_id=response.id, - custom_llm_provider="openai" + video_id=response.id ) print(f"Current Status: {status_response.status}") @@ -57,8 +56,7 @@ while True: # Download video content when ready video_bytes = video_content( - video_id=response.id, - custom_llm_provider="openai" + video_id=response.id ) # Save to file @@ -88,8 +86,7 @@ async def test_async_video(): # Check status until video is ready while True: status_response = await avideo_status( - video_id=response.id, - custom_llm_provider="openai" + video_id=response.id ) print(f"Current Status: {status_response.status}") @@ -104,8 +101,7 @@ async def test_async_video(): # Download video content when ready video_bytes = await avideo_content( - video_id=response.id, - custom_llm_provider="openai" + video_id=response.id ) # Save to file @@ -120,21 +116,27 @@ asyncio.run(test_async_video()) ```python from litellm import video_status -# Check the status of a video generation status_response = video_status( - video_id="video_1234567890", - custom_llm_provider="openai" + video_id="video_1234567890" ) print(f"Video Status: {status_response.status}") print(f"Created At: {status_response.created_at}") print(f"Model: {status_response.model}") +``` + +### List Videos + +For listing videos, you need to specify the provider since there's no video_id to decode from: + +```python +from litellm import video_list + +# List videos from OpenAI +videos = video_list(custom_llm_provider="openai") -# Possible status values: -# - "queued": Video is in the queue -# - "processing": Video is being generated -# - "completed": Video is ready for download -# - "failed": Video generation failed +for video in videos: + print(f"Video ID: {video['id']}") ``` ### Video Generation with Reference Image @@ -207,7 +209,7 @@ print(f"Video ID: {response.id}") LiteLLM provides OpenAI API compatible video endpoints for complete video generation workflow: -- `/videos/generations` - Generate new videos +- `/videos` - Generate new videos - `/videos/remix` - Edit existing videos with reference images - `/videos/status` - Check video generation status - `/videos/retrieval` - Download completed videos @@ -227,7 +229,6 @@ model_list: model: azure/sora-2 api_key: os.environ/AZURE_OPENAI_API_KEY api_base: os.environ/AZURE_OPENAI_API_BASE - api_version: "2024-02-15-preview" ``` Start litellm @@ -253,31 +254,14 @@ curl --location 'http://localhost:4000/v1/videos' \ Test video status request ```bash -# Using custom-llm-provider header -curl --location 'http://localhost:4000/v1/videos/video_id' \ ---header 'Accept: application/json' \ ---header 'x-litellm-api-key: sk-1234' \ ---header 'custom-llm-provider: azure' - -# Or using query parameter -curl --location 'http://localhost:4000/v1/videos/video_id?custom_llm_provider=azure' \ ---header 'Accept: application/json' \ +curl 
--location 'http://localhost:4000/v1/videos/{video_id}' \ --header 'x-litellm-api-key: sk-1234' ``` Test video retrieval request ```bash -# Using custom-llm-provider header -curl --location 'http://localhost:4000/v1/videos/video_id/content' \ ---header 'Accept: application/json' \ ---header 'x-litellm-api-key: sk-1234' \ ---header 'custom-llm-provider: openai' \ ---output video.mp4 - -# Or using query parameter -curl --location 'http://localhost:4000/v1/videos/video_id/content?custom_llm_provider=openai' \ ---header 'Accept: application/json' \ +curl --location 'http://localhost:4000/v1/videos/{video_id}/content' \ --header 'x-litellm-api-key: sk-1234' \ --output video.mp4 ``` @@ -285,25 +269,25 @@ curl --location 'http://localhost:4000/v1/videos/video_id/content?custom_llm_pro Test video remix request ```bash -# Using custom_llm_provider in request body -curl --location --request POST 'http://localhost:4000/v1/videos/video_id/remix' \ ---header 'Accept: application/json' \ +curl --location --request POST 'http://localhost:4000/v1/videos/{video_id}/remix' \ --header 'Content-Type: application/json' \ --header 'x-litellm-api-key: sk-1234' \ --data '{ - "prompt": "New remix instructions", - "custom_llm_provider": "azure" + "prompt": "New remix instructions" }' +``` -# Or using custom-llm-provider header -curl --location --request POST 'http://localhost:4000/v1/videos/video_id/remix' \ ---header 'Accept: application/json' \ ---header 'Content-Type: application/json' \ +Test video list request (requires custom_llm_provider) + +```bash +# Note: video_list requires custom_llm_provider since there's no video_id to decode from +curl --location 'http://localhost:4000/v1/videos?custom_llm_provider=openai' \ +--header 'x-litellm-api-key: sk-1234' + +# Or using header +curl --location 'http://localhost:4000/v1/videos' \ --header 'x-litellm-api-key: sk-1234' \ ---header 'custom-llm-provider: azure' \ ---data '{ - "prompt": "New remix instructions" -}' +--header 'custom-llm-provider: azure' ``` Test Azure video generation request @@ -618,4 +602,6 @@ The response follows OpenAI's video generation format with the following structu | Provider | Link to Usage | |-------------|--------------------| | OpenAI | [Usage](providers/openai/videos) | -| Azure | [Usage](providers/azure/videos) | \ No newline at end of file +| Azure | [Usage](providers/azure/videos) | +| Gemini | [Usage](providers/gemini/videos) | +| Vertex AI | [Usage](providers/vertex_ai/videos) | diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index 7c1d5f196b5f..1c62a8507f27 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -478,6 +478,7 @@ const sidebars = { label: "Vertex AI", items: [ "providers/vertex", + "providers/vertex_ai/videos", "providers/vertex_partner", "providers/vertex_self_deployed", "providers/vertex_image", @@ -490,6 +491,7 @@ const sidebars = { label: "Google AI Studio", items: [ "providers/gemini", + "providers/gemini/videos", "providers/google_ai_studio/files", "providers/google_ai_studio/image_gen", "providers/google_ai_studio/realtime", diff --git a/litellm/constants.py b/litellm/constants.py index 17de0e0bd192..43fc37ad1c74 100644 --- a/litellm/constants.py +++ b/litellm/constants.py @@ -280,6 +280,8 @@ DEFAULT_IMAGE_ENDPOINT_MODEL = "dall-e-2" DEFAULT_VIDEO_ENDPOINT_MODEL = "sora-2" +DEFAULT_GOOGLE_VIDEO_DURATION_SECONDS = int(os.getenv("DEFAULT_GOOGLE_VIDEO_DURATION_SECONDS", 8)) + ### DATAFORSEO CONSTANTS ### DEFAULT_DATAFORSEO_LOCATION_CODE = int( 
os.getenv("DEFAULT_DATAFORSEO_LOCATION_CODE", 2250) diff --git a/litellm/litellm_core_utils/health_check_helpers.py b/litellm/litellm_core_utils/health_check_helpers.py index 9cbee7fc70d1..cc3916af0693 100644 --- a/litellm/litellm_core_utils/health_check_helpers.py +++ b/litellm/litellm_core_utils/health_check_helpers.py @@ -97,6 +97,7 @@ def get_mode_handlers( "audio_speech", "audio_transcription", "image_generation", + "video_generation", "rerank", "realtime", "batch", @@ -159,6 +160,10 @@ def get_mode_handlers( **_filter_model_params(model_params=model_params), prompt=prompt, ), + "video_generation": lambda: litellm.avideo_generation( + **_filter_model_params(model_params=model_params), + prompt=prompt or "test video generation", + ), "rerank": lambda: litellm.arerank( **_filter_model_params(model_params=model_params), query=prompt or "", diff --git a/litellm/llms/base_llm/videos/transformation.py b/litellm/llms/base_llm/videos/transformation.py index 7234093778c5..16341932fe83 100644 --- a/litellm/llms/base_llm/videos/transformation.py +++ b/litellm/llms/base_llm/videos/transformation.py @@ -92,10 +92,11 @@ def transform_video_create_request( self, model: str, prompt: str, + api_base: str, video_create_optional_request_params: Dict, litellm_params: GenericLiteLLMParams, headers: dict, - ) -> Tuple[Dict, RequestFiles]: + ) -> Tuple[Dict, RequestFiles, str]: pass @abstractmethod @@ -104,6 +105,8 @@ def transform_video_create_response( model: str, raw_response: httpx.Response, logging_obj: LiteLLMLoggingObj, + custom_llm_provider: Optional[str] = None, + request_data: Optional[Dict] = None, ) -> VideoObject: pass @@ -154,6 +157,7 @@ def transform_video_remix_response( self, raw_response: httpx.Response, logging_obj: LiteLLMLoggingObj, + custom_llm_provider: Optional[str] = None, ) -> VideoObject: pass @@ -181,6 +185,7 @@ def transform_video_list_response( self, raw_response: httpx.Response, logging_obj: LiteLLMLoggingObj, + custom_llm_provider: Optional[str] = None, ) -> Dict[str,str]: pass @@ -229,6 +234,7 @@ def transform_video_status_retrieve_response( self, raw_response: httpx.Response, logging_obj: LiteLLMLoggingObj, + custom_llm_provider: Optional[str] = None, ) -> VideoObject: pass diff --git a/litellm/llms/custom_httpx/llm_http_handler.py b/litellm/llms/custom_httpx/llm_http_handler.py index fb3498fb257e..883f2de44df7 100644 --- a/litellm/llms/custom_httpx/llm_http_handler.py +++ b/litellm/llms/custom_httpx/llm_http_handler.py @@ -4099,7 +4099,7 @@ def video_generation_handler( or {}, model=model, ) - + if extra_headers: headers.update(extra_headers) @@ -4109,12 +4109,13 @@ def video_generation_handler( litellm_params=dict(litellm_params), ) - data, files = video_generation_provider_config.transform_video_create_request( + data, files, api_base = video_generation_provider_config.transform_video_create_request( model=model, prompt=prompt, video_create_optional_request_params=video_generation_optional_request_params, litellm_params=litellm_params, headers=headers, + api_base=api_base, ) ## LOGGING @@ -4140,8 +4141,8 @@ def video_generation_handler( timeout=timeout, ) - # --- END MOCK VIDEO RESPONSE --- else: + # Use JSON content type for POST requests without files response = sync_httpx_client.post( url=api_base, headers=headers, @@ -4159,6 +4160,8 @@ def video_generation_handler( model=model, raw_response=response, logging_obj=logging_obj, + custom_llm_provider=custom_llm_provider, + request_data=data, ) async def async_video_generation_handler( @@ -4206,9 +4209,10 @@ async def 
async_video_generation_handler( litellm_params=dict(litellm_params), ) - data, files = video_generation_provider_config.transform_video_create_request( + data, files, api_base = video_generation_provider_config.transform_video_create_request( model=model, prompt=prompt, + api_base=api_base, video_create_optional_request_params=video_generation_optional_request_params, litellm_params=litellm_params, headers=headers, @@ -4226,7 +4230,7 @@ async def async_video_generation_handler( ) try: - # Use JSON when no files, otherwise use form data with files + #Use JSON when no files, otherwise use form data with files if files is None or len(files) == 0: response = await async_httpx_client.post( url=api_base, @@ -4253,6 +4257,8 @@ async def async_video_generation_handler( model=model, raw_response=response, logging_obj=logging_obj, + custom_llm_provider=custom_llm_provider, + request_data=data, ) ###### VIDEO CONTENT HANDLER ###### @@ -4308,7 +4314,7 @@ def video_content_handler( ) # Transform the request using the provider config - url, params = video_content_provider_config.transform_video_content_request( + url, data = video_content_provider_config.transform_video_content_request( video_id=video_id, api_base=api_base, litellm_params=litellm_params, @@ -4316,12 +4322,21 @@ def video_content_handler( ) try: - # Make the GET request to download content - response = sync_httpx_client.get( - url=url, - headers=headers, - params=params, - ) + # Use POST if params contains data (e.g., Vertex AI fetchPredictOperation) + # Otherwise use GET (e.g., OpenAI video content download) + if data: + response = sync_httpx_client.post( + url=url, + headers=headers, + json=data, + ) + else: + # Otherwise it's a GET request with query params + response = sync_httpx_client.get( + url=url, + headers=headers, + params=data, + ) # Transform the response using the provider config return video_content_provider_config.transform_video_content_response( @@ -4374,7 +4389,7 @@ async def async_video_content_handler( ) # Transform the request using the provider config - url, params = video_content_provider_config.transform_video_content_request( + url, data = video_content_provider_config.transform_video_content_request( video_id=video_id, api_base=api_base, litellm_params=litellm_params, @@ -4382,12 +4397,21 @@ async def async_video_content_handler( ) try: - # Make the GET request to download content - response = await async_httpx_client.get( - url=url, - headers=headers, - params=params, - ) + # Use POST if params contains data (e.g., Vertex AI fetchPredictOperation) + # Otherwise use GET (e.g., OpenAI video content download) + if data: + response = await async_httpx_client.post( + url=url, + headers=headers, + json=data, + ) + else: + # Otherwise it's a GET request with query params + response = await async_httpx_client.get( + url=url, + headers=headers, + params=data, + ) # Transform the response using the provider config return video_content_provider_config.transform_video_content_response( @@ -4492,6 +4516,7 @@ def video_remix_handler( return video_remix_provider_config.transform_video_remix_response( raw_response=response, logging_obj=logging_obj, + custom_llm_provider=custom_llm_provider, ) except Exception as e: @@ -4573,6 +4598,7 @@ async def async_video_remix_handler( return video_remix_provider_config.transform_video_remix_response( raw_response=response, logging_obj=logging_obj, + custom_llm_provider=custom_llm_provider, ) except Exception as e: @@ -4708,6 +4734,7 @@ async def async_video_list_handler( return 
video_list_provider_config.transform_video_list_response( raw_response=response, logging_obj=logging_obj, + custom_llm_provider=custom_llm_provider, ) except Exception as e: @@ -4863,17 +4890,29 @@ def video_status_handler( "api_base": url, "headers": headers, "video_id": video_id, + "data": data, }, ) try: - response = sync_httpx_client.get( - url=url, - headers=headers, - ) + # Use POST if data is provided (e.g., Vertex AI fetchPredictOperation) + # Otherwise use GET (e.g., OpenAI video status) + if data: + response = sync_httpx_client.post( + url=url, + headers=headers, + json=data, + ) + else: + response = sync_httpx_client.get( + url=url, + headers=headers, + ) + return video_status_provider_config.transform_video_status_retrieve_response( raw_response=response, logging_obj=logging_obj, + custom_llm_provider=custom_llm_provider, ) except Exception as e: @@ -4937,17 +4976,28 @@ async def async_video_status_handler( "api_base": url, "headers": headers, "video_id": video_id, + "data": data, }, ) try: - response = await async_httpx_client.get( - url=url, - headers=headers, - ) + # Use POST if data is provided (e.g., Vertex AI fetchPredictOperation) + # Otherwise use GET (e.g., OpenAI video status) + if data: + response = await async_httpx_client.post( + url=url, + headers=headers, + json=data, + ) + else: + response = await async_httpx_client.get( + url=url, + headers=headers, + ) return video_status_provider_config.transform_video_status_retrieve_response( raw_response=response, logging_obj=logging_obj, + custom_llm_provider=custom_llm_provider, ) except Exception as e: diff --git a/litellm/llms/gemini/videos/__init__.py b/litellm/llms/gemini/videos/__init__.py new file mode 100644 index 000000000000..c5aed2db2d02 --- /dev/null +++ b/litellm/llms/gemini/videos/__init__.py @@ -0,0 +1,5 @@ +# Gemini Video Generation Support +from .transformation import GeminiVideoConfig + +__all__ = ["GeminiVideoConfig"] + diff --git a/litellm/llms/gemini/videos/transformation.py b/litellm/llms/gemini/videos/transformation.py new file mode 100644 index 000000000000..d1ae47af269f --- /dev/null +++ b/litellm/llms/gemini/videos/transformation.py @@ -0,0 +1,523 @@ +from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union +import base64 + +import httpx +from httpx._types import RequestFiles + +from litellm.types.videos.main import VideoCreateOptionalRequestParams, VideoObject +from litellm.types.router import GenericLiteLLMParams +from litellm.secret_managers.main import get_secret_str +from litellm.types.videos.utils import ( + encode_video_id_with_provider, + extract_original_video_id, +) +from litellm.images.utils import ImageEditRequestUtils +import litellm +from litellm.types.llms.gemini import GeminiLongRunningOperationResponse, GeminiVideoGenerationInstance, GeminiVideoGenerationParameters, GeminiVideoGenerationRequest +from litellm.constants import DEFAULT_GOOGLE_VIDEO_DURATION_SECONDS +if TYPE_CHECKING: + from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj + from ...base_llm.videos.transformation import BaseVideoConfig as _BaseVideoConfig + from ...base_llm.chat.transformation import BaseLLMException as _BaseLLMException + + LiteLLMLoggingObj = _LiteLLMLoggingObj + BaseVideoConfig = _BaseVideoConfig + BaseLLMException = _BaseLLMException +else: + LiteLLMLoggingObj = Any + BaseVideoConfig = Any + BaseLLMException = Any + + +def _convert_image_to_gemini_format(image_file) -> Dict[str, str]: + """ + Convert image file to Gemini format with base64 encoding and 
MIME type.

    Args:
        image_file: File-like object opened in binary mode (e.g., open("path", "rb"))

    Returns:
        Dict with bytesBase64Encoded and mimeType
    """
    mime_type = ImageEditRequestUtils.get_image_content_type(image_file)

    if hasattr(image_file, 'seek'):
        image_file.seek(0)
    image_bytes = image_file.read()
    base64_encoded = base64.b64encode(image_bytes).decode("utf-8")

    return {
        "bytesBase64Encoded": base64_encoded,
        "mimeType": mime_type
    }


class GeminiVideoConfig(BaseVideoConfig):
    """
    Configuration class for Gemini (Veo) video generation.

    Veo uses a long-running operation model:
    1. POST to :predictLongRunning returns operation name
    2. Poll operation until done=true
    3. Extract video URI from response
    4. Download video using file API
    """

    def __init__(self):
        super().__init__()

    def get_supported_openai_params(self, model: str) -> list:
        """
        Get the list of supported OpenAI parameters for Veo video generation.
        Veo supports minimal parameters compared to OpenAI.
        """
        return [
            "model",
            "prompt",
            "input_reference",
            "seconds",
            "size"
        ]

    def map_openai_params(
        self,
        video_create_optional_params: VideoCreateOptionalRequestParams,
        model: str,
        drop_params: bool,
    ) -> Dict[str, Any]:
        """
        Map OpenAI-style parameters to Veo format.

        Mappings:
        - prompt → prompt
        - input_reference → image
        - size → aspectRatio (e.g., "1280x720" → "16:9")
        - seconds → durationSeconds (omitted when not provided; the API applies its
          own default, and LiteLLM assumes DEFAULT_GOOGLE_VIDEO_DURATION_SECONDS
          for cost tracking)

        All other params are passed through as-is to support Gemini-specific parameters.
        """
        mapped_params: Dict[str, Any] = {}

        # Get supported OpenAI params (exclude "model" and "prompt" which are handled separately)
        supported_openai_params = self.get_supported_openai_params(model)
        openai_params_to_map = {
            param for param in supported_openai_params
            if param not in {"model", "prompt"}
        }

        # Map input_reference to image
        if "input_reference" in video_create_optional_params:
            mapped_params["image"] = video_create_optional_params["input_reference"]

        # Map size to aspectRatio
        if "size" in video_create_optional_params:
            size = video_create_optional_params["size"]
            if size is not None:
                aspect_ratio = self._convert_size_to_aspect_ratio(size)
                if aspect_ratio:
                    mapped_params["aspectRatio"] = aspect_ratio

        # Map seconds to durationSeconds
        if "seconds" in video_create_optional_params:
            seconds = video_create_optional_params["seconds"]
            try:
                duration = int(seconds) if isinstance(seconds, str) else seconds
                if duration is not None:
                    mapped_params["durationSeconds"] = duration
            except (ValueError, TypeError):
                # If conversion fails, omit durationSeconds so the API default applies
                pass

        # Pass through any other params that weren't mapped (Gemini-specific params)
        for key, value in video_create_optional_params.items():
            if key not in openai_params_to_map and key not in mapped_params:
                mapped_params[key] = value

        return mapped_params

    def _convert_size_to_aspect_ratio(self, size: str) -> Optional[str]:
        """
        Convert OpenAI size format to Veo aspectRatio format.
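        For example, "1280x720" and "1920x1080" both map to "16:9"; unrecognized sizes fall back to "16:9".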
+ + https://cloud.google.com/vertex-ai/generative-ai/docs/image/generate-videos + + Supported aspect ratios: 9:16 (portrait), 16:9 (landscape) + """ + if not size: + return None + + aspect_ratio_map = { + "1280x720": "16:9", + "1920x1080": "16:9", + "720x1280": "9:16", + "1080x1920": "9:16", + } + + return aspect_ratio_map.get(size, "16:9") + + + def validate_environment( + self, + headers: dict, + model: str, + api_key: Optional[str] = None, + ) -> dict: + """ + Validate environment and add Gemini API key to headers. + Gemini uses x-goog-api-key header for authentication. + """ + api_key = ( + api_key + or litellm.api_key + or get_secret_str("GOOGLE_API_KEY") + or get_secret_str("GEMINI_API_KEY") + ) + + if not api_key: + raise ValueError( + "GEMINI_API_KEY or GOOGLE_API_KEY is required for Veo video generation. " + "Set it via environment variable or pass it as api_key parameter." + ) + + headers.update({ + "x-goog-api-key": api_key, + "Content-Type": "application/json", + }) + return headers + + def get_complete_url( + self, + model: str, + api_base: Optional[str], + litellm_params: dict, + ) -> str: + """ + Get the complete URL for Veo video generation. + For video creation: returns full URL with :predictLongRunning + For status/delete: returns base URL only + """ + if api_base is None: + api_base = get_secret_str("GEMINI_API_BASE") or "https://generativelanguage.googleapis.com" + + if not model or model == "": + return api_base.rstrip('/') + + model_name = model.replace("gemini/", "") + url = f"{api_base.rstrip('/')}/v1beta/models/{model_name}:predictLongRunning" + + return url + + def transform_video_create_request( + self, + model: str, + prompt: str, + api_base: str, + video_create_optional_request_params: Dict, + litellm_params: GenericLiteLLMParams, + headers: dict, + ) -> Tuple[Dict, RequestFiles, str]: + """ + Transform the video creation request for Veo API. + + Veo expects: + { + "instances": [ + { + "prompt": "A cat playing with a ball of yarn" + } + ], + "parameters": { + "aspectRatio": "16:9", + "durationSeconds": 8, + "resolution": "720p" + } + } + """ + instance = GeminiVideoGenerationInstance(prompt=prompt) + + params_copy = video_create_optional_request_params.copy() + + if "image" in params_copy and params_copy["image"] is not None: + image_data = _convert_image_to_gemini_format(params_copy["image"]) + params_copy["image"] = image_data + + parameters = GeminiVideoGenerationParameters(**params_copy) + + request_body_obj = GeminiVideoGenerationRequest( + instances=[instance], + parameters=parameters + ) + + request_data = request_body_obj.model_dump(exclude_none=True) + + return request_data, [], api_base + + def transform_video_create_response( + self, + model: str, + raw_response: httpx.Response, + logging_obj: LiteLLMLoggingObj, + custom_llm_provider: Optional[str] = None, + request_data: Optional[Dict] = None, + ) -> VideoObject: + """ + Transform the Veo video creation response. 
+ + Veo returns: + { + "name": "operations/generate_1234567890", + "metadata": {...}, + "done": false, + "error": {...} + } + + We return this as a VideoObject with: + - id: operation name (used for polling) + - status: "processing" + - usage: includes duration_seconds for cost calculation + """ + response_data = raw_response.json() + + # Parse response using Pydantic model for type safety + try: + operation_response = GeminiLongRunningOperationResponse(**response_data) + except Exception as e: + raise ValueError(f"Failed to parse operation response: {e}") + + operation_name = operation_response.name + if not operation_name: + raise ValueError(f"No operation name in Veo response: {response_data}") + + if custom_llm_provider: + video_id = encode_video_id_with_provider(operation_name, custom_llm_provider, model) + else: + video_id = operation_name + + video_obj = VideoObject( + id=video_id, + object="video", + status="processing", + model=model, + ) + + usage_data = {} + if request_data: + parameters = request_data.get("parameters", {}) + duration = parameters.get("durationSeconds") or DEFAULT_GOOGLE_VIDEO_DURATION_SECONDS + if duration is not None: + try: + usage_data["duration_seconds"] = float(duration) + except (ValueError, TypeError): + pass + + video_obj.usage = usage_data + return video_obj + + def transform_video_status_retrieve_request( + self, + video_id: str, + api_base: str, + litellm_params: GenericLiteLLMParams, + headers: dict, + ) -> Tuple[str, Dict]: + """ + Transform the video status retrieve request for Veo API. + + Veo polls operations at: + GET https://generativelanguage.googleapis.com/v1beta/{operation_name} + """ + operation_name = extract_original_video_id(video_id) + url = f"{api_base.rstrip('/')}/v1beta/{operation_name}" + params: Dict[str, Any] = {} + + return url, params + + def transform_video_status_retrieve_response( + self, + raw_response: httpx.Response, + logging_obj: LiteLLMLoggingObj, + custom_llm_provider: Optional[str] = None, + ) -> VideoObject: + """ + Transform the Veo operation status response. + + Veo returns: + { + "name": "operations/generate_1234567890", + "done": false # or true when complete + } + + When done=true: + { + "name": "operations/generate_1234567890", + "done": true, + "response": { + "generateVideoResponse": { + "generatedSamples": [ + { + "video": { + "uri": "files/abc123..." + } + } + ] + } + } + } + """ + response_data = raw_response.json() + # Parse response using Pydantic model for type safety + operation_response = GeminiLongRunningOperationResponse(**response_data) + + operation_name = operation_response.name + is_done = operation_response.done + + if custom_llm_provider: + video_id = encode_video_id_with_provider(operation_name, custom_llm_provider, None) + else: + video_id = operation_name + + video_obj = VideoObject( + id=video_id, + object="video", + status="processing" if not is_done else "completed" + ) + return video_obj + + def transform_video_content_request( + self, + video_id: str, + api_base: str, + litellm_params: GenericLiteLLMParams, + headers: dict, + ) -> Tuple[str, Dict]: + """ + Transform the video content request for Veo API. + + For Veo, we need to: + 1. Get operation status to extract video URI + 2. 
Return download URL for the video + """ + operation_name = extract_original_video_id(video_id) + + status_url = f"{api_base.rstrip('/')}/v1beta/{operation_name}" + client = litellm.module_level_client + status_response = client.get(url=status_url, headers=headers) + status_response.raise_for_status() + response_data = status_response.json() + + operation_response = GeminiLongRunningOperationResponse(**response_data) + + if not operation_response.done: + raise ValueError( + "Video generation is not complete yet. " + "Please check status with video_status() before downloading." + ) + + if not operation_response.response: + raise ValueError("No response data in completed operation") + + generated_samples = operation_response.response.generateVideoResponse.generatedSamples + download_url = generated_samples[0].video.uri + + params: Dict[str, Any] = {} + + return download_url, params + + def transform_video_content_response( + self, + raw_response: httpx.Response, + logging_obj: LiteLLMLoggingObj, + ) -> bytes: + """ + Transform the Veo video content download response. + Returns the video bytes directly. + """ + return raw_response.content + + def transform_video_remix_request( + self, + video_id: str, + prompt: str, + api_base: str, + litellm_params: GenericLiteLLMParams, + headers: dict, + extra_body: Optional[Dict[str, Any]] = None, + ) -> Tuple[str, Dict]: + """ + Video remix is not supported by Veo API. + """ + raise NotImplementedError( + "Video remix is not supported by Google Veo. " + "Please use video_generation() to create new videos." + ) + + def transform_video_remix_response( + self, + raw_response: httpx.Response, + logging_obj: LiteLLMLoggingObj, + custom_llm_provider: Optional[str] = None, + ) -> VideoObject: + """Video remix is not supported.""" + raise NotImplementedError("Video remix is not supported by Google Veo.") + + def transform_video_list_request( + self, + api_base: str, + litellm_params: GenericLiteLLMParams, + headers: dict, + after: Optional[str] = None, + limit: Optional[int] = None, + order: Optional[str] = None, + extra_query: Optional[Dict[str, Any]] = None, + ) -> Tuple[str, Dict]: + """ + Video list is not supported by Veo API. + """ + raise NotImplementedError( + "Video list is not supported by Google Veo. " + "Use the operations endpoint directly if you need to list operations." + ) + + def transform_video_list_response( + self, + raw_response: httpx.Response, + logging_obj: LiteLLMLoggingObj, + custom_llm_provider: Optional[str] = None, + ) -> Dict[str, str]: + """Video list is not supported.""" + raise NotImplementedError("Video list is not supported by Google Veo.") + + def transform_video_delete_request( + self, + video_id: str, + api_base: str, + litellm_params: GenericLiteLLMParams, + headers: dict, + ) -> Tuple[str, Dict]: + """ + Video delete is not supported by Veo API. + """ + raise NotImplementedError( + "Video delete is not supported by Google Veo. " + "Videos are automatically cleaned up by Google." 
+ ) + + def transform_video_delete_response( + self, + raw_response: httpx.Response, + logging_obj: LiteLLMLoggingObj, + ) -> VideoObject: + """Video delete is not supported.""" + raise NotImplementedError("Video delete is not supported by Google Veo.") + + def get_error_class( + self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers] + ) -> BaseLLMException: + from ..common_utils import GeminiError + + return GeminiError( + status_code=status_code, + message=error_message, + headers=headers, + ) + diff --git a/litellm/llms/openai/videos/transformation.py b/litellm/llms/openai/videos/transformation.py index c573f3b59b0d..9848477f32de 100644 --- a/litellm/llms/openai/videos/transformation.py +++ b/litellm/llms/openai/videos/transformation.py @@ -9,6 +9,7 @@ from litellm.types.router import GenericLiteLLMParams from litellm.secret_managers.main import get_secret_str from litellm.types.videos.main import VideoObject +from litellm.types.videos.utils import encode_video_id_with_provider, extract_original_video_id import litellm from litellm.llms.openai.image_edit.transformation import ImageEditRequestUtils if TYPE_CHECKING: @@ -94,17 +95,18 @@ def transform_video_create_request( self, model: str, prompt: str, + api_base: str, video_create_optional_request_params: Dict, litellm_params: GenericLiteLLMParams, headers: dict, - ) -> Tuple[Dict, RequestFiles]: + ) -> Tuple[Dict, RequestFiles, str]: """ Transform the video creation request for OpenAI API. """ # Remove model and extra_headers from optional params as they're handled separately video_create_optional_request_params = { k: v for k, v in video_create_optional_request_params.items() - if k not in ["model", "extra_headers"] + if k not in ["model", "extra_headers", "prompt"] } # Create the request data @@ -129,26 +131,24 @@ def transform_video_create_request( image=_input_reference, field_name="input_reference", ) - # Convert to dict for JSON serialization - return data_without_files, files_list + return data_without_files, files_list, api_base def transform_video_create_response( self, model: str, raw_response: httpx.Response, logging_obj: LiteLLMLoggingObj, + custom_llm_provider: Optional[str] = None, + request_data: Optional[Dict] = None, ) -> VideoObject: - """ - Transform the OpenAI video creation response. 
- """ + """Transform the OpenAI video creation response.""" response_data = raw_response.json() - - # Transform the response data video_obj = VideoObject(**response_data) # type: ignore[arg-type] - # Create usage object with duration information for cost calculation - # Video generation API doesn't provide usage, so we create one with duration + if custom_llm_provider and video_obj.id: + video_obj.id = encode_video_id_with_provider(video_obj.id, custom_llm_provider, model) + usage_data = {} if video_obj: if hasattr(video_obj, 'seconds') and video_obj.seconds: @@ -156,9 +156,7 @@ def transform_video_create_response( usage_data["duration_seconds"] = float(video_obj.seconds) except (ValueError, TypeError): pass - # Create the response video_obj.usage = usage_data - return video_obj @@ -175,11 +173,13 @@ def transform_video_content_request( OpenAI API expects the following request: - GET /v1/videos/{video_id}/content """ + original_video_id = extract_original_video_id(video_id) + # Construct the URL for video content download - url = f"{api_base.rstrip('/')}/{video_id}/content" + url = f"{api_base.rstrip('/')}/{original_video_id}/content" # Add video_id as query parameter - params = {"video_id": video_id} + params = {"video_id": original_video_id} return url, params @@ -198,8 +198,10 @@ def transform_video_remix_request( OpenAI API expects the following request: - POST /v1/videos/{video_id}/remix """ + original_video_id = extract_original_video_id(video_id) + # Construct the URL for video remix - url = f"{api_base.rstrip('/')}/{video_id}/remix" + url = f"{api_base.rstrip('/')}/{original_video_id}/remix" # Prepare the request data data = {"prompt": prompt} @@ -215,17 +217,14 @@ def transform_video_content_response( raw_response: httpx.Response, logging_obj: LiteLLMLoggingObj, ) -> bytes: - """ - Transform the OpenAI video content download response. - Returns raw video content as bytes. - """ - # For video content download, return the raw content as bytes + """Transform the OpenAI video content download response.""" return raw_response.content def transform_video_remix_response( self, raw_response: httpx.Response, logging_obj: LiteLLMLoggingObj, + custom_llm_provider: Optional[str] = None, ) -> VideoObject: """ Transform the OpenAI video remix response. 
@@ -235,6 +234,9 @@ def transform_video_remix_response( # Transform the response data video_obj = VideoObject(**response_data) # type: ignore[arg-type] + if custom_llm_provider and video_obj.id: + video_obj.id = encode_video_id_with_provider(video_obj.id, custom_llm_provider, None) + # Create usage object with duration information for cost calculation # Video remix API doesn't provide usage, so we create one with duration usage_data = {} @@ -287,8 +289,20 @@ def transform_video_list_response( self, raw_response: httpx.Response, logging_obj: LiteLLMLoggingObj, + custom_llm_provider: Optional[str] = None, ) -> Dict[str,str]: - return raw_response.json() + response_data = raw_response.json() + + if custom_llm_provider and "data" in response_data: + for video_obj in response_data.get("data", []): + if isinstance(video_obj, dict) and "id" in video_obj: + video_obj["id"] = encode_video_id_with_provider( + video_obj["id"], + custom_llm_provider, + video_obj.get("model") + ) + + return response_data def transform_video_delete_request( self, @@ -303,8 +317,10 @@ def transform_video_delete_request( OpenAI API expects the following request: - DELETE /v1/videos/{video_id} """ + original_video_id = extract_original_video_id(video_id) + # Construct the URL for video delete - url = f"{api_base.rstrip('/')}/{video_id}" + url = f"{api_base.rstrip('/')}/{original_video_id}" # No data needed for DELETE request data: Dict[str, Any] = {} @@ -336,8 +352,11 @@ def transform_video_status_retrieve_request( """ Transform the OpenAI video retrieve request. """ + # Extract the original video_id (remove provider encoding if present) + original_video_id = extract_original_video_id(video_id) + # For video retrieve, we just need to construct the URL - url = f"{api_base.rstrip('/')}/{video_id}" + url = f"{api_base.rstrip('/')}/{original_video_id}" # No additional data needed for GET request data: Dict[str, Any] = {} @@ -348,6 +367,7 @@ def transform_video_status_retrieve_response( self, raw_response: httpx.Response, logging_obj: LiteLLMLoggingObj, + custom_llm_provider: Optional[str] = None, ) -> VideoObject: """ Transform the OpenAI video retrieve response. @@ -355,6 +375,9 @@ def transform_video_status_retrieve_response( response_data = raw_response.json() # Transform the response data video_obj = VideoObject(**response_data) # type: ignore[arg-type] + + if custom_llm_provider and video_obj.id: + video_obj.id = encode_video_id_with_provider(video_obj.id, custom_llm_provider, None) return video_obj diff --git a/litellm/llms/vertex_ai/videos/__init__.py b/litellm/llms/vertex_ai/videos/__init__.py new file mode 100644 index 000000000000..1dcdbdf4ded2 --- /dev/null +++ b/litellm/llms/vertex_ai/videos/__init__.py @@ -0,0 +1,10 @@ +""" +Vertex AI Video Generation Module + +This module provides support for Vertex AI's Veo video generation API. +""" + +from .transformation import VertexAIVideoConfig + +__all__ = ["VertexAIVideoConfig"] + diff --git a/litellm/llms/vertex_ai/videos/transformation.py b/litellm/llms/vertex_ai/videos/transformation.py new file mode 100644 index 000000000000..2b6d43dd708a --- /dev/null +++ b/litellm/llms/vertex_ai/videos/transformation.py @@ -0,0 +1,597 @@ +""" +Vertex AI Video Generation Transformation + +Handles transformation of requests/responses for Vertex AI's Veo video generation API. 
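+The Veo API is asynchronous: the create call returns a long-running operation
+that is polled via :fetchPredictOperation until done, after which the video is
+returned as base64-encoded bytes.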
+Based on: https://docs.cloud.google.com/vertex-ai/generative-ai/docs/model-reference/veo-video-generation +""" + +import base64 +import time +from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union + +import httpx +from httpx._types import RequestFiles + +from litellm.llms.base_llm.videos.transformation import BaseVideoConfig +from litellm.llms.vertex_ai.common_utils import ( + _convert_vertex_datetime_to_openai_datetime, +) +from litellm.llms.vertex_ai.vertex_llm_base import VertexBase +from litellm.types.router import GenericLiteLLMParams +from litellm.types.videos.main import VideoCreateOptionalRequestParams, VideoObject +from litellm.types.videos.utils import ( + encode_video_id_with_provider, + extract_original_video_id, +) +from litellm.images.utils import ImageEditRequestUtils +from litellm.constants import DEFAULT_GOOGLE_VIDEO_DURATION_SECONDS + +if TYPE_CHECKING: + from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj + from litellm.llms.base_llm.chat.transformation import ( + BaseLLMException as _BaseLLMException, + ) + + LiteLLMLoggingObj = _LiteLLMLoggingObj + BaseLLMException = _BaseLLMException +else: + LiteLLMLoggingObj = Any + BaseLLMException = Any + + +def _convert_image_to_vertex_format(image_file) -> Dict[str, str]: + """ + Convert image file to Vertex AI format with base64 encoding and MIME type. + + Args: + image_file: File-like object opened in binary mode (e.g., open("path", "rb")) + + Returns: + Dict with bytesBase64Encoded and mimeType + """ + mime_type = ImageEditRequestUtils.get_image_content_type(image_file) + + if hasattr(image_file, "seek"): + image_file.seek(0) + image_bytes = image_file.read() + base64_encoded = base64.b64encode(image_bytes).decode("utf-8") + + return {"bytesBase64Encoded": base64_encoded, "mimeType": mime_type} + + +class VertexAIVideoConfig(BaseVideoConfig, VertexBase): + """ + Configuration class for Vertex AI (Veo) video generation. + + Veo uses a long-running operation model: + 1. POST to :predictLongRunning returns operation name + 2. Poll operation using :fetchPredictOperation until done=true + 3. Extract video data (base64) from response + """ + + def __init__(self): + BaseVideoConfig.__init__(self) + VertexBase.__init__(self) + + @staticmethod + def extract_model_from_operation_name(operation_name: str) -> Optional[str]: + """ + Extract the model name from a Vertex AI operation name. + + Args: + operation_name: Operation name in format: + projects/PROJECT/locations/LOCATION/publishers/google/models/MODEL/operations/OPERATION_ID + + Returns: + Model name (e.g., "veo-2.0-generate-001") or None if extraction fails + """ + parts = operation_name.split("/") + # Model is at index 7 in the operation name format + if len(parts) >= 8: + return parts[7] + return None + + def get_supported_openai_params(self, model: str) -> list: + """ + Get the list of supported OpenAI parameters for Veo video generation. + Veo supports minimal parameters compared to OpenAI. + """ + return ["model", "prompt", "input_reference", "seconds", "size"] + + def map_openai_params( + self, + video_create_optional_params: VideoCreateOptionalRequestParams, + model: str, + drop_params: bool, + ) -> Dict[str, Any]: + """ + Map OpenAI-style parameters to Veo format. 
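+
+        For example, {"size": "1280x720", "seconds": "8"} becomes
+        {"aspectRatio": "16:9", "durationSeconds": 8}.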
+
+        Mappings:
+        - prompt → prompt (in instances)
+        - input_reference → image (in instances)
+        - size → aspectRatio (e.g., "1280x720" → "16:9")
+        - seconds → durationSeconds (coerced to int; omitted if conversion fails)
+        """
+        mapped_params: Dict[str, Any] = {}
+
+        # Map input_reference to image (will be processed in transform_video_create_request)
+        if "input_reference" in video_create_optional_params:
+            mapped_params["image"] = video_create_optional_params["input_reference"]
+
+        # Map size to aspectRatio
+        if "size" in video_create_optional_params:
+            size = video_create_optional_params["size"]
+            if size is not None:
+                aspect_ratio = self._convert_size_to_aspect_ratio(size)
+                if aspect_ratio:
+                    mapped_params["aspectRatio"] = aspect_ratio
+
+        # Map seconds to durationSeconds (string values are coerced to int)
+        if "seconds" in video_create_optional_params:
+            seconds = video_create_optional_params["seconds"]
+            try:
+                duration = int(seconds) if isinstance(seconds, str) else seconds
+                if duration is not None:
+                    mapped_params["durationSeconds"] = duration
+            except (ValueError, TypeError):
+                # If conversion fails, omit the param so the provider default applies
+                pass
+
+        return mapped_params
+
+    def _convert_size_to_aspect_ratio(self, size: str) -> Optional[str]:
+        """
+        Convert OpenAI size format to Veo aspectRatio format.
+
+        Supported aspect ratios: 9:16 (portrait), 16:9 (landscape)
+        """
+        if not size:
+            return None
+
+        aspect_ratio_map = {
+            "1280x720": "16:9",
+            "1920x1080": "16:9",
+            "720x1280": "9:16",
+            "1080x1920": "9:16",
+        }
+
+        return aspect_ratio_map.get(size, "16:9")
+
+    def validate_environment(
+        self,
+        headers: Dict,
+        model: str,
+        api_key: Optional[str] = None,
+        api_base: Optional[str] = None,
+        litellm_params: Optional[dict] = None,
+        **kwargs,
+    ) -> Dict:
+        """
+        Validate environment and return headers for Vertex AI video generation.
+
+        Vertex AI uses Bearer token authentication with access token from credentials.
+        """
+        # Extract Vertex AI parameters using safe helpers from VertexBase
+        # Use safe_get_* methods that don't mutate litellm_params dict
+        litellm_params = litellm_params or {}
+
+        vertex_project = VertexBase.safe_get_vertex_ai_project(litellm_params=litellm_params)
+        vertex_credentials = VertexBase.safe_get_vertex_ai_credentials(litellm_params=litellm_params)
+
+        # Get access token from Vertex credentials
+        access_token, project_id = self.get_access_token(
+            credentials=vertex_credentials,
+            project_id=vertex_project,
+        )
+
+        headers = {
+            "Authorization": f"Bearer {access_token}",
+            "Content-Type": "application/json",
+            **headers,
+        }
+
+        return headers
+
+    def get_complete_url(
+        self,
+        model: str,
+        api_base: Optional[str],
+        litellm_params: dict,
+    ) -> str:
+        """
+        Get the complete URL for Veo video generation.
+
+        Returns URL for :predictLongRunning endpoint:
+        https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT/locations/LOCATION/publishers/google/models/MODEL:predictLongRunning
+        """
+        vertex_project = VertexBase.safe_get_vertex_ai_project(litellm_params)
+        vertex_location = VertexBase.safe_get_vertex_ai_location(litellm_params)
+
+        if not vertex_project:
+            raise ValueError(
+                "vertex_project is required for Vertex AI video generation. "
+                "Set it via environment variable VERTEXAI_PROJECT or pass as parameter."
+ ) + + # Default to us-central1 if no location specified + vertex_location = vertex_location or "us-central1" + + # Extract model name (remove vertex_ai/ prefix if present) + model_name = model.replace("vertex_ai/", "") + + # Construct the URL + if api_base: + base_url = api_base.rstrip("/") + else: + base_url = f"https://{vertex_location}-aiplatform.googleapis.com" + + url = f"{base_url}/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model_name}" + + return url + + def transform_video_create_request( + self, + model: str, + prompt: str, + api_base: str, + video_create_optional_request_params: Dict, + litellm_params: GenericLiteLLMParams, + headers: dict, + ) -> Tuple[Dict, RequestFiles, str]: + """ + Transform the video creation request for Veo API. + + Veo expects: + { + "instances": [ + { + "prompt": "A cat playing with a ball of yarn", + "image": { + "bytesBase64Encoded": "...", + "mimeType": "image/jpeg" + } + } + ], + "parameters": { + "aspectRatio": "16:9", + "durationSeconds": 8 + } + } + """ + # Build instance with prompt + instance_dict: Dict[str, Any] = {"prompt": prompt} + params_copy = video_create_optional_request_params.copy() + + + # Check if user wants to provide full instance dict + if "instances" in params_copy and isinstance(params_copy["instances"], dict): + # Replace/merge with user-provided instance + instance_dict.update(params_copy["instances"]) + params_copy.pop("instances") + elif "image" in params_copy and params_copy["image"] is not None: + image_data = _convert_image_to_vertex_format(params_copy["image"]) + instance_dict["image"] = image_data + params_copy.pop("image") + + # Build request data directly (TypedDict doesn't have model_dump) + request_data: Dict[str, Any] = {"instances": [instance_dict]} + + # Only add parameters if there are any + if params_copy: + request_data["parameters"] = params_copy + + # Append :predictLongRunning endpoint to api_base + url = f"{api_base}:predictLongRunning" + + # No files needed - everything is in JSON + return request_data, [], url + + def transform_video_create_response( + self, + model: str, + raw_response: httpx.Response, + logging_obj: LiteLLMLoggingObj, + custom_llm_provider: Optional[str] = None, + request_data: Optional[Dict] = None, + ) -> VideoObject: + """ + Transform the Veo video creation response. 
+ + Veo returns: + { + "name": "projects/PROJECT_ID/locations/LOCATION/publishers/google/models/MODEL/operations/OPERATION_ID" + } + + We return this as a VideoObject with: + - id: operation name (used for polling) + - status: "processing" + - usage: includes duration_seconds for cost calculation + """ + response_data = raw_response.json() + + operation_name = response_data.get("name") + if not operation_name: + raise ValueError(f"No operation name in Veo response: {response_data}") + + if custom_llm_provider: + video_id = encode_video_id_with_provider( + operation_name, custom_llm_provider, model + ) + else: + video_id = operation_name + + + video_obj = VideoObject( + id=video_id, + object="video", + status="processing", + model=model + ) + + usage_data = {} + if request_data: + parameters = request_data.get("parameters", {}) + duration = parameters.get("durationSeconds") or DEFAULT_GOOGLE_VIDEO_DURATION_SECONDS + if duration is not None: + try: + usage_data["duration_seconds"] = float(duration) + except (ValueError, TypeError): + pass + + video_obj.usage = usage_data + return video_obj + + def transform_video_status_retrieve_request( + self, + video_id: str, + api_base: str, + litellm_params: GenericLiteLLMParams, + headers: dict, + ) -> Tuple[str, Dict]: + """ + Transform the video status retrieve request for Veo API. + + Veo polls operations using :fetchPredictOperation endpoint with POST request. + """ + operation_name = extract_original_video_id(video_id) + model = self.extract_model_from_operation_name(operation_name) + + if not model: + raise ValueError( + f"Invalid operation name format: {operation_name}. " + "Expected format: projects/PROJECT/locations/LOCATION/publishers/google/models/MODEL/operations/OPERATION_ID" + ) + + # Construct the full URL including model ID + # URL format: https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT/locations/LOCATION/publishers/google/models/MODEL:fetchPredictOperation + # Strip trailing slashes from api_base and append model + url = f"{api_base.rstrip('/')}/{model}:fetchPredictOperation" + + # Request body contains the operation name + params = {"operationName": operation_name} + + return url, params + + def transform_video_status_retrieve_response( + self, + raw_response: httpx.Response, + logging_obj: LiteLLMLoggingObj, + custom_llm_provider: Optional[str] = None, + ) -> VideoObject: + """ + Transform the Veo operation status response. 
+ + Veo returns: + { + "name": "projects/.../operations/OPERATION_ID", + "done": false # or true when complete + } + + When done=true: + { + "name": "projects/.../operations/OPERATION_ID", + "done": true, + "response": { + "@type": "type.googleapis.com/cloud.ai.large_models.vision.GenerateVideoResponse", + "raiMediaFilteredCount": 0, + "videos": [ + { + "bytesBase64Encoded": "...", + "mimeType": "video/mp4" + } + ] + } + } + """ + response_data = raw_response.json() + + operation_name = response_data.get("name", "") + is_done = response_data.get("done", False) + error_data = response_data.get("error") + + # Extract model from operation name + model = self.extract_model_from_operation_name(operation_name) + + if custom_llm_provider: + video_id = encode_video_id_with_provider( + operation_name, custom_llm_provider, model + ) + else: + video_id = operation_name + + # Convert createTime to Unix timestamp + create_time_str = response_data.get("metadata", {}).get("createTime") + if create_time_str: + try: + created_at = _convert_vertex_datetime_to_openai_datetime( + create_time_str + ) + except Exception: + created_at = int(time.time()) + else: + created_at = int(time.time()) + + if error_data: + status = "failed" + elif is_done: + status = "completed" + else: + status = "processing" + + video_obj = VideoObject( + id=video_id, + object="video", + status=status, + model=model, + created_at=created_at, + error=error_data, + ) + return video_obj + + def transform_video_content_request( + self, + video_id: str, + api_base: str, + litellm_params: GenericLiteLLMParams, + headers: dict, + ) -> Tuple[str, Dict]: + """ + Transform the video content request for Veo API. + + For Veo, we need to: + 1. Poll the operation status to ensure it's complete + 2. Extract the base64 video data from the response + 3. Return it for decoding + + Since we need to make an HTTP call here, we'll use the same fetchPredictOperation + approach as status retrieval. + """ + return self.transform_video_status_retrieve_request(video_id, api_base, litellm_params, headers) + + def transform_video_content_response( + self, + raw_response: httpx.Response, + logging_obj: LiteLLMLoggingObj, + ) -> bytes: + """ + Transform the Veo video content download response. + + Extracts the base64 encoded video from the response and decodes it to bytes. + """ + response_data = raw_response.json() + + if not response_data.get("done", False): + raise ValueError( + "Video generation is not complete yet. " + "Please check status with video_status() before downloading." + ) + + try: + video_response = response_data.get("response", {}) + videos = video_response.get("videos", []) + + if not videos or len(videos) == 0: + raise ValueError("No video data found in completed operation") + + # Get the first video + video_data = videos[0] + base64_encoded = video_data.get("bytesBase64Encoded") + + if not base64_encoded: + raise ValueError("No base64 encoded video data found") + + # Decode base64 to bytes + video_bytes = base64.b64decode(base64_encoded) + return video_bytes + + except (KeyError, IndexError) as e: + raise ValueError(f"Failed to extract video data: {e}") + + def transform_video_remix_request( + self, + video_id: str, + prompt: str, + api_base: str, + litellm_params: GenericLiteLLMParams, + headers: dict, + extra_body: Optional[Dict[str, Any]] = None, + ) -> Tuple[str, Dict]: + """ + Video remix is not supported by Veo API. + """ + raise NotImplementedError( + "Video remix is not supported by Vertex AI Veo. 
" + "Please use video_generation() to create new videos." + ) + + def transform_video_remix_response( + self, + raw_response: httpx.Response, + logging_obj: LiteLLMLoggingObj, + custom_llm_provider: Optional[str] = None, + ) -> VideoObject: + """Video remix is not supported.""" + raise NotImplementedError("Video remix is not supported by Vertex AI Veo.") + + def transform_video_list_request( + self, + api_base: str, + litellm_params: GenericLiteLLMParams, + headers: dict, + after: Optional[str] = None, + limit: Optional[int] = None, + order: Optional[str] = None, + extra_query: Optional[Dict[str, Any]] = None, + ) -> Tuple[str, Dict]: + """ + Video list is not supported by Veo API. + """ + raise NotImplementedError( + "Video list is not supported by Vertex AI Veo. " + "Use the operations endpoint directly if you need to list operations." + ) + + def transform_video_list_response( + self, + raw_response: httpx.Response, + logging_obj: LiteLLMLoggingObj, + custom_llm_provider: Optional[str] = None, + ) -> Dict[str, str]: + """Video list is not supported.""" + raise NotImplementedError("Video list is not supported by Vertex AI Veo.") + + def transform_video_delete_request( + self, + video_id: str, + api_base: str, + litellm_params: GenericLiteLLMParams, + headers: dict, + ) -> Tuple[str, Dict]: + """ + Video delete is not supported by Veo API. + """ + raise NotImplementedError( + "Video delete is not supported by Vertex AI Veo. " + "Videos are automatically cleaned up by Google." + ) + + def transform_video_delete_response( + self, + raw_response: httpx.Response, + logging_obj: LiteLLMLoggingObj, + ) -> VideoObject: + """Video delete is not supported.""" + raise NotImplementedError("Video delete is not supported by Vertex AI Veo.") + + def get_error_class( + self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers] + ) -> BaseLLMException: + from litellm.llms.vertex_ai.common_utils import VertexAIError + + return VertexAIError( + status_code=status_code, + message=error_message, + headers=headers, + ) + diff --git a/litellm/main.py b/litellm/main.py index b7af4e8d39c5..85779d8fa87f 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -6001,6 +6001,7 @@ async def ahealth_check( "audio_speech", "audio_transcription", "image_generation", + "video_generation", "batch", "rerank", "realtime", diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 9f020fc1ebf9..f571dfb52433 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -12668,6 +12668,34 @@ "video" ] }, + "gemini/veo-3.1-fast-generate-preview": { + "litellm_provider": "gemini", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.15, + "source": "https://ai.google.dev/gemini-api/docs/video", + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] + }, + "gemini/veo-3.1-generate-preview": { + "litellm_provider": "gemini", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.40, + "source": "https://ai.google.dev/gemini-api/docs/video", + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] + }, "google_pse/search": { "input_cost_per_query": 0.005, "litellm_provider": "google_pse", @@ -23374,6 +23402,34 @@ "video" ] }, + "vertex_ai/veo-3.1-generate-preview": { + "litellm_provider": 
"vertex_ai-video-models", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.4, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/veo", + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] + }, + "vertex_ai/veo-3.1-fast-generate-preview": { + "litellm_provider": "vertex_ai-video-models", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.15, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/veo", + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] + }, "voyage/rerank-2": { "input_cost_per_query": 5e-08, "input_cost_per_token": 5e-08, diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py index e8f89baeffb2..b0aa93c8c5d9 100644 --- a/litellm/proxy/_types.py +++ b/litellm/proxy/_types.py @@ -235,6 +235,7 @@ class LiteLLMRoutes(enum.Enum): "completion", "embeddings", "image_generation", + "video_generation", "audio_transcriptions", "moderations", "model_list", # OpenAI /v1/models route diff --git a/litellm/proxy/health_endpoints/_health_endpoints.py b/litellm/proxy/health_endpoints/_health_endpoints.py index b42f800734ac..1af6c9cf2874 100644 --- a/litellm/proxy/health_endpoints/_health_endpoints.py +++ b/litellm/proxy/health_endpoints/_health_endpoints.py @@ -947,6 +947,7 @@ async def test_model_connection( "audio_speech", "audio_transcription", "image_generation", + "video_generation", "batch", "rerank", "realtime", diff --git a/litellm/proxy/video_endpoints/endpoints.py b/litellm/proxy/video_endpoints/endpoints.py index 9cd4a698035d..25114921dbc6 100644 --- a/litellm/proxy/video_endpoints/endpoints.py +++ b/litellm/proxy/video_endpoints/endpoints.py @@ -15,6 +15,7 @@ get_custom_llm_provider_from_request_headers, get_custom_llm_provider_from_request_query, ) +from litellm.types.videos.utils import decode_video_id_with_provider router = APIRouter() @@ -237,13 +238,15 @@ async def video_status( # Create data with video_id data: Dict[str, Any] = {"video_id": video_id} - # Extract custom_llm_provider from headers, query params, or body + decoded = decode_video_id_with_provider(video_id) + provider_from_id = decoded.get("custom_llm_provider") + custom_llm_provider = ( get_custom_llm_provider_from_request_headers(request=request) or get_custom_llm_provider_from_request_query(request=request) or await get_custom_llm_provider_from_request_body(request=request) + or provider_from_id or "openai" - ) if custom_llm_provider: data["custom_llm_provider"] = custom_llm_provider @@ -304,7 +307,7 @@ async def video_content( Example: ```bash - curl -X GET "http://localhost:4000/v1/videos/video_123/content" \ + curl -X GET "http://localhost:4000/v1/videos/{video_id}/content" \ -H "Authorization: Bearer sk-1234" \ --output video.mp4 ``` @@ -326,11 +329,14 @@ async def video_content( # Create data with video_id data: Dict[str, Any] = {"video_id": video_id} - # Extract custom_llm_provider from headers, query params, or body + decoded = decode_video_id_with_provider(video_id) + provider_from_id = decoded.get("custom_llm_provider") + custom_llm_provider = ( get_custom_llm_provider_from_request_headers(request=request) or get_custom_llm_provider_from_request_query(request=request) or await get_custom_llm_provider_from_request_body(request=request) + or provider_from_id ) if custom_llm_provider: data["custom_llm_provider"] = custom_llm_provider @@ 
-428,11 +434,14 @@ async def video_remix( data = orjson.loads(body) data["video_id"] = video_id - # Extract custom_llm_provider from headers, query params, or body + decoded = decode_video_id_with_provider(video_id) + provider_from_id = decoded.get("custom_llm_provider") + custom_llm_provider = ( get_custom_llm_provider_from_request_headers(request=request) or get_custom_llm_provider_from_request_query(request=request) or data.get("custom_llm_provider") + or provider_from_id ) if custom_llm_provider: data["custom_llm_provider"] = custom_llm_provider diff --git a/litellm/types/llms/gemini.py b/litellm/types/llms/gemini.py index cfc13cc44a82..e29a2cc19a04 100644 --- a/litellm/types/llms/gemini.py +++ b/litellm/types/llms/gemini.py @@ -221,3 +221,125 @@ class GeminiImageGenerationPrediction(TypedDict): class GeminiImageGenerationResponse(TypedDict): """Complete response body from Gemini image generation API""" predictions: List[GeminiImageGenerationPrediction] + +# Video Generation Types +class GeminiVideoGenerationInstance(TypedDict): + """Instance data for Gemini video generation request""" + prompt: str + + +class GeminiVideoGenerationParameters(BaseModel): + """ + Parameters for Gemini video generation request. + + See: Veo 3/3.1 parameter guide. + """ + aspectRatio: Optional[str] = None + """Aspect ratio for generated video (e.g., '16:9', '9:16').""" + + durationSeconds: Optional[int] = None + """ + Length of the generated video in seconds (e.g., 4, 5, 6, 8). + Must be 8 when using extension/interpolation or referenceImages. + """ + + resolution: Optional[str] = None + """ + Video resolution (e.g., '720p', '1080p'). + '1080p' only supports 8s duration; extension only supports '720p'. + """ + + negativePrompt: Optional[str] = None + """Text describing what not to include in the video.""" + + image: Optional[Any] = None + """ + An initial image to animate (Image object). + """ + + lastFrame: Optional[Any] = None + """ + The final image for interpolation video to transition. + Should be used with the 'image' parameter. + """ + + referenceImages: Optional[list] = None + """ + Up to three images to be used as style/content references. + Only supported in Veo 3.1 (list of VideoGenerationReferenceImage objects). + """ + + video: Optional[Any] = None + """ + Video to be used for video extension (Video object). + Only supported in Veo 3.1 & Veo 3 Fast. + """ + + personGeneration: Optional[str] = None + """ + Controls the generation of people. + Text-to-video & Extension: "allow_all" only + Image-to-video, Interpolation, & Reference images (Veo 3.x): "allow_adult" only + See documentation for region restrictions & more. 
+ """ + + +class GeminiVideoGenerationRequest(BaseModel): + """Complete request body for Gemini video generation""" + instances: List[GeminiVideoGenerationInstance] + parameters: Optional[GeminiVideoGenerationParameters] = None + + +# Video Generation Operation Response Types +class GeminiVideoUri(BaseModel): + """Video URI in the generated sample""" + uri: str + """File URI of the generated video (e.g., 'files/abc123...')""" + + +class GeminiGeneratedVideoSample(BaseModel): + """Individual generated video sample""" + video: GeminiVideoUri + """Video object containing the URI""" + + +class GeminiGenerateVideoResponse(BaseModel): + """Generate video response containing the samples""" + generatedSamples: List[GeminiGeneratedVideoSample] + """List of generated video samples""" + + +class GeminiOperationResponse(BaseModel): + """Response object in the operation when done""" + generateVideoResponse: GeminiGenerateVideoResponse + """Video generation response""" + + +class GeminiOperationMetadata(BaseModel): + """Metadata for the operation""" + createTime: Optional[str] = None + """Creation timestamp""" + model: Optional[str] = None + """Model used for generation""" + + +class GeminiLongRunningOperationResponse(BaseModel): + """ + Complete response for a long-running operation. + + Used when polling operation status and extracting results. + """ + name: str + """Operation name (e.g., 'operations/generate_1234567890')""" + + done: bool = False + """Whether the operation is complete""" + + metadata: Optional[GeminiOperationMetadata] = None + """Operation metadata""" + + response: Optional[GeminiOperationResponse] = None + """Response object when operation is complete""" + + error: Optional[Dict[str, Any]] = None + """Error details if operation failed""" diff --git a/litellm/types/llms/vertex_ai.py b/litellm/types/llms/vertex_ai.py index cb2075981a87..768818a61076 100644 --- a/litellm/types/llms/vertex_ai.py +++ b/litellm/types/llms/vertex_ai.py @@ -638,6 +638,52 @@ class VertexBatchPredictionResponse(TypedDict, total=False): modelVersionId: str +class VertexVideoImage(TypedDict, total=False): + """Image input for video generation""" + + bytesBase64Encoded: str + mimeType: str + + +class VertexVideoGenerationInstance(TypedDict, total=False): + """Instance object for Vertex AI video generation request""" + + prompt: Required[str] + image: VertexVideoImage + + +class VertexVideoGenerationParameters(TypedDict, total=False): + """Parameters for Vertex AI video generation""" + + aspectRatio: Literal["9:16", "16:9"] + durationSeconds: int + + +class VertexVideoGenerationRequest(TypedDict): + """Complete request body for Vertex AI video generation""" + + instances: Required[List[VertexVideoGenerationInstance]] + parameters: VertexVideoGenerationParameters + + +class VertexVideoOutput(TypedDict, total=False): + """Video output in response""" + + bytesBase64Encoded: str + mimeType: str + gcsUri: str + + +class VertexVideoGenerationResponse(TypedDict, total=False): + """Response body for Vertex AI video generation""" + + name: str + done: bool + response: Dict[str, Any] + metadata: Dict[str, Any] + error: Dict[str, Any] + + VERTEX_CREDENTIALS_TYPES = Union[str, Dict[str, str]] diff --git a/litellm/types/utils.py b/litellm/types/utils.py index 396db3addb78..e608df675404 100644 --- a/litellm/types/utils.py +++ b/litellm/types/utils.py @@ -2788,6 +2788,10 @@ class SpecialEnums(Enum): LITELLM_MANAGED_GENERIC_RESPONSE_COMPLETE_STR = "litellm_proxy;model_id:{};generic_response_id:{}" # generic implementation of 
'managed batches' - used for finetuning and any future work.
+    LITELLM_MANAGED_VIDEO_COMPLETE_STR = (
+        "litellm:custom_llm_provider:{};model_id:{};video_id:{}"
+    )
+

 class ServiceTier(Enum):
     """Enum for service tier types used in cost calculations."""
diff --git a/litellm/types/videos/main.py b/litellm/types/videos/main.py
index f33e1d5a4411..65c4cfe0e003 100644
--- a/litellm/types/videos/main.py
+++ b/litellm/types/videos/main.py
@@ -10,7 +10,7 @@ class VideoObject(BaseModel):
     id: str
     object: Literal["video"]
     status: str
-    created_at: int
+    created_at: Optional[int] = None
     completed_at: Optional[int] = None
     expires_at: Optional[int] = None
     error: Optional[Dict[str, Any]] = None
@@ -87,3 +87,10 @@ class VideoCreateRequestParams(VideoCreateOptionalRequestParams, total=False):
     Params here: https://platform.openai.com/docs/api-reference/videos/create
     """
     prompt: str
+
+class DecodedVideoId(TypedDict, total=False):
+    """Structure representing a decoded video ID"""
+
+    custom_llm_provider: Optional[str]
+    model_id: Optional[str]
+    video_id: str
\ No newline at end of file
diff --git a/litellm/types/videos/utils.py b/litellm/types/videos/utils.py
new file mode 100644
index 000000000000..329aea645c5c
--- /dev/null
+++ b/litellm/types/videos/utils.py
@@ -0,0 +1,100 @@
+"""
+Utility functions for video ID encoding/decoding with provider information.
+
+Follows the pattern used in responses/utils.py for consistency.
+Format: video_{base64_encoded_string}
+"""
+import base64
+from typing import Optional
+from litellm.types.utils import SpecialEnums
+from litellm.types.videos.main import DecodedVideoId
+from litellm._logging import verbose_logger
+
+
+
+VIDEO_ID_PREFIX = "video_"
+
+
+def encode_video_id_with_provider(
+    video_id: str,
+    provider: str,
+    model_id: Optional[str] = None
+) -> str:
+    """Encode provider and model_id into video_id using base64."""
+    if not provider or not video_id:
+        return video_id
+
+    if video_id.startswith(VIDEO_ID_PREFIX):
+        return video_id
+
+    assembled_id = str(
+        SpecialEnums.LITELLM_MANAGED_VIDEO_COMPLETE_STR.value
+    ).format(provider, model_id or "", video_id)
+
+    base64_encoded_id: str = base64.b64encode(assembled_id.encode("utf-8")).decode("utf-8")
+
+    return f"{VIDEO_ID_PREFIX}{base64_encoded_id}"
+
+
+def decode_video_id_with_provider(encoded_video_id: str) -> DecodedVideoId:
+    """Decode provider and model_id from encoded video_id."""
+    if not encoded_video_id:
+        return DecodedVideoId(
+            custom_llm_provider=None,
+            model_id=None,
+            video_id=encoded_video_id,
+        )
+
+    if not encoded_video_id.startswith(VIDEO_ID_PREFIX):
+        return DecodedVideoId(
+            custom_llm_provider=None,
+            model_id=None,
+            video_id=encoded_video_id,
+        )
+
+    try:
+        cleaned_id = encoded_video_id.replace(VIDEO_ID_PREFIX, "")
+        decoded_id = base64.b64decode(cleaned_id.encode("utf-8")).decode("utf-8")
+
+        if ";" not in decoded_id:
+            return DecodedVideoId(
+                custom_llm_provider=None,
+                model_id=None,
+                video_id=encoded_video_id,
+            )
+
+        parts = decoded_id.split(";")
+
+        custom_llm_provider = None
+        model_id = None
+        decoded_video_id = encoded_video_id
+
+        if len(parts) >= 3:
+            custom_llm_provider_part = parts[0]
+            model_id_part = parts[1]
+            video_id_part = parts[2]
+
+            custom_llm_provider = custom_llm_provider_part.replace(
+                "litellm:custom_llm_provider:", ""
+            )
+            model_id = model_id_part.replace("model_id:", "")
+            decoded_video_id = video_id_part.replace("video_id:", "")
+
+        return DecodedVideoId(
+            custom_llm_provider=custom_llm_provider,
+            model_id=model_id,
+            video_id=decoded_video_id,
) + except Exception as e: + verbose_logger.debug(f"Error decoding video_id '{encoded_video_id}': {e}") + return DecodedVideoId( + custom_llm_provider=None, + model_id=None, + video_id=encoded_video_id, + ) + + +def extract_original_video_id(encoded_video_id: str) -> str: + """Extract original video ID without encoding.""" + decoded = decode_video_id_with_provider(encoded_video_id) + return decoded.get("video_id", encoded_video_id) diff --git a/litellm/utils.py b/litellm/utils.py index e12387899bba..1ac27428f802 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -7648,6 +7648,16 @@ def get_provider_video_config( from litellm.llms.azure.videos.transformation import AzureVideoConfig return AzureVideoConfig() + elif LlmProviders.GEMINI == provider: + from litellm.llms.gemini.videos.transformation import GeminiVideoConfig + + return GeminiVideoConfig() + elif LlmProviders.VERTEX_AI == provider: + from litellm.llms.vertex_ai.videos.transformation import ( + VertexAIVideoConfig, + ) + + return VertexAIVideoConfig() return None @staticmethod diff --git a/litellm/videos/main.py b/litellm/videos/main.py index cbc59169a7ac..be95b4ab9b22 100644 --- a/litellm/videos/main.py +++ b/litellm/videos/main.py @@ -19,6 +19,7 @@ from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj from litellm.llms.base_llm.videos.transformation import BaseVideoConfig from litellm.llms.custom_httpx.llm_http_handler import BaseLLMHTTPHandler +from litellm.types.videos.utils import decode_video_id_with_provider #################### Initialize provider clients #################### llm_http_handler: BaseLLMHTTPHandler = BaseLLMHTTPHandler() @@ -303,13 +304,10 @@ def video_content( ```python import litellm - # Download video content video_bytes = litellm.video_content( - video_id="video_123", - custom_llm_provider="openai" + video_id="video_123" ) - # Save to file with open("video.mp4", "wb") as f: f.write(video_bytes) ``` @@ -320,9 +318,10 @@ def video_content( litellm_call_id: Optional[str] = kwargs.get("litellm_call_id", None) _is_async = kwargs.pop("async_call", False) is True - # Ensure custom_llm_provider is not None - default to openai if not provided + # Try to decode provider from video_id if not explicitly provided if custom_llm_provider is None: - custom_llm_provider = "openai" + decoded = decode_video_id_with_provider(video_id) + custom_llm_provider = decoded.get("custom_llm_provider") or "openai" # get llm provider logic litellm_params = GenericLiteLLMParams(**kwargs) @@ -594,9 +593,10 @@ def video_remix( # noqa: PLR0915 response = VideoObject(**mock_response) return response - # Ensure custom_llm_provider is not None - default to openai if not provided + # Try to decode provider from video_id if not explicitly provided if custom_llm_provider is None: - custom_llm_provider = "openai" + decoded = decode_video_id_with_provider(video_id) + custom_llm_provider = decoded.get("custom_llm_provider") or "openai" # get llm provider logic litellm_params = GenericLiteLLMParams(**kwargs) @@ -907,7 +907,7 @@ async def avideo_status( Returns: - `response` (VideoObject): The response returned by the `video_status` function. 
-""" + """ local_vars = locals() try: loop = asyncio.get_event_loop() @@ -1015,8 +1015,7 @@ def video_status( # noqa: PLR0915 # Get video status video_status = litellm.video_status( - video_id="video_123", - custom_llm_provider="openai" + video_id="video_123" ) print(f"Video status: {video_status.status}") @@ -1038,9 +1037,10 @@ def video_status( # noqa: PLR0915 response = VideoObject(**mock_response) return response - # Ensure custom_llm_provider is not None - default to openai if not provided + # Try to decode provider from video_id if not explicitly provided if custom_llm_provider is None: - custom_llm_provider = "openai" + decoded = decode_video_id_with_provider(video_id) + custom_llm_provider = decoded.get("custom_llm_provider") or "openai" # get llm provider logic litellm_params = GenericLiteLLMParams(**kwargs) diff --git a/litellm/videos/utils.py b/litellm/videos/utils.py index 7cfccab6720d..e04ab9fe180c 100644 --- a/litellm/videos/utils.py +++ b/litellm/videos/utils.py @@ -1,8 +1,9 @@ -from typing import Any, Dict, cast, get_type_hints +from typing import Any, Dict, cast import litellm from litellm.llms.base_llm.videos.transformation import BaseVideoConfig from litellm.types.videos.main import VideoCreateOptionalRequestParams +from litellm.utils import filter_out_litellm_params class VideoGenerationRequestUtils: @@ -25,25 +26,6 @@ def get_optional_params_video_generation( Returns: A dictionary of supported parameters for the video generation API """ - # Get supported parameters for the model - supported_params = video_generation_provider_config.get_supported_openai_params(model) - - # Check for unsupported parameters - unsupported_params = [ - param - for param in video_generation_optional_params - if param not in supported_params - ] - - if unsupported_params: - raise litellm.UnsupportedParamsError( - model=model, - message=( - f"The following parameters are not supported for model {model}: " - f"{', '.join(unsupported_params)}" - ), - ) - # Map parameters to provider-specific format mapped_params = video_generation_provider_config.map_openai_params( video_create_optional_params=video_generation_optional_params, @@ -51,6 +33,15 @@ def get_optional_params_video_generation( drop_params=litellm.drop_params, ) + # Merge extra_body params if present (for provider-specific parameters) + if "extra_body" in video_generation_optional_params: + extra_body = video_generation_optional_params["extra_body"] + if extra_body and isinstance(extra_body, dict): + # extra_body params override mapped params + mapped_params.update(extra_body) + # Remove extra_body from mapped_params since it's not sent to the API + mapped_params.pop("extra_body", None) + return mapped_params @staticmethod @@ -66,9 +57,44 @@ def get_requested_video_generation_optional_param( Returns: VideoCreateOptionalRequestParams instance with only the valid parameters """ - valid_keys = get_type_hints(VideoCreateOptionalRequestParams).keys() - filtered_params = { - k: v for k, v in params.items() if k in valid_keys and v is not None + params = dict(params or {}) + + raw_kwargs = params.get("kwargs", {}) + if not isinstance(raw_kwargs, dict): + raw_kwargs = {} + + kwargs_extra_body = raw_kwargs.pop("extra_body", None) + top_level_extra_body = params.get("extra_body") + + base_params_raw = { + key: value + for key, value in params.items() + if key not in {"kwargs", "extra_body", "prompt", "model"} and value is not None + } + base_params = filter_out_litellm_params(kwargs=base_params_raw) + + cleaned_kwargs = filter_out_litellm_params( 
+ kwargs={k: v for k, v in raw_kwargs.items() if v is not None} + ) + + optional_params: Dict[str, Any] = { + **base_params, + **cleaned_kwargs, } - return cast(VideoCreateOptionalRequestParams, filtered_params) + merged_extra_body: Dict[str, Any] = {} + for extra_body_candidate in (top_level_extra_body, kwargs_extra_body): + if isinstance(extra_body_candidate, dict): + for key, value in extra_body_candidate.items(): + if value is not None: + merged_extra_body[key] = value + + if merged_extra_body: + merged_extra_body = filter_out_litellm_params(kwargs=merged_extra_body) + if merged_extra_body: + optional_params["extra_body"] = merged_extra_body + optional_params.update(merged_extra_body) + + optional_params.pop("timeout", None) + + return cast(VideoCreateOptionalRequestParams, optional_params) diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 9f020fc1ebf9..f571dfb52433 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -12668,6 +12668,34 @@ "video" ] }, + "gemini/veo-3.1-fast-generate-preview": { + "litellm_provider": "gemini", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.15, + "source": "https://ai.google.dev/gemini-api/docs/video", + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] + }, + "gemini/veo-3.1-generate-preview": { + "litellm_provider": "gemini", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.40, + "source": "https://ai.google.dev/gemini-api/docs/video", + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] + }, "google_pse/search": { "input_cost_per_query": 0.005, "litellm_provider": "google_pse", @@ -23374,6 +23402,34 @@ "video" ] }, + "vertex_ai/veo-3.1-generate-preview": { + "litellm_provider": "vertex_ai-video-models", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.4, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/veo", + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] + }, + "vertex_ai/veo-3.1-fast-generate-preview": { + "litellm_provider": "vertex_ai-video-models", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.15, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/veo", + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] + }, "voyage/rerank-2": { "input_cost_per_query": 5e-08, "input_cost_per_token": 5e-08, diff --git a/package-lock.json b/package-lock.json index 1b3c2a690a45..302ec8567885 100644 --- a/package-lock.json +++ b/package-lock.json @@ -8172,4 +8172,4 @@ } } } -} +} \ No newline at end of file diff --git a/tests/test_litellm/llms/azure/videos/test_azure_video_transformation.py b/tests/test_litellm/llms/azure/videos/test_azure_video_transformation.py index 14bf0064e614..640933179a68 100644 --- a/tests/test_litellm/llms/azure/videos/test_azure_video_transformation.py +++ b/tests/test_litellm/llms/azure/videos/test_azure_video_transformation.py @@ -129,10 +129,12 @@ def test_transform_video_create_request(self): ) headers = {"Authorization": f"Bearer {self.api_key}"} + api_base = f"{self.api_base}/openai/v1/videos" - data, files = self.config.transform_video_create_request( + data, files, url = 
self.config.transform_video_create_request( model=self.model, prompt="A cinematic shot of a city at night", + api_base=api_base, video_create_optional_request_params=video_params, litellm_params=litellm_params, headers=headers @@ -142,6 +144,8 @@ def test_transform_video_create_request(self): assert data["seconds"] == 8 assert data["size"] == "720x1280" assert data["model"] == self.model + # URL should be returned as-is for Azure + assert url == api_base def test_transform_video_create_response(self): """Test video creation response transformation.""" @@ -275,13 +279,15 @@ def test_video_create_with_file_upload(self): ) headers = {"Authorization": f"Bearer {self.api_key}"} + api_base = f"{self.api_base}/openai/v1/videos" # Mock file existence with patch('os.path.exists', return_value=True): with patch('builtins.open', mock_open(read_data=b"fake image data")): - data, files = self.config.transform_video_create_request( + data, files, url = self.config.transform_video_create_request( model=self.model, prompt="A video with reference image", + api_base=api_base, video_create_optional_request_params=video_params, litellm_params=litellm_params, headers=headers @@ -291,6 +297,7 @@ def test_video_create_with_file_upload(self): assert data["seconds"] == 10 assert len(files) == 1 assert files[0][0] == "input_reference" + assert url == api_base def test_error_handling_in_response_transformation(self): """Test error handling in response transformation methods.""" diff --git a/tests/test_litellm/llms/gemini/videos/__init__.py b/tests/test_litellm/llms/gemini/videos/__init__.py new file mode 100644 index 000000000000..7156c063be7f --- /dev/null +++ b/tests/test_litellm/llms/gemini/videos/__init__.py @@ -0,0 +1,2 @@ +# Gemini Video Generation Tests + diff --git a/tests/test_litellm/llms/gemini/videos/test_gemini_video_transformation.py b/tests/test_litellm/llms/gemini/videos/test_gemini_video_transformation.py new file mode 100644 index 000000000000..660974181f9e --- /dev/null +++ b/tests/test_litellm/llms/gemini/videos/test_gemini_video_transformation.py @@ -0,0 +1,680 @@ +""" +Tests for Gemini (Veo) video generation transformation. 
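+
+Covers parameter mapping, request/response transformation, status polling,
+content download, and the unsupported remix/list/delete operations.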
+""" +import json +import os +from unittest.mock import MagicMock, Mock, patch + +import httpx +import pytest + +from litellm.llms.gemini.videos.transformation import GeminiVideoConfig +from litellm.llms.openai.cost_calculation import video_generation_cost +from litellm.types.router import GenericLiteLLMParams +from litellm.types.videos.main import VideoObject + + +class TestGeminiVideoConfig: + """Test GeminiVideoConfig transformation class.""" + + def setup_method(self): + """Setup test fixtures.""" + self.config = GeminiVideoConfig() + self.mock_logging_obj = Mock() + + def test_get_supported_openai_params(self): + """Test that correct params are supported.""" + params = self.config.get_supported_openai_params("veo-3.0-generate-preview") + + assert "model" in params + assert "prompt" in params + assert "input_reference" in params + assert "seconds" in params + assert "size" in params + + def test_validate_environment_with_api_key(self): + """Test environment validation with API key.""" + headers = {} + result = self.config.validate_environment( + headers=headers, + model="veo-3.0-generate-preview", + api_key="test-api-key-123" + ) + + assert "x-goog-api-key" in result + assert result["x-goog-api-key"] == "test-api-key-123" + assert "Content-Type" in result + assert result["Content-Type"] == "application/json" + + @patch.dict('os.environ', {}, clear=True) + def test_validate_environment_missing_api_key(self): + """Test that missing API key raises error.""" + headers = {} + + with pytest.raises(ValueError, match="GEMINI_API_KEY or GOOGLE_API_KEY is required"): + self.config.validate_environment( + headers=headers, + model="veo-3.0-generate-preview", + api_key=None + ) + + def test_get_complete_url(self): + """Test URL construction for video generation.""" + url = self.config.get_complete_url( + model="gemini/veo-3.0-generate-preview", + api_base="https://generativelanguage.googleapis.com", + litellm_params={} + ) + + expected = "https://generativelanguage.googleapis.com/v1beta/models/veo-3.0-generate-preview:predictLongRunning" + assert url == expected + + def test_get_complete_url_default_api_base(self): + """Test URL construction with default API base.""" + url = self.config.get_complete_url( + model="gemini/veo-3.0-generate-preview", + api_base=None, + litellm_params={} + ) + + assert url.startswith("https://generativelanguage.googleapis.com") + assert "veo-3.0-generate-preview:predictLongRunning" in url + + def test_transform_video_create_request(self): + """Test transformation of video creation request.""" + prompt = "A cat playing with a ball of yarn" + api_base = "https://generativelanguage.googleapis.com/v1beta/models/veo-3.0-generate-preview:predictLongRunning" + + data, files, url = self.config.transform_video_create_request( + model="veo-3.0-generate-preview", + prompt=prompt, + api_base=api_base, + video_create_optional_request_params={}, + litellm_params=GenericLiteLLMParams(), + headers={} + ) + + # Check Veo format + assert "instances" in data + assert len(data["instances"]) == 1 + assert data["instances"][0]["prompt"] == prompt + + # Check no files are uploaded + assert files == [] + + # URL should be returned as-is for Gemini + assert url == api_base + + def test_transform_video_create_request_with_params(self): + """Test transformation with optional parameters.""" + prompt = "A cat playing with a ball of yarn" + api_base = "https://generativelanguage.googleapis.com/v1beta/models/veo-3.0-generate-preview:predictLongRunning" + + data, files, url = 
self.config.transform_video_create_request( + model="veo-3.0-generate-preview", + prompt=prompt, + api_base=api_base, + video_create_optional_request_params={ + "aspectRatio": "16:9", + "durationSeconds": 8, + "resolution": "1080p" + }, + litellm_params=GenericLiteLLMParams(), + headers={} + ) + + # Check Veo format with instances and parameters separated + instance = data["instances"][0] + assert instance["prompt"] == prompt + + # Parameters should be in a separate object + assert "parameters" in data + assert data["parameters"]["aspectRatio"] == "16:9" + assert data["parameters"]["durationSeconds"] == 8 + assert data["parameters"]["resolution"] == "1080p" + + def test_map_openai_params(self): + """Test parameter mapping from OpenAI format to Veo format.""" + openai_params = { + "size": "1280x720", + "seconds": "8", + "input_reference": "test_image.jpg" + } + + mapped = self.config.map_openai_params( + video_create_optional_params=openai_params, + model="veo-3.0-generate-preview", + drop_params=False + ) + + # Check mappings (prompt is not mapped, it's passed separately) + assert mapped["aspectRatio"] == "16:9" # 1280x720 is landscape + assert mapped["durationSeconds"] == 8 + assert mapped["image"] == "test_image.jpg" + + def test_map_openai_params_default_duration(self): + """Test that durationSeconds is omitted when not provided.""" + openai_params = { + "size": "1280x720", + } + + mapped = self.config.map_openai_params( + video_create_optional_params=openai_params, + model="veo-3.0-generate-preview", + drop_params=False + ) + + assert mapped["aspectRatio"] == "16:9" + assert "durationSeconds" not in mapped + + def test_map_openai_params_with_gemini_specific_params(self): + """Test that Gemini-specific params are passed through correctly.""" + params_with_gemini_specific = { + "size": "1280x720", + "seconds": "8", + "video": {"bytesBase64Encoded": "abc123", "mimeType": "video/mp4"}, + "negativePrompt": "no people", + "referenceImages": [{"bytesBase64Encoded": "xyz789"}], + "personGeneration": "allow" + } + + mapped = self.config.map_openai_params( + video_create_optional_params=params_with_gemini_specific, + model="veo-3.1-generate-preview", + drop_params=False + ) + + # Check OpenAI params are mapped + assert mapped["aspectRatio"] == "16:9" + assert mapped["durationSeconds"] == 8 + + # Check Gemini-specific params are passed through + assert "video" in mapped + assert mapped["video"]["bytesBase64Encoded"] == "abc123" + assert mapped["negativePrompt"] == "no people" + assert mapped["referenceImages"] == [{"bytesBase64Encoded": "xyz789"}] + assert mapped["personGeneration"] == "allow" + + def test_map_openai_params_with_extra_body(self): + """Test that extra_body params are merged and extra_body is removed.""" + from litellm.videos.utils import VideoGenerationRequestUtils + + params_with_extra_body = { + "seconds": "4", + "extra_body": { + "negativePrompt": "no people", + "personGeneration": "allow", + "resolution": "1080p" + } + } + + mapped = VideoGenerationRequestUtils.get_optional_params_video_generation( + model="veo-3.0-generate-preview", + video_generation_provider_config=self.config, + video_generation_optional_params=params_with_extra_body + ) + + # Check OpenAI params are mapped + assert mapped["durationSeconds"] == 4 + + # Check extra_body params are merged + assert mapped["negativePrompt"] == "no people" + assert mapped["personGeneration"] == "allow" + assert mapped["resolution"] == "1080p" + + # Check extra_body itself is removed + assert "extra_body" not in mapped + + def 
test_convert_size_to_aspect_ratio(self): + """Test size to aspect ratio conversion.""" + # Landscape + assert self.config._convert_size_to_aspect_ratio("1280x720") == "16:9" + assert self.config._convert_size_to_aspect_ratio("1920x1080") == "16:9" + + # Portrait + assert self.config._convert_size_to_aspect_ratio("720x1280") == "9:16" + assert self.config._convert_size_to_aspect_ratio("1080x1920") == "9:16" + + # Invalid (defaults to 16:9) + assert self.config._convert_size_to_aspect_ratio("invalid") == "16:9" + # Empty string returns None (no size specified) + assert self.config._convert_size_to_aspect_ratio("") is None + + def test_transform_video_create_response(self): + """Test transformation of video creation response.""" + # Mock response + mock_response = Mock(spec=httpx.Response) + mock_response.json.return_value = { + "name": "operations/generate_1234567890", + "metadata": { + "createTime": "2024-11-04T10:00:00.123456Z" + } + } + + result = self.config.transform_video_create_response( + model="veo-3.0-generate-preview", + raw_response=mock_response, + logging_obj=self.mock_logging_obj, + custom_llm_provider="gemini" + ) + + assert isinstance(result, VideoObject) + # ID is base64 encoded with provider info + assert result.id.startswith("video_") + assert result.status == "processing" + assert result.object == "video" + + + def test_transform_video_create_response_with_cost_tracking(self): + """Test that duration is captured for cost tracking.""" + # Mock response + mock_response = Mock(spec=httpx.Response) + mock_response.json.return_value = { + "name": "operations/generate_1234567890", + } + + # Request data with durationSeconds in parameters + request_data = { + "instances": [{"prompt": "A test video"}], + "parameters": { + "durationSeconds": 5, + "aspectRatio": "16:9" + } + } + + result = self.config.transform_video_create_response( + model="gemini/veo-3.0-generate-preview", + raw_response=mock_response, + logging_obj=self.mock_logging_obj, + custom_llm_provider="gemini", + request_data=request_data + ) + + assert isinstance(result, VideoObject) + assert result.usage is not None, "Usage should be set" + assert "duration_seconds" in result.usage, "duration_seconds should be in usage" + assert result.usage["duration_seconds"] == 5.0, f"Expected 5.0, got {result.usage['duration_seconds']}" + + def test_transform_video_create_response_cost_tracking_with_different_durations(self): + """Test cost tracking with different duration values.""" + # Mock response + mock_response = Mock(spec=httpx.Response) + mock_response.json.return_value = { + "name": "operations/generate_1234567890", + } + + # Test with 8 seconds + request_data_8s = { + "instances": [{"prompt": "Test"}], + "parameters": {"durationSeconds": 8} + } + + result_8s = self.config.transform_video_create_response( + model="gemini/veo-3.1-generate-preview", + raw_response=mock_response, + logging_obj=self.mock_logging_obj, + custom_llm_provider="gemini", + request_data=request_data_8s + ) + + assert result_8s.usage["duration_seconds"] == 8.0 + + # Test with 4 seconds + request_data_4s = { + "instances": [{"prompt": "Test"}], + "parameters": {"durationSeconds": 4} + } + + result_4s = self.config.transform_video_create_response( + model="gemini/veo-3.1-fast-generate-preview", + raw_response=mock_response, + logging_obj=self.mock_logging_obj, + custom_llm_provider="gemini", + request_data=request_data_4s + ) + + assert result_4s.usage["duration_seconds"] == 4.0 + + def 
test_transform_video_create_response_cost_tracking_no_duration(self): + """Test that usage defaults to 8 seconds when no duration in request.""" + # Mock response + mock_response = Mock(spec=httpx.Response) + mock_response.json.return_value = { + "name": "operations/generate_1234567890", + } + + # Request data without durationSeconds (should default to 8 seconds for Google Veo) + request_data = { + "instances": [{"prompt": "A test video"}], + "parameters": { + "aspectRatio": "16:9" + } + } + + result = self.config.transform_video_create_response( + model="gemini/veo-3.0-generate-preview", + raw_response=mock_response, + logging_obj=self.mock_logging_obj, + custom_llm_provider="gemini", + request_data=request_data + ) + + assert isinstance(result, VideoObject) + # When no duration is provided, it defaults to 8 seconds (Google Veo default) + assert result.usage is not None + assert "duration_seconds" in result.usage + assert result.usage["duration_seconds"] == 8.0, "Should default to 8 seconds when not provided (Google Veo default)" + + def test_transform_video_status_retrieve_request(self): + """Test transformation of status retrieve request.""" + video_id = "gemini::operations/generate_1234567890::veo-3.0" + + url, params = self.config.transform_video_status_retrieve_request( + video_id=video_id, + api_base="https://generativelanguage.googleapis.com", + litellm_params=GenericLiteLLMParams(), + headers={} + ) + + assert "operations/generate_1234567890" in url + assert "v1beta" in url + assert params == {} + + def test_transform_video_status_retrieve_response_processing(self): + """Test transformation of status response when still processing.""" + mock_response = Mock(spec=httpx.Response) + mock_response.json.return_value = { + "name": "operations/generate_1234567890", + "done": False, + "metadata": { + "createTime": "2024-11-04T10:00:00.123456Z" + } + } + + result = self.config.transform_video_status_retrieve_response( + raw_response=mock_response, + logging_obj=self.mock_logging_obj, + custom_llm_provider="gemini" + ) + + assert isinstance(result, VideoObject) + assert result.status == "processing" + + def test_transform_video_status_retrieve_response_completed(self): + """Test transformation of status response when completed.""" + mock_response = Mock(spec=httpx.Response) + mock_response.json.return_value = { + "name": "operations/generate_1234567890", + "done": True, + "metadata": { + "createTime": "2024-11-04T10:00:00.123456Z" + }, + "response": { + "generateVideoResponse": { + "generatedSamples": [ + { + "video": { + "uri": "files/abc123xyz" + } + } + ] + } + } + } + + result = self.config.transform_video_status_retrieve_response( + raw_response=mock_response, + logging_obj=self.mock_logging_obj, + custom_llm_provider="gemini" + ) + + assert isinstance(result, VideoObject) + assert result.status == "completed" + + @patch('litellm.module_level_client') + def test_transform_video_content_request(self, mock_client): + """Test transformation of content download request.""" + video_id = "gemini::operations/generate_1234567890::veo-3.0" + + # Mock the status response + mock_status_response = Mock(spec=httpx.Response) + mock_status_response.json.return_value = { + "name": "operations/generate_1234567890", + "done": True, + "response": { + "generateVideoResponse": { + "generatedSamples": [ + { + "video": { + "uri": "files/abc123xyz" + } + } + ] + } + } + } + mock_status_response.raise_for_status = Mock() + mock_client.get.return_value = mock_status_response + + url, params = 
self.config.transform_video_content_request( + video_id=video_id, + api_base="https://generativelanguage.googleapis.com", + litellm_params=GenericLiteLLMParams(), + headers={} + ) + + # Should return download URL (may or may not include :download suffix) + assert "files/abc123xyz" in url + # Params are empty for Gemini file URIs + assert params == {} + + def test_transform_video_content_response_bytes(self): + """Test transformation of content response (returns bytes directly).""" + mock_response = Mock(spec=httpx.Response) + mock_response.headers = httpx.Headers({ + "content-type": "video/mp4" + }) + mock_response.content = b"fake_video_data" + + result = self.config.transform_video_content_response( + raw_response=mock_response, + logging_obj=self.mock_logging_obj + ) + + assert result == b"fake_video_data" + + def test_video_remix_not_supported(self): + """Test that video remix raises NotImplementedError.""" + with pytest.raises(NotImplementedError, match="Video remix is not supported"): + self.config.transform_video_remix_request( + video_id="test_id", + prompt="test prompt", + api_base="https://test.com", + litellm_params=GenericLiteLLMParams(), + headers={} + ) + + def test_video_list_not_supported(self): + """Test that video list raises NotImplementedError.""" + with pytest.raises(NotImplementedError, match="Video list is not supported"): + self.config.transform_video_list_request( + api_base="https://test.com", + litellm_params=GenericLiteLLMParams(), + headers={} + ) + + def test_video_delete_not_supported(self): + """Test that video delete raises NotImplementedError.""" + with pytest.raises(NotImplementedError, match="Video delete is not supported"): + self.config.transform_video_delete_request( + video_id="test_id", + api_base="https://test.com", + litellm_params=GenericLiteLLMParams(), + headers={} + ) + + +class TestGeminiVideoIntegration: + """Integration tests for Gemini video generation workflow.""" + + def test_full_workflow_mock(self): + """Test full workflow with mocked responses.""" + config = GeminiVideoConfig() + mock_logging_obj = Mock() + + # Step 1: Create request with parameters + prompt = "A beautiful sunset over mountains" + api_base = "https://generativelanguage.googleapis.com/v1beta/models/veo-3.0-generate-preview:predictLongRunning" + data, files, url = config.transform_video_create_request( + model="veo-3.0-generate-preview", + prompt=prompt, + api_base=api_base, + video_create_optional_request_params={ + "aspectRatio": "16:9", + "durationSeconds": 8 + }, + litellm_params=GenericLiteLLMParams(), + headers={} + ) + + # Verify instances and parameters structure + assert data["instances"][0]["prompt"] == prompt + assert data["parameters"]["aspectRatio"] == "16:9" + assert data["parameters"]["durationSeconds"] == 8 + + # Step 2: Parse create response + mock_create_response = Mock(spec=httpx.Response) + mock_create_response.json.return_value = { + "name": "operations/generate_abc123", + "metadata": { + "createTime": "2024-11-04T10:00:00.123456Z" + } + } + + video_obj = config.transform_video_create_response( + model="veo-3.0-generate-preview", + raw_response=mock_create_response, + logging_obj=mock_logging_obj, + custom_llm_provider="gemini" + ) + + assert video_obj.status == "processing" + assert video_obj.id.startswith("video_") + + # Step 3: Check status (completed) + mock_status_response = Mock(spec=httpx.Response) + mock_status_response.json.return_value = { + "name": "operations/generate_abc123", + "done": True, + "metadata": { + "createTime": 
"2024-11-04T10:00:00.123456Z" + }, + "response": { + "generateVideoResponse": { + "generatedSamples": [ + { + "video": { + "uri": "files/video123" + } + } + ] + } + } + } + + status_obj = config.transform_video_status_retrieve_response( + raw_response=mock_status_response, + logging_obj=mock_logging_obj, + custom_llm_provider="gemini" + ) + + assert status_obj.status == "completed" + + +class TestGeminiVideoCostTracking: + """Test cost tracking for Gemini video generation.""" + + def test_cost_calculation_with_duration(self): + """Test that cost is calculated correctly using duration from usage.""" + # Test VEO 2.0 ($0.35/second) + cost_veo2 = video_generation_cost( + model="gemini/veo-2.0-generate-001", + duration_seconds=5.0, + custom_llm_provider="gemini" + ) + expected_veo2 = 0.35 * 5.0 # $1.75 + assert abs(cost_veo2 - expected_veo2) < 0.001, f"Expected ${expected_veo2}, got ${cost_veo2}" + + # Test VEO 3.0 ($0.75/second) + cost_veo3 = video_generation_cost( + model="gemini/veo-3.0-generate-preview", + duration_seconds=8.0, + custom_llm_provider="gemini" + ) + expected_veo3 = 0.75 * 8.0 # $6.00 + assert abs(cost_veo3 - expected_veo3) < 0.001, f"Expected ${expected_veo3}, got ${cost_veo3}" + + # Test VEO 3.1 Standard ($0.40/second) + cost_veo31 = video_generation_cost( + model="gemini/veo-3.1-generate-preview", + duration_seconds=10.0, + custom_llm_provider="gemini" + ) + expected_veo31 = 0.40 * 10.0 # $4.00 + assert abs(cost_veo31 - expected_veo31) < 0.001, f"Expected ${expected_veo31}, got ${cost_veo31}" + + # Test VEO 3.1 Fast ($0.15/second) + cost_veo31_fast = video_generation_cost( + model="gemini/veo-3.1-fast-generate-preview", + duration_seconds=6.0, + custom_llm_provider="gemini" + ) + expected_veo31_fast = 0.15 * 6.0 # $0.90 + assert abs(cost_veo31_fast - expected_veo31_fast) < 0.001, f"Expected ${expected_veo31_fast}, got ${cost_veo31_fast}" + + def test_cost_calculation_end_to_end(self): + """Test complete cost tracking flow: request -> response -> cost calculation.""" + config = GeminiVideoConfig() + mock_logging_obj = Mock() + + # Create request with duration + request_data = { + "instances": [{"prompt": "A beautiful sunset"}], + "parameters": {"durationSeconds": 5} + } + + # Mock response + mock_response = Mock(spec=httpx.Response) + mock_response.json.return_value = { + "name": "operations/generate_test123", + } + + # Transform response + video_obj = config.transform_video_create_response( + model="gemini/veo-3.0-generate-preview", + raw_response=mock_response, + logging_obj=mock_logging_obj, + custom_llm_provider="gemini", + request_data=request_data + ) + + # Verify usage has duration + assert video_obj.usage is not None + assert "duration_seconds" in video_obj.usage + duration = video_obj.usage["duration_seconds"] + + # Calculate cost using the duration from usage + cost = video_generation_cost( + model="gemini/veo-3.0-generate-preview", + duration_seconds=duration, + custom_llm_provider="gemini" + ) + + # Verify cost calculation (VEO 3.0 is $0.75/second) + expected_cost = 0.75 * 5.0 # $3.75 + assert abs(cost - expected_cost) < 0.001, f"Expected ${expected_cost}, got ${cost}" + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) + diff --git a/tests/test_litellm/llms/vertex_ai/videos/__init__.py b/tests/test_litellm/llms/vertex_ai/videos/__init__.py new file mode 100644 index 000000000000..ab1481fbd4b9 --- /dev/null +++ b/tests/test_litellm/llms/vertex_ai/videos/__init__.py @@ -0,0 +1,4 @@ +""" +Tests for Vertex AI video generation. 
+""" + diff --git a/tests/test_litellm/llms/vertex_ai/videos/test_vertex_video_transformation.py b/tests/test_litellm/llms/vertex_ai/videos/test_vertex_video_transformation.py new file mode 100644 index 000000000000..7ae344e4999f --- /dev/null +++ b/tests/test_litellm/llms/vertex_ai/videos/test_vertex_video_transformation.py @@ -0,0 +1,550 @@ +""" +Tests for Vertex AI (Veo) video generation transformation. +""" +import json +import os +import pytest +from unittest.mock import Mock, MagicMock, patch +import httpx +import base64 + +from litellm.llms.vertex_ai.videos.transformation import ( + VertexAIVideoConfig, + _convert_image_to_vertex_format, +) +from litellm.types.videos.main import VideoObject +from litellm.types.router import GenericLiteLLMParams + + +class TestVertexAIVideoConfig: + """Test VertexAIVideoConfig transformation class.""" + + def setup_method(self): + """Setup test fixtures.""" + self.config = VertexAIVideoConfig() + self.mock_logging_obj = Mock() + + def test_get_supported_openai_params(self): + """Test that correct params are supported.""" + params = self.config.get_supported_openai_params("veo-002") + + assert "model" in params + assert "prompt" in params + assert "input_reference" in params + assert "seconds" in params + assert "size" in params + + @patch.object(VertexAIVideoConfig, 'get_access_token') + def test_validate_environment(self, mock_get_access_token): + """Test environment validation for Vertex AI.""" + # Mock the authentication + mock_get_access_token.return_value = ("mock-access-token", "test-project") + + headers = {} + litellm_params = {"vertex_project": "test-project"} + + result = self.config.validate_environment( + headers=headers, + model="veo-002", + api_key=None, + litellm_params=litellm_params + ) + + # Should add Authorization header + assert "Authorization" in result + assert result["Authorization"] == "Bearer mock-access-token" + assert "Content-Type" in result + + def test_get_complete_url(self): + """Test URL construction for Vertex AI video generation.""" + litellm_params = { + "vertex_project": "test-project", + "vertex_location": "us-central1", + } + + url = self.config.get_complete_url( + model="vertex_ai/veo-002", api_base=None, litellm_params=litellm_params + ) + + expected = "https://us-central1-aiplatform.googleapis.com/v1/projects/test-project/locations/us-central1/publishers/google/models/veo-002" + assert url == expected + # Should NOT include endpoint - that's added by transform methods + assert not url.endswith(":predictLongRunning") + + def test_get_complete_url_with_custom_api_base(self): + """Test URL construction with custom API base.""" + litellm_params = { + "vertex_project": "test-project", + "vertex_location": "us-west1", + } + + url = self.config.get_complete_url( + model="veo-002", + api_base="https://custom-endpoint.example.com", + litellm_params=litellm_params, + ) + + assert url.startswith("https://custom-endpoint.example.com") + assert "test-project" in url + assert "us-west1" in url + assert "veo-002" in url + # Should NOT include endpoint + assert not url.endswith(":predictLongRunning") + + def test_get_complete_url_missing_project(self): + """Test that missing vertex_project raises error.""" + litellm_params = {} + + # Note: The method might not raise if vertex_project can be fetched from env + # This test verifies the behavior when completely missing + try: + url = self.config.get_complete_url( + model="veo-002", api_base=None, litellm_params=litellm_params + ) + # If no error is raised, vertex_project was 
obtained from environment + # In that case, just verify a URL was returned + assert url is not None + except ValueError as e: + # Expected behavior when vertex_project is truly missing + assert "vertex_project is required" in str(e) + + def test_get_complete_url_default_location(self): + """Test URL construction with default location.""" + litellm_params = {"vertex_project": "test-project"} + + url = self.config.get_complete_url( + model="veo-002", api_base=None, litellm_params=litellm_params + ) + + # Should default to us-central1 + assert "us-central1" in url + # Should NOT include endpoint + assert not url.endswith(":predictLongRunning") + + def test_transform_video_create_request(self): + """Test transformation of video creation request.""" + prompt = "A cat playing with a ball of yarn" + api_base = "https://us-central1-aiplatform.googleapis.com/v1/projects/test-project/locations/us-central1/publishers/google/models/veo-002" + + data, files, url = self.config.transform_video_create_request( + model="veo-002", + prompt=prompt, + api_base=api_base, + video_create_optional_request_params={}, + litellm_params=GenericLiteLLMParams(), + headers={}, + ) + + # Check Vertex AI format + assert "instances" in data + assert len(data["instances"]) == 1 + assert data["instances"][0]["prompt"] == prompt + + # Parameters should not be present when empty + assert "parameters" not in data or data["parameters"] == {} + + # Check URL has :predictLongRunning appended + assert url.endswith(":predictLongRunning") + assert api_base in url + + # Check no files are uploaded + assert files == [] + + def test_transform_video_create_request_with_parameters(self): + """Test video creation request with aspect ratio and duration.""" + prompt = "A dog running in a park" + api_base = "https://us-central1-aiplatform.googleapis.com/v1/projects/test-project/locations/us-central1/publishers/google/models/veo-002" + + data, files, url = self.config.transform_video_create_request( + model="veo-002", + prompt=prompt, + api_base=api_base, + video_create_optional_request_params={ + "aspectRatio": "16:9", + "durationSeconds": 8, + }, + litellm_params=GenericLiteLLMParams(), + headers={}, + ) + + assert data["instances"][0]["prompt"] == prompt + assert data["parameters"]["aspectRatio"] == "16:9" + assert data["parameters"]["durationSeconds"] == 8 + assert url.endswith(":predictLongRunning") + + def test_transform_video_create_request_with_image(self): + """Test video creation request with image input.""" + prompt = "Extend this image with animation" + api_base = "https://us-central1-aiplatform.googleapis.com/v1/projects/test-project/locations/us-central1/publishers/google/models/veo-002" + + # Create a mock image file + mock_image = Mock() + mock_image.read.return_value = b"fake_image_data" + mock_image.seek = Mock() + + with patch( + "litellm.llms.vertex_ai.videos.transformation.ImageEditRequestUtils.get_image_content_type", + return_value="image/jpeg", + ): + data, files, url = self.config.transform_video_create_request( + model="veo-002", + prompt=prompt, + api_base=api_base, + video_create_optional_request_params={"image": mock_image}, + litellm_params=GenericLiteLLMParams(), + headers={}, + ) + + # Check image was converted to base64 + assert "image" in data["instances"][0] + assert "bytesBase64Encoded" in data["instances"][0]["image"] + assert "mimeType" in data["instances"][0]["image"] + assert data["instances"][0]["image"]["mimeType"] == "image/jpeg" + assert url.endswith(":predictLongRunning") + + def 
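test_transform_video_create_request_image_and_parameters_sketch(self): + """Illustrative sketch, not part of the upstream change: an image input + combined with explicit aspect ratio and duration, reusing the mocks + from the tests above.""" + prompt = "Animate this image" + api_base = "https://us-central1-aiplatform.googleapis.com/v1/projects/test-project/locations/us-central1/publishers/google/models/veo-002" + + mock_image = Mock() + mock_image.read.return_value = b"fake_image_data" + mock_image.seek = Mock() + + with patch( + "litellm.llms.vertex_ai.videos.transformation.ImageEditRequestUtils.get_image_content_type", + return_value="image/jpeg", + ): + data, files, url = self.config.transform_video_create_request( + model="veo-002", + prompt=prompt, + api_base=api_base, + video_create_optional_request_params={ + "image": mock_image, + "aspectRatio": "9:16", + "durationSeconds": 6, + }, + litellm_params=GenericLiteLLMParams(), + headers={}, + ) + + assert data["instances"][0]["prompt"] == prompt + assert data["instances"][0]["image"]["mimeType"] == "image/jpeg" + assert data["parameters"]["aspectRatio"] == "9:16" + assert data["parameters"]["durationSeconds"] == 6 + assert url.endswith(":predictLongRunning") + + def 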
test_map_openai_params(self): + """Test parameter mapping from OpenAI to Vertex AI format.""" + openai_params = {"seconds": "8", "size": "1280x720"} + + mapped = self.config.map_openai_params( + video_create_optional_params=openai_params, + model="veo-002", + drop_params=False, + ) + + assert mapped["durationSeconds"] == 8 + assert mapped["aspectRatio"] == "16:9" + + def test_map_openai_params_default_duration(self): + """Test that durationSeconds is omitted when not provided.""" + openai_params = {"size": "1280x720"} + + mapped = self.config.map_openai_params( + video_create_optional_params=openai_params, + model="veo-002", + drop_params=False, + ) + + assert mapped["aspectRatio"] == "16:9" + assert "durationSeconds" not in mapped + + def test_map_openai_params_size_conversions(self): + """Test size to aspect ratio conversions.""" + test_cases = [ + ("1280x720", "16:9"), + ("1920x1080", "16:9"), + ("720x1280", "9:16"), + ("1080x1920", "9:16"), + ("unknown", "16:9"), # Default + ] + + for size, expected_ratio in test_cases: + mapped = self.config.map_openai_params( + video_create_optional_params={"size": size}, + model="veo-002", + drop_params=False, + ) + assert mapped["aspectRatio"] == expected_ratio + + def test_transform_video_create_response(self): + """Test transformation of video creation response.""" + # Mock response with operation name + mock_response = Mock(spec=httpx.Response) + mock_response.json.return_value = { + "name": "projects/test-project/locations/us-central1/publishers/google/models/veo-002/operations/12345", + "metadata": {"createTime": "2024-01-15T10:30:00.000Z"}, + } + + video_obj = self.config.transform_video_create_response( + model="vertex_ai/veo-002", + raw_response=mock_response, + logging_obj=self.mock_logging_obj, + custom_llm_provider="vertex_ai", + ) + + assert isinstance(video_obj, VideoObject) + assert video_obj.status == "processing" + assert video_obj.object == "video" + # Video ID is encoded with provider info, so just check it's not empty + assert video_obj.id + assert len(video_obj.id) > 0 + + def test_transform_video_create_response_missing_operation_name(self): + """Test that missing operation name raises error.""" + mock_response = Mock(spec=httpx.Response) + mock_response.json.return_value = {} + + with pytest.raises(ValueError, match="No operation name in Veo response"): + self.config.transform_video_create_response( + model="veo-002", + raw_response=mock_response, + logging_obj=self.mock_logging_obj, + ) + + def test_transform_video_status_retrieve_request(self): + """Test transformation of video status retrieve request.""" + operation_name = "projects/test-project/locations/us-central1/publishers/google/models/veo-002/operations/12345" + + # Provide an api_base that would be returned from get_complete_url + api_base = "https://us-central1-aiplatform.googleapis.com/v1/projects/test-project/locations/us-central1/publishers/google/models/veo-002" + + url, params = self.config.transform_video_status_retrieve_request( + video_id=operation_name, + api_base=api_base, + litellm_params=GenericLiteLLMParams(), + headers={}, + ) + + # Check URL contains fetchPredictOperation endpoint + assert "fetchPredictOperation" in url + assert "test-project" in url + assert "us-central1" in url + assert "veo-002" in url + + # Check params contain operation name + assert params["operationName"] == operation_name + + def test_transform_video_status_retrieve_request_invalid_format(self): + """Test that invalid operation name format raises error.""" + 
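# A well-formed operation name, as exercised by the tests above, looks like: + # "projects/<project>/locations/<location>/publishers/google/models/<model>/operations/<id>", + # so a short unprefixed path like the one below must be rejected. + 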
invalid_operation_name = "invalid/operation/name" + + with pytest.raises(ValueError, match="Invalid operation name format"): + self.config.transform_video_status_retrieve_request( + video_id=invalid_operation_name, + api_base=None, + litellm_params=GenericLiteLLMParams(), + headers={}, + ) + + def test_transform_video_status_retrieve_response_processing(self): + """Test transformation of status response while processing.""" + mock_response = Mock(spec=httpx.Response) + mock_response.json.return_value = { + "name": "projects/test-project/locations/us-central1/publishers/google/models/veo-002/operations/12345", + "done": False, + "metadata": {"createTime": "2024-01-15T10:30:00.000Z"}, + } + + video_obj = self.config.transform_video_status_retrieve_response( + raw_response=mock_response, + logging_obj=self.mock_logging_obj, + custom_llm_provider="vertex_ai", + ) + + assert isinstance(video_obj, VideoObject) + assert video_obj.status == "processing" + + def test_transform_video_status_retrieve_response_completed(self): + """Test transformation of status response when completed.""" + mock_response = Mock(spec=httpx.Response) + mock_response.json.return_value = { + "name": "projects/test-project/locations/us-central1/publishers/google/models/veo-002/operations/12345", + "done": True, + "metadata": {"createTime": "2024-01-15T10:30:00.000Z"}, + "response": { + "@type": "type.googleapis.com/cloud.ai.large_models.vision.GenerateVideoResponse", + "raiMediaFilteredCount": 0, + "videos": [ + { + "bytesBase64Encoded": base64.b64encode( + b"fake_video_data" + ).decode(), + "mimeType": "video/mp4", + } + ], + }, + } + + video_obj = self.config.transform_video_status_retrieve_response( + raw_response=mock_response, + logging_obj=self.mock_logging_obj, + custom_llm_provider="vertex_ai", + ) + + assert isinstance(video_obj, VideoObject) + assert video_obj.status == "completed" + + def test_transform_video_status_retrieve_response_error(self): + """Test transformation of status response when an error is returned.""" + mock_response = Mock(spec=httpx.Response) + mock_response.json.return_value = { + "name": "projects/test-project/locations/us-central1/publishers/google/models/veo-002/operations/12345", + "done": True, + "metadata": {"createTime": "2024-01-15T10:30:00.000Z"}, + "error": { + "code": 3, + "message": "Unsupported output video duration 3 seconds, supported durations are [8,5,6,7] for feature text_to_video.", + }, + } + + video_obj = self.config.transform_video_status_retrieve_response( + raw_response=mock_response, + logging_obj=self.mock_logging_obj, + custom_llm_provider="vertex_ai", + ) + + assert isinstance(video_obj, VideoObject) + assert video_obj.status == "failed" + assert video_obj.error == mock_response.json.return_value["error"] + + def test_transform_video_content_request(self): + """Test transformation of video content request.""" + operation_name = "projects/test-project/locations/us-central1/publishers/google/models/veo-002/operations/12345" + api_base = "https://us-central1-aiplatform.googleapis.com/v1/projects/test-project/locations/us-central1/publishers/google/models/veo-002" + + url, params = self.config.transform_video_content_request( + video_id=operation_name, + api_base=api_base, + litellm_params=GenericLiteLLMParams(), + headers={}, + ) + + # Should use same fetchPredictOperation endpoint + assert "fetchPredictOperation" in url + assert params["operationName"] == operation_name + + def test_transform_video_content_response(self): + """Test transformation of video content 
response.""" + fake_video_bytes = b"fake_video_data_12345" + encoded_video = base64.b64encode(fake_video_bytes).decode() + + mock_response = Mock(spec=httpx.Response) + mock_response.json.return_value = { + "name": "projects/test-project/locations/us-central1/publishers/google/models/veo-002/operations/12345", + "done": True, + "response": { + "@type": "type.googleapis.com/cloud.ai.large_models.vision.GenerateVideoResponse", + "videos": [ + {"bytesBase64Encoded": encoded_video, "mimeType": "video/mp4"} + ], + }, + } + + video_bytes = self.config.transform_video_content_response( + raw_response=mock_response, logging_obj=self.mock_logging_obj + ) + + assert isinstance(video_bytes, bytes) + assert video_bytes == fake_video_bytes + + def test_transform_video_content_response_not_complete(self): + """Test that incomplete video raises error.""" + mock_response = Mock(spec=httpx.Response) + mock_response.json.return_value = { + "name": "projects/test-project/locations/us-central1/publishers/google/models/veo-002/operations/12345", + "done": False, + } + + with pytest.raises( + ValueError, match="Video generation is not complete yet" + ): + self.config.transform_video_content_response( + raw_response=mock_response, logging_obj=self.mock_logging_obj + ) + + def test_transform_video_content_response_missing_video_data(self): + """Test that missing video data raises error.""" + mock_response = Mock(spec=httpx.Response) + mock_response.json.return_value = { + "name": "projects/test-project/locations/us-central1/publishers/google/models/veo-002/operations/12345", + "done": True, + "response": {"videos": []}, + } + + with pytest.raises(ValueError, match="No video data found"): + self.config.transform_video_content_response( + raw_response=mock_response, logging_obj=self.mock_logging_obj + ) + + def test_transform_video_remix_request_not_supported(self): + """Test that video remix raises NotImplementedError.""" + with pytest.raises( + NotImplementedError, match="Video remix is not supported" + ): + self.config.transform_video_remix_request( + video_id="test-video-id", + prompt="new prompt", + api_base="https://example.com", + litellm_params=GenericLiteLLMParams(), + headers={}, + ) + + def test_transform_video_list_request_not_supported(self): + """Test that video list raises NotImplementedError.""" + with pytest.raises(NotImplementedError, match="Video list is not supported"): + self.config.transform_video_list_request( + api_base="https://example.com", + litellm_params=GenericLiteLLMParams(), + headers={}, + ) + + def test_transform_video_delete_request_not_supported(self): + """Test that video delete raises NotImplementedError.""" + with pytest.raises( + NotImplementedError, match="Video delete is not supported" + ): + self.config.transform_video_delete_request( + video_id="test-video-id", + api_base="https://example.com", + litellm_params=GenericLiteLLMParams(), + headers={}, + ) + + def test_get_error_class(self): + """Test error class generation.""" + error = self.config.get_error_class( + error_message="Test error", status_code=500, headers={} + ) + + # Should return VertexAIError + from litellm.llms.vertex_ai.common_utils import VertexAIError + + assert isinstance(error, VertexAIError) + assert error.status_code == 500 + assert "Test error" in str(error) + + +class TestConvertImageToVertexFormat: + """Test the _convert_image_to_vertex_format helper function.""" + + def test_convert_image_to_vertex_format(self): + """Test image conversion to Vertex AI format.""" + fake_image_data = 
b"fake_jpeg_image_data" + mock_image = Mock() + mock_image.read.return_value = fake_image_data + mock_image.seek = Mock() + + with patch( + "litellm.llms.vertex_ai.videos.transformation.ImageEditRequestUtils.get_image_content_type", + return_value="image/jpeg", + ): + result = _convert_image_to_vertex_format(mock_image) + + assert "bytesBase64Encoded" in result + assert "mimeType" in result + assert result["mimeType"] == "image/jpeg" + + # Verify base64 encoding + decoded = base64.b64decode(result["bytesBase64Encoded"]) + assert decoded == fake_image_data + + def test_convert_image_to_vertex_format_with_seek(self): + """Test image conversion with seek support.""" + fake_image_data = b"fake_png_image_data" + mock_image = Mock() + mock_image.read.return_value = fake_image_data + mock_image.seek = Mock() + + with patch( + "litellm.llms.vertex_ai.videos.transformation.ImageEditRequestUtils.get_image_content_type", + return_value="image/png", + ): + result = _convert_image_to_vertex_format(mock_image) + + # Verify seek was called + mock_image.seek.assert_called_once_with(0) + + assert result["mimeType"] == "image/png" + decoded = base64.b64decode(result["bytesBase64Encoded"]) + assert decoded == fake_image_data + diff --git a/tests/test_litellm/test_video_generation.py b/tests/test_litellm/test_video_generation.py index 6007201a676c..b11e38b32bb6 100644 --- a/tests/test_litellm/test_video_generation.py +++ b/tests/test_litellm/test_video_generation.py @@ -150,9 +150,10 @@ def test_video_generation_request_transformation(self): config = OpenAIVideoConfig() # Test request transformation - data, files = config.transform_video_create_request( + data, files, returned_api_base = config.transform_video_create_request( model="sora-2", prompt="Test video prompt", + api_base="https://api.openai.com/v1/videos", video_create_optional_request_params={ "seconds": "8", "size": "720x1280" @@ -166,6 +167,7 @@ def test_video_generation_request_transformation(self): assert data["seconds"] == "8" assert data["size"] == "720x1280" assert files == [] + assert returned_api_base == "https://api.openai.com/v1/videos" def test_video_generation_response_transformation(self): """Test video generation response transformation.""" @@ -228,9 +230,10 @@ def test_video_generation_with_files(self): mock_file = MagicMock() mock_file.read.return_value = b"fake_image_data" - data, files = config.transform_video_create_request( + data, files, returned_api_base = config.transform_video_create_request( model="sora-2", prompt="Test video with image", + api_base="https://api.openai.com/v1/videos", video_create_optional_request_params={ "input_reference": mock_file, "seconds": "8", @@ -291,42 +294,29 @@ def test_video_generation_parameter_mapping(self): assert mapped_params["user"] == "test-user" def test_video_generation_unsupported_parameters(self): - """Test video generation with unsupported parameters.""" + """Test video generation with provider-specific parameters via extra_body.""" from litellm.videos.utils import VideoGenerationRequestUtils - # Test unsupported parameter detection - with pytest.raises(litellm.UnsupportedParamsError): - VideoGenerationRequestUtils.get_optional_params_video_generation( - model="sora-2", - video_generation_provider_config=OpenAIVideoConfig(), - video_generation_optional_params={ - "unsupported_param": "value" + # Test that provider-specific parameters can be passed via extra_body + # This allows support for Vertex AI and Gemini specific parameters + result = 
VideoGenerationRequestUtils.get_optional_params_video_generation( + model="sora-2", + video_generation_provider_config=OpenAIVideoConfig(), + video_generation_optional_params={ + "seconds": "8", + "extra_body": { + "vertex_ai_param": "value", + "gemini_param": "value2" } - ) - - def test_video_generation_request_utils(self): - """Test video generation request utilities.""" - from litellm.videos.utils import VideoGenerationRequestUtils - - # Test parameter filtering - params = { - "prompt": "Test video", - "model": "sora-2", - "seconds": "8", - "size": "720x1280", - "user": "test-user", - "invalid_param": "should_be_filtered" - } - - filtered_params = VideoGenerationRequestUtils.get_requested_video_generation_optional_param(params) + } + ) - # Should only contain valid parameters - assert "prompt" not in filtered_params # prompt is required, not optional - assert "seconds" in filtered_params - assert "size" in filtered_params - assert "user" in filtered_params - assert "invalid_param" not in filtered_params - # Note: model is included in the filtered params as it's part of the TypedDict + # extra_body params should be merged into the result + assert result["seconds"] == "8" + assert result["vertex_ai_param"] == "value" + assert result["gemini_param"] == "value2" + # extra_body itself should be removed from the result + assert "extra_body" not in result def test_video_generation_types(self): """Test video generation type definitions.""" diff --git a/ui/litellm-dashboard/package-lock.json b/ui/litellm-dashboard/package-lock.json index 7205738a21cf..6e21fc2f5f7c 100644 --- a/ui/litellm-dashboard/package-lock.json +++ b/ui/litellm-dashboard/package-lock.json @@ -23233,4 +23233,4 @@ } } } -} +} \ No newline at end of file diff --git a/ui/litellm-dashboard/src/components/add_model/add_model_modes.tsx b/ui/litellm-dashboard/src/components/add_model/add_model_modes.tsx index b6c2410a0277..30deb79469ca 100644 --- a/ui/litellm-dashboard/src/components/add_model/add_model_modes.tsx +++ b/ui/litellm-dashboard/src/components/add_model/add_model_modes.tsx @@ -6,6 +6,7 @@ export const TEST_MODES = [ { value: "audio_speech", label: "Audio Speech - /audio/speech" }, { value: "audio_transcription", label: "Audio Transcription - /audio/transcriptions" }, { value: "image_generation", label: "Image Generation - /images/generations" }, + { value: "video_generation", label: "Video Generation - /videos" }, { value: "rerank", label: "Rerank - /rerank" }, { value: "realtime", label: "Realtime - /realtime" }, { value: "batch", label: "Batch - /batch" }, diff --git a/ui/litellm-dashboard/src/components/chat_ui/mode_endpoint_mapping.tsx b/ui/litellm-dashboard/src/components/chat_ui/mode_endpoint_mapping.tsx index 6c1fbcd504a0..2aad64fb6198 100644 --- a/ui/litellm-dashboard/src/components/chat_ui/mode_endpoint_mapping.tsx +++ b/ui/litellm-dashboard/src/components/chat_ui/mode_endpoint_mapping.tsx @@ -5,6 +5,7 @@ export enum ModelMode { AUDIO_SPEECH = "audio_speech", AUDIO_TRANSCRIPTION = "audio_transcription", IMAGE_GENERATION = "image_generation", + VIDEO_GENERATION = "video_generation", CHAT = "chat", RESPONSES = "responses", IMAGE_EDITS = "image_edits", @@ -15,6 +16,7 @@ export enum ModelMode { // Define an enum for the endpoint types your UI calls export enum EndpointType { IMAGE = "image", + VIDEO = "video", CHAT = "chat", RESPONSES = "responses", IMAGE_EDITS = "image_edits", @@ -28,6 +30,7 @@ export enum EndpointType { // Create a mapping between the model mode and the corresponding endpoint type export const 
litellmModeMapping: Record<ModelMode, EndpointType> = { + [ModelMode.IMAGE_GENERATION]: EndpointType.IMAGE, + [ModelMode.VIDEO_GENERATION]: EndpointType.VIDEO, + [ModelMode.CHAT]: EndpointType.CHAT, + [ModelMode.RESPONSES]: EndpointType.RESPONSES, + [ModelMode.IMAGE_EDITS]: EndpointType.IMAGE_EDITS,
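+ // Note: litellmModeMapping[ModelMode.VIDEO_GENERATION] === EndpointType.VIDEO, so playground "Video Generation - /videos" tests are routed to the video endpoints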