build(model_prices_and_context_window.json): add tpm/rpm limits for all gemini models

Allows rate-limit tracking for Gemini models even when wildcard routing is enabled.

Addresses #6914
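For reference, a minimal sketch of how a caller can read the new limits once the updated cost map is loaded. It uses the same local-map setup as the test added below, and the printed values are the ones introduced by this commit:

import os

import litellm

# Load the bundled local cost map (same pattern as the test in this commit).
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
litellm.model_cost = litellm.get_model_cost_map(url="")

info = litellm.model_cost["gemini/gemini-1.5-flash-8b"]
print(info["tpm"], info["rpm"])  # 4000000 4000, per the entry added in this commit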
krrishdholakia committed Nov 26, 2024
1 parent 7f346ba commit 1b99371
Showing 3 changed files with 124 additions and 6 deletions.
58 changes: 55 additions & 3 deletions litellm/model_prices_and_context_window_backup.json
@@ -3460,6 +3460,30 @@
"rpm": 2000,
"source": "https://ai.google.dev/pricing"
},
"gemini/gemini-1.5-flash-8b": {
"max_tokens": 8192,
"max_input_tokens": 1048576,
"max_output_tokens": 8192,
"max_images_per_prompt": 3000,
"max_videos_per_prompt": 10,
"max_video_length": 1,
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_pdf_size_mb": 30,
"input_cost_per_token": 0,
"input_cost_per_token_above_128k_tokens": 0,
"output_cost_per_token": 0,
"output_cost_per_token_above_128k_tokens": 0,
"litellm_provider": "gemini",
"mode": "chat",
"supports_system_messages": true,
"supports_function_calling": true,
"supports_vision": true,
"supports_response_schema": true,
"tpm": 4000000,
"rpm": 4000,
"source": "https://ai.google.dev/pricing"
},
"gemini/gemini-1.5-flash-8b-exp-0924": {
"max_tokens": 8192,
"max_input_tokens": 1048576,
@@ -3481,7 +3505,7 @@
"supports_vision": true,
"supports_response_schema": true,
"tpm": 4000000,
"rpm": 2000,
"rpm": 4000,
"source": "https://ai.google.dev/pricing"
},
"gemini/gemini-exp-1114": {
@@ -3504,7 +3528,12 @@
"supports_function_calling": true,
"supports_vision": true,
"supports_response_schema": true,
"source": "https://ai.google.dev/pricing"
"tpm": 4000000,
"rpm": 1000,
"source": "https://ai.google.dev/pricing",
"metadata": {
"notes": "Rate limits not documented for gemini-exp-1114. Assuming same as gemini-1.5-pro."
}
},
"gemini/gemini-1.5-flash-exp-0827": {
"max_tokens": 8192,
@@ -3526,6 +3555,8 @@
"supports_function_calling": true,
"supports_vision": true,
"supports_response_schema": true,
"tpm": 4000000,
"rpm": 2000,
"source": "https://ai.google.dev/pricing"
},
"gemini/gemini-1.5-flash-8b-exp-0827": {
@@ -3547,6 +3578,9 @@
"supports_system_messages": true,
"supports_function_calling": true,
"supports_vision": true,
"supports_response_schema": true,
"tpm": 4000000,
"rpm": 4000,
"source": "https://ai.google.dev/pricing"
},
"gemini/gemini-pro": {
@@ -3560,7 +3594,10 @@
"litellm_provider": "gemini",
"mode": "chat",
"supports_function_calling": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
"rpd": 30000,
"tpm": 120000,
"rpm": 360,
"source": "https://ai.google.dev/gemini-api/docs/models/gemini"
},
"gemini/gemini-1.5-pro": {
"max_tokens": 8192,
@@ -3577,6 +3614,8 @@
"supports_vision": true,
"supports_tool_choice": true,
"supports_response_schema": true,
"tpm": 4000000,
"rpm": 1000,
"source": "https://ai.google.dev/pricing"
},
"gemini/gemini-1.5-pro-002": {
@@ -3595,6 +3634,8 @@
"supports_tool_choice": true,
"supports_response_schema": true,
"supports_prompt_caching": true,
"tpm": 4000000,
"rpm": 1000,
"source": "https://ai.google.dev/pricing"
},
"gemini/gemini-1.5-pro-001": {
@@ -3613,6 +3654,8 @@
"supports_tool_choice": true,
"supports_response_schema": true,
"supports_prompt_caching": true,
"tpm": 4000000,
"rpm": 1000,
"source": "https://ai.google.dev/pricing"
},
"gemini/gemini-1.5-pro-exp-0801": {
@@ -3630,6 +3673,8 @@
"supports_vision": true,
"supports_tool_choice": true,
"supports_response_schema": true,
"tpm": 4000000,
"rpm": 1000,
"source": "https://ai.google.dev/pricing"
},
"gemini/gemini-1.5-pro-exp-0827": {
@@ -3647,6 +3692,8 @@
"supports_vision": true,
"supports_tool_choice": true,
"supports_response_schema": true,
"tpm": 4000000,
"rpm": 1000,
"source": "https://ai.google.dev/pricing"
},
"gemini/gemini-1.5-pro-latest": {
@@ -3664,6 +3711,8 @@
"supports_vision": true,
"supports_tool_choice": true,
"supports_response_schema": true,
"tpm": 4000000,
"rpm": 1000,
"source": "https://ai.google.dev/pricing"
},
"gemini/gemini-pro-vision": {
@@ -3678,6 +3727,9 @@
"mode": "chat",
"supports_function_calling": true,
"supports_vision": true,
"rpd": 30000,
"tpm": 120000,
"rpm": 360,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
},
"gemini/gemini-gemma-2-27b-it": {
58 changes: 55 additions & 3 deletions model_prices_and_context_window.json
@@ -3460,6 +3460,30 @@
"rpm": 2000,
"source": "https://ai.google.dev/pricing"
},
"gemini/gemini-1.5-flash-8b": {
"max_tokens": 8192,
"max_input_tokens": 1048576,
"max_output_tokens": 8192,
"max_images_per_prompt": 3000,
"max_videos_per_prompt": 10,
"max_video_length": 1,
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_pdf_size_mb": 30,
"input_cost_per_token": 0,
"input_cost_per_token_above_128k_tokens": 0,
"output_cost_per_token": 0,
"output_cost_per_token_above_128k_tokens": 0,
"litellm_provider": "gemini",
"mode": "chat",
"supports_system_messages": true,
"supports_function_calling": true,
"supports_vision": true,
"supports_response_schema": true,
"tpm": 4000000,
"rpm": 4000,
"source": "https://ai.google.dev/pricing"
},
"gemini/gemini-1.5-flash-8b-exp-0924": {
"max_tokens": 8192,
"max_input_tokens": 1048576,
@@ -3481,7 +3505,7 @@
"supports_vision": true,
"supports_response_schema": true,
"tpm": 4000000,
"rpm": 2000,
"rpm": 4000,
"source": "https://ai.google.dev/pricing"
},
"gemini/gemini-exp-1114": {
@@ -3504,7 +3528,12 @@
"supports_function_calling": true,
"supports_vision": true,
"supports_response_schema": true,
"source": "https://ai.google.dev/pricing"
"tpm": 4000000,
"rpm": 1000,
"source": "https://ai.google.dev/pricing",
"metadata": {
"notes": "Rate limits not documented for gemini-exp-1114. Assuming same as gemini-1.5-pro."
}
},
"gemini/gemini-1.5-flash-exp-0827": {
"max_tokens": 8192,
@@ -3526,6 +3555,8 @@
"supports_function_calling": true,
"supports_vision": true,
"supports_response_schema": true,
"tpm": 4000000,
"rpm": 2000,
"source": "https://ai.google.dev/pricing"
},
"gemini/gemini-1.5-flash-8b-exp-0827": {
@@ -3547,6 +3578,9 @@
"supports_system_messages": true,
"supports_function_calling": true,
"supports_vision": true,
"supports_response_schema": true,
"tpm": 4000000,
"rpm": 4000,
"source": "https://ai.google.dev/pricing"
},
"gemini/gemini-pro": {
@@ -3560,7 +3594,10 @@
"litellm_provider": "gemini",
"mode": "chat",
"supports_function_calling": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
"rpd": 30000,
"tpm": 120000,
"rpm": 360,
"source": "https://ai.google.dev/gemini-api/docs/models/gemini"
},
"gemini/gemini-1.5-pro": {
"max_tokens": 8192,
@@ -3577,6 +3614,8 @@
"supports_vision": true,
"supports_tool_choice": true,
"supports_response_schema": true,
"tpm": 4000000,
"rpm": 1000,
"source": "https://ai.google.dev/pricing"
},
"gemini/gemini-1.5-pro-002": {
@@ -3595,6 +3634,8 @@
"supports_tool_choice": true,
"supports_response_schema": true,
"supports_prompt_caching": true,
"tpm": 4000000,
"rpm": 1000,
"source": "https://ai.google.dev/pricing"
},
"gemini/gemini-1.5-pro-001": {
@@ -3613,6 +3654,8 @@
"supports_tool_choice": true,
"supports_response_schema": true,
"supports_prompt_caching": true,
"tpm": 4000000,
"rpm": 1000,
"source": "https://ai.google.dev/pricing"
},
"gemini/gemini-1.5-pro-exp-0801": {
@@ -3630,6 +3673,8 @@
"supports_vision": true,
"supports_tool_choice": true,
"supports_response_schema": true,
"tpm": 4000000,
"rpm": 1000,
"source": "https://ai.google.dev/pricing"
},
"gemini/gemini-1.5-pro-exp-0827": {
@@ -3647,6 +3692,8 @@
"supports_vision": true,
"supports_tool_choice": true,
"supports_response_schema": true,
"tpm": 4000000,
"rpm": 1000,
"source": "https://ai.google.dev/pricing"
},
"gemini/gemini-1.5-pro-latest": {
@@ -3664,6 +3711,8 @@
"supports_vision": true,
"supports_tool_choice": true,
"supports_response_schema": true,
"tpm": 4000000,
"rpm": 1000,
"source": "https://ai.google.dev/pricing"
},
"gemini/gemini-pro-vision": {
@@ -3678,6 +3727,9 @@
"mode": "chat",
"supports_function_calling": true,
"supports_vision": true,
"rpd": 30000,
"tpm": 120000,
"rpm": 360,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
},
"gemini/gemini-gemma-2-27b-it": {
14 changes: 14 additions & 0 deletions tests/local_testing/test_get_model_info.py
@@ -102,3 +102,17 @@ def test_get_model_info_ollama_chat():
print(mock_client.call_args.kwargs)

assert mock_client.call_args.kwargs["json"]["name"] == "mistral"


def test_get_model_info_gemini():
    """
    Tests if ALL gemini models have 'tpm' and 'rpm' in the model info
    """
    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    model_map = litellm.model_cost
    for model, info in model_map.items():
        if model.startswith("gemini/") and "gemma" not in model:
            assert info.get("tpm") is not None, f"{model} does not have tpm"
            assert info.get("rpm") is not None, f"{model} does not have rpm"
