Skip to content

Commit

Permalink
(feat) Add support for using @google/generative-ai JS with LiteLLM Pr…
Browse files Browse the repository at this point in the history
…oxy (#6899)

* feat - allow using gemini js SDK with LiteLLM

* add auth for gemini_proxy_route

* basic local test for js

* test cost tagging gemini js requests

* add js sdk test for gemini with litellm

* add docs on gemini JS SDK

* run node.js tests

* fix google ai studio tests

* fix vertex js spend test
  • Loading branch information
ishaan-jaff authored Nov 25, 2024
1 parent f77bf49 commit c60261c
Show file tree
Hide file tree
Showing 8 changed files with 323 additions and 12 deletions.
6 changes: 4 additions & 2 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1191,6 +1191,7 @@ jobs:
-e DATABASE_URL=$PROXY_DATABASE_URL \
-e LITELLM_MASTER_KEY="sk-1234" \
-e OPENAI_API_KEY=$OPENAI_API_KEY \
-e GEMINI_API_KEY=$GEMINI_API_KEY \
-e ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY \
-e LITELLM_LICENSE=$LITELLM_LICENSE \
--name my-app \
Expand Down Expand Up @@ -1228,12 +1229,13 @@ jobs:
name: Install Node.js dependencies
command: |
npm install @google-cloud/vertexai
npm install @google/generative-ai
npm install --save-dev jest
- run:
name: Run Vertex AI tests
name: Run Vertex AI, Google AI Studio Node.js tests
command: |
npx jest tests/pass_through_tests/test_vertex.test.js --verbose
npx jest tests/pass_through_tests --verbose
no_output_timeout: 30m
- run:
name: Run tests
Expand Down
128 changes: 123 additions & 5 deletions docs/my-website/docs/pass_through/google_ai_studio.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,21 @@
import Image from '@theme/IdealImage';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';


# Google AI Studio SDK

Pass-through endpoints for Google AI Studio - call provider-specific endpoint, in native format (no translation).

Just replace `https://generativelanguage.googleapis.com` with `LITELLM_PROXY_BASE_URL/gemini` 🚀
Just replace `https://generativelanguage.googleapis.com` with `LITELLM_PROXY_BASE_URL/gemini`

#### **Example Usage**

<Tabs>
<TabItem value="curl" label="curl">

```bash
http://0.0.0.0:4000/gemini/v1beta/models/gemini-1.5-flash:countTokens?key=sk-anything' \
curl 'http://0.0.0.0:4000/gemini/v1beta/models/gemini-1.5-flash:countTokens?key=sk-anything' \
-H 'Content-Type: application/json' \
-d '{
"contents": [{
Expand All @@ -17,6 +26,53 @@ http://0.0.0.0:4000/gemini/v1beta/models/gemini-1.5-flash:countTokens?key=sk-any
}'
```

</TabItem>
<TabItem value="js" label="Google AI Node.js SDK">

```javascript
const { GoogleGenerativeAI } = require("@google/generative-ai");

const modelParams = {
model: 'gemini-pro',
};

const requestOptions = {
baseUrl: 'http://localhost:4000/gemini', // http://<proxy-base-url>/gemini
};

const genAI = new GoogleGenerativeAI("sk-1234"); // litellm proxy API key
const model = genAI.getGenerativeModel(modelParams, requestOptions);

async function main() {
try {
const result = await model.generateContent("Explain how AI works");
console.log(result.response.text());
} catch (error) {
console.error('Error:', error);
}
}

// For streaming responses
async function main_streaming() {
try {
const streamingResult = await model.generateContentStream("Explain how AI works");
for await (const chunk of streamingResult.stream) {
console.log('Stream chunk:', JSON.stringify(chunk));
}
const aggregatedResponse = await streamingResult.response;
console.log('Aggregated response:', JSON.stringify(aggregatedResponse));
} catch (error) {
console.error('Error:', error);
}
}

main();
// main_streaming();
```

</TabItem>
</Tabs>

Supports **ALL** Google AI Studio Endpoints (including streaming).

[**See All Google AI Studio Endpoints**](https://ai.google.dev/api)
Expand Down Expand Up @@ -166,14 +222,14 @@ curl -X POST "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5
```
## Advanced - Use with Virtual Keys
## Advanced
Pre-requisites
- [Setup proxy with DB](../proxy/virtual_keys.md#setup)
Use this, to avoid giving developers the raw Google AI Studio key, but still letting them use Google AI Studio endpoints.
### Usage
### Use with Virtual Keys
1. Setup environment
Expand Down Expand Up @@ -220,4 +276,66 @@ http://0.0.0.0:4000/gemini/v1beta/models/gemini-1.5-flash:countTokens?key=sk-123
}]
}]
}'
```
```


### Send `tags` in request headers

Use this if you want `tags` to be tracked in the LiteLLM DB and on logging callbacks.

Pass tags in request headers as a comma separated list. In the example below the following tags will be tracked

```
tags: ["gemini-js-sdk", "pass-through-endpoint"]
```

<Tabs>
<TabItem value="curl" label="curl">

```bash
curl 'http://0.0.0.0:4000/gemini/v1beta/models/gemini-1.5-flash:generateContent?key=sk-anything' \
-H 'Content-Type: application/json' \
-H 'tags: gemini-js-sdk,pass-through-endpoint' \
-d '{
"contents": [{
"parts":[{
"text": "The quick brown fox jumps over the lazy dog."
}]
}]
}'
```

</TabItem>
<TabItem value="js" label="Google AI Node.js SDK">

```javascript
const { GoogleGenerativeAI } = require("@google/generative-ai");

const modelParams = {
model: 'gemini-pro',
};

const requestOptions = {
baseUrl: 'http://localhost:4000/gemini', // http://<proxy-base-url>/gemini
customHeaders: {
"tags": "gemini-js-sdk,pass-through-endpoint"
}
};

const genAI = new GoogleGenerativeAI("sk-1234");
const model = genAI.getGenerativeModel(modelParams, requestOptions);

async function main() {
try {
const result = await model.generateContent("Explain how AI works");
console.log(result.response.text());
} catch (error) {
console.error('Error:', error);
}
}

main();
```

</TabItem>
</Tabs>
1 change: 1 addition & 0 deletions litellm/proxy/_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -2111,6 +2111,7 @@ class SpecialHeaders(enum.Enum):
openai_authorization = "Authorization"
azure_authorization = "API-Key"
anthropic_authorization = "x-api-key"
google_ai_studio_authorization = "x-goog-api-key"


class LitellmDataForBackendLLMCall(TypedDict, total=False):
Expand Down
10 changes: 10 additions & 0 deletions litellm/proxy/auth/user_api_key_auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,11 @@
auto_error=False,
description="If anthropic client used.",
)
google_ai_studio_api_key_header = APIKeyHeader(
name=SpecialHeaders.google_ai_studio_authorization.value,
auto_error=False,
description="If google ai studio client used.",
)


def _get_bearer_token(
Expand Down Expand Up @@ -197,6 +202,9 @@ async def user_api_key_auth( # noqa: PLR0915
anthropic_api_key_header: Optional[str] = fastapi.Security(
anthropic_api_key_header
),
google_ai_studio_api_key_header: Optional[str] = fastapi.Security(
google_ai_studio_api_key_header
),
) -> UserAPIKeyAuth:
from litellm.proxy.proxy_server import (
general_settings,
Expand Down Expand Up @@ -233,6 +241,8 @@ async def user_api_key_auth( # noqa: PLR0915
api_key = azure_api_key_header
elif isinstance(anthropic_api_key_header, str):
api_key = anthropic_api_key_header
elif isinstance(google_ai_studio_api_key_header, str):
api_key = google_ai_studio_api_key_header
elif pass_through_endpoints is not None:
for endpoint in pass_through_endpoints:
if endpoint.get("path", "") == route:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,12 @@ async def gemini_proxy_route(
fastapi_response: Response,
):
## CHECK FOR LITELLM API KEY IN THE QUERY PARAMS - ?..key=LITELLM_API_KEY
api_key = request.query_params.get("key")
google_ai_studio_api_key = request.query_params.get("key") or request.headers.get(
"x-goog-api-key"
)

user_api_key_dict = await user_api_key_auth(
request=request, api_key="Bearer {}".format(api_key)
request=request, api_key=f"Bearer {google_ai_studio_api_key}"
)

base_target_url = "https://generativelanguage.googleapis.com"
Expand Down
123 changes: 123 additions & 0 deletions tests/pass_through_tests/test_gemini_with_spend.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
const { GoogleGenerativeAI } = require("@google/generative-ai");
const fs = require('fs');
const path = require('path');

// Import fetch if the SDK uses it
const originalFetch = global.fetch || require('node-fetch');

let lastCallId;

// Monkey-patch the fetch used internally
global.fetch = async function patchedFetch(url, options) {
const response = await originalFetch(url, options);

// Store the call ID if it exists
lastCallId = response.headers.get('x-litellm-call-id');

return response;
};

describe('Gemini AI Tests', () => {
test('should successfully generate non-streaming content with tags', async () => {
const genAI = new GoogleGenerativeAI("sk-1234"); // litellm proxy API key

const requestOptions = {
baseUrl: 'http://127.0.0.1:4000/gemini',
customHeaders: {
"tags": "gemini-js-sdk,pass-through-endpoint"
}
};

const model = genAI.getGenerativeModel({
model: 'gemini-pro'
}, requestOptions);

const prompt = 'Say "hello test" and nothing else';

const result = await model.generateContent(prompt);
expect(result).toBeDefined();

// Use the captured callId
const callId = lastCallId;
console.log("Captured Call ID:", callId);

// Wait for spend to be logged
await new Promise(resolve => setTimeout(resolve, 15000));

// Check spend logs
const spendResponse = await fetch(
`http://127.0.0.1:4000/spend/logs?request_id=${callId}`,
{
headers: {
'Authorization': 'Bearer sk-1234'
}
}
);

const spendData = await spendResponse.json();
console.log("spendData", spendData)
expect(spendData).toBeDefined();
expect(spendData[0].request_id).toBe(callId);
expect(spendData[0].call_type).toBe('pass_through_endpoint');
expect(spendData[0].request_tags).toEqual(['gemini-js-sdk', 'pass-through-endpoint']);
expect(spendData[0].metadata).toHaveProperty('user_api_key');
expect(spendData[0].model).toContain('gemini');
expect(spendData[0].spend).toBeGreaterThan(0);
}, 25000);

test('should successfully generate streaming content with tags', async () => {
const genAI = new GoogleGenerativeAI("sk-1234"); // litellm proxy API key

const requestOptions = {
baseUrl: 'http://127.0.0.1:4000/gemini',
customHeaders: {
"tags": "gemini-js-sdk,pass-through-endpoint"
}
};

const model = genAI.getGenerativeModel({
model: 'gemini-pro'
}, requestOptions);

const prompt = 'Say "hello test" and nothing else';

const streamingResult = await model.generateContentStream(prompt);
expect(streamingResult).toBeDefined();

for await (const chunk of streamingResult.stream) {
console.log('stream chunk:', JSON.stringify(chunk));
expect(chunk).toBeDefined();
}

const aggregatedResponse = await streamingResult.response;
console.log('aggregated response:', JSON.stringify(aggregatedResponse));
expect(aggregatedResponse).toBeDefined();

// Use the captured callId
const callId = lastCallId;
console.log("Captured Call ID:", callId);

// Wait for spend to be logged
await new Promise(resolve => setTimeout(resolve, 15000));

// Check spend logs
const spendResponse = await fetch(
`http://127.0.0.1:4000/spend/logs?request_id=${callId}`,
{
headers: {
'Authorization': 'Bearer sk-1234'
}
}
);

const spendData = await spendResponse.json();
console.log("spendData", spendData)
expect(spendData).toBeDefined();
expect(spendData[0].request_id).toBe(callId);
expect(spendData[0].call_type).toBe('pass_through_endpoint');
expect(spendData[0].request_tags).toEqual(['gemini-js-sdk', 'pass-through-endpoint']);
expect(spendData[0].metadata).toHaveProperty('user_api_key');
expect(spendData[0].model).toContain('gemini');
expect(spendData[0].spend).toBeGreaterThan(0);
}, 25000);
});
Loading

0 comments on commit c60261c

Please sign in to comment.