Skip to content

Commit 8004511

Browse files
committed
docs on vertex js pass tags
1 parent 5baf33b commit 8004511

File tree

1 file changed

+156
-39
lines changed

1 file changed

+156
-39
lines changed

docs/my-website/docs/pass_through/vertex_ai.md

Lines changed: 156 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,9 @@ import TabItem from '@theme/TabItem';
44

55
# Vertex AI SDK
66

7-
Use VertexAI SDK to call endpoints on LiteLLM Gateway (native provider format)
8-
9-
:::tip
10-
11-
Looking for the Unified API (OpenAI format) for VertexAI ? [Go here - using vertexAI with LiteLLM SDK or LiteLLM Proxy Server](../providers/vertex.md)
12-
13-
:::
14-
157
Pass-through endpoints for Vertex AI - call provider-specific endpoint, in native format (no translation).
168

17-
Just replace `https://REGION-aiplatform.googleapis.com` with `LITELLM_PROXY_BASE_URL/vertex-ai`
9+
Just replace `https://REGION-aiplatform.googleapis.com` with `LITELLM_PROXY_BASE_URL/vertex_ai`
1810

1911

2012
#### **Example Usage**
@@ -23,9 +15,9 @@ Just replace `https://REGION-aiplatform.googleapis.com` with `LITELLM_PROXY_BASE
2315
<TabItem value="curl" label="curl">
2416

2517
```bash
26-
curl http://localhost:4000/vertex-ai/publishers/google/models/gemini-1.0-pro:generateContent \
18+
curl http://localhost:4000/vertex_ai/publishers/google/models/gemini-1.0-pro:generateContent \
2719
-H "Content-Type: application/json" \
28-
-H "Authorization: Bearer sk-1234" \
20+
-H "x-litellm-api-key: Bearer sk-1234" \
2921
-d '{
3022
"contents":[{
3123
"role": "user",
@@ -43,7 +35,7 @@ const { VertexAI } = require('@google-cloud/vertexai');
4335
const vertexAI = new VertexAI({
4436
project: 'your-project-id', // enter your vertex project id
4537
location: 'us-central1', // enter your vertex region
46-
apiEndpoint: "localhost:4000/vertex-ai" // <proxy-server-url>/vertex-ai # note, do not include 'https://' in the url
38+
apiEndpoint: "localhost:4000/vertex_ai" // <proxy-server-url>/vertex_ai # note, do not include 'https://' in the url
4739
});
4840

4941
const model = vertexAI.getGenerativeModel({
@@ -87,7 +79,7 @@ generateContent();
8779
- Tuning API
8880
- CountTokens API
8981

90-
## Authentication to Vertex AI
82+
#### Authentication to Vertex AI
9183

9284
LiteLLM Proxy Server supports two methods of authentication to Vertex AI:
9385

@@ -116,9 +108,9 @@ from vertexai.preview.generative_models import GenerativeModel
116108
LITE_LLM_ENDPOINT = "http://localhost:4000"
117109

118110
vertexai.init(
119-
project="<your-vertex-ai-project-id>", # enter your project id
120-
location="<your-vertex-ai-location>", # enter your region
121-
api_endpoint=f"{LITE_LLM_ENDPOINT}/vertex-ai", # route on litellm
111+
project="<your-vertex_ai-project-id>", # enter your project id
112+
location="<your-vertex_ai-location>", # enter your region
113+
api_endpoint=f"{LITE_LLM_ENDPOINT}/vertex_ai", # route on litellm
122114
api_transport="rest",
123115
)
124116

@@ -158,7 +150,7 @@ from google.auth.credentials import Credentials
158150
from vertexai.generative_models import GenerativeModel
159151

160152
LITELLM_PROXY_API_KEY = "sk-1234"
161-
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex-ai"
153+
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
162154

163155
import datetime
164156

@@ -219,7 +211,7 @@ import vertexai
219211
from vertexai.generative_models import GenerativeModel
220212

221213
LITELLM_PROXY_API_KEY = "sk-1234"
222-
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex-ai"
214+
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
223215

224216
vertexai.init(
225217
project="adroit-crow-413218",
@@ -247,7 +239,7 @@ from google.auth.credentials import Credentials
247239
from vertexai.generative_models import GenerativeModel
248240

249241
LITELLM_PROXY_API_KEY = "sk-1234"
250-
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex-ai"
242+
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
251243

252244
import datetime
253245

@@ -297,9 +289,9 @@ print(response.text)
297289
<TabItem value="Curl" label="Curl">
298290

299291
```shell
300-
curl http://localhost:4000/vertex-ai/publishers/google/models/gemini-1.5-flash-001:generateContent \
292+
curl http://localhost:4000/vertex_ai/publishers/google/models/gemini-1.5-flash-001:generateContent \
301293
-H "Content-Type: application/json" \
302-
-H "Authorization: Bearer sk-1234" \
294+
-H "x-litellm-api-key: Bearer sk-1234" \
303295
-d '{"contents":[{"role": "user", "parts":[{"text": "hi"}]}]}'
304296
```
305297

@@ -320,7 +312,7 @@ import vertexai
320312
from vertexai.generative_models import GenerativeModel
321313

322314
LITELLM_PROXY_API_KEY = "sk-1234"
323-
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex-ai"
315+
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
324316

325317
import datetime
326318

@@ -358,7 +350,7 @@ from google.auth.credentials import Credentials
358350
from vertexai.generative_models import GenerativeModel
359351

360352
LITELLM_PROXY_API_KEY = "sk-1234"
361-
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex-ai"
353+
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
362354

363355
import datetime
364356

@@ -413,9 +405,9 @@ def embed_text(
413405
<TabItem value="curl" label="Curl">
414406

415407
```shell
416-
curl http://localhost:4000/vertex-ai/publishers/google/models/textembedding-gecko@001:predict \
408+
curl http://localhost:4000/vertex_ai/publishers/google/models/textembedding-gecko@001:predict \
417409
-H "Content-Type: application/json" \
418-
-H "Authorization: Bearer sk-1234" \
410+
-H "x-litellm-api-key: Bearer sk-1234" \
419411
-d '{"instances":[{"content": "gm"}]}'
420412
```
421413

@@ -437,7 +429,7 @@ import vertexai
437429
from google.auth.credentials import Credentials
438430

439431
LITELLM_PROXY_API_KEY = "sk-1234"
440-
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex-ai"
432+
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
441433

442434
import datetime
443435

@@ -482,7 +474,7 @@ import vertexai
482474
from google.auth.credentials import Credentials
483475

484476
LITELLM_PROXY_API_KEY = "sk-1234"
485-
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex-ai"
477+
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
486478

487479
import datetime
488480

@@ -547,9 +539,9 @@ print(f"Created output image using {len(images[0]._image_bytes)} bytes")
547539
<TabItem value="curl" label="Curl">
548540

549541
```shell
550-
curl http://localhost:4000/vertex-ai/publishers/google/models/imagen-3.0-generate-001:predict \
542+
curl http://localhost:4000/vertex_ai/publishers/google/models/imagen-3.0-generate-001:predict \
551543
-H "Content-Type: application/json" \
552-
-H "Authorization: Bearer sk-1234" \
544+
-H "x-litellm-api-key: Bearer sk-1234" \
553545
-d '{"instances":[{"prompt": "make an otter"}], "parameters": {"sampleCount": 1}}'
554546
```
555547

@@ -571,7 +563,7 @@ from vertexai.generative_models import GenerativeModel
571563
import vertexai
572564

573565
LITELLM_PROXY_API_KEY = "sk-1234"
574-
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex-ai"
566+
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
575567

576568
import datetime
577569

@@ -614,7 +606,7 @@ import vertexai
614606
from google.auth.credentials import Credentials
615607

616608
LITELLM_PROXY_API_KEY = "sk-1234"
617-
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex-ai"
609+
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
618610

619611
import datetime
620612

@@ -677,9 +669,9 @@ print(f"Total Token Count: {usage_metadata.total_token_count}")
677669

678670

679671
```shell
680-
curl http://localhost:4000/vertex-ai/publishers/google/models/gemini-1.5-flash-001:countTokens \
672+
curl http://localhost:4000/vertex_ai/publishers/google/models/gemini-1.5-flash-001:countTokens \
681673
-H "Content-Type: application/json" \
682-
-H "Authorization: Bearer sk-1234" \
674+
-H "x-litellm-api-key: Bearer sk-1234" \
683675
-d '{"contents":[{"role": "user", "parts":[{"text": "hi"}]}]}'
684676
```
685677

@@ -700,7 +692,7 @@ from vertexai.preview.tuning import sft
700692
import vertexai
701693

702694
LITELLM_PROXY_API_KEY = "sk-1234"
703-
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex-ai"
695+
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
704696

705697

706698
vertexai.init(
@@ -741,7 +733,7 @@ import vertexai
741733
from google.auth.credentials import Credentials
742734

743735
LITELLM_PROXY_API_KEY = "sk-1234"
744-
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex-ai"
736+
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
745737

746738
import datetime
747739

@@ -801,9 +793,9 @@ print(sft_tuning_job.experiment)
801793
<TabItem value="curl" label="Curl">
802794

803795
```shell
804-
curl http://localhost:4000/vertex-ai/tuningJobs \
796+
curl http://localhost:4000/vertex_ai/tuningJobs \
805797
-H "Content-Type: application/json" \
806-
-H "Authorization: Bearer sk-1234" \
798+
-H "x-litellm-api-key: Bearer sk-1234" \
807799
-d '{
808800
"baseModel": "gemini-1.0-pro-002",
809801
"supervisedTuningSpec" : {
@@ -872,8 +864,8 @@ httpx_client = httpx.Client(timeout=30)
872864

873865
print("Creating cached content")
874866
create_cache = httpx_client.post(
875-
url=f"{LITELLM_BASE_URL}/vertex-ai/cachedContents",
876-
headers={"Authorization": f"Bearer {LITELLM_PROXY_API_KEY}"},
867+
url=f"{LITELLM_BASE_URL}/vertex_ai/cachedContents",
868+
headers={"x-litellm-api-key": f"Bearer {LITELLM_PROXY_API_KEY}"},
877869
json={
878870
"model": "gemini-1.5-pro-001",
879871
"contents": [
@@ -920,5 +912,130 @@ response = client.chat.completions.create(
920912
print("Response from proxy:", response)
921913
```
922914

915+
</TabItem>
916+
</Tabs>
917+
918+
919+
## Advanced
920+
921+
Pre-requisites
922+
- [Setup proxy with DB](../proxy/virtual_keys.md#setup)
923+
924+
Use this to avoid giving developers the raw Vertex AI credentials, while still letting them use Vertex AI endpoints.
925+
926+
### Use with Virtual Keys
927+
928+
1. Setup environment
929+
930+
```bash
931+
export DATABASE_URL=""
932+
export LITELLM_MASTER_KEY=""
933+
```
934+
935+
```bash
936+
litellm
937+
938+
# RUNNING on http://0.0.0.0:4000
939+
```
940+
941+
2. Generate virtual key
942+
943+
```bash
944+
curl -X POST 'http://0.0.0.0:4000/key/generate' \
945+
-H 'x-litellm-api-key: Bearer sk-1234' \
946+
-H 'Content-Type: application/json' \
947+
-d '{}'
948+
```
949+
950+
Expected Response
951+
952+
```bash
953+
{
954+
...
955+
"key": "sk-1234ewknldferwedojwojw"
956+
}
957+
```
958+
959+
3. Test it!
960+
961+
962+
```bash
963+
curl http://localhost:4000/vertex_ai/publishers/google/models/gemini-1.0-pro:generateContent \
964+
-H "Content-Type: application/json" \
965+
-H "x-litellm-api-key: Bearer sk-1234" \
966+
-d '{
967+
"contents":[{
968+
"role": "user",
969+
"parts":[{"text": "How are you doing today?"}]
970+
}]
971+
}'
972+
```
973+
974+
### Send `tags` in request headers
975+
976+
Use this if you want `tags` to be tracked in the LiteLLM DB and on logging callbacks.
977+
978+
Pass `tags` in request headers as a comma-separated list. In the example below, the following tags will be tracked:
979+
980+
```
981+
tags: ["vertex-js-sdk", "pass-through-endpoint"]
982+
```
983+
984+
<Tabs>
985+
<TabItem value="curl" label="curl">
986+
987+
```bash
988+
curl http://localhost:4000/vertex_ai/publishers/google/models/gemini-1.0-pro:generateContent \
989+
-H "Content-Type: application/json" \
990+
-H "x-litellm-api-key: Bearer sk-1234" \
991+
-H "tags: vertex-js-sdk,pass-through-endpoint" \
992+
-d '{
993+
"contents":[{
994+
"role": "user",
995+
"parts":[{"text": "How are you doing today?"}]
996+
}]
997+
}'
998+
```
999+
1000+
</TabItem>
1001+
<TabItem value="js" label="Vertex Node.js SDK">
1002+
1003+
```javascript
1004+
const { VertexAI } = require('@google-cloud/vertexai');
1005+
1006+
const vertexAI = new VertexAI({
1007+
project: 'your-project-id', // enter your vertex project id
1008+
location: 'us-central1', // enter your vertex region
1009+
apiEndpoint: "localhost:4000/vertex_ai" // <proxy-server-url>/vertex_ai # note, do not include 'https://' in the url
1010+
});
1011+
1012+
const model = vertexAI.getGenerativeModel({
1013+
model: 'gemini-1.0-pro'
1014+
}, {
1015+
customHeaders: {
1016+
"x-litellm-api-key": "sk-1234", // Your litellm Virtual Key
1017+
"tags": "vertex-js-sdk,pass-through-endpoint"
1018+
}
1019+
});
1020+
1021+
async function generateContent() {
1022+
try {
1023+
const prompt = {
1024+
contents: [{
1025+
role: 'user',
1026+
parts: [{ text: 'How are you doing today?' }]
1027+
}]
1028+
};
1029+
1030+
const response = await model.generateContent(prompt);
1031+
console.log('Response:', response);
1032+
} catch (error) {
1033+
console.error('Error:', error);
1034+
}
1035+
}
1036+
1037+
generateContent();
1038+
```
1039+
9231040
</TabItem>
9241041
</Tabs>

0 commit comments

Comments
 (0)