@@ -4,17 +4,9 @@ import TabItem from '@theme/TabItem';
4
4
5
5
# Vertex AI SDK
6
6
7
- Use VertexAI SDK to call endpoints on LiteLLM Gateway (native provider format)
8
-
9
- ::: tip
10
-
11
- Looking for the Unified API (OpenAI format) for VertexAI ? [ Go here - using vertexAI with LiteLLM SDK or LiteLLM Proxy Server] ( ../providers/vertex.md )
12
-
13
- :::
14
-
15
7
Pass-through endpoints for Vertex AI - call provider-specific endpoint, in native format (no translation).
16
8
17
- Just replace ` https://REGION-aiplatform.googleapis.com ` with ` LITELLM_PROXY_BASE_URL/vertex-ai `
9
+ Just replace ` https://REGION-aiplatform.googleapis.com ` with ` LITELLM_PROXY_BASE_URL/vertex_ai `
18
10
19
11
20
12
#### ** Example Usage**
@@ -23,9 +15,9 @@ Just replace `https://REGION-aiplatform.googleapis.com` with `LITELLM_PROXY_BASE
23
15
<TabItem value =" curl " label =" curl " >
24
16
25
17
``` bash
26
- curl http://localhost:4000/vertex-ai /publishers/google/models/gemini-1.0-pro:generateContent \
18
+ curl http://localhost:4000/vertex_ai /publishers/google/models/gemini-1.0-pro:generateContent \
27
19
-H " Content-Type: application/json" \
28
- -H " Authorization : Bearer sk-1234" \
20
+ -H " x-litellm-api-key : Bearer sk-1234" \
29
21
-d ' {
30
22
"contents":[{
31
23
"role": "user",
@@ -43,7 +35,7 @@ const { VertexAI } = require('@google-cloud/vertexai');
43
35
const vertexAI = new VertexAI ({
44
36
project: ' your-project-id' , // enter your vertex project id
45
37
location: ' us-central1' , // enter your vertex region
46
- apiEndpoint: " localhost:4000/vertex-ai " // <proxy-server-url>/vertex-ai # note, do not include 'https://' in the url
38
+ apiEndpoint: " localhost:4000/vertex_ai " // <proxy-server-url>/vertex_ai # note, do not include 'https://' in the url
47
39
});
48
40
49
41
const model = vertexAI .getGenerativeModel ({
@@ -87,7 +79,7 @@ generateContent();
87
79
- Tuning API
88
80
- CountTokens API
89
81
90
- ## Authentication to Vertex AI
82
+ #### Authentication to Vertex AI
91
83
92
84
LiteLLM Proxy Server supports two methods of authentication to Vertex AI:
93
85
@@ -116,9 +108,9 @@ from vertexai.preview.generative_models import GenerativeModel
116
108
LITE_LLM_ENDPOINT = " http://localhost:4000"
117
109
118
110
vertexai.init(
119
- project = " <your-vertex-ai -project-id>" , # enter your project id
120
- location = " <your-vertex-ai -location>" , # enter your region
121
- api_endpoint = f " { LITE_LLM_ENDPOINT } /vertex-ai " , # route on litellm
111
+ project = " <your-vertex_ai -project-id>" , # enter your project id
112
+ location = " <your-vertex_ai -location>" , # enter your region
113
+ api_endpoint = f " { LITE_LLM_ENDPOINT } /vertex_ai " , # route on litellm
122
114
api_transport = " rest" ,
123
115
)
124
116
@@ -158,7 +150,7 @@ from google.auth.credentials import Credentials
158
150
from vertexai.generative_models import GenerativeModel
159
151
160
152
LITELLM_PROXY_API_KEY = " sk-1234"
161
- LITELLM_PROXY_BASE = " http://0.0.0.0:4000/vertex-ai "
153
+ LITELLM_PROXY_BASE = " http://0.0.0.0:4000/vertex_ai "
162
154
163
155
import datetime
164
156
@@ -219,7 +211,7 @@ import vertexai
219
211
from vertexai.generative_models import GenerativeModel
220
212
221
213
LITELLM_PROXY_API_KEY = " sk-1234"
222
- LITELLM_PROXY_BASE = " http://0.0.0.0:4000/vertex-ai "
214
+ LITELLM_PROXY_BASE = " http://0.0.0.0:4000/vertex_ai "
223
215
224
216
vertexai.init(
225
217
project = " adroit-crow-413218" ,
@@ -247,7 +239,7 @@ from google.auth.credentials import Credentials
247
239
from vertexai.generative_models import GenerativeModel
248
240
249
241
LITELLM_PROXY_API_KEY = " sk-1234"
250
- LITELLM_PROXY_BASE = " http://0.0.0.0:4000/vertex-ai "
242
+ LITELLM_PROXY_BASE = " http://0.0.0.0:4000/vertex_ai "
251
243
252
244
import datetime
253
245
@@ -297,9 +289,9 @@ print(response.text)
297
289
<TabItem value =" Curl " label =" Curl " >
298
290
299
291
``` shell
300
- curl http://localhost:4000/vertex-ai /publishers/google/models/gemini-1.5-flash-001:generateContent \
292
+ curl http://localhost:4000/vertex_ai /publishers/google/models/gemini-1.5-flash-001:generateContent \
301
293
-H " Content-Type: application/json" \
302
- -H " Authorization : Bearer sk-1234" \
294
+ -H " x-litellm-api-key : Bearer sk-1234" \
303
295
-d ' {"contents":[{"role": "user", "parts":[{"text": "hi"}]}]}'
304
296
```
305
297
@@ -320,7 +312,7 @@ import vertexai
320
312
from vertexai.generative_models import GenerativeModel
321
313
322
314
LITELLM_PROXY_API_KEY = " sk-1234"
323
- LITELLM_PROXY_BASE = " http://0.0.0.0:4000/vertex-ai "
315
+ LITELLM_PROXY_BASE = " http://0.0.0.0:4000/vertex_ai "
324
316
325
317
import datetime
326
318
@@ -358,7 +350,7 @@ from google.auth.credentials import Credentials
358
350
from vertexai.generative_models import GenerativeModel
359
351
360
352
LITELLM_PROXY_API_KEY = " sk-1234"
361
- LITELLM_PROXY_BASE = " http://0.0.0.0:4000/vertex-ai "
353
+ LITELLM_PROXY_BASE = " http://0.0.0.0:4000/vertex_ai "
362
354
363
355
import datetime
364
356
@@ -413,9 +405,9 @@ def embed_text(
413
405
<TabItem value =" curl " label =" Curl " >
414
406
415
407
``` shell
416
- curl http://localhost:4000/vertex-ai /publishers/google/models/textembedding-gecko@001:predict \
408
+ curl http://localhost:4000/vertex_ai /publishers/google/models/textembedding-gecko@001:predict \
417
409
-H " Content-Type: application/json" \
418
- -H " Authorization : Bearer sk-1234" \
410
+ -H " x-litellm-api-key : Bearer sk-1234" \
419
411
-d ' {"instances":[{"content": "gm"}]}'
420
412
```
421
413
@@ -437,7 +429,7 @@ import vertexai
437
429
from google.auth.credentials import Credentials
438
430
439
431
LITELLM_PROXY_API_KEY = " sk-1234"
440
- LITELLM_PROXY_BASE = " http://0.0.0.0:4000/vertex-ai "
432
+ LITELLM_PROXY_BASE = " http://0.0.0.0:4000/vertex_ai "
441
433
442
434
import datetime
443
435
@@ -482,7 +474,7 @@ import vertexai
482
474
from google.auth.credentials import Credentials
483
475
484
476
LITELLM_PROXY_API_KEY = " sk-1234"
485
- LITELLM_PROXY_BASE = " http://0.0.0.0:4000/vertex-ai "
477
+ LITELLM_PROXY_BASE = " http://0.0.0.0:4000/vertex_ai "
486
478
487
479
import datetime
488
480
@@ -547,9 +539,9 @@ print(f"Created output image using {len(images[0]._image_bytes)} bytes")
547
539
<TabItem value =" curl " label =" Curl " >
548
540
549
541
``` shell
550
- curl http://localhost:4000/vertex-ai /publishers/google/models/imagen-3.0-generate-001:predict \
542
+ curl http://localhost:4000/vertex_ai /publishers/google/models/imagen-3.0-generate-001:predict \
551
543
-H " Content-Type: application/json" \
552
- -H " Authorization : Bearer sk-1234" \
544
+ -H " x-litellm-api-key : Bearer sk-1234" \
553
545
-d ' {"instances":[{"prompt": "make an otter"}], "parameters": {"sampleCount": 1}}'
554
546
```
555
547
@@ -571,7 +563,7 @@ from vertexai.generative_models import GenerativeModel
571
563
import vertexai
572
564
573
565
LITELLM_PROXY_API_KEY = " sk-1234"
574
- LITELLM_PROXY_BASE = " http://0.0.0.0:4000/vertex-ai "
566
+ LITELLM_PROXY_BASE = " http://0.0.0.0:4000/vertex_ai "
575
567
576
568
import datetime
577
569
@@ -614,7 +606,7 @@ import vertexai
614
606
from google.auth.credentials import Credentials
615
607
616
608
LITELLM_PROXY_API_KEY = " sk-1234"
617
- LITELLM_PROXY_BASE = " http://0.0.0.0:4000/vertex-ai "
609
+ LITELLM_PROXY_BASE = " http://0.0.0.0:4000/vertex_ai "
618
610
619
611
import datetime
620
612
@@ -677,9 +669,9 @@ print(f"Total Token Count: {usage_metadata.total_token_count}")
677
669
678
670
679
671
``` shell
680
- curl http://localhost:4000/vertex-ai /publishers/google/models/gemini-1.5-flash-001:countTokens \
672
+ curl http://localhost:4000/vertex_ai /publishers/google/models/gemini-1.5-flash-001:countTokens \
681
673
-H " Content-Type: application/json" \
682
- -H " Authorization : Bearer sk-1234" \
674
+ -H " x-litellm-api-key : Bearer sk-1234" \
683
675
-d ' {"contents":[{"role": "user", "parts":[{"text": "hi"}]}]}'
684
676
```
685
677
@@ -700,7 +692,7 @@ from vertexai.preview.tuning import sft
700
692
import vertexai
701
693
702
694
LITELLM_PROXY_API_KEY = " sk-1234"
703
- LITELLM_PROXY_BASE = " http://0.0.0.0:4000/vertex-ai "
695
+ LITELLM_PROXY_BASE = " http://0.0.0.0:4000/vertex_ai "
704
696
705
697
706
698
vertexai.init(
@@ -741,7 +733,7 @@ import vertexai
741
733
from google.auth.credentials import Credentials
742
734
743
735
LITELLM_PROXY_API_KEY = " sk-1234"
744
- LITELLM_PROXY_BASE = " http://0.0.0.0:4000/vertex-ai "
736
+ LITELLM_PROXY_BASE = " http://0.0.0.0:4000/vertex_ai "
745
737
746
738
import datetime
747
739
@@ -801,9 +793,9 @@ print(sft_tuning_job.experiment)
801
793
<TabItem value =" curl " label =" Curl " >
802
794
803
795
``` shell
804
- curl http://localhost:4000/vertex-ai /tuningJobs \
796
+ curl http://localhost:4000/vertex_ai /tuningJobs \
805
797
-H " Content-Type: application/json" \
806
- -H " Authorization : Bearer sk-1234" \
798
+ -H " x-litellm-api-key : Bearer sk-1234" \
807
799
-d ' {
808
800
"baseModel": "gemini-1.0-pro-002",
809
801
"supervisedTuningSpec" : {
@@ -872,8 +864,8 @@ httpx_client = httpx.Client(timeout=30)
872
864
873
865
print (" Creating cached content" )
874
866
create_cache = httpx_client.post(
875
- url = f " { LITELLM_BASE_URL } /vertex-ai /cachedContents " ,
876
- headers = {" Authorization " : f " Bearer { LITELLM_PROXY_API_KEY } " },
867
+ url = f " { LITELLM_BASE_URL } /vertex_ai /cachedContents " ,
868
+ headers = {" x-litellm-api-key " : f " Bearer { LITELLM_PROXY_API_KEY } " },
877
869
json = {
878
870
" model" : " gemini-1.5-pro-001" ,
879
871
" contents" : [
@@ -920,5 +912,130 @@ response = client.chat.completions.create(
920
912
print (" Response from proxy:" , response)
921
913
```
922
914
915
+ </TabItem >
916
+ </Tabs >
917
+
918
+
919
+ ## Advanced
920
+
921
+ Pre-requisites
922
+ - [ Setup proxy with DB] ( ../proxy/virtual_keys.md#setup )
923
+
924
+ Use this, to avoid giving developers the raw Vertex AI service account credentials, but still letting them use Vertex AI endpoints.
925
+
926
+ ### Use with Virtual Keys
927
+
928
+ 1 . Setup environment
929
+
930
+ ``` bash
931
+ export DATABASE_URL=" "
932
+ export LITELLM_MASTER_KEY=" "
933
+ ```
934
+
935
+ ``` bash
936
+ litellm
937
+
938
+ # RUNNING on http://0.0.0.0:4000
939
+ ```
940
+
941
+ 2 . Generate virtual key
942
+
943
+ ``` bash
944
+ curl -X POST ' http://0.0.0.0:4000/key/generate' \
945
+ -H ' x-litellm-api-key: Bearer sk-1234' \
946
+ -H ' Content-Type: application/json' \
947
+ -d ' {}'
948
+ ```
949
+
950
+ Expected Response
951
+
952
+ ``` bash
953
+ {
954
+ ...
955
+ " key" : " sk-1234ewknldferwedojwojw"
956
+ }
957
+ ```
958
+
959
+ 3 . Test it!
960
+
961
+
962
+ ``` bash
963
+ curl http://localhost:4000/vertex_ai/publishers/google/models/gemini-1.0-pro:generateContent \
964
+ -H " Content-Type: application/json" \
965
+ -H " x-litellm-api-key: Bearer sk-1234" \
966
+ -d ' {
967
+ "contents":[{
968
+ "role": "user",
969
+ "parts":[{"text": "How are you doing today?"}]
970
+ }]
971
+ }'
972
+ ```
973
+
974
+ ### Send ` tags ` in request headers
975
+
976
+ Use this if you want ` tags ` to be tracked in the LiteLLM DB and on logging callbacks
977
+
978
+ Pass ` tags ` in request headers as a comma separated list. In the example below the following tags will be tracked
979
+
980
+ ```
981
+ tags: ["vertex-js-sdk", "pass-through-endpoint"]
982
+ ```
983
+
984
+ <Tabs >
985
+ <TabItem value =" curl " label =" curl " >
986
+
987
+ ``` bash
988
+ curl http://localhost:4000/vertex_ai/publishers/google/models/gemini-1.0-pro:generateContent \
989
+ -H " Content-Type: application/json" \
990
+ -H " x-litellm-api-key: Bearer sk-1234" \
991
+ -H " tags: vertex-js-sdk,pass-through-endpoint" \
992
+ -d ' {
993
+ "contents":[{
994
+ "role": "user",
995
+ "parts":[{"text": "How are you doing today?"}]
996
+ }]
997
+ }'
998
+ ```
999
+
1000
+ </TabItem >
1001
+ <TabItem value =" js " label =" Vertex Node.js SDK " >
1002
+
1003
+ ``` javascript
1004
+ const { VertexAI } = require (' @google-cloud/vertexai' );
1005
+
1006
+ const vertexAI = new VertexAI ({
1007
+ project: ' your-project-id' , // enter your vertex project id
1008
+ location: ' us-central1' , // enter your vertex region
1009
+ apiEndpoint: " localhost:4000/vertex_ai" // <proxy-server-url>/vertex_ai # note, do not include 'https://' in the url
1010
+ });
1011
+
1012
+ const model = vertexAI .getGenerativeModel ({
1013
+ model: ' gemini-1.0-pro'
1014
+ }, {
1015
+ customHeaders: {
1016
+ " x-litellm-api-key" : " sk-1234" , // Your litellm Virtual Key
1017
+ " tags" : " vertex-js-sdk,pass-through-endpoint"
1018
+ }
1019
+ });
1020
+
1021
+ async function generateContent () {
1022
+ try {
1023
+ const prompt = {
1024
+ contents: [{
1025
+ role: ' user' ,
1026
+ parts: [{ text: ' How are you doing today?' }]
1027
+ }]
1028
+ };
1029
+
1030
+ const response = await model .generateContent (prompt);
1031
+ console .log (' Response:' , response);
1032
+ } catch (error) {
1033
+ console .error (' Error:' , error);
1034
+ }
1035
+ }
1036
+
1037
+ generateContent ();
1038
+ ```
1039
+
923
1040
</TabItem >
924
1041
</Tabs >
0 commit comments