googleapis · gcf-owl-bot · Feb 12, 2025 · Feb 12, 2025 · Feb 19, 2025 · Feb 19, 2025
@@ -1,4 +1,4 @@
-// Copyright 2024 Google LLC
+// Copyright 2025 Google LLC
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -40,7 +40,7 @@ module.exports = {
     includePattern: '\\.js$'
   },
   templates: {
-    copyright: 'Copyright 2024 Google LLC',
+    copyright: 'Copyright 2025 Google LLC',
     includeDate: false,
     sourceFiles: false,
     systemName: '@google-cloud/aiplatform',

@@ -57,6 +57,9 @@ enum AcceleratorType {
   // Nvidia H100 80Gb GPU.
   NVIDIA_H100_80GB = 13;
 
+  // Nvidia H100 Mega 80Gb GPU.
+  NVIDIA_H100_MEGA_80GB = 14;
+
   // TPU v2.
   TPU_V2 = 6;
 

@@ -0,0 +1,53 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+
+package google.cloud.aiplatform.v1;
+
+import "google/api/field_behavior.proto";
+import "google/api/resource.proto";
+
+option csharp_namespace = "Google.Cloud.AIPlatform.V1";
+option go_package = "cloud.google.com/go/aiplatform/apiv1/aiplatformpb;aiplatformpb";
+option java_multiple_files = true;
+option java_outer_classname = "ApiAuthProto";
+option java_package = "com.google.cloud.aiplatform.v1";
+option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
+option ruby_package = "Google::Cloud::AIPlatform::V1";
+option (google.api.resource_definition) = {
+  type: "secretmanager.googleapis.com/SecretVersion"
+  pattern: "projects/{project}/secrets/{secret}/versions/{secret_version}"
+};
+
+// The generic reusable api auth config.
+message ApiAuth {
+  // The API secret.
+  message ApiKeyConfig {
+    // Required. The Secret Manager secret version resource name storing the API
+    // key, e.g. projects/{project}/secrets/{secret}/versions/{secret_version}
+    string api_key_secret_version = 1 [
+      (google.api.field_behavior) = REQUIRED,
+      (google.api.resource_reference) = {
+        type: "secretmanager.googleapis.com/SecretVersion"
+      }
+    ];
+  }
+
+  // The auth config.
+  oneof auth_config {
+    // The API secret.
+    ApiKeyConfig api_key_config = 1;
+  }
+}
@@ -0,0 +1,136 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+
+package google.cloud.aiplatform.v1;
+
+import "google/api/field_behavior.proto";
+import "google/api/resource.proto";
+import "google/cloud/aiplatform/v1/content.proto";
+import "google/cloud/aiplatform/v1/tool.proto";
+import "google/protobuf/duration.proto";
+import "google/protobuf/timestamp.proto";
+
+option csharp_namespace = "Google.Cloud.AIPlatform.V1";
+option go_package = "cloud.google.com/go/aiplatform/apiv1/aiplatformpb;aiplatformpb";
+option java_multiple_files = true;
+option java_outer_classname = "CachedContentProto";
+option java_package = "com.google.cloud.aiplatform.v1";
+option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
+option ruby_package = "Google::Cloud::AIPlatform::V1";
+
+// A resource used in LLM queries for users to explicitly specify what to cache
+// and how to cache.
+message CachedContent {
+  option (google.api.resource) = {
+    type: "aiplatform.googleapis.com/CachedContent"
+    pattern: "projects/{project}/locations/{location}/cachedContents/{cached_content}"
+    plural: "cachedContents"
+    singular: "cachedContent"
+  };
+
+  // Metadata on the usage of the cached content.
+  message UsageMetadata {
+    // Total number of tokens that the cached content consumes.
+    int32 total_token_count = 1;
+
+    // Number of text characters.
+    int32 text_count = 2;
+
+    // Number of images.
+    int32 image_count = 3;
+
+    // Duration of video in seconds.
+    int32 video_duration_seconds = 4;
+
+    // Duration of audio in seconds.
+    int32 audio_duration_seconds = 5;
+  }
+
+  // Expiration time of the cached content.
+  oneof expiration {
+    // Timestamp of when this resource is considered expired.
+    // This is *always* provided on output, regardless of what was sent
+    // on input.
+    google.protobuf.Timestamp expire_time = 9;
+
+    // Input only. The TTL for this resource. The expiration time is computed:
+    // now + TTL.
+    google.protobuf.Duration ttl = 10
+        [(google.api.field_behavior) = INPUT_ONLY];
+  }
+
+  // Immutable. Identifier. The server-generated resource name of the cached
+  // content. Format:
+  // projects/{project}/locations/{location}/cachedContents/{cached_content}
+  string name = 1 [
+    (google.api.field_behavior) = IDENTIFIER,
+    (google.api.field_behavior) = IMMUTABLE
+  ];
+
+  // Optional. Immutable. The user-generated meaningful display name of the
+  // cached content.
+  string display_name = 11 [
+    (google.api.field_behavior) = OPTIONAL,
+    (google.api.field_behavior) = IMMUTABLE
+  ];
+
+  // Immutable. The name of the publisher model to use for cached content.
+  // Format:
+  // projects/{project}/locations/{location}/publishers/{publisher}/models/{model}
+  string model = 2 [(google.api.field_behavior) = IMMUTABLE];
+
+  // Optional. Input only. Immutable. Developer-set system instruction.
+  // Currently, text only.
+  Content system_instruction = 3 [
+    (google.api.field_behavior) = OPTIONAL,
+    (google.api.field_behavior) = IMMUTABLE,
+    (google.api.field_behavior) = INPUT_ONLY
+  ];
+
+  // Optional. Input only. Immutable. The content to cache.
+  repeated Content contents = 4 [
+    (google.api.field_behavior) = OPTIONAL,
+    (google.api.field_behavior) = IMMUTABLE,
+    (google.api.field_behavior) = INPUT_ONLY
+  ];
+
+  // Optional. Input only. Immutable. A list of `Tools` the model may use to
+  // generate the next response.
+  repeated Tool tools = 5 [
+    (google.api.field_behavior) = OPTIONAL,
+    (google.api.field_behavior) = IMMUTABLE,
+    (google.api.field_behavior) = INPUT_ONLY
+  ];
+
+  // Optional. Input only. Immutable. Tool config. This config is shared for all
+  // tools.
+  ToolConfig tool_config = 6 [
+    (google.api.field_behavior) = OPTIONAL,
+    (google.api.field_behavior) = IMMUTABLE,
+    (google.api.field_behavior) = INPUT_ONLY
+  ];
+
+  // Output only. Creation time of the cache entry.
+  google.protobuf.Timestamp create_time = 7
+      [(google.api.field_behavior) = OUTPUT_ONLY];
+
+  // Output only. When the cache entry was last updated in UTC time.
+  google.protobuf.Timestamp update_time = 8
+      [(google.api.field_behavior) = OUTPUT_ONLY];
+
+  // Output only. Metadata on the usage of the cached content.
+  UsageMetadata usage_metadata = 12 [(google.api.field_behavior) = OUTPUT_ONLY];
+}
@@ -51,6 +51,27 @@ enum HarmCategory {
   HARM_CATEGORY_CIVIC_INTEGRITY = 5;
 }
 
+// Content Part modality.
+enum Modality {
+  // Unspecified modality.
+  MODALITY_UNSPECIFIED = 0;
+
+  // Plain text.
+  TEXT = 1;
+
+  // Image.
+  IMAGE = 2;
+
+  // Video.
+  VIDEO = 3;
+
+  // Audio.
+  AUDIO = 4;
+
+  // Document, e.g. PDF.
+  DOCUMENT = 5;
+}
+
 // The base structured datatype containing multi-part content of a message.
 //
 // A `Content` includes a `role` field designating the producer of the `Content`
@@ -97,6 +118,13 @@ message Part {
     // the model.
     FunctionResponse function_response = 6
         [(google.api.field_behavior) = OPTIONAL];
+
+    // Optional. Code generated by the model that is meant to be executed.
+    ExecutableCode executable_code = 8 [(google.api.field_behavior) = OPTIONAL];
+
+    // Optional. Result of executing the `ExecutableCode`.
+    CodeExecutionResult code_execution_result = 9
+        [(google.api.field_behavior) = OPTIONAL];
   }
 
   oneof metadata {
@@ -592,3 +620,12 @@ message RetrievalMetadata {
   float google_search_dynamic_retrieval_score = 2
       [(google.api.field_behavior) = OPTIONAL];
 }
+
+// Represents token counting info for a single modality.
+message ModalityTokenCount {
+  // The modality associated with this token count.
+  Modality modality = 1;
+
+  // Number of tokens.
+  int32 token_count = 2;
+}
@@ -180,6 +180,20 @@ message Endpoint {
 
 // A deployment of a Model. Endpoints contain one or more DeployedModels.
 message DeployedModel {
+  // Runtime status of the deployed model.
+  message Status {
+    // Output only. The latest deployed model's status message (if any).
+    string message = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
+
+    // Output only. The time at which the status was last updated.
+    google.protobuf.Timestamp last_update_time = 2
+        [(google.api.field_behavior) = OUTPUT_ONLY];
+
+    // Output only. The number of available replicas of the deployed model.
+    int32 available_replica_count = 3
+        [(google.api.field_behavior) = OUTPUT_ONLY];
+  }
+
   // The prediction (for example, the machine) resources that the DeployedModel
   // uses. The user is billed for the resources (at least their minimal amount)
   // even if the DeployedModel receives no traffic.
@@ -301,9 +315,16 @@ message DeployedModel {
   // Configuration for faster model deployment.
   FasterDeploymentConfig faster_deployment_config = 23;
 
+  // Output only. Runtime status of the deployed model.
+  Status status = 26 [(google.api.field_behavior) = OUTPUT_ONLY];
+
   // System labels to apply to Model Garden deployments.
   // System labels are managed by Google for internal use only.
   map<string, string> system_labels = 28;
+
+  // Optional. Spec for configuring speculative decoding.
+  SpeculativeDecodingSpec speculative_decoding_spec = 30
+      [(google.api.field_behavior) = OPTIONAL];
 }
 
 // PrivateEndpoints proto is used to provide paths for users to send
@@ -344,14 +365,52 @@ message PredictRequestResponseLoggingConfig {
   BigQueryDestination bigquery_destination = 3;
 }
 
+// Configurations (e.g. inference timeout) that are applied on your endpoints.
+message ClientConnectionConfig {
+  // Customizable online prediction request timeout.
+  google.protobuf.Duration inference_timeout = 1;
+}
+
 // Configuration for faster model deployment.
 message FasterDeploymentConfig {
   // If true, enable fast tryout feature for this deployed model.
   bool fast_tryout_enabled = 2;
 }
 
-// Configurations (e.g. inference timeout) that are applied on your endpoints.
-message ClientConnectionConfig {
-  // Customizable online prediction request timeout.
-  google.protobuf.Duration inference_timeout = 1;
+// Configuration for Speculative Decoding.
+message SpeculativeDecodingSpec {
+  // Draft model speculation works by using a smaller draft model to generate
+  // candidate tokens for speculative decoding.
+  message DraftModelSpeculation {
+    // Required. The resource name of the draft model.
+    string draft_model = 1 [
+      (google.api.field_behavior) = REQUIRED,
+      (google.api.resource_reference) = {
+        type: "aiplatform.googleapis.com/Model"
+      }
+    ];
+  }
+
+  // N-Gram speculation works by trying to find matching tokens in the
+  // previous prompt sequence and use those as speculation for generating
+  // new tokens.
+  message NgramSpeculation {
+    // The number (N) of most recent input tokens used as the n-gram to
+    // search/match against the previous prompt sequence.
+    // This is equal to the N in N-Gram.
+    // The default value is 3 if not specified.
+    int32 ngram_size = 1;
+  }
+
+  // The type of speculation method to use.
+  oneof speculation {
+    // Draft model speculation.
+    DraftModelSpeculation draft_model_speculation = 2;
+
+    // N-Gram speculation.
+    NgramSpeculation ngram_speculation = 3;
+  }
+
+  // The number of speculative tokens to generate at each step.
+  int32 speculative_token_count = 1;
 }
@@ -178,13 +178,13 @@ message NearestNeighborQuery {
   // Numeric filter is used to search a subset of the entities by using boolean
   // rules on numeric columns.
   // For example:
-  // Database Point 0: {name: “a” value_int: 42} {name: “b” value_float: 1.0}
-  // Database Point 1:  {name: “a” value_int: 10} {name: “b” value_float: 2.0}
-  // Database Point 2: {name: “a” value_int: -1} {name: “b” value_float: 3.0}
-  // Query: {name: “a” value_int: 12 operator: LESS}    // Matches Point 1, 2
-  // {name: “b” value_float: 2.0 operator: EQUAL} // Matches Point 1
+  // Database Point 0: {name: "a" value_int: 42} {name: "b" value_float: 1.0}
+  // Database Point 1: {name: "a" value_int: 10} {name: "b" value_float: 2.0}
+  // Database Point 2: {name: "a" value_int: -1} {name: "b" value_float: 3.0}
+  // Query: {name: "a" value_int: 12 operator: LESS}    // Matches Point 1, 2
+  // {name: "b" value_float: 2.0 operator: EQUAL} // Matches Point 1
   message NumericFilter {
-    // Datapoints for which Operator is true relative to the query’s Value
+    // Datapoints for which Operator is true relative to the query's Value
     // field will be allowlisted.
     enum Operator {
       // Unspecified operator.