Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: A new field response_id is added to message .google.cloud.aiplatform.v1.GenerateContentResponse #5836

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
  •  
  •  
  •  
4 changes: 2 additions & 2 deletions packages/google-cloud-aiplatform/.jsdoc.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2024 Google LLC
// Copyright 2025 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -40,7 +40,7 @@ module.exports = {
includePattern: '\\.js$'
},
templates: {
copyright: 'Copyright 2024 Google LLC',
copyright: 'Copyright 2025 Google LLC',
includeDate: false,
sourceFiles: false,
systemName: '@google-cloud/aiplatform',
Expand Down
33 changes: 33 additions & 0 deletions packages/google-cloud-aiplatform/README.md

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ enum AcceleratorType {
// Nvidia H100 80Gb GPU.
NVIDIA_H100_80GB = 13;

// Nvidia H100 Mega 80Gb GPU.
NVIDIA_H100_MEGA_80GB = 14;

// TPU v2.
TPU_V2 = 6;

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";

option csharp_namespace = "Google.Cloud.AIPlatform.V1";
option go_package = "cloud.google.com/go/aiplatform/apiv1/aiplatformpb;aiplatformpb";
option java_multiple_files = true;
option java_outer_classname = "ApiAuthProto";
option java_package = "com.google.cloud.aiplatform.v1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
option ruby_package = "Google::Cloud::AIPlatform::V1";
option (google.api.resource_definition) = {
type: "secretmanager.googleapis.com/SecretVersion"
pattern: "projects/{project}/secrets/{secret}/versions/{secret_version}"
};

// The generic reusable api auth config.
message ApiAuth {
  // The API secret, referenced through its SecretManager secret version.
  message ApiKeyConfig {
    // Required. The SecretManager secret version resource name storing API key.
    // e.g. projects/{project}/secrets/{secret}/versions/{version}
    string api_key_secret_version = 1 [
      (google.api.field_behavior) = REQUIRED,
      (google.api.resource_reference) = {
        type: "secretmanager.googleapis.com/SecretVersion"
      }
    ];
  }

  // The auth config. At most one of the members below may be set.
  oneof auth_config {
    // The API secret.
    ApiKeyConfig api_key_config = 1;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/aiplatform/v1/content.proto";
import "google/cloud/aiplatform/v1/tool.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/timestamp.proto";

option csharp_namespace = "Google.Cloud.AIPlatform.V1";
option go_package = "cloud.google.com/go/aiplatform/apiv1/aiplatformpb;aiplatformpb";
option java_multiple_files = true;
option java_outer_classname = "CachedContentProto";
option java_package = "com.google.cloud.aiplatform.v1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
option ruby_package = "Google::Cloud::AIPlatform::V1";

// A resource used in LLM queries for users to explicitly specify what to cache
// and how to cache.
message CachedContent {
  option (google.api.resource) = {
    type: "aiplatform.googleapis.com/CachedContent"
    pattern: "projects/{project}/locations/{location}/cachedContents/{cached_content}"
    plural: "cachedContents"
    singular: "cachedContent"
  };

  // Metadata on the usage of the cached content.
  message UsageMetadata {
    // Total number of tokens that the cached content consumes.
    int32 total_token_count = 1;

    // Number of text characters.
    int32 text_count = 2;

    // Number of images.
    int32 image_count = 3;

    // Duration of video in seconds.
    int32 video_duration_seconds = 4;

    // Duration of audio in seconds.
    int32 audio_duration_seconds = 5;
  }

  // Expiration time of the cached content.
  oneof expiration {
    // Timestamp of when this resource is considered expired.
    // This is *always* provided on output, regardless of what was sent
    // on input.
    google.protobuf.Timestamp expire_time = 9;

    // Input only. The TTL for this resource. The expiration time is computed:
    // now + TTL.
    google.protobuf.Duration ttl = 10
        [(google.api.field_behavior) = INPUT_ONLY];
  }

  // Immutable. Identifier. The server-generated resource name of the cached
  // content. Format:
  // projects/{project}/locations/{location}/cachedContents/{cached_content}
  string name = 1 [
    (google.api.field_behavior) = IDENTIFIER,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Optional. Immutable. The user-generated meaningful display name of the
  // cached content.
  string display_name = 11 [
    (google.api.field_behavior) = OPTIONAL,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Immutable. The name of the publisher model to use for cached content.
  // Format:
  // projects/{project}/locations/{location}/publishers/{publisher}/models/{model}
  string model = 2 [(google.api.field_behavior) = IMMUTABLE];

  // Optional. Input only. Immutable. Developer set system instruction.
  // Currently, text only.
  Content system_instruction = 3 [
    (google.api.field_behavior) = OPTIONAL,
    (google.api.field_behavior) = IMMUTABLE,
    (google.api.field_behavior) = INPUT_ONLY
  ];

  // Optional. Input only. Immutable. The content to cache.
  repeated Content contents = 4 [
    (google.api.field_behavior) = OPTIONAL,
    (google.api.field_behavior) = IMMUTABLE,
    (google.api.field_behavior) = INPUT_ONLY
  ];

  // Optional. Input only. Immutable. A list of `Tools` the model may use to
  // generate the next response.
  repeated Tool tools = 5 [
    (google.api.field_behavior) = OPTIONAL,
    (google.api.field_behavior) = IMMUTABLE,
    (google.api.field_behavior) = INPUT_ONLY
  ];

  // Optional. Input only. Immutable. Tool config. This config is shared for all
  // tools.
  ToolConfig tool_config = 6 [
    (google.api.field_behavior) = OPTIONAL,
    (google.api.field_behavior) = IMMUTABLE,
    (google.api.field_behavior) = INPUT_ONLY
  ];

  // Output only. Creation time of the cache entry.
  google.protobuf.Timestamp create_time = 7
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. When the cache entry was last updated in UTC time.
  google.protobuf.Timestamp update_time = 8
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Metadata on the usage of the cached content.
  UsageMetadata usage_metadata = 12 [(google.api.field_behavior) = OUTPUT_ONLY];
}
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,27 @@ enum HarmCategory {
HARM_CATEGORY_CIVIC_INTEGRITY = 5;
}

// Content Part modality.
enum Modality {
  // Unspecified modality.
  MODALITY_UNSPECIFIED = 0;

  // Plain text.
  TEXT = 1;

  // Image.
  IMAGE = 2;

  // Video.
  VIDEO = 3;

  // Audio.
  AUDIO = 4;

  // Document, e.g. PDF.
  DOCUMENT = 5;
}

// The base structured datatype containing multi-part content of a message.
//
// A `Content` includes a `role` field designating the producer of the `Content`
Expand Down Expand Up @@ -97,6 +118,13 @@ message Part {
// the model.
FunctionResponse function_response = 6
[(google.api.field_behavior) = OPTIONAL];

// Optional. Code generated by the model that is meant to be executed.
ExecutableCode executable_code = 8 [(google.api.field_behavior) = OPTIONAL];

// Optional. Result of executing the [ExecutableCode].
CodeExecutionResult code_execution_result = 9
[(google.api.field_behavior) = OPTIONAL];
}

oneof metadata {
Expand Down Expand Up @@ -592,3 +620,12 @@ message RetrievalMetadata {
float google_search_dynamic_retrieval_score = 2
[(google.api.field_behavior) = OPTIONAL];
}

// Represents token counting info for a single modality.
message ModalityTokenCount {
  // The modality associated with this token count.
  Modality modality = 1;

  // Number of tokens.
  int32 token_count = 2;
}
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,20 @@ message Endpoint {

// A deployment of a Model. Endpoints contain one or more DeployedModels.
message DeployedModel {
// Runtime status of the deployed model.
message Status {
  // Output only. The latest deployed model's status message (if any).
  string message = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time at which the status was last updated.
  google.protobuf.Timestamp last_update_time = 2
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The number of available replicas of the deployed model.
  int32 available_replica_count = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// The prediction (for example, the machine) resources that the DeployedModel
// uses. The user is billed for the resources (at least their minimal amount)
// even if the DeployedModel receives no traffic.
Expand Down Expand Up @@ -301,9 +315,16 @@ message DeployedModel {
// Configuration for faster model deployment.
FasterDeploymentConfig faster_deployment_config = 23;

// Output only. Runtime status of the deployed model.
Status status = 26 [(google.api.field_behavior) = OUTPUT_ONLY];

// System labels to apply to Model Garden deployments.
// System labels are managed by Google for internal use only.
map<string, string> system_labels = 28;

// Optional. Spec for configuring speculative decoding.
SpeculativeDecodingSpec speculative_decoding_spec = 30
[(google.api.field_behavior) = OPTIONAL];
}

// PrivateEndpoints proto is used to provide paths for users to send
Expand Down Expand Up @@ -344,14 +365,52 @@ message PredictRequestResponseLoggingConfig {
BigQueryDestination bigquery_destination = 3;
}

// Configurations (e.g. inference timeout) that are applied on your endpoints.
message ClientConnectionConfig {
  // Customizable online prediction request timeout.
  google.protobuf.Duration inference_timeout = 1;
}

// Configuration for faster model deployment.
message FasterDeploymentConfig {
  // If true, enable fast tryout feature for this deployed model.
  bool fast_tryout_enabled = 2;
}

// Configurations (e.g. inference timeout) that are applied on your endpoints.
message ClientConnectionConfig {
// Customizable online prediction request timeout.
google.protobuf.Duration inference_timeout = 1;
// Configuration for Speculative Decoding.
message SpeculativeDecodingSpec {
  // Draft model speculation works by using the smaller model to generate
  // candidate tokens for speculative decoding.
  message DraftModelSpeculation {
    // Required. The resource name of the draft model.
    string draft_model = 1 [
      (google.api.field_behavior) = REQUIRED,
      (google.api.resource_reference) = {
        type: "aiplatform.googleapis.com/Model"
      }
    ];
  }

  // N-Gram speculation works by trying to find matching tokens in the
  // previous prompt sequence and use those as speculation for generating
  // new tokens.
  message NgramSpeculation {
    // The number of last N input tokens used as ngram to search/match
    // against the previous prompt sequence.
    // This is equal to the N in N-Gram.
    // The default value is 3 if not specified.
    int32 ngram_size = 1;
  }

  // The type of speculation method to use.
  oneof speculation {
    // Draft model speculation.
    DraftModelSpeculation draft_model_speculation = 2;

    // N-Gram speculation.
    NgramSpeculation ngram_speculation = 3;
  }

  // The number of speculative tokens to generate at each step.
  int32 speculative_token_count = 1;
}
Original file line number Diff line number Diff line change
Expand Up @@ -178,13 +178,13 @@ message NearestNeighborQuery {
// Numeric filter is used to search a subset of the entities by using boolean
// rules on numeric columns.
// For example:
// Database Point 0: {name: “a” value_int: 42} {name: “b” value_float: 1.0}
// Database Point 1: {name: “a” value_int: 10} {name: “b” value_float: 2.0}
// Database Point 2: {name: “a” value_int: -1} {name: “b” value_float: 3.0}
// Query: {name: “a” value_int: 12 operator: LESS} // Matches Point 1, 2
// {name: “b” value_float: 2.0 operator: EQUAL} // Matches Point 1
// Database Point 0: {name: "a" value_int: 42} {name: "b" value_float: 1.0}
// Database Point 1: {name: "a" value_int: 10} {name: "b" value_float: 2.0}
// Database Point 2: {name: "a" value_int: -1} {name: "b" value_float: 3.0}
// Query: {name: "a" value_int: 12 operator: LESS} // Matches Point 1, 2
// {name: "b" value_float: 2.0 operator: EQUAL} // Matches Point 1
message NumericFilter {
// Datapoints for which Operator is true relative to the querys Value
// Datapoints for which Operator is true relative to the query's Value
// field will be allowlisted.
enum Operator {
// Unspecified operator.
Expand Down
Loading
Loading