// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1beta1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/httpbody.proto";
import "google/api/resource.proto";
import "google/cloud/aiplatform/v1beta1/content.proto";
import "google/cloud/aiplatform/v1beta1/explanation.proto";
import "google/cloud/aiplatform/v1beta1/tool.proto";
import "google/cloud/aiplatform/v1beta1/types.proto";
import "google/protobuf/struct.proto";

option csharp_namespace = "Google.Cloud.AIPlatform.V1Beta1";
option go_package = "cloud.google.com/go/aiplatform/apiv1beta1/aiplatformpb;aiplatformpb";
option java_multiple_files = true;
option java_outer_classname = "PredictionServiceProto";
option java_package = "com.google.cloud.aiplatform.v1beta1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1beta1";
option ruby_package = "Google::Cloud::AIPlatform::V1beta1";

// A service for online predictions and explanations.
service PredictionService {
  option (google.api.default_host) = "aiplatform.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform,"
      "https://www.googleapis.com/auth/cloud-platform.read-only";

  // Perform an online prediction.
  rpc Predict(PredictRequest) returns (PredictResponse) {
    option (google.api.http) = {
      post: "/v1beta1/{endpoint=projects/*/locations/*/endpoints/*}:predict"
      body: "*"
      additional_bindings {
        post: "/v1beta1/{endpoint=projects/*/locations/*/publishers/*/models/*}:predict"
        body: "*"
      }
    };
    option (google.api.method_signature) = "endpoint,instances,parameters";
  }

  // Perform an online prediction with an arbitrary HTTP payload.
  //
  // The response includes the following HTTP headers:
  //
  // * `X-Vertex-AI-Endpoint-Id`: ID of the
  // [Endpoint][google.cloud.aiplatform.v1beta1.Endpoint] that served this
  // prediction.
  //
  // * `X-Vertex-AI-Deployed-Model-Id`: ID of the Endpoint's
  // [DeployedModel][google.cloud.aiplatform.v1beta1.DeployedModel] that served
  // this prediction.
  rpc RawPredict(RawPredictRequest) returns (google.api.HttpBody) {
    option (google.api.http) = {
      post: "/v1beta1/{endpoint=projects/*/locations/*/endpoints/*}:rawPredict"
      body: "*"
      additional_bindings {
        post: "/v1beta1/{endpoint=projects/*/locations/*/publishers/*/models/*}:rawPredict"
        body: "*"
      }
    };
    option (google.api.method_signature) = "endpoint,http_body";
  }

  // Perform a streaming online prediction with an arbitrary HTTP payload.
  rpc StreamRawPredict(StreamRawPredictRequest)
      returns (stream google.api.HttpBody) {
    option (google.api.http) = {
      post: "/v1beta1/{endpoint=projects/*/locations/*/endpoints/*}:streamRawPredict"
      body: "*"
      additional_bindings {
        post: "/v1beta1/{endpoint=projects/*/locations/*/publishers/*/models/*}:streamRawPredict"
        body: "*"
      }
    };
    option (google.api.method_signature) = "endpoint,http_body";
  }

  // Perform a unary online prediction request to a gRPC model server for
  // Vertex first-party products and frameworks.
  rpc DirectPredict(DirectPredictRequest) returns (DirectPredictResponse) {
    option (google.api.http) = {
      post: "/v1beta1/{endpoint=projects/*/locations/*/endpoints/*}:directPredict"
      body: "*"
    };
  }

  // Perform a unary online prediction request to a gRPC model server for
  // custom containers.
  rpc DirectRawPredict(DirectRawPredictRequest)
      returns (DirectRawPredictResponse) {
    option (google.api.http) = {
      post: "/v1beta1/{endpoint=projects/*/locations/*/endpoints/*}:directRawPredict"
      body: "*"
    };
  }

  // Perform a streaming online prediction request to a gRPC model server for
  // Vertex first-party products and frameworks.
  //
  // Bidirectional streaming; no HTTP binding is declared for this method.
  rpc StreamDirectPredict(stream StreamDirectPredictRequest)
      returns (stream StreamDirectPredictResponse) {}

  // Perform a streaming online prediction request to a gRPC model server for
  // custom containers.
  //
  // Bidirectional streaming; no HTTP binding is declared for this method.
  rpc StreamDirectRawPredict(stream StreamDirectRawPredictRequest)
      returns (stream StreamDirectRawPredictResponse) {}

  // Perform a streaming online prediction request for Vertex first-party
  // products and frameworks.
  //
  // Bidirectional streaming; no HTTP binding is declared for this method.
  rpc StreamingPredict(stream StreamingPredictRequest)
      returns (stream StreamingPredictResponse) {}

  // Perform a server-side streaming online prediction request for Vertex
  // LLM streaming.
  rpc ServerStreamingPredict(StreamingPredictRequest)
      returns (stream StreamingPredictResponse) {
    option (google.api.http) = {
      post: "/v1beta1/{endpoint=projects/*/locations/*/endpoints/*}:serverStreamingPredict"
      body: "*"
      additional_bindings {
        post: "/v1beta1/{endpoint=projects/*/locations/*/publishers/*/models/*}:serverStreamingPredict"
        body: "*"
      }
    };
  }

  // Perform a streaming online prediction request through gRPC.
  //
  // Bidirectional streaming; no HTTP binding is declared for this method.
  rpc StreamingRawPredict(stream StreamingRawPredictRequest)
      returns (stream StreamingRawPredictResponse) {}

  // Perform an online explanation.
  //
  // If
  // [deployed_model_id][google.cloud.aiplatform.v1beta1.ExplainRequest.deployed_model_id]
  // is specified, the corresponding DeployedModel must have
  // [explanation_spec][google.cloud.aiplatform.v1beta1.DeployedModel.explanation_spec]
  // populated. If
  // [deployed_model_id][google.cloud.aiplatform.v1beta1.ExplainRequest.deployed_model_id]
  // is not specified, all DeployedModels must have
  // [explanation_spec][google.cloud.aiplatform.v1beta1.DeployedModel.explanation_spec]
  // populated.
  rpc Explain(ExplainRequest) returns (ExplainResponse) {
    option (google.api.http) = {
      post: "/v1beta1/{endpoint=projects/*/locations/*/endpoints/*}:explain"
      body: "*"
    };
    option (google.api.method_signature) =
        "endpoint,instances,parameters,deployed_model_id";
  }

  // Perform a token counting.
  rpc CountTokens(CountTokensRequest) returns (CountTokensResponse) {
    option (google.api.http) = {
      post: "/v1beta1/{endpoint=projects/*/locations/*/endpoints/*}:countTokens"
      body: "*"
      additional_bindings {
        post: "/v1beta1/{endpoint=projects/*/locations/*/publishers/*/models/*}:countTokens"
        body: "*"
      }
      additional_bindings {
        post: "/v1beta1/{endpoint=endpoints/*}:countTokens"
        body: "*"
      }
      additional_bindings {
        post: "/v1beta1/{endpoint=publishers/*/models/*}:countTokens"
        body: "*"
      }
    };
    option (google.api.method_signature) = "endpoint,instances";
  }

  // Generate content with multimodal inputs.
  rpc GenerateContent(GenerateContentRequest)
      returns (GenerateContentResponse) {
    option (google.api.http) = {
      post: "/v1beta1/{model=projects/*/locations/*/endpoints/*}:generateContent"
      body: "*"
      additional_bindings {
        post: "/v1beta1/{model=projects/*/locations/*/publishers/*/models/*}:generateContent"
        body: "*"
      }
      additional_bindings {
        post: "/v1beta1/{model=endpoints/*}:generateContent"
        body: "*"
      }
      additional_bindings {
        post: "/v1beta1/{model=publishers/*/models/*}:generateContent"
        body: "*"
      }
    };
    option (google.api.method_signature) = "model,contents";
  }

  // Generate content with multimodal inputs with streaming support.
  rpc StreamGenerateContent(GenerateContentRequest)
      returns (stream GenerateContentResponse) {
    option (google.api.http) = {
      post: "/v1beta1/{model=projects/*/locations/*/endpoints/*}:streamGenerateContent"
      body: "*"
      additional_bindings {
        post: "/v1beta1/{model=projects/*/locations/*/publishers/*/models/*}:streamGenerateContent"
        body: "*"
      }
      additional_bindings {
        post: "/v1beta1/{model=endpoints/*}:streamGenerateContent"
        body: "*"
      }
      additional_bindings {
        post: "/v1beta1/{model=publishers/*/models/*}:streamGenerateContent"
        body: "*"
      }
    };
    option (google.api.method_signature) = "model,contents";
  }

  // Exposes an OpenAI-compatible endpoint for chat completions.
  rpc ChatCompletions(ChatCompletionsRequest)
      returns (stream google.api.HttpBody) {
    option (google.api.http) = {
      post: "/v1beta1/{endpoint=projects/*/locations/*/endpoints/*}/chat/completions"
      body: "http_body"
    };
    option (google.api.method_signature) = "endpoint,http_body";
  }
}

// Request message for
// [PredictionService.Predict][google.cloud.aiplatform.v1beta1.PredictionService.Predict].
message PredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Required. The instances that are the input to the prediction call.
  // A DeployedModel may have an upper limit on the number of instances it
  // supports per request, and when it is exceeded the prediction call errors
  // in case of AutoML Models, or, in case of customer created Models, the
  // behaviour is as documented by that Model.
  // The schema of any single instance may be specified via Endpoint's
  // DeployedModels'
  // [Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
  // [instance_schema_uri][google.cloud.aiplatform.v1beta1.PredictSchemata.instance_schema_uri].
  repeated google.protobuf.Value instances = 2
      [(google.api.field_behavior) = REQUIRED];

  // The parameters that govern the prediction. The schema of the parameters may
  // be specified via Endpoint's DeployedModels'
  // [Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
  // [parameters_schema_uri][google.cloud.aiplatform.v1beta1.PredictSchemata.parameters_schema_uri].
  google.protobuf.Value parameters = 3;
}

// Response message for
// [PredictionService.Predict][google.cloud.aiplatform.v1beta1.PredictionService.Predict].
message PredictResponse {
  // The predictions that are the output of the predictions call.
  // The schema of any single prediction may be specified via Endpoint's
  // DeployedModels'
  // [Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
  // [prediction_schema_uri][google.cloud.aiplatform.v1beta1.PredictSchemata.prediction_schema_uri].
  repeated google.protobuf.Value predictions = 1;

  // ID of the Endpoint's DeployedModel that served this prediction.
  string deployed_model_id = 2;

  // Output only. The resource name of the Model which is deployed as the
  // DeployedModel that this prediction hits.
  string model = 3 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Model"
    }
  ];

  // Output only. The version ID of the Model which is deployed as the
  // DeployedModel that this prediction hits.
  string model_version_id = 5 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The [display
  // name][google.cloud.aiplatform.v1beta1.Model.display_name] of the Model
  // which is deployed as the DeployedModel that this prediction hits.
  string model_display_name = 4 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Request-level metadata returned by the model. The metadata
  // type will be dependent upon the model implementation.
  google.protobuf.Value metadata = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Request message for
// [PredictionService.RawPredict][google.cloud.aiplatform.v1beta1.PredictionService.RawPredict].
message RawPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // The prediction input. Supports HTTP headers and arbitrary data payload.
  //
  // A [DeployedModel][google.cloud.aiplatform.v1beta1.DeployedModel] may have
  // an upper limit on the number of instances it supports per request. When
  // this limit is exceeded for an AutoML model, the
  // [RawPredict][google.cloud.aiplatform.v1beta1.PredictionService.RawPredict]
  // method returns an error. When this limit is exceeded for a custom-trained
  // model, the behavior varies depending on the model.
  //
  // You can specify the schema for each instance in the
  // [predict_schemata.instance_schema_uri][google.cloud.aiplatform.v1beta1.PredictSchemata.instance_schema_uri]
  // field when you create a [Model][google.cloud.aiplatform.v1beta1.Model].
  // This schema applies when you deploy the `Model` as a `DeployedModel` to an
  // [Endpoint][google.cloud.aiplatform.v1beta1.Endpoint] and use the
  // `RawPredict` method.
  google.api.HttpBody http_body = 2;
}

// Request message for
// [PredictionService.StreamRawPredict][google.cloud.aiplatform.v1beta1.PredictionService.StreamRawPredict].
message StreamRawPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // The prediction input. Supports HTTP headers and arbitrary data payload.
  google.api.HttpBody http_body = 2;
}

// Request message for
// [PredictionService.DirectPredict][google.cloud.aiplatform.v1beta1.PredictionService.DirectPredict].
message DirectPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // The prediction input.
  repeated Tensor inputs = 2;

  // The parameters that govern the prediction.
  Tensor parameters = 3;
}

// Response message for
// [PredictionService.DirectPredict][google.cloud.aiplatform.v1beta1.PredictionService.DirectPredict].
message DirectPredictResponse {
  // The prediction output.
  repeated Tensor outputs = 1;

  // The parameters that govern the prediction.
  Tensor parameters = 2;
}

// Request message for
// [PredictionService.DirectRawPredict][google.cloud.aiplatform.v1beta1.PredictionService.DirectRawPredict].
message DirectRawPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Fully qualified name of the API method being invoked to perform
  // predictions.
  //
  // Format:
  // `/namespace.Service/Method/`
  // Example:
  // `/tensorflow.serving.PredictionService/Predict`
  string method_name = 2;

  // The prediction input.
  bytes input = 3;
}

// Response message for
// [PredictionService.DirectRawPredict][google.cloud.aiplatform.v1beta1.PredictionService.DirectRawPredict].
message DirectRawPredictResponse {
  // The prediction output.
  bytes output = 1;
}

// Request message for
// [PredictionService.StreamDirectPredict][google.cloud.aiplatform.v1beta1.PredictionService.StreamDirectPredict].
//
// The first message must contain
// [endpoint][google.cloud.aiplatform.v1beta1.StreamDirectPredictRequest.endpoint]
// field and optionally
// [inputs][google.cloud.aiplatform.v1beta1.StreamDirectPredictRequest.inputs].
// The subsequent messages must contain
// [inputs][google.cloud.aiplatform.v1beta1.StreamDirectPredictRequest.inputs].
message StreamDirectPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Optional. The prediction input.
  repeated Tensor inputs = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The parameters that govern the prediction.
  Tensor parameters = 3 [(google.api.field_behavior) = OPTIONAL];
}

// Response message for
// [PredictionService.StreamDirectPredict][google.cloud.aiplatform.v1beta1.PredictionService.StreamDirectPredict].
message StreamDirectPredictResponse {
  // The prediction output.
  repeated Tensor outputs = 1;

  // The parameters that govern the prediction.
  Tensor parameters = 2;
}

// Request message for
// [PredictionService.StreamDirectRawPredict][google.cloud.aiplatform.v1beta1.PredictionService.StreamDirectRawPredict].
//
// The first message must contain
// [endpoint][google.cloud.aiplatform.v1beta1.StreamDirectRawPredictRequest.endpoint]
// and
// [method_name][google.cloud.aiplatform.v1beta1.StreamDirectRawPredictRequest.method_name]
// fields and optionally
// [input][google.cloud.aiplatform.v1beta1.StreamDirectRawPredictRequest.input].
// The subsequent messages must contain
// [input][google.cloud.aiplatform.v1beta1.StreamDirectRawPredictRequest.input].
// [method_name][google.cloud.aiplatform.v1beta1.StreamDirectRawPredictRequest.method_name]
// in the subsequent messages have no effect.
message StreamDirectRawPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Optional. Fully qualified name of the API method being invoked to perform
  // predictions.
  //
  // Format:
  // `/namespace.Service/Method/`
  // Example:
  // `/tensorflow.serving.PredictionService/Predict`
  string method_name = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The prediction input.
  bytes input = 3 [(google.api.field_behavior) = OPTIONAL];
}

// Response message for
// [PredictionService.StreamDirectRawPredict][google.cloud.aiplatform.v1beta1.PredictionService.StreamDirectRawPredict].
message StreamDirectRawPredictResponse {
  // The prediction output.
  bytes output = 1;
}

// Request message for
// [PredictionService.StreamingPredict][google.cloud.aiplatform.v1beta1.PredictionService.StreamingPredict].
//
// The first message must contain
// [endpoint][google.cloud.aiplatform.v1beta1.StreamingPredictRequest.endpoint]
// field and optionally
// [inputs][google.cloud.aiplatform.v1beta1.StreamingPredictRequest.inputs].
// The subsequent messages must contain
// [inputs][google.cloud.aiplatform.v1beta1.StreamingPredictRequest.inputs].
message StreamingPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // The prediction input.
  repeated Tensor inputs = 2;

  // The parameters that govern the prediction.
  Tensor parameters = 3;
}

// Response message for
// [PredictionService.StreamingPredict][google.cloud.aiplatform.v1beta1.PredictionService.StreamingPredict].
message StreamingPredictResponse {
  // The prediction output.
  repeated Tensor outputs = 1;

  // The parameters that govern the prediction.
  Tensor parameters = 2;
}

// Request message for
// [PredictionService.StreamingRawPredict][google.cloud.aiplatform.v1beta1.PredictionService.StreamingRawPredict].
//
// The first message must contain
// [endpoint][google.cloud.aiplatform.v1beta1.StreamingRawPredictRequest.endpoint]
// and
// [method_name][google.cloud.aiplatform.v1beta1.StreamingRawPredictRequest.method_name]
// fields and optionally
// [input][google.cloud.aiplatform.v1beta1.StreamingRawPredictRequest.input].
// The subsequent messages must contain
// [input][google.cloud.aiplatform.v1beta1.StreamingRawPredictRequest.input].
// [method_name][google.cloud.aiplatform.v1beta1.StreamingRawPredictRequest.method_name]
// in the subsequent messages have no effect.
message StreamingRawPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Fully qualified name of the API method being invoked to perform
  // predictions.
  //
  // Format:
  // `/namespace.Service/Method/`
  // Example:
  // `/tensorflow.serving.PredictionService/Predict`
  string method_name = 2;

  // The prediction input.
  bytes input = 3;
}

// Response message for
// [PredictionService.StreamingRawPredict][google.cloud.aiplatform.v1beta1.PredictionService.StreamingRawPredict].
message StreamingRawPredictResponse {
  // The prediction output.
  bytes output = 1;
}

// Request message for
// [PredictionService.Explain][google.cloud.aiplatform.v1beta1.PredictionService.Explain].
message ExplainRequest {
  // Required. The name of the Endpoint requested to serve the explanation.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Required. The instances that are the input to the explanation call.
  // A DeployedModel may have an upper limit on the number of instances it
  // supports per request, and when it is exceeded the explanation call errors
  // in case of AutoML Models, or, in case of customer created Models, the
  // behaviour is as documented by that Model.
  // The schema of any single instance may be specified via Endpoint's
  // DeployedModels'
  // [Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
  // [instance_schema_uri][google.cloud.aiplatform.v1beta1.PredictSchemata.instance_schema_uri].
  repeated google.protobuf.Value instances = 2
      [(google.api.field_behavior) = REQUIRED];

  // The parameters that govern the prediction. The schema of the parameters may
  // be specified via Endpoint's DeployedModels'
  // [Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
  // [parameters_schema_uri][google.cloud.aiplatform.v1beta1.PredictSchemata.parameters_schema_uri].
  google.protobuf.Value parameters = 4;

  // If specified, overrides the
  // [explanation_spec][google.cloud.aiplatform.v1beta1.DeployedModel.explanation_spec]
  // of the DeployedModel. Can be used for explaining prediction results with
  // different configurations, such as:
  //  - Explaining top-5 predictions results as opposed to top-1;
  //  - Increasing path count or step count of the attribution methods to reduce
  //    approximate errors;
  //  - Using different baselines for explaining the prediction results.
  ExplanationSpecOverride explanation_spec_override = 5;

  // Optional. This field is the same as
  // [explanation_spec_override][google.cloud.aiplatform.v1beta1.ExplainRequest.explanation_spec_override],
  // but supports multiple explanations to occur in parallel. The key can be
  // any string. Each override will be run against the model, then its
  // explanations will be grouped together.
  //
  // Note - these explanations are run **In Addition** to the default
  // Explanation in the deployed model.
  map<string, ExplanationSpecOverride> concurrent_explanation_spec_override = 6
      [(google.api.field_behavior) = OPTIONAL];

  // If specified, this ExplainRequest will be served by the chosen
  // DeployedModel, overriding
  // [Endpoint.traffic_split][google.cloud.aiplatform.v1beta1.Endpoint.traffic_split].
  string deployed_model_id = 3;
}

// Response message for
// [PredictionService.Explain][google.cloud.aiplatform.v1beta1.PredictionService.Explain].
message ExplainResponse {
  // This message is a wrapper grouping Concurrent Explanations.
  message ConcurrentExplanation {
    // The explanations of the Model's
    // [PredictResponse.predictions][google.cloud.aiplatform.v1beta1.PredictResponse.predictions].
    //
    // It has the same number of elements as
    // [instances][google.cloud.aiplatform.v1beta1.ExplainRequest.instances] to
    // be explained.
    repeated Explanation explanations = 1;
  }

  // The explanations of the Model's
  // [PredictResponse.predictions][google.cloud.aiplatform.v1beta1.PredictResponse.predictions].
  //
  // It has the same number of elements as
  // [instances][google.cloud.aiplatform.v1beta1.ExplainRequest.instances] to be
  // explained.
  repeated Explanation explanations = 1;

  // This field stores the results of the explanations run in parallel with
  // the default explanation strategy/method.
  map<string, ConcurrentExplanation> concurrent_explanations = 4;

  // ID of the Endpoint's DeployedModel that served this explanation.
  string deployed_model_id = 2;

  // The predictions that are the output of the predictions call.
  // Same as
  // [PredictResponse.predictions][google.cloud.aiplatform.v1beta1.PredictResponse.predictions].
  repeated google.protobuf.Value predictions = 3;
}

// Request message for
// [PredictionService.CountTokens][google.cloud.aiplatform.v1beta1.PredictionService.CountTokens].
message CountTokensRequest {
  // Required. The name of the Endpoint requested to perform token counting.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Optional. The name of the publisher model requested to serve the
  // prediction. Format:
  // `projects/{project}/locations/{location}/publishers/*/models/*`
  string model = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The instances that are the input to token counting call.
  // Schema is identical to the prediction schema of the underlying model.
  repeated google.protobuf.Value instances = 2
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Input content.
  repeated Content contents = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The user provided system instructions for the model.
  // Note: only text should be used in parts and content in each part will be in
  // a separate paragraph.
  optional Content system_instruction = 5
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. A list of `Tools` the model may use to generate the next
  // response.
  //
  // A `Tool` is a piece of code that enables the system to interact with
  // external systems to perform an action, or set of actions, outside of
  // knowledge and scope of the model.
  repeated Tool tools = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Generation config that the model will use to generate the
  // response.
  optional GenerationConfig generation_config = 7
      [(google.api.field_behavior) = OPTIONAL];
}

// Response message for
// [PredictionService.CountTokens][google.cloud.aiplatform.v1beta1.PredictionService.CountTokens].
message CountTokensResponse {
  // The total number of tokens counted across all instances from the request.
  int32 total_tokens = 1;

  // The total number of billable characters counted across all instances from
  // the request.
  int32 total_billable_characters = 2;
}

// Request message for [PredictionService.GenerateContent].
message GenerateContentRequest {
  // Required. The fully qualified name of the publisher model or tuned model
  // endpoint to use.
  //
  // Publisher model format:
  // `projects/{project}/locations/{location}/publishers/*/models/*`
  //
  // Tuned model endpoint format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string model = 5 [(google.api.field_behavior) = REQUIRED];

  // Required. The content of the current conversation with the model.
  //
  // For single-turn queries, this is a single instance. For multi-turn queries,
  // this is a repeated field that contains conversation history + latest
  // request.
  repeated Content contents = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. The user provided system instructions for the model.
  // Note: only text should be used in parts and content in each part will be in
  // a separate paragraph.
  optional Content system_instruction = 8
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The name of the cached content used as context to serve the
  // prediction. Note: only used in explicit caching, where users can have
  // control over caching (e.g. what content to cache) and enjoy guaranteed cost
  // savings. Format:
  // `projects/{project}/locations/{location}/cachedContents/{cachedContent}`
  string cached_content = 9 [
    (google.api.field_behavior) = OPTIONAL,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/CachedContent"
    }
  ];

  // Optional. A list of `Tools` the model may use to generate the next
  // response.
  //
  // A `Tool` is a piece of code that enables the system to interact with
  // external systems to perform an action, or set of actions, outside of
  // knowledge and scope of the model.
  repeated Tool tools = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Tool config. This config is shared for all tools provided in the
  // request.
  ToolConfig tool_config = 7 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The labels with user-defined metadata for the request. It is used
  // for billing and reporting only.
  //
  // Label keys and values can be no longer than 63 characters
  // (Unicode codepoints) and can only contain lowercase letters, numeric
  // characters, underscores, and dashes. International characters are allowed.
  // Label values are optional. Label keys must start with a letter.
  map<string, string> labels = 10 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Per request settings for blocking unsafe content.
  // Enforced on GenerateContentResponse.candidates.
  repeated SafetySetting safety_settings = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Generation config.
  GenerationConfig generation_config = 4
      [(google.api.field_behavior) = OPTIONAL];
}

// Response message for [PredictionService.GenerateContent].
message GenerateContentResponse {
  // Content filter results for a prompt sent in the request.
  message PromptFeedback {
    // Blocked reason enumeration.
    enum BlockedReason {
      // Unspecified blocked reason.
      BLOCKED_REASON_UNSPECIFIED = 0;

      // Candidates blocked due to safety.
      SAFETY = 1;

      // Candidates blocked due to other reason.
      OTHER = 2;

      // Candidates blocked due to the terms which are included from the
      // terminology blocklist.
      BLOCKLIST = 3;

      // Candidates blocked due to prohibited content.
      PROHIBITED_CONTENT = 4;
    }

    // Output only. Blocked reason.
    BlockedReason block_reason = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Safety ratings.
    repeated SafetyRating safety_ratings = 2
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. A readable block reason message.
    string block_reason_message = 3
        [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // Usage metadata about response(s).
  message UsageMetadata {
    // Number of tokens in the request. When `cached_content` is set, this is
    // still the total effective prompt size meaning this includes the number of
    // tokens in the cached content.
    int32 prompt_token_count = 1;

    // Number of tokens in the response(s).
    int32 candidates_token_count = 2;

    // Total token count for prompt and response candidates.
    int32 total_token_count = 3;

    // Output only. Number of tokens in the cached part in the input (the cached
    // content).
    int32 cached_content_token_count = 5
        [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // Output only. Generated candidates.
  repeated Candidate candidates = 2
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The model version used to generate the response.
  string model_version = 11 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Content filter results for a prompt sent in the request.
  // Note: Sent only in the first stream chunk.
  // Only happens when no candidates were generated due to content violations.
  PromptFeedback prompt_feedback = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Usage metadata about the response(s).
  UsageMetadata usage_metadata = 4;
}

// Request message for [PredictionService.ChatCompletions].
message ChatCompletionsRequest {
  // Required. The name of the endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Optional. The prediction input. Supports HTTP headers and arbitrary data
  // payload.
  google.api.HttpBody http_body = 2 [(google.api.field_behavior) = OPTIONAL];
}

// Response message for [PredictionService.PredictLongRunning].
message PredictLongRunningResponse {
  // The response of the long running operation.
  oneof response {
    // The response of the video generation prediction.
    GenerateVideoResponse generate_video_response = 1;
  }
}

// Metadata for PredictLongRunning long running operations.
message PredictLongRunningMetadata {}

// Generate video response.
message GenerateVideoResponse {
  // The cloud storage uris of the generated videos.
  repeated string generated_samples = 1;

  // Returns if any videos were filtered due to RAI policies.
  optional int32 rai_media_filtered_count = 2;

  // Returns rai failure reasons if any.
  repeated string rai_media_filtered_reasons = 3;
}