// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/httpbody.proto";
import "google/api/resource.proto";
import "google/cloud/aiplatform/v1/explanation.proto";
import "google/cloud/aiplatform/v1/types.proto";
import "google/protobuf/struct.proto";

option csharp_namespace = "Google.Cloud.AIPlatform.V1";
option go_package = "cloud.google.com/go/aiplatform/apiv1/aiplatformpb;aiplatformpb";
option java_multiple_files = true;
option java_outer_classname = "PredictionServiceProto";
option java_package = "com.google.cloud.aiplatform.v1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
option ruby_package = "Google::Cloud::AIPlatform::V1";

// A service for online predictions and explanations.
service PredictionService {
  option (google.api.default_host) = "aiplatform.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Perform an online prediction.
  rpc Predict(PredictRequest) returns (PredictResponse) {
    option (google.api.http) = {
      post: "/v1/{endpoint=projects/*/locations/*/endpoints/*}:predict"
      body: "*"
      additional_bindings {
        post: "/v1/{endpoint=projects/*/locations/*/publishers/*/models/*}:predict"
        body: "*"
      }
    };
    option (google.api.method_signature) = "endpoint,instances,parameters";
  }

  // Perform an online prediction with an arbitrary HTTP payload.
  //
  // The response includes the following HTTP headers:
  //
  // * `X-Vertex-AI-Endpoint-Id`: ID of the
  // [Endpoint][google.cloud.aiplatform.v1.Endpoint] that served this
  // prediction.
  //
  // * `X-Vertex-AI-Deployed-Model-Id`: ID of the Endpoint's
  // [DeployedModel][google.cloud.aiplatform.v1.DeployedModel] that served this
  // prediction.
  rpc RawPredict(RawPredictRequest) returns (google.api.HttpBody) {
    option (google.api.http) = {
      post: "/v1/{endpoint=projects/*/locations/*/endpoints/*}:rawPredict"
      body: "*"
      additional_bindings {
        post: "/v1/{endpoint=projects/*/locations/*/publishers/*/models/*}:rawPredict"
        body: "*"
      }
    };
    option (google.api.method_signature) = "endpoint,http_body";
  }

  // Perform a server-side streaming online prediction request for Vertex
  // LLM streaming.
  rpc ServerStreamingPredict(StreamingPredictRequest)
      returns (stream StreamingPredictResponse) {
    option (google.api.http) = {
      post: "/v1/{endpoint=projects/*/locations/*/endpoints/*}:serverStreamingPredict"
      body: "*"
      additional_bindings {
        post: "/v1/{endpoint=projects/*/locations/*/publishers/*/models/*}:serverStreamingPredict"
        body: "*"
      }
    };
  }

  // Perform an online explanation.
  //
  // If
  // [deployed_model_id][google.cloud.aiplatform.v1.ExplainRequest.deployed_model_id]
  // is specified, the corresponding DeployedModel must have
  // [explanation_spec][google.cloud.aiplatform.v1.DeployedModel.explanation_spec]
  // populated. If
  // [deployed_model_id][google.cloud.aiplatform.v1.ExplainRequest.deployed_model_id]
  // is not specified, all DeployedModels must have
  // [explanation_spec][google.cloud.aiplatform.v1.DeployedModel.explanation_spec]
  // populated.
  rpc Explain(ExplainRequest) returns (ExplainResponse) {
    option (google.api.http) = {
      post: "/v1/{endpoint=projects/*/locations/*/endpoints/*}:explain"
      body: "*"
    };
    option (google.api.method_signature) =
        "endpoint,instances,parameters,deployed_model_id";
  }
}
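
// Illustrative example (not part of the generated API surface): a `Predict`
// call over the HTTP binding above posts a JSON body that follows the proto3
// JSON mapping of PredictRequest. The resource IDs, instance payload, and
// parameters below are placeholders; the actual schemas are defined by the
// deployed Model's [PredictSchemata][google.cloud.aiplatform.v1.PredictSchemata].
//
//   POST /v1/projects/my-project/locations/us-central1/endpoints/1234:predict
//   {
//     "instances": [
//       { "feature_a": 1.0, "feature_b": "some value" }
//     ],
//     "parameters": { "confidenceThreshold": 0.5 }
//   }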

// Request message for
// [PredictionService.Predict][google.cloud.aiplatform.v1.PredictionService.Predict].
message PredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Required. The instances that are the input to the prediction call.
  // A DeployedModel may have an upper limit on the number of instances it
  // supports per request. When that limit is exceeded, the prediction call
  // errors for AutoML Models; for customer-created Models, the behavior is as
  // documented by that Model.
  // The schema of any single instance may be specified via Endpoint's
  // DeployedModels' [Model's][google.cloud.aiplatform.v1.DeployedModel.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1.Model.predict_schemata]
  // [instance_schema_uri][google.cloud.aiplatform.v1.PredictSchemata.instance_schema_uri].
  repeated google.protobuf.Value instances = 2
      [(google.api.field_behavior) = REQUIRED];

  // The parameters that govern the prediction. The schema of the parameters
  // may be specified via Endpoint's DeployedModels'
  // [Model's][google.cloud.aiplatform.v1.DeployedModel.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1.Model.predict_schemata]
  // [parameters_schema_uri][google.cloud.aiplatform.v1.PredictSchemata.parameters_schema_uri].
  google.protobuf.Value parameters = 3;
}

// Response message for
// [PredictionService.Predict][google.cloud.aiplatform.v1.PredictionService.Predict].
message PredictResponse {
  // The predictions that are the output of the predictions call.
  // The schema of any single prediction may be specified via Endpoint's
  // DeployedModels' [Model's][google.cloud.aiplatform.v1.DeployedModel.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1.Model.predict_schemata]
  // [prediction_schema_uri][google.cloud.aiplatform.v1.PredictSchemata.prediction_schema_uri].
  repeated google.protobuf.Value predictions = 1;

  // ID of the Endpoint's DeployedModel that served this prediction.
  string deployed_model_id = 2;

  // Output only. The resource name of the Model which is deployed as the
  // DeployedModel that this prediction hits.
  string model = 3 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Model"
    }
  ];

  // Output only. The version ID of the Model which is deployed as the
  // DeployedModel that this prediction hits.
  string model_version_id = 5 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The [display
  // name][google.cloud.aiplatform.v1.Model.display_name] of the Model which is
  // deployed as the DeployedModel that this prediction hits.
  string model_display_name = 4 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Request-level metadata returned by the model. The metadata
  // type will be dependent upon the model implementation.
  google.protobuf.Value metadata = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];
}
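
// Illustrative example (not part of the generated API surface): a JSON
// PredictResponse body under the proto3 JSON mapping. The prediction content
// and resource IDs below are placeholders.
//
//   {
//     "predictions": [ { "label": "cat", "score": 0.97 } ],
//     "deployedModelId": "456",
//     "model": "projects/my-project/locations/us-central1/models/789",
//     "modelVersionId": "1",
//     "modelDisplayName": "my-model"
//   }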

// Request message for
// [PredictionService.RawPredict][google.cloud.aiplatform.v1.PredictionService.RawPredict].
message RawPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // The prediction input. Supports HTTP headers and arbitrary data payload.
  //
  // A [DeployedModel][google.cloud.aiplatform.v1.DeployedModel] may have an
  // upper limit on the number of instances it supports per request. When this
  // limit is exceeded for an AutoML model, the
  // [RawPredict][google.cloud.aiplatform.v1.PredictionService.RawPredict]
  // method returns an error. When this limit is exceeded for a custom-trained
  // model, the behavior varies depending on the model.
  //
  // You can specify the schema for each instance in the
  // [predict_schemata.instance_schema_uri][google.cloud.aiplatform.v1.PredictSchemata.instance_schema_uri]
  // field when you create a [Model][google.cloud.aiplatform.v1.Model]. This
  // schema applies when you deploy the `Model` as a `DeployedModel` to an
  // [Endpoint][google.cloud.aiplatform.v1.Endpoint] and use the `RawPredict`
  // method.
  google.api.HttpBody http_body = 2;
}

// Request message for
// [PredictionService.StreamingPredict][google.cloud.aiplatform.v1.PredictionService.StreamingPredict].
//
// The first message must contain
// [endpoint][google.cloud.aiplatform.v1.StreamingPredictRequest.endpoint]
// field and optionally [input][]. The subsequent messages must contain
// [input][].
message StreamingPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // The prediction input.
  repeated Tensor inputs = 2;

  // The parameters that govern the prediction.
  Tensor parameters = 3;
}

// Response message for
// [PredictionService.StreamingPredict][google.cloud.aiplatform.v1.PredictionService.StreamingPredict].
message StreamingPredictResponse {
  // The prediction output.
  repeated Tensor outputs = 1;

  // The parameters that govern the prediction.
  Tensor parameters = 2;
}
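
// Illustrative sketch (not part of the generated API surface): a
// `ServerStreamingPredict` call sends one StreamingPredictRequest and
// receives a stream of StreamingPredictResponse messages. Tensor contents
// are elided placeholders; the exact on-the-wire encoding depends on the
// transport (gRPC or the HTTP binding above).
//
//   request:   { "endpoint": "projects/my-project/locations/us-central1/endpoints/1234",
//                "inputs": [ { ... } ], "parameters": { ... } }
//   response:  { "outputs": [ { ... } ] }
//   response:  { "outputs": [ { ... } ] }
//   ...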

// Request message for
// [PredictionService.Explain][google.cloud.aiplatform.v1.PredictionService.Explain].
message ExplainRequest {
  // Required. The name of the Endpoint requested to serve the explanation.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Required. The instances that are the input to the explanation call.
  // A DeployedModel may have an upper limit on the number of instances it
  // supports per request. When that limit is exceeded, the explanation call
  // errors for AutoML Models; for customer-created Models, the behavior is as
  // documented by that Model.
  // The schema of any single instance may be specified via Endpoint's
  // DeployedModels' [Model's][google.cloud.aiplatform.v1.DeployedModel.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1.Model.predict_schemata]
  // [instance_schema_uri][google.cloud.aiplatform.v1.PredictSchemata.instance_schema_uri].
  repeated google.protobuf.Value instances = 2
      [(google.api.field_behavior) = REQUIRED];

  // The parameters that govern the prediction. The schema of the parameters
  // may be specified via Endpoint's DeployedModels'
  // [Model's][google.cloud.aiplatform.v1.DeployedModel.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1.Model.predict_schemata]
  // [parameters_schema_uri][google.cloud.aiplatform.v1.PredictSchemata.parameters_schema_uri].
  google.protobuf.Value parameters = 4;

  // If specified, overrides the
  // [explanation_spec][google.cloud.aiplatform.v1.DeployedModel.explanation_spec]
  // of the DeployedModel. Can be used for explaining prediction results with
  // different configurations, such as:
  //  - Explaining top-5 prediction results as opposed to top-1;
  //  - Increasing path count or step count of the attribution methods to
  //    reduce approximation errors;
  //  - Using different baselines for explaining the prediction results.
  ExplanationSpecOverride explanation_spec_override = 5;

  // If specified, this ExplainRequest will be served by the chosen
  // DeployedModel, overriding
  // [Endpoint.traffic_split][google.cloud.aiplatform.v1.Endpoint.traffic_split].
  string deployed_model_id = 3;
}

// Response message for
// [PredictionService.Explain][google.cloud.aiplatform.v1.PredictionService.Explain].
message ExplainResponse {
  // The explanations of the Model's
  // [PredictResponse.predictions][google.cloud.aiplatform.v1.PredictResponse.predictions].
  //
  // It has the same number of elements as
  // [instances][google.cloud.aiplatform.v1.ExplainRequest.instances] to be
  // explained.
  repeated Explanation explanations = 1;

  // ID of the Endpoint's DeployedModel that served this explanation.
  string deployed_model_id = 2;

  // The predictions that are the output of the predictions call.
  // Same as
  // [PredictResponse.predictions][google.cloud.aiplatform.v1.PredictResponse.predictions].
  repeated google.protobuf.Value predictions = 3;
}
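
// Illustrative example (not part of the generated API surface): an `Explain`
// request pinned to a specific DeployedModel via `deployed_model_id`,
// expressed in the proto3 JSON mapping. Resource IDs and instance content are
// placeholders.
//
//   POST /v1/projects/my-project/locations/us-central1/endpoints/1234:explain
//   {
//     "instances": [ { "feature_a": 1.0 } ],
//     "deployedModelId": "456"
//   }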