// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/httpbody.proto";
import "google/api/resource.proto";
import "google/cloud/aiplatform/v1/explanation.proto";
import "google/cloud/aiplatform/v1/types.proto";
import "google/protobuf/struct.proto";

option csharp_namespace = "Google.Cloud.AIPlatform.V1";
option go_package = "cloud.google.com/go/aiplatform/apiv1/aiplatformpb;aiplatformpb";
option java_multiple_files = true;
option java_outer_classname = "PredictionServiceProto";
option java_package = "com.google.cloud.aiplatform.v1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
option ruby_package = "Google::Cloud::AIPlatform::V1";

// A service for online predictions and explanations.
service PredictionService {
  option (google.api.default_host) = "aiplatform.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Perform an online prediction.
  rpc Predict(PredictRequest) returns (PredictResponse) {
    option (google.api.http) = {
      post: "/v1/{endpoint=projects/*/locations/*/endpoints/*}:predict"
      body: "*"
      additional_bindings {
        post: "/v1/{endpoint=projects/*/locations/*/publishers/*/models/*}:predict"
        body: "*"
      }
    };
    option (google.api.method_signature) = "endpoint,instances,parameters";
  }

  // Perform an online prediction with an arbitrary HTTP payload.
  //
  // The response includes the following HTTP headers:
  //
  // * `X-Vertex-AI-Endpoint-Id`: ID of the
  // [Endpoint][google.cloud.aiplatform.v1.Endpoint] that served this
  // prediction.
  //
  // * `X-Vertex-AI-Deployed-Model-Id`: ID of the Endpoint's
  // [DeployedModel][google.cloud.aiplatform.v1.DeployedModel] that served this
  // prediction.
  rpc RawPredict(RawPredictRequest) returns (google.api.HttpBody) {
    option (google.api.http) = {
      post: "/v1/{endpoint=projects/*/locations/*/endpoints/*}:rawPredict"
      body: "*"
      additional_bindings {
        post: "/v1/{endpoint=projects/*/locations/*/publishers/*/models/*}:rawPredict"
        body: "*"
      }
    };
    option (google.api.method_signature) = "endpoint,http_body";
  }

  // Perform a server-side streaming online prediction request for Vertex
  // LLM streaming.
  rpc ServerStreamingPredict(StreamingPredictRequest)
      returns (stream StreamingPredictResponse) {
    option (google.api.http) = {
      post: "/v1/{endpoint=projects/*/locations/*/endpoints/*}:serverStreamingPredict"
      body: "*"
      additional_bindings {
        post: "/v1/{endpoint=projects/*/locations/*/publishers/*/models/*}:serverStreamingPredict"
        body: "*"
      }
    };
  }

  // Perform an online explanation.
  //
  // If
  // [deployed_model_id][google.cloud.aiplatform.v1.ExplainRequest.deployed_model_id]
  // is specified, the corresponding DeployedModel must have
  // [explanation_spec][google.cloud.aiplatform.v1.DeployedModel.explanation_spec]
  // populated. If
  // [deployed_model_id][google.cloud.aiplatform.v1.ExplainRequest.deployed_model_id]
  // is not specified, all DeployedModels must have
  // [explanation_spec][google.cloud.aiplatform.v1.DeployedModel.explanation_spec]
  // populated.
  rpc Explain(ExplainRequest) returns (ExplainResponse) {
    option (google.api.http) = {
      post: "/v1/{endpoint=projects/*/locations/*/endpoints/*}:explain"
      body: "*"
    };
    option (google.api.method_signature) =
        "endpoint,instances,parameters,deployed_model_id";
  }
}
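
// Illustrative example (not part of the generated API surface): a `Predict`
// call over the HTTP binding above posts a JSON body that follows the proto3
// JSON mapping of PredictRequest. The resource IDs, instance payload, and
// parameters below are placeholders; the actual schemas are defined by the
// deployed Model's [PredictSchemata][google.cloud.aiplatform.v1.PredictSchemata].
//
//   POST /v1/projects/my-project/locations/us-central1/endpoints/1234:predict
//   {
//     "instances": [
//       { "feature_a": 1.0, "feature_b": "some value" }
//     ],
//     "parameters": { "confidenceThreshold": 0.5 }
//   }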

// Request message for
// [PredictionService.Predict][google.cloud.aiplatform.v1.PredictionService.Predict].
message PredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Required. The instances that are the input to the prediction call.
  // A DeployedModel may have an upper limit on the number of instances it
  // supports per request. When that limit is exceeded, the prediction call
  // errors for AutoML Models; for customer-created Models, the behavior is as
  // documented by that Model.
  // The schema of any single instance may be specified via Endpoint's
  // DeployedModels' [Model's][google.cloud.aiplatform.v1.DeployedModel.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1.Model.predict_schemata]
  // [instance_schema_uri][google.cloud.aiplatform.v1.PredictSchemata.instance_schema_uri].
  repeated google.protobuf.Value instances = 2
      [(google.api.field_behavior) = REQUIRED];

  // The parameters that govern the prediction. The schema of the parameters
  // may be specified via Endpoint's DeployedModels'
  // [Model's][google.cloud.aiplatform.v1.DeployedModel.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1.Model.predict_schemata]
  // [parameters_schema_uri][google.cloud.aiplatform.v1.PredictSchemata.parameters_schema_uri].
  google.protobuf.Value parameters = 3;
}

// Response message for
// [PredictionService.Predict][google.cloud.aiplatform.v1.PredictionService.Predict].
message PredictResponse {
  // The predictions that are the output of the predictions call.
  // The schema of any single prediction may be specified via Endpoint's
  // DeployedModels' [Model's][google.cloud.aiplatform.v1.DeployedModel.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1.Model.predict_schemata]
  // [prediction_schema_uri][google.cloud.aiplatform.v1.PredictSchemata.prediction_schema_uri].
  repeated google.protobuf.Value predictions = 1;

  // ID of the Endpoint's DeployedModel that served this prediction.
  string deployed_model_id = 2;

  // Output only. The resource name of the Model which is deployed as the
  // DeployedModel that this prediction hits.
  string model = 3 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Model"
    }
  ];

  // Output only. The version ID of the Model which is deployed as the
  // DeployedModel that this prediction hits.
  string model_version_id = 5 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The [display
  // name][google.cloud.aiplatform.v1.Model.display_name] of the Model which is
  // deployed as the DeployedModel that this prediction hits.
  string model_display_name = 4 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Request-level metadata returned by the model. The metadata
  // type will be dependent upon the model implementation.
  google.protobuf.Value metadata = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];
}
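
// Illustrative example (not part of the generated API surface): a JSON
// PredictResponse body under the proto3 JSON mapping. The prediction content
// and resource IDs below are placeholders.
//
//   {
//     "predictions": [ { "label": "cat", "score": 0.97 } ],
//     "deployedModelId": "456",
//     "model": "projects/my-project/locations/us-central1/models/789",
//     "modelVersionId": "1",
//     "modelDisplayName": "my-model"
//   }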

// Request message for
// [PredictionService.RawPredict][google.cloud.aiplatform.v1.PredictionService.RawPredict].
message RawPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // The prediction input. Supports HTTP headers and arbitrary data payload.
  //
  // A [DeployedModel][google.cloud.aiplatform.v1.DeployedModel] may have an
  // upper limit on the number of instances it supports per request. When this
  // limit is exceeded for an AutoML model, the
  // [RawPredict][google.cloud.aiplatform.v1.PredictionService.RawPredict]
  // method returns an error. When this limit is exceeded for a custom-trained
  // model, the behavior varies depending on the model.
  //
  // You can specify the schema for each instance in the
  // [predict_schemata.instance_schema_uri][google.cloud.aiplatform.v1.PredictSchemata.instance_schema_uri]
  // field when you create a [Model][google.cloud.aiplatform.v1.Model]. This
  // schema applies when you deploy the `Model` as a `DeployedModel` to an
  // [Endpoint][google.cloud.aiplatform.v1.Endpoint] and use the `RawPredict`
  // method.
  google.api.HttpBody http_body = 2;
}

// Request message for
// [PredictionService.StreamingPredict][google.cloud.aiplatform.v1.PredictionService.StreamingPredict].
//
// The first message must contain
// [endpoint][google.cloud.aiplatform.v1.StreamingPredictRequest.endpoint]
// field and optionally [input][]. The subsequent messages must contain
// [input][].
message StreamingPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // The prediction input.
  repeated Tensor inputs = 2;

  // The parameters that govern the prediction.
  Tensor parameters = 3;
}

// Response message for
// [PredictionService.StreamingPredict][google.cloud.aiplatform.v1.PredictionService.StreamingPredict].
message StreamingPredictResponse {
  // The prediction output.
  repeated Tensor outputs = 1;

  // The parameters that govern the prediction.
  Tensor parameters = 2;
}
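
// Illustrative sketch (not part of the generated API surface): a
// `ServerStreamingPredict` call sends one StreamingPredictRequest and
// receives a stream of StreamingPredictResponse messages. Tensor contents
// are elided placeholders; the exact on-the-wire encoding depends on the
// transport (gRPC or the HTTP binding above).
//
//   request:   { "endpoint": "projects/my-project/locations/us-central1/endpoints/1234",
//                "inputs": [ { ... } ], "parameters": { ... } }
//   response:  { "outputs": [ { ... } ] }
//   response:  { "outputs": [ { ... } ] }
//   ...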

// Request message for
// [PredictionService.Explain][google.cloud.aiplatform.v1.PredictionService.Explain].
message ExplainRequest {
  // Required. The name of the Endpoint requested to serve the explanation.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Required. The instances that are the input to the explanation call.
  // A DeployedModel may have an upper limit on the number of instances it
  // supports per request. When that limit is exceeded, the explanation call
  // errors for AutoML Models; for customer-created Models, the behavior is as
  // documented by that Model.
  // The schema of any single instance may be specified via Endpoint's
  // DeployedModels' [Model's][google.cloud.aiplatform.v1.DeployedModel.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1.Model.predict_schemata]
  // [instance_schema_uri][google.cloud.aiplatform.v1.PredictSchemata.instance_schema_uri].
  repeated google.protobuf.Value instances = 2
      [(google.api.field_behavior) = REQUIRED];

  // The parameters that govern the prediction. The schema of the parameters
  // may be specified via Endpoint's DeployedModels'
  // [Model's][google.cloud.aiplatform.v1.DeployedModel.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1.Model.predict_schemata]
  // [parameters_schema_uri][google.cloud.aiplatform.v1.PredictSchemata.parameters_schema_uri].
  google.protobuf.Value parameters = 4;

  // If specified, overrides the
  // [explanation_spec][google.cloud.aiplatform.v1.DeployedModel.explanation_spec]
  // of the DeployedModel. Can be used for explaining prediction results with
  // different configurations, such as:
  //  - Explaining top-5 prediction results as opposed to top-1;
  //  - Increasing path count or step count of the attribution methods to
  //    reduce approximation errors;
  //  - Using different baselines for explaining the prediction results.
  ExplanationSpecOverride explanation_spec_override = 5;

  // If specified, this ExplainRequest will be served by the chosen
  // DeployedModel, overriding
  // [Endpoint.traffic_split][google.cloud.aiplatform.v1.Endpoint.traffic_split].
  string deployed_model_id = 3;
}

// Response message for
// [PredictionService.Explain][google.cloud.aiplatform.v1.PredictionService.Explain].
message ExplainResponse {
  // The explanations of the Model's
  // [PredictResponse.predictions][google.cloud.aiplatform.v1.PredictResponse.predictions].
  //
  // It has the same number of elements as
  // [instances][google.cloud.aiplatform.v1.ExplainRequest.instances] to be
  // explained.
  repeated Explanation explanations = 1;

  // ID of the Endpoint's DeployedModel that served this explanation.
  string deployed_model_id = 2;

  // The predictions that are the output of the predictions call.
  // Same as
  // [PredictResponse.predictions][google.cloud.aiplatform.v1.PredictResponse.predictions].
  repeated google.protobuf.Value predictions = 3;
}
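
// Illustrative example (not part of the generated API surface): an `Explain`
// request pinned to a specific DeployedModel via `deployed_model_id`,
// expressed in the proto3 JSON mapping. Resource IDs and instance content are
// placeholders.
//
//   POST /v1/projects/my-project/locations/us-central1/endpoints/1234:explain
//   {
//     "instances": [ { "feature_a": 1.0 } ],
//     "deployedModelId": "456"
//   }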