// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/aiplatform/v1/encryption_spec.proto";
import "google/cloud/aiplatform/v1/explanation.proto";
import "google/cloud/aiplatform/v1/io.proto";
import "google/cloud/aiplatform/v1/machine_resources.proto";
import "google/cloud/aiplatform/v1/service_networking.proto";
import "google/protobuf/timestamp.proto";

option csharp_namespace = "Google.Cloud.AIPlatform.V1";
option go_package = "cloud.google.com/go/aiplatform/apiv1/aiplatformpb;aiplatformpb";
option java_multiple_files = true;
option java_outer_classname = "EndpointProto";
option java_package = "com.google.cloud.aiplatform.v1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
option ruby_package = "Google::Cloud::AIPlatform::V1";

// Models are deployed into an Endpoint, and afterwards the Endpoint is called
// to obtain predictions and explanations.
message Endpoint {
  option (google.api.resource) = {
    type: "aiplatform.googleapis.com/Endpoint"
    pattern: "projects/{project}/locations/{location}/endpoints/{endpoint}"
    pattern: "projects/{project}/locations/{location}/publishers/{publisher}/models/{model}"
  };

  // Output only. The resource name of the Endpoint.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Required. The display name of the Endpoint.
  // The name can be up to 128 characters long and can consist of any UTF-8
  // characters.
  string display_name = 2 [(google.api.field_behavior) = REQUIRED];

  // The description of the Endpoint.
  string description = 3;

  // Output only. The models deployed in this Endpoint.
  // To add or remove DeployedModels use
  // [EndpointService.DeployModel][google.cloud.aiplatform.v1.EndpointService.DeployModel]
  // and
  // [EndpointService.UndeployModel][google.cloud.aiplatform.v1.EndpointService.UndeployModel]
  // respectively.
  repeated DeployedModel deployed_models = 4
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // A map from a DeployedModel's ID to the percentage of this Endpoint's
  // traffic that should be forwarded to that DeployedModel.
  //
  // If a DeployedModel's ID is not listed in this map, then it receives no
  // traffic.
  //
  // The traffic percentage values must add up to 100, or the map must be
  // empty if the Endpoint is not to accept any traffic at the moment.
  map<string, int32> traffic_split = 5;

  // Used to perform consistent read-modify-write updates. If not set, a blind
  // "overwrite" update happens.
  string etag = 6;

  // The labels with user-defined metadata to organize your Endpoints.
  //
  // Label keys and values can be no longer than 64 characters
  // (Unicode codepoints), can only contain lowercase letters, numeric
  // characters, underscores and dashes. International characters are allowed.
  //
  // See https://goo.gl/xmQnxf for more information and examples of labels.
  map<string, string> labels = 7;

  // Output only. Timestamp when this Endpoint was created.
  google.protobuf.Timestamp create_time = 8
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Timestamp when this Endpoint was last updated.
  google.protobuf.Timestamp update_time = 9
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Customer-managed encryption key spec for an Endpoint. If set, this
  // Endpoint and all sub-resources of this Endpoint will be secured by
  // this key.
  EncryptionSpec encryption_spec = 10;

  // Optional. The full name of the Google Compute Engine
  // [network](https://cloud.google.com/compute/docs/networks-and-firewalls#networks)
  // to which the Endpoint should be peered.
  //
  // Private services access must already be configured for the network. If
  // left unspecified, the Endpoint is not peered with any network.
  //
  // Only one of the fields,
  // [network][google.cloud.aiplatform.v1.Endpoint.network] or
  // [enable_private_service_connect][google.cloud.aiplatform.v1.Endpoint.enable_private_service_connect],
  // can be set.
  //
  // [Format](https://cloud.google.com/compute/docs/reference/rest/v1/networks/insert):
  // `projects/{project}/global/networks/{network}`.
  // Where `{project}` is a project number, as in `12345`, and `{network}` is
  // a network name.
  string network = 13 [
    (google.api.field_behavior) = OPTIONAL,
    (google.api.resource_reference) = {
      type: "compute.googleapis.com/Network"
    }
  ];

  // Deprecated: If true, expose the Endpoint via private service connect.
  //
  // Only one of the fields,
  // [network][google.cloud.aiplatform.v1.Endpoint.network] or
  // [enable_private_service_connect][google.cloud.aiplatform.v1.Endpoint.enable_private_service_connect],
  // can be set.
  bool enable_private_service_connect = 17 [deprecated = true];

  // Optional. Configuration for private service connect.
  //
  // [network][google.cloud.aiplatform.v1.Endpoint.network] and
  // [private_service_connect_config][google.cloud.aiplatform.v1.Endpoint.private_service_connect_config]
  // are mutually exclusive.
  PrivateServiceConnectConfig private_service_connect_config = 21
      [(google.api.field_behavior) = OPTIONAL];

  // Output only. Resource name of the Model Monitoring job associated with this
  // Endpoint if monitoring is enabled by
  // [JobService.CreateModelDeploymentMonitoringJob][google.cloud.aiplatform.v1.JobService.CreateModelDeploymentMonitoringJob].
  // Format:
  // `projects/{project}/locations/{location}/modelDeploymentMonitoringJobs/{model_deployment_monitoring_job}`
  string model_deployment_monitoring_job = 14 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/ModelDeploymentMonitoringJob"
    }
  ];

  // Configures the request-response logging for online prediction.
  PredictRequestResponseLoggingConfig predict_request_response_logging_config =
      18;

  // If true, the endpoint will be exposed through a dedicated
  // DNS [Endpoint.dedicated_endpoint_dns]. Your request to the dedicated DNS
  // will be isolated from other users' traffic and will have better performance
  // and reliability.
  // Note: Once you enable the dedicated endpoint, you won't be able to send
  // requests to the shared DNS {region}-aiplatform.googleapis.com. The
  // limitation will be removed soon.
  bool dedicated_endpoint_enabled = 24;

  // Output only. DNS of the dedicated endpoint. Will only be populated if
  // dedicated_endpoint_enabled is true.
  // Format:
  // `https://{endpoint_id}.{region}-{project_number}.prediction.vertexai.goog`.
  string dedicated_endpoint_dns = 25
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Reserved for future use.
  bool satisfies_pzs = 27 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Reserved for future use.
  bool satisfies_pzi = 28 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// A deployment of a Model. Endpoints contain one or more DeployedModels.
message DeployedModel {
  // The prediction (for example, the machine) resources that the DeployedModel
  // uses. The user is billed for the resources (at least their minimal amount)
  // even if the DeployedModel receives no traffic.
  // Not all Models support all resource types. See
  // [Model.supported_deployment_resources_types][google.cloud.aiplatform.v1.Model.supported_deployment_resources_types].
  // Required except for Large Model Deploy use cases.
  oneof prediction_resources {
    // A description of resources that are dedicated to the DeployedModel, and
    // that need a higher degree of manual configuration.
    DedicatedResources dedicated_resources = 7;

    // A description of resources that to a large degree are decided by Vertex
    // AI, and require only a modest additional configuration.
    AutomaticResources automatic_resources = 8;

    // The resource name of the shared DeploymentResourcePool to deploy on.
    // Format:
    // `projects/{project}/locations/{location}/deploymentResourcePools/{deployment_resource_pool}`
    string shared_resources = 17 [(google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/DeploymentResourcePool"
    }];
  }

  // Immutable. The ID of the DeployedModel. If not provided upon deployment,
  // Vertex AI will generate a value for this ID.
  //
  // This value should be 1-10 characters, and valid characters are `/[0-9]/`.
  string id = 1 [(google.api.field_behavior) = IMMUTABLE];

  // Required. The resource name of the Model that this is the deployment of.
  // Note that the Model may be in a different location than the DeployedModel's
  // Endpoint.
  //
  // The resource name may contain version id or version alias to specify the
  // version.
  //  Example: `projects/{project}/locations/{location}/models/{model}@2`
  //              or
  //            `projects/{project}/locations/{location}/models/{model}@golden`
  // if no version is specified, the default version will be deployed.
  string model = 2 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Model"
    }
  ];

  // Output only. The version ID of the model that is deployed.
  string model_version_id = 18 [(google.api.field_behavior) = OUTPUT_ONLY];

  // The display name of the DeployedModel. If not provided upon creation,
  // the Model's display_name is used.
  string display_name = 3;

  // Output only. Timestamp when the DeployedModel was created.
  google.protobuf.Timestamp create_time = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Explanation configuration for this DeployedModel.
  //
  // When deploying a Model using
  // [EndpointService.DeployModel][google.cloud.aiplatform.v1.EndpointService.DeployModel],
  // this value overrides the value of
  // [Model.explanation_spec][google.cloud.aiplatform.v1.Model.explanation_spec].
  // All fields of
  // [explanation_spec][google.cloud.aiplatform.v1.DeployedModel.explanation_spec]
  // are optional in the request. If a field of
  // [explanation_spec][google.cloud.aiplatform.v1.DeployedModel.explanation_spec]
  // is not populated, the value of the same field of
  // [Model.explanation_spec][google.cloud.aiplatform.v1.Model.explanation_spec]
  // is inherited. If the corresponding
  // [Model.explanation_spec][google.cloud.aiplatform.v1.Model.explanation_spec]
  // is not populated, all fields of the
  // [explanation_spec][google.cloud.aiplatform.v1.DeployedModel.explanation_spec]
  // will be used for the explanation configuration.
  ExplanationSpec explanation_spec = 9;

  // If true, deploy the model without explainable feature, regardless of the
  // existence of
  // [Model.explanation_spec][google.cloud.aiplatform.v1.Model.explanation_spec]
  // or
  // [explanation_spec][google.cloud.aiplatform.v1.DeployedModel.explanation_spec].
  bool disable_explanations = 19;

  // The service account that the DeployedModel's container runs as. Specify the
  // email address of the service account. If this service account is not
  // specified, the container runs as a service account that doesn't have access
  // to the resource project.
  //
  // Users deploying the Model must have the `iam.serviceAccounts.actAs`
  // permission on this service account.
  string service_account = 11;

  // For custom-trained Models and AutoML Tabular Models, the container of the
  // DeployedModel instances will send `stderr` and `stdout` streams to
  // Cloud Logging by default. Please note that the logs incur cost,
  // which is subject to [Cloud Logging
  // pricing](https://cloud.google.com/logging/pricing).
  //
  // User can disable container logging by setting this flag to true.
  bool disable_container_logging = 15;

  // If true, online prediction access logs are sent to Cloud
  // Logging.
  // These logs are like standard server access logs, containing
  // information like timestamp and latency for each prediction request.
  //
  // Note that logs may incur a cost, especially if your project
  // receives prediction requests at a high queries per second rate (QPS).
  // Estimate your costs before enabling this option.
  bool enable_access_logging = 13;

  // Output only. Provide paths for users to send predict/explain/health
  // requests directly to the deployed model services running on Cloud via
  // private services access. This field is populated if
  // [network][google.cloud.aiplatform.v1.Endpoint.network] is configured.
  PrivateEndpoints private_endpoints = 14
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// PrivateEndpoints proto is used to provide paths for users to send
// requests privately.
// To send requests via private service access, use predict_http_uri,
// explain_http_uri or health_http_uri. To send requests via private service
// connect, use service_attachment.
message PrivateEndpoints {
  // Output only. Http(s) path to send prediction requests.
  string predict_http_uri = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Http(s) path to send explain requests.
  string explain_http_uri = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Http(s) path to send health check requests.
  string health_http_uri = 3 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The name of the service attachment resource. Populated if
  // private service connect is enabled.
  string service_attachment = 4 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Configuration for logging request-response to a BigQuery table.
message PredictRequestResponseLoggingConfig {
  // Whether logging is enabled.
  bool enabled = 1;

  // Percentage of requests to be logged, expressed as a fraction in the
  // range (0, 1].
  double sampling_rate = 2;

  // BigQuery table for logging.
  // If only given a project, a new dataset will be created with name
  // `logging_<endpoint-display-name>_<endpoint-id>` where
  // <endpoint-display-name> will be made BigQuery-dataset-name compatible (e.g.
  // most special characters will become underscores). If no table name is
  // given, a new table will be created with name `request_response_logging`.
  BigQueryDestination bigquery_destination = 3;
}