// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1beta1;

import "google/api/field_behavior.proto";
import "google/cloud/aiplatform/v1beta1/accelerator_type.proto";

option csharp_namespace = "Google.Cloud.AIPlatform.V1Beta1";
option go_package = "cloud.google.com/go/aiplatform/apiv1beta1/aiplatformpb;aiplatformpb";
option java_multiple_files = true;
option java_outer_classname = "MachineResourcesProto";
option java_package = "com.google.cloud.aiplatform.v1beta1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1beta1";
option ruby_package = "Google::Cloud::AIPlatform::V1beta1";

// Specification of a single machine.
message MachineSpec {
  // Immutable. The type of the machine.
  //
  // See the [list of machine types supported for
  // prediction](https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types).
  //
  // See the [list of machine types supported for custom
  // training](https://cloud.google.com/vertex-ai/docs/training/configure-compute#machine-types).
  //
  // For [DeployedModel][google.cloud.aiplatform.v1beta1.DeployedModel] this
  // field is optional, and the default value is `n1-standard-2`. For
  // [BatchPredictionJob][google.cloud.aiplatform.v1beta1.BatchPredictionJob] or
  // as part of [WorkerPoolSpec][google.cloud.aiplatform.v1beta1.WorkerPoolSpec]
  // this field is required.
  string machine_type = 1 [(google.api.field_behavior) = IMMUTABLE];

  // Immutable. The type of accelerator(s) that may be attached to the machine
  // as per
  // [accelerator_count][google.cloud.aiplatform.v1beta1.MachineSpec.accelerator_count].
  AcceleratorType accelerator_type = 2
      [(google.api.field_behavior) = IMMUTABLE];

  // The number of accelerators to attach to the machine.
  int32 accelerator_count = 3;
}

// A description of resources that are dedicated to a DeployedModel, and
// that need a higher degree of manual configuration.
message DedicatedResources {
  // Required. Immutable. The specification of a single machine used by the
  // prediction.
  MachineSpec machine_spec = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Required. Immutable. The minimum number of machine replicas this
  // DeployedModel will always be deployed on. This value must be greater than
  // or equal to 1.
  //
  // If traffic against the DeployedModel increases, it may dynamically be
  // deployed onto more replicas, and as traffic decreases, some of these extra
  // replicas may be freed.
  int32 min_replica_count = 2 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Immutable. The maximum number of replicas this DeployedModel may be
  // deployed on when the traffic against it increases. If the requested value
  // is too large, the deployment will error, but if deployment succeeds then
  // the ability to scale the model to that many replicas is guaranteed (barring
  // service outages). If traffic against the DeployedModel increases beyond
  // what its replicas at maximum may handle, a portion of the traffic will be
  // dropped. If this value is not provided,
  // [min_replica_count][google.cloud.aiplatform.v1beta1.DedicatedResources.min_replica_count]
  // is used as the default value.
  //
  // The value of this field impacts the charge against Vertex CPU and GPU
  // quotas. Specifically, you will be charged for (max_replica_count *
  // number of cores in the selected machine type) and (max_replica_count *
  // number of GPUs per replica in the selected machine type).
  int32 max_replica_count = 3 [(google.api.field_behavior) = IMMUTABLE];

  // Immutable. The metric specifications that override a resource
  // utilization metric's (CPU utilization, accelerator's duty cycle, and so on)
  // target value (defaults to 60 if not set). At most one entry is allowed per
  // metric.
  //
  // If
  // [machine_spec.accelerator_count][google.cloud.aiplatform.v1beta1.MachineSpec.accelerator_count]
  // is above 0, the autoscaling will be based on both the CPU utilization and
  // the accelerator's duty cycle metrics. It scales up when either metric
  // exceeds its target value and scales down when both metrics are under their
  // target values. The default target value is 60 for both metrics.
  //
  // If
  // [machine_spec.accelerator_count][google.cloud.aiplatform.v1beta1.MachineSpec.accelerator_count]
  // is 0, the autoscaling will be based on the CPU utilization metric only,
  // with a default target value of 60 if not explicitly set.
  //
  // For example, in the case of Online Prediction, if you want to override
  // target CPU utilization to 80, you should set
  // [autoscaling_metric_specs.metric_name][google.cloud.aiplatform.v1beta1.AutoscalingMetricSpec.metric_name]
  // to `aiplatform.googleapis.com/prediction/online/cpu/utilization` and
  // [autoscaling_metric_specs.target][google.cloud.aiplatform.v1beta1.AutoscalingMetricSpec.target]
  // to `80`.
  repeated AutoscalingMetricSpec autoscaling_metric_specs = 4
      [(google.api.field_behavior) = IMMUTABLE];
}

// A description of resources that are, to a large degree, decided by Vertex AI
// and require only a modest additional configuration.
// Each Model supporting these resources documents its specific guidelines.
message AutomaticResources {
  // Immutable. The minimum number of replicas this DeployedModel will always be
  // deployed on. If traffic against it increases, it may dynamically be
  // deployed onto more replicas up to
  // [max_replica_count][google.cloud.aiplatform.v1beta1.AutomaticResources.max_replica_count],
  // and as traffic decreases, some of these extra replicas may be freed. If the
  // requested value is too large, the deployment will error.
  int32 min_replica_count = 1 [(google.api.field_behavior) = IMMUTABLE];

  // Immutable. The maximum number of replicas this DeployedModel may be
  // deployed on when the traffic against it increases. If the requested value
  // is too large, the deployment will error, but if deployment succeeds then
  // the ability to scale the model to that many replicas is guaranteed (barring
  // service outages). If traffic against the DeployedModel increases beyond
  // what its replicas at maximum may handle, a portion of the traffic will be
  // dropped. If this value is not provided, no upper bound for scaling under
  // heavy traffic will be assumed, though Vertex AI may be unable to scale
  // beyond a certain replica number.
  int32 max_replica_count = 2 [(google.api.field_behavior) = IMMUTABLE];
}

// A description of resources that are used for performing batch operations, are
// dedicated to a Model, and need manual configuration.
message BatchDedicatedResources {
  // Required. Immutable. The specification of a single machine.
  MachineSpec machine_spec = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Immutable. The number of machine replicas used at the start of the batch
  // operation. If not set, Vertex AI decides the starting number, which is not
  // greater than
  // [max_replica_count][google.cloud.aiplatform.v1beta1.BatchDedicatedResources.max_replica_count].
  int32 starting_replica_count = 2 [(google.api.field_behavior) = IMMUTABLE];

  // Immutable. The maximum number of machine replicas the batch operation may
  // be scaled to. The default value is 10.
  int32 max_replica_count = 3 [(google.api.field_behavior) = IMMUTABLE];
}

// Statistics information about resource consumption.
message ResourcesConsumed {
  // Output only. The number of replica hours used. Note that many replicas may
  // run in parallel, and additionally any given work may be queued for some
  // time. Therefore this value is not strictly related to wall time.
  double replica_hours = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Represents the spec of disk options.
message DiskSpec {
  // Type of the boot disk (default is "pd-ssd").
  // Valid values: "pd-ssd" (Persistent Disk Solid State Drive) or
  // "pd-standard" (Persistent Disk Hard Disk Drive).
  string boot_disk_type = 1;

  // Size in GB of the boot disk (default is 100GB).
  int32 boot_disk_size_gb = 2;
}

// Represents a mount configuration for Network File System (NFS) to mount.
message NfsMount {
  // Required. IP address of the NFS server.
  string server = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. Source path exported from the NFS server.
  // Has to start with '/', and combined with the IP address, it indicates
  // the source mount path in the form of `server:path`.
  string path = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. Destination mount path. The NFS will be mounted for the user
  // under /mnt/nfs/.
  string mount_point = 3 [(google.api.field_behavior) = REQUIRED];
}

// The metric specification that defines the target resource utilization
// (CPU utilization, accelerator's duty cycle, and so on) for calculating the
// desired replica count.
message AutoscalingMetricSpec {
  // Required. The resource metric name.
  // Supported metrics:
  //
  // * For Online Prediction:
  // * `aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle`
  // * `aiplatform.googleapis.com/prediction/online/cpu/utilization`
  string metric_name = 1 [(google.api.field_behavior) = REQUIRED];

  // The target resource utilization in percentage (1% - 100%) for the given
  // metric; once the real usage deviates from the target by a certain
  // percentage, the machine replicas change. The default value is 60
  // (representing 60%) if not provided.
  int32 target = 2;
}