// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1;

import "google/api/annotations.proto";
import "google/api/field_behavior.proto";
import "google/cloud/aiplatform/v1/accelerator_type.proto";

option go_package = "google.golang.org/genproto/googleapis/cloud/aiplatform/v1;aiplatform";
option java_multiple_files = true;
option java_outer_classname = "MachineResourcesProto";
option java_package = "com.google.cloud.aiplatform.v1";

// Specification of a single machine.
message MachineSpec {
  // Immutable. The type of the machine. For the machine types supported for
  // prediction, see
  // https://tinyurl.com/aip-docs/predictions/machine-types.
  // For machine types supported for creating a custom training job, see
  // https://tinyurl.com/aip-docs/training/configure-compute.
  //
  // For [DeployedModel][google.cloud.aiplatform.v1.DeployedModel] this field
  // is optional, and the default value is `n1-standard-2`. For
  // [BatchPredictionJob][google.cloud.aiplatform.v1.BatchPredictionJob] or as
  // part of [WorkerPoolSpec][google.cloud.aiplatform.v1.WorkerPoolSpec] this
  // field is required.
  string machine_type = 1 [(google.api.field_behavior) = IMMUTABLE];

  // Immutable. The type of accelerator(s) that may be attached to the machine
  // as per
  // [accelerator_count][google.cloud.aiplatform.v1.MachineSpec.accelerator_count].
  AcceleratorType accelerator_type = 2
      [(google.api.field_behavior) = IMMUTABLE];

  // The number of accelerators to attach to the machine.
  int32 accelerator_count = 3;
}

// A description of resources that are dedicated to a DeployedModel, and
// that need a higher degree of manual configuration.
message DedicatedResources {
  // Required. Immutable. The specification of a single machine used by the
  // prediction.
  MachineSpec machine_spec = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Required. Immutable. The minimum number of machine replicas this
  // DeployedModel will always be deployed on. If traffic against it
  // increases, it may dynamically be deployed onto more replicas, and as
  // traffic decreases, some of these extra replicas may be freed.
  // Note: if
  // [machine_spec.accelerator_count][google.cloud.aiplatform.v1.MachineSpec.accelerator_count]
  // is above 0, currently the model will always be deployed precisely on
  // [min_replica_count][google.cloud.aiplatform.v1.DedicatedResources.min_replica_count].
  int32 min_replica_count = 2 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Immutable. The maximum number of replicas this DeployedModel may be
  // deployed on when the traffic against it increases. If the requested value
  // is too large, the deployment will error, but if deployment succeeds then
  // the ability to scale the model to that many replicas is guaranteed
  // (barring service outages). If traffic against the DeployedModel increases
  // beyond what its replicas at maximum may handle, a portion of the traffic
  // will be dropped. If this value is not provided,
  // [min_replica_count][google.cloud.aiplatform.v1.DedicatedResources.min_replica_count]
  // is used as the default value.
  int32 max_replica_count = 3 [(google.api.field_behavior) = IMMUTABLE];
}

// A description of resources that are, to a large degree, decided by
// AI Platform, and require only a modest additional configuration.
// Each Model supporting these resources documents its specific guidelines.
message AutomaticResources {
  // Immutable. The minimum number of replicas this DeployedModel will always
  // be deployed on. If traffic against it increases, it may dynamically be
  // deployed onto more replicas up to
  // [max_replica_count][google.cloud.aiplatform.v1.AutomaticResources.max_replica_count],
  // and as traffic decreases, some of these extra replicas may be freed.
  // If the requested value is too large, the deployment will error.
  int32 min_replica_count = 1 [(google.api.field_behavior) = IMMUTABLE];

  // Immutable. The maximum number of replicas this DeployedModel may be
  // deployed on when the traffic against it increases. If the requested value
  // is too large, the deployment will error, but if deployment succeeds then
  // the ability to scale the model to that many replicas is guaranteed
  // (barring service outages). If traffic against the DeployedModel increases
  // beyond what its replicas at maximum may handle, a portion of the traffic
  // will be dropped. If this value is not provided, no upper bound for
  // scaling under heavy traffic will be assumed, though AI Platform may be
  // unable to scale beyond a certain replica number.
  int32 max_replica_count = 2 [(google.api.field_behavior) = IMMUTABLE];
}

// A description of resources that are used for performing batch operations,
// are dedicated to a Model, and need manual configuration.
message BatchDedicatedResources {
  // Required. Immutable. The specification of a single machine.
  MachineSpec machine_spec = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Immutable. The number of machine replicas used at the start of the batch
  // operation. If not set, AI Platform decides the starting number, not
  // greater than
  // [max_replica_count][google.cloud.aiplatform.v1.BatchDedicatedResources.max_replica_count].
  int32 starting_replica_count = 2 [(google.api.field_behavior) = IMMUTABLE];

  // Immutable. The maximum number of machine replicas the batch operation may
  // be scaled to. The default value is 10.
  int32 max_replica_count = 3 [(google.api.field_behavior) = IMMUTABLE];
}

// Statistics information about resource consumption.
message ResourcesConsumed {
  // Output only. The number of replica hours used. Note that many replicas
  // may run in parallel, and additionally any given work may be queued for
  // some time. Therefore this value is not strictly related to wall time.
  double replica_hours = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Represents the spec of disk options.
message DiskSpec {
  // Type of the boot disk (default is "pd-ssd").
  // Valid values: "pd-ssd" (Persistent Disk Solid State Drive) or
  // "pd-standard" (Persistent Disk Hard Disk Drive).
  string boot_disk_type = 1;

  // Size in GB of the boot disk (default is 100GB).
  int32 boot_disk_size_gb = 2;
}