// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1;

import "google/api/field_behavior.proto";
import "google/cloud/aiplatform/v1/accelerator_type.proto";
import "google/api/annotations.proto";

option go_package = "google.golang.org/genproto/googleapis/cloud/aiplatform/v1;aiplatform";
option java_multiple_files = true;
option java_outer_classname = "MachineResourcesProto";
option java_package = "com.google.cloud.aiplatform.v1";

// Specification of a single machine.
message MachineSpec {
  // Immutable. The type of the machine.
  //
  // For the machine types supported for prediction, see
  // https://tinyurl.com/aip-docs/predictions/machine-types.
  // For the machine types supported for creating a custom training job, see
  // https://tinyurl.com/aip-docs/training/configure-compute.
  //
  // This field is optional for
  // [DeployedModel][google.cloud.aiplatform.v1.DeployedModel], where the
  // default value is `n1-standard-2`. It is required for
  // [BatchPredictionJob][google.cloud.aiplatform.v1.BatchPredictionJob] or
  // when used as part of
  // [WorkerPoolSpec][google.cloud.aiplatform.v1.WorkerPoolSpec].
  string machine_type = 1 [
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Immutable. The type of accelerator(s) that may be attached to the machine
  // as per
  // [accelerator_count][google.cloud.aiplatform.v1.MachineSpec.accelerator_count].
  AcceleratorType accelerator_type = 2 [
    (google.api.field_behavior) = IMMUTABLE
  ];

  // The number of accelerators to attach to the machine.
  int32 accelerator_count = 3;
}

// A description of resources that are dedicated to a DeployedModel and that
// need a higher degree of manual configuration.
message DedicatedResources {
  // Required. Immutable. The specification of a single machine used by the
  // prediction.
  MachineSpec machine_spec = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Required. Immutable. The minimum number of machine replicas this
  // DeployedModel will always be deployed on. If traffic against it increases,
  // it may dynamically be deployed onto more replicas, and as traffic
  // decreases, some of these extra replicas may be freed.
  //
  // Note: if
  // [machine_spec.accelerator_count][google.cloud.aiplatform.v1.MachineSpec.accelerator_count]
  // is above 0, currently the model will always be deployed precisely on
  // [min_replica_count][google.cloud.aiplatform.v1.DedicatedResources.min_replica_count].
  int32 min_replica_count = 2 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Immutable. The maximum number of replicas this DeployedModel may be
  // deployed on when the traffic against it increases.
  //
  // If the requested value is too large, the deployment will error, but if
  // deployment succeeds then the ability to scale the model to that many
  // replicas is guaranteed (barring service outages). If traffic against the
  // DeployedModel increases beyond what its replicas at maximum may handle, a
  // portion of the traffic will be dropped.
  //
  // If this value is not provided,
  // [min_replica_count][google.cloud.aiplatform.v1.DedicatedResources.min_replica_count]
  // is used as the default value.
  int32 max_replica_count = 3 [
    (google.api.field_behavior) = IMMUTABLE
  ];
}

// A description of resources that are, to a large degree, decided by
// AI Platform and that require only a modest additional configuration.
// Each Model supporting these resources documents its specific guidelines.
message AutomaticResources {
  // Immutable. The minimum number of replicas this DeployedModel will always
  // be deployed on.
  //
  // If traffic against it increases, it may dynamically be deployed onto more
  // replicas up to
  // [max_replica_count][google.cloud.aiplatform.v1.AutomaticResources.max_replica_count],
  // and as traffic decreases, some of these extra replicas may be freed.
  // If the requested value is too large, the deployment will error.
  int32 min_replica_count = 1 [
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Immutable. The maximum number of replicas this DeployedModel may be
  // deployed on when the traffic against it increases.
  //
  // If the requested value is too large, the deployment will error, but if
  // deployment succeeds then the ability to scale the model to that many
  // replicas is guaranteed (barring service outages). If traffic against the
  // DeployedModel increases beyond what its replicas at maximum may handle, a
  // portion of the traffic will be dropped.
  //
  // If this value is not provided, no upper bound for scaling under heavy
  // traffic will be assumed, though AI Platform may be unable to scale beyond
  // a certain replica number.
  int32 max_replica_count = 2 [
    (google.api.field_behavior) = IMMUTABLE
  ];
}

// A description of resources that are used for performing batch operations,
// are dedicated to a Model, and need manual configuration.
message BatchDedicatedResources {
  // Required. Immutable. The specification of a single machine.
  MachineSpec machine_spec = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Immutable. The number of machine replicas used at the start of the batch
  // operation. If not set, AI Platform decides the starting number, which is
  // not greater than
  // [max_replica_count][google.cloud.aiplatform.v1.BatchDedicatedResources.max_replica_count].
  int32 starting_replica_count = 2 [
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Immutable. The maximum number of machine replicas the batch operation may
  // be scaled to. The default value is 10.
  int32 max_replica_count = 3 [
    (google.api.field_behavior) = IMMUTABLE
  ];
}

// Statistics information about resource consumption.
message ResourcesConsumed {
  // Output only. The number of replica hours used.
  //
  // Note that many replicas may run in parallel, and additionally any given
  // work may be queued for some time. Therefore this value is not strictly
  // related to wall time.
  double replica_hours = 1 [
    (google.api.field_behavior) = OUTPUT_ONLY
  ];
}

// Represents the spec of disk options.
message DiskSpec {
  // Type of the boot disk. The default is "pd-ssd".
  //
  // Valid values:
  //   * "pd-ssd" (Persistent Disk Solid State Drive)
  //   * "pd-standard" (Persistent Disk Hard Disk Drive)
  string boot_disk_type = 1;

  // Size of the boot disk, in GB. The default is 100GB.
  int32 boot_disk_size_gb = 2;
}