// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dataproc.v1;

import "google/api/field_behavior.proto";

option go_package = "google.golang.org/genproto/googleapis/cloud/dataproc/v1;dataproc";
option java_multiple_files = true;
option java_outer_classname = "SharedProto";
option java_package = "com.google.cloud.dataproc.v1";

// Runtime configuration for a workload.
message RuntimeConfig {
  // Optional. Version of the batch runtime.
  string version = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Custom container image for the job runtime environment. If not
  // specified, a default container image will be used.
  string container_image = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A mapping of property names to values, which are used to
  // configure workload execution.
  map<string, string> properties = 3 [(google.api.field_behavior) = OPTIONAL];
}

// Environment configuration for a workload.
message EnvironmentConfig {
  // Optional. Execution configuration for a workload.
  ExecutionConfig execution_config = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Peripherals configuration that the workload has access to.
  PeripheralsConfig peripherals_config = 2
      [(google.api.field_behavior) = OPTIONAL];
}

// Execution configuration for a workload.
message ExecutionConfig {
  // Optional. Service account used to execute the workload.
  string service_account = 2 [(google.api.field_behavior) = OPTIONAL];

  // Network configuration for workload execution.
  oneof network {
    // Optional. Network URI to connect the workload to.
    string network_uri = 4 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Subnetwork URI to connect the workload to.
    string subnetwork_uri = 5 [(google.api.field_behavior) = OPTIONAL];
  }

  // Optional. Tags used for network traffic control.
  repeated string network_tags = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Cloud KMS key to use for encryption.
  string kms_key = 7 [(google.api.field_behavior) = OPTIONAL];
}

// Spark History Server configuration for the workload.
message SparkHistoryServerConfig {
  // Optional. Resource name of an existing Dataproc Cluster to act as a Spark
  // History Server for the workload.
  //
  // Example:
  //
  // * `projects/[project_id]/regions/[region]/clusters/[cluster_name]`
  string dataproc_cluster = 1 [(google.api.field_behavior) = OPTIONAL];
}

// Auxiliary services configuration for a workload.
message PeripheralsConfig {
  // Optional. Resource name of an existing Dataproc Metastore service.
  //
  // Example:
  //
  // * `projects/[project_id]/locations/[region]/services/[service_id]`
  string metastore_service = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Spark History Server configuration for the workload.
  SparkHistoryServerConfig spark_history_server_config = 2
      [(google.api.field_behavior) = OPTIONAL];
}
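// Illustrative only: a minimal EnvironmentConfig for a workload, shown in its
// JSON representation, to show how the execution and peripherals messages
// above nest. This sketch is not part of the API definition; the project,
// region, subnetwork, service account, and service names are placeholders.
//
//   {
//     "executionConfig": {
//       "serviceAccount": "workload-sa@my-project.iam.gserviceaccount.com",
//       "subnetworkUri": "projects/my-project/regions/us-central1/subnetworks/default",
//       "networkTags": ["dataproc-workload"]
//     },
//     "peripheralsConfig": {
//       "metastoreService": "projects/my-project/locations/us-central1/services/my-metastore",
//       "sparkHistoryServerConfig": {
//         "dataprocCluster": "projects/my-project/regions/us-central1/clusters/history-server"
//       }
//     }
//   }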
// Runtime information about workload execution.
message RuntimeInfo {
  // Output only. Map of remote access endpoints (such as web interfaces and
  // APIs) to their URIs.
  map<string, string> endpoints = 1
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. A URI pointing to the location of the stdout and stderr of
  // the workload.
  string output_uri = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. A URI pointing to the location of the diagnostics tarball.
  string diagnostic_output_uri = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// The cluster's GKE config.
message GkeClusterConfig {
  // Optional. A target GKE cluster to deploy to. It must be in the same
  // project and region as the Dataproc cluster (the GKE cluster can be zonal
  // or regional).
  // Format: 'projects/{project}/locations/{location}/clusters/{cluster_id}'
  string gke_cluster_target = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. GKE NodePools where workloads will be scheduled. At least one
  // node pool must be assigned the 'default' role. Each role can be given to
  // only a single NodePoolTarget. All NodePools must have the same location
  // settings. If a nodePoolTarget is not specified, Dataproc constructs a
  // default nodePoolTarget.
  repeated GkeNodePoolTarget node_pool_target = 3
      [(google.api.field_behavior) = OPTIONAL];
}

// The configuration for running the Dataproc cluster on Kubernetes.
message KubernetesClusterConfig {
  // Optional. A namespace within the Kubernetes cluster to deploy into. If
  // this namespace does not exist, it is created. If it exists, Dataproc
  // verifies that another Dataproc VirtualCluster is not installed into it.
  // If not specified, the name of the Dataproc Cluster is used.
  string kubernetes_namespace = 1 [(google.api.field_behavior) = OPTIONAL];

  oneof config {
    // Required. The configuration for running the Dataproc cluster on GKE.
    GkeClusterConfig gke_cluster_config = 2
        [(google.api.field_behavior) = REQUIRED];
  }

  // Optional. The software configuration for this Dataproc cluster running on
  // Kubernetes.
  KubernetesSoftwareConfig kubernetes_software_config = 3
      [(google.api.field_behavior) = OPTIONAL];
}

// The software configuration for this Dataproc cluster running on Kubernetes.
message KubernetesSoftwareConfig {
  // The components that should be installed in this Dataproc cluster. The key
  // must be a string from the KubernetesComponent enumeration. The value is
  // the version of the software to be installed.
  // At least one entry must be specified.
  map<string, string> component_version = 1;

  // The properties to set on daemon config files.
  //
  // Property keys are specified in `prefix:property` format, for example
  // `spark:spark.kubernetes.container.image`. The following are supported
  // prefixes and their mappings:
  //
  // * spark: `spark-defaults.conf`
  //
  // For more information, see [Cluster
  // properties](https://cloud.google.com/dataproc/docs/concepts/cluster-properties).
  map<string, string> properties = 2;
}
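// Illustrative only: a KubernetesClusterConfig sketch in JSON form, showing
// how the GKE cluster, node pool target, and software configuration messages
// fit together. The cluster path, component key, version string, and
// container image are placeholders/assumptions, not values defined in this
// file.
//
//   {
//     "kubernetesNamespace": "dataproc-workloads",
//     "gkeClusterConfig": {
//       "gkeClusterTarget": "projects/my-project/locations/us-central1/clusters/my-gke-cluster",
//       "nodePoolTarget": [
//         {
//           "nodePool": "projects/my-project/locations/us-central1/clusters/my-gke-cluster/nodePools/default-pool",
//           "roles": ["DEFAULT"]
//         }
//       ]
//     },
//     "kubernetesSoftwareConfig": {
//       "componentVersion": { "SPARK": "3.1-dataproc-7" },
//       "properties": { "spark:spark.kubernetes.container.image": "example-image:tag" }
//     }
//   }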
// GKE NodePools that Dataproc workloads run on.
message GkeNodePoolTarget {
  // `Role` specifies whose tasks will run on the NodePool. The roles can be
  // specific to workloads. Exactly one GkeNodePoolTarget within the
  // VirtualCluster must have the 'default' role, which is used to run all
  // workloads that are not associated with a NodePool.
  enum Role {
    // Role is unspecified.
    ROLE_UNSPECIFIED = 0;

    // Any roles that are not directly assigned to a NodePool run on the
    // `default` role's NodePool.
    DEFAULT = 1;

    // Run controllers and webhooks.
    CONTROLLER = 2;

    // Run spark driver.
    SPARK_DRIVER = 3;

    // Run spark executors.
    SPARK_EXECUTOR = 4;
  }

  // Required. The target GKE NodePool.
  // Format:
  // 'projects/{project}/locations/{location}/clusters/{cluster}/nodePools/{node_pool}'
  string node_pool = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The roles associated with the GKE NodePool.
  repeated Role roles = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. The configuration for the GKE NodePool.
  //
  // If specified, Dataproc attempts to create a NodePool with the specified
  // shape. If one with the same name already exists, it is verified against
  // all specified fields. If a field differs, the virtual cluster creation
  // will fail.
  //
  // If omitted, any NodePool with the specified name is used. If a NodePool
  // with the specified name does not exist, Dataproc creates a NodePool with
  // default values.
  GkeNodePoolConfig node_pool_config = 3
      [(google.api.field_behavior) = OPTIONAL];
}

// The configuration of a GKE NodePool used by a [Dataproc-on-GKE
// cluster](https://cloud.google.com/dataproc/docs/concepts/jobs/dataproc-gke#create-a-dataproc-on-gke-cluster).
message GkeNodePoolConfig {
  // Parameters that describe cluster nodes.
  message GkeNodeConfig {
    // Optional. The name of a Compute Engine [machine
    // type](https://cloud.google.com/compute/docs/machine-types).
    string machine_type = 1 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Whether the nodes are created as [preemptible VM
    // instances](https://cloud.google.com/compute/docs/instances/preemptible).
    bool preemptible = 10 [(google.api.field_behavior) = OPTIONAL];

    // Optional. The number of local SSD disks to attach to the node, which is
    // limited by the maximum number of disks allowable per zone (see [Adding
    // Local SSDs](https://cloud.google.com/compute/docs/disks/local-ssd)).
    int32 local_ssd_count = 7 [(google.api.field_behavior) = OPTIONAL];

    // Optional. A list of [hardware
    // accelerators](https://cloud.google.com/compute/docs/gpus) to attach to
    // each node.
    repeated GkeNodePoolAcceleratorConfig accelerators = 11
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. [Minimum CPU
    // platform](https://cloud.google.com/compute/docs/instances/specify-min-cpu-platform)
    // to be used by this instance. The instance may be scheduled on the
    // specified or a newer CPU platform. Specify the friendly names of CPU
    // platforms, such as "Intel Haswell" or "Intel Sandy Bridge".
    string min_cpu_platform = 13 [(google.api.field_behavior) = OPTIONAL];
  }

  // A GkeNodePoolAcceleratorConfig represents a Hardware Accelerator request
  // for a NodePool.
  message GkeNodePoolAcceleratorConfig {
    // The number of accelerator cards exposed to an instance.
    int64 accelerator_count = 1;

    // The accelerator type resource name (see GPUs on Compute Engine).
    string accelerator_type = 2;
  }

  // GkeNodePoolAutoscaling contains information the cluster autoscaler needs
  // to adjust the size of the node pool to the current cluster usage.
  message GkeNodePoolAutoscalingConfig {
    // The minimum number of nodes in the NodePool. Must be >= 0 and <=
    // max_node_count.
    int32 min_node_count = 2;

    // The maximum number of nodes in the NodePool. Must be >= min_node_count.
    // **Note:** Quota must be sufficient to scale up the cluster.
    int32 max_node_count = 3;
  }

  // Optional. The node pool configuration.
  GkeNodeConfig config = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The list of Compute Engine
  // [zones](https://cloud.google.com/compute/docs/zones#available) where
  // NodePool's nodes will be located.
  //
  // **Note:** Currently, only one zone may be specified.
  //
  // If a location is not specified during NodePool creation, Dataproc will
  // choose a location.
  repeated string locations = 13 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The autoscaler configuration for this NodePool. The autoscaler
  // is enabled only when a valid configuration is present.
  GkeNodePoolAutoscalingConfig autoscaling = 4
      [(google.api.field_behavior) = OPTIONAL];
}
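// Illustrative only: a GkeNodePoolTarget sketch in JSON form with an explicit
// node pool shape and autoscaling. The resource path, machine type, zone, and
// node counts are placeholders chosen for illustration, not values defined in
// this file.
//
//   {
//     "nodePool": "projects/my-project/locations/us-central1/clusters/my-gke-cluster/nodePools/spark-pool",
//     "roles": ["SPARK_DRIVER", "SPARK_EXECUTOR"],
//     "nodePoolConfig": {
//       "config": {
//         "machineType": "n1-standard-4",
//         "preemptible": false,
//         "localSsdCount": 1
//       },
//       "locations": ["us-central1-a"],
//       "autoscaling": { "minNodeCount": 1, "maxNodeCount": 5 }
//     }
//   }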
// Cluster components that can be activated.
enum Component {
  // Unspecified component. Specifying this will cause Cluster creation to
  // fail.
  COMPONENT_UNSPECIFIED = 0;

  // The Anaconda python distribution. The Anaconda component is not supported
  // in the Dataproc 2.0 image. The 2.0 image is pre-installed with Miniconda.
  ANACONDA = 5;

  // Docker
  DOCKER = 13;

  // The Druid query engine. (alpha)
  DRUID = 9;

  // Flink
  FLINK = 14;

  // HBase. (beta)
  HBASE = 11;

  // The Hive Web HCatalog (the REST service for accessing HCatalog).
  HIVE_WEBHCAT = 3;

  // The Jupyter Notebook.
  JUPYTER = 1;

  // The Presto query engine.
  PRESTO = 6;

  // The Ranger service.
  RANGER = 12;

  // The Solr service.
  SOLR = 10;

  // The Zeppelin notebook.
  ZEPPELIN = 4;

  // The Zookeeper service.
  ZOOKEEPER = 8;
}

// Actions in response to failure of a resource associated with a cluster.
enum FailureAction {
  // When FailureAction is unspecified, failure action defaults to NO_ACTION.
  FAILURE_ACTION_UNSPECIFIED = 0;

  // Take no action on failure to create a cluster resource. NO_ACTION is the
  // default.
  NO_ACTION = 1;

  // Delete the failed cluster resource.
  DELETE = 2;
}
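// Illustrative only: the Component enum is referenced from messages defined
// elsewhere in this package (for example, SoftwareConfig.optional_components
// in clusters.proto). A SoftwareConfig selecting optional components might
// look like the following in JSON form; the image version shown is a
// placeholder.
//
//   {
//     "imageVersion": "2.0",
//     "optionalComponents": ["JUPYTER", "ZEPPELIN"]
//   }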