// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1beta1;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/aiplatform/v1beta1/env_var.proto";
import "google/cloud/aiplatform/v1beta1/io.proto";
import "google/cloud/aiplatform/v1beta1/job_state.proto";
import "google/cloud/aiplatform/v1beta1/machine_resources.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";
import "google/api/annotations.proto";

option go_package = "google.golang.org/genproto/googleapis/cloud/aiplatform/v1beta1;aiplatform";
option java_multiple_files = true;
option java_outer_classname = "CustomJobProto";
option java_package = "com.google.cloud.aiplatform.v1beta1";

// Represents a job that runs custom workloads such as a Docker container or a
// Python package. A CustomJob can have multiple worker pools and each worker
// pool can have its own machine and input spec. A CustomJob will be cleaned up
// once the job enters terminal state (failed or succeeded).
message CustomJob {
  option (google.api.resource) = {
    type: "aiplatform.googleapis.com/CustomJob"
    pattern: "projects/{project}/locations/{location}/customJobs/{custom_job}"
  };

  // Output only. Resource name of a CustomJob.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Required. The display name of the CustomJob.
  // The name can be up to 128 characters long and can consist of any UTF-8
  // characters.
  string display_name = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. Job spec.
  CustomJobSpec job_spec = 4 [(google.api.field_behavior) = REQUIRED];

  // Output only. The detailed state of the job.
  JobState state = 5 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time when the CustomJob was created.
  google.protobuf.Timestamp create_time = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time when the CustomJob for the first time entered the
  // `JOB_STATE_RUNNING` state.
  google.protobuf.Timestamp start_time = 7
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time when the CustomJob entered any of the following states:
  // `JOB_STATE_SUCCEEDED`, `JOB_STATE_FAILED`, `JOB_STATE_CANCELLED`.
  google.protobuf.Timestamp end_time = 8
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time when the CustomJob was most recently updated.
  google.protobuf.Timestamp update_time = 9
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Only populated when job's state is `JOB_STATE_FAILED` or
  // `JOB_STATE_CANCELLED`.
  google.rpc.Status error = 10 [(google.api.field_behavior) = OUTPUT_ONLY];

  // The labels with user-defined metadata to organize CustomJobs.
  //
  // Label keys and values can be no longer than 64 characters
  // (Unicode codepoints), can only contain lowercase letters, numeric
  // characters, underscores and dashes. International characters are allowed.
  //
  // See https://goo.gl/xmQnxf for more information and examples of labels.
  map<string, string> labels = 11;
}

// Represents the spec of a CustomJob.
message CustomJobSpec {
  // Required. The spec of the worker pools including machine type and Docker
  // image.
  repeated WorkerPoolSpec worker_pool_specs = 1
      [(google.api.field_behavior) = REQUIRED];

  // Scheduling options for a CustomJob.
  Scheduling scheduling = 3;

  // Specifies the service account for workload run-as account.
  // Users submitting jobs must have act-as permission on this run-as account.
  string service_account = 4;

  // The full name of the Compute Engine
  // [network](/compute/docs/networks-and-firewalls#networks) to which the Job
  // should be peered. For example, projects/12345/global/networks/myVPC.
  //
  // [Format](https://cloud.google.com/compute/docs/reference/rest/v1/networks/insert)
  // is of the form projects/{project}/global/networks/{network}.
  // Where {project} is a project number, as in '12345', and {network} is
  // network name.
  //
  // Private services access must already be configured for the network. If left
  // unspecified, the job is not peered with any network.
  string network = 5;

  // The Google Cloud Storage location to store the output of this CustomJob or
  // HyperparameterTuningJob. For HyperparameterTuningJob,
  // [base_output_directory][CustomJob.job_spec.base_output_directory] of
  // each child CustomJob backing a Trial is set to a subdirectory of name
  // [id][google.cloud.aiplatform.v1beta1.Trial.id] under parent
  // HyperparameterTuningJob's
  // [base_output_directory][HyperparameterTuningJob.trial_job_spec.base_output_directory].
  //
  // Following AI Platform environment variables will be passed to containers or
  // python modules when this field is set:
  //
  //   For CustomJob:
  //   * AIP_MODEL_DIR = `<base_output_directory>/model/`
  //   * AIP_CHECKPOINT_DIR = `<base_output_directory>/checkpoints/`
  //   * AIP_TENSORBOARD_LOG_DIR = `<base_output_directory>/logs/`
  //
  //   For CustomJob backing a Trial of HyperparameterTuningJob:
  //   * AIP_MODEL_DIR = `<base_output_directory>/<trial_id>/model/`
  //   * AIP_CHECKPOINT_DIR = `<base_output_directory>/<trial_id>/checkpoints/`
  //   * AIP_TENSORBOARD_LOG_DIR = `<base_output_directory>/<trial_id>/logs/`
  GcsDestination base_output_directory = 6;
}

// Represents the spec of a worker pool in a job.
message WorkerPoolSpec {
  // The custom task to be executed in this worker pool.
  oneof task {
    // The custom container task.
    ContainerSpec container_spec = 6;

    // The Python packaged task.
    PythonPackageSpec python_package_spec = 7;
  }

  // Required. Immutable. The specification of a single machine.
  MachineSpec machine_spec = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Required. The number of worker replicas to use for this worker pool.
  int64 replica_count = 2 [(google.api.field_behavior) = REQUIRED];

  // Disk spec.
  DiskSpec disk_spec = 5;
}

// The spec of a Container.
message ContainerSpec {
  // Required. The URI of a container image in the Container Registry that is
  // to be run on each worker replica.
  string image_uri = 1 [(google.api.field_behavior) = REQUIRED];

  // The command to be invoked when the container is started.
  // It overrides the entrypoint instruction in Dockerfile when provided.
  repeated string command = 2;

  // The arguments to be passed when starting the container.
  repeated string args = 3;
}

// The spec of a Python packaged code.
message PythonPackageSpec {
  // Required. The URI of a container image in the Container Registry that will
  // run the provided python package. AI Platform provides wide range of
  // executor images with pre-installed packages to meet users' various use
  // cases. Only one of the provided images can be set here.
  string executor_image_uri = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Google Cloud Storage location of the Python package files
  // which are the training program and its dependent packages.
  // The maximum number of package URIs is 100.
  repeated string package_uris = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. The Python module name to run after installing the packages.
  string python_module = 3 [(google.api.field_behavior) = REQUIRED];

  // Command line arguments to be passed to the Python task.
  repeated string args = 4;
}

// All parameters related to queuing and scheduling of custom jobs.
message Scheduling {
  // The maximum job running time. The default is 7 days.
  google.protobuf.Duration timeout = 1;

  // Restarts the entire CustomJob if a worker gets restarted.
  // This feature can be used by distributed training jobs that are not
  // resilient to workers leaving and joining a job.
  bool restart_job_on_worker_restart = 3;
}