// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/aiplatform/v1/encryption_spec.proto";
import "google/cloud/aiplatform/v1/io.proto";
import "google/cloud/aiplatform/v1/model.proto";
import "google/cloud/aiplatform/v1/pipeline_state.proto";
import "google/protobuf/struct.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";

option csharp_namespace = "Google.Cloud.AIPlatform.V1";
option go_package = "cloud.google.com/go/aiplatform/apiv1/aiplatformpb;aiplatformpb";
option java_multiple_files = true;
option java_outer_classname = "TrainingPipelineProto";
option java_package = "com.google.cloud.aiplatform.v1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
option ruby_package = "Google::Cloud::AIPlatform::V1";

// The TrainingPipeline orchestrates tasks associated with training a Model. It
// always executes the training task, and optionally may also
// export data from Vertex AI's Dataset which becomes the training input,
// [upload][google.cloud.aiplatform.v1.ModelService.UploadModel] the Model to
// Vertex AI, and evaluate the Model.
message TrainingPipeline {
  option (google.api.resource) = {
    type: "aiplatform.googleapis.com/TrainingPipeline"
    pattern: "projects/{project}/locations/{location}/trainingPipelines/{training_pipeline}"
  };

  // Output only. Resource name of the TrainingPipeline.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Required. The user-defined name of this TrainingPipeline.
  string display_name = 2 [(google.api.field_behavior) = REQUIRED];

  // Specifies Vertex AI owned input data that may be used for training the
  // Model. The TrainingPipeline's
  // [training_task_definition][google.cloud.aiplatform.v1.TrainingPipeline.training_task_definition]
  // should make clear whether this config is used and if there are any special
  // requirements on how it should be filled. If nothing about this config is
  // mentioned in the
  // [training_task_definition][google.cloud.aiplatform.v1.TrainingPipeline.training_task_definition],
  // then it should be assumed that the TrainingPipeline does not depend on
  // this configuration.
  InputDataConfig input_data_config = 3;

  // Required. A Google Cloud Storage path to the YAML file that defines the
  // training task which is responsible for producing the model artifact, and
  // may also include additional auxiliary work. The definition files that can
  // be used here are found in
  // gs://google-cloud-aiplatform/schema/trainingjob/definition/.
  // Note: The URI given on output will be immutable and probably different,
  // including the URI scheme, than the one given on input. The output URI will
  // point to a location where the user only has a read access.
  string training_task_definition = 4 [(google.api.field_behavior) = REQUIRED];

  // Required. The training task's parameter(s), as specified in the
  // [training_task_definition][google.cloud.aiplatform.v1.TrainingPipeline.training_task_definition]'s
  // `inputs`.
  google.protobuf.Value training_task_inputs = 5
      [(google.api.field_behavior) = REQUIRED];

  // Output only. The metadata information as specified in the
  // [training_task_definition][google.cloud.aiplatform.v1.TrainingPipeline.training_task_definition]'s
  // `metadata`. This metadata is an auxiliary runtime and final information
  // about the training task. While the pipeline is running this information
  // is populated only at a best effort basis. Only present if the pipeline's
  // [training_task_definition][google.cloud.aiplatform.v1.TrainingPipeline.training_task_definition]
  // contains `metadata` object.
  google.protobuf.Value training_task_metadata = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Describes the Model that may be uploaded (via
  // [ModelService.UploadModel][google.cloud.aiplatform.v1.ModelService.UploadModel])
  // by this TrainingPipeline. The TrainingPipeline's
  // [training_task_definition][google.cloud.aiplatform.v1.TrainingPipeline.training_task_definition]
  // should make clear whether this Model description should be populated, and
  // if there are any special requirements regarding how it should be filled.
  // If nothing is mentioned in the
  // [training_task_definition][google.cloud.aiplatform.v1.TrainingPipeline.training_task_definition],
  // then it should be assumed that this field should not be filled and the
  // training task either uploads the Model without a need of this information,
  // or that training task does not support uploading a Model as part of the
  // pipeline. When the Pipeline's state becomes `PIPELINE_STATE_SUCCEEDED` and
  // the trained Model had been uploaded into Vertex AI, then the
  // model_to_upload's resource [name][google.cloud.aiplatform.v1.Model.name]
  // is populated. The Model is always uploaded into the Project and Location
  // in which this pipeline is.
  Model model_to_upload = 7;

  // Optional. The ID to use for the uploaded Model, which will become the
  // final component of the model resource name.
  //
  // This value may be up to 63 characters, and valid characters are
  // `[a-z0-9_-]`. The first character cannot be a number or hyphen.
  string model_id = 22 [(google.api.field_behavior) = OPTIONAL];

  // Optional. When specify this field, the `model_to_upload` will not be
  // uploaded as a new model, instead, it will become a new version of this
  // `parent_model`.
  string parent_model = 21 [(google.api.field_behavior) = OPTIONAL];

  // Output only. The detailed state of the pipeline.
  PipelineState state = 9 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Only populated when the pipeline's state is
  // `PIPELINE_STATE_FAILED` or `PIPELINE_STATE_CANCELLED`.
  google.rpc.Status error = 10 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time when the TrainingPipeline was created.
  google.protobuf.Timestamp create_time = 11
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time when the TrainingPipeline for the first time entered the
  // `PIPELINE_STATE_RUNNING` state.
  google.protobuf.Timestamp start_time = 12
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time when the TrainingPipeline entered any of the following
  // states: `PIPELINE_STATE_SUCCEEDED`, `PIPELINE_STATE_FAILED`,
  // `PIPELINE_STATE_CANCELLED`.
  google.protobuf.Timestamp end_time = 13
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time when the TrainingPipeline was most recently updated.
  google.protobuf.Timestamp update_time = 14
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // The labels with user-defined metadata to organize TrainingPipelines.
  //
  // Label keys and values can be no longer than 64 characters
  // (Unicode codepoints), can only contain lowercase letters, numeric
  // characters, underscores and dashes. International characters are allowed.
  //
  // See https://goo.gl/xmQnxf for more information and examples of labels.
  //
  // NOTE(review): the map's type parameters were missing in the source
  // (`map labels = 15;`, which does not compile); restored as
  // `map<string, string>`, the string-to-string form the comment above
  // describes for label keys and values.
  map<string, string> labels = 15;

  // Customer-managed encryption key spec for a TrainingPipeline. If set, this
  // TrainingPipeline will be secured by this key.
  //
  // Note: Model trained by this TrainingPipeline is also secured by this key
  // if
  // [model_to_upload][google.cloud.aiplatform.v1.TrainingPipeline.model_to_upload]
  // is not set separately.
  EncryptionSpec encryption_spec = 18;
}

// Specifies Vertex AI owned input data to be used for training, and
// possibly evaluating, the Model.
message InputDataConfig { // The instructions how the input data should be split between the // training, validation and test sets. // If no split type is provided, the // [fraction_split][google.cloud.aiplatform.v1.InputDataConfig.fraction_split] // is used by default. oneof split { // Split based on fractions defining the size of each set. FractionSplit fraction_split = 2; // Split based on the provided filters for each set. FilterSplit filter_split = 3; // Supported only for tabular Datasets. // // Split based on a predefined key. PredefinedSplit predefined_split = 4; // Supported only for tabular Datasets. // // Split based on the timestamp of the input data pieces. TimestampSplit timestamp_split = 5; // Supported only for tabular Datasets. // // Split based on the distribution of the specified column. StratifiedSplit stratified_split = 12; } // Only applicable to Custom and Hyperparameter Tuning TrainingPipelines. // // The destination of the training data to be written to. // // Supported destination file formats: // * For non-tabular data: "jsonl". // * For tabular data: "csv" and "bigquery". // // The following Vertex AI environment variables are passed to containers // or python modules of the training task when this field is set: // // * AIP_DATA_FORMAT : Exported data format. // * AIP_TRAINING_DATA_URI : Sharded exported training data uris. // * AIP_VALIDATION_DATA_URI : Sharded exported validation data uris. // * AIP_TEST_DATA_URI : Sharded exported test data uris. oneof destination { // The Cloud Storage location where the training data is to be // written to. In the given directory a new directory is created with // name: // `dataset---` // where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. // All training input data is written into that directory. // // The Vertex AI environment variables representing Cloud Storage // data URIs are represented in the Cloud Storage wildcard // format to support sharded data. 
e.g.: "gs://.../training-*.jsonl" // // * AIP_DATA_FORMAT = "jsonl" for non-tabular data, "csv" for tabular data // * AIP_TRAINING_DATA_URI = // "gcs_destination/dataset---