// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/aiplatform/v1/content.proto";
import "google/cloud/aiplatform/v1/job_state.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";

option csharp_namespace = "Google.Cloud.AIPlatform.V1";
option go_package = "cloud.google.com/go/aiplatform/apiv1/aiplatformpb;aiplatformpb";
option java_multiple_files = true;
option java_outer_classname = "TuningJobProto";
option java_package = "com.google.cloud.aiplatform.v1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
option ruby_package = "Google::Cloud::AIPlatform::V1";

// Represents a TuningJob that runs with Google owned models.
message TuningJob {
  option (google.api.resource) = {
    type: "aiplatform.googleapis.com/TuningJob"
    pattern: "projects/{project}/locations/{location}/tuningJobs/{tuning_job}"
    plural: "tuningJobs"
    singular: "tuningJob"
  };

  // The model that this job tunes.
  oneof source_model {
    // The base model that is being tuned, e.g., "gemini-1.0-pro-002".
    string base_model = 4;
  }

  // The tuning method and its configuration.
  oneof tuning_spec {
    // Tuning Spec for Supervised Fine Tuning.
    SupervisedTuningSpec supervised_tuning_spec = 5;
  }

  // Output only. Identifier. Resource name of a TuningJob. Format:
  // `projects/{project}/locations/{location}/tuningJobs/{tuning_job}`
  string name = 1 [
    (google.api.field_behavior) = IDENTIFIER,
    (google.api.field_behavior) = OUTPUT_ONLY
  ];

  // Optional. The display name of the
  // [TunedModel][google.cloud.aiplatform.v1.Model]. The name can be up to 128
  // characters long and can consist of any UTF-8 characters.
  string tuned_model_display_name = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The description of the
  // [TuningJob][google.cloud.aiplatform.v1.TuningJob].
  string description = 3 [(google.api.field_behavior) = OPTIONAL];

  // Output only. The detailed state of the job.
  JobState state = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time when the
  // [TuningJob][google.cloud.aiplatform.v1.TuningJob] was created.
  google.protobuf.Timestamp create_time = 7
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time when the
  // [TuningJob][google.cloud.aiplatform.v1.TuningJob] for the first time
  // entered the `JOB_STATE_RUNNING` state.
  google.protobuf.Timestamp start_time = 8
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time when the TuningJob entered any of the following
  // [JobStates][google.cloud.aiplatform.v1.JobState]: `JOB_STATE_SUCCEEDED`,
  // `JOB_STATE_FAILED`, `JOB_STATE_CANCELLED`, `JOB_STATE_EXPIRED`.
  google.protobuf.Timestamp end_time = 9
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time when the
  // [TuningJob][google.cloud.aiplatform.v1.TuningJob] was most recently
  // updated.
  google.protobuf.Timestamp update_time = 10
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Only populated when job's state is `JOB_STATE_FAILED` or
  // `JOB_STATE_CANCELLED`.
  google.rpc.Status error = 11 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. The labels with user-defined metadata to organize
  // [TuningJob][google.cloud.aiplatform.v1.TuningJob] and generated resources
  // such as [Model][google.cloud.aiplatform.v1.Model] and
  // [Endpoint][google.cloud.aiplatform.v1.Endpoint].
  //
  // Label keys and values can be no longer than 64 characters
  // (Unicode codepoints), can only contain lowercase letters, numeric
  // characters, underscores and dashes. International characters are allowed.
  //
  // See https://goo.gl/xmQnxf for more information and examples of labels.
  map<string, string> labels = 12 [(google.api.field_behavior) = OPTIONAL];

  // Output only. The Experiment associated with this
  // [TuningJob][google.cloud.aiplatform.v1.TuningJob].
  string experiment = 13 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Context"
    }
  ];

  // Output only. The tuned model resources associated with this
  // [TuningJob][google.cloud.aiplatform.v1.TuningJob].
  TunedModel tuned_model = 14 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The tuning data statistics associated with this
  // [TuningJob][google.cloud.aiplatform.v1.TuningJob].
  TuningDataStats tuning_data_stats = 15
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// The Model Registry Model and Online Prediction Endpoint associated with
// this [TuningJob][google.cloud.aiplatform.v1.TuningJob].
message TunedModel {
  // Output only. The resource name of the TunedModel. Format:
  // `projects/{project}/locations/{location}/models/{model}`.
  string model = 1 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Model"
    }
  ];

  // Output only. A resource name of an Endpoint. Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`.
  string endpoint = 2 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];
}

// Dataset distribution for Supervised Tuning.
message SupervisedTuningDatasetDistribution {
  // A single histogram bucket of the distribution computed over a population
  // of values.
  message DatasetBucket {
    // Output only. How many values fall in this bucket.
    double count = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. The bucket's left bound.
    double left = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. The bucket's right bound.
    double right = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // Output only. Sum over the given population of values.
  int64 sum = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Smallest value in the population.
  double min = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Largest value in the population.
  double max = 3 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Arithmetic mean of the population values.
  double mean = 4 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Median of the population values.
  double median = 5 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. 5th percentile of the population values.
  double p5 = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. 95th percentile of the population values.
  double p95 = 7 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The buckets that define the histogram.
  repeated DatasetBucket buckets = 8
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Statistics about the tuning data used for Supervised Tuning.
message SupervisedTuningDataStats {
  // Output only. Count of examples in the tuning dataset.
  int64 tuning_dataset_example_count = 1
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Count of tuning characters in the tuning dataset.
  int64 total_tuning_character_count = 2
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Count of billable characters in the tuning dataset.
  int64 total_billable_character_count = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Count of tuning steps for this Tuning Job.
  int64 tuning_step_count = 4 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Dataset distribution of the user input tokens.
  SupervisedTuningDatasetDistribution user_input_token_distribution = 5
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Dataset distribution of the user output tokens.
  SupervisedTuningDatasetDistribution user_output_token_distribution = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Dataset distribution of the messages per example.
  SupervisedTuningDatasetDistribution user_message_per_example_distribution = 7
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Sample user messages from the training dataset uri.
  repeated Content user_dataset_examples = 8
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Tuning data statistic values reported for a
// [TuningJob][google.cloud.aiplatform.v1.TuningJob].
message TuningDataStats {
  // The statistics payload; supervised tuning is the only member declared
  // here.
  oneof tuning_data_stats {
    // The SFT tuning data statistics.
    SupervisedTuningDataStats supervised_tuning_data_stats = 1;
  }
}

// Hyperparameters used for SFT.
message SupervisedHyperParameters {
  // Adapter sizes supported for tuning.
  //
  // Note: the enum number is an ordinal, not the adapter size itself
  // (for example, `ADAPTER_SIZE_FOUR` has the number 2).
  enum AdapterSize {
    // No adapter size was specified.
    ADAPTER_SIZE_UNSPECIFIED = 0;

    // Adapter size of 1.
    ADAPTER_SIZE_ONE = 1;

    // Adapter size of 4.
    ADAPTER_SIZE_FOUR = 2;

    // Adapter size of 8.
    ADAPTER_SIZE_EIGHT = 3;

    // Adapter size of 16.
    ADAPTER_SIZE_SIXTEEN = 4;
  }

  // Optional. How many complete passes over the entire training dataset the
  // model makes during training.
  int64 epoch_count = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Multiplier used to adjust the default learning rate.
  double learning_rate_multiplier = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The adapter size to use for tuning.
  AdapterSize adapter_size = 3 [(google.api.field_behavior) = OPTIONAL];
}

// Tuning Spec for Supervised Tuning.
message SupervisedTuningSpec {
  // Required. Cloud Storage path to the file that contains the training
  // dataset for tuning. The dataset must be formatted as a JSONL file.
  string training_dataset_uri = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. Cloud Storage path to the file that contains the validation
  // dataset for tuning. The dataset must be formatted as a JSONL file.
  string validation_dataset_uri = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The hyperparameters to use for SFT.
  SupervisedHyperParameters hyper_parameters = 3
      [(google.api.field_behavior) = OPTIONAL];
}