// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/aiplatform/v1/completion_stats.proto";
import "google/cloud/aiplatform/v1/encryption_spec.proto";
import "google/cloud/aiplatform/v1/explanation.proto";
import "google/cloud/aiplatform/v1/io.proto";
import "google/cloud/aiplatform/v1/job_state.proto";
import "google/cloud/aiplatform/v1/machine_resources.proto";
import "google/cloud/aiplatform/v1/manual_batch_tuning_parameters.proto";
import "google/cloud/aiplatform/v1/unmanaged_container_model.proto";
import "google/protobuf/struct.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";

option csharp_namespace = "Google.Cloud.AIPlatform.V1";
option go_package = "cloud.google.com/go/aiplatform/apiv1/aiplatformpb;aiplatformpb";
option java_multiple_files = true;
option java_outer_classname = "BatchPredictionJobProto";
option java_package = "com.google.cloud.aiplatform.v1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
option ruby_package = "Google::Cloud::AIPlatform::V1";

// A job that uses a
// [Model][google.cloud.aiplatform.v1.BatchPredictionJob.model] to produce
// predictions on multiple [input
// instances][google.cloud.aiplatform.v1.BatchPredictionJob.input_config]. If
// predictions for a significant portion of the instances fail, the job may
// finish without attempting predictions for all remaining instances.
message BatchPredictionJob {
  option (google.api.resource) = {
    type: "aiplatform.googleapis.com/BatchPredictionJob"
    pattern: "projects/{project}/locations/{location}/batchPredictionJobs/{batch_prediction_job}"
  };

  // Configures the input to
  // [BatchPredictionJob][google.cloud.aiplatform.v1.BatchPredictionJob]. See
  // [Model.supported_input_storage_formats][google.cloud.aiplatform.v1.Model.supported_input_storage_formats]
  // for Model's supported input formats, and how instances should be expressed
  // via any of them.
  message InputConfig {
    // Required. The source of the input.
    oneof source {
      // The Cloud Storage location for the input instances.
      GcsSource gcs_source = 2;

      // The BigQuery location of the input table.
      // The schema of the table should be in the format described by the given
      // context OpenAPI Schema, if one is provided. The table may contain
      // additional columns that are not described by the schema, and they will
      // be ignored.
      BigQuerySource bigquery_source = 3;
    }

    // Required. The format in which instances are given, must be one of the
    // [Model's][google.cloud.aiplatform.v1.BatchPredictionJob.model]
    // [supported_input_storage_formats][google.cloud.aiplatform.v1.Model.supported_input_storage_formats].
    string instances_format = 1 [(google.api.field_behavior) = REQUIRED];
  }

  // Configuration defining how to transform batch prediction input instances to
  // the instances that the Model accepts.
  message InstanceConfig {
    // The format of the instance that the Model accepts. Vertex AI will
    // convert compatible
    // [batch prediction input instance
    // formats][google.cloud.aiplatform.v1.BatchPredictionJob.InputConfig.instances_format]
    // to the specified format.
    //
    // Supported values are:
    //
    // * `object`: Each input is converted to JSON object format.
    //     * For `bigquery`, each row is converted to an object.
    //     * For `jsonl`, each line of the JSONL input must be an object.
    //     * Does not apply to `csv`, `file-list`, `tf-record`, or
    //       `tf-record-gzip`.
    //
    // * `array`: Each input is converted to JSON array format.
    //     * For `bigquery`, each row is converted to an array. The order
    //       of columns is determined by the BigQuery column order, unless
    //       [included_fields][google.cloud.aiplatform.v1.BatchPredictionJob.InstanceConfig.included_fields]
    //       is populated.
    //       [included_fields][google.cloud.aiplatform.v1.BatchPredictionJob.InstanceConfig.included_fields]
    //       must be populated for specifying field orders.
    //     * For `jsonl`, if each line of the JSONL input is an object,
    //       [included_fields][google.cloud.aiplatform.v1.BatchPredictionJob.InstanceConfig.included_fields]
    //       must be populated for specifying field orders.
    //     * Does not apply to `csv`, `file-list`, `tf-record`, or
    //       `tf-record-gzip`.
    //
    // If not specified, Vertex AI converts the batch prediction input as
    // follows:
    //
    //  * For `bigquery` and `csv`, the behavior is the same as `array`. The
    //    order of columns is the same as defined in the file or table, unless
    //    [included_fields][google.cloud.aiplatform.v1.BatchPredictionJob.InstanceConfig.included_fields]
    //    is populated.
    //  * For `jsonl`, the prediction instance format is determined by
    //    each line of the input.
    //  * For `tf-record`/`tf-record-gzip`, each record will be converted to
    //    an object in the format of `{"b64": <value>}`, where `<value>` is
    //    the Base64-encoded string of the content of the record.
    //  * For `file-list`, each file in the list will be converted to an
    //    object in the format of `{"b64": <value>}`, where `<value>` is
    //    the Base64-encoded string of the content of the file.
    string instance_type = 1;

    // The name of the field that is considered as a key.
    //
    // The values identified by the key field are not included in the
    // transformed instances that are sent to the Model. This is similar to
    // specifying this name of the field in
    // [excluded_fields][google.cloud.aiplatform.v1.BatchPredictionJob.InstanceConfig.excluded_fields].
    // In addition, the batch prediction output will not include the instances.
    // Instead the output will only include the value of the key field, in a
    // field named `key` in the output:
    //
    //  * For `jsonl` output format, the output will have a `key` field
    //    instead of the `instance` field.
    //  * For `csv`/`bigquery` output format, the output will have a `key`
    //    column instead of the instance feature columns.
    //
    // The input must be JSONL with objects at each line, CSV, BigQuery
    // or TfRecord.
    string key_field = 2;

    // Fields that will be included in the prediction instance that is
    // sent to the Model.
    //
    // If
    // [instance_type][google.cloud.aiplatform.v1.BatchPredictionJob.InstanceConfig.instance_type]
    // is `array`, the order of field names in included_fields also determines
    // the order of the values in the array.
    //
    // When included_fields is populated,
    // [excluded_fields][google.cloud.aiplatform.v1.BatchPredictionJob.InstanceConfig.excluded_fields]
    // must be empty.
    //
    // The input must be JSONL with objects at each line, BigQuery
    // or TfRecord.
    repeated string included_fields = 3;

    // Fields that will be excluded in the prediction instance that is
    // sent to the Model.
    //
    // Excluded fields will be attached to the batch prediction output if
    // [key_field][google.cloud.aiplatform.v1.BatchPredictionJob.InstanceConfig.key_field]
    // is not specified.
    //
    // When excluded_fields is populated,
    // [included_fields][google.cloud.aiplatform.v1.BatchPredictionJob.InstanceConfig.included_fields]
    // must be empty.
    //
    // The input must be JSONL with objects at each line, BigQuery
    // or TfRecord.
    repeated string excluded_fields = 4;
  }

  // Configures the output of
  // [BatchPredictionJob][google.cloud.aiplatform.v1.BatchPredictionJob]. See
  // [Model.supported_output_storage_formats][google.cloud.aiplatform.v1.Model.supported_output_storage_formats]
  // for supported output formats, and how predictions are expressed via any of
  // them.
  message OutputConfig {
    // Required. The destination of the output.
    oneof destination {
      // The Cloud Storage location of the directory where the output is
      // to be written to. In the given directory a new directory is created.
      // Its name is `prediction-<model-display-name>-<job-create-time>`,
      // where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format.
      // Inside of it files `predictions_0001.<extension>`,
      // `predictions_0002.<extension>`, ..., `predictions_N.<extension>`
      // are created where `<extension>` depends on chosen
      // [predictions_format][google.cloud.aiplatform.v1.BatchPredictionJob.OutputConfig.predictions_format],
      // and N may equal 0001 and depends on the total number of successfully
      // predicted instances. If the Model has both
      // [instance][google.cloud.aiplatform.v1.PredictSchemata.instance_schema_uri]
      // and
      // [prediction][google.cloud.aiplatform.v1.PredictSchemata.prediction_schema_uri]
      // schemata defined then each such file contains predictions as per the
      // [predictions_format][google.cloud.aiplatform.v1.BatchPredictionJob.OutputConfig.predictions_format].
      // If prediction for any instance failed (partially or completely), then
      // an additional `errors_0001.<extension>`, `errors_0002.<extension>`,...,
      // `errors_N.<extension>` files are created (N depends on total number
      // of failed predictions). These files contain the failed instances,
      // as per their schema, followed by an additional `error` field which as
      // value has [google.rpc.Status][google.rpc.Status]
      // containing only `code` and `message` fields.
      GcsDestination gcs_destination = 2;

      // The BigQuery project or dataset location where the output is to be
      // written to. If project is provided, a new dataset is created with name
      // `prediction_<model-display-name>_<job-create-time>`
      // where <model-display-name> is made
      // BigQuery-dataset-name compatible (for example, most special characters
      // become underscores), and timestamp is in
      // YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset
      // two tables will be created, `predictions`, and `errors`.
      // If the Model has both
      // [instance][google.cloud.aiplatform.v1.PredictSchemata.instance_schema_uri]
      // and
      // [prediction][google.cloud.aiplatform.v1.PredictSchemata.prediction_schema_uri]
      // schemata defined then the tables have columns as follows: The
      // `predictions` table contains instances for which the prediction
      // succeeded, it has columns as per a concatenation of the Model's
      // instance and prediction schemata. The `errors` table contains rows for
      // which the prediction has failed, it has instance columns, as per the
      // instance schema, followed by a single "errors" column, which as values
      // has [google.rpc.Status][google.rpc.Status]
      // represented as a STRUCT, and containing only `code` and `message`.
      BigQueryDestination bigquery_destination = 3;
    }

    // Required. The format in which Vertex AI gives the predictions, must be
    // one of the [Model's][google.cloud.aiplatform.v1.BatchPredictionJob.model]
    // [supported_output_storage_formats][google.cloud.aiplatform.v1.Model.supported_output_storage_formats].
    string predictions_format = 1 [(google.api.field_behavior) = REQUIRED];
  }

  // Further describes this job's output.
  // Supplements
  // [output_config][google.cloud.aiplatform.v1.BatchPredictionJob.output_config].
  message OutputInfo {
    // The output location into which prediction output is written.
    oneof output_location {
      // Output only. The full path of the Cloud Storage directory created, into
      // which the prediction output is written.
      string gcs_output_directory = 1
          [(google.api.field_behavior) = OUTPUT_ONLY];

      // Output only. The path of the BigQuery dataset created, in
      // `bq://projectId.bqDatasetId`
      // format, into which the prediction output is written.
      string bigquery_output_dataset = 2
          [(google.api.field_behavior) = OUTPUT_ONLY];
    }

    // Output only. The name of the BigQuery table created, in
    // `predictions_<timestamp>`
    // format, into which the prediction output is written.
    // Can be used by UI to generate the BigQuery output path, for example.
    string bigquery_output_table = 4
        [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // Output only. Resource name of the BatchPredictionJob.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Required. The user-defined name of this BatchPredictionJob.
  string display_name = 2 [(google.api.field_behavior) = REQUIRED];

  // The name of the Model resource that produces the predictions via this job,
  // must share the same ancestor Location.
  // Starting this job has no impact on any existing deployments of the Model
  // and their resources.
  // Exactly one of model and unmanaged_container_model must be set.
  //
  // The model resource name may contain version id or version alias to specify
  // the version.
  //  Example: `projects/{project}/locations/{location}/models/{model}@2`
  //              or
  //            `projects/{project}/locations/{location}/models/{model}@golden`
  // if no version is specified, the default version will be deployed.
  //
  // The model resource could also be a publisher model.
  //  Example: `publishers/{publisher}/models/{model}`
  //              or
  //           `projects/{project}/locations/{location}/publishers/{publisher}/models/{model}`
  string model = 3 [(google.api.resource_reference) = {
    type: "aiplatform.googleapis.com/Model"
  }];

  // Output only. The version ID of the Model that produces the predictions via
  // this job.
  string model_version_id = 30 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Contains model information necessary to perform batch prediction without
  // requiring uploading to model registry.
  // Exactly one of model and unmanaged_container_model must be set.
  UnmanagedContainerModel unmanaged_container_model = 28;

  // Required. Input configuration of the instances on which predictions are
  // performed. The schema of any single instance may be specified via the
  // [Model's][google.cloud.aiplatform.v1.BatchPredictionJob.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1.Model.predict_schemata]
  // [instance_schema_uri][google.cloud.aiplatform.v1.PredictSchemata.instance_schema_uri].
  InputConfig input_config = 4 [(google.api.field_behavior) = REQUIRED];

  // Configuration for how to convert batch prediction input instances to the
  // prediction instances that are sent to the Model.
  InstanceConfig instance_config = 27;

  // The parameters that govern the predictions. The schema of the parameters
  // may be specified via the
  // [Model's][google.cloud.aiplatform.v1.BatchPredictionJob.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1.Model.predict_schemata]
  // [parameters_schema_uri][google.cloud.aiplatform.v1.PredictSchemata.parameters_schema_uri].
  google.protobuf.Value model_parameters = 5;

  // Required. The Configuration specifying where output predictions should
  // be written.
  // The schema of any single prediction may be specified as a concatenation
  // of [Model's][google.cloud.aiplatform.v1.BatchPredictionJob.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1.Model.predict_schemata]
  // [instance_schema_uri][google.cloud.aiplatform.v1.PredictSchemata.instance_schema_uri]
  // and
  // [prediction_schema_uri][google.cloud.aiplatform.v1.PredictSchemata.prediction_schema_uri].
  OutputConfig output_config = 6 [(google.api.field_behavior) = REQUIRED];

  // The config of resources used by the Model during the batch prediction. If
  // the Model
  // [supports][google.cloud.aiplatform.v1.Model.supported_deployment_resources_types]
  // DEDICATED_RESOURCES this config may be provided (and the job will use these
  // resources), if the Model doesn't support AUTOMATIC_RESOURCES, this config
  // must be provided.
  BatchDedicatedResources dedicated_resources = 7;

  // The service account that the DeployedModel's container runs as. If not
  // specified, a system generated one will be used, which
  // has minimal permissions and the custom container, if used, may not have
  // enough permission to access other Google Cloud resources.
  //
  // Users deploying the Model must have the `iam.serviceAccounts.actAs`
  // permission on this service account.
  string service_account = 29;

  // Immutable. Parameters configuring the batch behavior. Currently only
  // applicable when
  // [dedicated_resources][google.cloud.aiplatform.v1.BatchPredictionJob.dedicated_resources]
  // are used (in other cases Vertex AI does the tuning itself).
  ManualBatchTuningParameters manual_batch_tuning_parameters = 8
      [(google.api.field_behavior) = IMMUTABLE];

  // Generate explanation with the batch prediction results.
  //
  // When set to `true`, the batch prediction output changes based on the
  // `predictions_format` field of the
  // [BatchPredictionJob.output_config][google.cloud.aiplatform.v1.BatchPredictionJob.output_config]
  // object:
  //
  //  * `bigquery`: output includes a column named `explanation`. The value
  //    is a struct that conforms to the
  //    [Explanation][google.cloud.aiplatform.v1.Explanation] object.
  //  * `jsonl`: The JSON objects on each line include an additional entry
  //    keyed `explanation`. The value of the entry is a JSON object that
  //    conforms to the [Explanation][google.cloud.aiplatform.v1.Explanation]
  //    object.
  //  * `csv`: Generating explanations for CSV format is not supported.
  //
  // If this field is set to true, either the
  // [Model.explanation_spec][google.cloud.aiplatform.v1.Model.explanation_spec]
  // or
  // [explanation_spec][google.cloud.aiplatform.v1.BatchPredictionJob.explanation_spec]
  // must be populated.
  bool generate_explanation = 23;

  // Explanation configuration for this BatchPredictionJob. Can be
  // specified only if
  // [generate_explanation][google.cloud.aiplatform.v1.BatchPredictionJob.generate_explanation]
  // is set to `true`.
  //
  // This value overrides the value of
  // [Model.explanation_spec][google.cloud.aiplatform.v1.Model.explanation_spec].
  // All fields of
  // [explanation_spec][google.cloud.aiplatform.v1.BatchPredictionJob.explanation_spec]
  // are optional in the request. If a field of the
  // [explanation_spec][google.cloud.aiplatform.v1.BatchPredictionJob.explanation_spec]
  // object is not populated, the corresponding field of the
  // [Model.explanation_spec][google.cloud.aiplatform.v1.Model.explanation_spec]
  // object is inherited.
  ExplanationSpec explanation_spec = 25;

  // Output only. Information further describing the output of this job.
  OutputInfo output_info = 9 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The detailed state of the job.
  JobState state = 10 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Only populated when the job's state is JOB_STATE_FAILED or
  // JOB_STATE_CANCELLED.
  google.rpc.Status error = 11 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Partial failures encountered.
  // For example, single files that can't be read.
  // This field never exceeds 20 entries.
  // Status details fields contain standard Google Cloud error details.
  repeated google.rpc.Status partial_failures = 12
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Information about resources that had been consumed by this
  // job. Provided in real time at best effort basis, as well as a final value
  // once the job completes.
  //
  // Note: This field currently may be not populated for batch predictions that
  // use AutoML Models.
  ResourcesConsumed resources_consumed = 13
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Statistics on completed and failed prediction instances.
  CompletionStats completion_stats = 14
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time when the BatchPredictionJob was created.
  google.protobuf.Timestamp create_time = 15
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time when the BatchPredictionJob for the first time entered
  // the `JOB_STATE_RUNNING` state.
  google.protobuf.Timestamp start_time = 16
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time when the BatchPredictionJob entered any of the following
  // states: `JOB_STATE_SUCCEEDED`, `JOB_STATE_FAILED`, `JOB_STATE_CANCELLED`.
  google.protobuf.Timestamp end_time = 17
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time when the BatchPredictionJob was most recently updated.
  google.protobuf.Timestamp update_time = 18
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // The labels with user-defined metadata to organize BatchPredictionJobs.
  //
  // Label keys and values can be no longer than 64 characters
  // (Unicode codepoints), can only contain lowercase letters, numeric
  // characters, underscores and dashes. International characters are allowed.
  //
  // See https://goo.gl/xmQnxf for more information and examples of labels.
  map<string, string> labels = 19;

  // Customer-managed encryption key options for a BatchPredictionJob. If this
  // is set, then all resources created by the BatchPredictionJob will be
  // encrypted with the provided encryption key.
  EncryptionSpec encryption_spec = 24;

  // For custom-trained Models and AutoML Tabular Models, the container of the
  // DeployedModel instances will send `stderr` and `stdout` streams to
  // Cloud Logging by default. Please note that the logs incur cost,
  // which are subject to [Cloud Logging
  // pricing](https://cloud.google.com/logging/pricing).
  //
  // User can disable container logging by setting this flag to true.
  bool disable_container_logging = 34;
}