// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.bigquery.v2;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/cloud/bigquery/v2/encryption_config.proto";
import "google/cloud/bigquery/v2/model_reference.proto";
import "google/cloud/bigquery/v2/standard_sql.proto";
import "google/cloud/bigquery/v2/table_reference.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/timestamp.proto";
import "google/protobuf/wrappers.proto";

option go_package = "cloud.google.com/go/bigquery/apiv2/bigquerypb;bigquerypb";
option java_outer_classname = "ModelProto";
option java_package = "com.google.cloud.bigquery.v2";

// This is an experimental RPC service definition for the BigQuery
// Model Service.
//
// It should not be relied on for production use cases at this time.
service ModelService {
  option (google.api.default_host) = "bigquery.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/bigquery,"
      "https://www.googleapis.com/auth/cloud-platform,"
      "https://www.googleapis.com/auth/cloud-platform.read-only";

  // Gets the specified model resource by model ID.
  rpc GetModel(GetModelRequest) returns (Model) {
    option (google.api.http) = {
      get: "/bigquery/v2/projects/{project_id=*}/datasets/{dataset_id=*}/models/{model_id=*}"
    };
    option (google.api.method_signature) = "project_id,dataset_id,model_id";
  }

  // Lists all models in the specified dataset. Requires the READER dataset
  // role. After retrieving the list of models, you can get information about a
  // particular model by calling the models.get method.
  rpc ListModels(ListModelsRequest) returns (ListModelsResponse) {
    option (google.api.http) = {
      get: "/bigquery/v2/projects/{project_id=*}/datasets/{dataset_id=*}/models"
    };
    option (google.api.method_signature) = "project_id,dataset_id,max_results";
  }

  // Patches specific fields in the specified model.
  rpc PatchModel(PatchModelRequest) returns (Model) {
    option (google.api.http) = {
      patch: "/bigquery/v2/projects/{project_id=*}/datasets/{dataset_id=*}/models/{model_id=*}"
      body: "model"
    };
    option (google.api.method_signature) =
        "project_id,dataset_id,model_id,model";
  }

  // Deletes the model specified by modelId from the dataset.
  rpc DeleteModel(DeleteModelRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/bigquery/v2/projects/{project_id=*}/datasets/{dataset_id=*}/models/{model_id=*}"
    };
    option (google.api.method_signature) = "project_id,dataset_id,model_id";
  }
}
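// Illustrative, non-normative sketch: calling the GetModel REST binding
// above with plain net/http in Go. This is not generated client code; the
// project, dataset, and model IDs are placeholder assumptions, and the
// authenticated client is assumed to come from golang.org/x/oauth2/google
// application default credentials. The URL path follows the
// google.api.http binding declared on GetModel.
//
//   package main
//
//   import (
//     "context"
//     "fmt"
//     "io"
//
//     "golang.org/x/oauth2/google"
//   )
//
//   func main() {
//     ctx := context.Background()
//     // Scope matches one of the oauth_scopes declared on ModelService.
//     client, err := google.DefaultClient(ctx,
//       "https://www.googleapis.com/auth/bigquery")
//     if err != nil {
//       panic(err)
//     }
//     url := "https://bigquery.googleapis.com/bigquery/v2/projects/" +
//       "my-project/datasets/my_dataset/models/my_model"
//     resp, err := client.Get(url)
//     if err != nil {
//       panic(err)
//     }
//     defer resp.Body.Close()
//     body, _ := io.ReadAll(resp.Body)
//     fmt.Println(resp.Status, string(body))
//   }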
// Remote Model Info
message RemoteModelInfo {
  // Supported service type for remote model.
  enum RemoteServiceType {
    // Unspecified remote service type.
    REMOTE_SERVICE_TYPE_UNSPECIFIED = 0;

    // V3 Cloud AI Translation API. See more details at [Cloud Translation API]
    // (https://cloud.google.com/translate/docs/reference/rest).
    CLOUD_AI_TRANSLATE_V3 = 1;

    // V1 Cloud AI Vision API. See more details at [Cloud Vision API]
    // (https://cloud.google.com/vision/docs/reference/rest).
    CLOUD_AI_VISION_V1 = 2;

    // V1 Cloud AI Natural Language API. See more details at [REST Resource:
    // documents](https://cloud.google.com/natural-language/docs/reference/rest/v1/documents).
    CLOUD_AI_NATURAL_LANGUAGE_V1 = 3;

    // V2 Speech-to-Text API. See more details at [Google Cloud Speech-to-Text
    // V2 API](https://cloud.google.com/speech-to-text/v2/docs).
    CLOUD_AI_SPEECH_TO_TEXT_V2 = 7;
  }

  // Remote services are services outside of BigQuery used by remote models for
  // predictions. A remote service is backed by either an arbitrary endpoint or
  // a selected remote service type, but not both.
  oneof remote_service {
    // Output only. The endpoint for remote model.
    string endpoint = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. The remote service type for remote model.
    RemoteServiceType remote_service_type = 2
        [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // Output only. Fully qualified name of the user-provided connection object
  // of the remote model. Format:
  // ```"projects/{project_id}/locations/{location_id}/connections/{connection_id}"```
  string connection = 3 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Max number of rows in each batch sent to the remote service.
  // If unset, the number of rows in each batch is set dynamically.
  int64 max_batching_rows = 4 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The model version for LLM.
  string remote_model_version = 5 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The name of the speech recognizer to use for speech
  // recognition. The expected format is
  // `projects/{project}/locations/{location}/recognizers/{recognizer}`.
  // Customers can specify this field at model creation. If not specified, a
  // default recognizer `projects/{model
  // project}/locations/global/recognizers/_` will be used. See more details at
  // [recognizers](https://cloud.google.com/speech-to-text/v2/docs/reference/rest/v2/projects.locations.recognizers)
  string speech_recognizer = 7 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Information about a single transform column.
message TransformColumn {
  // Output only. Name of the column.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Data type of the column after the transform.
  StandardSqlDataType type = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The SQL expression used in the column transform.
  string transform_sql = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
}

message Model {
  // Indicates the type of the Model.
  enum ModelType {
    // Default value.
    MODEL_TYPE_UNSPECIFIED = 0;
    // Linear regression model.
    LINEAR_REGRESSION = 1;
    // Logistic regression based classification model.
    LOGISTIC_REGRESSION = 2;
    // K-means clustering model.
    KMEANS = 3;
    // Matrix factorization model.
    MATRIX_FACTORIZATION = 4;
    // DNN classifier model.
    DNN_CLASSIFIER = 5;
    // An imported TensorFlow model.
    TENSORFLOW = 6;
    // DNN regressor model.
    DNN_REGRESSOR = 7;
    // An imported XGBoost model.
    XGBOOST = 8;
    // Boosted tree regressor model.
    BOOSTED_TREE_REGRESSOR = 9;
    // Boosted tree classifier model.
    BOOSTED_TREE_CLASSIFIER = 10;
    // ARIMA model.
    ARIMA = 11;
    // AutoML Tables regression model.
    AUTOML_REGRESSOR = 12;
    // AutoML Tables classification model.
    AUTOML_CLASSIFIER = 13;
    // Principal Component Analysis model.
    PCA = 14;
    // Wide-and-deep classifier model.
    DNN_LINEAR_COMBINED_CLASSIFIER = 16;
    // Wide-and-deep regressor model.
    DNN_LINEAR_COMBINED_REGRESSOR = 17;
    // Autoencoder model.
    AUTOENCODER = 18;
    // New name for the ARIMA model.
    ARIMA_PLUS = 19;
    // ARIMA with external regressors.
    ARIMA_PLUS_XREG = 23;
    // Random forest regressor model.
    RANDOM_FOREST_REGRESSOR = 24;
    // Random forest classifier model.
    RANDOM_FOREST_CLASSIFIER = 25;
    // An imported TensorFlow Lite model.
    TENSORFLOW_LITE = 26;
    // An imported ONNX model.
    ONNX = 28;
    // Model to capture the columns and logic in the TRANSFORM clause along
    // with statistics useful for ML analytic functions.
    TRANSFORM_ONLY = 29;
  }

  // Loss metric to evaluate model training performance.
  enum LossType {
    // Default value.
    LOSS_TYPE_UNSPECIFIED = 0;
    // Mean squared loss, used for linear regression.
    MEAN_SQUARED_LOSS = 1;
    // Mean log loss, used for logistic regression.
    MEAN_LOG_LOSS = 2;
  }

  // Distance metric used to compute the distance between two points.
  enum DistanceType {
    // Default value.
    DISTANCE_TYPE_UNSPECIFIED = 0;
    // Euclidean distance.
    EUCLIDEAN = 1;
    // Cosine distance.
    COSINE = 2;
  }

  // Indicates the method to split input data into multiple tables.
  enum DataSplitMethod {
    // Default value.
    DATA_SPLIT_METHOD_UNSPECIFIED = 0;
    // Splits data randomly.
    RANDOM = 1;
    // Splits data with the user provided tags.
    CUSTOM = 2;
    // Splits data sequentially.
    SEQUENTIAL = 3;
    // Data split will be skipped.
    NO_SPLIT = 4;
    // Splits data automatically: Uses NO_SPLIT if the data size is small.
    // Otherwise uses RANDOM.
    AUTO_SPLIT = 5;
  }

  // Type of supported data frequency for time series forecasting models.
  enum DataFrequency {
    // Default value.
    DATA_FREQUENCY_UNSPECIFIED = 0;
    // Automatically inferred from timestamps.
    AUTO_FREQUENCY = 1;
    // Yearly data.
    YEARLY = 2;
    // Quarterly data.
    QUARTERLY = 3;
    // Monthly data.
    MONTHLY = 4;
    // Weekly data.
    WEEKLY = 5;
    // Daily data.
    DAILY = 6;
    // Hourly data.
    HOURLY = 7;
    // Per-minute data.
    PER_MINUTE = 8;
  }

  // Type of supported holiday regions for time series forecasting models.
  enum HolidayRegion {
    // Holiday region unspecified.
    HOLIDAY_REGION_UNSPECIFIED = 0;
    // Global.
    GLOBAL = 1;
    // North America.
    NA = 2;
    // Japan and Asia Pacific: Korea, Greater China, India, Australia, and New
    // Zealand.
    JAPAC = 3;
    // Europe, the Middle East and Africa.
    EMEA = 4;
    // Latin America and the Caribbean.
    LAC = 5;
    // United Arab Emirates
    AE = 6;
    // Argentina
    AR = 7;
    // Austria
    AT = 8;
    // Australia
    AU = 9;
    // Belgium
    BE = 10;
    // Brazil
    BR = 11;
    // Canada
    CA = 12;
    // Switzerland
    CH = 13;
    // Chile
    CL = 14;
    // China
    CN = 15;
    // Colombia
    CO = 16;
    // Czechoslovakia
    CS = 17;
    // Czech Republic
    CZ = 18;
    // Germany
    DE = 19;
    // Denmark
    DK = 20;
    // Algeria
    DZ = 21;
    // Ecuador
    EC = 22;
    // Estonia
    EE = 23;
    // Egypt
    EG = 24;
    // Spain
    ES = 25;
    // Finland
    FI = 26;
    // France
    FR = 27;
    // Great Britain (United Kingdom)
    GB = 28;
    // Greece
    GR = 29;
    // Hong Kong
    HK = 30;
    // Hungary
    HU = 31;
    // Indonesia
    ID = 32;
    // Ireland
    IE = 33;
    // Israel
    IL = 34;
    // India
    IN = 35;
    // Iran
    IR = 36;
    // Italy
    IT = 37;
    // Japan
    JP = 38;
    // Korea (South)
    KR = 39;
    // Latvia
    LV = 40;
    // Morocco
    MA = 41;
    // Mexico
    MX = 42;
    // Malaysia
    MY = 43;
    // Nigeria
    NG = 44;
    // Netherlands
    NL = 45;
    // Norway
    NO = 46;
    // New Zealand
    NZ = 47;
    // Peru
    PE = 48;
    // Philippines
    PH = 49;
    // Pakistan
    PK = 50;
    // Poland
    PL = 51;
    // Portugal
    PT = 52;
    // Romania
    RO = 53;
    // Serbia
    RS = 54;
    // Russian Federation
    RU = 55;
    // Saudi Arabia
    SA = 56;
    // Sweden
    SE = 57;
    // Singapore
    SG = 58;
    // Slovenia
    SI = 59;
    // Slovakia
    SK = 60;
    // Thailand
    TH = 61;
    // Turkey
    TR = 62;
    // Taiwan
    TW = 63;
    // Ukraine
    UA = 64;
    // United States
    US = 65;
    // Venezuela
    VE = 66;
    // Viet Nam
    VN = 67;
    // South Africa
    ZA = 68;
  }

  // Enums for seasonal period.
  message SeasonalPeriod {
    // Seasonal period type.
    enum SeasonalPeriodType {
      // Unspecified seasonal period.
      SEASONAL_PERIOD_TYPE_UNSPECIFIED = 0;
      // No seasonality.
      NO_SEASONALITY = 1;
      // Daily period, 24 hours.
      DAILY = 2;
      // Weekly period, 7 days.
      WEEKLY = 3;
      // Monthly period, 30 days or irregular.
      MONTHLY = 4;
      // Quarterly period, 90 days or irregular.
      QUARTERLY = 5;
      // Yearly period, 365 days or irregular.
      YEARLY = 6;
    }
  }

  // Enums for color space, used for processing images in Object Table.
  // See more details at
  // https://www.tensorflow.org/io/tutorials/colorspace.
  enum ColorSpace {
    // Unspecified color space.
    COLOR_SPACE_UNSPECIFIED = 0;
    // RGB.
    RGB = 1;
    // HSV.
    HSV = 2;
    // YIQ.
    YIQ = 3;
    // YUV.
    YUV = 4;
    // GRAYSCALE.
    GRAYSCALE = 5;
  }

  // Enums for kmeans model type.
  message KmeansEnums {
    // Indicates the method used to initialize the centroids for the KMeans
    // clustering algorithm.
    enum KmeansInitializationMethod {
      // Unspecified initialization method.
      KMEANS_INITIALIZATION_METHOD_UNSPECIFIED = 0;
      // Initializes the centroids randomly.
      RANDOM = 1;
      // Initializes the centroids using data specified in
      // kmeans_initialization_column.
      CUSTOM = 2;
      // Initializes with kmeans++.
      KMEANS_PLUS_PLUS = 3;
    }
  }

  // Enums for XGBoost model type.
  message BoostedTreeOptionEnums {
    // Booster types supported. Refer to booster parameter in XGBoost.
    enum BoosterType {
      // Unspecified booster type.
      BOOSTER_TYPE_UNSPECIFIED = 0;
      // Gbtree booster.
      GBTREE = 1;
      // Dart booster.
      DART = 2;
    }

    // Type of normalization algorithm for boosted tree models using dart
    // booster. Refer to normalize_type in XGBoost.
    enum DartNormalizeType {
      // Unspecified dart normalize type.
      DART_NORMALIZE_TYPE_UNSPECIFIED = 0;
      // New trees have the same weight as each of the dropped trees.
      TREE = 1;
      // New trees have the same weight as the sum of the dropped trees.
      FOREST = 2;
    }

    // Tree construction algorithm used in boosted tree models.
    // Refer to tree_method in XGBoost.
    enum TreeMethod {
      // Unspecified tree method.
      TREE_METHOD_UNSPECIFIED = 0;
      // Use heuristic to choose the fastest method.
      AUTO = 1;
      // Exact greedy algorithm.
      EXACT = 2;
      // Approximate greedy algorithm using quantile sketch and gradient
      // histogram.
      APPROX = 3;
      // Fast histogram optimized approximate greedy algorithm.
      HIST = 4;
    }
  }

  // Enums for hyperparameter tuning.
  message HparamTuningEnums {
    // Available evaluation metrics used as hyperparameter tuning objectives.
    enum HparamTuningObjective {
      // Unspecified evaluation metric.
      HPARAM_TUNING_OBJECTIVE_UNSPECIFIED = 0;
      // Mean absolute error.
      // mean_absolute_error = AVG(ABS(label - predicted))
      MEAN_ABSOLUTE_ERROR = 1;
      // Mean squared error.
      // mean_squared_error = AVG(POW(label - predicted, 2))
      MEAN_SQUARED_ERROR = 2;
      // Mean squared log error.
      // mean_squared_log_error = AVG(POW(LN(1 + label) - LN(1 + predicted), 2))
      MEAN_SQUARED_LOG_ERROR = 3;
      // Median absolute error.
      // median_absolute_error = APPROX_QUANTILES(absolute_error, 2)[OFFSET(1)]
      MEDIAN_ABSOLUTE_ERROR = 4;
      // R^2 score. This corresponds to r2_score in ML.EVALUATE.
      // r_squared = 1 - SUM(squared_error)/(COUNT(label)*VAR_POP(label))
      R_SQUARED = 5;
      // Explained variance.
      // explained_variance = 1 - VAR_POP(label_error)/VAR_POP(label)
      EXPLAINED_VARIANCE = 6;
      // Precision is the fraction of actual positive predictions that had
      // positive actual labels. For multiclass this is a macro-averaged metric
      // treating each class as a binary classifier.
      PRECISION = 7;
      // Recall is the fraction of actual positive labels that were given a
      // positive prediction. For multiclass this is a macro-averaged metric.
      RECALL = 8;
      // Accuracy is the fraction of predictions given the correct label. For
      // multiclass this is a globally micro-averaged metric.
      ACCURACY = 9;
      // The F1 score is an average of recall and precision. For multiclass
      // this is a macro-averaged metric.
      F1_SCORE = 10;
      // Logarithmic Loss. For multiclass this is a macro-averaged metric.
      LOG_LOSS = 11;
      // Area Under an ROC Curve. For multiclass this is a macro-averaged
      // metric.
      ROC_AUC = 12;
      // Davies-Bouldin Index.
      DAVIES_BOULDIN_INDEX = 13;
      // Mean Average Precision.
      MEAN_AVERAGE_PRECISION = 14;
      // Normalized Discounted Cumulative Gain.
      NORMALIZED_DISCOUNTED_CUMULATIVE_GAIN = 15;
      // Average Rank.
      AVERAGE_RANK = 16;
    }
  }
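  // Illustrative, non-normative sketch (not part of the API surface): the
  // regression objectives above written out in Go, following the SQL
  // formulas in the enum comments verbatim. The function and slice names
  // are placeholder assumptions.
  //
  //   package metrics
  //
  //   import "math"
  //
  //   // meanAbsoluteError implements AVG(ABS(label - predicted)).
  //   func meanAbsoluteError(label, predicted []float64) float64 {
  //     var sum float64
  //     for i := range label {
  //       sum += math.Abs(label[i] - predicted[i])
  //     }
  //     return sum / float64(len(label))
  //   }
  //
  //   // meanSquaredError implements AVG(POW(label - predicted, 2)).
  //   func meanSquaredError(label, predicted []float64) float64 {
  //     var sum float64
  //     for i := range label {
  //       d := label[i] - predicted[i]
  //       sum += d * d
  //     }
  //     return sum / float64(len(label))
  //   }
  //
  //   // meanSquaredLogError implements
  //   // AVG(POW(LN(1 + label) - LN(1 + predicted), 2)).
  //   func meanSquaredLogError(label, predicted []float64) float64 {
  //     var sum float64
  //     for i := range label {
  //       d := math.Log1p(label[i]) - math.Log1p(predicted[i])
  //       sum += d * d
  //     }
  //     return sum / float64(len(label))
  //   }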
  // Indicates the learning rate optimization strategy to use.
  enum LearnRateStrategy {
    // Default value.
    LEARN_RATE_STRATEGY_UNSPECIFIED = 0;
    // Use line search to determine learning rate.
    LINE_SEARCH = 1;
    // Use a constant learning rate.
    CONSTANT = 2;
  }

  // Indicates the optimization strategy used for training.
  enum OptimizationStrategy {
    // Default value.
    OPTIMIZATION_STRATEGY_UNSPECIFIED = 0;
    // Uses an iterative batch gradient descent algorithm.
    BATCH_GRADIENT_DESCENT = 1;
    // Uses a normal equation to solve the linear regression problem.
    NORMAL_EQUATION = 2;
  }

  // Indicates the training algorithm to use for matrix factorization models.
  enum FeedbackType {
    // Default value.
    FEEDBACK_TYPE_UNSPECIFIED = 0;
    // Use weighted-ALS for implicit feedback problems.
    IMPLICIT = 1;
    // Use nonweighted-ALS for explicit feedback problems.
    EXPLICIT = 2;
  }

  // Evaluation metrics for regression and explicit feedback type matrix
  // factorization models.
  message RegressionMetrics {
    // Mean absolute error.
    google.protobuf.DoubleValue mean_absolute_error = 1;

    // Mean squared error.
    google.protobuf.DoubleValue mean_squared_error = 2;

    // Mean squared log error.
    google.protobuf.DoubleValue mean_squared_log_error = 3;

    // Median absolute error.
    google.protobuf.DoubleValue median_absolute_error = 4;

    // R^2 score. This corresponds to r2_score in ML.EVALUATE.
    google.protobuf.DoubleValue r_squared = 5;
  }

  // Aggregate metrics for classification/classifier models. For multi-class
  // models, the metrics are either macro-averaged or micro-averaged. When
  // macro-averaged, the metrics are calculated for each label and then an
  // unweighted average is taken of those values. When micro-averaged, the
  // metric is calculated globally by counting the total number of correctly
  // predicted rows.
  message AggregateClassificationMetrics {
    // Precision is the fraction of actual positive predictions that had
    // positive actual labels. For multiclass this is a macro-averaged
    // metric treating each class as a binary classifier.
    google.protobuf.DoubleValue precision = 1;

    // Recall is the fraction of actual positive labels that were given a
    // positive prediction. For multiclass this is a macro-averaged metric.
    google.protobuf.DoubleValue recall = 2;

    // Accuracy is the fraction of predictions given the correct label. For
    // multiclass this is a micro-averaged metric.
    google.protobuf.DoubleValue accuracy = 3;

    // Threshold at which the metrics are computed. For binary
    // classification models this is the positive class threshold.
    // For multi-class classification models this is the confidence
    // threshold.
    google.protobuf.DoubleValue threshold = 4;

    // The F1 score is an average of recall and precision. For multiclass
    // this is a macro-averaged metric.
    google.protobuf.DoubleValue f1_score = 5;

    // Logarithmic Loss. For multiclass this is a macro-averaged metric.
    google.protobuf.DoubleValue log_loss = 6;

    // Area Under a ROC Curve. For multiclass this is a macro-averaged
    // metric.
    google.protobuf.DoubleValue roc_auc = 7;
  }

  // Evaluation metrics for binary classification/classifier models.
  message BinaryClassificationMetrics {
    // Confusion matrix for binary classification models.
    message BinaryConfusionMatrix {
      // Threshold value used when computing each of the following metrics.
      google.protobuf.DoubleValue positive_class_threshold = 1;

      // Number of true samples predicted as true.
      google.protobuf.Int64Value true_positives = 2;

      // Number of false samples predicted as true.
      google.protobuf.Int64Value false_positives = 3;

      // Number of true samples predicted as false.
      google.protobuf.Int64Value true_negatives = 4;

      // Number of false samples predicted as false.
      google.protobuf.Int64Value false_negatives = 5;

      // The fraction of actual positive predictions that had positive actual
      // labels.
      google.protobuf.DoubleValue precision = 6;

      // The fraction of actual positive labels that were given a positive
      // prediction.
      google.protobuf.DoubleValue recall = 7;

      // The equally weighted average of recall and precision.
      google.protobuf.DoubleValue f1_score = 8;

      // The fraction of predictions given the correct label.
      google.protobuf.DoubleValue accuracy = 9;
    }

    // Aggregate classification metrics.
    AggregateClassificationMetrics aggregate_classification_metrics = 1;

    // Binary confusion matrix at multiple thresholds.
    repeated BinaryConfusionMatrix binary_confusion_matrix_list = 2;

    // Label representing the positive class.
    string positive_label = 3;

    // Label representing the negative class.
    string negative_label = 4;
  }
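  // Illustrative, non-normative sketch (not part of the API surface): how
  // the derived ratios in BinaryConfusionMatrix relate to its raw counts.
  // The Go struct below is a hypothetical stand-in, not a generated type.
  //
  //   package metrics
  //
  //   // counts mirrors the four raw count fields of BinaryConfusionMatrix.
  //   type counts struct {
  //     TP, FP, TN, FN float64
  //   }
  //
  //   // precision = TP / (TP + FP): the fraction of positive predictions
  //   // that had positive actual labels.
  //   func precision(c counts) float64 { return c.TP / (c.TP + c.FP) }
  //
  //   // recall = TP / (TP + FN): the fraction of actual positives that
  //   // were predicted positive.
  //   func recall(c counts) float64 { return c.TP / (c.TP + c.FN) }
  //
  //   // f1Score is the harmonic mean of precision and recall (the
  //   // "equally weighted average" in the field comment).
  //   func f1Score(c counts) float64 {
  //     p, r := precision(c), recall(c)
  //     return 2 * p * r / (p + r)
  //   }
  //
  //   // accuracy = correct predictions over all predictions.
  //   func accuracy(c counts) float64 {
  //     return (c.TP + c.TN) / (c.TP + c.FP + c.TN + c.FN)
  //   }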
  // Evaluation metrics for multi-class classification/classifier models.
  message MultiClassClassificationMetrics {
    // Confusion matrix for multi-class classification models.
    message ConfusionMatrix {
      // A single entry in the confusion matrix.
      message Entry {
        // The predicted label. For confidence_threshold > 0, we will
        // also add an entry indicating the number of items under the
        // confidence threshold.
        string predicted_label = 1;

        // Number of items being predicted as this label.
        google.protobuf.Int64Value item_count = 2;
      }

      // A single row in the confusion matrix.
      message Row {
        // The original label of this row.
        string actual_label = 1;

        // Info describing predicted label distribution.
        repeated Entry entries = 2;
      }

      // Confidence threshold used when computing the entries of the
      // confusion matrix.
      google.protobuf.DoubleValue confidence_threshold = 1;

      // One row per actual label.
      repeated Row rows = 2;
    }

    // Aggregate classification metrics.
    AggregateClassificationMetrics aggregate_classification_metrics = 1;

    // Confusion matrix at different thresholds.
    repeated ConfusionMatrix confusion_matrix_list = 2;
  }

  // Evaluation metrics for clustering models.
  message ClusteringMetrics {
    // Message containing the information about one cluster.
    message Cluster {
      // Representative value of a single feature within the cluster.
      message FeatureValue {
        // Representative value of a categorical feature.
        message CategoricalValue {
          // Represents the count of a single category within the cluster.
          message CategoryCount {
            // The name of category.
            string category = 1;

            // The count of training samples matching the category within the
            // cluster.
            google.protobuf.Int64Value count = 2;
          }

          // Counts of all categories for the categorical feature. If there are
          // more than ten categories, we return top ten (by count) and return
          // one more CategoryCount with category "_OTHER_" and count as
          // aggregate counts of remaining categories.
          repeated CategoryCount category_counts = 1;
        }

        // The feature column name.
        string feature_column = 1;

        // Value.
        oneof value {
          // The numerical feature value. This is the centroid value for this
          // feature.
          google.protobuf.DoubleValue numerical_value = 2;

          // The categorical feature value.
          CategoricalValue categorical_value = 3;
        }
      }

      // Centroid id.
      int64 centroid_id = 1;

      // Values of highly variant features for this cluster.
      repeated FeatureValue feature_values = 2;

      // Count of training data rows that were assigned to this cluster.
      google.protobuf.Int64Value count = 3;
    }

    // Davies-Bouldin index.
    google.protobuf.DoubleValue davies_bouldin_index = 1;

    // Mean of squared distances between each sample to its cluster centroid.
    google.protobuf.DoubleValue mean_squared_distance = 2;

    // Information for all clusters.
    repeated Cluster clusters = 3;
  }

  // Evaluation metrics used by weighted-ALS models specified by
  // feedback_type=implicit.
  message RankingMetrics {
    // Calculates a precision per user for all the items by ranking them and
    // then averages all the precisions across all the users.
    google.protobuf.DoubleValue mean_average_precision = 1;

    // Similar to the mean squared error computed in regression and explicit
    // recommendation models except instead of computing the rating directly,
    // the output from evaluate is computed against a preference which is 1 or
    // 0 depending on whether the rating exists or not.
    google.protobuf.DoubleValue mean_squared_error = 2;

    // A metric to determine the goodness of a ranking calculated from the
    // predicted confidence by comparing it to an ideal rank measured by the
    // original ratings.
    google.protobuf.DoubleValue normalized_discounted_cumulative_gain = 3;

    // Determines the goodness of a ranking by computing the percentile rank
    // from the predicted confidence and dividing it by the original rank.
    google.protobuf.DoubleValue average_rank = 4;
  }

  // Model evaluation metrics for ARIMA forecasting models.
  message ArimaForecastingMetrics {
    // Model evaluation metrics for a single ARIMA forecasting model.
    message ArimaSingleModelForecastingMetrics {
      // Non-seasonal order.
      ArimaOrder non_seasonal_order = 1;

      // Arima fitting metrics.
      ArimaFittingMetrics arima_fitting_metrics = 2;

      // Whether the ARIMA model is fitted with drift or not. It is always
      // false when d is not 1.
      google.protobuf.BoolValue has_drift = 3;

      // The time_series_id value for this time series. It will be one of
      // the unique values from the time_series_id_column specified during
      // ARIMA model training. Only present when the time_series_id_column
      // training option was used.
      string time_series_id = 4;

      // The tuple of time_series_ids identifying this time series. It will
      // be one of the unique tuples of values present in the
      // time_series_id_columns specified during ARIMA model training. Only
      // present when the time_series_id_columns training option was used and
      // the order of values here is the same as the order of
      // time_series_id_columns.
      repeated string time_series_ids = 9;

      // Seasonal periods. Repeated because multiple periods are supported
      // for one time series.
      repeated SeasonalPeriod.SeasonalPeriodType seasonal_periods = 5;

      // If true, holiday_effect is a part of time series decomposition
      // result.
      google.protobuf.BoolValue has_holiday_effect = 6;

      // If true, spikes_and_dips is a part of time series decomposition
      // result.
      google.protobuf.BoolValue has_spikes_and_dips = 7;

      // If true, step_changes is a part of time series decomposition result.
      google.protobuf.BoolValue has_step_changes = 8;
    }

    // Repeated as there can be many metric sets (one for each model) in
    // auto-arima and the large-scale case.
    repeated ArimaSingleModelForecastingMetrics
        arima_single_model_forecasting_metrics = 6;
  }

  // Model evaluation metrics for dimensionality reduction models.
  message DimensionalityReductionMetrics {
    // Total percentage of variance explained by the selected principal
    // components.
    google.protobuf.DoubleValue total_explained_variance_ratio = 1;
  }

  // Evaluation metrics of a model. These are either computed on all training
  // data or just the eval data based on whether eval data was used during
  // training. These are not present for imported models.
  message EvaluationMetrics {
    // Metrics.
    oneof metrics {
      // Populated for regression models and explicit feedback type matrix
      // factorization models.
      RegressionMetrics regression_metrics = 1;

      // Populated for binary classification/classifier models.
      BinaryClassificationMetrics binary_classification_metrics = 2;

      // Populated for multi-class classification/classifier models.
      MultiClassClassificationMetrics multi_class_classification_metrics = 3;

      // Populated for clustering models.
      ClusteringMetrics clustering_metrics = 4;

      // Populated for implicit feedback type matrix factorization models.
      RankingMetrics ranking_metrics = 5;

      // Populated for ARIMA models.
      ArimaForecastingMetrics arima_forecasting_metrics = 6;

      // Evaluation metrics when the model is a dimensionality reduction model,
      // which currently includes PCA.
      DimensionalityReductionMetrics dimensionality_reduction_metrics = 7;
    }
  }
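  // Illustrative, non-normative sketch (not part of the API surface):
  // reading the metrics oneof above from Go. This assumes stock
  // protoc-gen-go output for this file under the go_package declared above;
  // the generated type and accessor names shown are assumptions, and each
  // Get* accessor returns nil when the oneof holds a different case.
  //
  //   package example
  //
  //   import (
  //     "fmt"
  //
  //     "cloud.google.com/go/bigquery/apiv2/bigquerypb"
  //   )
  //
  //   func describeMetrics(em *bigquerypb.Model_EvaluationMetrics) {
  //     switch {
  //     case em.GetRegressionMetrics() != nil:
  //       fmt.Println("MAE:",
  //         em.GetRegressionMetrics().GetMeanAbsoluteError().GetValue())
  //     case em.GetBinaryClassificationMetrics() != nil:
  //       m := em.GetBinaryClassificationMetrics()
  //       fmt.Println("ROC AUC:",
  //         m.GetAggregateClassificationMetrics().GetRocAuc().GetValue())
  //     default:
  //       fmt.Println("other or no metrics populated")
  //     }
  //   }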
  // Data split result. This contains references to the training and evaluation
  // data tables that were used to train the model.
  message DataSplitResult {
    // Table reference of the training data after split.
    TableReference training_table = 1;

    // Table reference of the evaluation data after split.
    TableReference evaluation_table = 2;

    // Table reference of the test data after split.
    TableReference test_table = 3;
  }

  // Arima order, can be used for both non-seasonal and seasonal parts.
  message ArimaOrder {
    // Order of the autoregressive part.
    google.protobuf.Int64Value p = 1;

    // Order of the differencing part.
    google.protobuf.Int64Value d = 2;

    // Order of the moving-average part.
    google.protobuf.Int64Value q = 3;
  }

  // ARIMA model fitting metrics.
  message ArimaFittingMetrics {
    // Log-likelihood.
    google.protobuf.DoubleValue log_likelihood = 1;

    // AIC.
    google.protobuf.DoubleValue aic = 2;

    // Variance.
    google.protobuf.DoubleValue variance = 3;
  }

  // Global explanations containing the top most important features
  // after training.
  message GlobalExplanation {
    // Explanation for a single feature.
    message Explanation {
      // The full feature name. For non-numerical features, will be formatted
      // like `<column_name>.<encoded_category_name>`. Overall size of feature
      // name will always be truncated to first 120 characters.
      string feature_name = 1;

      // Attribution of feature.
      google.protobuf.DoubleValue attribution = 2;
    }

    // A list of the top global explanations. Sorted by absolute value of
    // attribution in descending order.
    repeated Explanation explanations = 1;

    // Class label for this set of global explanations. Will be empty/null for
    // binary logistic and linear regression models. Sorted alphabetically in
    // descending order.
    string class_label = 2;
  }

  // Encoding methods for categorical features.
  message CategoryEncodingMethod {
    // Supported encoding methods for categorical features.
    enum EncodingMethod {
      // Unspecified encoding method.
      ENCODING_METHOD_UNSPECIFIED = 0;
      // Applies one-hot encoding.
      ONE_HOT_ENCODING = 1;
      // Applies label encoding.
      LABEL_ENCODING = 2;
      // Applies dummy encoding.
      DUMMY_ENCODING = 3;
    }
  }

  // PCA solver options.
  message PcaSolverOptionEnums {
    // Enums for supported PCA solvers.
    enum PcaSolver {
      // Default value.
      UNSPECIFIED = 0;
      // Full eigen-decomposition.
      FULL = 1;
      // Randomized SVD.
      RANDOMIZED = 2;
      // Auto.
      AUTO = 3;
    }
  }

  // Model registry options.
  message ModelRegistryOptionEnums {
    // Enums for supported model registries.
    enum ModelRegistry {
      // Default value.
      MODEL_REGISTRY_UNSPECIFIED = 0;
      // Vertex AI.
      VERTEX_AI = 1;
    }
  }

  // Information about a single training query run for the model.
  message TrainingRun {
    // Options used in model training.
    message TrainingOptions {
      // The maximum number of iterations in training. Used only for iterative
      // training algorithms.
      int64 max_iterations = 1;

      // Type of loss function used during training run.
      LossType loss_type = 2;

      // Learning rate in training. Used only for iterative training
      // algorithms.
      double learn_rate = 3;

      // L1 regularization coefficient.
      google.protobuf.DoubleValue l1_regularization = 4;

      // L2 regularization coefficient.
      google.protobuf.DoubleValue l2_regularization = 5;

      // When early_stop is true, stops training when accuracy improvement is
      // less than 'min_relative_progress'. Used only for iterative training
      // algorithms.
      google.protobuf.DoubleValue min_relative_progress = 6;

      // Whether to train a model from the last checkpoint.
      google.protobuf.BoolValue warm_start = 7;

      // Whether to stop early when the loss doesn't improve significantly
      // any more (compared to min_relative_progress). Used only for iterative
      // training algorithms.
      google.protobuf.BoolValue early_stop = 8;

      // Name of input label columns in training data.
      repeated string input_label_columns = 9;

      // The data split type for training and evaluation, e.g. RANDOM.
      DataSplitMethod data_split_method = 10;

      // The fraction of evaluation data over the whole input data. The rest
      // of data will be used as training data. The format should be double.
      // Accurate to two decimal places.
      // Default value is 0.2.
      double data_split_eval_fraction = 11;

      // The column to split data with. This column won't be used as a
      // feature.
      // 1. When data_split_method is CUSTOM, the corresponding column should
      // be boolean. The rows with true value tag are eval data, and the false
      // are training data.
      // 2. When data_split_method is SEQUENTIAL, the first
      // DATA_SPLIT_EVAL_FRACTION rows (from smallest to largest) in the
      // corresponding column are used as training data, and the rest are eval
      // data. It respects the order in Orderable data types:
      // https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data-type-properties
      string data_split_column = 12;

      // The strategy to determine learn rate for the current iteration.
      LearnRateStrategy learn_rate_strategy = 13;

      // Specifies the initial learning rate for the line search learn rate
      // strategy.
      double initial_learn_rate = 16;

      // Weights associated with each label class, for rebalancing the
      // training data. Only applicable for classification models.
      map<string, double> label_class_weights = 17;

      // User column specified for matrix factorization models.
      string user_column = 18;

      // Item column specified for matrix factorization models.
      string item_column = 19;

      // Distance type for clustering models.
      DistanceType distance_type = 20;

      // Number of clusters for clustering models.
      int64 num_clusters = 21;

      // Google Cloud Storage URI from which the model was imported. Only
      // applicable for imported models.
      string model_uri = 22;

      // Optimization strategy for training linear regression models.
      OptimizationStrategy optimization_strategy = 23;

      // Hidden units for dnn models.
      repeated int64 hidden_units = 24;

      // Batch size for dnn models.
      int64 batch_size = 25;

      // Dropout probability for dnn models.
      google.protobuf.DoubleValue dropout = 26;

      // Maximum depth of a tree for boosted tree models.
      int64 max_tree_depth = 27;

      // Subsample fraction of the training data to grow tree to prevent
      // overfitting for boosted tree models.
      double subsample = 28;

      // Minimum split loss for boosted tree models.
      google.protobuf.DoubleValue min_split_loss = 29;

      // Booster type for boosted tree models.
      BoostedTreeOptionEnums.BoosterType booster_type = 60;

      // Number of parallel trees constructed during each iteration for
      // boosted tree models.
      google.protobuf.Int64Value num_parallel_tree = 61;

      // Type of normalization algorithm for boosted tree models using
      // dart booster.
      BoostedTreeOptionEnums.DartNormalizeType dart_normalize_type = 62;

      // Tree construction algorithm for boosted tree models.
      BoostedTreeOptionEnums.TreeMethod tree_method = 63;

      // Minimum sum of instance weight needed in a child for boosted tree
      // models.
      google.protobuf.Int64Value min_tree_child_weight = 64;

      // Subsample ratio of columns when constructing each tree for boosted
      // tree models.
      google.protobuf.DoubleValue colsample_bytree = 65;

      // Subsample ratio of columns for each level for boosted tree models.
      google.protobuf.DoubleValue colsample_bylevel = 66;

      // Subsample ratio of columns for each node (split) for boosted tree
      // models.
      google.protobuf.DoubleValue colsample_bynode = 67;

      // Num factors specified for matrix factorization models.
      int64 num_factors = 30;

      // Feedback type that specifies which algorithm to run for matrix
      // factorization.
      FeedbackType feedback_type = 31;

      // Hyperparameter for matrix factorization when implicit feedback type
      // is specified.
      google.protobuf.DoubleValue wals_alpha = 32;

      // The method used to initialize the centroids for kmeans algorithm.
      KmeansEnums.KmeansInitializationMethod kmeans_initialization_method = 33;

      // The column used to provide the initial centroids for kmeans algorithm
      // when kmeans_initialization_method is CUSTOM.
      string kmeans_initialization_column = 34;

      // Column to be designated as time series timestamp for ARIMA model.
      string time_series_timestamp_column = 35;

      // Column to be designated as time series data for ARIMA model.
      string time_series_data_column = 36;

      // Whether to enable auto ARIMA or not.
      google.protobuf.BoolValue auto_arima = 37;

      // A specification of the non-seasonal part of the ARIMA model: the three
      // components (p, d, q) are the AR order, the degree of differencing, and
      // the MA order.
      ArimaOrder non_seasonal_order = 38;

      // The data frequency of a time series.
      DataFrequency data_frequency = 39;

      // Whether or not p-value test should be computed for this model. Only
      // available for linear and logistic regression models.
      google.protobuf.BoolValue calculate_p_values = 40;

      // Include drift when fitting an ARIMA model.
      google.protobuf.BoolValue include_drift = 41;

      // The geographical region based on which the holidays are considered in
      // time series modeling. If a valid value is specified, then holiday
      // effects modeling is enabled.
      HolidayRegion holiday_region = 42;

      // A list of geographical regions that are used for time series modeling.
      repeated HolidayRegion holiday_regions = 71;

      // The time series id column that was used during ARIMA model training.
      string time_series_id_column = 43;

      // The time series id columns that were used during ARIMA model training.
      repeated string time_series_id_columns = 51;

      // The number of periods ahead that need to be forecasted.
      int64 horizon = 44;

      // The max value of the sum of non-seasonal p and q.
      int64 auto_arima_max_order = 46;

      // The min value of the sum of non-seasonal p and q.
      int64 auto_arima_min_order = 83;

      // Number of trials to run this hyperparameter tuning job.
      int64 num_trials = 47;

      // Maximum number of trials to run in parallel.
      int64 max_parallel_trials = 48;

      // The target evaluation metrics to optimize the hyperparameters for.
      repeated HparamTuningEnums.HparamTuningObjective
          hparam_tuning_objectives = 54;

      // If true, perform decompose time series and save the results.
      google.protobuf.BoolValue decompose_time_series = 50;

      // If true, clean spikes and dips in the input time series.
      google.protobuf.BoolValue clean_spikes_and_dips = 52;

      // If true, detect step changes and make data adjustment in the input
      // time series.
      google.protobuf.BoolValue adjust_step_changes = 53;

      // If true, enable global explanation during training.
      google.protobuf.BoolValue enable_global_explain = 55;

      // Number of paths for the sampled Shapley explain method.
      int64 sampled_shapley_num_paths = 56;

      // Number of integral steps for the integrated gradients explain method.
      int64 integrated_gradients_num_steps = 57;

      // Categorical feature encoding method.
      CategoryEncodingMethod.EncodingMethod category_encoding_method = 58;

      // Based on the selected TF version, the corresponding docker image is
      // used to train external models.
      string tf_version = 70;

      // Enums for color space, used for processing images in Object Table.
      // See more details at
      // https://www.tensorflow.org/io/tutorials/colorspace.
      ColorSpace color_space = 72;

      // Name of the instance weight column for training data.
      // This column isn't used as a feature.
      string instance_weight_column = 73;

      // Smoothing window size for the trend component. When a positive value
      // is specified, a center moving average smoothing is applied on the
      // history trend. When the smoothing window is out of the boundary at the
      // beginning or the end of the trend, the first element or the last
      // element is padded to fill the smoothing window before the average is
      // applied.
      int64 trend_smoothing_window_size = 74;

      // The fraction of the interpolated length of the time series that's used
      // to model the time series trend component. All of the time points of
      // the time series are used to model the non-trend component. This
      // training option accelerates modeling training without sacrificing much
      // forecasting accuracy. You can use this option with
      // `minTimeSeriesLength` but not with `maxTimeSeriesLength`.
      double time_series_length_fraction = 75;

      // The minimum number of time points in a time series that are used in
      // modeling the trend component of the time series. If you use this
      // option you must also set the `timeSeriesLengthFraction` option. This
      // training option ensures that enough time points are available when
      // you use `timeSeriesLengthFraction` in trend modeling. This is
      // particularly important when forecasting multiple time series in a
      // single query using `timeSeriesIdColumn`. If the total number of time
      // points is less than the `minTimeSeriesLength` value, then the query
      // uses all available time points.
      int64 min_time_series_length = 76;

      // The maximum number of time points in a time series that can be used
      // in modeling the trend component of the time series. Don't use this
      // option with the `timeSeriesLengthFraction` or `minTimeSeriesLength`
      // options.
      int64 max_time_series_length = 77;

      // User-selected XGBoost versions for training of XGBoost models.
      string xgboost_version = 78;

      // Whether to use approximate feature contribution method in XGBoost
      // model explanation for global explain.
      google.protobuf.BoolValue approx_global_feature_contrib = 84;

      // Whether the model should include intercept during model training.
      google.protobuf.BoolValue fit_intercept = 85;

      // Number of principal components to keep in the PCA model. Must be <=
      // the number of features.
      int64 num_principal_components = 86;

      // The minimum ratio of cumulative explained variance that needs to be
      // given by the PCA model.
      double pca_explained_variance_ratio = 87;

      // If true, scale the feature values by dividing the feature standard
      // deviation. Currently only applies to PCA.
      google.protobuf.BoolValue scale_features = 88;

      // The solver for PCA.
      PcaSolverOptionEnums.PcaSolver pca_solver = 89;

      // Whether to calculate class weights automatically based on the
      // popularity of each label.
      google.protobuf.BoolValue auto_class_weights = 90;

      // Activation function of the neural nets.
      string activation_fn = 91;

      // Optimizer used for training the neural nets.
      string optimizer = 92;

      // Budget in hours for AutoML training.
      double budget_hours = 93;

      // Whether to standardize numerical features. Defaults to true.
      google.protobuf.BoolValue standardize_features = 94;

      // L1 regularization coefficient to activations.
      double l1_reg_activation = 95;

      // The model registry.
      ModelRegistryOptionEnums.ModelRegistry model_registry = 96;

      // The version aliases to apply in Vertex AI model registry. Always
      // overwrites if the version aliases exist in an existing model.
      repeated string vertex_ai_model_version_aliases = 97;
    }

    // Information about a single iteration of the training run.
    message IterationResult {
      // Information about a single cluster for clustering model.
      message ClusterInfo {
        // Centroid id.
        int64 centroid_id = 1;

        // Cluster radius, the average distance from centroid
        // to each point assigned to the cluster.
        google.protobuf.DoubleValue cluster_radius = 2;

        // Cluster size, the total number of points assigned to the cluster.
        google.protobuf.Int64Value cluster_size = 3;
      }

      // (Auto-)arima fitting result. Wrap everything in ArimaResult for easier
      // refactoring if we want to use model-specific iteration results.
      message ArimaResult {
        // Arima coefficients.
        message ArimaCoefficients {
          // Auto-regressive coefficients, an array of double.
          repeated double auto_regressive_coefficients = 1;

          // Moving-average coefficients, an array of double.
          repeated double moving_average_coefficients = 2;

          // Intercept coefficient, just a double not an array.
          google.protobuf.DoubleValue intercept_coefficient = 3;
        }

        // Arima model information.
        message ArimaModelInfo {
          // Non-seasonal order.
          ArimaOrder non_seasonal_order = 1;

          // Arima coefficients.
          ArimaCoefficients arima_coefficients = 2;

          // Arima fitting metrics.
          ArimaFittingMetrics arima_fitting_metrics = 3;

          // Whether the ARIMA model is fitted with drift or not. It is always
          // false when d is not 1.
          google.protobuf.BoolValue has_drift = 4;

          // The time_series_id value for this time series. It will be one of
          // the unique values from the time_series_id_column specified during
          // ARIMA model training. Only present when the time_series_id_column
          // training option was used.
          string time_series_id = 5;

          // The tuple of time_series_ids identifying this time series. It
          // will be one of the unique tuples of values present in the
          // time_series_id_columns specified during ARIMA model training.
          // Only present when the time_series_id_columns training option was
          // used and the order of values here is the same as the order of
          // time_series_id_columns.
          repeated string time_series_ids = 10;

          // Seasonal periods. Repeated because multiple periods are supported
          // for one time series.
          repeated SeasonalPeriod.SeasonalPeriodType seasonal_periods = 6;

          // If true, holiday_effect is a part of time series decomposition
          // result.
          google.protobuf.BoolValue has_holiday_effect = 7;

          // If true, spikes_and_dips is a part of time series decomposition
          // result.
          google.protobuf.BoolValue has_spikes_and_dips = 8;

          // If true, step_changes is a part of time series decomposition
          // result.
          google.protobuf.BoolValue has_step_changes = 9;
        }

        // This message is repeated because there are multiple arima models
        // fitted in auto-arima. For non-auto-arima model, its size is one.
        repeated ArimaModelInfo arima_model_info = 1;

        // Seasonal periods. Repeated because multiple periods are supported
        // for one time series.
        repeated SeasonalPeriod.SeasonalPeriodType seasonal_periods = 2;
      }

      // Principal component infos, used only for eigen decomposition based
      // models, e.g., PCA. Ordered by explained_variance in the descending
      // order.
      message PrincipalComponentInfo {
        // Id of the principal component.
        google.protobuf.Int64Value principal_component_id = 1;

        // Explained variance by this principal component, which is simply the
        // eigenvalue.
        google.protobuf.DoubleValue explained_variance = 2;

        // Explained_variance over the total explained variance.
        google.protobuf.DoubleValue explained_variance_ratio = 3;

        // The explained_variance is pre-ordered in the descending order to
        // compute the cumulative explained variance ratio.
        google.protobuf.DoubleValue cumulative_explained_variance_ratio = 4;
      }

      // Index of the iteration, 0 based.
      google.protobuf.Int32Value index = 1;

      // Time taken to run the iteration in milliseconds.
      google.protobuf.Int64Value duration_ms = 4;

      // Loss computed on the training data at the end of iteration.
      google.protobuf.DoubleValue training_loss = 5;

      // Loss computed on the eval data at the end of iteration.
      google.protobuf.DoubleValue eval_loss = 6;

      // Learn rate used for this iteration.
      double learn_rate = 7;

      // Information about top clusters for clustering models.
      repeated ClusterInfo cluster_infos = 8;

      // Arima result.
      ArimaResult arima_result = 9;

      // The information of the principal components.
      repeated PrincipalComponentInfo principal_component_infos = 10;
    }

    // Output only. Options that were used for this training run, includes
    // user specified and default options that were used.
    TrainingOptions training_options = 1
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. The start time of this training run.
    google.protobuf.Timestamp start_time = 8
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Output of each iteration run, results.size() <=
    // max_iterations.
    repeated IterationResult results = 6
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. The evaluation metrics over training/eval data that were
    // computed at the end of training.
    EvaluationMetrics evaluation_metrics = 7
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Data split result of the training run. Only set when the
    // input data is actually split.
    DataSplitResult data_split_result = 9
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Global explanation contains the explanation of top features
    // on the model level. Applies to both regression and classification
    // models.
    GlobalExplanation model_level_global_explanation = 11
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Global explanation contains the explanation of top features
    // on the class level. Applies to classification models only.
    repeated GlobalExplanation class_level_global_explanations = 12
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // The model id in the [Vertex AI Model
    // Registry](https://cloud.google.com/vertex-ai/docs/model-registry/introduction)
    // for this training run.
    string vertex_ai_model_id = 14;

    // Output only. The model version in the [Vertex AI Model
    // Registry](https://cloud.google.com/vertex-ai/docs/model-registry/introduction)
    // for this training run.
    string vertex_ai_model_version = 15
        [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // Search space for a double hyperparameter.
  message DoubleHparamSearchSpace {
    // Range of a double hyperparameter.
    message DoubleRange {
      // Min value of the double parameter.
      google.protobuf.DoubleValue min = 1;

      // Max value of the double parameter.
      google.protobuf.DoubleValue max = 2;
    }

    // Discrete candidates of a double hyperparameter.
    message DoubleCandidates {
      // Candidates for the double parameter in increasing order.
      repeated google.protobuf.DoubleValue candidates = 1;
    }

    // Search space.
    oneof search_space {
      // Range of the double hyperparameter.
      DoubleRange range = 1;

      // Candidates of the double hyperparameter.
      DoubleCandidates candidates = 2;
    }
  }

  // Search space for an int hyperparameter.
  message IntHparamSearchSpace {
    // Range of an int hyperparameter.
    message IntRange {
      // Min value of the int parameter.
      google.protobuf.Int64Value min = 1;

      // Max value of the int parameter.
      google.protobuf.Int64Value max = 2;
    }

    // Discrete candidates of an int hyperparameter.
    message IntCandidates {
      // Candidates for the int parameter in increasing order.
      repeated google.protobuf.Int64Value candidates = 1;
    }

    // Search space.
    oneof search_space {
      // Range of the int hyperparameter.
      IntRange range = 1;

      // Candidates of the int hyperparameter.
      IntCandidates candidates = 2;
    }
  }

  // Search space for string and enum.
  message StringHparamSearchSpace {
    // Candidates for the string or enum parameter in lower case.
    repeated string candidates = 1;
  }

  // Search space for int array.
  message IntArrayHparamSearchSpace {
    // An array of int.
    message IntArray {
      // Elements in the int array.
      repeated int64 elements = 1;
    }

    // Candidates for the int array parameter.
    repeated IntArray candidates = 1;
  }

  // Hyperparameter search spaces.
  // These should be a subset of training_options.
  message HparamSearchSpaces {
    // Learning rate of training jobs.
    DoubleHparamSearchSpace learn_rate = 2;

    // L1 regularization coefficient.
    DoubleHparamSearchSpace l1_reg = 3;

    // L2 regularization coefficient.
    DoubleHparamSearchSpace l2_reg = 4;

    // Number of clusters for k-means.
    IntHparamSearchSpace num_clusters = 26;

    // Number of latent factors to train on.
    IntHparamSearchSpace num_factors = 31;

    // Hidden units for neural network models.
    IntArrayHparamSearchSpace hidden_units = 34;

    // Mini batch sample size.
    IntHparamSearchSpace batch_size = 37;

    // Dropout probability for dnn model training and boosted tree models
    // using dart booster.
    DoubleHparamSearchSpace dropout = 38;

    // Maximum depth of a tree for boosted tree models.
    IntHparamSearchSpace max_tree_depth = 41;

    // Subsample the training data to grow tree to prevent overfitting for
    // boosted tree models.
    DoubleHparamSearchSpace subsample = 42;

    // Minimum split loss for boosted tree models.
    DoubleHparamSearchSpace min_split_loss = 43;

    // Hyperparameter for matrix factorization when implicit feedback type is
    // specified.
    DoubleHparamSearchSpace wals_alpha = 49;

    // Booster type for boosted tree models.
    StringHparamSearchSpace booster_type = 56;

    // Number of parallel trees for boosted tree models.
    IntHparamSearchSpace num_parallel_tree = 57;

    // Dart normalization type for boosted tree models.
    StringHparamSearchSpace dart_normalize_type = 58;

    // Tree construction algorithm for boosted tree models.
    StringHparamSearchSpace tree_method = 59;

    // Minimum sum of instance weight needed in a child for boosted tree
    // models.
    IntHparamSearchSpace min_tree_child_weight = 60;

    // Subsample ratio of columns when constructing each tree for boosted tree
    // models.
    DoubleHparamSearchSpace colsample_bytree = 61;

    // Subsample ratio of columns for each level for boosted tree models.
    DoubleHparamSearchSpace colsample_bylevel = 62;

    // Subsample ratio of columns for each node (split) for boosted tree
    // models.
    DoubleHparamSearchSpace colsample_bynode = 63;

    // Activation functions of neural network models.
    StringHparamSearchSpace activation_fn = 67;

    // Optimizer of TF models.
    StringHparamSearchSpace optimizer = 68;
  }

  // Training info of a trial in [hyperparameter
  // tuning](https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-hp-tuning-overview)
  // models.
  message HparamTuningTrial {
    // Current status of the trial.
    enum TrialStatus {
      // Default value.
      TRIAL_STATUS_UNSPECIFIED = 0;
      // Scheduled but not started.
      NOT_STARTED = 1;
      // Running state.
      RUNNING = 2;
      // The trial succeeded.
      SUCCEEDED = 3;
      // The trial failed.
      FAILED = 4;
      // The trial is infeasible due to the invalid params.
      INFEASIBLE = 5;
      // Trial stopped early because it's not promising.
      STOPPED_EARLY = 6;
    }

    // 1-based index of the trial.
    int64 trial_id = 1;

    // Starting time of the trial.
    int64 start_time_ms = 2;

    // Ending time of the trial.
    int64 end_time_ms = 3;

    // The hyperparameters selected for this trial.
    TrainingRun.TrainingOptions hparams = 4;

    // Evaluation metrics of this trial calculated on the test data.
    // Empty in Job API.
    EvaluationMetrics evaluation_metrics = 5;

    // The status of the trial.
    TrialStatus status = 6;

    // Error message for FAILED and INFEASIBLE trials.
    string error_message = 7;

    // Loss computed on the training data at the end of the trial.
    google.protobuf.DoubleValue training_loss = 8;

    // Loss computed on the eval data at the end of the trial.
    google.protobuf.DoubleValue eval_loss = 9;

    // Hyperparameter tuning evaluation metrics of this trial calculated on the
    // eval data. Unlike evaluation_metrics, only the fields corresponding to
    // the hparam_tuning_objectives are set.
    EvaluationMetrics hparam_tuning_evaluation_metrics = 10;
  }

  // Output only. A hash of this resource.
  string etag = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Required. Unique identifier for this model.
  ModelReference model_reference = 2 [(google.api.field_behavior) = REQUIRED];

  // Output only. The time when this model was created, in milliseconds since
  // the epoch.
  int64 creation_time = 5 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when this model was last modified, in milliseconds
  // since the epoch.
  int64 last_modified_time = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. A user-friendly description of this model.
  string description = 12 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A descriptive name for this model.
  string friendly_name = 14 [(google.api.field_behavior) = OPTIONAL];

  // The labels associated with this model. You can use these to organize
  // and group your models. Label keys and values can be no longer
  // than 63 characters, can only contain lowercase letters, numeric
  // characters, underscores and dashes. International characters are allowed.
  // Label values are optional. Label keys must start with a letter and each
  // label in the list must have a different key.
  map<string, string> labels = 15;

  // Optional. The time when this model expires, in milliseconds since the
  // epoch. If not present, the model will persist indefinitely. Expired models
  // will be deleted and their storage reclaimed. The defaultTableExpirationMs
  // property of the encapsulating dataset can be used to set a default
  // expirationTime on newly created models.
  int64 expiration_time = 16 [(google.api.field_behavior) = OPTIONAL];

  // Output only. The geographic location where the model resides. This value
  // is inherited from the dataset.
  string location = 13 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Custom encryption configuration (e.g., Cloud KMS keys). This shows the
  // encryption configuration of the model data while stored in BigQuery
  // storage. This field can be used with PatchModel to update encryption key
  // for an already encrypted model.
  EncryptionConfiguration encryption_configuration = 17;

  // Output only. Type of the model resource.
  ModelType model_type = 7 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Information for all training runs in increasing order of start_time.
  repeated TrainingRun training_runs = 9;
  // Output only. Input feature columns for the model inference. If the model
  // is trained with a TRANSFORM clause, these are the input of the TRANSFORM
  // clause.
  repeated StandardSqlField feature_columns = 10
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Label columns that were used to train this model.
  // The output of the model will have a "predicted_" prefix to these columns.
  repeated StandardSqlField label_columns = 11
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. This field will be populated if a TRANSFORM clause was used
  // to train a model. The TRANSFORM clause (if used) takes feature_columns as
  // input and outputs transform_columns. transform_columns are then used to
  // train the model.
  repeated TransformColumn transform_columns = 26
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. All hyperparameter search spaces in this model.
  HparamSearchSpaces hparam_search_spaces = 18
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The default trial_id to use in TVFs when the trial_id is not
  // passed in. For single-objective [hyperparameter
  // tuning](https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-hp-tuning-overview)
  // models, this is the best trial ID. For multi-objective [hyperparameter
  // tuning](https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-hp-tuning-overview)
  // models, this is the smallest trial ID among all Pareto optimal trials.
  int64 default_trial_id = 21 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Trials of a [hyperparameter
  // tuning](https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-hp-tuning-overview)
  // model sorted by trial_id.
  repeated HparamTuningTrial hparam_trials = 20
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. For single-objective [hyperparameter
  // tuning](https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-hp-tuning-overview)
  // models, it only contains the best trial. For multi-objective
  // [hyperparameter
  // tuning](https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-hp-tuning-overview)
  // models, it contains all Pareto optimal trials sorted by trial_id.
  repeated int64 optimal_trial_ids = 22
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Remote model info.
  RemoteModelInfo remote_model_info = 25
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Request format for getting information about a BigQuery ML model.
message GetModelRequest {
  // Required. Project ID of the requested model.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. Dataset ID of the requested model.
  string dataset_id = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. Model ID of the requested model.
  string model_id = 3 [(google.api.field_behavior) = REQUIRED];
}

// Request format for patching specific fields in a BigQuery ML model.
message PatchModelRequest {
  // Required. Project ID of the model to patch.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. Dataset ID of the model to patch.
  string dataset_id = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. Model ID of the model to patch.
  string model_id = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. Patched model.
  // Follows RFC5789 patch semantics. Missing fields are not updated.
  // To clear a field, explicitly set it to its default value.
  Model model = 4 [(google.api.field_behavior) = REQUIRED];
}
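// Illustrative, non-normative sketch: a partial update through the
// PatchModel REST binding above. Per RFC 5789 patch semantics, only the
// fields present in the request body are updated. The project, dataset,
// and model IDs are placeholder assumptions, and the authenticated client
// is assumed to come from golang.org/x/oauth2/google application default
// credentials.
//
//   package main
//
//   import (
//     "context"
//     "net/http"
//     "strings"
//
//     "golang.org/x/oauth2/google"
//   )
//
//   func main() {
//     ctx := context.Background()
//     client, err := google.DefaultClient(ctx,
//       "https://www.googleapis.com/auth/bigquery")
//     if err != nil {
//       panic(err)
//     }
//     url := "https://bigquery.googleapis.com/bigquery/v2/projects/" +
//       "my-project/datasets/my_dataset/models/my_model"
//     // Only `description` is sent, so only `description` is updated.
//     body := strings.NewReader(`{"description": "updated by PatchModel"}`)
//     req, _ := http.NewRequestWithContext(ctx, http.MethodPatch, url, body)
//     req.Header.Set("Content-Type", "application/json")
//     resp, err := client.Do(req)
//     if err != nil {
//       panic(err)
//     }
//     defer resp.Body.Close()
//   }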
// Request format for deleting BigQuery ML models.
message DeleteModelRequest {
  // Required. Project ID of the model to delete.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. Dataset ID of the model to delete.
  string dataset_id = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. Model ID of the model to delete.
  string model_id = 3 [(google.api.field_behavior) = REQUIRED];
}

// Request format for listing BigQuery ML models.
message ListModelsRequest {
  // Required. Project ID of the models to list.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. Dataset ID of the models to list.
  string dataset_id = 2 [(google.api.field_behavior) = REQUIRED];

  // The maximum number of results to return in a single response page.
  // Leverage the page tokens to iterate through the entire collection.
  google.protobuf.UInt32Value max_results = 3;

  // Page token, returned by a previous call, to request the next page of
  // results.
  string page_token = 4;
}

// Response format for a single page when listing BigQuery ML models.
message ListModelsResponse {
  // Models in the requested dataset. Only the following fields are populated:
  // model_reference, model_type, creation_time, last_modified_time and
  // labels.
  repeated Model models = 1;

  // A token to request the next page of results.
  string next_page_token = 2;
}
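// Illustrative, non-normative sketch: iterating over every page of the
// ListModels REST binding above by threading next_page_token back in as
// pageToken. The project and dataset IDs are placeholder assumptions, and
// the authenticated client is assumed to come from golang.org/x/oauth2/google
// application default credentials.
//
//   package main
//
//   import (
//     "context"
//     "encoding/json"
//     "fmt"
//
//     "golang.org/x/oauth2/google"
//   )
//
//   func main() {
//     ctx := context.Background()
//     client, err := google.DefaultClient(ctx,
//       "https://www.googleapis.com/auth/bigquery")
//     if err != nil {
//       panic(err)
//     }
//     base := "https://bigquery.googleapis.com/bigquery/v2/projects/" +
//       "my-project/datasets/my_dataset/models?maxResults=50"
//     for token := ""; ; {
//       url := base
//       if token != "" {
//         url += "&pageToken=" + token
//       }
//       resp, err := client.Get(url)
//       if err != nil {
//         panic(err)
//       }
//       // Decode only the fields this loop needs from ListModelsResponse.
//       var page struct {
//         Models        []json.RawMessage `json:"models"`
//         NextPageToken string            `json:"nextPageToken"`
//       }
//       if err := json.NewDecoder(resp.Body).Decode(&page); err != nil {
//         panic(err)
//       }
//       resp.Body.Close()
//       fmt.Println("models on this page:", len(page.Models))
//       if token = page.NextPageToken; token == "" {
//         break // an empty next_page_token means this was the last page
//       }
//     }
//   }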