// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.bigquery.v2;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/cloud/bigquery/v2/encryption_config.proto";
import "google/cloud/bigquery/v2/model_reference.proto";
import "google/cloud/bigquery/v2/standard_sql.proto";
import "google/cloud/bigquery/v2/table_reference.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/timestamp.proto";
import "google/protobuf/wrappers.proto";

option go_package = "cloud.google.com/go/bigquery/apiv2/bigquerypb;bigquerypb";
option java_outer_classname = "ModelProto";
option java_package = "com.google.cloud.bigquery.v2";

// This is an experimental RPC service definition for the BigQuery
// Model Service.
//
// It should not be relied on for production use cases at this time.
service ModelService {
  option (google.api.default_host) = "bigquery.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/bigquery,"
      "https://www.googleapis.com/auth/cloud-platform,"
      "https://www.googleapis.com/auth/cloud-platform.read-only";

  // Gets the specified model resource by model ID.
  rpc GetModel(GetModelRequest) returns (Model) {
    option (google.api.http) = {
      get: "/bigquery/v2/projects/{project_id=*}/datasets/{dataset_id=*}/models/{model_id=*}"
    };
    option (google.api.method_signature) = "project_id,dataset_id,model_id";
  }

  // Lists all models in the specified dataset. Requires the READER dataset
  // role. After retrieving the list of models, you can get information about a
  // particular model by calling the models.get method.
  rpc ListModels(ListModelsRequest) returns (ListModelsResponse) {
    option (google.api.http) = {
      get: "/bigquery/v2/projects/{project_id=*}/datasets/{dataset_id=*}/models"
    };
    option (google.api.method_signature) = "project_id,dataset_id,max_results";
  }

  // Patches specific fields in the specified model.
  rpc PatchModel(PatchModelRequest) returns (Model) {
    option (google.api.http) = {
      patch: "/bigquery/v2/projects/{project_id=*}/datasets/{dataset_id=*}/models/{model_id=*}"
      body: "model"
    };
    option (google.api.method_signature) =
        "project_id,dataset_id,model_id,model";
  }

  // Deletes the model specified by modelId from the dataset.
  rpc DeleteModel(DeleteModelRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/bigquery/v2/projects/{project_id=*}/datasets/{dataset_id=*}/models/{model_id=*}"
    };
    option (google.api.method_signature) = "project_id,dataset_id,model_id";
  }
}
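// Illustrative, non-normative sketch: calling the GetModel REST binding
// above with plain net/http in Go. This is not generated client code; the
// project, dataset, and model IDs are placeholder assumptions, and the
// authenticated client is assumed to come from golang.org/x/oauth2/google
// application default credentials. The URL path follows the
// google.api.http binding declared on GetModel.
//
//   package main
//
//   import (
//     "context"
//     "fmt"
//     "io"
//
//     "golang.org/x/oauth2/google"
//   )
//
//   func main() {
//     ctx := context.Background()
//     // Scope matches one of the oauth_scopes declared on ModelService.
//     client, err := google.DefaultClient(ctx,
//       "https://www.googleapis.com/auth/bigquery")
//     if err != nil {
//       panic(err)
//     }
//     url := "https://bigquery.googleapis.com/bigquery/v2/projects/" +
//       "my-project/datasets/my_dataset/models/my_model"
//     resp, err := client.Get(url)
//     if err != nil {
//       panic(err)
//     }
//     defer resp.Body.Close()
//     body, _ := io.ReadAll(resp.Body)
//     fmt.Println(resp.Status, string(body))
//   }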
// Remote Model Info
message RemoteModelInfo {
  // Supported service type for remote model.
  enum RemoteServiceType {
    // Unspecified remote service type.
    REMOTE_SERVICE_TYPE_UNSPECIFIED = 0;

    // V3 Cloud AI Translation API. See more details at [Cloud Translation API]
    // (https://cloud.google.com/translate/docs/reference/rest).
    CLOUD_AI_TRANSLATE_V3 = 1;

    // V1 Cloud AI Vision API. See more details at [Cloud Vision API]
    // (https://cloud.google.com/vision/docs/reference/rest).
    CLOUD_AI_VISION_V1 = 2;

    // V1 Cloud AI Natural Language API. See more details at [REST Resource:
    // documents](https://cloud.google.com/natural-language/docs/reference/rest/v1/documents).
    CLOUD_AI_NATURAL_LANGUAGE_V1 = 3;

    // V2 Speech-to-Text API. See more details at [Google Cloud Speech-to-Text
    // V2 API](https://cloud.google.com/speech-to-text/v2/docs).
    CLOUD_AI_SPEECH_TO_TEXT_V2 = 7;
  }

  // Remote services are services outside of BigQuery used by remote models for
  // predictions. A remote service is backed by either an arbitrary endpoint or
  // a selected remote service type, but not both.
  oneof remote_service {
    // Output only. The endpoint for remote model.
    string endpoint = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. The remote service type for remote model.
    RemoteServiceType remote_service_type = 2
        [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // Output only. Fully qualified name of the user-provided connection object
  // of the remote model. Format:
  // ```"projects/{project_id}/locations/{location_id}/connections/{connection_id}"```
  string connection = 3 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Max number of rows in each batch sent to the remote service.
  // If unset, the number of rows in each batch is set dynamically.
  int64 max_batching_rows = 4 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The model version for LLM.
  string remote_model_version = 5 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The name of the speech recognizer to use for speech
  // recognition. The expected format is
  // `projects/{project}/locations/{location}/recognizers/{recognizer}`.
  // Customers can specify this field at model creation. If not specified, a
  // default recognizer `projects/{model
  // project}/locations/global/recognizers/_` will be used. See more details at
  // [recognizers](https://cloud.google.com/speech-to-text/v2/docs/reference/rest/v2/projects.locations.recognizers)
  string speech_recognizer = 7 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Information about a single transform column.
message TransformColumn {
  // Output only. Name of the column.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Data type of the column after the transform.
  StandardSqlDataType type = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The SQL expression used in the column transform.
  string transform_sql = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
}

message Model {
  // Indicates the type of the Model.
  enum ModelType {
    // Default value.
    MODEL_TYPE_UNSPECIFIED = 0;
    // Linear regression model.
    LINEAR_REGRESSION = 1;
    // Logistic regression based classification model.
    LOGISTIC_REGRESSION = 2;
    // K-means clustering model.
    KMEANS = 3;
    // Matrix factorization model.
    MATRIX_FACTORIZATION = 4;
    // DNN classifier model.
    DNN_CLASSIFIER = 5;
    // An imported TensorFlow model.
    TENSORFLOW = 6;
    // DNN regressor model.
    DNN_REGRESSOR = 7;
    // An imported XGBoost model.
    XGBOOST = 8;
    // Boosted tree regressor model.
    BOOSTED_TREE_REGRESSOR = 9;
    // Boosted tree classifier model.
    BOOSTED_TREE_CLASSIFIER = 10;
    // ARIMA model.
    ARIMA = 11;
    // AutoML Tables regression model.
    AUTOML_REGRESSOR = 12;
    // AutoML Tables classification model.
    AUTOML_CLASSIFIER = 13;
    // Principal Component Analysis model.
    PCA = 14;
    // Wide-and-deep classifier model.
    DNN_LINEAR_COMBINED_CLASSIFIER = 16;
    // Wide-and-deep regressor model.
    DNN_LINEAR_COMBINED_REGRESSOR = 17;
    // Autoencoder model.
    AUTOENCODER = 18;
    // New name for the ARIMA model.
    ARIMA_PLUS = 19;
    // ARIMA with external regressors.
    ARIMA_PLUS_XREG = 23;
    // Random forest regressor model.
    RANDOM_FOREST_REGRESSOR = 24;
    // Random forest classifier model.
    RANDOM_FOREST_CLASSIFIER = 25;
    // An imported TensorFlow Lite model.
    TENSORFLOW_LITE = 26;
    // An imported ONNX model.
    ONNX = 28;
    // Model to capture the columns and logic in the TRANSFORM clause along
    // with statistics useful for ML analytic functions.
    TRANSFORM_ONLY = 29;
  }

  // Loss metric to evaluate model training performance.
  enum LossType {
    // Default value.
    LOSS_TYPE_UNSPECIFIED = 0;
    // Mean squared loss, used for linear regression.
    MEAN_SQUARED_LOSS = 1;
    // Mean log loss, used for logistic regression.
    MEAN_LOG_LOSS = 2;
  }

  // Distance metric used to compute the distance between two points.
  enum DistanceType {
    // Default value.
    DISTANCE_TYPE_UNSPECIFIED = 0;
    // Euclidean distance.
    EUCLIDEAN = 1;
    // Cosine distance.
    COSINE = 2;
  }

  // Indicates the method to split input data into multiple tables.
  enum DataSplitMethod {
    // Default value.
    DATA_SPLIT_METHOD_UNSPECIFIED = 0;
    // Splits data randomly.
    RANDOM = 1;
    // Splits data with the user provided tags.
    CUSTOM = 2;
    // Splits data sequentially.
    SEQUENTIAL = 3;
    // Data split will be skipped.
    NO_SPLIT = 4;
    // Splits data automatically: Uses NO_SPLIT if the data size is small.
    // Otherwise uses RANDOM.
    AUTO_SPLIT = 5;
  }

  // Type of supported data frequency for time series forecasting models.
  enum DataFrequency {
    // Default value.
    DATA_FREQUENCY_UNSPECIFIED = 0;
    // Automatically inferred from timestamps.
    AUTO_FREQUENCY = 1;
    // Yearly data.
    YEARLY = 2;
    // Quarterly data.
    QUARTERLY = 3;
    // Monthly data.
    MONTHLY = 4;
    // Weekly data.
    WEEKLY = 5;
    // Daily data.
    DAILY = 6;
    // Hourly data.
    HOURLY = 7;
    // Per-minute data.
    PER_MINUTE = 8;
  }

  // Type of supported holiday regions for time series forecasting models.
  enum HolidayRegion {
    // Holiday region unspecified.
    HOLIDAY_REGION_UNSPECIFIED = 0;
    // Global.
    GLOBAL = 1;
    // North America.
    NA = 2;
    // Japan and Asia Pacific: Korea, Greater China, India, Australia, and New
    // Zealand.
    JAPAC = 3;
    // Europe, the Middle East and Africa.
    EMEA = 4;
    // Latin America and the Caribbean.
    LAC = 5;
    // United Arab Emirates
    AE = 6;
    // Argentina
    AR = 7;
    // Austria
    AT = 8;
    // Australia
    AU = 9;
    // Belgium
    BE = 10;
    // Brazil
    BR = 11;
    // Canada
    CA = 12;
    // Switzerland
    CH = 13;
    // Chile
    CL = 14;
    // China
    CN = 15;
    // Colombia
    CO = 16;
    // Czechoslovakia
    CS = 17;
    // Czech Republic
    CZ = 18;
    // Germany
    DE = 19;
    // Denmark
    DK = 20;
    // Algeria
    DZ = 21;
    // Ecuador
    EC = 22;
    // Estonia
    EE = 23;
    // Egypt
    EG = 24;
    // Spain
    ES = 25;
    // Finland
    FI = 26;
    // France
    FR = 27;
    // Great Britain (United Kingdom)
    GB = 28;
    // Greece
    GR = 29;
    // Hong Kong
    HK = 30;
    // Hungary
    HU = 31;
    // Indonesia
    ID = 32;
    // Ireland
    IE = 33;
    // Israel
    IL = 34;
    // India
    IN = 35;
    // Iran
    IR = 36;
    // Italy
    IT = 37;
    // Japan
    JP = 38;
    // Korea (South)
    KR = 39;
    // Latvia
    LV = 40;
    // Morocco
    MA = 41;
    // Mexico
    MX = 42;
    // Malaysia
    MY = 43;
    // Nigeria
    NG = 44;
    // Netherlands
    NL = 45;
    // Norway
    NO = 46;
    // New Zealand
    NZ = 47;
    // Peru
    PE = 48;
    // Philippines
    PH = 49;
    // Pakistan
    PK = 50;
    // Poland
    PL = 51;
    // Portugal
    PT = 52;
    // Romania
    RO = 53;
    // Serbia
    RS = 54;
    // Russian Federation
    RU = 55;
    // Saudi Arabia
    SA = 56;
    // Sweden
    SE = 57;
    // Singapore
    SG = 58;
    // Slovenia
    SI = 59;
    // Slovakia
    SK = 60;
    // Thailand
    TH = 61;
    // Turkey
    TR = 62;
    // Taiwan
    TW = 63;
    // Ukraine
    UA = 64;
    // United States
    US = 65;
    // Venezuela
    VE = 66;
    // Viet Nam
    VN = 67;
    // South Africa
    ZA = 68;
  }

  // Enums for seasonal period.
  message SeasonalPeriod {
    // Seasonal period type.
    enum SeasonalPeriodType {
      // Unspecified seasonal period.
      SEASONAL_PERIOD_TYPE_UNSPECIFIED = 0;
      // No seasonality.
      NO_SEASONALITY = 1;
      // Daily period, 24 hours.
      DAILY = 2;
      // Weekly period, 7 days.
      WEEKLY = 3;
      // Monthly period, 30 days or irregular.
      MONTHLY = 4;
      // Quarterly period, 90 days or irregular.
      QUARTERLY = 5;
      // Yearly period, 365 days or irregular.
      YEARLY = 6;
    }
  }

  // Enums for color space, used for processing images in Object Table.
  // See more details at
  // https://www.tensorflow.org/io/tutorials/colorspace.
  enum ColorSpace {
    // Unspecified color space.
    COLOR_SPACE_UNSPECIFIED = 0;
    // RGB.
    RGB = 1;
    // HSV.
    HSV = 2;
    // YIQ.
    YIQ = 3;
    // YUV.
    YUV = 4;
    // GRAYSCALE.
    GRAYSCALE = 5;
  }

  // Enums for kmeans model type.
  message KmeansEnums {
    // Indicates the method used to initialize the centroids for the KMeans
    // clustering algorithm.
    enum KmeansInitializationMethod {
      // Unspecified initialization method.
      KMEANS_INITIALIZATION_METHOD_UNSPECIFIED = 0;
      // Initializes the centroids randomly.
      RANDOM = 1;
      // Initializes the centroids using data specified in
      // kmeans_initialization_column.
      CUSTOM = 2;
      // Initializes with kmeans++.
      KMEANS_PLUS_PLUS = 3;
    }
  }

  // Enums for XGBoost model type.
  message BoostedTreeOptionEnums {
    // Booster types supported. Refer to booster parameter in XGBoost.
    enum BoosterType {
      // Unspecified booster type.
      BOOSTER_TYPE_UNSPECIFIED = 0;
      // Gbtree booster.
      GBTREE = 1;
      // Dart booster.
      DART = 2;
    }

    // Type of normalization algorithm for boosted tree models using dart
    // booster. Refer to normalize_type in XGBoost.
    enum DartNormalizeType {
      // Unspecified dart normalize type.
      DART_NORMALIZE_TYPE_UNSPECIFIED = 0;
      // New trees have the same weight as each of the dropped trees.
      TREE = 1;
      // New trees have the same weight as the sum of the dropped trees.
      FOREST = 2;
    }

    // Tree construction algorithm used in boosted tree models.
    // Refer to tree_method in XGBoost.
    enum TreeMethod {
      // Unspecified tree method.
      TREE_METHOD_UNSPECIFIED = 0;
      // Use heuristic to choose the fastest method.
      AUTO = 1;
      // Exact greedy algorithm.
      EXACT = 2;
      // Approximate greedy algorithm using quantile sketch and gradient
      // histogram.
      APPROX = 3;
      // Fast histogram optimized approximate greedy algorithm.
      HIST = 4;
    }
  }

  // Enums for hyperparameter tuning.
  message HparamTuningEnums {
    // Available evaluation metrics used as hyperparameter tuning objectives.
    enum HparamTuningObjective {
      // Unspecified evaluation metric.
      HPARAM_TUNING_OBJECTIVE_UNSPECIFIED = 0;
      // Mean absolute error.
      // mean_absolute_error = AVG(ABS(label - predicted))
      MEAN_ABSOLUTE_ERROR = 1;
      // Mean squared error.
      // mean_squared_error = AVG(POW(label - predicted, 2))
      MEAN_SQUARED_ERROR = 2;
      // Mean squared log error.
      // mean_squared_log_error = AVG(POW(LN(1 + label) - LN(1 + predicted), 2))
      MEAN_SQUARED_LOG_ERROR = 3;
      // Median absolute error.
      // median_absolute_error = APPROX_QUANTILES(absolute_error, 2)[OFFSET(1)]
      MEDIAN_ABSOLUTE_ERROR = 4;
      // R^2 score. This corresponds to r2_score in ML.EVALUATE.
      // r_squared = 1 - SUM(squared_error)/(COUNT(label)*VAR_POP(label))
      R_SQUARED = 5;
      // Explained variance.
      // explained_variance = 1 - VAR_POP(label_error)/VAR_POP(label)
      EXPLAINED_VARIANCE = 6;
      // Precision is the fraction of actual positive predictions that had
      // positive actual labels. For multiclass this is a macro-averaged metric
      // treating each class as a binary classifier.
      PRECISION = 7;
      // Recall is the fraction of actual positive labels that were given a
      // positive prediction. For multiclass this is a macro-averaged metric.
      RECALL = 8;
      // Accuracy is the fraction of predictions given the correct label. For
      // multiclass this is a globally micro-averaged metric.
      ACCURACY = 9;
      // The F1 score is an average of recall and precision. For multiclass
      // this is a macro-averaged metric.
      F1_SCORE = 10;
      // Logarithmic Loss. For multiclass this is a macro-averaged metric.
      LOG_LOSS = 11;
      // Area Under an ROC Curve. For multiclass this is a macro-averaged
      // metric.
      ROC_AUC = 12;
      // Davies-Bouldin Index.
      DAVIES_BOULDIN_INDEX = 13;
      // Mean Average Precision.
      MEAN_AVERAGE_PRECISION = 14;
      // Normalized Discounted Cumulative Gain.
      NORMALIZED_DISCOUNTED_CUMULATIVE_GAIN = 15;
      // Average Rank.
      AVERAGE_RANK = 16;
    }
  }
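  // Illustrative, non-normative sketch (not part of the API surface): the
  // regression objectives above written out in Go, following the SQL
  // formulas in the enum comments verbatim. The function and slice names
  // are placeholder assumptions.
  //
  //   package metrics
  //
  //   import "math"
  //
  //   // meanAbsoluteError implements AVG(ABS(label - predicted)).
  //   func meanAbsoluteError(label, predicted []float64) float64 {
  //     var sum float64
  //     for i := range label {
  //       sum += math.Abs(label[i] - predicted[i])
  //     }
  //     return sum / float64(len(label))
  //   }
  //
  //   // meanSquaredError implements AVG(POW(label - predicted, 2)).
  //   func meanSquaredError(label, predicted []float64) float64 {
  //     var sum float64
  //     for i := range label {
  //       d := label[i] - predicted[i]
  //       sum += d * d
  //     }
  //     return sum / float64(len(label))
  //   }
  //
  //   // meanSquaredLogError implements
  //   // AVG(POW(LN(1 + label) - LN(1 + predicted), 2)).
  //   func meanSquaredLogError(label, predicted []float64) float64 {
  //     var sum float64
  //     for i := range label {
  //       d := math.Log1p(label[i]) - math.Log1p(predicted[i])
  //       sum += d * d
  //     }
  //     return sum / float64(len(label))
  //   }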
  // Indicates the learning rate optimization strategy to use.
  enum LearnRateStrategy {
    // Default value.
    LEARN_RATE_STRATEGY_UNSPECIFIED = 0;
    // Use line search to determine learning rate.
    LINE_SEARCH = 1;
    // Use a constant learning rate.
    CONSTANT = 2;
  }

  // Indicates the optimization strategy used for training.
  enum OptimizationStrategy {
    // Default value.
    OPTIMIZATION_STRATEGY_UNSPECIFIED = 0;
    // Uses an iterative batch gradient descent algorithm.
    BATCH_GRADIENT_DESCENT = 1;
    // Uses a normal equation to solve the linear regression problem.
    NORMAL_EQUATION = 2;
  }

  // Indicates the training algorithm to use for matrix factorization models.
  enum FeedbackType {
    // Default value.
    FEEDBACK_TYPE_UNSPECIFIED = 0;
    // Use weighted-ALS for implicit feedback problems.
    IMPLICIT = 1;
    // Use nonweighted-ALS for explicit feedback problems.
    EXPLICIT = 2;
  }

  // Evaluation metrics for regression and explicit feedback type matrix
  // factorization models.
  message RegressionMetrics {
    // Mean absolute error.
    google.protobuf.DoubleValue mean_absolute_error = 1;

    // Mean squared error.
    google.protobuf.DoubleValue mean_squared_error = 2;

    // Mean squared log error.
    google.protobuf.DoubleValue mean_squared_log_error = 3;

    // Median absolute error.
    google.protobuf.DoubleValue median_absolute_error = 4;

    // R^2 score. This corresponds to r2_score in ML.EVALUATE.
    google.protobuf.DoubleValue r_squared = 5;
  }

  // Aggregate metrics for classification/classifier models. For multi-class
  // models, the metrics are either macro-averaged or micro-averaged. When
  // macro-averaged, the metrics are calculated for each label and then an
  // unweighted average is taken of those values. When micro-averaged, the
  // metric is calculated globally by counting the total number of correctly
  // predicted rows.
  message AggregateClassificationMetrics {
    // Precision is the fraction of actual positive predictions that had
    // positive actual labels. For multiclass this is a macro-averaged
    // metric treating each class as a binary classifier.
    google.protobuf.DoubleValue precision = 1;

    // Recall is the fraction of actual positive labels that were given a
    // positive prediction. For multiclass this is a macro-averaged metric.
    google.protobuf.DoubleValue recall = 2;

    // Accuracy is the fraction of predictions given the correct label. For
    // multiclass this is a micro-averaged metric.
    google.protobuf.DoubleValue accuracy = 3;

    // Threshold at which the metrics are computed. For binary
    // classification models this is the positive class threshold.
    // For multi-class classification models this is the confidence
    // threshold.
    google.protobuf.DoubleValue threshold = 4;

    // The F1 score is an average of recall and precision. For multiclass
    // this is a macro-averaged metric.
    google.protobuf.DoubleValue f1_score = 5;

    // Logarithmic Loss. For multiclass this is a macro-averaged metric.
    google.protobuf.DoubleValue log_loss = 6;

    // Area Under a ROC Curve. For multiclass this is a macro-averaged
    // metric.
    google.protobuf.DoubleValue roc_auc = 7;
  }

  // Evaluation metrics for binary classification/classifier models.
  message BinaryClassificationMetrics {
    // Confusion matrix for binary classification models.
    message BinaryConfusionMatrix {
      // Threshold value used when computing each of the following metrics.
      google.protobuf.DoubleValue positive_class_threshold = 1;

      // Number of true samples predicted as true.
      google.protobuf.Int64Value true_positives = 2;

      // Number of false samples predicted as true.
      google.protobuf.Int64Value false_positives = 3;

      // Number of true samples predicted as false.
      google.protobuf.Int64Value true_negatives = 4;

      // Number of false samples predicted as false.
      google.protobuf.Int64Value false_negatives = 5;

      // The fraction of actual positive predictions that had positive actual
      // labels.
      google.protobuf.DoubleValue precision = 6;

      // The fraction of actual positive labels that were given a positive
      // prediction.
      google.protobuf.DoubleValue recall = 7;

      // The equally weighted average of recall and precision.
      google.protobuf.DoubleValue f1_score = 8;

      // The fraction of predictions given the correct label.
      google.protobuf.DoubleValue accuracy = 9;
    }

    // Aggregate classification metrics.
    AggregateClassificationMetrics aggregate_classification_metrics = 1;

    // Binary confusion matrix at multiple thresholds.
    repeated BinaryConfusionMatrix binary_confusion_matrix_list = 2;

    // Label representing the positive class.
    string positive_label = 3;

    // Label representing the negative class.
    string negative_label = 4;
  }
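  // Illustrative, non-normative sketch (not part of the API surface): how
  // the derived ratios in BinaryConfusionMatrix relate to its raw counts.
  // The Go struct below is a hypothetical stand-in, not a generated type.
  //
  //   package metrics
  //
  //   // counts mirrors the four raw count fields of BinaryConfusionMatrix.
  //   type counts struct {
  //     TP, FP, TN, FN float64
  //   }
  //
  //   // precision = TP / (TP + FP): the fraction of positive predictions
  //   // that had positive actual labels.
  //   func precision(c counts) float64 { return c.TP / (c.TP + c.FP) }
  //
  //   // recall = TP / (TP + FN): the fraction of actual positives that
  //   // were predicted positive.
  //   func recall(c counts) float64 { return c.TP / (c.TP + c.FN) }
  //
  //   // f1Score is the harmonic mean of precision and recall (the
  //   // "equally weighted average" in the field comment).
  //   func f1Score(c counts) float64 {
  //     p, r := precision(c), recall(c)
  //     return 2 * p * r / (p + r)
  //   }
  //
  //   // accuracy = correct predictions over all predictions.
  //   func accuracy(c counts) float64 {
  //     return (c.TP + c.TN) / (c.TP + c.FP + c.TN + c.FN)
  //   }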
  // Evaluation metrics for multi-class classification/classifier models.
  message MultiClassClassificationMetrics {
    // Confusion matrix for multi-class classification models.
    message ConfusionMatrix {
      // A single entry in the confusion matrix.
      message Entry {
        // The predicted label. For confidence_threshold > 0, we will
        // also add an entry indicating the number of items under the
        // confidence threshold.
        string predicted_label = 1;

        // Number of items being predicted as this label.
        google.protobuf.Int64Value item_count = 2;
      }

      // A single row in the confusion matrix.
      message Row {
        // The original label of this row.
        string actual_label = 1;

        // Info describing predicted label distribution.
        repeated Entry entries = 2;
      }

      // Confidence threshold used when computing the entries of the
      // confusion matrix.
      google.protobuf.DoubleValue confidence_threshold = 1;

      // One row per actual label.
      repeated Row rows = 2;
    }

    // Aggregate classification metrics.
    AggregateClassificationMetrics aggregate_classification_metrics = 1;

    // Confusion matrix at different thresholds.
    repeated ConfusionMatrix confusion_matrix_list = 2;
  }

  // Evaluation metrics for clustering models.
  message ClusteringMetrics {
    // Message containing the information about one cluster.
    message Cluster {
      // Representative value of a single feature within the cluster.
      message FeatureValue {
        // Representative value of a categorical feature.
        message CategoricalValue {
          // Represents the count of a single category within the cluster.
          message CategoryCount {
            // The name of category.
            string category = 1;

            // The count of training samples matching the category within the
            // cluster.
            google.protobuf.Int64Value count = 2;
          }

          // Counts of all categories for the categorical feature. If there are
          // more than ten categories, we return top ten (by count) and return
          // one more CategoryCount with category "_OTHER_" and count as
          // aggregate counts of remaining categories.
          repeated CategoryCount category_counts = 1;
        }

        // The feature column name.
        string feature_column = 1;

        // Value.
        oneof value {
          // The numerical feature value. This is the centroid value for this
          // feature.
          google.protobuf.DoubleValue numerical_value = 2;

          // The categorical feature value.
          CategoricalValue categorical_value = 3;
        }
      }

      // Centroid id.
      int64 centroid_id = 1;

      // Values of highly variant features for this cluster.
      repeated FeatureValue feature_values = 2;

      // Count of training data rows that were assigned to this cluster.
      google.protobuf.Int64Value count = 3;
    }

    // Davies-Bouldin index.
    google.protobuf.DoubleValue davies_bouldin_index = 1;

    // Mean of squared distances between each sample to its cluster centroid.
    google.protobuf.DoubleValue mean_squared_distance = 2;

    // Information for all clusters.
    repeated Cluster clusters = 3;
  }

  // Evaluation metrics used by weighted-ALS models specified by
  // feedback_type=implicit.
  message RankingMetrics {
    // Calculates a precision per user for all the items by ranking them and
    // then averages all the precisions across all the users.
    google.protobuf.DoubleValue mean_average_precision = 1;

    // Similar to the mean squared error computed in regression and explicit
    // recommendation models except instead of computing the rating directly,
    // the output from evaluate is computed against a preference which is 1 or
    // 0 depending on whether the rating exists or not.
    google.protobuf.DoubleValue mean_squared_error = 2;

    // A metric to determine the goodness of a ranking calculated from the
    // predicted confidence by comparing it to an ideal rank measured by the
    // original ratings.
    google.protobuf.DoubleValue normalized_discounted_cumulative_gain = 3;

    // Determines the goodness of a ranking by computing the percentile rank
    // from the predicted confidence and dividing it by the original rank.
    google.protobuf.DoubleValue average_rank = 4;
  }

  // Model evaluation metrics for ARIMA forecasting models.
  message ArimaForecastingMetrics {
    // Model evaluation metrics for a single ARIMA forecasting model.
    message ArimaSingleModelForecastingMetrics {
      // Non-seasonal order.
      ArimaOrder non_seasonal_order = 1;

      // Arima fitting metrics.
      ArimaFittingMetrics arima_fitting_metrics = 2;

      // Whether the ARIMA model is fitted with drift or not. It is always
      // false when d is not 1.
      google.protobuf.BoolValue has_drift = 3;

      // The time_series_id value for this time series. It will be one of
      // the unique values from the time_series_id_column specified during
      // ARIMA model training. Only present when the time_series_id_column
      // training option was used.
      string time_series_id = 4;

      // The tuple of time_series_ids identifying this time series. It will
      // be one of the unique tuples of values present in the
      // time_series_id_columns specified during ARIMA model training. Only
      // present when the time_series_id_columns training option was used and
      // the order of values here is the same as the order of
      // time_series_id_columns.
      repeated string time_series_ids = 9;

      // Seasonal periods. Repeated because multiple periods are supported
      // for one time series.
      repeated SeasonalPeriod.SeasonalPeriodType seasonal_periods = 5;

      // If true, holiday_effect is a part of time series decomposition
      // result.
      google.protobuf.BoolValue has_holiday_effect = 6;

      // If true, spikes_and_dips is a part of time series decomposition
      // result.
      google.protobuf.BoolValue has_spikes_and_dips = 7;

      // If true, step_changes is a part of time series decomposition result.
      google.protobuf.BoolValue has_step_changes = 8;
    }

    // Repeated as there can be many metric sets (one for each model) in
    // auto-arima and the large-scale case.
    repeated ArimaSingleModelForecastingMetrics
        arima_single_model_forecasting_metrics = 6;
  }

  // Model evaluation metrics for dimensionality reduction models.
  message DimensionalityReductionMetrics {
    // Total percentage of variance explained by the selected principal
    // components.
    google.protobuf.DoubleValue total_explained_variance_ratio = 1;
  }

  // Evaluation metrics of a model. These are either computed on all training
  // data or just the eval data based on whether eval data was used during
  // training. These are not present for imported models.
  message EvaluationMetrics {
    // Metrics.
    oneof metrics {
      // Populated for regression models and explicit feedback type matrix
      // factorization models.
      RegressionMetrics regression_metrics = 1;

      // Populated for binary classification/classifier models.
      BinaryClassificationMetrics binary_classification_metrics = 2;

      // Populated for multi-class classification/classifier models.
      MultiClassClassificationMetrics multi_class_classification_metrics = 3;

      // Populated for clustering models.
      ClusteringMetrics clustering_metrics = 4;

      // Populated for implicit feedback type matrix factorization models.
      RankingMetrics ranking_metrics = 5;

      // Populated for ARIMA models.
      ArimaForecastingMetrics arima_forecasting_metrics = 6;

      // Evaluation metrics when the model is a dimensionality reduction model,
      // which currently includes PCA.
      DimensionalityReductionMetrics dimensionality_reduction_metrics = 7;
    }
  }
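  // Illustrative, non-normative sketch (not part of the API surface):
  // reading the metrics oneof above from Go. This assumes stock
  // protoc-gen-go output for this file under the go_package declared above;
  // the generated type and accessor names shown are assumptions, and each
  // Get* accessor returns nil when the oneof holds a different case.
  //
  //   package example
  //
  //   import (
  //     "fmt"
  //
  //     "cloud.google.com/go/bigquery/apiv2/bigquerypb"
  //   )
  //
  //   func describeMetrics(em *bigquerypb.Model_EvaluationMetrics) {
  //     switch {
  //     case em.GetRegressionMetrics() != nil:
  //       fmt.Println("MAE:",
  //         em.GetRegressionMetrics().GetMeanAbsoluteError().GetValue())
  //     case em.GetBinaryClassificationMetrics() != nil:
  //       m := em.GetBinaryClassificationMetrics()
  //       fmt.Println("ROC AUC:",
  //         m.GetAggregateClassificationMetrics().GetRocAuc().GetValue())
  //     default:
  //       fmt.Println("other or no metrics populated")
  //     }
  //   }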
  // Data split result. This contains references to the training and evaluation
  // data tables that were used to train the model.
  message DataSplitResult {
    // Table reference of the training data after split.
    TableReference training_table = 1;

    // Table reference of the evaluation data after split.
    TableReference evaluation_table = 2;

    // Table reference of the test data after split.
    TableReference test_table = 3;
  }

  // Arima order, can be used for both non-seasonal and seasonal parts.
  message ArimaOrder {
    // Order of the autoregressive part.
    google.protobuf.Int64Value p = 1;

    // Order of the differencing part.
    google.protobuf.Int64Value d = 2;

    // Order of the moving-average part.
    google.protobuf.Int64Value q = 3;
  }

  // ARIMA model fitting metrics.
  message ArimaFittingMetrics {
    // Log-likelihood.
    google.protobuf.DoubleValue log_likelihood = 1;

    // AIC.
    google.protobuf.DoubleValue aic = 2;

    // Variance.
    google.protobuf.DoubleValue variance = 3;
  }

  // Global explanations containing the top most important features
  // after training.
  message GlobalExplanation {
    // Explanation for a single feature.
    message Explanation {
      // The full feature name. For non-numerical features, will be formatted
      // like `<column_name>.<encoded_category_name>`. Overall size of feature
      // name will always be truncated to first 120 characters.
      string feature_name = 1;

      // Attribution of feature.
      google.protobuf.DoubleValue attribution = 2;
    }

    // A list of the top global explanations. Sorted by absolute value of
    // attribution in descending order.
    repeated Explanation explanations = 1;

    // Class label for this set of global explanations. Will be empty/null for
    // binary logistic and linear regression models. Sorted alphabetically in
    // descending order.
    string class_label = 2;
  }

  // Encoding methods for categorical features.
  message CategoryEncodingMethod {
    // Supported encoding methods for categorical features.
    enum EncodingMethod {
      // Unspecified encoding method.
      ENCODING_METHOD_UNSPECIFIED = 0;
      // Applies one-hot encoding.
      ONE_HOT_ENCODING = 1;
      // Applies label encoding.
      LABEL_ENCODING = 2;
      // Applies dummy encoding.
      DUMMY_ENCODING = 3;
    }
  }

  // PCA solver options.
  message PcaSolverOptionEnums {
    // Enums for supported PCA solvers.
    enum PcaSolver {
      // Default value.
      UNSPECIFIED = 0;
      // Full eigen-decomposition.
      FULL = 1;
      // Randomized SVD.
      RANDOMIZED = 2;
      // Auto.
      AUTO = 3;
    }
  }

  // Model registry options.
  message ModelRegistryOptionEnums {
    // Enums for supported model registries.
    enum ModelRegistry {
      // Default value.
      MODEL_REGISTRY_UNSPECIFIED = 0;
      // Vertex AI.
      VERTEX_AI = 1;
    }
  }

  // Information about a single training query run for the model.
  message TrainingRun {
    // Options used in model training.
    message TrainingOptions {
      // The maximum number of iterations in training. Used only for iterative
      // training algorithms.
      int64 max_iterations = 1;

      // Type of loss function used during training run.
      LossType loss_type = 2;

      // Learning rate in training. Used only for iterative training
      // algorithms.
      double learn_rate = 3;

      // L1 regularization coefficient.
      google.protobuf.DoubleValue l1_regularization = 4;

      // L2 regularization coefficient.
      google.protobuf.DoubleValue l2_regularization = 5;

      // When early_stop is true, stops training when accuracy improvement is
      // less than 'min_relative_progress'. Used only for iterative training
      // algorithms.
      google.protobuf.DoubleValue min_relative_progress = 6;

      // Whether to train a model from the last checkpoint.
      google.protobuf.BoolValue warm_start = 7;

      // Whether to stop early when the loss doesn't improve significantly
      // any more (compared to min_relative_progress). Used only for iterative
      // training algorithms.
      google.protobuf.BoolValue early_stop = 8;

      // Name of input label columns in training data.
      repeated string input_label_columns = 9;

      // The data split type for training and evaluation, e.g. RANDOM.
      DataSplitMethod data_split_method = 10;

      // The fraction of evaluation data over the whole input data. The rest
      // of data will be used as training data. The format should be double.
      // Accurate to two decimal places.
      // Default value is 0.2.
      double data_split_eval_fraction = 11;

      // The column to split data with. This column won't be used as a
      // feature.
      // 1. When data_split_method is CUSTOM, the corresponding column should
      // be boolean. The rows with true value tag are eval data, and the false
      // are training data.
      // 2. When data_split_method is SEQUENTIAL, the first
      // DATA_SPLIT_EVAL_FRACTION rows (from smallest to largest) in the
      // corresponding column are used as training data, and the rest are eval
      // data. It respects the order in Orderable data types:
      // https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data-type-properties
      string data_split_column = 12;

      // The strategy to determine learn rate for the current iteration.
      LearnRateStrategy learn_rate_strategy = 13;

      // Specifies the initial learning rate for the line search learn rate
      // strategy.
      double initial_learn_rate = 16;

      // Weights associated with each label class, for rebalancing the
      // training data. Only applicable for classification models.
      map<string, double> label_class_weights = 17;

      // User column specified for matrix factorization models.
      string user_column = 18;

      // Item column specified for matrix factorization models.
      string item_column = 19;

      // Distance type for clustering models.
      DistanceType distance_type = 20;

      // Number of clusters for clustering models.
      int64 num_clusters = 21;

      // Google Cloud Storage URI from which the model was imported. Only
      // applicable for imported models.
      string model_uri = 22;

      // Optimization strategy for training linear regression models.
      OptimizationStrategy optimization_strategy = 23;

      // Hidden units for dnn models.
      repeated int64 hidden_units = 24;

      // Batch size for dnn models.
      int64 batch_size = 25;

      // Dropout probability for dnn models.
      google.protobuf.DoubleValue dropout = 26;

      // Maximum depth of a tree for boosted tree models.
      int64 max_tree_depth = 27;

      // Subsample fraction of the training data to grow tree to prevent
      // overfitting for boosted tree models.
      double subsample = 28;

      // Minimum split loss for boosted tree models.
      google.protobuf.DoubleValue min_split_loss = 29;

      // Booster type for boosted tree models.
      BoostedTreeOptionEnums.BoosterType booster_type = 60;

      // Number of parallel trees constructed during each iteration for
      // boosted tree models.
      google.protobuf.Int64Value num_parallel_tree = 61;

      // Type of normalization algorithm for boosted tree models using
      // dart booster.
      BoostedTreeOptionEnums.DartNormalizeType dart_normalize_type = 62;

      // Tree construction algorithm for boosted tree models.
      BoostedTreeOptionEnums.TreeMethod tree_method = 63;

      // Minimum sum of instance weight needed in a child for boosted tree
      // models.
      google.protobuf.Int64Value min_tree_child_weight = 64;

      // Subsample ratio of columns when constructing each tree for boosted
      // tree models.
      google.protobuf.DoubleValue colsample_bytree = 65;

      // Subsample ratio of columns for each level for boosted tree models.
      google.protobuf.DoubleValue colsample_bylevel = 66;

      // Subsample ratio of columns for each node (split) for boosted tree
      // models.
      google.protobuf.DoubleValue colsample_bynode = 67;

      // Num factors specified for matrix factorization models.
      int64 num_factors = 30;

      // Feedback type that specifies which algorithm to run for matrix
      // factorization.
      FeedbackType feedback_type = 31;

      // Hyperparameter for matrix factorization when implicit feedback type
      // is specified.
      google.protobuf.DoubleValue wals_alpha = 32;

      // The method used to initialize the centroids for kmeans algorithm.
      KmeansEnums.KmeansInitializationMethod kmeans_initialization_method = 33;

      // The column used to provide the initial centroids for kmeans algorithm
      // when kmeans_initialization_method is CUSTOM.
      string kmeans_initialization_column = 34;

      // Column to be designated as time series timestamp for ARIMA model.
      string time_series_timestamp_column = 35;

      // Column to be designated as time series data for ARIMA model.
      string time_series_data_column = 36;

      // Whether to enable auto ARIMA or not.
      google.protobuf.BoolValue auto_arima = 37;

      // A specification of the non-seasonal part of the ARIMA model: the three
      // components (p, d, q) are the AR order, the degree of differencing, and
      // the MA order.
      ArimaOrder non_seasonal_order = 38;

      // The data frequency of a time series.
      DataFrequency data_frequency = 39;

      // Whether or not p-value test should be computed for this model. Only
      // available for linear and logistic regression models.
      google.protobuf.BoolValue calculate_p_values = 40;

      // Include drift when fitting an ARIMA model.
      google.protobuf.BoolValue include_drift = 41;

      // The geographical region based on which the holidays are considered in
      // time series modeling. If a valid value is specified, then holiday
      // effects modeling is enabled.
      HolidayRegion holiday_region = 42;

      // A list of geographical regions that are used for time series modeling.
      repeated HolidayRegion holiday_regions = 71;

      // The time series id column that was used during ARIMA model training.
      string time_series_id_column = 43;

      // The time series id columns that were used during ARIMA model training.
      repeated string time_series_id_columns = 51;

      // The number of periods ahead that need to be forecasted.
      int64 horizon = 44;

      // The max value of the sum of non-seasonal p and q.
      int64 auto_arima_max_order = 46;

      // The min value of the sum of non-seasonal p and q.
      int64 auto_arima_min_order = 83;

      // Number of trials to run this hyperparameter tuning job.
      int64 num_trials = 47;

      // Maximum number of trials to run in parallel.
      int64 max_parallel_trials = 48;

      // The target evaluation metrics to optimize the hyperparameters for.
      repeated HparamTuningEnums.HparamTuningObjective
          hparam_tuning_objectives = 54;

      // If true, perform decompose time series and save the results.
      google.protobuf.BoolValue decompose_time_series = 50;

      // If true, clean spikes and dips in the input time series.
      google.protobuf.BoolValue clean_spikes_and_dips = 52;

      // If true, detect step changes and make data adjustment in the input
      // time series.
      google.protobuf.BoolValue adjust_step_changes = 53;

      // If true, enable global explanation during training.
      google.protobuf.BoolValue enable_global_explain = 55;

      // Number of paths for the sampled Shapley explain method.
      int64 sampled_shapley_num_paths = 56;

      // Number of integral steps for the integrated gradients explain method.
      int64 integrated_gradients_num_steps = 57;

      // Categorical feature encoding method.
      CategoryEncodingMethod.EncodingMethod category_encoding_method = 58;

      // Based on the selected TF version, the corresponding docker image is
      // used to train external models.
      string tf_version = 70;

      // Enums for color space, used for processing images in Object Table.
      // See more details at
      // https://www.tensorflow.org/io/tutorials/colorspace.
      ColorSpace color_space = 72;

      // Name of the instance weight column for training data.
      // This column isn't used as a feature.
      string instance_weight_column = 73;

      // Smoothing window size for the trend component. When a positive value
      // is specified, a center moving average smoothing is applied on the
      // history trend. When the smoothing window is out of the boundary at the
      // beginning or the end of the trend, the first element or the last
      // element is padded to fill the smoothing window before the average is
      // applied.
      int64 trend_smoothing_window_size = 74;

      // The fraction of the interpolated length of the time series that's used
      // to model the time series trend component. All of the time points of
      // the time series are used to model the non-trend component. This
      // training option accelerates modeling training without sacrificing much
      // forecasting accuracy. You can use this option with
      // `minTimeSeriesLength` but not with `maxTimeSeriesLength`.
      double time_series_length_fraction = 75;

      // The minimum number of time points in a time series that are used in
      // modeling the trend component of the time series. If you use this
      // option you must also set the `timeSeriesLengthFraction` option. This
      // training option ensures that enough time points are available when
      // you use `timeSeriesLengthFraction` in trend modeling. This is
      // particularly important when forecasting multiple time series in a
      // single query using `timeSeriesIdColumn`. If the total number of time
      // points is less than the `minTimeSeriesLength` value, then the query
      // uses all available time points.
      int64 min_time_series_length = 76;

      // The maximum number of time points in a time series that can be used
      // in modeling the trend component of the time series. Don't use this
      // option with the `timeSeriesLengthFraction` or `minTimeSeriesLength`
      // options.
      int64 max_time_series_length = 77;

      // User-selected XGBoost versions for training of XGBoost models.
      string xgboost_version = 78;

      // Whether to use approximate feature contribution method in XGBoost
      // model explanation for global explain.
      google.protobuf.BoolValue approx_global_feature_contrib = 84;

      // Whether the model should include intercept during model training.
      google.protobuf.BoolValue fit_intercept = 85;

      // Number of principal components to keep in the PCA model. Must be <=
      // the number of features.
      int64 num_principal_components = 86;

      // The minimum ratio of cumulative explained variance that needs to be
      // given by the PCA model.
      double pca_explained_variance_ratio = 87;

      // If true, scale the feature values by dividing the feature standard
      // deviation. Currently only applies to PCA.
      google.protobuf.BoolValue scale_features = 88;

      // The solver for PCA.
      PcaSolverOptionEnums.PcaSolver pca_solver = 89;

      // Whether to calculate class weights automatically based on the
      // popularity of each label.
      google.protobuf.BoolValue auto_class_weights = 90;

      // Activation function of the neural nets.
      string activation_fn = 91;

      // Optimizer used for training the neural nets.
      string optimizer = 92;

      // Budget in hours for AutoML training.
      double budget_hours = 93;

      // Whether to standardize numerical features. Defaults to true.
      google.protobuf.BoolValue standardize_features = 94;

      // L1 regularization coefficient to activations.
      double l1_reg_activation = 95;

      // The model registry.
      ModelRegistryOptionEnums.ModelRegistry model_registry = 96;

      // The version aliases to apply in Vertex AI model registry. Always
      // overwrites if the version aliases exist in an existing model.
      repeated string vertex_ai_model_version_aliases = 97;
    }

    // Information about a single iteration of the training run.
    message IterationResult {
      // Information about a single cluster for clustering model.
      message ClusterInfo {
        // Centroid id.
        int64 centroid_id = 1;

        // Cluster radius, the average distance from centroid
        // to each point assigned to the cluster.
        google.protobuf.DoubleValue cluster_radius = 2;

        // Cluster size, the total number of points assigned to the cluster.
        google.protobuf.Int64Value cluster_size = 3;
      }

      // (Auto-)arima fitting result. Wrap everything in ArimaResult for easier
      // refactoring if we want to use model-specific iteration results.
      message ArimaResult {
        // Arima coefficients.
        message ArimaCoefficients {
          // Auto-regressive coefficients, an array of double.
          repeated double auto_regressive_coefficients = 1;

          // Moving-average coefficients, an array of double.
          repeated double moving_average_coefficients = 2;

          // Intercept coefficient, just a double not an array.
          google.protobuf.DoubleValue intercept_coefficient = 3;
        }

        // Arima model information.
        message ArimaModelInfo {
          // Non-seasonal order.
          ArimaOrder non_seasonal_order = 1;

          // Arima coefficients.
          ArimaCoefficients arima_coefficients = 2;

          // Arima fitting metrics.
          ArimaFittingMetrics arima_fitting_metrics = 3;

          // Whether the ARIMA model is fitted with drift or not. It is always
          // false when d is not 1.
          google.protobuf.BoolValue has_drift = 4;

          // The time_series_id value for this time series. It will be one of
          // the unique values from the time_series_id_column specified during
          // ARIMA model training. Only present when the time_series_id_column
          // training option was used.
          string time_series_id = 5;

          // The tuple of time_series_ids identifying this time series. It
          // will be one of the unique tuples of values present in the
          // time_series_id_columns specified during ARIMA model training.
          // Only present when the time_series_id_columns training option was
          // used and the order of values here is the same as the order of
          // time_series_id_columns.
          repeated string time_series_ids = 10;

          // Seasonal periods. Repeated because multiple periods are supported
          // for one time series.
          repeated SeasonalPeriod.SeasonalPeriodType seasonal_periods = 6;

          // If true, holiday_effect is a part of time series decomposition
          // result.
          google.protobuf.BoolValue has_holiday_effect = 7;

          // If true, spikes_and_dips is a part of time series decomposition
          // result.
          google.protobuf.BoolValue has_spikes_and_dips = 8;

          // If true, step_changes is a part of time series decomposition
          // result.
          google.protobuf.BoolValue has_step_changes = 9;
        }

        // This message is repeated because there are multiple arima models
        // fitted in auto-arima. For non-auto-arima model, its size is one.
        repeated ArimaModelInfo arima_model_info = 1;

        // Seasonal periods. Repeated because multiple periods are supported
        // for one time series.
        repeated SeasonalPeriod.SeasonalPeriodType seasonal_periods = 2;
      }

      // Principal component infos, used only for eigen decomposition based
      // models, e.g., PCA. Ordered by explained_variance in the descending
      // order.
      message PrincipalComponentInfo {
        // Id of the principal component.
        google.protobuf.Int64Value principal_component_id = 1;

        // Explained variance by this principal component, which is simply the
        // eigenvalue.
        google.protobuf.DoubleValue explained_variance = 2;

        // Explained_variance over the total explained variance.
        google.protobuf.DoubleValue explained_variance_ratio = 3;

        // The explained_variance is pre-ordered in the descending order to
        // compute the cumulative explained variance ratio.
        google.protobuf.DoubleValue cumulative_explained_variance_ratio = 4;
      }

      // Index of the iteration, 0 based.
      google.protobuf.Int32Value index = 1;

      // Time taken to run the iteration in milliseconds.
      google.protobuf.Int64Value duration_ms = 4;

      // Loss computed on the training data at the end of iteration.
      google.protobuf.DoubleValue training_loss = 5;

      // Loss computed on the eval data at the end of iteration.
      google.protobuf.DoubleValue eval_loss = 6;

      // Learn rate used for this iteration.
      double learn_rate = 7;

      // Information about top clusters for clustering models.
      repeated ClusterInfo cluster_infos = 8;

      // Arima result.
      ArimaResult arima_result = 9;

      // The information of the principal components.
      repeated PrincipalComponentInfo principal_component_infos = 10;
    }

    // Output only. Options that were used for this training run, includes
    // user specified and default options that were used.
    TrainingOptions training_options = 1
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. The start time of this training run.
    google.protobuf.Timestamp start_time = 8
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Output of each iteration run, results.size() <=
    // max_iterations.
    repeated IterationResult results = 6
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. The evaluation metrics over training/eval data that were
    // computed at the end of training.
    EvaluationMetrics evaluation_metrics = 7
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Data split result of the training run. Only set when the
    // input data is actually split.
    DataSplitResult data_split_result = 9
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Global explanation contains the explanation of top features
    // on the model level. Applies to both regression and classification
    // models.
    GlobalExplanation model_level_global_explanation = 11
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Global explanation contains the explanation of top features
    // on the class level. Applies to classification models only.
    repeated GlobalExplanation class_level_global_explanations = 12
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // The model id in the [Vertex AI Model
    // Registry](https://cloud.google.com/vertex-ai/docs/model-registry/introduction)
    // for this training run.
    string vertex_ai_model_id = 14;

    // Output only. The model version in the [Vertex AI Model
    // Registry](https://cloud.google.com/vertex-ai/docs/model-registry/introduction)
    // for this training run.
    string vertex_ai_model_version = 15
        [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // Search space for a double hyperparameter.
  message DoubleHparamSearchSpace {
    // Range of a double hyperparameter.
    message DoubleRange {
      // Min value of the double parameter.
      google.protobuf.DoubleValue min = 1;

      // Max value of the double parameter.
      google.protobuf.DoubleValue max = 2;
    }

    // Discrete candidates of a double hyperparameter.
    message DoubleCandidates {
      // Candidates for the double parameter in increasing order.
      repeated google.protobuf.DoubleValue candidates = 1;
    }

    // Search space.
    oneof search_space {
      // Range of the double hyperparameter.
      DoubleRange range = 1;

      // Candidates of the double hyperparameter.
      DoubleCandidates candidates = 2;
    }
  }

  // Search space for an int hyperparameter.
  message IntHparamSearchSpace {
    // Range of an int hyperparameter.
    message IntRange {
      // Min value of the int parameter.
      google.protobuf.Int64Value min = 1;

      // Max value of the int parameter.
      google.protobuf.Int64Value max = 2;
    }

    // Discrete candidates of an int hyperparameter.
    message IntCandidates {
      // Candidates for the int parameter in increasing order.
      repeated google.protobuf.Int64Value candidates = 1;
    }

    // Search space.
    oneof search_space {
      // Range of the int hyperparameter.
      IntRange range = 1;

      // Candidates of the int hyperparameter.
      IntCandidates candidates = 2;
    }
  }

  // Search space for string and enum.
  message StringHparamSearchSpace {
    // Candidates for the string or enum parameter in lower case.
    repeated string candidates = 1;
  }

  // Search space for int array.
  message IntArrayHparamSearchSpace {
    // An array of int.
    message IntArray {
      // Elements in the int array.
      repeated int64 elements = 1;
    }

    // Candidates for the int array parameter.
    repeated IntArray candidates = 1;
  }

  // Hyperparameter search spaces.
  // These should be a subset of training_options.
  message HparamSearchSpaces {
    // Learning rate of training jobs.
    DoubleHparamSearchSpace learn_rate = 2;

    // L1 regularization coefficient.
    DoubleHparamSearchSpace l1_reg = 3;

    // L2 regularization coefficient.
    DoubleHparamSearchSpace l2_reg = 4;

    // Number of clusters for k-means.
    IntHparamSearchSpace num_clusters = 26;

    // Number of latent factors to train on.
    IntHparamSearchSpace num_factors = 31;

    // Hidden units for neural network models.
    IntArrayHparamSearchSpace hidden_units = 34;

    // Mini batch sample size.
    IntHparamSearchSpace batch_size = 37;

    // Dropout probability for dnn model training and boosted tree models
    // using dart booster.
    DoubleHparamSearchSpace dropout = 38;

    // Maximum depth of a tree for boosted tree models.
    IntHparamSearchSpace max_tree_depth = 41;

    // Subsample the training data to grow tree to prevent overfitting for
    // boosted tree models.
    DoubleHparamSearchSpace subsample = 42;

    // Minimum split loss for boosted tree models.
    DoubleHparamSearchSpace min_split_loss = 43;

    // Hyperparameter for matrix factorization when implicit feedback type is
    // specified.
    DoubleHparamSearchSpace wals_alpha = 49;

    // Booster type for boosted tree models.
    StringHparamSearchSpace booster_type = 56;

    // Number of parallel trees for boosted tree models.
    IntHparamSearchSpace num_parallel_tree = 57;

    // Dart normalization type for boosted tree models.
    StringHparamSearchSpace dart_normalize_type = 58;

    // Tree construction algorithm for boosted tree models.
    StringHparamSearchSpace tree_method = 59;

    // Minimum sum of instance weight needed in a child for boosted tree
    // models.
    IntHparamSearchSpace min_tree_child_weight = 60;

    // Subsample ratio of columns when constructing each tree for boosted tree
    // models.
    DoubleHparamSearchSpace colsample_bytree = 61;

    // Subsample ratio of columns for each level for boosted tree models.
    DoubleHparamSearchSpace colsample_bylevel = 62;

    // Subsample ratio of columns for each node (split) for boosted tree
    // models.
    DoubleHparamSearchSpace colsample_bynode = 63;

    // Activation functions of neural network models.
    StringHparamSearchSpace activation_fn = 67;

    // Optimizer of TF models.
    StringHparamSearchSpace optimizer = 68;
  }

  // Training info of a trial in [hyperparameter
  // tuning](https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-hp-tuning-overview)
  // models.
  message HparamTuningTrial {
    // Current status of the trial.
    enum TrialStatus {
      // Default value.
      TRIAL_STATUS_UNSPECIFIED = 0;
      // Scheduled but not started.
      NOT_STARTED = 1;
      // Running state.
      RUNNING = 2;
      // The trial succeeded.
      SUCCEEDED = 3;
      // The trial failed.
      FAILED = 4;
      // The trial is infeasible due to the invalid params.
      INFEASIBLE = 5;
      // Trial stopped early because it's not promising.
      STOPPED_EARLY = 6;
    }

    // 1-based index of the trial.
    int64 trial_id = 1;

    // Starting time of the trial.
    int64 start_time_ms = 2;

    // Ending time of the trial.
    int64 end_time_ms = 3;

    // The hyperparameters selected for this trial.
    TrainingRun.TrainingOptions hparams = 4;

    // Evaluation metrics of this trial calculated on the test data.
    // Empty in Job API.
    EvaluationMetrics evaluation_metrics = 5;

    // The status of the trial.
    TrialStatus status = 6;

    // Error message for FAILED and INFEASIBLE trials.
    string error_message = 7;

    // Loss computed on the training data at the end of the trial.
    google.protobuf.DoubleValue training_loss = 8;

    // Loss computed on the eval data at the end of the trial.
    google.protobuf.DoubleValue eval_loss = 9;

    // Hyperparameter tuning evaluation metrics of this trial calculated on the
    // eval data. Unlike evaluation_metrics, only the fields corresponding to
    // the hparam_tuning_objectives are set.
    EvaluationMetrics hparam_tuning_evaluation_metrics = 10;
  }

  // Output only. A hash of this resource.
  string etag = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Required. Unique identifier for this model.
  ModelReference model_reference = 2 [(google.api.field_behavior) = REQUIRED];

  // Output only. The time when this model was created, in milliseconds since
  // the epoch.
  int64 creation_time = 5 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when this model was last modified, in milliseconds
  // since the epoch.
  int64 last_modified_time = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. A user-friendly description of this model.
  string description = 12 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A descriptive name for this model.
  string friendly_name = 14 [(google.api.field_behavior) = OPTIONAL];

  // The labels associated with this model. You can use these to organize
  // and group your models. Label keys and values can be no longer
  // than 63 characters, can only contain lowercase letters, numeric
  // characters, underscores and dashes. International characters are allowed.
  // Label values are optional. Label keys must start with a letter and each
  // label in the list must have a different key.
  map<string, string> labels = 15;

  // Optional. The time when this model expires, in milliseconds since the
  // epoch. If not present, the model will persist indefinitely. Expired models
  // will be deleted and their storage reclaimed. The defaultTableExpirationMs
  // property of the encapsulating dataset can be used to set a default
  // expirationTime on newly created models.
  int64 expiration_time = 16 [(google.api.field_behavior) = OPTIONAL];

  // Output only. The geographic location where the model resides. This value
  // is inherited from the dataset.
  string location = 13 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Custom encryption configuration (e.g., Cloud KMS keys). This shows the
  // encryption configuration of the model data while stored in BigQuery
  // storage. This field can be used with PatchModel to update encryption key
  // for an already encrypted model.
  EncryptionConfiguration encryption_configuration = 17;

  // Output only. Type of the model resource.
  ModelType model_type = 7 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Information for all training runs in increasing order of start_time.
  repeated TrainingRun training_runs = 9;
  // Output only. Input feature columns for the model inference. If the model
  // is trained with a TRANSFORM clause, these are the input of the TRANSFORM
  // clause.
  repeated StandardSqlField feature_columns = 10
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Label columns that were used to train this model.
  // The output of the model will have a "predicted_" prefix to these columns.
  repeated StandardSqlField label_columns = 11
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. This field will be populated if a TRANSFORM clause was used
  // to train a model. The TRANSFORM clause (if used) takes feature_columns as
  // input and outputs transform_columns. transform_columns are then used to
  // train the model.
  repeated TransformColumn transform_columns = 26
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. All hyperparameter search spaces in this model.
  HparamSearchSpaces hparam_search_spaces = 18
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The default trial_id to use in TVFs when the trial_id is not
  // passed in. For single-objective [hyperparameter
  // tuning](https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-hp-tuning-overview)
  // models, this is the best trial ID. For multi-objective [hyperparameter
  // tuning](https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-hp-tuning-overview)
  // models, this is the smallest trial ID among all Pareto optimal trials.
  int64 default_trial_id = 21 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Trials of a [hyperparameter
  // tuning](https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-hp-tuning-overview)
  // model sorted by trial_id.
  repeated HparamTuningTrial hparam_trials = 20
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. For single-objective [hyperparameter
  // tuning](https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-hp-tuning-overview)
  // models, it only contains the best trial. For multi-objective
  // [hyperparameter
  // tuning](https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-hp-tuning-overview)
  // models, it contains all Pareto optimal trials sorted by trial_id.
  repeated int64 optimal_trial_ids = 22
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Remote model info.
  RemoteModelInfo remote_model_info = 25
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Request format for getting information about a BigQuery ML model.
message GetModelRequest {
  // Required. Project ID of the requested model.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. Dataset ID of the requested model.
  string dataset_id = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. Model ID of the requested model.
  string model_id = 3 [(google.api.field_behavior) = REQUIRED];
}

// Request format for patching specific fields in a BigQuery ML model.
message PatchModelRequest {
  // Required. Project ID of the model to patch.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. Dataset ID of the model to patch.
  string dataset_id = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. Model ID of the model to patch.
  string model_id = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. Patched model.
  // Follows RFC5789 patch semantics. Missing fields are not updated.
  // To clear a field, explicitly set it to its default value.
  Model model = 4 [(google.api.field_behavior) = REQUIRED];
}
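// Illustrative, non-normative sketch: a partial update through the
// PatchModel REST binding above. Per RFC 5789 patch semantics, only the
// fields present in the request body are updated. The project, dataset,
// and model IDs are placeholder assumptions, and the authenticated client
// is assumed to come from golang.org/x/oauth2/google application default
// credentials.
//
//   package main
//
//   import (
//     "context"
//     "net/http"
//     "strings"
//
//     "golang.org/x/oauth2/google"
//   )
//
//   func main() {
//     ctx := context.Background()
//     client, err := google.DefaultClient(ctx,
//       "https://www.googleapis.com/auth/bigquery")
//     if err != nil {
//       panic(err)
//     }
//     url := "https://bigquery.googleapis.com/bigquery/v2/projects/" +
//       "my-project/datasets/my_dataset/models/my_model"
//     // Only `description` is sent, so only `description` is updated.
//     body := strings.NewReader(`{"description": "updated by PatchModel"}`)
//     req, _ := http.NewRequestWithContext(ctx, http.MethodPatch, url, body)
//     req.Header.Set("Content-Type", "application/json")
//     resp, err := client.Do(req)
//     if err != nil {
//       panic(err)
//     }
//     defer resp.Body.Close()
//   }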
// Request format for deleting BigQuery ML models.
message DeleteModelRequest {
  // Required. Project ID of the model to delete.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. Dataset ID of the model to delete.
  string dataset_id = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. Model ID of the model to delete.
  string model_id = 3 [(google.api.field_behavior) = REQUIRED];
}

// Request format for listing BigQuery ML models.
message ListModelsRequest {
  // Required. Project ID of the models to list.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. Dataset ID of the models to list.
  string dataset_id = 2 [(google.api.field_behavior) = REQUIRED];

  // The maximum number of results to return in a single response page.
  // Leverage the page tokens to iterate through the entire collection.
  google.protobuf.UInt32Value max_results = 3;

  // Page token, returned by a previous call, to request the next page of
  // results.
  string page_token = 4;
}

// Response format for a single page when listing BigQuery ML models.
message ListModelsResponse {
  // Models in the requested dataset. Only the following fields are populated:
  // model_reference, model_type, creation_time, last_modified_time and
  // labels.
  repeated Model models = 1;

  // A token to request the next page of results.
  string next_page_token = 2;
}
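// Illustrative, non-normative sketch: iterating over every page of the
// ListModels REST binding above by threading next_page_token back in as
// pageToken. The project and dataset IDs are placeholder assumptions, and
// the authenticated client is assumed to come from golang.org/x/oauth2/google
// application default credentials.
//
//   package main
//
//   import (
//     "context"
//     "encoding/json"
//     "fmt"
//
//     "golang.org/x/oauth2/google"
//   )
//
//   func main() {
//     ctx := context.Background()
//     client, err := google.DefaultClient(ctx,
//       "https://www.googleapis.com/auth/bigquery")
//     if err != nil {
//       panic(err)
//     }
//     base := "https://bigquery.googleapis.com/bigquery/v2/projects/" +
//       "my-project/datasets/my_dataset/models?maxResults=50"
//     for token := ""; ; {
//       url := base
//       if token != "" {
//         url += "&pageToken=" + token
//       }
//       resp, err := client.Get(url)
//       if err != nil {
//         panic(err)
//       }
//       // Decode only the fields this loop needs from ListModelsResponse.
//       var page struct {
//         Models        []json.RawMessage `json:"models"`
//         NextPageToken string            `json:"nextPageToken"`
//       }
//       if err := json.NewDecoder(resp.Body).Decode(&page); err != nil {
//         panic(err)
//       }
//       resp.Body.Close()
//       fmt.Println("models on this page:", len(page.Models))
//       if token = page.NextPageToken; token == "" {
//         break // an empty next_page_token means this was the last page
//       }
//     }
//   }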