// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1;

import "google/api/resource.proto";
import "google/cloud/aiplatform/v1/io.proto";
import "google/api/annotations.proto";

option csharp_namespace = "Google.Cloud.AIPlatform.V1";
option go_package = "google.golang.org/genproto/googleapis/cloud/aiplatform/v1;aiplatform";
option java_multiple_files = true;
option java_outer_classname = "ModelMonitoringProto";
option java_package = "com.google.cloud.aiplatform.v1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
option ruby_package = "Google::Cloud::AIPlatform::V1";

// Next ID: 6
message ModelMonitoringObjectiveConfig {
  // Training Dataset information.
  message TrainingDataset {
    oneof data_source {
      // The resource name of the Dataset used to train this Model.
      string dataset = 3 [(google.api.resource_reference) = {
        type: "aiplatform.googleapis.com/Dataset"
      }];

      // The Google Cloud Storage uri of the unmanaged Dataset used to train
      // this Model.
      GcsSource gcs_source = 4;

      // The BigQuery table of the unmanaged Dataset used to train this
      // Model.
      BigQuerySource bigquery_source = 5;
    }

    // Data format of the dataset, only applicable if the input is from
    // Google Cloud Storage.
    // The possible formats are:
    //
    // "tf-record"
    // The source file is a TFRecord file.
    //
    // "csv"
    // The source file is a CSV file.
    string data_format = 2;

    // The name of the target field that the model is to predict.
    // This field will be excluded when doing Predict and (or) Explain for the
    // training data.
    string target_field = 6;

    // Strategy used to sample data from the Training Dataset.
    // If not set, the whole dataset is processed.
    SamplingStrategy logging_sampling_strategy = 7;
  }

  // The config for Training & Prediction data skew detection. It specifies
  // the training dataset sources and the skew detection parameters.
  message TrainingPredictionSkewDetectionConfig {
    // Key is the feature name and value is the threshold. If a feature needs
    // to be monitored for skew, a value threshold must be configured for that
    // feature. The threshold here is against the feature distribution
    // distance between the training and prediction feature.
    map<string, ThresholdConfig> skew_thresholds = 1;

    // Key is the feature name and value is the threshold. The threshold here
    // is against the attribution score distance between the training and
    // prediction feature.
    map<string, ThresholdConfig> attribution_score_skew_thresholds = 2;
  }

  // The config for Prediction data drift detection.
  message PredictionDriftDetectionConfig {
    // Key is the feature name and value is the threshold. If a feature needs
    // to be monitored for drift, a value threshold must be configured for
    // that feature. The threshold here is against the feature distribution
    // distance between different time windows.
    map<string, ThresholdConfig> drift_thresholds = 1;

    // Key is the feature name and value is the threshold. The threshold here
    // is against the attribution score distance between different time
    // windows.
    map<string, ThresholdConfig> attribution_score_drift_thresholds = 2;
  }

  // The config for integration with Explainable AI. Only applicable if the
  // Model has explanation_spec populated.
  message ExplanationConfig {
    // Output from [BatchPredictionJob][google.cloud.aiplatform.v1.BatchPredictionJob] for the Model Monitoring baseline
    // dataset, which can be used to generate baseline attribution scores.
    message ExplanationBaseline {
      // The storage format of the predictions generated by the
      // BatchPrediction job.
      enum PredictionFormat {
        // Should not be set.
        PREDICTION_FORMAT_UNSPECIFIED = 0;

        // Predictions are in JSONL files.
        JSONL = 2;

        // Predictions are in BigQuery.
        BIGQUERY = 3;
      }

      // The configuration specifying the BatchExplain job output. This can
      // be used to generate the baseline of feature attribution scores.
      oneof destination {
        // Cloud Storage location for BatchExplain output.
        GcsDestination gcs = 2;

        // BigQuery location for BatchExplain output.
        BigQueryDestination bigquery = 3;
      }

      // The storage format of the predictions generated by the
      // BatchPrediction job.
      PredictionFormat prediction_format = 1;
    }

    // Whether to analyze the Explainable AI feature attribution scores.
    // If set to true, Vertex AI will log the feature attributions from the
    // explain response and do skew/drift detection for them.
    bool enable_feature_attributes = 1;

    // Predictions generated by the BatchPredictionJob using the baseline
    // dataset.
    ExplanationBaseline explanation_baseline = 2;
  }

  // Training dataset for models. This field has to be set only if
  // TrainingPredictionSkewDetectionConfig is specified.
  TrainingDataset training_dataset = 1;

  // The config for skew between training data and prediction data.
  TrainingPredictionSkewDetectionConfig training_prediction_skew_detection_config = 2;

  // The config for drift of prediction data.
  PredictionDriftDetectionConfig prediction_drift_detection_config = 3;

  // The config for integration with Explainable AI.
  ExplanationConfig explanation_config = 5;
}

// Next ID: 2
message ModelMonitoringAlertConfig {
  // The config for email alerts.
  message EmailAlertConfig {
    // The email addresses to send the alert to.
    repeated string user_emails = 1;
  }

  oneof alert {
    // Email alert config.
    EmailAlertConfig email_alert_config = 1;
  }
}

// The config for feature monitoring thresholds.
// Next ID: 3
message ThresholdConfig {
  oneof threshold {
    // Specify a threshold value that can trigger the alert.
    // If this threshold config is for feature distribution distance:
    // 1. For a categorical feature, the distribution distance is calculated
    //    by the L-infinity norm.
    // 2. For a numerical feature, the distribution distance is calculated by
    //    the Jensen-Shannon divergence.
    // Each feature must have a non-zero threshold if it needs to be
    // monitored. Otherwise no alert will be triggered for that feature.
    double value = 1;
  }
}

// Sampling Strategy for logging; can be used for both the training and
// prediction datasets.
// Next ID: 2
message SamplingStrategy {
  // Requests are randomly selected.
  message RandomSampleConfig {
    // Sample rate in the range (0, 1].
    double sample_rate = 1;
  }

  // Random sample config. More sampling strategies will be supported later.
  RandomSampleConfig random_sample_config = 1;
}
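// Illustrative usage (a minimal sketch, not part of the API definition): a
// ModelMonitoringObjectiveConfig written in proto text format, monitoring one
// feature for training/prediction skew. The Dataset resource name, the target
// field "label", and the feature name "country" are hypothetical
// placeholders; the 0.3 distribution-distance threshold and the 10% sampling
// rate are arbitrary example values.
//
//   training_dataset {
//     dataset: "projects/my-project/locations/us-central1/datasets/my-dataset"
//     target_field: "label"
//     logging_sampling_strategy {
//       random_sample_config { sample_rate: 0.1 }
//     }
//   }
//   training_prediction_skew_detection_config {
//     skew_thresholds {
//       key: "country"
//       value { value: 0.3 }
//     }
//   }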