// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1beta1;

import "google/api/resource.proto";
import "google/cloud/aiplatform/v1beta1/io.proto";

option csharp_namespace = "Google.Cloud.AIPlatform.V1Beta1";
option go_package = "cloud.google.com/go/aiplatform/apiv1beta1/aiplatformpb;aiplatformpb";
option java_multiple_files = true;
option java_outer_classname = "ModelMonitoringProto";
option java_package = "com.google.cloud.aiplatform.v1beta1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1beta1";
option ruby_package = "Google::Cloud::AIPlatform::V1beta1";
option (google.api.resource_definition) = {
  type: "monitoring.googleapis.com/NotificationChannel"
  pattern: "projects/{project}/notificationChannels/{notification_channel}"
};

// The model monitoring configuration used for Batch Prediction Job.
message ModelMonitoringConfig {
  // Model monitoring objective config.
  repeated ModelMonitoringObjectiveConfig objective_configs = 3;

  // Model monitoring alert config.
  ModelMonitoringAlertConfig alert_config = 2;

  // YAML schema file uri in Cloud Storage describing the format of a single
  // instance that you want Tensorflow Data Validation (TFDV) to analyze.
  //
  // If there are any data type differences between predict instance and TFDV
  // instance, this field can be used to override the schema.
  // For models trained with Vertex AI, this field must be set as all the
  // fields in predict instance formatted as string.
  string analysis_instance_schema_uri = 4;

  // A Google Cloud Storage location for batch prediction model monitoring to
  // dump statistics and anomalies.
  // If not provided, a folder will be created in customer project to hold
  // statistics and anomalies.
  GcsDestination stats_anomalies_base_directory = 5;
}

// The objective configuration for model monitoring, including the information
// needed to detect anomalies for one particular model.
message ModelMonitoringObjectiveConfig {
  // Training Dataset information.
  message TrainingDataset {
    // Where the training data is read from. At most one member may be set.
    oneof data_source {
      // The resource name of the Dataset used to train this Model.
      string dataset = 3 [(google.api.resource_reference) = {
        type: "aiplatform.googleapis.com/Dataset"
      }];

      // The Google Cloud Storage uri of the unmanaged Dataset used to train
      // this Model.
      GcsSource gcs_source = 4;

      // The BigQuery table of the unmanaged Dataset used to train this
      // Model.
      BigQuerySource bigquery_source = 5;
    }

    // Data format of the dataset, only applicable if the input is from
    // Google Cloud Storage.
    // The possible formats are:
    //
    // "tf-record"
    // The source file is a TFRecord file.
    //
    // "csv"
    // The source file is a CSV file.
    //
    // "jsonl"
    // The source file is a JSONL file.
    string data_format = 2;

    // The target field name the model is to predict.
    // This field will be excluded when doing Predict and (or) Explain for the
    // training data.
    string target_field = 6;

    // Strategy to sample data from Training Dataset.
    // If not set, we process the whole dataset.
    SamplingStrategy logging_sampling_strategy = 7;
  }

  // The config for Training & Prediction data skew detection. It specifies the
  // training dataset sources and the skew detection parameters.
  message TrainingPredictionSkewDetectionConfig {
    // Key is the feature name and value is the threshold. If a feature needs to
    // be monitored for skew, a value threshold must be configured for that
    // feature. The threshold here is against feature distribution distance
    // between the training and prediction feature.
    map<string, ThresholdConfig> skew_thresholds = 1;

    // Key is the feature name and value is the threshold. The threshold here is
    // against attribution score distance between the training and prediction
    // feature.
    map<string, ThresholdConfig> attribution_score_skew_thresholds = 2;

    // Skew anomaly detection threshold used by all features.
    // When the per-feature thresholds are not set, this field can be used to
    // specify a threshold for all features.
    ThresholdConfig default_skew_threshold = 6;
  }

  // The config for Prediction data drift detection.
  message PredictionDriftDetectionConfig {
    // Key is the feature name and value is the threshold. If a feature needs to
    // be monitored for drift, a value threshold must be configured for that
    // feature. The threshold here is against feature distribution distance
    // between different time windows.
    map<string, ThresholdConfig> drift_thresholds = 1;

    // Key is the feature name and value is the threshold. The threshold here is
    // against attribution score distance between different time windows.
    map<string, ThresholdConfig> attribution_score_drift_thresholds = 2;

    // Drift anomaly detection threshold used by all features.
    // When the per-feature thresholds are not set, this field can be used to
    // specify a threshold for all features.
    ThresholdConfig default_drift_threshold = 5;
  }

  // The config for integrating with Vertex Explainable AI. Only applicable if
  // the Model has explanation_spec populated.
  message ExplanationConfig {
    // Output from
    // [BatchPredictionJob][google.cloud.aiplatform.v1beta1.BatchPredictionJob]
    // for Model Monitoring baseline dataset, which can be used to generate
    // baseline attribution scores.
    message ExplanationBaseline {
      // The storage format of the predictions generated by the BatchPrediction
      // job.
      enum PredictionFormat {
        // Should not be set.
        PREDICTION_FORMAT_UNSPECIFIED = 0;

        // Predictions are in JSONL files.
        JSONL = 2;

        // Predictions are in BigQuery.
        BIGQUERY = 3;
      }

      // The configuration specifying the BatchExplain job output. This can be
      // used to generate the baseline of feature attribution scores.
      oneof destination {
        // Cloud Storage location for BatchExplain output.
        GcsDestination gcs = 2;

        // BigQuery location for BatchExplain output.
        BigQueryDestination bigquery = 3;
      }

      // The storage format of the predictions generated by the BatchPrediction
      // job.
      PredictionFormat prediction_format = 1;
    }

    // Whether to analyze the Vertex Explainable AI feature attribute scores.
    // If set to true, Vertex AI will log the feature attributions from the
    // explain response and do the skew/drift detection for them.
    bool enable_feature_attributes = 1;

    // Predictions generated by the BatchPredictionJob using baseline dataset.
    ExplanationBaseline explanation_baseline = 2;
  }

  // Training dataset for models. This field has to be set only if
  // TrainingPredictionSkewDetectionConfig is specified.
  TrainingDataset training_dataset = 1;

  // The config for skew between training data and prediction data.
  TrainingPredictionSkewDetectionConfig
      training_prediction_skew_detection_config = 2;

  // The config for drift of prediction data.
  PredictionDriftDetectionConfig prediction_drift_detection_config = 3;

  // The config for integrating with Vertex Explainable AI.
  ExplanationConfig explanation_config = 5;
}

// The alert config for model monitoring.
message ModelMonitoringAlertConfig {
  // The config for email alert.
  message EmailAlertConfig {
    // The email addresses to send the alert.
    repeated string user_emails = 1;
  }

  // The alert channel configuration. At most one member may be set.
  oneof alert {
    // Email alert config.
    EmailAlertConfig email_alert_config = 1;
  }

  // Dump the anomalies to Cloud Logging. The anomalies will be put to json
  // payload encoded from proto
  // [google.cloud.aiplatform.logging.ModelMonitoringAnomaliesLogEntry][].
  // This can be further exported to Pub/Sub or any other services supported
  // by Cloud Logging.
  bool enable_logging = 2;

  // Resource names of the NotificationChannels to send alert.
  // Must be of the format
  // `projects/{project}/notificationChannels/{notification_channel}`
  repeated string notification_channels = 3 [(google.api.resource_reference) = {
    type: "monitoring.googleapis.com/NotificationChannel"
  }];
}

// The config for feature monitoring threshold.
message ThresholdConfig {
  // The threshold setting. At most one member may be set.
  oneof threshold {
    // Specify a threshold value that can trigger the alert.
    // If this threshold config is for feature distribution distance:
    //   1. For categorical feature, the distribution distance is calculated by
    //      L-infinity norm.
    //   2. For numerical feature, the distribution distance is calculated by
    //      Jensen–Shannon divergence.
    // Each feature must have a non-zero threshold if they need to be monitored.
    // Otherwise no alert will be triggered for that feature.
    double value = 1;
  }
}

// Sampling Strategy for logging, can be for both training and prediction
// dataset.
message SamplingStrategy {
  // Requests are randomly selected.
  message RandomSampleConfig {
    // Sample rate, in the range (0, 1].
    double sample_rate = 1;
  }

  // Random sample config. Will support more sampling strategies later.
  RandomSampleConfig random_sample_config = 1;
}