// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1beta1;

import "google/api/field_behavior.proto";
import "google/cloud/aiplatform/v1beta1/explanation_metadata.proto";
import "google/protobuf/struct.proto";
import "google/api/annotations.proto";

option go_package = "google.golang.org/genproto/googleapis/cloud/aiplatform/v1beta1;aiplatform";
option java_multiple_files = true;
option java_outer_classname = "ExplanationProto";
option java_package = "com.google.cloud.aiplatform.v1beta1";

// Explanation of a prediction (provided in [PredictResponse.predictions][google.cloud.aiplatform.v1beta1.PredictResponse.predictions])
// produced by the Model on a given [instance][google.cloud.aiplatform.v1beta1.ExplainRequest.instances].
message Explanation {
  // Output only. Feature attributions grouped by predicted outputs.
  //
  // For Models that predict only one output, such as regression Models that
  // predict only one score, there is only one attribution that explains the
  // predicted output. For Models that predict multiple outputs, such as
  // multiclass Models that predict multiple classes, each element explains one
  // specific item. [Attribution.output_index][google.cloud.aiplatform.v1beta1.Attribution.output_index] can be used to identify which
  // output this attribution is explaining.
  //
  // If users set [ExplanationParameters.top_k][google.cloud.aiplatform.v1beta1.ExplanationParameters.top_k], the attributions are sorted
  // by [instance_output_value][google.cloud.aiplatform.v1beta1.Attribution.instance_output_value] in
  // descending order. If [ExplanationParameters.output_indices][google.cloud.aiplatform.v1beta1.ExplanationParameters.output_indices] is specified,
  // the attributions are stored by [Attribution.output_index][google.cloud.aiplatform.v1beta1.Attribution.output_index] in the same
  // order as they appear in the output_indices.
  repeated Attribution attributions = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Aggregated explanation metrics for a Model over a set of instances.
message ModelExplanation {
  // Output only. Aggregated attributions explaining the Model's prediction outputs over the
  // set of instances. The attributions are grouped by outputs.
  //
  // For Models that predict only one output, such as regression Models that
  // predict only one score, there is only one attribution that explains the
  // predicted output. For Models that predict multiple outputs, such as
  // multiclass Models that predict multiple classes, each element explains one
  // specific item. [Attribution.output_index][google.cloud.aiplatform.v1beta1.Attribution.output_index] can be used to identify which
  // output this attribution is explaining.
  //
  // The [baselineOutputValue][google.cloud.aiplatform.v1beta1.Attribution.baseline_output_value],
  // [instanceOutputValue][google.cloud.aiplatform.v1beta1.Attribution.instance_output_value] and
  // [featureAttributions][google.cloud.aiplatform.v1beta1.Attribution.feature_attributions] fields are
  // averaged over the test data.
  //
  // NOTE: Currently AutoML tabular classification Models produce only one
  // attribution, which averages attributions over all the classes it predicts.
  // [Attribution.approximation_error][google.cloud.aiplatform.v1beta1.Attribution.approximation_error] is not populated.
  repeated Attribution mean_attributions = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Attribution that explains a particular prediction output.
message Attribution {
  // Output only. Model predicted output if the input instance is constructed from the
  // baselines of all the features defined in [ExplanationMetadata.inputs][google.cloud.aiplatform.v1beta1.ExplanationMetadata.inputs].
  // The field name of the output is determined by the key in
  // [ExplanationMetadata.outputs][google.cloud.aiplatform.v1beta1.ExplanationMetadata.outputs].
  //
  // If the Model's predicted output has multiple dimensions (rank > 1), this is
  // the value in the output located by [output_index][google.cloud.aiplatform.v1beta1.Attribution.output_index].
  //
  // If there are multiple baselines, their output values are averaged.
  double baseline_output_value = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Model predicted output on the corresponding [explanation
  // instance][google.cloud.aiplatform.v1beta1.ExplainRequest.instances]. The field name of the output is
  // determined by the key in [ExplanationMetadata.outputs][google.cloud.aiplatform.v1beta1.ExplanationMetadata.outputs].
  //
  // If the Model's predicted output has multiple dimensions, this is the value
  // in the output located by [output_index][google.cloud.aiplatform.v1beta1.Attribution.output_index].
  double instance_output_value = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Attributions of each explained feature. Features are extracted from
  // the [prediction instances][google.cloud.aiplatform.v1beta1.ExplainRequest.instances] according to
  // [explanation metadata for inputs][google.cloud.aiplatform.v1beta1.ExplanationMetadata.inputs].
  //
  // The value is a struct whose keys are the names of the features. The values
  // are how much the feature in the [instance][google.cloud.aiplatform.v1beta1.ExplainRequest.instances]
  // contributed to the predicted result.
  //
  // The format of the value is determined by the feature's input format:
  //
  //   * If the feature is a scalar value, the attribution value is a
  //     [floating number][google.protobuf.Value.number_value].
  //
  //   * If the feature is an array of scalar values, the attribution value is
  //     an [array][google.protobuf.Value.list_value].
  //
  //   * If the feature is a struct, the attribution value is a
  //     [struct][google.protobuf.Value.struct_value]. The keys in the
  //     attribution value struct are the same as the keys in the feature
  //     struct. The formats of the values in the attribution struct are
  //     determined by the formats of the values in the feature struct.
  //
  // The [ExplanationMetadata.feature_attributions_schema_uri][google.cloud.aiplatform.v1beta1.ExplanationMetadata.feature_attributions_schema_uri] field,
  // pointed to by the [ExplanationSpec][google.cloud.aiplatform.v1beta1.ExplanationSpec] field of the
  // [Endpoint.deployed_models][google.cloud.aiplatform.v1beta1.Endpoint.deployed_models] object, points to the schema file that
  // describes the features and their attribution values (if it is populated).
  google.protobuf.Value feature_attributions = 3 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The index that locates the explained prediction output.
  //
  // If the prediction output is a scalar value, output_index is not populated.
  // If the prediction output has multiple dimensions, the length of the
  // output_index list is the same as the number of dimensions of the output.
  // The i-th element in output_index is the element index of the i-th dimension
  // of the output vector. Indices start from 0.
  repeated int32 output_index = 4 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The display name of the output identified by [output_index][google.cloud.aiplatform.v1beta1.Attribution.output_index], e.g. the
  // predicted class name by a multi-classification Model.
  //
  // This field is populated if and only if the Model predicts display names as
  // a separate field along with the explained output. The predicted display
  // name must have the same shape as the explained output, and can be located
  // using output_index.
  string output_display_name = 5 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Error of [feature_attributions][google.cloud.aiplatform.v1beta1.Attribution.feature_attributions] caused by approximation used in the
  // explanation method. Lower value means more precise attributions.
  //
  // * For [Sampled Shapley
  // attribution][google.cloud.aiplatform.v1beta1.ExplanationParameters.sampled_shapley_attribution], increasing
  // [path_count][google.cloud.aiplatform.v1beta1.SampledShapleyAttribution.path_count] may reduce the error.
  // * For [Integrated Gradients
  // attribution][google.cloud.aiplatform.v1beta1.ExplanationParameters.integrated_gradients_attribution],
  // increasing [step_count][google.cloud.aiplatform.v1beta1.IntegratedGradientsAttribution.step_count] may
  // reduce the error.
  // * For [XRAI
  // attribution][google.cloud.aiplatform.v1beta1.ExplanationParameters.xrai_attribution], increasing
  // [step_count][google.cloud.aiplatform.v1beta1.XraiAttribution.step_count] may reduce the error.
  //
  // Refer to the AI Explanations Whitepaper for more details:
  //
  // https://storage.googleapis.com/cloud-ai-whitepapers/AI%20Explainability%20Whitepaper.pdf
  double approximation_error = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Name of the explain output. Specified as the key in
  // [ExplanationMetadata.outputs][google.cloud.aiplatform.v1beta1.ExplanationMetadata.outputs].
  string output_name = 7 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Specification of Model explanation. Pairs the parameters that configure how
// predictions are explained with the metadata describing the Model's input and
// output.
message ExplanationSpec {
  // Required. Parameters that configure explaining of the Model's predictions.
  ExplanationParameters parameters = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. Metadata describing the Model's input and output for explanation.
  ExplanationMetadata metadata = 2 [(google.api.field_behavior) = REQUIRED];
}

// Parameters to configure explaining for Model's predictions.
message ExplanationParameters {
  // The attribution method used to explain predictions. As members of a oneof,
  // at most one of these fields can be set at a time.
  oneof method {
    // An attribution method that approximates Shapley values for features that
    // contribute to the label being predicted. A sampling strategy is used to
    // approximate the value rather than considering all subsets of features.
    // Refer to this paper for more details: https://arxiv.org/abs/1306.4265.
    SampledShapleyAttribution sampled_shapley_attribution = 1;

    // An attribution method that computes Aumann-Shapley values taking
    // advantage of the model's fully differentiable structure. Refer to this
    // paper for more details: https://arxiv.org/abs/1703.01365
    IntegratedGradientsAttribution integrated_gradients_attribution = 2;

    // An attribution method that redistributes Integrated Gradients
    // attribution to segmented regions, taking advantage of the model's fully
    // differentiable structure. Refer to this paper for
    // more details: https://arxiv.org/abs/1906.02825
    //
    // XRAI currently performs better on natural images, like a picture of a
    // house or an animal. If the images are taken in artificial environments,
    // like a lab or manufacturing line, or from diagnostic equipment, like
    // x-rays or quality-control cameras, use Integrated Gradients instead.
    XraiAttribution xrai_attribution = 3;
  }

  // If populated, returns attributions for top K indices of outputs
  // (defaults to 1). Only applies to Models that predict more than one output
  // (e.g., multi-class Models). When set to -1, returns explanations for all
  // outputs.
  int32 top_k = 4;

  // If populated, only returns attributions that have
  // [output_index][google.cloud.aiplatform.v1beta1.Attribution.output_index] contained in output_indices. It
  // must be an ndarray of integers, with the same shape as the output it's
  // explaining.
  //
  // If not populated, returns attributions for [top_k][google.cloud.aiplatform.v1beta1.ExplanationParameters.top_k] indices of outputs.
  // If neither top_k nor output_indices is populated, returns the argmax
  // index of the outputs.
  //
  // Only applicable to Models that predict multiple outputs (e.g., multi-class
  // Models that predict multiple classes).
  google.protobuf.ListValue output_indices = 5;
}

// An attribution method that approximates Shapley values for features that
// contribute to the label being predicted. A sampling strategy is used to
// approximate the value rather than considering all subsets of features.
message SampledShapleyAttribution {
  // Required. The number of feature permutations to consider when approximating the
  // Shapley values.
  //
  // The valid range of its value is [1, 50], inclusive.
  int32 path_count = 1 [(google.api.field_behavior) = REQUIRED];
}

// An attribution method that computes the Aumann-Shapley value taking advantage
// of the model's fully differentiable structure. Refer to this paper for
// more details: https://arxiv.org/abs/1703.01365
message IntegratedGradientsAttribution {
  // Required. The number of steps for approximating the path integral.
  // A good starting value is 50; gradually increase it until the
  // sum to diff property is met within the desired error range.
  //
  // The valid range of its value is [1, 100], inclusive.
  int32 step_count = 1 [(google.api.field_behavior) = REQUIRED];

  // Config for SmoothGrad approximation of gradients.
  //
  // When enabled, the gradients are approximated by averaging the gradients
  // from noisy samples in the vicinity of the inputs. Adding
  // noise can help improve the computed gradients. Refer to this paper for more
  // details: https://arxiv.org/pdf/1706.03825.pdf
  SmoothGradConfig smooth_grad_config = 2;
}

// An explanation method that redistributes Integrated Gradients
// attributions to segmented regions, taking advantage of the model's fully
// differentiable structure. Refer to this paper for more details:
// https://arxiv.org/abs/1906.02825
//
// Only supports image Models ([modality][InputMetadata.modality] is IMAGE).
message XraiAttribution {
  // Required. The number of steps for approximating the path integral.
  // A good starting value is 50; gradually increase it until the
  // sum to diff property is met within the desired error range.
  //
  // The valid range of its value is [1, 100], inclusive.
  int32 step_count = 1 [(google.api.field_behavior) = REQUIRED];

  // Config for SmoothGrad approximation of gradients.
  //
  // When enabled, the gradients are approximated by averaging the gradients
  // from noisy samples in the vicinity of the inputs. Adding
  // noise can help improve the computed gradients. Refer to this paper for more
  // details: https://arxiv.org/pdf/1706.03825.pdf
  SmoothGradConfig smooth_grad_config = 2;
}

// Config for SmoothGrad approximation of gradients.
//
// When enabled, the gradients are approximated by averaging the gradients from
// noisy samples in the vicinity of the inputs. Adding noise can help improve
// the computed gradients. Refer to this paper for more details:
// https://arxiv.org/pdf/1706.03825.pdf
message SmoothGradConfig {
  // Represents the standard deviation of the Gaussian kernel
  // that will be used to add noise to the interpolated inputs
  // prior to computing gradients.
  oneof GradientNoiseSigma {
    // This is a single float value and will be used to add noise to all the
    // features. Use this field when all features are normalized to have the
    // same distribution: scale to range [0, 1], [-1, 1] or z-scoring, where
    // features are normalized to have 0-mean and 1-variance. Refer to
    // this doc for more details about normalization:
    //
    // https://developers.google.com/machine-learning/data-prep/transform/normalization.
    //
    // For best results the recommended value is about 10% - 20% of the standard
    // deviation of the input feature. Refer to section 3.2 of the SmoothGrad
    // paper: https://arxiv.org/pdf/1706.03825.pdf. Defaults to 0.1.
    //
    // If the distribution is different per feature, set
    // [feature_noise_sigma][google.cloud.aiplatform.v1beta1.SmoothGradConfig.feature_noise_sigma] instead
    // for each feature.
    float noise_sigma = 1;

    // This is similar to [noise_sigma][google.cloud.aiplatform.v1beta1.SmoothGradConfig.noise_sigma], but
    // provides additional flexibility. A separate noise sigma can be provided
    // for each feature, which is useful if their distributions are different.
    // No noise is added to features that are not set. If this field is unset,
    // [noise_sigma][google.cloud.aiplatform.v1beta1.SmoothGradConfig.noise_sigma] will be used for all
    // features.
    FeatureNoiseSigma feature_noise_sigma = 2;
  }

  // The number of gradient samples to use for
  // approximation. The higher this number, the more accurate the gradient
  // is, but the runtime complexity increases by this factor as well.
  // The valid range of its value is [1, 50]. Defaults to 3.
  int32 noisy_sample_count = 3;
}

// Noise sigma by features. Noise sigma represents the standard deviation of the
// Gaussian kernel that will be used to add noise to interpolated inputs prior
// to computing gradients.
message FeatureNoiseSigma {
  // Noise sigma for a single feature.
  message NoiseSigmaForFeature {
    // The name of the input feature for which noise sigma is provided. The
    // features are defined in
    // [explanation metadata inputs][google.cloud.aiplatform.v1beta1.ExplanationMetadata.inputs].
    string name = 1;

    // This represents the standard deviation of the Gaussian kernel that will
    // be used to add noise to the feature prior to computing gradients. Similar
    // to [noise_sigma][google.cloud.aiplatform.v1beta1.SmoothGradConfig.noise_sigma] but represents the
    // noise added to the current feature. Defaults to 0.1.
    float sigma = 2;
  }

  // Noise sigma per feature. No noise is added to features that are not set.
  repeated NoiseSigmaForFeature noise_sigma = 1;
}