// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
syntax = "proto3";
package google.cloud.datalabeling.v1beta1;
import "google/api/resource.proto";
import "google/cloud/datalabeling/v1beta1/annotation.proto";
import "google/cloud/datalabeling/v1beta1/annotation_spec_set.proto";
import "google/protobuf/timestamp.proto";
option csharp_namespace = "Google.Cloud.DataLabeling.V1Beta1";
option go_package = "cloud.google.com/go/datalabeling/apiv1beta1/datalabelingpb;datalabelingpb";
option java_multiple_files = true;
option java_package = "com.google.cloud.datalabeling.v1beta1";
option php_namespace = "Google\\Cloud\\DataLabeling\\V1beta1";
option ruby_package = "Google::Cloud::DataLabeling::V1beta1";
// Describes an evaluation comparing a machine learning model's predictions to
// ground truth labels. Created when an [EvaluationJob][google.cloud.datalabeling.v1beta1.EvaluationJob] runs successfully.
message Evaluation {
option (google.api.resource) = {
type: "datalabeling.googleapis.com/Evaluation"
pattern: "projects/{project}/datasets/{dataset}/evaluations/{evaluation}"
};
// Output only. Resource name of an evaluation. The name has the following
// format:
//
// "projects/{project_id}/datasets/{dataset_id}/evaluations/{evaluation_id}'
string name = 1;
// Output only. Options used in the evaluation job that created this
// evaluation.
EvaluationConfig config = 2;
// Output only. Timestamp for when the evaluation job that created this
// evaluation ran.
google.protobuf.Timestamp evaluation_job_run_time = 3;
// Output only. Timestamp for when this evaluation was created.
google.protobuf.Timestamp create_time = 4;
// Output only. Metrics comparing predictions to ground truth labels.
EvaluationMetrics evaluation_metrics = 5;
// Output only. Type of task that the model version being evaluated performs,
// as defined in the
// [evaluationJobConfig.inputConfig.annotationType][google.cloud.datalabeling.v1beta1.EvaluationJobConfig.input_config]
// field of the evaluation job that created this evaluation.
AnnotationType annotation_type = 6;
// Output only. The number of items in the ground truth dataset that were used
// for this evaluation. Only populated when the evaluation is for certain
// AnnotationTypes.
int64 evaluated_item_count = 7;
}
// Configuration details used for calculating evaluation metrics and creating an
// [Evaluation][google.cloud.datalabeling.v1beta1.Evaluation].
message EvaluationConfig {
// Vertical-specific options for general metrics.
oneof vertical_option {
// Only specify this field if the related model performs image object
// detection (`IMAGE_BOUNDING_BOX_ANNOTATION`). Describes how to evaluate
// bounding boxes.
BoundingBoxEvaluationOptions bounding_box_evaluation_options = 1;
}
}
// Options for evaluating matches between predicted and ground truth bounding
// boxes.
message BoundingBoxEvaluationOptions {
// Minimum
// [intersection-over-union (IOU)](/vision/automl/object-detection/docs/evaluate#intersection-over-union)
// required for 2 bounding boxes to be considered a match. This must be a
// number between 0 and 1.
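//
// (IOU is the area of the boxes' intersection divided by the area of their
// union; for example, an `iou_threshold` of 0.5 counts two boxes as a match
// only if their overlap covers at least half of their union.)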
float iou_threshold = 1;
}
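// Metrics comparing a model's predictions to ground truth labels, calculated
// when an evaluation job runs.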
message EvaluationMetrics {
// Common metrics covering most general cases.
oneof metrics {
ClassificationMetrics classification_metrics = 1;
ObjectDetectionMetrics object_detection_metrics = 2;
}
}
// Metrics calculated for a classification model.
message ClassificationMetrics {
// Precision-recall curve based on ground truth labels, predicted labels, and
// scores for the predicted labels.
PrCurve pr_curve = 1;
// Confusion matrix of predicted labels vs. ground truth labels.
ConfusionMatrix confusion_matrix = 2;
}
// Metrics calculated for an image object detection (bounding box) model.
message ObjectDetectionMetrics {
// Precision-recall curve.
PrCurve pr_curve = 1;
}
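// A precision-recall curve, either for a single label or aggregated over all
// labels.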
message PrCurve {
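// A single point on the precision-recall curve, calculated for one
// `confidence_threshold`.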
message ConfidenceMetricsEntry {
// Threshold used for this entry.
//
// For classification tasks, this is a classification threshold: a
// predicted label is categorized as positive or negative (in the context of
// this point on the PR curve) based on whether the label's score meets this
// threshold.
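//
// For example, with a classification threshold of 0.8, a label scored at 0.9
// counts as a positive prediction and a label scored at 0.7 counts as a
// negative one.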
//
// For image object detection (bounding box) tasks, this is the
// [intersection-over-union (IOU)](/vision/automl/object-detection/docs/evaluate#intersection-over-union)
// threshold for the context of this point on the PR curve.
float confidence_threshold = 1;
// Recall value.
float recall = 2;
// Precision value.
float precision = 3;
// Harmonic mean of recall and precision.
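// Computed as 2 * precision * recall / (precision + recall).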
float f1_score = 4;
// Recall value computed using only the highest-scoring label for each item.
float recall_at1 = 5;
// Precision value computed using only the highest-scoring label for each item.
float precision_at1 = 6;
// The harmonic mean of [recall_at1][google.cloud.datalabeling.v1beta1.PrCurve.ConfidenceMetricsEntry.recall_at1] and [precision_at1][google.cloud.datalabeling.v1beta1.PrCurve.ConfidenceMetricsEntry.precision_at1].
float f1_score_at1 = 7;
// Recall value computed using only the 5 highest-scoring labels for each item.
float recall_at5 = 8;
// Precision value computed using only the 5 highest-scoring labels for each item.
float precision_at5 = 9;
// The harmonic mean of [recall_at5][google.cloud.datalabeling.v1beta1.PrCurve.ConfidenceMetricsEntry.recall_at5] and [precision_at5][google.cloud.datalabeling.v1beta1.PrCurve.ConfidenceMetricsEntry.precision_at5].
float f1_score_at5 = 10;
}
// The annotation spec of the label for which the precision-recall curve is
// calculated. If this field is empty, the precision-recall curve is an
// aggregate curve for all labels.
AnnotationSpec annotation_spec = 1;
// Area under the precision-recall curve. Not to be confused with area under
// a receiver operating characteristic (ROC) curve.
float area_under_curve = 2;
// Entries that make up the precision-recall graph. Each entry is a "point" on
// the graph drawn for a different `confidence_threshold`.
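// For example, in a classification task the entry with a
// `confidence_threshold` of 0.5 holds the precision and recall obtained when
// only labels scoring at least 0.5 are treated as positive predictions.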
repeated ConfidenceMetricsEntry confidence_metrics_entries = 3;
// Mean average precision of this curve.
float mean_average_precision = 4;
}
// Confusion matrix of the classification model's predictions. Only applicable
// when the metrics entry aggregates multiple labels; not applicable when the
// entry is for a single label.
message ConfusionMatrix {
message ConfusionMatrixEntry {
// The annotation spec of a predicted label.
AnnotationSpec annotation_spec = 1;
// Number of items predicted to have this label. (The ground truth label for
// these items is the `Row.annotationSpec` of this entry's parent.)
int32 item_count = 2;
}
// A row in the confusion matrix. Each entry in this row has the same
// ground truth label.
message Row {
// The annotation spec of the ground truth label for this row.
AnnotationSpec annotation_spec = 1;
// A list of the confusion matrix entries. One entry for each possible
// predicted label.
repeated ConfusionMatrixEntry entries = 2;
}
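// The rows of the confusion matrix, one per ground truth label. For example,
// a row for the label "cat" with entries of 90 for "cat" and 10 for "dog"
// means that of the items labeled "cat" in the ground truth, the model
// predicted "cat" for 90 and "dog" for 10.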
repeated Row row = 1;
}