// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.visionai.v1;

import "google/protobuf/struct.proto";
import "google/protobuf/timestamp.proto";

option csharp_namespace = "Google.Cloud.VisionAI.V1";
option go_package = "cloud.google.com/go/visionai/apiv1/visionaipb;visionaipb";
option java_multiple_files = true;
option java_outer_classname = "AnnotationsProto";
option java_package = "com.google.cloud.visionai.v1";
option php_namespace = "Google\\Cloud\\VisionAI\\V1";
option ruby_package = "Google::Cloud::VisionAI::V1";

// Enum describing all possible types of a stream annotation.
enum StreamAnnotationType {
  // Type UNSPECIFIED.
  STREAM_ANNOTATION_TYPE_UNSPECIFIED = 0;

  // active_zone annotation defines a polygon on top of the content from an
  // image/video based stream; subsequent processing will only focus on the
  // content inside the active zone.
  STREAM_ANNOTATION_TYPE_ACTIVE_ZONE = 1;

  // crossing_line annotation defines a polyline on top of the content from an
  // image/video based Vision AI stream; events happening across the line will
  // be captured. For example, the counts of people who go across the line
  // in Occupancy Analytic Processor.
  STREAM_ANNOTATION_TYPE_CROSSING_LINE = 2;
}

// Output format for Personal Protective Equipment Detection Operator.
message PersonalProtectiveEquipmentDetectionOutput {
  // The entity info for annotations from person detection prediction result.
  message PersonEntity {
    // Entity id.
    int64 person_entity_id = 1;
  }

  // The entity info for annotations from PPE detection prediction result.
  message PPEEntity {
    // Label id.
    int64 ppe_label_id = 1;

    // Human readable string of the label (Examples: helmet, glove, mask).
    string ppe_label_string = 2;

    // Human readable string of the super category label (Examples: head_cover,
    // hands_cover, face_cover).
    string ppe_supercategory_label_string = 3;

    // Entity id.
    int64 ppe_entity_id = 4;
  }

  // Bounding Box in the normalized coordinates.
  message NormalizedBoundingBox {
    // Min in x coordinate.
    float xmin = 1;

    // Min in y coordinate.
    float ymin = 2;

    // Width of the bounding box.
    float width = 3;

    // Height of the bounding box.
    float height = 4;
  }

  // PersonIdentified box contains the location and the entity info of the
  // person.
  message PersonIdentifiedBox {
    // A unique id for this box.
    int64 box_id = 1;

    // Bounding Box in the normalized coordinates.
    NormalizedBoundingBox normalized_bounding_box = 2;

    // Confidence score associated with this box.
    float confidence_score = 3;

    // Person entity info.
    PersonEntity person_entity = 4;
  }

  // PPEIdentified box contains the location and the entity info of the PPE.
  message PPEIdentifiedBox {
    // A unique id for this box.
    int64 box_id = 1;

    // Bounding Box in the normalized coordinates.
    NormalizedBoundingBox normalized_bounding_box = 2;

    // Confidence score associated with this box.
    float confidence_score = 3;

    // PPE entity info.
    PPEEntity ppe_entity = 4;
  }

  // Detected Person contains the detected person and their associated
  // PPEs and their protecting information.
  message DetectedPerson {
    // The id of detected person.
    int64 person_id = 1;

    // The info of detected person identified box.
    PersonIdentifiedBox detected_person_identified_box = 2;

    // The info of detected person associated PPE identified boxes.
    repeated PPEIdentifiedBox detected_ppe_identified_boxes = 3;

    // Coverage score for each body part. Each score below is declared
    // `optional`, so an unset score can be told apart from a score of 0.

    // Coverage score for face.
    optional float face_coverage_score = 4;

    // Coverage score for eyes.
    optional float eyes_coverage_score = 5;

    // Coverage score for head.
    optional float head_coverage_score = 6;

    // Coverage score for hands.
    optional float hands_coverage_score = 7;

    // Coverage score for body.
    optional float body_coverage_score = 8;

    // Coverage score for feet.
    optional float feet_coverage_score = 9;
  }

  // Current timestamp.
  google.protobuf.Timestamp current_time = 1;

  // A list of DetectedPersons.
  repeated DetectedPerson detected_persons = 2;
}

// Prediction output format for Generic Object Detection.
message ObjectDetectionPredictionResult {
  // The entity info for annotations from object detection prediction result.
  message Entity {
    // Label id.
    int64 label_id = 1;

    // Human readable string of the label.
    string label_string = 2;
  }

  // Identified box contains location and the entity of the object.
  message IdentifiedBox {
    // Bounding Box in the normalized coordinates.
    message NormalizedBoundingBox {
      // Min in x coordinate.
      float xmin = 1;

      // Min in y coordinate.
      float ymin = 2;

      // Width of the bounding box.
      float width = 3;

      // Height of the bounding box.
      float height = 4;
    }

    // A unique id for this box.
    int64 box_id = 1;

    // Bounding Box in the normalized coordinates.
    NormalizedBoundingBox normalized_bounding_box = 2;

    // Confidence score associated with this box.
    float confidence_score = 3;

    // Entity of this box.
    Entity entity = 4;
  }

  // Current timestamp.
  google.protobuf.Timestamp current_time = 1;

  // A list of identified boxes.
  repeated IdentifiedBox identified_boxes = 2;
}

// Prediction output format for Image Object Detection.
message ImageObjectDetectionPredictionResult {
  // The resource IDs of the AnnotationSpecs that had been identified, ordered
  // by the confidence score in descending order. It is the id segment instead
  // of the full resource name.
  repeated int64 ids = 1;

  // The display names of the AnnotationSpecs that had been identified, order
  // matches the IDs.
  repeated string display_names = 2;

  // The Model's confidences in correctness of the predicted IDs, higher value
  // means higher confidence. Order matches the IDs.
  repeated float confidences = 3;

  // Bounding boxes, i.e. the rectangles over the image, that pinpoint
  // the found AnnotationSpecs. Given in order that matches the IDs. Each
  // bounding box is an array of 4 numbers `xMin`, `xMax`, `yMin`, and
  // `yMax`, which represent the extremal coordinates of the box. They are
  // relative to the image size, and the point 0,0 is in the top left
  // of the image.
  repeated google.protobuf.ListValue bboxes = 4;
}

// Prediction output format for Image and Text Classification.
message ClassificationPredictionResult {
  // The resource IDs of the AnnotationSpecs that had been identified.
  repeated int64 ids = 1;

  // The display names of the AnnotationSpecs that had been identified, order
  // matches the IDs.
  repeated string display_names = 2;

  // The Model's confidences in correctness of the predicted IDs, higher value
  // means higher confidence. Order matches the IDs.
  repeated float confidences = 3;
}

// Prediction output format for Image Segmentation.
message ImageSegmentationPredictionResult {
  // A PNG image where each pixel in the mask represents the category in which
  // the pixel in the original image was predicted to belong to. The size of
  // this image will be the same as the original image. The mapping between the
  // AnnotationSpec and the color can be found in model's metadata. The model
  // will choose the most likely category and if none of the categories reach
  // the confidence threshold, the pixel will be marked as background.
  string category_mask = 1;

  // A one channel image which is encoded as an 8bit lossless PNG. The size of
  // the image will be the same as the original image. For a specific pixel,
  // darker color means less confidence in correctness of the category in the
  // categoryMask for the corresponding pixel. Black means no confidence and
  // white means complete confidence.
  string confidence_mask = 2;
}

// Prediction output format for Video Action Recognition.
message VideoActionRecognitionPredictionResult {
  // Each IdentifiedAction is one particular identification of an action
  // specified with the AnnotationSpec id, display_name and the associated
  // confidence score.
  message IdentifiedAction {
    // The resource ID of the AnnotationSpec that had been identified.
    string id = 1;

    // The display name of the AnnotationSpec that had been identified.
    string display_name = 2;

    // The Model's confidence in the correctness of this identification, higher
    // value means higher confidence.
    float confidence = 3;
  }

  // The beginning, inclusive, of the video's time segment in which the
  // actions have been identified.
  google.protobuf.Timestamp segment_start_time = 1;

  // The end, inclusive, of the video's time segment in which the actions have
  // been identified. Particularly, if the end is the same as the start, it
  // means the identification happens on a specific video frame.
  google.protobuf.Timestamp segment_end_time = 2;

  // All of the actions identified in the time range.
  repeated IdentifiedAction actions = 3;
}

// Prediction output format for Video Object Tracking.
message VideoObjectTrackingPredictionResult {
  // Bounding box for detected object. I.e. the rectangle over the video frame
  // pinpointing the found AnnotationSpec. The coordinates are relative to the
  // frame size, and the point 0,0 is in the top left of the frame.
  message BoundingBox {
    // The leftmost coordinate of the bounding box.
    float x_min = 1;

    // The rightmost coordinate of the bounding box.
    float x_max = 2;

    // The topmost coordinate of the bounding box.
    float y_min = 3;

    // The bottommost coordinate of the bounding box.
    float y_max = 4;
  }

  // Each DetectedObject is one particular identification of an object
  // specified with the AnnotationSpec id and display_name, the bounding box,
  // the associated confidence score and the corresponding track_id.
  message DetectedObject {
    // The resource ID of the AnnotationSpec that had been identified.
    string id = 1;

    // The display name of the AnnotationSpec that had been identified.
    string display_name = 2;

    // Bounding box.
    BoundingBox bounding_box = 3;

    // The Model's confidence in the correctness of this identification, higher
    // value means higher confidence.
    float confidence = 4;

    // The same object may be identified on multiple frames which are typically
    // adjacent. The set of frames where a particular object has been detected
    // form a track. This track_id can be used to trace down all frames for a
    // detected object.
    int64 track_id = 5;
  }

  // The beginning, inclusive, of the video's time segment in which the
  // current identifications happen.
  google.protobuf.Timestamp segment_start_time = 1;

  // The end, inclusive, of the video's time segment in which the current
  // identifications happen. Particularly, if the end is the same as the start,
  // it means the identifications happen on a specific video frame.
  google.protobuf.Timestamp segment_end_time = 2;

  // All of the objects detected in the specified time range.
  repeated DetectedObject objects = 3;
}

// Prediction output format for Video Classification.
message VideoClassificationPredictionResult {
  // Each IdentifiedClassification is one particular identification of a
  // classification specified with the AnnotationSpec id and display_name,
  // and the associated confidence score.
  message IdentifiedClassification {
    // The resource ID of the AnnotationSpec that had been identified.
    string id = 1;

    // The display name of the AnnotationSpec that had been identified.
    string display_name = 2;

    // The Model's confidence in the correctness of this identification, higher
    // value means higher confidence.
    float confidence = 3;
  }

  // The beginning, inclusive, of the video's time segment in which the
  // classifications have been identified.
  google.protobuf.Timestamp segment_start_time = 1;

  // The end, inclusive, of the video's time segment in which the
  // classifications have been identified. Particularly, if the end is the same
  // as the start, it means the identification happens on a specific video
  // frame.
  google.protobuf.Timestamp segment_end_time = 2;

  // All of the classifications identified in the time range.
  repeated IdentifiedClassification classifications = 3;
}

// The prediction result proto for occupancy counting.
message OccupancyCountingPredictionResult {
  // The entity info for annotations from occupancy counting operator.
  message Entity {
    // Label id.
    int64 label_id = 1;

    // Human readable string of the label.
    string label_string = 2;
  }

  // Identified box contains location and the entity of the object.
  message IdentifiedBox {
    // Bounding Box in the normalized coordinates.
    message NormalizedBoundingBox {
      // Min in x coordinate.
      float xmin = 1;

      // Min in y coordinate.
      float ymin = 2;

      // Width of the bounding box.
      float width = 3;

      // Height of the bounding box.
      float height = 4;
    }

    // A unique id for this box.
    int64 box_id = 1;

    // Bounding Box in the normalized coordinates.
    NormalizedBoundingBox normalized_bounding_box = 2;

    // Confidence score associated with this box.
    float score = 3;

    // Entity of this box.
    Entity entity = 4;

    // A unique id to identify a track. It should be consistent across frames.
    // It only exists if tracking is enabled.
    int64 track_id = 5;
  }

  // The statistics info for annotations from occupancy counting operator.
  message Stats {
    // The object info and instant count for annotations from occupancy counting
    // operator.
    message ObjectCount {
      // Entity of this object.
      Entity entity = 1;

      // Count of the object.
      int32 count = 2;
    }

    // The object info and accumulated count for annotations from occupancy
    // counting operator.
    message AccumulatedObjectCount {
      // The start time of the accumulated count.
      google.protobuf.Timestamp start_time = 1;

      // The object count for the accumulated count.
      ObjectCount object_count = 2;
    }

    // Message for Crossing line count.
    message CrossingLineCount {
      // Line annotation from the user.
      StreamAnnotation annotation = 1;

      // The direction that follows the right hand rule.
      repeated ObjectCount positive_direction_counts = 2;

      // The direction that is opposite to the right hand rule.
      repeated ObjectCount negative_direction_counts = 3;

      // The accumulated positive count.
      repeated AccumulatedObjectCount accumulated_positive_direction_counts =
          4;

      // The accumulated negative count.
      repeated AccumulatedObjectCount accumulated_negative_direction_counts =
          5;
    }

    // Message for the active zone count.
    message ActiveZoneCount {
      // Active zone annotation from the user.
      StreamAnnotation annotation = 1;

      // Counts in the zone.
      repeated ObjectCount counts = 2;
    }

    // Counts of the full frame.
    repeated ObjectCount full_frame_count = 1;

    // Crossing line counts.
    repeated CrossingLineCount crossing_line_counts = 2;

    // Active zone counts.
    repeated ActiveZoneCount active_zone_counts = 3;
  }

  // The track info for annotations from occupancy counting operator.
  message TrackInfo {
    // A unique id to identify a track. It should be consistent across frames.
    // Note that this field is a string, whereas IdentifiedBox.track_id is an
    // int64.
    string track_id = 1;

    // Start timestamp of this track.
    google.protobuf.Timestamp start_time = 2;
  }

  // The dwell time info for annotations from occupancy counting operator.
  message DwellTimeInfo {
    // A unique id to identify a track. It should be consistent across frames.
    // Note that this field is a string, whereas IdentifiedBox.track_id is an
    // int64.
    string track_id = 1;

    // The unique id for the zone in which the object is dwelling/waiting.
    string zone_id = 2;

    // The beginning time when a dwelling object has been identified in a zone.
    google.protobuf.Timestamp dwell_start_time = 3;

    // The end time when a dwelling object has exited in a zone.
    google.protobuf.Timestamp dwell_end_time = 4;
  }

  // Current timestamp.
  google.protobuf.Timestamp current_time = 1;

  // A list of identified boxes.
  repeated IdentifiedBox identified_boxes = 2;

  // Detection statistics.
  Stats stats = 3;

  // Track related information. All the tracks that are live at this timestamp.
  // It only exists if tracking is enabled.
  repeated TrackInfo track_info = 4;

  // Dwell time related information. All the tracks that are live in a given
  // zone with a start and end dwell time timestamp.
  repeated DwellTimeInfo dwell_time_info = 5;
}

// Message describing an annotation on a Vision AI stream resource.
message StreamAnnotation {
  // The geometry carried by this annotation. At most one member can be set.
  // NOTE(review): presumably the member that is set is expected to agree with
  // `type` -- confirm against the service implementation.
  oneof annotation_payload {
    // Annotation for type ACTIVE_ZONE
    NormalizedPolygon active_zone = 5;

    // Annotation for type CROSSING_LINE
    NormalizedPolyline crossing_line = 6;
  }

  // ID of the annotation. It must be unique when used in the certain context.
  // For example, all the annotations to one input stream of a Vision AI
  // application.
  string id = 1;

  // User-friendly name for the annotation.
  string display_name = 2;

  // The Vision AI stream resource name.
  string source_stream = 3;

  // The actual type of Annotation.
  StreamAnnotationType type = 4;
}

// A wrapper of repeated StreamAnnotation.
message StreamAnnotations {
  // Multiple annotations.
  repeated StreamAnnotation stream_annotations = 1;
}

// Normalized Polygon.
message NormalizedPolygon {
  // The bounding polygon normalized vertices. Top left corner of the image
  // will be [0, 0].
  repeated NormalizedVertex normalized_vertices = 1;
}

// Normalized Polyline, which represents a curve consisting of connected
// straight-line segments.
message NormalizedPolyline {
  // A sequence of vertices connected by straight lines.
  repeated NormalizedVertex normalized_vertices = 1;
}

// A vertex represents a 2D point in the image.
// NOTE: the normalized vertex coordinates are relative to the original image
// and range from 0 to 1.
message NormalizedVertex {
  // X coordinate.
  float x = 1;

  // Y coordinate.
  float y = 2;
}

// Message of essential metadata of App Platform.
// This message is usually attached to a certain processor output annotation for
// customer to identify the source of the data.
message AppPlatformMetadata {
  // The application resource name.
  string application = 1;

  // The instance resource id. Instance is the nested resource of application
  // under collection 'instances'.
  string instance_id = 2;

  // The node name of the application graph.
  string node = 3;

  // The referred processor resource name of the application node.
  string processor = 4;
}

// For any cloud function based customer processing logic, customer's cloud
// function is expected to receive AppPlatformCloudFunctionRequest as request
// and send back AppPlatformCloudFunctionResponse as response.
// Message of request from AppPlatform to Cloud Function.
message AppPlatformCloudFunctionRequest {
  // A general annotation message that uses struct format to represent different
  // concrete annotation protobufs.
  message StructedInputAnnotation {
    // The ingestion time of the current annotation, in microseconds.
    int64 ingestion_time_micros = 1;

    // The struct format of the actual annotation.
    google.protobuf.Struct annotation = 2;
  }

  // The metadata of the AppPlatform for customer to identify the source of the
  // payload.
  AppPlatformMetadata app_platform_metadata = 1;

  // The actual annotations to be processed by the customized Cloud Function.
  repeated StructedInputAnnotation annotations = 2;
}

// Message of the response from customer's Cloud Function to AppPlatform.
message AppPlatformCloudFunctionResponse {
  // A general annotation message that uses struct format to represent different
  // concrete annotation protobufs.
  message StructedOutputAnnotation {
    // The struct format of the actual annotation.
    google.protobuf.Struct annotation = 1;
  }

  // NOTE(review): field number 1 is not used by this message. Confirm whether
  // it belonged to a removed field and should be declared `reserved`.

  // The modified annotations that are returned back to AppPlatform.
  // If the annotations fields are empty, then those annotations will be dropped
  // by AppPlatform.
  repeated StructedOutputAnnotation annotations = 2;

  // If set to true, AppPlatform will use original annotations instead of
  // dropping them, even if it is empty in the annotations field.
  bool annotation_passthrough = 3;

  // The event notifications that are returned back to AppPlatform. Typically
  // they will then be configured to be consumed by/forwarded to an operator
  // that handles events, such as Pub/Sub operator.
  repeated AppPlatformEventBody events = 4;
}

// Message of content of appPlatform event
message AppPlatformEventBody {
  // Human readable string of the event like "There are more than 6 people in
  // the scene". or "Shelf is empty!".
  string event_message = 1;

  // For the case of Pub/Sub, it will be stored in the message attributes
  // (see pubsub.proto).
  google.protobuf.Struct payload = 2;

  // User defined Event Id, used to classify event, within a delivery interval,
  // events from the same application instance with the same id will be
  // de-duplicated & only first one will be sent out. Empty event_id will be
  // treated as "".
  string event_id = 3;
}