// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.vision.v1;

import "google/cloud/vision/v1/geometry.proto";

option cc_enable_arenas = true;
option go_package = "cloud.google.com/go/vision/v2/apiv1/visionpb;visionpb";
option java_multiple_files = true;
option java_outer_classname = "TextAnnotationProto";
option java_package = "com.google.cloud.vision.v1";
option objc_class_prefix = "GCVN";

// TextAnnotation contains a structured representation of OCR extracted text.
// The hierarchy of an OCR extracted text structure is like this:
//     TextAnnotation -> Page -> Block -> Paragraph -> Word -> Symbol
// Each structural component, starting from Page, may further have their own
// properties. Properties describe detected languages, breaks, etc. Please refer
// to the
// [TextAnnotation.TextProperty][google.cloud.vision.v1.TextAnnotation.TextProperty]
// message definition below for more detail.
message TextAnnotation {
  // Detected language for a structural component.
  message DetectedLanguage {
    // The BCP-47 language code, such as "en-US" or "sr-Latn". For more
    // information, see
    // http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
    string language_code = 1;

    // Confidence of detected language. Range [0, 1].
    float confidence = 2;
  }

  // Detected start or end of a structural component.
  message DetectedBreak {
    // Enum to denote the type of break found. New line, space etc.
    enum BreakType {
      // Unknown break label type.
      UNKNOWN = 0;

      // Regular space.
      SPACE = 1;

      // Sure space (very wide).
      SURE_SPACE = 2;

      // Line-wrapping break.
      EOL_SURE_SPACE = 3;

      // End-line hyphen that is not present in text; does not co-occur with
      // `SPACE`, `LEADER_SPACE`, or `LINE_BREAK`.
      HYPHEN = 4;

      // Line break that ends a paragraph.
      LINE_BREAK = 5;
    }

    // Detected break type.
    BreakType type = 1;

    // True if break prepends the element.
    bool is_prefix = 2;
  }

  // Additional information detected on the structural component.
  message TextProperty {
    // A list of detected languages together with confidence.
    repeated DetectedLanguage detected_languages = 1;

    // Detected start or end of a text segment.
    DetectedBreak detected_break = 2;
  }

  // List of pages detected by OCR.
  repeated Page pages = 1;

  // UTF-8 text detected on the pages.
  string text = 2;
}

// Detected page from OCR.
message Page {
  // Additional information detected on the page.
  TextAnnotation.TextProperty property = 1;

  // Page width. For PDFs the unit is points. For images (including
  // TIFFs) the unit is pixels.
  int32 width = 2;

  // Page height. For PDFs the unit is points. For images (including
  // TIFFs) the unit is pixels.
  int32 height = 3;

  // List of blocks of text, images, etc. on this page.
  repeated Block blocks = 4;

  // Confidence of the OCR results on the page. Range [0, 1].
  float confidence = 5;
}

// Logical element on the page.
message Block {
  // Type of a block (text, image etc) as identified by OCR.
  enum BlockType {
    // Unknown block type.
    UNKNOWN = 0;

    // Regular text block.
    TEXT = 1;

    // Table block.
    TABLE = 2;

    // Image block.
    PICTURE = 3;

    // Horizontal/vertical line box.
    RULER = 4;

    // Barcode block.
    BARCODE = 5;
  }

  // Additional information detected for the block.
  TextAnnotation.TextProperty property = 1;

  // The bounding box for the block.
  // The vertices are in the order of top-left, top-right, bottom-right,
  // bottom-left. When a rotation of the bounding box is detected the rotation
  // is represented as around the top-left corner as defined when the text is
  // read in the 'natural' orientation.
  // For example:
  //
  // * when the text is horizontal it might look like:
  //
  //         0----1
  //         |    |
  //         3----2
  //
  // * when it's rotated 180 degrees around the top-left corner it becomes:
  //
  //         2----3
  //         |    |
  //         1----0
  //
  //   and the vertex order will still be (0, 1, 2, 3).
  BoundingPoly bounding_box = 2;

  // List of paragraphs in this block (if this block is of type text).
  repeated Paragraph paragraphs = 3;

  // Detected block type (text, image etc) for this block.
  BlockType block_type = 4;

  // Confidence of the OCR results on the block. Range [0, 1].
  float confidence = 5;
}

// Structural unit of text representing a number of words in certain order.
message Paragraph {
  // Additional information detected for the paragraph.
  TextAnnotation.TextProperty property = 1;

  // The bounding box for the paragraph.
  // The vertices are in the order of top-left, top-right, bottom-right,
  // bottom-left. When a rotation of the bounding box is detected the rotation
  // is represented as around the top-left corner as defined when the text is
  // read in the 'natural' orientation.
  // For example:
  //   * when the text is horizontal it might look like:
  //      0----1
  //      |    |
  //      3----2
  //   * when it's rotated 180 degrees around the top-left corner it becomes:
  //      2----3
  //      |    |
  //      1----0
  //   and the vertex order will still be (0, 1, 2, 3).
  BoundingPoly bounding_box = 2;

  // List of all words in this paragraph.
  repeated Word words = 3;

  // Confidence of the OCR results for the paragraph. Range [0, 1].
  float confidence = 4;
}

// A word representation.
message Word {
  // Additional information detected for the word.
  TextAnnotation.TextProperty property = 1;

  // The bounding box for the word.
  // The vertices are in the order of top-left, top-right, bottom-right,
  // bottom-left. When a rotation of the bounding box is detected the rotation
  // is represented as around the top-left corner as defined when the text is
  // read in the 'natural' orientation.
  // For example:
  //   * when the text is horizontal it might look like:
  //      0----1
  //      |    |
  //      3----2
  //   * when it's rotated 180 degrees around the top-left corner it becomes:
  //      2----3
  //      |    |
  //      1----0
  //   and the vertex order will still be (0, 1, 2, 3).
  BoundingPoly bounding_box = 2;

  // List of symbols in the word.
  // The order of the symbols follows the natural reading order.
  repeated Symbol symbols = 3;

  // Confidence of the OCR results for the word. Range [0, 1].
  float confidence = 4;
}

// A single symbol representation.
message Symbol {
  // Additional information detected for the symbol.
  TextAnnotation.TextProperty property = 1;

  // The bounding box for the symbol.
  // The vertices are in the order of top-left, top-right, bottom-right,
  // bottom-left. When a rotation of the bounding box is detected the rotation
  // is represented as around the top-left corner as defined when the text is
  // read in the 'natural' orientation.
  // For example:
  //   * when the text is horizontal it might look like:
  //      0----1
  //      |    |
  //      3----2
  //   * when it's rotated 180 degrees around the top-left corner it becomes:
  //      2----3
  //      |    |
  //      1----0
  //   and the vertex order will still be (0, 1, 2, 3).
  BoundingPoly bounding_box = 2;

  // The actual UTF-8 representation of the symbol.
  string text = 3;

  // Confidence of the OCR results for the symbol. Range [0, 1].
  float confidence = 4;
}