// Copyright 2020 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. syntax = "proto3"; package google.cloud.documentai.v1beta2; import "google/api/field_behavior.proto"; import "google/cloud/documentai/v1beta2/geometry.proto"; import "google/rpc/status.proto"; import "google/type/color.proto"; import "google/api/annotations.proto"; option go_package = "google.golang.org/genproto/googleapis/cloud/documentai/v1beta2;documentai"; option java_multiple_files = true; option java_outer_classname = "DocumentProto"; option java_package = "com.google.cloud.documentai.v1beta2"; // Document represents the canonical document resource in Document Understanding // AI. // It is an interchange format that provides insights into documents and allows // for collaboration between users and Document Understanding AI to iterate and // optimize for quality. message Document { // For a large document, sharding may be performed to produce several // document shards. Each document shard contains this field to detail which // shard it is. message ShardInfo { // The 0-based index of this shard. int64 shard_index = 1; // Total number of shards. int64 shard_count = 2; // The index of the first character in [Document.text][google.cloud.documentai.v1beta2.Document.text] in the overall // document global text. int64 text_offset = 3; } // Label attaches schema information and/or other metadata to segments within // a [Document][google.cloud.documentai.v1beta2.Document]. Multiple [Label][google.cloud.documentai.v1beta2.Document.Label]s on a single field can denote either // different labels, different instances of the same label created at // different times, or some combination of both. message Label { // Provenance of the label. oneof source { // Label is generated AutoML model. This field stores the full resource // name of the AutoML model. // // Format: // `projects/{project-id}/locations/{location-id}/models/{model-id}` string automl_model = 2; } // Name of the label. // // When the label is generated from AutoML Text Classification model, this // field represents the name of the category. string name = 1; // Confidence score between 0 and 1 for label assignment. float confidence = 3; } // Annotation for common text style attributes. This adheres to CSS // conventions as much as possible. message Style { // Font size with unit. message FontSize { // Font size for the text. float size = 1; // Unit for the font size. Follows CSS naming (in, px, pt, etc.). string unit = 2; } // Text anchor indexing into the [Document.text][google.cloud.documentai.v1beta2.Document.text]. TextAnchor text_anchor = 1; // Text color. google.type.Color color = 2; // Text background color. google.type.Color background_color = 3; // Font weight. Possible values are normal, bold, bolder, and lighter. // https://www.w3schools.com/cssref/pr_font_weight.asp string font_weight = 4; // Text style. Possible values are normal, italic, and oblique. // https://www.w3schools.com/cssref/pr_font_font-style.asp string text_style = 5; // Text decoration. Follows CSS standard. // // https://www.w3schools.com/cssref/pr_text_text-decoration.asp string text_decoration = 6; // Font size. FontSize font_size = 7; } // A page in a [Document][google.cloud.documentai.v1beta2.Document]. message Page { // Dimension for the page. message Dimension { // Page width. float width = 1; // Page height. float height = 2; // Dimension unit. string unit = 3; } // Visual element describing a layout unit on a page. message Layout { // Detected human reading orientation. enum Orientation { // Unspecified orientation. ORIENTATION_UNSPECIFIED = 0; // Orientation is aligned with page up. PAGE_UP = 1; // Orientation is aligned with page right. // Turn the head 90 degrees clockwise from upright to read. PAGE_RIGHT = 2; // Orientation is aligned with page down. // Turn the head 180 degrees from upright to read. PAGE_DOWN = 3; // Orientation is aligned with page left. // Turn the head 90 degrees counterclockwise from upright to read. PAGE_LEFT = 4; } // Text anchor indexing into the [Document.text][google.cloud.documentai.v1beta2.Document.text]. TextAnchor text_anchor = 1; // Confidence of the current [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] within context of the object this // layout is for. e.g. confidence can be for a single token, a table, // a visual element, etc. depending on context. Range [0, 1]. float confidence = 2; // The bounding polygon for the [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout]. BoundingPoly bounding_poly = 3; // Detected orientation for the [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout]. Orientation orientation = 4; // Optional. This is the identifier used by referencing [PageAnchor][google.cloud.documentai.v1beta2.Document.PageAnchor]s. string id = 5 [(google.api.field_behavior) = OPTIONAL]; } // A block has a set of lines (collected into paragraphs) that have a // common line-spacing and orientation. message Block { // [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for [Block][google.cloud.documentai.v1beta2.Document.Page.Block]. Layout layout = 1; // A list of detected languages together with confidence. repeated DetectedLanguage detected_languages = 2; } // A collection of lines that a human would perceive as a paragraph. message Paragraph { // [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for [Paragraph][google.cloud.documentai.v1beta2.Document.Page.Paragraph]. Layout layout = 1; // A list of detected languages together with confidence. repeated DetectedLanguage detected_languages = 2; } // A collection of tokens that a human would perceive as a line. // Does not cross column boundaries, can be horizontal, vertical, etc. message Line { // [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for [Line][google.cloud.documentai.v1beta2.Document.Page.Line]. Layout layout = 1; // A list of detected languages together with confidence. repeated DetectedLanguage detected_languages = 2; } // A detected token. message Token { // Detected break at the end of a [Token][google.cloud.documentai.v1beta2.Document.Page.Token]. message DetectedBreak { // Enum to denote the type of break found. enum Type { // Unspecified break type. TYPE_UNSPECIFIED = 0; // A single whitespace. SPACE = 1; // A wider whitespace. WIDE_SPACE = 2; // A hyphen that indicates that a token has been split across lines. HYPHEN = 3; } // Detected break type. Type type = 1; } // [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for [Token][google.cloud.documentai.v1beta2.Document.Page.Token]. Layout layout = 1; // Detected break at the end of a [Token][google.cloud.documentai.v1beta2.Document.Page.Token]. DetectedBreak detected_break = 2; // A list of detected languages together with confidence. repeated DetectedLanguage detected_languages = 3; } // Detected non-text visual elements e.g. checkbox, signature etc. on the // page. message VisualElement { // [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for [VisualElement][google.cloud.documentai.v1beta2.Document.Page.VisualElement]. Layout layout = 1; // Type of the [VisualElement][google.cloud.documentai.v1beta2.Document.Page.VisualElement]. string type = 2; // A list of detected languages together with confidence. repeated DetectedLanguage detected_languages = 3; } // A table representation similar to HTML table structure. message Table { // A row of table cells. message TableRow { // Cells that make up this row. repeated TableCell cells = 1; } // A cell representation inside the table. message TableCell { // [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for [TableCell][google.cloud.documentai.v1beta2.Document.Page.Table.TableCell]. Layout layout = 1; // How many rows this cell spans. int32 row_span = 2; // How many columns this cell spans. int32 col_span = 3; // A list of detected languages together with confidence. repeated DetectedLanguage detected_languages = 4; } // [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for [Table][google.cloud.documentai.v1beta2.Document.Page.Table]. Layout layout = 1; // Header rows of the table. repeated TableRow header_rows = 2; // Body rows of the table. repeated TableRow body_rows = 3; // A list of detected languages together with confidence. repeated DetectedLanguage detected_languages = 4; } // A form field detected on the page. message FormField { // [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for the [FormField][google.cloud.documentai.v1beta2.Document.Page.FormField] name. e.g. `Address`, `Email`, // `Grand total`, `Phone number`, etc. Layout field_name = 1; // [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for the [FormField][google.cloud.documentai.v1beta2.Document.Page.FormField] value. Layout field_value = 2; // A list of detected languages for name together with confidence. repeated DetectedLanguage name_detected_languages = 3; // A list of detected languages for value together with confidence. repeated DetectedLanguage value_detected_languages = 4; // If the value is non-textual, this field represents the type. Current // valid values are: // - blank (this indicates the field_value is normal text) // - "unfilled_checkbox" // - "filled_checkbox" string value_type = 5; // An internal field, created for Labeling UI to export key text. string corrected_key_text = 6; // An internal field, created for Labeling UI to export value text. string corrected_value_text = 7; } // Detected language for a structural component. message DetectedLanguage { // The BCP-47 language code, such as "en-US" or "sr-Latn". For more // information, see // http://www.unicode.org/reports/tr35/#Unicode_locale_identifier. string language_code = 1; // Confidence of detected language. Range [0, 1]. float confidence = 2; } // 1-based index for current [Page][google.cloud.documentai.v1beta2.Document.Page] in a parent [Document][google.cloud.documentai.v1beta2.Document]. // Useful when a page is taken out of a [Document][google.cloud.documentai.v1beta2.Document] for individual // processing. int32 page_number = 1; // Physical dimension of the page. Dimension dimension = 2; // [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for the page. Layout layout = 3; // A list of detected languages together with confidence. repeated DetectedLanguage detected_languages = 4; // A list of visually detected text blocks on the page. // A block has a set of lines (collected into paragraphs) that have a common // line-spacing and orientation. repeated Block blocks = 5; // A list of visually detected text paragraphs on the page. // A collection of lines that a human would perceive as a paragraph. repeated Paragraph paragraphs = 6; // A list of visually detected text lines on the page. // A collection of tokens that a human would perceive as a line. repeated Line lines = 7; // A list of visually detected tokens on the page. repeated Token tokens = 8; // A list of detected non-text visual elements e.g. checkbox, // signature etc. on the page. repeated VisualElement visual_elements = 9; // A list of visually detected tables on the page. repeated Table tables = 10; // A list of visually detected form fields on the page. repeated FormField form_fields = 11; } // A phrase in the text that is a known entity type, such as a person, an // organization, or location. message Entity { // Provenance of the entity. // Text anchor indexing into the [Document.text][google.cloud.documentai.v1beta2.Document.text]. TextAnchor text_anchor = 1; // Entity type from a schema e.g. `Address`. string type = 2; // Text value in the document e.g. `1600 Amphitheatre Pkwy`. string mention_text = 3; // Deprecated. Use `id` field instead. string mention_id = 4; // Optional. Confidence of detected Schema entity. Range [0, 1]. float confidence = 5 [(google.api.field_behavior) = OPTIONAL]; // Optional. Represents the provenance of this entity wrt. the location on the // page where it was found. PageAnchor page_anchor = 6 [(google.api.field_behavior) = OPTIONAL]; // Optional. Canonical id. This will be a unique value in the entity list // for this document. string id = 7 [(google.api.field_behavior) = OPTIONAL]; // Optional. Temporary field to store the bounding poly for short-term POCs. Used by // the frontend only. Do not use before you talk to ybo@ and lukasr@. BoundingPoly bounding_poly_for_demo_frontend = 8 [(google.api.field_behavior) = OPTIONAL]; } // Relationship between [Entities][google.cloud.documentai.v1beta2.Document.Entity]. message EntityRelation { // Subject entity id. string subject_id = 1; // Object entity id. string object_id = 2; // Relationship description. string relation = 3; } // Text reference indexing into the [Document.text][google.cloud.documentai.v1beta2.Document.text]. message TextAnchor { // A text segment in the [Document.text][google.cloud.documentai.v1beta2.Document.text]. The indices may be out of bounds // which indicate that the text extends into another document shard for // large sharded documents. See [ShardInfo.text_offset][google.cloud.documentai.v1beta2.Document.ShardInfo.text_offset] message TextSegment { // [TextSegment][google.cloud.documentai.v1beta2.Document.TextAnchor.TextSegment] start UTF-8 char index in the [Document.text][google.cloud.documentai.v1beta2.Document.text]. int64 start_index = 1; // [TextSegment][google.cloud.documentai.v1beta2.Document.TextAnchor.TextSegment] half open end UTF-8 char index in the // [Document.text][google.cloud.documentai.v1beta2.Document.text]. int64 end_index = 2; } // The text segments from the [Document.text][google.cloud.documentai.v1beta2.Document.text]. repeated TextSegment text_segments = 1; } // Referencing elements in [Document.pages][google.cloud.documentai.v1beta2.Document.pages]. message PageAnchor { // Represents a weak reference to a page element within a document. message PageRef { // The type of layout that is being referenced. enum LayoutType { // Layout Unspecified. LAYOUT_TYPE_UNSPECIFIED = 0; // References a [Page.blocks][google.cloud.documentai.v1beta2.Document.Page.blocks] element. BLOCK = 1; // References a [Page.paragraphs][google.cloud.documentai.v1beta2.Document.Page.paragraphs] element. PARAGRAPH = 2; // References a [Page.lines][google.cloud.documentai.v1beta2.Document.Page.lines] element. LINE = 3; // References a [Page.tokens][google.cloud.documentai.v1beta2.Document.Page.tokens] element. TOKEN = 4; // References a [Page.visual_elements][google.cloud.documentai.v1beta2.Document.Page.visual_elements] element. VISUAL_ELEMENT = 5; // Refrrences a [Page.tables][google.cloud.documentai.v1beta2.Document.Page.tables] element. TABLE = 6; // References a [Page.form_fields][google.cloud.documentai.v1beta2.Document.Page.form_fields] element. FORM_FIELD = 7; } // Required. Index into the [Document.pages][google.cloud.documentai.v1beta2.Document.pages] element int64 page = 1 [(google.api.field_behavior) = REQUIRED]; // Optional. The type of the layout element that is being referenced. If not // specified the whole page is assumed to be referenced. LayoutType layout_type = 2 [(google.api.field_behavior) = OPTIONAL]; // Optional. The [Page.Layout.id][google.cloud.documentai.v1beta2.Document.Page.Layout.id] on the page that this element // references. If [LayoutRef.type][] is specified this id must also be // specified. string layout_id = 3 [(google.api.field_behavior) = OPTIONAL]; } // One or more references to visual page elements repeated PageRef page_refs = 1; } // Original source document from the user. oneof source { // Currently supports Google Cloud Storage URI of the form // `gs://bucket_name/object_name`. Object versioning is not supported. // See [Google Cloud Storage Request // URIs](https://cloud.google.com/storage/docs/reference-uris) for more // info. string uri = 1; // Inline document content, represented as a stream of bytes. // Note: As with all `bytes` fields, protobuffers use a pure binary // representation, whereas JSON representations use base64. bytes content = 2; } // An IANA published MIME type (also referred to as media type). For more // information, see // https://www.iana.org/assignments/media-types/media-types.xhtml. string mime_type = 3; // UTF-8 encoded text in reading order from the document. string text = 4; // Styles for the [Document.text][google.cloud.documentai.v1beta2.Document.text]. repeated Style text_styles = 5; // Visual page layout for the [Document][google.cloud.documentai.v1beta2.Document]. repeated Page pages = 6; // A list of entities detected on [Document.text][google.cloud.documentai.v1beta2.Document.text]. For document shards, // entities in this list may cross shard boundaries. repeated Entity entities = 7; // Relationship among [Document.entities][google.cloud.documentai.v1beta2.Document.entities]. repeated EntityRelation entity_relations = 8; // Information about the sharding if this document is sharded part of a larger // document. If the document is not sharded, this message is not specified. ShardInfo shard_info = 9; // [Label][google.cloud.documentai.v1beta2.Document.Label]s for this document. repeated Label labels = 11; // Any error that occurred while processing this document. google.rpc.Status error = 10; }