// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.documentai.v1;

option csharp_namespace = "Google.Cloud.DocumentAI.V1";
option go_package = "cloud.google.com/go/documentai/apiv1/documentaipb;documentaipb";
option java_multiple_files = true;
option java_outer_classname = "DocumentAiDocumentSchema";
option java_package = "com.google.cloud.documentai.v1";
option php_namespace = "Google\\Cloud\\DocumentAI\\V1";
option ruby_package = "Google::Cloud::DocumentAI::V1";

// The schema defines the output of the processed document by a processor.
message DocumentSchema {
  // EntityType is the wrapper of a label of the corresponding model with
  // detailed attributes and limitations for entity-based processors. Multiple
  // types can also compose a dependency tree to represent nested types.
  message EntityType {
    // Defines a list of enum values.
    message EnumValues {
      // The individual values that this enum values type can include.
      repeated string values = 1;
    }

    // Defines properties that can be part of the entity type.
    message Property {
      // Types of occurrences of the entity type in the document. This
      // represents the number of instances, not mentions, of an entity.
      // For example, a bank statement might only have one
      // `account_number`, but this account number can be mentioned in several
      // places on the document. In this case, the `account_number` is
      // considered a `REQUIRED_ONCE` entity type. If, on the other hand, we
      // expect a bank statement to contain the status of multiple different
      // accounts for the customers, the occurrence type is set to
      // `REQUIRED_MULTIPLE`.
      enum OccurrenceType {
        // Unspecified occurrence type.
        OCCURRENCE_TYPE_UNSPECIFIED = 0;

        // There will be zero or one instance of this entity type. The same
        // entity instance may be mentioned multiple times.
        OPTIONAL_ONCE = 1;

        // The entity type will appear zero or multiple times.
        OPTIONAL_MULTIPLE = 2;

        // The entity type will only appear exactly once. The same
        // entity instance may be mentioned multiple times.
        REQUIRED_ONCE = 3;

        // The entity type will appear one or more times.
        REQUIRED_MULTIPLE = 4;
      }

      // The name of the property. Follows the same guidelines as the
      // EntityType name.
      string name = 1;

      // User defined name for the property.
      string display_name = 6;

      // A reference to the value type of the property. This type is subject
      // to the same conventions as the `EntityType.base_types` field.
      string value_type = 2;

      // Occurrence type limits the number of instances an entity type appears
      // in the document.
      OccurrenceType occurrence_type = 3;
    }

    // Where the possible values of this entity type come from. As a oneof,
    // at most one of its members can be set at a time.
    oneof value_source {
      // If specified, lists all the possible values for this entity. This
      // should not be more than a handful of values. If the number of values
      // is greater than 10 or could change frequently, use the
      // `EntityType.value_ontology` field and specify a list of all possible
      // values in a value ontology file.
      EnumValues enum_values = 14;
    }

    // User defined name for the type.
    string display_name = 13;

    // Name of the type. It must be unique within the schema file and
    // cannot be a "Common Type". The following naming conventions are used:
    //
    // - Use `snake_casing`.
    // - Name matching is case-sensitive.
    // - Maximum 64 characters.
    // - Must start with a letter.
    // - Allowed characters: ASCII letters `[a-z0-9_-]`. (For backward
    //   compatibility internal infrastructure and tooling can handle any ASCII
    //   character.)
    // - The `/` is sometimes used to denote a property of a type. For example
    //   `line_item/amount`. This convention is deprecated, but will still be
    //   honored for backward compatibility.
    string name = 1;

    // The entity type that this type is derived from. For now, one and only
    // one should be set.
    repeated string base_types = 2;

    // Describes the nested structure, or composition, of an entity.
    repeated Property properties = 6;
  }

  // Metadata for global schema behavior.
  message Metadata {
    // If true, a `document` entity type can be applied to subdocument
    // (splitting). Otherwise, it can only be applied to the entire document
    // (classification).
    bool document_splitter = 1;

    // If true, on a given page, there can be multiple `document` annotations
    // covering it.
    bool document_allow_multiple_labels = 2;

    // If set, all the nested entities must be prefixed with the parents.
    bool prefixed_naming_on_properties = 6;

    // If set, we will skip the naming format validation in the schema. So the
    // string values in `DocumentSchema.EntityType.name` and
    // `DocumentSchema.EntityType.Property.name` will not be checked.
    bool skip_naming_validation = 7;
  }

  // Display name to show to users.
  string display_name = 1;

  // Description of the schema.
  string description = 2;

  // Entity types of the schema.
  repeated EntityType entity_types = 3;

  // Metadata of the schema.
  Metadata metadata = 4;
}