// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.discoveryengine.v1beta;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";

option csharp_namespace = "Google.Cloud.DiscoveryEngine.V1Beta";
option go_package = "cloud.google.com/go/discoveryengine/apiv1beta/discoveryenginepb;discoveryenginepb";
option java_multiple_files = true;
option java_outer_classname = "DocumentProcessingConfigProto";
option java_package = "com.google.cloud.discoveryengine.v1beta";
option objc_class_prefix = "DISCOVERYENGINE";
option php_namespace = "Google\\Cloud\\DiscoveryEngine\\V1beta";
option ruby_package = "Google::Cloud::DiscoveryEngine::V1beta";

// A singleton resource of
// [DataStore][google.cloud.discoveryengine.v1beta.DataStore]. It's empty when
// [DataStore][google.cloud.discoveryengine.v1beta.DataStore] is created, which
// defaults to digital parser. The first call to
// [DataStoreService.UpdateDocumentProcessingConfig][] method will initialize
// the config.
message DocumentProcessingConfig {
  option (google.api.resource) = {
    type: "discoveryengine.googleapis.com/DocumentProcessingConfig"
    pattern: "projects/{project}/locations/{location}/dataStores/{data_store}/documentProcessingConfig"
    pattern: "projects/{project}/locations/{location}/collections/{collection}/dataStores/{data_store}/documentProcessingConfig"
  };

  // Related configurations applied to a specific type of document parser.
  message ParsingConfig {
    // The digital parsing configurations for documents.
    message DigitalParsingConfig {}

    // The OCR parsing configurations for documents.
    message OcrParsingConfig {
      // [DEPRECATED] This field is deprecated. To use the additional enhanced
      // document elements processing, please switch to `layout_parsing_config`.
      repeated string enhanced_document_elements = 1 [deprecated = true];

      // If true, will use native text instead of OCR text on pages containing
      // native text.
      bool use_native_text = 2;
    }

    // Configs for document processing types. At most one of the members may
    // be set at a time.
    oneof type_dedicated_config {
      // Configurations applied to digital parser.
      DigitalParsingConfig digital_parsing_config = 1;

      // Configurations applied to OCR parser. Currently it only applies to
      // PDFs.
      OcrParsingConfig ocr_parsing_config = 2;
    }
  }

  // The full resource name of the Document Processing Config.
  // Format:
  // `projects/*/locations/*/collections/*/dataStores/*/documentProcessingConfig`.
  string name = 1;

  // Configurations for default Document parser.
  // If not specified, we will configure it as default DigitalParsingConfig, and
  // the default parsing config will be applied to all file types for Document
  // parsing.
  ParsingConfig default_parsing_config = 4;

  // Map from file type to override the default parsing configuration based on
  // the file type. Supported keys:
  // * `pdf`: Override parsing config for PDF files, either digital parsing, OCR
  // parsing or layout parsing is supported.
  // * `html`: Override parsing config for HTML files, only digital parsing and
  // layout parsing are supported.
  // * `docx`: Override parsing config for DOCX files, only digital parsing and
  // layout parsing are supported.
  map<string, ParsingConfig> parsing_config_overrides = 5;
}