// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.discoveryengine.v1alpha;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/discoveryengine/v1alpha/common.proto";
import "google/protobuf/struct.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";

option csharp_namespace = "Google.Cloud.DiscoveryEngine.V1Alpha";
option go_package = "cloud.google.com/go/discoveryengine/apiv1alpha/discoveryenginepb;discoveryenginepb";
option java_multiple_files = true;
option java_outer_classname = "DocumentProto";
option java_package = "com.google.cloud.discoveryengine.v1alpha";
option objc_class_prefix = "DISCOVERYENGINE";
option php_namespace = "Google\\Cloud\\DiscoveryEngine\\V1alpha";
option ruby_package = "Google::Cloud::DiscoveryEngine::V1alpha";

// Document captures all raw metadata information of items to be recommended or
// searched.
message Document {
  option (google.api.resource) = {
    type: "discoveryengine.googleapis.com/Document"
    pattern: "projects/{project}/locations/{location}/dataStores/{data_store}/branches/{branch}/documents/{document}"
    pattern: "projects/{project}/locations/{location}/collections/{collection}/dataStores/{data_store}/branches/{branch}/documents/{document}"
  };

  // Unstructured data linked to this document.
  message Content {
    oneof content {
      // The content represented as a stream of bytes. The maximum length is
      // 1,000,000 bytes (1 MB / ~0.95 MiB).
      //
      // Note: As with all `bytes` fields, this field is represented as pure
      // binary in Protocol Buffers and base64-encoded string in JSON. For
      // example, `abc123!?$*&()'-=@~` should be represented as
      // `YWJjMTIzIT8kKiYoKSctPUB+` in JSON. See
      // https://developers.google.com/protocol-buffers/docs/proto3#json.
      bytes raw_bytes = 2;

      // The URI of the content. Only Cloud Storage URIs (e.g.
      // `gs://bucket-name/path/to/file`) are supported. The maximum file size
      // is 2.5 MB for text-based formats, 200 MB for other formats.
      string uri = 3;
    }

    // The MIME type of the content. Supported types:
    //
    // * `application/pdf` (PDF, only native PDFs are supported for now)
    // * `text/html` (HTML)
    // * `application/vnd.openxmlformats-officedocument.wordprocessingml.document` (DOCX)
    // * `application/vnd.openxmlformats-officedocument.presentationml.presentation` (PPTX)
    // * `text/plain` (TXT)
    //
    // See https://www.iana.org/assignments/media-types/media-types.xhtml.
    string mime_type = 1;
  }

  // ACL Information of the Document.
  message AclInfo {
    // AccessRestriction to model complex inheritance restrictions.
    //
    // Example: Modeling a "Both Permit" inheritance, where to access a
    // child document, user needs to have access to parent document.
    //
    // Document Hierarchy - Space_S --> Page_P.
    //
    // Readers:
    //   Space_S: group_1, user_1
    //   Page_P: group_2, group_3, user_2
    //
    // Space_S ACL Restriction -
    // {
    //   "acl_info": {
    //     "readers": [
    //       {
    //         "principals": [
    //           {
    //             "group_id": "group_1"
    //           },
    //           {
    //             "user_id": "user_1"
    //           }
    //         ]
    //       }
    //     ]
    //   }
    // }
    //
    // Page_P ACL Restriction -
    // {
    //   "acl_info": {
    //     "readers": [
    //       {
    //         "principals": [
    //           {
    //             "group_id": "group_2"
    //           },
    //           {
    //             "group_id": "group_3"
    //           },
    //           {
    //             "user_id": "user_2"
    //           }
    //         ]
    //       },
    //       {
    //         "principals": [
    //           {
    //             "group_id": "group_1"
    //           },
    //           {
    //             "user_id": "user_1"
    //           }
    //         ]
    //       }
    //     ]
    //   }
    // }
    message AccessRestriction {
      // List of principals.
      repeated Principal principals = 1;

      // All users within the Identity Provider.
      bool idp_wide = 2;
    }

    // Readers of the document.
    repeated AccessRestriction readers = 1;
  }

  // Index status of the document.
  message IndexStatus {
    // The time when the document was indexed.
    // If this field is populated, it means the document has been indexed.
    google.protobuf.Timestamp index_time = 1;

    // A sample of errors encountered while indexing the document.
    // If this field is populated, the document is not indexed due to errors.
    repeated google.rpc.Status error_samples = 2;
  }

  // Data representation. One of
  // [struct_data][google.cloud.discoveryengine.v1alpha.Document.struct_data] or
  // [json_data][google.cloud.discoveryengine.v1alpha.Document.json_data] should
  // be provided otherwise an `INVALID_ARGUMENT` error is thrown.
  oneof data {
    // The structured JSON data for the document. It should conform to the
    // registered [Schema][google.cloud.discoveryengine.v1alpha.Schema] or an
    // `INVALID_ARGUMENT` error is thrown.
    google.protobuf.Struct struct_data = 4;

    // The JSON string representation of the document. It should conform to the
    // registered [Schema][google.cloud.discoveryengine.v1alpha.Schema] or an
    // `INVALID_ARGUMENT` error is thrown.
    string json_data = 5;
  }

  // Immutable. The full resource name of the document.
  // Format:
  // `projects/{project}/locations/{location}/collections/{collection}/dataStores/{data_store}/branches/{branch}/documents/{document_id}`.
  //
  // This field must be a UTF-8 encoded string with a length limit of 1024
  // characters.
  string name = 1 [(google.api.field_behavior) = IMMUTABLE];

  // Immutable. The identifier of the document.
  //
  // Id should conform to [RFC-1034](https://tools.ietf.org/html/rfc1034)
  // standard with a length limit of 63 characters.
  string id = 2 [(google.api.field_behavior) = IMMUTABLE];

  // The identifier of the schema located in the same data store.
  string schema_id = 3;

  // The unstructured data linked to this document. Content must be set if this
  // document is under a
  // `CONTENT_REQUIRED` data store.
  Content content = 10;

  // The identifier of the parent document. Currently supports at most two level
  // document hierarchy.
  //
  // Id should conform to [RFC-1034](https://tools.ietf.org/html/rfc1034)
  // standard with a length limit of 63 characters.
  string parent_document_id = 7;

  // Output only. Derived data that are not in the original input document.
  google.protobuf.Struct derived_struct_data = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Access control information for the document.
  AclInfo acl_info = 11;

  // Output only. The last time the document was indexed. If this field is set,
  // the document could be returned in search results.
  //
  // If this field is not populated, it means the document has never been
  // indexed.
  google.protobuf.Timestamp index_time = 13
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The index status of the document.
  //
  // * If document is indexed successfully, the index_time field is populated.
  // * Otherwise, if document is not indexed due to errors, the error_samples
  //   field is populated.
  // * Otherwise, index_status is unset.
  IndexStatus index_status = 15 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// ProcessedDocument carries the processed form of a
// [Document][google.cloud.discoveryengine.v1alpha.Document], together with the
// resource name of the document it refers to.
message ProcessedDocument {
  // Output format of the processed document.
  oneof processed_data_format {
    // The JSON string representation of the processed document.
    string json_data = 2;
  }

  // Required. Full resource name of the referenced document, in the format
  // `projects/*/locations/*/collections/*/dataStores/*/branches/*/documents/*`.
  string document = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "discoveryengine.googleapis.com/Document"
    }
  ];
}