// Copyright 2024 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. syntax = "proto3"; package google.cloud.discoveryengine.v1beta; import "google/api/field_behavior.proto"; import "google/api/resource.proto"; import "google/protobuf/struct.proto"; import "google/protobuf/timestamp.proto"; option csharp_namespace = "Google.Cloud.DiscoveryEngine.V1Beta"; option go_package = "cloud.google.com/go/discoveryengine/apiv1beta/discoveryenginepb;discoveryenginepb"; option java_multiple_files = true; option java_outer_classname = "DocumentProto"; option java_package = "com.google.cloud.discoveryengine.v1beta"; option objc_class_prefix = "DISCOVERYENGINE"; option php_namespace = "Google\\Cloud\\DiscoveryEngine\\V1beta"; option ruby_package = "Google::Cloud::DiscoveryEngine::V1beta"; // Document captures all raw metadata information of items to be recommended or // searched. message Document { option (google.api.resource) = { type: "discoveryengine.googleapis.com/Document" pattern: "projects/{project}/locations/{location}/dataStores/{data_store}/branches/{branch}/documents/{document}" pattern: "projects/{project}/locations/{location}/collections/{collection}/dataStores/{data_store}/branches/{branch}/documents/{document}" }; // Unstructured data linked to this document. message Content { oneof content { // The content represented as a stream of bytes. The maximum length is // 1,000,000 bytes (1 MB / ~0.95 MiB). // // Note: As with all `bytes` fields, this field is represented as pure // binary in Protocol Buffers and base64-encoded string in JSON. For // example, `abc123!?$*&()'-=@~` should be represented as // `YWJjMTIzIT8kKiYoKSctPUB+` in JSON. See // https://developers.google.com/protocol-buffers/docs/proto3#json. bytes raw_bytes = 2; // The URI of the content. Only Cloud Storage URIs (e.g. // `gs://bucket-name/path/to/file`) are supported. The maximum file size // is 2.5 MB for text-based formats, 100 MB for other formats. string uri = 3; } // The MIME type of the content. Supported types: // // * `application/pdf` (PDF, only native PDFs are supported for now) // * `text/html` (HTML) // * `application/vnd.openxmlformats-officedocument.wordprocessingml.document` (DOCX) // * `application/vnd.openxmlformats-officedocument.presentationml.presentation` (PPTX) // * `text/plain` (TXT) // // See https://www.iana.org/assignments/media-types/media-types.xhtml. string mime_type = 1; } // Data representation. One of // [struct_data][google.cloud.discoveryengine.v1beta.Document.struct_data] or // [json_data][google.cloud.discoveryengine.v1beta.Document.json_data] should // be provided otherwise an `INVALID_ARGUMENT` error is thrown. oneof data { // The structured JSON data for the document. It should conform to the // registered [Schema][google.cloud.discoveryengine.v1beta.Schema] or an // `INVALID_ARGUMENT` error is thrown. google.protobuf.Struct struct_data = 4; // The JSON string representation of the document. It should conform to the // registered [Schema][google.cloud.discoveryengine.v1beta.Schema] or an // `INVALID_ARGUMENT` error is thrown. string json_data = 5; } // Immutable. The full resource name of the document. // Format: // `projects/{project}/locations/{location}/collections/{collection}/dataStores/{data_store}/branches/{branch}/documents/{document_id}`. // // This field must be a UTF-8 encoded string with a length limit of 1024 // characters. string name = 1 [(google.api.field_behavior) = IMMUTABLE]; // Immutable. The identifier of the document. // // Id should conform to [RFC-1034](https://tools.ietf.org/html/rfc1034) // standard with a length limit of 63 characters. string id = 2 [(google.api.field_behavior) = IMMUTABLE]; // The identifier of the schema located in the same data store. string schema_id = 3; // The unstructured data linked to this document. Content must be set if this // document is under a // `CONTENT_REQUIRED` data store. Content content = 10; // The identifier of the parent document. Currently supports at most two level // document hierarchy. // // Id should conform to [RFC-1034](https://tools.ietf.org/html/rfc1034) // standard with a length limit of 63 characters. string parent_document_id = 7; // Output only. This field is OUTPUT_ONLY. // It contains derived data that are not in the original input document. google.protobuf.Struct derived_struct_data = 6 [(google.api.field_behavior) = OUTPUT_ONLY]; // Output only. The last time the document was indexed. If this field is set, // the document could be returned in search results. // // This field is OUTPUT_ONLY. If this field is not populated, it means the // document has never been indexed. google.protobuf.Timestamp index_time = 13 [(google.api.field_behavior) = OUTPUT_ONLY]; }