// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.discoveryengine.v1;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/discoveryengine/v1/document.proto";
import "google/cloud/discoveryengine/v1/user_event.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";
import "google/type/date.proto";

option csharp_namespace = "Google.Cloud.DiscoveryEngine.V1";
option go_package = "cloud.google.com/go/discoveryengine/apiv1/discoveryenginepb;discoveryenginepb";
option java_multiple_files = true;
option java_outer_classname = "ImportConfigProto";
option java_package = "com.google.cloud.discoveryengine.v1";
option objc_class_prefix = "DISCOVERYENGINE";
option php_namespace = "Google\\Cloud\\DiscoveryEngine\\V1";
option ruby_package = "Google::Cloud::DiscoveryEngine::V1";

// Cloud Storage location for input content.
message GcsSource {
  // Required. Cloud Storage URIs to input files. URI can be up to
  // 2000 characters long. URIs can match the full object path (for example,
  // `gs://bucket/directory/object.json`) or a pattern matching one or more
  // files, such as `gs://bucket/directory/*.json`.
  //
  // A request can contain at most 100 files (or 100,000 files if `data_schema`
  // is `content`). Each file can be up to 2 GB (or 100 MB if `data_schema` is
  // `content`).
  repeated string input_uris = 1 [(google.api.field_behavior) = REQUIRED];

  // The schema to use when parsing the data from the source.
  //
  // Supported values for document imports:
  //
  // * `document` (default): One JSON
  //   [Document][google.cloud.discoveryengine.v1.Document] per line. Each
  //   document must have a valid
  //   [Document.id][google.cloud.discoveryengine.v1.Document.id].
  // * `content`: Unstructured data (e.g. PDF, HTML). Each file matched by
  //   `input_uris` will become a document, with the ID set to the first 128
  //   bits of SHA256(URI) encoded as a hex string.
  // * `custom`: One custom data JSON per row in arbitrary format that conforms
  //   to the defined [Schema][google.cloud.discoveryengine.v1.Schema] of the
  //   data store. This can only be used by the GENERIC Data Store vertical.
  //
  // Supported values for user event imports:
  //
  // * `user_event` (default): One JSON
  //   [UserEvent][google.cloud.discoveryengine.v1.UserEvent] per line.
  string data_schema = 2;
}

// BigQuery source to import data from.
message BigQuerySource {
  // BigQuery table partition info. Leave this empty if the BigQuery table
  // is not partitioned.
  oneof partition {
    // BigQuery time partitioned table's _PARTITIONDATE in YYYY-MM-DD format.
    google.type.Date partition_date = 5;
  }

  // The project ID (can be project # or ID) that the BigQuery source is in with
  // a length limit of 128 characters. If not specified, inherits the project
  // ID from the parent request.
  string project_id = 1;

  // Required. The BigQuery data set to copy the data from with a length limit
  // of 1,024 characters.
  string dataset_id = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. The BigQuery table to copy the data from with a length limit of
  // 1,024 characters.
  string table_id = 3 [(google.api.field_behavior) = REQUIRED];

  // Intermediate Cloud Storage directory used for the import with a length
  // limit of 2,000 characters. Can be specified if one wants to have the
  // BigQuery export to a specific Cloud Storage directory.
  string gcs_staging_dir = 4;

  // The schema to use when parsing the data from the source.
  //
  // Supported values for user event imports:
  //
  // * `user_event` (default): One
  //   [UserEvent][google.cloud.discoveryengine.v1.UserEvent] per row.
  //
  // Supported values for document imports:
  //
  // * `document` (default): One
  //   [Document][google.cloud.discoveryengine.v1.Document] format per
  //   row. Each document must have a valid
  //   [Document.id][google.cloud.discoveryengine.v1.Document.id] and one of
  //   [Document.json_data][google.cloud.discoveryengine.v1.Document.json_data]
  //   or
  //   [Document.struct_data][google.cloud.discoveryengine.v1.Document.struct_data].
  // * `custom`: One custom data per row in arbitrary format that conforms to
  //   the defined [Schema][google.cloud.discoveryengine.v1.Schema] of the data
  //   store. This can only be used by the GENERIC Data Store vertical.
  string data_schema = 6;
}

// Configuration of destination for Import related errors.
message ImportErrorConfig {
  // Required. Errors destination.
  oneof destination {
    // Cloud Storage prefix for import errors. This must be an empty,
    // existing Cloud Storage directory. Import errors will be written to
    // sharded files in this directory, one per line, as a JSON-encoded
    // `google.rpc.Status` message.
    string gcs_prefix = 1;
  }
}

// Request message for the ImportUserEvents request.
message ImportUserEventsRequest {
  // The inline source for the input config for ImportUserEvents method.
  message InlineSource {
    // Required. A list of user events to import. Recommended max of 10k items.
    repeated UserEvent user_events = 1
        [(google.api.field_behavior) = REQUIRED];
  }

  // The desired input source of the user event data.
  oneof source {
    // Required. The Inline source for the input content for UserEvents.
    InlineSource inline_source = 2 [(google.api.field_behavior) = REQUIRED];

    // Required. Cloud Storage location for the input content.
    GcsSource gcs_source = 3 [(google.api.field_behavior) = REQUIRED];

    // Required. BigQuery input source.
    BigQuerySource bigquery_source = 4
        [(google.api.field_behavior) = REQUIRED];
  }

  // Required. Parent DataStore resource name, of the form
  // `projects/{project}/locations/{location}/collections/{collection}/dataStores/{data_store}`
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "discoveryengine.googleapis.com/DataStore"
    }
  ];

  // The desired location of errors incurred during the Import. Cannot be set
  // for inline user event imports.
  ImportErrorConfig error_config = 5;
}

// Response of the ImportUserEventsRequest. If the long running
// operation was successful, then this message is returned by the
// google.longrunning.Operation.response field.
message ImportUserEventsResponse {
  // A sample of errors encountered while processing the request.
  repeated google.rpc.Status error_samples = 1;

  // Echoes the destination for the complete errors if this field was set in
  // the request.
  ImportErrorConfig error_config = 2;

  // Count of user events imported with complete existing Documents.
  int64 joined_events_count = 3;

  // Count of user events imported, but with Document information not found
  // in the existing Branch.
  int64 unjoined_events_count = 4;
}

// Metadata related to the progress of the Import operation. This will be
// returned by the google.longrunning.Operation.metadata field.
message ImportUserEventsMetadata {
  // Operation create time.
  google.protobuf.Timestamp create_time = 1;

  // Operation last update time. If the operation is done, this is also the
  // finish time.
  google.protobuf.Timestamp update_time = 2;

  // Count of entries that were processed successfully.
  int64 success_count = 3;

  // Count of entries that encountered errors while processing.
  int64 failure_count = 4;
}

// Metadata related to the progress of the ImportDocuments operation. This will
// be returned by the google.longrunning.Operation.metadata field.
message ImportDocumentsMetadata {
  // Operation create time.
  google.protobuf.Timestamp create_time = 1;

  // Operation last update time. If the operation is done, this is also the
  // finish time.
  google.protobuf.Timestamp update_time = 2;

  // Count of entries that were processed successfully.
  int64 success_count = 3;

  // Count of entries that encountered errors while processing.
  int64 failure_count = 4;
}

// Request message for Import methods.
message ImportDocumentsRequest {
  // The inline source for the input config for ImportDocuments method.
  message InlineSource {
    // Required. A list of documents to update/create. Each document must have a
    // valid [Document.id][google.cloud.discoveryengine.v1.Document.id].
    // Recommended max of 100 items.
    repeated Document documents = 1 [(google.api.field_behavior) = REQUIRED];
  }

  // Indicates how imported documents are reconciled with the existing documents
  // created or imported before.
  enum ReconciliationMode {
    // Defaults to INCREMENTAL.
    RECONCILIATION_MODE_UNSPECIFIED = 0;

    // Inserts new documents or updates existing documents.
    INCREMENTAL = 1;

    // Calculates diff and replaces the entire document dataset. Existing
    // documents may be deleted if they are not present in the source location.
    FULL = 2;
  }

  // Required. The source of the input.
  oneof source {
    // The Inline source for the input content for documents.
    InlineSource inline_source = 2;

    // Cloud Storage location for the input content.
    GcsSource gcs_source = 3;

    // BigQuery input source.
    BigQuerySource bigquery_source = 4;
  }

  // Required. The parent branch resource name, such as
  // `projects/{project}/locations/{location}/collections/{collection}/dataStores/{data_store}/branches/{branch}`.
  // Requires create/update permission.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "discoveryengine.googleapis.com/Branch"
    }
  ];

  // The desired location of errors incurred during the Import.
  ImportErrorConfig error_config = 5;

  // The mode of reconciliation between existing documents and the documents to
  // be imported. Defaults to
  // [ReconciliationMode.INCREMENTAL][google.cloud.discoveryengine.v1.ImportDocumentsRequest.ReconciliationMode.INCREMENTAL].
  ReconciliationMode reconciliation_mode = 6;

  // Whether to automatically generate IDs for the documents if absent.
  //
  // If set to `true`,
  // [Document.id][google.cloud.discoveryengine.v1.Document.id]s are
  // automatically generated based on the hash of the payload, where IDs may not
  // be consistent during multiple imports. In which case
  // [ReconciliationMode.FULL][google.cloud.discoveryengine.v1.ImportDocumentsRequest.ReconciliationMode.FULL]
  // is highly recommended to avoid duplicate contents. If unset or set to
  // `false`, [Document.id][google.cloud.discoveryengine.v1.Document.id]s have
  // to be specified using
  // [id_field][google.cloud.discoveryengine.v1.ImportDocumentsRequest.id_field],
  // otherwise, documents without IDs will fail to be imported.
  //
  // Only set this field when using
  // [GcsSource][google.cloud.discoveryengine.v1.GcsSource] or
  // [BigQuerySource][google.cloud.discoveryengine.v1.BigQuerySource], and when
  // [GcsSource.data_schema][google.cloud.discoveryengine.v1.GcsSource.data_schema]
  // or
  // [BigQuerySource.data_schema][google.cloud.discoveryengine.v1.BigQuerySource.data_schema]
  // is `custom`. Otherwise, an INVALID_ARGUMENT error is thrown.
  bool auto_generate_ids = 8;

  // The field in the Cloud Storage and BigQuery sources that indicates the
  // unique IDs of the documents.
  //
  // For [GcsSource][google.cloud.discoveryengine.v1.GcsSource] it is the key of
  // the JSON field. For instance, `my_id` for JSON `{"my_id": "some_uuid"}`.
  // For [BigQuerySource][google.cloud.discoveryengine.v1.BigQuerySource] it is
  // the column name of the BigQuery table where the unique ids are stored.
  //
  // The values of the JSON field or the BigQuery column will be used as the
  // [Document.id][google.cloud.discoveryengine.v1.Document.id]s. The JSON field
  // or the BigQuery column must be of string type, and the values must be set
  // as valid strings that conform to
  // [RFC-1034](https://tools.ietf.org/html/rfc1034) with 1-63 characters.
  // Otherwise, documents without valid IDs will fail to be imported.
  //
  // Only set this field when using
  // [GcsSource][google.cloud.discoveryengine.v1.GcsSource] or
  // [BigQuerySource][google.cloud.discoveryengine.v1.BigQuerySource], and when
  // [GcsSource.data_schema][google.cloud.discoveryengine.v1.GcsSource.data_schema]
  // or
  // [BigQuerySource.data_schema][google.cloud.discoveryengine.v1.BigQuerySource.data_schema]
  // is `custom`. And only set this field when
  // [auto_generate_ids][google.cloud.discoveryengine.v1.ImportDocumentsRequest.auto_generate_ids]
  // is unset or set as `false`. Otherwise, an INVALID_ARGUMENT error is thrown.
  //
  // If it is unset, a default value `_id` is used when importing from the
  // allowed data sources.
  string id_field = 9;
}

// Response of the
// [ImportDocumentsRequest][google.cloud.discoveryengine.v1.ImportDocumentsRequest].
// If the long running operation was successful, then this message is returned
// by the google.longrunning.Operation.response field.
message ImportDocumentsResponse {
  // A sample of errors encountered while processing the request.
  repeated google.rpc.Status error_samples = 1;

  // Echoes the destination for the complete errors in the request if set.
  ImportErrorConfig error_config = 2;
}