// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.discoveryengine.v1beta;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/discoveryengine/v1beta/completion.proto";
import "google/cloud/discoveryengine/v1beta/document.proto";
import "google/cloud/discoveryengine/v1beta/user_event.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";
import "google/type/date.proto";

option csharp_namespace = "Google.Cloud.DiscoveryEngine.V1Beta";
option go_package = "cloud.google.com/go/discoveryengine/apiv1beta/discoveryenginepb;discoveryenginepb";
option java_multiple_files = true;
option java_outer_classname = "ImportConfigProto";
option java_package = "com.google.cloud.discoveryengine.v1beta";
option objc_class_prefix = "DISCOVERYENGINE";
option php_namespace = "Google\\Cloud\\DiscoveryEngine\\V1beta";
option ruby_package = "Google::Cloud::DiscoveryEngine::V1beta";

// Cloud Storage location for input content.
message GcsSource {
  // Required. Cloud Storage URIs to input files. URI can be up to
  // 2000 characters long. URIs can match the full object path (for example,
  // `gs://bucket/directory/object.json`) or a pattern matching one or more
  // files, such as `gs://bucket/directory/*.json`.
  //
  // A request can contain at most 100 files (or 100,000 files if `data_schema`
  // is `content`). Each file can be up to 2 GB (or 100 MB if `data_schema` is
  // `content`).
  repeated string input_uris = 1 [(google.api.field_behavior) = REQUIRED];

  // The schema to use when parsing the data from the source.
  //
  // Supported values for document imports:
  //
  // * `document` (default): One JSON
  // [Document][google.cloud.discoveryengine.v1beta.Document] per line. Each
  // document must
  //   have a valid
  //   [Document.id][google.cloud.discoveryengine.v1beta.Document.id].
  // * `content`: Unstructured data (e.g. PDF, HTML). Each file matched by
  //   `input_uris` becomes a document, with the ID set to the first 128
  //   bits of SHA256(URI) encoded as a hex string.
  // * `custom`: One custom data JSON per row in arbitrary format that conforms
  //   to the defined [Schema][google.cloud.discoveryengine.v1beta.Schema] of
  //   the data store. This can only be used by the GENERIC Data Store vertical.
  // * `csv`: A CSV file with header conforming to the defined
  // [Schema][google.cloud.discoveryengine.v1beta.Schema] of the
  //   data store. Each entry after the header is imported as a Document.
  //   This can only be used by the GENERIC Data Store vertical.
  //
  // Supported values for user event imports:
  //
  // * `user_event` (default): One JSON
  // [UserEvent][google.cloud.discoveryengine.v1beta.UserEvent] per line.
  string data_schema = 2;
}

// BigQuery source to import data from.
message BigQuerySource {
  // BigQuery table partition info. Leave this empty if the BigQuery table
  // is not partitioned.
  oneof partition {
    // BigQuery time partitioned table's _PARTITIONDATE in YYYY-MM-DD format.
    google.type.Date partition_date = 5;
  }

  // The project ID (can be project # or ID) that the BigQuery source is in with
  // a length limit of 128 characters. If not specified, inherits the project
  // ID from the parent request.
  string project_id = 1;

  // Required. The BigQuery data set to copy the data from with a length limit
  // of 1,024 characters.
  string dataset_id = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. The BigQuery table to copy the data from with a length limit of
  // 1,024 characters.
  string table_id = 3 [(google.api.field_behavior) = REQUIRED];

  // Intermediate Cloud Storage directory used for the import with a length
  // limit of 2,000 characters. Can be specified if one wants to have the
  // BigQuery export to a specific Cloud Storage directory.
  string gcs_staging_dir = 4;

  // The schema to use when parsing the data from the source.
  //
  // Supported values for user event imports:
  //
  // * `user_event` (default): One
  // [UserEvent][google.cloud.discoveryengine.v1beta.UserEvent] per row.
  //
  // Supported values for document imports:
  //
  // * `document` (default): One
  // [Document][google.cloud.discoveryengine.v1beta.Document] format per
  //   row. Each document must have a valid
  //   [Document.id][google.cloud.discoveryengine.v1beta.Document.id] and one of
  //   [Document.json_data][google.cloud.discoveryengine.v1beta.Document.json_data]
  //   or
  //   [Document.struct_data][google.cloud.discoveryengine.v1beta.Document.struct_data].
  // * `custom`: One custom data per row in arbitrary format that conforms to
  //   the defined [Schema][google.cloud.discoveryengine.v1beta.Schema] of the
  //   data store. This can only be used by the GENERIC Data Store vertical.
  string data_schema = 6;
}

// The Spanner source for importing data.
message SpannerSource {
  // The project ID that the Spanner source is in with a length limit of 128
  // characters. If not specified, inherits the project ID from the parent
  // request.
  string project_id = 1;

  // Required. The instance ID of the source Spanner table.
  string instance_id = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. The database ID of the source Spanner table.
  string database_id = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. The table name of the Spanner database that needs to be imported.
  string table_id = 4 [(google.api.field_behavior) = REQUIRED];

  // Whether to apply data boost on Spanner export. Enabling this option will
  // incur additional cost. More info can be found
  // [here](https://cloud.google.com/spanner/docs/databoost/databoost-overview#billing_and_quotas).
  bool enable_data_boost = 5;
}

// The Bigtable Options object that contains information to support
// the import.
message BigtableOptions {
  // The column family of the Bigtable.
  message BigtableColumnFamily {
    // The field name to use for this column family in the document. The
    // name has to match the pattern `[a-zA-Z0-9][a-zA-Z0-9-_]*`. If not set,
    // it is parsed from the family name with best effort. However, due to
    // different naming patterns, field name collisions could happen, where
    // parsing behavior is undefined.
    string field_name = 1;

    // The encoding mode of the values when the type is not `STRING`.
    // Acceptable encoding values are:
    //
    // * `TEXT`: indicates values are alphanumeric text strings.
    // * `BINARY`: indicates values are encoded using `HBase Bytes.toBytes`
    // family of functions. This can be overridden for a specific column
    // by listing that column in `columns` and specifying an encoding for it.
    Encoding encoding = 2;

    // The type of values in this column family.
    // The values are expected to be encoded using `HBase Bytes.toBytes`
    // function when the encoding value is set to `BINARY`.
    Type type = 3;

    // The list of objects that contains column level information for each
    // column. If a column is not present in this list it will be ignored.
    repeated BigtableColumn columns = 4;
  }

  // The column of the Bigtable.
  message BigtableColumn {
    // Required. Qualifier of the column. If it cannot be decoded with utf-8,
    // use a base-64 encoded string instead.
    bytes qualifier = 1 [(google.api.field_behavior) = REQUIRED];

    // The field name to use for this column in the document. The name has to
    // match the pattern `[a-zA-Z0-9][a-zA-Z0-9-_]*`.
    // If not set, it is parsed from the qualifier bytes with best effort.
    // However, due to different naming patterns, field name collisions could
    // happen, where parsing behavior is undefined.
    string field_name = 2;

    // The encoding mode of the values when the type is not `STRING`.
    // Acceptable encoding values are:
    //
    // * `TEXT`: indicates values are alphanumeric text strings.
    // * `BINARY`: indicates values are encoded using `HBase Bytes.toBytes`
    // family of functions.
    //
    // An encoding specified here overrides, for this column, the encoding
    // set on the column family that lists it in `columns`.
    Encoding encoding = 3;

    // The type of values in this column.
    // The values are expected to be encoded using `HBase Bytes.toBytes`
    // function when the encoding value is set to `BINARY`.
    Type type = 4;
  }

  // The type of values in a Bigtable column or column family.
  // The values are expected to be encoded using
  // [HBase
  // Bytes.toBytes](https://hbase.apache.org/apidocs/org/apache/hadoop/hbase/util/Bytes.html)
  // function when the encoding value is set to `BINARY`.
  enum Type {
    // The type is unspecified.
    TYPE_UNSPECIFIED = 0;

    // String type.
    STRING = 1;

    // Numerical type.
    NUMBER = 2;

    // Integer type.
    INTEGER = 3;

    // Variable length integer type.
    VAR_INTEGER = 4;

    // BigDecimal type.
    BIG_NUMERIC = 5;

    // Boolean type.
    BOOLEAN = 6;

    // JSON type.
    JSON = 7;
  }

  // The encoding mode of a Bigtable column or column family.
  enum Encoding {
    // The encoding is unspecified.
    ENCODING_UNSPECIFIED = 0;

    // Text encoding.
    TEXT = 1;

    // Binary encoding.
    BINARY = 2;
  }

  // The field name used for saving row key value in the document. The name has
  // to match the pattern `[a-zA-Z0-9][a-zA-Z0-9-_]*`.
  string key_field_name = 1;

  // The mapping from family names to an object that contains column families
  // level information for the given column family. If a family is not present
  // in this map it will be ignored.
  map<string, BigtableColumnFamily> families = 2;
}

// The Cloud Bigtable source for importing data.
message BigtableSource {
  // The project ID that the Bigtable source is in with a length limit of 128
  // characters. If not specified, inherits the project ID from the parent
  // request.
  string project_id = 1;

  // Required. The instance ID of the Cloud Bigtable that needs to be imported.
  string instance_id = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. The table ID of the Cloud Bigtable that needs to be imported.
  string table_id = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. Bigtable options that contains information needed when parsing
  // data into typed structures. For example, column type annotations.
  BigtableOptions bigtable_options = 4
      [(google.api.field_behavior) = REQUIRED];
}

// Cloud FhirStore source to import data from.
message FhirStoreSource {
  // Required. The full resource name of the FHIR store to import data from, in
  // the format of
  // `projects/{project}/locations/{location}/datasets/{dataset}/fhirStores/{fhir_store}`.
  string fhir_store = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "healthcare.googleapis.com/FhirStore"
    }
  ];

  // Intermediate Cloud Storage directory used for the import with a length
  // limit of 2,000 characters. Can be specified if one wants to have the
  // FhirStore export to a specific Cloud Storage directory.
  string gcs_staging_dir = 2;
}

// Cloud SQL source to import data from.
message CloudSqlSource {
  // The project ID that the Cloud SQL source is in with a length limit of 128
  // characters. If not specified, inherits the project ID from the parent
  // request.
  string project_id = 1;

  // Required. The Cloud SQL instance to copy the data from with a length limit
  // of 256 characters.
  string instance_id = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. The Cloud SQL database to copy the data from with a length limit
  // of 256 characters.
  string database_id = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. The Cloud SQL table to copy the data from with a length limit of
  // 256 characters.
  string table_id = 4 [(google.api.field_behavior) = REQUIRED];

  // Intermediate Cloud Storage directory used for the import with a length
  // limit of 2,000 characters. Can be specified if one wants to have the
  // Cloud SQL export to a specific Cloud Storage directory.
  //
  // Ensure that the Cloud SQL service account has the necessary Cloud
  // Storage Admin permissions to access the specified Cloud Storage directory.
  string gcs_staging_dir = 5;

  // Option for serverless export. Enabling this option will incur additional
  // cost. More info can be found
  // [here](https://cloud.google.com/sql/pricing#serverless).
  bool offload = 6;
}

// Firestore source to import data from.
message FirestoreSource {
  // The project ID that the Firestore source is in with a length limit of 128
  // characters. If not specified, inherits the project ID from the parent
  // request.
  string project_id = 1;

  // Required. The Firestore database to copy the data from with a length limit
  // of 256 characters.
  string database_id = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. The Firestore collection to copy the data from with a length
  // limit of 1,500 characters.
  string collection_id = 3 [(google.api.field_behavior) = REQUIRED];

  // Intermediate Cloud Storage directory used for the import with a length
  // limit of 2,000 characters. Can be specified if one wants to have the
  // Firestore export to a specific Cloud Storage directory.
  //
  // Ensure that the Firestore service account has the necessary Cloud
  // Storage Admin permissions to access the specified Cloud Storage directory.
  string gcs_staging_dir = 4;
}

// Configuration of destination for Import related errors.
message ImportErrorConfig {
  // Required. Errors destination.
  oneof destination {
    // Cloud Storage prefix for import errors. This must be an empty,
    // existing Cloud Storage directory. Import errors are written to
    // sharded files in this directory, one per line, as a JSON-encoded
    // `google.rpc.Status` message.
    string gcs_prefix = 1;
  }
}

// Request message for the ImportUserEvents request.
message ImportUserEventsRequest {
  // The inline source for the input config for ImportUserEvents method.
  message InlineSource {
    // Required. A list of user events to import. Recommended max of 10k items.
    repeated UserEvent user_events = 1
        [(google.api.field_behavior) = REQUIRED];
  }

  // Required. The desired input source of the user event data.
  oneof source {
    // The Inline source for the input content for UserEvents.
    InlineSource inline_source = 2;

    // Cloud Storage location for the input content.
    GcsSource gcs_source = 3;

    // BigQuery input source.
    BigQuerySource bigquery_source = 4;
  }

  // Required. Parent DataStore resource name, of the form
  // `projects/{project}/locations/{location}/collections/{collection}/dataStores/{data_store}`
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "discoveryengine.googleapis.com/DataStore"
    }
  ];

  // The desired location of errors incurred during the Import. Cannot be set
  // for inline user event imports.
  ImportErrorConfig error_config = 5;
}

// Response of the ImportUserEventsRequest. If the long running
// operation was successful, then this message is returned by the
// google.longrunning.Operations.response field.
message ImportUserEventsResponse {
  // A sample of errors encountered while processing the request.
  repeated google.rpc.Status error_samples = 1;

  // Echoes the destination for the complete errors if this field was set in
  // the request.
  ImportErrorConfig error_config = 2;

  // Count of user events imported with complete existing Documents.
  int64 joined_events_count = 3;

  // Count of user events imported, but with Document information not found
  // in the existing Branch.
  int64 unjoined_events_count = 4;
}

// Metadata related to the progress of the Import operation. This is
// returned by the google.longrunning.Operation.metadata field.
message ImportUserEventsMetadata {
  // Operation create time.
  google.protobuf.Timestamp create_time = 1;

  // Operation last update time. If the operation is done, this is also the
  // finish time.
  google.protobuf.Timestamp update_time = 2;

  // Count of entries that were processed successfully.
  int64 success_count = 3;

  // Count of entries that encountered errors while processing.
  int64 failure_count = 4;
}

// Metadata related to the progress of the ImportDocuments operation. This is
// returned by the google.longrunning.Operation.metadata field.
message ImportDocumentsMetadata {
  // Operation create time.
  google.protobuf.Timestamp create_time = 1;

  // Operation last update time. If the operation is done, this is also the
  // finish time.
  google.protobuf.Timestamp update_time = 2;

  // Count of entries that were processed successfully.
  int64 success_count = 3;

  // Count of entries that encountered errors while processing.
  int64 failure_count = 4;

  // Total count of entries that were processed.
  int64 total_count = 5;
}

// Request message for Import methods.
message ImportDocumentsRequest {
  // The inline source for the input config for ImportDocuments method.
  message InlineSource {
    // Required. A list of documents to update/create. Each document must have a
    // valid [Document.id][google.cloud.discoveryengine.v1beta.Document.id].
    // Recommended max of 100 items.
    repeated Document documents = 1 [(google.api.field_behavior) = REQUIRED];
  }

  // Indicates how imported documents are reconciled with the existing documents
  // created or imported before.
  enum ReconciliationMode {
    // Defaults to `INCREMENTAL`.
    RECONCILIATION_MODE_UNSPECIFIED = 0;

    // Inserts new documents or updates existing documents.
    INCREMENTAL = 1;

    // Calculates diff and replaces the entire document dataset. Existing
    // documents may be deleted if they are not present in the source location.
    FULL = 2;
  }

  // Required. The source of the input.
  oneof source {
    // The Inline source for the input content for documents.
    InlineSource inline_source = 2;

    // Cloud Storage location for the input content.
    GcsSource gcs_source = 3;

    // BigQuery input source.
    BigQuerySource bigquery_source = 4;

    // FhirStore input source.
    FhirStoreSource fhir_store_source = 10;

    // Spanner input source.
    SpannerSource spanner_source = 11;

    // Cloud SQL input source.
    CloudSqlSource cloud_sql_source = 12;

    // Firestore input source.
    FirestoreSource firestore_source = 13;

    // Cloud Bigtable input source.
    BigtableSource bigtable_source = 15;
  }

  // Required. The parent branch resource name, such as
  // `projects/{project}/locations/{location}/collections/{collection}/dataStores/{data_store}/branches/{branch}`.
  // Requires create/update permission.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "discoveryengine.googleapis.com/Branch"
    }
  ];

  // The desired location of errors incurred during the Import.
  ImportErrorConfig error_config = 5;

  // The mode of reconciliation between existing documents and the documents to
  // be imported. Defaults to
  // [ReconciliationMode.INCREMENTAL][google.cloud.discoveryengine.v1beta.ImportDocumentsRequest.ReconciliationMode.INCREMENTAL].
  ReconciliationMode reconciliation_mode = 6;

  // Indicates which fields in the provided imported documents to update. If
  // not set, the default is to update all fields.
  google.protobuf.FieldMask update_mask = 7;

  // Whether to automatically generate IDs for the documents if absent.
  //
  // If set to `true`,
  // [Document.id][google.cloud.discoveryengine.v1beta.Document.id]s are
  // automatically generated based on the hash of the payload, where IDs may not
  // be consistent during multiple imports. In which case
  // [ReconciliationMode.FULL][google.cloud.discoveryengine.v1beta.ImportDocumentsRequest.ReconciliationMode.FULL]
  // is highly recommended to avoid duplicate contents. If unset or set to
  // `false`, [Document.id][google.cloud.discoveryengine.v1beta.Document.id]s
  // have to be specified using
  // [id_field][google.cloud.discoveryengine.v1beta.ImportDocumentsRequest.id_field],
  // otherwise, documents without IDs fail to be imported.
  //
  // Supported data sources:
  //
  // * [GcsSource][google.cloud.discoveryengine.v1beta.GcsSource].
  // [GcsSource.data_schema][google.cloud.discoveryengine.v1beta.GcsSource.data_schema]
  // must be `custom` or `csv`. Otherwise, an INVALID_ARGUMENT error is thrown.
  // * [BigQuerySource][google.cloud.discoveryengine.v1beta.BigQuerySource].
  // [BigQuerySource.data_schema][google.cloud.discoveryengine.v1beta.BigQuerySource.data_schema]
  // must be `custom` or `csv`. Otherwise, an INVALID_ARGUMENT error is thrown.
  // * [SpannerSource][google.cloud.discoveryengine.v1beta.SpannerSource].
  // * [CloudSqlSource][google.cloud.discoveryengine.v1beta.CloudSqlSource].
  // * [FirestoreSource][google.cloud.discoveryengine.v1beta.FirestoreSource].
  // * [BigtableSource][google.cloud.discoveryengine.v1beta.BigtableSource].
  bool auto_generate_ids = 8;

  // The field indicates the ID field or column to be used as unique IDs of
  // the documents.
  //
  // For [GcsSource][google.cloud.discoveryengine.v1beta.GcsSource] it is the
  // key of the JSON field. For instance, `my_id` for JSON `{"my_id":
  // "some_uuid"}`. For others, it may be the column name of the table where the
  // unique ids are stored.
  //
  // The values of the JSON field or the table column are used as the
  // [Document.id][google.cloud.discoveryengine.v1beta.Document.id]s. The JSON
  // field or the table column must be of string type, and the values must be
  // set as valid strings conforming to
  // [RFC-1034](https://tools.ietf.org/html/rfc1034) with 1-63 characters.
  // Otherwise, documents without valid IDs fail to be imported.
  //
  // Only set this field when
  // [auto_generate_ids][google.cloud.discoveryengine.v1beta.ImportDocumentsRequest.auto_generate_ids]
  // is unset or set as `false`. Otherwise, an INVALID_ARGUMENT error is thrown.
  //
  // If it is unset, a default value `_id` is used when importing from the
  // allowed data sources.
  //
  // Supported data sources:
  //
  // * [GcsSource][google.cloud.discoveryengine.v1beta.GcsSource].
  // [GcsSource.data_schema][google.cloud.discoveryengine.v1beta.GcsSource.data_schema]
  // must be `custom` or `csv`. Otherwise, an INVALID_ARGUMENT error is thrown.
  // * [BigQuerySource][google.cloud.discoveryengine.v1beta.BigQuerySource].
  // [BigQuerySource.data_schema][google.cloud.discoveryengine.v1beta.BigQuerySource.data_schema]
  // must be `custom` or `csv`. Otherwise, an INVALID_ARGUMENT error is thrown.
  // * [SpannerSource][google.cloud.discoveryengine.v1beta.SpannerSource].
  // * [CloudSqlSource][google.cloud.discoveryengine.v1beta.CloudSqlSource].
  // * [FirestoreSource][google.cloud.discoveryengine.v1beta.FirestoreSource].
  // * [BigtableSource][google.cloud.discoveryengine.v1beta.BigtableSource].
  string id_field = 9;
}

// Response of the
// [ImportDocumentsRequest][google.cloud.discoveryengine.v1beta.ImportDocumentsRequest].
// If the long running operation is done and was successful, then this message
// is returned by the google.longrunning.Operations.response field.
message ImportDocumentsResponse {
  // A sample of errors encountered while processing the request.
  repeated google.rpc.Status error_samples = 1;

  // Echoes the destination for the complete errors in the request if set.
  ImportErrorConfig error_config = 2;
}

// Request message for
// [CompletionService.ImportSuggestionDenyListEntries][google.cloud.discoveryengine.v1beta.CompletionService.ImportSuggestionDenyListEntries]
// method.
message ImportSuggestionDenyListEntriesRequest {
  // The inline source for SuggestionDenyListEntry.
  message InlineSource {
    // Required. A list of all denylist entries to import. Max of 1000 items.
    repeated SuggestionDenyListEntry entries = 1
        [(google.api.field_behavior) = REQUIRED];
  }

  // The source of the updated SuggestionDenyList.
  oneof source {
    // The Inline source for the input content for suggestion deny list entries.
    InlineSource inline_source = 2;

    // Cloud Storage location for the input content.
    //
    // Only 1 file can be specified that contains all entries to import.
    // Supported values of `gcs_source.data_schema` for autocomplete suggestion
    // deny list entry imports:
    //
    // * `suggestion_deny_list` (default): One JSON
    // [SuggestionDenyListEntry][google.cloud.discoveryengine.v1beta.SuggestionDenyListEntry]
    // per line.
    GcsSource gcs_source = 3;
  }

  // Required. The parent data store resource name for which to import denylist
  // entries. Follows pattern projects/*/locations/*/collections/*/dataStores/*.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "discoveryengine.googleapis.com/DataStore"
    }
  ];
}

// Response message for
// [CompletionService.ImportSuggestionDenyListEntries][google.cloud.discoveryengine.v1beta.CompletionService.ImportSuggestionDenyListEntries]
// method.
message ImportSuggestionDenyListEntriesResponse {
  // A sample of errors encountered while processing the request.
  repeated google.rpc.Status error_samples = 1;

  // Count of deny list entries successfully imported.
  int64 imported_entries_count = 2;

  // Count of deny list entries that failed to be imported.
  int64 failed_entries_count = 3;
}

// Metadata related to the progress of the ImportSuggestionDenyListEntries
// operation. This is returned by the google.longrunning.Operation.metadata
// field.
message ImportSuggestionDenyListEntriesMetadata {
  // Operation create time.
  google.protobuf.Timestamp create_time = 1;

  // Operation last update time. If the operation is done, this is also the
  // finish time.
  google.protobuf.Timestamp update_time = 2;
}