// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.discoveryengine.v1alpha;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/discoveryengine/v1alpha/chunk.proto";
import "google/cloud/discoveryengine/v1alpha/common.proto";
import "google/cloud/discoveryengine/v1alpha/document.proto";
import "google/protobuf/struct.proto";

option csharp_namespace = "Google.Cloud.DiscoveryEngine.V1Alpha";
option go_package = "cloud.google.com/go/discoveryengine/apiv1alpha/discoveryenginepb;discoveryenginepb";
option java_multiple_files = true;
option java_outer_classname = "SearchServiceProto";
option java_package = "com.google.cloud.discoveryengine.v1alpha";
option objc_class_prefix = "DISCOVERYENGINE";
option php_namespace = "Google\\Cloud\\DiscoveryEngine\\V1alpha";
option ruby_package = "Google::Cloud::DiscoveryEngine::V1alpha";

// Service for search.
service SearchService {
  option (google.api.default_host) = "discoveryengine.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Performs a search.
rpc Search(SearchRequest) returns (SearchResponse) {
    option (google.api.http) = {
      post: "/v1alpha/{serving_config=projects/*/locations/*/dataStores/*/servingConfigs/*}:search"
      body: "*"
      additional_bindings {
        post: "/v1alpha/{serving_config=projects/*/locations/*/collections/*/dataStores/*/servingConfigs/*}:search"
        body: "*"
      }
      additional_bindings {
        post: "/v1alpha/{serving_config=projects/*/locations/*/collections/*/engines/*/servingConfigs/*}:search"
        body: "*"
      }
    };
  }
}

// Request message for
// [SearchService.Search][google.cloud.discoveryengine.v1alpha.SearchService.Search]
// method.
message SearchRequest {
  // Specifies the image query input.
  message ImageQuery {
    oneof image {
      // Base64 encoded image bytes. Supported image formats: JPEG, PNG, and
      // BMP.
      string image_bytes = 1;
    }
  }

  // A struct to define data stores to filter on in a search call.
  message DataStoreSpec {
    // Required. Full resource name of
    // [DataStore][google.cloud.discoveryengine.v1alpha.DataStore], such as
    // `projects/{project}/locations/{location}/collections/{collection_id}/dataStores/{data_store_id}`.
    string data_store = 1 [
      (google.api.field_behavior) = REQUIRED,
      (google.api.resource_reference) = {
        type: "discoveryengine.googleapis.com/DataStore"
      }
    ];
  }

  // A facet specification to perform faceted search.
  message FacetSpec {
    // Specifies how a facet is computed.
    message FacetKey {
      // Required. Supported textual and numerical facet keys in
      // [Document][google.cloud.discoveryengine.v1alpha.Document] object, over
      // which the facet values are computed. Facet key is case-sensitive.
      string key = 1 [(google.api.field_behavior) = REQUIRED];

      // Set only if values should be bucketed into intervals. Must be set
      // for facets with numerical values. Must not be set for facets with text
      // values. Maximum number of intervals is 30.
      repeated Interval intervals = 2;

      // Only get facet for the given restricted values. Only supported on
      // textual fields.
// For example, suppose "category" has three values
      // "Action > 2022", "Action > 2021" and "Sci-Fi > 2022". If set
      // "restricted_values" to "Action > 2022", the "category" facet only
      // contains "Action > 2022". Only supported on textual fields. Maximum
      // is 10.
      repeated string restricted_values = 3;

      // Only get facet values that start with the given string prefix. For
      // example, suppose "category" has three values "Action > 2022",
      // "Action > 2021" and "Sci-Fi > 2022". If set "prefixes" to "Action", the
      // "category" facet only contains "Action > 2022" and "Action > 2021".
      // Only supported on textual fields. Maximum is 10.
      repeated string prefixes = 4;

      // Only get facet values that contain the given strings. For example,
      // suppose "category" has three values "Action > 2022",
      // "Action > 2021" and "Sci-Fi > 2022". If set "contains" to "2022", the
      // "category" facet only contains "Action > 2022" and "Sci-Fi > 2022".
      // Only supported on textual fields. Maximum is 10.
      repeated string contains = 5;

      // True to make facet keys case insensitive when getting faceting
      // values with prefixes or contains; false otherwise.
      bool case_insensitive = 6;

      // The order in which facet values are returned.
      //
      // Allowed values are:
      //
      // * "count desc", which means order by
      // [SearchResponse.Facet.values.count][google.cloud.discoveryengine.v1alpha.SearchResponse.Facet.FacetValue.count]
      // descending.
      //
      // * "value desc", which means order by
      // [SearchResponse.Facet.values.value][google.cloud.discoveryengine.v1alpha.SearchResponse.Facet.FacetValue.value]
      // descending.
      //   Only applies to textual facets.
      //
      // If not set, textual values are sorted in [natural
      // order](https://en.wikipedia.org/wiki/Natural_sort_order); numerical
      // intervals are sorted in the order given by
      // [FacetSpec.FacetKey.intervals][google.cloud.discoveryengine.v1alpha.SearchRequest.FacetSpec.FacetKey.intervals].
      string order_by = 7;
    }

    // Required. The facet key specification.
FacetKey facet_key = 1 [(google.api.field_behavior) = REQUIRED];

    // Maximum number of facet values that should be returned for this facet.
    // If unspecified, defaults to 20. The maximum allowed value is 300. Values
    // above 300 are coerced to 300.
    //
    // If this field is negative, an `INVALID_ARGUMENT` is returned.
    int32 limit = 2;

    // List of keys to exclude when faceting.
    //
    // By default,
    // [FacetKey.key][google.cloud.discoveryengine.v1alpha.SearchRequest.FacetSpec.FacetKey.key]
    // is not excluded from the filter unless it is listed in this field.
    //
    // Listing a facet key in this field allows its values to appear as facet
    // results, even when they are filtered out of search results. Using this
    // field does not affect what search results are returned.
    //
    // For example, suppose there are 100 documents with the color facet "Red"
    // and 200 documents with the color facet "Blue". A query containing the
    // filter "color:ANY("Red")" and having "color" as
    // [FacetKey.key][google.cloud.discoveryengine.v1alpha.SearchRequest.FacetSpec.FacetKey.key]
    // would by default return only "Red" documents in the search results, and
    // also return "Red" with count 100 as the only color facet. Although there
    // are also blue documents available, "Blue" would not be shown as an
    // available facet value.
    //
    // If "color" is listed in "excludedFilterKeys", then the query returns the
    // facet values "Red" with count 100 and "Blue" with count 200, because the
    // "color" key is now excluded from the filter. Because this field doesn't
    // affect search results, the search results are still correctly filtered to
    // return only "Red" documents.
    //
    // A maximum of 100 values are allowed. Otherwise, an `INVALID_ARGUMENT`
    // error is returned.
    repeated string excluded_filter_keys = 3;

    // Enables dynamic position for this facet. If set to true, the position of
    // this facet among all facets in the response is determined automatically.
    // If dynamic facets are enabled, it is ordered together.
// If set to false, the position of this facet in the
    // response is the same as in the request, and it is ranked before
    // the facets with dynamic position enabled and all dynamic facets.
    //
    // For example, you may always want to have rating facet returned in
    // the response, but it's not necessary to always display the rating facet
    // at the top. In that case, you can set enable_dynamic_position to true so
    // that the position of rating facet in response is determined
    // automatically.
    //
    // Another example, assuming you have the following facets in the request:
    //
    // * "rating", enable_dynamic_position = true
    //
    // * "price", enable_dynamic_position = false
    //
    // * "brands", enable_dynamic_position = false
    //
    // And also you have dynamic facets enabled, which generates a facet
    // `gender`. Then the final order of the facets in the response can be
    // ("price", "brands", "rating", "gender") or ("price", "brands", "gender",
    // "rating"), depending on how the API orders "gender" and "rating" facets.
    // However, notice that "price" and "brands" are always
    // ranked at first and second position because their enable_dynamic_position
    // is false.
    bool enable_dynamic_position = 4;
  }

  // Boost specification to boost certain documents.
  message BoostSpec {
    // Boost applies to documents which match a condition.
    message ConditionBoostSpec {
      // Specification for custom ranking based on customer specified attribute
      // value. It provides more controls for customized ranking than the simple
      // (condition, boost) combination above.
      message BoostControlSpec {
        // The control points used to define the curve. The curve defined
        // through these control points can only be monotonically increasing
        // or decreasing (constant values are acceptable).
        message ControlPoint {
          // Can be one of:
          // 1. The numerical field value.
          // 2. The duration spec for freshness:
          // The value must be formatted as an XSD `dayTimeDuration` value (a
          // restricted subset of an ISO 8601 duration value).
// The pattern for
          // this is: `[nD][T[nH][nM][nS]]`.
          string attribute_value = 1;

          // The value between -1 and 1 by which to boost the score if the
          // attribute_value evaluates to the value specified above.
          float boost_amount = 2;
        }

        // The attribute (or function) for which the custom ranking is to be
        // applied.
        enum AttributeType {
          // Unspecified AttributeType.
          ATTRIBUTE_TYPE_UNSPECIFIED = 0;

          // The value of the numerical field will be used to dynamically update
          // the boost amount. In this case, the attribute_value (the x value)
          // of the control point will be the actual value of the numerical
          // field for which the boost_amount is specified.
          NUMERICAL = 1;

          // For the freshness use case the attribute value will be the duration
          // between the current time and the date in the datetime field
          // specified. The value must be formatted as an XSD `dayTimeDuration`
          // value (a restricted subset of an ISO 8601 duration value). The
          // pattern for this is: `[nD][T[nH][nM][nS]]`.
          // E.g. `5D`, `3DT12H30M`, `T24H`.
          FRESHNESS = 2;
        }

        // The interpolation type to be applied. Default will be linear
        // (Piecewise Linear).
        enum InterpolationType {
          // Interpolation type is unspecified. In this case, it defaults to
          // Linear.
          INTERPOLATION_TYPE_UNSPECIFIED = 0;

          // Piecewise linear interpolation will be applied.
          LINEAR = 1;
        }

        // The name of the field whose value will be used to determine the
        // boost amount.
        string field_name = 1;

        // The attribute type to be used to determine the boost amount. The
        // attribute value can be derived from the field value of the specified
        // field_name. In the case of numerical it is straightforward i.e.
        // attribute_value = numerical_field_value. In the case of freshness
        // however, attribute_value = (time.now() - datetime_field_value).
        AttributeType attribute_type = 2;

        // The interpolation type to be applied to connect the control points
        // listed below.
        InterpolationType interpolation_type = 3;

        // The control points used to define the curve.
// The monotonic function
        // (defined through the interpolation_type above) passes through the
        // control points listed here.
        repeated ControlPoint control_points = 4;
      }

      // An expression which specifies a boost condition. The syntax and
      // supported fields are the same as a filter expression. See
      // [SearchRequest.filter][google.cloud.discoveryengine.v1alpha.SearchRequest.filter]
      // for detailed syntax and limitations.
      //
      // Examples:
      //
      // * To boost documents with document ID "doc_1" or "doc_2", and
      // color "Red" or "Blue":
      // `(document_id: ANY("doc_1", "doc_2")) AND (color: ANY("Red", "Blue"))`
      string condition = 1;

      // Strength of the condition boost, which should be in [-1, 1]. Negative
      // boost means demotion. Default is 0.0.
      //
      // Setting to 1.0 gives the document a big promotion. However, it does
      // not necessarily mean that the boosted document will be the top result
      // at all times, nor that other documents will be excluded. Results
      // could still be shown even when none of them matches the condition.
      // And results that are significantly more relevant to the search query
      // can still trump your heavily favored but irrelevant documents.
      //
      // Setting to -1.0 gives the document a big demotion. However, results
      // that are deeply relevant might still be shown. The document will have
      // an uphill battle to get a fairly high ranking, but it is not
      // blocked out completely.
      //
      // Setting to 0.0 means no boost applied. The boosting condition is
      // ignored. Only one of the (condition, boost) combination or the
      // boost_control_spec below should be set. If both are set then the global
      // boost is ignored and the more fine-grained boost_control_spec is
      // applied.
      float boost = 2;

      // Complex specification for custom ranking based on customer defined
      // attribute value.
      BoostControlSpec boost_control_spec = 3;
    }

    // Condition boost specifications.
// If a document matches multiple conditions
    // in the specifications, boost scores from these specifications are all
    // applied and combined in a non-linear way. Maximum number of
    // specifications is 20.
    repeated ConditionBoostSpec condition_boost_specs = 1;
  }

  // Specification to determine under which conditions query expansion should
  // occur.
  message QueryExpansionSpec {
    // Enum describing under which condition query expansion should occur.
    enum Condition {
      // Unspecified query expansion condition. In this case, server behavior
      // defaults to
      // [Condition.DISABLED][google.cloud.discoveryengine.v1alpha.SearchRequest.QueryExpansionSpec.Condition.DISABLED].
      CONDITION_UNSPECIFIED = 0;

      // Disabled query expansion. Only the exact search query is used, even if
      // [SearchResponse.total_size][google.cloud.discoveryengine.v1alpha.SearchResponse.total_size]
      // is zero.
      DISABLED = 1;

      // Automatic query expansion built by the Search API.
      AUTO = 2;
    }

    // The condition under which query expansion should occur. Defaults to
    // [Condition.DISABLED][google.cloud.discoveryengine.v1alpha.SearchRequest.QueryExpansionSpec.Condition.DISABLED].
    Condition condition = 1;

    // Whether to pin unexpanded results. If this field is set to true,
    // unexpanded products are always at the top of the search results, followed
    // by the expanded results.
    bool pin_unexpanded_results = 2;
  }

  // The specification for query spell correction.
  message SpellCorrectionSpec {
    // Enum describing under which mode spell correction should occur.
    enum Mode {
      // Unspecified spell correction mode. In this case, server behavior
      // defaults to
      // [Mode.AUTO][google.cloud.discoveryengine.v1alpha.SearchRequest.SpellCorrectionSpec.Mode.AUTO].
      MODE_UNSPECIFIED = 0;

      // Search API will try to find a spell suggestion if there
      // is any and put in the
      // [SearchResponse.corrected_query][google.cloud.discoveryengine.v1alpha.SearchResponse.corrected_query].
      // The spell suggestion will not be used as the search query.
SUGGESTION_ONLY = 1;

      // Automatic spell correction built by the Search API. Search will
      // be based on the corrected query if found.
      AUTO = 2;
    }

    // The mode under which spell correction should take effect to
    // replace the original search query. Defaults to
    // [Mode.AUTO][google.cloud.discoveryengine.v1alpha.SearchRequest.SpellCorrectionSpec.Mode.AUTO].
    Mode mode = 1;
  }

  // A specification for configuring the behavior of content search.
  message ContentSearchSpec {
    // A specification for configuring snippets in a search response.
    message SnippetSpec {
      // [DEPRECATED] This field is deprecated. To control snippet return, use
      // `return_snippet` field. For backwards compatibility, we will return
      // snippet if max_snippet_count > 0.
      int32 max_snippet_count = 1 [deprecated = true];

      // [DEPRECATED] This field is deprecated and will have no effect on the
      // snippet.
      bool reference_only = 2 [deprecated = true];

      // If `true`, then return snippet. If no snippet can be generated, we
      // return "No snippet is available for this page." A `snippet_status` with
      // `SUCCESS` or `NO_SNIPPET_AVAILABLE` will also be returned.
      bool return_snippet = 3;
    }

    // A specification for configuring a summary returned in a search
    // response.
    message SummarySpec {
      // Specification of the prompt to use with the model.
      message ModelPromptSpec {
        // Text at the beginning of the prompt that instructs the assistant.
        // Examples are available in the user guide.
        string preamble = 1;
      }

      // Specification of the model.
      message ModelSpec {
        // The model version used to generate the summary.
        //
        // Supported values are:
        //
        // * `stable`: string. Default value when no value is specified. Uses a
        //    generally available, fine-tuned model. For more information, see
        //    [Answer generation model versions and
        //    lifecycle](https://cloud.google.com/generative-ai-app-builder/docs/answer-generation-models).
        // * `preview`: string. (Public preview) Uses a preview model.
//    For more
        //    information, see
        //    [Answer generation model versions and
        //    lifecycle](https://cloud.google.com/generative-ai-app-builder/docs/answer-generation-models).
        string version = 1;
      }

      // The number of top results to generate the summary from. If the number
      // of results returned is less than `summaryResultCount`, the summary is
      // generated from all of the results.
      //
      // At most 10 results for documents mode, or 50 for chunks mode, can be
      // used to generate a summary. The chunks mode is used when
      // [SearchRequest.ContentSearchSpec.search_result_mode][google.cloud.discoveryengine.v1alpha.SearchRequest.ContentSearchSpec.search_result_mode]
      // is set to
      // [CHUNKS][google.cloud.discoveryengine.v1alpha.SearchRequest.ContentSearchSpec.SearchResultMode.CHUNKS].
      int32 summary_result_count = 1;

      // Specifies whether to include citations in the summary. The default
      // value is `false`.
      //
      // When this field is set to `true`, summaries include in-line citation
      // numbers.
      //
      // Example summary including citations:
      //
      // BigQuery is Google Cloud's fully managed and completely serverless
      // enterprise data warehouse [1]. BigQuery supports all data types, works
      // across clouds, and has built-in machine learning and business
      // intelligence, all within a unified platform [2, 3].
      //
      // The citation numbers refer to the returned search results and are
      // 1-indexed. For example, [1] means that the sentence is attributed to
      // the first search result. [2, 3] means that the sentence is attributed
      // to both the second and third search results.
      bool include_citations = 2;

      // Specifies whether to filter out adversarial queries. The default value
      // is `false`.
      //
      // Google employs search-query classification to detect adversarial
      // queries. No summary is returned if the search query is classified as an
      // adversarial query.
// For example, a user might ask a question regarding
      // negative comments about the company or submit a query designed to
      // generate unsafe, policy-violating output. If this field is set to
      // `true`, we skip generating summaries for adversarial queries and return
      // fallback messages instead.
      bool ignore_adversarial_query = 3;

      // Specifies whether to filter out queries that are not summary-seeking.
      // The default value is `false`.
      //
      // Google employs search-query classification to detect summary-seeking
      // queries. No summary is returned if the search query is classified as a
      // non-summary seeking query. For example, `why is the sky blue` and `Who
      // is the best soccer player in the world?` are summary-seeking queries,
      // but `SFO airport` and `world cup 2026` are not. They are most likely
      // navigational queries. If this field is set to `true`, we skip
      // generating summaries for non-summary seeking queries and return
      // fallback messages instead.
      bool ignore_non_summary_seeking_query = 4;

      // If specified, the spec will be used to modify the prompt provided to
      // the LLM.
      ModelPromptSpec model_prompt_spec = 5;

      // Language code for Summary. Use language tags defined by
      // [BCP47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt).
      // Note: This is an experimental feature.
      string language_code = 6;

      // If specified, the spec will be used to modify the model specification
      // provided to the LLM.
      ModelSpec model_spec = 7;

      // If true, answer will be generated from most relevant chunks from top
      // search results. This feature will improve summary quality.
      // Note that with this feature enabled, not all top search results
      // will be referenced and included in the reference list, so the citation
      // source index only points to the search results listed in the reference
      // list.
      bool use_semantic_chunks = 8;
    }

    // A specification for configuring the extractive content in a search
    // response.
message ExtractiveContentSpec {
      // The maximum number of extractive answers returned in each search
      // result.
      //
      // An extractive answer is a verbatim answer extracted from the original
      // document, which provides a precise and contextually relevant answer to
      // the search query.
      //
      // If the number of matching answers is less than the
      // `max_extractive_answer_count`, return all of the answers. Otherwise,
      // return the `max_extractive_answer_count`.
      //
      // At most five answers are returned for each
      // [SearchResult][google.cloud.discoveryengine.v1alpha.SearchResponse.SearchResult].
      int32 max_extractive_answer_count = 1;

      // The max number of extractive segments returned in each search result.
      // Only applied if the
      // [DataStore][google.cloud.discoveryengine.v1alpha.DataStore] is set to
      // [DataStore.ContentConfig.CONTENT_REQUIRED][google.cloud.discoveryengine.v1alpha.DataStore.ContentConfig.CONTENT_REQUIRED]
      // or
      // [DataStore.solution_types][google.cloud.discoveryengine.v1alpha.DataStore.solution_types]
      // is
      // [SOLUTION_TYPE_CHAT][google.cloud.discoveryengine.v1alpha.SolutionType.SOLUTION_TYPE_CHAT].
      //
      // An extractive segment is a text segment extracted from the original
      // document that is relevant to the search query, and, in general, more
      // verbose than an extractive answer. The segment could then be used as
      // input for LLMs to generate summaries and answers.
      //
      // If the number of matching segments is less than
      // `max_extractive_segment_count`, return all of the segments. Otherwise,
      // return the `max_extractive_segment_count`.
      int32 max_extractive_segment_count = 2;

      // Specifies whether to return the confidence score from the extractive
      // segments in each search result. This feature is available only for new
      // or allowlisted data stores. To allowlist your data store,
      // contact your Customer Engineer. The default value is `false`.
bool return_extractive_segment_score = 3;

      // Specifies whether to also include the segments adjacent to each
      // selected segment.
      // Return at most `num_previous_segments` segments before each selected
      // segment.
      int32 num_previous_segments = 4;

      // Return at most `num_next_segments` segments after each selected
      // segment.
      int32 num_next_segments = 5;
    }

    // Specifies the chunk spec to be returned from the search response.
    // Only available if the
    // [SearchRequest.ContentSearchSpec.search_result_mode][google.cloud.discoveryengine.v1alpha.SearchRequest.ContentSearchSpec.search_result_mode]
    // is set to
    // [CHUNKS][google.cloud.discoveryengine.v1alpha.SearchRequest.ContentSearchSpec.SearchResultMode.CHUNKS].
    message ChunkSpec {
      // The number of previous chunks to be returned of the current chunk. The
      // maximum allowed value is 3.
      // If not specified, no previous chunks will be returned.
      int32 num_previous_chunks = 1;

      // The number of next chunks to be returned of the current chunk. The
      // maximum allowed value is 3.
      // If not specified, no next chunks will be returned.
      int32 num_next_chunks = 2;
    }

    // Specifies the search result mode. If unspecified, the
    // search result mode is based on
    // [DataStore.DocumentProcessingConfig.chunking_config][]:
    // * If [DataStore.DocumentProcessingConfig.chunking_config][] is specified,
    //   it defaults to `CHUNKS`.
    // * Otherwise, it defaults to `DOCUMENTS`.
    enum SearchResultMode {
      // Default value.
      SEARCH_RESULT_MODE_UNSPECIFIED = 0;

      // Returns documents in the search result.
      DOCUMENTS = 1;

      // Returns chunks in the search result. Only available if the
      // [DataStore.DocumentProcessingConfig.chunking_config][] is specified.
      CHUNKS = 2;
    }

    // If `snippetSpec` is not specified, snippets are not included in the
    // search response.
    SnippetSpec snippet_spec = 1;

    // If `summarySpec` is not specified, summaries are not included in the
    // search response.
SummarySpec summary_spec = 2;

    // If there is no extractive_content_spec provided, there will be no
    // extractive answer in the search response.
    ExtractiveContentSpec extractive_content_spec = 3;

    // Specifies the search result mode. If unspecified, the
    // search result mode is based on
    // [DataStore.DocumentProcessingConfig.chunking_config][]:
    // * If [DataStore.DocumentProcessingConfig.chunking_config][] is specified,
    //   it defaults to `CHUNKS`.
    // * Otherwise, it defaults to `DOCUMENTS`.
    SearchResultMode search_result_mode = 4;

    // Specifies the chunk spec to be returned from the search response.
    // Only available if the
    // [SearchRequest.ContentSearchSpec.search_result_mode][google.cloud.discoveryengine.v1alpha.SearchRequest.ContentSearchSpec.search_result_mode]
    // is set to
    // [CHUNKS][google.cloud.discoveryengine.v1alpha.SearchRequest.ContentSearchSpec.SearchResultMode.CHUNKS].
    ChunkSpec chunk_spec = 5;
  }

  // The specification that uses customized query embedding vector to do
  // semantic document retrieval.
  message EmbeddingSpec {
    // Embedding vector.
    message EmbeddingVector {
      // Embedding field path in schema.
      string field_path = 1;

      // Query embedding vector.
      repeated float vector = 2;
    }

    // The embedding vector used for retrieval. Limit to 1.
    repeated EmbeddingVector embedding_vectors = 1;
  }

  // Required. The resource name of the Search serving config, such as
  // `projects/*/locations/global/collections/default_collection/engines/*/servingConfigs/default_serving_config`,
  // or
  // `projects/*/locations/global/collections/default_collection/dataStores/default_data_store/servingConfigs/default_serving_config`.
  // This field is used to identify the serving configuration name, set
  // of models used to make the search.
string serving_config = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "discoveryengine.googleapis.com/ServingConfig"
    }
  ];

  // The branch resource name, such as
  // `projects/*/locations/global/collections/default_collection/dataStores/default_data_store/branches/0`.
  //
  // Use `default_branch` as the branch ID or leave this field empty, to search
  // documents under the default branch.
  string branch = 2 [(google.api.resource_reference) = {
    type: "discoveryengine.googleapis.com/Branch"
  }];

  // Raw search query.
  string query = 3;

  // Raw image query.
  ImageQuery image_query = 19;

  // Maximum number of
  // [Document][google.cloud.discoveryengine.v1alpha.Document]s to return. The
  // maximum allowed value depends on the data type. Values above the maximum
  // value are coerced to the maximum value.
  //
  // * Websites with basic indexing: Default `10`, Maximum `25`.
  // * Websites with advanced indexing: Default `25`, Maximum `50`.
  // * Other: Default `50`, Maximum `100`.
  //
  // If this field is negative, an `INVALID_ARGUMENT` is returned.
  int32 page_size = 4;

  // A page token received from a previous
  // [SearchService.Search][google.cloud.discoveryengine.v1alpha.SearchService.Search]
  // call. Provide this to retrieve the subsequent page.
  //
  // When paginating, all other parameters provided to
  // [SearchService.Search][google.cloud.discoveryengine.v1alpha.SearchService.Search]
  // must match the call that provided the page token. Otherwise, an
  // `INVALID_ARGUMENT` error is returned.
  string page_token = 5;

  // A 0-indexed integer that specifies the current offset (that is, starting
  // result location, amongst the
  // [Document][google.cloud.discoveryengine.v1alpha.Document]s deemed by the
  // API as relevant) in search results. This field is only considered if
  // [page_token][google.cloud.discoveryengine.v1alpha.SearchRequest.page_token]
  // is unset.
  //
  // If this field is negative, an `INVALID_ARGUMENT` is returned.
int32 offset = 6;

  // A list of data store specs to apply on a search call.
  repeated DataStoreSpec data_store_specs = 32;

  // The filter syntax consists of an expression language for constructing a
  // predicate from one or more fields of the documents being filtered. Filter
  // expression is case-sensitive.
  //
  // If this field is unrecognizable, an `INVALID_ARGUMENT` is returned.
  //
  // Filtering in Vertex AI Search is done by mapping the LHS filter key to a
  // key property defined in the Vertex AI Search backend -- this mapping is
  // defined by the customer in their schema. For example a media customer might
  // have a field 'name' in their schema. In this case the filter would look
  // like this: filter --> name:'ANY("king kong")'
  //
  // For more information about filtering including syntax and filter
  // operators, see
  // [Filter](https://cloud.google.com/generative-ai-app-builder/docs/filter-search-metadata)
  string filter = 7;

  // The default filter that is applied when a user performs a search without
  // checking any filters on the search page.
  //
  // The filter applied to every search request when quality improvement such as
  // query expansion is needed. In the case a query does not have a sufficient
  // amount of results this filter will be used to determine whether or not to
  // enable the query expansion flow. The original filter will still be used for
  // the query expanded search.
  // This field is strongly recommended to achieve high search quality.
  //
  // For more information about filter syntax, see
  // [SearchRequest.filter][google.cloud.discoveryengine.v1alpha.SearchRequest.filter].
  string canonical_filter = 29;

  // The order in which documents are returned. Documents can be ordered by
  // a field in a [Document][google.cloud.discoveryengine.v1alpha.Document]
  // object. Leave it unset if ordered by relevance. `order_by` expression is
  // case-sensitive.
// For more information on ordering, see
  // [Ordering](https://cloud.google.com/retail/docs/filter-and-order#order)
  //
  // If this field is unrecognizable, an `INVALID_ARGUMENT` is returned.
  string order_by = 8;

  // Information about the end user.
  // Highly recommended for analytics.
  // [UserInfo.user_agent][google.cloud.discoveryengine.v1alpha.UserInfo.user_agent]
  // is used to deduce `device_type` for analytics.
  UserInfo user_info = 21;

  // Facet specifications for faceted search. If empty, no facets are returned.
  //
  // A maximum of 100 values are allowed. Otherwise, an `INVALID_ARGUMENT`
  // error is returned.
  repeated FacetSpec facet_specs = 9;

  // Boost specification to boost certain documents.
  // For more information on boosting, see
  // [Boosting](https://cloud.google.com/retail/docs/boosting#boost)
  BoostSpec boost_spec = 10;

  // Additional search parameters.
  //
  // For public website search only, supported values are:
  //
  // * `user_country_code`: string. Default empty. If set to non-empty, results
  //    are restricted or boosted based on the location provided.
  //    Example:
  //    user_country_code: "au"
  //
  //    For available codes see [Country
  //    Codes](https://developers.google.com/custom-search/docs/json_api_reference#countryCodes)
  //
  // * `search_type`: double. Default empty. Enables non-webpage searching
  //    depending on the value. The only valid non-default value is 1,
  //    which enables image searching.
  //    Example:
  //    search_type: 1
  //
  // Values are `google.protobuf.Value` so that a single map can carry both
  // the string-typed and the double-typed parameters listed above.
  map<string, google.protobuf.Value> params = 11;

  // The query expansion specification that specifies the conditions under which
  // query expansion occurs.
  QueryExpansionSpec query_expansion_spec = 13;

  // The spell correction specification that specifies the mode under
  // which spell correction takes effect.
  SpellCorrectionSpec spell_correction_spec = 14;

  // A unique identifier for tracking visitors. For example, this could be
  // implemented with an HTTP cookie, which should be able to uniquely identify
  // a visitor on a single device.
// This unique identifier should not change if
  // the visitor logs in or out of the website.
  //
  // This field should NOT have a fixed value such as `unknown_visitor`.
  //
  // This should be the same identifier as
  // [UserEvent.user_pseudo_id][google.cloud.discoveryengine.v1alpha.UserEvent.user_pseudo_id]
  // and
  // [CompleteQueryRequest.user_pseudo_id][google.cloud.discoveryengine.v1alpha.CompleteQueryRequest.user_pseudo_id]
  //
  // The field must be a UTF-8 encoded string with a length limit of 128
  // characters. Otherwise, an `INVALID_ARGUMENT` error is returned.
  string user_pseudo_id = 15;

  // A specification for configuring the behavior of content search.
  ContentSearchSpec content_search_spec = 24;

  // Uses the provided embedding to do additional semantic document retrieval.
  // The retrieval is based on the dot product of
  // [SearchRequest.EmbeddingSpec.EmbeddingVector.vector][google.cloud.discoveryengine.v1alpha.SearchRequest.EmbeddingSpec.EmbeddingVector.vector]
  // and the document embedding that is provided in
  // [SearchRequest.EmbeddingSpec.EmbeddingVector.field_path][google.cloud.discoveryengine.v1alpha.SearchRequest.EmbeddingSpec.EmbeddingVector.field_path].
  //
  // If
  // [SearchRequest.EmbeddingSpec.EmbeddingVector.field_path][google.cloud.discoveryengine.v1alpha.SearchRequest.EmbeddingSpec.EmbeddingVector.field_path]
  // is not provided, it will use
  // [ServingConfig.EmbeddingConfig.field_path][google.cloud.discoveryengine.v1alpha.ServingConfig.embedding_config].
  EmbeddingSpec embedding_spec = 23;

  // The ranking expression controls the customized ranking on retrieval
  // documents. This overrides
  // [ServingConfig.ranking_expression][google.cloud.discoveryengine.v1alpha.ServingConfig.ranking_expression].
  // The ranking expression is a single function or multiple functions that are
  // joined by "+".
//   * ranking_expression = function, { " + ", function };
  // Supported functions:
  //   * double * relevance_score
  //   * double * dotProduct(embedding_field_path)
  // Function variables:
  //   `relevance_score`: pre-defined keywords, used for measure relevance
  //   between query and document.
  //   `embedding_field_path`: the document embedding field
  //   used with query embedding vector.
  //   `dotProduct`: embedding function between embedding_field_path and query
  //   embedding vector.
  //
  // Example ranking expression:
  //   If document has an embedding field doc_embedding, the ranking expression
  //   could be `0.5 * relevance_score + 0.3 * dotProduct(doc_embedding)`.
  string ranking_expression = 26;

  // Whether to turn on safe search. This is only supported for
  // website search.
  bool safe_search = 20;

  // The user labels applied to a resource must meet the following requirements:
  //
  // * Each resource can have multiple labels, up to a maximum of 64.
  // * Each label must be a key-value pair.
  // * Keys have a minimum length of 1 character and a maximum length of 63
  //   characters and cannot be empty. Values can be empty and have a maximum
  //   length of 63 characters.
  // * Keys and values can contain only lowercase letters, numeric characters,
  //   underscores, and dashes. All characters must use UTF-8 encoding, and
  //   international characters are allowed.
  // * The key portion of a label must be unique. However, you can use the same
  //   key with multiple resources.
  // * Keys must start with a lowercase letter or international character.
  //
  // See [Google Cloud
  // Document](https://cloud.google.com/resource-manager/docs/creating-managing-labels#requirements)
  // for more details.
  map<string, string> user_labels = 22;

  // Custom fine tuning configs.
  CustomFineTuningSpec custom_fine_tuning_spec = 34;
}

// Response message for
// [SearchService.Search][google.cloud.discoveryengine.v1alpha.SearchService.Search]
// method.
message SearchResponse {
  // Represents the search results.
  message SearchResult {
    // [Document.id][google.cloud.discoveryengine.v1alpha.Document.id] of the
    // searched [Document][google.cloud.discoveryengine.v1alpha.Document].
    string id = 1;

    // The document data snippet in the search response. Only fields that are
    // marked as retrievable are populated.
    Document document = 2;

    // The chunk data in the search response if the
    // [SearchRequest.ContentSearchSpec.search_result_mode][google.cloud.discoveryengine.v1alpha.SearchRequest.ContentSearchSpec.search_result_mode]
    // is set to
    // [CHUNKS][google.cloud.discoveryengine.v1alpha.SearchRequest.ContentSearchSpec.SearchResultMode.CHUNKS].
    Chunk chunk = 18;

    // Google provided available scores.
    map<string, DoubleList> model_scores = 4;
  }

  // A facet result.
  message Facet {
    // A facet value which contains value names and their count.
    message FacetValue {
      // A facet value which contains values.
      oneof facet_value {
        // Text value of a facet, such as "Black" for facet "colors".
        string value = 1;

        // Interval value for a facet, such as [10, 20) for facet "price". It
        // matches
        // [SearchRequest.FacetSpec.FacetKey.intervals][google.cloud.discoveryengine.v1alpha.SearchRequest.FacetSpec.FacetKey.intervals].
        Interval interval = 2;
      }

      // Number of items that have this facet value.
      int64 count = 3;
    }

    // The key for this facet. E.g., "colors" or "price". It matches
    // [SearchRequest.FacetSpec.FacetKey.key][google.cloud.discoveryengine.v1alpha.SearchRequest.FacetSpec.FacetKey.key].
    string key = 1;

    // The facet values for this field.
    repeated FacetValue values = 2;

    // Whether the facet is dynamically generated.
    bool dynamic_facet = 3;
  }

  // Guided search result. The guided search helps the user refine the search
  // results and narrow them down to their real needs from a broader set of
  // search results.
  message GuidedSearchResult {
    // Useful attribute for search result refinements.
    message RefinementAttribute {
      // Attribute key used to refine the results, e.g. 'movie_type'.
      string attribute_key = 1;

      // Attribute value used to refine the results, e.g. 'drama'.
      string attribute_value = 2;
    }

    // A list of ranked refinement attributes.
    repeated RefinementAttribute refinement_attributes = 1;

    // Suggested follow-up questions.
    repeated string follow_up_questions = 2;
  }

  // Summary of the top N search results specified by the summary spec.
  message Summary {
    // Safety Attribute categories and their associated confidence scores.
    message SafetyAttributes {
      // The display names of Safety Attribute categories associated with the
      // generated content. Order matches the Scores.
      repeated string categories = 1;

      // The confidence scores of each category; a higher value means higher
      // confidence. Order matches the Categories.
      repeated float scores = 2;
    }

    // Citation metadata.
    message CitationMetadata {
      // Citations for segments.
      repeated Citation citations = 1;
    }

    // Citation info for a segment.
    message Citation {
      // Index indicates the start of the segment, measured in bytes/unicode.
      int64 start_index = 1;

      // End of the attributed segment, exclusive.
      int64 end_index = 2;

      // Citation sources for the attributed segment.
      repeated CitationSource sources = 3;
    }

    // Citation source.
    message CitationSource {
      // Document reference index from SummaryWithMetadata.references.
      // It is 0-indexed and the value will be zero if the reference_index is
      // not set explicitly.
      int64 reference_index = 4;
    }

    // Document reference.
    message Reference {
      // Chunk content.
      message ChunkContent {
        // Chunk textual content.
        string content = 1;

        // Page identifier.
        string page_identifier = 2;
      }

      // Title of the document.
      string title = 1;

      // Required.
      // [Document.name][google.cloud.discoveryengine.v1alpha.Document.name] of
      // the document. Full resource name of the referenced document, in the
      // format
      // `projects/*/locations/*/collections/*/dataStores/*/branches/*/documents/*`.
      string document = 2 [
        (google.api.field_behavior) = REQUIRED,
        (google.api.resource_reference) = {
          type: "discoveryengine.googleapis.com/Document"
        }
      ];

      // Cloud Storage or HTTP uri for the document.
      string uri = 3;

      // List of cited chunk contents derived from document content.
      repeated ChunkContent chunk_contents = 4;
    }

    // Summary with metadata information.
    message SummaryWithMetadata {
      // Summary text with no citation information.
      string summary = 1;

      // Citation metadata for given summary.
      CitationMetadata citation_metadata = 2;

      // Document References.
      repeated Reference references = 3;
    }

    // An Enum for summary-skipped reasons.
    enum SummarySkippedReason {
      // Default value. The summary skipped reason is not specified.
      SUMMARY_SKIPPED_REASON_UNSPECIFIED = 0;

      // The adversarial query ignored case.
      //
      // Only populated when
      // [SummarySpec.ignore_adversarial_query][google.cloud.discoveryengine.v1alpha.SearchRequest.ContentSearchSpec.SummarySpec.ignore_adversarial_query]
      // is set to `true`.
      ADVERSARIAL_QUERY_IGNORED = 1;

      // The non-summary seeking query ignored case.
      //
      // Only populated when
      // [SummarySpec.ignore_non_summary_seeking_query][google.cloud.discoveryengine.v1alpha.SearchRequest.ContentSearchSpec.SummarySpec.ignore_non_summary_seeking_query]
      // is set to `true`.
      NON_SUMMARY_SEEKING_QUERY_IGNORED = 2;

      // The out-of-domain query ignored case.
      //
      // Google skips the summary if there are no high-relevance search results.
      // For example, the data store contains facts about company A but the
      // user query is asking questions about company B.
      OUT_OF_DOMAIN_QUERY_IGNORED = 3;

      // The potential policy violation case.
      //
      // Google skips the summary if there is a potential policy violation
      // detected. This includes content that may be violent or toxic.
      POTENTIAL_POLICY_VIOLATION = 4;

      // The LLM addon not enabled case.
      //
      // Google skips the summary if the LLM addon is not enabled.
      LLM_ADDON_NOT_ENABLED = 5;
    }

    // The summary content.
    string summary_text = 1;

    // Additional summary-skipped reasons. This provides the reason for ignored
    // cases. If nothing is skipped, this field is not set.
    repeated SummarySkippedReason summary_skipped_reasons = 2;

    // A collection of Safety Attribute categories and their associated
    // confidence scores.
    SafetyAttributes safety_attributes = 3;

    // Summary with metadata information.
    SummaryWithMetadata summary_with_metadata = 4;
  }

  // Debug information specifically related to forward geocoding issues arising
  // from Geolocation Search.
  message GeoSearchDebugInfo {
    // The address from which forward geocoding ingestion produced issues.
    string original_address_query = 1;

    // The error produced.
    string error_message = 2;
  }

  // Information describing query expansion including whether expansion has
  // occurred.
  message QueryExpansionInfo {
    // Bool describing whether query expansion has occurred.
    bool expanded_query = 1;

    // Number of pinned results. This field will only be set when expansion
    // happens and
    // [SearchRequest.QueryExpansionSpec.pin_unexpanded_results][google.cloud.discoveryengine.v1alpha.SearchRequest.QueryExpansionSpec.pin_unexpanded_results]
    // is set to true.
    int64 pinned_result_count = 2;
  }

  // A list of matched documents. The order represents the ranking.
  repeated SearchResult results = 1;

  // Results of facets requested by user.
  repeated Facet facets = 2;

  // Guided search result.
  GuidedSearchResult guided_search_result = 8;

  // The estimated total count of matched items irrespective of pagination. The
  // count of
  // [results][google.cloud.discoveryengine.v1alpha.SearchResponse.results]
  // returned by pagination may be less than the
  // [total_size][google.cloud.discoveryengine.v1alpha.SearchResponse.total_size]
  // that matches.
  int32 total_size = 3;

  // A unique search token. This should be included in the
  // [UserEvent][google.cloud.discoveryengine.v1alpha.UserEvent] logs resulting
  // from this search, which enables accurate attribution of search model
  // performance.
  string attribution_token = 4;

  // The URI of a customer-defined redirect page. If redirect action is
  // triggered, no search is performed, and only
  // [redirect_uri][google.cloud.discoveryengine.v1alpha.SearchResponse.redirect_uri]
  // and
  // [attribution_token][google.cloud.discoveryengine.v1alpha.SearchResponse.attribution_token]
  // are set in the response.
  string redirect_uri = 12;

  // A token that can be sent as
  // [SearchRequest.page_token][google.cloud.discoveryengine.v1alpha.SearchRequest.page_token]
  // to retrieve the next page. If this field is omitted, there are no
  // subsequent pages.
  string next_page_token = 5;

  // Contains the spell corrected query, if found. If the spell correction type
  // is AUTOMATIC, then the search results are based on corrected_query.
  // Otherwise the original query is used for search.
  string corrected_query = 7;

  // A summary as part of the search results.
  // This field is only returned if
  // [SearchRequest.ContentSearchSpec.summary_spec][google.cloud.discoveryengine.v1alpha.SearchRequest.ContentSearchSpec.summary_spec]
  // is set.
  Summary summary = 9;

  // Controls applied as part of the Control service.
  repeated string applied_controls = 10;

  // Debug information about forward geocoding issues arising from Geolocation
  // Search; see
  // [GeoSearchDebugInfo][google.cloud.discoveryengine.v1alpha.SearchResponse.GeoSearchDebugInfo].
  repeated GeoSearchDebugInfo geo_search_debug_info = 16;

  // Query expansion information for the returned results.
  QueryExpansionInfo query_expansion_info = 14;
}