// Copyright 2019 Google LLC. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // syntax = "proto3"; package google.cloud.dialogflow.v2beta1; import "google/api/annotations.proto"; import "google/api/field_behavior.proto"; import "google/api/resource.proto"; import "google/cloud/dialogflow/v2beta1/agent.proto"; import "google/cloud/dialogflow/v2beta1/audio_config.proto"; import "google/cloud/dialogflow/v2beta1/context.proto"; import "google/cloud/dialogflow/v2beta1/intent.proto"; import "google/cloud/dialogflow/v2beta1/session_entity_type.proto"; import "google/protobuf/duration.proto"; import "google/protobuf/struct.proto"; import "google/rpc/status.proto"; import "google/type/latlng.proto"; import "google/api/client.proto"; option cc_enable_arenas = true; option csharp_namespace = "Google.Cloud.Dialogflow.V2beta1"; option go_package = "google.golang.org/genproto/googleapis/cloud/dialogflow/v2beta1;dialogflow"; option java_multiple_files = true; option java_outer_classname = "SessionProto"; option java_package = "com.google.cloud.dialogflow.v2beta1"; option objc_class_prefix = "DF"; // A session represents an interaction with a user. You retrieve user input // and pass it to the [DetectIntent][google.cloud.dialogflow.v2beta1.Sessions.DetectIntent] (or // [StreamingDetectIntent][google.cloud.dialogflow.v2beta1.Sessions.StreamingDetectIntent]) method to determine // user intent and respond. service Sessions { option (google.api.default_host) = "dialogflow.googleapis.com"; option (google.api.oauth_scopes) = "https://www.googleapis.com/auth/cloud-platform," "https://www.googleapis.com/auth/dialogflow"; // Processes a natural language query and returns structured, actionable data // as a result. This method is not idempotent, because it may cause contexts // and session entity types to be updated, which in turn might affect // results of future queries. rpc DetectIntent(DetectIntentRequest) returns (DetectIntentResponse) { option (google.api.http) = { post: "/v2beta1/{session=projects/*/agent/sessions/*}:detectIntent" body: "*" additional_bindings { post: "/v2beta1/{session=projects/*/agent/environments/*/users/*/sessions/*}:detectIntent" body: "*" } additional_bindings { post: "/v2beta1/{session=projects/*/locations/*/agent/sessions/*}:detectIntent" body: "*" } additional_bindings { post: "/v2beta1/{session=projects/*/locations/*/agent/environments/*/users/*/sessions/*}:detectIntent" body: "*" } }; } // Processes a natural language query in audio format in a streaming fashion // and returns structured, actionable data as a result. This method is only // available via the gRPC API (not REST). rpc StreamingDetectIntent(stream StreamingDetectIntentRequest) returns (stream StreamingDetectIntentResponse) { } } // The request to detect user's intent. message DetectIntentRequest { // Required. The name of the session this query is sent to. Format: // `projects//agent/sessions/`, or // `projects//agent/environments//users//sessions/`. If `Environment ID` is not specified, we assume // default 'draft' environment. If `User ID` is not specified, we are using // "-". It's up to the API caller to choose an appropriate `Session ID` and // `User Id`. They can be a random number or some type of user and session // identifiers (preferably hashed). The length of the `Session ID` and // `User ID` must not exceed 36 characters. string session = 1 [(google.api.resource_reference) = { type: "dialogflow.googleapis.com/Session" }]; // Optional. The parameters of this query. QueryParameters query_params = 2; // Required. The input specification. It can be set to: // // 1. an audio config // which instructs the speech recognizer how to process the speech audio, // // 2. a conversational query in the form of text, or // // 3. an event that specifies which intent to trigger. QueryInput query_input = 3; // Optional. Instructs the speech synthesizer how to generate the output // audio. If this field is not set and agent-level speech synthesizer is not // configured, no output audio is generated. OutputAudioConfig output_audio_config = 4; // Optional. The natural language speech audio to be processed. This field // should be populated iff `query_input` is set to an input audio config. // A single request can contain up to 1 minute of speech audio data. bytes input_audio = 5; } // The message returned from the DetectIntent method. message DetectIntentResponse { // The unique identifier of the response. It can be used to // locate a response in the training example set or for reporting issues. string response_id = 1; // The selected results of the conversational query or event processing. // See `alternative_query_results` for additional potential results. QueryResult query_result = 2; // If Knowledge Connectors are enabled, there could be more than one result // returned for a given query or event, and this field will contain all // results except for the top one, which is captured in query_result. The // alternative results are ordered by decreasing // `QueryResult.intent_detection_confidence`. If Knowledge Connectors are // disabled, this field will be empty until multiple responses for regular // intents are supported, at which point those additional results will be // surfaced here. repeated QueryResult alternative_query_results = 5; // Specifies the status of the webhook request. google.rpc.Status webhook_status = 3; // The audio data bytes encoded as specified in the request. // Note: The output audio is generated based on the values of default platform // text responses found in the `query_result.fulfillment_messages` field. If // multiple default text responses exist, they will be concatenated when // generating audio. If no default platform text responses exist, the // generated audio content will be empty. bytes output_audio = 4; // The config used by the speech synthesizer to generate the output audio. OutputAudioConfig output_audio_config = 6; } // Represents the parameters of the conversational query. message QueryParameters { // Optional. The time zone of this conversational query from the // [time zone database](https://www.iana.org/time-zones), e.g., // America/New_York, Europe/Paris. If not provided, the time zone specified in // agent settings is used. string time_zone = 1; // Optional. The geo location of this conversational query. google.type.LatLng geo_location = 2; // Optional. The collection of contexts to be activated before this query is // executed. repeated Context contexts = 3; // Optional. Specifies whether to delete all contexts in the current session // before the new ones are activated. bool reset_contexts = 4; // Optional. Additional session entity types to replace or extend developer // entity types with. The entity synonyms apply to all languages and persist // for the session of this query. repeated SessionEntityType session_entity_types = 5; // Optional. This field can be used to pass custom data into the webhook // associated with the agent. Arbitrary JSON objects are supported. google.protobuf.Struct payload = 6; // Optional. KnowledgeBases to get alternative results from. If not set, the // KnowledgeBases enabled in the agent (through UI) will be used. // Format: `projects//knowledgeBases/`. repeated string knowledge_base_names = 12; // Optional. Configures the type of sentiment analysis to perform. If not // provided, sentiment analysis is not performed. // Note: Sentiment Analysis is only currently available for Enterprise Edition // agents. SentimentAnalysisRequestConfig sentiment_analysis_request_config = 10; } // Represents the query input. It can contain either: // // 1. An audio config which // instructs the speech recognizer how to process the speech audio. // // 2. A conversational query in the form of text. // // 3. An event that specifies which intent to trigger. message QueryInput { // Required. The input specification. oneof input { // Instructs the speech recognizer how to process the speech audio. InputAudioConfig audio_config = 1; // The natural language text to be processed. TextInput text = 2; // The event to be processed. EventInput event = 3; } } // Represents the result of conversational query or event processing. message QueryResult { // The original conversational query text: // // - If natural language text was provided as input, `query_text` contains // a copy of the input. // - If natural language speech audio was provided as input, `query_text` // contains the speech recognition result. If speech recognizer produced // multiple alternatives, a particular one is picked. // - If automatic spell correction is enabled, `query_text` will contain the // corrected user input. string query_text = 1; // The language that was triggered during intent detection. // See [Language // Support](https://cloud.google.com/dialogflow/docs/reference/language) // for a list of the currently supported language codes. string language_code = 15; // The Speech recognition confidence between 0.0 and 1.0. A higher number // indicates an estimated greater likelihood that the recognized words are // correct. The default of 0.0 is a sentinel value indicating that confidence // was not set. // // This field is not guaranteed to be accurate or set. In particular this // field isn't set for StreamingDetectIntent since the streaming endpoint has // separate confidence estimates per portion of the audio in // StreamingRecognitionResult. float speech_recognition_confidence = 2; // The action name from the matched intent. string action = 3; // The collection of extracted parameters. google.protobuf.Struct parameters = 4; // This field is set to: // // - `false` if the matched intent has required parameters and not all of // the required parameter values have been collected. // - `true` if all required parameter values have been collected, or if the // matched intent doesn't contain any required parameters. bool all_required_params_present = 5; // The text to be pronounced to the user or shown on the screen. // Note: This is a legacy field, `fulfillment_messages` should be preferred. string fulfillment_text = 6; // The collection of rich messages to present to the user. repeated Intent.Message fulfillment_messages = 7; // If the query was fulfilled by a webhook call, this field is set to the // value of the `source` field returned in the webhook response. string webhook_source = 8; // If the query was fulfilled by a webhook call, this field is set to the // value of the `payload` field returned in the webhook response. google.protobuf.Struct webhook_payload = 9; // The collection of output contexts. If applicable, // `output_contexts.parameters` contains entries with name // `.original` containing the original parameter values // before the query. repeated Context output_contexts = 10; // The intent that matched the conversational query. Some, not // all fields are filled in this message, including but not limited to: // `name`, `display_name`, `end_interaction` and `is_fallback`. Intent intent = 11; // The intent detection confidence. Values range from 0.0 // (completely uncertain) to 1.0 (completely certain). // This value is for informational purpose only and is only used to // help match the best intent within the classification threshold. // This value may change for the same end-user expression at any time due to a // model retraining or change in implementation. // If there are `multiple knowledge_answers` messages, this value is set to // the greatest `knowledgeAnswers.match_confidence` value in the list. float intent_detection_confidence = 12; // The free-form diagnostic info. For example, this field could contain // webhook call latency. The string keys of the Struct's fields map can change // without notice. google.protobuf.Struct diagnostic_info = 14; // The sentiment analysis result, which depends on the // `sentiment_analysis_request_config` specified in the request. SentimentAnalysisResult sentiment_analysis_result = 17; // The result from Knowledge Connector (if any), ordered by decreasing // `KnowledgeAnswers.match_confidence`. KnowledgeAnswers knowledge_answers = 18; } // Represents the result of querying a Knowledge base. message KnowledgeAnswers { // An answer from Knowledge Connector. message Answer { // Represents the system's confidence that this knowledge answer is a good // match for this conversational query. enum MatchConfidenceLevel { // Not specified. MATCH_CONFIDENCE_LEVEL_UNSPECIFIED = 0; // Indicates that the confidence is low. LOW = 1; // Indicates our confidence is medium. MEDIUM = 2; // Indicates our confidence is high. HIGH = 3; } // Indicates which Knowledge Document this answer was extracted from. // Format: `projects//knowledgeBases//documents/`. string source = 1 [(google.api.resource_reference) = { type: "dialogflow.googleapis.com/Document" }]; // The corresponding FAQ question if the answer was extracted from a FAQ // Document, empty otherwise. string faq_question = 2; // The piece of text from the `source` knowledge base document that answers // this conversational query. string answer = 3; // The system's confidence level that this knowledge answer is a good match // for this conversational query. // NOTE: The confidence level for a given `` pair may change // without notice, as it depends on models that are constantly being // improved. However, it will change less frequently than the confidence // score below, and should be preferred for referencing the quality of an // answer. MatchConfidenceLevel match_confidence_level = 4; // The system's confidence score that this Knowledge answer is a good match // for this conversational query. // The range is from 0.0 (completely uncertain) to 1.0 (completely certain). // Note: The confidence score is likely to vary somewhat (possibly even for // identical requests), as the underlying model is under constant // improvement. It may be deprecated in the future. We recommend using // `match_confidence_level` which should be generally more stable. float match_confidence = 5; } // A list of answers from Knowledge Connector. repeated Answer answers = 1; } // The top-level message sent by the client to the // [StreamingDetectIntent][] method. // // Multiple request messages should be sent in order: // // 1. The first message must contain [StreamingDetectIntentRequest.session][google.cloud.dialogflow.v2beta1.StreamingDetectIntentRequest.session], // [StreamingDetectIntentRequest.query_input] plus optionally // [StreamingDetectIntentRequest.query_params]. If the client wants to // receive an audio response, it should also contain // [StreamingDetectIntentRequest.output_audio_config][google.cloud.dialogflow.v2beta1.StreamingDetectIntentRequest.output_audio_config]. The message // must not contain [StreamingDetectIntentRequest.input_audio][google.cloud.dialogflow.v2beta1.StreamingDetectIntentRequest.input_audio]. // 2. If [StreamingDetectIntentRequest.query_input][google.cloud.dialogflow.v2beta1.StreamingDetectIntentRequest.query_input] was set to // [StreamingDetectIntentRequest.query_input.audio_config][], all subsequent // messages must contain [StreamingDetectIntentRequest.input_audio] to // continue with Speech recognition. // If you decide to rather detect an intent from text input after you // already started Speech recognition, please send a message with // [StreamingDetectIntentRequest.query_input.text][]. // // However, note that: // // * Dialogflow will bill you for the audio duration so far. // * Dialogflow discards all Speech recognition results in favor of the // input text. // * Dialogflow will use the language code from the first message. // // After you sent all input, you must half-close or abort the request stream. message StreamingDetectIntentRequest { // Required. The name of the session the query is sent to. // Format of the session name: // `projects//agent/sessions/`, or // `projects//agent/environments//users//sessions/`. If `Environment ID` is not specified, we assume // default 'draft' environment. If `User ID` is not specified, we are using // "-". It's up to the API caller to choose an appropriate `Session ID` and // `User Id`. They can be a random number or some type of user and session // identifiers (preferably hashed). The length of the `Session ID` and // `User ID` must not exceed 36 characters. string session = 1; // Optional. The parameters of this query. QueryParameters query_params = 2; // Required. The input specification. It can be set to: // // 1. an audio config which instructs the speech recognizer how to process // the speech audio, // // 2. a conversational query in the form of text, or // // 3. an event that specifies which intent to trigger. QueryInput query_input = 3; // DEPRECATED. Please use [InputAudioConfig.single_utterance][google.cloud.dialogflow.v2beta1.InputAudioConfig.single_utterance] instead. // Optional. If `false` (default), recognition does not cease until the // client closes the stream. // If `true`, the recognizer will detect a single spoken utterance in input // audio. Recognition ceases when it detects the audio's voice has // stopped or paused. In this case, once a detected intent is received, the // client should close the stream and start a new request with a new stream as // needed. // This setting is ignored when `query_input` is a piece of text or an event. bool single_utterance = 4; // Optional. Instructs the speech synthesizer how to generate the output // audio. If this field is not set and agent-level speech synthesizer is not // configured, no output audio is generated. OutputAudioConfig output_audio_config = 5; // Optional. The input audio content to be recognized. Must be sent if // `query_input` was set to a streaming input audio config. The complete audio // over all streaming messages must not exceed 1 minute. bytes input_audio = 6; } // The top-level message returned from the // `StreamingDetectIntent` method. // // Multiple response messages can be returned in order: // // 1. If the input was set to streaming audio, the first one or more messages // contain `recognition_result`. Each `recognition_result` represents a more // complete transcript of what the user said. The last `recognition_result` // has `is_final` set to `true`. // // 2. The next message contains `response_id`, `query_result`, // `alternative_query_results` and optionally `webhook_status` if a WebHook // was called. // // 3. If `output_audio_config` was specified in the request or agent-level // speech synthesizer is configured, all subsequent messages contain // `output_audio` and `output_audio_config`. message StreamingDetectIntentResponse { // The unique identifier of the response. It can be used to // locate a response in the training example set or for reporting issues. string response_id = 1; // The result of speech recognition. StreamingRecognitionResult recognition_result = 2; // The selected results of the conversational query or event processing. // See `alternative_query_results` for additional potential results. QueryResult query_result = 3; // If Knowledge Connectors are enabled, there could be more than one result // returned for a given query or event, and this field will contain all // results except for the top one, which is captured in query_result. The // alternative results are ordered by decreasing // `QueryResult.intent_detection_confidence`. If Knowledge Connectors are // disabled, this field will be empty until multiple responses for regular // intents are supported, at which point those additional results will be // surfaced here. repeated QueryResult alternative_query_results = 7; // Specifies the status of the webhook request. google.rpc.Status webhook_status = 4; // The audio data bytes encoded as specified in the request. // Note: The output audio is generated based on the values of default platform // text responses found in the `query_result.fulfillment_messages` field. If // multiple default text responses exist, they will be concatenated when // generating audio. If no default platform text responses exist, the // generated audio content will be empty. bytes output_audio = 5; // The config used by the speech synthesizer to generate the output audio. OutputAudioConfig output_audio_config = 6; } // Contains a speech recognition result corresponding to a portion of the audio // that is currently being processed or an indication that this is the end // of the single requested utterance. // // Example: // // 1. transcript: "tube" // // 2. transcript: "to be a" // // 3. transcript: "to be" // // 4. transcript: "to be or not to be" // is_final: true // // 5. transcript: " that's" // // 6. transcript: " that is" // // 7. message_type: `END_OF_SINGLE_UTTERANCE` // // 8. transcript: " that is the question" // is_final: true // // Only two of the responses contain final results (#4 and #8 indicated by // `is_final: true`). Concatenating these generates the full transcript: "to be // or not to be that is the question". // // In each response we populate: // // * for `TRANSCRIPT`: `transcript` and possibly `is_final`. // // * for `END_OF_SINGLE_UTTERANCE`: only `message_type`. message StreamingRecognitionResult { // Type of the response message. enum MessageType { // Not specified. Should never be used. MESSAGE_TYPE_UNSPECIFIED = 0; // Message contains a (possibly partial) transcript. TRANSCRIPT = 1; // Event indicates that the server has detected the end of the user's speech // utterance and expects no additional speech. Therefore, the server will // not process additional audio (although it may subsequently return // additional results). The client should stop sending additional audio // data, half-close the gRPC connection, and wait for any additional results // until the server closes the gRPC connection. This message is only sent if // `single_utterance` was set to `true`, and is not used otherwise. END_OF_SINGLE_UTTERANCE = 2; } // Type of the result message. MessageType message_type = 1; // Transcript text representing the words that the user spoke. // Populated if and only if `message_type` = `TRANSCRIPT`. string transcript = 2; // If `false`, the `StreamingRecognitionResult` represents an // interim result that may change. If `true`, the recognizer will not return // any further hypotheses about this piece of the audio. May only be populated // for `message_type` = `TRANSCRIPT`. bool is_final = 3; // The Speech confidence between 0.0 and 1.0 for the current portion of audio. // A higher number indicates an estimated greater likelihood that the // recognized words are correct. The default of 0.0 is a sentinel value // indicating that confidence was not set. // // This field is typically only provided if `is_final` is true and you should // not rely on it being accurate or even set. float confidence = 4; // An estimate of the likelihood that the speech recognizer will // not change its guess about this interim recognition result: // * If the value is unspecified or 0.0, Dialogflow didn't compute the // stability. In particular, Dialogflow will only provide stability for // `TRANSCRIPT` results with `is_final = false`. // * Otherwise, the value is in (0.0, 1.0] where 0.0 means completely // unstable and 1.0 means completely stable. float stability = 6; // Word-specific information for the words recognized by Speech in // [transcript][google.cloud.dialogflow.v2beta1.StreamingRecognitionResult.transcript]. Populated if and only if `message_type` = `TRANSCRIPT` and // [InputAudioConfig.enable_word_info] is set. repeated SpeechWordInfo speech_word_info = 7; // Time offset of the end of this Speech recognition result relative to the // beginning of the audio. Only populated for `message_type` = `TRANSCRIPT`. google.protobuf.Duration speech_end_offset = 8; } // Represents the natural language text to be processed. message TextInput { // Required. The UTF-8 encoded natural language text to be processed. // Text length must not exceed 256 characters. string text = 1; // Required. The language of this conversational query. See [Language // Support](https://cloud.google.com/dialogflow/docs/reference/language) // for a list of the currently supported language codes. Note that queries in // the same session do not necessarily need to specify the same language. string language_code = 2; } // Events allow for matching intents by event name instead of the natural // language input. For instance, input `` can trigger a personalized welcome response. // The parameter `name` may be used by the agent in the response: // `"Hello #welcome_event.name! What can I do for you today?"`. message EventInput { // Required. The unique identifier of the event. string name = 1; // Optional. The collection of parameters associated with the event. google.protobuf.Struct parameters = 2; // Required. The language of this query. See [Language // Support](https://cloud.google.com/dialogflow/docs/reference/language) // for a list of the currently supported language codes. Note that queries in // the same session do not necessarily need to specify the same language. string language_code = 3; } // Configures the types of sentiment analysis to perform. message SentimentAnalysisRequestConfig { // Optional. Instructs the service to perform sentiment analysis on // `query_text`. If not provided, sentiment analysis is not performed on // `query_text`. bool analyze_query_text_sentiment = 1; } // The result of sentiment analysis as configured by // `sentiment_analysis_request_config`. message SentimentAnalysisResult { // The sentiment analysis result for `query_text`. Sentiment query_text_sentiment = 1; } // The sentiment, such as positive/negative feeling or association, for a unit // of analysis, such as the query text. message Sentiment { // Sentiment score between -1.0 (negative sentiment) and 1.0 (positive // sentiment). float score = 1; // A non-negative number in the [0, +inf) range, which represents the absolute // magnitude of sentiment, regardless of score (positive or negative). float magnitude = 2; }