// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dialogflow.cx.v3;

import "google/api/field_behavior.proto";
import "google/protobuf/struct.proto";

option cc_enable_arenas = true;
option csharp_namespace = "Google.Cloud.Dialogflow.Cx.V3";
option go_package = "cloud.google.com/go/dialogflow/cx/apiv3/cxpb;cxpb";
option java_multiple_files = true;
option java_outer_classname = "ResponseMessageProto";
option java_package = "com.google.cloud.dialogflow.cx.v3";
option objc_class_prefix = "DF";
option ruby_package = "Google::Cloud::Dialogflow::CX::V3";

// Represents a response message that can be returned by a conversational agent.
//
// Response messages are also used for output audio synthesis. The approach is
// as follows:
//
// * If at least one OutputAudioText response is present, then all
//   OutputAudioText responses are linearly concatenated, and the result is used
//   for output audio synthesis.
// * If the OutputAudioText responses are a mixture of text and SSML, then the
//   concatenated result is treated as SSML; otherwise, the result is treated as
//   either text or SSML as appropriate. The agent designer should ideally use
//   either text or SSML consistently throughout the bot design.
// * Otherwise, all Text responses are linearly concatenated, and the result is
//   used for output audio synthesis.
//
// This approach allows for more sophisticated user experience scenarios, where
// the text displayed to the user may differ from what is heard.
message ResponseMessage {
  // The text response message.
  message Text {
    // Required. A collection of text responses.
    repeated string text = 1 [(google.api.field_behavior) = REQUIRED];

    // Output only. Whether the playback of this message can be interrupted by
    // the end user's speech and the client can then starts the next Dialogflow
    // request.
    bool allow_playback_interruption = 2
        [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // Indicates that the conversation should be handed off to a live agent.
  //
  // Dialogflow only uses this to determine which conversations were handed off
  // to a human agent for measurement purposes. What else to do with this signal
  // is up to you and your handoff procedures.
  //
  // You may set this, for example:
  // * In the
  // [entry_fulfillment][google.cloud.dialogflow.cx.v3.Page.entry_fulfillment]
  // of a [Page][google.cloud.dialogflow.cx.v3.Page] if
  //   entering the page indicates something went extremely wrong in the
  //   conversation.
  // * In a webhook response when you determine that the customer issue can only
  //   be handled by a human.
  message LiveAgentHandoff {
    // Custom metadata for your handoff procedure. Dialogflow doesn't impose
    // any structure on this.
    google.protobuf.Struct metadata = 1;
  }

  // Indicates that the conversation succeeded, i.e., the bot handled the issue
  // that the customer talked to it about.
  //
  // Dialogflow only uses this to determine which conversations should be
  // counted as successful and doesn't process the metadata in this message in
  // any way. Note that Dialogflow also considers conversations that get to the
  // conversation end page as successful even if they don't return
  // [ConversationSuccess][google.cloud.dialogflow.cx.v3.ResponseMessage.ConversationSuccess].
  //
  // You may set this, for example:
  // * In the
  // [entry_fulfillment][google.cloud.dialogflow.cx.v3.Page.entry_fulfillment]
  // of a [Page][google.cloud.dialogflow.cx.v3.Page] if
  //   entering the page indicates that the conversation succeeded.
  // * In a webhook response when you determine that you handled the customer
  //   issue.
  message ConversationSuccess {
    // Custom metadata. Dialogflow doesn't impose any structure on this.
    google.protobuf.Struct metadata = 1;
  }

  // A text or ssml response that is preferentially used for TTS output audio
  // synthesis, as described in the comment on the ResponseMessage message.
  message OutputAudioText {
    // The source, which is either plain text or SSML.
    oneof source {
      // The raw text to be synthesized.
      string text = 1;

      // The SSML text to be synthesized. For more information, see
      // [SSML](/speech/text-to-speech/docs/ssml).
      string ssml = 2;
    }

    // Output only. Whether the playback of this message can be interrupted by
    // the end user's speech and the client can then starts the next Dialogflow
    // request.
    bool allow_playback_interruption = 3
        [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // Indicates that interaction with the Dialogflow agent has ended.
  // This message is generated by Dialogflow only and not supposed to be
  // defined by the user.
  message EndInteraction {}

  // Specifies an audio clip to be played by the client as part of the response.
  message PlayAudio {
    // Required. URI of the audio clip. Dialogflow does not impose any
    // validation on this value. It is specific to the client that reads it.
    string audio_uri = 1 [(google.api.field_behavior) = REQUIRED];

    // Output only. Whether the playback of this message can be interrupted by
    // the end user's speech and the client can then starts the next Dialogflow
    // request.
    bool allow_playback_interruption = 2
        [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // Represents an audio message that is composed of both segments
  // synthesized from the Dialogflow agent prompts and ones hosted externally
  // at the specified URIs.
  // The external URIs are specified via
  // [play_audio][google.cloud.dialogflow.cx.v3.ResponseMessage.play_audio].
  // This message is generated by Dialogflow only and not supposed to be
  // defined by the user.
  message MixedAudio {
    // Represents one segment of audio.
    message Segment {
      // Content of the segment.
      oneof content {
        // Raw audio synthesized from the Dialogflow agent's response using
        // the output config specified in the request.
        bytes audio = 1;

        // Client-specific URI that points to an audio clip accessible to the
        // client. Dialogflow does not impose any validation on it.
        string uri = 2;
      }

      // Output only. Whether the playback of this segment can be interrupted by
      // the end user's speech and the client should then start the next
      // Dialogflow request.
      bool allow_playback_interruption = 3
          [(google.api.field_behavior) = OUTPUT_ONLY];
    }

    // Segments this audio response is composed of.
    repeated Segment segments = 1;
  }

  // Represents the signal that telles the client to transfer the phone call
  // connected to the agent to a third-party endpoint.
  message TelephonyTransferCall {
    // Endpoint to transfer the call to.
    oneof endpoint {
      // Transfer the call to a phone number
      // in [E.164 format](https://en.wikipedia.org/wiki/E.164).
      string phone_number = 1;
    }
  }

  // Required. The rich response message.
  oneof message {
    // Returns a text response.
    Text text = 1;

    // Returns a response containing a custom, platform-specific payload.
    google.protobuf.Struct payload = 2;

    // Indicates that the conversation succeeded.
    ConversationSuccess conversation_success = 9;

    // A text or ssml response that is preferentially used for TTS output audio
    // synthesis, as described in the comment on the ResponseMessage message.
    OutputAudioText output_audio_text = 8;

    // Hands off conversation to a human agent.
    LiveAgentHandoff live_agent_handoff = 10;

    // Output only. A signal that indicates the interaction with the Dialogflow
    // agent has ended. This message is generated by Dialogflow only when the
    // conversation reaches `END_SESSION` page. It is not supposed to be defined
    // by the user.
    //
    // It's guaranteed that there is at most one such message in each response.
    EndInteraction end_interaction = 11
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Signal that the client should play an audio clip hosted at a
    // client-specific URI. Dialogflow uses this to construct
    // [mixed_audio][google.cloud.dialogflow.cx.v3.ResponseMessage.mixed_audio].
    // However, Dialogflow itself does not try to read or process the URI in any
    // way.
    PlayAudio play_audio = 12;

    // Output only. An audio response message composed of both the synthesized
    // Dialogflow agent responses and responses defined via
    // [play_audio][google.cloud.dialogflow.cx.v3.ResponseMessage.play_audio].
    // This message is generated by Dialogflow only and not supposed to be
    // defined by the user.
    MixedAudio mixed_audio = 13 [(google.api.field_behavior) = OUTPUT_ONLY];

    // A signal that the client should transfer the phone call connected to
    // this agent to a third-party endpoint.
    TelephonyTransferCall telephony_transfer_call = 18;
  }

  // The channel which the response is associated with. Clients can specify the
  // channel via
  // [QueryParameters.channel][google.cloud.dialogflow.cx.v3.QueryParameters.channel],
  // and only associated channel response will be returned.
  string channel = 19;
}