// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.ai.generativelanguage.v1beta;

import "google/ai/generativelanguage/v1beta/citation.proto";
import "google/ai/generativelanguage/v1beta/content.proto";
import "google/ai/generativelanguage/v1beta/retriever.proto";
import "google/ai/generativelanguage/v1beta/safety.proto";
import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";

option go_package = "cloud.google.com/go/ai/generativelanguage/apiv1beta/generativelanguagepb;generativelanguagepb";
option java_multiple_files = true;
option java_outer_classname = "GenerativeServiceProto";
option java_package = "com.google.ai.generativelanguage.v1beta";

// API for using Large Models that generate multimodal content and have
// additional capabilities beyond text generation.
service GenerativeService {
  option (google.api.default_host) = "generativelanguage.googleapis.com";

  // Generates a response from the model given an input
  // `GenerateContentRequest`.
  //
  // Input capabilities differ between models, including tuned models. See the
  // [model guide](https://ai.google.dev/models/gemini) and
  // [tuning guide](https://ai.google.dev/docs/model_tuning_guidance) for
  // details.
  //
  // HTTP bindings are declared for both `models/*` and `tunedModels/*`.
  rpc GenerateContent(GenerateContentRequest)
      returns (GenerateContentResponse) {
    option (google.api.http) = {
      post: "/v1beta/{model=models/*}:generateContent"
      body: "*"
      additional_bindings {
        post: "/v1beta/{model=tunedModels/*}:generateContent"
        body: "*"
      }
    };
    option (google.api.method_signature) = "model,contents";
  }

  // Generates a grounded answer from the model given an input
  // `GenerateAnswerRequest`.
  //
  // Only the `models/*` HTTP binding is declared for this method.
  rpc GenerateAnswer(GenerateAnswerRequest) returns (GenerateAnswerResponse) {
    option (google.api.http) = {
      post: "/v1beta/{model=models/*}:generateAnswer"
      body: "*"
    };
    option (google.api.method_signature) =
        "model,contents,safety_settings,answer_style";
  }

  // Generates a streamed response from the model given an input
  // `GenerateContentRequest`.
  //
  // The response is a server-side stream of `GenerateContentResponse`
  // messages. Only the `models/*` HTTP binding is declared for this method.
  rpc StreamGenerateContent(GenerateContentRequest)
      returns (stream GenerateContentResponse) {
    option (google.api.http) = {
      post: "/v1beta/{model=models/*}:streamGenerateContent"
      body: "*"
    };
    option (google.api.method_signature) = "model,contents";
  }

  // Generates an embedding from the model given an input `Content`.
  rpc EmbedContent(EmbedContentRequest) returns (EmbedContentResponse) {
    option (google.api.http) = {
      post: "/v1beta/{model=models/*}:embedContent"
      body: "*"
    };
    option (google.api.method_signature) = "model,content";
  }

  // Generates multiple embeddings from the model given input text in a
  // synchronous call.
  rpc BatchEmbedContents(BatchEmbedContentsRequest)
      returns (BatchEmbedContentsResponse) {
    option (google.api.http) = {
      post: "/v1beta/{model=models/*}:batchEmbedContents"
      body: "*"
    };
    option (google.api.method_signature) = "model,requests";
  }

  // Runs a model's tokenizer on input content and returns the token count.
  rpc CountTokens(CountTokensRequest) returns (CountTokensResponse) {
    option (google.api.http) = {
      post: "/v1beta/{model=models/*}:countTokens"
      body: "*"
    };
    option (google.api.method_signature) = "model,contents";
  }
}

// Type of task for which the embedding will be used.
enum TaskType {
  // Unset value, which will default to one of the other enum values.
  TASK_TYPE_UNSPECIFIED = 0;

  // Specifies the given text is a query in a search/retrieval setting.
  RETRIEVAL_QUERY = 1;

  // Specifies the given text is a document from the corpus being searched.
  RETRIEVAL_DOCUMENT = 2;

  // Specifies the given text will be used for semantic textual similarity
  // (STS).
  SEMANTIC_SIMILARITY = 3;

  // Specifies that the given text will be classified.
  CLASSIFICATION = 4;

  // Specifies that the embeddings will be used for clustering.
  CLUSTERING = 5;

  // Specifies that the given text will be used for question answering.
  QUESTION_ANSWERING = 6;

  // Specifies that the given text will be used for fact verification.
  FACT_VERIFICATION = 7;
}

// Request to generate a completion from the model.
message GenerateContentRequest {
  // Required. The name of the `Model` to use for generating the completion.
  //
  // Format: `models/{model}`.
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Optional. Developer set system instruction. Currently, text only.
  optional Content system_instruction = 8
      [(google.api.field_behavior) = OPTIONAL];

  // Required. The content of the current conversation with the model.
  //
  // For single-turn queries, this is a single instance. For multi-turn queries,
  // this is a repeated field that contains conversation history + latest
  // request.
  repeated Content contents = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. A list of `Tools` the model may use to generate the next
  // response.
  //
  // A `Tool` is a piece of code that enables the system to interact with
  // external systems to perform an action, or set of actions, outside of
  // knowledge and scope of the model. The only supported tool is currently
  // `Function`.
  repeated Tool tools = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Tool configuration for any `Tool` specified in the request.
  ToolConfig tool_config = 7 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A list of unique `SafetySetting` instances for blocking unsafe
  // content.
  //
  // This will be enforced on the `GenerateContentRequest.contents` and
  // `GenerateContentResponse.candidates`. There should not be more than one
  // setting for each `SafetyCategory` type. The API will block any contents and
  // responses that fail to meet the thresholds set by these settings. This list
  // overrides the default settings for each `SafetyCategory` specified in the
  // safety_settings. If there is no `SafetySetting` for a given
  // `SafetyCategory` provided in the list, the API will use the default safety
  // setting for that category. Harm categories HARM_CATEGORY_HATE_SPEECH,
  // HARM_CATEGORY_SEXUALLY_EXPLICIT, HARM_CATEGORY_DANGEROUS_CONTENT,
  // HARM_CATEGORY_HARASSMENT are supported.
  repeated SafetySetting safety_settings = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Configuration options for model generation and outputs.
  optional GenerationConfig generation_config = 4
      [(google.api.field_behavior) = OPTIONAL];
}

// Configuration options for model generation and outputs. Not all parameters
// may be configurable for every model.
message GenerationConfig {
  // Optional. Number of generated responses to return.
  //
  // Currently, this value can only be set to 1. If unset, this will default
  // to 1.
  optional int32 candidate_count = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The set of character sequences (up to 5) that will stop output
  // generation. If specified, the API will stop at the first appearance of a
  // stop sequence. The stop sequence will not be included as part of the
  // response.
  repeated string stop_sequences = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The maximum number of tokens to include in a candidate.
  //
  // Note: The default value varies by model, see the `Model.output_token_limit`
  // attribute of the `Model` returned from the `getModel` function.
  optional int32 max_output_tokens = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Controls the randomness of the output.
  //
  // Note: The default value varies by model, see the `Model.temperature`
  // attribute of the `Model` returned from the `getModel` function.
  //
  // Values can range from [0.0, 2.0].
  optional float temperature = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The maximum cumulative probability of tokens to consider when
  // sampling.
  //
  // The model uses combined Top-k and nucleus sampling.
  //
  // Tokens are sorted based on their assigned probabilities so that only the
  // most likely tokens are considered. Top-k sampling directly limits the
  // maximum number of tokens to consider, while Nucleus sampling limits number
  // of tokens based on the cumulative probability.
  //
  // Note: The default value varies by model, see the `Model.top_p`
  // attribute of the `Model` returned from the `getModel` function.
  optional float top_p = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The maximum number of tokens to consider when sampling.
  //
  // Models use nucleus sampling or combined Top-k and nucleus sampling.
  // Top-k sampling considers the set of `top_k` most probable tokens.
  // Models running with nucleus sampling don't allow top_k setting.
  //
  // Note: The default value varies by model, see the `Model.top_k`
  // attribute of the `Model` returned from the `getModel` function. Empty
  // `top_k` field in `Model` indicates the model doesn't apply top-k sampling
  // and doesn't allow setting `top_k` on requests.
  optional int32 top_k = 7 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Output response mimetype of the generated candidate text.
  // Supported mimetype:
  // `text/plain`: (default) Text output.
  // `application/json`: JSON response in the candidates.
  string response_mime_type = 13 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Output response schema of the generated candidate text when
  // response mime type can have schema. Schema can be objects, primitives or
  // arrays and is a subset of [OpenAPI
  // schema](https://spec.openapis.org/oas/v3.0.3#schema).
  //
  // If set, a compatible response_mime_type must also be set.
  // Compatible mimetypes:
  // `application/json`: Schema for JSON response.
  Schema response_schema = 14 [(google.api.field_behavior) = OPTIONAL];
}

// Configuration for retrieving grounding content from a `Corpus` or
// `Document` created using the Semantic Retriever API.
message SemanticRetrieverConfig {
  // Required. Name of the resource for retrieval, e.g. corpora/123 or
  // corpora/123/documents/abc.
  string source = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. Query to use for similarity matching `Chunk`s in the given
  // resource.
  Content query = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. Filters for selecting `Document`s and/or `Chunk`s from the
  // resource.
  repeated MetadataFilter metadata_filters = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Maximum number of relevant `Chunk`s to retrieve.
  optional int32 max_chunks_count = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Minimum relevance score for retrieved relevant `Chunk`s.
  optional float minimum_relevance_score = 5
      [(google.api.field_behavior) = OPTIONAL];
}

// Response from the model supporting multiple candidates.
//
// Note on safety ratings and content filtering. They are reported for both
// prompt in `GenerateContentResponse.prompt_feedback` and for each candidate
// in `finish_reason` and in `safety_ratings`.
// The API contract is that:
//  - either all requested candidates are returned or no candidates at all
//  - no candidates are returned only if there was something wrong with the
//    prompt (see `prompt_feedback`)
//  - feedback on each candidate is reported on `finish_reason` and
//    `safety_ratings`.
message GenerateContentResponse {
  // A set of the feedback metadata the prompt specified in
  // `GenerateContentRequest.contents`.
  message PromptFeedback {
    // Specifies what was the reason why prompt was blocked.
    enum BlockReason {
      // Default value. This value is unused.
      BLOCK_REASON_UNSPECIFIED = 0;

      // Prompt was blocked due to safety reasons. You can inspect
      // `safety_ratings` to understand which safety category blocked it.
      SAFETY = 1;

      // Prompt was blocked due to unknown reasons.
      OTHER = 2;
    }

    // Optional. If set, the prompt was blocked and no candidates are returned.
    // Rephrase your prompt.
    BlockReason block_reason = 1 [(google.api.field_behavior) = OPTIONAL];

    // Ratings for safety of the prompt.
    // There is at most one rating per category.
    repeated SafetyRating safety_ratings = 2;
  }

  // Metadata on the generation request's token usage.
  message UsageMetadata {
    // Number of tokens in the prompt.
    int32 prompt_token_count = 1;

    // Total number of tokens across the generated candidates.
    int32 candidates_token_count = 2;

    // Total token count for the generation request (prompt + candidates).
    int32 total_token_count = 3;
  }

  // Candidate responses from the model.
  repeated Candidate candidates = 1;

  // Returns the prompt's feedback related to the content filters.
  PromptFeedback prompt_feedback = 2;

  // Output only. Metadata on the generation requests' token usage.
  UsageMetadata usage_metadata = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// A response candidate generated from the model.
message Candidate {
  // Defines the reason why the model stopped generating tokens.
  enum FinishReason {
    // Default value. This value is unused.
    FINISH_REASON_UNSPECIFIED = 0;

    // Natural stop point of the model or provided stop sequence.
    STOP = 1;

    // The maximum number of tokens as specified in the request was reached.
    MAX_TOKENS = 2;

    // The candidate content was flagged for safety reasons.
    SAFETY = 3;

    // The candidate content was flagged for recitation reasons.
    RECITATION = 4;

    // Unknown reason.
    OTHER = 5;
  }

  // Output only. Index of the candidate in the list of candidates.
  optional int32 index = 3 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Generated content returned from the model.
  Content content = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. Output only. The reason why the model stopped generating tokens.
  //
  // If empty, the model has not stopped generating the tokens.
  FinishReason finish_reason = 2 [
    (google.api.field_behavior) = OPTIONAL,
    (google.api.field_behavior) = OUTPUT_ONLY
  ];

  // List of ratings for the safety of a response candidate.
  //
  // There is at most one rating per category.
  repeated SafetyRating safety_ratings = 5;

  // Output only. Citation information for model-generated candidate.
  //
  // This field may be populated with recitation information for any text
  // included in the `content`. These are passages that are "recited" from
  // copyrighted material in the foundational LLM's training data.
  CitationMetadata citation_metadata = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Token count for this candidate.
  int32 token_count = 7 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Attribution information for sources that contributed to a
  // grounded answer.
  //
  // This field is populated for `GenerateAnswer` calls.
  repeated GroundingAttribution grounding_attributions = 8
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Identifier for the source contributing to this attribution.
message AttributionSourceId {
  // Identifier for a part within a `GroundingPassage`.
  message GroundingPassageId {
    // Output only. ID of the passage matching the `GenerateAnswerRequest`'s
    // `GroundingPassage.id`.
    string passage_id = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Index of the part within the `GenerateAnswerRequest`'s
    // `GroundingPassage.content`.
    int32 part_index = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // Identifier for a `Chunk` retrieved via Semantic Retriever specified in the
  // `GenerateAnswerRequest` using `SemanticRetrieverConfig`.
  message SemanticRetrieverChunk {
    // Output only. Name of the source matching the request's
    // `SemanticRetrieverConfig.source`. Example: `corpora/123` or
    // `corpora/123/documents/abc`
    string source = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Name of the `Chunk` containing the attributed text.
    // Example: `corpora/123/documents/abc/chunks/xyz`
    string chunk = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // The kind of source being identified; at most one member is set.
  oneof source {
    // Identifier for an inline passage.
    GroundingPassageId grounding_passage = 1;

    // Identifier for a `Chunk` fetched via Semantic Retriever.
    SemanticRetrieverChunk semantic_retriever_chunk = 2;
  }
}

// Attribution for a source that contributed to an answer.
message GroundingAttribution {
  // Output only. Identifier for the source contributing to this attribution.
  AttributionSourceId source_id = 3 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Grounding source content that makes up this attribution.
  Content content = 2;
}

// Request to generate a grounded answer from the model.
message GenerateAnswerRequest {
  // Style for grounded answers.
  enum AnswerStyle {
    // Unspecified answer style.
    ANSWER_STYLE_UNSPECIFIED = 0;

    // Succinct but abstract style.
    ABSTRACTIVE = 1;

    // Very brief and extractive style.
    EXTRACTIVE = 2;

    // Verbose style including extra details. The response may be formatted as a
    // sentence, paragraph, multiple paragraphs, or bullet points, etc.
    VERBOSE = 3;
  }

  // The sources in which to ground the answer.
  oneof grounding_source {
    // Passages provided inline with the request.
    GroundingPassages inline_passages = 6;

    // Content retrieved from resources created via the Semantic Retriever
    // API.
    SemanticRetrieverConfig semantic_retriever = 7;
  }

  // Required. The name of the `Model` to use for generating the grounded
  // response.
  //
  // Format: `models/{model}`.
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Required. The content of the current conversation with the model. For
  // single-turn queries, this is a single question to answer. For multi-turn
  // queries, this is a repeated field that contains conversation history and
  // the last `Content` in the list containing the question.
  //
  // Note: GenerateAnswer currently only supports queries in English.
  repeated Content contents = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. Style in which answers should be returned.
  AnswerStyle answer_style = 5 [(google.api.field_behavior) = REQUIRED];

  // Optional. A list of unique `SafetySetting` instances for blocking unsafe
  // content.
  //
  // This will be enforced on the `GenerateAnswerRequest.contents` and
  // `GenerateAnswerResponse.answer`. There should not be more than one
  // setting for each `SafetyCategory` type. The API will block any contents and
  // responses that fail to meet the thresholds set by these settings. This list
  // overrides the default settings for each `SafetyCategory` specified in the
  // safety_settings. If there is no `SafetySetting` for a given
  // `SafetyCategory` provided in the list, the API will use the default safety
  // setting for that category. Harm categories HARM_CATEGORY_HATE_SPEECH,
  // HARM_CATEGORY_SEXUALLY_EXPLICIT, HARM_CATEGORY_DANGEROUS_CONTENT,
  // HARM_CATEGORY_HARASSMENT are supported.
  repeated SafetySetting safety_settings = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Controls the randomness of the output.
  //
  // Values can range from [0.0,1.0], inclusive. A value closer to 1.0 will
  // produce responses that are more varied and creative, while a value closer
  // to 0.0 will typically result in more straightforward responses from the
  // model. A low temperature (~0.2) is usually recommended for
  // Attributed-Question-Answering use cases.
  optional float temperature = 4 [(google.api.field_behavior) = OPTIONAL];
}

// Response from the model for a grounded answer.
message GenerateAnswerResponse {
  // Feedback related to the input data used to answer the question, as opposed
  // to model-generated response to the question.
  message InputFeedback {
    // Specifies what was the reason why input was blocked.
    enum BlockReason {
      // Default value. This value is unused.
      BLOCK_REASON_UNSPECIFIED = 0;

      // Input was blocked due to safety reasons. You can inspect
      // `safety_ratings` to understand which safety category blocked it.
      SAFETY = 1;

      // Input was blocked due to other reasons.
      OTHER = 2;
    }

    // Optional. If set, the input was blocked and no candidates are returned.
    // Rephrase your input.
    optional BlockReason block_reason = 1
        [(google.api.field_behavior) = OPTIONAL];

    // Ratings for safety of the input.
    // There is at most one rating per category.
    repeated SafetyRating safety_ratings = 2;
  }

  // Candidate answer from the model.
  //
  // Note: The model *always* attempts to provide a grounded answer, even when
  // the answer is unlikely to be answerable from the given passages.
  // In that case, a low-quality or ungrounded answer may be provided, along
  // with a low `answerable_probability`.
  Candidate answer = 1;

  // Output only. The model's estimate of the probability that its answer is
  // correct and grounded in the input passages.
  //
  // A low answerable_probability indicates that the answer might not be
  // grounded in the sources.
  //
  // When `answerable_probability` is low, some clients may wish to:
  //
  // * Display a message to the effect of "We couldn’t answer that question" to
  // the user.
  // * Fall back to a general-purpose LLM that answers the question from world
  // knowledge. The threshold and nature of such fallbacks will depend on
  // individual clients’ use cases. 0.5 is a good starting threshold.
  optional float answerable_probability = 2
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Feedback related to the input data used to answer the
  // question, as opposed to model-generated response to the question.
  //
  // "Input data" can be one or more of the following:
  //
  // - Question specified by the last entry in `GenerateAnswerRequest.contents`
  // - Conversation history specified by the other entries in
  // `GenerateAnswerRequest.contents`
  // - Grounding sources (`GenerateAnswerRequest.semantic_retriever` or
  // `GenerateAnswerRequest.inline_passages`)
  optional InputFeedback input_feedback = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Request containing the `Content` for the model to embed.
message EmbedContentRequest {
  // Required. The model's resource name. This serves as an ID for the Model to
  // use.
  //
  // This name should match a model name returned by the `ListModels` method.
  //
  // Format: `models/{model}`
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Required. The content to embed. Only the `parts.text` fields will be
  // counted.
  Content content = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. Optional task type for which the embeddings will be used. Can
  // only be set for `models/embedding-001`.
  optional TaskType task_type = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. An optional title for the text. Only applicable when TaskType is
  // `RETRIEVAL_DOCUMENT`.
  //
  // Note: Specifying a `title` for `RETRIEVAL_DOCUMENT` provides better quality
  // embeddings for retrieval.
  optional string title = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Optional reduced dimension for the output embedding. If set,
  // excessive values in the output embedding are truncated from the end.
  // Supported by newer models since 2024, and the earlier model
  // (`models/embedding-001`) cannot specify this value.
  optional int32 output_dimensionality = 5
      [(google.api.field_behavior) = OPTIONAL];
}

// A list of floats representing an embedding.
message ContentEmbedding {
  // The embedding values.
  repeated float values = 1;
}

// The response to an `EmbedContentRequest`.
message EmbedContentResponse {
  // Output only. The embedding generated from the input content.
  ContentEmbedding embedding = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Batch request to get embeddings from the model for a list of prompts.
message BatchEmbedContentsRequest {
  // Required. The model's resource name. This serves as an ID for the Model to
  // use.
  //
  // This name should match a model name returned by the `ListModels` method.
  //
  // Format: `models/{model}`
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Required. Embed requests for the batch. The model in each of these requests
  // must match the model specified in `BatchEmbedContentsRequest.model`.
  repeated EmbedContentRequest requests = 2
      [(google.api.field_behavior) = REQUIRED];
}

// The response to a `BatchEmbedContentsRequest`.
message BatchEmbedContentsResponse {
  // Output only. The embeddings for each request, in the same order as provided
  // in the batch request.
  repeated ContentEmbedding embeddings = 1
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Counts the number of tokens in the `prompt` sent to a model.
//
// Models may tokenize text differently, so each model may return a different
// `token_count`.
message CountTokensRequest {
  // Required. The model's resource name. This serves as an ID for the Model to
  // use.
  //
  // This name should match a model name returned by the `ListModels` method.
  //
  // Format: `models/{model}`
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Optional. The input given to the model as a prompt. This field is ignored
  // when `generate_content_request` is set.
  repeated Content contents = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The overall input given to the model. CountTokens will count
  // prompt, function calling, etc.
  GenerateContentRequest generate_content_request = 3
      [(google.api.field_behavior) = OPTIONAL];
}

// A response from `CountTokens`.
//
// It returns the model's `token_count` for the `prompt`.
message CountTokensResponse {
  // The number of tokens that the `model` tokenizes the `prompt` into.
  //
  // Always non-negative.
  int32 total_tokens = 1;
}