// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.ai.generativelanguage.v1;

import "google/ai/generativelanguage/v1/citation.proto";
import "google/ai/generativelanguage/v1/content.proto";
import "google/ai/generativelanguage/v1/safety.proto";
import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";

option go_package = "cloud.google.com/go/ai/generativelanguage/apiv1/generativelanguagepb;generativelanguagepb";
option java_multiple_files = true;
option java_outer_classname = "GenerativeServiceProto";
option java_package = "com.google.ai.generativelanguage.v1";

// API for using Large Models that generate multimodal content and have
// additional capabilities beyond text generation.
service GenerativeService {
  option (google.api.default_host) = "generativelanguage.googleapis.com";

  // Generates a model response given an input `GenerateContentRequest`.
  // Refer to the [text generation
  // guide](https://ai.google.dev/gemini-api/docs/text-generation) for detailed
  // usage information. Input capabilities differ between models, including
  // tuned models. Refer to the [model
  // guide](https://ai.google.dev/gemini-api/docs/models/gemini) and [tuning
  // guide](https://ai.google.dev/gemini-api/docs/model-tuning) for details.
  rpc GenerateContent(GenerateContentRequest)
      returns (GenerateContentResponse) {
    option (google.api.http) = {
      post: "/v1/{model=models/*}:generateContent"
      body: "*"
      additional_bindings {
        post: "/v1/{model=tunedModels/*}:generateContent"
        body: "*"
      }
    };
    option (google.api.method_signature) = "model,contents";
  }

  // Generates a [streamed
  // response](https://ai.google.dev/gemini-api/docs/text-generation?lang=python#generate-a-text-stream)
  // from the model given an input `GenerateContentRequest`.
  rpc StreamGenerateContent(GenerateContentRequest)
      returns (stream GenerateContentResponse) {
    option (google.api.http) = {
      post: "/v1/{model=models/*}:streamGenerateContent"
      body: "*"
    };
    option (google.api.method_signature) = "model,contents";
  }

  // Generates a text embedding vector from the input `Content` using the
  // specified [Gemini Embedding
  // model](https://ai.google.dev/gemini-api/docs/models/gemini#text-embedding).
  rpc EmbedContent(EmbedContentRequest) returns (EmbedContentResponse) {
    option (google.api.http) = {
      post: "/v1/{model=models/*}:embedContent"
      body: "*"
    };
    option (google.api.method_signature) = "model,content";
  }

  // Generates multiple embedding vectors from the input `Content`, which
  // consists of a batch of strings represented as `EmbedContentRequest`
  // objects.
  rpc BatchEmbedContents(BatchEmbedContentsRequest)
      returns (BatchEmbedContentsResponse) {
    option (google.api.http) = {
      post: "/v1/{model=models/*}:batchEmbedContents"
      body: "*"
    };
    option (google.api.method_signature) = "model,requests";
  }

  // Runs a model's tokenizer on input `Content` and returns the token count.
  // Refer to the [tokens guide](https://ai.google.dev/gemini-api/docs/tokens)
  // to learn more about tokens.
  rpc CountTokens(CountTokensRequest) returns (CountTokensResponse) {
    option (google.api.http) = {
      post: "/v1/{model=models/*}:countTokens"
      body: "*"
    };
    option (google.api.method_signature) = "model,contents";
  }
}
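// The HTTP bindings above map each RPC onto a REST surface. As an
// illustrative sketch (not part of the API definition), a minimal
// `GenerateContent` call over REST could look like the following. The model
// name and the `key` query parameter for API-key authentication are
// assumptions here; the JSON field names follow the standard proto3 JSON
// mapping of `GenerateContentRequest`:
//
//   curl -X POST \
//     "https://generativelanguage.googleapis.com/v1/models/gemini-1.5-flash:generateContent?key=$API_KEY" \
//     -H "Content-Type: application/json" \
//     -d '{
//       "contents": [
//         {"role": "user", "parts": [{"text": "Explain nucleus sampling."}]}
//       ]
//     }'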
// Type of task for which the embedding will be used.
enum TaskType {
  // Unset value, which will default to one of the other enum values.
  TASK_TYPE_UNSPECIFIED = 0;

  // Specifies the given text is a query in a search/retrieval setting.
  RETRIEVAL_QUERY = 1;

  // Specifies the given text is a document from the corpus being searched.
  RETRIEVAL_DOCUMENT = 2;

  // Specifies the given text will be used for STS (semantic textual
  // similarity).
  SEMANTIC_SIMILARITY = 3;

  // Specifies that the given text will be classified.
  CLASSIFICATION = 4;

  // Specifies that the embeddings will be used for clustering.
  CLUSTERING = 5;

  // Specifies that the given text will be used for question answering.
  QUESTION_ANSWERING = 6;

  // Specifies that the given text will be used for fact verification.
  FACT_VERIFICATION = 7;
}

// Request to generate a completion from the model.
message GenerateContentRequest {
  // Required. The name of the `Model` to use for generating the completion.
  //
  // Format: `name=models/{model}`.
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Required. The content of the current conversation with the model.
  //
  // For single-turn queries, this is a single instance. For multi-turn queries
  // like [chat](https://ai.google.dev/gemini-api/docs/text-generation#chat),
  // this is a repeated field that contains the conversation history and the
  // latest request.
  repeated Content contents = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. A list of unique `SafetySetting` instances for blocking unsafe
  // content.
  //
  // This will be enforced on the `GenerateContentRequest.contents` and
  // `GenerateContentResponse.candidates`. There should not be more than one
  // setting for each `SafetyCategory` type. The API will block any contents
  // and responses that fail to meet the thresholds set by these settings.
  // This list overrides the default setting for each `SafetyCategory`
  // specified in `safety_settings`. If there is no `SafetySetting` for a
  // given `SafetyCategory` in the list, the API will use the default safety
  // setting for that category. Harm categories HARM_CATEGORY_HATE_SPEECH,
  // HARM_CATEGORY_SEXUALLY_EXPLICIT, HARM_CATEGORY_DANGEROUS_CONTENT, and
  // HARM_CATEGORY_HARASSMENT are supported. Refer to the
  // [guide](https://ai.google.dev/gemini-api/docs/safety-settings)
  // for detailed information on available safety settings. Also refer to the
  // [Safety guidance](https://ai.google.dev/gemini-api/docs/safety-guidance)
  // to learn how to incorporate safety considerations in your AI
  // applications.
  repeated SafetySetting safety_settings = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Configuration options for model generation and outputs.
  optional GenerationConfig generation_config = 4
      [(google.api.field_behavior) = OPTIONAL];
}
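// As an illustrative sketch, a minimal `GenerateContentRequest` in proto text
// format might look like the following. The model name is a placeholder, and
// `role`/`parts.text` come from `Content` in content.proto:
//
//   model: "models/gemini-1.5-flash"
//   contents {
//     role: "user"
//     parts { text: "Write a one-line summary of nucleus sampling." }
//   }
//   generation_config {
//     temperature: 0.7
//     max_output_tokens: 128
//   }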
// Configuration options for model generation and outputs. Not all parameters
// are configurable for every model.
message GenerationConfig {
  // Optional. Number of generated responses to return.
  //
  // Currently, this value can only be set to 1. If unset, this will default
  // to 1.
  optional int32 candidate_count = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The set of character sequences (up to 5) that will stop output
  // generation. If specified, the API will stop at the first appearance of a
  // `stop_sequence`. The stop sequence will not be included as part of the
  // response.
  repeated string stop_sequences = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The maximum number of tokens to include in a response
  // candidate.
  //
  // Note: The default value varies by model; see the
  // `Model.output_token_limit` attribute of the `Model` returned from the
  // `getModel` function.
  optional int32 max_output_tokens = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Controls the randomness of the output.
  //
  // Note: The default value varies by model; see the `Model.temperature`
  // attribute of the `Model` returned from the `getModel` function.
  //
  // Values can range from [0.0, 2.0].
  optional float temperature = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The maximum cumulative probability of tokens to consider when
  // sampling.
  //
  // The model uses combined Top-k and Top-p (nucleus) sampling.
  //
  // Tokens are sorted based on their assigned probabilities so that only the
  // most likely tokens are considered. Top-k sampling directly limits the
  // maximum number of tokens to consider, while Nucleus sampling limits the
  // number of tokens based on the cumulative probability.
  //
  // Note: The default value varies by `Model` and is specified by the
  // `Model.top_p` attribute returned from the `getModel` function.
  optional float top_p = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The maximum number of tokens to consider when sampling.
  //
  // Gemini models use Top-p (nucleus) sampling or a combination of Top-k and
  // nucleus sampling. Top-k sampling considers the set of `top_k` most
  // probable tokens. Models running with nucleus sampling don't allow a
  // `top_k` setting.
  //
  // Note: The default value varies by `Model` and is specified by the
  // `Model.top_k` attribute returned from the `getModel` function. An empty
  // `top_k` attribute indicates that the model doesn't apply top-k sampling
  // and doesn't allow setting `top_k` on requests.
  optional int32 top_k = 7 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Presence penalty applied to the next token's logprobs if the
  // token has already been seen in the response.
  //
  // This penalty is binary on/off and not dependent on the number of times
  // the token is used (after the first). Use
  // [frequency_penalty][google.ai.generativelanguage.v1.GenerationConfig.frequency_penalty]
  // for a penalty that increases with each use.
  //
  // A positive penalty will discourage the use of tokens that have already
  // been used in the response, increasing the vocabulary.
  //
  // A negative penalty will encourage the use of tokens that have already
  // been used in the response, decreasing the vocabulary.
  optional float presence_penalty = 15
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Frequency penalty applied to the next token's logprobs,
  // multiplied by the number of times each token has been seen in the
  // response so far.
  //
  // A positive penalty will discourage the use of tokens that have already
  // been used, proportional to the number of times the token has been used:
  // The more a token is used, the more difficult it is for the model to use
  // that token again, increasing the vocabulary of responses.
  //
  // Caution: A _negative_ penalty will encourage the model to reuse tokens
  // proportional to the number of times the token has been used. Small
  // negative values will reduce the vocabulary of a response. Larger negative
  // values will cause the model to start repeating a common token until it
  // hits the
  // [max_output_tokens][google.ai.generativelanguage.v1.GenerationConfig.max_output_tokens]
  // limit: "...the the the the the...".
  optional float frequency_penalty = 16
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. If true, export the logprobs results in the response.
  optional bool response_logprobs = 17 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Only valid if
  // [response_logprobs=True][google.ai.generativelanguage.v1.GenerationConfig.response_logprobs].
  // This sets the number of top logprobs to return at each decoding step in
  // [Candidate.logprobs_result][google.ai.generativelanguage.v1.Candidate.logprobs_result].
  optional int32 logprobs = 18 [(google.api.field_behavior) = OPTIONAL];
}
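// As an illustrative sketch, a request that constrains decoding might set the
// following `generation_config` (all numeric values are assumptions chosen
// for illustration, not defaults):
//
//   generation_config {
//     candidate_count: 1
//     stop_sequences: "\n\n"
//     max_output_tokens: 256
//     temperature: 0.2        # low randomness
//     top_p: 0.95             # nucleus sampling mass
//     top_k: 40               # only where the model supports top-k
//     presence_penalty: 0.5   # flat penalty on any already-seen token
//     frequency_penalty: 0.3  # penalty grows with each repeated use
//     response_logprobs: true
//     logprobs: 3             # top-3 alternatives per decoding step
//   }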
// Response from the model supporting multiple candidate responses.
//
// Safety ratings and content filtering are reported both for the prompt in
// `GenerateContentResponse.prompt_feedback` and for each candidate in
// `finish_reason` and in `safety_ratings`. The API:
//  - Returns either all requested candidates or none of them
//  - Returns no candidates at all only if there was something wrong with the
//    prompt (check `prompt_feedback`)
//  - Reports feedback on each candidate in `finish_reason` and
//    `safety_ratings`.
message GenerateContentResponse {
  // A set of feedback metadata for the prompt specified in
  // `GenerateContentRequest.contents`.
  message PromptFeedback {
    // Specifies the reason why the prompt was blocked.
    enum BlockReason {
      // Default value. This value is unused.
      BLOCK_REASON_UNSPECIFIED = 0;

      // Prompt was blocked due to safety reasons. Inspect `safety_ratings`
      // to understand which safety category blocked it.
      SAFETY = 1;

      // Prompt was blocked due to unknown reasons.
      OTHER = 2;

      // Prompt was blocked because it contains terms from the terminology
      // blocklist.
      BLOCKLIST = 3;

      // Prompt was blocked due to prohibited content.
      PROHIBITED_CONTENT = 4;
    }

    // Optional. If set, the prompt was blocked and no candidates are
    // returned. Rephrase the prompt.
    BlockReason block_reason = 1 [(google.api.field_behavior) = OPTIONAL];

    // Ratings for the safety of the prompt.
    // There is at most one rating per category.
    repeated SafetyRating safety_ratings = 2;
  }

  // Metadata on the generation request's token usage.
  message UsageMetadata {
    // Number of tokens in the prompt. When `cached_content` is set, this is
    // still the total effective prompt size, meaning it includes the number
    // of tokens in the cached content.
    int32 prompt_token_count = 1;

    // Total number of tokens across all the generated response candidates.
    int32 candidates_token_count = 2;

    // Total token count for the generation request (prompt + response
    // candidates).
    int32 total_token_count = 3;
  }

  // Candidate responses from the model.
  repeated Candidate candidates = 1;

  // Returns the prompt's feedback related to the content filters.
  PromptFeedback prompt_feedback = 2;

  // Output only. Metadata on the generation request's token usage.
  UsageMetadata usage_metadata = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
}
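// As an illustrative sketch, a successful `GenerateContentResponse` might
// look like the following in proto text format (the text and token counts are
// made up for illustration):
//
//   candidates {
//     content {
//       role: "model"
//       parts { text: "Nucleus sampling keeps the smallest token set whose" }
//     }
//     finish_reason: STOP
//   }
//   usage_metadata {
//     prompt_token_count: 12
//     candidates_token_count: 40
//     total_token_count: 52
//   }
//
// A blocked prompt instead returns no candidates and sets
// `prompt_feedback.block_reason`, e.g. `block_reason: SAFETY`.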
// A response candidate generated from the model.
message Candidate {
  // Defines the reason why the model stopped generating tokens.
  enum FinishReason {
    // Default value. This value is unused.
    FINISH_REASON_UNSPECIFIED = 0;

    // Natural stop point of the model or provided stop sequence.
    STOP = 1;

    // The maximum number of tokens as specified in the request was reached.
    MAX_TOKENS = 2;

    // The response candidate content was flagged for safety reasons.
    SAFETY = 3;

    // The response candidate content was flagged for recitation reasons.
    RECITATION = 4;

    // The response candidate content was flagged for using an unsupported
    // language.
    LANGUAGE = 6;

    // Unknown reason.
    OTHER = 5;

    // Token generation stopped because the content contains forbidden terms.
    BLOCKLIST = 7;

    // Token generation stopped for potentially containing prohibited content.
    PROHIBITED_CONTENT = 8;

    // Token generation stopped because the content potentially contains
    // Sensitive Personally Identifiable Information (SPII).
    SPII = 9;

    // The function call generated by the model is invalid.
    MALFORMED_FUNCTION_CALL = 10;
  }

  // Output only. Index of the candidate in the list of response candidates.
  optional int32 index = 3 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Generated content returned from the model.
  Content content = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. Output only. The reason why the model stopped generating
  // tokens.
  //
  // If empty, the model has not stopped generating tokens.
  FinishReason finish_reason = 2 [
    (google.api.field_behavior) = OPTIONAL,
    (google.api.field_behavior) = OUTPUT_ONLY
  ];

  // List of ratings for the safety of a response candidate.
  //
  // There is at most one rating per category.
  repeated SafetyRating safety_ratings = 5;

  // Output only. Citation information for the model-generated candidate.
  //
  // This field may be populated with recitation information for any text
  // included in the `content`. These are passages that are "recited" from
  // copyrighted material in the foundational LLM's training data.
  CitationMetadata citation_metadata = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Token count for this candidate.
  int32 token_count = 7 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Average log probability score of the candidate.
  double avg_logprobs = 10 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Log-likelihood scores for the response tokens and top
  // tokens.
  LogprobsResult logprobs_result = 11
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Logprobs result.
message LogprobsResult {
  // Candidate for the logprobs token and score.
  message Candidate {
    // The candidate's token string value.
    optional string token = 1;

    // The candidate's token id value.
    optional int32 token_id = 3;

    // The candidate's log probability.
    optional float log_probability = 2;
  }

  // Candidates with top log probabilities at each decoding step.
  message TopCandidates {
    // Sorted by log probability in descending order.
    repeated Candidate candidates = 1;
  }

  // Length = total number of decoding steps.
  repeated TopCandidates top_candidates = 1;

  // Length = total number of decoding steps.
  // The chosen candidates may or may not be in top_candidates.
  repeated Candidate chosen_candidates = 2;
}
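// As an illustrative sketch, with `logprobs: 2` set on the request, a single
// decoding step in `logprobs_result` might look like this (token strings,
// ids, and scores are made up for illustration):
//
//   top_candidates {
//     candidates { token: "The" token_id: 651 log_probability: -0.23 }
//     candidates { token: "A" token_id: 235 log_probability: -1.78 }
//   }
//   chosen_candidates { token: "The" token_id: 651 log_probability: -0.23 }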
// Request containing the `Content` for the model to embed.
message EmbedContentRequest {
  // Required. The model's resource name. This serves as an ID for the Model
  // to use.
  //
  // This name should match a model name returned by the `ListModels` method.
  //
  // Format: `models/{model}`
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Required. The content to embed. Only the `parts.text` fields will be
  // counted.
  Content content = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. The task type for which the embeddings will be used. Can only
  // be set for `models/embedding-001`.
  optional TaskType task_type = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A title for the text. Only applicable when the TaskType is
  // `RETRIEVAL_DOCUMENT`.
  //
  // Note: Specifying a `title` for `RETRIEVAL_DOCUMENT` provides better
  // quality embeddings for retrieval.
  optional string title = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A reduced dimension for the output embedding. If set, excessive
  // values in the output embedding are truncated from the end. Supported by
  // newer models since 2024 only. You cannot set this value if using the
  // earlier model (`models/embedding-001`).
  optional int32 output_dimensionality = 5
      [(google.api.field_behavior) = OPTIONAL];
}

// A list of floats representing an embedding.
message ContentEmbedding {
  // The embedding values.
  repeated float values = 1;
}

// The response to an `EmbedContentRequest`.
message EmbedContentResponse {
  // Output only. The embedding generated from the input content.
  ContentEmbedding embedding = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Batch request to get embeddings from the model for a list of prompts.
message BatchEmbedContentsRequest {
  // Required. The model's resource name. This serves as an ID for the Model
  // to use.
  //
  // This name should match a model name returned by the `ListModels` method.
  //
  // Format: `models/{model}`
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Required. Embed requests for the batch. The model in each of these
  // requests must match the model specified in
  // `BatchEmbedContentsRequest.model`.
  repeated EmbedContentRequest requests = 2
      [(google.api.field_behavior) = REQUIRED];
}

// The response to a `BatchEmbedContentsRequest`.
message BatchEmbedContentsResponse {
  // Output only. The embeddings for each request, in the same order as
  // provided in the batch request.
  repeated ContentEmbedding embeddings = 1
      [(google.api.field_behavior) = OUTPUT_ONLY];
}
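// As an illustrative sketch, an `EmbedContentRequest` for a retrieval query
// might look like this in proto text format (following the task_type note
// above, the example uses `models/embedding-001`):
//
//   model: "models/embedding-001"
//   content { parts { text: "What is a presence penalty?" } }
//   task_type: RETRIEVAL_QUERY
//
// For `RETRIEVAL_DOCUMENT` inputs, also set `title` to improve retrieval
// quality; `output_dimensionality` applies only to the newer embedding
// models, as noted above.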
// Counts the number of tokens in the `prompt` sent to a model.
//
// Models may tokenize text differently, so each model may return a different
// `token_count`.
message CountTokensRequest {
  // Required. The model's resource name. This serves as an ID for the Model
  // to use.
  //
  // This name should match a model name returned by the `ListModels` method.
  //
  // Format: `models/{model}`
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Optional. The input given to the model as a prompt. This field is ignored
  // when `generate_content_request` is set.
  repeated Content contents = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The overall input given to the `Model`. This includes the
  // prompt as well as other model steering information like [system
  // instructions](https://ai.google.dev/gemini-api/docs/system-instructions),
  // and/or function declarations for [function
  // calling](https://ai.google.dev/gemini-api/docs/function-calling).
  // `Model`s/`Content`s and `generate_content_request`s are mutually
  // exclusive. You can either send `Model` + `Content`s or a
  // `generate_content_request`, but never both.
  GenerateContentRequest generate_content_request = 3
      [(google.api.field_behavior) = OPTIONAL];
}

// A response from `CountTokens`.
//
// It returns the model's `token_count` for the `prompt`.
message CountTokensResponse {
  // The number of tokens that the `Model` tokenizes the `prompt` into. Always
  // non-negative.
  int32 total_tokens = 1;
}
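// As an illustrative sketch, counting tokens for a plain prompt (the model
// name is a placeholder and the returned count is made up for illustration):
//
//   # CountTokensRequest
//   model: "models/gemini-1.5-flash"
//   contents { parts { text: "The quick brown fox jumps over the lazy dog." } }
//
//   # CountTokensResponse
//   total_tokens: 10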