// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.ai.generativelanguage.v1;

import "google/ai/generativelanguage/v1/citation.proto";
import "google/ai/generativelanguage/v1/content.proto";
import "google/ai/generativelanguage/v1/safety.proto";
import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";

option go_package = "cloud.google.com/go/ai/generativelanguage/apiv1/generativelanguagepb;generativelanguagepb";
option java_multiple_files = true;
option java_outer_classname = "GenerativeServiceProto";
option java_package = "com.google.ai.generativelanguage.v1";

// API for using Large Models that generate multimodal content and have
// additional capabilities beyond text generation.
service GenerativeService {
  option (google.api.default_host) = "generativelanguage.googleapis.com";

  // Generates a response from the model given an input
  // `GenerateContentRequest`.
  //
  // Input capabilities differ between models, including tuned models. See the
  // [model guide](https://ai.google.dev/models/gemini) and
  // [tuning guide](https://ai.google.dev/docs/model_tuning_guidance) for
  // details.
  rpc GenerateContent(GenerateContentRequest)
      returns (GenerateContentResponse) {
    option (google.api.http) = {
      post: "/v1/{model=models/*}:generateContent"
      body: "*"
      additional_bindings {
        post: "/v1/{model=tunedModels/*}:generateContent"
        body: "*"
      }
    };
    option (google.api.method_signature) = "model,contents";
  }

  // Generates a streamed response from the model given an input
  // `GenerateContentRequest`.
  rpc StreamGenerateContent(GenerateContentRequest)
      returns (stream GenerateContentResponse) {
    option (google.api.http) = {
      post: "/v1/{model=models/*}:streamGenerateContent"
      body: "*"
    };
    option (google.api.method_signature) = "model,contents";
  }

  // Generates an embedding from the model given an input `Content`.
  rpc EmbedContent(EmbedContentRequest) returns (EmbedContentResponse) {
    option (google.api.http) = {
      post: "/v1/{model=models/*}:embedContent"
      body: "*"
    };
    option (google.api.method_signature) = "model,content";
  }

  // Generates multiple embeddings from the model given input text in a
  // synchronous call.
  rpc BatchEmbedContents(BatchEmbedContentsRequest)
      returns (BatchEmbedContentsResponse) {
    option (google.api.http) = {
      post: "/v1/{model=models/*}:batchEmbedContents"
      body: "*"
    };
    option (google.api.method_signature) = "model,requests";
  }

  // Runs a model's tokenizer on input content and returns the token count.
  rpc CountTokens(CountTokensRequest) returns (CountTokensResponse) {
    option (google.api.http) = {
      post: "/v1/{model=models/*}:countTokens"
      body: "*"
    };
    option (google.api.method_signature) = "model,contents";
  }
}

// Type of task for which the embedding will be used.
enum TaskType {
  // Unset value, which will default to one of the other enum values.
  TASK_TYPE_UNSPECIFIED = 0;

  // Specifies the given text is a query in a search/retrieval setting.
  RETRIEVAL_QUERY = 1;

  // Specifies the given text is a document from the corpus being searched.
  RETRIEVAL_DOCUMENT = 2;

  // Specifies the given text will be used for STS.
  SEMANTIC_SIMILARITY = 3;

  // Specifies that the given text will be classified.
  CLASSIFICATION = 4;

  // Specifies that the embeddings will be used for clustering.
  CLUSTERING = 5;

  // Specifies that the given text will be used for question answering.
  QUESTION_ANSWERING = 6;

  // Specifies that the given text will be used for fact verification.
  FACT_VERIFICATION = 7;
}

// Request to generate a completion from the model.
message GenerateContentRequest {
  // Required. The name of the `Model` to use for generating the completion.
  //
  // Format: `name=models/{model}`.
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Required. The content of the current conversation with the model.
  //
  // For single-turn queries, this is a single instance. For multi-turn queries,
  // this is a repeated field that contains conversation history + latest
  // request.
  repeated Content contents = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. A list of unique `SafetySetting` instances for blocking unsafe
  // content.
  //
  // This will be enforced on the `GenerateContentRequest.contents` and
  // `GenerateContentResponse.candidates`. There should not be more than one
  // setting for each `SafetyCategory` type. The API will block any contents and
  // responses that fail to meet the thresholds set by these settings. This list
  // overrides the default settings for each `SafetyCategory` specified in the
  // safety_settings. If there is no `SafetySetting` for a given
  // `SafetyCategory` provided in the list, the API will use the default safety
  // setting for that category. Harm categories HARM_CATEGORY_HATE_SPEECH,
  // HARM_CATEGORY_SEXUALLY_EXPLICIT, HARM_CATEGORY_DANGEROUS_CONTENT,
  // HARM_CATEGORY_HARASSMENT are supported.
  repeated SafetySetting safety_settings = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Configuration options for model generation and outputs.
  optional GenerationConfig generation_config = 4
      [(google.api.field_behavior) = OPTIONAL];
}

// Configuration options for model generation and outputs. Not all parameters
// may be configurable for every model.
message GenerationConfig {
  // Optional. Number of generated responses to return.
  //
  // Currently, this value can only be set to 1. If unset, this will default
  // to 1.
  optional int32 candidate_count = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The set of character sequences (up to 5) that will stop output
  // generation. If specified, the API will stop at the first appearance of a
  // stop sequence. The stop sequence will not be included as part of the
  // response.
  repeated string stop_sequences = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The maximum number of tokens to include in a candidate.
  //
  // Note: The default value varies by model, see the `Model.output_token_limit`
  // attribute of the `Model` returned from the `getModel` function.
  optional int32 max_output_tokens = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Controls the randomness of the output.
  //
  // Note: The default value varies by model, see the `Model.temperature`
  // attribute of the `Model` returned from the `getModel` function.
  //
  // Values can range from [0.0, 2.0].
  optional float temperature = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The maximum cumulative probability of tokens to consider when
  // sampling.
  //
  // The model uses combined Top-k and nucleus sampling.
  //
  // Tokens are sorted based on their assigned probabilities so that only the
  // most likely tokens are considered. Top-k sampling directly limits the
  // maximum number of tokens to consider, while Nucleus sampling limits number
  // of tokens based on the cumulative probability.
  //
  // Note: The default value varies by model, see the `Model.top_p`
  // attribute of the `Model` returned from the `getModel` function.
  optional float top_p = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The maximum number of tokens to consider when sampling.
  //
  // Models use nucleus sampling or combined Top-k and nucleus sampling.
  // Top-k sampling considers the set of `top_k` most probable tokens.
  // Models running with nucleus sampling don't allow top_k setting.
  //
  // Note: The default value varies by model, see the `Model.top_k`
  // attribute of the `Model` returned from the `getModel` function. Empty
  // `top_k` field in `Model` indicates the model doesn't apply top-k sampling
  // and doesn't allow setting `top_k` on requests.
  optional int32 top_k = 7 [(google.api.field_behavior) = OPTIONAL];
}

// Response from the model supporting multiple candidates.
//
// Note on safety ratings and content filtering. They are reported for both
// prompt in `GenerateContentResponse.prompt_feedback` and for each candidate
// in `finish_reason` and in `safety_ratings`. The API contract is that:
// - either all requested candidates are returned or no candidates at all
// - no candidates are returned only if there was something wrong with the
//   prompt (see `prompt_feedback`)
// - feedback on each candidate is reported on `finish_reason` and
//   `safety_ratings`.
message GenerateContentResponse {
  // A set of the feedback metadata the prompt specified in
  // `GenerateContentRequest.content`.
  message PromptFeedback {
    // Specifies what was the reason why prompt was blocked.
    enum BlockReason {
      // Default value. This value is unused.
      BLOCK_REASON_UNSPECIFIED = 0;

      // Prompt was blocked due to safety reasons. You can inspect
      // `safety_ratings` to understand which safety category blocked it.
      SAFETY = 1;

      // Prompt was blocked due to unknown reasons.
      OTHER = 2;
    }

    // Optional. If set, the prompt was blocked and no candidates are returned.
    // Rephrase your prompt.
    BlockReason block_reason = 1 [(google.api.field_behavior) = OPTIONAL];

    // Ratings for safety of the prompt.
    // There is at most one rating per category.
    repeated SafetyRating safety_ratings = 2;
  }

  // Metadata on the generation request's token usage.
  message UsageMetadata {
    // Number of tokens in the prompt.
    int32 prompt_token_count = 1;

    // Total number of tokens across the generated candidates.
    int32 candidates_token_count = 2;

    // Total token count for the generation request (prompt + candidates).
    int32 total_token_count = 3;
  }

  // Candidate responses from the model.
  repeated Candidate candidates = 1;

  // Returns the prompt's feedback related to the content filters.
  PromptFeedback prompt_feedback = 2;

  // Output only. Metadata on the generation requests' token usage.
  UsageMetadata usage_metadata = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// A response candidate generated from the model.
message Candidate {
  // Defines the reason why the model stopped generating tokens.
  enum FinishReason {
    // Default value. This value is unused.
    FINISH_REASON_UNSPECIFIED = 0;

    // Natural stop point of the model or provided stop sequence.
    STOP = 1;

    // The maximum number of tokens as specified in the request was reached.
    MAX_TOKENS = 2;

    // The candidate content was flagged for safety reasons.
    SAFETY = 3;

    // The candidate content was flagged for recitation reasons.
    RECITATION = 4;

    // Unknown reason.
    OTHER = 5;
  }

  // Output only. Index of the candidate in the list of candidates.
  optional int32 index = 3 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Generated content returned from the model.
  Content content = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. Output only. The reason why the model stopped generating tokens.
  //
  // If empty, the model has not stopped generating the tokens.
  FinishReason finish_reason = 2 [
    (google.api.field_behavior) = OPTIONAL,
    (google.api.field_behavior) = OUTPUT_ONLY
  ];

  // List of ratings for the safety of a response candidate.
  //
  // There is at most one rating per category.
  repeated SafetyRating safety_ratings = 5;

  // Output only. Citation information for model-generated candidate.
  //
  // This field may be populated with recitation information for any text
  // included in the `content`. These are passages that are "recited" from
  // copyrighted material in the foundational LLM's training data.
  CitationMetadata citation_metadata = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Token count for this candidate.
  int32 token_count = 7 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Request containing the `Content` for the model to embed.
message EmbedContentRequest {
  // Required. The model's resource name. This serves as an ID for the Model to
  // use.
  //
  // This name should match a model name returned by the `ListModels` method.
  //
  // Format: `models/{model}`
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Required. The content to embed. Only the `parts.text` fields will be
  // counted.
  Content content = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. Optional task type for which the embeddings will be used. Can
  // only be set for `models/embedding-001`.
  optional TaskType task_type = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. An optional title for the text. Only applicable when TaskType is
  // `RETRIEVAL_DOCUMENT`.
  //
  // Note: Specifying a `title` for `RETRIEVAL_DOCUMENT` provides better quality
  // embeddings for retrieval.
  optional string title = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Optional reduced dimension for the output embedding. If set,
  // excessive values in the output embedding are truncated from the end.
  // Supported by newer models since 2024, and the earlier model
  // (`models/embedding-001`) cannot specify this value.
  optional int32 output_dimensionality = 5
      [(google.api.field_behavior) = OPTIONAL];
}

// A list of floats representing an embedding.
message ContentEmbedding {
  // The embedding values.
  repeated float values = 1;
}

// The response to an `EmbedContentRequest`.
message EmbedContentResponse {
  // Output only. The embedding generated from the input content.
  ContentEmbedding embedding = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Batch request to get embeddings from the model for a list of prompts.
message BatchEmbedContentsRequest {
  // Required. The model's resource name. This serves as an ID for the Model to
  // use.
  //
  // This name should match a model name returned by the `ListModels` method.
  //
  // Format: `models/{model}`
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Required. Embed requests for the batch. The model in each of these requests
  // must match the model specified `BatchEmbedContentsRequest.model`.
  repeated EmbedContentRequest requests = 2
      [(google.api.field_behavior) = REQUIRED];
}

// The response to a `BatchEmbedContentsRequest`.
message BatchEmbedContentsResponse {
  // Output only. The embeddings for each request, in the same order as provided
  // in the batch request.
  repeated ContentEmbedding embeddings = 1
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Counts the number of tokens in the `prompt` sent to a model.
//
// Models may tokenize text differently, so each model may return a different
// `token_count`.
message CountTokensRequest {
  // Required. The model's resource name. This serves as an ID for the Model to
  // use.
  //
  // This name should match a model name returned by the `ListModels` method.
  //
  // Format: `models/{model}`
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Optional. The input given to the model as a prompt. This field is ignored
  // when `generate_content_request` is set.
  repeated Content contents = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The overall input given to the model. CountTokens will count
  // prompt, function calling, etc.
  GenerateContentRequest generate_content_request = 3
      [(google.api.field_behavior) = OPTIONAL];
}

// A response from `CountTokens`.
//
// It returns the model's `token_count` for the `prompt`.
message CountTokensResponse {
  // The number of tokens that the `model` tokenizes the `prompt` into.
  //
  // Always non-negative.
  int32 total_tokens = 1;
}