// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.ai.generativelanguage.v1beta3;

import "google/ai/generativelanguage/v1beta3/citation.proto";
import "google/ai/generativelanguage/v1beta3/safety.proto";
import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";

option go_package = "cloud.google.com/go/ai/generativelanguage/apiv1beta3/generativelanguagepb;generativelanguagepb";
option java_multiple_files = true;
option java_outer_classname = "TextServiceProto";
option java_package = "com.google.ai.generativelanguage.v1beta3";

// API for using Generative Language Models (GLMs) trained to generate text.
//
// Also known as Large Language Models (LLMs), these generate text given an
// input prompt from the user.
service TextService {
  option (google.api.default_host) = "generativelanguage.googleapis.com";

  // Generates a response from the model given an input message.
  rpc GenerateText(GenerateTextRequest) returns (GenerateTextResponse) {
    option (google.api.http) = {
      post: "/v1beta3/{model=models/*}:generateText"
      body: "*"
      additional_bindings {
        post: "/v1beta3/{model=tunedModels/*}:generateText"
        body: "*"
      }
    };
    option (google.api.method_signature) =
        "model,prompt,temperature,candidate_count,max_output_tokens,top_p,top_k";
  }

  // Generates an embedding from the model given an input message.
  rpc EmbedText(EmbedTextRequest) returns (EmbedTextResponse) {
    option (google.api.http) = {
      post: "/v1beta3/{model=models/*}:embedText"
      body: "*"
    };
    option (google.api.method_signature) = "model,text";
  }

  // Generates multiple embeddings from the model given input text in a
  // synchronous call.
  rpc BatchEmbedText(BatchEmbedTextRequest) returns (BatchEmbedTextResponse) {
    option (google.api.http) = {
      post: "/v1beta3/{model=models/*}:batchEmbedText"
      body: "*"
    };
    option (google.api.method_signature) = "model,texts";
  }

  // Runs a model's tokenizer on a text and returns the token count.
  rpc CountTextTokens(CountTextTokensRequest)
      returns (CountTextTokensResponse) {
    option (google.api.http) = {
      post: "/v1beta3/{model=models/*}:countTextTokens"
      body: "*"
    };
    option (google.api.method_signature) = "model,prompt";
  }
}
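// Illustrative sketch (not part of the generated API surface): the
// `google.api.http` bindings above transcode each RPC to REST, with request
// bodies encoded per the proto3 JSON mapping (lowerCamelCase field names).
// Assuming the `models/text-bison-001` example name from
// `GenerateTextRequest` below, and that credentials are supplied out of
// band, a `GenerateText` call might look like:
//
//   POST https://generativelanguage.googleapis.com/v1beta3/models/text-bison-001:generateText
//   {
//     "prompt": { "text": "Write a story about a magic backpack." },
//     "temperature": 0.5,
//     "candidateCount": 2,
//     "maxOutputTokens": 256,
//     "stopSequences": ["THE END"]
//   }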
// Request to generate a text completion response from the model.
message GenerateTextRequest {
  // Required. The name of the `Model` or `TunedModel` to use for generating
  // the completion.
  // Examples:
  //  models/text-bison-001
  //  tunedModels/sentence-translator-u3b7m
  string model = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The free-form input text given to the model as a prompt.
  //
  // Given a prompt, the model will generate a TextCompletion response it
  // predicts as the completion of the input text.
  TextPrompt prompt = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. Controls the randomness of the output.
  // Note: The default value varies by model; see the `Model.temperature`
  // attribute of the `Model` returned from the `getModel` function.
  //
  // Values can range from [0.0, 1.0], inclusive. A value closer to 1.0 will
  // produce responses that are more varied and creative, while a value
  // closer to 0.0 will typically result in more straightforward responses
  // from the model.
  optional float temperature = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Number of generated responses to return.
  //
  // This value must be between [1, 8], inclusive. If unset, this will
  // default to 1.
  optional int32 candidate_count = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The maximum number of tokens to include in a candidate.
  //
  // If unset, this will default to the `output_token_limit` specified in the
  // `Model` specification.
  optional int32 max_output_tokens = 5
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The maximum cumulative probability of tokens to consider when
  // sampling.
  //
  // The model uses combined Top-k and nucleus sampling.
  //
  // Tokens are sorted based on their assigned probabilities so that only the
  // most likely tokens are considered. Top-k sampling directly limits the
  // maximum number of tokens to consider, while nucleus sampling limits the
  // number of tokens based on the cumulative probability.
  //
  // Note: The default value varies by model; see the `Model.top_p`
  // attribute of the `Model` returned from the `getModel` function.
  optional float top_p = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The maximum number of tokens to consider when sampling.
  //
  // The model uses combined Top-k and nucleus sampling.
  //
  // Top-k sampling considers the set of `top_k` most probable tokens.
  // Defaults to 40.
  //
  // Note: The default value varies by model; see the `Model.top_k`
  // attribute of the `Model` returned from the `getModel` function.
  optional int32 top_k = 7 [(google.api.field_behavior) = OPTIONAL];

  // A list of unique `SafetySetting` instances for blocking unsafe content
  // that will be enforced on the `GenerateTextRequest.prompt` and
  // `GenerateTextResponse.candidates`. There should not be more than one
  // setting for each `SafetyCategory` type. The API will block any prompts
  // and responses that fail to meet the thresholds set by these settings.
  // This list overrides the default settings for each `SafetyCategory`; if
  // there is no `SafetySetting` for a given `SafetyCategory` in the list,
  // the API will use the default safety setting for that category.
  repeated SafetySetting safety_settings = 8;

  // The set of character sequences (up to 5) that will stop output
  // generation. If specified, the API will stop at the first appearance of a
  // stop sequence. The stop sequence will not be included as part of the
  // response.
  repeated string stop_sequences = 9;
}

// The response from the model, including candidate completions.
message GenerateTextResponse {
  // Candidate responses from the model.
  repeated TextCompletion candidates = 1;

  // A set of content filtering metadata for the prompt and response text.
  //
  // This indicates which `SafetyCategory`(s) blocked a candidate from this
  // response, the lowest `HarmProbability` that triggered a block, and the
  // `HarmThreshold` setting for that category. This indicates the smallest
  // change to the `SafetySettings` that would be necessary to unblock at
  // least one response.
  //
  // The blocking is configured by the `SafetySettings` in the request (or
  // the default `SafetySettings` of the API).
  repeated ContentFilter filters = 3;

  // Returns any safety feedback related to content filtering.
  repeated SafetyFeedback safety_feedback = 4;
}
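// Illustrative sketch: a `GenerateTextResponse` for the call sketched after
// the service definition could take the shape below. The output text is a
// placeholder, not real model output, and the category/probability strings
// are assumed example values of the enums defined in safety.proto.
//
//   {
//     "candidates": [
//       {
//         "output": "Once upon a time...",
//         "safetyRatings": [
//           { "category": "HARM_CATEGORY_VIOLENCE", "probability": "NEGLIGIBLE" }
//         ]
//       }
//     ]
//   }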
// Text given to the model as a prompt.
//
// The model will use this TextPrompt to generate a text completion.
message TextPrompt {
  // Required. The prompt text.
  string text = 1 [(google.api.field_behavior) = REQUIRED];
}

// Output text returned from a model.
message TextCompletion {
  // Output only. The generated text returned from the model.
  string output = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Ratings for the safety of a response.
  //
  // There is at most one rating per category.
  repeated SafetyRating safety_ratings = 2;

  // Output only. Citation information for model-generated `output` in this
  // `TextCompletion`.
  //
  // This field may be populated with attribution information for any text
  // included in the `output`.
  optional CitationMetadata citation_metadata = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Request to get a text embedding from the model.
message EmbedTextRequest {
  // Required. The model name to use with the format model=models/{model}.
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Required. The free-form input text that the model will turn into an
  // embedding.
  string text = 2 [(google.api.field_behavior) = REQUIRED];
}

// The response to an EmbedTextRequest.
message EmbedTextResponse {
  // Output only. The embedding generated from the input text.
  optional Embedding embedding = 1
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Batch request to get text embeddings from the model.
message BatchEmbedTextRequest {
  // Required. The name of the `Model` to use for generating the embedding.
  // Examples:
  //  models/embedding-gecko-001
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Required. The free-form input texts that the model will turn into
  // embeddings. The current limit is 100 texts; an error will be returned if
  // this limit is exceeded.
  repeated string texts = 2 [(google.api.field_behavior) = REQUIRED];
}

// The response to a BatchEmbedTextRequest.
message BatchEmbedTextResponse {
  // Output only. The embeddings generated from the input texts.
  repeated Embedding embeddings = 1
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// A list of floats representing the embedding.
message Embedding {
  // The embedding values.
  repeated float value = 1;
}

// Counts the number of tokens in the `prompt` sent to a model.
//
// Models may tokenize text differently, so each model may return a different
// `token_count`.
message CountTextTokensRequest {
  // Required. The model's resource name. This serves as an ID for the Model
  // to use.
  //
  // This name should match a model name returned by the `ListModels` method.
  //
  // Format: `models/{model}`
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Required. The free-form input text given to the model as a prompt.
  TextPrompt prompt = 2 [(google.api.field_behavior) = REQUIRED];
}

// A response from `CountTextTokens`.
//
// It returns the model's `token_count` for the `prompt`.
message CountTextTokensResponse {
  // The number of tokens that the `model` tokenizes the `prompt` into.
  //
  // Always non-negative.
  int32 token_count = 1;
}
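// Illustrative sketch: `EmbedText` and `CountTextTokens` use the same REST
// transcoding as `GenerateText`. The embedding model name reuses the
// `models/embedding-gecko-001` example from `BatchEmbedTextRequest` above;
// the input texts are placeholders.
//
//   POST https://generativelanguage.googleapis.com/v1beta3/models/embedding-gecko-001:embedText
//   { "text": "What is the meaning of life?" }
//
//   POST https://generativelanguage.googleapis.com/v1beta3/models/text-bison-001:countTextTokens
//   { "prompt": { "text": "How many tokens is this sentence?" } }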