// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/aiplatform/v1/content.proto";
import "google/cloud/aiplatform/v1/prediction_service.proto";
import "google/protobuf/struct.proto";

option csharp_namespace = "Google.Cloud.AIPlatform.V1";
option go_package = "cloud.google.com/go/aiplatform/apiv1/aiplatformpb;aiplatformpb";
option java_multiple_files = true;
option java_outer_classname = "LlmUtilityServiceProto";
option java_package = "com.google.cloud.aiplatform.v1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
option ruby_package = "Google::Cloud::AIPlatform::V1";

// Service for LLM related utility functions.
//
// Each RPC is exposed over REST with four URL forms for `endpoint`: a
// project-scoped endpoint, a project-scoped publisher model, and the
// project-less `endpoints/*` and `publishers/*/models/*` forms.
service LlmUtilityService {
  option (google.api.default_host) = "aiplatform.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Performs token counting.
  rpc CountTokens(CountTokensRequest) returns (CountTokensResponse) {
    option (google.api.http) = {
      post: "/v1/{endpoint=projects/*/locations/*/endpoints/*}:countTokens"
      body: "*"
      additional_bindings {
        post: "/v1/{endpoint=projects/*/locations/*/publishers/*/models/*}:countTokens"
        body: "*"
      }
      additional_bindings {
        post: "/v1/{endpoint=endpoints/*}:countTokens"
        body: "*"
      }
      additional_bindings {
        post: "/v1/{endpoint=publishers/*/models/*}:countTokens"
        body: "*"
      }
    };
    option (google.api.method_signature) = "endpoint,instances";
  }

  // Returns a list of tokens based on the input text.
  rpc ComputeTokens(ComputeTokensRequest) returns (ComputeTokensResponse) {
    option (google.api.http) = {
      post: "/v1/{endpoint=projects/*/locations/*/endpoints/*}:computeTokens"
      body: "*"
      additional_bindings {
        post: "/v1/{endpoint=projects/*/locations/*/publishers/*/models/*}:computeTokens"
        body: "*"
      }
      additional_bindings {
        post: "/v1/{endpoint=endpoints/*}:computeTokens"
        body: "*"
      }
      additional_bindings {
        post: "/v1/{endpoint=publishers/*/models/*}:computeTokens"
        body: "*"
      }
    };
    option (google.api.method_signature) = "endpoint,instances";
  }
}

// Request message for ComputeTokens RPC call.
message ComputeTokensRequest {
  // Required. The name of the Endpoint requested to get lists of tokens and
  // token ids.
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Optional. The instances that are the input to the token computing API
  // call. The schema is identical to the prediction schema of the text model,
  // even for non-text models such as chat models or Codey models.
  repeated google.protobuf.Value instances = 2
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The name of the publisher model requested to serve the
  // prediction. Format:
  // projects/{project}/locations/{location}/publishers/*/models/*
  string model = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Input content.
  repeated Content contents = 4 [(google.api.field_behavior) = OPTIONAL];
}

// Tokens info with a list of tokens and the corresponding list of token ids.
message TokensInfo {
  // A list of tokens from the input.
  repeated bytes tokens = 1;

  // A list of token ids from the input.
  repeated int64 token_ids = 2;

  // Optional. The role from the corresponding Content.
  string role = 3 [(google.api.field_behavior) = OPTIONAL];
}

// Response message for ComputeTokens RPC call.
message ComputeTokensResponse {
  // Lists of tokens info from the input. A ComputeTokensRequest can carry
  // multiple instances, each with its own prompt, so a list of tokens info is
  // returned to cover requests with multiple instances.
  repeated TokensInfo tokens_info = 1;
}