// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.ai.generativelanguage.v1beta;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/protobuf/timestamp.proto";

option go_package = "cloud.google.com/go/ai/generativelanguage/apiv1beta/generativelanguagepb;generativelanguagepb";
option java_multiple_files = true;
option java_outer_classname = "RetrieverProto";
option java_package = "com.google.ai.generativelanguage.v1beta";

// A `Corpus` groups together a set of `Document`s.
// Each project is limited to 5 corpora.
message Corpus {
  option (google.api.resource) = {
    type: "generativelanguage.googleapis.com/Corpus"
    pattern: "corpora/{corpus}"
    plural: "corpora"
    singular: "corpus"
  };

  // Immutable. Identifier. Resource name of the `Corpus`. The ID is the part
  // of the name that follows the "corpora/" prefix. It may hold at most 40
  // characters, each a lowercase alphanumeric character or a dash (-), and it
  // may not begin or end with a dash. When the name is left empty on create,
  // a unique name is derived from `display_name` together with a 12 character
  // random suffix.
  // Example: `corpora/my-awesome-corpora-123a456b789c`
  string name = 1 [
    (google.api.field_behavior) = IDENTIFIER,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Optional. Human-readable display name of the `Corpus`. The length is
  // limited to 512 characters, spaces included.
  // Example: "Docs on Semantic Retriever"
  string display_name = 2 [(google.api.field_behavior) = OPTIONAL];

  // Output only. Timestamp at which the `Corpus` was created.
  google.protobuf.Timestamp create_time = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Timestamp at which the `Corpus` was last updated.
  google.protobuf.Timestamp update_time = 4
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// A `Document` groups together a set of `Chunk`s.
// A single `Corpus` may hold at most 10,000 `Document`s.
message Document {
  option (google.api.resource) = {
    type: "generativelanguage.googleapis.com/Document"
    pattern: "corpora/{corpus}/documents/{document}"
    plural: "documents"
    singular: "document"
  };

  // Immutable. Identifier. Resource name of the `Document`. The ID is the part
  // of the name that follows the "corpora/*/documents/" prefix. It may hold at
  // most 40 characters, each a lowercase alphanumeric character or a dash (-),
  // and it may not begin or end with a dash. When the name is left empty on
  // create, a unique name is derived from `display_name` together with a 12
  // character random suffix.
  // Example: `corpora/{corpus_id}/documents/my-awesome-doc-123a456b789c`
  string name = 1 [
    (google.api.field_behavior) = IDENTIFIER,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Optional. Human-readable display name of the `Document`. The length is
  // limited to 512 characters, spaces included.
  // Example: "Semantic Retriever Documentation"
  string display_name = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Custom metadata supplied by the user as key-value pairs and
  // used for querying. At most 20 `CustomMetadata` entries are allowed per
  // `Document`.
  repeated CustomMetadata custom_metadata = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Output only. Timestamp at which the `Document` was last updated.
  google.protobuf.Timestamp update_time = 4
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Timestamp at which the `Document` was created.
  google.protobuf.Timestamp create_time = 5
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// A list of user provided string values that belong to one metadata key.
message StringList {
  // String values to store for the metadata key.
  repeated string values = 1;
}

// A single user provided metadata entry, stored as a key-value pair.
message CustomMetadata {
  // The value stored under `key`; exactly one of the typed variants below.
  oneof value {
    // Metadata value stored as a single string.
    string string_value = 2;

    // Metadata value stored as a `StringList`.
    StringList string_list_value = 6;

    // Metadata value stored as a number.
    float numeric_value = 7;
  }

  // Required. Key under which the metadata value is stored.
  string key = 1 [(google.api.field_behavior) = REQUIRED];
}

// A user provided filter that restricts retrieval according to metadata
// values at the `Chunk` or `Document` level.
// Example (genre = drama OR genre = action):
//   key = "document.custom_metadata.genre"
//   conditions = [{string_value = "drama", operation = EQUAL},
//                 {string_value = "action", operation = EQUAL}]
message MetadataFilter {
  // Required. Metadata key that the filter is evaluated against.
  string key = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. `Condition`s on the given key that cause this filter to
  // trigger. When several `Condition`s are given they are combined with a
  // logical OR.
  repeated Condition conditions = 2 [(google.api.field_behavior) = REQUIRED];
}

// A filter condition that applies to one key.
message Condition {
  // The operators that may be applied to a key-value pair.
  enum Operator {
    // Default value; it is not used.
    OPERATOR_UNSPECIFIED = 0;

    // Supported for numeric values.
    LESS = 1;

    // Supported for numeric values.
    LESS_EQUAL = 2;

    // Supported for numeric and string values.
    EQUAL = 3;

    // Supported for numeric values.
    GREATER_EQUAL = 4;

    // Supported for numeric values.
    GREATER = 5;

    // Supported for numeric and string values.
    NOT_EQUAL = 6;

    // Supported for string values, and only when the `CustomMetadata` value
    // type for the given key has a `string_list_value`.
    INCLUDES = 7;

    // Supported for string values, and only when the `CustomMetadata` value
    // type for the given key has a `string_list_value`.
    EXCLUDES = 8;
  }

  // The value type has to agree with the value type defined in the field for
  // the corresponding key; a mismatch yields an empty result set. For a
  // `CustomMetadata` whose value type is `StringList`, the condition should
  // combine `string_value` with an INCLUDES/EXCLUDES operation; anything else
  // likewise yields an empty result set.
  oneof value {
    // String value that the metadata is filtered on.
    string string_value = 1;

    // Numeric value that the metadata is filtered on.
    float numeric_value = 6;
  }

  // Required. Operator that is applied to the given key-value pair in order
  // to trigger the condition.
  Operator operation = 5 [(google.api.field_behavior) = REQUIRED];
}

// A `Chunk` is a piece of a `Document` that is handled as an independent unit
// for vector representation and storage.
// A single `Corpus` may hold at most 1 million `Chunk`s.
message Chunk {
  option (google.api.resource) = {
    type: "generativelanguage.googleapis.com/Chunk"
    pattern: "corpora/{corpus}/documents/{document}/chunks/{chunk}"
    plural: "chunks"
    singular: "chunk"
  };

  // Lifecycle states of a `Chunk`.
  enum State {
    // Default value; it is used when the state is omitted.
    STATE_UNSPECIFIED = 0;

    // The `Chunk` is being processed (embedding and vector storage).
    STATE_PENDING_PROCESSING = 1;

    // The `Chunk` has been processed and can be queried.
    STATE_ACTIVE = 2;

    // Processing of the `Chunk` failed.
    STATE_FAILED = 10;
  }

  // Immutable. Identifier. Resource name of the `Chunk`. The ID is the part of
  // the name that follows the "corpora/*/documents/*/chunks/" prefix. It may
  // hold at most 40 characters, each a lowercase alphanumeric character or a
  // dash (-), and it may not begin or end with a dash. When the name is left
  // empty on create, a random 12-character unique ID is generated.
  // Example: `corpora/{corpus_id}/documents/{document_id}/chunks/123a456b789c`
  string name = 1 [
    (google.api.field_behavior) = IDENTIFIER,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Required. Content of the `Chunk`, such as the text string.
  // A chunk may contain at most 2043 tokens.
  ChunkData data = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. Custom metadata supplied by the user as key-value pairs.
  // At most 20 `CustomMetadata` entries are allowed per chunk.
  repeated CustomMetadata custom_metadata = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Output only. Timestamp at which the `Chunk` was created.
  google.protobuf.Timestamp create_time = 4
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Timestamp at which the `Chunk` was last updated.
  google.protobuf.Timestamp update_time = 5
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The state the `Chunk` is currently in.
  State state = 6 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Extracted data representing the content of a `Chunk`.
message ChunkData {
  // The content payload; `string_value` is the only variant declared here.
  oneof data {
    // Content of the `Chunk` as a string.
    // A chunk may contain at most 2043 tokens.
    string string_value = 1;
  }
}