// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1beta1;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/protobuf/timestamp.proto";

option csharp_namespace = "Google.Cloud.AIPlatform.V1Beta1";
option go_package = "cloud.google.com/go/aiplatform/apiv1beta1/aiplatformpb;aiplatformpb";
option java_multiple_files = true;
option java_outer_classname = "FeatureViewProto";
option java_package = "com.google.cloud.aiplatform.v1beta1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1beta1";
option ruby_package = "Google::Cloud::AIPlatform::V1beta1";

// FeatureView is a representation of values that the FeatureOnlineStore will
// serve based on its syncConfig.
message FeatureView {
  option (google.api.resource) = {
    type: "aiplatform.googleapis.com/FeatureView"
    pattern: "projects/{project}/locations/{location}/featureOnlineStores/{feature_online_store}/featureViews/{feature_view}"
  };

  // A BigQuery source whose view is materialized on each sync trigger.
  message BigQuerySource {
    // Required. The BigQuery view URI that will be materialized on each sync
    // trigger based on FeatureView.SyncConfig.
    string uri = 1 [(google.api.field_behavior) = REQUIRED];

    // Required. Columns to construct entity_id / row keys.
    repeated string entity_id_columns = 2
        [(google.api.field_behavior) = REQUIRED];
  }

  // Configuration for Sync. Only one option is set.
  message SyncConfig {
    // Cron schedule (https://en.wikipedia.org/wiki/Cron) to launch scheduled
    // runs. To explicitly set a timezone to the cron tab, apply a prefix in
    // the cron tab: "CRON_TZ=${IANA_TIME_ZONE}" or "TZ=${IANA_TIME_ZONE}".
    // The ${IANA_TIME_ZONE} may only be a valid string from IANA time zone
    // database. For example, "CRON_TZ=America/New_York 1 * * * *", or
    // "TZ=America/New_York 1 * * * *".
    string cron = 1;
  }

  // Deprecated. Use
  // [IndexConfig][google.cloud.aiplatform.v1beta1.FeatureView.IndexConfig]
  // instead.
  message VectorSearchConfig {
    option deprecated = true;

    // Configuration options for using brute force search.
    message BruteForceConfig {}

    // Configuration options for the tree-AH algorithm.
    message TreeAHConfig {
      // Optional. Number of embeddings on each leaf node. The default value is
      // 1000 if not set.
      optional int64 leaf_node_embedding_count = 1
          [(google.api.field_behavior) = OPTIONAL];
    }

    // The distance measure used in nearest neighbor search.
    enum DistanceMeasureType {
      // Should not be set.
      DISTANCE_MEASURE_TYPE_UNSPECIFIED = 0;

      // Euclidean (L_2) Distance.
      SQUARED_L2_DISTANCE = 1;

      // Cosine Distance. Defined as 1 - cosine similarity.
      //
      // We strongly suggest using DOT_PRODUCT_DISTANCE + UNIT_L2_NORM instead
      // of COSINE distance. Our algorithms have been more optimized for
      // DOT_PRODUCT distance which, when combined with UNIT_L2_NORM, is
      // mathematically equivalent to COSINE distance and results in the same
      // ranking.
      COSINE_DISTANCE = 2;

      // Dot Product Distance. Defined as a negative of the dot product.
      DOT_PRODUCT_DISTANCE = 3;
    }

    // The configuration with regard to the algorithms used for efficient
    // search.
    oneof algorithm_config {
      // Optional. Configuration options for the tree-AH algorithm (Shallow tree
      // + Asymmetric Hashing). Please refer to this paper for more details:
      // https://arxiv.org/abs/1908.10396
      TreeAHConfig tree_ah_config = 8 [(google.api.field_behavior) = OPTIONAL];

      // Optional. Configuration options for using brute force search, which
      // simply implements the standard linear search in the database for each
      // query. It is primarily meant for benchmarking and to generate the
      // ground truth for approximate search.
      BruteForceConfig brute_force_config = 9
          [(google.api.field_behavior) = OPTIONAL];
    }

    // Optional. Column of embedding. This column contains the source data to
    // create index for vector search. embedding_column must be set when using
    // vector search.
    string embedding_column = 3 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Columns of features that're used to filter vector search
    // results.
    repeated string filter_columns = 4 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Column of crowding. This column contains crowding attribute
    // which is a constraint on a neighbor list produced by
    // [FeatureOnlineStoreService.SearchNearestEntities][google.cloud.aiplatform.v1beta1.FeatureOnlineStoreService.SearchNearestEntities]
    // to diversify search results. If
    // [NearestNeighborQuery.per_crowding_attribute_neighbor_count][google.cloud.aiplatform.v1beta1.NearestNeighborQuery.per_crowding_attribute_neighbor_count]
    // is set to K in
    // [SearchNearestEntitiesRequest][google.cloud.aiplatform.v1beta1.SearchNearestEntitiesRequest],
    // it's guaranteed that no more than K entities of the same crowding
    // attribute are returned in the response.
    string crowding_column = 5 [(google.api.field_behavior) = OPTIONAL];

    // Optional. The number of dimensions of the input embedding.
    optional int32 embedding_dimension = 6
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. The distance measure used in nearest neighbor search.
    DistanceMeasureType distance_measure_type = 7
        [(google.api.field_behavior) = OPTIONAL];
  }

  // Configuration for vector indexing.
  message IndexConfig {
    // Configuration options for using brute force search.
    message BruteForceConfig {}

    // Configuration options for the tree-AH algorithm.
    message TreeAHConfig {
      // Optional. Number of embeddings on each leaf node. The default value is
      // 1000 if not set.
      optional int64 leaf_node_embedding_count = 1
          [(google.api.field_behavior) = OPTIONAL];
    }

    // The distance measure used in nearest neighbor search.
    enum DistanceMeasureType {
      // Should not be set.
      DISTANCE_MEASURE_TYPE_UNSPECIFIED = 0;

      // Euclidean (L_2) Distance.
      SQUARED_L2_DISTANCE = 1;

      // Cosine Distance. Defined as 1 - cosine similarity.
      //
      // We strongly suggest using DOT_PRODUCT_DISTANCE + UNIT_L2_NORM instead
      // of COSINE distance. Our algorithms have been more optimized for
      // DOT_PRODUCT distance which, when combined with UNIT_L2_NORM, is
      // mathematically equivalent to COSINE distance and results in the same
      // ranking.
      COSINE_DISTANCE = 2;

      // Dot Product Distance. Defined as a negative of the dot product.
      DOT_PRODUCT_DISTANCE = 3;
    }

    // The configuration with regard to the algorithms used for efficient
    // search.
    oneof algorithm_config {
      // Optional. Configuration options for the tree-AH algorithm (Shallow tree
      // + Asymmetric Hashing). Please refer to this paper for more details:
      // https://arxiv.org/abs/1908.10396
      TreeAHConfig tree_ah_config = 6 [(google.api.field_behavior) = OPTIONAL];

      // Optional. Configuration options for using brute force search, which
      // simply implements the standard linear search in the database for each
      // query. It is primarily meant for benchmarking and to generate the
      // ground truth for approximate search.
      BruteForceConfig brute_force_config = 7
          [(google.api.field_behavior) = OPTIONAL];
    }

    // Optional. Column of embedding. This column contains the source data to
    // create index for vector search. embedding_column must be set when using
    // vector search.
    string embedding_column = 1 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Columns of features that're used to filter vector search
    // results.
    repeated string filter_columns = 2 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Column of crowding. This column contains crowding attribute
    // which is a constraint on a neighbor list produced by
    // [FeatureOnlineStoreService.SearchNearestEntities][google.cloud.aiplatform.v1beta1.FeatureOnlineStoreService.SearchNearestEntities]
    // to diversify search results. If
    // [NearestNeighborQuery.per_crowding_attribute_neighbor_count][google.cloud.aiplatform.v1beta1.NearestNeighborQuery.per_crowding_attribute_neighbor_count]
    // is set to K in
    // [SearchNearestEntitiesRequest][google.cloud.aiplatform.v1beta1.SearchNearestEntitiesRequest],
    // it's guaranteed that no more than K entities of the same crowding
    // attribute are returned in the response.
    string crowding_column = 3 [(google.api.field_behavior) = OPTIONAL];

    // Optional. The number of dimensions of the input embedding.
    optional int32 embedding_dimension = 4
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. The distance measure used in nearest neighbor search.
    DistanceMeasureType distance_measure_type = 5
        [(google.api.field_behavior) = OPTIONAL];
  }

  // A Feature Registry source for features that need to be synced to Online
  // Store.
  message FeatureRegistrySource {
    // Features belonging to a single feature group that will be
    // synced to Online Store.
    message FeatureGroup {
      // Required. Identifier of the feature group.
      string feature_group_id = 1 [(google.api.field_behavior) = REQUIRED];

      // Required. Identifiers of features under the feature group.
      repeated string feature_ids = 2 [(google.api.field_behavior) = REQUIRED];
    }

    // Required. List of features that need to be synced to Online Store.
    repeated FeatureGroup feature_groups = 1
        [(google.api.field_behavior) = REQUIRED];

    // Optional. The project number of the parent project of the Feature Groups.
    optional int64 project_number = 2 [(google.api.field_behavior) = OPTIONAL];
  }

  // A Vertex Rag source for features that need to be synced to Online
  // Store.
  message VertexRagSource {
    // Required. The BigQuery view/table URI that will be materialized on each
    // manual sync trigger. The table/view is expected to have the following
    // columns and types at least:
    //  - `corpus_id` (STRING, NULLABLE/REQUIRED)
    //  - `file_id` (STRING, NULLABLE/REQUIRED)
    //  - `chunk_id` (STRING, NULLABLE/REQUIRED)
    //  - `chunk_data_type` (STRING, NULLABLE/REQUIRED)
    //  - `chunk_data` (STRING, NULLABLE/REQUIRED)
    //  - `embeddings` (FLOAT, REPEATED)
    //  - `file_original_uri` (STRING, NULLABLE/REQUIRED)
    string uri = 1 [(google.api.field_behavior) = REQUIRED];

    // Optional. The RAG corpus id corresponding to this FeatureView.
    int64 rag_corpus_id = 2 [(google.api.field_behavior) = OPTIONAL];
  }

  // Service agent type used during data sync.
  enum ServiceAgentType {
    // By default, the project-level Vertex AI Service Agent is enabled.
    SERVICE_AGENT_TYPE_UNSPECIFIED = 0;

    // Indicates the project-level Vertex AI Service Agent
    // (https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents)
    // will be used during sync jobs.
    SERVICE_AGENT_TYPE_PROJECT = 1;

    // Enable a FeatureView service account to be created by Vertex AI and
    // output in the field `service_account_email`. This service account will
    // be used to read from the source BigQuery table during sync.
    SERVICE_AGENT_TYPE_FEATURE_VIEW = 2;
  }

  // The source of the data for this FeatureView. At most one of the members
  // below can be set.
  oneof source {
    // Optional. Configures how data is supposed to be extracted from a BigQuery
    // source to be loaded onto the FeatureOnlineStore.
    BigQuerySource big_query_source = 6
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. Configures the features from a Feature Registry source that
    // need to be loaded onto the FeatureOnlineStore.
    FeatureRegistrySource feature_registry_source = 9
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. The Vertex RAG Source that the FeatureView is linked to.
    VertexRagSource vertex_rag_source = 18
        [(google.api.field_behavior) = OPTIONAL];
  }

  // Identifier. Name of the FeatureView. Format:
  // `projects/{project}/locations/{location}/featureOnlineStores/{feature_online_store}/featureViews/{feature_view}`
  string name = 1 [(google.api.field_behavior) = IDENTIFIER];

  // Output only. Timestamp when this FeatureView was created.
  google.protobuf.Timestamp create_time = 2
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Timestamp when this FeatureView was last updated.
  google.protobuf.Timestamp update_time = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. Used to perform consistent read-modify-write updates. If not set,
  // a blind "overwrite" update happens.
  string etag = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The labels with user-defined metadata to organize your
  // FeatureViews.
  //
  // Label keys and values can be no longer than 64 characters
  // (Unicode codepoints), can only contain lowercase letters, numeric
  // characters, underscores and dashes. International characters are allowed.
  //
  // See https://goo.gl/xmQnxf for more information on and examples of labels.
  // No more than 64 user labels can be associated with one
  // FeatureOnlineStore (System labels are excluded). System reserved label
  // keys are prefixed with "aiplatform.googleapis.com/" and are immutable.
  map<string, string> labels = 5 [(google.api.field_behavior) = OPTIONAL];

  // Configures when data is to be synced/updated for this FeatureView. At the
  // end of the sync the latest featureValues for each entityId of this
  // FeatureView are made ready for online serving.
  SyncConfig sync_config = 7;

  // Optional. Deprecated: please use
  // [FeatureView.index_config][google.cloud.aiplatform.v1beta1.FeatureView.index_config]
  // instead.
  VectorSearchConfig vector_search_config = 8
      [deprecated = true, (google.api.field_behavior) = OPTIONAL];

  // Optional. Configuration for index preparation for vector search. It
  // contains the required configurations to create an index from source data,
  // so that approximate nearest neighbor (a.k.a ANN) algorithms search can be
  // performed during online serving.
  IndexConfig index_config = 15 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Service agent type used during data sync. By default, the Vertex
  // AI Service Agent is used. When using an IAM Policy to isolate this
  // FeatureView within a project, a separate service account should be
  // provisioned by setting this field to `SERVICE_AGENT_TYPE_FEATURE_VIEW`.
  // This will generate a separate service account to access the BigQuery source
  // table.
  ServiceAgentType service_agent_type = 14
      [(google.api.field_behavior) = OPTIONAL];

  // Output only. A Service Account unique to this FeatureView. The role
  // bigquery.dataViewer should be granted to this service account to allow
  // Vertex AI Feature Store to sync data to the online store.
  string service_account_email = 13
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Reserved for future use.
  bool satisfies_pzs = 19 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Reserved for future use.
  bool satisfies_pzi = 20 [(google.api.field_behavior) = OUTPUT_ONLY];
}