// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dataplex.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/timestamp.proto";

option go_package = "google.golang.org/genproto/googleapis/cloud/dataplex/v1;dataplex";
option java_multiple_files = true;
option java_outer_classname = "MetadataProto";
option java_package = "com.google.cloud.dataplex.v1";

// Metadata service manages metadata resources such as tables, filesets and
// partitions.
service MetadataService {
  option (google.api.default_host) = "dataplex.googleapis.com";
  option (google.api.oauth_scopes) = "https://www.googleapis.com/auth/cloud-platform";

  // Create a metadata entity.
  rpc CreateEntity(CreateEntityRequest) returns (Entity) {
    option (google.api.http) = {
      post: "/v1/{parent=projects/*/locations/*/lakes/*/zones/*}/entities"
      body: "entity"
    };
    option (google.api.method_signature) = "parent,entity";
  }

  // Update a metadata entity. Only supports full resource update.
  rpc UpdateEntity(UpdateEntityRequest) returns (Entity) {
    option (google.api.http) = {
      put: "/v1/{entity.name=projects/*/locations/*/lakes/*/zones/*/entities/*}"
      body: "entity"
    };
  }

  // Delete a metadata entity.
  rpc DeleteEntity(DeleteEntityRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/{name=projects/*/locations/*/lakes/*/zones/*/entities/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Get a metadata entity.
  rpc GetEntity(GetEntityRequest) returns (Entity) {
    option (google.api.http) = {
      get: "/v1/{name=projects/*/locations/*/lakes/*/zones/*/entities/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // List metadata entities in a zone.
  rpc ListEntities(ListEntitiesRequest) returns (ListEntitiesResponse) {
    option (google.api.http) = {
      get: "/v1/{parent=projects/*/locations/*/lakes/*/zones/*}/entities"
    };
    option (google.api.method_signature) = "parent";
  }

  // Create a metadata partition.
  rpc CreatePartition(CreatePartitionRequest) returns (Partition) {
    option (google.api.http) = {
      post: "/v1/{parent=projects/*/locations/*/lakes/*/zones/*/entities/*}/partitions"
      body: "partition"
    };
    option (google.api.method_signature) = "parent,partition";
  }

  // Delete a metadata partition.
  rpc DeletePartition(DeletePartitionRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/{name=projects/*/locations/*/lakes/*/zones/*/entities/*/partitions/**}"
    };
    option (google.api.method_signature) = "name";
  }

  // Get a metadata partition of an entity.
  rpc GetPartition(GetPartitionRequest) returns (Partition) {
    option (google.api.http) = {
      get: "/v1/{name=projects/*/locations/*/lakes/*/zones/*/entities/*/partitions/**}"
    };
    option (google.api.method_signature) = "name";
  }

  // List metadata partitions of an entity.
  rpc ListPartitions(ListPartitionsRequest) returns (ListPartitionsResponse) {
    option (google.api.http) = {
      get: "/v1/{parent=projects/*/locations/*/lakes/*/zones/*/entities/*}/partitions"
    };
    option (google.api.method_signature) = "parent";
  }
}

// Create a metadata entity request.
message CreateEntityRequest {
  // Required. The resource name of the parent zone:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}`.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/Zone"
    }
  ];

  // Required. Entity resource.
  Entity entity = 3 [(google.api.field_behavior) = REQUIRED];

  // Optional. Only validate the request, but do not perform mutations.
  // The default is false.
  bool validate_only = 4 [(google.api.field_behavior) = OPTIONAL];
}

// Update a metadata entity request.
// The existing entity will be fully replaced by the entity in the request.
// The entity ID is mutable. To modify the ID, use the current entity ID in the
// request URL and specify the new ID in the request body.
message UpdateEntityRequest {
  // Required. The entity that fully replaces the existing entity.
  Entity entity = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. Only validate the request, but do not perform mutations.
  // The default is false.
  bool validate_only = 3 [(google.api.field_behavior) = OPTIONAL];
}

// Delete a metadata entity request.
message DeleteEntityRequest {
  // Required. The resource name of the entity:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}/entities/{entity_id}`.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/Entity"
    }
  ];

  // Required. The etag associated with the entity, which can be retrieved
  // with a
  // [GetEntity][google.cloud.dataplex.v1.MetadataService.GetEntity] request.
  string etag = 2 [(google.api.field_behavior) = REQUIRED];
}

// List metadata entities request.
message ListEntitiesRequest {
  // Entity views.
  enum EntityView {
    // The default unset value. Return both table and fileset entities
    // if unspecified.
    ENTITY_VIEW_UNSPECIFIED = 0;

    // Only list table entities.
    TABLES = 1;

    // Only list fileset entities.
    FILESETS = 2;
  }

  // Required. The resource name of the parent zone:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}`.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/Zone"
    }
  ];

  // Required. Specify the entity view to make a partial list request.
  EntityView view = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. Maximum number of entities to return. The service may return
  // fewer than this value. If unspecified, 100 entities will be returned by
  // default. The maximum value is 500; larger values will be truncated to 500.
  int32 page_size = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Page token received from a previous `ListEntities` call. Provide
  // this to retrieve the subsequent page. When paginating, all other parameters
  // provided to `ListEntities` must match the call that provided the
  // page token.
  string page_token = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The following filter parameters can be added to the URL to limit
  // the entities returned by the API:
  //
  // - Entity ID: ?filter="id=entityID"
  // - Asset ID: ?filter="asset=assetID"
  // - Data path: ?filter="data_path=gs://my-bucket"
  // - Is HIVE compatible: ?filter="hive_compatible=true"
  // - Is BigQuery compatible: ?filter="bigquery_compatible=true"
  string filter = 5 [(google.api.field_behavior) = OPTIONAL];
}

// List metadata entities response.
message ListEntitiesResponse {
  // Entities in the specified parent zone.
  repeated Entity entities = 1;

  // Token to retrieve the next page of results, or empty if there are no
  // remaining results in the list.
  string next_page_token = 2;
}

// Get metadata entity request.
message GetEntityRequest {
  // Entity views for get entity partial result.
  enum EntityView {
    // The API will default to the `BASIC` view.
    ENTITY_VIEW_UNSPECIFIED = 0;

    // Minimal view that does not include the schema.
    BASIC = 1;

    // Include basic information and schema.
    SCHEMA = 2;

    // Include everything. Currently, this is the same as the SCHEMA view.
    FULL = 4;
  }

  // Required. The resource name of the entity:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}/entities/{entity_id}`.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/Entity"
    }
  ];

  // Optional. Used to select the subset of entity information to return.
  // Defaults to `BASIC`.
  EntityView view = 2 [(google.api.field_behavior) = OPTIONAL];
}

// List metadata partitions request.
message ListPartitionsRequest {
  // Required. The resource name of the parent entity:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}/entities/{entity_id}`.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/Entity"
    }
  ];

  // Optional. Maximum number of partitions to return. The service may return
  // fewer than this value. If unspecified, 100 partitions will be returned by
  // default. The maximum page size is 500; larger values will be truncated to
  // 500.
  int32 page_size = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Page token received from a previous `ListPartitions` call.
  // Provide this to retrieve the subsequent page. When paginating, all other
  // parameters provided to `ListPartitions` must match the call that provided
  // the page token.
  string page_token = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Filter the partitions returned to the caller using a key value
  // pair expression. Supported operators and syntax:
  //
  // - logic operators: AND, OR
  // - comparison operators: <, >, >=, <=, =, !=
  // - LIKE operators:
  //   - The right hand of a LIKE operator supports "." and
  //     "*" for wildcard searches, for example `value1 LIKE ".*oo.*"`
  // - parenthetical grouping: ( )
  //
  // Sample filter expression: `?filter="key1 < value1 OR key2 > value2"`
  //
  // **Notes:**
  //
  // - Keys to the left of operators are case insensitive.
  // - Partition results are sorted first by creation time, then by
  //   lexicographic order.
  // - Up to 20 key value filter pairs are allowed, but due to performance
  //   considerations, only the first 10 will be used as a filter.
  string filter = 4 [(google.api.field_behavior) = OPTIONAL];
}

// Create metadata partition request.
message CreatePartitionRequest {
  // Required. The resource name of the parent entity:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}/entities/{entity_id}`.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/Entity"
    }
  ];

  // Required. Partition resource.
  Partition partition = 3 [(google.api.field_behavior) = REQUIRED];

  // Optional. Only validate the request, but do not perform mutations.
  // The default is false.
  bool validate_only = 4 [(google.api.field_behavior) = OPTIONAL];
}

// Delete metadata partition request.
message DeletePartitionRequest {
  // Required. The resource name of the partition, in the format:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}/entities/{entity_id}/partitions/{partition_value_path}`.
  // The {partition_value_path} segment consists of an ordered sequence of
  // partition values separated by "/". All values must be provided.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/Partition"
    }
  ];

  // Optional. The etag associated with the partition.
  // This field is marked deprecated.
  string etag = 2 [
    deprecated = true,
    (google.api.field_behavior) = OPTIONAL
  ];
}

// List metadata partitions response.
message ListPartitionsResponse {
  // Partitions under the specified parent entity.
  repeated Partition partitions = 1;

  // Token to retrieve the next page of results, or empty if there are no
  // remaining results in the list.
  string next_page_token = 2;
}

// Get metadata partition request.
message GetPartitionRequest {
  // Required. The resource name of the partition:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}/entities/{entity_id}/partitions/{partition_value_path}`.
  // The {partition_value_path} segment consists of an ordered sequence of
  // partition values separated by "/". All values must be provided.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/Partition"
    }
  ];
}

// Represents tables and fileset metadata contained within a zone.
message Entity {
  option (google.api.resource) = {
    type: "dataplex.googleapis.com/Entity"
    pattern: "projects/{project}/locations/{location}/lakes/{lake}/zones/{zone}/entities/{entity}"
  };

  // The type of entity.
  enum Type {
    // Type unspecified.
    TYPE_UNSPECIFIED = 0;

    // Structured and semi-structured data.
    TABLE = 1;

    // Unstructured data.
    FILESET = 2;
  }

  // Provides compatibility information for various metadata stores.
  message CompatibilityStatus {
    // Provides compatibility information for a specific metadata store.
    message Compatibility {
      // Output only. Whether the entity is compatible and can be represented
      // in the metadata store.
      bool compatible = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

      // Output only. Provides additional detail if the entity is incompatible
      // with the metadata store.
      string reason = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
    }

    // Output only. Whether this entity is compatible with Hive Metastore.
    Compatibility hive_metastore = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Whether this entity is compatible with BigQuery.
    Compatibility bigquery = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // Output only. The resource name of the entity, of the form:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}/entities/{id}`.
  string name = 1 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/Entity"
    }
  ];

  // Optional. Display name must be shorter than or equal to 256 characters.
  string display_name = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. User friendly longer description text. Must be shorter than or
  // equal to 1024 characters.
  string description = 3 [(google.api.field_behavior) = OPTIONAL];

  // Output only. The time when the entity was created.
  google.protobuf.Timestamp create_time = 5 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the entity was last updated.
  google.protobuf.Timestamp update_time = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Required. A user-provided entity ID. It is mutable, and will be used as the
  // published table name. Specifying a new ID in an update entity
  // request will override the existing value.
  // The ID must contain only letters (a-z, A-Z), numbers (0-9), and
  // underscores. Must begin with a letter and consist of 256 or fewer
  // characters.
  string id = 7 [(google.api.field_behavior) = REQUIRED];

  // Optional. The etag associated with the entity, which can be retrieved with
  // a [GetEntity][google.cloud.dataplex.v1.MetadataService.GetEntity] request.
  // Required for update and delete requests.
  string etag = 8 [(google.api.field_behavior) = OPTIONAL];

  // Required. Immutable. The type of entity.
  Type type = 10 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Required. Immutable. The ID of the asset associated with the storage
  // location containing the entity data. The entity must be within the same
  // zone as the asset.
  string asset = 11 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Required. Immutable. The storage path of the entity data.
  // For Cloud Storage data, this is the fully-qualified path to the entity,
  // such as `gs://bucket/path/to/data`. For BigQuery data, this is the name of
  // the table resource, such as
  // `projects/project_id/datasets/dataset_id/tables/table_id`.
  string data_path = 12 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Optional. The set of items within the data path constituting the data in
  // the entity, represented as a glob path.
  // Example: `gs://bucket/path/to/data/**/*.csv`.
  string data_path_pattern = 13 [(google.api.field_behavior) = OPTIONAL];

  // Output only. The name of the associated Data Catalog entry.
  string catalog_entry = 14 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Required. Immutable. Identifies the storage system of the entity data.
  StorageSystem system = 15 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Required. Identifies the storage format of the entity data.
  // It does not apply to entities with data stored in BigQuery.
  StorageFormat format = 16 [(google.api.field_behavior) = REQUIRED];

  // Output only. Metadata stores that the entity is compatible with.
  CompatibilityStatus compatibility = 19 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Required. The description of the data structure and layout.
  // The schema is not included in list responses. It is only included in
  // `SCHEMA` and `FULL` entity views of a `GetEntity` response.
  Schema schema = 50 [(google.api.field_behavior) = REQUIRED];
}

// Represents partition metadata contained within entity instances.
message Partition {
  option (google.api.resource) = {
    type: "dataplex.googleapis.com/Partition"
    pattern: "projects/{project}/locations/{location}/lakes/{lake}/zones/{zone}/entities/{entity}/partitions/{partition}"
  };

  // Output only. Partition values used in the HTTP URL must be
  // double encoded. For example, `url_encode(url_encode(value))` can be used
  // to encode "US:CA/CA#Sunnyvale" so that the request URL ends
  // with "/partitions/US%253ACA/CA%2523Sunnyvale".
  // The name field in the response retains the encoded format.
  string name = 1 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/Partition"
    }
  ];

  // Required. Immutable. The set of values representing the partition, which
  // correspond to the partition schema defined in the parent entity.
  repeated string values = 2 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Required. Immutable. The location of the entity data within the partition,
  // for example, `gs://bucket/path/to/entity/key1=value1/key2=value2`,
  // or `projects/{project_id}/datasets/{dataset_id}/tables/{table_id}`.
  string location = 3 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Optional. The etag for this partition.
  // This field is marked deprecated.
  string etag = 4 [
    deprecated = true,
    (google.api.field_behavior) = OPTIONAL
  ];
}

// Schema information describing the structure and layout of the data.
message Schema {
  // Type information for fields in schemas and partition schemas.
  enum Type {
    // SchemaType unspecified.
    TYPE_UNSPECIFIED = 0;

    // Boolean field.
    BOOLEAN = 1;

    // Single byte numeric field.
    BYTE = 2;

    // 16-bit numeric field.
    INT16 = 3;

    // 32-bit numeric field.
    INT32 = 4;

    // 64-bit numeric field.
    INT64 = 5;

    // Floating point numeric field.
    FLOAT = 6;

    // Double precision numeric field.
    DOUBLE = 7;

    // Real value numeric field.
    DECIMAL = 8;

    // Sequence of characters field.
    STRING = 9;

    // Sequence of bytes field.
    BINARY = 10;

    // Date and time field.
    TIMESTAMP = 11;

    // Date field.
    DATE = 12;

    // Time field.
    TIME = 13;

    // Structured field. Nested fields that define the structure of the map.
    // If all nested fields are nullable, this field represents a union.
    RECORD = 14;

    // Null field that does not have values.
    NULL = 100;
  }

  // Additional qualifiers to define field semantics.
  enum Mode {
    // Mode unspecified.
    MODE_UNSPECIFIED = 0;

    // The field has required semantics.
    REQUIRED = 1;

    // The field has optional semantics, and may be null.
    NULLABLE = 2;

    // The field has repeated (0 or more) semantics, and is a list of values.
    REPEATED = 3;
  }

  // Represents a column field within a table schema.
  message SchemaField {
    // Required. The name of the field. Must contain only letters, numbers and
    // underscores, with a maximum length of 767 characters,
    // and must begin with a letter or underscore.
    string name = 1 [(google.api.field_behavior) = REQUIRED];

    // Optional. User friendly field description. Must be less than or equal to
    // 1024 characters.
    string description = 2 [(google.api.field_behavior) = OPTIONAL];

    // Required. The type of field.
    Type type = 3 [(google.api.field_behavior) = REQUIRED];

    // Required. Additional field semantics.
    Mode mode = 4 [(google.api.field_behavior) = REQUIRED];

    // Optional. Any nested field for complex types.
    repeated SchemaField fields = 10 [(google.api.field_behavior) = OPTIONAL];
  }

  // Represents a key field within the entity's partition structure. You could
  // have up to 20 partition fields, but only the first 10 partitions have the
  // filtering ability due to performance consideration. **Note:**
  // Partition fields are immutable.
  message PartitionField {
    // Required. Partition field name must consist of letters, numbers, and
    // underscores only, with a maximum length of 256 characters,
    // and must begin with a letter or underscore.
    string name = 1 [(google.api.field_behavior) = REQUIRED];

    // Required. Immutable. The type of field.
    Type type = 2 [
      (google.api.field_behavior) = REQUIRED,
      (google.api.field_behavior) = IMMUTABLE
    ];
  }

  // The structure of paths within the entity, which represent partitions.
  enum PartitionStyle {
    // PartitionStyle unspecified.
    PARTITION_STYLE_UNSPECIFIED = 0;

    // Partitions are hive-compatible.
    // Examples: `gs://bucket/path/to/table/dt=2019-10-31/lang=en`,
    // `gs://bucket/path/to/table/dt=2019-10-31/lang=en/late`.
    HIVE_COMPATIBLE = 1;
  }

  // Required. Set to `true` if user-managed or `false` if managed by Dataplex.
  // The default is `false` (managed by Dataplex).
  //
  // - Set to `false` to enable Dataplex discovery to update the schema,
  //   including new data discovery, schema inference, and schema evolution.
  //   Users retain the ability to input and edit the schema. Dataplex
  //   treats schema input by the user as though produced
  //   by a previous Dataplex discovery operation, and it will
  //   evolve the schema and take action based on that treatment.
  //
  // - Set to `true` to fully manage the entity
  //   schema. This setting guarantees that Dataplex will not
  //   change schema fields.
  bool user_managed = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. The sequence of fields describing data in table entities.
  // **Note:** BigQuery SchemaFields are immutable.
  repeated SchemaField fields = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The sequence of fields describing the partition structure in
  // entities. If this field is empty, there are no partitions within the data.
  repeated PartitionField partition_fields = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The structure of paths containing partition data within the
  // entity.
  PartitionStyle partition_style = 4 [(google.api.field_behavior) = OPTIONAL];
}

// Describes the format of the data within its storage location.
message StorageFormat {
  // Describes CSV and similar semi-structured data formats.
  message CsvOptions {
    // Optional. The character encoding of the data. Accepts "US-ASCII",
    // "UTF-8", and "ISO-8859-1". Defaults to UTF-8 if unspecified.
    string encoding = 1 [(google.api.field_behavior) = OPTIONAL];

    // Optional. The number of rows to interpret as header rows that should be
    // skipped when reading data rows. Defaults to 0.
    int32 header_rows = 2 [(google.api.field_behavior) = OPTIONAL];

    // Optional. The delimiter used to separate values. Defaults to ','.
    string delimiter = 3 [(google.api.field_behavior) = OPTIONAL];

    // Optional. The character used to quote column values. Accepts '"'
    // (double quotation mark) or ''' (single quotation mark). Defaults to
    // '"' (double quotation mark) if unspecified.
    string quote = 4 [(google.api.field_behavior) = OPTIONAL];
  }

  // Describes JSON data format.
  message JsonOptions {
    // Optional. The character encoding of the data. Accepts "US-ASCII",
    // "UTF-8", and "ISO-8859-1". Defaults to UTF-8 if not specified.
    string encoding = 1 [(google.api.field_behavior) = OPTIONAL];
  }

  // The specific file format of the data.
  enum Format {
    // Format unspecified.
    FORMAT_UNSPECIFIED = 0;

    // Parquet-formatted structured data.
    PARQUET = 1;

    // Avro-formatted structured data.
    AVRO = 2;

    // Orc-formatted structured data.
    ORC = 3;

    // Csv-formatted semi-structured data.
    CSV = 100;

    // Json-formatted semi-structured data.
    JSON = 101;

    // Image data formats (such as jpg and png).
    IMAGE = 200;

    // Audio data formats (such as mp3 and wav).
    AUDIO = 201;

    // Video data formats (such as mp4 and mpg).
    VIDEO = 202;

    // Textual data formats (such as txt and xml).
    TEXT = 203;

    // TensorFlow record format.
    TFRECORD = 204;

    // Data that doesn't match a specific format.
    OTHER = 1000;

    // Data of an unknown format.
    UNKNOWN = 1001;
  }

  // The specific compressed file format of the data.
  enum CompressionFormat {
    // CompressionFormat unspecified. Implies uncompressed data.
    COMPRESSION_FORMAT_UNSPECIFIED = 0;

    // GZip compressed set of files.
    GZIP = 2;

    // BZip2 compressed set of files.
    BZIP2 = 3;
  }

  // Output only. The data format associated with the stored data, which
  // represents content type values. The value is inferred from mime type.
  Format format = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. The compression type associated with the stored data.
  // If unspecified, the data is uncompressed.
  CompressionFormat compression_format = 2 [(google.api.field_behavior) = OPTIONAL];

  // Required. The mime type descriptor for the data. Must match the pattern
  // {type}/{subtype}. Supported values:
  //
  // - application/x-parquet
  // - application/x-avro
  // - application/x-orc
  // - application/x-tfrecord
  // - application/json
  // - application/{subtypes}
  // - text/csv
  // - text/{subtypes}
  // - image/{image subtype}
  // - video/{video subtype}
  // - audio/{audio subtype}
  string mime_type = 3 [(google.api.field_behavior) = REQUIRED];

  // Additional format-specific options.
  oneof options {
    // Optional. Additional information about CSV formatted data.
    CsvOptions csv = 10 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Additional information about JSON formatted data.
    JsonOptions json = 11 [(google.api.field_behavior) = OPTIONAL];
  }
}

// Identifies the cloud system that manages the data storage.
enum StorageSystem {
  // Storage system unspecified.
  STORAGE_SYSTEM_UNSPECIFIED = 0;

  // The entity data is contained within a Cloud Storage bucket.
  CLOUD_STORAGE = 1;

  // The entity data is contained within a BigQuery dataset.
  BIGQUERY = 2;
}