// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dataplex.v1;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/timestamp.proto";

option go_package = "google.golang.org/genproto/googleapis/cloud/dataplex/v1;dataplex";
option java_multiple_files = true;
option java_outer_classname = "ResourcesProto";
option java_package = "com.google.cloud.dataplex.v1";

// A lake is a centralized repository for managing enterprise data across the
// organization distributed across many cloud projects, and stored in a variety
// of storage services such as Google Cloud Storage and BigQuery. The resources
// attached to a lake are referred to as managed resources. Data within these
// managed resources can be structured or unstructured. A lake provides data
// admins with tools to organize, secure and manage their data at scale, and
// provides data scientists and data engineers an integrated experience to
// easily search, discover, analyze and transform data and associated metadata.
message Lake {
  option (google.api.resource) = {
    type: "dataplex.googleapis.com/Lake"
    pattern: "projects/{project}/locations/{location}/lakes/{lake}"
  };

  // Settings to manage association of Dataproc Metastore with a lake.
  message Metastore {
    // Optional. A relative reference to the Dataproc Metastore
    // (https://cloud.google.com/dataproc-metastore/docs) service associated
    // with the lake:
    // `projects/{project_id}/locations/{location_id}/services/{service_id}`
    string service = 1 [(google.api.field_behavior) = OPTIONAL];
  }

  // Status of Lake and Dataproc Metastore service instance association.
  message MetastoreStatus {
    // Current state of association.
    enum State {
      // Unspecified.
      STATE_UNSPECIFIED = 0;

      // A Metastore service instance is not associated with the lake.
      NONE = 1;

      // A Metastore service instance is attached to the lake.
      READY = 2;

      // Attach/detach is in progress.
      UPDATING = 3;

      // Attach/detach could not be done due to errors.
      ERROR = 4;
    }

    // Current state of association.
    State state = 1;

    // Additional information about the current status.
    string message = 2;

    // Last update time of the metastore status of the lake.
    google.protobuf.Timestamp update_time = 3;

    // The URI of the endpoint used to access the Metastore service.
    string endpoint = 4;
  }

  // Output only. The relative resource name of the lake, of the form:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}`.
  string name = 1 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/Lake"
    }
  ];

  // Optional. User friendly display name.
  string display_name = 2 [(google.api.field_behavior) = OPTIONAL];

  // Output only. System generated globally unique ID for the lake. This ID
  // will be different if the lake is deleted and re-created with the same
  // name.
  string uid = 3 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the lake was created.
  google.protobuf.Timestamp create_time = 4
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the lake was last updated.
  google.protobuf.Timestamp update_time = 5
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. User-defined labels for the lake.
  map<string, string> labels = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Description of the lake.
  string description = 7 [(google.api.field_behavior) = OPTIONAL];

  // Output only. Current state of the lake.
  State state = 8 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Service account associated with this lake. This service
  // account must be authorized to access or operate on resources managed by
  // the lake.
  string service_account = 9 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. Settings to manage lake and Dataproc Metastore service instance
  // association.
  Metastore metastore = 102 [(google.api.field_behavior) = OPTIONAL];

  // Output only. Aggregated status of the underlying assets of the lake.
  AssetStatus asset_status = 103 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Metastore status of the lake.
  MetastoreStatus metastore_status = 104
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Aggregated status of the underlying assets of a lake or zone.
message AssetStatus {
  // Last update time of the status.
  google.protobuf.Timestamp update_time = 1;

  // Number of active assets.
  int32 active_assets = 2;

  // Number of assets that are in process of updating the security policy on
  // attached resources.
  int32 security_policy_applying_assets = 3;
}

// A zone represents a logical group of related assets within a lake. A zone can
// be used to map to organizational structure or represent stages of data
// readiness from raw to curated. It provides managing behavior that is shared
// or inherited by all contained assets.
message Zone {
  option (google.api.resource) = {
    type: "dataplex.googleapis.com/Zone"
    pattern: "projects/{project}/locations/{location}/lakes/{lake}/zones/{zone}"
  };

  // Type of zone.
  enum Type {
    // Zone type not specified.
    TYPE_UNSPECIFIED = 0;

    // A zone that contains data that needs further processing before it is
    // considered generally ready for consumption and analytics workloads.
    RAW = 1;

    // A zone that contains data that is considered to be ready for broader
    // consumption and analytics workloads. Curated structured data stored in
    // Cloud Storage must conform to certain file formats (parquet, avro and
    // orc) and organized in a hive-compatible directory layout.
    CURATED = 2;
  }

  // Settings for resources attached as assets within a zone.
  message ResourceSpec {
    // Location type of the resources attached to a zone.
    enum LocationType {
      // Unspecified location type.
      LOCATION_TYPE_UNSPECIFIED = 0;

      // Resources that are associated with a single region.
      SINGLE_REGION = 1;

      // Resources that are associated with a multi-region location.
      MULTI_REGION = 2;
    }

    // Required. Immutable. The location type of the resources that are allowed
    // to be attached to the assets within this zone.
    LocationType location_type = 1 [
      (google.api.field_behavior) = REQUIRED,
      (google.api.field_behavior) = IMMUTABLE
    ];
  }

  // Settings to manage the metadata discovery and publishing in a zone.
  message DiscoverySpec {
    // Describe CSV and similar semi-structured data formats.
    message CsvOptions {
      // Optional. The number of rows to interpret as header rows that should
      // be skipped when reading data rows.
      int32 header_rows = 1 [(google.api.field_behavior) = OPTIONAL];

      // Optional. The delimiter being used to separate values. This defaults
      // to ','.
      string delimiter = 2 [(google.api.field_behavior) = OPTIONAL];

      // Optional. The character encoding of the data. The default is UTF-8.
      string encoding = 3 [(google.api.field_behavior) = OPTIONAL];

      // Optional. Whether to disable the inference of data type for CSV data.
      // If true, all columns will be registered as strings.
      bool disable_type_inference = 4
          [(google.api.field_behavior) = OPTIONAL];
    }

    // Describe JSON data format.
    message JsonOptions {
      // Optional. The character encoding of the data. The default is UTF-8.
      string encoding = 1 [(google.api.field_behavior) = OPTIONAL];

      // Optional. Whether to disable the inference of data type for Json data.
      // If true, all columns will be registered as their primitive types
      // (strings, number or boolean).
      bool disable_type_inference = 2
          [(google.api.field_behavior) = OPTIONAL];
    }

    // Required. Whether discovery is enabled.
    bool enabled = 1 [(google.api.field_behavior) = REQUIRED];

    // Optional. The list of patterns to apply for selecting data to include
    // during discovery if only a subset of the data should be considered. For
    // Cloud Storage bucket assets, these are interpreted as glob patterns used
    // to match object names. For BigQuery dataset assets, these are
    // interpreted as patterns to match table names.
    repeated string include_patterns = 2
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. The list of patterns to apply for selecting data to exclude
    // during discovery. For Cloud Storage bucket assets, these are interpreted
    // as glob patterns used to match object names. For BigQuery dataset
    // assets, these are interpreted as patterns to match table names.
    repeated string exclude_patterns = 3
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. Configuration for CSV data.
    CsvOptions csv_options = 4 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Configuration for Json data.
    JsonOptions json_options = 5 [(google.api.field_behavior) = OPTIONAL];

    // Determines when discovery is triggered.
    oneof trigger {
      // Optional. Cron schedule (https://en.wikipedia.org/wiki/Cron) for
      // running discovery periodically. Successive discovery runs must be
      // scheduled at least 60 minutes apart.
      // The default value is to run discovery every 60 minutes.
      // To explicitly set a timezone to the cron tab, apply a prefix in the
      // cron tab: "CRON_TZ=${IANA_TIME_ZONE}" or "TZ=${IANA_TIME_ZONE}".
      // The ${IANA_TIME_ZONE} may only be a valid string from IANA time zone
      // database. For example, "CRON_TZ=America/New_York 1 * * * *", or
      // "TZ=America/New_York 1 * * * *".
      string schedule = 10 [(google.api.field_behavior) = OPTIONAL];
    }
  }

  // Output only. The relative resource name of the zone, of the form:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}`.
  string name = 1 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/Zone"
    }
  ];

  // Optional. User friendly display name.
  string display_name = 2 [(google.api.field_behavior) = OPTIONAL];

  // Output only. System generated globally unique ID for the zone. This ID
  // will be different if the zone is deleted and re-created with the same
  // name.
  string uid = 3 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the zone was created.
  google.protobuf.Timestamp create_time = 4
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the zone was last updated.
  google.protobuf.Timestamp update_time = 5
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. User defined labels for the zone.
  map<string, string> labels = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Description of the zone.
  string description = 7 [(google.api.field_behavior) = OPTIONAL];

  // Output only. Current state of the zone.
  State state = 8 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Required. Immutable. The type of the zone.
  Type type = 9 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Optional. Specification of the discovery feature applied to data in this
  // zone.
  DiscoverySpec discovery_spec = 103 [(google.api.field_behavior) = OPTIONAL];

  // Required. Specification of the resources that are referenced by the assets
  // within this zone.
  ResourceSpec resource_spec = 104 [(google.api.field_behavior) = REQUIRED];

  // Output only. Aggregated status of the underlying assets of the zone.
  AssetStatus asset_status = 105 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Action represents an issue requiring administrator action for resolution.
message Action {
  option (google.api.resource) = {
    type: "dataplex.googleapis.com/Action"
    pattern: "projects/{project}/locations/{location}/lakes/{lake}/actions/{action}"
    pattern: "projects/{project}/locations/{location}/lakes/{lake}/zones/{zone}/actions/{action}"
    pattern: "projects/{project}/locations/{location}/lakes/{lake}/zones/{zone}/assets/{asset}/actions/{action}"
  };

  // The category of issues.
  enum Category {
    // Unspecified category.
    CATEGORY_UNSPECIFIED = 0;

    // Resource management related issues.
    RESOURCE_MANAGEMENT = 1;

    // Security policy related issues.
    SECURITY_POLICY = 2;

    // Data and discovery related issues.
    DATA_DISCOVERY = 3;
  }

  // Action details for resource references in assets that cannot be located.
  message MissingResource {}

  // Action details for unauthorized resource issues raised to indicate that the
  // service account associated with the lake instance is not authorized to
  // access or manage the resource associated with an asset.
  message UnauthorizedResource {}

  // Failed to apply security policy to the managed resource(s) under a
  // lake, zone or an asset. For a lake or zone resource, one or more underlying
  // assets has a failure applying security policy to the associated managed
  // resource.
  message FailedSecurityPolicyApply {
    // Resource name of one of the assets with failing security policy
    // application. Populated for a lake or zone resource only.
    string asset = 1;
  }

  // Action details for invalid or unsupported data files detected by discovery.
  message InvalidDataFormat {
    // The list of data locations sampled and used for format/schema
    // inference.
    repeated string sampled_data_locations = 1;

    // The expected data format of the entity.
    string expected_format = 2;

    // The new unexpected data format within the entity.
    string new_format = 3;
  }

  // Action details for incompatible schemas detected by discovery.
  message IncompatibleDataSchema {
    // Whether the action relates to a schema that is incompatible or modified.
    enum SchemaChange {
      // Schema change unspecified.
      SCHEMA_CHANGE_UNSPECIFIED = 0;

      // Newly discovered schema is incompatible with existing schema.
      INCOMPATIBLE = 1;

      // Newly discovered schema has changed from existing schema for data in a
      // curated zone.
      MODIFIED = 2;
    }

    // The name of the table containing invalid data.
    string table = 1;

    // The existing and expected schema of the table. The schema is provided as
    // a JSON formatted structure listing columns and data types.
    string existing_schema = 2;

    // The new and incompatible schema within the table. The schema is provided
    // as a JSON formatted structure listing columns and data types.
    string new_schema = 3;

    // The list of data locations sampled and used for format/schema
    // inference.
    repeated string sampled_data_locations = 4;

    // Whether the action relates to a schema that is incompatible or modified.
    SchemaChange schema_change = 5;
  }

  // Action details for invalid or unsupported partitions detected by discovery.
  message InvalidDataPartition {
    // The expected partition structure.
    enum PartitionStructure {
      // PartitionStructure unspecified.
      PARTITION_STRUCTURE_UNSPECIFIED = 0;

      // Consistent hive-style partition definition (both raw and curated zone).
      CONSISTENT_KEYS = 1;

      // Hive style partition definition (curated zone only).
      HIVE_STYLE_KEYS = 2;
    }

    // The issue type of InvalidDataPartition.
    PartitionStructure expected_structure = 1;
  }

  // Action details for absence of data detected by discovery.
  message MissingData {}

  // Action details for invalid data arrangement.
  message InvalidDataOrganization {}

  // The category of issue associated with the action.
  Category category = 1;

  // Detailed description of the issue requiring action.
  string issue = 2;

  // The time that the issue was detected.
  google.protobuf.Timestamp detect_time = 4;

  // Output only. The relative resource name of the action, of the form:
  // `projects/{project}/locations/{location}/lakes/{lake}/actions/{action}`
  // `projects/{project}/locations/{location}/lakes/{lake}/zones/{zone}/actions/{action}`
  // `projects/{project}/locations/{location}/lakes/{lake}/zones/{zone}/assets/{asset}/actions/{action}`.
  string name = 5 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/Action"
    }
  ];

  // Output only. The relative resource name of the lake, of the form:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}`.
  string lake = 6 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/Lake"
    }
  ];

  // Output only. The relative resource name of the zone, of the form:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}`.
  string zone = 7 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/Zone"
    }
  ];

  // Output only. The relative resource name of the asset, of the form:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}/assets/{asset_id}`.
  string asset = 8 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/Asset"
    }
  ];

  // The list of data locations associated with this action. Cloud Storage
  // locations are represented as URI paths (e.g.
  // `gs://bucket/table1/year=2020/month=Jan/`). BigQuery locations refer to
  // resource names (e.g.
  // `bigquery.googleapis.com/projects/project-id/datasets/dataset-id`).
  repeated string data_locations = 9;

  // Additional details about the action based on the action category.
  oneof details {
    // Details for issues related to invalid or unsupported data formats.
    InvalidDataFormat invalid_data_format = 10;

    // Details for issues related to incompatible schemas detected within data.
    IncompatibleDataSchema incompatible_data_schema = 11;

    // Details for issues related to invalid or unsupported data partition
    // structure.
    InvalidDataPartition invalid_data_partition = 12;

    // Details for issues related to absence of data within managed resources.
    MissingData missing_data = 13;

    // Details for issues related to absence of a managed resource.
    MissingResource missing_resource = 14;

    // Details for issues related to lack of permissions to access data
    // resources.
    UnauthorizedResource unauthorized_resource = 15;

    // Details for issues related to applying security policy.
    FailedSecurityPolicyApply failed_security_policy_apply = 21;

    // Details for issues related to invalid data arrangement.
    InvalidDataOrganization invalid_data_organization = 22;
  }
}

// An asset represents a cloud resource that is being managed within a lake as a
// member of a zone.
message Asset {
  option (google.api.resource) = {
    type: "dataplex.googleapis.com/Asset"
    pattern: "projects/{project}/locations/{location}/lakes/{lake}/zones/{zone}/assets/{asset}"
  };

  // Security policy status of the asset. Data security policy, i.e., readers,
  // writers & owners, should be specified in the lake/zone/asset IAM policy.
  message SecurityStatus {
    // The state of the security policy.
    enum State {
      // State unspecified.
      STATE_UNSPECIFIED = 0;

      // Security policy has been successfully applied to the attached resource.
      READY = 1;

      // Security policy is in the process of being applied to the attached
      // resource.
      APPLYING = 2;

      // Security policy could not be applied to the attached resource due to
      // errors.
      ERROR = 3;
    }

    // The current state of the security policy applied to the attached
    // resource.
    State state = 1;

    // Additional information about the current state.
    string message = 2;

    // Last update time of the status.
    google.protobuf.Timestamp update_time = 3;
  }

  // Settings to manage the metadata discovery and publishing for an asset.
  message DiscoverySpec {
    // Describe CSV and similar semi-structured data formats.
    message CsvOptions {
      // Optional. The number of rows to interpret as header rows that should
      // be skipped when reading data rows.
      int32 header_rows = 1 [(google.api.field_behavior) = OPTIONAL];

      // Optional. The delimiter being used to separate values. This defaults
      // to ','.
      string delimiter = 2 [(google.api.field_behavior) = OPTIONAL];

      // Optional. The character encoding of the data. The default is UTF-8.
      string encoding = 3 [(google.api.field_behavior) = OPTIONAL];

      // Optional. Whether to disable the inference of data type for CSV data.
      // If true, all columns will be registered as strings.
      bool disable_type_inference = 4
          [(google.api.field_behavior) = OPTIONAL];
    }

    // Describe JSON data format.
    message JsonOptions {
      // Optional. The character encoding of the data. The default is UTF-8.
      string encoding = 1 [(google.api.field_behavior) = OPTIONAL];

      // Optional. Whether to disable the inference of data type for Json data.
      // If true, all columns will be registered as their primitive types
      // (strings, number or boolean).
      bool disable_type_inference = 2
          [(google.api.field_behavior) = OPTIONAL];
    }

    // Optional. Whether discovery is enabled.
    bool enabled = 1 [(google.api.field_behavior) = OPTIONAL];

    // Optional. The list of patterns to apply for selecting data to include
    // during discovery if only a subset of the data should be considered. For
    // Cloud Storage bucket assets, these are interpreted as glob patterns used
    // to match object names. For BigQuery dataset assets, these are
    // interpreted as patterns to match table names.
    repeated string include_patterns = 2
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. The list of patterns to apply for selecting data to exclude
    // during discovery. For Cloud Storage bucket assets, these are interpreted
    // as glob patterns used to match object names. For BigQuery dataset
    // assets, these are interpreted as patterns to match table names.
    repeated string exclude_patterns = 3
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. Configuration for CSV data.
    CsvOptions csv_options = 4 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Configuration for Json data.
    JsonOptions json_options = 5 [(google.api.field_behavior) = OPTIONAL];

    // Determines when discovery is triggered.
    oneof trigger {
      // Optional. Cron schedule (https://en.wikipedia.org/wiki/Cron) for
      // running discovery periodically. Successive discovery runs must be
      // scheduled at least 60 minutes apart.
      // The default value is to run discovery every 60 minutes.
      // To explicitly set a timezone to the cron tab, apply a prefix in the
      // cron tab: "CRON_TZ=${IANA_TIME_ZONE}" or "TZ=${IANA_TIME_ZONE}".
      // The ${IANA_TIME_ZONE} may only be a valid string from IANA time zone
      // database. For example, "CRON_TZ=America/New_York 1 * * * *", or
      // "TZ=America/New_York 1 * * * *".
      string schedule = 10 [(google.api.field_behavior) = OPTIONAL];
    }
  }

  // Identifies the cloud resource that is referenced by this asset.
  message ResourceSpec {
    // Type of resource.
    enum Type {
      // Type not specified.
      TYPE_UNSPECIFIED = 0;

      // Cloud Storage bucket.
      STORAGE_BUCKET = 1;

      // BigQuery dataset.
      BIGQUERY_DATASET = 2;
    }

    // Immutable. Relative name of the cloud resource that contains the data
    // that is being managed within a lake. For example:
    //   `projects/{project_number}/buckets/{bucket_id}`
    //   `projects/{project_number}/datasets/{dataset_id}`
    string name = 1 [(google.api.field_behavior) = IMMUTABLE];

    // Required. Immutable. Type of resource.
    Type type = 2 [
      (google.api.field_behavior) = REQUIRED,
      (google.api.field_behavior) = IMMUTABLE
    ];
  }

  // Status of the resource referenced by an asset.
  message ResourceStatus {
    // The state of a resource.
    enum State {
      // State unspecified.
      STATE_UNSPECIFIED = 0;

      // Resource does not have any errors.
      READY = 1;

      // Resource has errors.
      ERROR = 2;
    }

    // The current state of the managed resource.
    State state = 1;

    // Additional information about the current state.
    string message = 2;

    // Last update time of the status.
    google.protobuf.Timestamp update_time = 3;
  }

  // Status of discovery for an asset.
  message DiscoveryStatus {
    // Current state of discovery.
    enum State {
      // State is unspecified.
      STATE_UNSPECIFIED = 0;

      // Discovery for the asset is scheduled.
      SCHEDULED = 1;

      // Discovery for the asset is running.
      IN_PROGRESS = 2;

      // Discovery for the asset is currently paused (e.g. due to a lack
      // of available resources). It will be automatically resumed.
      PAUSED = 3;

      // Discovery for the asset is disabled.
      DISABLED = 5;
    }

    // The aggregated data statistics for the asset reported by discovery.
    message Stats {
      // The count of data items within the referenced resource.
      int64 data_items = 1;

      // The number of stored data bytes within the referenced resource.
      int64 data_size = 2;

      // The count of table entities within the referenced resource.
      int64 tables = 3;

      // The count of fileset entities within the referenced resource.
      int64 filesets = 4;
    }

    // The current status of the discovery feature.
    State state = 1;

    // Additional information about the current state.
    string message = 2;

    // Last update time of the status.
    google.protobuf.Timestamp update_time = 3;

    // The start time of the last discovery run.
    google.protobuf.Timestamp last_run_time = 4;

    // Data Stats of the asset reported by discovery.
    Stats stats = 6;

    // The duration of the last discovery run.
    google.protobuf.Duration last_run_duration = 7;
  }

  // Output only. The relative resource name of the asset, of the form:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}/assets/{asset_id}`.
  string name = 1 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/Asset"
    }
  ];

  // Optional. User friendly display name.
  string display_name = 2 [(google.api.field_behavior) = OPTIONAL];

  // Output only. System generated globally unique ID for the asset. This ID
  // will be different if the asset is deleted and re-created with the same
  // name.
  string uid = 3 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the asset was created.
  google.protobuf.Timestamp create_time = 4
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the asset was last updated.
  google.protobuf.Timestamp update_time = 5
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. User defined labels for the asset.
  map<string, string> labels = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Description of the asset.
  string description = 7 [(google.api.field_behavior) = OPTIONAL];

  // Output only. Current state of the asset.
  State state = 8 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Required. Specification of the resource that is referenced by this asset.
  ResourceSpec resource_spec = 100 [(google.api.field_behavior) = REQUIRED];

  // Output only. Status of the resource referenced by this asset.
  ResourceStatus resource_status = 101
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Status of the security policy applied to resource referenced
  // by this asset.
  SecurityStatus security_status = 103
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. Specification of the discovery feature applied to data
  // referenced by this asset.
  // When this spec is left unset, the asset will use the spec set on the parent
  // zone.
  DiscoverySpec discovery_spec = 106 [(google.api.field_behavior) = OPTIONAL];

  // Output only. Status of the discovery feature applied to data referenced by
  // this asset.
  DiscoveryStatus discovery_status = 107
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// State of a resource.
enum State {
  // State is not specified.
  STATE_UNSPECIFIED = 0;

  // Resource is active, i.e., ready to use.
  ACTIVE = 1;

  // Resource is under creation.
  CREATING = 2;

  // Resource is under deletion.
  DELETING = 3;

  // Resource is active but has unresolved actions.
  ACTION_REQUIRED = 4;
}