// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dataplex.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/dataplex/v1/data_profile.proto";
import "google/cloud/dataplex/v1/data_quality.proto";
import "google/cloud/dataplex/v1/processing.proto";
import "google/cloud/dataplex/v1/resources.proto";
import "google/cloud/dataplex/v1/service.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/timestamp.proto";

option go_package = "cloud.google.com/go/dataplex/apiv1/dataplexpb;dataplexpb";
option java_multiple_files = true;
option java_outer_classname = "DataScansProto";
option java_package = "com.google.cloud.dataplex.v1";

// DataScanService manages DataScan resources which can be configured to run
// various types of data scanning workload and generate enriched metadata (e.g.
// Data Profile, Data Quality) for the data source.
service DataScanService {
  option (google.api.default_host) = "dataplex.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Creates a DataScan resource.
  rpc CreateDataScan(CreateDataScanRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/{parent=projects/*/locations/*}/dataScans"
      body: "data_scan"
    };
    option (google.api.method_signature) = "parent,data_scan,data_scan_id";
    option (google.longrunning.operation_info) = {
      response_type: "DataScan"
      metadata_type: "OperationMetadata"
    };
  }

  // Updates a DataScan resource.
  rpc UpdateDataScan(UpdateDataScanRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      patch: "/v1/{data_scan.name=projects/*/locations/*/dataScans/*}"
      body: "data_scan"
    };
    option (google.api.method_signature) = "data_scan,update_mask";
    option (google.longrunning.operation_info) = {
      response_type: "DataScan"
      metadata_type: "OperationMetadata"
    };
  }

  // Deletes a DataScan resource.
  rpc DeleteDataScan(DeleteDataScanRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      delete: "/v1/{name=projects/*/locations/*/dataScans/*}"
    };
    option (google.api.method_signature) = "name";
    option (google.longrunning.operation_info) = {
      response_type: "google.protobuf.Empty"
      metadata_type: "OperationMetadata"
    };
  }

  // Gets a DataScan resource.
  rpc GetDataScan(GetDataScanRequest) returns (DataScan) {
    option (google.api.http) = {
      get: "/v1/{name=projects/*/locations/*/dataScans/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Lists DataScans.
  rpc ListDataScans(ListDataScansRequest) returns (ListDataScansResponse) {
    option (google.api.http) = {
      get: "/v1/{parent=projects/*/locations/*}/dataScans"
    };
    option (google.api.method_signature) = "parent";
  }

  // Runs an on-demand execution of a DataScan.
  rpc RunDataScan(RunDataScanRequest) returns (RunDataScanResponse) {
    option (google.api.http) = {
      post: "/v1/{name=projects/*/locations/*/dataScans/*}:run"
      body: "*"
    };
    option (google.api.method_signature) = "name";
  }

  // Gets a DataScanJob resource.
  rpc GetDataScanJob(GetDataScanJobRequest) returns (DataScanJob) {
    option (google.api.http) = {
      get: "/v1/{name=projects/*/locations/*/dataScans/*/jobs/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Lists DataScanJobs under the given DataScan.
  rpc ListDataScanJobs(ListDataScanJobsRequest)
      returns (ListDataScanJobsResponse) {
    option (google.api.http) = {
      get: "/v1/{parent=projects/*/locations/*/dataScans/*}/jobs"
    };
    option (google.api.method_signature) = "parent";
  }

  // Generates recommended data quality rules based on the results of a data
  // profiling scan.
  //
  // Use the recommendations to build rules for a data quality scan.
  rpc GenerateDataQualityRules(GenerateDataQualityRulesRequest)
      returns (GenerateDataQualityRulesResponse) {
    option (google.api.http) = {
      post: "/v1/{name=projects/*/locations/*/dataScans/*}:generateDataQualityRules"
      body: "*"
      additional_bindings {
        post: "/v1/{name=projects/*/locations/*/dataScans/*/jobs/*}:generateDataQualityRules"
        body: "*"
      }
    };
    option (google.api.method_signature) = "name";
  }
}

// Create dataScan request.
message CreateDataScanRequest {
  // Required. The resource name of the parent location:
  // `projects/{project}/locations/{location_id}`
  // where `project` refers to a *project_id* or *project_number* and
  // `location_id` refers to a GCP region.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // Required. DataScan resource.
  DataScan data_scan = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. DataScan identifier.
  //
  // * Must contain only lowercase letters, numbers and hyphens.
  // * Must start with a letter.
  // * Must end with a number or a letter.
  // * Must be between 1-63 characters.
  // * Must be unique within the customer project / location.
  string data_scan_id = 3 [(google.api.field_behavior) = REQUIRED];

  // Optional. Only validate the request, but do not perform mutations.
  // The default is `false`.
  bool validate_only = 4 [(google.api.field_behavior) = OPTIONAL];
}

// Update dataScan request.
message UpdateDataScanRequest {
  // Required. DataScan resource to be updated.
  //
  // Only fields specified in `update_mask` are updated.
  DataScan data_scan = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. Mask of fields to update.
  google.protobuf.FieldMask update_mask = 2
      [(google.api.field_behavior) = REQUIRED];

  // Optional. Only validate the request, but do not perform mutations.
  // The default is `false`.
  bool validate_only = 3 [(google.api.field_behavior) = OPTIONAL];
}

// Delete dataScan request.
message DeleteDataScanRequest {
  // Required. The resource name of the dataScan:
  // `projects/{project}/locations/{location_id}/dataScans/{data_scan_id}`
  // where `project` refers to a *project_id* or *project_number* and
  // `location_id` refers to a GCP region.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/DataScan"
    }
  ];
}

// Get dataScan request.
message GetDataScanRequest {
  // DataScan view options.
  enum DataScanView {
    // The API will default to the `BASIC` view.
    DATA_SCAN_VIEW_UNSPECIFIED = 0;

    // Basic view that does not include *spec* and *result*.
    BASIC = 1;

    // Include everything.
    FULL = 10;
  }

  // Required. The resource name of the dataScan:
  // `projects/{project}/locations/{location_id}/dataScans/{data_scan_id}`
  // where `project` refers to a *project_id* or *project_number* and
  // `location_id` refers to a GCP region.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/DataScan"
    }
  ];

  // Optional. Select the DataScan view to return. Defaults to `BASIC`.
  DataScanView view = 2 [(google.api.field_behavior) = OPTIONAL];
}

// List dataScans request.
message ListDataScansRequest {
  // Required. The resource name of the parent location:
  // `projects/{project}/locations/{location_id}`
  // where `project` refers to a *project_id* or *project_number* and
  // `location_id` refers to a GCP region.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // Optional. Maximum number of dataScans to return. The service may return
  // fewer than this value. If unspecified, at most 500 scans will be returned.
  // The maximum value is 1000; values above 1000 will be coerced to 1000.
  int32 page_size = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Page token received from a previous `ListDataScans` call. Provide
  // this to retrieve the subsequent page. When paginating, all other parameters
  // provided to `ListDataScans` must match the call that provided the
  // page token.
  string page_token = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Filter request.
  string filter = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Order by fields (`name` or `create_time`) for the result.
  // If not specified, the ordering is undefined.
  string order_by = 5 [(google.api.field_behavior) = OPTIONAL];
}

// List dataScans response.
message ListDataScansResponse {
  // DataScans (`BASIC` view only) under the given parent location.
  repeated DataScan data_scans = 1;

  // Token to retrieve the next page of results, or empty if there are no more
  // results in the list.
  string next_page_token = 2;

  // Locations that could not be reached.
  repeated string unreachable = 3;
}

// Run DataScan request.
message RunDataScanRequest {
  // Required. The resource name of the DataScan:
  // `projects/{project}/locations/{location_id}/dataScans/{data_scan_id}`
  // where `project` refers to a *project_id* or *project_number* and
  // `location_id` refers to a GCP region.
  //
  // Only **OnDemand** data scans are allowed.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/DataScan"
    }
  ];
}

// Run DataScan Response.
message RunDataScanResponse {
  // DataScanJob created by RunDataScan request.
  DataScanJob job = 1;
}

// Get DataScanJob request.
message GetDataScanJobRequest {
  // DataScanJob view options.
  enum DataScanJobView {
    // The API will default to the `BASIC` view.
    DATA_SCAN_JOB_VIEW_UNSPECIFIED = 0;

    // Basic view that does not include *spec* and *result*.
    BASIC = 1;

    // Include everything.
    FULL = 10;
  }

  // Required. The resource name of the DataScanJob:
  // `projects/{project}/locations/{location_id}/dataScans/{data_scan_id}/jobs/{data_scan_job_id}`
  // where `project` refers to a *project_id* or *project_number* and
  // `location_id` refers to a GCP region.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/DataScanJob"
    }
  ];

  // Optional. Select the DataScanJob view to return. Defaults to `BASIC`.
  DataScanJobView view = 2 [(google.api.field_behavior) = OPTIONAL];
}

// List DataScanJobs request.
message ListDataScanJobsRequest {
  // Required. The resource name of the parent DataScan:
  // `projects/{project}/locations/{location_id}/dataScans/{data_scan_id}`
  // where `project` refers to a *project_id* or *project_number* and
  // `location_id` refers to a GCP region.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/DataScan"
    }
  ];

  // Optional. Maximum number of DataScanJobs to return. The service may return
  // fewer than this value. If unspecified, at most 10 DataScanJobs will be
  // returned. The maximum value is 1000; values above 1000 will be coerced to
  // 1000.
  int32 page_size = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Page token received from a previous `ListDataScanJobs` call.
  // Provide this to retrieve the subsequent page. When paginating, all other
  // parameters provided to `ListDataScanJobs` must match the call that provided
  // the page token.
  string page_token = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. An expression for filtering the results of the ListDataScanJobs
  // request.
  //
  // If unspecified, all datascan jobs will be returned. Multiple filters can be
  // applied (with `AND`, `OR` logical operators). Filters are case-sensitive.
  //
  // Allowed fields are:
  //
  // - `start_time`
  // - `end_time`
  //
  // `start_time` and `end_time` expect RFC-3339 formatted strings (e.g.
  // 2018-10-08T18:30:00-07:00).
  //
  // For instance, 'start_time > 2018-10-08T00:00:00.123456789Z AND end_time <
  // 2018-10-09T00:00:00.123456789Z' limits results to DataScanJobs between
  // specified start and end times.
  string filter = 4 [(google.api.field_behavior) = OPTIONAL];
}

// List DataScanJobs response.
message ListDataScanJobsResponse {
  // DataScanJobs (`BASIC` view only) under a given dataScan.
  repeated DataScanJob data_scan_jobs = 1;

  // Token to retrieve the next page of results, or empty if there are no more
  // results in the list.
  string next_page_token = 2;
}

// Request details for generating data quality rule recommendations.
message GenerateDataQualityRulesRequest {
  // Required. The name must be one of the following:
  //
  // * The name of a data scan with at least one successful, completed data
  // profiling job
  // * The name of a successful, completed data profiling job (a data scan job
  // where the job type is data profiling)
  string name = 1 [(google.api.field_behavior) = REQUIRED];
}

// Response details for data quality rule recommendations.
message GenerateDataQualityRulesResponse {
  // The data quality rules that Dataplex generates based on the results
  // of a data profiling scan.
  repeated DataQualityRule rule = 1;
}

// Represents a user-visible job which provides the insights for the related
// data source.
//
// For example:
//
// * Data Quality: generates queries based on the rules and runs against the
//   data to get data quality check results.
// * Data Profile: analyzes the data in table(s) and generates insights about
//   the structure, content and relationships (such as null percent,
//   cardinality, min/max/mean, etc).
message DataScan {
  option (google.api.resource) = {
    type: "dataplex.googleapis.com/DataScan"
    pattern: "projects/{project}/locations/{location}/dataScans/{dataScan}"
  };

  // DataScan execution settings.
  message ExecutionSpec {
    // Optional. Spec related to how often and when a scan should be triggered.
    //
    // If not specified, the default is `OnDemand`, which means the scan will
    // not run until the user calls `RunDataScan` API.
    Trigger trigger = 1 [(google.api.field_behavior) = OPTIONAL];

    // Spec related to incremental scan of the data.
    //
    // When an option is selected for incremental scan, it cannot be unset or
    // changed. If not specified, a data scan will run for all data in the
    // table.
    oneof incremental {
      // Immutable. The unnested field (of type *Date* or *Timestamp*) that
      // contains values which monotonically increase over time.
      //
      // If not specified, a data scan will run for all data in the table.
      string field = 100 [(google.api.field_behavior) = IMMUTABLE];
    }
  }

  // Status of the data scan execution.
  message ExecutionStatus {
    // The time when the latest DataScanJob started.
    google.protobuf.Timestamp latest_job_start_time = 4;

    // The time when the latest DataScanJob ended.
    google.protobuf.Timestamp latest_job_end_time = 5;

    // Optional. The time when the DataScanJob execution was created.
    google.protobuf.Timestamp latest_job_create_time = 6
        [(google.api.field_behavior) = OPTIONAL];
  }

  // Output only. The relative resource name of the scan, of the form:
  // `projects/{project}/locations/{location_id}/dataScans/{datascan_id}`,
  // where `project` refers to a *project_id* or *project_number* and
  // `location_id` refers to a GCP region.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. System generated globally unique ID for the scan. This ID will
  // be different if the scan is deleted and re-created with the same name.
  string uid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. Description of the scan.
  //
  // * Must be between 1-1024 characters.
  string description = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. User friendly display name.
  //
  // * Must be between 1-256 characters.
  string display_name = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. User-defined labels for the scan.
  map<string, string> labels = 5 [(google.api.field_behavior) = OPTIONAL];

  // Output only. Current state of the DataScan.
  State state = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the scan was created.
  google.protobuf.Timestamp create_time = 7
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the scan was last updated.
  google.protobuf.Timestamp update_time = 8
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Required. The data source for DataScan.
  DataSource data = 9 [(google.api.field_behavior) = REQUIRED];

  // Optional. DataScan execution settings.
  //
  // If not specified, the fields in it will use their default values.
  ExecutionSpec execution_spec = 10 [(google.api.field_behavior) = OPTIONAL];

  // Output only. Status of the data scan execution.
  ExecutionStatus execution_status = 11
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The type of DataScan.
  DataScanType type = 12 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Data Scan related setting.
  // It is required and immutable which means once data_quality_spec is set, it
  // cannot be changed to data_profile_spec.
  oneof spec {
    // DataQualityScan related setting.
    DataQualitySpec data_quality_spec = 100;

    // DataProfileScan related setting.
    DataProfileSpec data_profile_spec = 101;
  }

  // The result of the data scan.
  oneof result {
    // Output only. The result of the data quality scan.
    DataQualityResult data_quality_result = 200
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. The result of the data profile scan.
    DataProfileResult data_profile_result = 201
        [(google.api.field_behavior) = OUTPUT_ONLY];
  }
}

// A DataScanJob represents an instance of DataScan execution.
message DataScanJob {
  option (google.api.resource) = {
    type: "dataplex.googleapis.com/DataScanJob"
    pattern: "projects/{project}/locations/{location}/dataScans/{dataScan}/jobs/{job}"
  };

  // Execution state for the DataScanJob.
  enum State {
    // The DataScanJob state is unspecified.
    STATE_UNSPECIFIED = 0;

    // The DataScanJob is running.
    RUNNING = 1;

    // The DataScanJob is canceling.
    CANCELING = 2;

    // The DataScanJob cancellation was successful.
    CANCELLED = 3;

    // The DataScanJob completed successfully.
    SUCCEEDED = 4;

    // The DataScanJob is no longer running due to an error.
    FAILED = 5;

    // The DataScanJob has been created but not started to run yet.
    PENDING = 7;
  }

  // Output only. The relative resource name of the DataScanJob, of the form:
  // `projects/{project}/locations/{location_id}/dataScans/{datascan_id}/jobs/{job_id}`,
  // where `project` refers to a *project_id* or *project_number* and
  // `location_id` refers to a GCP region.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. System generated globally unique ID for the DataScanJob.
  string uid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the DataScanJob was started.
  google.protobuf.Timestamp start_time = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the DataScanJob ended.
  google.protobuf.Timestamp end_time = 4
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Execution state for the DataScanJob.
  State state = 5 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Additional information about the current state.
  string message = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The type of the parent DataScan.
  DataScanType type = 7 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Data Scan related setting.
  oneof spec {
    // Output only. DataQualityScan related setting.
    DataQualitySpec data_quality_spec = 100
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. DataProfileScan related setting.
    DataProfileSpec data_profile_spec = 101
        [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // The result of the data scan.
  oneof result {
    // Output only. The result of the data quality scan.
    DataQualityResult data_quality_result = 200
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. The result of the data profile scan.
    DataProfileResult data_profile_result = 201
        [(google.api.field_behavior) = OUTPUT_ONLY];
  }
}

// The type of DataScan.
enum DataScanType {
  // The DataScan type is unspecified.
  DATA_SCAN_TYPE_UNSPECIFIED = 0;

  // Data Quality scan.
  DATA_QUALITY = 1;

  // Data Profile scan.
  DATA_PROFILE = 2;
}