// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.privacy.dlp.v2;

import "google/api/resource.proto";
import "google/protobuf/timestamp.proto";

option csharp_namespace = "Google.Cloud.Dlp.V2";
option go_package = "cloud.google.com/go/dlp/apiv2/dlppb;dlppb";
option java_multiple_files = true;
option java_outer_classname = "DlpStorage";
option java_package = "com.google.privacy.dlp.v2";
option php_namespace = "Google\\Cloud\\Dlp\\V2";
option ruby_package = "Google::Cloud::Dlp::V2";

// Type of information detected by the API.
message InfoType {
  // Name of the information type. Either a name of your choosing when
  // creating a CustomInfoType, or one of the names listed at
  // https://cloud.google.com/sensitive-data-protection/docs/infotypes-reference
  // when specifying a built-in type. When sending Cloud DLP results to Data
  // Catalog, infoType names should conform to the pattern
  // `[A-Za-z0-9$_-]{1,64}`.
  string name = 1;

  // Optional version name for this InfoType.
  string version = 2;

  // Optional custom sensitivity for this InfoType.
  // This only applies to data profiling.
  SensitivityScore sensitivity_score = 3;
}

// Score calculated from all elements in the data profile.
// A higher level means the data is more sensitive.
message SensitivityScore {
  // Various sensitivity score levels for resources.
  enum SensitivityScoreLevel {
    // Unused.
    SENSITIVITY_SCORE_UNSPECIFIED = 0;

    // No sensitive information detected. The resource isn't publicly
    // accessible.
    SENSITIVITY_LOW = 10;

    // Unable to determine sensitivity.
    SENSITIVITY_UNKNOWN = 12;

    // Medium risk. Contains personally identifiable information (PII),
    // potentially sensitive data, or fields with free-text data that are at a
    // higher risk of having intermittent sensitive data. Consider limiting
    // access.
    SENSITIVITY_MODERATE = 20;

    // High risk. Sensitive personally identifiable information (SPII) can be
    // present. Exfiltration of data can lead to user data loss.
    // Re-identification of users might be possible. Consider limiting usage
    // and/or removing SPII.
    SENSITIVITY_HIGH = 30;
  }

  // The sensitivity score applied to the resource.
  SensitivityScoreLevel score = 1;
}
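
// Illustrative example (not part of the API definition): an InfoType value in
// text format that names a hypothetical custom detector and attaches an
// explicit sensitivity. "EMPLOYEE_ID" is a placeholder name.
//
//   name: "EMPLOYEE_ID"
//   sensitivity_score { score: SENSITIVITY_MODERATE }
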
// Coarse-grained confidence level of how well a particular finding
// satisfies the criteria to match a particular infoType.
//
// Likelihood is calculated based on the number of signals a
// finding has that imply that the finding matches the infoType. For
// example, a string that has an '@' and a '.com' is more likely to be a
// match for an email address than a string that only has an '@'.
//
// In general, the highest likelihood level has the strongest signals that
// indicate a match. That is, a finding with a high likelihood has a low chance
// of being a false positive.
//
// For more information about each likelihood level
// and how likelihood works, see [Match
// likelihood](https://cloud.google.com/sensitive-data-protection/docs/likelihood).
enum Likelihood {
  // Default value; same as POSSIBLE.
  LIKELIHOOD_UNSPECIFIED = 0;

  // Highest chance of a false positive.
  VERY_UNLIKELY = 1;

  // High chance of a false positive.
  UNLIKELY = 2;

  // Some matching signals. The default value.
  POSSIBLE = 3;

  // Low chance of a false positive.
  LIKELY = 4;

  // Confidence level is high. Lowest chance of a false positive.
  VERY_LIKELY = 5;
}

// A reference to a StoredInfoType to use with scanning.
message StoredType {
  // Resource name of the requested `StoredInfoType`, for example
  // `organizations/433245324/storedInfoTypes/432452342` or
  // `projects/project-id/storedInfoTypes/432452342`.
  string name = 1;

  // Timestamp indicating when the version of the `StoredInfoType` used for
  // inspection was created. Output-only field, populated by the system.
  google.protobuf.Timestamp create_time = 2;
}

// Custom information type provided by the user. Used to find domain-specific
// sensitive information configurable to the data in question.
message CustomInfoType {
  // Custom information type based on a dictionary of words or phrases. This
  // can be used to match sensitive information specific to the data, such as
  // a list of employee IDs or job titles.
  //
  // Dictionary words are case-insensitive, and all characters other than
  // letters and digits in the Unicode [Basic Multilingual
  // Plane](https://en.wikipedia.org/wiki/Plane_%28Unicode%29#Basic_Multilingual_Plane)
  // will be replaced with whitespace when scanning for matches, so the
  // dictionary phrase "Sam Johnson" will match all three phrases "sam johnson",
  // "Sam, Johnson", and "Sam (Johnson)". Additionally, the characters
  // surrounding any match must be of a different type than the adjacent
  // characters within the word, so letters must be next to non-letters and
  // digits next to non-digits. For example, the dictionary word "jen" will
  // match the first three letters of the text "jen123" but will return no
  // matches for "jennifer".
  //
  // Dictionary words containing a large number of characters that are not
  // letters or digits may result in unexpected findings because such
  // characters are treated as whitespace. The
  // [limits](https://cloud.google.com/sensitive-data-protection/limits) page
  // contains details about the size limits of dictionaries. For dictionaries
  // that do not fit within these constraints, consider using
  // `LargeCustomDictionaryConfig` in the `StoredInfoType` API.
  message Dictionary {
    // Message defining a list of words or phrases to search for in the data.
    message WordList {
      // Words or phrases defining the dictionary. The dictionary must contain
      // at least one phrase, and every phrase must contain at least 2
      // characters that are letters or digits. [required]
      repeated string words = 1;
    }

    // The potential places the data can be read from.
    oneof source {
      // List of words or phrases to search for.
      WordList word_list = 1;

      // Newline-delimited file of words in Cloud Storage. Only a single file
      // is accepted.
      CloudStoragePath cloud_storage_path = 3;
    }
  }
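
  // Illustrative example (not part of the API definition): a Dictionary value
  // in text format; the job titles shown are placeholders.
  //
  //   word_list {
  //     words: "Senior Engineer"
  //     words: "Staff Engineer"
  //   }
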
  // Message defining a custom regular expression.
  message Regex {
    // Pattern defining the regular expression. Its syntax
    // (https://github.com/google/re2/wiki/Syntax) can be found under the
    // google/re2 repository on GitHub.
    string pattern = 1;

    // The indexes of the submatches to extract as findings. When not
    // specified, the entire match is returned. No more than 3 may be
    // included.
    repeated int32 group_indexes = 2;
  }

  // Message for detecting output from deidentification transformations
  // such as
  // [`CryptoReplaceFfxFpeConfig`](https://cloud.google.com/sensitive-data-protection/docs/reference/rest/v2/organizations.deidentifyTemplates#cryptoreplaceffxfpeconfig).
  // These types of transformations are
  // those that perform pseudonymization, thereby producing a "surrogate" as
  // output. This should be used in conjunction with a field on the
  // transformation such as `surrogate_info_type`. This CustomInfoType does
  // not support the use of `detection_rules`.
  message SurrogateType {}

  // Deprecated; use `InspectionRuleSet` instead. Rule for modifying a
  // `CustomInfoType` to alter behavior under certain circumstances, depending
  // on the specific details of the rule. Not supported for the
  // `surrogate_type` custom infoType.
  message DetectionRule {
    // Message for specifying a window around a finding to apply a detection
    // rule.
    message Proximity {
      // Number of characters before the finding to consider. For tabular data,
      // if you want to modify the likelihood of an entire column of findings,
      // set this to 1. For more information, see
      // [Hotword example: Set the match likelihood of a table column]
      // (https://cloud.google.com/sensitive-data-protection/docs/creating-custom-infotypes-likelihood#match-column-values).
      int32 window_before = 1;

      // Number of characters after the finding to consider.
      int32 window_after = 2;
    }

    // Message for specifying an adjustment to the likelihood of a finding as
    // part of a detection rule.
    message LikelihoodAdjustment {
      // How the likelihood will be modified.
      oneof adjustment {
        // Set the likelihood of a finding to a fixed value.
        Likelihood fixed_likelihood = 1;

        // Increase or decrease the likelihood by the specified number of
        // levels. For example, if a finding would be `POSSIBLE` without the
        // detection rule and `relative_likelihood` is 1, then it is upgraded
        // to `LIKELY`, while a value of -1 would downgrade it to `UNLIKELY`.
        // Likelihood may never drop below `VERY_UNLIKELY` or exceed
        // `VERY_LIKELY`, so applying an adjustment of 1 followed by an
        // adjustment of -1 when the base likelihood is `VERY_LIKELY` will
        // result in a final likelihood of `LIKELY`.
        int32 relative_likelihood = 2;
      }
    }

    // The rule that adjusts the likelihood of findings within a certain
    // proximity of hotwords.
    message HotwordRule {
      // Regular expression pattern defining what qualifies as a hotword.
      Regex hotword_regex = 1;

      // Range of characters within which the entire hotword must reside.
      // The total length of the window cannot exceed 1000 characters.
      // The finding itself will be included in the window, so that hotwords
      // can be used to match substrings of the finding itself. Suppose you
      // want Cloud DLP to promote the likelihood of the phone number
      // regex "\(\d{3}\) \d{3}-\d{4}" if the area code is known to be the
      // area code of a company's office. In this case, use the hotword regex
      // "\(xxx\)", where "xxx" is the area code in question.
      //
      // For tabular data, if you want to modify the likelihood of an entire
      // column of findings, see
      // [Hotword example: Set the match likelihood of a table column]
      // (https://cloud.google.com/sensitive-data-protection/docs/creating-custom-infotypes-likelihood#match-column-values).
      Proximity proximity = 2;

      // Likelihood adjustment to apply to all matching findings.
      LikelihoodAdjustment likelihood_adjustment = 3;
    }
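
    // Illustrative example (not part of the API definition): a HotwordRule
    // value in text format that raises findings to VERY_LIKELY when the word
    // "patient" appears in the 50 characters preceding the finding.
    //
    //   hotword_regex { pattern: "(?i)patient" }
    //   proximity { window_before: 50 }
    //   likelihood_adjustment { fixed_likelihood: VERY_LIKELY }
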
    // Type of hotword rule.
    oneof type {
      // Hotword-based detection rule.
      HotwordRule hotword_rule = 1;
    }
  }

  // Type of exclusion rule.
  enum ExclusionType {
    // A finding of this custom info type will not be excluded from results.
    EXCLUSION_TYPE_UNSPECIFIED = 0;

    // A finding of this custom info type will be excluded from final results,
    // but can still affect rule execution.
    EXCLUSION_TYPE_EXCLUDE = 1;
  }

  // CustomInfoType can either be a new infoType or an extension of a built-in
  // infoType, when the name matches one of the existing infoTypes and that
  // infoType is specified in the `InspectContent.info_types` field. Specifying
  // the latter adds findings to the ones detected by the system. If the
  // built-in info type is not specified in the `InspectContent.info_types`
  // list, then the name is treated as a custom info type.
  InfoType info_type = 1;

  // Likelihood to return for this CustomInfoType. This base value can be
  // altered by a detection rule if the finding meets the criteria specified by
  // the rule. Defaults to `VERY_LIKELY` if not specified.
  Likelihood likelihood = 6;

  // Type of custom detector.
  oneof type {
    // A list of phrases to detect as a CustomInfoType.
    Dictionary dictionary = 2;

    // Regular expression based CustomInfoType.
    Regex regex = 3;

    // Message for detecting output from deidentification transformations that
    // support reversing.
    SurrogateType surrogate_type = 4;

    // Load an existing `StoredInfoType` resource for use in
    // `InspectDataSource`. Not currently supported in `InspectContent`.
    StoredType stored_type = 5;
  }

  // Set of detection rules to apply to all findings of this CustomInfoType.
  // Rules are applied in the order that they are specified. Not supported for
  // the `surrogate_type` CustomInfoType.
  repeated DetectionRule detection_rules = 7;

  // If set to EXCLUSION_TYPE_EXCLUDE, this infoType will not cause a finding
  // to be returned. It can still be used for rule matching.
  ExclusionType exclusion_type = 8;

  // Sensitivity for this CustomInfoType. If this CustomInfoType extends an
  // existing InfoType, the sensitivity here will take precedence over that of
  // the original InfoType. If unset for a CustomInfoType, it will default to
  // HIGH.
  // This only applies to data profiling.
  SensitivityScore sensitivity_score = 9;
}

// General identifier of a data field in a storage service.
message FieldId {
  // Name describing the field.
  string name = 1;
}

// Datastore partition ID.
// A partition ID identifies a grouping of entities. The grouping is always
// by project and namespace; however, the namespace ID may be empty.
//
// A partition ID contains several dimensions:
// project ID and namespace ID.
message PartitionId {
  // The ID of the project to which the entities belong.
  string project_id = 2;

  // If not empty, the ID of the namespace to which the entities belong.
  string namespace_id = 4;
}

// A representation of a Datastore kind.
message KindExpression {
  // The name of the kind.
  string name = 1;
}

// Options defining a data set within Google Cloud Datastore.
message DatastoreOptions {
  // A partition ID identifies a grouping of entities. The grouping is always
  // by project and namespace; however, the namespace ID may be empty.
  PartitionId partition_id = 1;

  // The kind to process.
  KindExpression kind = 2;
}
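
// Illustrative example (not part of the API definition): a DatastoreOptions
// value in text format that scopes a scan to a hypothetical "Task" kind in
// the default namespace; "my-project" is a placeholder project ID.
//
//   partition_id { project_id: "my-project" }
//   kind { name: "Task" }
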
// Definitions of file type groups to scan. New types will be added to this
// list.
enum FileType {
  // Includes all files.
  FILE_TYPE_UNSPECIFIED = 0;

  // Includes all file extensions not covered by another entry. Binary
  // scanning attempts to convert the content of the file to UTF-8 to scan
  // the file.
  // If you wish to avoid this fallback, specify one or more of the other
  // file types in your storage scan.
  BINARY_FILE = 1;

  // Included file extensions:
  // asc, asp, aspx, brf, c, cc, cfm, cgi, cpp, csv, cxx, c++, cs, css, dart,
  // dat, dot, eml, epub, ged, go, h, hh, hpp, hxx, h++, hs, html, htm,
  // mkd, markdown, m, ml, mli, perl, pl, plist, pm, php, phtml, pht,
  // properties, py, pyw, rb, rbw, rs, rss, rc, scala, sh, sql, swift, tex,
  // shtml, shtm, xhtml, lhs, ics, ini, java, js, json, jsonl, kix, kml,
  // ocaml, md, txt, text, tsv, vb, vcard, vcs, wml, xcodeproj, xml, xsl, xsd,
  // yml, yaml.
  TEXT_FILE = 2;

  // Included file extensions:
  // bmp, gif, jpg, jpeg, jpe, png. Setting
  // [bytes_limit_per_file][google.privacy.dlp.v2.CloudStorageOptions.bytes_limit_per_file]
  // or
  // [bytes_limit_per_file_percent][google.privacy.dlp.v2.CloudStorageOptions.bytes_limit_per_file_percent]
  // has no effect on image files. Image inspection is restricted to the
  // `global`, `us`, `asia`, and `europe` regions.
  IMAGE = 3;

  // Microsoft Word files larger than 30 MB will be scanned as binary files.
  // Included file extensions:
  // docx, dotx, docm, dotm. Setting `bytes_limit_per_file` or
  // `bytes_limit_per_file_percent` has no effect on Word files.
  WORD = 5;

  // PDF files larger than 30 MB will be scanned as binary files.
  // Included file extensions:
  // pdf. Setting `bytes_limit_per_file` or `bytes_limit_per_file_percent`
  // has no effect on PDF files.
  PDF = 6;

  // Included file extensions:
  // avro
  AVRO = 7;

  // Included file extensions:
  // csv
  CSV = 8;

  // Included file extensions:
  // tsv
  TSV = 9;

  // Microsoft PowerPoint files larger than 30 MB will be scanned as binary
  // files. Included file extensions:
  // pptx, pptm, potx, potm, pot. Setting `bytes_limit_per_file` or
  // `bytes_limit_per_file_percent` has no effect on PowerPoint files.
  POWERPOINT = 11;

  // Microsoft Excel files larger than 30 MB will be scanned as binary files.
  // Included file extensions:
  // xlsx, xlsm, xltx, xltm. Setting `bytes_limit_per_file` or
  // `bytes_limit_per_file_percent` has no effect on Excel files.
  EXCEL = 12;
}
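
// Illustrative example (not part of the API definition): restricting a Cloud
// Storage scan to text and PDF files, as the repeated `file_types` field
// would appear inside `CloudStorageOptions` (defined below).
//
//   file_types: TEXT_FILE
//   file_types: PDF
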
// Message representing a set of files in a Cloud Storage bucket. Regular
// expressions are used to allow fine-grained control over which files in the
// bucket to include.
//
// Included files are those that match at least one item in `include_regex`
// and do not match any items in `exclude_regex`. Note that a file that
// matches items from both lists will _not_ be included. For a match to occur,
// the entire file path (i.e., everything in the url after the bucket name)
// must match the regular expression.
//
// For example, given the input `{bucket_name: "mybucket", include_regex:
// ["directory1/.*"], exclude_regex:
// ["directory1/excluded.*"]}`:
//
// * `gs://mybucket/directory1/myfile` will be included
// * `gs://mybucket/directory1/directory2/myfile` will be included (`.*`
//   matches across `/`)
// * `gs://mybucket/directory0/directory1/myfile` will _not_ be included (the
//   full path doesn't match any items in `include_regex`)
// * `gs://mybucket/directory1/excludedfile` will _not_ be included (the path
//   matches an item in `exclude_regex`)
//
// If `include_regex` is left empty, it will match all files by default
// (this is equivalent to setting `include_regex: [".*"]`).
//
// Some other common use cases:
//
// * `{bucket_name: "mybucket", exclude_regex: [".*\.pdf"]}` will include all
//   files in `mybucket` except for .pdf files
// * `{bucket_name: "mybucket", include_regex: ["directory/[^/]+"]}` will
//   include all files directly under `gs://mybucket/directory/`, without
//   matching across `/`
message CloudStorageRegexFileSet {
  // The name of a Cloud Storage bucket. Required.
  string bucket_name = 1;

  // A list of regular expressions matching file paths to include. All files
  // in the bucket that match at least one of these regular expressions will
  // be included in the set of files, except for those that also match an item
  // in `exclude_regex`. Leaving this field empty will match all files by
  // default (this is equivalent to including `.*` in the list).
  //
  // Regular expressions use RE2
  // [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
  // under the google/re2 repository on GitHub.
  repeated string include_regex = 2;

  // A list of regular expressions matching file paths to exclude. All files
  // in the bucket that match at least one of these regular expressions will
  // be excluded from the scan.
  //
  // Regular expressions use RE2
  // [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
  // under the google/re2 repository on GitHub.
  repeated string exclude_regex = 3;
}
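
// Illustrative example (not part of the API definition): the first example
// above expressed in text format as a CloudStorageRegexFileSet value, as it
// would appear in the `regex_file_set` field of `CloudStorageOptions.FileSet`
// (defined below).
//
//   bucket_name: "mybucket"
//   include_regex: "directory1/.*"
//   exclude_regex: "directory1/excluded.*"
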
// Options defining a file or a set of files within a Cloud Storage
// bucket.
message CloudStorageOptions {
  // Set of files to scan.
  message FileSet {
    // The Cloud Storage url of the file(s) to scan, in the format
    // `gs://<bucket>/<path>`. Trailing wildcard in the path is allowed.
    //
    // If the url ends in a trailing slash, the bucket or directory represented
    // by the url will be scanned non-recursively (content in sub-directories
    // will not be scanned). This means that `gs://mybucket/` is equivalent to
    // `gs://mybucket/*`, and `gs://mybucket/directory/` is equivalent to
    // `gs://mybucket/directory/*`.
    //
    // Exactly one of `url` or `regex_file_set` must be set.
    string url = 1;

    // The regex-filtered set of files to scan. Exactly one of `url` or
    // `regex_file_set` must be set.
    CloudStorageRegexFileSet regex_file_set = 2;
  }

  // How to sample bytes if not all bytes are scanned. Meaningful only when
  // used in conjunction with bytes_limit_per_file. If not specified, scanning
  // starts from the top.
  enum SampleMethod {
    // No sampling.
    SAMPLE_METHOD_UNSPECIFIED = 0;

    // Scan from the top (default).
    TOP = 1;

    // For each file larger than bytes_limit_per_file, randomly pick the
    // offset to start scanning. The scanned bytes are contiguous.
    RANDOM_START = 2;
  }

  // The set of one or more files to scan.
  FileSet file_set = 1;

  // Max number of bytes to scan from a file. If a scanned file's size is
  // bigger than this value, then the rest of the bytes are omitted. Only one
  // of `bytes_limit_per_file` and `bytes_limit_per_file_percent` can be
  // specified. This field can't be set if de-identification is requested. For
  // certain file types, setting this field has no effect. For more
  // information, see [Limits on bytes scanned per
  // file](https://cloud.google.com/sensitive-data-protection/docs/supported-file-types#max-byte-size-per-file).
  int64 bytes_limit_per_file = 4;

  // Max percentage of bytes to scan from a file. The rest are omitted. The
  // number of bytes scanned is rounded down. Must be between 0 and 100,
  // inclusive. Both 0 and 100 mean no limit. Defaults to 0. Only one of
  // bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
  // This field can't be set if de-identification is requested. For certain
  // file types, setting this field has no effect. For more information, see
  // [Limits on bytes scanned per
  // file](https://cloud.google.com/sensitive-data-protection/docs/supported-file-types#max-byte-size-per-file).
  int32 bytes_limit_per_file_percent = 8;

  // List of file type groups to include in the scan.
  // If empty, all files are scanned and available data format processors
  // are applied. In addition, the binary content of the selected files
  // is always scanned as well.
  // Images are scanned only as binary if the specified region
  // does not support image inspection and no file_types were specified.
  // Image inspection is restricted to 'global', 'us', 'asia', and 'europe'.
  repeated FileType file_types = 5;

  // How to sample the data.
  SampleMethod sample_method = 6;

  // Limits the number of files to scan to this percentage of the input
  // FileSet. The number of files scanned is rounded down. Must be between 0
  // and 100, inclusive. Both 0 and 100 mean no limit. Defaults to 0.
  int32 files_limit_percent = 7;
}

// Message representing a set of files in Cloud Storage.
message CloudStorageFileSet {
  // The url, in the format `gs://<bucket>/<path>`. Trailing wildcard in the
  // path is allowed.
  string url = 1;
}

// Message representing a single file or path in Cloud Storage.
message CloudStoragePath {
  // A URL representing a file or path (no wildcards) in Cloud Storage.
  // Example: `gs://[BUCKET_NAME]/dictionary.txt`
  string path = 1;
}
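
// Illustrative example (not part of the API definition): a CloudStorageOptions
// value in text format that scans text files under a hypothetical bucket,
// sampling at most 1 MB from the start of each file; "my-bucket" and the path
// are placeholders.
//
//   file_set { url: "gs://my-bucket/reports/" }
//   file_types: TEXT_FILE
//   bytes_limit_per_file: 1048576
//   sample_method: TOP
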
// Options defining BigQuery table and row identifiers.
message BigQueryOptions {
  // How to sample rows if not all rows are scanned. Meaningful only when used
  // in conjunction with either rows_limit or rows_limit_percent. If not
  // specified, rows are scanned in the order BigQuery reads them.
  enum SampleMethod {
    // No sampling.
    SAMPLE_METHOD_UNSPECIFIED = 0;

    // Scan groups of rows in the order BigQuery provides (default). Multiple
    // groups of rows may be scanned in parallel, so results may not appear in
    // the same order the rows are read.
    TOP = 1;

    // Randomly pick groups of rows to scan.
    RANDOM_START = 2;
  }

  // Complete BigQuery table reference.
  BigQueryTable table_reference = 1;

  // Table fields that may uniquely identify a row within the table. When
  // `actions.saveFindings.outputConfig.table` is specified, the values of
  // columns specified here are available in the output table under
  // `location.content_locations.record_location.record_key.id_values`. Nested
  // fields such as `person.birthdate.year` are allowed.
  repeated FieldId identifying_fields = 2;

  // Max number of rows to scan. If the table has more rows than this value,
  // the rest of the rows are omitted. If not set, or if set to 0, all rows
  // will be scanned. Only one of rows_limit and rows_limit_percent can be
  // specified. Cannot be used in conjunction with TimespanConfig.
  int64 rows_limit = 3;

  // Max percentage of rows to scan. The rest are omitted. The number of rows
  // scanned is rounded down. Must be between 0 and 100, inclusive. Both 0 and
  // 100 mean no limit. Defaults to 0. Only one of rows_limit and
  // rows_limit_percent can be specified. Cannot be used in conjunction with
  // TimespanConfig.
  //
  // Caution: A [known
  // issue](https://cloud.google.com/sensitive-data-protection/docs/known-issues#bq-sampling)
  // is causing the `rowsLimitPercent` field to behave unexpectedly. We
  // recommend using `rowsLimit` instead.
  int32 rows_limit_percent = 6;

  // How to sample the data.
  SampleMethod sample_method = 4;

  // References to fields excluded from scanning. This allows you to skip
  // inspection of entire columns which you know have no findings.
  // When inspecting a table, we recommend that you inspect all columns.
  // Otherwise, findings might be affected because hints from excluded columns
  // will not be used.
  repeated FieldId excluded_fields = 5;

  // Limit scanning only to these fields.
  // When inspecting a table, we recommend that you inspect all columns.
  // Otherwise, findings might be affected because hints from the columns that
  // are not included will not be used.
  repeated FieldId included_fields = 7;
}

// Shared message indicating Cloud storage type.
message StorageConfig {
  // Configuration of the timespan of the items to include in scanning.
  // Currently only supported when inspecting Cloud Storage and BigQuery.
  message TimespanConfig {
    // Exclude files, tables, or rows older than this value.
    // If not set, no lower time limit is applied.
    google.protobuf.Timestamp start_time = 1;

    // Exclude files, tables, or rows newer than this value.
    // If not set, no upper time limit is applied.
    google.protobuf.Timestamp end_time = 2;

    // Specification of the field containing the timestamp of scanned items.
    // Used for data sources like Datastore and BigQuery.
    //
    // **For BigQuery**
    //
    // If this value is not specified and the table was modified between the
    // given start and end times, the entire table will be scanned. If this
    // value is specified, then rows are filtered based on the given start and
    // end times. Rows with a `NULL` value in the provided BigQuery column are
    // skipped.
    // Valid data types of the provided BigQuery column are: `INTEGER`, `DATE`,
    // `TIMESTAMP`, and `DATETIME`.
    //
    // If your BigQuery table is [partitioned at ingestion
    // time](https://cloud.google.com/bigquery/docs/partitioned-tables#ingestion_time),
    // you can use any of the following pseudo-columns as your timestamp field.
    // When used with Cloud DLP, these pseudo-column names are case sensitive.
    //
    // - `_PARTITIONTIME`
    // - `_PARTITIONDATE`
    // - `_PARTITION_LOAD_TIME`
    //
    // **For Datastore**
    //
    // If this value is specified, then entities are filtered based on the
    // given start and end times. If an entity does not contain the provided
    // timestamp property or contains empty or invalid values, then it is
    // included.
    // Valid data types of the provided timestamp property are: `TIMESTAMP`.
    //
    // See the
    // [known
    // issue](https://cloud.google.com/sensitive-data-protection/docs/known-issues#bq-timespan)
    // related to this operation.
    FieldId timestamp_field = 3;

    // When the job is started by a JobTrigger, we will automatically figure
    // out a valid start_time to avoid scanning files that have not been
    // modified since the last time the JobTrigger executed. This will be
    // based on the time of the execution of the last run of the JobTrigger or
    // the timespan end_time used in the last run of the JobTrigger.
    //
    // **For BigQuery**
    //
    // Inspect jobs triggered by automatic population will scan data that is
    // at least three hours old when the job starts. This is because streaming
    // buffer rows are not read during inspection and reading up to the
    // current timestamp will result in skipped rows.
    //
    // See the [known
    // issue](https://cloud.google.com/sensitive-data-protection/docs/known-issues#recently-streamed-data)
    // related to this operation.
    bool enable_auto_population_of_timespan_config = 4;
  }
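
  // Illustrative example (not part of the API definition): StorageConfig
  // fields in text format that scan a hypothetical BigQuery table and limit
  // the scan to rows stamped on or after 2024-01-01T00:00:00Z via a
  // hypothetical "created_at" column; all names are placeholders.
  //
  //   big_query_options {
  //     table_reference {
  //       project_id: "my-project"
  //       dataset_id: "my_dataset"
  //       table_id: "transactions"
  //     }
  //     identifying_fields { name: "transaction_id" }
  //   }
  //   timespan_config {
  //     start_time { seconds: 1704067200 }
  //     timestamp_field { name: "created_at" }
  //   }
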
  // Type of storage system to inspect.
  oneof type {
    // Google Cloud Datastore options.
    DatastoreOptions datastore_options = 2;

    // Cloud Storage options.
    CloudStorageOptions cloud_storage_options = 3;

    // BigQuery options.
    BigQueryOptions big_query_options = 4;

    // Hybrid inspection options.
    HybridOptions hybrid_options = 9;
  }

  // Configuration of the timespan of the items to include in scanning.
  TimespanConfig timespan_config = 6;
}

// Configuration to control jobs where the content being inspected is outside
// of Google Cloud Platform.
message HybridOptions {
  // A short description of where the data is coming from. Will be stored once
  // in the job. Maximum length: 256 characters.
  string description = 1;

  // These are labels that each inspection request must include within their
  // 'finding_labels' map. The request may contain other labels, but if any of
  // these required keys are missing, the request is rejected.
  //
  // Label keys must be between 1 and 63 characters long and must conform
  // to the following regular expression: `[a-z]([-a-z0-9]*[a-z0-9])?`.
  //
  // No more than 10 keys can be required.
  repeated string required_finding_label_keys = 2;

  // To organize findings, these labels will be added to each finding.
  //
  // Label keys must be between 1 and 63 characters long and must conform
  // to the following regular expression: `[a-z]([-a-z0-9]*[a-z0-9])?`.
  //
  // Label values must be between 0 and 63 characters long and must conform
  // to the regular expression `([a-z]([-a-z0-9]*[a-z0-9])?)?`.
  //
  // No more than 10 labels can be associated with a given finding.
  //
  // Examples:
  //
  // * `"environment" : "production"`
  // * `"pipeline" : "etl"`
  map<string, string> labels = 3;

  // If the container is a table, additional information to make findings
  // meaningful such as the columns that are primary keys.
  TableOptions table_options = 4;
}
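
// Illustrative example (not part of the API definition): a HybridOptions
// value in text format that requires a "pipeline" label on every hybrid
// inspection request and attaches an "env" label to each finding; the
// description and label values are placeholders.
//
//   description: "Nightly export of on-prem CRM records"
//   required_finding_label_keys: "pipeline"
//   labels { key: "env" value: "production" }
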
// Row key for identifying a record in a BigQuery table.
message BigQueryKey {
  // Complete BigQuery table reference.
  BigQueryTable table_reference = 1;

  // Row number inferred at the time the table was scanned. This value is
  // nondeterministic, cannot be queried, and may be null for inspection
  // jobs. To locate findings within a table, specify
  // `inspect_job.storage_config.big_query_options.identifying_fields` in
  // `CreateDlpJobRequest`.
  int64 row_number = 2;
}

// Record key for a finding in Cloud Datastore.
message DatastoreKey {
  // Datastore entity key.
  Key entity_key = 1;
}

// A unique identifier for a Datastore entity.
// If a key's partition ID or any of its path kinds or names are
// reserved/read-only, the key is reserved/read-only.
// A reserved/read-only key is forbidden in certain documented contexts.
message Key {
  // A (kind, ID/name) pair used to construct a key path.
  //
  // If either name or ID is set, the element is complete.
  // If neither is set, the element is incomplete.
  message PathElement {
    // The kind of the entity.
    // A kind matching regex `__.*__` is reserved/read-only.
    // A kind must not contain more than 1500 bytes when UTF-8 encoded.
    // Cannot be `""`.
    string kind = 1;

    // The type of ID.
    oneof id_type {
      // The auto-allocated ID of the entity.
      // Never equal to zero. Values less than zero are discouraged and may
      // not be supported in the future.
      int64 id = 2;

      // The name of the entity.
      // A name matching regex `__.*__` is reserved/read-only.
      // A name must not be more than 1500 bytes when UTF-8 encoded.
      // Cannot be `""`.
      string name = 3;
    }
  }

  // Entities are partitioned into subsets, currently identified by a project
  // ID and namespace ID.
  // Queries are scoped to a single partition.
  PartitionId partition_id = 1;

  // The entity path.
  // An entity path consists of one or more elements composed of a kind and a
  // string or numerical identifier, which identify entities. The first
  // element identifies a _root entity_, the second element identifies
  // a _child_ of the root entity, the third element identifies a child of the
  // second entity, and so forth. The entities identified by all prefixes of
  // the path are called the element's _ancestors_.
  //
  // A path can never be empty, and a path can have at most 100 elements.
  repeated PathElement path = 2;
}

// Message for a unique key indicating a record that contains a finding.
message RecordKey {
  // Type of key.
  oneof type {
    // Datastore key.
    DatastoreKey datastore_key = 2;

    // BigQuery key.
    BigQueryKey big_query_key = 3;
  }

  // Values of identifying columns in the given row. Order of values matches
  // the order of `identifying_fields` specified in the scanning request.
  repeated string id_values = 5;
}

// Message defining the location of a BigQuery table. A table is uniquely
// identified by its project_id, dataset_id, and table_id. Within a query
// a table is often referenced with a string in the format of:
// `<project_id>:<dataset_id>.<table_id>` or
// `<project_id>.<dataset_id>.<table_id>`.
message BigQueryTable {
  // The Google Cloud Platform project ID of the project containing the table.
  // If omitted, the project ID is inferred from the API call.
  string project_id = 1;

  // Dataset ID of the table.
  string dataset_id = 2;

  // Name of the table.
  string table_id = 3;
}

// Message defining the location of a BigQuery table with the projectId
// inferred from the parent project.
message TableReference {
  // Dataset ID of the table.
  string dataset_id = 1;

  // Name of the table.
  string table_id = 2;
}

// Message defining a field of a BigQuery table.
message BigQueryField {
  // Source table of the field.
  BigQueryTable table = 1;

  // Designated field in the BigQuery table.
  FieldId field = 2;
}

// An entity in a dataset is a field or set of fields that correspond to a
// single person. For example, in medical records the `EntityId` might be a
// patient identifier, or for financial records it might be an account
// identifier. This message is used when generalizations or analysis must take
// into account that multiple rows correspond to the same entity.
message EntityId {
  // Composite key indicating which field contains the entity identifier.
  FieldId field = 1;
}

// Instructions regarding the table content being inspected.
message TableOptions {
  // The columns that are the primary keys for table objects included in
  // ContentItem. A copy of this cell's value will be stored alongside each
  // finding so that the finding can be traced to the specific row it came
  // from. No more than 3 may be provided.
  repeated FieldId identifying_fields = 1;
}
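
// Illustrative example (not part of the API definition): a TableOptions value
// in text format that names a hypothetical primary-key column so hybrid
// findings can be traced back to their source rows.
//
//   identifying_fields { name: "customer_id" }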