// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.bigquery.v2;

import "google/api/field_behavior.proto";
import "google/cloud/bigquery/v2/decimal_target_types.proto";
import "google/cloud/bigquery/v2/file_set_specification_type.proto";
import "google/cloud/bigquery/v2/hive_partitioning.proto";
import "google/cloud/bigquery/v2/json_extension.proto";
import "google/cloud/bigquery/v2/map_target_type.proto";
import "google/cloud/bigquery/v2/table_schema.proto";
import "google/protobuf/wrappers.proto";

option go_package = "cloud.google.com/go/bigquery/apiv2/bigquerypb;bigquerypb";
option java_outer_classname = "ExternalDataConfigProto";
option java_package = "com.google.cloud.bigquery.v2";

// Options specific to Avro external data sources.
message AvroOptions {
  // Optional. If sourceFormat is set to "AVRO", indicates whether to interpret
  // logical types as the corresponding BigQuery data type (for example,
  // TIMESTAMP), instead of using the raw type (for example, INTEGER).
  google.protobuf.BoolValue use_avro_logical_types = 1
      [(google.api.field_behavior) = OPTIONAL];
}

// Parquet Options for load and make external tables.
message ParquetOptions {
  // Optional. Indicates whether to infer Parquet ENUM logical type as STRING
  // instead of BYTES by default.
  google.protobuf.BoolValue enum_as_string = 1
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Indicates whether to use schema inference specifically for
  // Parquet LIST logical type.
  google.protobuf.BoolValue enable_list_inference = 2
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Indicates how to represent a Parquet map if present.
  MapTargetType map_target_type = 3 [(google.api.field_behavior) = OPTIONAL];
}

// Information related to a CSV data source.
message CsvOptions {
  // Optional. The separator character for fields in a CSV file. The separator
  // is interpreted as a single byte. For files encoded in ISO-8859-1, any
  // single character can be used as a separator. For files encoded in UTF-8,
  // characters represented in decimal range 1-127 (U+0001-U+007F) can be used
  // without any modification. UTF-8 characters encoded with multiple bytes
  // (i.e. U+0080 and above) will have only the first byte used for separating
  // fields. The remaining bytes will be treated as a part of the field.
  // BigQuery also supports the escape sequence "\t" (U+0009) to specify a tab
  // separator. The default value is comma (",", U+002C).
  string field_delimiter = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The number of rows at the top of a CSV file that BigQuery will
  // skip when reading the data. The default value is 0. This property is
  // useful if you have header rows in the file that should be skipped.
  // When autodetect is on, the behavior is the following:
  //
  // * skipLeadingRows unspecified - Autodetect tries to detect headers in the
  //   first row. If they are not detected, the row is read as data. Otherwise
  //   data is read starting from the second row.
  // * skipLeadingRows is 0 - Instructs autodetect that there are no headers and
  //   data should be read starting from the first row.
  // * skipLeadingRows = N > 0 - Autodetect skips N-1 rows and tries to detect
  //   headers in row N. If headers are not detected, row N is just skipped.
  //   Otherwise row N is used to extract column names for the detected schema.
  google.protobuf.Int64Value skip_leading_rows = 2
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The value that is used to quote data sections in a CSV file.
  // BigQuery converts the string to ISO-8859-1 encoding, and then uses the
  // first byte of the encoded string to split the data in its raw, binary
  // state.
  // The default value is a double-quote (").
  // If your data does not contain quoted sections,
  // set the property value to an empty string.
  // If your data contains quoted newline characters, you must also set the
  // allowQuotedNewlines property to true.
  // To include the specific quote character within a quoted value, precede it
  // with an additional matching quote character. For example, if you want to
  // escape the default character  ' " ', use ' "" '.
  google.protobuf.StringValue quote = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Indicates if BigQuery should allow quoted data sections that
  // contain newline characters in a CSV file. The default value is false.
  google.protobuf.BoolValue allow_quoted_newlines = 4
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Indicates if BigQuery should accept rows that are missing
  // trailing optional columns. If true, BigQuery treats missing trailing
  // columns as null values.
  // If false, records with missing trailing columns are treated as bad records,
  // and if there are too many bad records, an invalid error is returned in the
  // job result. The default value is false.
  google.protobuf.BoolValue allow_jagged_rows = 5
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The character encoding of the data.
  // The supported values are UTF-8, ISO-8859-1, UTF-16BE, UTF-16LE, UTF-32BE,
  // and UTF-32LE.  The default value is UTF-8.
  // BigQuery decodes the data after the raw, binary data has been split using
  // the values of the quote and fieldDelimiter properties.
  string encoding = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Indicates if the embedded ASCII control characters (the first 32
  // characters in the ASCII-table, from '\x00' to '\x1F') are preserved.
  google.protobuf.BoolValue preserve_ascii_control_characters = 7
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Specifies a string that represents a null value in a CSV file.
  // For example, if you specify "\N", BigQuery interprets "\N" as a null value
  // when querying a CSV file.
  // The default value is the empty string. If you set this property to a custom
  // value, BigQuery throws an error if an empty string is present for all data
  // types except for STRING and BYTES. For STRING and BYTES columns, BigQuery
  // interprets the empty string as an empty value.
  google.protobuf.StringValue null_marker = 8
      [(google.api.field_behavior) = OPTIONAL];
}

// Json Options for load and make external tables.
message JsonOptions {
  // Optional. The character encoding of the data.
  // The supported values are UTF-8, UTF-16BE, UTF-16LE, UTF-32BE,
  // and UTF-32LE.  The default value is UTF-8.
  string encoding = 1 [(google.api.field_behavior) = OPTIONAL];
}

// Information related to a Bigtable column.
message BigtableColumn {
  // [Required] Qualifier of the column.
  // Columns in the parent column family that has this exact qualifier are
  // exposed as `<family field name>.<column field name>` field.
  // If the qualifier is valid UTF-8 string, it can be specified in the
  // qualifier_string field.  Otherwise, a base-64 encoded value must be set to
  // qualifier_encoded.
  // The column field name is the same as the column qualifier. However, if the
  // qualifier is not a valid BigQuery field identifier i.e. does not match
  // [a-zA-Z][a-zA-Z0-9_]*, a valid identifier must be provided as field_name.
  google.protobuf.BytesValue qualifier_encoded = 1;

  // Qualifier string.
  google.protobuf.StringValue qualifier_string = 2;

  // Optional. If the qualifier is not a valid BigQuery field identifier i.e.
  // does not match [a-zA-Z][a-zA-Z0-9_]*,  a valid identifier must be provided
  // as the column field name and is used as field name in queries.
  string field_name = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The type to convert the value in cells of this column.
  // The values are expected to be encoded using HBase Bytes.toBytes function
  // when using the BINARY encoding value.
  // Following BigQuery types are allowed (case-sensitive):
  //
  // * BYTES
  // * STRING
  // * INTEGER
  // * FLOAT
  // * BOOLEAN
  // * JSON
  //
  // Default type is BYTES.
  // 'type' can also be set at the column family level. However, the setting at
  // this level takes precedence if 'type' is set at both levels.
  string type = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The encoding of the values when the type is not STRING.
  // Acceptable encoding values are:
  //
  // * TEXT - indicates values are alphanumeric text strings.
  // * BINARY - indicates values are encoded using HBase Bytes.toBytes family
  //   of functions.
  //
  // 'encoding' can also be set at the column family level. However, the setting
  // at this level takes precedence if 'encoding' is set at both levels.
  string encoding = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. If this is set, only the latest version of value in this column
  // is exposed.
  // 'onlyReadLatest' can also be set at the column family level. However, the
  // setting at this level takes precedence if 'onlyReadLatest' is set at both
  // levels.
  google.protobuf.BoolValue only_read_latest = 6
      [(google.api.field_behavior) = OPTIONAL];
}

// Information related to a Bigtable column family.
message BigtableColumnFamily {
  // Identifier of the column family.
  string family_id = 1;

  // Optional. The type to convert the value in cells of this column family.
  // The values are expected to be encoded using HBase Bytes.toBytes function
  // when using the BINARY encoding value.
  // Following BigQuery types are allowed (case-sensitive):
  //
  // * BYTES
  // * STRING
  // * INTEGER
  // * FLOAT
  // * BOOLEAN
  // * JSON
  //
  // Default type is BYTES.
  // This can be overridden for a specific column by listing that column in
  // 'columns' and specifying a type for it.
  string type = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The encoding of the values when the type is not STRING.
  // Acceptable encoding values are:
  //
  // * TEXT - indicates values are alphanumeric text strings.
  // * BINARY - indicates values are encoded using HBase Bytes.toBytes family
  //   of functions.
  //
  // This can be overridden for a specific column by listing that column in
  // 'columns' and specifying an encoding for it.
  string encoding = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Lists of columns that should be exposed as individual fields as
  // opposed to a list of (column name, value) pairs.
  // All columns whose qualifier matches a qualifier in this list can be
  // accessed as `<family field name>.<column field name>`.
  // Other columns can be accessed as a list through
  // the `<family field name>.Column` field.
  repeated BigtableColumn columns = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. If this is set, only the latest version of value is exposed for
  // all columns in this column family.
  // This can be overridden for a specific column by listing that column in
  // 'columns' and specifying a different setting
  // for that column.
  google.protobuf.BoolValue only_read_latest = 5
      [(google.api.field_behavior) = OPTIONAL];
}

// Options specific to Google Cloud Bigtable data sources.
message BigtableOptions {
  // Optional. List of column families to expose in the table schema along with
  // their types.
  // This list restricts the column families that can be referenced in queries
  // and specifies their value types.
  // You can use this list to do type conversions - see the 'type' field for
  // more details.
  // If you leave this list empty, all column families are present in the table
  // schema and their values are read as BYTES.
  // During a query only the column families referenced in that query are read
  // from Bigtable.
  repeated BigtableColumnFamily column_families = 1
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. If field is true, then the column families that are not
  // specified in columnFamilies list are not exposed in the table schema.
  // Otherwise, they are read with BYTES type values.
  // The default value is false.
  google.protobuf.BoolValue ignore_unspecified_column_families = 2
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. If field is true, then the rowkey column families will be read
  // and converted to string. Otherwise they are read with BYTES type values and
  // users need to manually cast them with CAST if necessary.
  // The default value is false.
  google.protobuf.BoolValue read_rowkey_as_string = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. If field is true, then each column family will be read as a
  // single JSON column. Otherwise they are read as a repeated cell structure
  // containing timestamp/value tuples. The default value is false.
  google.protobuf.BoolValue output_column_families_as_json = 4
      [(google.api.field_behavior) = OPTIONAL];
}

// Options specific to Google Sheets data sources.
message GoogleSheetsOptions {
  // Optional. The number of rows at the top of a sheet that BigQuery will skip
  // when reading the data. The default value is 0. This property is useful if
  // you have header rows that should be skipped. When autodetect is on,
  // the behavior is the following:
  //
  // * skipLeadingRows unspecified - Autodetect tries to detect headers in the
  //   first row. If they are not detected, the row is read as data. Otherwise
  //   data is read starting from the second row.
  // * skipLeadingRows is 0 - Instructs autodetect that there are no headers and
  //   data should be read starting from the first row.
  // * skipLeadingRows = N > 0 - Autodetect skips N-1 rows and tries to detect
  //   headers in row N. If headers are not detected, row N is just skipped.
  //   Otherwise row N is used to extract column names for the detected schema.
  google.protobuf.Int64Value skip_leading_rows = 1
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Range of a sheet to query from. Only used when non-empty.
  // Typical format: sheet_name!top_left_cell_id:bottom_right_cell_id
  // For example: sheet1!A1:B20
  string range = 2 [(google.api.field_behavior) = OPTIONAL];
}

// Describes a data source that lives outside of BigQuery: where the data is
// located (`source_uris`), which format it is in (`source_format`), and the
// format-specific options used when reading it.
message ExternalDataConfiguration {
  // Supported Object Metadata Types.
  enum ObjectMetadata {
    // Unspecified by default.
    OBJECT_METADATA_UNSPECIFIED = 0;

    // A synonym for `SIMPLE`.
    DIRECTORY = 1;

    // Directory listing of objects.
    SIMPLE = 2;
  }

  // MetadataCacheMode identifies if the table should use metadata caching for
  // files from external source (e.g. Google Cloud Storage).
  enum MetadataCacheMode {
    // Unspecified metadata cache mode.
    METADATA_CACHE_MODE_UNSPECIFIED = 0;

    // Set this mode to trigger automatic background refresh of metadata cache
    // from the external source. Queries will use the latest available cache
    // version within the table's maxStaleness interval.
    AUTOMATIC = 1;

    // Set this mode to enable triggering manual refresh of the metadata cache
    // from external source. Queries will use the latest manually triggered
    // cache version within the table's maxStaleness interval.
    MANUAL = 2;
  }

  // [Required] The fully-qualified URIs that point to your data in Google
  // Cloud.
  //
  // For Google Cloud Storage URIs:
  //   Each URI can contain one '*' wildcard character and it must come after
  //   the 'bucket' name.
  //   Size limits related to load jobs apply to external data sources.
  // For Google Cloud Bigtable URIs:
  //   Exactly one URI can be specified and it has to be a fully specified and
  //   valid HTTPS URL for a Google Cloud Bigtable table.
  // For Google Cloud Datastore backups, exactly one URI can be specified. Also,
  // the '*' wildcard character is not allowed.
  repeated string source_uris = 1;

  // Optional. Specifies how source URIs are interpreted for constructing the
  // file set to load.  By default source URIs are expanded against the
  // underlying storage.  Other options include specifying manifest files. Only
  // applicable to object storage systems.
  FileSetSpecType file_set_spec_type = 25
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The schema for the data.
  // Schema is required for CSV and JSON formats if autodetect is not on.
  // Schema is disallowed for Google Cloud Bigtable, Cloud Datastore backups,
  // Avro, ORC and Parquet formats.
  TableSchema schema = 2 [(google.api.field_behavior) = OPTIONAL];

  // [Required] The data format.
  //
  // * For CSV files, specify "CSV".
  // * For Google Sheets, specify "GOOGLE_SHEETS".
  // * For newline-delimited JSON, specify "NEWLINE_DELIMITED_JSON".
  // * For Avro files, specify "AVRO".
  // * For Google Cloud Datastore backups, specify "DATASTORE_BACKUP".
  // * For Apache Iceberg tables, specify "ICEBERG".
  // * For ORC files, specify "ORC".
  // * For Parquet files, specify "PARQUET".
  // * [Beta] For Google Cloud Bigtable, specify "BIGTABLE".
  string source_format = 3;

  // Optional. The maximum number of bad records that BigQuery can ignore when
  // reading data. If the number of bad records exceeds this value, an invalid
  // error is returned in the job result. The default value is 0, which requires
  // that all records are valid. This setting is ignored for Google Cloud
  // Bigtable, Google Cloud Datastore backups, Avro, ORC and Parquet formats.
  google.protobuf.Int32Value max_bad_records = 4
      [(google.api.field_behavior) = OPTIONAL];

  // Try to detect schema and format options automatically.
  // Any option specified explicitly will be honored.
  google.protobuf.BoolValue autodetect = 5;

  // Optional. Indicates if BigQuery should allow extra values that are not
  // represented in the table schema.
  // If true, the extra values are ignored.
  // If false, records with extra columns are treated as bad records, and if
  // there are too many bad records, an invalid error is returned in the job
  // result.
  // The default value is false.
  // The sourceFormat property determines what BigQuery treats as an extra
  // value:
  //
  // * CSV: Trailing columns
  // * JSON: Named values that don't match any column names
  // * Google Cloud Bigtable: This setting is ignored.
  // * Google Cloud Datastore backups: This setting is ignored.
  // * Avro: This setting is ignored.
  // * ORC: This setting is ignored.
  // * Parquet: This setting is ignored.
  google.protobuf.BoolValue ignore_unknown_values = 6
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The compression type of the data source.
  // Possible values include GZIP and NONE. The default value is NONE.
  // This setting is ignored for Google Cloud Bigtable, Google Cloud Datastore
  // backups, Avro, ORC and Parquet formats. An empty string is an invalid
  // value.
  string compression = 7 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Additional properties to set if sourceFormat is set to CSV.
  CsvOptions csv_options = 8 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Additional properties to set if sourceFormat is set to JSON.
  JsonOptions json_options = 26 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Additional options if sourceFormat is set to BIGTABLE.
  BigtableOptions bigtable_options = 9
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Additional options if sourceFormat is set to GOOGLE_SHEETS.
  GoogleSheetsOptions google_sheets_options = 10
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. When set, configures hive partitioning support. Not all storage
  // formats support hive partitioning -- requesting hive partitioning on an
  // unsupported format will lead to an error, as will providing an invalid
  // specification.
  HivePartitioningOptions hive_partitioning_options = 13
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The connection specifying the credentials to be used to read
  // external storage, such as Azure Blob, Cloud Storage, or S3. The
  // connection_id can have the form
  // `{project_id}.{location_id};{connection_id}` or
  // `projects/{project_id}/locations/{location_id}/connections/{connection_id}`.
  string connection_id = 14 [(google.api.field_behavior) = OPTIONAL];

  // Defines the list of possible SQL data types to which the source decimal
  // values are converted. This list and the precision and the scale parameters
  // of the decimal field determine the target type. In the order of NUMERIC,
  // BIGNUMERIC, and STRING, a type is picked if it is in the specified list and
  // if it supports the precision and the scale. STRING supports all precision
  // and scale values.
  // If none of the listed types supports the precision and the scale, the type
  // supporting the widest range in the specified list is picked, and if a value
  // exceeds the supported range when reading the data, an error will be thrown.
  //
  // Example: Suppose the value of this field is ["NUMERIC", "BIGNUMERIC"].
  // If (precision,scale) is:
  //
  // * (38,9) -> NUMERIC;
  // * (39,9) -> BIGNUMERIC (NUMERIC cannot hold 30 integer digits);
  // * (38,10) -> BIGNUMERIC (NUMERIC cannot hold 10 fractional digits);
  // * (76,38) -> BIGNUMERIC;
  // * (77,38) -> BIGNUMERIC (error if value exceeds supported range).
  //
  // This field cannot contain duplicate types. The order of the types in this
  // field is ignored. For example, ["BIGNUMERIC", "NUMERIC"] is the same as
  // ["NUMERIC", "BIGNUMERIC"] and NUMERIC always takes precedence over
  // BIGNUMERIC.
  //
  // Defaults to ["NUMERIC", "STRING"] for ORC and ["NUMERIC"] for the other
  // file formats.
  repeated DecimalTargetType decimal_target_types = 16;

  // Optional. Additional properties to set if sourceFormat is set to AVRO.
  AvroOptions avro_options = 17 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Load option to be used together with source_format
  // newline-delimited JSON to indicate that a variant of JSON is being loaded.
  // To load newline-delimited GeoJSON, specify GEOJSON (and source_format must
  // be set to NEWLINE_DELIMITED_JSON).
  JsonExtension json_extension = 18 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Additional properties to set if sourceFormat is set to PARQUET.
  ParquetOptions parquet_options = 19 [(google.api.field_behavior) = OPTIONAL];

  // Optional. ObjectMetadata is used to create Object Tables. Object Tables
  // contain a listing of objects (with their metadata) found at the
  // source_uris. If ObjectMetadata is set, source_format should be omitted.
  //
  // Currently SIMPLE is the only supported Object Metadata type.
  optional ObjectMetadata object_metadata = 22
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. When creating an external table, the user can provide a reference
  // file with the table schema. This is enabled for the following formats:
  // AVRO, PARQUET, ORC.
  google.protobuf.StringValue reference_file_schema_uri = 23
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Metadata Cache Mode for the table. Set this to enable caching of
  // metadata from external data source.
  MetadataCacheMode metadata_cache_mode = 24
      [(google.api.field_behavior) = OPTIONAL];
}