// SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright The Lance Authors syntax = "proto3"; package lance.encodings; import "google/protobuf/empty.proto"; // This file contains a specification for encodings that can be used // to store and load Arrow data into a Lance file. // // # Types // // This file assumes the user wants to load data into Arrow arrays and // explains how to map Arrow arrays into Lance files. Encodings are divided // into "array encoding" (which maps to an Arrow array and may contain multiple // buffers) and "buffer encoding" (which encodes a single buffer of data). // // # Encoding Tree // // Most encodings are layered on top of each other. These form a tree of // encodings with a single root node. To encode an array you will typically // start with the root node and then take the output from that root encoding // and feed it into child encodings. The decoding process works in reverse. // // # Multi-column Encodings // // Some Arrow arrays will map to more than one column of Lance data. For // example, struct arrays and list arrays. This file only contains encodings // for a single column. However, it does describe how multi-column arrays can // be encoded. // A pointer to a buffer in a Lance file // // A writer can place a buffer in three different locations. The buffer // can go in the data page, in the column metadata, or in the file metadata. // The writer is free to choose whatever is most appropriate (for example, a dictionary // that is shared across all pages in a column will probably go in the column // metadata). This specification does not dictate where the buffer should go. message Buffer { // The index of the buffer in the collection of buffers uint32 buffer_index = 1; // The collection holding the buffer enum BufferType { // The buffer is stored in the data page itself page = 0; // The buffer is stored in the column metadata column = 1; // The buffer is stored in the file metadata file = 2; }; BufferType buffer_type = 2; } // An encoding that adds nullability to another array encoding // // This can wrap any array encoding and add nullability information message Nullable { message NoNull { ArrayEncoding values = 1; } message AllNull {} message SomeNull { ArrayEncoding validity = 1; ArrayEncoding values = 2; } oneof nullability { // The array has no nulls and there is a single buffer needed NoNull no_nulls = 1; // The array may have nulls and we need two buffers SomeNull some_nulls = 2; // All values are null (no buffers needed) AllNull all_nulls = 3; } } // An array encoding for variable-length list fields message List { // An array containing the offsets into an items array. // // This array will have num_rows items and will never // have nulls. // // If the list at index i is not null then offsets[i] will // contain `base + len(list)` where `base` is defined as: // i == 0: 0 // i > 0: (offsets[i-1] % null_offset_adjustment) // // To help understand we can consider the following example list: // [ [A, B], null, [], [C, D, E] ] // // The offsets will be [2, ?, 2, 5] // // If the incoming list at index i IS null then offsets[i] will // contain `base + len(list) + null_offset_adjustment` where `base` // is defined the same as above. // // To complete the above example let's assume that `null_offset_adjustment` // is 7. Then the offsets will be [2, 9, 2, 5] // // If there are no nulls then the offsets we write here are exactly the // same as the offsets in an Arrow list array (except we omit the leading // 0 which is redundant) // // The reason we do this is so that reading a single list at index i only // requires us to load the indices at i and i-1. // // If the offset at index i is greater than `null_offset_adjustment`` // then the list at index i is null. // // Otherwise the length of the list is `offsets[i] - base` where // base is defined the same as above. // // Let's consider our example offsets: [2, 9, 2, 5] // // We can take any range of lists and determine how many list items are // referenced by the sublist. // // 0..3: [_, 5] -> items 0..5 (base = 0* and end is 5) // 0..2: [_, 2] -> items 0..2 (base = 0* and end is 2) // 0..1: [_, 9] -> items 0..2 (base = 0* and end is 9 % 7) // 1..3: [2, 5] -> items 2..5 (base = 2 and end is 5) // 1..2: [2, 2] -> items 2..2 (base = 2 and end is 2) // 2..3: [9, 5] -> items 2..5 (base = 9 % 7 and end is 5) // // * When the start of our range is the 0th item the base is always 0 and we only // need to load a single index from disk to determine the range. // // The data type of the offsets array is flexible and does not need // to match the data type of the destination array. Please note that the offsets // array is very likely to be efficiently encoded by bit packing deltas. ArrayEncoding offsets = 1; // If a list is null then we add this value to the offset // // This value must be greater than the length of the items so that // (offset + null_offset_adjustment) is never used by a non-null list. // // Note that this value cannot be equal to the length of the items // because then a page with a single list would store [ X ] and we // couldn't know if that is a null list or a list with X items. // // Therefore, the best choice for this value is 1 + # of items. // Choosing this will maximize the bit packing that we can apply to the offsets. uint64 null_offset_adjustment = 2; // How many items are referenced by these offsets. This is needed in // order to determine which items pages map to this offsets page. uint64 num_items = 3; } // An array encoding for fixed-size list fields message FixedSizeList { /// The number of items in each list uint32 dimension = 1; /// The items in the list ArrayEncoding items = 2; } message Compression { string scheme = 1; } // Fixed width items placed contiguously in a buffer message Flat { // the number of bits per value, must be greater than 0, does // not need to be a multiple of 8 uint64 bits_per_value = 1; // the buffer of values Buffer buffer = 2; // The Compression message can specify the compression scheme (e.g. zstd) and any // other information that is needed for decompression. Compression compression = 3; } // An array encoding for shredded structs that will never be null // // There is no actual data in this column. // // TODO: Struct validity bitmaps will be placed here. message SimpleStruct {} // An array encoding for binary fields message Binary { ArrayEncoding indices = 1; ArrayEncoding bytes = 2; uint64 null_adjustment = 3; } // An array encoding for dictionary-encoded fields message Dictionary { ArrayEncoding indices = 1; ArrayEncoding items = 2; uint32 num_dictionary_items = 3; } // Encodings that decode into an Arrow array message ArrayEncoding { oneof array_encoding { Flat flat = 1; Nullable nullable = 2; FixedSizeList fixed_size_list = 3; List list = 4; SimpleStruct struct = 5; Binary binary = 6; Dictionary dictionary = 7; } } // Wraps a column with a zone map index that can be used // to apply pushdown filters message ZoneIndex { uint32 rows_per_zone = 1; Buffer zone_map_buffer = 2; ColumnEncoding inner = 3; } // Encodings that describe a column of values message ColumnEncoding { oneof column_encoding { // No special encoding, just column values google.protobuf.Empty values = 1; ZoneIndex zone_index = 2; } }