// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The Lance Authors

syntax = "proto3";

package lance.table;

import "file.proto";
import "google/protobuf/timestamp.proto";

/* Format:
 *
 * +----------------------------------------+
 * | Encoded Column 0, Chunk 0              |
 *    ...
 * | Encoded Column M, Chunk N - 1          |
 * | Encoded Column M, Chunk N              |
 * | Indices ...                            |
 * | Chunk Position (M x N x 8)             |
 * | Manifest (Optional)                    |
 * | Metadata                               |
 * | i64: metadata position                 |
 * | MAJOR_VERSION | MINOR_VERSION | "LANC" |
 * +----------------------------------------+
 */

// UUID type, encoded as 16 bytes.
message UUID {
  bytes uuid = 1;
}

// Manifest is a global section shared between all the files.
message Manifest {
  // All fields of the dataset, including the nested fields.
  repeated lance.file.Field fields = 1;

  // Fragments of the dataset.
  repeated DataFragment fragments = 2;

  // Snapshot version number.
  uint64 version = 3;

  // The file position of the version auxiliary data.
  //  * It is not inheritable between versions.
  //  * It is not loaded by default during query.
  uint64 version_aux_data = 4;

  // Schema metadata.
  //
  // NOTE(review): the map key/value types were missing in the source text;
  // restored as string -> bytes — confirm against the canonical schema.
  map<string, bytes> metadata = 5;

  message WriterVersion {
    // The name of the library that created this file.
    string library = 1;
    // The version of the library that created this file. Because we cannot
    // assume that the library is semantically versioned, this is a string.
    // However, if it is semantically versioned, it should be a valid semver
    // string without any 'v' prefix. For example: `2.0.0`, `2.0.0-rc.1`.
    string version = 2;
  }

  // The version of the writer that created this file.
  //
  // This information may be used to detect whether the file may have known
  // bugs associated with that writer.
  WriterVersion writer_version = 13;

  // If present, the file position of the index metadata.
  optional uint64 index_section = 6;

  // Version creation timestamp, UTC timezone.
  google.protobuf.Timestamp timestamp = 7;

  // Optional version tag.
  string tag = 8;

  // Feature flags for readers.
  //
  // A bitmap of flags that indicate which features are required to be able to
  // read the table. If a reader does not recognize a flag that is set, it
  // should not attempt to read the dataset.
  //
  // Known flags:
  // * 1: deletion files are present
  // * 2: move_stable_row_ids: row IDs are tracked and stable after move
  //      operations (such as compaction), but not updates.
  uint64 reader_feature_flags = 9;

  // Feature flags for writers.
  //
  // A bitmap of flags that indicate which features are required to be able to
  // write to the dataset. If a writer does not recognize a flag that is set,
  // it should not attempt to write to the dataset.
  //
  // The flags are the same as for reader_feature_flags, although they will not
  // always apply to both.
  uint64 writer_feature_flags = 10;

  // The highest fragment ID that has been used so far.
  //
  // This ID is not guaranteed to be present in the current version, but it
  // may have been used in previous versions.
  //
  // For a single file, will be zero.
  uint32 max_fragment_id = 11;

  // Path to the transaction file, relative to `{root}/_transactions`.
  //
  // This contains a serialized Transaction message representing the
  // transaction that created this version.
  //
  // May be empty if no transaction file was written.
  //
  // The path format is "{read_version}-{uuid}.txn" where {read_version} is
  // the version of the table the transaction read from, and {uuid} is a
  // hyphen-separated UUID.
  string transaction_file = 12;

  // The next unused row id. If zero, then the table does not have any rows.
  //
  // This is only used if the "move_stable_row_ids" feature flag is set.
  uint64 next_row_id = 14;
} // Manifest

// Auxiliary data attached to a version.
// Only loaded on-demand.
message VersionAuxData {
  // Key-value metadata.
  //
  // NOTE(review): the map key/value types were missing in the source text;
  // restored as string -> bytes — confirm against the canonical schema.
  map<string, bytes> metadata = 3;
}

// Metadata describing the index.
message IndexMetadata {
  // Unique ID of an index. It is unique across all the dataset versions.
  UUID uuid = 1;

  // The columns to build the index.
  repeated int32 fields = 2;

  // Index name. Must be unique within one dataset version.
  string name = 3;

  // The version of the dataset this index was built from.
  uint64 dataset_version = 4;

  // A bitmap of the included fragment ids.
  //
  // This may be used to determine how much of the dataset is covered by the
  // index. This information can be retrieved from the dataset by looking at
  // the dataset at `dataset_version`. However, since the old version may be
  // deleted while the index is still in use, this information is also stored
  // in the index.
  //
  // The bitmap is stored as a 32-bit Roaring bitmap.
  bytes fragment_bitmap = 5;
}

// Index section, containing a list of index metadata for one dataset version.
message IndexSection {
  repeated IndexMetadata indices = 1;
}

// Data fragment. A fragment is a set of files which represent the
// different columns of the same rows.
// If a column exists in the schema, but the related file does not exist,
// treat this column as nulls.
message DataFragment {
  // Unique ID of each DataFragment.
  uint64 id = 1;

  repeated DataFile files = 2;

  // File that indicates which rows, if any, should be considered deleted.
  DeletionFile deletion_file = 3;

  // TODO: What's the simplest way we can allow an inline tombstone bitmap?

  // A serialized RowIdSequence message (see rowids.proto).
  //
  // These are the row ids for the fragment, in order of the rows as they
  // appear. That is, if a fragment has 3 rows, and the row ids are
  // [1, 42, 3], then the first row is row 1, the second row is row 42, and
  // the third row is row 3.
  oneof row_id_sequence {
    // If small (< 200KB), the row ids are stored inline.
    bytes inline_row_ids = 5;
    // Otherwise, stored as part of a file.
    ExternalFile external_row_ids = 6;
  } // row_id_sequence

  // Number of original rows in the fragment, this includes rows that are
  // now marked with deletion tombstones. To compute the current number of
  // rows, subtract `deletion_file.num_deleted_rows` from this value.
  uint64 physical_rows = 4;
}

// Lance Data File
message DataFile {
  // Relative path to the root.
  string path = 1;

  // The ids of the fields/columns in this file.
  //
  // In Lance v1 IDs are assigned based on position in the file, offset by the
  // max existing field id in the table (if any already). So when a fragment is
  // first created with one file of N columns, the field ids will be
  // 1, 2, ..., N. If a second fragment is created with M columns, the field
  // ids will be N+1, N+2, ..., N+M.
  //
  // In Lance v1 there is one field for each field in the input schema, this
  // includes nested fields (both struct and list). Fixed size list fields have
  // only a single field id (these are not considered nested fields in
  // Lance v1).
  //
  // This allows column indices to be calculated from field IDs and the input
  // schema.
  //
  // In Lance v2 the field IDs generally follow the same pattern but there is
  // no way to calculate the column index from the field ID. This is because a
  // given field could be encoded in many different ways, some of which occupy
  // a different number of columns. For example, a struct field could be
  // encoded into N + 1 columns or it could be encoded into a single packed
  // column. To determine column indices the column_indices property should be
  // used instead.
  //
  // In Lance v1 these ids must be sorted but might not always be contiguous.
  repeated int32 fields = 2;

  // The top-level column indices for each field in the file.
  //
  // If the data file is version 1 then this property will be empty.
  //
  // Otherwise there must be one entry for each field in `fields`.
  //
  // Some fields may not correspond to a top-level column in the file. In
  // these cases the index will be -1.
  //
  // For example, consider the schema:
  //
  // - dimension: packed-struct (0):
  //   - x: u32 (1)
  //   - y: u32 (2)
  // - path: list (3)
  // - embedding: fsl<768> (4)
  //   - fp64
  // - borders: fsl<4> (5)
  //   - simple-struct (6)
  //     - margin: fp64 (7)
  //     - padding: fp64 (8)
  //
  // One possible column indices array could be:
  // [0, -1, -1, 1, 3, 4, 5, 6, 7]
  //
  // This reflects quite a few phenomena:
  // - The packed struct is encoded into a single column and there is no
  //   top-level column for the x or y fields
  // - The variable sized list is encoded into two columns
  // - The embedding is encoded into a single column (common for FSL of
  //   primitive) and there is no "FSL column"
  // - The borders field actually does have an "FSL column"
  //
  // The column indices table may not have duplicates (other than -1).
  repeated int32 column_indices = 3;

  // The major file version used to create the file.
  uint32 file_major_version = 4;

  // The minor file version used to create the file.
  //
  // If both `file_major_version` and `file_minor_version` are set to 0,
  // then this is a version 0.1 or version 0.2 file.
  uint32 file_minor_version = 5;
} // DataFile

// Deletion File
//
// The path of the deletion file is constructed as:
//   {root}/_deletions/{fragment_id}-{read_version}-{id}.{extension}
// where {extension} is `.arrow` or `.bin` depending on the type of deletion.
message DeletionFile {
  // Type of deletion file, which varies depending on what is the most
  // efficient way to store the deleted row offsets. If none, then will be
  // unspecified. If there are sparsely deleted rows, then ARROW_ARRAY is the
  // most efficient. If there are densely deleted rows, then BITMAP is the
  // most efficient.
  enum DeletionFileType {
    // Deletion file is a single Int32Array of deleted row offsets. This is
    // stored as an Arrow IPC file with one batch and one column. Has a
    // .arrow extension.
    ARROW_ARRAY = 0;
    // Deletion file is a Roaring Bitmap of deleted row offsets. Has a .bin
    // extension.
    BITMAP = 1;
  }

  // Type of deletion file. If it is unspecified, then the remaining fields
  // will be missing.
  DeletionFileType file_type = 1;

  // The version of the dataset this deletion file was built from.
  uint64 read_version = 2;

  // An opaque id used to differentiate this file from others written by
  // concurrent writers.
  uint64 id = 3;

  // The number of rows that are marked as deleted.
  uint64 num_deleted_rows = 4;
} // DeletionFile

message ExternalFile {
  // Path to the file, relative to the root of the table.
  string path = 1;

  // The offset in the file where the data starts.
  uint64 offset = 2;

  // The size of the data in the file.
  uint64 size = 3;
}