// SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright The Lance Authors syntax = "proto3"; package lance.table; // TODO: what would it take to store this in a LanceV2 file? // Or would flatbuffers be better for this? /// A sequence of row IDs. This is split up into one or more segments, /// each of which can be encoded in different ways. The encodings are optimized /// for values that are sorted, which will often be the case with row ids. /// They also have optimized forms depending on how sparse the values are. message RowIdSequence { repeated U64Segment segments = 1; } /// Different ways to encode a sequence of u64 values. message U64Segment { /// A range of u64 values. message Range { /// The start of the range, inclusive. uint64 start = 1; /// The end of the range, exclusive. uint64 end = 2; } /// A range of u64 values with holes. message RangeWithHoles { /// The start of the range, inclusive. uint64 start = 1; /// The end of the range, exclusive. uint64 end = 2; /// The holes in the range, as a sorted array of values; /// Binary search can be used to check whether a value is a hole and should /// be skipped. This can also be used to count the number of holes before a /// given value, if you need to find the logical offset of a value in the /// segment. EncodedU64Array holes = 3; } /// A range of u64 values with a bitmap. message RangeWithBitmap { /// The start of the range, inclusive. uint64 start = 1; /// The end of the range, exclusive. uint64 end = 2; /// A bitmap of the values in the range. The bitmap is a sequence of bytes, /// where each byte represents 8 values. The first byte represents values /// start to start + 7, the second byte represents values start + 8 to /// start + 15, and so on. The most significant bit of each byte represents /// the first value in the range, and the least significant bit represents /// the last value in the range. If the bit is set, the value is in the /// range; if it is not set, the value is not in the range. bytes bitmap = 3; } oneof segment { /// When the values are sorted and contiguous. Range range = 1; /// When the values are sorted but have a few gaps. RangeWithHoles range_with_holes = 2; /// When the values are sorted but have many gaps. RangeWithBitmap range_with_bitmap = 3; /// When the values are sorted but are sparse. EncodedU64Array sorted_array = 4; /// A general array of values, which is not sorted. EncodedU64Array array = 5; } } // RowIdSegment /// A basic bitpacked array of u64 values. message EncodedU64Array { message U16Array { uint64 base = 1; /// The deltas are stored as 16-bit unsigned integers. /// (protobuf doesn't support 16-bit integers, so we use bytes instead) bytes offsets = 2; } message U32Array { uint64 base = 1; /// The deltas are stored as 32-bit unsigned integers. /// (we use bytes instead of uint32 to avoid overhead of varint encoding) bytes offsets = 2; } message U64Array { /// (We use bytes instead of uint64 to avoid overhead of varint encoding) bytes values = 2; } oneof array { U16Array u16_array = 1; U32Array u32_array = 2; U64Array u64_array = 3; } }