/** * @file dd_compressor.h * * @section LICENSE * * The MIT License * * @copyright Copyright (c) 2017-2020 TileDB, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. * * @section DESCRIPTION * * This file defines the double delta compressor class. */ #ifndef TILEDB_DOUBLE_DELTA_H #define TILEDB_DOUBLE_DELTA_H #include "tiledb/common/status.h" using namespace tiledb::common; namespace tiledb { namespace sm { class Buffer; class ConstBuffer; class PreallocatedBuffer; enum class Datatype : uint8_t; /** Implements a double delta compressor. */ class DoubleDelta { public: /** * Constant overhead (equal to 1 byte for the bitsize, 8 bytes for * the number of cells, and 8 bytes for a potential extra 64-bit chunk). */ static const uint64_t OVERHEAD; /* ****************************** */ /* API */ /* ****************************** */ /** * Compression function. The algorithm works as follows: * * Let the input buffer contain the following values: * * in_0 | in_1 | in_2 | ... | in_n * * The output buffer will contain the following after compression: * * bitsize | n | in_0 | in_1 | b_2 | abs(dd_2) | b_3 | abs(dd_3) | ... * ... | b_n | abs(dd_n) * * where: * - *bitsize* (char) is the minimum number of bits required to represent * any abs(dd_i). * - *n* (uint64_t) is the number of values in the input buffer. * - **b_i** is the sign of dd_i * - **dd_i** is equal to (in_{i} - in_{i-1}) - (in_{i-1} - in_{i-2}) * * In case the bitsize is equal to the size of the data type plus one (for * the sign), then it does not make sense to compress and, thus, the * algorithm simply copies the input to the output, though after *bitsize* * and *n* that are always written to the output buffer. * * The algorithm populates and writes to the output buffer a 64-bit chunk * at a time. Therefore, the output buffer may end up having a worst-case * overhead of 1 (bitsize) + 8 (n) + 8 (last, potentially almost empty chunk) * bytes. * * @param type The type of the input values. * @param input_buffer Input buffer to read from. * @param output_buffer Output buffer to write to the compressed data. * @return Status * * @note The function will fail with an error in two cases: (i) the output * buffer incurs a memory allocation error, and (ii) some double delta * value is out of bounds. Note that all double deltas are represented * as int64_t. Therefore, the out of bounds case occurs when adding * two huge positive numbers (e.g., INT64_MAX) resulting in a negative * value, or adding to very small negative numbers (e.g., -INT64_MAX), * resulting in a positive number. However, both these two cases are * extreme. */ static Status compress( Datatype type, ConstBuffer* input_buffer, Buffer* output_buffer); /** * Decompression function. * * @param type The type of the original decompressed values. * @param input_buffer Input buffer to read from. * @param output_buffer Output buffer to write the decompressed data to. * @return Status */ static Status decompress( Datatype type, ConstBuffer* input_buffer, PreallocatedBuffer* output_buffer); /** Returns the compression overhead for the given input. */ static uint64_t overhead(uint64_t nbytes); private: /* ****************************** */ /* PRIVATE METHODS */ /* ****************************** */ /** Templated version of *compress* on the type of buffer values. */ template static Status compress(ConstBuffer* input_buffer, Buffer* output_buffer); /** * Calculates the bitsize all the double deltas will have. Note that * the sign bit is not counted. * * @tparam The datatype of the values. * @param in The input buffer. * @param num The number of values in the buffer. * @param bitsize The bitsize of the double deltas to be retrieved. * @return Status */ template static Status compute_bitsize(T* in, uint64_t num, unsigned int* bitsize); /** * Decompression function. * * @tparam The datatype of the values. * @param input_buffer Input buffer to read from. * @param output_buffer Output buffer to write the decompressed data to. * @return Status */ template static Status decompress( ConstBuffer* input_buffer, PreallocatedBuffer* output_buffer); /** * Reads/reconstructs a double delta value from a compressed buffer. * * @param buff The input buffer. * @param double_delta The double delta value to be retrieved. * @param bitsize The bitsize of the double delta compression. * @param chunk The function reads from the buffer in whole 64-bit chunks. * This parameters represents some state about the current chunk. * @param bit_in_chunk Part of the chunk state. Stores the current bit * in the chunk the next read will start from. * @return Status */ static Status read_double_delta( ConstBuffer* buff, int64_t* double_delta, int bitsize, uint64_t* chunk, int* bit_in_chunk); /** * Writes a double delta value to a buffer, after reducing its bitsize * to match the input one. * * @param buff The output buffer. * @param double_delta The double delta value to write. * @param bitsize The bitsize the double delta value will reduce to * before being written to the buffer. * @param chunk The function writes to the buffer in whole 64-bit chunks. * This parameters represents some state about the current chunk. * @param bit_in_chunk Part of the chunk state. Stores the current bit * in the chunk the next write will start from. * @return Status */ static Status write_double_delta( Buffer* buff, int64_t double_delta, int bitsize, uint64_t* chunk, int* bit_in_chunk); }; } // namespace sm } // namespace tiledb #endif // TILEDB_GZIP_H