#ifndef OSRM_INDEXED_DATA_HPP #define OSRM_INDEXED_DATA_HPP #include "storage/tar_fwd.hpp" #include "util/exception.hpp" #include "util/string_view.hpp" #include "util/vector_view.hpp" #include #include #include #include #include #include #include namespace osrm { namespace util { namespace detail { template struct IndexedDataImpl; } namespace serialization { template inline void read(storage::tar::FileReader &reader, const std::string &name, detail::IndexedDataImpl &index_data); template inline void write(storage::tar::FileWriter &writer, const std::string &name, const detail::IndexedDataImpl &index_data); } template struct VariableGroupBlock { static constexpr std::uint32_t BLOCK_SIZE = N; using ResultType = T; using ValueType = typename T::value_type; static_assert(0 <= BLOCK_SIZE && BLOCK_SIZE <= 16, "incorrect block size"); static_assert(sizeof(ValueType) == 1, "data basic type must char"); struct BlockReference { std::uint32_t offset; std::uint32_t descriptor; }; VariableGroupBlock() {} /// Returns ceiling(log_256(value + 1)) inline std::uint32_t log256(std::uint32_t value) const { BOOST_ASSERT(value < 0x1000000); return value == 0 ? 0 : value < 0x100 ? 1 : value < 0x10000 ? 2 : 3; } /// Advance data iterator by the value of byte_length bytes at length iterator. /// Advance length iterator by byte_length. template inline void var_advance(DataIterator &data, DataIterator &length, std::uint32_t byte_length) const { if (byte_length == 0) { } else if (byte_length == 1) { data += static_cast(*length++); } else if (byte_length == 2) { data += static_cast(*length++); data += static_cast(*length++) << 8; } else { BOOST_ASSERT(byte_length == 3); data += static_cast(*length++); data += static_cast(*length++) << 8; data += static_cast(*length++) << 16; } } /// Summation of 16 2-bit values using SWAR inline std::uint32_t sum2bits(std::uint32_t value) const { value = (value >> 2 & 0x33333333) + (value & 0x33333333); value = (value >> 4 & 0x0f0f0f0f) + (value & 0x0f0f0f0f); value = (value >> 8 & 0x00ff00ff) + (value & 0x00ff00ff); return (value >> 16 & 0x0000ffff) + (value & 0x0000ffff); } /// Write a block reference {offset, descriptor}, where offset /// is a global block offset and descriptor is a 32-bit value /// of prefix length. sum(descriptor) equals to the block /// prefix length. /// Returns the block prefix length. template OutIter WriteBlockReference(OffsetIterator first, OffsetIterator last, Offset &data_offset, OutIter out) const { BOOST_ASSERT(data_offset <= std::numeric_limits::max()); Offset prefix_length = 0; BlockReference refernce{static_cast(data_offset), 0}; for (; first != last; --last) { const std::uint32_t data_length = *last - *std::prev(last); if (data_length >= 0x1000000) throw util::exception(boost::format("too large data length %1%") % data_length); const std::uint32_t byte_length = log256(data_length); refernce.descriptor = (refernce.descriptor << 2) | byte_length; prefix_length += byte_length; } data_offset += prefix_length; *out++ = refernce; return out; } /// Write a block prefix that is an array of variable encoded data lengths: /// 0 is omitted; /// 1..255 is 1 byte; /// 256..65535 is 2 bytes; /// 65536..16777215 is 3 bytes. /// [first..last] is an inclusive range of block data. /// The length of the last item in the block is not stored. template OutByteIter WriteBlockPrefix(OffsetIterator first, OffsetIterator last, OutByteIter out) const { for (OffsetIterator curr = first, next = std::next(first); curr != last; ++curr, ++next) { const std::uint32_t data_length = *next - *curr; const std::uint32_t byte_length = log256(data_length); if (byte_length == 0) continue; // Here, we're only writing a few bytes from the 4-byte std::uint32_t, // so we need to cast to (char *) out = std::copy_n((const char *)&data_length, byte_length, out); } return out; } /// Advances the range to an item stored in the referenced block. /// Input [first..last) is a range of the complete block data with prefix. /// Output [first..last) is a range of the referenced data at local_index. template void ReadRefrencedBlock(const BlockReference &reference, std::uint32_t local_index, DataIterator &first, DataIterator &last) const { std::uint32_t descriptor = reference.descriptor; DataIterator var_lengths = first; // iterator to the variable lengths part std::advance(first, sum2bits(descriptor)); // advance first to the block data part for (std::uint32_t i = 0; i < local_index; ++i, descriptor >>= 2) { var_advance(first, var_lengths, descriptor & 0x3); } if (local_index < BLOCK_SIZE) { last = first; var_advance(last, var_lengths, descriptor & 0x3); } } }; template struct FixedGroupBlock { static constexpr std::uint32_t BLOCK_SIZE = N; using ResultType = T; using ValueType = typename T::value_type; static_assert(sizeof(ValueType) == 1, "data basic type must char"); struct BlockReference { std::uint32_t offset; }; FixedGroupBlock() {} /// Write a block reference {offset}, where offset is a global block offset /// Returns the fixed block prefix length. template OutIterator WriteBlockReference(OffsetIterator, OffsetIterator, Offset &data_offset, OutIterator out) const { BOOST_ASSERT(data_offset <= std::numeric_limits::max()); BlockReference refernce{static_cast(data_offset)}; data_offset += BLOCK_SIZE; *out++ = refernce; return out; } /// Write a fixed length block prefix. template OutByteIter WriteBlockPrefix(OffsetIterator first, OffsetIterator last, OutByteIter out) const { constexpr std::size_t MAX_LENGTH = std::numeric_limits>::max(); auto index = 0; std::array prefix; for (OffsetIterator curr = first, next = std::next(first); curr != last; ++curr, ++next) { const std::uint32_t data_length = *next - *curr; if (data_length > MAX_LENGTH) throw util::exception(boost::format("too large data length %1% > %2%") % data_length % MAX_LENGTH); prefix[index++] = data_length; } out = std::copy_n((const char *)prefix.data(), sizeof(ValueType) * BLOCK_SIZE, out); return out; } /// Advances the range to an item stored in the referenced block. /// Input [first..last) is a range of the complete block data with prefix. /// Output [first..last) is a range of the referenced data at local_index. template void ReadRefrencedBlock(const BlockReference &, std::uint32_t local_index, DataIterator &first, DataIterator &last) const { DataIterator fixed_lengths = first; // iterator to the fixed lengths part std::advance(first, BLOCK_SIZE); // advance first to the block data part for (std::uint32_t i = 0; i < local_index; ++i) { first += static_cast(*fixed_lengths++); } if (local_index < BLOCK_SIZE) { last = first + static_cast(*fixed_lengths); } } }; namespace detail { template struct IndexedDataImpl { static constexpr std::uint32_t BLOCK_SIZE = GroupBlockPolicy::BLOCK_SIZE; using BlocksNumberType = std::uint32_t; using DataSizeType = std::uint64_t; using BlockReference = typename GroupBlockPolicy::BlockReference; using ResultType = typename GroupBlockPolicy::ResultType; using ValueType = typename GroupBlockPolicy::ValueType; static_assert(sizeof(ValueType) == 1, "data basic type must char"); IndexedDataImpl() = default; IndexedDataImpl(util::vector_view blocks_, util::vector_view values_) : blocks(std::move(blocks_)), values(std::move(values_)) { } bool empty() const { return blocks.empty(); } template IndexedDataImpl(OffsetIterator first, OffsetIterator last, DataIterator data) { static_assert(sizeof(typename DataIterator::value_type) == 1, "data basic type must char"); using diff_type = typename OffsetIterator::difference_type; BOOST_ASSERT(first < last); const OffsetIterator sentinel = std::prev(last); // Write number of blocks const auto number_of_elements = std::distance(first, sentinel); const BlocksNumberType number_of_blocks = number_of_elements == 0 ? 0 : 1 + (std::distance(first, sentinel) - 1) / (BLOCK_SIZE + 1); blocks.resize(number_of_blocks); // Write block references and compute the total data size that includes prefix and data const GroupBlockPolicy block; auto block_iter = blocks.begin(); DataSizeType data_size = 0; for (OffsetIterator curr = first, next = first; next != sentinel; curr = next) { std::advance(next, std::min(BLOCK_SIZE, std::distance(next, sentinel))); block_iter = block.WriteBlockReference(curr, next, data_size, block_iter); std::advance(next, std::min(1, std::distance(next, sentinel))); data_size += *next - *curr; } values.resize(data_size); auto values_byte_iter = reinterpret_cast(values.data()); // Write data blocks that are (prefix, data) for (OffsetIterator curr = first, next = first; next != sentinel; curr = next) { std::advance(next, std::min(BLOCK_SIZE, std::distance(next, sentinel))); values_byte_iter = block.WriteBlockPrefix(curr, next, values_byte_iter); std::advance(next, std::min(1, std::distance(next, sentinel))); auto to_bytes = [&](const auto &data) { values_byte_iter = std::copy_n(&data, sizeof(ValueType), values_byte_iter); }; std::copy(data + *curr, data + *next, boost::make_function_output_iterator(std::cref(to_bytes))); } } // Return value at the given index ResultType at(std::uint32_t index) const { // Get block external ad internal indices const BlocksNumberType block_idx = index / (BLOCK_SIZE + 1); const std::uint32_t internal_idx = index % (BLOCK_SIZE + 1); if (block_idx >= blocks.size()) return ResultType(); // Get block first and last iterators auto first = values.begin() + blocks[block_idx].offset; auto last = block_idx + 1 == blocks.size() ? values.end() : values.begin() + blocks[block_idx + 1].offset; const GroupBlockPolicy block; block.ReadRefrencedBlock(blocks[block_idx], internal_idx, first, last); return adapt(&*first, &*last); } friend void serialization::read(storage::tar::FileReader &reader, const std::string &name, IndexedDataImpl &index_data); friend void serialization::write(storage::tar::FileWriter &writer, const std::string &name, const IndexedDataImpl &index_data); private: template typename std::enable_if::value, T>::type adapt(const ValueType *first, const ValueType *last) const { return ResultType(first, last); } template typename std::enable_if::value, T>::type adapt(const ValueType *first, const ValueType *last) const { return ResultType(first, std::distance(first, last)); } template using Vector = util::ViewOrVector; Vector blocks; Vector values; }; } template using IndexedData = detail::IndexedDataImpl; template using IndexedDataView = detail::IndexedDataImpl; } } #endif // OSRM_INDEXED_DATA_HPP