/* * Copyright 2021 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #pragma once #include #include CUB_NAMESPACE_BEGIN namespace detail { namespace temporary_storage { class slot; template class alias; template class layout; /** * @brief Temporary storage slot that can be considered a C++ union with an * arbitrary fields count. * * @warning slot lifetime is defined by the lifetime of the associated layout. * It's impossible to request new array if layout is already mapped. * * @par A Simple Example * @code * auto slot = temporary_storage.get_slot(0); * * // Add fields into the slot * // Create an int alias with 0 elements: * auto int_array = slot->create_alias(); * // Create a double alias with 2 elements: * auto double_array = slot->create_alias(2); * // Create a char alias with 0 elements: * auto empty_array = slot->create_alias(); * // Slot size is defined by double_array size (2 * sizeof(double)) * * if (condition) * { * int_array.grow(42); * // Now slot size is defined by int_array size (42 * sizeof(int)) * } * * // Temporary storage mapping * // ... * int *d_int_array = int_array.get(); * double *d_double_array = double_array.get(); * char *d_empty_array = empty_array.get(); // Guaranteed to return nullptr * @endcode */ class slot { std::size_t m_size{}; void *m_pointer{}; public: slot() = default; /** * @brief Returns an array of type @p T and length @p elements */ template __host__ __device__ alias create_alias(std::size_t elements = 0); private: __host__ __device__ void set_bytes_required(std::size_t new_size) { m_size = (max)(m_size, new_size); } __host__ __device__ std::size_t get_bytes_required() const { return m_size; } __host__ __device__ void set_storage(void *ptr) { m_pointer = ptr; } __host__ __device__ void *get_storage() const { return m_pointer; } template friend class alias; template friend class layout; }; /** * @brief Named memory region of a temporary storage slot * * @par Overview * This class provides a typed wrapper of a temporary slot memory region. * It can be considered as a field in the C++ union. It's only possible to * increase the array size. * * @warning alias lifetime is defined by the lifetime of the associated slot * It's impossible to grow the array if the layout is already mapped. */ template class alias { slot &m_slot; std::size_t m_elements{}; __host__ __device__ explicit alias(slot &slot, std::size_t elements = 0) : m_slot(slot) , m_elements(elements) { this->update_slot(); } __host__ __device__ void update_slot() { m_slot.set_bytes_required(m_elements * sizeof(T)); } public: alias() = delete; /** * @brief Increases the number of elements * * @warning * This method should be called before temporary storage mapping stage. * * @param[in] new_elements Increases the memory region occupied in the * temporary slot to fit up to @p new_elements items * of type @p T. */ __host__ __device__ void grow(std::size_t new_elements) { m_elements = new_elements; this->update_slot(); } /** * @brief Returns pointer to array * * If the @p elements number is equal to zero, or storage layout isn't mapped, * @p nullptr is returned. */ __host__ __device__ T *get() const { if (m_elements == 0) { return nullptr; } return reinterpret_cast(m_slot.get_storage()); } friend class slot; }; template __host__ __device__ alias slot::create_alias(std::size_t elements) { return alias(*this, elements); } /** * @brief Temporary storage layout represents a structure with * @p SlotsCount union-like fields * * The layout can be mapped to a temporary buffer only once. * * @par A Simple Example * @code * cub::detail::temporary_storage::layout<3> temporary_storage; * * auto slot_1 = temporary_storage.get_slot(0); * auto slot_2 = temporary_storage.get_slot(1); * * // Add fields into the first slot * auto int_array = slot_1->create_alias(1); * auto double_array = slot_1->create_alias(2); * * // Add fields into the second slot * auto char_array = slot_2->create_alias(); * * // The equivalent C++ structure could look like * // struct StorageLayout * // { * // union { * // } slot_0; * // std::byte padding_0[256 - sizeof (slot_0)]; * // * // union { * // int alias_0[1]; * // double alias_1[2]; * // } slot_1; * // std::byte padding_1[256 - sizeof (slot_1)]; * // * // union { * // char alias_0[0]; * // } slot_2; * // std::byte padding_2[256 - sizeof (slot_2)]; * // }; * * // The third slot is empty * * // Temporary storage mapping * if (d_temp_storage == nullptr) * { * temp_storage_bytes = temporary_storage.get_size(); * return; * } * else * { * temporary_storage.map_to_buffer(d_temp_storage, temp_storage_bytes); * } * * // Use pointers * int *d_int_array = int_array.get(); * double *d_double_array = double_array.get(); * char *d_char_array = char_array.get(); * @endcode */ template class layout { slot m_slots[SlotsCount]; std::size_t m_sizes[SlotsCount]; void *m_pointers[SlotsCount]; bool m_layout_was_mapped {}; public: layout() = default; __host__ __device__ slot *get_slot(int slot_id) { if (slot_id < SlotsCount) { return &m_slots[slot_id]; } return nullptr; } /** * @brief Returns required temporary storage size in bytes */ __host__ __device__ std::size_t get_size() { this->prepare_interface(); // AliasTemporaries can return error only in mapping stage, // so it's safe to ignore it here. std::size_t temp_storage_bytes{}; AliasTemporaries(nullptr, temp_storage_bytes, m_pointers, m_sizes); if (temp_storage_bytes == 0) { // The current CUB convention implies that there are two stages for each // device-scope function call. The first one returns the required storage // size. The second stage consumes temporary storage to perform some work. // The only way to distinguish between the two stages is by checking the // value of the temporary storage pointer. If zero bytes are requested, // `cudaMalloc` will return `nullptr`. This fact makes it impossible to // distinguish between the two stages, so we request some fixed amount of // bytes (even if we don't need it) to have a non-null temporary storage // pointer. return 1; } return temp_storage_bytes; } /** * @brief Maps the layout to the temporary storage buffer. */ __host__ __device__ cudaError_t map_to_buffer(void *d_temp_storage, std::size_t temp_storage_bytes) { if (m_layout_was_mapped) { return cudaErrorAlreadyMapped; } this->prepare_interface(); cudaError_t error = cudaSuccess; if ((error = AliasTemporaries(d_temp_storage, temp_storage_bytes, m_pointers, m_sizes))) { return error; } for (std::size_t slot_id = 0; slot_id < SlotsCount; slot_id++) { m_slots[slot_id].set_storage(m_pointers[slot_id]); } m_layout_was_mapped = true; return error; } private: __host__ __device__ void prepare_interface() { if (m_layout_was_mapped) { return; } for (std::size_t slot_id = 0; slot_id < SlotsCount; slot_id++) { const std::size_t slot_size = m_slots[slot_id].get_bytes_required(); m_sizes[slot_id] = slot_size; m_pointers[slot_id] = nullptr; } } }; } // namespace temporary_storage } // namespace detail CUB_NAMESPACE_END