#include #include #include #include #if THRUST_CPP_DIALECT >= 2011 #include #endif template struct reference { typedef T & type; }; template<> struct reference { typedef void type; }; struct unit {}; template struct tracked_pointer : thrust::iterator_facade< tracked_pointer, T, thrust::host_system_tag, thrust::random_access_traversal_tag, typename reference::type, std::ptrdiff_t > { typedef T * raw_pointer; std::size_t id; std::size_t size; std::size_t alignment; std::size_t offset; void * ptr; __host__ __device__ explicit tracked_pointer(T * ptr = NULL) : id(), size(), alignment(), offset(), ptr(ptr) { } __host__ __device__ ~tracked_pointer() { } template operator tracked_pointer() const { tracked_pointer ret; ret.id = id; ret.size = size; ret.alignment = alignment; ret.offset = offset; ret.ptr = ptr; return ret; } __host__ __device__ std::ptrdiff_t distance_to(const tracked_pointer & other) const { return static_cast(other.ptr) - static_cast(ptr); } __host__ __device__ T * get() const { return static_cast(ptr); } // globally qualified, because MSVC somehow prefers the name from the dependent base // of this class over the `reference` template that's visible in the global namespace of this file... __host__ __device__ typename ::reference::type dereference() const { return *get(); } __host__ __device__ void increment() { advance(1); } __host__ __device__ void decrement() { advance(-1); } __host__ __device__ void advance(std::ptrdiff_t diff) { ptr = get() + diff; offset += diff * sizeof(T); } __host__ __device__ bool equal(const tracked_pointer & other) const { return id == other.id && size == other.size && alignment == other.alignment && offset == other.offset && ptr == other.ptr; } }; class tracked_resource final : public thrust::mr::memory_resource > { public: tracked_resource() : id_to_allocate(0), id_to_deallocate(0) { } ~tracked_resource() { ASSERT_EQUAL(id_to_allocate, 0u); ASSERT_EQUAL(id_to_deallocate, 0u); } virtual tracked_pointer do_allocate(std::size_t n, std::size_t alignment = THRUST_MR_DEFAULT_ALIGNMENT) override { ASSERT_EQUAL(static_cast(id_to_allocate), true); void * raw = upstream.do_allocate(n, alignment); tracked_pointer ret(raw); ret.id = id_to_allocate; ret.size = n; ret.alignment = alignment; id_to_allocate = 0; return ret; } virtual void do_deallocate(tracked_pointer p, std::size_t n, std::size_t alignment = THRUST_MR_DEFAULT_ALIGNMENT) override { ASSERT_EQUAL(p.size, n); ASSERT_EQUAL(p.alignment, alignment); if (id_to_deallocate != 0) { ASSERT_EQUAL(p.id, id_to_deallocate); id_to_deallocate = 0; } upstream.do_deallocate(p.ptr, n, alignment); } std::size_t id_to_allocate; std::size_t id_to_deallocate; private: thrust::mr::new_delete_resource upstream; }; template class PoolTemplate> void TestPool() { tracked_resource upstream; upstream.id_to_allocate = -1u; typedef PoolTemplate< tracked_resource > Pool; thrust::mr::pool_options opts = Pool::get_default_options(); opts.cache_oversized = false; // avoid having the destructor run when an assertion failure is raised // (the destructor will try to release, which in turn calls do_deallocate, // which may fail with an assertion failure exception...) Pool * pool = new Pool(&upstream, opts); upstream.id_to_allocate = 1; // first allocation tracked_pointer a1 = pool->do_allocate(12, THRUST_MR_DEFAULT_ALIGNMENT); ASSERT_EQUAL(a1.id, 1u); // due to chunking, the above allocation should be enough for the next one too tracked_pointer a2 = pool->do_allocate(16, THRUST_MR_DEFAULT_ALIGNMENT); ASSERT_EQUAL(a2.id, 1u); // deallocating and allocating back should give the same resource back pool->do_deallocate(a1, 12, THRUST_MR_DEFAULT_ALIGNMENT); tracked_pointer a3 = pool->do_allocate(12, THRUST_MR_DEFAULT_ALIGNMENT); ASSERT_EQUAL(a1.id, a3.id); ASSERT_EQUAL(a1.size, a3.size); ASSERT_EQUAL(a1.alignment, a3.alignment); ASSERT_EQUAL(a1.offset, a3.offset); // allocating over-aligned memory should give non-cached results // unlike with the disjoint version, nothing sensible can be said about the chunk size upstream.id_to_allocate = 2; tracked_pointer a4 = pool->do_allocate(32, THRUST_MR_DEFAULT_ALIGNMENT * 2); ASSERT_EQUAL(a4.id, 2u); ASSERT_EQUAL(a4.alignment, (std::size_t)THRUST_MR_DEFAULT_ALIGNMENT * 2); // and deallocating it should return it back to upstream upstream.id_to_deallocate = 2; pool->do_deallocate(a4, 32u, THRUST_MR_DEFAULT_ALIGNMENT * 2); ASSERT_EQUAL(upstream.id_to_deallocate, 0u); // release actually returns properly sized memory to upstream upstream.id_to_deallocate = 1; pool->release(); ASSERT_EQUAL(upstream.id_to_deallocate, 0u); // and does the same for oversized/overaligned memory upstream.id_to_allocate = 3; tracked_pointer a5 = pool->do_allocate(1024, THRUST_MR_DEFAULT_ALIGNMENT * 2); ASSERT_EQUAL(upstream.id_to_allocate, 0u); ASSERT_EQUAL(a5.id, 3u); upstream.id_to_deallocate = 3; pool->release(); ASSERT_EQUAL(upstream.id_to_deallocate, 0u); // and after that, the formerly cached memory isn't used anymore, // so new memory from upstream is returned back upstream.id_to_allocate = 4; tracked_pointer a6 = pool->do_allocate(16, THRUST_MR_DEFAULT_ALIGNMENT); ASSERT_EQUAL(upstream.id_to_allocate, 0u); ASSERT_EQUAL(a6.id, 4u); // destruction also returns memory upstream.id_to_deallocate = 4; // actually destroy the pool; reasons why RAII is not used outlined at the beginning // of this function delete pool; ASSERT_EQUAL(upstream.id_to_deallocate, 0u); } void TestUnsynchronizedPool() { TestPool(); } DECLARE_UNITTEST(TestUnsynchronizedPool); #if THRUST_CPP_DIALECT >= 2011 void TestSynchronizedPool() { TestPool(); } DECLARE_UNITTEST(TestSynchronizedPool); #endif template class PoolTemplate> void TestPoolCachingOversized() { tracked_resource upstream; upstream.id_to_allocate = -1u; typedef PoolTemplate< tracked_resource > Pool; thrust::mr::pool_options opts = Pool::get_default_options(); opts.cache_oversized = true; opts.largest_block_size = 1024; Pool pool(&upstream, opts); upstream.id_to_allocate = 1; tracked_pointer a1 = pool.do_allocate(2048, 32); ASSERT_EQUAL(a1.id, 1u); upstream.id_to_allocate = 2; tracked_pointer a2 = pool.do_allocate(64, 32); ASSERT_EQUAL(a2.id, 2u); pool.do_deallocate(a2, 64, 32); pool.do_deallocate(a1, 2048, 32); // make sure a good fit is used from the cache tracked_pointer a3 = pool.do_allocate(32, 32); ASSERT_EQUAL(a3.id, 2u); tracked_pointer a4 = pool.do_allocate(1024, 32); ASSERT_EQUAL(a4.id, 1u); pool.do_deallocate(a4, 1024, 32); // make sure that a new block is allocated when there's nothing cached with // the required alignment upstream.id_to_allocate = 3; tracked_pointer a5 = pool.do_allocate(32, 64); ASSERT_EQUAL(a5.id, 3u); pool.release(); // make sure that release actually clears caches upstream.id_to_allocate = 4; tracked_pointer a6 = pool.do_allocate(32, 64); ASSERT_EQUAL(a6.id, 4u); upstream.id_to_allocate = 5; tracked_pointer a7 = pool.do_allocate(2048, 1024); ASSERT_EQUAL(a7.id, 5u); pool.do_deallocate(a7, 2048, 1024); // make sure that the 'ridiculousness' factor for size (options.cached_size_cutoff_factor) // is respected upstream.id_to_allocate = 6; tracked_pointer a8 = pool.do_allocate(24, 1024); ASSERT_EQUAL(a8.id, 6u); // make sure that the 'ridiculousness' factor for alignment (options.cached_alignment_cutoff_factor) // is respected upstream.id_to_allocate = 7; tracked_pointer a9 = pool.do_allocate(2048, 32); ASSERT_EQUAL(a9.id, 7u); } void TestUnsynchronizedPoolCachingOversized() { TestPoolCachingOversized(); } DECLARE_UNITTEST(TestUnsynchronizedPoolCachingOversized); #if THRUST_CPP_DIALECT >= 2011 void TestSynchronizedPoolCachingOversized() { TestPoolCachingOversized(); } DECLARE_UNITTEST(TestSynchronizedPoolCachingOversized); #endif template class PoolTemplate> void TestGlobalPool() { typedef PoolTemplate< thrust::mr::new_delete_resource > Pool; ASSERT_EQUAL(thrust::mr::get_global_resource() != NULL, true); } void TestUnsynchronizedGlobalPool() { TestGlobalPool(); } DECLARE_UNITTEST(TestUnsynchronizedGlobalPool); #if THRUST_CPP_DIALECT >= 2011 void TestSynchronizedGlobalPool() { TestGlobalPool(); } DECLARE_UNITTEST(TestSynchronizedGlobalPool); #endif