// Copyright 2020 Google LLC // SPDX-License-Identifier: Apache-2.0 // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "hwy/aligned_allocator.h" #include #include #include // malloc #include #include #include #include #include "gtest/gtest.h" #include "hwy/base.h" #include "hwy/per_target.h" namespace { // Sample object that keeps track on an external counter of how many times was // the explicit constructor and destructor called. template class SampleObject { public: SampleObject() { data_[0] = 'a'; } explicit SampleObject(int* counter) : counter_(counter) { if (counter) (*counter)++; data_[0] = 'b'; } ~SampleObject() { if (counter_) (*counter_)--; } static_assert(N > sizeof(int*), "SampleObject size too small."); int* counter_ = nullptr; char data_[N - sizeof(int*)]; }; class FakeAllocator { public: // static AllocPtr and FreePtr member to be used with the aligned // allocator. These functions calls the private non-static members. static void* StaticAlloc(void* opaque, size_t bytes) { return reinterpret_cast(opaque)->Alloc(bytes); } static void StaticFree(void* opaque, void* memory) { return reinterpret_cast(opaque)->Free(memory); } // Returns the number of pending allocations to be freed. size_t PendingAllocs() { return allocs_.size(); } private: void* Alloc(size_t bytes) { void* ret = malloc(bytes); allocs_.insert(ret); return ret; } void Free(void* memory) { if (!memory) return; EXPECT_NE(allocs_.end(), allocs_.find(memory)); allocs_.erase(memory); free(memory); } std::set allocs_; }; } // namespace namespace hwy { class AlignedAllocatorTest : public testing::Test {}; TEST(AlignedAllocatorTest, FreeNullptr) { // Calling free with a nullptr is always ok. FreeAlignedBytes(/*aligned_pointer=*/nullptr, /*free_ptr=*/nullptr, /*opaque_ptr=*/nullptr); } TEST(AlignedAllocatorTest, Log2) { EXPECT_EQ(0u, detail::ShiftCount(1)); EXPECT_EQ(1u, detail::ShiftCount(2)); EXPECT_EQ(3u, detail::ShiftCount(8)); } // Allocator returns null when it detects overflow of items * sizeof(T). TEST(AlignedAllocatorTest, Overflow) { constexpr size_t max = ~size_t(0); constexpr size_t msb = (max >> 1) + 1; using Size5 = std::array; using Size10 = std::array; EXPECT_EQ(nullptr, detail::AllocateAlignedItems(max / 2, nullptr, nullptr)); EXPECT_EQ(nullptr, detail::AllocateAlignedItems(max / 3, nullptr, nullptr)); EXPECT_EQ(nullptr, detail::AllocateAlignedItems(max / 4, nullptr, nullptr)); EXPECT_EQ(nullptr, detail::AllocateAlignedItems(msb, nullptr, nullptr)); EXPECT_EQ(nullptr, detail::AllocateAlignedItems(msb + 1, nullptr, nullptr)); EXPECT_EQ(nullptr, detail::AllocateAlignedItems(msb / 4, nullptr, nullptr)); } TEST(AlignedAllocatorTest, AllocDefaultPointers) { const size_t kSize = 7777; void* ptr = AllocateAlignedBytes(kSize, /*alloc_ptr=*/nullptr, /*opaque_ptr=*/nullptr); ASSERT_NE(nullptr, ptr); // Make sure the pointer is actually aligned. EXPECT_EQ(0U, reinterpret_cast(ptr) % HWY_ALIGNMENT); char* p = static_cast(ptr); size_t ret = 0; for (size_t i = 0; i < kSize; i++) { // Performs a computation using p[] to prevent it being optimized away. p[i] = static_cast(i & 0x7F); if (i) ret += static_cast(p[i] * p[i - 1]); } EXPECT_NE(0U, ret); FreeAlignedBytes(ptr, /*free_ptr=*/nullptr, /*opaque_ptr=*/nullptr); } TEST(AlignedAllocatorTest, EmptyAlignedUniquePtr) { AlignedUniquePtr> ptr(nullptr, AlignedDeleter()); AlignedUniquePtr[]> arr(nullptr, AlignedDeleter()); } TEST(AlignedAllocatorTest, EmptyAlignedFreeUniquePtr) { AlignedFreeUniquePtr> ptr(nullptr, AlignedFreer()); AlignedFreeUniquePtr[]> arr(nullptr, AlignedFreer()); } TEST(AlignedAllocatorTest, CustomAlloc) { FakeAllocator fake_alloc; const size_t kSize = 7777; void* ptr = AllocateAlignedBytes(kSize, &FakeAllocator::StaticAlloc, &fake_alloc); ASSERT_NE(nullptr, ptr); // We should have only requested one alloc from the allocator. EXPECT_EQ(1U, fake_alloc.PendingAllocs()); // Make sure the pointer is actually aligned. EXPECT_EQ(0U, reinterpret_cast(ptr) % HWY_ALIGNMENT); FreeAlignedBytes(ptr, &FakeAllocator::StaticFree, &fake_alloc); EXPECT_EQ(0U, fake_alloc.PendingAllocs()); } TEST(AlignedAllocatorTest, MakeUniqueAlignedDefaultConstructor) { { auto ptr = MakeUniqueAligned>(); // Default constructor sets the data_[0] to 'a'. EXPECT_EQ('a', ptr->data_[0]); EXPECT_EQ(nullptr, ptr->counter_); } } TEST(AlignedAllocatorTest, MakeUniqueAligned) { int counter = 0; { // Creates the object, initializes it with the explicit constructor and // returns an unique_ptr to it. auto ptr = MakeUniqueAligned>(&counter); EXPECT_EQ(1, counter); // Custom constructor sets the data_[0] to 'b'. EXPECT_EQ('b', ptr->data_[0]); } EXPECT_EQ(0, counter); } TEST(AlignedAllocatorTest, MakeUniqueAlignedArray) { int counter = 0; { // Creates the array of objects and initializes them with the explicit // constructor. auto arr = MakeUniqueAlignedArray>(7, &counter); EXPECT_EQ(7, counter); for (size_t i = 0; i < 7; i++) { // Custom constructor sets the data_[0] to 'b'. EXPECT_EQ('b', arr[i].data_[0]) << "Where i = " << i; } } EXPECT_EQ(0, counter); } TEST(AlignedAllocatorTest, AllocSingleInt) { auto ptr = AllocateAligned(1); ASSERT_NE(nullptr, ptr.get()); EXPECT_EQ(0U, reinterpret_cast(ptr.get()) % HWY_ALIGNMENT); // Force delete of the unique_ptr now to check that it doesn't crash. ptr.reset(nullptr); EXPECT_EQ(nullptr, ptr.get()); } TEST(AlignedAllocatorTest, AllocMultipleInt) { const size_t kSize = 7777; auto ptr = AllocateAligned(kSize); ASSERT_NE(nullptr, ptr.get()); EXPECT_EQ(0U, reinterpret_cast(ptr.get()) % HWY_ALIGNMENT); // ptr[i] is actually (*ptr.get())[i] which will use the operator[] of the // underlying type chosen by AllocateAligned() for the std::unique_ptr. EXPECT_EQ(&(ptr[0]) + 1, &(ptr[1])); size_t ret = 0; for (size_t i = 0; i < kSize; i++) { // Performs a computation using ptr[] to prevent it being optimized away. ptr[i] = static_cast(i); if (i) ret += static_cast(ptr[i]) * ptr[i - 1]; } EXPECT_NE(0U, ret); } TEST(AlignedAllocatorTest, AllocateAlignedObjectWithoutDestructor) { int counter = 0; { // This doesn't call the constructor. auto obj = AllocateAligned>(1); obj[0].counter_ = &counter; } // Destroying the unique_ptr shouldn't have called the destructor of the // SampleObject<24>. EXPECT_EQ(0, counter); } TEST(AlignedAllocatorTest, MakeUniqueAlignedArrayWithCustomAlloc) { FakeAllocator fake_alloc; int counter = 0; { // Creates the array of objects and initializes them with the explicit // constructor. auto arr = MakeUniqueAlignedArrayWithAlloc>( 7, FakeAllocator::StaticAlloc, FakeAllocator::StaticFree, &fake_alloc, &counter); ASSERT_NE(nullptr, arr.get()); // An array should still only call a single allocation. EXPECT_EQ(1u, fake_alloc.PendingAllocs()); EXPECT_EQ(7, counter); for (size_t i = 0; i < 7; i++) { // Custom constructor sets the data_[0] to 'b'. EXPECT_EQ('b', arr[i].data_[0]) << "Where i = " << i; } } EXPECT_EQ(0, counter); EXPECT_EQ(0u, fake_alloc.PendingAllocs()); } TEST(AlignedAllocatorTest, DefaultInit) { // The test is whether this compiles. Default-init is useful for output params // and per-thread storage. std::vector> ptrs; std::vector> free_ptrs; ptrs.resize(128); free_ptrs.resize(128); // The following is to prevent elision of the pointers. std::mt19937 rng(129); // Emscripten lacks random_device. std::uniform_int_distribution dist(0, 127); ptrs[dist(rng)] = MakeUniqueAlignedArray(123); free_ptrs[dist(rng)] = AllocateAligned(456); // "Use" pointer without resorting to printf. 0 == 0. Can't shift by 64. const auto addr1 = reinterpret_cast(ptrs[dist(rng)].get()); const auto addr2 = reinterpret_cast(free_ptrs[dist(rng)].get()); constexpr size_t kBits = sizeof(uintptr_t) * 8; EXPECT_EQ((addr1 >> (kBits - 1)) >> (kBits - 1), (addr2 >> (kBits - 1)) >> (kBits - 1)); } namespace { using std::array; using std::vector; template void CheckEqual(const T& t1, const T& t2) { EXPECT_EQ(t1.size(), t2.size()); for (size_t i = 0; i < t1.size(); i++) { EXPECT_EQ(t1[i], t2[i]) << "i=" << i; } } template void CheckEqual(const AlignedNDArray& a, const vector& v) { const array want_shape({v.size()}); const array got_shape = a.shape(); CheckEqual(got_shape, want_shape); Span a_span = a[{}]; EXPECT_EQ(a_span.size(), v.size()); for (size_t i = 0; i < a_span.size(); i++) { EXPECT_EQ(a_span[i], v[i]) << "i=" << i; EXPECT_EQ(*(a_span.data() + i), v[i]) << "i=" << i; } } template void CheckEqual(const AlignedNDArray& a, const vector>& v) { const array want_shape({v.size(), v[1].size()}); for (const vector& row : v) { EXPECT_EQ(row.size(), want_shape[1]); } const std::array got_shape = a.shape(); CheckEqual(got_shape, want_shape); EXPECT_EQ(a.size(), want_shape[0] * want_shape[1]); for (size_t row_index = 0; row_index < v.size(); ++row_index) { vector want_row = v[row_index]; Span got_row = a[{row_index}]; EXPECT_EQ(got_row.size(), want_row.size()) << "row_index=" << row_index; for (size_t column_index = 0; column_index < got_row.size(); column_index++) { EXPECT_EQ(a[{row_index}][column_index], want_row[column_index]) << "row_index=" << row_index << ", column_index=" << column_index; EXPECT_EQ(got_row[column_index], want_row[column_index]) << "row_index=" << row_index << ", column_index=" << column_index; EXPECT_EQ(*(a[{row_index}].data() + column_index), want_row[column_index]) << "row_index=" << row_index << ", column_index=" << column_index; } } } TEST(AlignedAllocatorTest, AlignedNDArray) { AlignedNDArray a1({4}); CheckEqual(a1, {0, 0, 0, 0}); a1[{}][2] = 3.4f; CheckEqual(a1, {0, 0, 3.4f, 0}); AlignedNDArray a2({2, 3}); CheckEqual(a2, {{0, 0, 0}, {0, 0, 0}}); a2[{1}][1] = 5.1f; CheckEqual(a2, {{0, 0, 0}, {0, 5.1f, 0}}); float f0[] = {1.0f, 2.0f, 3.0f}; float f1[] = {4.0f, 5.0f, 6.0f}; hwy::CopyBytes(f0, a2[{0}].data(), 3 * sizeof(float)); hwy::CopyBytes(f1, a2[{1}].data(), 3 * sizeof(float)); CheckEqual(a2, {{1.0f, 2.0f, 3.0f}, {4.0f, 5.0f, 6.0f}}); } // Tests that each innermost row in an AlignedNDArray is aligned to the max // bytes available for SIMD operations on this architecture. TEST(AlignedAllocatorTest, AlignedNDArrayAlignment) { AlignedNDArray a({3, 3, 3, 3}); for (size_t d0 = 0; d0 < a.shape()[0]; d0++) { for (size_t d1 = 0; d1 < a.shape()[1]; d1++) { for (size_t d2 = 0; d2 < a.shape()[2]; d2++) { // Check that the address this innermost array starts at is an even // number of VectorBytes(), which is the max bytes available for SIMD // operations. EXPECT_EQ( reinterpret_cast(a[{d0, d1, d2}].data()) % VectorBytes(), 0); } } } } TEST(AlignedAllocatorTest, SpanCopyAssignment) { AlignedNDArray a({2, 2}); CheckEqual(a, {{0.0f, 0.0f}, {0.0f, 0.0f}}); a[{0}] = {1.0f, 2.0f}; a[{1}] = {3.0f, 4.0f}; CheckEqual(a, {{1.0f, 2.0f}, {3.0f, 4.0f}}); } TEST(AlignedAllocatorTest, AlignedNDArrayTruncate) { AlignedNDArray a({8, 8, 8, 8}); const size_t last_axis_memory_shape = a.memory_shape()[3]; const auto compute_value = [&](const std::array& index) { return index[0] * 8 * 8 * 8 + index[1] * 8 * 8 + index[2] * 8 * 8 + index[3]; }; for (size_t axis0 = 0; axis0 < a.shape()[0]; ++axis0) { for (size_t axis1 = 0; axis1 < a.shape()[1]; ++axis1) { for (size_t axis2 = 0; axis2 < a.shape()[2]; ++axis2) { for (size_t axis3 = 0; axis3 < a.shape()[3]; ++axis3) { a[{axis0, axis1, axis2}][axis3] = compute_value({axis0, axis1, axis2, axis3}); } } } } const auto verify_values = [&](const AlignedNDArray& array) { for (size_t axis0 = 0; axis0 < array.shape()[0]; ++axis0) { for (size_t axis1 = 0; axis1 < array.shape()[1]; ++axis1) { for (size_t axis2 = 0; axis2 < array.shape()[2]; ++axis2) { for (size_t axis3 = 0; axis3 < array.shape()[3]; ++axis3) { EXPECT_EQ((array[{axis0, axis1, axis2}][axis3]), (compute_value({axis0, axis1, axis2, axis3}))); } } } } }; a.truncate({7, 7, 7, 7}); EXPECT_EQ(a.shape()[0], 7); EXPECT_EQ(a.shape()[1], 7); EXPECT_EQ(a.shape()[2], 7); EXPECT_EQ(a.shape()[3], 7); EXPECT_EQ(a.memory_shape()[0], 8); EXPECT_EQ(a.memory_shape()[1], 8); EXPECT_EQ(a.memory_shape()[2], 8); EXPECT_EQ(a.memory_shape()[3], last_axis_memory_shape); verify_values(a); a.truncate({6, 5, 4, 3}); EXPECT_EQ(a.shape()[0], 6); EXPECT_EQ(a.shape()[1], 5); EXPECT_EQ(a.shape()[2], 4); EXPECT_EQ(a.shape()[3], 3); EXPECT_EQ(a.memory_shape()[0], 8); EXPECT_EQ(a.memory_shape()[1], 8); EXPECT_EQ(a.memory_shape()[2], 8); EXPECT_EQ(a.memory_shape()[3], last_axis_memory_shape); verify_values(a); } } // namespace } // namespace hwy