// Ceres Solver - A fast non-linear least squares minimizer // Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are met: // // * Redistributions of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation // and/or other materials provided with the distribution. // * Neither the name of Google Inc. nor the names of its contributors may be // used to endorse or promote products derived from this software without // specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE // POSSIBILITY OF SUCH DAMAGE. // // Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin) #include "ceres/cuda_block_sparse_crs_view.h" #include #include #include #ifndef CERES_NO_CUDA namespace ceres::internal { class CudaBlockSparseCRSViewTest : public ::testing::Test { protected: void SetUp() final { std::string message; CHECK(context_.InitCuda(&message)) << "InitCuda() failed because: " << message; BlockSparseMatrix::RandomMatrixOptions options; options.num_row_blocks = 1234; options.min_row_block_size = 1; options.max_row_block_size = 10; options.num_col_blocks = 567; options.min_col_block_size = 1; options.max_col_block_size = 10; options.block_density = 0.2; std::mt19937 rng; // Block-sparse matrix with order of values different from CRS block_sparse_non_crs_compatible_ = BlockSparseMatrix::CreateRandomMatrix(options, rng, true); std::iota(block_sparse_non_crs_compatible_->mutable_values(), block_sparse_non_crs_compatible_->mutable_values() + block_sparse_non_crs_compatible_->num_nonzeros(), 1); options.max_row_block_size = 1; // Block-sparse matrix with CRS order of values (row-blocks are rows) block_sparse_crs_compatible_rows_ = BlockSparseMatrix::CreateRandomMatrix(options, rng, true); std::iota(block_sparse_crs_compatible_rows_->mutable_values(), block_sparse_crs_compatible_rows_->mutable_values() + block_sparse_crs_compatible_rows_->num_nonzeros(), 1); // Block-sparse matrix with CRS order of values (single cell per row-block) auto bs = std::make_unique( *block_sparse_non_crs_compatible_->block_structure()); int num_nonzeros = 0; for (auto& r : bs->rows) { const int num_cells = r.cells.size(); if (num_cells > 1) { std::uniform_int_distribution uniform_cell(0, num_cells - 1); const int selected_cell = uniform_cell(rng); std::swap(r.cells[0], r.cells[selected_cell]); r.cells.resize(1); } const int row_block_size = r.block.size; for (auto& c : r.cells) { c.position = num_nonzeros; const int col_block_size = bs->cols[c.block_id].size; num_nonzeros += col_block_size * row_block_size; } } block_sparse_crs_compatible_single_cell_ = std::make_unique(bs.release()); std::iota(block_sparse_crs_compatible_single_cell_->mutable_values(), block_sparse_crs_compatible_single_cell_->mutable_values() + block_sparse_crs_compatible_single_cell_->num_nonzeros(), 1); } void Compare(const BlockSparseMatrix& bsm, const CudaSparseMatrix& csm) { ASSERT_EQ(csm.num_cols(), bsm.num_cols()); ASSERT_EQ(csm.num_rows(), bsm.num_rows()); ASSERT_EQ(csm.num_nonzeros(), bsm.num_nonzeros()); const int num_rows = bsm.num_rows(); const int num_cols = bsm.num_cols(); Vector x(num_cols); Vector y(num_rows); CudaVector x_cuda(&context_, num_cols); CudaVector y_cuda(&context_, num_rows); Vector y_cuda_host(num_rows); for (int i = 0; i < num_cols; ++i) { x.setZero(); y.setZero(); y_cuda.SetZero(); x[i] = 1.; x_cuda.CopyFromCpu(x); csm.RightMultiplyAndAccumulate(x_cuda, &y_cuda); bsm.RightMultiplyAndAccumulate( x.data(), y.data(), &context_, std::thread::hardware_concurrency()); y_cuda.CopyTo(&y_cuda_host); // There will be up to 1 non-zero product per row, thus we expect an exact // match on 32-bit integer indices EXPECT_EQ((y - y_cuda_host).squaredNorm(), 0.); } } std::unique_ptr block_sparse_non_crs_compatible_; std::unique_ptr block_sparse_crs_compatible_rows_; std::unique_ptr block_sparse_crs_compatible_single_cell_; ContextImpl context_; }; TEST_F(CudaBlockSparseCRSViewTest, CreateUpdateValuesNonCompatible) { auto view = CudaBlockSparseCRSView(*block_sparse_non_crs_compatible_, &context_); ASSERT_EQ(view.IsCrsCompatible(), false); auto matrix = view.crs_matrix(); Compare(*block_sparse_non_crs_compatible_, *matrix); } TEST_F(CudaBlockSparseCRSViewTest, CreateUpdateValuesCompatibleRows) { auto view = CudaBlockSparseCRSView(*block_sparse_crs_compatible_rows_, &context_); ASSERT_EQ(view.IsCrsCompatible(), true); auto matrix = view.crs_matrix(); Compare(*block_sparse_crs_compatible_rows_, *matrix); } TEST_F(CudaBlockSparseCRSViewTest, CreateUpdateValuesCompatibleSingleCell) { auto view = CudaBlockSparseCRSView(*block_sparse_crs_compatible_single_cell_, &context_); ASSERT_EQ(view.IsCrsCompatible(), true); auto matrix = view.crs_matrix(); Compare(*block_sparse_crs_compatible_single_cell_, *matrix); } } // namespace ceres::internal #endif // CERES_NO_CUDA