#ifndef PARDENSEMATRIX_H #define PARDENSEMATRIX_H #include "MatrixDef.h" #include "DenseMatrix.h" #include "DenseVector.h" #include "MPI_Wrappers.h" #include "ATC_Error.h" using ATC::ATC_Error; #include #include namespace ATC_matrix { /** * @class ParDenseMatrix * @brief Parallelized version of DenseMatrix class. */ template class ParDenseMatrix : public DenseMatrix { public: MPI_Comm _comm; ParDenseMatrix(MPI_Comm comm, INDEX rows=0, INDEX cols=0, bool z=1) : DenseMatrix(rows, cols, z), _comm(comm) {} ParDenseMatrix(MPI_Comm comm, const DenseMatrix& c) : DenseMatrix(c), _comm(comm) {} ParDenseMatrix(MPI_Comm comm, const SparseMatrix& c) : DenseMatrix(c), _comm(comm) {} ParDenseMatrix(MPI_Comm comm, const Matrix& c) : DenseMatrix(c), _comm(comm) {} ////////////////////////////////////////////////////////////////////////////// //* performs a matrix-vector multiply void ParMultMv(const Vector &v, DenseVector &c, const bool At, T a, T b) { // We can't generically support parallel multiplication because the data // types must be specified when using MPI MultMv(*this, v, c, At, a, b); } }; template<> class ParDenseMatrix : public DenseMatrix { public: MPI_Comm _comm; ParDenseMatrix(MPI_Comm comm, INDEX rows=0, INDEX cols=0, bool z=1) : DenseMatrix(rows, cols, z), _comm(comm) {} ParDenseMatrix(MPI_Comm comm, const DenseMatrix& c) : DenseMatrix(c), _comm(comm) {} ParDenseMatrix(MPI_Comm comm, const SparseMatrix& c) : DenseMatrix(c), _comm(comm) {} ParDenseMatrix(MPI_Comm comm, const Matrix& c) : DenseMatrix(c), _comm(comm) {} void ParMultMv(const Vector &v, DenseVector &c, const bool At, double a, double b) const { // We don't support parallel vec-Mat multiplication yet if (At) { MultMv(*this, v, c, At, a, b); return; } const INDEX nRows = this->nRows(); const INDEX nCols = this->nCols(); if (c.size() != nRows) { c.resize(nRows); // set size of C c.zero(); // do not add result to C } else c *= b; // Determine how many rows will be handled on each processor int nProcs = MPI_Wrappers::size(_comm); int myRank = MPI_Wrappers::rank(_comm); int *majorCounts = new int[nProcs]; int *offsets = new int[nProcs]; #ifdef COL_STORAGE // Column-major storage int nMajor = nCols; int nMinor = nRows; int ParDenseMatrix::*majorField = &ParDenseMatrix::_nCols; int ParDenseMatrix::*minorField = &ParDenseMatrix::_nRows; #else // Row-major storage int nMajor = nRows; int nMinor = nCols; int ParDenseMatrix::*majorField = &ParDenseMatrix::_nRows; int ParDenseMatrix::*minorField = &ParDenseMatrix::_nCols; #endif for (int i = 0; i < nProcs; i++) { // If we have an uneven row-or-col/processors number, or too few rows // or cols, some processors will need to receive fewer rows/cols. offsets[i] = (i * nMajor) / nProcs; majorCounts[i] = (((i + 1) * nMajor) / nProcs) - offsets[i]; } int myNMajor = majorCounts[myRank]; int myMajorOffset = offsets[myRank]; // Take data from an offset version of A ParDenseMatrix A_local(_comm); A_local._data = this->_data + myMajorOffset * nMinor; A_local.*majorField = myNMajor; A_local.*minorField = nMinor; #ifdef COL_STORAGE // Column-major storage // When splitting by columns, we split the vector as well, and sum the // results. DenseVector v_local(myNMajor); for (int i = 0; i < myNMajor; i++) v_local(i) = v(myMajorOffset + i); // Store results in a local vector DenseVector c_local = A_local * v_local; // Sum all vectors onto each processor MPI_Wrappers::allsum(_comm, c_local.ptr(), c.ptr(), c_local.size()); #else // Row-major storage // When splitting by rows, we use the whole vector and concatenate the // results. // Store results in a small local vector DenseVector c_local(myNMajor); for (int i = 0; i < myNMajor; i++) c_local(i) = c(myMajorOffset + i); MultMv(A_local, v, c_local, At, a, b); // Gather the results onto each processor allgatherv(_comm, c_local.ptr(), c_local.size(), c.ptr(), majorCounts, offsets); #endif // Clear out the local matrix's pointer so we don't double-free A_local._data = NULL; delete [] majorCounts; delete [] offsets; } }; // Operator for dense Matrix - dense vector product template DenseVector operator*(const ParDenseMatrix &A, const Vector &b) { DenseVector c; A.ParMultMv(b, c, 0, 1.0, 0.0); return c; } } // end namespace #endif