// ------------------------------------------------------------- // cuDPP -- CUDA Data Parallel Primitives library // ------------------------------------------------------------- // $Revision: 3572$ // $Date: 2007-11-19 13:58:06 +0000 (Mon, 19 Nov 2007) $ // ------------------------------------------------------------- // This source code is distributed under the terms of license.txt // in the root directory of this source distribution. // ------------------------------------------------------------- #include "cudpp.h" #include "cudpp_plan_manager.h" #include "cudpp_scan.h" //#include "cudpp_segscan.h" //#include "cudpp_compact.h" //#include "cudpp_spmvmult.h" #include "cudpp_radixsort.h" #include CUDPPPlanManager* CUDPPPlanManager::m_instance = NULL; CUDPPResult validateOptions(CUDPPConfiguration config, size_t /*numElements*/, size_t numRows, size_t /*rowPitch*/) { CUDPPResult ret = CUDPP_SUCCESS; if ((config.options & CUDPP_OPTION_BACKWARD) && (config.options & CUDPP_OPTION_FORWARD)) ret = CUDPP_ERROR_ILLEGAL_CONFIGURATION; if ((config.options & CUDPP_OPTION_EXCLUSIVE) && (config.options & CUDPP_OPTION_INCLUSIVE)) ret = CUDPP_ERROR_ILLEGAL_CONFIGURATION; if (config.algorithm == CUDPP_COMPACT && numRows > 1) ret = CUDPP_ERROR_ILLEGAL_CONFIGURATION; //!< @todo: add support for multi-row cudppCompact return ret; } /** @addtogroup publicInterface * @{ */ /** @name Plan Interface * @{ */ /** @brief Create a CUDPP plan * * A plan is a data structure containing state and intermediate storage space * that CUDPP uses to execute algorithms on data. A plan is created by * passing to cudppPlan() a CUDPPConfiguration that specifies the algorithm, * operator, datatype, and options. The size of the data must also be passed * to cudppPlan(), in the \a numElements, \a numRows, and \a rowPitch * arguments. These sizes are used to allocate internal storage space at the * time the plan is created. The CUDPP planner may use the sizes, options, * and information about the present hardware to choose optimal settings. * * Note that \a numElements is the maximum size of the array to be processed * with this plan. That means that a plan may be re-used to process (for * example, to sort or scan) smaller arrays. * * @param[out] planHandle A pointer to an opaque handle to the internal plan * @param[in] config The configuration struct specifying algorithm and options * @param[in] numElements The maximum number of elements to be processed * @param[in] numRows The number of rows (for 2D operations) to be processed * @param[in] rowPitch The pitch of the rows of input data, in elements */ CUDPP_DLL CUDPPResult cudppPlan(CUDPPHandle *planHandle, CUDPPConfiguration config, size_t numElements, size_t numRows, size_t rowPitch) { CUDPPResult result = CUDPP_SUCCESS; CUDPPPlan *plan; result = validateOptions(config, numElements, numRows, rowPitch); if (result != CUDPP_SUCCESS) { *planHandle = CUDPP_INVALID_HANDLE; return result; } switch (config.algorithm) { case CUDPP_SCAN: { plan = new CUDPPScanPlan(config, numElements, numRows, rowPitch); break; } // case CUDPP_COMPACT: // { // plan = new CUDPPCompactPlan(config, numElements, numRows, rowPitch); // break; // } case CUDPP_SORT_RADIX: //case CUDPP_SORT_RADIX_GLOBAL: { plan = new CUDPPRadixSortPlan(config, numElements); break; } /* case CUDPP_SEGMENTED_SCAN: { plan = new CUDPPSegmentedScanPlan(config, numElements); break; } //new rand plan case CUDPP_RAND_MD5: { plan = new CUDPPRandPlan(config, numElements); break; } case CUDPP_REDUCE:*/ default: //! @todo: implement cudppReduce() return CUDPP_ERROR_ILLEGAL_CONFIGURATION; break; } *planHandle = CUDPPPlanManager::AddPlan(plan); if (CUDPP_INVALID_HANDLE == *planHandle) return CUDPP_ERROR_UNKNOWN; else return CUDPP_SUCCESS; } /** @brief Destroy a CUDPP Plan * * Deletes the plan referred to by \a planHandle and all associated internal * storage. * * @param[in] planHandle The CUDPPHandle to the plan to be destroyed */ CUDPP_DLL CUDPPResult cudppDestroyPlan(CUDPPHandle planHandle) { if (CUDPPPlanManager::RemovePlan(planHandle) == false) return CUDPP_ERROR_INVALID_HANDLE; else return CUDPP_SUCCESS; } /** @brief Create a CUDPP Sparse Matrix Object * * The sparse matrix plan is a data structure containing state and intermediate storage space * that CUDPP uses to perform sparse matrix dense vector multiply. This plan is created by * passing to CUDPPSparseMatrixVectorMultiplyPlan() a CUDPPConfiguration that specifies the * algorithm (sprarse matrix-dense vector multiply) and datatype, along with the sparse matrix * itself in CSR format. The number of non-zero elements in the sparse matrix must also be passed * as \a numNonZeroElements. This is used to allocate internal storage space at the time the * sparse matrix plan is created. * * @param[out] sparseMatrixHandle A pointer to an opaque handle to the sparse matrix object * @param[in] config The configuration struct specifying algorithm and options * @param[in] numNonZeroElements The number of non zero elements in the sparse matrix * @param[in] numRows This is the number of rows in y, x and A for y = A * x * @param[in] A The matrix data * @param[in] h_rowIndices An array containing the index of the start of each row in \a A * @param[in] h_indices An array containing the index of each nonzero element in \a A CUDPP_DLL CUDPPResult cudppSparseMatrix(CUDPPHandle *sparseMatrixHandle, CUDPPConfiguration config, size_t numNonZeroElements, size_t numRows, const void *A, const unsigned int *h_rowIndices, const unsigned int *h_indices) { CUDPPResult result = CUDPP_SUCCESS; CUDPPPlan *sparseMatrix; if ((config.algorithm != CUDPP_SPMVMULT) || (numNonZeroElements <= 0) || (numRows <= 0)) { result = CUDPP_ERROR_ILLEGAL_CONFIGURATION; } if (result != CUDPP_SUCCESS) { *sparseMatrixHandle = CUDPP_INVALID_HANDLE; return result; } sparseMatrix = new CUDPPSparseMatrixVectorMultiplyPlan(config, numNonZeroElements, A, h_rowIndices, h_indices, numRows); *sparseMatrixHandle = CUDPPPlanManager::AddPlan(sparseMatrix); if (CUDPP_INVALID_HANDLE == *sparseMatrixHandle) return CUDPP_ERROR_UNKNOWN; else return CUDPP_SUCCESS; } */ /** @brief Destroy a CUDPP Sparse Matrix Object * * Deletes the sparse matrix data and plan referred to by \a sparseMatrixHandle * and all associated internal storage. * * @param[in] sparseMatrixHandle The CUDPPHandle to the matrix object to be destroyed CUDPP_DLL CUDPPResult cudppDestroySparseMatrix(CUDPPHandle sparseMatrixHandle) { return cudppDestroyPlan(sparseMatrixHandle); } */ /** @} */ // end Plan Interface /** @} */ // end publicInterface /** @brief Plan base class constructor * * @param[in] config The configuration struct specifying algorithm and options * @param[in] numElements The maximum number of elements to be processed * @param[in] numRows The number of rows (for 2D operations) to be processed * @param[in] rowPitch The pitch of the rows of input data, in elements */ CUDPPPlan::CUDPPPlan(CUDPPConfiguration config, size_t numElements, size_t numRows, size_t rowPitch) : m_config(config), m_numElements(numElements), m_numRows(numRows), m_rowPitch(rowPitch) { } /** @brief Scan Plan constructor * * @param[in] config The configuration struct specifying algorithm and options * @param[in] numElements The maximum number of elements to be scanned * @param[in] numRows The maximum number of rows (for 2D operations) to be scanned * @param[in] rowPitch The pitch of the rows of input data, in elements */ CUDPPScanPlan::CUDPPScanPlan(CUDPPConfiguration config, size_t numElements, size_t numRows, size_t rowPitch) : CUDPPPlan(config, numElements, numRows, rowPitch), m_blockSums(0), m_rowPitches(0), m_numEltsAllocated(0), m_numRowsAllocated(0), m_numLevelsAllocated(0) { allocScanStorage(this); } /** @brief CUDPP scan plan destructor */ CUDPPScanPlan::~CUDPPScanPlan() { freeScanStorage(this); } /** @brief SegmentedScan Plan constructor * * @param[in] config The configuration struct specifying options * @param[in] numElements The maximum number of elements to be scanned CUDPPSegmentedScanPlan::CUDPPSegmentedScanPlan(CUDPPConfiguration config, size_t numElements) : CUDPPPlan(config, numElements, 1, 0), m_blockSums(0), m_blockFlags(0), m_blockIndices(0), m_numEltsAllocated(0), m_numLevelsAllocated(0) { allocSegmentedScanStorage(this); } */ /** @brief SegmentedScan plan destructor CUDPPSegmentedScanPlan::~CUDPPSegmentedScanPlan() { freeSegmentedScanStorage(this); } */ /** @brief Compact Plan constructor * * @param[in] config The configuration struct specifying options * @param[in] numElements The maximum number of elements to be compacted * @param[in] numRows The number of rows (for 2D operations) to be compacted * @param[in] rowPitch The pitch of the rows of input data, in elements CUDPPCompactPlan::CUDPPCompactPlan(CUDPPConfiguration config, size_t numElements, size_t numRows, size_t rowPitch) : CUDPPPlan(config, numElements, numRows, rowPitch), m_d_outputIndices(0) { assert(numRows == 1); //!< @todo Add support for multirow compaction CUDPPConfiguration scanConfig = { CUDPP_SCAN, CUDPP_ADD, CUDPP_UINT, (config.options & CUDPP_OPTION_BACKWARD) ? CUDPP_OPTION_BACKWARD | CUDPP_OPTION_EXCLUSIVE : CUDPP_OPTION_FORWARD | CUDPP_OPTION_EXCLUSIVE }; m_scanPlan = new CUDPPScanPlan(scanConfig, numElements, numRows, rowPitch); allocCompactStorage(this); } */ /** @brief Compact plan destructor CUDPPCompactPlan::~CUDPPCompactPlan() { delete m_scanPlan; freeCompactStorage(this); } */ /** @brief Sort Plan constructor * * @param[in] config The configuration struct specifying algorithm and options * @param[in] numElements The maximum number of elements to be sorted */ /*CUDPPSortPlan::CUDPPSortPlan(CUDPPConfiguration config, size_t numElements) : CUDPPPlan(config, numElements, 1, 0), m_scanPlan(0), m_d_temp(0), m_d_tempAddress(0) { CUDPPConfiguration scanConfig = { CUDPP_SCAN, CUDPP_ADD, CUDPP_UINT, CUDPP_OPTION_FORWARD | CUDPP_OPTION_EXCLUSIVE }; //if (config.algorithm == CUDPP_SORT_RADIX_GLOBAL) { m_scanPlan = new CUDPPScanPlan(scanConfig, numElements, 1, 0); } allocSortStorage(this); }*/ /** @brief Sort plan destructor */ /*CUDPPSortPlan::~CUDPPSortPlan() { delete m_scanPlan; freeSortStorage(this); }*/ CUDPPRadixSortPlan::CUDPPRadixSortPlan(CUDPPConfiguration config, size_t numElements) : CUDPPPlan(config, numElements, 1, 0), m_scanPlan(0), m_tempKeys(0), m_tempValues(0), m_counters(0), m_countersSum(0), m_blockOffsets(0) { size_t numBlocks2 = ((numElements % (SORT_CTA_SIZE * 2)) == 0) ? (numElements / (SORT_CTA_SIZE * 2)) : (numElements / (SORT_CTA_SIZE * 2) + 1); CUDPPConfiguration scanConfig = { CUDPP_SCAN, CUDPP_ADD, CUDPP_UINT, CUDPP_OPTION_FORWARD | CUDPP_OPTION_EXCLUSIVE }; if(m_config.options == CUDPP_OPTION_KEYS_ONLY) m_bKeysOnly = true; else m_bKeysOnly = false; m_scanPlan = new CUDPPScanPlan(scanConfig, numBlocks2*16, 1, 0); allocRadixSortStorage(this); } CUDPPRadixSortPlan::~CUDPPRadixSortPlan() { delete m_scanPlan; freeRadixSortStorage(this); } /** @brief SparseMatrixVectorMultiply Plan constructor * * @param[in] config The configuration struct specifying options * @param[in] numNonZeroElements The number of non-zero elements in sparse matrix * @param[in] A Array of non-zero matrix elements * @param[in] rowIndex Array of indices of the first element of each row * in the "flattened" version of the sparse matrix * @param[in] index Array of indices of non-zero elements in the matrix * @param[in] numRows The number of rows in the sparse matrix CUDPPSparseMatrixVectorMultiplyPlan::CUDPPSparseMatrixVectorMultiplyPlan( CUDPPConfiguration config, size_t numNonZeroElements, const void *A, const unsigned int *rowIndex, const unsigned int *index, size_t numRows ) : CUDPPPlan(config, numNonZeroElements, 1, 0), m_segmentedScanPlan(0), m_d_prod(0), m_d_flags(0), m_d_rowFinalIndex(0), m_rowFinalIndex(0), m_numRows(numRows), m_numNonZeroElements(numNonZeroElements) { CUDPPConfiguration segScanConfig = { CUDPP_SEGMENTED_SCAN, CUDPP_ADD, config.datatype, (CUDPP_OPTION_FORWARD | CUDPP_OPTION_INCLUSIVE) }; m_segmentedScanPlan = new CUDPPSegmentedScanPlan(segScanConfig, m_numNonZeroElements); // Generate an array of the indices of the last element of each row // in the "flattened" version of the sparse matrix m_rowFinalIndex = new unsigned int [m_numRows]; for (unsigned int i=0; i < m_numRows; ++i) { if (i < m_numRows-1) m_rowFinalIndex[i] = rowIndex[i+1]; else m_rowFinalIndex[i] = (unsigned int)numNonZeroElements; } allocSparseMatrixVectorMultiplyStorage(this, A, rowIndex, index); } */ /** @brief Sparse matrix-vector plan destructor CUDPPSparseMatrixVectorMultiplyPlan::~CUDPPSparseMatrixVectorMultiplyPlan() { freeSparseMatrixVectorMultiplyStorage(this); delete m_segmentedScanPlan; delete [] m_rowFinalIndex; } */ /** @brief CUDPP Rand Plan Constructor * @param[in] config The configuration struct specifying options * @param[in] num_elements The number of elements to generate random bits for CUDPPRandPlan::CUDPPRandPlan(CUDPPConfiguration config, size_t num_elements) : CUDPPPlan(config, num_elements, 1, 0), m_seed(0) { } */