//------------------------------------------------------------------------------ // GraphBLAS/CUDA/GB_cuda_AxB_dot3_branch: decide to use GPU for dot3 //------------------------------------------------------------------------------ // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2024, All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 //------------------------------------------------------------------------------ // Decide branch direction for GPU use for the dot-product C=A'*B #include "GB_cuda.hpp" #include bool GB_cuda_AxB_dot3_branch ( const GrB_Matrix M, // mask matrix const bool Mask_struct, // if true, use the only structure of M const GrB_Matrix A, // input matrix const GrB_Matrix B, // input matrix const GrB_Semiring semiring, // semiring that defines C=A*B const bool flipxy // if true, do z=fmult(b,a) vs fmult(a,b) ) { if (!GB_cuda_type_branch (A->type) || !GB_cuda_type_branch (B->type) || !GB_cuda_type_branch (semiring->multiply->xtype) || !GB_cuda_type_branch (semiring->multiply->ytype) || !GB_cuda_type_branch (semiring->multiply->ztype)) { // one or more types are not yet supported on the GPU return (false) ; } if (A->vlen == 0) { // C has no entries: no need to compute it on the GPU return (false) ; } // very rough estimate of the work to do double adeg = ((double) GB_nnz (A)) / ((double) GB_IMAX (1, A->nvec)) ; double bdeg = ((double) GB_nnz (B)) / ((double) GB_IMAX (1, B->nvec)) ; double work = GB_nnz (M) * GB_IMIN (adeg, bdeg) ; int ngpus_to_use = GB_ngpus_to_use (work) ; GBURBLE (" work:%g GPUs:%d ", work, ngpus_to_use) ; if (ngpus_to_use > 0) { // FIXME: or do this in GB_AxB_dot3_cuda // int gpu_id = GB_Context_gpu_id_get ( ) ; // cudaSetDevice (gpu_id) ; return true ; } else { // std::cout << "Not using cuda path for dot3." << std::endl; return false ; } }