//------------------------------------------------------------------------------
// GB_ek_slice_kernels.h: slice the entries and vectors of a matrix
//------------------------------------------------------------------------------

// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2023, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//------------------------------------------------------------------------------

#ifndef GB_EK_SLICE_KERNELS_H
#define GB_EK_SLICE_KERNELS_H

//------------------------------------------------------------------------------
// GB_ek_slice_ntasks: determine # of threads and tasks to use for GB_ek_slice
//------------------------------------------------------------------------------

// On output, (*nthreads) >= 1 is obtained from GB_nthreads (or is 1 if the
// matrix holds no entries), and (*ntasks) satisfies
// 1 <= (*ntasks) <= max (1, anz_held).  With a single thread, a single task
// is used; otherwise ntasks_per_thread tasks are requested for each thread,
// but never more tasks than there are entries held.

static inline void GB_ek_slice_ntasks
(
    // output
    int *nthreads,              // # of threads to use for GB_ek_slice
    int *ntasks,                // # of tasks to create for GB_ek_slice
    // input
    int64_t anz_held,           // GB_nnz_held(A) of the matrix to slice
    int ntasks_per_thread,      // # of tasks per thread
    double work,                // total work to do
    double chunk,               // give each thread at least this much work
    int nthreads_max            // max # of threads to use
)
{

    if (anz_held == 0)
    {
        // no entries are held: one thread and one task suffice
        (*nthreads) = 1 ;
        (*ntasks) = 1 ;
        return ;
    }

    // select the # of threads from the work, the chunk, and the thread limit
    (*nthreads) = GB_nthreads (work, chunk, nthreads_max) ;

    // a single thread gets a single task; otherwise oversubscribe the threads
    (*ntasks) = (*nthreads == 1) ? 1 : ((ntasks_per_thread) * (*nthreads)) ;

    // no more tasks than entries held, and always at least one task
    (*ntasks) = GB_IMIN (*ntasks, anz_held) ;
    (*ntasks) = GB_IMAX (*ntasks, 1) ;
}

//------------------------------------------------------------------------------
// GB_SLICE_MATRIX: slice a single matrix using GB_ek_slice
//------------------------------------------------------------------------------

// chunk and nthreads_max must already be defined.

// GB_SLICE_MATRIX_WORK (X, ntasks_per_thread, work, xnz_held) expands to a
// sequence of statements and declarations (it is not a single expression, and
// it cannot be wrapped in a do-while since it declares names for later use):
//
//  (1) X_nthreads and X_ntasks are set by GB_ek_slice_ntasks.  These two
//      names, and X_ek_slicing, are used but not declared here, so they must
//      already be in scope where the macro is expanded.
//  (2) X_ek_slicing is obtained via GB_WERK_PUSH with 3*X_ntasks+1 entries of
//      type int64_t.  If it is NULL, GB_FREE_ALL is expanded and the enclosing
//      function returns GrB_OUT_OF_MEMORY.
//  (3) GB_ek_slice (X_ek_slicing, X, X_ntasks) fills in the workspace.
//  (4) Three const int64_t pointers into X_ek_slicing are declared:
//          kfirst_Xslice  = X_ek_slicing
//          klast_Xslice   = X_ek_slicing + X_ntasks
//          pstart_Xslice  = X_ek_slicing + 2*X_ntasks
//      so that the remaining X_ntasks+1 entries belong to pstart_Xslice.
//
// X must be a plain identifier, since it is token-pasted to form these names.

#define GB_SLICE_MATRIX_WORK(X,ntasks_per_thread,work,xnz_held) \
    GB_ek_slice_ntasks (&(X ## _nthreads), &(X ## _ntasks), xnz_held, \
        ntasks_per_thread, work, chunk, nthreads_max) ; \
    GB_WERK_PUSH (X ## _ek_slicing, 3*(X ## _ntasks)+1, int64_t) ; \
    if (X ## _ek_slicing == NULL) \
    { \
        /* out of memory */ \
        GB_FREE_ALL ; \
        return (GrB_OUT_OF_MEMORY) ; \
    } \
    GB_ek_slice (X ## _ek_slicing, X, X ## _ntasks) ; \
    const int64_t *kfirst_ ## X ## slice = X ## _ek_slicing ; \
    const int64_t *klast_  ## X ## slice = X ## _ek_slicing + X ## _ntasks ; \
    const int64_t *pstart_ ## X ## slice = X ## _ek_slicing + X ## _ntasks*2 ;

// GB_SLICE_MATRIX (X, ntasks_per_thread) slices the matrix X with the work
// taken to be GB_nnz_held (X) + X->nvec.  In addition to the names described
// for GB_SLICE_MATRIX_WORK, it declares the constants X_held and X_wrk.

#define GB_SLICE_MATRIX(X,ntasks_per_thread) \
    const int64_t X ## _held = GB_nnz_held (X) ; \
    const double  X ## _wrk = X ## _held + X->nvec ; \
    GB_SLICE_MATRIX_WORK (X, ntasks_per_thread, X ## _wrk, X ## _held)

//------------------------------------------------------------------------------
// GB_GET_PA_AND_PC: find the part of A(:,k) and C(:,k) for this task
//------------------------------------------------------------------------------

// The tasks were generated by GB_ek_slice.

// The caller first obtains p0, p1, and p2 for the vector k, for example:
//
//      p0 = GBP_A (Ap, k, avlen) ;
//      p1 = GBP_A (Ap, k+1, avlen) ;
//      p2 = GBP (Cp, k, cvlen) ;
//
// The macro declares the int64_t variables pA_start, pA_end, and pC (so these
// three arguments must be plain identifiers not yet declared in the scope),
// and sets them as follows:
//
//      k == kfirst:    pA_start = pstart_slice [tid]
//                      pA_end   = min (p1, pstart_slice [tid+1])
//                      pC       = Cp_kfirst [tid]
//      k == klast:     pA_start = p0
//                      pA_end   = pstart_slice [tid+1]
//                      pC       = p2
//      otherwise:      pA_start = p0, pA_end = p1, pC = p2
//
// The kfirst case is tested first, so it also governs when kfirst == klast.

#define GB_GET_PA_AND_PC(pA_start,pA_end,pC,tid,k,kfirst,klast,pstart_slice,Cp_kfirst,p0,p1,p2) \
    int64_t pA_start, pA_end, pC ; \
    if ((k) == (kfirst)) \
    { \
        /* First vector for task tid; may only be partially owned. */ \
        pA_start = (pstart_slice) [tid] ; \
        pA_end   = GB_IMIN ((p1), (pstart_slice) [(tid)+1]) ; \
        pC = (Cp_kfirst) [tid] ; \
    } \
    else if ((k) == (klast)) \
    { \
        /* Last vector for task tid; may only be partially owned. */ \
        pA_start = (p0) ; \
        pA_end   = (pstart_slice) [(tid)+1] ; \
        pC = (p2) ; \
    } \
    else \
    { \
        /* task tid entirely owns this vector A(:,k). */ \
        pA_start = (p0) ; \
        pA_end   = (p1) ; \
        pC = (p2) ; \
    }

//------------------------------------------------------------------------------
// GB_GET_PA: find the part of A(:,k) to be operated on by this task
//------------------------------------------------------------------------------

// The tasks were generated by GB_ek_slice.

// The caller first obtains p0 and p1 for the vector k, for example:
//
//      p0 = GBP_A (Ap, k, avlen) ;
//      p1 = GBP_A (Ap, k+1, avlen) ;
//
// The macro declares the int64_t variables pA_start and pA_end (so these two
// arguments must be plain identifiers not yet declared in the scope), and
// sets them exactly as GB_GET_PA_AND_PC does, without computing pC.

#define GB_GET_PA(pA_start,pA_end,tid,k,kfirst,klast,pstart_slice,p0,p1) \
    int64_t pA_start, pA_end ; \
    if ((k) == (kfirst)) \
    { \
        /* First vector for task tid; may only be partially owned. */ \
        pA_start = (pstart_slice) [tid] ; \
        pA_end   = GB_IMIN ((p1), (pstart_slice) [(tid)+1]) ; \
    } \
    else if ((k) == (klast)) \
    { \
        /* Last vector for task tid; may only be partially owned. */ \
        pA_start = (p0) ; \
        pA_end   = (pstart_slice) [(tid)+1] ; \
    } \
    else \
    { \
        /* task tid entirely owns this vector A(:,k). */ \
        pA_start = (p0) ; \
        pA_end   = (p1) ; \
    }

#endif