//------------------------------------------------------------------------------ // GB_AxB_saxpy5_A_bitmap.c: C+=A*B when C is full, A bitmap, B sparse/hyper //------------------------------------------------------------------------------ // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2023, All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 //------------------------------------------------------------------------------ // C is full. // A is bitmap, and not iso-valued and not pattern-only. // B is sparse or hypersparse. // The monoid is identical to the accum op, and is not the ANY operator. // The type of A must match the multiply operator input. // The type of C must match the monoid/accum op. B can be typecasted for a // JIT kernel but not the FactoryKernels. // This method is used for both built-in semirings with no typecasting, in // the FactoryKernels, and the JIT kernels. #ifdef GB_GENERIC #error "saxpy5 generic kernel undefined" #endif { //-------------------------------------------------------------------------- // get C, A, and B //-------------------------------------------------------------------------- const int64_t m = C->vlen ; // # of rows of C and A const int8_t *restrict Ab = A->b ; const int64_t *restrict Bp = B->p ; const int64_t *restrict Bh = B->h ; const int64_t *restrict Bi = B->i ; #ifdef GB_JIT_KERNEL #define B_iso GB_B_ISO #else const bool B_iso = B->iso ; #endif const GB_A_TYPE *restrict Ax = (GB_A_TYPE *) A->x ; #if !GB_B_IS_PATTERN const GB_B_TYPE *restrict Bx = (GB_B_TYPE *) B->x ; #endif GB_C_TYPE *restrict Cx = (GB_C_TYPE *) C->x ; //-------------------------------------------------------------------------- // C += A*B where A is bitmap (and not iso or pattern-only) //-------------------------------------------------------------------------- int tid ; #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) for (tid = 0 ; tid < ntasks ; tid++) { // get the task descriptor const int64_t jB_start = B_slice [tid] ; const int64_t jB_end = B_slice [tid+1] ; // C(:,jB_start:jB_end-1) += A * B(:,jB_start:jB_end-1) for (int64_t jB = jB_start ; jB < jB_end ; jB++) { // get B(:,j) and C(:,j) const int64_t j = GBH_B (Bh, jB) ; const int64_t pC = j * m ; const int64_t pB_start = Bp [jB] ; const int64_t pB_end = Bp [jB+1] ; // C(:,j) += A*B(:,j) for (int64_t pB = pB_start ; pB < pB_end ; pB++) { // get B(k,j) const int64_t k = Bi [pB] ; GB_DECLAREB (bkj) ; GB_GETB (bkj, Bx, pB, B_iso) ; // get A(:,k) const int64_t pA = k * m ; // C(:,j) += A(:,k)*B(k,j) for (int64_t i = 0 ; i < m ; i++) { if (!Ab [pA+i]) continue ; // C(i,j) += A(i,k)*B(k,j) ; GB_MULTADD (Cx [pC + i], Ax [pA + i], bkj, i, k, j) ; } } } } }