//------------------------------------------------------------------------------
// GB_emult_02_phase1: C = A.*B where A is sparse/hyper and B is bitmap/full
//------------------------------------------------------------------------------

// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2023, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//------------------------------------------------------------------------------

// JIT: not needed, but could use one for each mask type.

// Symbolic analysis phase for GB_emult_02 and GB_emult_03.

#define GB_FREE_ALL ;

#include "GB_ewise.h"
#include "GB_emult.h"
#include "GB_binop.h"
#include "GB_stringify.h"

GrB_Info GB_emult_02_phase1 // symbolic analysis for GB_emult_02 and GB_emult_03
(
    // input/output:
    GrB_Matrix C,
    // input:
    const bool C_iso,
    const GrB_Matrix M,
    const bool Mask_struct,
    const bool Mask_comp,
    const GrB_Matrix A,
    const GrB_Matrix B,
    const int64_t *restrict A_ek_slicing,
    const int A_ntasks,
    const int A_nthreads,
    // workspace:
    int64_t *restrict Wfirst,
    int64_t *restrict Wlast,
    // output:
    int64_t *Cp_kfirst,
    GB_Werk Werk
)
{

    //--------------------------------------------------------------------------
    // get C, M, A, and B
    //--------------------------------------------------------------------------

    GrB_Info info ;

    const int8_t *restrict Mb = (M == NULL) ? NULL : M->b ;
    const GB_M_TYPE *restrict Mx = (M == NULL || Mask_struct) ? NULL :
        (const GB_M_TYPE *) M->x ;
    const size_t msize = (M == NULL) ? 0 : M->type->size ;

    const int64_t *restrict Ap = A->p ;
    const int64_t *restrict Ah = A->h ;
    const int64_t *restrict Ai = A->i ;
    const int64_t vlen = A->vlen ;
    const int64_t nvec = A->nvec ;
    const int64_t anz = GB_nnz (A) ;

    const int8_t *restrict Bb = B->b ;
    const bool B_is_bitmap = GB_IS_BITMAP (B) ;

    int64_t *restrict Cp = C->p ;

    const int64_t *restrict kfirst_Aslice = A_ek_slicing ;
    const int64_t *restrict klast_Aslice  = A_ek_slicing + A_ntasks ;
    const int64_t *restrict pstart_Aslice = A_ek_slicing + A_ntasks * 2 ;

    //--------------------------------------------------------------------------
    // count entries in C
    //--------------------------------------------------------------------------

    C->nvec_nonempty = A->nvec_nonempty ;
    C->nvec = nvec ;

    const bool C_has_pattern_of_A = !B_is_bitmap && (M == NULL) ;

    if (!C_has_pattern_of_A)
    {

        // This phase is very similar to GB_select_entry_phase1_template.c.
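
        // Each task tid counts the entries of C for its slice of A.  Counts
        // for vectors wholly owned by a single task go directly into Cp [k] ;
        // counts for the first and last vectors of a slice, which may be
        // shared with neighboring tasks, are held in Wfirst [tid] and
        // Wlast [tid] and merged into Cp by GB_ek_slice_merge1 after the
        // parallel loops below.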
        if (M == NULL)
        {

            //------------------------------------------------------------------
            // Method2/3(a): C = A.*B where A is sparse/hyper and B is bitmap
            //------------------------------------------------------------------

            ASSERT (GB_IS_BITMAP (B)) ;

            int tid ;
            #pragma omp parallel for num_threads(A_nthreads) schedule(dynamic,1)
            for (tid = 0 ; tid < A_ntasks ; tid++)
            {
                int64_t kfirst = kfirst_Aslice [tid] ;
                int64_t klast  = klast_Aslice  [tid] ;
                Wfirst [tid] = 0 ;
                Wlast  [tid] = 0 ;
                for (int64_t k = kfirst ; k <= klast ; k++)
                {
                    // count the entries in C(:,j)
                    int64_t j = GBH (Ah, k) ;
                    int64_t pB_start = j * vlen ;
                    int64_t pA, pA_end ;
                    GB_get_pA (&pA, &pA_end, tid, k, kfirst, klast,
                        pstart_Aslice, Ap, vlen) ;
                    int64_t cjnz = 0 ;
                    for ( ; pA < pA_end ; pA++)
                    {
                        cjnz += Bb [pB_start + Ai [pA]] ;
                    }
                    if (k == kfirst)
                    {
                        Wfirst [tid] = cjnz ;
                    }
                    else if (k == klast)
                    {
                        Wlast [tid] = cjnz ;
                    }
                    else
                    {
                        Cp [k] = cjnz ;
                    }
                }
            }

        }
        else
        {

            //------------------------------------------------------------------
            // Method2/3(c): C<#M> = A.*B; A is sparse/hyper; M, B bitmap/full
            //------------------------------------------------------------------

            ASSERT (M != NULL) ;

            int tid ;
            #pragma omp parallel for num_threads(A_nthreads) schedule(dynamic,1)
            for (tid = 0 ; tid < A_ntasks ; tid++)
            {
                int64_t kfirst = kfirst_Aslice [tid] ;
                int64_t klast  = klast_Aslice  [tid] ;
                Wfirst [tid] = 0 ;
                Wlast  [tid] = 0 ;
                for (int64_t k = kfirst ; k <= klast ; k++)
                {
                    // count the entries in C(:,j)
                    int64_t j = GBH (Ah, k) ;
                    int64_t pB_start = j * vlen ;
                    int64_t pA, pA_end ;
                    GB_get_pA (&pA, &pA_end, tid, k, kfirst, klast,
                        pstart_Aslice, Ap, vlen) ;
                    int64_t cjnz = 0 ;
                    for ( ; pA < pA_end ; pA++)
                    {
                        int64_t i = Ai [pA] ;
                        int64_t pB = pB_start + i ;
                        bool mij = GBB (Mb, pB) && GB_MCAST (Mx, pB, msize) ;
                        mij = mij ^ Mask_comp ;
                        cjnz += (mij && GBB (Bb, pB)) ;
                    }
                    if (k == kfirst)
                    {
                        Wfirst [tid] = cjnz ;
                    }
                    else if (k == klast)
                    {
                        Wlast [tid] = cjnz ;
                    }
                    else
                    {
                        Cp [k] = cjnz ;
                    }
                }
            }
        }

        //----------------------------------------------------------------------
        // finalize Cp, cumulative sum of Cp and compute Cp_kfirst
        //----------------------------------------------------------------------

        GB_ek_slice_merge1 (Cp, Wfirst, Wlast, A_ek_slicing, A_ntasks) ;
        GB_ek_slice_merge2 (&(C->nvec_nonempty), Cp_kfirst, Cp, nvec,
            Wfirst, Wlast, A_ek_slicing, A_ntasks, A_nthreads, Werk) ;

    }

    //--------------------------------------------------------------------------
    // allocate C->i and C->x
    //--------------------------------------------------------------------------

    int64_t cnz = (C_has_pattern_of_A) ? anz : Cp [nvec] ;
    // set C->iso = C_iso   OK
    GB_OK (GB_bix_alloc (C, cnz, GxB_SPARSE, false, true, C_iso)) ;

    //--------------------------------------------------------------------------
    // copy pattern into C
    //--------------------------------------------------------------------------

    // TODO: could make these components of C shallow instead of memcpy

    if (GB_IS_HYPERSPARSE (A))
    {
        // copy A->h into C->h
        GB_memcpy (C->h, Ah, nvec * sizeof (int64_t), A_nthreads) ;
    }

    if (C_has_pattern_of_A)
    {
        // Method2/3(b): B is full and no mask present, so the pattern of C is
        // the same as the pattern of A
        GB_memcpy (Cp, Ap, (nvec+1) * sizeof (int64_t), A_nthreads) ;
        GB_memcpy (C->i, Ai, cnz * sizeof (int64_t), A_nthreads) ;
    }

    C->nvals = cnz ;
    C->jumbled = A->jumbled ;
    C->magic = GB_MAGIC ;
    return (GrB_SUCCESS) ;
}