//------------------------------------------------------------------------------ // GB_ek_slice_merge2: merge final results for matrix C //------------------------------------------------------------------------------ // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2023, All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 //------------------------------------------------------------------------------ // Prior to calling this function, a method using GB_ek_slice to slice an input // matrix A has computed the vector counts Cp, where Cp [k] is the number of // entries in the kth vector of C on input to this function. // The input matrix and the matrix C is sliced by kfirst_Aslice and // klast_Aslice, where kfirst = kfirst_Aslice [tid] is the first vector in A // and C computed by task tid, and klast = klast_Aslice [tid] is the last // vector computed by task tid. Tasks tid and tid+1 may cooperate on a single // vector, however, where klast_Aslice [tid] may be the same as kfirst_Aslice // [tid]. The method has also computed Wfirst [tid] and Wlast [tid] for each // task id, tid. Wfirst [tid] is the number of entries task tid contributes to // C(:,kfirst), and Wlast [tid] is the number of entries task tid contributes // to C(:,klast). // On output, Cp [0:cnvec] is overwritten with its cumulative sum. // C_nvec_nonempty is the count of how many vectors in C are empty. // Cp_kfirst [tid] is the position in C where task tid owns entries in // C(:,kfirst), which is a cumulative sum of the entries computed in C(:,k) for // all tasks that cooperate to compute that vector, starting at Cp [kfirst]. // There is no need to compute this for C(:,klast): if kfirst < klast, then // either task tid fully owns C(:,klast), or it is always the first task to // operate on C(:,klast). In both cases, task tid starts its computations at // the top of C(:,klast), which can be found at Cp [klast]. #include "GB_ek_slice.h" void GB_ek_slice_merge2 // merge final results for matrix C ( // output int64_t *C_nvec_nonempty, // # of non-empty vectors in C int64_t *restrict Cp_kfirst, // size ntasks // input/output int64_t *restrict Cp, // size cnvec+1 // input const int64_t cnvec, const int64_t *restrict Wfirst, // size ntasks const int64_t *restrict Wlast, // size ntasks const int64_t *A_ek_slicing, // size 3*ntasks+1 const int ntasks, // # of tasks used to construct C const int nthreads, // # of threads to use GB_Werk Werk ) { //-------------------------------------------------------------------------- // Cp = cumsum (Cp) //-------------------------------------------------------------------------- GB_cumsum (Cp, cnvec, C_nvec_nonempty, nthreads, Werk) ; //-------------------------------------------------------------------------- // determine the slice boundaries in the new C matrix //-------------------------------------------------------------------------- const int64_t *restrict kfirst_Aslice = A_ek_slicing ; const int64_t *restrict klast_Aslice = A_ek_slicing + ntasks ; // const int64_t *restrict pstart_Aslice = A_ek_slicing + ntasks * 2 ; int64_t kprior = -1 ; int64_t pC = 0 ; for (int tid = 0 ; tid < ntasks ; tid++) { int64_t kfirst = kfirst_Aslice [tid] ; if (kprior < kfirst) { // Task tid is the first one to do work on C(:,kfirst), so it // starts at Cp [kfirst], and it contributes Wfirst [tid] entries // to C(:,kfirst). pC = Cp [kfirst] ; kprior = kfirst ; } // Task tid contributes Wfirst [tid] entries to C(:,kfirst) Cp_kfirst [tid] = pC ; pC += Wfirst [tid] ; int64_t klast = klast_Aslice [tid] ; if (kfirst < klast) { // Task tid is the last to contribute to C(:,kfirst). ASSERT (pC == Cp [kfirst+1]) ; // Task tid contributes the first Wlast [tid] entries to // C(:,klast), so the next task tid+1 starts at location Cp [klast] // + Wlast [tid], if its first vector is klast of this task. pC = Cp [klast] + Wlast [tid] ; kprior = klast ; } } }