//------------------------------------------------------------------------------
// GB_transpose_sparse: C=op(cast(A')), transpose, typecast, and apply op
//------------------------------------------------------------------------------

// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2023, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//------------------------------------------------------------------------------

{

    //----------------------------------------------------------------------
    // A is sparse or hypersparse; C is sparse
    //----------------------------------------------------------------------

    ASSERT (GB_IS_SPARSE (A) || GB_IS_HYPERSPARSE (A)) ;
    ASSERT (GB_IS_SPARSE (C)) ;

    const int64_t *restrict Ap = A->p ;
    const int64_t *restrict Ah = A->h ;
    const int64_t *restrict Ai = A->i ;
    const int64_t anvec = A->nvec ;
    int64_t *restrict Ci = C->i ;

    if (nthreads == 1)
    {

        //------------------------------------------------------------------
        // sequential method
        //------------------------------------------------------------------

        int64_t *restrict workspace = Workspaces [0] ;
        for (int64_t k = 0 ; k < anvec ; k++)
        {
            // iterate over the entries in A(:,j)
            int64_t j = GBH_S (Ah, k) ;
            int64_t pA_start = Ap [k] ;
            int64_t pA_end = Ap [k+1] ;
            for (int64_t pA = pA_start ; pA < pA_end ; pA++)
            {
                // C(j,i) = A(i,j)
                int64_t i = Ai [pA] ;
                int64_t pC = workspace [i]++ ;
                Ci [pC] = j ;
                #ifndef GB_ISO_TRANSPOSE
                // Cx [pC] = op (Ax [pA])
                GB_APPLY_OP (pC, pA) ;
                #endif
            }
        }

    }
    else if (nworkspaces == 1)
    {

        //------------------------------------------------------------------
        // atomic method
        //------------------------------------------------------------------

        int64_t *restrict workspace = Workspaces [0] ;
        int tid ;
        #pragma omp parallel for num_threads(nthreads) schedule(static)
        for (tid = 0 ; tid < nthreads ; tid++)
        {
            for (int64_t k = A_slice [tid] ; k < A_slice [tid+1] ; k++)
            {
                // iterate over the entries in A(:,j)
                int64_t j = GBH_S (Ah, k) ;
                int64_t pA_start = Ap [k] ;
                int64_t pA_end = Ap [k+1] ;
                for (int64_t pA = pA_start ; pA < pA_end ; pA++)
                {
                    // C(j,i) = A(i,j)
                    int64_t i = Ai [pA] ;
                    // do this atomically: pC = workspace [i]++
                    int64_t pC ;
                    GB_ATOMIC_CAPTURE_INC64 (pC, workspace [i]) ;
                    Ci [pC] = j ;
                    #ifndef GB_ISO_TRANSPOSE
                    // Cx [pC] = op (Ax [pA])
                    GB_APPLY_OP (pC, pA) ;
                    #endif
                }
            }
        }

    }
    else
    {

        //------------------------------------------------------------------
        // non-atomic method
        //------------------------------------------------------------------

        int tid ;
        #pragma omp parallel for num_threads(nthreads) schedule(static)
        for (tid = 0 ; tid < nthreads ; tid++)
        {
            int64_t *restrict workspace = Workspaces [tid] ;
            for (int64_t k = A_slice [tid] ; k < A_slice [tid+1] ; k++)
            {
                // iterate over the entries in A(:,j)
                int64_t j = GBH_S (Ah, k) ;
                int64_t pA_start = Ap [k] ;
                int64_t pA_end = Ap [k+1] ;
                for (int64_t pA = pA_start ; pA < pA_end ; pA++)
                {
                    // C(j,i) = A(i,j)
                    int64_t i = Ai [pA] ;
                    int64_t pC = workspace [i]++ ;
                    Ci [pC] = j ;
                    #ifndef GB_ISO_TRANSPOSE
                    // Cx [pC] = op (Ax [pA])
                    GB_APPLY_OP (pC, pA) ;
                    #endif
                }
            }
        }
    }
}
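
// The sketch below is a minimal, self-contained illustration of the bucket
// transpose technique the template above relies on, and is not part of
// GraphBLAS: the function name, the use of double as the value type, and the
// assumption that the caller allocates Cp, Ci, and Cx are all illustrative.
// It shows where the scatter offsets come from: each workspace [i] starts at
// the cumulative position of C(:,i) and is post-incremented as entries are
// scattered, exactly as in the sequential method above.  In the template the
// caller supplies those offsets via Workspaces [ ]; the atomic and non-atomic
// methods parallelize the same scatter.  Disabled with #if 0 so it has no
// effect when this template is #include'd.

#if 0
// requires <stdint.h> and <stdlib.h>
static void bucket_transpose_sketch    // hypothetical name
(
    int64_t *Cp,            // output: size m+1, vector pointers of C = A'
    int64_t *Ci,            // output: size nz, indices of C
    double  *Cx,            // output: size nz, values of C
    const int64_t *Ap,      // input: size n+1, column pointers of A (CSC)
    const int64_t *Ai,      // input: size nz, row indices of A
    const double  *Ax,      // input: size nz, values of A
    int64_t m,              // A is m-by-n
    int64_t n
)
{
    // count the entries in each row of A; these become the vectors of C
    int64_t nz = Ap [n] ;
    int64_t *workspace = calloc (m, sizeof (int64_t)) ;
    if (workspace == NULL) return ;
    for (int64_t p = 0 ; p < nz ; p++) workspace [Ai [p]]++ ;

    // cumulative sum: workspace [i] = start of C(:,i) in Ci and Cx
    int64_t sum = 0 ;
    for (int64_t i = 0 ; i < m ; i++)
    {
        Cp [i] = sum ;
        int64_t c = workspace [i] ;
        workspace [i] = sum ;
        sum += c ;
    }
    Cp [m] = sum ;

    // scatter: the same pattern as the sequential method above
    for (int64_t j = 0 ; j < n ; j++)
    {
        for (int64_t p = Ap [j] ; p < Ap [j+1] ; p++)
        {
            // C(j,i) = A(i,j)
            int64_t q = workspace [Ai [p]]++ ;
            Ci [q] = j ;
            Cx [q] = Ax [p] ;
        }
    }
    free (workspace) ;
}
#endif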