//------------------------------------------------------------------------------ // LG_CC_FastSV5: connected components //------------------------------------------------------------------------------ // LAGraph, (c) 2019-2022 by The LAGraph Contributors, All Rights Reserved. // SPDX-License-Identifier: BSD-2-Clause // // For additional details (including references to third party source code and // other files) see the LICENSE file or contact permission@sei.cmu.edu. See // Contributors.txt for a full list of contributors. Created, in part, with // funding and support from the U.S. Government (see Acknowledgments.txt file). // DM22-0790 // Contributed by Yongzhe Zhang; modified by Tim Davis, Texas A&M University //------------------------------------------------------------------------------ // Code is based on the algorithm described in the following paper // Zhang, Azad, Hu. FastSV: FastSV: A Distributed-Memory Connected Component // Algorithm with Fast Convergence (SIAM PP20) // A subsequent update to the algorithm is here (which might not be reflected // in this code): // // Yongzhe Zhang, Ariful Azad, Aydin Buluc: Parallel algorithms for finding // connected components using linear algebra. J. Parallel Distributed Comput. // 144: 14-27 (2020). // Modified by Tim Davis, Texas A&M University // The input matrix A must be symmetric. Self-edges (diagonal entries) are // OK, and are ignored. The values and type of A are ignored; just its // structure is accessed. // The matrix A must have dimension 2^32 or less. // todo: Need a 64-bit version of this method. // todo: this function is not thread-safe, since it exports G->A and then // reimports it back. G->A is unchanged when the function returns, but during // execution G->A is invalid. // A note about "todo" and "fixme" in this file: these do not need to be fixed // or changed for this method, since the revised version appears as // src/algorithm/LG_CC_FastSV6.c. They have thus been changed here to lower // case. // todo: free all workspace in LG_FREE_ALL #define LG_FREE_ALL ; #include "LG_internal.h" #if LAGRAPH_SUITESPARSE //------------------------------------------------------------------------------ // hash functions: todo describe me //------------------------------------------------------------------------------ // hash table size must be a power of 2 #define HASH_SIZE 1024 // number of samples to insert into the hash table // todo: this seems to be a lot of entries for a HASH_SIZE of 1024. // There could be lots of collisions. #define HASH_SAMPLES 864 #define HASH(x) (((x << 4) + x) & (HASH_SIZE-1)) #define NEXT(x) ((x + 23) & (HASH_SIZE-1)) //------------------------------------------------------------------------------ // ht_init: todo describe me //------------------------------------------------------------------------------ // Clear the hash table counts (ht_val [0:HASH_SIZE-1] = 0), and set all hash // table entries as empty (ht_key [0:HASH_SIZE-1] =-1). // todo: the memset of ht_key is confusing // todo: the name "ht_val" is confusing. It is not a value, but a count of // the number of times the value x = ht_key [h] has been inserted into the // hth position in the hash table. It should be renamed ht_cnt. static inline void ht_init ( int32_t *ht_key, int32_t *ht_val ) { memset (ht_key, -1, sizeof (int32_t) * HASH_SIZE) ; memset (ht_val, 0, sizeof (int32_t) * HASH_SIZE) ; } //------------------------------------------------------------------------------ // ht_sample: todo describe me //------------------------------------------------------------------------------ // static inline void ht_sample ( uint32_t *V32, // array of size n (todo: this is a bad variable name) int32_t n, int32_t samples, // number of samples to take from V32 int32_t *ht_key, int32_t *ht_val, uint64_t *seed ) { for (int32_t k = 0 ; k < samples ; k++) { // select an entry from V32 at random int32_t x = V32 [LG_Random60 (seed) % n] ; // find x in the hash table // todo: make this loop a static inline function (see also below) int32_t h = HASH (x) ; while (ht_key [h] != -1 && ht_key [h] != x) { h = NEXT (h) ; } ht_key [h] = x ; ht_val [h]++ ; } } //------------------------------------------------------------------------------ // ht_most_frequent: todo describe me //------------------------------------------------------------------------------ // todo what if key is returned as -1? Code breaks. todo: handle this case static inline int32_t ht_most_frequent ( int32_t *ht_key, int32_t *ht_val ) { int32_t key = -1 ; int32_t val = 0 ; // max (ht_val [0:HASH_SIZE-1]) for (int32_t h = 0 ; h < HASH_SIZE ; h++) { if (ht_val [h] > val) { key = ht_key [h] ; val = ht_val [h] ; } } return (key) ; // return most frequent key } //------------------------------------------------------------------------------ // Reduce_assign32: w (index) += s, using MIN as the "+=" accum operator //------------------------------------------------------------------------------ // mask = NULL, accumulator = GrB_MIN_UINT32, descriptor = NULL. // Duplicates are summed with the accumulator, which differs from how // GrB_assign works. GrB_assign states that the presence of duplicates results // in undefined behavior. GrB_assign in SuiteSparse:GraphBLAS follows the // MATLAB rule, which discards all but the first of the duplicates. // todo: add this to GraphBLAS as a variant of GrB_assign, either as // GxB_assign_accum (or another name), or as a GxB_* descriptor setting. static inline int Reduce_assign32 ( GrB_Vector *w_handle, // vector of size n, all entries present GrB_Vector *s_handle, // vector of size n, all entries present uint32_t *index, // array of size n, can have duplicates GrB_Index n, int nthreads, int32_t *ht_key, // hash table int32_t *ht_val, // hash table (count of # of entries) uint64_t *seed, // random char *msg ) { GrB_Type w_type, s_type ; GrB_Index w_n, s_n, w_nvals, s_nvals, *w_i, *s_i, w_size, s_size ; uint32_t *w_x, *s_x ; bool s_iso = false ; //-------------------------------------------------------------------------- // export w and s //-------------------------------------------------------------------------- // export the GrB_Vectors w and s as full arrays, to get direct access to // their contents. Note that this would fail if w or s are not full, with // all entries present. GRB_TRY (GxB_Vector_export_Full (w_handle, &w_type, &w_n, (void **) &w_x, &w_size, NULL, NULL)) ; GRB_TRY (GxB_Vector_export_Full (s_handle, &s_type, &s_n, (void **) &s_x, &s_size, &s_iso, NULL)) ; #if defined ( COVERAGE ) if (n >= 200) // for test coverage only; do not use in production!! #else if (nthreads >= 4) #endif { // allocate a buf array for each thread, of size HASH_SIZE uint32_t *mem ; LAGRAPH_TRY (LAGraph_Malloc ((void **) &mem, nthreads*HASH_SIZE, sizeof (uint32_t), msg)) ; // todo: check out-of-memory condition here // todo why is hashing needed here? hashing is slow for what needs // to be computed here. GraphBLAS has fast MIN atomic monoids that // do not require hashing. ht_init (ht_key, ht_val) ; ht_sample (index, n, HASH_SAMPLES, ht_key, ht_val, seed) ; int tid; #pragma omp parallel for num_threads(nthreads) schedule(static) for (tid = 0 ; tid < nthreads ; tid++) { // get the thread-specific buf array of size HASH_SIZE // todo: buf is a bad variable name; it's not a "buffer", // but a local workspace to compute the local version of w_x. uint32_t *buf = mem + tid * HASH_SIZE ; // copy the values from the global hash table into buf for (int32_t h = 0 ; h < HASH_SIZE ; h++) { if (ht_key [h] != -1) { buf [h] = w_x [ht_key [h]] ; } } // this thread works on index [kstart:kend] int32_t kstart = (n * tid + nthreads - 1) / nthreads ; int32_t kend = (n * tid + n + nthreads - 1) / nthreads ; for (int32_t k = kstart ; k < kend ; k++) { uint32_t i = index [k] ; // todo: make this loop a static inline function int32_t h = HASH (i) ; while (ht_key [h] != -1 && ht_key [h] != i) { h = NEXT (h) ; } if (ht_key [h] == -1) { // todo is this a race condition? w_x [i] = LAGRAPH_MIN (w_x [i], s_x [s_iso?0:k]) ; } else { buf [h] = LAGRAPH_MIN (buf [h], s_x [s_iso?0:k]) ; } } } // combine intermediate results from each thread for (int32_t h = 0 ; h < HASH_SIZE ; h++) { int32_t i = ht_key [h] ; if (i != -1) { for (tid = 0 ; tid < nthreads ; tid++) { w_x [i] = LAGRAPH_MIN (w_x [i], mem [tid * HASH_SIZE + h]) ; } } } LAGraph_Free ((void **) &mem, NULL) ; } else { // sequential version for (GrB_Index k = 0 ; k < n ; k++) { uint32_t i = index [k] ; w_x [i] = LAGRAPH_MIN (w_x [i], s_x [s_iso?0:k]) ; } } //-------------------------------------------------------------------------- // reimport w and s back into GrB_Vectors, and return result //-------------------------------------------------------------------------- // s is unchanged. It was exported only to compute w (index) += s GRB_TRY (GxB_Vector_import_Full (w_handle, w_type, w_n, (void **) &w_x, w_size, false, NULL)) ; GRB_TRY (GxB_Vector_import_Full (s_handle, s_type, s_n, (void **) &s_x, s_size, s_iso, NULL)) ; return (GrB_SUCCESS) ; } //------------------------------------------------------------------------------ // LG_CC_FastSV5 //------------------------------------------------------------------------------ // The output of LG_CC_FastSV5 is a vector component, where // component(i)=s if node i is in the connected compononent whose // representative node is node s. If s is a representative, then // component(s)=s. The number of connected components in the graph G is the // number of representatives. #undef LG_FREE_ALL #define LG_FREE_ALL \ { \ LAGraph_Free ((void **) &I, NULL) ; \ LAGraph_Free ((void **) &V32, NULL) ; \ LAGraph_Free ((void **) &ht_key, NULL) ; \ LAGraph_Free ((void **) &ht_val, NULL) ; \ /* todo why is T not freed?? */ \ GrB_free (&f) ; \ GrB_free (&gp) ; \ GrB_free (&mngp) ; \ GrB_free (&gp_new) ; \ GrB_free (&mod) ; \ } #endif int LG_CC_FastSV5 // SuiteSparse:GraphBLAS method, with GxB extensions ( // output GrB_Vector *component, // component(i)=s if node is in the component s // inputs LAGraph_Graph G, // input graph, G->A can change char *msg ) { //-------------------------------------------------------------------------- // check inputs //-------------------------------------------------------------------------- LG_CLEAR_MSG ; #if !LAGRAPH_SUITESPARSE LG_ASSERT_MSG (false, GrB_NOT_IMPLEMENTED, "SuiteSparse required") ; #else uint32_t *V32 = NULL ; int32_t *ht_key = NULL, *ht_val = NULL ; GrB_Index n, nnz, *I = NULL ; GrB_Vector f = NULL, gp_new = NULL, mngp = NULL, mod = NULL, gp = NULL ; GrB_Matrix T = NULL ; LG_TRY (LAGraph_CheckGraph (G, msg)) ; LG_ASSERT (component != NULL, GrB_NULL_POINTER) ; LG_ASSERT_MSG ((G->kind == LAGraph_ADJACENCY_UNDIRECTED || (G->kind == LAGraph_ADJACENCY_DIRECTED && G->is_symmetric_structure == LAGraph_TRUE)), -1001, "G->A must be known to be symmetric") ; GrB_Matrix S = G->A ; GRB_TRY (GrB_Matrix_nrows (&n, S)) ; GRB_TRY (GrB_Matrix_nvals (&nnz, S)) ; LG_ASSERT_MSG (n <= UINT32_MAX, -1, "problem too large (fixme)") ; #define FASTSV_SAMPLES 4 bool sampling = (n * FASTSV_SAMPLES * 2 < nnz) ; // random number seed uint64_t seed = n ; //-------------------------------------------------------------------------- // initializations //-------------------------------------------------------------------------- // determine # of threads to use for Reduce_assign int nthreads, nthreads_outer, nthreads_inner ; LG_TRY (LAGraph_GetNumThreads (&nthreads_outer, &nthreads_inner, msg)) ; nthreads = nthreads_outer * nthreads_inner ; nthreads = LAGRAPH_MIN (nthreads, n / 16) ; nthreads = LAGRAPH_MAX (nthreads, 1) ; // # of threads to use for typecast int nthreads2 = n / (64*1024) ; nthreads2 = LAGRAPH_MIN (nthreads2, nthreads) ; nthreads2 = LAGRAPH_MAX (nthreads2, 1) ; // vectors GRB_TRY (GrB_Vector_new (&f, GrB_UINT32, n)) ; GRB_TRY (GrB_Vector_new (&gp_new, GrB_UINT32, n)) ; GRB_TRY (GrB_Vector_new (&mod, GrB_BOOL, n)) ; // temporary arrays LAGRAPH_TRY (LAGraph_Malloc ((void **) &I , n, sizeof (GrB_Index), msg)) ; LAGRAPH_TRY (LAGraph_Malloc ((void **) &V32, n, sizeof (uint32_t), msg)) ; // prepare vectors int64_t i; #pragma omp parallel for num_threads(nthreads2) schedule(static) for (i = 0 ; i < n ; i++) { I [i] = i ; V32 [i] = (uint32_t) i ; } GRB_TRY (GrB_Vector_build (f, I, V32, n, GrB_PLUS_UINT32)) ; GRB_TRY (GrB_Vector_dup (&gp, f)) ; GRB_TRY (GrB_Vector_dup (&mngp, f)) ; // allocate the hash table LAGRAPH_TRY (LAGraph_Malloc ((void **) &ht_key, HASH_SIZE, sizeof (int32_t), msg)) ; LAGRAPH_TRY (LAGraph_Malloc ((void **) &ht_val, HASH_SIZE, sizeof (int32_t), msg)) ; //-------------------------------------------------------------------------- // sample phase //-------------------------------------------------------------------------- if (sampling) { //---------------------------------------------------------------------- // export S = G->A in CSR format //---------------------------------------------------------------------- // S is not modified. It is only exported so that its contents can be // read by the parallel loops below. GrB_Type type ; GrB_Index nrows, ncols, nvals ; size_t typesize ; int64_t nonempty ; GrB_Index *Sp, *Sj ; void *Sx ; bool S_jumbled = false ; GrB_Index Sp_size, Sj_size, Sx_size ; bool S_iso = false ; GRB_TRY (GrB_Matrix_nvals (&nvals, S)) ; GRB_TRY (GxB_Matrix_export_CSR (&S, &type, &nrows, &ncols, &Sp, &Sj, &Sx, &Sp_size, &Sj_size, &Sx_size, &S_iso, &S_jumbled, NULL)) ; GRB_TRY (GxB_Type_size (&typesize, type)) ; G->A = NULL ; //---------------------------------------------------------------------- // allocate space to construct T //---------------------------------------------------------------------- GrB_Index Tp_len = nrows+1, Tp_size = Tp_len*sizeof(GrB_Index); GrB_Index Tj_len = nvals, Tj_size = Tj_len*sizeof(GrB_Index); GrB_Index Tx_len = nvals ; GrB_Index *Tp = NULL, *Tj = NULL ; GrB_Index Tx_size = typesize ; void *Tx = NULL ; int32_t *range = NULL ; GrB_Index *count = NULL ; LAGRAPH_TRY (LAGraph_Malloc ((void **) &Tp, Tp_len, sizeof (GrB_Index), msg)) ; LAGRAPH_TRY (LAGraph_Malloc ((void **) &Tj, Tj_len, sizeof (GrB_Index), msg)) ; LAGRAPH_TRY (LAGraph_Calloc (&Tx, 1, typesize, msg)) ; // T is iso //---------------------------------------------------------------------- // allocate workspace //---------------------------------------------------------------------- LAGRAPH_TRY (LAGraph_Malloc ((void **) &range, nthreads + 1, sizeof (int32_t), msg)) ; LAGRAPH_TRY (LAGraph_Malloc ((void **) &count, nthreads + 1, sizeof (GrB_Index), msg)) ; memset (count, 0, sizeof (GrB_Index) * (nthreads + 1)) ; //---------------------------------------------------------------------- // define parallel tasks to construct T //---------------------------------------------------------------------- // thread tid works on rows range[tid]:range[tid+1]-1 of S and T for (int tid = 0 ; tid <= nthreads ; tid++) { range [tid] = (n * tid + nthreads - 1) / nthreads ; } //---------------------------------------------------------------------- // determine the number entries to be constructed in T for each thread //---------------------------------------------------------------------- int tid; #pragma omp parallel for num_threads(nthreads) schedule(static) for (tid = 0 ; tid < nthreads ; tid++) { for (int32_t i = range [tid] ; i < range [tid+1] ; i++) { int32_t deg = Sp [i + 1] - Sp [i] ; count [tid + 1] += LAGRAPH_MIN (FASTSV_SAMPLES, deg) ; } } //---------------------------------------------------------------------- // count = cumsum (count) //---------------------------------------------------------------------- for (tid = 0 ; tid < nthreads ; tid++) { count [tid + 1] += count [tid] ; } //---------------------------------------------------------------------- // construct T //---------------------------------------------------------------------- // T (i,:) consists of the first FASTSV_SAMPLES of S (i,:). // todo: this could be done by GxB_Select, using a new operator. Need // to define a set of GxB_SelectOp operators that would allow for this. // Note that Tx is not modified. Only Tp and Tj are constructed. #pragma omp parallel for num_threads(nthreads) schedule(static) for (tid = 0 ; tid < nthreads ; tid++) { GrB_Index p = count [tid] ; Tp [range [tid]] = p ; for (int32_t i = range [tid] ; i < range [tid+1] ; i++) { // construct T (i,:) from the first entries in S (i,:) for (int32_t j = 0 ; j < FASTSV_SAMPLES && Sp [i] + j < Sp [i + 1] ; j++) { Tj [p++] = Sj [Sp [i] + j] ; } Tp [i + 1] = p ; } } //---------------------------------------------------------------------- // import the result into the GrB_Matrix T //---------------------------------------------------------------------- // Note that Tx is unmodified. // in SuiteSparse:GraphBLAS v5, sizes are in bytes, not entries GrB_Index Tp_siz = Tp_size ; GrB_Index Tj_siz = Tj_size ; GrB_Index Tx_siz = Tx_size ; GrB_Index t_nvals = Tp [nrows] ; GRB_TRY (GxB_Matrix_import_CSR (&T, type, nrows, ncols, &Tp, &Tj, &Tx, Tp_siz, Tj_siz, Tx_siz, true, // T is iso S_jumbled, NULL)) ; //---------------------------------------------------------------------- // find the connected components of T //---------------------------------------------------------------------- // todo: this is nearly identical to the final phase below. // Make this a function bool change = true, is_first = true ; while (change) { // hooking & shortcutting GRB_TRY (GrB_mxv (mngp, NULL, GrB_MIN_UINT32, GrB_MIN_SECOND_SEMIRING_UINT32, T, gp, NULL)) ; if (!is_first) { LG_TRY (Reduce_assign32 (&f, &mngp, V32, n, nthreads, ht_key, ht_val, &seed, msg)) ; } GRB_TRY (GrB_eWiseAdd (f, NULL, GrB_MIN_UINT32, GrB_MIN_UINT32, mngp, gp, NULL)) ; // calculate grandparent // fixme: NULL parameter is SS:GrB extension GRB_TRY (GrB_Vector_extractTuples (NULL, V32, &n, f)) ; // fixme int32_t i; #pragma omp parallel for num_threads(nthreads2) schedule(static) for (i = 0 ; i < n ; i++) { I [i] = (GrB_Index) V32 [i] ; } GRB_TRY (GrB_extract (gp_new, NULL, NULL, f, I, n, NULL)) ; // todo: GrB_Vector_extract should have a variant where the index // list is not given by an array I, but as a GrB_Vector of type // GrB_UINT64 (or which can be typecast to GrB_UINT64). This is a // common issue that arises in other algorithms as well. // Likewise GrB_Matrix_extract, and all forms of GrB_assign. // check termination GRB_TRY (GrB_eWiseMult (mod, NULL, NULL, GrB_NE_UINT32, gp_new, gp, NULL)) ; GRB_TRY (GrB_reduce (&change, NULL, GrB_LOR_MONOID_BOOL, mod, NULL)) ; // swap gp and gp_new GrB_Vector t = gp ; gp = gp_new ; gp_new = t ; is_first = false ; } //---------------------------------------------------------------------- // todo: describe me //---------------------------------------------------------------------- ht_init (ht_key, ht_val) ; ht_sample (V32, n, HASH_SAMPLES, ht_key, ht_val, &seed) ; int32_t key = ht_most_frequent (ht_key, ht_val) ; // todo: what if key is returned as -1? Then T below is invalid. int64_t t_nonempty = -1 ; bool T_jumbled = false, T_iso = true ; // export T GRB_TRY (GxB_Matrix_export_CSR (&T, &type, &nrows, &ncols, &Tp, &Tj, &Tx, &Tp_siz, &Tj_siz, &Tx_siz, &T_iso, &T_jumbled, NULL)) ; // todo what is this phase doing? It is constructing a matrix T that // depends only on S, key, and V32. T contains a subset of the entries // in S, except that T (i,:) is empty if // The prior content of T is ignored; it is exported from the earlier // phase, only to reuse the allocated space for T. However, T_jumbled // is preserved from the prior matrix T, which doesn't make sense. // This parallel loop is badly load balanced. Each thread operates on // the same number of rows of S, regardless of how many entries appear // in each set of rows. It uses one thread per task, statically // scheduled. #pragma omp parallel for num_threads(nthreads) schedule(static) for (tid = 0 ; tid < nthreads ; tid++) { GrB_Index ptr = Sp [range [tid]] ; // thread tid scans S (range [tid]:range [tid+1]-1,:), // and constructs T(i,:) for all rows in this range. for (int32_t i = range [tid] ; i < range [tid+1] ; i++) { int32_t pv = V32 [i] ; // what is pv? Tp [i] = ptr ; // start the construction of T(i,:) // T(i,:) is empty if pv == key if (pv != key) { // scan S(i,:) for (GrB_Index p = Sp [i] ; p < Sp [i+1] ; p++) { // get S(i,j) int32_t j = Sj [p] ; if (V32 [j] != key) { // add the entry T(i,j) to T, but skip it if // V32 [j] is equal to key Tj [ptr++] = j ; } } // add the entry T(i,key) if there is room for it in T(i,:) if (ptr - Tp [i] < Sp [i+1] - Sp [i]) { Tj [ptr++] = key ; } } } // count the number of entries inserted into T by this thread? count [tid] = ptr - Tp [range [tid]] ; } // Compact empty space out of Tj not filled in from the above phase. // This is a lot of work and should be done in parallel. GrB_Index offset = 0 ; for (tid = 0 ; tid < nthreads ; tid++) { // this memcpy is not safe (src/dest can overlap) // memcpy (Tj + offset, Tj + Tp [range [tid]], // sizeof (GrB_Index) * count [tid]) ; // // using a for loop instead: // GrB_Index *Tj_dest = Tj + offset ; // GrB_Index *Tj_src = Tj + Tp [range [tid]] ; // for (int64_t k = 0 ; k < count [tid] ; k++) // { // Tj_dest [k] = Tj_src [k] ; // } // this is safe (memmove_s not necessary): memmove (Tj + offset, Tj + Tp [range [tid]], sizeof (GrB_Index) * count [tid]) ; offset += count [tid] ; count [tid] = offset - count [tid] ; } // Compact empty space out of Tp #pragma omp parallel for num_threads(nthreads) schedule(static) for (tid = 0 ; tid < nthreads ; tid++) { GrB_Index ptr = Tp [range [tid]] ; for (int32_t i = range [tid] ; i < range [tid+1] ; i++) { Tp [i] -= ptr - count [tid] ; } } // finalize T Tp [n] = offset ; // free workspace LAGraph_Free ((void **) &count, NULL) ; LAGraph_Free ((void **) &range, NULL) ; // import S (unchanged since last export) GRB_TRY (GxB_Matrix_import_CSR (&S, type, nrows, ncols, &Sp, &Sj, &Sx, Sp_size, Sj_size, Sx_size, S_iso, S_jumbled, NULL)) ; // import T for the final phase GRB_TRY (GxB_Matrix_import_CSR (&T, type, nrows, ncols, &Tp, &Tj, &Tx, Tp_siz, Tj_siz, Tx_siz, T_iso, /* T is jumbled: */ true, NULL)) ; // restore G->A G->A = S ; } else { // no sampling; the final phase operates on the whole graph T = S ; } //-------------------------------------------------------------------------- // final phase //-------------------------------------------------------------------------- GRB_TRY (GrB_Matrix_nvals (&nnz, T)) ; bool change = true ; while (change && nnz > 0) { // hooking & shortcutting GRB_TRY (GrB_mxv (mngp, NULL, GrB_MIN_UINT32, GrB_MIN_SECOND_SEMIRING_UINT32, T, gp, NULL)) ; GRB_TRY (Reduce_assign32 (&f, &mngp, V32, n, nthreads, ht_key, ht_val, &seed, msg)) ; GRB_TRY (GrB_eWiseAdd (f, NULL, GrB_MIN_UINT32, GrB_MIN_UINT32, mngp, gp, NULL)) ; // calculate grandparent // fixme: NULL parameter is SS:GrB extension GRB_TRY (GrB_Vector_extractTuples (NULL, V32, &n, f)) ; // fixme int32_t k; #pragma omp parallel for num_threads(nthreads2) schedule(static) for (k = 0 ; k < n ; k++) { I [k] = (GrB_Index) V32 [k] ; } GRB_TRY (GrB_extract (gp_new, NULL, NULL, f, I, n, NULL)) ; // check termination GRB_TRY (GrB_eWiseMult (mod, NULL, NULL, GrB_NE_UINT32, gp_new, gp, NULL)) ; GRB_TRY (GrB_reduce (&change, NULL, GrB_LOR_MONOID_BOOL, mod, NULL)) ; // swap gp and gp_new GrB_Vector t = gp ; gp = gp_new ; gp_new = t ; } //-------------------------------------------------------------------------- // free workspace and return result //-------------------------------------------------------------------------- (*component) = f ; f = NULL ; if (sampling) { GrB_free (&T) ; } LG_FREE_ALL ; return (GrB_SUCCESS) ; #endif }