//------------------------------------------------------------------------------
// GB_kroner: Kronecker product, C = kron (A,B)
//------------------------------------------------------------------------------

// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2023, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//------------------------------------------------------------------------------

// JIT: needed.

// C = kron(A,B) where op determines the binary multiplier to use.  The types
// of A and B must be compatible with the x and y inputs of z=op(x,y),
// respectively, but they can differ from them.  The type of C is the type of
// z.  C is hypersparse if either A or B is hypersparse.

// FUTURE: this would be faster with built-in types and operators.

// FUTURE: at most one thread is used for each vector of C=kron(A,B).  The
// matrix C is normally very large, but if both A and B are n-by-1, then C is
// n^2-by-1 and only a single thread is used.  A better method for this case
// would construct the vectors of C in parallel.

// FUTURE: each vector C(:,k) takes O(nnz(C(:,k))) work, but this is not
// accounted for in the parallel load-balancing.

#define GB_FREE_WORKSPACE           \
{                                   \
    GB_Matrix_free (&Awork) ;       \
    GB_Matrix_free (&Bwork) ;       \
}

#define GB_FREE_ALL                 \
{                                   \
    GB_FREE_WORKSPACE ;             \
    GB_phybix_free (C) ;            \
}

#include "GB_kron.h"
#include "GB_emult.h"

GrB_Info GB_kroner                  // C = kron (A,B)
(
    GrB_Matrix C,                   // output matrix
    const bool C_is_csc,            // desired format of C
    const GrB_BinaryOp op,          // multiply operator
    const GrB_Matrix A_in,          // input matrix
    bool A_is_pattern,              // true if values of A are not used
    const GrB_Matrix B_in,          // input matrix
    bool B_is_pattern,              // true if values of B are not used
    GB_Werk Werk
)
{

    //--------------------------------------------------------------------------
    // check inputs
    //--------------------------------------------------------------------------

    GrB_Info info ;
    ASSERT (C != NULL && (C->static_header || GBNSTATIC)) ;

    struct GB_Matrix_opaque Awork_header, Bwork_header ;
    GrB_Matrix Awork = NULL, Bwork = NULL ;

    ASSERT_MATRIX_OK (A_in, "A_in for kron (A,B)", GB0) ;
    ASSERT_MATRIX_OK (B_in, "B_in for kron (A,B)", GB0) ;
    ASSERT_BINARYOP_OK (op, "op for kron (A,B)", GB0) ;

    //--------------------------------------------------------------------------
    // finish any pending work
    //--------------------------------------------------------------------------

    GB_MATRIX_WAIT (A_in) ;
    GB_MATRIX_WAIT (B_in) ;

    //--------------------------------------------------------------------------
    // bitmap case: create sparse copies of A and B if they are bitmap
    //--------------------------------------------------------------------------

    GrB_Matrix A = A_in ;
    if (GB_IS_BITMAP (A))
    {
        GBURBLE ("A:") ;
        // set Awork->iso = A->iso      OK: no need for burble
        GB_CLEAR_STATIC_HEADER (Awork, &Awork_header) ;
        GB_OK (GB_dup_worker (&Awork, A->iso, A, true, NULL)) ;
        ASSERT_MATRIX_OK (Awork, "dup Awork for kron (A,B)", GB0) ;
        GB_OK (GB_convert_bitmap_to_sparse (Awork, Werk)) ;
        ASSERT_MATRIX_OK (Awork, "to sparse, Awork for kron (A,B)", GB0) ;
        A = Awork ;
    }

    GrB_Matrix B = B_in ;
    if (GB_IS_BITMAP (B))
    {
        GBURBLE ("B:") ;
        // set Bwork->iso = B->iso      OK: no need for burble
        GB_CLEAR_STATIC_HEADER (Bwork, &Bwork_header) ;
        GB_OK (GB_dup_worker (&Bwork, B->iso, B, true, NULL)) ;
        ASSERT_MATRIX_OK (Bwork, "dup Bwork for kron (A,B)", GB0) ;
        GB_OK (GB_convert_bitmap_to_sparse (Bwork, Werk)) ;
        ASSERT_MATRIX_OK (Bwork, "to sparse, Bwork for kron (A,B)", GB0) ;
        B = Bwork ;
    }
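
    // A sketch of the index arithmetic used below, for intuition only.  If A
    // is m-by-n and B is p-by-q, then C = kron (A,B) is (m*p)-by-(n*q), with
    //
    //      C (iA*p + iB, jA*q + jB) = op (A (iA,jA), B (iB,jB)) .
    //
    // For example, with m = n = 2, p = 3, and q = 2, the pairing of A(1,0)
    // and B(2,1) lands in C (1*3+2, 0*2+1) = C (5,1).  In the code below,
    // matrices are held by vector, so p is bvlen and q is bvdim.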

    //--------------------------------------------------------------------------
    // get inputs
    //--------------------------------------------------------------------------

    const int64_t *restrict Ap = A->p ;
    const int64_t *restrict Ah = A->h ;
    const int64_t *restrict Ai = A->i ;
    const GB_void *restrict Ax = A_is_pattern ? NULL : ((GB_void *) A->x) ;
    const int64_t asize = A->type->size ;
    const int64_t avlen = A->vlen ;
    const int64_t avdim = A->vdim ;
    int64_t anvec = A->nvec ;
    int64_t anz = GB_nnz (A) ;

    const int64_t *restrict Bp = B->p ;
    const int64_t *restrict Bh = B->h ;
    const int64_t *restrict Bi = B->i ;
    const GB_void *restrict Bx = B_is_pattern ? NULL : ((GB_void *) B->x) ;
    const int64_t bsize = B->type->size ;
    const int64_t bvlen = B->vlen ;
    const int64_t bvdim = B->vdim ;
    int64_t bnvec = B->nvec ;
    int64_t bnz = GB_nnz (B) ;

    //--------------------------------------------------------------------------
    // determine the number of threads to use
    //--------------------------------------------------------------------------

    double work = ((double) anz) * ((double) bnz)
                + (((double) anvec) * ((double) bnvec)) ;
    int nthreads_max = GB_Context_nthreads_max ( ) ;
    double chunk = GB_Context_chunk ( ) ;
    int nthreads = GB_nthreads (work, chunk, nthreads_max) ;

    //--------------------------------------------------------------------------
    // check if C is iso and compute its iso value if it is
    //--------------------------------------------------------------------------

    GrB_Type ctype = op->ztype ;
    const size_t csize = ctype->size ;
    GB_void cscalar [GB_VLA(csize)] ;
    bool C_iso = GB_emult_iso (cscalar, ctype, A, B, op) ;

    //--------------------------------------------------------------------------
    // allocate the output matrix C
    //--------------------------------------------------------------------------

    // C has the same type as z for the multiply operator, z=op(x,y)
    GrB_Index cvlen, cvdim, cnzmax, cnvec ;

    bool ok = GB_int64_multiply (&cvlen, avlen, bvlen) ;
    ok = ok && GB_int64_multiply (&cvdim, avdim, bvdim) ;
    ok = ok && GB_int64_multiply (&cnzmax, anz, bnz) ;
    ok = ok && GB_int64_multiply (&cnvec, anvec, bnvec) ;
    ASSERT (ok) ;
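
    // The dimensions and entry counts of C are products of those of A and B,
    // so they can overflow even when A and B are modest.  As a worked case
    // (illustrative numbers only): if A and B each have 2^31 rows, then C
    // has 2^62 rows, which exceeds the library's maximum matrix dimension;
    // GB_int64_multiply reports such an overflow rather than silently
    // wrapping.  The caller is expected to have ruled this out already,
    // which is why the code only asserts ok instead of returning an error.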

    if (C_iso)
    {
        // the values of A and B are no longer needed if C is iso
        GBURBLE ("(iso kron) ") ;
        A_is_pattern = true ;
        B_is_pattern = true ;
    }

    // C is hypersparse if either A or B is hypersparse.  It is never bitmap.
    bool C_is_hyper = (cvdim > 1) && (Ah != NULL || Bh != NULL) ;
    bool C_is_full = GB_as_if_full (A) && GB_as_if_full (B) ;
    int sparsity = C_is_full ? GxB_FULL :
        (C_is_hyper ? GxB_HYPERSPARSE : GxB_SPARSE) ;

    // set C->iso = C_iso   OK
    GB_OK (GB_new_bix (&C,  // full, sparse, or hyper; existing header
        ctype, (int64_t) cvlen, (int64_t) cvdim, GB_Ap_malloc, C_is_csc,
        sparsity, true, B->hyper_switch, cnvec, cnzmax, true, C_iso)) ;

    //--------------------------------------------------------------------------
    // get C and the operator
    //--------------------------------------------------------------------------

    int64_t *restrict Cp = C->p ;
    int64_t *restrict Ch = C->h ;
    int64_t *restrict Ci = C->i ;
    GB_void *restrict Cx = (GB_void *) C->x ;
    int64_t *restrict Cx_int64 = NULL ;
    int32_t *restrict Cx_int32 = NULL ;

    GxB_binary_function fmult = op->binop_function ;
    GB_Opcode opcode = op->opcode ;
    bool op_is_positional = GB_OPCODE_IS_POSITIONAL (opcode) ;
    GB_cast_function cast_A = NULL, cast_B = NULL ;
    if (!A_is_pattern)
    {
        cast_A = GB_cast_factory (op->xtype->code, A->type->code) ;
    }
    if (!B_is_pattern)
    {
        cast_B = GB_cast_factory (op->ytype->code, B->type->code) ;
    }

    int64_t offset = 0 ;
    bool depends_on_j = false ;
    if (op_is_positional)
    {
        offset = GB_positional_offset (opcode, NULL, &depends_on_j) ;
        Cx_int64 = (int64_t *) Cx ;
        Cx_int32 = (int32_t *) Cx ;
    }
    bool is64 = (ctype == GrB_INT64) ;

    //--------------------------------------------------------------------------
    // compute the column counts of C, and C->h if C is hypersparse
    //--------------------------------------------------------------------------

    int64_t kC ;
    if (!C_is_full)
    {
        // C is sparse or hypersparse
        #pragma omp parallel for num_threads(nthreads) schedule(guided)
        for (kC = 0 ; kC < cnvec ; kC++)
        {
            const int64_t kA = kC / bnvec ;
            const int64_t kB = kC % bnvec ;

            // get A(:,jA), the (kA)th vector of A
            const int64_t jA = GBH (Ah, kA) ;
            const int64_t aknz = (Ap == NULL) ? avlen : (Ap [kA+1] - Ap [kA]) ;

            // get B(:,jB), the (kB)th vector of B
            const int64_t jB = GBH (Bh, kB) ;
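
            // Every entry in A(:,jA) pairs with every entry in B(:,jB), so
            // C(:,jC) has exactly aknz*bknz entries; for example, 2 entries
            // in A(:,jA) and 3 entries in B(:,jB) give 6 entries in C(:,jC).
            // Cp [kC] holds this count for now; the GB_cumsum below converts
            // Cp into the usual cumulative vector-pointer form.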
            const int64_t bknz = (Bp == NULL) ? bvlen : (Bp [kB+1] - Bp [kB]) ;

            // determine the # of entries in C(:,jC), the (kC)th vector of C,
            // where kC = kA * bnvec + kB
            Cp [kC] = aknz * bknz ;
            if (C_is_hyper)
            {
                Ch [kC] = jA * bvdim + jB ;
            }
        }

        GB_cumsum (Cp, cnvec, &(C->nvec_nonempty), nthreads, Werk) ;
        C->nvals = Cp [cnvec] ;
        if (C_is_hyper) C->nvec = cnvec ;
    }

    C->magic = GB_MAGIC ;

    //--------------------------------------------------------------------------
    // C = kron (A,B) where C is iso and full
    //--------------------------------------------------------------------------

    if (C_iso)
    {
        // Cx [0] = cscalar = op (A,B)
        memcpy (C->x, cscalar, csize) ;
        if (C_is_full)
        {
            // no more work to do if C is iso and full
            ASSERT_MATRIX_OK (C, "C=kron(A,B), iso full", GB0) ;
            GB_FREE_WORKSPACE ;
            return (GrB_SUCCESS) ;
        }
    }

    //--------------------------------------------------------------------------
    // C = kron (A,B)
    //--------------------------------------------------------------------------

    const bool A_iso = A->iso ;
    const bool B_iso = B->iso ;

    #pragma omp parallel for num_threads(nthreads) schedule(guided)
    for (kC = 0 ; kC < cnvec ; kC++)
    {
        int64_t kA = kC / bnvec ;
        int64_t kB = kC % bnvec ;

        // get B(:,jB), the (kB)th vector of B
        int64_t jB = GBH (Bh, kB) ;
        int64_t pB_start = GBP (Bp, kB, bvlen) ;
        int64_t pB_end   = GBP (Bp, kB+1, bvlen) ;
        int64_t bknz = pB_end - pB_start ;      // # of entries in B(:,jB)
        if (bknz == 0) continue ;
        GB_void bwork [GB_VLA(bsize)] ;
        if (!B_is_pattern && B_iso)
        {
            cast_B (bwork, Bx, bsize) ;
        }

        // get C(:,jC), the (kC)th vector of C, where kC = kA * bnvec + kB
        int64_t pC = GBP (Cp, kC, cvlen) ;

        // get A(:,jA), the (kA)th vector of A
        int64_t jA = GBH (Ah, kA) ;
        int64_t pA_start = GBP (Ap, kA, avlen) ;
        int64_t pA_end   = GBP (Ap, kA+1, avlen) ;
        GB_void awork [GB_VLA(asize)] ;
        if (!A_is_pattern && A_iso)
        {
            cast_A (awork, Ax, asize) ;
        }

        for (int64_t pA = pA_start ; pA < pA_end ; pA++)
        {
            // awork = A(iA,jA), typecasted to op->xtype
            int64_t iA = GBI (Ai, pA, avlen) ;
            int64_t iAblock = iA * bvlen ;
            if (!A_is_pattern && !A_iso)
            {
                cast_A (awork, Ax + (pA*asize), asize) ;
            }
            for (int64_t pB = pB_start ; pB < pB_end ; pB++)
            {
                // bwork = B(iB,jB), typecasted to op->ytype
                int64_t iB = GBI (Bi, pB, bvlen) ;
                if (!B_is_pattern && !B_iso)
                {
                    cast_B (bwork, Bx + (pB*bsize), bsize) ;
                }
                // C(iC,jC) = A(iA,jA) * B(iB,jB)
                if (!C_is_full)
                {
                    int64_t iC = iAblock + iB ;
                    Ci [pC] = iC ;
                }
                if (op_is_positional)
                {
                    // positional binary operator
                    switch (opcode)
                    {
                        case GB_FIRSTI_binop_code :
                            // z = first_i (A(iA,jA),y) == iA
                        case GB_FIRSTI1_binop_code :
                            // z = first_i1 (A(iA,jA),y) == iA+1
                            if (is64)
                            {
                                Cx_int64 [pC] = iA + offset ;
                            }
                            else
                            {
                                Cx_int32 [pC] = (int32_t) (iA + offset) ;
                            }
                            break ;
                        case GB_FIRSTJ_binop_code :
                            // z = first_j (A(iA,jA),y) == jA
                        case GB_FIRSTJ1_binop_code :
                            // z = first_j1 (A(iA,jA),y) == jA+1
                            if (is64)
                            {
                                Cx_int64 [pC] = jA + offset ;
                            }
                            else
                            {
                                Cx_int32 [pC] = (int32_t) (jA + offset) ;
                            }
                            break ;
                        case GB_SECONDI_binop_code :
                            // z = second_i (x,B(iB,jB)) == iB
                        case GB_SECONDI1_binop_code :
                            // z = second_i1 (x,B(iB,jB)) == iB+1
                            if (is64)
                            {
                                Cx_int64 [pC] = iB + offset ;
                            }
                            else
                            {
                                Cx_int32 [pC] = (int32_t) (iB + offset) ;
                            }
                            break ;
                        case GB_SECONDJ_binop_code :
                            // z = second_j (x,B(iB,jB)) == jB
                        case GB_SECONDJ1_binop_code :
                            // z = second_j1 (x,B(iB,jB)) == jB+1
                            if (is64)
                            {
                                Cx_int64 [pC] = jB + offset ;
                            }
                            else
                            {
                                Cx_int32 [pC] = (int32_t) (jB + offset) ;
                            }
                            break ;
                        default: ;
                    }
                }
                else if (!C_iso)
                {
                    // standard binary operator
                    fmult (Cx + (pC*csize), awork, bwork) ;
                }
                pC++ ;
            }
        }
    }
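
    // Note on the loop order above: within one vector of C, iC = iA*bvlen +
    // iB with 0 <= iB < bvlen, and the pattern of each vector of A and B is
    // stored in ascending index order (pending work was finished above).
    // The outer loop ascends over iA and the inner loop over iB, so iC is
    // strictly increasing, and each C(:,jC) is built with its indices
    // already sorted; no sort pass is needed afterward.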

    //--------------------------------------------------------------------------
    // remove empty vectors from C, if hypersparse
    //--------------------------------------------------------------------------

    GB_OK (GB_hypermatrix_prune (C, Werk)) ;

    //--------------------------------------------------------------------------
    // return result
    //--------------------------------------------------------------------------

    ASSERT_MATRIX_OK (C, "C=kron(A,B)", GB0) ;
    GB_FREE_WORKSPACE ;
    return (GrB_SUCCESS) ;
}
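
//------------------------------------------------------------------------------
// usage sketch
//------------------------------------------------------------------------------

// For illustration only (a hedged sketch, not part of this kernel): a user
// typically reaches GB_kroner through the GrB_kronecker entry point of the
// GraphBLAS C API, as in the fragment below.  The matrices A and B, and the
// error handling, are assumed to be set up by the caller.
//
//      GrB_Matrix C = NULL ;
//      GrB_Index anrows, ancols, bnrows, bncols ;
//      GrB_Matrix_nrows (&anrows, A) ;  GrB_Matrix_ncols (&ancols, A) ;
//      GrB_Matrix_nrows (&bnrows, B) ;  GrB_Matrix_ncols (&bncols, B) ;
//      GrB_Matrix_new (&C, GrB_FP64, anrows*bnrows, ancols*bncols) ;
//      // C = kron (A,B), multiplying pairs of entries with GrB_TIMES_FP64:
//      GrB_kronecker (C, NULL, NULL, GrB_TIMES_FP64, A, B, NULL) ;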