//------------------------------------------------------------------------------ // GB_AxB__land_gt_fp32.c: matrix multiply for a single semiring //------------------------------------------------------------------------------ // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2023, All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 //------------------------------------------------------------------------------ #include "GB.h" #include "GB_control.h" #include "GB_AxB_kernels.h" #include "GB_AxB__include2.h" // semiring operators: #define GB_MULTADD(z,a,b,i,k,j) z &= (a > b) #define GB_MULT(z,a,b,i,k,j) z = (a > b) #define GB_ADD(z,zin,t) z = zin && t #define GB_UPDATE(z,t) z &= t // identity: true // A matrix, typecast to A2 for multiplier input #define GB_A_TYPE float #define GB_A2TYPE float #define GB_DECLAREA(aik) float aik #define GB_GETA(aik,Ax,pA,A_iso) aik = Ax [(A_iso) ? 0 : (pA)] // B matrix, typecast to B2 for multiplier input #define GB_B_TYPE float #define GB_B2TYPE float #define GB_DECLAREB(bkj) float bkj #define GB_GETB(bkj,Bx,pB,B_iso) bkj = Bx [(B_iso) ? 0 : (pB)] // C matrix #define GB_C_ISO 0 #define GB_C_TYPE bool #define GB_PUTC(cij,Cx,p) Cx [p] = cij // special case semirings: // monoid properties: #define GB_Z_TYPE bool #define GB_DECLARE_IDENTITY(z) bool z = true #define GB_DECLARE_IDENTITY_CONST(z) const bool z = true #define GB_Z_NBITS 8 #define GB_HAS_IDENTITY_BYTE 1 #define GB_IDENTITY_BYTE 1 #define GB_Z_ATOMIC_BITS 8 #define GB_Z_HAS_ATOMIC_UPDATE 1 #define GB_Z_HAS_OMP_ATOMIC_UPDATE (!GB_COMPILER_MSC) #define GB_MONOID_IS_TERMINAL 1 #define GB_TERMINAL_CONDITION(z,zterminal) (z == false) #define GB_IF_TERMINAL_BREAK(z,zterminal) if (z == false) { break ; } #define GB_DECLARE_TERMINAL_CONST(zterminal) const bool zterminal = false // special case multipliers: // disable this semiring and use the generic case if these conditions hold #if (defined(GxB_NO_LAND) || defined(GxB_NO_GT) || defined(GxB_NO_FP32) || defined(GxB_NO_LAND_BOOL) || defined(GxB_NO_GT_FP32) || defined(GxB_NO_LAND_GT_FP32)) #define GB_DISABLE 1 #else #define GB_DISABLE 0 #endif #include "GB_mxm_shared_definitions.h" //------------------------------------------------------------------------------ // GB_Adot2B: C=A'*B, C=A'*B, or C=A'*B: dot product method, C is bitmap //------------------------------------------------------------------------------ // if A_not_transposed is true, then C=A*B is computed where A is bitmap or full GrB_Info GB (_Adot2B__land_gt_fp32) ( GrB_Matrix C, const GrB_Matrix M, const bool Mask_comp, const bool Mask_struct, const bool A_not_transposed, const GrB_Matrix A, int64_t *restrict A_slice, const GrB_Matrix B, int64_t *restrict B_slice, int nthreads, int naslice, int nbslice ) { #if GB_DISABLE return (GrB_NO_VALUE) ; #else #include "GB_AxB_dot2_meta.c" return (GrB_SUCCESS) ; #endif } //------------------------------------------------------------------------------ // GB_Adot3B: C=A'*B: masked dot product, C is sparse or hyper //------------------------------------------------------------------------------ GrB_Info GB (_Adot3B__land_gt_fp32) ( GrB_Matrix C, const GrB_Matrix M, const bool Mask_struct, const GrB_Matrix A, const GrB_Matrix B, const GB_task_struct *restrict TaskList, const int ntasks, const int nthreads ) { #if GB_DISABLE return (GrB_NO_VALUE) ; #else #include "GB_AxB_dot3_meta.c" return (GrB_SUCCESS) ; #endif } //------------------------------------------------------------------------------ // GB_Adot4B: C+=A'*B: dense dot product //------------------------------------------------------------------------------ GrB_Info GB (_Adot4B__land_gt_fp32) ( GrB_Matrix C, const bool C_in_iso, const GrB_Matrix A, const GrB_Matrix B, const int64_t *restrict A_slice, const int64_t *restrict B_slice, const int naslice, const int nbslice, const int nthreads, GB_Werk Werk ) { #if GB_DISABLE return (GrB_NO_VALUE) ; #else #include "GB_AxB_dot4_meta.c" return (GrB_SUCCESS) ; #endif } //------------------------------------------------------------------------------ // GB_AsaxbitB: C=A*B, C=A*B, C=A*B: saxpy method, C is bitmap only //------------------------------------------------------------------------------ #include "GB_AxB_saxpy3_template.h" GrB_Info GB (_AsaxbitB__land_gt_fp32) ( GrB_Matrix C, const GrB_Matrix M, const bool Mask_comp, const bool Mask_struct, const GrB_Matrix A, const GrB_Matrix B, const int ntasks, const int nthreads, const int nfine_tasks_per_vector, const bool use_coarse_tasks, const bool use_atomics, const int64_t *restrict M_ek_slicing, const int M_nthreads, const int M_ntasks, const int64_t *restrict A_slice, const int64_t *restrict H_slice, GB_void *restrict Wcx, int8_t *restrict Wf ) { #if GB_DISABLE return (GrB_NO_VALUE) ; #else int nthreads_max = GB_Context_nthreads_max ( ) ; double chunk = GB_Context_chunk ( ) ; #include "GB_AxB_saxbit_template.c" return (GrB_SUCCESS) ; #endif } //------------------------------------------------------------------------------ // GB_Asaxpy4B: C += A*B when C is full //------------------------------------------------------------------------------ GrB_Info GB (_Asaxpy4B__land_gt_fp32) ( GrB_Matrix C, const GrB_Matrix A, const GrB_Matrix B, const int ntasks, const int nthreads, const int nfine_tasks_per_vector, const bool use_coarse_tasks, const bool use_atomics, const int64_t *A_slice, const int64_t *H_slice, GB_void *restrict Wcx, int8_t *restrict Wf ) { #if GB_DISABLE return (GrB_NO_VALUE) ; #else #include "GB_AxB_saxpy4_meta.c" return (GrB_SUCCESS) ; #endif } //------------------------------------------------------------------------------ // GB_Asaxpy5B: C += A*B when C is full, A is bitmap/full, B is sparse/hyper //------------------------------------------------------------------------------ #if !GB_DISABLE && !GB_A_IS_PATTERN //---------------------------------------------------------------------- // saxpy5 method unrolled, with no vectors //---------------------------------------------------------------------- #undef GB_V16 #undef GB_V8 #undef GB_V4 #define GB_V16 0 #define GB_V8 0 #define GB_V4 0 static inline void GB_AxB_saxpy5_unrolled_vanilla ( GrB_Matrix C, const GrB_Matrix A, const GrB_Matrix B, const int ntasks, const int nthreads, const int64_t *B_slice ) { #include "GB_AxB_saxpy5_unrolled.c" } #endif GrB_Info GB (_Asaxpy5B__land_gt_fp32) ( GrB_Matrix C, const GrB_Matrix A, const GrB_Matrix B, const int ntasks, const int nthreads, const int64_t *B_slice ) { #if GB_DISABLE return (GrB_NO_VALUE) ; #else #include "GB_AxB_saxpy5_meta.c" return (GrB_SUCCESS) ; #endif } //------------------------------------------------------------------------------ // GB_Asaxpy3B: C=A*B, C=A*B, C=A*B: saxpy method (Gustavson + Hash) //------------------------------------------------------------------------------ GrB_Info GB (_Asaxpy3B__land_gt_fp32) ( GrB_Matrix C, // C=A*B, C sparse or hypersparse const GrB_Matrix M, const bool Mask_comp, const bool Mask_struct, const bool M_in_place, const GrB_Matrix A, const GrB_Matrix B, GB_saxpy3task_struct *restrict SaxpyTasks, const int ntasks, const int nfine, const int nthreads, const int do_sort, GB_Werk Werk ) { #if GB_DISABLE return (GrB_NO_VALUE) ; #else ASSERT (GB_IS_SPARSE (C) || GB_IS_HYPERSPARSE (C)) ; if (M == NULL) { // C = A*B, no mask return (GB (_Asaxpy3B_noM__land_gt_fp32) (C, A, B, SaxpyTasks, ntasks, nfine, nthreads, do_sort, Werk)) ; } else if (!Mask_comp) { // C = A*B return (GB (_Asaxpy3B_M__land_gt_fp32) (C, M, Mask_struct, M_in_place, A, B, SaxpyTasks, ntasks, nfine, nthreads, do_sort, Werk)) ; } else { // C = A*B return (GB (_Asaxpy3B_notM__land_gt_fp32) (C, M, Mask_struct, M_in_place, A, B, SaxpyTasks, ntasks, nfine, nthreads, do_sort, Werk)) ; } #endif } //------------------------------------------------------------------------------ // GB_Asaxpy3B_M: C=A*B: saxpy method (Gustavson + Hash) //------------------------------------------------------------------------------ #if ( !GB_DISABLE ) GrB_Info GB (_Asaxpy3B_M__land_gt_fp32) ( GrB_Matrix C, // C=A*B, C sparse or hypersparse const GrB_Matrix M, const bool Mask_struct, const bool M_in_place, const GrB_Matrix A, const GrB_Matrix B, GB_saxpy3task_struct *restrict SaxpyTasks, const int ntasks, const int nfine, const int nthreads, const int do_sort, GB_Werk Werk ) { int nthreads_max = GB_Context_nthreads_max ( ) ; double chunk = GB_Context_chunk ( ) ; if (GB_IS_SPARSE (A) && GB_IS_SPARSE (B)) { // both A and B are sparse #define GB_META16 #define GB_NO_MASK 0 #define GB_MASK_COMP 0 #define GB_A_IS_SPARSE 1 #define GB_A_IS_HYPER 0 #define GB_A_IS_BITMAP 0 #define GB_A_IS_FULL 0 #define GB_B_IS_SPARSE 1 #define GB_B_IS_HYPER 0 #define GB_B_IS_BITMAP 0 #define GB_B_IS_FULL 0 #include "GB_meta16_definitions.h" #include "GB_AxB_saxpy3_template.c" } else { // general case #undef GB_META16 #define GB_NO_MASK 0 #define GB_MASK_COMP 0 #include "GB_meta16_definitions.h" #include "GB_AxB_saxpy3_template.c" } return (GrB_SUCCESS) ; } #endif //------------------------------------------------------------------------------ // GB_Asaxpy3B_noM: C=A*B: saxpy method (Gustavson + Hash) //------------------------------------------------------------------------------ #if ( !GB_DISABLE ) GrB_Info GB (_Asaxpy3B_noM__land_gt_fp32) ( GrB_Matrix C, // C=A*B, C sparse or hypersparse const GrB_Matrix A, const GrB_Matrix B, GB_saxpy3task_struct *restrict SaxpyTasks, const int ntasks, const int nfine, const int nthreads, const int do_sort, GB_Werk Werk ) { int nthreads_max = GB_Context_nthreads_max ( ) ; double chunk = GB_Context_chunk ( ) ; if (GB_IS_SPARSE (A) && GB_IS_SPARSE (B)) { // both A and B are sparse #define GB_META16 #define GB_NO_MASK 1 #define GB_MASK_COMP 0 #define GB_A_IS_SPARSE 1 #define GB_A_IS_HYPER 0 #define GB_A_IS_BITMAP 0 #define GB_A_IS_FULL 0 #define GB_B_IS_SPARSE 1 #define GB_B_IS_HYPER 0 #define GB_B_IS_BITMAP 0 #define GB_B_IS_FULL 0 #include "GB_meta16_definitions.h" #include "GB_AxB_saxpy3_template.c" } else { // general case #undef GB_META16 #define GB_NO_MASK 1 #define GB_MASK_COMP 0 #include "GB_meta16_definitions.h" #include "GB_AxB_saxpy3_template.c" } return (GrB_SUCCESS) ; } #endif //------------------------------------------------------------------------------ // GB_Asaxpy3B_notM: C=A*B: saxpy method (Gustavson + Hash) //------------------------------------------------------------------------------ #if ( !GB_DISABLE ) GrB_Info GB (_Asaxpy3B_notM__land_gt_fp32) ( GrB_Matrix C, // C=A*B, C sparse or hypersparse const GrB_Matrix M, const bool Mask_struct, const bool M_in_place, const GrB_Matrix A, const GrB_Matrix B, GB_saxpy3task_struct *restrict SaxpyTasks, const int ntasks, const int nfine, const int nthreads, const int do_sort, GB_Werk Werk ) { int nthreads_max = GB_Context_nthreads_max ( ) ; double chunk = GB_Context_chunk ( ) ; if (GB_IS_SPARSE (A) && GB_IS_SPARSE (B)) { // both A and B are sparse #define GB_META16 #define GB_NO_MASK 0 #define GB_MASK_COMP 1 #define GB_A_IS_SPARSE 1 #define GB_A_IS_HYPER 0 #define GB_A_IS_BITMAP 0 #define GB_A_IS_FULL 0 #define GB_B_IS_SPARSE 1 #define GB_B_IS_HYPER 0 #define GB_B_IS_BITMAP 0 #define GB_B_IS_FULL 0 #include "GB_meta16_definitions.h" #include "GB_AxB_saxpy3_template.c" } else { // general case #undef GB_META16 #define GB_NO_MASK 0 #define GB_MASK_COMP 1 #include "GB_meta16_definitions.h" #include "GB_AxB_saxpy3_template.c" } return (GrB_SUCCESS) ; } #endif