//------------------------------------------------------------------------------ // GB_AxB.c: matrix multiply for a single semiring //------------------------------------------------------------------------------ // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2023, All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 //------------------------------------------------------------------------------ #include "GB.h" #include "GB_control.h" #include "GB_AxB_kernels.h" GB_axb__include_h // semiring operators: GB_multiply_add GB_multiply GB_add_op GB_add_update // identity: GB_identity // A matrix, typecast to A2 for multiplier input GB_a_is_pattern GB_atype GB_a2type GB_declarea GB_geta // B matrix, typecast to B2 for multiplier input GB_b_is_pattern GB_btype GB_b2type GB_declareb GB_getb GB_bsize // C matrix GB_c_iso GB_ctype GB_putc // special case semirings: GB_is_any_pair_semiring GB_is_lxor_pair_semiring GB_is_plus_pair_real_semiring GB_is_plus_pair_8_semiring GB_is_plus_pair_16_semiring GB_is_plus_pair_32_semiring GB_is_plus_pair_big_semiring GB_is_plus_pair_fc32_semiring GB_is_plus_pair_fc64_semiring GB_is_min_firstj_semiring GB_is_max_firstj_semiring GB_semiring_has_avx // monoid properties: GB_ztype GB_declare_identity GB_declare_const_identity GB_z_nbits GB_has_identity_byte GB_identity_byte GB_z_atomic_bits GB_z_atomic_type GB_z_has_atomic_update GB_z_has_omp_atomic_update GB_ztype_ignore_overflow GB_pragma_simd_reduction_monoid GB_is_any_monoid GB_is_imin_monoid GB_is_imax_monoid GB_is_fmin_monoid GB_is_fmax_monoid GB_is_plus_fc32_monoid GB_is_plus_fc64_monoid GB_monoid_is_terminal GB_terminal_condition GB_if_terminal_break GB_declare_const_terminal GB_ztype_is_complex // special case multipliers: GB_is_pair_multiplier GB_pair_one GB_offset GB_is_firsti_multiplier GB_is_firstj_multiplier GB_is_secondj_multiplier // disable this semiring and use the generic case if these conditions hold GB_disable #include "GB_mxm_shared_definitions.h" 
//------------------------------------------------------------------------------ // GB_Adot2B: C=A'*B, C=A'*B, or C=A'*B: dot product method, C is bitmap //------------------------------------------------------------------------------ // if A_not_transposed is true, then C=A*B is computed where A is bitmap or full GrB_Info GB (_Adot2B) ( GrB_Matrix C, const GrB_Matrix M, const bool Mask_comp, const bool Mask_struct, const bool A_not_transposed, const GrB_Matrix A, int64_t *restrict A_slice, const GrB_Matrix B, int64_t *restrict B_slice, int nthreads, int naslice, int nbslice ) { #if GB_DISABLE return (GrB_NO_VALUE) ; #else #include "GB_AxB_dot2_meta.c" return (GrB_SUCCESS) ; #endif } //------------------------------------------------------------------------------ // GB_Adot3B: C=A'*B: masked dot product, C is sparse or hyper //------------------------------------------------------------------------------ GrB_Info GB (_Adot3B) ( GrB_Matrix C, const GrB_Matrix M, const bool Mask_struct, const GrB_Matrix A, const GrB_Matrix B, const GB_task_struct *restrict TaskList, const int ntasks, const int nthreads ) { #if GB_DISABLE return (GrB_NO_VALUE) ; #else #include "GB_AxB_dot3_meta.c" return (GrB_SUCCESS) ; #endif } m4_divert(if_dot4_enabled) //------------------------------------------------------------------------------ // GB_Adot4B: C+=A'*B: dense dot product //------------------------------------------------------------------------------ GrB_Info GB (_Adot4B) ( GrB_Matrix C, const bool C_in_iso, const GrB_Matrix A, const GrB_Matrix B, const int64_t *restrict A_slice, const int64_t *restrict B_slice, const int naslice, const int nbslice, const int nthreads, GB_Werk Werk ) { #if GB_DISABLE return (GrB_NO_VALUE) ; #else #include "GB_AxB_dot4_meta.c" return (GrB_SUCCESS) ; #endif } m4_divert(0) //------------------------------------------------------------------------------ // GB_AsaxbitB: C=A*B, C=A*B, C=A*B: saxpy method, C is bitmap only 
//------------------------------------------------------------------------------

#include "GB_AxB_saxpy3_template.h"

// Returns GrB_NO_VALUE when GB_DISABLE is nonzero at compile time.  Otherwise
// nthreads_max and chunk are read from the current context, the saxbit
// template is textually included, and GrB_SUCCESS is returned.

GrB_Info GB (_AsaxbitB)
(
    GrB_Matrix C,
    const GrB_Matrix M, const bool Mask_comp, const bool Mask_struct,
    const GrB_Matrix A,
    const GrB_Matrix B,
    const int ntasks,
    const int nthreads,
    const int nfine_tasks_per_vector,
    const bool use_coarse_tasks,
    const bool use_atomics,
    const int64_t *restrict M_ek_slicing,
    const int M_nthreads,
    const int M_ntasks,
    const int64_t *restrict A_slice,
    const int64_t *restrict H_slice,
    GB_void *restrict Wcx,
    int8_t *restrict Wf
)
{
    #if GB_DISABLE
    return (GrB_NO_VALUE) ;
    #else
    // not referenced in the text visible here; presumably consumed by the
    // included template
    int nthreads_max = GB_Context_nthreads_max ( ) ;
    double chunk = GB_Context_chunk ( ) ;
    #include "GB_AxB_saxbit_template.c"
    return (GrB_SUCCESS) ;
    #endif
}

m4_divert(if_saxpy4_enabled)
//------------------------------------------------------------------------------
// GB_Asaxpy4B: C += A*B when C is full
//------------------------------------------------------------------------------

// Returns GrB_NO_VALUE when GB_DISABLE is nonzero at compile time.  Otherwise
// the body is the textually included saxpy4 template followed by a return of
// GrB_SUCCESS.

GrB_Info GB (_Asaxpy4B)
(
    GrB_Matrix C,
    const GrB_Matrix A,
    const GrB_Matrix B,
    const int ntasks,
    const int nthreads,
    const int nfine_tasks_per_vector,
    const bool use_coarse_tasks,
    const bool use_atomics,
    const int64_t *A_slice,
    const int64_t *H_slice,
    GB_void *restrict Wcx,
    int8_t *restrict Wf
)
{
    #if GB_DISABLE
    return (GrB_NO_VALUE) ;
    #else
    #include "GB_AxB_saxpy4_meta.c"
    return (GrB_SUCCESS) ;
    #endif
}
m4_divert(0)

m4_divert(if_saxpy5_enabled)
//------------------------------------------------------------------------------
// GB_Asaxpy5B: C += A*B when C is full, A is bitmap/full, B is sparse/hyper
//------------------------------------------------------------------------------

// The helper kernels below are only compiled when the semiring is enabled and
// the values of A are used (A is not pattern-only).  The same unrolled
// template text is included once per helper; GB_V16, GB_V8 and GB_V4 are
// redefined before each inclusion, so the order of the define/undef lines
// relative to the includes matters.

    #if !GB_DISABLE && !GB_A_IS_PATTERN

m4_divert(if_semiring_has_avx)
        //----------------------------------------------------------------------
        // saxpy5 method with vectors of length 8 for double, 16 for single
        //----------------------------------------------------------------------

        // AVX512F: vector registers are 512 bits, or 64 bytes, which can hold
        // 16 floats or 8 doubles.

        // each test is true when that many values of GB_Z_NBITS bits fit in a
        // 512-bit register
        #define GB_V16_512 (16 * GB_Z_NBITS <= 512)
        #define GB_V8_512 ( 8 * GB_Z_NBITS <= 512)
        #define GB_V4_512 ( 4 * GB_Z_NBITS <= 512)

        #define GB_V16 GB_V16_512
        #define GB_V8 GB_V8_512
        #define GB_V4 GB_V4_512

        #if GB_COMPILER_SUPPORTS_AVX512F && GB_V4_512

            GB_TARGET_AVX512F static inline void GB_AxB_saxpy5_unrolled_avx512f
            (
                GrB_Matrix C,
                const GrB_Matrix A,
                const GrB_Matrix B,
                const int ntasks,
                const int nthreads,
                const int64_t *B_slice
            )
            {
                #include "GB_AxB_saxpy5_unrolled.c"
            }

        #endif

        //----------------------------------------------------------------------
        // saxpy5 method with vectors of length 4 for double, 8 for single
        //----------------------------------------------------------------------

        // AVX2: vector registers are 256 bits, or 32 bytes, which can hold
        // 8 floats or 4 doubles.

        // each test is true when that many values of GB_Z_NBITS bits fit in a
        // 256-bit register
        #define GB_V16_256 (16 * GB_Z_NBITS <= 256)
        #define GB_V8_256 ( 8 * GB_Z_NBITS <= 256)
        #define GB_V4_256 ( 4 * GB_Z_NBITS <= 256)

        // switch the width selectors from the 512-bit to the 256-bit tests
        #undef GB_V16
        #undef GB_V8
        #undef GB_V4

        #define GB_V16 GB_V16_256
        #define GB_V8 GB_V8_256
        #define GB_V4 GB_V4_256

        #if GB_COMPILER_SUPPORTS_AVX2 && GB_V4_256

            GB_TARGET_AVX2 static inline void GB_AxB_saxpy5_unrolled_avx2
            (
                GrB_Matrix C,
                const GrB_Matrix A,
                const GrB_Matrix B,
                const int ntasks,
                const int nthreads,
                const int64_t *B_slice
            )
            {
                #include "GB_AxB_saxpy5_unrolled.c"
            }

        #endif

m4_divert(if_saxpy5_enabled)
        //----------------------------------------------------------------------
        // saxpy5 method unrolled, with no vectors
        //----------------------------------------------------------------------

        // all three width selectors are forced to 0 for this variant
        #undef GB_V16
        #undef GB_V8
        #undef GB_V4

        #define GB_V16 0
        #define GB_V8 0
        #define GB_V4 0

        static inline void GB_AxB_saxpy5_unrolled_vanilla
        (
            GrB_Matrix C,
            const GrB_Matrix A,
            const GrB_Matrix B,
            const int ntasks,
            const int nthreads,
            const int64_t *B_slice
        )
        {
            #include "GB_AxB_saxpy5_unrolled.c"
        }

    #endif

    // Returns GrB_NO_VALUE when GB_DISABLE is nonzero at compile time.
    // Otherwise the body is the textually included saxpy5 meta template
    // followed by a return of GrB_SUCCESS.  Presumably that template selects
    // among the helper kernels above; confirm in the template itself.

    GrB_Info GB (_Asaxpy5B)
    (
        GrB_Matrix C,
        const GrB_Matrix A,
        const GrB_Matrix B,
        const int ntasks,
        const int nthreads,
        const int64_t *B_slice
    )
    {
        #if GB_DISABLE
        return (GrB_NO_VALUE) ;
        #else
        #include "GB_AxB_saxpy5_meta.c"
        return (GrB_SUCCESS) ;
        #endif
    }
m4_divert(0)

//------------------------------------------------------------------------------
// GB_Asaxpy3B: C=A*B, C<M>=A*B, C<!M>=A*B: saxpy method (Gustavson + Hash)
//------------------------------------------------------------------------------

// Dispatcher for the three saxpy3 variants.  Returns GrB_NO_VALUE when
// GB_DISABLE is nonzero at compile time.  Otherwise it asserts that C is
// sparse or hypersparse and forwards to exactly one worker, returning that
// worker's result:
//      M == NULL           no mask
//      M and !Mask_comp    mask M
//      M and Mask_comp     complemented mask
// Mask_comp is consumed here and is not passed on to the workers.

GrB_Info GB (_Asaxpy3B)
(
    GrB_Matrix C,   // C<any M>=A*B, C sparse or hypersparse
    const GrB_Matrix M, const bool Mask_comp, const bool Mask_struct,
    const bool M_in_place,
    const GrB_Matrix A,
    const GrB_Matrix B,
    GB_saxpy3task_struct *restrict SaxpyTasks,
    const int ntasks, const int nfine, const int nthreads, const int do_sort,
    GB_Werk Werk
)
{
    #if GB_DISABLE
    return (GrB_NO_VALUE) ;
    #else
    ASSERT (GB_IS_SPARSE (C) || GB_IS_HYPERSPARSE (C)) ;
    if (M == NULL)
    {
        // C = A*B, no mask
        return (GB (_Asaxpy3B_noM) (C, A, B,
            SaxpyTasks, ntasks, nfine, nthreads, do_sort, Werk)) ;
    }
    else if (!Mask_comp)
    {
        // C<M> = A*B
        return (GB (_Asaxpy3B_M) (C,
            M, Mask_struct, M_in_place, A, B,
            SaxpyTasks, ntasks, nfine, nthreads, do_sort, Werk)) ;
    }
    else
    {
        // C<!M> = A*B
        return (GB (_Asaxpy3B_notM) (C,
            M, Mask_struct, M_in_place, A, B,
            SaxpyTasks, ntasks, nfine, nthreads, do_sort, Werk)) ;
    }
    #endif
}

//------------------------------------------------------------------------------
// GB_Asaxpy3B_M: C<M>=A*B: saxpy method (Gustavson + Hash)
//------------------------------------------------------------------------------

// Worker for the masked, non-complemented case (GB_NO_MASK 0, GB_MASK_COMP 0).
// Only compiled when GB_DISABLE is zero.  The saxpy3 template is textually
// included twice: once with GB_META16 defined and the A/B format macros fixed
// to sparse, and once with GB_META16 undefined for every other combination.
// Returns GrB_SUCCESS unless the included template returns earlier (not
// visible here).

#if ( !GB_DISABLE )

    GrB_Info GB (_Asaxpy3B_M)
    (
        GrB_Matrix C,   // C<M>=A*B, C sparse or hypersparse
        const GrB_Matrix M, const bool Mask_struct,
        const bool M_in_place,
        const GrB_Matrix A,
        const GrB_Matrix B,
        GB_saxpy3task_struct *restrict SaxpyTasks,
        const int ntasks, const int nfine, const int nthreads,
        const int do_sort,
        GB_Werk Werk
    )
    {
        // not referenced in the text visible here; presumably consumed by the
        // included template
        int nthreads_max = GB_Context_nthreads_max ( ) ;
        double chunk = GB_Context_chunk ( ) ;
        if (GB_IS_SPARSE (A) && GB_IS_SPARSE (B))
        {
            // both A and B are sparse
            #define GB_META16
            #define GB_NO_MASK 0
            #define GB_MASK_COMP 0
            #define GB_A_IS_SPARSE 1
            #define GB_A_IS_HYPER 0
            #define GB_A_IS_BITMAP 0
            #define GB_A_IS_FULL 0
            #define GB_B_IS_SPARSE 1
            #define GB_B_IS_HYPER 0
            #define GB_B_IS_BITMAP 0
            #define GB_B_IS_FULL 0
            #include "GB_meta16_definitions.h"
            #include "GB_AxB_saxpy3_template.c"
        }
        else
        {
            // general case
            #undef GB_META16
            #define GB_NO_MASK 0
            #define GB_MASK_COMP 0
            #include "GB_meta16_definitions.h"
            #include "GB_AxB_saxpy3_template.c"
        }
        return (GrB_SUCCESS) ;
    }

#endif

//------------------------------------------------------------------------------
// GB_Asaxpy3B_noM: C=A*B: saxpy method (Gustavson + Hash)
//------------------------------------------------------------------------------

// Worker for the unmasked case (GB_NO_MASK 1, GB_MASK_COMP 0); it takes no
// mask parameters.  Only compiled when GB_DISABLE is zero.  Same two-way
// template inclusion as the masked worker.

#if ( !GB_DISABLE )

    GrB_Info GB (_Asaxpy3B_noM)
    (
        GrB_Matrix C,   // C=A*B, C sparse or hypersparse
        const GrB_Matrix A,
        const GrB_Matrix B,
        GB_saxpy3task_struct *restrict SaxpyTasks,
        const int ntasks, const int nfine, const int nthreads,
        const int do_sort,
        GB_Werk Werk
    )
    {
        // not referenced in the text visible here; presumably consumed by the
        // included template
        int nthreads_max = GB_Context_nthreads_max ( ) ;
        double chunk = GB_Context_chunk ( ) ;
        if (GB_IS_SPARSE (A) && GB_IS_SPARSE (B))
        {
            // both A and B are sparse
            #define GB_META16
            #define GB_NO_MASK 1
            #define GB_MASK_COMP 0
            #define GB_A_IS_SPARSE 1
            #define GB_A_IS_HYPER 0
            #define GB_A_IS_BITMAP 0
            #define GB_A_IS_FULL 0
            #define GB_B_IS_SPARSE 1
            #define GB_B_IS_HYPER 0
            #define GB_B_IS_BITMAP 0
            #define GB_B_IS_FULL 0
            #include "GB_meta16_definitions.h"
            #include "GB_AxB_saxpy3_template.c"
        }
        else
        {
            // general case
            #undef GB_META16
            #define GB_NO_MASK 1
            #define GB_MASK_COMP 0
            #include "GB_meta16_definitions.h"
            #include "GB_AxB_saxpy3_template.c"
        }
        return (GrB_SUCCESS) ;
    }

#endif

//------------------------------------------------------------------------------
// GB_Asaxpy3B_notM: C<!M>=A*B: saxpy method (Gustavson + Hash)
//------------------------------------------------------------------------------

// Worker for the complemented-mask case (GB_NO_MASK 0, GB_MASK_COMP 1).  Only
// compiled when GB_DISABLE is zero.  Same two-way template inclusion as the
// other two workers.

#if ( !GB_DISABLE )

    GrB_Info GB (_Asaxpy3B_notM)
    (
        GrB_Matrix C,   // C<!M>=A*B, C sparse or hypersparse
        const GrB_Matrix M, const bool Mask_struct,
        const bool M_in_place,
        const GrB_Matrix A,
        const GrB_Matrix B,
        GB_saxpy3task_struct *restrict SaxpyTasks,
        const int ntasks, const int nfine, const int nthreads,
        const int do_sort,
        GB_Werk Werk
    )
    {
        // not referenced in the text visible here; presumably consumed by the
        // included template
        int nthreads_max = GB_Context_nthreads_max ( ) ;
        double chunk = GB_Context_chunk ( ) ;
        if (GB_IS_SPARSE (A) && GB_IS_SPARSE (B))
        {
            // both A and B are sparse
            #define GB_META16
            #define GB_NO_MASK 0
            #define GB_MASK_COMP 1
            #define GB_A_IS_SPARSE 1
            #define GB_A_IS_HYPER 0
            #define GB_A_IS_BITMAP 0
            #define GB_A_IS_FULL 0
            #define GB_B_IS_SPARSE 1
            #define GB_B_IS_HYPER 0
            #define GB_B_IS_BITMAP 0
            #define GB_B_IS_FULL 0
            #include "GB_meta16_definitions.h"
            #include "GB_AxB_saxpy3_template.c"
        }
        else
        {
            // general case
            #undef GB_META16
            #define GB_NO_MASK 0
            #define GB_MASK_COMP 1
            #include "GB_meta16_definitions.h"
            #include "GB_AxB_saxpy3_template.c"
        }
        return (GrB_SUCCESS) ;
    }

#endif