*> \brief \b IPARAM2STAGE
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download IPARAM2STAGE + dependencies
*>
*> [TGZ]
*>
*> [ZIP]
*>
*> [TXT]
*> \endhtmlonly
*
* Definition:
* ===========
*
* INTEGER FUNCTION IPARAM2STAGE( ISPEC, NAME, OPTS,
* NI, NBI, IBI, NXI )
* #if defined(_OPENMP)
* use omp_lib
* #endif
* IMPLICIT NONE
*
* .. Scalar Arguments ..
* CHARACTER*( * ) NAME, OPTS
* INTEGER ISPEC, NI, NBI, IBI, NXI
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> This program sets problem and machine dependent parameters
*> useful for xHETRD_2STAGE, xHETRD_H@2HB, xHETRD_HB2ST,
*> xGEBRD_2STAGE, xGEBRD_GE2GB, xGEBRD_GB2BD
*> and related subroutines for eigenvalue problems.
*> It is called whenever ILAENV is called with 17 <= ISPEC <= 21
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] ISPEC
*> \verbatim
*> ISPEC is integer scalar
*> ISPEC specifies which tunable parameter IPARAM2STAGE should
*> return.
*>
*> ISPEC=17: the optimal blocksize nb for the reduction to
* BAND
*>
*> ISPEC=18: the optimal blocksize ib for the eigenvectors
*> singular vectors update routine
*>
*> ISPEC=19: The length of the array that store the Housholder
*> representation for the second stage
*> Band to Tridiagonal or Bidiagonal
*>
*> ISPEC=20: The workspace needed for the routine in input.
*>
*> ISPEC=21: For future release.
*> \endverbatim
*>
*> \param[in] NAME
*> \verbatim
*> NAME is character string
*> Name of the calling subroutine
*> \endverbatim
*>
*> \param[in] OPTS
*> \verbatim
*> OPTS is CHARACTER*(*)
*> The character options to the subroutine NAME, concatenated
*> into a single character string. For example, UPLO = 'U',
*> TRANS = 'T', and DIAG = 'N' for a triangular routine would
*> be specified as OPTS = 'UTN'.
*> \endverbatim
*>
*> \param[in] NI
*> \verbatim
*> NI is INTEGER which is the size of the matrix
*> \endverbatim
*>
*> \param[in] NBI
*> \verbatim
*> NBI is INTEGER which is the used in the reduciton,
* (e.g., the size of the band), needed to compute workspace
* and LHOUS2.
*> \endverbatim
*>
*> \param[in] IBI
*> \verbatim
*> IBI is INTEGER which represent the IB of the reduciton,
* needed to compute workspace and LHOUS2.
*> \endverbatim
*>
*> \param[in] NXI
*> \verbatim
*> NXI is INTEGER needed in the future release.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \date June 2016
*
*> \ingroup auxOTHERauxiliary
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> Implemented by Azzam Haidar.
*>
*> All detail are available on technical report, SC11, SC13 papers.
*>
*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
*> Parallel reduction to condensed forms for symmetric eigenvalue problems
*> using aggregated fine-grained and memory-aware kernels. In Proceedings
*> of 2011 International Conference for High Performance Computing,
*> Networking, Storage and Analysis (SC '11), New York, NY, USA,
*> Article 8 , 11 pages.
*> http://doi.acm.org/10.1145/2063384.2063394
*>
*> A. Haidar, J. Kurzak, P. Luszczek, 2013.
*> An improved parallel singular value algorithm and its implementation
*> for multicore hardware, In Proceedings of 2013 International Conference
*> for High Performance Computing, Networking, Storage and Analysis (SC '13).
*> Denver, Colorado, USA, 2013.
*> Article 90, 12 pages.
*> http://doi.acm.org/10.1145/2503210.2503292
*>
*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
*> calculations based on fine-grained memory aware tasks.
*> International Journal of High Performance Computing Applications.
*> Volume 28 Issue 2, Pages 196-209, May 2014.
*> http://hpc.sagepub.com/content/28/2/196
*>
*> \endverbatim
*>
* =====================================================================
INTEGER FUNCTION IPARAM2STAGE( ISPEC, NAME, OPTS,
$ NI, NBI, IBI, NXI )
#if defined(_OPENMP)
use omp_lib
#endif
IMPLICIT NONE
*
* -- LAPACK auxiliary routine (version 3.7.0) --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* June 2016
*
* .. Scalar Arguments ..
CHARACTER*( * ) NAME, OPTS
INTEGER ISPEC, NI, NBI, IBI, NXI
*
* ================================================================
* ..
* .. Local Scalars ..
INTEGER I, IC, IZ, KD, IB, LHOUS, LWORK, NTHREADS,
$ FACTOPTNB, QROPTNB, LQOPTNB
LOGICAL RPREC, CPREC
CHARACTER PREC*1, ALGO*3, STAG*5, SUBNAM*12, VECT*1
* ..
* .. Intrinsic Functions ..
INTRINSIC CHAR, ICHAR, MAX
* ..
* .. External Functions ..
INTEGER ILAENV
EXTERNAL ILAENV
* ..
* .. Executable Statements ..
*
* Invalid value for ISPEC
*
IF( (ISPEC.LT.17).OR.(ISPEC.GT.21) ) THEN
IPARAM2STAGE = -1
RETURN
ENDIF
*
* Get the number of threads
*
NTHREADS = 1
#if defined(_OPENMP)
!$OMP PARALLEL
NTHREADS = OMP_GET_NUM_THREADS()
!$OMP END PARALLEL
#endif
* WRITE(*,*) 'IPARAM VOICI NTHREADS ISPEC ',NTHREADS, ISPEC
*
IF( ISPEC .NE. 19 ) THEN
*
* Convert NAME to upper case if the first character is lower case.
*
IPARAM2STAGE = -1
SUBNAM = NAME
IC = ICHAR( SUBNAM( 1: 1 ) )
IZ = ICHAR( 'Z' )
IF( IZ.EQ.90 .OR. IZ.EQ.122 ) THEN
*
* ASCII character set
*
IF( IC.GE.97 .AND. IC.LE.122 ) THEN
SUBNAM( 1: 1 ) = CHAR( IC-32 )
DO 100 I = 2, 12
IC = ICHAR( SUBNAM( I: I ) )
IF( IC.GE.97 .AND. IC.LE.122 )
$ SUBNAM( I: I ) = CHAR( IC-32 )
100 CONTINUE
END IF
*
ELSE IF( IZ.EQ.233 .OR. IZ.EQ.169 ) THEN
*
* EBCDIC character set
*
IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR.
$ ( IC.GE.145 .AND. IC.LE.153 ) .OR.
$ ( IC.GE.162 .AND. IC.LE.169 ) ) THEN
SUBNAM( 1: 1 ) = CHAR( IC+64 )
DO 110 I = 2, 12
IC = ICHAR( SUBNAM( I: I ) )
IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR.
$ ( IC.GE.145 .AND. IC.LE.153 ) .OR.
$ ( IC.GE.162 .AND. IC.LE.169 ) )SUBNAM( I:
$ I ) = CHAR( IC+64 )
110 CONTINUE
END IF
*
ELSE IF( IZ.EQ.218 .OR. IZ.EQ.250 ) THEN
*
* Prime machines: ASCII+128
*
IF( IC.GE.225 .AND. IC.LE.250 ) THEN
SUBNAM( 1: 1 ) = CHAR( IC-32 )
DO 120 I = 2, 12
IC = ICHAR( SUBNAM( I: I ) )
IF( IC.GE.225 .AND. IC.LE.250 )
$ SUBNAM( I: I ) = CHAR( IC-32 )
120 CONTINUE
END IF
END IF
*
PREC = SUBNAM( 1: 1 )
ALGO = SUBNAM( 4: 6 )
STAG = SUBNAM( 8:12 )
RPREC = PREC.EQ.'S' .OR. PREC.EQ.'D'
CPREC = PREC.EQ.'C' .OR. PREC.EQ.'Z'
*
* Invalid value for PRECISION
*
IF( .NOT.( RPREC .OR. CPREC ) ) THEN
IPARAM2STAGE = -1
RETURN
ENDIF
ENDIF
* WRITE(*,*),'RPREC,CPREC ',RPREC,CPREC,
* $ ' ALGO ',ALGO,' STAGE ',STAG
*
*
IF (( ISPEC .EQ. 17 ) .OR. ( ISPEC .EQ. 18 )) THEN
*
* ISPEC = 17, 18: block size KD, IB
* Could be also dependent from N but for now it
* depend only on sequential or parallel
*
IF( NTHREADS.GT.4 ) THEN
IF( CPREC ) THEN
KD = 128
IB = 32
ELSE
KD = 160
IB = 40
ENDIF
ELSE IF( NTHREADS.GT.1 ) THEN
IF( CPREC ) THEN
KD = 64
IB = 32
ELSE
KD = 64
IB = 32
ENDIF
ELSE
IF( CPREC ) THEN
KD = 16
IB = 16
ELSE
KD = 32
IB = 16
ENDIF
ENDIF
IF( ISPEC.EQ.17 ) IPARAM2STAGE = KD
IF( ISPEC.EQ.18 ) IPARAM2STAGE = IB
*
ELSE IF ( ISPEC .EQ. 19 ) THEN
*
* ISPEC = 19:
* LHOUS length of the Houselholder representation
* matrix (V,T) of the second stage. should be >= 1.
*
* Will add the VECT OPTION HERE next release
VECT = OPTS(1:1)
IF( VECT.EQ.'N' ) THEN
LHOUS = MAX( 1, 4*NI )
ELSE
* This is not correct, it need to call the ALGO and the stage2
LHOUS = MAX( 1, 4*NI ) + IBI
ENDIF
IF( LHOUS.GE.0 ) THEN
IPARAM2STAGE = LHOUS
ELSE
IPARAM2STAGE = -1
ENDIF
*
ELSE IF ( ISPEC .EQ. 20 ) THEN
*
* ISPEC = 20: (21 for future use)
* LWORK length of the workspace for
* either or both stages for TRD and BRD. should be >= 1.
* TRD:
* TRD_stage 1: = LT + LW + LS1 + LS2
* = LDT*KD + N*KD + N*MAX(KD,FACTOPTNB) + LDS2*KD
* where LDT=LDS2=KD
* = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD
* TRD_stage 2: = (2NB+1)*N + KD*NTHREADS
* TRD_both : = max(stage1,stage2) + AB ( AB=(KD+1)*N )
* = N*KD + N*max(KD+1,FACTOPTNB)
* + max(2*KD*KD, KD*NTHREADS)
* + (KD+1)*N
LWORK = -1
SUBNAM(1:1) = PREC
SUBNAM(2:6) = 'GEQRF'
QROPTNB = ILAENV( 1, SUBNAM, ' ', NI, NBI, -1, -1 )
SUBNAM(2:6) = 'GELQF'
LQOPTNB = ILAENV( 1, SUBNAM, ' ', NBI, NI, -1, -1 )
* Could be QR or LQ for TRD and the max for BRD
FACTOPTNB = MAX(QROPTNB, LQOPTNB)
IF( ALGO.EQ.'TRD' ) THEN
IF( STAG.EQ.'2STAG' ) THEN
LWORK = NI*NBI + NI*MAX(NBI+1,FACTOPTNB)
$ + MAX(2*NBI*NBI, NBI*NTHREADS)
$ + (NBI+1)*NI
ELSE IF( (STAG.EQ.'HE2HB').OR.(STAG.EQ.'SY2SB') ) THEN
LWORK = NI*NBI + NI*MAX(NBI,FACTOPTNB) + 2*NBI*NBI
ELSE IF( (STAG.EQ.'HB2ST').OR.(STAG.EQ.'SB2ST') ) THEN
LWORK = (2*NBI+1)*NI + NBI*NTHREADS
ENDIF
ELSE IF( ALGO.EQ.'BRD' ) THEN
IF( STAG.EQ.'2STAG' ) THEN
LWORK = 2*NI*NBI + NI*MAX(NBI+1,FACTOPTNB)
$ + MAX(2*NBI*NBI, NBI*NTHREADS)
$ + (NBI+1)*NI
ELSE IF( STAG.EQ.'GE2GB' ) THEN
LWORK = NI*NBI + NI*MAX(NBI,FACTOPTNB) + 2*NBI*NBI
ELSE IF( STAG.EQ.'GB2BD' ) THEN
LWORK = (3*NBI+1)*NI + NBI*NTHREADS
ENDIF
ENDIF
LWORK = MAX ( 1, LWORK )
IF( LWORK.GT.0 ) THEN
IPARAM2STAGE = LWORK
ELSE
IPARAM2STAGE = -1
ENDIF
*
ELSE IF ( ISPEC .EQ. 21 ) THEN
*
* ISPEC = 21 for future use
IPARAM2STAGE = NXI
ENDIF
*
* ==== End of IPARAM2STAGE ====
*
END