*> \brief \b CHETRD_HE2HB
*
* @generated from zhetrd_he2hb.f, fortran z -> c, Wed Dec 7 08:22:40 2016
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download CHETRD_HE2HB + dependencies
*>
*> [TGZ]
*>
*> [ZIP]
*>
*> [TXT]
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE CHETRD_HE2HB( UPLO, N, KD, A, LDA, AB, LDAB, TAU,
* WORK, LWORK, INFO )
*
* IMPLICIT NONE
*
* .. Scalar Arguments ..
* CHARACTER UPLO
* INTEGER INFO, LDA, LDAB, LWORK, N, KD
* ..
* .. Array Arguments ..
* COMPLEX A( LDA, * ), AB( LDAB, * ),
* TAU( * ), WORK( * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> CHETRD_HE2HB reduces a complex Hermitian matrix A to complex Hermitian
*> band-diagonal form AB by a unitary similarity transformation:
*> Q**H * A * Q = AB.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] UPLO
*> \verbatim
*> UPLO is CHARACTER*1
*> = 'U': Upper triangle of A is stored;
*> = 'L': Lower triangle of A is stored.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The order of the matrix A. N >= 0.
*> \endverbatim
*>
*> \param[in] KD
*> \verbatim
*> KD is INTEGER
*> The number of superdiagonals of the reduced matrix if UPLO = 'U',
*> or the number of subdiagonals if UPLO = 'L'. KD >= 0.
*> The reduced matrix is stored in the array AB.
*> \endverbatim
*>
*> \param[in,out] A
*> \verbatim
*> A is COMPLEX array, dimension (LDA,N)
*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading
*> N-by-N upper triangular part of A contains the upper
*> triangular part of the matrix A, and the strictly lower
*> triangular part of A is not referenced. If UPLO = 'L', the
*> leading N-by-N lower triangular part of A contains the lower
*> triangular part of the matrix A, and the strictly upper
*> triangular part of A is not referenced.
*> On exit, if UPLO = 'U', the diagonal and first superdiagonal
*> of A are overwritten by the corresponding elements of the
*> tridiagonal matrix T, and the elements above the first
*> superdiagonal, with the array TAU, represent the unitary
*> matrix Q as a product of elementary reflectors; if UPLO
*> = 'L', the diagonal and first subdiagonal of A are over-
*> written by the corresponding elements of the tridiagonal
*> matrix T, and the elements below the first subdiagonal, with
*> the array TAU, represent the unitary matrix Q as a product
*> of elementary reflectors. See Further Details.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max(1,N).
*> \endverbatim
*>
*> \param[out] AB
*> \verbatim
*> AB is COMPLEX array, dimension (LDAB,N)
*> On exit, the upper or lower triangle of the Hermitian band
*> matrix A, stored in the first KD+1 rows of the array. The
*> j-th column of A is stored in the j-th column of the array AB
*> as follows:
*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd).
*> \endverbatim
*>
*> \param[in] LDAB
*> \verbatim
*> LDAB is INTEGER
*> The leading dimension of the array AB. LDAB >= KD+1.
*> \endverbatim
*>
*> \param[out] TAU
*> \verbatim
*> TAU is COMPLEX array, dimension (N-KD)
*> The scalar factors of the elementary reflectors (see Further
*> Details).
*> \endverbatim
*>
*> \param[out] WORK
*> \verbatim
*> WORK is COMPLEX array, dimension (LWORK)
*> On exit, if INFO = 0, or if LWORK=-1,
*> WORK(1) returns the size of LWORK.
*> \endverbatim
*>
*> \param[in] LWORK
*> \verbatim
*> LWORK is INTEGER
*> The dimension of the array WORK which should be calculated
*> by a workspace query. LWORK = MAX(1, LWORK_QUERY)
*> If LWORK = -1, then a workspace query is assumed; the routine
*> only calculates the optimal size of the WORK array, returns
*> this value as the first entry of the WORK array, and no error
*> message related to LWORK is issued by XERBLA.
*> LWORK_QUERY = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD
*> where FACTOPTNB is the blocking used by the QR or LQ
*> algorithm, usually FACTOPTNB=128 is a good choice otherwise
*> putting LWORK=-1 will provide the size of WORK.
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> < 0: if INFO = -i, the i-th argument had an illegal value
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup complexHEcomputational
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> Implemented by Azzam Haidar.
*>
*> All details are available on technical report, SC11, SC13 papers.
*>
*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
*> Parallel reduction to condensed forms for symmetric eigenvalue problems
*> using aggregated fine-grained and memory-aware kernels. In Proceedings
*> of 2011 International Conference for High Performance Computing,
*> Networking, Storage and Analysis (SC '11), New York, NY, USA,
*> Article 8 , 11 pages.
*> http://doi.acm.org/10.1145/2063384.2063394
*>
*> A. Haidar, J. Kurzak, P. Luszczek, 2013.
*> An improved parallel singular value algorithm and its implementation
*> for multicore hardware, In Proceedings of 2013 International Conference
*> for High Performance Computing, Networking, Storage and Analysis (SC '13).
*> Denver, Colorado, USA, 2013.
*> Article 90, 12 pages.
*> http://doi.acm.org/10.1145/2503210.2503292
*>
*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
*> calculations based on fine-grained memory aware tasks.
*> International Journal of High Performance Computing Applications.
*> Volume 28 Issue 2, Pages 196-209, May 2014.
*> http://hpc.sagepub.com/content/28/2/196
*>
*> \endverbatim
*>
*> \verbatim
*>
*> If UPLO = 'U', the matrix Q is represented as a product of elementary
*> reflectors
*>
*> Q = H(k)**H . . . H(2)**H H(1)**H, where k = n-kd.
*>
*> Each H(i) has the form
*>
*> H(i) = I - tau * v * v**H
*>
*> where tau is a complex scalar, and v is a complex vector with
*> v(1:i+kd-1) = 0 and v(i+kd) = 1; conjg(v(i+kd+1:n)) is stored on exit in
*> A(i,i+kd+1:n), and tau in TAU(i).
*>
*> If UPLO = 'L', the matrix Q is represented as a product of elementary
*> reflectors
*>
*> Q = H(1) H(2) . . . H(k), where k = n-kd.
*>
*> Each H(i) has the form
*>
*> H(i) = I - tau * v * v**H
*>
*> where tau is a complex scalar, and v is a complex vector with
*> v(kd+1:i) = 0 and v(i+kd+1) = 1; v(i+kd+2:n) is stored on exit in
*> A(i+kd+2:n,i), and tau in TAU(i).
*>
*> The contents of A on exit are illustrated by the following examples
*> with n = 5:
*>
*> if UPLO = 'U': if UPLO = 'L':
*>
*> ( ab ab/v1 v1 v1 v1 ) ( ab )
*> ( ab ab/v2 v2 v2 ) ( ab/v1 ab )
*> ( ab ab/v3 v3 ) ( v1 ab/v2 ab )
*> ( ab ab/v4 ) ( v1 v2 ab/v3 ab )
*> ( ab ) ( v1 v2 v3 ab/v4 ab )
*>
*> where d and e denote diagonal and off-diagonal elements of T, and vi
*> denotes an element of the vector defining H(i).
*> \endverbatim
*>
* =====================================================================
SUBROUTINE CHETRD_HE2HB( UPLO, N, KD, A, LDA, AB, LDAB, TAU,
$ WORK, LWORK, INFO )
*
IMPLICIT NONE
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
CHARACTER UPLO
INTEGER INFO, LDA, LDAB, LWORK, N, KD
* ..
* .. Array Arguments ..
COMPLEX A( LDA, * ), AB( LDAB, * ),
$ TAU( * ), WORK( * )
* ..
*
* =====================================================================
*
* .. Parameters ..
REAL RONE
COMPLEX ZERO, ONE, HALF
PARAMETER ( RONE = 1.0E+0,
$ ZERO = ( 0.0E+0, 0.0E+0 ),
$ ONE = ( 1.0E+0, 0.0E+0 ),
$ HALF = ( 0.5E+0, 0.0E+0 ) )
* ..
* .. Local Scalars ..
LOGICAL LQUERY, UPPER
INTEGER I, J, IINFO, LWMIN, PN, PK, LK,
$ LDT, LDW, LDS2, LDS1,
$ LS2, LS1, LW, LT,
$ TPOS, WPOS, S2POS, S1POS
* ..
* .. External Subroutines ..
EXTERNAL XERBLA, CHER2K, CHEMM, CGEMM, CCOPY,
$ CLARFT, CGELQF, CGEQRF, CLASET
* ..
* .. Intrinsic Functions ..
INTRINSIC MIN, MAX
* ..
* .. External Functions ..
LOGICAL LSAME
INTEGER ILAENV2STAGE
EXTERNAL LSAME, ILAENV2STAGE
* ..
* .. Executable Statements ..
*
* Determine the minimal workspace size required
* and test the input parameters
*
INFO = 0
UPPER = LSAME( UPLO, 'U' )
LQUERY = ( LWORK.EQ.-1 )
LWMIN = ILAENV2STAGE( 4, 'CHETRD_HE2HB', '', N, KD, -1, -1 )
IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
INFO = -1
ELSE IF( N.LT.0 ) THEN
INFO = -2
ELSE IF( KD.LT.0 ) THEN
INFO = -3
ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
INFO = -5
ELSE IF( LDAB.LT.MAX( 1, KD+1 ) ) THEN
INFO = -7
ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
INFO = -10
END IF
*
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'CHETRD_HE2HB', -INFO )
RETURN
ELSE IF( LQUERY ) THEN
WORK( 1 ) = LWMIN
RETURN
END IF
*
* Quick return if possible
* Copy the upper/lower portion of A into AB
*
IF( N.LE.KD+1 ) THEN
IF( UPPER ) THEN
DO 100 I = 1, N
LK = MIN( KD+1, I )
CALL CCOPY( LK, A( I-LK+1, I ), 1,
$ AB( KD+1-LK+1, I ), 1 )
100 CONTINUE
ELSE
DO 110 I = 1, N
LK = MIN( KD+1, N-I+1 )
CALL CCOPY( LK, A( I, I ), 1, AB( 1, I ), 1 )
110 CONTINUE
ENDIF
WORK( 1 ) = 1
RETURN
END IF
*
* Determine the pointer position for the workspace
*
LDT = KD
LDS1 = KD
LT = LDT*KD
LW = N*KD
LS1 = LDS1*KD
LS2 = LWMIN - LT - LW - LS1
* LS2 = N*MAX(KD,FACTOPTNB)
TPOS = 1
WPOS = TPOS + LT
S1POS = WPOS + LW
S2POS = S1POS + LS1
IF( UPPER ) THEN
LDW = KD
LDS2 = KD
ELSE
LDW = N
LDS2 = N
ENDIF
*
*
* Set the workspace of the triangular matrix T to zero once such a
* way every time T is generated the upper/lower portion will be always zero
*
CALL CLASET( "A", LDT, KD, ZERO, ZERO, WORK( TPOS ), LDT )
*
IF( UPPER ) THEN
DO 10 I = 1, N - KD, KD
PN = N-I-KD+1
PK = MIN( N-I-KD+1, KD )
*
* Compute the LQ factorization of the current block
*
CALL CGELQF( KD, PN, A( I, I+KD ), LDA,
$ TAU( I ), WORK( S2POS ), LS2, IINFO )
*
* Copy the upper portion of A into AB
*
DO 20 J = I, I+PK-1
LK = MIN( KD, N-J ) + 1
CALL CCOPY( LK, A( J, J ), LDA, AB( KD+1, J ), LDAB-1 )
20 CONTINUE
*
CALL CLASET( 'Lower', PK, PK, ZERO, ONE,
$ A( I, I+KD ), LDA )
*
* Form the matrix T
*
CALL CLARFT( 'Forward', 'Rowwise', PN, PK,
$ A( I, I+KD ), LDA, TAU( I ),
$ WORK( TPOS ), LDT )
*
* Compute W:
*
CALL CGEMM( 'Conjugate', 'No transpose', PK, PN, PK,
$ ONE, WORK( TPOS ), LDT,
$ A( I, I+KD ), LDA,
$ ZERO, WORK( S2POS ), LDS2 )
*
CALL CHEMM( 'Right', UPLO, PK, PN,
$ ONE, A( I+KD, I+KD ), LDA,
$ WORK( S2POS ), LDS2,
$ ZERO, WORK( WPOS ), LDW )
*
CALL CGEMM( 'No transpose', 'Conjugate', PK, PK, PN,
$ ONE, WORK( WPOS ), LDW,
$ WORK( S2POS ), LDS2,
$ ZERO, WORK( S1POS ), LDS1 )
*
CALL CGEMM( 'No transpose', 'No transpose', PK, PN, PK,
$ -HALF, WORK( S1POS ), LDS1,
$ A( I, I+KD ), LDA,
$ ONE, WORK( WPOS ), LDW )
*
*
* Update the unreduced submatrix A(i+kd:n,i+kd:n), using
* an update of the form: A := A - V'*W - W'*V
*
CALL CHER2K( UPLO, 'Conjugate', PN, PK,
$ -ONE, A( I, I+KD ), LDA,
$ WORK( WPOS ), LDW,
$ RONE, A( I+KD, I+KD ), LDA )
10 CONTINUE
*
* Copy the upper band to AB which is the band storage matrix
*
DO 30 J = N-KD+1, N
LK = MIN(KD, N-J) + 1
CALL CCOPY( LK, A( J, J ), LDA, AB( KD+1, J ), LDAB-1 )
30 CONTINUE
*
ELSE
*
* Reduce the lower triangle of A to lower band matrix
*
DO 40 I = 1, N - KD, KD
PN = N-I-KD+1
PK = MIN( N-I-KD+1, KD )
*
* Compute the QR factorization of the current block
*
CALL CGEQRF( PN, KD, A( I+KD, I ), LDA,
$ TAU( I ), WORK( S2POS ), LS2, IINFO )
*
* Copy the upper portion of A into AB
*
DO 50 J = I, I+PK-1
LK = MIN( KD, N-J ) + 1
CALL CCOPY( LK, A( J, J ), 1, AB( 1, J ), 1 )
50 CONTINUE
*
CALL CLASET( 'Upper', PK, PK, ZERO, ONE,
$ A( I+KD, I ), LDA )
*
* Form the matrix T
*
CALL CLARFT( 'Forward', 'Columnwise', PN, PK,
$ A( I+KD, I ), LDA, TAU( I ),
$ WORK( TPOS ), LDT )
*
* Compute W:
*
CALL CGEMM( 'No transpose', 'No transpose', PN, PK, PK,
$ ONE, A( I+KD, I ), LDA,
$ WORK( TPOS ), LDT,
$ ZERO, WORK( S2POS ), LDS2 )
*
CALL CHEMM( 'Left', UPLO, PN, PK,
$ ONE, A( I+KD, I+KD ), LDA,
$ WORK( S2POS ), LDS2,
$ ZERO, WORK( WPOS ), LDW )
*
CALL CGEMM( 'Conjugate', 'No transpose', PK, PK, PN,
$ ONE, WORK( S2POS ), LDS2,
$ WORK( WPOS ), LDW,
$ ZERO, WORK( S1POS ), LDS1 )
*
CALL CGEMM( 'No transpose', 'No transpose', PN, PK, PK,
$ -HALF, A( I+KD, I ), LDA,
$ WORK( S1POS ), LDS1,
$ ONE, WORK( WPOS ), LDW )
*
*
* Update the unreduced submatrix A(i+kd:n,i+kd:n), using
* an update of the form: A := A - V*W' - W*V'
*
CALL CHER2K( UPLO, 'No transpose', PN, PK,
$ -ONE, A( I+KD, I ), LDA,
$ WORK( WPOS ), LDW,
$ RONE, A( I+KD, I+KD ), LDA )
* ==================================================================
* RESTORE A FOR COMPARISON AND CHECKING TO BE REMOVED
* DO 45 J = I, I+PK-1
* LK = MIN( KD, N-J ) + 1
* CALL CCOPY( LK, AB( 1, J ), 1, A( J, J ), 1 )
* 45 CONTINUE
* ==================================================================
40 CONTINUE
*
* Copy the lower band to AB which is the band storage matrix
*
DO 60 J = N-KD+1, N
LK = MIN(KD, N-J) + 1
CALL CCOPY( LK, A( J, J ), 1, AB( 1, J ), 1 )
60 CONTINUE
END IF
*
WORK( 1 ) = LWMIN
RETURN
*
* End of CHETRD_HE2HB
*
END