*> \brief \b SGBTRF
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download SGBTRF + dependencies
*>
*> [TGZ]
*>
*> [ZIP]
*>
*> [TXT]
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE SGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO )
*
* .. Scalar Arguments ..
* INTEGER INFO, KL, KU, LDAB, M, N
* ..
* .. Array Arguments ..
* INTEGER IPIV( * )
* REAL AB( LDAB, * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> SGBTRF computes an LU factorization of a real m-by-n band matrix A
*> using partial pivoting with row interchanges.
*>
*> This is the blocked version of the algorithm, calling Level 3 BLAS.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> The number of rows of the matrix A. M >= 0.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of columns of the matrix A. N >= 0.
*> \endverbatim
*>
*> \param[in] KL
*> \verbatim
*> KL is INTEGER
*> The number of subdiagonals within the band of A. KL >= 0.
*> \endverbatim
*>
*> \param[in] KU
*> \verbatim
*> KU is INTEGER
*> The number of superdiagonals within the band of A. KU >= 0.
*> \endverbatim
*>
*> \param[in,out] AB
*> \verbatim
*> AB is REAL array, dimension (LDAB,N)
*> On entry, the matrix A in band storage, in rows KL+1 to
*> 2*KL+KU+1; rows 1 to KL of the array need not be set.
*> The j-th column of A is stored in the j-th column of the
*> array AB as follows:
*> AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
*>
*> On exit, details of the factorization: U is stored as an
*> upper triangular band matrix with KL+KU superdiagonals in
*> rows 1 to KL+KU+1, and the multipliers used during the
*> factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
*> See below for further details.
*> \endverbatim
*>
*> \param[in] LDAB
*> \verbatim
*> LDAB is INTEGER
*> The leading dimension of the array AB. LDAB >= 2*KL+KU+1.
*> \endverbatim
*>
*> \param[out] IPIV
*> \verbatim
*> IPIV is INTEGER array, dimension (min(M,N))
*> The pivot indices; for 1 <= i <= min(M,N), row i of the
*> matrix was interchanged with row IPIV(i).
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> < 0: if INFO = -i, the i-th argument had an illegal value
*> > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
*> has been completed, but the factor U is exactly
*> singular, and division by zero will occur if it is used
*> to solve a system of equations.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \date November 2011
*
*> \ingroup realGBcomputational
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> The band storage scheme is illustrated by the following example, when
*> M = N = 6, KL = 2, KU = 1:
*>
*> On entry: On exit:
*>
*> * * * + + + * * * u14 u25 u36
*> * * + + + + * * u13 u24 u35 u46
*> * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56
*> a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66
*> a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 *
*> a31 a42 a53 a64 * * m31 m42 m53 m64 * *
*>
*> Array elements marked * are not used by the routine; elements marked
*> + need not be set on entry, but are required by the routine to store
*> elements of U because of fill-in resulting from the row interchanges.
*> \endverbatim
*>
* =====================================================================
SUBROUTINE SGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO )
*
* -- LAPACK computational routine (version 3.4.0) --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* November 2011
*
* .. Scalar Arguments ..
INTEGER INFO, KL, KU, LDAB, M, N
* ..
* .. Array Arguments ..
INTEGER IPIV( * )
REAL AB( LDAB, * )
* ..
*
* =====================================================================
*
* .. Parameters ..
REAL ONE, ZERO
PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 )
INTEGER NBMAX, LDWORK
PARAMETER ( NBMAX = 64, LDWORK = NBMAX+1 )
* ..
* .. Local Scalars ..
INTEGER I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP,
$ JU, K2, KM, KV, NB, NW
REAL TEMP
* ..
* .. Local Arrays ..
REAL WORK13( LDWORK, NBMAX ),
$ WORK31( LDWORK, NBMAX )
* ..
* .. External Functions ..
INTEGER ILAENV, ISAMAX
EXTERNAL ILAENV, ISAMAX
* ..
* .. External Subroutines ..
EXTERNAL SCOPY, SGBTF2, SGEMM, SGER, SLASWP, SSCAL,
$ SSWAP, STRSM, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
* ..
* .. Executable Statements ..
*
* KV is the number of superdiagonals in the factor U, allowing for
* fill-in
*
KV = KU + KL
*
* Test the input parameters.
*
INFO = 0
IF( M.LT.0 ) THEN
INFO = -1
ELSE IF( N.LT.0 ) THEN
INFO = -2
ELSE IF( KL.LT.0 ) THEN
INFO = -3
ELSE IF( KU.LT.0 ) THEN
INFO = -4
ELSE IF( LDAB.LT.KL+KV+1 ) THEN
INFO = -6
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'SGBTRF', -INFO )
RETURN
END IF
*
* Quick return if possible
*
IF( M.EQ.0 .OR. N.EQ.0 )
$ RETURN
*
* Determine the block size for this environment
*
NB = ILAENV( 1, 'SGBTRF', ' ', M, N, KL, KU )
*
* The block size must not exceed the limit set by the size of the
* local arrays WORK13 and WORK31.
*
NB = MIN( NB, NBMAX )
*
IF( NB.LE.1 .OR. NB.GT.KL ) THEN
*
* Use unblocked code
*
CALL SGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO )
ELSE
*
* Use blocked code
*
* Zero the superdiagonal elements of the work array WORK13
*
DO 20 J = 1, NB
DO 10 I = 1, J - 1
WORK13( I, J ) = ZERO
10 CONTINUE
20 CONTINUE
*
* Zero the subdiagonal elements of the work array WORK31
*
DO 40 J = 1, NB
DO 30 I = J + 1, NB
WORK31( I, J ) = ZERO
30 CONTINUE
40 CONTINUE
*
* Gaussian elimination with partial pivoting
*
* Set fill-in elements in columns KU+2 to KV to zero
*
DO 60 J = KU + 2, MIN( KV, N )
DO 50 I = KV - J + 2, KL
AB( I, J ) = ZERO
50 CONTINUE
60 CONTINUE
*
* JU is the index of the last column affected by the current
* stage of the factorization
*
JU = 1
*
DO 180 J = 1, MIN( M, N ), NB
JB = MIN( NB, MIN( M, N )-J+1 )
*
* The active part of the matrix is partitioned
*
* A11 A12 A13
* A21 A22 A23
* A31 A32 A33
*
* Here A11, A21 and A31 denote the current block of JB columns
* which is about to be factorized. The number of rows in the
* partitioning are JB, I2, I3 respectively, and the numbers
* of columns are JB, J2, J3. The superdiagonal elements of A13
* and the subdiagonal elements of A31 lie outside the band.
*
I2 = MIN( KL-JB, M-J-JB+1 )
I3 = MIN( JB, M-J-KL+1 )
*
* J2 and J3 are computed after JU has been updated.
*
* Factorize the current block of JB columns
*
DO 80 JJ = J, J + JB - 1
*
* Set fill-in elements in column JJ+KV to zero
*
IF( JJ+KV.LE.N ) THEN
DO 70 I = 1, KL
AB( I, JJ+KV ) = ZERO
70 CONTINUE
END IF
*
* Find pivot and test for singularity. KM is the number of
* subdiagonal elements in the current column.
*
KM = MIN( KL, M-JJ )
JP = ISAMAX( KM+1, AB( KV+1, JJ ), 1 )
IPIV( JJ ) = JP + JJ - J
IF( AB( KV+JP, JJ ).NE.ZERO ) THEN
JU = MAX( JU, MIN( JJ+KU+JP-1, N ) )
IF( JP.NE.1 ) THEN
*
* Apply interchange to columns J to J+JB-1
*
IF( JP+JJ-1.LT.J+KL ) THEN
*
CALL SSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1,
$ AB( KV+JP+JJ-J, J ), LDAB-1 )
ELSE
*
* The interchange affects columns J to JJ-1 of A31
* which are stored in the work array WORK31
*
CALL SSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
$ WORK31( JP+JJ-J-KL, 1 ), LDWORK )
CALL SSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1,
$ AB( KV+JP, JJ ), LDAB-1 )
END IF
END IF
*
* Compute multipliers
*
CALL SSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ),
$ 1 )
*
* Update trailing submatrix within the band and within
* the current block. JM is the index of the last column
* which needs to be updated.
*
JM = MIN( JU, J+JB-1 )
IF( JM.GT.JJ )
$ CALL SGER( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1,
$ AB( KV, JJ+1 ), LDAB-1,
$ AB( KV+1, JJ+1 ), LDAB-1 )
ELSE
*
* If pivot is zero, set INFO to the index of the pivot
* unless a zero pivot has already been found.
*
IF( INFO.EQ.0 )
$ INFO = JJ
END IF
*
* Copy current column of A31 into the work array WORK31
*
NW = MIN( JJ-J+1, I3 )
IF( NW.GT.0 )
$ CALL SCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1,
$ WORK31( 1, JJ-J+1 ), 1 )
80 CONTINUE
IF( J+JB.LE.N ) THEN
*
* Apply the row interchanges to the other blocks.
*
J2 = MIN( JU-J+1, KV ) - JB
J3 = MAX( 0, JU-J-KV+1 )
*
* Use SLASWP to apply the row interchanges to A12, A22, and
* A32.
*
CALL SLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-1, 1, JB,
$ IPIV( J ), 1 )
*
* Adjust the pivot indices.
*
DO 90 I = J, J + JB - 1
IPIV( I ) = IPIV( I ) + J - 1
90 CONTINUE
*
* Apply the row interchanges to A13, A23, and A33
* columnwise.
*
K2 = J - 1 + JB + J2
DO 110 I = 1, J3
JJ = K2 + I
DO 100 II = J + I - 1, J + JB - 1
IP = IPIV( II )
IF( IP.NE.II ) THEN
TEMP = AB( KV+1+II-JJ, JJ )
AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ )
AB( KV+1+IP-JJ, JJ ) = TEMP
END IF
100 CONTINUE
110 CONTINUE
*
* Update the relevant part of the trailing submatrix
*
IF( J2.GT.0 ) THEN
*
* Update A12
*
CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit',
$ JB, J2, ONE, AB( KV+1, J ), LDAB-1,
$ AB( KV+1-JB, J+JB ), LDAB-1 )
*
IF( I2.GT.0 ) THEN
*
* Update A22
*
CALL SGEMM( 'No transpose', 'No transpose', I2, J2,
$ JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
$ AB( KV+1-JB, J+JB ), LDAB-1, ONE,
$ AB( KV+1, J+JB ), LDAB-1 )
END IF
*
IF( I3.GT.0 ) THEN
*
* Update A32
*
CALL SGEMM( 'No transpose', 'No transpose', I3, J2,
$ JB, -ONE, WORK31, LDWORK,
$ AB( KV+1-JB, J+JB ), LDAB-1, ONE,
$ AB( KV+KL+1-JB, J+JB ), LDAB-1 )
END IF
END IF
*
IF( J3.GT.0 ) THEN
*
* Copy the lower triangle of A13 into the work array
* WORK13
*
DO 130 JJ = 1, J3
DO 120 II = JJ, JB
WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 )
120 CONTINUE
130 CONTINUE
*
* Update A13 in the work array
*
CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit',
$ JB, J3, ONE, AB( KV+1, J ), LDAB-1,
$ WORK13, LDWORK )
*
IF( I2.GT.0 ) THEN
*
* Update A23
*
CALL SGEMM( 'No transpose', 'No transpose', I2, J3,
$ JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
$ WORK13, LDWORK, ONE, AB( 1+JB, J+KV ),
$ LDAB-1 )
END IF
*
IF( I3.GT.0 ) THEN
*
* Update A33
*
CALL SGEMM( 'No transpose', 'No transpose', I3, J3,
$ JB, -ONE, WORK31, LDWORK, WORK13,
$ LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 )
END IF
*
* Copy the lower triangle of A13 back into place
*
DO 150 JJ = 1, J3
DO 140 II = JJ, JB
AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ )
140 CONTINUE
150 CONTINUE
END IF
ELSE
*
* Adjust the pivot indices.
*
DO 160 I = J, J + JB - 1
IPIV( I ) = IPIV( I ) + J - 1
160 CONTINUE
END IF
*
* Partially undo the interchanges in the current block to
* restore the upper triangular form of A31 and copy the upper
* triangle of A31 back into place
*
DO 170 JJ = J + JB - 1, J, -1
JP = IPIV( JJ ) - JJ + 1
IF( JP.NE.1 ) THEN
*
* Apply interchange to columns J to JJ-1
*
IF( JP+JJ-1.LT.J+KL ) THEN
*
* The interchange does not affect A31
*
CALL SSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
$ AB( KV+JP+JJ-J, J ), LDAB-1 )
ELSE
*
* The interchange does affect A31
*
CALL SSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
$ WORK31( JP+JJ-J-KL, 1 ), LDWORK )
END IF
END IF
*
* Copy the current column of A31 back into place
*
NW = MIN( I3, JJ-J+1 )
IF( NW.GT.0 )
$ CALL SCOPY( NW, WORK31( 1, JJ-J+1 ), 1,
$ AB( KV+KL+1-JJ+J, JJ ), 1 )
170 CONTINUE
180 CONTINUE
END IF
*
RETURN
*
* End of SGBTRF
*
END