/* linalg/qr_uz.c * * Copyright (C) 2020 Patrick Alken * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or (at * your option) any later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include #include #include #include #include #include #include #include /* * this module contains routines for the QR factorization of a matrix * using the recursive Level 3 BLAS algorithm of Elmroth and Gustavson with * additional modifications courtesy of Julien Langou. */ static double qrtz_householder_transform (double *v0, gsl_vector * v); /* gsl_linalg_QR_UZ_decomp() Compute the QR decomposition of the "triangle on top of trapezoidal" matrix [ S ] = Q [ R ] [ A ] [ 0 ] where S is N-by-N upper triangular and A is M-by-N upper trapezoidal Inputs: S - on input, upper triangular N-by-N matrix on output, R factor in upper triangle A - on input, M-by-N upper trapezoidal matrix on output, upper trapezoidal of Householder matrix V T - (output) block reflector matrix, N-by-N Notes: 1) Based on the Elmroth/Gustavson algorithm, taking into account the sparse structure of the S,A matrices 2) The Householder matrix V has the special form: N V = [ I ] N [ V~ ] M with V~ upper trapezoidal: The matrix V~ is stored in A on output; the identity is not stored 3) The orthogonal matrix is Q = I - V T V^T */ int gsl_linalg_QR_UZ_decomp (gsl_matrix * S, gsl_matrix * A, gsl_matrix * T) { const size_t M = A->size1; const size_t N = S->size1; if (M < N) { GSL_ERROR ("M must be >= N", GSL_EBADLEN); } else if (N != S->size2) { GSL_ERROR ("S matrix must be square", GSL_ENOTSQR); } else if (N != A->size2) { GSL_ERROR ("S and A must have same number of columns", GSL_EBADLEN); } else if (T->size1 != N || T->size2 != N) { GSL_ERROR ("T matrix has wrong dimensions", GSL_EBADLEN); } else if (M == N) { /* triangle on top of triangle */ return gsl_linalg_QR_UU_decomp(S, A, T); } else if (N == 1) { /* base case, compute Householder transform for single column matrix */ double * T00 = gsl_matrix_ptr(T, 0, 0); double * S00 = gsl_matrix_ptr(S, 0, 0); gsl_vector_view v = gsl_matrix_column(A, 0); *T00 = qrtz_householder_transform(S00, &v.vector); return GSL_SUCCESS; } else { /* * partition matrices: * * N1 N2 N1 N2 * N1 [ S11 S12 ] and N1 [ T11 T12 ] * N2 [ 0 S22 ] N2 [ 0 T22 ] * M [ A1 A2 ] * N1 [ U11 U12 ] * N2 [ 0 U22 ] */ int status; const size_t N1 = N / 2; const size_t N2 = N - N1; gsl_matrix_view S11 = gsl_matrix_submatrix(S, 0, 0, N1, N1); gsl_matrix_view S12 = gsl_matrix_submatrix(S, 0, N1, N1, N2); gsl_matrix_view S22 = gsl_matrix_submatrix(S, N1, N1, N2, N2); gsl_matrix_view A1 = gsl_matrix_submatrix(A, 0, 0, M - N, N1); gsl_matrix_view A2 = gsl_matrix_submatrix(A, 0, N1, M - N, N2); gsl_matrix_view U11 = gsl_matrix_submatrix(A, M - N, 0, N1, N1); gsl_matrix_view U12 = gsl_matrix_submatrix(A, M - N, N1, N1, N2); gsl_matrix_view T11 = gsl_matrix_submatrix(T, 0, 0, N1, N1); gsl_matrix_view T12 = gsl_matrix_submatrix(T, 0, N1, N1, N2); gsl_matrix_view T22 = gsl_matrix_submatrix(T, N1, N1, N2, N2); gsl_matrix_view m; /* * Eq. 2: recursively factor * * N1 N1 * N1 [ S11 ] = Q1 [ R11 ] N1 * N2 [ 0 ] [ 0 ] N2 * M-N [ A1 ] [ 0 ] M-N * N1 [ U11 ] [ 0 ] N1 * N2 [ 0 ] [ 0 ] N2 */ m = gsl_matrix_submatrix(A, 0, 0, M - N2, N1); status = gsl_linalg_QR_UZ_decomp(&S11.matrix, &m.matrix, &T11.matrix); if (status) return status; /* * Eq. 3: * * N2 N2 N2 * N1 [ R12 ] = Q1^T [ S12 ] = [ S12 - W ] N1 * N2 [ S22~ ] [ S22 ] [ S22 ] N2 * M-N [ A2~ ] [ A2 ] [ A2 - V11 W ] M-N * N1 [ U12~ ] [ U12 ] [ U12 - V21 W ] N1 * N2 [ U22~ ] [ U22 ] [ U22 ] N2 * * where W = T11^T ( S12 + V11^T A2 + V21^T U12 ), using T12 as temporary storage, and * * N1 * V1 = [ I ] N1 * [ 0 ] N2 * [ V11 ] M-N * [ V21 ] N1 * [ 0 ] N2 */ gsl_matrix_memcpy(&T12.matrix, &U12.matrix); /* W := U12 */ gsl_blas_dtrmm(CblasLeft, CblasUpper, CblasTrans, CblasNonUnit, 1.0, &U11.matrix, &T12.matrix); /* W := V21^T U12 */ gsl_matrix_add(&T12.matrix, &S12.matrix); /* W := S12 + V21^T U12 */ gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, &A1.matrix, &A2.matrix, 1.0, &T12.matrix); /* W := S12 + V21^T U12 + V11^T A2 */ gsl_blas_dtrmm(CblasLeft, CblasUpper, CblasTrans, CblasNonUnit, 1.0, &T11.matrix, &T12.matrix); /* W := T11^T W */ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, -1.0, &A1.matrix, &T12.matrix, 1.0, &A2.matrix); /* A2 := A2 - V11 W */ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, -1.0, &U11.matrix, &T12.matrix, 1.0, &U12.matrix); /* U12 := U12 - V21 W */ gsl_matrix_sub(&S12.matrix, &T12.matrix); /* R12 := S12 - W */ /* * Eq. 4: recursively factor * * [ S22~ ] = Q2~ [ R22 ] * [ A2~ ] [ 0 ] * [ U12~ ] [ 0 ] * [ U22~ ] [ 0 ] */ m = gsl_matrix_submatrix(A, 0, N1, M, N2); status = gsl_linalg_QR_UZ_decomp(&S22.matrix, &m.matrix, &T22.matrix); if (status) return status; /* * Eq. 13: update T12 := -T11 * V1^T * V2 * T22 * * where: * * N1 N2 * V1 = [ I ] N1 V2 = [ 0 ] N1 * [ 0 ] N2 [ I ] N2 * [ V11 ] M-N [ V12 ] M-N * [ V21 ] N1 [ V22 ] N1 * [ 0 ] N2 [ V32 ] N2 * * Note: V1^T V2 = V11^T V12 + V21^T V22 */ gsl_matrix_memcpy(&T12.matrix, &U12.matrix); /* T12 := V22 */ gsl_blas_dtrmm(CblasLeft, CblasUpper, CblasTrans, CblasNonUnit, 1.0, &U11.matrix, &T12.matrix); /* T12 := V21^T V22 */ gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, &A1.matrix, &A2.matrix, 1.0, &T12.matrix); /* T12 := T12 + V11^T * V12 */ gsl_blas_dtrmm(CblasLeft, CblasUpper, CblasNoTrans, CblasNonUnit, -1.0, &T11.matrix, &T12.matrix); /* T12 := -T11 * T12 */ gsl_blas_dtrmm(CblasRight, CblasUpper, CblasNoTrans, CblasNonUnit, 1.0, &T22.matrix, &T12.matrix); /* T12 := T12 * T22 */ return GSL_SUCCESS; } } /* qrtz_householder_transform() This routine is an optimized version of gsl_linalg_householder_transform(), designed for the QR decomposition of M-by-N matrices of the form: B = [ S ] [ A ] where S is N-by-N upper triangular, and A is M-by-N dense. This routine computes a householder transformation (tau,v) of a x so that P x = [ I - tau*v*v' ] x annihilates x(1:n-1). x will be a subcolumn of the matrix B, and so its structure will be: x = [ x0 ] <- 1 nonzero value for the diagonal element of S [ 0 ] <- N - j - 1 zeros, where j is column of matrix in [0,N-1] [ x ] <- M nonzero values for the dense part A Inputs: v0 - pointer to diagonal element of S on input, v0 = x0; v - on input, x vector on output, householder vector v */ static double qrtz_householder_transform (double *v0, gsl_vector * v) { /* replace v[0:M-1] with a householder vector (v[0:M-1]) and coefficient tau that annihilate v[1:M-1] */ double alpha, beta, tau ; /* compute xnorm = || [ 0 ; v ] ||, ignoring zero part of vector */ double xnorm = gsl_blas_dnrm2(v); if (xnorm == 0) { return 0.0; /* tau = 0 */ } alpha = *v0; beta = - GSL_SIGN(alpha) * hypot(alpha, xnorm) ; tau = (beta - alpha) / beta ; { double s = (alpha - beta); if (fabs(s) > GSL_DBL_MIN) { gsl_blas_dscal (1.0 / s, v); *v0 = beta; } else { gsl_blas_dscal (GSL_DBL_EPSILON / s, v); gsl_blas_dscal (1.0 / GSL_DBL_EPSILON, v); *v0 = beta; } } return tau; }