/* linalg/qr_ud.c * * Copyright (C) 2020 Patrick Alken * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or (at * your option) any later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include #include #include #include #include #include #include #include /* * this module contains routines for the QR factorization of a matrix * using the recursive Level 3 BLAS algorithm of Elmroth and Gustavson with * additional modifications courtesy of Julien Langou. */ static int aux_QR_TRD_decomp (gsl_matrix * U, gsl_matrix * A, const gsl_vector * D, gsl_matrix * T); static double qrtd_householder_transform (double *v0, double *v1); static double qrtrd_householder_transform (double *v0, gsl_vector * v, double *d); /* gsl_linalg_QR_UD_decomp() Compute the QR decomposition of the "triangle on top of diagonal" matrix [ U ] = Q [ R ] [ D ] [ 0 ] where U is N-by-N upper triangular, D is N-by-N diagonal Inputs: U - on input, upper triangular N-by-N matrix on output, R factor in upper triangle D - diagonal N-by-N matrix Y - (output) upper triangular N-by-N matrix (lower part of V) T - (output) block reflector matrix, N-by-N Notes: 1) Based on the Elmroth/Gustavson algorithm, taking into account the sparse structure of the U,S matrices 2) The Householder matrix V has the special form: N V = [ I ] N [ Y ] N with Y upper triangular 3) The orthogonal matrix is Q = I - V T V^T */ int gsl_linalg_QR_UD_decomp (gsl_matrix * U, const gsl_vector * D, gsl_matrix * Y, gsl_matrix * T) { const size_t N = U->size1; if (N != U->size2) { GSL_ERROR ("U matrix must be square", GSL_ENOTSQR); } else if (D->size != N) { GSL_ERROR ("D matrix does not match dimensions of U", GSL_EBADLEN); } else if (Y->size1 != Y->size2) { GSL_ERROR ("Y matrix must be square", GSL_ENOTSQR); } else if (Y->size1 != N) { GSL_ERROR ("Y matrix does not match dimensions of U", GSL_EBADLEN); } else if (T->size1 != N || T->size2 != N) { GSL_ERROR ("T matrix has wrong dimensions", GSL_EBADLEN); } else if (N == 1) { /* base case, compute Householder transform for single column matrix */ double * T00 = gsl_matrix_ptr(T, 0, 0); double * U00 = gsl_matrix_ptr(U, 0, 0); double * Y00 = gsl_matrix_ptr(Y, 0, 0); *Y00 = gsl_vector_get(D, 0); *T00 = qrtd_householder_transform(U00, Y00); return GSL_SUCCESS; } else { /* * partition matrices: * * N1 N2 N1 N2 * N1 [ U11 U12 ] and N1 [ T11 T12 ] * N2 [ 0 U22 ] N2 [ 0 T22 ] * N1 [ D11 D12 ] * N2 [ 0 D22 ] */ int status; const size_t N1 = N / 2; const size_t N2 = N - N1; gsl_matrix_view U11 = gsl_matrix_submatrix(U, 0, 0, N1, N1); gsl_matrix_view U12 = gsl_matrix_submatrix(U, 0, N1, N1, N2); gsl_matrix_view U22 = gsl_matrix_submatrix(U, N1, N1, N2, N2); gsl_vector_const_view D1 = gsl_vector_const_subvector(D, 0, N1); gsl_vector_const_view D2 = gsl_vector_const_subvector(D, N1, N2); gsl_matrix_view Y11 = gsl_matrix_submatrix(Y, 0, 0, N1, N1); gsl_matrix_view Y12 = gsl_matrix_submatrix(Y, 0, N1, N1, N2); gsl_matrix_view T11 = gsl_matrix_submatrix(T, 0, 0, N1, N1); gsl_matrix_view T12 = gsl_matrix_submatrix(T, 0, N1, N1, N2); gsl_matrix_view T22 = gsl_matrix_submatrix(T, N1, N1, N2, N2); gsl_matrix_view m; /* * Eq. 2: recursively factor * * N1 N1 * N1 [ U11 ] = Q1 [ R11 ] N1 * N2 [ 0 ] [ 0 ] N2 * N1 [ D11 ] [ 0 ] N1 * N2 [ 0 ] [ 0 ] N2 */ status = gsl_linalg_QR_UD_decomp(&U11.matrix, &D1.vector, &Y11.matrix, &T11.matrix); if (status) return status; /* * Eq. 3: * * N2 N2 N2 * N1 [ R12 ] = Q1^T [ U12 ] = [ U12 - W ] N1 * N2 [ U22~ ] [ U22 ] [ U22 ] N2 * N1 [ D12~ ] [ 0 ] [ - V31 W ] N1 * N2 [ D22~ ] [ D22 ] [ D22 ] N2 * * where W = T11^T U12, using T12 as temporary storage, and * * N1 * V1 = [ I ] N1 * [ 0 ] N2 * [ V31 ] N1 * [ 0 ] N2 */ gsl_matrix_memcpy(&T12.matrix, &U12.matrix); /* W := U12 */ gsl_blas_dtrmm(CblasLeft, CblasUpper, CblasTrans, CblasNonUnit, 1.0, &T11.matrix, &T12.matrix); /* W := T11^T W */ gsl_matrix_sub(&U12.matrix, &T12.matrix); /* R12 := U12 - W */ gsl_blas_dtrmm(CblasLeft, CblasUpper, CblasNoTrans, CblasNonUnit, -1.0, &Y11.matrix, &T12.matrix); /* W := -V31 W */ gsl_matrix_memcpy(&Y12.matrix, &T12.matrix); /* Y12 := -V31 W */ /* * Eq. 4: factor the "triangle on top of rectangle on top of diagonal" matrix * * N2 [ U22 ] = Q2~ [ R22 ] N2 * N1 [ Y12 ] [ 0 ] N1 * N2 [ D22 ] [ 0 ] N2 */ m = gsl_matrix_submatrix(Y, 0, N1, N, N2); status = aux_QR_TRD_decomp(&U22.matrix, &m.matrix, &D2.vector, &T22.matrix); if (status) return status; /* * Eq. 13: update T12 := -T11 * V1^T * V2 * T22 * * where: * * N1 N2 * V1 = [ I ] N1 V2 = [ 0 ] N1 * [ 0 ] N2 [ I ] N2 * [ V31~ ] N1 [ V32~ ] N1 * [ 0 ] N2 [ V42~ ] N2 * * Note: V1^T V2 = V31~^T V32~ */ gsl_matrix_memcpy(&T12.matrix, &Y12.matrix); /* T12 := V32~ */ gsl_blas_dtrmm(CblasLeft, CblasUpper, CblasTrans, CblasNonUnit, 1.0, &Y11.matrix, &T12.matrix); /* T12 := V31~^T V32~ */ gsl_blas_dtrmm(CblasLeft, CblasUpper, CblasNoTrans, CblasNonUnit, -1.0, &T11.matrix, &T12.matrix); /* T12 := -T11 * T12 */ gsl_blas_dtrmm(CblasRight, CblasUpper, CblasNoTrans, CblasNonUnit, 1.0, &T22.matrix, &T12.matrix); /* T12 := T12 * T22 */ return GSL_SUCCESS; } } /* Find the least squares solution to the overdetermined system * * [ U ] x = b * [ D ] * * using the QR factorization [ U; D ] = Q R. * * Inputs: R - upper triangular R matrix, N-by-N * Y - upper triangular Y matrix, N-by-N * T - upper triangular block reflector, N-by-N * b - right hand side, size 2*N * x - (output) solution, size 2*N * x(1:N) = least squares solution vector * x(N+1:2*N) = vector whose norm equals ||b - Ax|| * work - workspace, size N */ int gsl_linalg_QR_UD_lssolve (const gsl_matrix * R, const gsl_matrix * Y, const gsl_matrix * T, const gsl_vector * b, gsl_vector * x, gsl_vector * work) { const size_t N = R->size1; const size_t M = 2 * N; if (R->size2 != N) { GSL_ERROR ("R matrix must be square", GSL_ENOTSQR); } else if (Y->size1 != Y->size2) { GSL_ERROR ("Y matrix must be square", GSL_ENOTSQR); } else if (Y->size1 != N) { GSL_ERROR ("Y and R must have same dimensions", GSL_EBADLEN); } else if (T->size1 != N || T->size2 != N) { GSL_ERROR ("T matrix must be N-by-N", GSL_EBADLEN); } else if (M != b->size) { GSL_ERROR ("matrix size must match b size", GSL_EBADLEN); } else if (M != x->size) { GSL_ERROR ("matrix size must match solution size", GSL_EBADLEN); } else if (N != work->size) { GSL_ERROR ("workspace must be length N", GSL_EBADLEN); } else { return gsl_linalg_QR_UU_lssolve(R, Y, T, b, x, work); } } /* aux_QR_TRD_decomp() Compute the QR decomposition of the "triangle on top of rectangle on top of diagonal" matrix [ U ] = Q [ R ] [ A ] [ 0 ] [ D ] [ 0 ] where U is N-by-N upper triangular, A is M-by-N dense, D is N-by-N diagonal Inputs: U - on input, upper triangular N-by-N matrix on output, R factor in upper triangle A - on input, A is stored in A(1:M,1:N) on output, Y is stored in A(1:M+N,1:N) size (M+N)-by-N D - diagonal N-by-N matrix T - (output) block reflector matrix, N-by-N Notes: 1) Based on the Elmroth/Gustavson algorithm, taking into account the sparse structure of the U,S matrices 2) The Householder matrix V has the special form: N V = [ I ] N [ Y ] M+N with Y upper trapezoidal 3) The orthogonal matrix is Q = I - V T V^T */ static int aux_QR_TRD_decomp (gsl_matrix * U, gsl_matrix * A, const gsl_vector * D, gsl_matrix * T) { const size_t N = U->size1; if (N != U->size2) { GSL_ERROR ("U matrix must be square", GSL_ENOTSQR); } else if (A->size1 <= N) { GSL_ERROR ("A matrix must have > N rows", GSL_EBADLEN); } else if (D->size != N) { GSL_ERROR ("D matrix does not match dimensions of U", GSL_EBADLEN); } else if (T->size1 != N || T->size2 != N) { GSL_ERROR ("T matrix has wrong dimensions", GSL_EBADLEN); } else if (N == 1) { /* base case, compute Householder transform for single column matrix */ const size_t M = A->size1 - N; double * T00 = gsl_matrix_ptr(T, 0, 0); double * U00 = gsl_matrix_ptr(U, 0, 0); gsl_vector_view v = gsl_matrix_subcolumn(A, 0, 0, M); double * D00 = gsl_matrix_ptr(A, M, 0); *D00 = gsl_vector_get(D, 0); *T00 = qrtrd_householder_transform(U00, &v.vector, D00); return GSL_SUCCESS; } else { /* * partition matrices: * * N1 N2 N1 N2 * N1 [ U11 U12 ] and N1 [ T11 T12 ] * N2 [ 0 U22 ] N2 [ 0 T22 ] * N1 [ D11 D12 ] * N2 [ 0 D22 ] */ int status; const size_t M = A->size1 - N; const size_t N1 = N / 2; const size_t N2 = N - N1; gsl_matrix_view U11 = gsl_matrix_submatrix(U, 0, 0, N1, N1); gsl_matrix_view U12 = gsl_matrix_submatrix(U, 0, N1, N1, N2); gsl_matrix_view U22 = gsl_matrix_submatrix(U, N1, N1, N2, N2); gsl_matrix_view A1 = gsl_matrix_submatrix(A, 0, 0, M, N1); gsl_matrix_view A2 = gsl_matrix_submatrix(A, 0, N1, M, N2); gsl_vector_const_view D1 = gsl_vector_const_subvector(D, 0, N1); gsl_vector_const_view D2 = gsl_vector_const_subvector(D, N1, N2); gsl_matrix_view T11 = gsl_matrix_submatrix(T, 0, 0, N1, N1); gsl_matrix_view T12 = gsl_matrix_submatrix(T, 0, N1, N1, N2); gsl_matrix_view T22 = gsl_matrix_submatrix(T, N1, N1, N2, N2); gsl_matrix_view Y41 = gsl_matrix_submatrix(A, M, 0, N1, N1); gsl_matrix_view Y42 = gsl_matrix_submatrix(A, M, N1, N1, N2); gsl_matrix_view m; /* * Eq. 2: recursively factor * * N1 N1 * N1 [ U11 ] = Q1 [ R11 ] N1 * N2 [ 0 ] [ 0 ] N2 * M [ A1 ] [ 0 ] M * N1 [ D11 ] [ 0 ] N1 * N2 [ 0 ] [ 0 ] N2 */ m = gsl_matrix_submatrix(A, 0, 0, M + N1, N1); status = aux_QR_TRD_decomp(&U11.matrix, &m.matrix, &D1.vector, &T11.matrix); if (status) return status; /* * Eq. 3: * * N2 N2 N2 * N1 [ R12 ] = Q1^T [ U12 ] = [ U12 - W ] N1 * N2 [ U22~ ] [ U22 ] [ U22 ] N2 * M [ A2~ ] [ A2 ] [ A2 - V31 W ] M * N1 [ D12~ ] [ 0 ] [ - V41 W ] N1 * N2 [ D22~ ] [ D22 ] [ D22 ] N2 * * where W = T11^T ( U12 + V31^T A2 ), using T12 as temporary storage, and * * N1 * V1 = [ I ] N1 * [ 0 ] N2 * [ V31 ] M * [ V41 ] N1 * [ 0 ] N2 */ gsl_matrix_memcpy(&T12.matrix, &U12.matrix); /* W := U12 */ gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, &A1.matrix, &A2.matrix, 1.0, &T12.matrix); /* W := U12 + V31^T A2 */ gsl_blas_dtrmm(CblasLeft, CblasUpper, CblasTrans, CblasNonUnit, 1.0, &T11.matrix, &T12.matrix); /* W := T11^T W */ gsl_matrix_sub(&U12.matrix, &T12.matrix); /* R12 := U12 - W */ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, -1.0, &A1.matrix, &T12.matrix, 1.0, &A2.matrix); /* A2 := A2 - V31 W */ gsl_blas_dtrmm(CblasLeft, CblasUpper, CblasNoTrans, CblasNonUnit, -1.0, &Y41.matrix, &T12.matrix); /* W := -V41 W */ gsl_matrix_memcpy(&Y42.matrix, &T12.matrix); /* V42 := -431 W */ /* * Eq. 4: factor the "triangle on top of rectangle on top of diagonal" matrix * * N2 [ U22 ] = Q2~ [ R22 ] N2 * N1 [ Y12 ] [ 0 ] N1 * N2 [ D22 ] [ 0 ] N2 */ m = gsl_matrix_submatrix(A, 0, N1, M + N, N2); status = aux_QR_TRD_decomp(&U22.matrix, &m.matrix, &D2.vector, &T22.matrix); if (status) return status; /* * Eq. 13: update T12 := -T11 * V1^T * V2 * T22 * * where: * * N1 N2 * V1 = [ I ] N1 V2 = [ 0 ] N1 * [ 0 ] N2 [ I ] N2 * [ V31~ ] M [ V32~ ] M * [ V41~ ] N1 [ V42~ ] N1 * [ 0 ] N2 [ V52~ ] N2 * * Note: V1^T V2 = V31~^T V32~ + V41~^T V42~ */ gsl_matrix_memcpy(&T12.matrix, &Y42.matrix); /* T12 := V42~ */ gsl_blas_dtrmm(CblasLeft, CblasUpper, CblasTrans, CblasNonUnit, 1.0, &Y41.matrix, &T12.matrix); /* T12 := V41~^T V42~ */ gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, &A1.matrix, &A2.matrix, 1.0, &T12.matrix); /* T12 := V31~^T V32~^T + V41~^T V42~ */ gsl_blas_dtrmm(CblasLeft, CblasUpper, CblasNoTrans, CblasNonUnit, -1.0, &T11.matrix, &T12.matrix); /* T12 := -T11 * T12 */ gsl_blas_dtrmm(CblasRight, CblasUpper, CblasNoTrans, CblasNonUnit, 1.0, &T22.matrix, &T12.matrix); /* T12 := T12 * T22 */ return GSL_SUCCESS; } } /* qrtd_householder_transform() This routine is an optimized version of gsl_linalg_householder_transform(), designed for the QR decomposition of M-by-N matrices of the form: B = [ U ] [ S ] where U,S are N-by-N upper triangular. This routine computes a householder transformation (tau,v) of a x so that P x = [ I - tau*v*v' ] x annihilates x(1:n-1). x will be a subcolumn of the matrix B, and so its structure will be: x = [ x0 ] <- 1 nonzero value for the diagonal element of U [ y0 ] <- 1 nonzero value for the diagonal element of S Inputs: v0 - pointer to diagonal element of U on input, v0 = x0; v1 - on input, pointer to diagonal element of S on output, householder vector v */ static double qrtd_householder_transform (double *v0, double *v1) { /* replace v[0:M-1] with a householder vector (v[0:M-1]) and coefficient tau that annihilate v[1:M-1] */ double alpha, beta, tau ; /* compute xnorm = || [ 0 ; v ] ||, ignoring zero part of vector */ double xnorm = fabs(*v1); if (xnorm == 0) { return 0.0; /* tau = 0 */ } alpha = *v0; beta = - GSL_SIGN(alpha) * hypot(alpha, xnorm) ; tau = (beta - alpha) / beta ; { double s = (alpha - beta); if (fabs(s) > GSL_DBL_MIN) { *v1 /= s; *v0 = beta; } else { *v1 *= GSL_DBL_EPSILON / s; *v1 /= GSL_DBL_EPSILON; *v0 = beta; } } return tau; } /* qrtrd_householder_transform() This routine is an optimized version of gsl_linalg_householder_transform(), designed for the QR decomposition of M-by-N matrices of the form: B = [ U ] [ A ] [ D ] where U is N-by-N upper triangular A is M-by-N dense and D is N-by-N diagonal. This routine computes a householder transformation (tau,v) of a x so that P x = [ I - tau*v*v' ] x annihilates x(1:n-1). x will be a subcolumn of the matrix B, and so its structure will be: x = [ x0 ] <- 1 nonzero value for the diagonal element of S [ 0 ] <- N - j - 1 zeros, where j is column of matrix in [0,N-1] [ x ] <- M nonzero values for the dense part A Inputs: v0 - pointer to diagonal element of S on input, v0 = x0; v - on input, x vector on output, householder vector v d - on input, diagonal element of D on output, householder element */ static double qrtrd_householder_transform (double *v0, gsl_vector * v, double *d) { /* replace v[0:M-1] with a householder vector (v[0:M-1]) and coefficient tau that annihilate v[1:M-1] */ double alpha, beta, tau ; /* compute xnorm = || [ 0 ; v ; 0 ; d] ||, ignoring zero part of vector */ double xnorm; xnorm = gsl_blas_dnrm2(v); xnorm = gsl_hypot(xnorm, *d); if (xnorm == 0) { return 0.0; /* tau = 0 */ } alpha = *v0; beta = - GSL_SIGN(alpha) * hypot(alpha, xnorm) ; tau = (beta - alpha) / beta ; { double s = (alpha - beta); if (fabs(s) > GSL_DBL_MIN) { gsl_blas_dscal (1.0 / s, v); *d /= s; *v0 = beta; } else { gsl_blas_dscal (GSL_DBL_EPSILON / s, v); gsl_blas_dscal (1.0 / GSL_DBL_EPSILON, v); *d *= GSL_DBL_EPSILON / s; *d /= GSL_DBL_EPSILON; *v0 = beta; } } return tau; }