/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ #include #include #include #include #include "slu_ddefs.h" #define epsmac 1.0e-16 extern double ddot_(int *, double [], int *, double [], int *); extern double dnrm2_(int *, double [], int *); extern void daxpy_(int *, double *, double [], int *, double [], int *); extern double dcopy_(int *, double [], int *, double [], int *); int fgmr(int n, void (*matvec) (double, double[], double, double[]), void (*psolve) (int, double[], double[]), double *rhs, double *sol, double tol, int im, int *itmax, FILE * fits) { /*---------------------------------------------------------------------- | *** Preconditioned FGMRES *** +----------------------------------------------------------------------- | This is a simple version of the ARMS preconditioned FGMRES algorithm. +----------------------------------------------------------------------- | Y. S. Dec. 2000. -- Apr. 2008 +----------------------------------------------------------------------- | on entry: |---------- | | rhs = real vector of length n containing the right hand side. | sol = real vector of length n containing an initial guess to the | solution on input. | tol = tolerance for stopping iteration | im = Krylov subspace dimension | (itmax) = max number of iterations allowed. | fits = NULL: no output | != NULL: file handle to output " resid vs time and its" | | on return: |---------- | fgmr int = 0 --> successful return. | int = 1 --> convergence not achieved in itmax iterations. | sol = contains an approximate solution (upon successful return). | itmax = has changed. It now contains the number of steps required | to converge -- +----------------------------------------------------------------------- | internal work arrays: |---------- | vv = work array of length [im+1][n] (used to store the Arnoldi | basis) | hh = work array of length [im][im+1] (Householder matrix) | z = work array of length [im][n] to store preconditioned vectors +----------------------------------------------------------------------- | subroutines called : | matvec - matrix-vector multiplication operation | psolve - (right) preconditionning operation | psolve can be a NULL pointer (GMRES without preconditioner) +---------------------------------------------------------------------*/ int maxits = *itmax; int i, i1, ii, j, k, k1, its, retval, i_1 = 1; double **hh, *c, *s, *rs, t, t0; double beta, eps1 = 0.0, gam, **vv, **z; its = 0; vv = (double **)SUPERLU_MALLOC((im + 1) * sizeof(double *)); for (i = 0; i <= im; i++) vv[i] = doubleMalloc(n); z = (double **)SUPERLU_MALLOC(im * sizeof(double *)); hh = (double **)SUPERLU_MALLOC(im * sizeof(double *)); for (i = 0; i < im; i++) { hh[i] = doubleMalloc(i + 2); z[i] = doubleMalloc(n); } c = doubleMalloc(im); s = doubleMalloc(im); rs = doubleMalloc(im + 1); /*---- outer loop starts here ----*/ do { /*---- compute initial residual vector ----*/ matvec(1.0, sol, 0.0, vv[0]); for (j = 0; j < n; j++) vv[0][j] = rhs[j] - vv[0][j]; /* vv[0]= initial residual */ beta = dnrm2_(&n, vv[0], &i_1); /*---- print info if fits != null ----*/ if (fits != NULL && its == 0) fprintf(fits, "%8d %10.2e\n", its, beta); /*if ( beta < tol * dnrm2_(&n, rhs, &i_1) )*/ if ( !(beta >= tol * dnrm2_(&n, rhs, &i_1)) ) break; t = 1.0 / beta; /*---- normalize: vv[0] = vv[0] / beta ----*/ for (j = 0; j < n; j++) vv[0][j] = vv[0][j] * t; if (its == 0) eps1 = tol * beta; /*---- initialize 1-st term of rhs of hessenberg system ----*/ rs[0] = beta; for (i = 0; i < im; i++) { its++; i1 = i + 1; /*------------------------------------------------------------ | (Right) Preconditioning Operation z_{j} = M^{-1} v_{j} +-----------------------------------------------------------*/ if (psolve) psolve(n, z[i], vv[i]); else dcopy_(&n, vv[i], &i_1, z[i], &i_1); /*---- matvec operation w = A z_{j} = A M^{-1} v_{j} ----*/ matvec(1.0, z[i], 0.0, vv[i1]); /*------------------------------------------------------------ | modified gram - schmidt... | h_{i,j} = (w,v_{i}) | w = w - h_{i,j} v_{i} +------------------------------------------------------------*/ t0 = dnrm2_(&n, vv[i1], &i_1); for (j = 0; j <= i; j++) { double negt; t = ddot_(&n, vv[j], &i_1, vv[i1], &i_1); hh[i][j] = t; negt = -t; daxpy_(&n, &negt, vv[j], &i_1, vv[i1], &i_1); } /*---- h_{j+1,j} = ||w||_{2} ----*/ t = dnrm2_(&n, vv[i1], &i_1); while (t < 0.5 * t0) { t0 = t; for (j = 0; j <= i; j++) { double negt; t = ddot_(&n, vv[j], &i_1, vv[i1], &i_1); hh[i][j] += t; negt = -t; daxpy_(&n, &negt, vv[j], &i_1, vv[i1], &i_1); } t = dnrm2_(&n, vv[i1], &i_1); } hh[i][i1] = t; if (t != 0.0) { /*---- v_{j+1} = w / h_{j+1,j} ----*/ t = 1.0 / t; for (k = 0; k < n; k++) vv[i1][k] = vv[i1][k] * t; } /*--------------------------------------------------- | done with modified gram schimdt and arnoldi step | now update factorization of hh +--------------------------------------------------*/ /*-------------------------------------------------------- | perform previous transformations on i-th column of h +-------------------------------------------------------*/ for (k = 1; k <= i; k++) { k1 = k - 1; t = hh[i][k1]; hh[i][k1] = c[k1] * t + s[k1] * hh[i][k]; hh[i][k] = -s[k1] * t + c[k1] * hh[i][k]; } gam = sqrt(pow(hh[i][i], 2) + pow(hh[i][i1], 2)); /*--------------------------------------------------- | if gamma is zero then any small value will do | affect only residual estimate +--------------------------------------------------*/ /* if (gam == 0.0) gam = epsmac; */ /*---- get next plane rotation ---*/ if (gam > 0.0) { c[i] = hh[i][i] / gam; s[i] = hh[i][i1] / gam; } else { c[i] = 1.0; s[i] = 0.0; } rs[i1] = -s[i] * rs[i]; rs[i] = c[i] * rs[i]; /*---------------------------------------------------- | determine residual norm and test for convergence +---------------------------------------------------*/ hh[i][i] = c[i] * hh[i][i] + s[i] * hh[i][i1]; beta = fabs(rs[i1]); if (fits != NULL) fprintf(fits, "%8d %10.2e\n", its, beta); if (beta <= eps1 || its >= maxits) break; } if (i == im) i--; /*---- now compute solution. 1st, solve upper triangular system ----*/ rs[i] = rs[i] / hh[i][i]; for (ii = 1; ii <= i; ii++) { k = i - ii; k1 = k + 1; t = rs[k]; for (j = k1; j <= i; j++) t = t - hh[j][k] * rs[j]; rs[k] = t / hh[k][k]; } /*---- linear combination of v[i]'s to get sol. ----*/ for (j = 0; j <= i; j++) { t = rs[j]; for (k = 0; k < n; k++) sol[k] += t * z[j][k]; } /* calculate the residual and output */ matvec(1.0, sol, 0.0, vv[0]); for (j = 0; j < n; j++) vv[0][j] = rhs[j] - vv[0][j]; /* vv[0]= initial residual */ /*---- print info if fits != null ----*/ beta = dnrm2_(&n, vv[0], &i_1); /*---- restart outer loop if needed ----*/ /*if (beta >= eps1 / tol)*/ if ( !(beta < eps1 / tol) ) { its = maxits + 10; break; } if (beta <= eps1) break; } while(its < maxits); retval = (its >= maxits); for (i = 0; i <= im; i++) SUPERLU_FREE(vv[i]); SUPERLU_FREE(vv); for (i = 0; i < im; i++) { SUPERLU_FREE(hh[i]); SUPERLU_FREE(z[i]); } SUPERLU_FREE(hh); SUPERLU_FREE(z); SUPERLU_FREE(c); SUPERLU_FREE(s); SUPERLU_FREE(rs); *itmax = its; return retval; } /*----end of fgmr ----*/