program psndrv3
c
c     Message Passing Layer: MPI
c
c     Simple program to illustrate the idea of reverse communication
c     in inverse mode for a generalized nonsymmetric eigenvalue problem.
c
c     We implement example three of ex-nonsym.doc in DOCUMENTS directory
c
c\Example-3
c     ... Suppose we want to solve A*x = lambda*B*x in inverse mode,
c         where A is derived from the 1-dimensional convection-diffusion
c         operator on the interval [0,1] with zero boundary condition,
c         and M is the tridiagonal matrix with 4 on the diagonal and 1
c         on the subdiagonals.
c     ... So OP = inv[M]*A  and  B = M.
c     ... Use mode 2 of PSNAUPD.
c
c\BeginLib
c
c\Routines called:
c     psnaupd Parallel ARPACK reverse communication interface routine.
c     psneupd Parallel ARPACK routine that returns Ritz values and (optionally)
c              Ritz vectors.
c     spttrf  LAPACK symmetric positive definite tridiagonal factorization
c             routine.
c     spttrs  LAPACK symmetric positive definite tridiagonal solve routine.
c     slapy2  LAPACK routine to compute sqrt(x**2+y**2) carefully.
c     saxpy   Level 1 BLAS that computes y <- alpha*x+y.
c     psnorm2 Parallel version of Level 1 BLAS that computes the norm of a vector.
c     av      Parallel Matrix vector multiplication routine that computes A*x.
c     mv      Matrix vector multiplication routine that computes M*x.
c
c\Author
c     Richard Lehoucq
c     Danny Sorensen
c     Chao Yang
c     Dept. of Computational &
c     Applied Mathematics
c     Rice University
c     Houston, Texas
c
c\Parallel Modifications
c     Kristi Maschhoff
c
c\Revision history:
c     Starting Point: Serial Code FILE: ndrv3.F   SID: 2.2
c
c\SCCS Information:
c FILE: ndrv3.F   SID: 1.1   DATE OF SID: 8/13/96   RELEASE: 1
c
c\Remarks
c     1. None
c
c\EndLib
c--------------------------------------------------------------------------
c
      include 'mpif.h'
      include 'debug.h'
      include 'stat.h'

c     %-------------------------------%
c     | MPI INTERFACE                 |
c     | ILP64 is not supported by MPI |
c     | integer*4 must be imposed in  |
c     | all calls involving MPI.      |
c     |                               |
c     | Use ierr for MPI calls.       |
c     %-------------------------------%

      integer*4         comm, myid, nprocs, rc, ierr

c
c     %-----------------------------%
c     | Define leading dimensions   |
c     | for all arrays.             |
c     | MAXN:   Maximum dimension   |
c     |         of the A allowed.   |
c     | MAXNEV: Maximum NEV allowed |
c     | MAXNCV: Maximum NCV allowed |
c     %-----------------------------%
c
      integer           maxn, maxnev, maxncv, ldv
      parameter         (maxn=256, maxnev=10, maxncv=25,
     &                   ldv=maxn )
c
c     %--------------%
c     | Local Arrays |
c     %--------------%
c
      integer           iparam(11), ipntr(14)
      logical           select(maxncv)
      Real
     &                  ax(maxn), mx(maxn), d(maxncv, 3), resid(maxn),
     &                  v(ldv,maxncv), workd(3*maxn),
     &                  workev(3*maxncv),
     &                  workl(3*maxncv*maxncv+6*maxncv),
     &                  md(maxn), me(maxn-1), temp(maxn), temp_buf(maxn)
c
c     %------------------------------------%
c     | Local Scalars                      |
c     |                                    |
c     | Use info if ILP64 can be supported |
c     | (call to BLAS, LAPACK, ARPACK).    |
c     %------------------------------------%
c
      character         bmat*1, which*2
      integer           ido, n, nev, ncv, lworkl, info, nloc, j,
     &                  nconv, maxitr, ishfts, mode, blk
      Real
     &                  tol, sigmar, sigmai
      logical           first, rvec
c
c     %------------%
c     | Parameters |
c     %------------%
c
      Real
     &                  zero, one
      parameter         (zero = 0.0, one = 1.0)
c
c     %-----------------------------%
c     | BLAS & LAPACK routines used |
c     %-----------------------------%
      Real
     &                  psnorm2, slapy2
      external          saxpy, psnorm2, spttrf, spttrs, slapy2
c
c     %-----------------------%
c     | Executable Statements |
c     %-----------------------%
c
      call MPI_INIT( ierr )
      comm = MPI_COMM_WORLD
      call MPI_COMM_RANK( comm, myid, ierr )
      call MPI_COMM_SIZE( comm, nprocs, ierr )
c
      ndigit = -3
      logfil = 6
      mnaupd = 1
c
c     %----------------------------------------------------%
c     | The number N is the dimension of the matrix.  A    |
c     | generalized eigenvalue problem is solved (BMAT =   |
c     | 'G').  NEV is the number of eigenvalues to be      |
c     | approximated.  The user can modify NEV, NCV, WHICH |
c     | to solve problems of different sizes, and to get   |
c     | different parts of the spectrum.  However, The     |
c     | following conditions must be satisfied:            |
c     |                    N <= MAXN,                      |
c     |                  NEV <= MAXNEV,                    |
c     |              NEV + 2 <= NCV <= MAXNCV              |
c     %----------------------------------------------------%
c
      n     = 100
      nev   = 4
      ncv   = 20
c
c     %--------------------------------------%
c     | Set up distribution of data to nodes |
c     %--------------------------------------%
c
      nloc = (n / nprocs )
      blk = nloc
      if ( mod(n, nprocs) .gt. 0 ) then
         if ( myid .eq. nprocs-1 ) nloc = nloc + mod(n, nprocs)
*      if ( mod(n, nprocs) .gt. myid ) nloc = nloc + 1
      endif
c
      if ( nloc .gt. maxn ) then
         print *, ' ERROR with _NDRV3: NLOC is greater than MAXN '
         go to 9000
      else if ( nev .gt. maxnev ) then
         print *, ' ERROR with _NDRV3: NEV is greater than MAXNEV '
         go to 9000
      else if ( ncv .gt. maxncv ) then
         print *, ' ERROR with _NDRV3: NCV is greater than MAXNCV '
         go to 9000
      end if
      bmat  = 'G'
      which = 'LM'
c
c     %-----------------------------------------------------%
c     | The matrix M is chosen to be the symmetric tri-     |
c     | diagonal matrix with 4 on the diagonal and 1 on the |
c     | off diagonals. It is factored by LAPACK subroutine  |
c     | spttrf.                                             |
c     %-----------------------------------------------------%
c
      do 20 j = 1, n-1
         md(j) = 4.0
         me(j) = one
  20  continue
      md(n) = 4.0*one
c
      call spttrf(n, md, me, info)
      if ( info .ne. 0 ) then
         print*, ' '
         print*, ' ERROR with _pttrf. '
         print*, ' '
         go to 9000
      end if
c
c     %-----------------------------------------------------%
c     | The work array WORKL is used in SNAUPD as           |
c     | workspace.  Its dimension LWORKL is set as          |
c     | illustrated below.  The parameter TOL determines    |
c     | the stopping criterion. If TOL<=0, machine          |
c     | precision is used.  The variable IDO is used for    |
c     | reverse communication, and is initially set to 0.   |
c     | Setting INFO=0 indicates that a random vector is    |
c     | generated in SNAUPD to start the Arnoldi iteration. |
c     %-----------------------------------------------------%
c
      lworkl = 3*ncv**2+6*ncv
      tol    = 0.0
      ido    = 0
      info   = 0
c
c     %---------------------------------------------------%
c     | This program uses exact shifts with respect to    |
c     | the current Hessenberg matrix (IPARAM(1) = 1).    |
c     | IPARAM(3) specifies the maximum number of Arnoldi |
c     | iterations allowed.  Mode 2 of SNAUPD is used     |
c     | (IPARAM(7) = 2).  All these options can be        |
c     | changed by the user. For details, see the         |
c     | documentation in SNAUPD.                          |
c     %---------------------------------------------------%
c
      ishfts = 1
      maxitr = 300
      mode   = 2
c
      iparam(1) = ishfts
      iparam(3) = maxitr
      iparam(7) = mode
c
c     %-------------------------------------------%
c     | M A I N   L O O P (Reverse communication) |
c     %-------------------------------------------%
c
 10   continue
c
c        %---------------------------------------------%
c        | Repeatedly call the routine SNAUPD and take |
c        | actions indicated by parameter IDO until    |
c        | either convergence is indicated or maxitr   |
c        | has been exceeded.                          |
c        %---------------------------------------------%
c
         call psnaupd( comm, ido, bmat, nloc, which, nev, tol, resid,
     &                 ncv, v, ldv, iparam, ipntr, workd,
     &                 workl, lworkl, info )
c
         if (ido .eq. -1 .or. ido .eq. 1) then
c
c           %----------------------------------------%
c           | Perform  y <--- OP*x = inv[M]*A*x      |
c           | The user should supply his/her own     |
c           | matrix vector routine and a linear     |
c           | system solver.  The matrix-vector      |
c           | subroutine should take workd(ipntr(1)) |
c           | as input, and the final result should  |
c           | be returned to workd(ipntr(2)).        |
c           %----------------------------------------%
c
            call av (comm, nloc, n, workd(ipntr(1)), workd(ipntr(2)))
c======== Hack for Linear system ======= ccc
            call sscal(n, zero, temp, 1)
            call sscal(n, zero, temp_buf, 1)
            do 15 j=1,nloc
               temp_buf(myid*blk + j) = workd(ipntr(2) + j - 1)
   15       continue
            call MPI_ALLREDUCE( temp_buf, temp, n,
     &            MPI_REAL, MPI_SUM, comm, ierr )
            call spttrs(n, 1, md, me, temp, n,
     &                  info)
            if ( info .ne. 0 ) then
               print*, ' '
               print*, ' ERROR with _pttrs. '
               print*, ' '
               go to 9000
            end if
            do 16 j=1,nloc
               workd(ipntr(2) + j - 1 ) = temp(myid*blk + j)
   16       continue
c
c           %-----------------------------------------%
c           | L O O P   B A C K to call SNAUPD again. |
c           %-----------------------------------------%
c
            go to 10
c
         else if ( ido .eq. 2) then
c
c           %-------------------------------------%
c           |        Perform  y <--- M*x          |
c           | The matrix vector multiplication    |
c           | routine should take workd(ipntr(1)) |
c           | as input and return the result to   |
c           | workd(ipntr(2)).                    |
c           %-------------------------------------%
c
            call mv (comm, nloc, workd(ipntr(1)), workd(ipntr(2)))
c
c           %-----------------------------------------%
c           | L O O P   B A C K to call SNAUPD again. |
c           %-----------------------------------------%
c
            go to 10
c
         end if
c
c
c     %-----------------------------------------%
c     | Either we have convergence, or there is |
c     | an error.                               |
c     %-----------------------------------------%
c
      if ( info .lt. 0 ) then
c
c        %---------------------------%
c        | Error message. Check the  |
c        | documentation in PSNAUPD. |
c        %---------------------------%
c
         if ( myid .eq. 0 ) then
            print *, ' '
            print *, ' Error with _naupd, info = ', info
            print *, ' Check the documentation of _naupd.'
            print *, ' '
         endif
c
      else
c
c        %-------------------------------------------%
c        | No fatal errors occurred.                 |
c        | Post-Process using SNEUPD.                |
c        |                                           |
c        | Computed eigenvalues may be extracted.    |
c        |                                           |
c        | Eigenvectors may also be computed now if  |
c        | desired.  (indicated by rvec = .true.)    |
c        %-------------------------------------------%
c
         rvec = .true.
         call psneupd ( comm, rvec, 'A', select, d, d(1,2), v, ldv,
     &        sigmar, sigmai, workev, bmat, nloc, which, nev, tol,
     &        resid, ncv, v, ldv, iparam, ipntr, workd,
     &        workl, lworkl, info )
c
c        %-----------------------------------------------%
c        | The real part of the eigenvalue is returned   |
c        | in the first column of the two dimensional    |
c        | array D, and the IMAGINARY part is returned   |
c        | in the second column of D.  The corresponding |
c        | eigenvectors are returned in the first NEV    |
c        | columns of the two dimensional array V if     |
c        | requested.  Otherwise, an orthogonal basis    |
c        | for the invariant subspace corresponding to   |
c        | the eigenvalues in D is returned in V.        |
c        %-----------------------------------------------%
c
         if ( info .ne. 0 ) then
c
c           %------------------------------------%
c           | Error condition:                   |
c           | Check the documentation of SNEUPD. |
c           %------------------------------------%
c
            if ( myid .eq. 0 ) then
               print *, ' '
               print *, ' Error with _neupd, info = ', info
               print *, ' Check the documentation of _neupd'
               print *, ' '
            endif
c
         else
c
            first = .true.
            nconv = iparam(5)
            do 30 j=1, iparam(5)
c
c              %---------------------------%
c              | Compute the residual norm |
c              |                           |
c              |  ||  A*x - lambda*M*x ||  |
c              |                           |
c              | for the NCONV accurately  |
c              | computed eigenvalues and  |
c              | eigenvectors.  (iparam(5) |
c              | indicates how many are    |
c              | accurate to the requested |
c              | tolerance)                |
c              %---------------------------%
c
               if (d(j,2) .eq. zero)  then
c
c                 %--------------------%
c                 | Ritz value is real |
c                 %--------------------%
c
                  call av(comm, nloc, n, v(1,j), ax)
                  call mv(comm, nloc, v(1,j), mx)
                  call saxpy(nloc, -d(j,1), mx, 1, ax, 1)
                  d(j,3) = psnorm2(comm, nloc, ax, 1)
c
               else if (first) then
c
c                 %------------------------%
c                 | Ritz value is complex  |
c                 | Residual of one Ritz   |
c                 | value of the conjugate |
c                 | pair is computed.      |
c                 %------------------------%
c
                  call av(comm, nloc, n, v(1,j), ax)
                  call mv(comm, nloc, v(1,j), mx)
                  call saxpy(nloc, -d(j,1), mx, 1, ax, 1)
                  call mv(comm, nloc, v(1,j+1), mx)
                  call saxpy(nloc, d(j,2), mx, 1, ax, 1)
                  d(j,3) = psnorm2(comm, nloc, ax, 1)**2
                  call av(comm, nloc, n, v(1,j+1), ax)
                  call mv(comm, nloc, v(1,j+1), mx)
                  call saxpy(nloc, -d(j,1), mx, 1, ax, 1)
                  call mv(comm, nloc, v(1,j), mx)
                  call saxpy(nloc, -d(j,2), mx, 1, ax, 1)
                  d(j,3) = slapy2( d(j,3), psnorm2(comm,nloc,ax,1) )
                  d(j+1,3) = d(j,3)
                  first = .false.
               else
                  first = .true.
               end if
c
  30        continue
c
c           %-----------------------------%
c           | Display computed residuals. |
c           %-----------------------------%
c
            call psmout(comm, 6, nconv, 3, d, maxncv, -6,
     &           'Ritz values (Real,Imag) and direct residuals')
c
         end if
c
c        %------------------------------------------%
c        | Print additional convergence information |
c        %------------------------------------------%
c
         if (myid .eq. 0)then
         if ( info .eq. 1) then
            print *, ' '
            print *, ' Maximum number of iterations reached.'
            print *, ' '
         else if ( info .eq. 3) then
            print *, ' '
            print *, ' No shifts could be applied during implicit
     &                 Arnoldi update, try increasing NCV.'
            print *, ' '
         end if
c
         print *, ' '
         print *, '_NDRV3 '
         print *, '====== '
         print *, ' '
         print *, ' Size of the matrix is ', n
         print *, ' The number of processors is ', nprocs
         print *, ' The number of Ritz values requested is ', nev
         print *, ' The number of Arnoldi vectors generated',
     &            ' (NCV) is ', ncv
         print *, ' What portion of the spectrum: ', which
         print *, ' The number of converged Ritz values is ',
     &              nconv
         print *, ' The number of Implicit Arnoldi update',
     &            ' iterations taken is ', iparam(3)
         print *, ' The number of OP*x is ', iparam(9)
         print *, ' The convergence criterion is ', tol
         print *, ' '
c
         endif
      end if
c
c     %----------------------------%
c     | Done with program psndrv3. |
c     %----------------------------%
c
 9000 continue
c
c     %-------------------------%
c     | Release resources MPI |
c     %-------------------------%
c
      call MPI_FINALIZE(rc)
c
      end
c
c==========================================================================
c
c     parallel matrix vector multiplication subroutine
c
c     Compute the matrix vector multiplication y<---A*x
c     where A is a n by n nonsymmetric tridiagonal matrix derived
c     from the central difference discretization of the 1-dimensional
c     convection diffusion operator on the interval [0,1] with
c     zero Dirichlet boundary condition.
c
      subroutine av (comm, nloc, n, v, w)
c
c     .. MPI Declarations ...
      include           'mpif.h'
      integer*4         comm, nprocs, myid, ierr,
     &                  status(MPI_STATUS_SIZE)
c
      integer           nloc, n, j, next, prev
      Real
     &                  v(nloc), w(nloc), one, two, dd, dl, du,
     &                  s, h, rho, mv_buf
      parameter         ( rho = 10.0, one = 1.0,
     &                    two = 2.0)
c
      call MPI_COMM_RANK( comm, myid, ierr )
      call MPI_COMM_SIZE( comm, nprocs, ierr )
      h = one / real(n+1)
      s = rho*h / two
      dd = two
      dl = -one - s
      du = -one + s
c
      w(1) =  dd*v(1) + du*v(2)
      do 10 j = 2,nloc-1
         w(j) = dl*v(j-1) + dd*v(j) + du*v(j+1)
 10   continue
      w(nloc) =  dl*v(nloc-1) + dd*v(nloc)
c
      next = myid + 1
      prev = myid - 1
      if ( myid .lt. nprocs-1 ) then
         call mpi_send( v(nloc), 1, MPI_REAL,
     &                  next, myid+1, comm, ierr )
      endif
      if ( myid .gt. 0 ) then
         call mpi_recv( mv_buf, 1, MPI_REAL, prev, myid,
     &                  comm, status, ierr )
         w(1) = w(1) + dl*mv_buf
      endif
c
      if ( myid .gt. 0 ) then
         call mpi_send( v(1), 1, MPI_REAL,
     &                  prev, myid-1, comm, ierr )
      endif
      if ( myid .lt. nprocs-1 ) then
         call mpi_recv( mv_buf, 1, MPI_REAL, next, myid,
     &                  comm, status, ierr )
         w(nloc) = w(nloc) + du*mv_buf
      endif
c
      return
      end
c------------------------------------------------------------------------
c
c     Compute the matrix vector multiplication y<---M*x
c     where M is a n by n tridiagonal matrix with 4 on the
c     diagonal, 1 on the subdiagonal and the superdiagonal.
c
      subroutine mv (comm, nloc, v, w)
c
c     .. MPI Declarations ...
      include           'mpif.h'
      integer*4         comm, nprocs, myid, ierr,
     &                  status(MPI_STATUS_SIZE)
c
      integer           nloc, j, next, prev
      Real
     &                  v(nloc), w(nloc), one, four, mv_buf
      parameter         ( one = 1.0, four = 4.0)
c
      call MPI_COMM_RANK( comm, myid, ierr )
      call MPI_COMM_SIZE( comm, nprocs, ierr )
c
      w(1) =  four*v(1) + one*v(2)
      do 10 j = 2,nloc-1
         w(j) = one*v(j-1) + four*v(j) + one*v(j+1)
 10   continue
      w(nloc) =  one*v(nloc-1) + four*v(nloc)
c
      next = myid + 1
      prev = myid - 1
      if ( myid .lt. nprocs-1 ) then
         call mpi_send( v(nloc), 1, MPI_REAL,
     &                  next, myid+1, comm, ierr )
      endif
      if ( myid .gt. 0 ) then
         call mpi_recv( mv_buf, 1, MPI_REAL, prev, myid,
     &                  comm, status, ierr )
         w(1) = w(1) + mv_buf
      endif
c
      if ( myid .gt. 0 ) then
         call mpi_send( v(1), 1, MPI_REAL,
     &                  prev, myid-1, comm, ierr )
      endif
      if ( myid .lt. nprocs-1 ) then
         call mpi_recv( mv_buf, 1, MPI_REAL, next, myid,
     &                  comm, status, ierr )
         w(nloc) = w(nloc) + mv_buf
      endif
c
      return
      end
c------------------------------------------------------------
      subroutine mv2 (comm, n, v, w)
      integer           n, j, comm
      Real
     &                  v(n), w(n)
      do 10 j=1,n
         w(j) = v(j)
 10   continue
c
      return
      end