SuperLU
5.2.0
|
Header file for real operations. More...
#include <math.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include "slu_Cnames.h"
#include "supermatrix.h"
#include "slu_util.h"
#include "slu_scomplex.h"
Go to the source code of this file.
Typedefs | |
typedef int | int_t |
Functions | |
void | cgssv (superlu_options_t *, SuperMatrix *, int *, int *, SuperMatrix *, SuperMatrix *, SuperMatrix *, SuperLUStat_t *, int *) |
Driver routines. More... | |
void | cgssvx (superlu_options_t *, SuperMatrix *, int *, int *, int *, char *, float *, float *, SuperMatrix *, SuperMatrix *, void *, int, SuperMatrix *, SuperMatrix *, float *, float *, float *, float *, GlobalLU_t *, mem_usage_t *, SuperLUStat_t *, int *) |
void | cgsisv (superlu_options_t *, SuperMatrix *, int *, int *, SuperMatrix *, SuperMatrix *, SuperMatrix *, SuperLUStat_t *, int *) |
void | cgsisx (superlu_options_t *, SuperMatrix *, int *, int *, int *, char *, float *, float *, SuperMatrix *, SuperMatrix *, void *, int, SuperMatrix *, SuperMatrix *, float *, float *, GlobalLU_t *, mem_usage_t *, SuperLUStat_t *, int *) |
void | cCreate_CompCol_Matrix (SuperMatrix *, int, int, int, complex *, int *, int *, Stype_t, Dtype_t, Mtype_t) |
Supernodal LU factor related. More... | |
void | cCreate_CompRow_Matrix (SuperMatrix *, int, int, int, complex *, int *, int *, Stype_t, Dtype_t, Mtype_t) |
void | cCopy_CompCol_Matrix (SuperMatrix *, SuperMatrix *) |
Copy matrix A into matrix B. More... | |
void | cCreate_Dense_Matrix (SuperMatrix *, int, int, complex *, int, Stype_t, Dtype_t, Mtype_t) |
void | cCreate_SuperNode_Matrix (SuperMatrix *, int, int, int, complex *, int *, int *, int *, int *, int *, Stype_t, Dtype_t, Mtype_t) |
void | cCopy_Dense_Matrix (int, int, complex *, int, complex *, int) |
void | countnz (const int, int *, int *, int *, GlobalLU_t *) |
Count the total number of nonzeros in factors L and U, and in the symmetrically reduced L. More... | |
void | ilu_countnz (const int, int *, int *, GlobalLU_t *) |
Count the total number of nonzeros in factors L and U. More... | |
void | fixupL (const int, const int *, GlobalLU_t *) |
Fix up the data storage lsub for L-subscripts. It removes the subscript sets for structural pruning, and applies permuation to the remaining subscripts. More... | |
void | callocateA (int, int, complex **, int **, int **) |
Allocate storage for original matrix A. More... | |
void | cgstrf (superlu_options_t *, SuperMatrix *, int, int, int *, void *, int, int *, int *, SuperMatrix *, SuperMatrix *, GlobalLU_t *, SuperLUStat_t *, int *) |
int | csnode_dfs (const int, const int, const int *, const int *, const int *, int *, int *, GlobalLU_t *) |
int | csnode_bmod (const int, const int, const int, complex *, complex *, GlobalLU_t *, SuperLUStat_t *) |
Performs numeric block updates within the relaxed snode. More... | |
void | cpanel_dfs (const int, const int, const int, SuperMatrix *, int *, int *, complex *, int *, int *, int *, int *, int *, int *, int *, GlobalLU_t *) |
void | cpanel_bmod (const int, const int, const int, const int, complex *, complex *, int *, int *, GlobalLU_t *, SuperLUStat_t *) |
int | ccolumn_dfs (const int, const int, int *, int *, int *, int *, int *, int *, int *, int *, int *, GlobalLU_t *) |
int | ccolumn_bmod (const int, const int, complex *, complex *, int *, int *, int, GlobalLU_t *, SuperLUStat_t *) |
int | ccopy_to_ucol (int, int, int *, int *, int *, complex *, GlobalLU_t *) |
int | cpivotL (const int, const double, int *, int *, int *, int *, int *, GlobalLU_t *, SuperLUStat_t *) |
void | cpruneL (const int, const int *, const int, const int, const int *, const int *, int *, GlobalLU_t *) |
void | creadmt (int *, int *, int *, complex **, int **, int **) |
void | cGenXtrue (int, int, complex *, int) |
void | cFillRHS (trans_t, int, complex *, int, SuperMatrix *, SuperMatrix *) |
Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's. More... | |
void | cgstrs (trans_t, SuperMatrix *, SuperMatrix *, int *, int *, SuperMatrix *, SuperLUStat_t *, int *) |
void | cgsitrf (superlu_options_t *, SuperMatrix *, int, int, int *, void *, int, int *, int *, SuperMatrix *, SuperMatrix *, GlobalLU_t *, SuperLUStat_t *, int *) |
int | cldperm (int, int, int, int[], int[], complex[], int[], float[], float[]) |
int | ilu_csnode_dfs (const int, const int, const int *, const int *, const int *, int *, GlobalLU_t *) |
void | ilu_cpanel_dfs (const int, const int, const int, SuperMatrix *, int *, int *, complex *, float *, int *, int *, int *, int *, int *, int *, GlobalLU_t *) |
int | ilu_ccolumn_dfs (const int, const int, int *, int *, int *, int *, int *, int *, int *, int *, GlobalLU_t *) |
int | ilu_ccopy_to_ucol (int, int, int *, int *, int *, complex *, int, milu_t, double, int, complex *, int *, GlobalLU_t *, float *) |
int | ilu_cpivotL (const int, const double, int *, int *, int, int *, int *, int *, int *, double, milu_t, complex, GlobalLU_t *, SuperLUStat_t *) |
int | ilu_cdrop_row (superlu_options_t *, int, int, double, int, int *, double *, GlobalLU_t *, float *, float *, int) |
void | cgsequ (SuperMatrix *, float *, float *, float *, float *, float *, int *) |
Driver related. More... | |
void | claqgs (SuperMatrix *, float *, float *, float, float, float, char *) |
void | cgscon (char *, SuperMatrix *, SuperMatrix *, float, float *, SuperLUStat_t *, int *) |
float | cPivotGrowth (int, SuperMatrix *, int *, SuperMatrix *, SuperMatrix *) |
void | cgsrfs (trans_t, SuperMatrix *, SuperMatrix *, SuperMatrix *, int *, int *, char *, float *, float *, SuperMatrix *, SuperMatrix *, float *, float *, SuperLUStat_t *, int *) |
int | sp_ctrsv (char *, char *, char *, SuperMatrix *, SuperMatrix *, complex *, SuperLUStat_t *, int *) |
Solves one of the systems of equations A*x = b, or A'*x = b. More... | |
int | sp_cgemv (char *, complex, SuperMatrix *, complex *, int, complex, complex *, int) |
Performs one of the matrix-vector operations y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y. More... | |
int | sp_cgemm (char *, char *, int, int, int, complex, SuperMatrix *, complex *, int, complex, complex *, int) |
float | smach (char *) |
int | cLUMemInit (fact_t, void *, int, int, int, int, int, float, SuperMatrix *, SuperMatrix *, GlobalLU_t *, int **, complex **) |
Memory-related. More... | |
void | cSetRWork (int, int, complex *, complex **, complex **) |
Set up pointers for real working arrays. More... | |
void | cLUWorkFree (int *, complex *, GlobalLU_t *) |
Free the working storage used by factor routines. More... | |
int | cLUMemXpand (int, int, MemType, int *, GlobalLU_t *) |
Expand the data structures for L and U during the factorization. More... | |
complex * | complexMalloc (int) |
complex * | complexCalloc (int) |
float * | floatMalloc (int) |
float * | floatCalloc (int) |
int | cmemory_usage (const int, const int, const int, const int) |
int | cQuerySpace (SuperMatrix *, SuperMatrix *, mem_usage_t *) |
int | ilu_cQuerySpace (SuperMatrix *, SuperMatrix *, mem_usage_t *) |
void | creadhb (FILE *, int *, int *, int *, complex **, int **, int **) |
Auxiliary routines. More... | |
void | creadrb (int *, int *, int *, complex **, int **, int **) |
void | creadtriple (int *, int *, int *, complex **, int **, int **) |
void | cCompRow_to_CompCol (int, int, int, complex *, int *, int *, complex **, int **, int **) |
Convert a row compressed storage into a column compressed storage. More... | |
void | cfill (complex *, int, complex) |
Fills a complex precision array with a given value. More... | |
void | cinf_norm_error (int, SuperMatrix *, complex *) |
Check the inf-norm of the error vector. More... | |
float | sqselect (int, float *, int) |
void | cPrint_CompCol_Matrix (char *, SuperMatrix *) |
Routines for debugging. More... | |
void | cPrint_SuperNode_Matrix (char *, SuperMatrix *) |
void | cPrint_Dense_Matrix (char *, SuperMatrix *) |
void | cprint_lu_col (char *, int, int, int *, GlobalLU_t *) |
Diagnostic print of column "jcol" in the U/L factor. More... | |
int | print_double_vec (char *, int, double *) |
void | ccheck_tempv (int, complex *) |
Check whether tempv[] == 0. This should be true before and after calling any numeric routines, i.e., "panel_bmod" and "column_bmod". More... | |
int | cgemm_ (const char *, const char *, const int *, const int *, const int *, const complex *, const complex *, const int *, const complex *, const int *, const complex *, complex *, const int *) |
BLAS. More... | |
int | ctrsv_ (char *, char *, char *, int *, complex *, int *, complex *, int *) |
int | ctrsm_ (char *, char *, char *, char *, int *, int *, complex *, complex *, int *, complex *, int *) |
int | cgemv_ (char *, int *, int *, complex *, complex *a, int *, complex *, int *, complex *, complex *, int *) |
Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
All rights reserved.
The source code is distributed under BSD license, see the file License.txt at the top-level directory.
– SuperLU routine (version 4.1) – Univ. of California Berkeley, Xerox Palo Alto Research Center, and Lawrence Berkeley National Lab. November, 2010
Global data structures used in LU factorization -
nsuper: #supernodes = nsuper + 1, numbered [0, nsuper]. (xsup,supno): supno[i] is the supernode no to which i belongs; xsup(s) points to the beginning of the s-th supernode. e.g. supno 0 1 2 2 3 3 3 4 4 4 4 4 (n=12) xsup 0 1 2 4 7 12 Note: dfs will be performed on supernode rep. relative to the new row pivoting ordering
(xlsub,lsub): lsub[*] contains the compressed subscript of rectangular supernodes; xlsub[j] points to the starting location of the j-th column in lsub[*]. Note that xlsub is indexed by column. Storage: original row subscripts
During the course of sparse LU factorization, we also use (xlsub,lsub) for the purpose of symmetric pruning. For each supernode {s,s+1,...,t=s+r} with first column s and last column t, the subscript set lsub[j], j=xlsub[s], .., xlsub[s+1]-1 is the structure of column s (i.e. structure of this supernode). It is used for the storage of numerical values. Furthermore, lsub[j], j=xlsub[t], .., xlsub[t+1]-1 is the structure of the last column t of this supernode. It is for the purpose of symmetric pruning. Therefore, the structural subscripts can be rearranged without making physical interchanges among the numerical values.
However, if the supernode has only one column, then we only keep one set of subscripts. For any subscript interchange performed, similar interchange must be done on the numerical values.
The last column structures (for pruning) will be removed after the numercial LU factorization phase.
(xlusup,lusup): lusup[*] contains the numerical values of the rectangular supernodes; xlusup[j] points to the starting location of the j-th column in storage vector lusup[*] Note: xlusup is indexed by column. Each rectangular supernode is stored by column-major scheme, consistent with Fortran 2-dim array storage.
(xusub,ucol,usub): ucol[*] stores the numerical values of U-columns outside the rectangular supernodes. The row subscript of nonzero ucol[k] is stored in usub[k]. xusub[i] points to the starting location of column i in ucol. Storage: new row subscripts; that is subscripts of PA.
typedef int int_t |
void callocateA | ( | int | , |
int | , | ||
complex ** | , | ||
int ** | , | ||
int ** | |||
) |
void ccheck_tempv | ( | int | , |
complex * | |||
) |
int ccolumn_bmod | ( | const int | jcol, |
const int | nseg, | ||
complex * | dense, | ||
complex * | tempv, | ||
int * | segrep, | ||
int * | repfnz, | ||
int | fpanelc, | ||
GlobalLU_t * | Glu, | ||
SuperLUStat_t * | stat | ||
) |
Purpose:
Performs numeric block updates (sup-col) in topological order. It features: col-col, 2cols-col, 3cols-col, and sup-col updates. Special processing on the supernodal portion of L[*,j] Return value: 0 - successful return > 0 - number of bytes allocated when run out of space
int ccolumn_dfs | ( | const int | m, |
const int | jcol, | ||
int * | perm_r, | ||
int * | nseg, | ||
int * | lsub_col, | ||
int * | segrep, | ||
int * | repfnz, | ||
int * | xprune, | ||
int * | marker, | ||
int * | parent, | ||
int * | xplore, | ||
GlobalLU_t * | Glu | ||
) |
Purpose
CCOLUMN_DFS performs a symbolic factorization on column jcol, and decide the supernode boundary.
This routine does not use numeric values, but only use the RHS row indices to start the dfs.
A supernode representative is the last column of a supernode. The nonzeros in U[*,j] are segments that end at supernodal representatives. The routine returns a list of such supernodal representatives in topological order of the dfs that generates them. The location of the first nonzero in each such supernodal segment (supernodal entry location) is also returned.
Local parameters
nseg: no of segments in current U[*,j] jsuper: jsuper=EMPTY if column j does not belong to the same supernode as j-1. Otherwise, jsuper=nsuper.
marker2: A-row –> A-row/col (0/1) repfnz: SuperA-col –> PA-row parent: SuperA-col –> SuperA-col xplore: SuperA-col –> index to L-structure
Return value
0 success;0 number of bytes allocated when run out of space.
void cCompRow_to_CompCol | ( | int | , |
int | , | ||
int | , | ||
complex * | , | ||
int * | , | ||
int * | , | ||
complex ** | , | ||
int ** | , | ||
int ** | |||
) |
void cCopy_CompCol_Matrix | ( | SuperMatrix * | , |
SuperMatrix * | |||
) |
Copies a two-dimensional matrix X to another matrix Y.
int ccopy_to_ucol | ( | int | , |
int | , | ||
int * | , | ||
int * | , | ||
int * | , | ||
complex * | , | ||
GlobalLU_t * | |||
) |
void cCreate_CompCol_Matrix | ( | SuperMatrix * | , |
int | , | ||
int | , | ||
int | , | ||
complex * | , | ||
int * | , | ||
int * | , | ||
Stype_t | , | ||
Dtype_t | , | ||
Mtype_t | |||
) |
void cCreate_CompRow_Matrix | ( | SuperMatrix * | , |
int | , | ||
int | , | ||
int | , | ||
complex * | , | ||
int * | , | ||
int * | , | ||
Stype_t | , | ||
Dtype_t | , | ||
Mtype_t | |||
) |
void cCreate_Dense_Matrix | ( | SuperMatrix * | , |
int | , | ||
int | , | ||
complex * | , | ||
int | , | ||
Stype_t | , | ||
Dtype_t | , | ||
Mtype_t | |||
) |
void cCreate_SuperNode_Matrix | ( | SuperMatrix * | , |
int | , | ||
int | , | ||
int | , | ||
complex * | , | ||
int * | , | ||
int * | , | ||
int * | , | ||
int * | , | ||
int * | , | ||
Stype_t | , | ||
Dtype_t | , | ||
Mtype_t | |||
) |
void cFillRHS | ( | trans_t | , |
int | , | ||
complex * | , | ||
int | , | ||
SuperMatrix * | , | ||
SuperMatrix * | |||
) |
int cgemm_ | ( | const char * | , |
const char * | , | ||
const int * | , | ||
const int * | , | ||
const int * | , | ||
const complex * | , | ||
const complex * | , | ||
const int * | , | ||
const complex * | , | ||
const int * | , | ||
const complex * | , | ||
complex * | , | ||
const int * | |||
) |
int cgemv_ | ( | char * | , |
int * | , | ||
int * | , | ||
complex * | , | ||
complex * | a, | ||
int * | , | ||
complex * | , | ||
int * | , | ||
complex * | , | ||
complex * | , | ||
int * | |||
) |
void cGenXtrue | ( | int | , |
int | , | ||
complex * | , | ||
int | |||
) |
void cgscon | ( | char * | norm, |
SuperMatrix * | L, | ||
SuperMatrix * | U, | ||
float | anorm, | ||
float * | rcond, | ||
SuperLUStat_t * | stat, | ||
int * | info | ||
) |
Purpose
CGSCON estimates the reciprocal of the condition number of a general real matrix A, in either the 1-norm or the infinity-norm, using the LU factorization computed by CGETRF. *
An estimate is obtained for norm(inv(A)), and the reciprocal of the condition number is computed as RCOND = 1 / ( norm(A) * norm(inv(A)) ).
See supermatrix.h for the definition of 'SuperMatrix' structure.
Arguments
NORM (input) char* Specifies whether the 1-norm condition number or the infinity-norm condition number is required: = '1' or 'O': 1-norm; = 'I': Infinity-norm.
L (input) SuperMatrix* The factor L from the factorization Pr*A*Pc=L*U as computed by cgstrf(). Use compressed row subscripts storage for supernodes, i.e., L has types: Stype = SLU_SC, Dtype = SLU_C, Mtype = SLU_TRLU.
U (input) SuperMatrix* The factor U from the factorization Pr*A*Pc=L*U as computed by cgstrf(). Use column-wise storage scheme, i.e., U has types: Stype = SLU_NC, Dtype = SLU_C, Mtype = SLU_TRU.
ANORM (input) float If NORM = '1' or 'O', the 1-norm of the original matrix A. If NORM = 'I', the infinity-norm of the original matrix A.
RCOND (output) float* The reciprocal of the condition number of the matrix A, computed as RCOND = 1/(norm(A) * norm(inv(A))).
INFO (output) int* = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value
void cgsequ | ( | SuperMatrix * | A, |
float * | r, | ||
float * | c, | ||
float * | rowcnd, | ||
float * | colcnd, | ||
float * | amax, | ||
int * | info | ||
) |
Purpose
CGSEQU computes row and column scalings intended to equilibrate an M-by-N sparse matrix A and reduce its condition number. R returns the row scale factors and C the column scale factors, chosen to try to make the largest element in each row and column of the matrix B with elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1.
R(i) and C(j) are restricted to be between SMLNUM = smallest safe number and BIGNUM = largest safe number. Use of these scaling factors is not guaranteed to reduce the condition number of A but works well in practice.
See supermatrix.h for the definition of 'SuperMatrix' structure.
Arguments
A (input) SuperMatrix* The matrix of dimension (A->nrow, A->ncol) whose equilibration factors are to be computed. The type of A can be: Stype = SLU_NC; Dtype = SLU_C; Mtype = SLU_GE.
R (output) float*, size A->nrow If INFO = 0 or INFO > M, R contains the row scale factors for A.
C (output) float*, size A->ncol If INFO = 0, C contains the column scale factors for A.
ROWCND (output) float* If INFO = 0 or INFO > M, ROWCND contains the ratio of the smallest R(i) to the largest R(i). If ROWCND >= 0.1 and AMAX is neither too large nor too small, it is not worth scaling by R.
COLCND (output) float* If INFO = 0, COLCND contains the ratio of the smallest C(i) to the largest C(i). If COLCND >= 0.1, it is not worth scaling by C.
AMAX (output) float* Absolute value of largest matrix element. If AMAX is very close to overflow or very close to underflow, the matrix should be scaled.
INFO (output) int* = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, and i is <= A->nrow: the i-th row of A is exactly zero > A->ncol: the (i-M)-th column of A is exactly zero
void cgsisv | ( | superlu_options_t * | , |
SuperMatrix * | , | ||
int * | , | ||
int * | , | ||
SuperMatrix * | , | ||
SuperMatrix * | , | ||
SuperMatrix * | , | ||
SuperLUStat_t * | , | ||
int * | |||
) |
void cgsisx | ( | superlu_options_t * | options, |
SuperMatrix * | A, | ||
int * | perm_c, | ||
int * | perm_r, | ||
int * | etree, | ||
char * | equed, | ||
float * | R, | ||
float * | C, | ||
SuperMatrix * | L, | ||
SuperMatrix * | U, | ||
void * | work, | ||
int | lwork, | ||
SuperMatrix * | B, | ||
SuperMatrix * | X, | ||
float * | recip_pivot_growth, | ||
float * | rcond, | ||
GlobalLU_t * | Glu, | ||
mem_usage_t * | mem_usage, | ||
SuperLUStat_t * | stat, | ||
int * | info | ||
) |
Purpose
CGSISX computes an approximate solutions of linear equations A*X=B or A'*X=B, using the ILU factorization from cgsitrf(). An estimation of the condition number is provided. The routine performs the following steps:
1. If A is stored column-wise (A->Stype = SLU_NC):
1.1. If options->Equil = YES or options->RowPerm = LargeDiag, scaling factors are computed to equilibrate the system: options->Trans = NOTRANS: diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B options->Trans = TRANS: (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B options->Trans = CONJ: (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B Whether or not the system will be equilibrated depends on the scaling of the matrix A, but if equilibration is used, A is overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans = TRANS or CONJ).
1.2. Permute columns of A, forming A*Pc, where Pc is a permutation matrix that usually preserves sparsity. For more details of this step, see sp_preorder.c.
1.3. If options->Fact != FACTORED, the LU decomposition is used to factor the matrix A (after equilibration if options->Equil = YES) as Pr*A*Pc = L*U, with Pr determined by partial pivoting.
1.4. Compute the reciprocal pivot growth factor.
1.5. If some U(i,i) = 0, so that U is exactly singular, then the routine fills a small number on the diagonal entry, that is U(i,i) = ||A(:,i)||_oo * options->ILU_FillTol ** (1 - i / n), and info will be increased by 1. The factored form of A is used to estimate the condition number of the preconditioner. If the reciprocal of the condition number is less than machine precision, info = A->ncol+1 is returned as a warning, but the routine still goes on to solve for X.
1.6. The system of equations is solved for X using the factored form of A.
1.7. options->IterRefine is not used
1.8. If equilibration was used, the matrix X is premultiplied by diag(C) (if options->Trans = NOTRANS) or diag(R) (if options->Trans = TRANS or CONJ) so that it solves the original system before equilibration.
1.9. options for ILU only 1) If options->RowPerm = LargeDiag, MC64 is used to scale and permute the matrix to an I-matrix, that is Pr*Dr*A*Dc has entries of modulus 1 on the diagonal and off-diagonal entries of modulus at most 1. If MC64 fails, dgsequ() is used to equilibrate the system. ( Default: LargeDiag ) 2) options->ILU_DropTol = tau is the threshold for dropping. For L, it is used directly (for the whole row in a supernode); For U, ||A(:,i)||_oo * tau is used as the threshold for the i-th column. If a secondary dropping rule is required, tau will also be used to compute the second threshold. ( Default: 1e-4 ) 3) options->ILU_FillFactor = gamma, used as the initial guess of memory growth. If a secondary dropping rule is required, it will also be used as an upper bound of the memory. ( Default: 10 ) 4) options->ILU_DropRule specifies the dropping rule. Option Meaning ====== =========== DROP_BASIC: Basic dropping rule, supernodal based ILUTP(tau). DROP_PROWS: Supernodal based ILUTP(p,tau), p = gamma*nnz(A)/n. DROP_COLUMN: Variant of ILUTP(p,tau), for j-th column, p = gamma * nnz(A(:,j)). DROP_AREA: Variation of ILUTP, for j-th column, use nnz(F(:,1:j)) / nnz(A(:,1:j)) to control memory. DROP_DYNAMIC: Modify the threshold tau during factorizaion: If nnz(L(:,1:j)) / nnz(A(:,1:j)) > gamma tau_L(j) := MIN(tau_0, tau_L(j-1) * 2); Otherwise tau_L(j) := MAX(tau_0, tau_L(j-1) / 2); tau_U(j) uses the similar rule. NOTE: the thresholds used by L and U are separate. DROP_INTERP: Compute the second dropping threshold by interpolation instead of sorting (default). In this case, the actual fill ratio is not guaranteed smaller than gamma. DROP_PROWS, DROP_COLUMN and DROP_AREA are mutually exclusive. ( Default: DROP_BASIC | DROP_AREA ) 5) options->ILU_Norm is the criterion of measuring the magnitude of a row in a supernode of L. ( Default is INF_NORM ) options->ILU_Norm RowSize(x[1:n]) ================= =============== ONE_NORM ||x||_1 / n TWO_NORM ||x||_2 / sqrt(n) INF_NORM max{|x[i]|} 6) options->ILU_MILU specifies the type of MILU's variation. = SILU: do not perform Modified ILU; = SMILU_1 (not recommended): U(i,i) := U(i,i) + sum(dropped entries); = SMILU_2: U(i,i) := U(i,i) + SGN(U(i,i)) * sum(dropped entries); = SMILU_3: U(i,i) := U(i,i) + SGN(U(i,i)) * sum(|dropped entries|); NOTE: Even SMILU_1 does not preserve the column sum because of late dropping. ( Default: SILU ) 7) options->ILU_FillTol is used as the perturbation when encountering zero pivots. If some U(i,i) = 0, so that U is exactly singular, then U(i,i) := ||A(:,i)|| * options->ILU_FillTol ** (1 - i / n). ( Default: 1e-2 )
2. If A is stored row-wise (A->Stype = SLU_NR), apply the above algorithm to the transpose of A:
2.1. If options->Equil = YES or options->RowPerm = LargeDiag, scaling factors are computed to equilibrate the system: options->Trans = NOTRANS: diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B options->Trans = TRANS: (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B options->Trans = CONJ: (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B Whether or not the system will be equilibrated depends on the scaling of the matrix A, but if equilibration is used, A' is overwritten by diag(R)*A'*diag(C) and B by diag(R)*B (if trans='N') or diag(C)*B (if trans = 'T' or 'C').
2.2. Permute columns of transpose(A) (rows of A), forming transpose(A)*Pc, where Pc is a permutation matrix that usually preserves sparsity. For more details of this step, see sp_preorder.c.
2.3. If options->Fact != FACTORED, the LU decomposition is used to factor the transpose(A) (after equilibration if options->Fact = YES) as Pr*transpose(A)*Pc = L*U with the permutation Pr determined by partial pivoting.
2.4. Compute the reciprocal pivot growth factor.
2.5. If some U(i,i) = 0, so that U is exactly singular, then the routine fills a small number on the diagonal entry, that is U(i,i) = ||A(:,i)||_oo * options->ILU_FillTol ** (1 - i / n). And info will be increased by 1. The factored form of A is used to estimate the condition number of the preconditioner. If the reciprocal of the condition number is less than machine precision, info = A->ncol+1 is returned as a warning, but the routine still goes on to solve for X.
2.6. The system of equations is solved for X using the factored form of transpose(A).
2.7. If options->IterRefine is not used.
2.8. If equilibration was used, the matrix X is premultiplied by diag(C) (if options->Trans = NOTRANS) or diag(R) (if options->Trans = TRANS or CONJ) so that it solves the original system before equilibration.
See supermatrix.h for the definition of 'SuperMatrix' structure.
Arguments
options (input) superlu_options_t* The structure defines the input parameters to control how the LU decomposition will be performed and how the system will be solved.
A (input/output) SuperMatrix* Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number of the linear equations is A->nrow. Currently, the type of A can be: Stype = SLU_NC or SLU_NR, Dtype = SLU_C, Mtype = SLU_GE. In the future, more general A may be handled.
On entry, If options->Fact = FACTORED and equed is not 'N', then A must have been equilibrated by the scaling factors in R and/or C. On exit, A is not modified if options->Equil = NO, or if options->Equil = YES but equed = 'N' on exit, or if options->RowPerm = NO.
Otherwise, if options->Equil = YES and equed is not 'N', A is scaled as follows: If A->Stype = SLU_NC: equed = 'R': A := diag(R) * A equed = 'C': A := A * diag(C) equed = 'B': A := diag(R) * A * diag(C). If A->Stype = SLU_NR: equed = 'R': transpose(A) := diag(R) * transpose(A) equed = 'C': transpose(A) := transpose(A) * diag(C) equed = 'B': transpose(A) := diag(R) * transpose(A) * diag(C).
If options->RowPerm = LargeDiag, MC64 is used to scale and permute the matrix to an I-matrix, that is A is modified as follows: P*Dr*A*Dc has entries of modulus 1 on the diagonal and off-diagonal entries of modulus at most 1. P is a permutation obtained from MC64. If MC64 fails, cgsequ() is used to equilibrate the system, and A is scaled as above, but no permutation is involved. On exit, A is restored to the orginal row numbering, so Dr*A*Dc is returned.
perm_c (input/output) int* If A->Stype = SLU_NC, Column permutation vector of size A->ncol, which defines the permutation matrix Pc; perm_c[i] = j means column i of A is in position j in A*Pc. On exit, perm_c may be overwritten by the product of the input perm_c and a permutation that postorders the elimination tree of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree is already in postorder.
If A->Stype = SLU_NR, column permutation vector of size A->nrow, which describes permutation of columns of transpose(A) (rows of A) as described above.
perm_r (input/output) int* If A->Stype = SLU_NC, row permutation vector of size A->nrow, which defines the permutation matrix Pr, and is determined by MC64 first then followed by partial pivoting. perm_r[i] = j means row i of A is in position j in Pr*A.
If A->Stype = SLU_NR, permutation vector of size A->ncol, which determines permutation of rows of transpose(A) (columns of A) as described above.
If options->Fact = SamePattern_SameRowPerm, the pivoting routine will try to use the input perm_r, unless a certain threshold criterion is violated. In that case, perm_r is overwritten by a new permutation determined by partial pivoting or diagonal threshold pivoting. Otherwise, perm_r is output argument.
etree (input/output) int*, dimension (A->ncol) Elimination tree of Pc'*A'*A*Pc. If options->Fact != FACTORED and options->Fact != DOFACT, etree is an input argument, otherwise it is an output argument. Note: etree is a vector of parent pointers for a forest whose vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
equed (input/output) char* Specifies the form of equilibration that was done. = 'N': No equilibration. = 'R': Row equilibration, i.e., A was premultiplied by diag(R). = 'C': Column equilibration, i.e., A was postmultiplied by diag(C). = 'B': Both row and column equilibration, i.e., A was replaced by diag(R)*A*diag(C). If options->Fact = FACTORED, equed is an input argument, otherwise it is an output argument.
R (input/output) float*, dimension (A->nrow) The row scale factors for A or transpose(A). If equed = 'R' or 'B', A (if A->Stype = SLU_NC) or transpose(A) (if A->Stype = SLU_NR) is multiplied on the left by diag(R). If equed = 'N' or 'C', R is not accessed. If options->Fact = FACTORED, R is an input argument, otherwise, R is output. If options->Fact = FACTORED and equed = 'R' or 'B', each element of R must be positive.
C (input/output) float*, dimension (A->ncol) The column scale factors for A or transpose(A). If equed = 'C' or 'B', A (if A->Stype = SLU_NC) or transpose(A) (if A->Stype = SLU_NR) is multiplied on the right by diag(C). If equed = 'N' or 'R', C is not accessed. If options->Fact = FACTORED, C is an input argument, otherwise, C is output. If options->Fact = FACTORED and equed = 'C' or 'B', each element of C must be positive.
L (output) SuperMatrix* The factor L from the factorization Pr*A*Pc=L*U (if A->Stype SLU_= NC) or Pr*transpose(A)*Pc=L*U (if A->Stype = SLU_NR). Uses compressed row subscripts storage for supernodes, i.e., L has types: Stype = SLU_SC, Dtype = SLU_C, Mtype = SLU_TRLU.
U (output) SuperMatrix* The factor U from the factorization Pr*A*Pc=L*U (if A->Stype = SLU_NC) or Pr*transpose(A)*Pc=L*U (if A->Stype = SLU_NR). Uses column-wise storage scheme, i.e., U has types: Stype = SLU_NC, Dtype = SLU_C, Mtype = SLU_TRU.
work (workspace/output) void*, size (lwork) (in bytes) User supplied workspace, should be large enough to hold data structures for factors L and U. On exit, if fact is not 'F', L and U point to this array.
lwork (input) int Specifies the size of work array in bytes. = 0: allocate space internally by system malloc; > 0: use user-supplied work array of length lwork in bytes, returns error if space runs out. = -1: the routine guesses the amount of space needed without performing the factorization, and returns it in mem_usage->total_needed; no other side effects.
See argument 'mem_usage' for memory usage statistics.
B (input/output) SuperMatrix* B has types: Stype = SLU_DN, Dtype = SLU_C, Mtype = SLU_GE. On entry, the right hand side matrix. If B->ncol = 0, only LU decomposition is performed, the triangular solve is skipped. On exit, if equed = 'N', B is not modified; otherwise if A->Stype = SLU_NC: if options->Trans = NOTRANS and equed = 'R' or 'B', B is overwritten by diag(R)*B; if options->Trans = TRANS or CONJ and equed = 'C' of 'B', B is overwritten by diag(C)*B; if A->Stype = SLU_NR: if options->Trans = NOTRANS and equed = 'C' or 'B', B is overwritten by diag(C)*B; if options->Trans = TRANS or CONJ and equed = 'R' of 'B', B is overwritten by diag(R)*B.
X (output) SuperMatrix* X has types: Stype = SLU_DN, Dtype = SLU_C, Mtype = SLU_GE. If info = 0 or info = A->ncol+1, X contains the solution matrix to the original system of equations. Note that A and B are modified on exit if equed is not 'N', and the solution to the equilibrated system is inv(diag(C))*X if options->Trans = NOTRANS and equed = 'C' or 'B', or inv(diag(R))*X if options->Trans = 'T' or 'C' and equed = 'R' or 'B'.
recip_pivot_growth (output) float* The reciprocal pivot growth factor max_j( norm(A_j)/norm(U_j) ). The infinity norm is used. If recip_pivot_growth is much less than 1, the stability of the LU factorization could be poor.
rcond (output) float* The estimate of the reciprocal condition number of the matrix A after equilibration (if done). If rcond is less than the machine precision (in particular, if rcond = 0), the matrix is singular to working precision. This condition is indicated by a return code of info > 0.
mem_usage (output) mem_usage_t* Record the memory usage statistics, consisting of following fields:
stat (output) SuperLUStat_t* Record the statistics on runtime and floating-point operation count. See slu_util.h for the definition of 'SuperLUStat_t'.
info (output) int* = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value > 0: if info = i, and i is <= A->ncol: number of zero pivots. They are replaced by small entries due to options->ILU_FillTol. = A->ncol+1: U is nonsingular, but RCOND is less than machine precision, meaning that the matrix is singular to working precision. Nevertheless, the solution and error bounds are computed because there are a number of situations where the computed solution can be more accurate than the value of RCOND would suggest. > A->ncol+1: number of bytes allocated when memory allocation failure occurred, plus A->ncol.
void cgsitrf | ( | superlu_options_t * | options, |
SuperMatrix * | A, | ||
int | relax, | ||
int | panel_size, | ||
int * | etree, | ||
void * | work, | ||
int | lwork, | ||
int * | perm_c, | ||
int * | perm_r, | ||
SuperMatrix * | L, | ||
SuperMatrix * | U, | ||
GlobalLU_t * | Glu, | ||
SuperLUStat_t * | stat, | ||
int * | info | ||
) |
Purpose
CGSITRF computes an ILU factorization of a general sparse m-by-n matrix A using partial pivoting with row interchanges. The factorization has the form Pr * A = L * U where Pr is a row permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if A->nrow > A->ncol), and U is upper triangular (upper trapezoidal if A->nrow < A->ncol).
See supermatrix.h for the definition of 'SuperMatrix' structure.
Arguments
options (input) superlu_options_t* The structure defines the input parameters to control how the ILU decomposition will be performed.
A (input) SuperMatrix* Original matrix A, permuted by columns, of dimension (A->nrow, A->ncol). The type of A can be: Stype = SLU_NCP; Dtype = SLU_C; Mtype = SLU_GE.
relax (input) int To control degree of relaxing supernodes. If the number of nodes (columns) in a subtree of the elimination tree is less than relax, this subtree is considered as one supernode, regardless of the row structures of those columns.
panel_size (input) int A panel consists of at most panel_size consecutive columns.
etree (input) int*, dimension (A->ncol) Elimination tree of A'*A. Note: etree is a vector of parent pointers for a forest whose vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol. On input, the columns of A should be permuted so that the etree is in a certain postorder.
work (input/output) void*, size (lwork) (in bytes) User-supplied work space and space for the output data structures. Not referenced if lwork = 0;
lwork (input) int Specifies the size of work array in bytes. = 0: allocate space internally by system malloc; > 0: use user-supplied work array of length lwork in bytes, returns error if space runs out. = -1: the routine guesses the amount of space needed without performing the factorization, and returns it in *info; no other side effects.
perm_c (input) int*, dimension (A->ncol) Column permutation vector, which defines the permutation matrix Pc; perm_c[i] = j means column i of A is in position j in A*Pc. When searching for diagonal, perm_c[*] is applied to the row subscripts of A, so that diagonal threshold pivoting can find the diagonal of A, rather than that of A*Pc.
perm_r (input/output) int*, dimension (A->nrow) Row permutation vector which defines the permutation matrix Pr, perm_r[i] = j means row i of A is in position j in Pr*A. If options->Fact = SamePattern_SameRowPerm, the pivoting routine will try to use the input perm_r, unless a certain threshold criterion is violated. In that case, perm_r is overwritten by a new permutation determined by partial pivoting or diagonal threshold pivoting. Otherwise, perm_r is output argument;
L (output) SuperMatrix* The factor L from the factorization Pr*A=L*U; use compressed row subscripts storage for supernodes, i.e., L has type: Stype = SLU_SC, Dtype = SLU_C, Mtype = SLU_TRLU.
U (output) SuperMatrix* The factor U from the factorization Pr*A*Pc=L*U. Use column-wise storage scheme, i.e., U has types: Stype = SLU_NC, Dtype = SLU_C, Mtype = SLU_TRU.
Glu (input/output) GlobalLU_t * If options->Fact == SamePattern_SameRowPerm, it is an input; The matrix A will be factorized assuming that a factorization of a matrix with the same sparsity pattern and similar numerical values was performed prior to this one. Therefore, this factorization will reuse both row and column scaling factors R and C, both row and column permutation vectors perm_r and perm_c, and the L & U data structures set up from the previous factorization. Otherwise, it is an output.
stat (output) SuperLUStat_t* Record the statistics on runtime and floating-point operation count. See slu_util.h for the definition of 'SuperLUStat_t'.
info (output) int* = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value > 0: if info = i, and i is <= A->ncol: number of zero pivots. They are replaced by small entries according to options->ILU_FillTol. > A->ncol: number of bytes allocated when memory allocation failure occurred, plus A->ncol. If lwork = -1, it is the estimated amount of space needed, plus A->ncol.
Local Working Arrays:
m = number of rows in the matrix n = number of columns in the matrix
marker[0:3*m-1]: marker[i] = j means that node i has been reached when working on column j. Storage: relative to original row subscripts NOTE: There are 4 of them: marker/marker1 are used for panel dfs, see (ilu_)dpanel_dfs.c; marker2 is used for inner-factorization, see (ilu)_dcolumn_dfs.c; marker_relax(has its own space) is used for relaxed supernodes.
parent[0:m-1]: parent vector used during dfs Storage: relative to new row subscripts
xplore[0:m-1]: xplore[i] gives the location of the next (dfs) unexplored neighbor of i in lsub[*]
segrep[0:nseg-1]: contains the list of supernodal representatives in topological order of the dfs. A supernode representative is the last column of a supernode. The maximum size of segrep[] is n.
repfnz[0:W*m-1]: for a nonzero segment U[*,j] that ends at a supernodal representative r, repfnz[r] is the location of the first nonzero in this segment. It is also used during the dfs: repfnz[r]>0 indicates the supernode r has been explored. NOTE: There are W of them, each used for one column of a panel.
panel_lsub[0:W*m-1]: temporary for the nonzeros row indices below the panel diagonal. These are filled in during dpanel_dfs(), and are used later in the inner LU factorization within the panel. panel_lsub[]/dense[] pair forms the SPA data structure. NOTE: There are W of them.
dense[0:W*m-1]: sparse accumulating (SPA) vector for intermediate values; NOTE: there are W of them.
tempv[0:*]: real temporary used for dense numeric kernels; The size of this array is defined by NUM_TEMPV() in slu_util.h. It is also used by the dropping routine ilu_ddrop_row().
void cgsrfs | ( | trans_t | trans, |
SuperMatrix * | A, | ||
SuperMatrix * | L, | ||
SuperMatrix * | U, | ||
int * | perm_c, | ||
int * | perm_r, | ||
char * | equed, | ||
float * | R, | ||
float * | C, | ||
SuperMatrix * | B, | ||
SuperMatrix * | X, | ||
float * | ferr, | ||
float * | berr, | ||
SuperLUStat_t * | stat, | ||
int * | info | ||
) |
Purpose
CGSRFS improves the computed solution to a system of linear equations and provides error bounds and backward error estimates for the solution.
If equilibration was performed, the system becomes: (diag(R)*A_original*diag(C)) * X = diag(R)*B_original.
See supermatrix.h for the definition of 'SuperMatrix' structure.
Arguments
trans (input) trans_t Specifies the form of the system of equations: = NOTRANS: A * X = B (No transpose) = TRANS: A'* X = B (Transpose) = CONJ: A**H * X = B (Conjugate transpose)
A (input) SuperMatrix* The original matrix A in the system, or the scaled A if equilibration was done. The type of A can be: Stype = SLU_NC, Dtype = SLU_C, Mtype = SLU_GE.
L (input) SuperMatrix* The factor L from the factorization Pr*A*Pc=L*U. Use compressed row subscripts storage for supernodes, i.e., L has types: Stype = SLU_SC, Dtype = SLU_C, Mtype = SLU_TRLU.
U (input) SuperMatrix* The factor U from the factorization Pr*A*Pc=L*U as computed by cgstrf(). Use column-wise storage scheme, i.e., U has types: Stype = SLU_NC, Dtype = SLU_C, Mtype = SLU_TRU.
perm_c (input) int*, dimension (A->ncol) Column permutation vector, which defines the permutation matrix Pc; perm_c[i] = j means column i of A is in position j in A*Pc.
perm_r (input) int*, dimension (A->nrow) Row permutation vector, which defines the permutation matrix Pr; perm_r[i] = j means row i of A is in position j in Pr*A.
equed (input) Specifies the form of equilibration that was done. = 'N': No equilibration. = 'R': Row equilibration, i.e., A was premultiplied by diag(R). = 'C': Column equilibration, i.e., A was postmultiplied by diag(C). = 'B': Both row and column equilibration, i.e., A was replaced by diag(R)*A*diag(C).
R (input) float*, dimension (A->nrow) The row scale factors for A. If equed = 'R' or 'B', A is premultiplied by diag(R). If equed = 'N' or 'C', R is not accessed.
C (input) float*, dimension (A->ncol) The column scale factors for A. If equed = 'C' or 'B', A is postmultiplied by diag(C). If equed = 'N' or 'R', C is not accessed.
B (input) SuperMatrix* B has types: Stype = SLU_DN, Dtype = SLU_C, Mtype = SLU_GE. The right hand side matrix B. if equed = 'R' or 'B', B is premultiplied by diag(R).
X (input/output) SuperMatrix* X has types: Stype = SLU_DN, Dtype = SLU_C, Mtype = SLU_GE. On entry, the solution matrix X, as computed by cgstrs(). On exit, the improved solution matrix X. if *equed = 'C' or 'B', X should be premultiplied by diag(C) in order to obtain the solution to the original system.
FERR (output) float*, dimension (B->ncol) The estimated forward error bound for each solution vector X(j) (the j-th column of the solution matrix X). If XTRUE is the true solution corresponding to X(j), FERR(j) is an estimated upper bound for the magnitude of the largest element in (X(j) - XTRUE) divided by the magnitude of the largest element in X(j). The estimate is as reliable as the estimate for RCOND, and is almost always a slight overestimate of the true error.
BERR (output) float*, dimension (B->ncol) The componentwise relative backward error of each solution vector X(j) (i.e., the smallest relative change in any element of A or B that makes X(j) an exact solution).
stat (output) SuperLUStat_t* Record the statistics on runtime and floating-point operation count. See util.h for the definition of 'SuperLUStat_t'.
info (output) int* = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value
Internal Parameters
ITMAX is the maximum number of steps of iterative refinement.
void cgssv | ( | superlu_options_t * | options, |
SuperMatrix * | A, | ||
int * | perm_c, | ||
int * | perm_r, | ||
SuperMatrix * | L, | ||
SuperMatrix * | U, | ||
SuperMatrix * | B, | ||
SuperLUStat_t * | stat, | ||
int * | info | ||
) |
Purpose
CGSSV solves the system of linear equations A*X=B, using the LU factorization from CGSTRF. It performs the following steps:
1. If A is stored column-wise (A->Stype = SLU_NC):
1.1. Permute the columns of A, forming A*Pc, where Pc is a permutation matrix. For more details of this step, see sp_preorder.c.
1.2. Factor A as Pr*A*Pc=L*U with the permutation Pr determined by Gaussian elimination with partial pivoting. L is unit lower triangular with offdiagonal entries bounded by 1 in magnitude, and U is upper triangular.
1.3. Solve the system of equations A*X=B using the factored form of A.
2. If A is stored row-wise (A->Stype = SLU_NR), apply the above algorithm to the transpose of A:
2.1. Permute columns of transpose(A) (rows of A), forming transpose(A)*Pc, where Pc is a permutation matrix. For more details of this step, see sp_preorder.c.
2.2. Factor A as Pr*transpose(A)*Pc=L*U with the permutation Pr determined by Gaussian elimination with partial pivoting. L is unit lower triangular with offdiagonal entries bounded by 1 in magnitude, and U is upper triangular.
2.3. Solve the system of equations A*X=B using the factored form of A.
See supermatrix.h for the definition of 'SuperMatrix' structure.
Arguments
options (input) superlu_options_t* The structure defines the input parameters to control how the LU decomposition will be performed and how the system will be solved.
A (input) SuperMatrix* Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number of linear equations is A->nrow. Currently, the type of A can be: Stype = SLU_NC or SLU_NR; Dtype = SLU_C; Mtype = SLU_GE. In the future, more general A may be handled.
perm_c (input/output) int* If A->Stype = SLU_NC, column permutation vector of size A->ncol which defines the permutation matrix Pc; perm_c[i] = j means column i of A is in position j in A*Pc. If A->Stype = SLU_NR, column permutation vector of size A->nrow which describes permutation of columns of transpose(A) (rows of A) as described above.
If options->ColPerm = MY_PERMC or options->Fact = SamePattern or options->Fact = SamePattern_SameRowPerm, it is an input argument. On exit, perm_c may be overwritten by the product of the input perm_c and a permutation that postorders the elimination tree of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree is already in postorder. Otherwise, it is an output argument.
perm_r (input/output) int* If A->Stype = SLU_NC, row permutation vector of size A->nrow, which defines the permutation matrix Pr, and is determined by partial pivoting. perm_r[i] = j means row i of A is in position j in Pr*A. If A->Stype = SLU_NR, permutation vector of size A->ncol, which determines permutation of rows of transpose(A) (columns of A) as described above.
If options->RowPerm = MY_PERMR or options->Fact = SamePattern_SameRowPerm, perm_r is an input argument. otherwise it is an output argument.
L (output) SuperMatrix* The factor L from the factorization Pr*A*Pc=L*U (if A->Stype = SLU_NC) or Pr*transpose(A)*Pc=L*U (if A->Stype = SLU_NR). Uses compressed row subscripts storage for supernodes, i.e., L has types: Stype = SLU_SC, Dtype = SLU_C, Mtype = SLU_TRLU.
U (output) SuperMatrix* The factor U from the factorization Pr*A*Pc=L*U (if A->Stype = SLU_NC) or Pr*transpose(A)*Pc=L*U (if A->Stype = SLU_NR). Uses column-wise storage scheme, i.e., U has types: Stype = SLU_NC, Dtype = SLU_C, Mtype = SLU_TRU.
B (input/output) SuperMatrix* B has types: Stype = SLU_DN, Dtype = SLU_C, Mtype = SLU_GE. On entry, the right hand side matrix. On exit, the solution matrix if info = 0;
stat (output) SuperLUStat_t* Record the statistics on runtime and floating-point operation count. See util.h for the definition of 'SuperLUStat_t'.
info (output) int* = 0: successful exit > 0: if info = i, and i is <= A->ncol: U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, so the solution could not be computed. > A->ncol: number of bytes allocated when memory allocation failure occurred, plus A->ncol.
void cgssvx | ( | superlu_options_t * | options, |
SuperMatrix * | A, | ||
int * | perm_c, | ||
int * | perm_r, | ||
int * | etree, | ||
char * | equed, | ||
float * | R, | ||
float * | C, | ||
SuperMatrix * | L, | ||
SuperMatrix * | U, | ||
void * | work, | ||
int | lwork, | ||
SuperMatrix * | B, | ||
SuperMatrix * | X, | ||
float * | recip_pivot_growth, | ||
float * | rcond, | ||
float * | ferr, | ||
float * | berr, | ||
GlobalLU_t * | Glu, | ||
mem_usage_t * | mem_usage, | ||
SuperLUStat_t * | stat, | ||
int * | info | ||
) |
Purpose
CGSSVX solves the system of linear equations A*X=B or A'*X=B, using the LU factorization from cgstrf(). Error bounds on the solution and a condition estimate are also provided. It performs the following steps:
1. If A is stored column-wise (A->Stype = SLU_NC):
1.1. If options->Equil = YES, scaling factors are computed to equilibrate the system: options->Trans = NOTRANS: diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B options->Trans = TRANS: (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B options->Trans = CONJ: (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B Whether or not the system will be equilibrated depends on the scaling of the matrix A, but if equilibration is used, A is overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans = TRANS or CONJ).
1.2. Permute columns of A, forming A*Pc, where Pc is a permutation matrix that usually preserves sparsity. For more details of this step, see sp_preorder.c.
1.3. If options->Fact != FACTORED, the LU decomposition is used to factor the matrix A (after equilibration if options->Equil = YES) as Pr*A*Pc = L*U, with Pr determined by partial pivoting.
1.4. Compute the reciprocal pivot growth factor.
1.5. If some U(i,i) = 0, so that U is exactly singular, then the routine returns with info = i. Otherwise, the factored form of A is used to estimate the condition number of the matrix A. If the reciprocal of the condition number is less than machine precision, info = A->ncol+1 is returned as a warning, but the routine still goes on to solve for X and computes error bounds as described below.
1.6. The system of equations is solved for X using the factored form of A.
1.7. If options->IterRefine != NOREFINE, iterative refinement is applied to improve the computed solution matrix and calculate error bounds and backward error estimates for it.
1.8. If equilibration was used, the matrix X is premultiplied by diag(C) (if options->Trans = NOTRANS) or diag(R) (if options->Trans = TRANS or CONJ) so that it solves the original system before equilibration.
2. If A is stored row-wise (A->Stype = SLU_NR), apply the above algorithm to the transpose of A:
2.1. If options->Equil = YES, scaling factors are computed to equilibrate the system: options->Trans = NOTRANS: diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B options->Trans = TRANS: (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B options->Trans = CONJ: (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B Whether or not the system will be equilibrated depends on the scaling of the matrix A, but if equilibration is used, A' is overwritten by diag(R)*A'*diag(C) and B by diag(R)*B (if trans='N') or diag(C)*B (if trans = 'T' or 'C').
2.2. Permute columns of transpose(A) (rows of A), forming transpose(A)*Pc, where Pc is a permutation matrix that usually preserves sparsity. For more details of this step, see sp_preorder.c.
2.3. If options->Fact != FACTORED, the LU decomposition is used to factor the transpose(A) (after equilibration if options->Fact = YES) as Pr*transpose(A)*Pc = L*U with the permutation Pr determined by partial pivoting.
2.4. Compute the reciprocal pivot growth factor.
2.5. If some U(i,i) = 0, so that U is exactly singular, then the routine returns with info = i. Otherwise, the factored form of transpose(A) is used to estimate the condition number of the matrix A. If the reciprocal of the condition number is less than machine precision, info = A->nrow+1 is returned as a warning, but the routine still goes on to solve for X and computes error bounds as described below.
2.6. The system of equations is solved for X using the factored form of transpose(A).
2.7. If options->IterRefine != NOREFINE, iterative refinement is applied to improve the computed solution matrix and calculate error bounds and backward error estimates for it.
2.8. If equilibration was used, the matrix X is premultiplied by diag(C) (if options->Trans = NOTRANS) or diag(R) (if options->Trans = TRANS or CONJ) so that it solves the original system before equilibration.
See supermatrix.h for the definition of 'SuperMatrix' structure.
Arguments
options (input) superlu_options_t* The structure defines the input parameters to control how the LU decomposition will be performed and how the system will be solved.
A (input/output) SuperMatrix* Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number of the linear equations is A->nrow. Currently, the type of A can be: Stype = SLU_NC or SLU_NR, Dtype = SLU_D, Mtype = SLU_GE. In the future, more general A may be handled.
On entry, If options->Fact = FACTORED and equed is not 'N', then A must have been equilibrated by the scaling factors in R and/or C. On exit, A is not modified if options->Equil = NO, or if options->Equil = YES but equed = 'N' on exit. Otherwise, if options->Equil = YES and equed is not 'N', A is scaled as follows: If A->Stype = SLU_NC: equed = 'R': A := diag(R) * A equed = 'C': A := A * diag(C) equed = 'B': A := diag(R) * A * diag(C). If A->Stype = SLU_NR: equed = 'R': transpose(A) := diag(R) * transpose(A) equed = 'C': transpose(A) := transpose(A) * diag(C) equed = 'B': transpose(A) := diag(R) * transpose(A) * diag(C).
perm_c (input/output) int* If A->Stype = SLU_NC, Column permutation vector of size A->ncol, which defines the permutation matrix Pc; perm_c[i] = j means column i of A is in position j in A*Pc. On exit, perm_c may be overwritten by the product of the input perm_c and a permutation that postorders the elimination tree of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree is already in postorder.
If A->Stype = SLU_NR, column permutation vector of size A->nrow, which describes permutation of columns of transpose(A) (rows of A) as described above.
perm_r (input/output) int* If A->Stype = SLU_NC, row permutation vector of size A->nrow, which defines the permutation matrix Pr, and is determined by partial pivoting. perm_r[i] = j means row i of A is in position j in Pr*A.
If A->Stype = SLU_NR, permutation vector of size A->ncol, which determines permutation of rows of transpose(A) (columns of A) as described above.
If options->Fact = SamePattern_SameRowPerm, the pivoting routine will try to use the input perm_r, unless a certain threshold criterion is violated. In that case, perm_r is overwritten by a new permutation determined by partial pivoting or diagonal threshold pivoting. Otherwise, perm_r is output argument.
etree (input/output) int*, dimension (A->ncol) Elimination tree of Pc'*A'*A*Pc. If options->Fact != FACTORED and options->Fact != DOFACT, etree is an input argument, otherwise it is an output argument. Note: etree is a vector of parent pointers for a forest whose vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
equed (input/output) char* Specifies the form of equilibration that was done. = 'N': No equilibration. = 'R': Row equilibration, i.e., A was premultiplied by diag(R). = 'C': Column equilibration, i.e., A was postmultiplied by diag(C). = 'B': Both row and column equilibration, i.e., A was replaced by diag(R)*A*diag(C). If options->Fact = FACTORED, equed is an input argument, otherwise it is an output argument.
R (input/output) float*, dimension (A->nrow) The row scale factors for A or transpose(A). If equed = 'R' or 'B', A (if A->Stype = SLU_NC) or transpose(A) (if A->Stype = SLU_NR) is multiplied on the left by diag(R). If equed = 'N' or 'C', R is not accessed. If options->Fact = FACTORED, R is an input argument, otherwise, R is output. If options->zFact = FACTORED and equed = 'R' or 'B', each element of R must be positive.
C (input/output) float*, dimension (A->ncol) The column scale factors for A or transpose(A). If equed = 'C' or 'B', A (if A->Stype = SLU_NC) or transpose(A) (if A->Stype = SLU_NR) is multiplied on the right by diag(C). If equed = 'N' or 'R', C is not accessed. If options->Fact = FACTORED, C is an input argument, otherwise, C is output. If options->Fact = FACTORED and equed = 'C' or 'B', each element of C must be positive.
L (output) SuperMatrix* The factor L from the factorization Pr*A*Pc=L*U (if A->Stype SLU_= NC) or Pr*transpose(A)*Pc=L*U (if A->Stype = SLU_NR). Uses compressed row subscripts storage for supernodes, i.e., L has types: Stype = SLU_SC, Dtype = SLU_C, Mtype = SLU_TRLU.
U (output) SuperMatrix* The factor U from the factorization Pr*A*Pc=L*U (if A->Stype = SLU_NC) or Pr*transpose(A)*Pc=L*U (if A->Stype = SLU_NR). Uses column-wise storage scheme, i.e., U has types: Stype = SLU_NC, Dtype = SLU_C, Mtype = SLU_TRU.
work (workspace/output) void*, size (lwork) (in bytes) User supplied workspace, should be large enough to hold data structures for factors L and U. On exit, if fact is not 'F', L and U point to this array.
lwork (input) int Specifies the size of work array in bytes. = 0: allocate space internally by system malloc; > 0: use user-supplied work array of length lwork in bytes, returns error if space runs out. = -1: the routine guesses the amount of space needed without performing the factorization, and returns it in mem_usage->total_needed; no other side effects.
See argument 'mem_usage' for memory usage statistics.
B (input/output) SuperMatrix* B has types: Stype = SLU_DN, Dtype = SLU_C, Mtype = SLU_GE. On entry, the right hand side matrix. If B->ncol = 0, only LU decomposition is performed, the triangular solve is skipped. On exit, if equed = 'N', B is not modified; otherwise if A->Stype = SLU_NC: if options->Trans = NOTRANS and equed = 'R' or 'B', B is overwritten by diag(R)*B; if options->Trans = TRANS or CONJ and equed = 'C' of 'B', B is overwritten by diag(C)*B; if A->Stype = SLU_NR: if options->Trans = NOTRANS and equed = 'C' or 'B', B is overwritten by diag(C)*B; if options->Trans = TRANS or CONJ and equed = 'R' of 'B', B is overwritten by diag(R)*B.
X (output) SuperMatrix* X has types: Stype = SLU_DN, Dtype = SLU_C, Mtype = SLU_GE. If info = 0 or info = A->ncol+1, X contains the solution matrix to the original system of equations. Note that A and B are modified on exit if equed is not 'N', and the solution to the equilibrated system is inv(diag(C))*X if options->Trans = NOTRANS and equed = 'C' or 'B', or inv(diag(R))*X if options->Trans = 'T' or 'C' and equed = 'R' or 'B'.
recip_pivot_growth (output) float* The reciprocal pivot growth factor max_j( norm(A_j)/norm(U_j) ). The infinity norm is used. If recip_pivot_growth is much less than 1, the stability of the LU factorization could be poor.
rcond (output) float* The estimate of the reciprocal condition number of the matrix A after equilibration (if done). If rcond is less than the machine precision (in particular, if rcond = 0), the matrix is singular to working precision. This condition is indicated by a return code of info > 0.
FERR (output) float*, dimension (B->ncol) The estimated forward error bound for each solution vector X(j) (the j-th column of the solution matrix X). If XTRUE is the true solution corresponding to X(j), FERR(j) is an estimated upper bound for the magnitude of the largest element in (X(j) - XTRUE) divided by the magnitude of the largest element in X(j). The estimate is as reliable as the estimate for RCOND, and is almost always a slight overestimate of the true error. If options->IterRefine = NOREFINE, ferr = 1.0.
BERR (output) float*, dimension (B->ncol) The componentwise relative backward error of each solution vector X(j) (i.e., the smallest relative change in any element of A or B that makes X(j) an exact solution). If options->IterRefine = NOREFINE, berr = 1.0.
Glu (input/output) GlobalLU_t * If options->Fact == SamePattern_SameRowPerm, it is an input; The matrix A will be factorized assuming that a factorization of a matrix with the same sparsity pattern and similar numerical values was performed prior to this one. Therefore, this factorization will reuse both row and column scaling factors R and C, both row and column permutation vectors perm_r and perm_c, and the L & U data structures set up from the previous factorization. Otherwise, it is an output.
mem_usage (output) mem_usage_t* Record the memory usage statistics, consisting of following fields:
stat (output) SuperLUStat_t* Record the statistics on runtime and floating-point operation count. See slu_util.h for the definition of 'SuperLUStat_t'.
info (output) int* = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value > 0: if info = i, and i is <= A->ncol: U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, so the solution and error bounds could not be computed. = A->ncol+1: U is nonsingular, but RCOND is less than machine precision, meaning that the matrix is singular to working precision. Nevertheless, the solution and error bounds are computed because there are a number of situations where the computed solution can be more accurate than the value of RCOND would suggest. > A->ncol+1: number of bytes allocated when memory allocation failure occurred, plus A->ncol.
void cgstrf | ( | superlu_options_t * | options, |
SuperMatrix * | A, | ||
int | relax, | ||
int | panel_size, | ||
int * | etree, | ||
void * | work, | ||
int | lwork, | ||
int * | perm_c, | ||
int * | perm_r, | ||
SuperMatrix * | L, | ||
SuperMatrix * | U, | ||
GlobalLU_t * | Glu, | ||
SuperLUStat_t * | stat, | ||
int * | info | ||
) |
Purpose
CGSTRF computes an LU factorization of a general sparse m-by-n matrix A using partial pivoting with row interchanges. The factorization has the form Pr * A = L * U where Pr is a row permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if A->nrow > A->ncol), and U is upper triangular (upper trapezoidal if A->nrow < A->ncol).
See supermatrix.h for the definition of 'SuperMatrix' structure.
Arguments
options (input) superlu_options_t* The structure defines the input parameters to control how the LU decomposition will be performed.
A (input) SuperMatrix* Original matrix A, permuted by columns, of dimension (A->nrow, A->ncol). The type of A can be: Stype = SLU_NCP; Dtype = SLU_C; Mtype = SLU_GE.
relax (input) int To control degree of relaxing supernodes. If the number of nodes (columns) in a subtree of the elimination tree is less than relax, this subtree is considered as one supernode, regardless of the row structures of those columns.
panel_size (input) int A panel consists of at most panel_size consecutive columns.
etree (input) int*, dimension (A->ncol) Elimination tree of A'*A. Note: etree is a vector of parent pointers for a forest whose vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol. On input, the columns of A should be permuted so that the etree is in a certain postorder.
work (input/output) void*, size (lwork) (in bytes) User-supplied work space and space for the output data structures. Not referenced if lwork = 0;
lwork (input) int Specifies the size of work array in bytes. = 0: allocate space internally by system malloc; > 0: use user-supplied work array of length lwork in bytes, returns error if space runs out. = -1: the routine guesses the amount of space needed without performing the factorization, and returns it in *info; no other side effects.
perm_c (input) int*, dimension (A->ncol) Column permutation vector, which defines the permutation matrix Pc; perm_c[i] = j means column i of A is in position j in A*Pc. When searching for diagonal, perm_c[*] is applied to the row subscripts of A, so that diagonal threshold pivoting can find the diagonal of A, rather than that of A*Pc.
perm_r (input/output) int*, dimension (A->nrow) Row permutation vector which defines the permutation matrix Pr, perm_r[i] = j means row i of A is in position j in Pr*A. If options->Fact == SamePattern_SameRowPerm, the pivoting routine will try to use the input perm_r, unless a certain threshold criterion is violated. In that case, perm_r is overwritten by a new permutation determined by partial pivoting or diagonal threshold pivoting. Otherwise, perm_r is output argument;
L (output) SuperMatrix* The factor L from the factorization Pr*A=L*U; use compressed row subscripts storage for supernodes, i.e., L has type: Stype = SLU_SC, Dtype = SLU_C, Mtype = SLU_TRLU.
U (output) SuperMatrix* The factor U from the factorization Pr*A*Pc=L*U. Use column-wise storage scheme, i.e., U has types: Stype = SLU_NC, Dtype = SLU_C, Mtype = SLU_TRU.
Glu (input/output) GlobalLU_t * If options->Fact == SamePattern_SameRowPerm, it is an input; The matrix A will be factorized assuming that a factorization of a matrix with the same sparsity pattern and similar numerical values was performed prior to this one. Therefore, this factorization will reuse both row and column scaling factors R and C, both row and column permutation vectors perm_r and perm_c, and the L & U data structures set up from the previous factorization. Otherwise, it is an output.
stat (output) SuperLUStat_t* Record the statistics on runtime and floating-point operation count. See slu_util.h for the definition of 'SuperLUStat_t'.
info (output) int* = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value > 0: if info = i, and i is <= A->ncol: U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations. > A->ncol: number of bytes allocated when memory allocation failure occurred, plus A->ncol. If lwork = -1, it is the estimated amount of space needed, plus A->ncol.
Local Working Arrays:
m = number of rows in the matrix n = number of columns in the matrix
xprune[0:n-1]: xprune[*] points to locations in subscript vector lsub[*]. For column i, xprune[i] denotes the point where structural pruning begins. I.e. only xlsub[i],..,xprune[i]-1 need to be traversed for symbolic factorization.
marker[0:3*m-1]: marker[i] = j means that node i has been reached when working on column j. Storage: relative to original row subscripts NOTE: There are 3 of them: marker/marker1 are used for panel dfs, see cpanel_dfs.c; marker2 is used for inner-factorization, see ccolumn_dfs.c.
parent[0:m-1]: parent vector used during dfs Storage: relative to new row subscripts
xplore[0:m-1]: xplore[i] gives the location of the next (dfs) unexplored neighbor of i in lsub[*]
segrep[0:nseg-1]: contains the list of supernodal representatives in topological order of the dfs. A supernode representative is the last column of a supernode. The maximum size of segrep[] is n.
repfnz[0:W*m-1]: for a nonzero segment U[*,j] that ends at a supernodal representative r, repfnz[r] is the location of the first nonzero in this segment. It is also used during the dfs: repfnz[r]>0 indicates the supernode r has been explored. NOTE: There are W of them, each used for one column of a panel.
panel_lsub[0:W*m-1]: temporary for the nonzeros row indices below the panel diagonal. These are filled in during cpanel_dfs(), and are used later in the inner LU factorization within the panel. panel_lsub[]/dense[] pair forms the SPA data structure. NOTE: There are W of them.
dense[0:W*m-1]: sparse accumulating (SPA) vector for intermediate values; NOTE: there are W of them.
tempv[0:*]: real temporary used for dense numeric kernels; The size of this array is defined by NUM_TEMPV() in slu_cdefs.h.
void cgstrs | ( | trans_t | trans, |
SuperMatrix * | L, | ||
SuperMatrix * | U, | ||
int * | perm_c, | ||
int * | perm_r, | ||
SuperMatrix * | B, | ||
SuperLUStat_t * | stat, | ||
int * | info | ||
) |
Purpose
CGSTRS solves a system of linear equations A*X=B or A'*X=B with A sparse and B dense, using the LU factorization computed by CGSTRF.
See supermatrix.h for the definition of 'SuperMatrix' structure.
Arguments
trans (input) trans_t Specifies the form of the system of equations: = NOTRANS: A * X = B (No transpose) = TRANS: A'* X = B (Transpose) = CONJ: A**H * X = B (Conjugate transpose)
L (input) SuperMatrix* The factor L from the factorization Pr*A*Pc=L*U as computed by cgstrf(). Use compressed row subscripts storage for supernodes, i.e., L has types: Stype = SLU_SC, Dtype = SLU_C, Mtype = SLU_TRLU.
U (input) SuperMatrix* The factor U from the factorization Pr*A*Pc=L*U as computed by cgstrf(). Use column-wise storage scheme, i.e., U has types: Stype = SLU_NC, Dtype = SLU_C, Mtype = SLU_TRU.
perm_c (input) int*, dimension (L->ncol) Column permutation vector, which defines the permutation matrix Pc; perm_c[i] = j means column i of A is in position j in A*Pc.
perm_r (input) int*, dimension (L->nrow) Row permutation vector, which defines the permutation matrix Pr; perm_r[i] = j means row i of A is in position j in Pr*A.
B (input/output) SuperMatrix* B has types: Stype = SLU_DN, Dtype = SLU_C, Mtype = SLU_GE. On entry, the right hand side matrix. On exit, the solution matrix if info = 0;
stat (output) SuperLUStat_t* Record the statistics on runtime and floating-point operation count. See util.h for the definition of 'SuperLUStat_t'.
info (output) int* = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value
void cinf_norm_error | ( | int | , |
SuperMatrix * | , | ||
complex * | |||
) |
void claqgs | ( | SuperMatrix * | A, |
float * | r, | ||
float * | c, | ||
float | rowcnd, | ||
float | colcnd, | ||
float | amax, | ||
char * | equed | ||
) |
Purpose
CLAQGS equilibrates a general sparse M by N matrix A using the row and scaling factors in the vectors R and C.
See supermatrix.h for the definition of 'SuperMatrix' structure.
Arguments
A (input/output) SuperMatrix* On exit, the equilibrated matrix. See EQUED for the form of the equilibrated matrix. The type of A can be: Stype = NC; Dtype = SLU_C; Mtype = GE.
R (input) float*, dimension (A->nrow) The row scale factors for A.
C (input) float*, dimension (A->ncol) The column scale factors for A.
ROWCND (input) float Ratio of the smallest R(i) to the largest R(i).
COLCND (input) float Ratio of the smallest C(i) to the largest C(i).
AMAX (input) float Absolute value of largest matrix entry.
EQUED (output) char* Specifies the form of equilibration that was done. = 'N': No equilibration = 'R': Row equilibration, i.e., A has been premultiplied by diag(R). = 'C': Column equilibration, i.e., A has been postmultiplied by diag(C). = 'B': Both row and column equilibration, i.e., A has been replaced by diag(R) * A * diag(C).
Internal Parameters
THRESH is a threshold value used to decide if row or column scaling should be done based on the ratio of the row or column scaling factors. If ROWCND < THRESH, row scaling is done, and if COLCND < THRESH, column scaling is done.
LARGE and SMALL are threshold values used to decide if row scaling should be done based on the absolute size of the largest matrix element. If AMAX > LARGE or AMAX < SMALL, row scaling is done.
int cldperm | ( | int | , |
int | , | ||
int | , | ||
int | [], | ||
int | [], | ||
complex | [], | ||
int | [], | ||
float | [], | ||
float | [] | ||
) |
int cLUMemInit | ( | fact_t | fact, |
void * | work, | ||
int | lwork, | ||
int | m, | ||
int | n, | ||
int | annz, | ||
int | panel_size, | ||
float | fill_ratio, | ||
SuperMatrix * | L, | ||
SuperMatrix * | U, | ||
GlobalLU_t * | Glu, | ||
int ** | iwork, | ||
complex ** | dwork | ||
) |
Memory-related.
For those unpredictable size, estimate as fill_ratio * nnz(A). Return value: If lwork = -1, return the estimated amount of space required, plus n; otherwise, return the amount of space actually allocated when memory allocation failure occurred.
int cLUMemXpand | ( | int | jcol, |
int | next, | ||
MemType | mem_type, | ||
int * | maxlen, | ||
GlobalLU_t * | Glu | ||
) |
Return value: 0 - successful return > 0 - number of bytes allocated when run out of space
void cLUWorkFree | ( | int * | , |
complex * | , | ||
GlobalLU_t * | |||
) |
int cmemory_usage | ( | const int | , |
const int | , | ||
const int | , | ||
const int | |||
) |
complex* complexCalloc | ( | int | ) |
complex* complexMalloc | ( | int | ) |
void countnz | ( | const int | , |
int * | , | ||
int * | , | ||
int * | , | ||
GlobalLU_t * | |||
) |
void cpanel_bmod | ( | const int | m, |
const int | w, | ||
const int | jcol, | ||
const int | nseg, | ||
complex * | dense, | ||
complex * | tempv, | ||
int * | segrep, | ||
int * | repfnz, | ||
GlobalLU_t * | Glu, | ||
SuperLUStat_t * | stat | ||
) |
Purpose
Performs numeric block updates (sup-panel) in topological order. It features: col-col, 2cols-col, 3cols-col, and sup-col updates. Special processing on the supernodal portion of L[*,j]
Before entering this routine, the original nonzeros in the panel were already copied into the spa[m,w].
Updated/Output parameters- dense[0:m-1,w]: L[*,j:j+w-1] and U[*,j:j+w-1] are returned collectively in the m-by-w vector dense[*].
void cpanel_dfs | ( | const int | m, |
const int | w, | ||
const int | jcol, | ||
SuperMatrix * | A, | ||
int * | perm_r, | ||
int * | nseg, | ||
complex * | dense, | ||
int * | panel_lsub, | ||
int * | segrep, | ||
int * | repfnz, | ||
int * | xprune, | ||
int * | marker, | ||
int * | parent, | ||
int * | xplore, | ||
GlobalLU_t * | Glu | ||
) |
Purpose
Performs a symbolic factorization on a panel of columns [jcol, jcol+w).
A supernode representative is the last column of a supernode. The nonzeros in U[*,j] are segments that end at supernodal representatives.
The routine returns one list of the supernodal representatives in topological order of the dfs that generates them. This list is a superset of the topological order of each individual column within the panel. The location of the first nonzero in each supernodal segment (supernodal entry location) is also returned. Each column has a separate list for this purpose.
Two marker arrays are used for dfs: marker[i] == jj, if i was visited during dfs of current column jj; marker1[i] >= jcol, if i was visited by earlier columns in this panel;
marker: A-row –> A-row/col (0/1) repfnz: SuperA-col –> PA-row parent: SuperA-col –> SuperA-col xplore: SuperA-col –> index to L-structure
float cPivotGrowth | ( | int | ncols, |
SuperMatrix * | A, | ||
int * | perm_c, | ||
SuperMatrix * | L, | ||
SuperMatrix * | U | ||
) |
Purpose
Compute the reciprocal pivot growth factor of the leading ncols columns of the matrix, using the formula: min_j ( max_i(abs(A_ij)) / max_i(abs(U_ij)) )
Arguments
ncols (input) int The number of columns of matrices A, L and U.
A (input) SuperMatrix* Original matrix A, permuted by columns, of dimension (A->nrow, A->ncol). The type of A can be: Stype = NC; Dtype = SLU_C; Mtype = GE.
L (output) SuperMatrix* The factor L from the factorization Pr*A=L*U; use compressed row subscripts storage for supernodes, i.e., L has type: Stype = SC; Dtype = SLU_C; Mtype = TRLU.
U (output) SuperMatrix* The factor U from the factorization Pr*A*Pc=L*U. Use column-wise storage scheme, i.e., U has types: Stype = NC; Dtype = SLU_C; Mtype = TRU.
int cpivotL | ( | const int | jcol, |
const double | u, | ||
int * | usepr, | ||
int * | perm_r, | ||
int * | iperm_r, | ||
int * | iperm_c, | ||
int * | pivrow, | ||
GlobalLU_t * | Glu, | ||
SuperLUStat_t * | stat | ||
) |
Purpose
Performs the numerical pivoting on the current column of L, and the CDIV operation.
Pivot policy: (1) Compute thresh = u * max_(i>=j) abs(A_ij); (2) IF user specifies pivot row k and abs(A_kj) >= thresh THEN pivot row = k; ELSE IF abs(A_jj) >= thresh THEN pivot row = j; ELSE pivot row = m;
Note: If you absolutely want to use a given pivot order, then set u=0.0.
Return value: 0 success; i > 0 U(i,i) is exactly zero.
void cPrint_CompCol_Matrix | ( | char * | , |
SuperMatrix * | |||
) |
void cPrint_Dense_Matrix | ( | char * | , |
SuperMatrix * | |||
) |
void cprint_lu_col | ( | char * | , |
int | , | ||
int | , | ||
int * | , | ||
GlobalLU_t * | |||
) |
void cPrint_SuperNode_Matrix | ( | char * | , |
SuperMatrix * | |||
) |
void cpruneL | ( | const int | jcol, |
const int * | perm_r, | ||
const int | pivrow, | ||
const int | nseg, | ||
const int * | segrep, | ||
const int * | repfnz, | ||
int * | xprune, | ||
GlobalLU_t * | Glu | ||
) |
Purpose
Prunes the L-structure of supernodes whose L-structure contains the current pivot row "pivrow"
int cQuerySpace | ( | SuperMatrix * | L, |
SuperMatrix * | U, | ||
mem_usage_t * | mem_usage | ||
) |
mem_usage consists of the following fields:
void creadhb | ( | FILE * | , |
int * | , | ||
int * | , | ||
int * | , | ||
complex ** | , | ||
int ** | , | ||
int ** | |||
) |
void creadmt | ( | int * | , |
int * | , | ||
int * | , | ||
complex ** | , | ||
int ** | , | ||
int ** | |||
) |
void creadrb | ( | int * | , |
int * | , | ||
int * | , | ||
complex ** | , | ||
int ** | , | ||
int ** | |||
) |
void creadtriple | ( | int * | , |
int * | , | ||
int * | , | ||
complex ** | , | ||
int ** | , | ||
int ** | |||
) |
int csnode_bmod | ( | const int | , |
const int | , | ||
const int | , | ||
complex * | , | ||
complex * | , | ||
GlobalLU_t * | , | ||
SuperLUStat_t * | |||
) |
int csnode_dfs | ( | const int | jcol, |
const int | kcol, | ||
const int * | asub, | ||
const int * | xa_begin, | ||
const int * | xa_end, | ||
int * | xprune, | ||
int * | marker, | ||
GlobalLU_t * | Glu | ||
) |
Purpose
csnode_dfs() - Determine the union of the row structures of those columns within the relaxed snode. Note: The relaxed snodes are leaves of the supernodal etree, therefore, the portion outside the rectangular supernode must be zero.
Return value
0 success; >0 number of bytes allocated when run out of memory.
int ctrsm_ | ( | char * | , |
char * | , | ||
char * | , | ||
char * | , | ||
int * | , | ||
int * | , | ||
complex * | , | ||
complex * | , | ||
int * | , | ||
complex * | , | ||
int * | |||
) |
void fixupL | ( | const int | , |
const int * | , | ||
GlobalLU_t * | |||
) |
float* floatCalloc | ( | int | ) |
float* floatMalloc | ( | int | ) |
int ilu_ccolumn_dfs | ( | const int | m, |
const int | jcol, | ||
int * | perm_r, | ||
int * | nseg, | ||
int * | lsub_col, | ||
int * | segrep, | ||
int * | repfnz, | ||
int * | marker, | ||
int * | parent, | ||
int * | xplore, | ||
GlobalLU_t * | Glu | ||
) |
Purpose
ILU_CCOLUMN_DFS performs a symbolic factorization on column jcol, and decide the supernode boundary.
This routine does not use numeric values, but only use the RHS row indices to start the dfs.
A supernode representative is the last column of a supernode. The nonzeros in U[*,j] are segments that end at supernodal representatives. The routine returns a list of such supernodal representatives in topological order of the dfs that generates them. The location of the first nonzero in each such supernodal segment (supernodal entry location) is also returned.
Local parameters
nseg: no of segments in current U[*,j] jsuper: jsuper=EMPTY if column j does not belong to the same supernode as j-1. Otherwise, jsuper=nsuper.
marker2: A-row –> A-row/col (0/1) repfnz: SuperA-col –> PA-row parent: SuperA-col –> SuperA-col xplore: SuperA-col –> index to L-structure
Return value
0 success;0 number of bytes allocated when run out of space.
int ilu_ccopy_to_ucol | ( | int | , |
int | , | ||
int * | , | ||
int * | , | ||
int * | , | ||
complex * | , | ||
int | , | ||
milu_t | , | ||
double | , | ||
int | , | ||
complex * | , | ||
int * | , | ||
GlobalLU_t * | , | ||
float * | |||
) |
int ilu_cdrop_row | ( | superlu_options_t * | , |
int | , | ||
int | , | ||
double | , | ||
int | , | ||
int * | , | ||
double * | , | ||
GlobalLU_t * | , | ||
float * | , | ||
float * | , | ||
int | |||
) |
void ilu_countnz | ( | const int | , |
int * | , | ||
int * | , | ||
GlobalLU_t * | |||
) |
void ilu_cpanel_dfs | ( | const int | m, |
const int | w, | ||
const int | jcol, | ||
SuperMatrix * | A, | ||
int * | perm_r, | ||
int * | nseg, | ||
complex * | dense, | ||
float * | amax, | ||
int * | panel_lsub, | ||
int * | segrep, | ||
int * | repfnz, | ||
int * | marker, | ||
int * | parent, | ||
int * | xplore, | ||
GlobalLU_t * | Glu | ||
) |
Purpose
Performs a symbolic factorization on a panel of columns [jcol, jcol+w).
A supernode representative is the last column of a supernode. The nonzeros in U[*,j] are segments that end at supernodal representatives.
The routine returns one list of the supernodal representatives in topological order of the dfs that generates them. This list is a superset of the topological order of each individual column within the panel. The location of the first nonzero in each supernodal segment (supernodal entry location) is also returned. Each column has a separate list for this purpose.
Two marker arrays are used for dfs: marker[i] == jj, if i was visited during dfs of current column jj; marker1[i] >= jcol, if i was visited by earlier columns in this panel;
marker: A-row –> A-row/col (0/1) repfnz: SuperA-col –> PA-row parent: SuperA-col –> SuperA-col xplore: SuperA-col –> index to L-structure
int ilu_cpivotL | ( | const int | jcol, |
const double | u, | ||
int * | usepr, | ||
int * | perm_r, | ||
int | diagind, | ||
int * | swap, | ||
int * | iswap, | ||
int * | marker, | ||
int * | pivrow, | ||
double | fill_tol, | ||
milu_t | milu, | ||
complex | drop_sum, | ||
GlobalLU_t * | Glu, | ||
SuperLUStat_t * | stat | ||
) |
Purpose
Performs the numerical pivoting on the current column of L, and the CDIV operation.
Pivot policy: (1) Compute thresh = u * max_(i>=j) abs(A_ij); (2) IF user specifies pivot row k and abs(A_kj) >= thresh THEN pivot row = k; ELSE IF abs(A_jj) >= thresh THEN pivot row = j; ELSE pivot row = m;
Note: If you absolutely want to use a given pivot order, then set u=0.0.
Return value: 0 success; i > 0 U(i,i) is exactly zero.
int ilu_cQuerySpace | ( | SuperMatrix * | L, |
SuperMatrix * | U, | ||
mem_usage_t * | mem_usage | ||
) |
mem_usage consists of the following fields:
int ilu_csnode_dfs | ( | const int | jcol, |
const int | kcol, | ||
const int * | asub, | ||
const int * | xa_begin, | ||
const int * | xa_end, | ||
int * | marker, | ||
GlobalLU_t * | Glu | ||
) |
Purpose
ilu_csnode_dfs() - Determine the union of the row structures of those columns within the relaxed snode. Note: The relaxed snodes are leaves of the supernodal etree, therefore, the portion outside the rectangular supernode must be zero.
Return value
0 success; >0 number of bytes allocated when run out of memory.
int print_double_vec | ( | char * | , |
int | , | ||
double * | |||
) |
float smach | ( | char * | ) |
int sp_cgemm | ( | char * | transa, |
char * | transb, | ||
int | m, | ||
int | n, | ||
int | k, | ||
complex | alpha, | ||
SuperMatrix * | A, | ||
complex * | b, | ||
int | ldb, | ||
complex | beta, | ||
complex * | c, | ||
int | ldc | ||
) |
Purpose
sp_c performs one of the matrix-matrix operations
C := alpha*op( A )*op( B ) + beta*C,
where op( X ) is one of
op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ),
alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
Parameters
TRANSA - (input) char* On entry, TRANSA specifies the form of op( A ) to be used in the matrix multiplication as follows: TRANSA = 'N' or 'n', op( A ) = A. TRANSA = 'T' or 't', op( A ) = A'. TRANSA = 'C' or 'c', op( A ) = conjg( A' ). Unchanged on exit.
TRANSB - (input) char* On entry, TRANSB specifies the form of op( B ) to be used in the matrix multiplication as follows: TRANSB = 'N' or 'n', op( B ) = B. TRANSB = 'T' or 't', op( B ) = B'. TRANSB = 'C' or 'c', op( B ) = conjg( B' ). Unchanged on exit.
M - (input) int On entry, M specifies the number of rows of the matrix op( A ) and of the matrix C. M must be at least zero. Unchanged on exit.
N - (input) int On entry, N specifies the number of columns of the matrix op( B ) and the number of columns of the matrix C. N must be at least zero. Unchanged on exit.
K - (input) int On entry, K specifies the number of columns of the matrix op( A ) and the number of rows of the matrix op( B ). K must be at least zero. Unchanged on exit.
ALPHA - (input) complex On entry, ALPHA specifies the scalar alpha.
A - (input) SuperMatrix* Matrix A with a sparse format, of dimension (A->nrow, A->ncol). Currently, the type of A can be: Stype = NC or NCP; Dtype = SLU_C; Mtype = GE. In the future, more general A can be handled.
B - COMPLEX PRECISION array of DIMENSION ( LDB, kb ), where kb is n when TRANSB = 'N' or 'n', and is k otherwise. Before entry with TRANSB = 'N' or 'n', the leading k by n part of the array B must contain the matrix B, otherwise the leading n by k part of the array B must contain the matrix B. Unchanged on exit.
LDB - (input) int On entry, LDB specifies the first dimension of B as declared in the calling (sub) program. LDB must be at least max( 1, n ). Unchanged on exit.
BETA - (input) complex On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input.
C - COMPLEX PRECISION array of DIMENSION ( LDC, n ). Before entry, the leading m by n part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, the array C is overwritten by the m by n matrix ( alpha*op( A )*B + beta*C ).
LDC - (input) int On entry, LDC specifies the first dimension of C as declared in the calling (sub)program. LDC must be at least max(1,m). Unchanged on exit.
==== Sparse Level 3 Blas routine.
int sp_cgemv | ( | char * | trans, |
complex | alpha, | ||
SuperMatrix * | A, | ||
complex * | x, | ||
int | incx, | ||
complex | beta, | ||
complex * | y, | ||
int | incy | ||
) |
Purpose
sp_cgemv() performs one of the matrix-vector operations y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, where alpha and beta are scalars, x and y are vectors and A is a sparse A->nrow by A->ncol matrix.
Parameters
TRANS - (input) char* On entry, TRANS specifies the operation to be performed as follows: TRANS = 'N' or 'n' y := alpha*A*x + beta*y. TRANS = 'T' or 't' y := alpha*A'*x + beta*y. TRANS = 'C' or 'c' y := alpha*A^H*x + beta*y.
ALPHA - (input) complex On entry, ALPHA specifies the scalar alpha.
A - (input) SuperMatrix* Before entry, the leading m by n part of the array A must contain the matrix of coefficients.
X - (input) complex*, array of DIMENSION at least ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' and at least ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. Before entry, the incremented array X must contain the vector x.
INCX - (input) int On entry, INCX specifies the increment for the elements of X. INCX must not be zero.
BETA - (input) complex On entry, BETA specifies the scalar beta. When BETA is supplied as zero then Y need not be set on input.
Y - (output) complex*, array of DIMENSION at least ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' and at least ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. Before entry with BETA non-zero, the incremented array Y must contain the vector y. On exit, Y is overwritten by the updated vector y.
INCY - (input) int On entry, INCY specifies the increment for the elements of Y. INCY must not be zero.
==== Sparse Level 2 Blas routine.
int sp_ctrsv | ( | char * | uplo, |
char * | trans, | ||
char * | diag, | ||
SuperMatrix * | L, | ||
SuperMatrix * | U, | ||
complex * | x, | ||
SuperLUStat_t * | stat, | ||
int * | info | ||
) |
Purpose
sp_ctrsv() solves one of the systems of equations A*x = b, or A'*x = b, where b and x are n element vectors and A is a sparse unit , or non-unit, upper or lower triangular matrix. No test for singularity or near-singularity is included in this routine. Such tests must be performed before calling this routine.
Parameters
uplo - (input) char* On entry, uplo specifies whether the matrix is an upper or lower triangular matrix as follows: uplo = 'U' or 'u' A is an upper triangular matrix. uplo = 'L' or 'l' A is a lower triangular matrix.
trans - (input) char* On entry, trans specifies the equations to be solved as follows: trans = 'N' or 'n' A*x = b. trans = 'T' or 't' A'*x = b. trans = 'C' or 'c' A^H*x = b.
diag - (input) char* On entry, diag specifies whether or not A is unit triangular as follows: diag = 'U' or 'u' A is assumed to be unit triangular. diag = 'N' or 'n' A is not assumed to be unit triangular.
L - (input) SuperMatrix* The factor L from the factorization Pr*A*Pc=L*U. Use compressed row subscripts storage for supernodes, i.e., L has types: Stype = SC, Dtype = SLU_C, Mtype = TRLU.
U - (input) SuperMatrix* The factor U from the factorization Pr*A*Pc=L*U. U has types: Stype = NC, Dtype = SLU_C, Mtype = TRU.
x - (input/output) complex* Before entry, the incremented array X must contain the n element right-hand side vector b. On exit, X is overwritten with the solution vector x.
info - (output) int* If *info = -i, the i-th argument had an illegal value.
float sqselect | ( | int | , |
float * | , | ||
int | |||
) |