#include <mpi.h>
#include <stdlib.h>
#include <stdio.h>
#include "Cnames.h"
#include "supermatrix.h"
#include "util_dist.h"
#include "psymbfact.h"
Go to the source code of this file.
Data Structures | |
struct | superlu_scope_t |
struct | gridinfo_t |
struct | Glu_persist_t |
struct | Glu_freeable_t |
struct | ScalePermstruct_t |
struct | superlu_options_t |
struct | mem_usage_t |
Defines | |
#define | mpi_int_t MPI_INT |
#define | BC_HEADER 2 |
#define | LB_DESCRIPTOR 2 |
#define | BR_HEADER 3 |
#define | UB_DESCRIPTOR 2 |
#define | NBUFFERS 5 |
#define | NTAGS 10000 |
#define | UjROW 10 |
#define | UkSUB 11 |
#define | UkVAL 12 |
#define | LkSUB 13 |
#define | LkVAL 14 |
#define | LkkDIAG 15 |
#define | XK_H 1 |
#define | LSUM_H 1 |
#define | GSUM 20 |
#define | Xk 21 |
#define | Yk 22 |
#define | LSUM 23 |
#define | COMM_ALL 100 |
#define | COMM_COLUMN 101 |
#define | COMM_ROW 102 |
#define | SUPER_LINEAR 11 |
#define | SUPER_BLOCK 12 |
#define | NO_MARKER 3 |
#define | IAM(comm) { int rank; MPI_Comm_rank ( comm, &rank ); rank}; |
#define | MYROW(iam, grid) ( (iam) / grid->npcol ) |
#define | MYCOL(iam, grid) ( (iam) % grid->npcol ) |
#define | BlockNum(i) ( supno[i] ) |
#define | FstBlockC(bnum) ( xsup[bnum] ) |
#define | SuperSize(bnum) ( xsup[bnum+1]-xsup[bnum] ) |
#define | LBi(bnum, grid) ( (bnum)/grid->nprow ) |
#define | LBj(bnum, grid) ( (bnum)/grid->npcol ) |
#define | PROW(bnum, grid) ( (bnum) % grid->nprow ) |
#define | PCOL(bnum, grid) ( (bnum) % grid->npcol ) |
#define | PNUM(i, j, grid) ( (i)*grid->npcol + j ) |
#define | CEILING(a, b) ( ((a)%(b)) ? ((a)/(b) + 1) : ((a)/(b)) ) |
#define | RHS_ITERATE(i) for (i = 0; i < nrhs; ++i) |
#define | X_BLK(i) ilsum[i] * nrhs + (i+1) * XK_H |
#define | LSUM_BLK(i) ilsum[i] * nrhs + (i+1) * LSUM_H |
#define | SuperLU_timer_ SuperLU_timer_dist_ |
#define | LOG2(x) (log10((double) x) / log10(2.0)) |
#define | VT_TRACEON |
#define | VT_TRACEOFF |
Typedefs | |
typedef int | int_t |
Enumerations | |
enum | yes_no_t { NO, YES } |
enum | fact_t { DOFACT, SamePattern, SamePattern_SameRowPerm, FACTORED } |
enum | rowperm_t { NOROWPERM, LargeDiag, MY_PERMR } |
enum | colperm_t { NATURAL, MMD_AT_PLUS_A, MMD_ATA, METIS_AT_PLUS_A, PARMETIS, MY_PERMC } |
enum | trans_t { NOTRANS, TRANS, CONJ } |
enum | DiagScale_t { NOEQUIL, ROW, COL, BOTH } |
enum | IterRefine_t { NOREFINE, SINGLE = 1, DOUBLE, EXTRA } |
enum | MemType { LUSUP, UCOL, LSUB, USUB } |
enum | stack_end_t { HEAD, TAIL } |
enum | LU_space_t { SYSTEM, USER } |
Functions | |
void | set_default_options_dist (superlu_options_t *) |
Set the default values for the options argument. | |
void | print_options_dist (superlu_options_t *) |
Print the options setting. | |
void | Destroy_CompCol_Matrix_dist (SuperMatrix *) |
void | Destroy_SuperNode_Matrix_dist (SuperMatrix *) |
void | Destroy_SuperMatrix_Store_dist (SuperMatrix *) |
Deallocate the structure pointing to the actual storage of the matrix. | |
void | Destroy_CompCol_Permuted_dist (SuperMatrix *) |
A is of type Stype==NCP. | |
void | Destroy_CompRowLoc_Matrix_dist (SuperMatrix *) |
void | Destroy_CompRow_Matrix_dist (SuperMatrix *) |
void | sp_colorder (superlu_options_t *, SuperMatrix *, int_t *, int_t *, SuperMatrix *) |
int_t | sp_coletree_dist (int_t *, int_t *, int_t *, int_t, int_t, int_t *) |
Nonsymmetric elimination tree. | |
void | countnz_dist (const int_t, int_t *, int_t *, int_t *, Glu_persist_t *, Glu_freeable_t *) |
int_t | fixupL_dist (const int_t, const int_t *, Glu_persist_t *, Glu_freeable_t *) |
int_t * | TreePostorder_dist (int_t, int_t *) |
float | slamch_ (char *) |
double | dlamch_ (char *) |
void * | superlu_malloc_dist (size_t) |
void | superlu_free_dist (void *) |
int_t * | intMalloc_dist (int_t) |
int_t * | intCalloc_dist (int_t) |
double | SuperLU_timer_ () |
void | superlu_abort_and_exit_dist (char *) |
int_t | sp_ienv_dist (int_t) |
Purpose ======= | |
int | lsame_ (char *, char *) |
int | xerbla_ (char *, int *) |
void | ifill_dist (int_t *, int_t, int_t) |
Fills an integer array with a given value. | |
void | super_stats_dist (int_t, int_t *) |
void | ScalePermstructInit (const int_t, const int_t, ScalePermstruct_t *) |
Allocate storage in ScalePermstruct. | |
void | ScalePermstructFree (ScalePermstruct_t *) |
Deallocate ScalePermstruct. | |
void | superlu_gridinit (MPI_Comm, int_t, int_t, gridinfo_t *) |
All processes in the MPI communicator must call this routine. | |
void | superlu_gridmap (MPI_Comm, int_t, int_t, int_t[], int_t, gridinfo_t *) |
All processes in the MPI communicator must call this routine. | |
void | superlu_gridexit (gridinfo_t *) |
void | get_perm_c_dist (int_t, int_t, SuperMatrix *, int_t *) |
void | a_plus_at_dist (const int_t, const int_t, int_t *, int_t *, int_t *, int_t **, int_t **) |
void | bcast_tree (void *, int, MPI_Datatype, int, int, gridinfo_t *, int, int *) |
int_t | symbfact (superlu_options_t *, int, SuperMatrix *, int_t *, int_t *, Glu_persist_t *, Glu_freeable_t *) |
int_t | symbfact_SubInit (fact_t, void *, int_t, int_t, int_t, int_t, Glu_persist_t *, Glu_freeable_t *) |
int_t | symbfact_SubXpand (int_t, int_t, int_t, MemType, int_t *, Glu_freeable_t *) |
int_t | symbfact_SubFree (Glu_freeable_t *) |
void | get_diag_procs (int_t, Glu_persist_t *, gridinfo_t *, int_t *, int_t **, int_t **) |
int_t | QuerySpace_dist (int_t, int_t, Glu_freeable_t *, mem_usage_t *) |
void | pxerbla (char *, gridinfo_t *, int_t) |
void | PStatInit (SuperLUStat_t *) |
void | PStatFree (SuperLUStat_t *) |
void | PStatPrint (superlu_options_t *, SuperLUStat_t *, gridinfo_t *) |
float | symbfact_dist (int, int, SuperMatrix *, int_t *, int_t *, int_t *, int_t *, Pslu_freeable_t *, MPI_Comm *, MPI_Comm *, mem_usage_t *) |
float | get_perm_c_parmetis (SuperMatrix *, int_t *, int_t *, int, int, int_t **, int_t **, gridinfo_t *, MPI_Comm *) |
int_t | psymbfact_LUXpandMem (int_t, int_t, int_t, int_t, int_t, int_t, int_t, int_t, Pslu_freeable_t *, Llu_symbfact_t *, vtcsInfo_symbfact_t *, psymbfact_stat_t *) |
int_t | psymbfact_LUXpand (int_t, int_t, int_t, int_t, int_t *, int_t, int_t, int_t, int_t, Pslu_freeable_t *, Llu_symbfact_t *, vtcsInfo_symbfact_t *, psymbfact_stat_t *) |
int_t | psymbfact_LUXpand_RL (int_t, int_t, int_t, int_t, int_t, int_t, Pslu_freeable_t *, Llu_symbfact_t *, vtcsInfo_symbfact_t *, psymbfact_stat_t *) |
int_t | psymbfact_prLUXpand (int_t, int_t, MemType, Llu_symbfact_t *, psymbfact_stat_t *) |
void | print_panel_seg_dist (int_t, int_t, int_t, int_t, int_t *, int_t *) |
Diagnostic print of segment info after panel_dfs(). | |
void | check_repfnz_dist (int_t, int_t, int_t, int_t *) |
Check whether repfnz[] == EMPTY after reset. | |
int_t | CheckZeroDiagonal (int_t, int_t *, int_t *, int_t *) |
void | PrintDouble5 (char *, int_t, double *) |
void | PrintInt10 (char *, int_t, int_t *) |
int | file_PrintInt10 (FILE *, char *, int_t, int_t *) |
-- Distributed SuperLU routine (version 2.2) -- Lawrence Berkeley National Lab, Univ. of California Berkeley. November 1, 2007 Feburary 20, 2008
#define BC_HEADER 2 |
#define BlockNum | ( | i | ) | ( supno[i] ) |
#define BR_HEADER 3 |
#define CEILING | ( | a, | |||
b | ) | ( ((a)%(b)) ? ((a)/(b) + 1) : ((a)/(b)) ) |
#define COMM_ALL 100 |
#define COMM_COLUMN 101 |
#define COMM_ROW 102 |
#define FstBlockC | ( | bnum | ) | ( xsup[bnum] ) |
#define GSUM 20 |
#define IAM | ( | comm | ) | { int rank; MPI_Comm_rank ( comm, &rank ); rank}; |
#define LB_DESCRIPTOR 2 |
#define LBi | ( | bnum, | |||
grid | ) | ( (bnum)/grid->nprow ) |
#define LBj | ( | bnum, | |||
grid | ) | ( (bnum)/grid->npcol ) |
#define LkkDIAG 15 |
#define LkSUB 13 |
#define LkVAL 14 |
#define LOG2 | ( | x | ) | (log10((double) x) / log10(2.0)) |
#define LSUM 23 |
#define LSUM_BLK | ( | i | ) | ilsum[i] * nrhs + (i+1) * LSUM_H |
#define LSUM_H 1 |
#define mpi_int_t MPI_INT |
#define MYCOL | ( | iam, | |||
grid | ) | ( (iam) % grid->npcol ) |
#define MYROW | ( | iam, | |||
grid | ) | ( (iam) / grid->npcol ) |
#define NBUFFERS 5 |
#define NO_MARKER 3 |
#define NTAGS 10000 |
#define PCOL | ( | bnum, | |||
grid | ) | ( (bnum) % grid->npcol ) |
#define PNUM | ( | i, | |||
j, | |||||
grid | ) | ( (i)*grid->npcol + j ) |
#define PROW | ( | bnum, | |||
grid | ) | ( (bnum) % grid->nprow ) |
#define RHS_ITERATE | ( | i | ) | for (i = 0; i < nrhs; ++i) |
#define SUPER_BLOCK 12 |
#define SUPER_LINEAR 11 |
#define SuperLU_timer_ SuperLU_timer_dist_ |
#define SuperSize | ( | bnum | ) | ( xsup[bnum+1]-xsup[bnum] ) |
#define UB_DESCRIPTOR 2 |
#define UjROW 10 |
#define UkSUB 11 |
#define UkVAL 12 |
#define VT_TRACEOFF |
#define VT_TRACEON |
#define X_BLK | ( | i | ) | ilsum[i] * nrhs + (i+1) * XK_H |
#define Xk 21 |
#define XK_H 1 |
#define Yk 22 |
typedef int int_t |
enum colperm_t |
enum DiagScale_t |
enum fact_t |
enum IterRefine_t |
enum LU_space_t |
enum MemType |
enum rowperm_t |
enum stack_end_t |
enum trans_t |
enum yes_no_t |
void bcast_tree | ( | void * | buf, | |
int | count, | |||
MPI_Datatype | dtype, | |||
int | root, | |||
int | tag, | |||
gridinfo_t * | grid, | |||
int | scope, | |||
int * | recvcnt | |||
) |
Purpose ======= Broadcast an array of *dtype* numbers. The communication pattern is a tree with number of branches equal to NBRANCHES. The process ranks are between 0 and Np-1.
The following two pairs of graphs give different ways of viewing the same algorithm. The first pair shows the trees as they should be visualized when examining the algorithm. The second pair are isomorphic graphs of of the first, which show the actual pattern of data movement. Note that a tree broadcast with NBRANCHES = 2 is isomorphic with a hypercube broadcast (however, it does not require the nodes be a power of two to work).
TREE BROADCAST, NBRANCHES = 2 * TREE BROADCAST, NBRANCHES = 3
root=2 i=4 &______________ * | \ * root=2 i=2 &______ &______ * i=3 &______________________ | \ | \ * | \ \ i=1 &__ &__ &__ &__ * i=1 &______ &______ &__ | \ | \ | \ | \ * | \ \ | \ \ | \ 2 3 4 5 6 7 0 1 * 2 3 4 5 6 7 0 1
ISOMORPHIC GRAPHS OF ABOVE, SHOWN IN MORE FAMILIAR TERMS:
2 2 _________|_________ ___________|____________ / | \ / | | \ 6 4 3 5 0 3 4 / \ | / \ | 0 7 5 6 7 1 | 1
Arguments =========
scope
void countnz_dist | ( | const int_t | n, | |
int_t * | xprune, | |||
int_t * | nnzL, | |||
int_t * | nnzU, | |||
Glu_persist_t * | Glu_persist, | |||
Glu_freeable_t * | Glu_freeable | |||
) |
Count the total number of nonzeros in factors L and U, and in the symmetrically reduced L.
void Destroy_CompCol_Matrix_dist | ( | SuperMatrix * | ) |
void Destroy_CompCol_Permuted_dist | ( | SuperMatrix * | ) |
void Destroy_CompRow_Matrix_dist | ( | SuperMatrix * | ) |
void Destroy_CompRowLoc_Matrix_dist | ( | SuperMatrix * | ) |
void Destroy_SuperMatrix_Store_dist | ( | SuperMatrix * | ) |
void Destroy_SuperNode_Matrix_dist | ( | SuperMatrix * | ) |
double dlamch_ | ( | char * | cmach | ) |
Purpose =======
DLAMCH determines double precision machine parameters.
Arguments =========
CMACH (input) CHARACTER*1 Specifies the value to be returned by DLAMCH: = 'E' or 'e', DLAMCH := eps = 'S' or 's , DLAMCH := sfmin = 'B' or 'b', DLAMCH := base = 'P' or 'p', DLAMCH := eps*base = 'N' or 'n', DLAMCH := t = 'R' or 'r', DLAMCH := rnd = 'M' or 'm', DLAMCH := emin = 'U' or 'u', DLAMCH := rmin = 'L' or 'l', DLAMCH := emax = 'O' or 'o', DLAMCH := rmax
where
eps = relative machine precision sfmin = safe minimum, such that 1/sfmin does not overflow base = base of the machine prec = eps*base t = number of (base) digits in the mantissa rnd = 1.0 when rounding occurs in addition, 0.0 otherwise emin = minimum exponent before (gradual) underflow rmin = underflow threshold - base**(emin-1) emax = largest exponent before overflow rmax = overflow threshold - (base**emax)*(1-eps)
=====================================================================
int_t fixupL_dist | ( | const int_t | n, | |
const int_t * | perm_r, | |||
Glu_persist_t * | Glu_persist, | |||
Glu_freeable_t * | Glu_freeable | |||
) |
Fix up the data storage lsub for L-subscripts. It removes the subscript sets for structural pruning, and applies permuation to the remaining subscripts.
void get_diag_procs | ( | int_t | , | |
Glu_persist_t * | , | |||
gridinfo_t * | , | |||
int_t * | , | |||
int_t ** | , | |||
int_t ** | ||||
) |
void get_perm_c_dist | ( | int_t | pnum, | |
int_t | ispec, | |||
SuperMatrix * | A, | |||
int_t * | perm_c | |||
) |
Purpose =======
GET_PERM_C_DIST obtains a permutation matrix Pc, by applying the multiple minimum degree ordering code by Joseph Liu to matrix A'*A or A+A', or using approximate minimum degree column ordering by Davis et. al. The LU factorization of A*Pc tends to have less fill than the LU factorization of A.
Arguments =========
ispec (input) colperm_t Specifies what type of column permutation to use to reduce fill. = NATURAL: natural ordering (i.e., Pc = I) = MMD_AT_PLUS_A: minimum degree ordering on structure of A'+A = MMD_ATA: minimum degree ordering on structure of A'*A = METIS_AT_PLUS_A: MeTis on A'+A
A (input) SuperMatrix* Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number of the linear equations is A->nrow. Currently, the type of A can be: Stype = SLU_NC; Dtype = SLU_D; Mtype = SLU_GE. In the future, more general A can be handled.
perm_c (output) int* Column permutation vector of size A->ncol, which defines the permutation matrix Pc; perm_c[i] = j means column i of A is in position j in A*Pc.
float get_perm_c_parmetis | ( | SuperMatrix * | A, | |
int_t * | perm_r, | |||
int_t * | perm_c, | |||
int | nprocs_i, | |||
int | noDomains, | |||
int_t ** | sizes, | |||
int_t ** | fstVtxSep, | |||
gridinfo_t * | grid, | |||
MPI_Comm * | metis_comm | |||
) |
Purpose =======
GET_PERM_C_PARMETIS obtains a permutation matrix Pc, by applying a graph partitioning algorithm to the symmetrized graph A+A'. The multilevel graph partitioning algorithm used is the ParMETIS_V3_NodeND routine available in the parallel graph partitioning package parMETIS.
The number of independent sub-domains noDomains computed by this algorithm has to be a power of 2. Hence noDomains is the larger number power of 2 that is smaller than nprocs_i, where nprocs_i = nprow * npcol is the number of processors used in SuperLU_DIST.
Arguments =========
A (input) SuperMatrix* Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number of the linear equations is A->nrow. Matrix A is distributed in NRformat_loc format.
perm_r (input) int_t* Row permutation vector of size A->nrow, which defines the permutation matrix Pr; perm_r[i] = j means row i of A is in position j in Pr*A.
perm_c (output) int_t* Column permutation vector of size A->ncol, which defines the permutation matrix Pc; perm_c[i] = j means column i of A is in position j in A*Pc.
nprocs_i (input) int* Number of processors the input matrix is distributed on in a block row format. It corresponds to number of processors used in SuperLU_DIST.
noDomains (input) int*, must be power of 2 Number of independent domains to be computed by the graph partitioning algorithm. ( noDomains <= nprocs_i )
sizes (output) int_t**, of size 2 * noDomains Returns pointer to an array containing the number of nodes for each sub-domain and each separator. Separators are stored from left to right. Memory for the array is allocated in this routine.
fstVtxSep (output) int_t**, of size 2 * noDomains Returns pointer to an array containing first node for each sub-domain and each separator. Memory for the array is allocated in this routine.
Return value ============ < 0, number of bytes allocated on return from the symbolic factorization. > 0, number of bytes allocated when out of memory.
int lsame_ | ( | char * | ca, | |
char * | cb | |||
) |
Purpose =======
LSAME returns .TRUE. if CA is the same letter as CB regardless of case.
Arguments =========
CA (input) CHARACTER*1 CB (input) CHARACTER*1 CA and CB specify the single characters to be compared.
=====================================================================
void print_options_dist | ( | superlu_options_t * | ) |
void PrintDouble5 | ( | char * | , | |
int_t | , | |||
double * | ||||
) |
void PStatFree | ( | SuperLUStat_t * | ) |
void PStatInit | ( | SuperLUStat_t * | ) |
void PStatPrint | ( | superlu_options_t * | , | |
SuperLUStat_t * | , | |||
gridinfo_t * | ||||
) |
int_t psymbfact_LUXpand | ( | int_t | iam, | |
int_t | n, | |||
int_t | fstVtxLvl_loc, | |||
int_t | vtxXp, | |||
int_t * | p_next, | |||
int_t | min_new_len, | |||
int_t | mem_type, | |||
int_t | rout_type, | |||
int_t | free_prev_mem, | |||
Pslu_freeable_t * | Pslu_freeable, | |||
Llu_symbfact_t * | Llu_symbfact, | |||
vtcsInfo_symbfact_t * | VInfo, | |||
psymbfact_stat_t * | PS | |||
) |
Expand the data structures for L and U during the factorization. Return value: SUCCES_RET - successful return ERROR_RET - error due to a memory alocation failure
int_t psymbfact_LUXpand_RL | ( | int_t | iam, | |
int_t | n, | |||
int_t | vtxXp, | |||
int_t | next, | |||
int_t | len_texp, | |||
int_t | mem_type, | |||
Pslu_freeable_t * | Pslu_freeable, | |||
Llu_symbfact_t * | Llu_symbfact, | |||
vtcsInfo_symbfact_t * | VInfo, | |||
psymbfact_stat_t * | PS | |||
) |
Expand the data structures for L and U during the factorization. Return value: 0 - successful return > 0 - number of bytes allocated when run out of space
int_t psymbfact_LUXpandMem | ( | int_t | iam, | |
int_t | n, | |||
int_t | vtxXp, | |||
int_t | next, | |||
int_t | min_new_len, | |||
int_t | mem_type, | |||
int_t | rout_type, | |||
int_t | free_prev_mem, | |||
Pslu_freeable_t * | Pslu_freeable, | |||
Llu_symbfact_t * | Llu_symbfact, | |||
vtcsInfo_symbfact_t * | VInfo, | |||
psymbfact_stat_t * | PS | |||
) |
Expand the data structures for L and U during the factorization. Return value: 0 - successful return > 0 - number of bytes allocated when run out of space
int_t psymbfact_prLUXpand | ( | int_t | iam, | |
int_t | min_new_len, | |||
MemType | mem_type, | |||
Llu_symbfact_t * | Llu_symbfact, | |||
psymbfact_stat_t * | PS | |||
) |
Expand the data structures for L and U pruned during the factorization. Return value: SUCCES_RET - successful return ERROR_RET - error when run out of space
void pxerbla | ( | char * | , | |
gridinfo_t * | , | |||
int_t | ||||
) |
int_t QuerySpace_dist | ( | int_t | n, | |
int_t | lsub_size, | |||
Glu_freeable_t * | Glu_freeable, | |||
mem_usage_t * | mem_usage | |||
) |
mem_usage consists of the following fields:
void ScalePermstructFree | ( | ScalePermstruct_t * | ) |
void ScalePermstructInit | ( | const | int_t, | |
const | int_t, | |||
ScalePermstruct_t * | ||||
) |
void set_default_options_dist | ( | superlu_options_t * | ) |
float slamch_ | ( | char * | cmach | ) |
Purpose =======
SLAMCH determines single precision machine parameters.
Arguments =========
CMACH (input) CHARACTER*1 Specifies the value to be returned by SLAMCH: = 'E' or 'e', SLAMCH := eps = 'S' or 's , SLAMCH := sfmin = 'B' or 'b', SLAMCH := base = 'P' or 'p', SLAMCH := eps*base = 'N' or 'n', SLAMCH := t = 'R' or 'r', SLAMCH := rnd = 'M' or 'm', SLAMCH := emin = 'U' or 'u', SLAMCH := rmin = 'L' or 'l', SLAMCH := emax = 'O' or 'o', SLAMCH := rmax
where
eps = relative machine precision sfmin = safe minimum, such that 1/sfmin does not overflow base = base of the machine prec = eps*base t = number of (base) digits in the mantissa rnd = 1.0 when rounding occurs in addition, 0.0 otherwise emin = minimum exponent before (gradual) underflow rmin = underflow threshold - base**(emin-1) emax = largest exponent before overflow rmax = overflow threshold - (base**emax)*(1-eps)
=====================================================================
int_t sp_coletree_dist | ( | int_t * | acolst, | |
int_t * | acolend, | |||
int_t * | arow, | |||
int_t | nr, | |||
int_t | nc, | |||
int_t * | parent | |||
) |
Find the elimination tree for A'*A. This uses something similar to Liu's algorithm. It runs in time O(nz(A)*log n) and does not form A'*A.
Input: Sparse matrix A. Numeric values are ignored, so any explicit zeros are treated as nonzero. Output: Integer array of parents representing the elimination tree of the symbolic product A'*A. Each vertex is a column of A, and nc means a root of the elimination forest.
John R. Gilbert, Xerox, 10 Dec 1990 Based on code by JRG dated 1987, 1988, and 1990.
void sp_colorder | ( | superlu_options_t * | options, | |
SuperMatrix * | A, | |||
int_t * | perm_c, | |||
int_t * | etree, | |||
SuperMatrix * | AC | |||
) |
Purpose =======
sp_colorder() permutes the columns of the original matrix. It performs the following steps:
1. Apply column permutation perm_c[] to A's column pointers to form AC;
2. If options->Fact = DOFACT, then (1) Compute column elimination tree etree[] of AC'AC; (2) Post order etree[] to get a postordered elimination tree etree[], and a postorder permutation post[]; (3) Apply post[] permutation to columns of AC; (4) Overwrite perm_c[] with the product perm_c * post.
Arguments =========
options (input) superlu_options_t* Specifies whether or not the elimination tree will be re-used. If options->Fact == DOFACT, this means first time factor A, etree is computed and output. Otherwise, re-factor A, etree is input, unchanged on exit.
A (input) SuperMatrix* Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number of the linear equations is A->nrow. Currently, the type of A can be: Stype = SLU_NC or SLU_NCP; Dtype = SLU__D; Mtype = SLU_GE. In the future, more general A can be handled.
perm_c (input/output) int* Column permutation vector of size A->ncol, which defines the permutation matrix Pc; perm_c[i] = j means column i of A is in position j in A*Pc. If options->Fact == DOFACT, perm_c is both input and output. On output, it is changed according to a postorder of etree. Otherwise, perm_c is input.
etree (input/output) int* Elimination tree of Pc*(A'+A)*Pc', dimension A->ncol. If options->Fact == DOFACT, etree is an output argument, otherwise it is an input argument. Note: etree is a vector of parent pointers for a forest whose vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
AC (output) SuperMatrix* The resulting matrix after applied the column permutation perm_c[] to matrix A. The type of AC can be: Stype = SLU_NCP; Dtype = A->Dtype; Mtype = SLU_GE.
Purpose =======
sp_ienv_dist() is inquired to choose machine-dependent parameters for the local environment. See ISPEC for a description of the parameters.
This version provides a set of parameters which should give good, but not optimal, performance on many of the currently available computers. Users are encouraged to modify this subroutine to set the tuning parameters for their particular machine using the option and problem size information in the arguments.
Arguments =========
ISPEC (input) int Specifies the parameter to be returned as the value of SP_IENV_DIST. = 1: the panel size w; a panel consists of w consecutive columns of matrix A in the process of Gaussian elimination. The best value depends on machine's cache characters. = 2: the relaxation parameter relax; if the number of nodes (columns) in a subtree of the elimination tree is less than relax, this subtree is considered as one supernode, regardless of the their row structures. = 3: the maximum size for a supernode, which must be greater than or equal to relaxation parameter (see case 2); = 4: the minimum row dimension for 2-D blocking to be used; = 5: the minimum column dimension for 2-D blocking to be used; = 6: the estimated fills factor for the adjacency structures of L and U, compared with A;
(SP_IENV_DIST) (output) int >= 0: the value of the parameter specified by ISPEC < 0: if SP_IENV_DIST = -k, the k-th argument had an illegal value.
=====================================================================
sp_ienv_dist() is inquired to choose machine-dependent parameters for the local environment. See ISPEC for a description of the parameters.
This version provides a set of parameters which should give good, but not optimal, performance on many of the currently available computers. Users are encouraged to modify this subroutine to set the tuning parameters for their particular machine using the option and problem size information in the arguments.
Arguments =========
ISPEC (input) int Specifies the parameter to be returned as the value of SP_IENV_DIST. = 1: the panel size w; a panel consists of w consecutive columns of matrix A in the process of Gaussian elimination. The best value depends on machine's cache characters. = 2: the relaxation parameter relax; if the number of nodes (columns) in a subtree of the elimination tree is less than relax, this subtree is considered as one supernode, regardless of the their row structures. = 3: the maximum size for a supernode, which must be greater than or equal to relaxation parameter (see case 2); = 4: the minimum row dimension for 2-D blocking to be used; = 5: the minimum column dimension for 2-D blocking to be used; = 6: the estimated fills factor for the adjacency structures of L and U, compared with A;
(SP_IENV_DIST) (output) int >= 0: the value of the parameter specified by ISPEC < 0: if SP_IENV_DIST = -k, the k-th argument had an illegal value.
=====================================================================
void superlu_abort_and_exit_dist | ( | char * | ) |
void superlu_free_dist | ( | void * | ) |
void superlu_gridexit | ( | gridinfo_t * | ) |
void superlu_gridinit | ( | MPI_Comm | , | |
int_t | , | |||
int_t | , | |||
gridinfo_t * | ||||
) |
void superlu_gridmap | ( | MPI_Comm | , | |
int_t | , | |||
int_t | , | |||
int_t | [], | |||
int_t | , | |||
gridinfo_t * | ||||
) |
void* superlu_malloc_dist | ( | size_t | ) |
double SuperLU_timer_ | ( | ) |
int_t symbfact | ( | superlu_options_t * | options, | |
int | pnum, | |||
SuperMatrix * | A, | |||
int_t * | perm_c, | |||
int_t * | etree, | |||
Glu_persist_t * | Glu_persist, | |||
Glu_freeable_t * | Glu_freeable | |||
) |
Purpose ======= symbfact() performs a symbolic factorization on matrix A and sets up the nonzero data structures which are suitable for supernodal Gaussian elimination with no pivoting (GENP). This routine features: o depth-first search (DFS) o supernodes o symmetric structure pruning
Return value ============ < 0, number of bytes needed for LSUB. = 0, matrix dimension is 1. > 0, number of bytes allocated when out of memory.
float symbfact_dist | ( | int | , | |
int | , | |||
SuperMatrix * | , | |||
int_t * | , | |||
int_t * | , | |||
int_t * | , | |||
int_t * | , | |||
Pslu_freeable_t * | , | |||
MPI_Comm * | , | |||
MPI_Comm * | , | |||
mem_usage_t * | ||||
) |
Purpose ======= symbfact_dist() performs symbolic factorization of matrix A suitable for performing the supernodal Gaussian elimination with no pivoting (GEPP). This routine computes the structure of one column of L and one row of U at a time. It uses: o distributed input matrix o supernodes o symmetric structure pruning
Arguments =========
nprocs_num (input) int Number of processors SuperLU_DIST is executed on, and the input matrix is distributed on.
nprocs_symb (input) int Number of processors on which the symbolic factorization is performed. It is equal to the number of independent domains idenfied in the graph partitioning algorithm executed previously and has to be a power of 2. It corresponds to number of leaves in the separator tree.
A (input) SuperMatrix* Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number of the linear equations is A->nrow. Matrix A is distributed in NRformat_loc format. Matrix A is not yet permuted by perm_c.
perm_c (input) int_t* Column permutation vector of size A->ncol, which defines the permutation matrix Pc; perm_c[i] = j means column i of A is in position j in A*Pc.
perm_r (input) int_t* Row permutation vector of size A->nrow, which defines the permutation matrix Pr; perm_r[i] = j means column i of A is in position j in Pr*A.
sizes (input) int_t* Contains the number of vertices in each separator.
fstVtxSep (input) int_t* Contains first vertex for each separator.
Pslu_freeable (output) Pslu_freeable_t* Returns the local L and U structure, and global to local information on the indexing of the vertices. Contains all the information necessary for performing the data distribution towards the numeric factorization.
num_comm (input) MPI_Comm* Communicator for numerical factorization
symb_comm (input) MPI_Comm* Communicator for symbolic factorization
symb_mem_usage (input) mem_usage_t * Statistics on memory usage.
Return value ============ < 0, number of bytes allocated on return from the symbolic factorization. > 0, number of bytes allocated when out of memory.
Sketch of the algorithm =======================
Distrbute the vertices on the processors using a subtree to subcube algorithm.
Redistribute the structure of the input matrix A according to the subtree to subcube computed previously for the symbolic factorization routine. This implies in particular a distribution from nprocs_num processors to nprocs_symb processors.
Perform symbolic factorization guided by the separator tree provided by a graph partitioning algorithm. The symbolic factorization uses a combined left-looking, right-looking approach.
int_t symbfact_SubFree | ( | Glu_freeable_t * | Glu_freeable | ) |
Deallocate storage of the data structures common to symbolic factorization routines.
int_t symbfact_SubInit | ( | fact_t | fact, | |
void * | work, | |||
int_t | lwork, | |||
int_t | m, | |||
int_t | n, | |||
int_t | annz, | |||
Glu_persist_t * | Glu_persist, | |||
Glu_freeable_t * | Glu_freeable | |||
) |
Allocate storage for the data structures common to symbolic factorization routines. For those unpredictable size, make a guess as FILL * nnz(A). Return value: If lwork = -1, return the estimated amount of space required, plus n; otherwise, return the amount of space actually allocated when memory allocation failure occurred.
int_t symbfact_SubXpand | ( | int_t | n, | |
int_t | jcol, | |||
int_t | next, | |||
MemType | mem_type, | |||
int_t * | maxlen, | |||
Glu_freeable_t * | Glu_freeable | |||
) |
Expand the data structures for L and U during the factorization. Return value: 0 - successful return > 0 - number of bytes allocated when run out of space
int xerbla_ | ( | char * | srname, | |
int * | info | |||
) |
Purpose =======
XERBLA is an error handler for the LAPACK routines. It is called by an LAPACK routine if an input parameter has an invalid value. A message is printed and execution stops.
Installers may consider modifying the STOP statement in order to call system-specific exception-handling facilities.
Arguments =========
SRNAME (input) CHARACTER*6 The name of the routine which called XERBLA.
INFO (input) INT The position of the invalid parameter in the parameter list
of the calling routine.
=====================================================================