SRC/pzgstrf.c File Reference

Performs LU factorization in parallel. More...

#include <math.h>
#include "superlu_zdefs.h"

Include dependency graph for pzgstrf.c:


Functions

static void pzgstrf2 (superlu_options_t *options, int_t k, double thresh, Glu_persist_t *Glu_persist, gridinfo_t *grid, LocalLU_t *Llu, MPI_Request *U_diag_blk_send_req, SuperLUStat_t *stat, int *info)
static void pzgstrs2 (int_t, int_t, Glu_persist_t *, gridinfo_t *, LocalLU_t *, SuperLUStat_t *)
int_t pzgstrf (superlu_options_t *options, int m, int n, double anorm, LUstruct_t *LUstruct, gridinfo_t *grid, SuperLUStat_t *stat, int *info)
static int probe_recv (int iam, int source, int tag, MPI_Datatype datatype, MPI_Comm comm, int buf_size)

Detailed Description

 -- Distributed SuperLU routine (version 2.3) --
 Lawrence Berkeley National Lab, Univ. of California Berkeley.
 October 15, 2008

 Modified:
 September 1, 1999
     Feburary 7, 2001    use MPI_Isend/MPI_Irecv
     October 15, 2008  latency-reducing panel factorization

 Sketch of the algorithm
 =======================

 The following relations hold:
     * A_kk = L_kk * U_kk
     * L_ik = Aik * U_kk^(-1)
     * U_kj = L_kk^(-1) * A_kj

              ----------------------------------
              |   |                            |
              ----|-----------------------------
              |   | \ U_kk|                    |
              |   |   \   |        U_kj        |
              |   |L_kk \ |         ||         |
              ----|-------|---------||----------
              |   |       |         \/         |
              |   |       |                    |
              |   |       |                    |
              |   |       |                    |
              |   | L_ik ==>       A_ij        |
              |   |       |                    |
              |   |       |                    |
              |   |       |                    |
              ----------------------------------

 Handle the first block of columns separately.
     * Factor diagonal and subdiagonal blocks and test for exact
       singularity. ( pzgstrf2(0), one column at a time )
     * Compute block row of U
     * Update trailing matrix

 Loop over the remaining blocks of columns.
   mycol = MYCOL( iam, grid );
   myrow = MYROW( iam, grid );
   N = nsupers;
   For (k = 1; k < N; ++k) {
       krow = PROW( k, grid );
       kcol = PCOL( k, grid );
       Pkk = PNUM( krow, kcol, grid );

     * Factor diagonal and subdiagonal blocks and test for exact
       singularity.
       if ( mycol == kcol ) {
           pzgstrf2(k), one column at a time 
       }

     * Parallel triangular solve
       if ( iam == Pkk ) multicast L_k,k to this process row;
       if ( myrow == krow && mycol != kcol ) {
          Recv L_k,k from process Pkk;
          for (j = k+1; j < N; ++j) 
              if ( PCOL( j, grid ) == mycol && A_k,j != 0 )
                 U_k,j = L_k,k \ A_k,j;
       }

     * Parallel rank-k update
       if ( myrow == krow ) multicast U_k,k+1:N to this process column;
       if ( mycol == kcol ) multicast L_k+1:N,k to this process row;
       if ( myrow != krow ) {
          Pkj = PNUM( krow, mycol, grid );
          Recv U_k,k+1:N from process Pkj;
       }
       if ( mycol != kcol ) {
          Pik = PNUM( myrow, kcol, grid );
          Recv L_k+1:N,k from process Pik;
       }
       for (j = k+1; k < N; ++k) {
          for (i = k+1; i < N; ++i) 
              if ( myrow == PROW( i, grid ) && mycol == PCOL( j, grid )
                   && L_i,k != 0 && U_k,j != 0 )
                 A_i,j = A_i,j - L_i,k * U_k,j;
       }
  }

 

Function Documentation

static int probe_recv ( int  iam,
int  source,
int  tag,
MPI_Datatype  datatype,
MPI_Comm  comm,
int  buf_size 
) [static]

int_t pzgstrf ( superlu_options_t options,
int  m,
int  n,
double  anorm,
LUstruct_t LUstruct,
gridinfo_t grid,
SuperLUStat_t stat,
int *  info 
)

 Purpose
 =======

  PZGSTRF performs the LU factorization in parallel.

 Arguments
 =========

 options (input) superlu_options_t*
         The structure defines the input parameters to control
         how the LU decomposition will be performed.
         The following field should be defined:
         o ReplaceTinyPivot (yes_no_t)
           Specifies whether to replace the tiny diagonals by
           sqrt(epsilon)*norm(A) during LU factorization.

 m      (input) int
        Number of rows in the matrix.

 n      (input) int
        Number of columns in the matrix.

 anorm  (input) double
        The norm of the original matrix A, or the scaled A if
        equilibration was done.

 LUstruct (input/output) LUstruct_t*
         The data structures to store the distributed L and U factors.
         The following fields should be defined:

         o Glu_persist (input) Glu_persist_t*
           Global data structure (xsup, supno) replicated on all processes,
           describing the supernode partition in the factored matrices
           L and U:
	       xsup[s] is the leading column of the s-th supernode,
             supno[i] is the supernode number to which column i belongs.

         o Llu (input/output) LocalLU_t*
           The distributed data structures to store L and U factors.
           See superlu_zdefs.h for the definition of 'LocalLU_t'.

 grid   (input) gridinfo_t*
        The 2D process mesh. It contains the MPI communicator, the number
        of process rows (NPROW), the number of process columns (NPCOL),
        and my process rank. It is an input argument to all the
        parallel routines.
        Grid can be initialized by subroutine SUPERLU_GRIDINIT.
        See superlu_zdefs.h for the definition of 'gridinfo_t'.

 stat   (output) SuperLUStat_t*
        Record the statistics on runtime and floating-point operation count.
        See util.h for the definition of 'SuperLUStat_t'.

 info   (output) int*
        = 0: successful exit
        < 0: if info = -i, the i-th argument had an illegal value
        > 0: if info = i, U(i,i) is exactly zero. The factorization has
             been completed, but the factor U is exactly singular,
             and division by zero will occur if it is used to solve a
             system of equations.
 

static void pzgstrf2 ( superlu_options_t options,
int_t  k,
double  thresh,
Glu_persist_t Glu_persist,
gridinfo_t grid,
LocalLU_t Llu,
MPI_Request *  U_diag_blk_send_req,
SuperLUStat_t stat,
int *  info 
) [static]

 Purpose
 =======
   Panel factorization -- block column k

   Factor diagonal and subdiagonal blocks and test for exact singularity.
   Only the column processes that own block column *k* participate
   in the work.

 Arguments
 =========

 k      (input) int (global)
        The column number of the block column to be factorized.

 thresh (input) double (global)
        The threshold value = s_eps * anorm.

 Glu_persist (input) Glu_persist_t*
        Global data structures (xsup, supno) replicated on all processes.

 grid   (input) gridinfo_t*
        The 2D process mesh.

 Llu    (input/output) LocalLU_t*
        Local data structures to store distributed L and U matrices.

 U_diag_blk_send_req (input/output) MPI_Request*
        List of send requests to send down the diagonal block of U.

 stat   (output) SuperLUStat_t*
        Record the statistics about the factorization.
        See SuperLUStat_t structure defined in util.h.

 info   (output) int*
        = 0: successful exit
        < 0: if info = -i, the i-th argument had an illegal value
        > 0: if info = i, U(i,i) is exactly zero. The factorization has
             been completed, but the factor U is exactly singular,
             and division by zero will occur if it is used to solve a
             system of equations.
 

ALWAYS SEND TO ALL OTHERS - TO FIX

static void pzgstrs2 ( int_t  m,
int_t  k,
Glu_persist_t Glu_persist,
gridinfo_t grid,
LocalLU_t Llu,
SuperLUStat_t stat 
) [static]


Generated on Sat Aug 29 04:22:02 2009 for SuperLUDistributed by  doxygen 1.5.9