FLA_SA_LU_unb.c File Reference

(r)


Functions

FLA_Error FLA_SA_LU_unb (FLA_Obj U, FLA_Obj D, FLA_Obj p, FLA_Obj L)

Function Documentation

FLA_Error FLA_SA_LU_unb ( FLA_Obj  U,
FLA_Obj  D,
FLA_Obj  p,
FLA_Obj  L 
)

References cblas_ccopy(), cblas_cgeru(), cblas_cscal(), cblas_cswap(), cblas_dcopy(), cblas_dger(), cblas_dscal(), cblas_dswap(), cblas_icamax(), cblas_idamax(), cblas_isamax(), cblas_izamax(), cblas_scopy(), cblas_sger(), cblas_sscal(), cblas_sswap(), cblas_zcopy(), cblas_zgeru(), cblas_zscal(), cblas_zswap(), CblasColMajor, ccopy(), cgeru(), cscal(), cswap(), dcopy(), dger(), dscal(), dswap(), FLA_Copy_external(), FLA_MINUS_ONE, FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_ldim(), FLA_Obj_length(), FLA_ONE, FLA_Triangularize(), icamax(), idamax(), dcomplex::imag, scomplex::imag, isamax(), izamax(), dcomplex::real, scomplex::real, scopy(), sger(), sscal(), sswap(), zcopy(), zgeru(), zscal(), and zswap().

Referenced by FLA_SA_LU_blk().

00038 {
00039   FLA_Datatype datatype;
00040   int          m_U, ldim_U;
00041   int          m_D, ldim_D;
00042   int               ldim_L;
00043   int          m_U_min_j, m_U_min_j_min_1; 
00044   int          j, ipiv;
00045   int*         buff_p;
00046   int*         buff_1_int;
00047 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00048   CBLAS_ORDER  cblas_order = CblasColMajor;
00049 #endif
00050 
00051   if ( FLA_Obj_has_zero_dim( U ) ) return FLA_SUCCESS;
00052   
00053   datatype = FLA_Obj_datatype( U );
00054 
00055   m_U      = FLA_Obj_length( U );
00056   ldim_U   = FLA_Obj_ldim( U );
00057 
00058   m_D      = FLA_Obj_length( D );
00059   ldim_D   = FLA_Obj_ldim( D );
00060   
00061   ldim_L   = FLA_Obj_ldim( L );
00062 
00063   FLA_Copy_external( U, L );
00064   FLA_Triangularize( FLA_UPPER_TRIANGULAR, FLA_NONUNIT_DIAG, L );
00065 
00066   buff_p     = ( int * ) FLA_INT_PTR( p );
00067   buff_1_int = ( int * ) FLA_INT_PTR( FLA_ONE );
00068 
00069   switch ( datatype ){
00070 
00071   case FLA_FLOAT:
00072   {
00073     float* buff_U      = ( float * ) FLA_FLOAT_PTR( U );
00074     float* buff_D      = ( float * ) FLA_FLOAT_PTR( D );
00075     float* buff_L      = ( float * ) FLA_FLOAT_PTR( L );
00076     float* buff_minus1 = ( float * ) FLA_FLOAT_PTR( FLA_MINUS_ONE );
00077     float  L_tmp;
00078     float  D_tmp;
00079     float  d_inv_Ljj;
00080 
00081     for ( j = 0; j < m_U; ++j )
00082     {
00083 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00084       ipiv = cblas_isamax( m_D, 
00085                            buff_D + j*ldim_D + 0,
00086                            *buff_1_int );
00087 #else
00088       ipiv = FLA_C2F( isamax )( &m_D, 
00089                                 buff_D + j*ldim_D + 0,
00090                                 buff_1_int ) - 1;
00091 #endif
00092 
00093       L_tmp = buff_L[ j*ldim_L + j    ];
00094       D_tmp = buff_D[ j*ldim_D + ipiv ];
00095 
00096       if ( dabs( L_tmp ) < dabs( D_tmp ) )
00097       {
00098 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00099         cblas_sswap( m_U,
00100                      buff_L + 0*ldim_L + j,    ldim_L,
00101                      buff_D + 0*ldim_D + ipiv, ldim_D ); 
00102 #else
00103         FLA_C2F( sswap )( &m_U,
00104                           buff_L + 0*ldim_L + j,    &ldim_L,
00105                           buff_D + 0*ldim_D + ipiv, &ldim_D ); 
00106 #endif
00107 
00108         buff_p[ j ] = ipiv + m_U - j;
00109       }        
00110       else
00111       {
00112         buff_p[ j ] = 0;
00113       }
00114 
00115       d_inv_Ljj = 1.0F / buff_L[ j*ldim_L + j ];
00116 
00117 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00118       cblas_sscal( m_D,
00119                    d_inv_Ljj,
00120                    buff_D + j*ldim_D + 0, *buff_1_int ); 
00121 #else
00122       FLA_C2F( sscal )( &m_D,
00123                         &d_inv_Ljj,
00124                         buff_D + j*ldim_D + 0, buff_1_int ); 
00125 #endif
00126 
00127       m_U_min_j_min_1 = m_U - j - 1;
00128 
00129       if ( m_U_min_j_min_1 > 0  )
00130       {
00131 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00132         cblas_sger( cblas_order,
00133                     m_D, m_U_min_j_min_1,
00134                     *buff_minus1, 
00135                     buff_D +     j*ldim_D + 0, *buff_1_int,
00136                     buff_L + (j+1)*ldim_L + j, ldim_L,
00137                     buff_D + (j+1)*ldim_D + 0, ldim_D );
00138 #else
00139         FLA_C2F( sger )( &m_D, &m_U_min_j_min_1,
00140                          buff_minus1, 
00141                          buff_D +     j*ldim_D + 0, buff_1_int,
00142                          buff_L + (j+1)*ldim_L + j, &ldim_L,
00143                          buff_D + (j+1)*ldim_D + 0, &ldim_D );
00144 #endif
00145       }
00146 
00147       m_U_min_j = m_U - j;
00148 
00149       if ( m_U_min_j > 0 ) 
00150       {
00151 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00152         cblas_scopy( m_U_min_j,
00153                      buff_L + j*ldim_L + j, ldim_L,
00154                      buff_U + j*ldim_U + j, ldim_U );
00155 #else
00156         FLA_C2F( scopy )( &m_U_min_j,
00157                           buff_L + j*ldim_L + j, &ldim_L,
00158                           buff_U + j*ldim_U + j, &ldim_U );
00159 #endif
00160       }
00161     }                 
00162     break;
00163   }
00164 
00165   case FLA_DOUBLE:
00166   {
00167     double* buff_U      = ( double * ) FLA_DOUBLE_PTR( U );
00168     double* buff_D      = ( double * ) FLA_DOUBLE_PTR( D );
00169     double* buff_L      = ( double * ) FLA_DOUBLE_PTR( L );
00170     double* buff_minus1 = ( double * ) FLA_DOUBLE_PTR( FLA_MINUS_ONE );
00171     double  L_tmp;
00172     double  D_tmp;
00173     double  d_inv_Ljj;
00174 
00175     for ( j = 0; j < m_U; ++j )
00176     {
00177 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00178       ipiv = cblas_idamax( m_D, 
00179                            buff_D + j*ldim_D + 0,
00180                            *buff_1_int );
00181 #else
00182       ipiv = FLA_C2F( idamax )( &m_D, 
00183                                 buff_D + j*ldim_D + 0,
00184                                 buff_1_int ) - 1;
00185 #endif
00186 
00187       L_tmp = buff_L[ j*ldim_L + j    ];
00188       D_tmp = buff_D[ j*ldim_D + ipiv ];
00189 
00190       if ( dabs( L_tmp ) < dabs( D_tmp ) )
00191       {
00192 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00193         cblas_dswap( m_U,
00194                      buff_L + 0*ldim_L + j,    ldim_L,
00195                      buff_D + 0*ldim_D + ipiv, ldim_D ); 
00196 #else
00197         FLA_C2F( dswap )( &m_U,
00198                           buff_L + 0*ldim_L + j,    &ldim_L,
00199                           buff_D + 0*ldim_D + ipiv, &ldim_D ); 
00200 #endif
00201 
00202         buff_p[ j ] = ipiv + m_U - j;
00203       }        
00204       else
00205       {
00206         buff_p[ j ] = 0;
00207       }
00208 
00209       d_inv_Ljj = 1.0 / buff_L[ j*ldim_L + j ];
00210 
00211 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00212       cblas_dscal( m_D,
00213                    d_inv_Ljj,
00214                    buff_D + j*ldim_D + 0, *buff_1_int ); 
00215 #else
00216       FLA_C2F( dscal )( &m_D,
00217                         &d_inv_Ljj,
00218                         buff_D + j*ldim_D + 0, buff_1_int ); 
00219 #endif
00220 
00221       m_U_min_j_min_1 = m_U - j - 1;
00222 
00223       if ( m_U_min_j_min_1 > 0  )
00224       {
00225 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00226         cblas_dger( cblas_order,
00227                     m_D, m_U_min_j_min_1,
00228                     *buff_minus1, 
00229                     buff_D +     j*ldim_D + 0, *buff_1_int,
00230                     buff_L + (j+1)*ldim_L + j, ldim_L,
00231                     buff_D + (j+1)*ldim_D + 0, ldim_D );
00232 #else
00233         FLA_C2F( dger )( &m_D, &m_U_min_j_min_1,
00234                          buff_minus1, 
00235                          buff_D +     j*ldim_D + 0, buff_1_int,
00236                          buff_L + (j+1)*ldim_L + j, &ldim_L,
00237                          buff_D + (j+1)*ldim_D + 0, &ldim_D );
00238 #endif
00239       }
00240 
00241       m_U_min_j = m_U - j;
00242 
00243       if ( m_U_min_j > 0 ) 
00244       {
00245 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00246         cblas_dcopy( m_U_min_j,
00247                      buff_L + j*ldim_L + j, ldim_L,
00248                      buff_U + j*ldim_U + j, ldim_U );
00249 #else
00250         FLA_C2F( dcopy )( &m_U_min_j,
00251                           buff_L + j*ldim_L + j, &ldim_L,
00252                           buff_U + j*ldim_U + j, &ldim_U );
00253 #endif
00254       }
00255     }                 
00256     break;
00257   }
00258 
00259   case FLA_COMPLEX:
00260   {
00261     scomplex* buff_U      = ( scomplex * ) FLA_COMPLEX_PTR( U );
00262     scomplex* buff_D      = ( scomplex * ) FLA_COMPLEX_PTR( D );
00263     scomplex* buff_L      = ( scomplex * ) FLA_COMPLEX_PTR( L );
00264     scomplex* buff_minus1 = ( scomplex * ) FLA_COMPLEX_PTR( FLA_MINUS_ONE );
00265     scomplex  L_tmp;
00266     scomplex  D_tmp;
00267     scomplex  d_inv_Ljj;
00268     scomplex  Ljj;
00269     float     temp;
00270 
00271     for ( j = 0; j < m_U; ++j )
00272     {
00273 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00274       ipiv = cblas_icamax( m_D, 
00275                            buff_D + j*ldim_D + 0,
00276                            *buff_1_int );
00277 #else
00278       ipiv = FLA_C2F( icamax )( &m_D, 
00279                                 buff_D + j*ldim_D + 0,
00280                                 buff_1_int ) - 1;
00281 #endif
00282 
00283       L_tmp = buff_L[ j*ldim_L + j    ];
00284       D_tmp = buff_D[ j*ldim_D + ipiv ];
00285 
00286       if ( dabs( L_tmp.real + L_tmp.imag ) < dabs( D_tmp.real + D_tmp.imag ) )
00287       {
00288 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00289         cblas_cswap( m_U,
00290                      buff_L + 0*ldim_L + j,    ldim_L,
00291                      buff_D + 0*ldim_D + ipiv, ldim_D ); 
00292 #else
00293         FLA_C2F( cswap )( &m_U,
00294                           buff_L + 0*ldim_L + j,    &ldim_L,
00295                           buff_D + 0*ldim_D + ipiv, &ldim_D ); 
00296 #endif
00297 
00298         buff_p[ j ] = ipiv + m_U - j;
00299       }        
00300       else
00301       {
00302         buff_p[ j ] = 0;
00303       }
00304 
00305       Ljj = buff_L[ j*ldim_L + j ];
00306 
00307       // d_inv_Ljj = 1.0 / Ljj
00308       temp = 1.0F / ( Ljj.real * Ljj.real +
00309                       Ljj.imag * Ljj.imag );
00310       d_inv_Ljj.real = Ljj.real *  temp;
00311       d_inv_Ljj.imag = Ljj.imag * -temp;
00312 
00313 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00314       cblas_cscal( m_D,
00315                    d_inv_Ljj,
00316                    buff_D + j*ldim_D + 0, *buff_1_int ); 
00317 #else
00318       FLA_C2F( cscal )( &m_D,
00319                         &d_inv_Ljj,
00320                         buff_D + j*ldim_D + 0, buff_1_int ); 
00321 #endif
00322 
00323       m_U_min_j_min_1 = m_U - j - 1;
00324 
00325       if ( m_U_min_j_min_1 > 0  )
00326       {
00327 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00328         cblas_cgeru( cblas_order,
00329                      m_D, m_U_min_j_min_1,
00330                      *buff_minus1, 
00331                      buff_D +     j*ldim_D + 0, *buff_1_int,
00332                      buff_L + (j+1)*ldim_L + j, ldim_L,
00333                      buff_D + (j+1)*ldim_D + 0, ldim_D );
00334 #else
00335         FLA_C2F( cgeru )( &m_D, &m_U_min_j_min_1,
00336                           buff_minus1, 
00337                           buff_D +     j*ldim_D + 0, buff_1_int,
00338                           buff_L + (j+1)*ldim_L + j, &ldim_L,
00339                           buff_D + (j+1)*ldim_D + 0, &ldim_D );
00340 #endif
00341       }
00342 
00343       m_U_min_j = m_U - j;
00344 
00345       if ( m_U_min_j > 0 ) 
00346       {
00347 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00348         cblas_ccopy( m_U_min_j,
00349                      buff_L + j*ldim_L + j, ldim_L,
00350                      buff_U + j*ldim_U + j, ldim_U );
00351 #else
00352         FLA_C2F( ccopy )( &m_U_min_j,
00353                           buff_L + j*ldim_L + j, &ldim_L,
00354                           buff_U + j*ldim_U + j, &ldim_U );
00355 #endif
00356       }
00357     }                 
00358     break;
00359   }
00360 
00361   case FLA_DOUBLE_COMPLEX:
00362   {
00363     dcomplex* buff_U      = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( U );
00364     dcomplex* buff_D      = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( D );
00365     dcomplex* buff_L      = ( dcomplex * ) FLA_COMPLEX_PTR( L );
00366     dcomplex* buff_minus1 = ( dcomplex * ) FLA_COMPLEX_PTR( FLA_MINUS_ONE );
00367     dcomplex  L_tmp;
00368     dcomplex  D_tmp;
00369     dcomplex  d_inv_Ljj;
00370     dcomplex  Ljj;
00371     double    temp;
00372 
00373     for ( j = 0; j < m_U; ++j )
00374     {
00375 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00376       ipiv = cblas_izamax( m_D, 
00377                            buff_D + j*ldim_D + 0,
00378                            *buff_1_int );
00379 #else
00380       ipiv = FLA_C2F( izamax )( &m_D, 
00381                                 buff_D + j*ldim_D + 0,
00382                                 buff_1_int ) - 1;
00383 #endif
00384 
00385       L_tmp = buff_L[ j*ldim_L + j    ];
00386       D_tmp = buff_D[ j*ldim_D + ipiv ];
00387 
00388       if ( dabs( L_tmp.real + L_tmp.imag ) < dabs( D_tmp.real + D_tmp.imag ) )
00389       {
00390 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00391         cblas_zswap( m_U,
00392                      buff_L + 0*ldim_L + j,    ldim_L,
00393                      buff_D + 0*ldim_D + ipiv, ldim_D ); 
00394 #else
00395         FLA_C2F( zswap )( &m_U,
00396                           buff_L + 0*ldim_L + j,    &ldim_L,
00397                           buff_D + 0*ldim_D + ipiv, &ldim_D ); 
00398 #endif
00399 
00400         buff_p[ j ] = ipiv + m_U - j;
00401       }        
00402       else
00403       {
00404         buff_p[ j ] = 0;
00405       }
00406 
00407       Ljj = buff_L[ j*ldim_L + j ];
00408 
00409       // d_inv_Ljj = 1.0 / Ljj
00410       temp = 1.0  / ( Ljj.real * Ljj.real +
00411                       Ljj.imag * Ljj.imag );
00412       d_inv_Ljj.real = Ljj.real *  temp;
00413       d_inv_Ljj.imag = Ljj.imag * -temp;
00414 
00415 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00416       cblas_zscal( m_D,
00417                    d_inv_Ljj,
00418                    buff_D + j*ldim_D + 0, *buff_1_int ); 
00419 #else
00420       FLA_C2F( zscal )( &m_D,
00421                         &d_inv_Ljj,
00422                         buff_D + j*ldim_D + 0, buff_1_int ); 
00423 #endif
00424 
00425       m_U_min_j_min_1 = m_U - j - 1;
00426 
00427       if ( m_U_min_j_min_1 > 0  )
00428       {
00429 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00430         cblas_zgeru( cblas_order,
00431                      m_D, m_U_min_j_min_1,
00432                      *buff_minus1, 
00433                      buff_D +     j*ldim_D + 0, *buff_1_int,
00434                      buff_L + (j+1)*ldim_L + j, ldim_L,
00435                      buff_D + (j+1)*ldim_D + 0, ldim_D );
00436 #else
00437         FLA_C2F( zgeru )( &m_D, &m_U_min_j_min_1,
00438                           buff_minus1, 
00439                           buff_D +     j*ldim_D + 0, buff_1_int,
00440                           buff_L + (j+1)*ldim_L + j, &ldim_L,
00441                           buff_D + (j+1)*ldim_D + 0, &ldim_D );
00442 #endif
00443       }
00444 
00445       m_U_min_j = m_U - j;
00446 
00447       if ( m_U_min_j > 0 ) 
00448       {
00449 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00450         cblas_zcopy( m_U_min_j,
00451                      buff_L + j*ldim_L + j, ldim_L,
00452                      buff_U + j*ldim_U + j, ldim_U );
00453 #else
00454         FLA_C2F( zcopy )( &m_U_min_j,
00455                           buff_L + j*ldim_L + j, &ldim_L,
00456                           buff_U + j*ldim_U + j, &ldim_U );
00457 #endif
00458       }
00459     }                 
00460     break;
00461   }
00462 
00463   }
00464 
00465   return FLA_SUCCESS;
00466 }


Generated on Mon Jul 6 05:45:56 2009 for libflame by  doxygen 1.5.9