FLA_LU_incpiv_aux.h File Reference

(r)

Go to the source code of this file.

Functions

FLA_Error FLA_SA_Apply_pivots (FLA_Obj C, FLA_Obj E, FLA_Obj p)
FLA_Error FLA_SA_LU_blk (FLA_Obj U, FLA_Obj D, FLA_Obj p, FLA_Obj L, dim_t nb_alg)
FLA_Error FLA_SA_LU_unb (FLA_Obj U, FLA_Obj D, FLA_Obj p, FLA_Obj L)
FLA_Error FLA_SA_FS_blk (FLA_Obj L, FLA_Obj D, FLA_Obj p, FLA_Obj C, FLA_Obj E, dim_t nb_alg)
FLA_Error FLASH_LU_incpiv_var1 (FLA_Obj A, FLA_Obj p, FLA_Obj L, dim_t nb_alg)
FLA_Error FLASH_LU_incpiv_var2 (FLA_Obj A, FLA_Obj p, FLA_Obj L, dim_t nb_alg)
FLA_Error FLASH_Trsm_piv (FLA_Obj A, FLA_Obj B, FLA_Obj p)
FLA_Error FLASH_SA_LU (FLA_Obj B, FLA_Obj C, FLA_Obj D, FLA_Obj E, FLA_Obj p, FLA_Obj L, dim_t nb_alg)
FLA_Error FLASH_SA_FS (FLA_Obj L, FLA_Obj D, FLA_Obj p, FLA_Obj C, FLA_Obj E, dim_t nb_alg)
FLA_Error FLASH_FS_incpiv_aux1 (FLA_Obj A, FLA_Obj p, FLA_Obj L, FLA_Obj b, dim_t nb_alg)
FLA_Error FLASH_FS_incpiv_aux2 (FLA_Obj L, FLA_Obj D, FLA_Obj p, FLA_Obj C, FLA_Obj E, dim_t nb_alg)


Function Documentation

FLA_Error FLA_SA_Apply_pivots ( FLA_Obj  C,
FLA_Obj  E,
FLA_Obj  p 
)

References cblas_cswap(), cblas_dswap(), cblas_sswap(), cblas_zswap(), cswap(), dswap(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_ldim(), FLA_Obj_length(), FLA_Obj_width(), sswap(), and zswap().

Referenced by FLA_SA_FS_blk() and FLA_SA_LU_blk().

/*
 * FLA_SA_Apply_pivots( C, E, p ): apply the row interchanges recorded in p
 * to the pair of matrices C and E.
 *
 * For each i in [0, length(p)) with p[i] != 0, row i of C is swapped with
 * row ( p[i] - ( length(C) - i ) ) of E.  width(C) elements are exchanged,
 * strided by the respective leading dimensions.  p[i] == 0 means that no
 * interchange is performed for row i.
 *
 * Returns FLA_SUCCESS in every case, including the early exit taken when C
 * has a zero dimension and the case where the datatype of C matches none
 * of the four switch labels (the switch then does nothing).
 *
 * NOTE(review): E is accessed with C's datatype and C's width; presumably
 * callers guarantee that E has the same datatype and at least width(C)
 * columns -- confirm against callers.
 */
00036 {
00037   FLA_Datatype datatype;
00038   int          m_C, n_C, ldim_C;
00039   int          m_E,      ldim_E;
00040   int          m_p;
00041   int          i;
00042   int*         buff_p;
00043 
        /* Nothing to do for an empty C. */
00044   if ( FLA_Obj_has_zero_dim( C ) ) return FLA_SUCCESS;
00045 
        /* The datatype of C alone selects the swap routine used below. */
00046   datatype = FLA_Obj_datatype( C );
00047 
00048   m_C    = FLA_Obj_length( C );
00049   n_C    = FLA_Obj_width( C );
00050   ldim_C = FLA_Obj_ldim( C );
00051 
        /* m_E is assigned here but never read afterwards. */
00052   m_E    = FLA_Obj_length( E );
00053   ldim_E = FLA_Obj_ldim( E );
00054 
        /* The number of pivot entries processed is the length of p. */
00055   m_p    = FLA_Obj_length( p );
00056   
00057   buff_p = ( int * ) FLA_INT_PTR( p );
00058 
00059 
        /* The four cases are identical except for the element type and the
           swap routine; each offers a CBLAS and a Fortran-style call. */
00060   switch ( datatype ){
00061 
00062   case FLA_FLOAT:
00063   {
00064     float* buff_C = ( float * ) FLA_FLOAT_PTR( C );
00065     float* buff_E = ( float * ) FLA_FLOAT_PTR( E );
00066 
00067     for ( i = 0; i < m_p; ++i )
00068     {
          /* The brace-less if guards exactly one swap call: whichever of
             the two the preprocessor keeps. */
00069       if ( buff_p[ i ] != 0 ) 
00070 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00071         cblas_sswap( n_C, 
00072                      buff_C + 0*ldim_C + i,                         ldim_C, 
00073                      buff_E + 0*ldim_E + buff_p[ i ] - ( m_C - i ), ldim_E );
00074 #else
00075         FLA_C2F( sswap ) ( &n_C, 
00076                            buff_C + 0*ldim_C + i,                         &ldim_C, 
00077                            buff_E + 0*ldim_E + buff_p[ i ] - ( m_C - i ), &ldim_E );
00078 #endif
00079     }
00080     break;
00081   }
00082 
00083   case FLA_DOUBLE:
00084   {
00085     double* buff_C = ( double * ) FLA_DOUBLE_PTR( C );
00086     double* buff_E = ( double * ) FLA_DOUBLE_PTR( E );
00087 
00088     for ( i = 0; i < m_p; ++i )
00089     {
00090       if ( buff_p[ i ] != 0 ) 
00091 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00092         cblas_dswap( n_C, 
00093                      buff_C + 0*ldim_C + i,                         ldim_C, 
00094                      buff_E + 0*ldim_E + buff_p[ i ] - ( m_C - i ), ldim_E );
00095 #else
00096         FLA_C2F( dswap ) ( &n_C, 
00097                            buff_C + 0*ldim_C + i,                         &ldim_C, 
00098                            buff_E + 0*ldim_E + buff_p[ i ] - ( m_C - i ), &ldim_E );
00099 #endif
00100     }
00101     break;
00102   }
00103 
00104   case FLA_COMPLEX:
00105   {
00106     scomplex* buff_C = ( scomplex * ) FLA_COMPLEX_PTR( C );
00107     scomplex* buff_E = ( scomplex * ) FLA_COMPLEX_PTR( E );
00108 
00109     for ( i = 0; i < m_p; ++i )
00110     {
00111       if ( buff_p[ i ] != 0 ) 
00112 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00113         cblas_cswap( n_C, 
00114                      buff_C + 0*ldim_C + i,                         ldim_C, 
00115                      buff_E + 0*ldim_E + buff_p[ i ] - ( m_C - i ), ldim_E );
00116 #else
00117         FLA_C2F( cswap ) ( &n_C, 
00118                            buff_C + 0*ldim_C + i,                         &ldim_C, 
00119                            buff_E + 0*ldim_E + buff_p[ i ] - ( m_C - i ), &ldim_E );
00120 #endif
00121     }
00122     break;
00123   }
00124 
00125   case FLA_DOUBLE_COMPLEX:
00126   {
00127     dcomplex* buff_C = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( C );
00128     dcomplex* buff_E = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( E );
00129 
00130     for ( i = 0; i < m_p; ++i )
00131     {
00132       if ( buff_p[ i ] != 0 ) 
00133 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00134         cblas_zswap( n_C, 
00135                      buff_C + 0*ldim_C + i,                         ldim_C, 
00136                      buff_E + 0*ldim_E + buff_p[ i ] - ( m_C - i ), ldim_E );
00137 #else
00138         FLA_C2F( zswap ) ( &n_C, 
00139                            buff_C + 0*ldim_C + i,                         &ldim_C, 
00140                            buff_E + 0*ldim_E + buff_p[ i ] - ( m_C - i ), &ldim_E );
00141 #endif
00142     }
00143     break;
00144   }
00145 
00146   }
00147 
00148   return FLA_SUCCESS;
00149 }

FLA_Error FLA_SA_FS_blk ( FLA_Obj  L,
FLA_Obj  D,
FLA_Obj  p,
FLA_Obj  C,
FLA_Obj  E,
dim_t  nb_alg 
)

References FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Gemm_external(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_SA_Apply_pivots(), and FLA_Trsm_external().

Referenced by FLA_SA_FS_task() and FLASH_FS_incpiv_aux2().

/*
 * FLA_SA_FS_blk( L, D, p, C, E, nb_alg ): blocked sweep over L, p and C
 * (top to bottom) and over D (left to right), updating C and E.
 *
 * Each iteration exposes a block of b = min( length(LB), nb_alg ) rows of
 * L, p and C and b columns of D, then:
 *   1. takes the leading b columns of L1 as L1_sqr,
 *   2. applies the pivots p1 to the pair ( C1, E ),
 *   3. calls FLA_Trsm_external( left, lower triangular, no transpose,
 *      unit diagonal, FLA_ONE, L1_sqr, C1 ),
 *   4. calls FLA_Gemm_external( no transpose, no transpose,
 *      FLA_MINUS_ONE, D1, C1, FLA_ONE, E ).
 * NOTE(review): steps 3 and 4 presumably compute C1 := inv( L1_sqr ) * C1
 * and E := E - D1 * C1 -- confirm against the external wrappers.
 *
 * Always returns FLA_SUCCESS; the return values of the calls made inside
 * the loop are not inspected.
 *
 * NOTE(review): b is 0 when nb_alg is 0, in which case LT presumably never
 * grows and the loop does not terminate -- confirm callers pass nb_alg > 0.
 */
00038 {
00039   FLA_Obj LT,              L0,
00040           LB,              L1,
00041                            L2;
00042 
00043   FLA_Obj DL,    DR,       D0,  D1,  D2;
00044 
00045   FLA_Obj pT,              p0,
00046           pB,              p1,
00047                            p2;
00048 
00049   FLA_Obj CT,              C0,
00050           CB,              C1,
00051                            C2;
00052 
        /* L1_rest receives the remainder of the L1 partition but is not
           used afterwards. */
00053   FLA_Obj L1_sqr, L1_rest;
00054 
00055   dim_t b;
00056 
        /* Initial partitions: the top (or left) part starts with size 0. */
00057   FLA_Part_2x1( L,    &LT, 
00058                       &LB,            0, FLA_TOP );
00059 
00060   FLA_Part_1x2( D,    &DL,  &DR,      0, FLA_LEFT );
00061 
00062   FLA_Part_2x1( p,    &pT, 
00063                       &pB,            0, FLA_TOP );
00064 
00065   FLA_Part_2x1( C,    &CT, 
00066                       &CB,            0, FLA_TOP );
00067 
        /* Iterate until the top part LT spans all rows of L. */
00068   while ( FLA_Obj_length( LT ) < FLA_Obj_length( L ) )
00069   {
          /* Block size: at most nb_alg, limited by the rows left in LB. */
00070     b = min( FLA_Obj_length( LB ), nb_alg );
00071 
00072     FLA_Repart_2x1_to_3x1( LT,                &L0, 
00073                         /* ** */            /* ** */
00074                                               &L1, 
00075                            LB,                &L2,        b, FLA_BOTTOM );
00076 
00077     FLA_Repart_1x2_to_1x3( DL,  /**/ DR,      &D0, /**/ &D1, &D2,
00078                            b, FLA_RIGHT );
00079 
00080     FLA_Repart_2x1_to_3x1( pT,                &p0, 
00081                         /* ** */            /* ** */
00082                                               &p1, 
00083                            pB,                &p2,        b, FLA_BOTTOM );
00084 
00085     FLA_Repart_2x1_to_3x1( CT,                &C0, 
00086                         /* ** */            /* ** */
00087                                               &C1, 
00088                            CB,                &C2,        b, FLA_BOTTOM );
00089 
00090     /*------------------------------------------------------------*/
00091 
          /* Only the leading b columns of L1 are used as the triangular
             factor below. */
00092     FLA_Part_1x2( L1,    &L1_sqr, &L1_rest,      b, FLA_LEFT );
00093 
00094 
          /* Interchange rows between C1 and E as recorded in p1. */
00095     FLA_SA_Apply_pivots( C1,
00096                          E, p1 );
00097 
00098     FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR,
00099                        FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
00100                        FLA_ONE, L1_sqr, C1 );
00101 
          /* E is passed whole on every iteration; only D and C are
             repartitioned. */
00102     FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 
00103                        FLA_MINUS_ONE, D1, C1, FLA_ONE, E );
00104 
00105     /*------------------------------------------------------------*/
00106 
          /* Fold the processed block into the top (or left) partitions. */
00107     FLA_Cont_with_3x1_to_2x1( &LT,                L0, 
00108                                                   L1, 
00109                             /* ** */           /* ** */
00110                               &LB,                L2,     FLA_TOP );
00111 
00112     FLA_Cont_with_1x3_to_1x2( &DL,  /**/ &DR,     D0, D1, /**/ D2,
00113                               FLA_LEFT );
00114 
00115     FLA_Cont_with_3x1_to_2x1( &pT,                p0, 
00116                                                   p1, 
00117                             /* ** */           /* ** */
00118                               &pB,                p2,     FLA_TOP );
00119 
00120     FLA_Cont_with_3x1_to_2x1( &CT,                C0, 
00121                                                   C1, 
00122                             /* ** */           /* ** */
00123                               &CB,                C2,     FLA_TOP );
00124   }
00125 
00126   return FLA_SUCCESS;
00127 }

FLA_Error FLA_SA_LU_blk ( FLA_Obj  U,
FLA_Obj  D,
FLA_Obj  p,
FLA_Obj  L,
dim_t  nb_alg 
)

References FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Gemm_external(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_SA_Apply_pivots(), FLA_SA_LU_unb(), and FLA_Trsm_external().

Referenced by FLA_SA_LU_task().

/*
 * FLA_SA_LU_blk( U, D, p, L, nb_alg ): blocked driver that walks U along
 * its diagonal, D left to right, and p and L top to bottom, delegating the
 * work on each diagonal block to FLA_SA_LU_unb.
 *
 * Each iteration exposes a b x b diagonal block U11 with
 * b = min( length(UBR), nb_alg ), together with the matching b columns of
 * D (D1) and b rows of p (p1) and L (L1), then:
 *   1. takes the leading b columns of L1 as L1_sqr,
 *   2. calls FLA_SA_LU_unb( U11, D1, p1, L1_sqr ),
 *   3. applies the pivots p1 to the pair ( U12, D2 ),
 *   4. calls FLA_Trsm_external( left, lower triangular, no transpose,
 *      unit diagonal, FLA_ONE, L1_sqr, U12 ),
 *   5. calls FLA_Gemm_external( no transpose, no transpose,
 *      FLA_MINUS_ONE, D1, U12, FLA_ONE, D2 ).
 * NOTE(review): steps 4 and 5 presumably compute U12 := inv( L1_sqr ) * U12
 * and D2 := D2 - D1 * U12 -- confirm against the external wrappers.
 *
 * Always returns FLA_SUCCESS; the return values of the calls made inside
 * the loop are not inspected.
 *
 * NOTE(review): b is 0 when nb_alg is 0, in which case UTL presumably never
 * grows and the loop does not terminate -- confirm callers pass nb_alg > 0.
 */
00037 {
00038   FLA_Obj UTL,   UTR,      U00, U01, U02, 
00039           UBL,   UBR,      U10, U11, U12,
00040                            U20, U21, U22;
00041 
00042   FLA_Obj DL,    DR,       D0,  D1,  D2;
00043 
00044   FLA_Obj pT,              p0,
00045           pB,              p1,
00046                            p2;
00047 
00048   FLA_Obj LT,              L0,
00049           LB,              L1,
00050                            L2;
00051 
        /* L1_rest receives the remainder of the L1 partition but is not
           used afterwards. */
00052   FLA_Obj L1_sqr, L1_rest;
00053 
00054   dim_t b;
00055 
        /* Initial partitions: the top-left (top, left) part starts empty. */
00056   FLA_Part_2x2( U,    &UTL, &UTR,
00057                       &UBL, &UBR,     0, 0, FLA_TL );
00058 
00059   FLA_Part_1x2( D,    &DL,  &DR,      0, FLA_LEFT );
00060 
00061   FLA_Part_2x1( p,    &pT, 
00062                       &pB,            0, FLA_TOP );
00063 
00064   FLA_Part_2x1( L,    &LT, 
00065                       &LB,            0, FLA_TOP );
00066 
        /* Iterate until the top-left part UTL spans all rows of U. */
00067   while ( FLA_Obj_length( UTL ) < FLA_Obj_length( U ) )
00068   {
          /* Block size: at most nb_alg, limited by the rows left in UBR. */
00069     b = min( FLA_Obj_length( UBR ), nb_alg );
00070 
00071     FLA_Repart_2x2_to_3x3( UTL, /**/ UTR,       &U00, /**/ &U01, &U02,
00072                         /* ************* */   /* ******************** */
00073                                                 &U10, /**/ &U11, &U12,
00074                            UBL, /**/ UBR,       &U20, /**/ &U21, &U22,
00075                            b, b, FLA_BR );
00076 
00077     FLA_Repart_1x2_to_1x3( DL,  /**/ DR,        &D0, /**/ &D1, &D2,
00078                            b, FLA_RIGHT );
00079 
00080     FLA_Repart_2x1_to_3x1( pT,                  &p0, 
00081                         /* ** */              /* ** */
00082                                                 &p1, 
00083                            pB,                  &p2,        b, FLA_BOTTOM );
00084 
00085     FLA_Repart_2x1_to_3x1( LT,                  &L0, 
00086                         /* ** */              /* ** */
00087                                                 &L1, 
00088                            LB,                  &L2,        b, FLA_BOTTOM );
00089 
00090     /*------------------------------------------------------------*/
00091 
          /* Only the leading b columns of L1 are handed to the unblocked
             kernel and to the triangular solve. */
00092     FLA_Part_1x2( L1,    &L1_sqr, &L1_rest,      b, FLA_LEFT );
00093 
00094 
          /* Process the current diagonal block; p1 receives its pivots. */
00095     FLA_SA_LU_unb( U11,
00096                    D1, p1, L1_sqr );
00097 
          /* Replay the same interchanges on the blocks to the right. */
00098     FLA_SA_Apply_pivots( U12,
00099                          D2, p1 );
00100 
00101     FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR,
00102                        FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
00103                        FLA_ONE, L1_sqr, U12 );
00104 
00105     FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 
00106                        FLA_MINUS_ONE, D1, U12, FLA_ONE, D2 );
00107 
00108     /*------------------------------------------------------------*/
00109 
          /* Fold the processed block into the top-left partitions. */
00110     FLA_Cont_with_3x3_to_2x2( &UTL, /**/ &UTR,       U00, U01, /**/ U02,
00111                                                      U10, U11, /**/ U12,
00112                             /* ************** */  /* ****************** */
00113                               &UBL, /**/ &UBR,       U20, U21, /**/ U22,
00114                               FLA_TL );
00115 
00116     FLA_Cont_with_1x3_to_1x2( &DL,  /**/ &DR,        D0, D1, /**/ D2,
00117                               FLA_LEFT );
00118 
00119     FLA_Cont_with_3x1_to_2x1( &pT,                   p0, 
00120                                                      p1, 
00121                             /* ** */              /* ** */
00122                               &pB,                   p2,     FLA_TOP );
00123 
00124     FLA_Cont_with_3x1_to_2x1( &LT,                   L0, 
00125                                                      L1, 
00126                             /* ** */              /* ** */
00127                               &LB,                   L2,     FLA_TOP );
00128   }
00129 
00130   return FLA_SUCCESS;
00131 }

FLA_Error FLA_SA_LU_unb ( FLA_Obj  U,
FLA_Obj  D,
FLA_Obj  p,
FLA_Obj  L 
)

References cblas_ccopy(), cblas_cgeru(), cblas_cscal(), cblas_cswap(), cblas_dcopy(), cblas_dger(), cblas_dscal(), cblas_dswap(), cblas_icamax(), cblas_idamax(), cblas_isamax(), cblas_izamax(), cblas_scopy(), cblas_sger(), cblas_sscal(), cblas_sswap(), cblas_zcopy(), cblas_zgeru(), cblas_zscal(), cblas_zswap(), CblasColMajor, ccopy(), cgeru(), cscal(), cswap(), dcopy(), dger(), dscal(), dswap(), FLA_Copy_external(), FLA_MINUS_ONE, FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_ldim(), FLA_Obj_length(), FLA_ONE, FLA_Triangularize(), icamax(), idamax(), dcomplex::imag, scomplex::imag, isamax(), izamax(), dcomplex::real, scomplex::real, scopy(), sger(), sscal(), sswap(), zcopy(), zgeru(), zscal(), and zswap().

Referenced by FLA_SA_LU_blk().

/*
 * FLA_SA_LU_unb( U, D, p, L ): unblocked elimination step that processes
 * the columns of U and D one at a time, recording row interchanges in p.
 *
 * Setup: U is copied into L and L is triangularized (upper, non-unit
 * diagonal).  Then, for each column j in [0, length(U)):
 *   1. ipiv = 0-based index of the largest-magnitude entry in column j of
 *      D, as reported by i?amax (the Fortran result is shifted by -1).
 *   2. If that entry is strictly larger in magnitude than the diagonal
 *      entry L(j,j), row j of L and row ipiv of D are swapped over
 *      length(U) elements and p[j] = ipiv + length(U) - j is recorded;
 *      otherwise p[j] = 0 (no interchange).
 *   3. Column j of D is scaled by 1 / L(j,j).
 *   4. The columns of D to the right of j receive the rank-1 update
 *      D(:,j+1:) -= D(:,j) * L(j,j+1:).
 *   5. Row j of L, from the diagonal rightwards, is copied into U.
 *
 * Returns FLA_SUCCESS in every case, including the early exit taken when U
 * has a zero dimension and the case where the datatype of U matches none
 * of the four switch labels.
 *
 * Changes relative to the previous listing:
 *   - The FLA_DOUBLE_COMPLEX case now fetches buff_L and buff_minus1 with
 *     FLA_DOUBLE_COMPLEX_PTR, consistent with buff_U and buff_D; it used
 *     the single-precision accessor FLA_COMPLEX_PTR before.
 *   - The complex pivot test compares |re| + |im|, the magnitude i?amax
 *     uses to choose ipiv.  The previous |re + im| cancels for values such
 *     as 1 - 1i and could reject a larger pivot or keep a zero diagonal.
 *
 * NOTE(review): L(j,j) is not checked for zero before the division.
 * NOTE(review): standard CBLAS takes alpha for c/zscal and c/zgeru by
 * pointer, while the CBLAS branches here pass it by value -- confirm
 * against the cblas prototypes this project compiles with.
 * NOTE(review): the row swap exchanges length(U) elements, so U, L and D
 * presumably all have at least length(U) columns -- confirm with callers.
 */
00038 {
00039   FLA_Datatype datatype;
00040   int          m_U, ldim_U;
00041   int          m_D, ldim_D;
00042   int               ldim_L;
00043   int          m_U_min_j, m_U_min_j_min_1; 
00044   int          j, ipiv;
00045   int*         buff_p;
00046   int*         buff_1_int;
00047 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00048   CBLAS_ORDER  cblas_order = CblasColMajor;
00049 #endif
00050 
00051   if ( FLA_Obj_has_zero_dim( U ) ) return FLA_SUCCESS;
00052   
00053   datatype = FLA_Obj_datatype( U );
00054 
00055   m_U      = FLA_Obj_length( U );
00056   ldim_U   = FLA_Obj_ldim( U );
00057 
00058   m_D      = FLA_Obj_length( D );
00059   ldim_D   = FLA_Obj_ldim( D );
00060   
00061   ldim_L   = FLA_Obj_ldim( L );
00062 
        /* Work on a triangularized copy of U held in L; rows are copied
           back into U once they are final (step 5 of each iteration). */
00063   FLA_Copy_external( U, L );
00064   FLA_Triangularize( FLA_UPPER_TRIANGULAR, FLA_NONUNIT_DIAG, L );
00065 
00066   buff_p     = ( int * ) FLA_INT_PTR( p );
00067   buff_1_int = ( int * ) FLA_INT_PTR( FLA_ONE );
00068 
        /* The four cases follow the same five steps and differ only in the
           element type and the BLAS routine prefix. */
00069   switch ( datatype ){
00070 
00071   case FLA_FLOAT:
00072   {
00073     float* buff_U      = ( float * ) FLA_FLOAT_PTR( U );
00074     float* buff_D      = ( float * ) FLA_FLOAT_PTR( D );
00075     float* buff_L      = ( float * ) FLA_FLOAT_PTR( L );
00076     float* buff_minus1 = ( float * ) FLA_FLOAT_PTR( FLA_MINUS_ONE );
00077     float  L_tmp;
00078     float  D_tmp;
00079     float  d_inv_Ljj;
00080 
00081     for ( j = 0; j < m_U; ++j )
00082     {
00083 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00084       ipiv = cblas_isamax( m_D, 
00085                            buff_D + j*ldim_D + 0,
00086                            *buff_1_int );
00087 #else
00088       ipiv = FLA_C2F( isamax )( &m_D, 
00089                                 buff_D + j*ldim_D + 0,
00090                                 buff_1_int ) - 1;
00091 #endif
00092 
00093       L_tmp = buff_L[ j*ldim_L + j    ];
00094       D_tmp = buff_D[ j*ldim_D + ipiv ];
00095 
00096       if ( dabs( L_tmp ) < dabs( D_tmp ) )
00097       {
00098 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00099         cblas_sswap( m_U,
00100                      buff_L + 0*ldim_L + j,    ldim_L,
00101                      buff_D + 0*ldim_D + ipiv, ldim_D ); 
00102 #else
00103         FLA_C2F( sswap )( &m_U,
00104                           buff_L + 0*ldim_L + j,    &ldim_L,
00105                           buff_D + 0*ldim_D + ipiv, &ldim_D ); 
00106 #endif
00107 
00108         buff_p[ j ] = ipiv + m_U - j;
00109       }        
00110       else
00111       {
00112         buff_p[ j ] = 0;
00113       }
00114 
00115       d_inv_Ljj = 1.0F / buff_L[ j*ldim_L + j ];
00116 
00117 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00118       cblas_sscal( m_D,
00119                    d_inv_Ljj,
00120                    buff_D + j*ldim_D + 0, *buff_1_int ); 
00121 #else
00122       FLA_C2F( sscal )( &m_D,
00123                         &d_inv_Ljj,
00124                         buff_D + j*ldim_D + 0, buff_1_int ); 
00125 #endif
00126 
00127       m_U_min_j_min_1 = m_U - j - 1;
00128 
00129       if ( m_U_min_j_min_1 > 0  )
00130       {
00131 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00132         cblas_sger( cblas_order,
00133                     m_D, m_U_min_j_min_1,
00134                     *buff_minus1, 
00135                     buff_D +     j*ldim_D + 0, *buff_1_int,
00136                     buff_L + (j+1)*ldim_L + j, ldim_L,
00137                     buff_D + (j+1)*ldim_D + 0, ldim_D );
00138 #else
00139         FLA_C2F( sger )( &m_D, &m_U_min_j_min_1,
00140                          buff_minus1, 
00141                          buff_D +     j*ldim_D + 0, buff_1_int,
00142                          buff_L + (j+1)*ldim_L + j, &ldim_L,
00143                          buff_D + (j+1)*ldim_D + 0, &ldim_D );
00144 #endif
00145       }
00146 
00147       m_U_min_j = m_U - j;
00148 
00149       if ( m_U_min_j > 0 ) 
00150       {
00151 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00152         cblas_scopy( m_U_min_j,
00153                      buff_L + j*ldim_L + j, ldim_L,
00154                      buff_U + j*ldim_U + j, ldim_U );
00155 #else
00156         FLA_C2F( scopy )( &m_U_min_j,
00157                           buff_L + j*ldim_L + j, &ldim_L,
00158                           buff_U + j*ldim_U + j, &ldim_U );
00159 #endif
00160       }
00161     }                 
00162     break;
00163   }
00164 
00165   case FLA_DOUBLE:
00166   {
00167     double* buff_U      = ( double * ) FLA_DOUBLE_PTR( U );
00168     double* buff_D      = ( double * ) FLA_DOUBLE_PTR( D );
00169     double* buff_L      = ( double * ) FLA_DOUBLE_PTR( L );
00170     double* buff_minus1 = ( double * ) FLA_DOUBLE_PTR( FLA_MINUS_ONE );
00171     double  L_tmp;
00172     double  D_tmp;
00173     double  d_inv_Ljj;
00174 
00175     for ( j = 0; j < m_U; ++j )
00176     {
00177 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00178       ipiv = cblas_idamax( m_D, 
00179                            buff_D + j*ldim_D + 0,
00180                            *buff_1_int );
00181 #else
00182       ipiv = FLA_C2F( idamax )( &m_D, 
00183                                 buff_D + j*ldim_D + 0,
00184                                 buff_1_int ) - 1;
00185 #endif
00186 
00187       L_tmp = buff_L[ j*ldim_L + j    ];
00188       D_tmp = buff_D[ j*ldim_D + ipiv ];
00189 
00190       if ( dabs( L_tmp ) < dabs( D_tmp ) )
00191       {
00192 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00193         cblas_dswap( m_U,
00194                      buff_L + 0*ldim_L + j,    ldim_L,
00195                      buff_D + 0*ldim_D + ipiv, ldim_D ); 
00196 #else
00197         FLA_C2F( dswap )( &m_U,
00198                           buff_L + 0*ldim_L + j,    &ldim_L,
00199                           buff_D + 0*ldim_D + ipiv, &ldim_D ); 
00200 #endif
00201 
00202         buff_p[ j ] = ipiv + m_U - j;
00203       }        
00204       else
00205       {
00206         buff_p[ j ] = 0;
00207       }
00208 
00209       d_inv_Ljj = 1.0 / buff_L[ j*ldim_L + j ];
00210 
00211 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00212       cblas_dscal( m_D,
00213                    d_inv_Ljj,
00214                    buff_D + j*ldim_D + 0, *buff_1_int ); 
00215 #else
00216       FLA_C2F( dscal )( &m_D,
00217                         &d_inv_Ljj,
00218                         buff_D + j*ldim_D + 0, buff_1_int ); 
00219 #endif
00220 
00221       m_U_min_j_min_1 = m_U - j - 1;
00222 
00223       if ( m_U_min_j_min_1 > 0  )
00224       {
00225 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00226         cblas_dger( cblas_order,
00227                     m_D, m_U_min_j_min_1,
00228                     *buff_minus1, 
00229                     buff_D +     j*ldim_D + 0, *buff_1_int,
00230                     buff_L + (j+1)*ldim_L + j, ldim_L,
00231                     buff_D + (j+1)*ldim_D + 0, ldim_D );
00232 #else
00233         FLA_C2F( dger )( &m_D, &m_U_min_j_min_1,
00234                          buff_minus1, 
00235                          buff_D +     j*ldim_D + 0, buff_1_int,
00236                          buff_L + (j+1)*ldim_L + j, &ldim_L,
00237                          buff_D + (j+1)*ldim_D + 0, &ldim_D );
00238 #endif
00239       }
00240 
00241       m_U_min_j = m_U - j;
00242 
00243       if ( m_U_min_j > 0 ) 
00244       {
00245 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00246         cblas_dcopy( m_U_min_j,
00247                      buff_L + j*ldim_L + j, ldim_L,
00248                      buff_U + j*ldim_U + j, ldim_U );
00249 #else
00250         FLA_C2F( dcopy )( &m_U_min_j,
00251                           buff_L + j*ldim_L + j, &ldim_L,
00252                           buff_U + j*ldim_U + j, &ldim_U );
00253 #endif
00254       }
00255     }                 
00256     break;
00257   }
00258 
00259   case FLA_COMPLEX:
00260   {
00261     scomplex* buff_U      = ( scomplex * ) FLA_COMPLEX_PTR( U );
00262     scomplex* buff_D      = ( scomplex * ) FLA_COMPLEX_PTR( D );
00263     scomplex* buff_L      = ( scomplex * ) FLA_COMPLEX_PTR( L );
00264     scomplex* buff_minus1 = ( scomplex * ) FLA_COMPLEX_PTR( FLA_MINUS_ONE );
00265     scomplex  L_tmp;
00266     scomplex  D_tmp;
00267     scomplex  d_inv_Ljj;
00268     scomplex  Ljj;
00269     float     temp;
00270 
00271     for ( j = 0; j < m_U; ++j )
00272     {
00273 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00274       ipiv = cblas_icamax( m_D, 
00275                            buff_D + j*ldim_D + 0,
00276                            *buff_1_int );
00277 #else
00278       ipiv = FLA_C2F( icamax )( &m_D, 
00279                                 buff_D + j*ldim_D + 0,
00280                                 buff_1_int ) - 1;
00281 #endif
00282 
00283       L_tmp = buff_L[ j*ldim_L + j    ];
00284       D_tmp = buff_D[ j*ldim_D + ipiv ];
00285 
            /* Compare |re| + |im|, the magnitude icamax ranks by; summing
               the parts before taking the absolute value would cancel. */
00286       if ( ( dabs( L_tmp.real ) + dabs( L_tmp.imag ) ) < ( dabs( D_tmp.real ) + dabs( D_tmp.imag ) ) )
00287       {
00288 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00289         cblas_cswap( m_U,
00290                      buff_L + 0*ldim_L + j,    ldim_L,
00291                      buff_D + 0*ldim_D + ipiv, ldim_D ); 
00292 #else
00293         FLA_C2F( cswap )( &m_U,
00294                           buff_L + 0*ldim_L + j,    &ldim_L,
00295                           buff_D + 0*ldim_D + ipiv, &ldim_D ); 
00296 #endif
00297 
00298         buff_p[ j ] = ipiv + m_U - j;
00299       }        
00300       else
00301       {
00302         buff_p[ j ] = 0;
00303       }
00304 
00305       Ljj = buff_L[ j*ldim_L + j ];
00306 
00307       // d_inv_Ljj = 1.0 / Ljj
00308       temp = 1.0F / ( Ljj.real * Ljj.real +
00309                       Ljj.imag * Ljj.imag );
00310       d_inv_Ljj.real = Ljj.real *  temp;
00311       d_inv_Ljj.imag = Ljj.imag * -temp;
00312 
00313 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00314       cblas_cscal( m_D,
00315                    d_inv_Ljj,
00316                    buff_D + j*ldim_D + 0, *buff_1_int ); 
00317 #else
00318       FLA_C2F( cscal )( &m_D,
00319                         &d_inv_Ljj,
00320                         buff_D + j*ldim_D + 0, buff_1_int ); 
00321 #endif
00322 
00323       m_U_min_j_min_1 = m_U - j - 1;
00324 
00325       if ( m_U_min_j_min_1 > 0  )
00326       {
00327 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00328         cblas_cgeru( cblas_order,
00329                      m_D, m_U_min_j_min_1,
00330                      *buff_minus1, 
00331                      buff_D +     j*ldim_D + 0, *buff_1_int,
00332                      buff_L + (j+1)*ldim_L + j, ldim_L,
00333                      buff_D + (j+1)*ldim_D + 0, ldim_D );
00334 #else
00335         FLA_C2F( cgeru )( &m_D, &m_U_min_j_min_1,
00336                           buff_minus1, 
00337                           buff_D +     j*ldim_D + 0, buff_1_int,
00338                           buff_L + (j+1)*ldim_L + j, &ldim_L,
00339                           buff_D + (j+1)*ldim_D + 0, &ldim_D );
00340 #endif
00341       }
00342 
00343       m_U_min_j = m_U - j;
00344 
00345       if ( m_U_min_j > 0 ) 
00346       {
00347 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00348         cblas_ccopy( m_U_min_j,
00349                      buff_L + j*ldim_L + j, ldim_L,
00350                      buff_U + j*ldim_U + j, ldim_U );
00351 #else
00352         FLA_C2F( ccopy )( &m_U_min_j,
00353                           buff_L + j*ldim_L + j, &ldim_L,
00354                           buff_U + j*ldim_U + j, &ldim_U );
00355 #endif
00356       }
00357     }                 
00358     break;
00359   }
00360 
00361   case FLA_DOUBLE_COMPLEX:
00362   {
          /* All four buffers hold dcomplex data, so all four go through
             the double-complex accessor. */
00363     dcomplex* buff_U      = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( U );
00364     dcomplex* buff_D      = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( D );
00365     dcomplex* buff_L      = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( L );
00366     dcomplex* buff_minus1 = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
00367     dcomplex  L_tmp;
00368     dcomplex  D_tmp;
00369     dcomplex  d_inv_Ljj;
00370     dcomplex  Ljj;
00371     double    temp;
00372 
00373     for ( j = 0; j < m_U; ++j )
00374     {
00375 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00376       ipiv = cblas_izamax( m_D, 
00377                            buff_D + j*ldim_D + 0,
00378                            *buff_1_int );
00379 #else
00380       ipiv = FLA_C2F( izamax )( &m_D, 
00381                                 buff_D + j*ldim_D + 0,
00382                                 buff_1_int ) - 1;
00383 #endif
00384 
00385       L_tmp = buff_L[ j*ldim_L + j    ];
00386       D_tmp = buff_D[ j*ldim_D + ipiv ];
00387 
            /* Compare |re| + |im|, the magnitude izamax ranks by; summing
               the parts before taking the absolute value would cancel. */
00388       if ( ( dabs( L_tmp.real ) + dabs( L_tmp.imag ) ) < ( dabs( D_tmp.real ) + dabs( D_tmp.imag ) ) )
00389       {
00390 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00391         cblas_zswap( m_U,
00392                      buff_L + 0*ldim_L + j,    ldim_L,
00393                      buff_D + 0*ldim_D + ipiv, ldim_D ); 
00394 #else
00395         FLA_C2F( zswap )( &m_U,
00396                           buff_L + 0*ldim_L + j,    &ldim_L,
00397                           buff_D + 0*ldim_D + ipiv, &ldim_D ); 
00398 #endif
00399 
00400         buff_p[ j ] = ipiv + m_U - j;
00401       }        
00402       else
00403       {
00404         buff_p[ j ] = 0;
00405       }
00406 
00407       Ljj = buff_L[ j*ldim_L + j ];
00408 
00409       // d_inv_Ljj = 1.0 / Ljj
00410       temp = 1.0  / ( Ljj.real * Ljj.real +
00411                       Ljj.imag * Ljj.imag );
00412       d_inv_Ljj.real = Ljj.real *  temp;
00413       d_inv_Ljj.imag = Ljj.imag * -temp;
00414 
00415 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00416       cblas_zscal( m_D,
00417                    d_inv_Ljj,
00418                    buff_D + j*ldim_D + 0, *buff_1_int ); 
00419 #else
00420       FLA_C2F( zscal )( &m_D,
00421                         &d_inv_Ljj,
00422                         buff_D + j*ldim_D + 0, buff_1_int ); 
00423 #endif
00424 
00425       m_U_min_j_min_1 = m_U - j - 1;
00426 
00427       if ( m_U_min_j_min_1 > 0  )
00428       {
00429 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00430         cblas_zgeru( cblas_order,
00431                      m_D, m_U_min_j_min_1,
00432                      *buff_minus1, 
00433                      buff_D +     j*ldim_D + 0, *buff_1_int,
00434                      buff_L + (j+1)*ldim_L + j, ldim_L,
00435                      buff_D + (j+1)*ldim_D + 0, ldim_D );
00436 #else
00437         FLA_C2F( zgeru )( &m_D, &m_U_min_j_min_1,
00438                           buff_minus1, 
00439                           buff_D +     j*ldim_D + 0, buff_1_int,
00440                           buff_L + (j+1)*ldim_L + j, &ldim_L,
00441                           buff_D + (j+1)*ldim_D + 0, &ldim_D );
00442 #endif
00443       }
00444 
00445       m_U_min_j = m_U - j;
00446 
00447       if ( m_U_min_j > 0 ) 
00448       {
00449 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00450         cblas_zcopy( m_U_min_j,
00451                      buff_L + j*ldim_L + j, ldim_L,
00452                      buff_U + j*ldim_U + j, ldim_U );
00453 #else
00454         FLA_C2F( zcopy )( &m_U_min_j,
00455                           buff_L + j*ldim_L + j, &ldim_L,
00456                           buff_U + j*ldim_U + j, &ldim_U );
00457 #endif
00458       }
00459     }                 
00460     break;
00461   }
00462 
00463   }
00464 
00465   return FLA_SUCCESS;
00466 }

FLA_Error FLASH_FS_incpiv_aux1 ( FLA_Obj  A,
FLA_Obj  p,
FLA_Obj  L,
FLA_Obj  b,
dim_t  nb_alg 
)

References FLA_Apply_pivots(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Obj_length(), FLA_Obj_width(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Trsv_external(), and FLASH_FS_incpiv_aux2().

Referenced by FLASH_FS_incpiv().

00036 {
00037    FLA_Obj ATL,   ATR,      A00, A01, A02,
00038            ABL,   ABR,      A10, A11, A12,
00039                             A20, A21, A22;
00040 
00041    FLA_Obj pTL,   pTR,      p00, p01, p02,
00042            pBL,   pBR,      p10, p11, p12,
00043                             p20, p21, p22;
00044 
00045    FLA_Obj LTL,   LTR,      L00, L01, L02,
00046            LBL,   LBR,      L10, L11, L12,
00047                             L20, L21, L22;
00048 
00049    FLA_Obj bT,              b0,
00050            bB,              b1,
00051                             b2;
00052 
00053    FLA_Obj p11_conf,
00054            p11_rest;
00055 
00056    FLA_Part_2x2( A,    &ATL, &ATR,
00057                        &ABL, &ABR,     0, 0, FLA_TL );
00058 
00059    FLA_Part_2x2( p,    &pTL, &pTR,
00060                        &pBL, &pBR,     0, 0, FLA_TL );
00061 
00062    FLA_Part_2x2( L,    &LTL, &LTR,
00063                        &LBL, &LBR,     0, 0, FLA_TL );
00064 
00065    FLA_Part_2x1( b,    &bT,
00066                        &bB,            0, FLA_TOP );
00067 
00068    while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) &&
00069            FLA_Obj_width ( ATL ) < FLA_Obj_width ( A ) )
00070    {
00071       FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00, /**/ &A01, &A02,
00072                           /* ************* */   /* ******************** */
00073                                                   &A10, /**/ &A11, &A12,
00074                              ABL, /**/ ABR,       &A20, /**/ &A21, &A22,
00075                              1, 1, FLA_BR );
00076 
00077       FLA_Repart_2x2_to_3x3( pTL, /**/ pTR,       &p00, /**/ &p01, &p02,
00078                           /* ************* */   /* ******************** */
00079                                                   &p10, /**/ &p11, &p12,
00080                              pBL, /**/ pBR,       &p20, /**/ &p21, &p22,
00081                              1, 1, FLA_BR );
00082 
00083       FLA_Repart_2x2_to_3x3( LTL, /**/ LTR,       &L00, /**/ &L01, &L02,
00084                           /* ************* */   /* ******************** */
00085                                                   &L10, /**/ &L11, &L12,
00086                              LBL, /**/ LBR,       &L20, /**/ &L21, &L22,
00087                              1, 1, FLA_BR );
00088 
00089       FLA_Repart_2x1_to_3x1( bT,                  &b0,
00090                           /* ** */              /* ** */
00091                                                   &b1,
00092                              bB,                  &b2,        1, FLA_BOTTOM );
00093 
00094       /*------------------------------------------------------------*/
00095 
00096       FLA_Part_2x1( *FLASH_OBJ_PTR_AT( p11 ),   &p11_conf,
00097                                                 &p11_rest,
00098                     FLA_Obj_length( *FLASH_OBJ_PTR_AT( b1 ) ), FLA_TOP );
00099 
00100 
00101       FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE,
00102                         p11_conf,
00103                         *FLASH_OBJ_PTR_AT( b1 ) );
00104 
00105       FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
00106                          *FLASH_OBJ_PTR_AT( A11 ),
00107                          *FLASH_OBJ_PTR_AT( b1 ) );
00108 
00109       FLASH_FS_incpiv_aux2( L21,
00110                             A21, p21, b1,
00111                                       b2, nb_alg );
00112 
00113       /*------------------------------------------------------------*/
00114 
00115       FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00, A01, /**/ A02,
00116                                                        A10, A11, /**/ A12,
00117                              /* ************** */  /* ****************** */
00118                                 &ABL, /**/ &ABR,       A20, A21, /**/ A22,
00119                                 FLA_TL );
00120 
00121       FLA_Cont_with_3x3_to_2x2( &pTL, /**/ &pTR,       p00, p01, /**/ p02,
00122                                                        p10, p11, /**/ p12,
00123                              /* ************** */  /* ****************** */
00124                                 &pBL, /**/ &pBR,       p20, p21, /**/ p22,
00125                                 FLA_TL );
00126 
00127       FLA_Cont_with_3x3_to_2x2( &LTL, /**/ &LTR,       L00, L01, /**/ L02,
00128                                                        L10, L11, /**/ L12,
00129                              /* ************** */  /* ****************** */
00130                                 &LBL, /**/ &LBR,       L20, L21, /**/ L22,
00131                                 FLA_TL );
00132 
00133       FLA_Cont_with_3x1_to_2x1( &bT,                   b0,
00134                                                        b1,
00135                               /* ** */              /* ** */
00136                                 &bB,                   b2,     FLA_TOP );
00137    }
00138    
00139    return FLA_SUCCESS;
00140 }

/*
 * FLASH_FS_incpiv_aux2
 *
 * Steps through L, D, p and E from top to bottom, one partition step
 * (repartition size 1) at a time, and on every step calls FLA_SA_FS_blk()
 * with the objects that FLASH_OBJ_PTR_AT yields for the current L1, D1, p1
 * and E1 blocks, plus the object it yields for C.
 *
 * Parameters:
 *   L, D, p, E  - partitioned in lock-step along their length.
 *   C           - never partitioned; the same C is used on every iteration.
 *   nb_alg      - forwarded unchanged to FLA_SA_FS_blk().
 *
 * Returns FLA_SUCCESS unconditionally; the value returned by
 * FLA_SA_FS_blk() is not inspected.
 *
 * NOTE(review): L, D, p, C and E are presumably hierarchical (FLASH)
 * objects whose elements refer to leaf blocks -- confirm against caller.
 */
FLA_Error FLASH_FS_incpiv_aux2 ( FLA_Obj  L,
FLA_Obj  D,
FLA_Obj  p,
FLA_Obj  C,
FLA_Obj  E,
dim_t  nb_alg 
)

References FLA_Cont_with_3x1_to_2x1(), FLA_Obj_length(), FLA_Part_2x1(), FLA_Repart_2x1_to_3x1(), and FLA_SA_FS_blk().

Referenced by FLASH_FS_incpiv_aux1().

00038 {
00039    FLA_Obj LT,              L0,
00040            LB,              L1,
00041                             L2;
00042 
00043    FLA_Obj DT,              D0,
00044            DB,              D1,
00045                             D2;
00046 
00047    FLA_Obj pT,              p0,
00048            pB,              p1,
00049                             p2;
00050 
00051    FLA_Obj ET,              E0,
00052            EB,              E1,
00053                             E2;
00054    /* Initial split: size 0 on the FLA_TOP side for each of L, D, p, E. */
00055    FLA_Part_2x1( L,    &LT,
00056                        &LB,            0, FLA_TOP );
00057 
00058    FLA_Part_2x1( D,    &DT,
00059                        &DB,            0, FLA_TOP );
00060 
00061    FLA_Part_2x1( p,    &pT,
00062                        &pB,            0, FLA_TOP );
00063 
00064    FLA_Part_2x1( E,    &ET,
00065                        &EB,            0, FLA_TOP );
00066    /* Iterate until DT spans the full length of D. */
00067    while ( FLA_Obj_length( DT ) < FLA_Obj_length( D ) )
00068    {
00069       FLA_Repart_2x1_to_3x1( LT,                &L0,
00070                           /* ** */            /* ** */
00071                                                 &L1,
00072                              LB,                &L2,        1, FLA_BOTTOM );
00073 
00074       FLA_Repart_2x1_to_3x1( DT,                &D0,
00075                           /* ** */            /* ** */
00076                                                 &D1,
00077                              DB,                &D2,        1, FLA_BOTTOM );
00078 
00079       FLA_Repart_2x1_to_3x1( pT,                &p0,
00080                           /* ** */            /* ** */
00081                                                 &p1,
00082                              pB,                &p2,        1, FLA_BOTTOM );
00083 
00084       FLA_Repart_2x1_to_3x1( ET,                &E0,
00085                           /* ** */            /* ** */
00086                                                 &E1,
00087                              EB,                &E2,        1, FLA_BOTTOM );
00088 
00089       /*------------------------------------------------------------*/
00090       /* C is passed whole on each pass; only L1, D1, p1, E1 change. */
00091       FLA_SA_FS_blk( *FLASH_OBJ_PTR_AT( L1 ),
00092                      *FLASH_OBJ_PTR_AT( D1 ),
00093                      *FLASH_OBJ_PTR_AT( p1 ),
00094                      *FLASH_OBJ_PTR_AT( C ),
00095                      *FLASH_OBJ_PTR_AT( E1 ),
00096                      nb_alg );
00097       
00098       /*------------------------------------------------------------*/
00099       /* Fold L1, D1, p1, E1 into the top partitions (FLA_TOP). */
00100       FLA_Cont_with_3x1_to_2x1( &LT,                L0,
00101                                                     L1,
00102                               /* ** */           /* ** */
00103                                 &LB,                L2,     FLA_TOP );
00104 
00105       FLA_Cont_with_3x1_to_2x1( &DT,                D0,
00106                                                     D1,
00107                               /* ** */           /* ** */
00108                                 &DB,                D2,     FLA_TOP );
00109 
00110       FLA_Cont_with_3x1_to_2x1( &pT,                p0,
00111                                                     p1,
00112                               /* ** */           /* ** */
00113                                 &pB,                p2,     FLA_TOP );
00114 
00115       FLA_Cont_with_3x1_to_2x1( &ET,                E0,
00116                                                     E1,
00117                               /* ** */           /* ** */
00118                                 &EB,                E2,     FLA_TOP );
00119    }
00120    
00121    return FLA_SUCCESS;
00122 }

/*
 * FLASH_LU_incpiv_var1
 *
 * Sweeps A, p and L along the diagonal (2x2 -> 3x3 repartitioning by 1 in
 * each dimension, growing from FLA_TL towards FLA_BR) for as long as ATL is
 * smaller than A in both length and width.  On each iteration it:
 *
 *   1. processes the object at A11 together with the object at p11, either
 *      by ENQUEUE_FLASH_LU_piv() when FLASH_Queue_get_enabled() is true or
 *      by calling FLA_LU_piv_task() directly;
 *   2. calls FLASH_Trsm_piv() with the object at A11, A12 and p11;
 *   3. calls FLASH_SA_LU( A11, A12, A21, A22, p21, L21, nb_alg ).
 *
 * nb_alg is only forwarded to FLASH_SA_LU().
 *
 * Returns FLA_SUCCESS unconditionally; return values of the calls above are
 * not inspected.
 */
FLA_Error FLASH_LU_incpiv_var1 ( FLA_Obj  A,
FLA_Obj  p,
FLA_Obj  L,
dim_t  nb_alg 
)

References FLA_Cont_with_3x3_to_2x2(), FLA_LU_piv_task(), FLA_Obj_length(), FLA_Obj_width(), FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), FLASH_Queue_get_enabled(), FLASH_SA_LU(), and FLASH_Trsm_piv().

00036 {
00037    FLA_Obj ATL,   ATR,      A00, A01, A02,
00038            ABL,   ABR,      A10, A11, A12,
00039                             A20, A21, A22;
00040 
00041    FLA_Obj pTL,   pTR,      p00, p01, p02,
00042            pBL,   pBR,      p10, p11, p12,
00043                             p20, p21, p22;
00044 
00045    FLA_Obj LTL,   LTR,      L00, L01, L02,
00046            LBL,   LBR,      L10, L11, L12,
00047                             L20, L21, L22;
00048 
00049    FLA_Part_2x2( A,    &ATL, &ATR,
00050                        &ABL, &ABR,     0, 0, FLA_TL );
00051 
00052    FLA_Part_2x2( p,    &pTL, &pTR,
00053                        &pBL, &pBR,     0, 0, FLA_TL );
00054 
00055    FLA_Part_2x2( L,    &LTL, &LTR,
00056                        &LBL, &LBR,     0, 0, FLA_TL );
00057    /* Advance while ATL is smaller than A in both length and width. */
00058    while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) &&
00059            FLA_Obj_width ( ATL ) < FLA_Obj_width ( A ) )
00060    {
00061       FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00, /**/ &A01, &A02,
00062                           /* ************* */   /* ******************** */
00063                                                   &A10, /**/ &A11, &A12,
00064                              ABL, /**/ ABR,       &A20, /**/ &A21, &A22,
00065                              1, 1, FLA_BR );
00066 
00067       FLA_Repart_2x2_to_3x3( pTL, /**/ pTR,       &p00, /**/ &p01, &p02,
00068                           /* ************* */   /* ******************** */
00069                                                   &p10, /**/ &p11, &p12,
00070                              pBL, /**/ pBR,       &p20, /**/ &p21, &p22,
00071                              1, 1, FLA_BR );
00072 
00073       FLA_Repart_2x2_to_3x3( LTL, /**/ LTR,       &L00, /**/ &L01, &L02,
00074                           /* ************* */   /* ******************** */
00075                                                   &L10, /**/ &L11, &L12,
00076                              LBL, /**/ LBR,       &L20, /**/ &L21, &L22,
00077                              1, 1, FLA_BR );
00078 
00079       /*------------------------------------------------------------*/
00080       /* Step 1: process the object at A11 with p11 (enqueue or run directly). */
00081       if ( FLASH_Queue_get_enabled( ) )
00082       {
00083          // Enqueue
00084          ENQUEUE_FLASH_LU_piv( *FLASH_OBJ_PTR_AT( A11 ),
00085                                *FLASH_OBJ_PTR_AT( p11 ),
00086                                NULL );
00087       }
00088       else
00089       {
00090          // Execute leaf
00091          FLA_LU_piv_task( *FLASH_OBJ_PTR_AT( A11 ), 
00092                           *FLASH_OBJ_PTR_AT( p11 ),
00093                           NULL );
00094       }
00095       /* Step 2: FLASH_Trsm_piv on A12 using the A11 object and p11. */
00096       FLASH_Trsm_piv( *FLASH_OBJ_PTR_AT( A11 ), A12, p11 );
00097       /* Step 3: FLASH_SA_LU over A21/A22 with p21 and L21. */
00098       FLASH_SA_LU( A11, A12, 
00099                    A21, A22, p21, L21, nb_alg );
00100 
00101       /*------------------------------------------------------------*/
00102 
00103       FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00, A01, /**/ A02,
00104                                                        A10, A11, /**/ A12,
00105                              /* ************** */  /* ****************** */
00106                                 &ABL, /**/ &ABR,       A20, A21, /**/ A22,
00107                                 FLA_TL );
00108 
00109       FLA_Cont_with_3x3_to_2x2( &pTL, /**/ &pTR,       p00, p01, /**/ p02,
00110                                                        p10, p11, /**/ p12,
00111                              /* ************** */  /* ****************** */
00112                                 &pBL, /**/ &pBR,       p20, p21, /**/ p22,
00113                                 FLA_TL );
00114 
00115       FLA_Cont_with_3x3_to_2x2( &LTL, /**/ &LTR,       L00, L01, /**/ L02,
00116                                                        L10, L11, /**/ L12,
00117                              /* ************** */  /* ****************** */
00118                                 &LBL, /**/ &LBR,       L20, L21, /**/ L22,
00119                                 FLA_TL );
00120    }
00121    
00122    return FLA_SUCCESS;
00123 }

/*
 * FLASH_LU_incpiv_var2
 *
 * Same diagonal sweep over A, p and L as FLASH_LU_incpiv_var1 (2x2 -> 3x3
 * repartitioning by 1 in each dimension, looping while ATL is smaller than
 * A in both length and width), but each iteration works through a
 * temporary object U:
 *
 *   1. U is created conformal to the object at A11 with
 *      FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, ... );
 *   2. the object at A11, the object at p11 and U are handed to
 *      ENQUEUE_FLASH_LU_piv_copy() when FLASH_Queue_get_enabled() is true,
 *      otherwise to FLA_LU_piv_copy_task();
 *   3. FLASH_Trsm_piv( U, A12, p11 ) is called with U as its first argument;
 *   4. U is released via ENQUEUE_FLASH_Obj_free() or FLA_Obj_free_task(),
 *      using the same queue-enabled test;
 *   5. FLASH_SA_LU( A11, A12, A21, A22, p21, L21, nb_alg ) is called.
 *
 * Returns FLA_SUCCESS unconditionally; return values of the calls above are
 * not inspected.
 *
 * NOTE(review): U is a single local that is re-created on every iteration.
 * When tasks are enqueued rather than executed, this presumably relies on
 * the enqueue macros capturing U by value -- confirm against their
 * definitions.
 */
FLA_Error FLASH_LU_incpiv_var2 ( FLA_Obj  A,
FLA_Obj  p,
FLA_Obj  L,
dim_t  nb_alg 
)

References FLA_Cont_with_3x3_to_2x2(), FLA_LU_piv_copy_task(), FLA_Obj_create_conf_to(), FLA_Obj_free_task(), FLA_Obj_length(), FLA_Obj_width(), FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), FLASH_Queue_get_enabled(), FLASH_SA_LU(), and FLASH_Trsm_piv().

Referenced by FLASH_LU_incpiv().

00036 {
00037    FLA_Obj ATL,   ATR,      A00, A01, A02,
00038            ABL,   ABR,      A10, A11, A12,
00039                             A20, A21, A22;
00040 
00041    FLA_Obj pTL,   pTR,      p00, p01, p02,
00042            pBL,   pBR,      p10, p11, p12,
00043                             p20, p21, p22;
00044 
00045    FLA_Obj LTL,   LTR,      L00, L01, L02,
00046            LBL,   LBR,      L10, L11, L12,
00047                             L20, L21, L22;
00048 
00049    FLA_Obj U;
00050    /* U is created and freed inside every loop iteration below. */
00051    FLA_Part_2x2( A,    &ATL, &ATR,
00052                        &ABL, &ABR,     0, 0, FLA_TL );
00053 
00054    FLA_Part_2x2( p,    &pTL, &pTR,
00055                        &pBL, &pBR,     0, 0, FLA_TL );
00056 
00057    FLA_Part_2x2( L,    &LTL, &LTR,
00058                        &LBL, &LBR,     0, 0, FLA_TL );
00059 
00060    while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) &&
00061            FLA_Obj_width ( ATL ) < FLA_Obj_width ( A ) )
00062    {
00063       FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00, /**/ &A01, &A02,
00064                           /* ************* */   /* ******************** */
00065                                                   &A10, /**/ &A11, &A12,
00066                              ABL, /**/ ABR,       &A20, /**/ &A21, &A22,
00067                              1, 1, FLA_BR );
00068 
00069       FLA_Repart_2x2_to_3x3( pTL, /**/ pTR,       &p00, /**/ &p01, &p02,
00070                           /* ************* */   /* ******************** */
00071                                                   &p10, /**/ &p11, &p12,
00072                              pBL, /**/ pBR,       &p20, /**/ &p21, &p22,
00073                              1, 1, FLA_BR );
00074 
00075       FLA_Repart_2x2_to_3x3( LTL, /**/ LTR,       &L00, /**/ &L01, &L02,
00076                           /* ************* */   /* ******************** */
00077                                                   &L10, /**/ &L11, &L12,
00078                              LBL, /**/ LBR,       &L20, /**/ &L21, &L22,
00079                              1, 1, FLA_BR );
00080 
00081       /*------------------------------------------------------------*/    
00082       /* Temporary U conformal to the object at A11. */
00083       FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, *FLASH_OBJ_PTR_AT( A11 ), &U );
00084 
00085 
00086       if ( FLASH_Queue_get_enabled( ) )
00087       {
00088          // Enqueue
00089          ENQUEUE_FLASH_LU_piv_copy( *FLASH_OBJ_PTR_AT( A11 ),
00090                                     *FLASH_OBJ_PTR_AT( p11 ),
00091                                     U, NULL );
00092       }
00093       else
00094       {
00095          // Execute leaf
00096          FLA_LU_piv_copy_task( *FLASH_OBJ_PTR_AT( A11 ),
00097                                *FLASH_OBJ_PTR_AT( p11 ),
00098                                U, NULL );
00099       }
00100       /* Unlike var1, U (not the A11 object) is the first argument here. */
00101       FLASH_Trsm_piv( U, A12, p11 );
00102       /* Release U through the same enqueue-or-execute choice. */
00103       if ( FLASH_Queue_get_enabled( ) )
00104       {
00105          // Enqueue
00106          ENQUEUE_FLASH_Obj_free( U, NULL );
00107       }
00108       else
00109       {
00110          // Execute leaf
00111          FLA_Obj_free_task( U, NULL );
00112       }
00113 
00114       FLASH_SA_LU( A11, A12, 
00115                    A21, A22, p21, L21, nb_alg );
00116 
00117       /*------------------------------------------------------------*/
00118 
00119       FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00, A01, /**/ A02,
00120                                                        A10, A11, /**/ A12,
00121                              /* ************** */  /* ****************** */
00122                                 &ABL, /**/ &ABR,       A20, A21, /**/ A22,
00123                                 FLA_TL );
00124 
00125       FLA_Cont_with_3x3_to_2x2( &pTL, /**/ &pTR,       p00, p01, /**/ p02,
00126                                                        p10, p11, /**/ p12,
00127                              /* ************** */  /* ****************** */
00128                                 &pBL, /**/ &pBR,       p20, p21, /**/ p22,
00129                                 FLA_TL );
00130 
00131       FLA_Cont_with_3x3_to_2x2( &LTL, /**/ &LTR,       L00, L01, /**/ L02,
00132                                                        L10, L11, /**/ L12,
00133                              /* ************** */  /* ****************** */
00134                                 &LBL, /**/ &LBR,       L20, L21, /**/ L22,
00135                                 FLA_TL );
00136    }
00137    
00138    return FLA_SUCCESS;
00139 }

/*
 * FLASH_SA_FS
 *
 * Steps through C and E from left to right, one partition step
 * (repartition size 1) at a time.  For each step the objects at L, D, p,
 * C1 and E1 are passed, together with nb_alg, either to
 * ENQUEUE_FLASH_SA_FS() when FLASH_Queue_get_enabled() is true or to
 * FLA_SA_FS_task() otherwise.
 *
 * Parameters:
 *   L, D, p  - never partitioned; the same objects are used on every pass.
 *   C, E     - partitioned in lock-step along their width.
 *   nb_alg   - forwarded unchanged to the task.
 *
 * Returns FLA_SUCCESS unconditionally; the task's return value is not
 * inspected.
 */
FLA_Error FLASH_SA_FS ( FLA_Obj  L,
FLA_Obj  D,
FLA_Obj  p,
FLA_Obj  C,
FLA_Obj  E,
dim_t  nb_alg 
)

References FLA_Cont_with_1x3_to_1x2(), FLA_Obj_width(), FLA_Part_1x2(), FLA_Repart_1x2_to_1x3(), FLA_SA_FS_task(), and FLASH_Queue_get_enabled().

Referenced by FLASH_SA_LU().

00038 {
00039    FLA_Obj CL,    CR,       C0,  C1,  C2;
00040 
00041    FLA_Obj EL,    ER,       E0,  E1,  E2;
00042 
00043    FLA_Part_1x2( C,    &CL,  &CR,      0, FLA_LEFT );
00044 
00045    FLA_Part_1x2( E,    &EL,  &ER,      0, FLA_LEFT );
00046    /* Iterate until CL spans the full width of C. */
00047    while ( FLA_Obj_width( CL ) < FLA_Obj_width( C ) )
00048    {
00049       FLA_Repart_1x2_to_1x3( CL,  /**/ CR,        &C0, /**/ &C1, &C2,
00050                              1, FLA_RIGHT );
00051 
00052       FLA_Repart_1x2_to_1x3( EL,  /**/ ER,        &E0, /**/ &E1, &E2,
00053                              1, FLA_RIGHT );
00054 
00055       /*------------------------------------------------------------*/
00056       /* L, D and p are passed unpartitioned; only C1 and E1 change per pass. */
00057       if ( FLASH_Queue_get_enabled( ) )
00058       {
00059          // Enqueue
00060          ENQUEUE_FLASH_SA_FS( *FLASH_OBJ_PTR_AT( L ),
00061                               *FLASH_OBJ_PTR_AT( D ),
00062                               *FLASH_OBJ_PTR_AT( p ),
00063                               *FLASH_OBJ_PTR_AT( C1 ),
00064                               *FLASH_OBJ_PTR_AT( E1 ),
00065                               nb_alg,
00066                               NULL );
00067       }
00068       else
00069       {
00070          // Execute leaf
00071          FLA_SA_FS_task( *FLASH_OBJ_PTR_AT( L ),
00072                          *FLASH_OBJ_PTR_AT( D ),
00073                          *FLASH_OBJ_PTR_AT( p ),
00074                          *FLASH_OBJ_PTR_AT( C1 ),
00075                          *FLASH_OBJ_PTR_AT( E1 ),
00076                          nb_alg,
00077                          NULL );
00078       }
00079       
00080       /*------------------------------------------------------------*/
00081 
00082       FLA_Cont_with_1x3_to_1x2( &CL,  /**/ &CR,        C0, C1, /**/ C2,
00083                                 FLA_LEFT );
00084 
00085       FLA_Cont_with_1x3_to_1x2( &EL,  /**/ &ER,        E0, E1, /**/ E2,
00086                                 FLA_LEFT );
00087    }
00088    
00089    return FLA_SUCCESS;
00090 }

/*
 * FLASH_SA_LU
 *
 * Steps through D, E, p and L from top to bottom, one partition step
 * (repartition size 1) at a time.  On each step it:
 *
 *   1. passes the objects at B, D1, p1 and L1, plus nb_alg, to
 *      ENQUEUE_FLASH_SA_LU() when FLASH_Queue_get_enabled() is true, or to
 *      FLA_SA_LU_task() otherwise;
 *   2. calls FLASH_SA_FS( L1, D1, p1, C, E1, nb_alg ).
 *
 * Parameters:
 *   B, C        - never partitioned; the same objects are used on every pass.
 *   D, E, p, L  - partitioned in lock-step along their length.
 *   nb_alg      - forwarded unchanged to the task and to FLASH_SA_FS().
 *
 * Returns FLA_SUCCESS unconditionally; return values of the calls above are
 * not inspected.
 */
FLA_Error FLASH_SA_LU ( FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  D,
FLA_Obj  E,
FLA_Obj  p,
FLA_Obj  L,
dim_t  nb_alg 
)

References FLA_Cont_with_3x1_to_2x1(), FLA_Obj_length(), FLA_Part_2x1(), FLA_Repart_2x1_to_3x1(), FLA_SA_LU_task(), FLASH_Queue_get_enabled(), and FLASH_SA_FS().

Referenced by FLASH_LU_incpiv_var1(), and FLASH_LU_incpiv_var2().

00037 {
00038    FLA_Obj DT,              D0,
00039            DB,              D1,
00040                             D2;
00041 
00042    FLA_Obj ET,              E0,
00043            EB,              E1,
00044                             E2;
00045 
00046    FLA_Obj pT,              p0,
00047            pB,              p1,
00048                             p2;
00049 
00050    FLA_Obj LT,              L0,
00051            LB,              L1,
00052                             L2;
00053 
00054    FLA_Part_2x1( D,    &DT,
00055                        &DB,            0, FLA_TOP );
00056 
00057    FLA_Part_2x1( E,    &ET,
00058                        &EB,            0, FLA_TOP );
00059 
00060    FLA_Part_2x1( p,    &pT,
00061                        &pB,            0, FLA_TOP );
00062 
00063    FLA_Part_2x1( L,    &LT,
00064                        &LB,            0, FLA_TOP );
00065    /* Iterate until DT spans the full length of D. */
00066    while ( FLA_Obj_length( DT ) < FLA_Obj_length( D ) )
00067    {
00068       FLA_Repart_2x1_to_3x1( DT,                &D0,
00069                           /* ** */            /* ** */
00070                                                 &D1,
00071                              DB,                &D2,        1, FLA_BOTTOM );
00072 
00073       FLA_Repart_2x1_to_3x1( ET,                &E0,
00074                           /* ** */            /* ** */
00075                                                 &E1,
00076                              EB,                &E2,        1, FLA_BOTTOM );
00077 
00078       FLA_Repart_2x1_to_3x1( pT,                &p0,
00079                           /* ** */            /* ** */
00080                                                 &p1,
00081                              pB,                &p2,        1, FLA_BOTTOM );
00082 
00083       FLA_Repart_2x1_to_3x1( LT,                &L0,
00084                           /* ** */            /* ** */
00085                                                 &L1,
00086                              LB,                &L2,        1, FLA_BOTTOM );
00087 
00088       /*------------------------------------------------------------*/
00089       /* B is passed unpartitioned; D1, p1, L1 are the current blocks. */
00090       if ( FLASH_Queue_get_enabled( ) )
00091       {
00092          // Enqueue
00093          ENQUEUE_FLASH_SA_LU( *FLASH_OBJ_PTR_AT( B ),
00094                               *FLASH_OBJ_PTR_AT( D1 ),
00095                               *FLASH_OBJ_PTR_AT( p1 ),
00096                               *FLASH_OBJ_PTR_AT( L1 ),
00097                               nb_alg,
00098                               NULL );
00099       }
00100       else
00101       {
00102          // Execute leaf
00103          FLA_SA_LU_task( *FLASH_OBJ_PTR_AT( B ),
00104                          *FLASH_OBJ_PTR_AT( D1 ),
00105                          *FLASH_OBJ_PTR_AT( p1 ),
00106                          *FLASH_OBJ_PTR_AT( L1 ),
00107                          nb_alg,
00108                          NULL );
00109       }
00110       /* Then hand the same L1, D1, p1, plus C and E1, to FLASH_SA_FS. */
00111       FLASH_SA_FS( L1,
00112                    D1, p1, C,
00113                            E1, nb_alg );
00114 
00115       /*------------------------------------------------------------*/
00116 
00117       FLA_Cont_with_3x1_to_2x1( &DT,                D0,
00118                                                     D1,
00119                               /* ** */           /* ** */
00120                                 &DB,                D2,     FLA_TOP );
00121 
00122       FLA_Cont_with_3x1_to_2x1( &ET,                E0,
00123                                                     E1,
00124                               /* ** */           /* ** */
00125                                 &EB,                E2,     FLA_TOP );
00126 
00127       FLA_Cont_with_3x1_to_2x1( &pT,                p0,
00128                                                     p1,
00129                               /* ** */           /* ** */
00130                                 &pB,                p2,     FLA_TOP );
00131 
00132       FLA_Cont_with_3x1_to_2x1( &LT,                L0,
00133                                                     L1,
00134                               /* ** */           /* ** */
00135                                 &LB,                L2,     FLA_TOP );
00136    }
00137    
00138    return FLA_SUCCESS;
00139 }

/*
 * FLASH_Trsm_piv
 *
 * Steps through B from left to right, one partition step (repartition
 * size 1) at a time.  For each step, A, the object at B1 and the object at
 * p are passed either to ENQUEUE_FLASH_Trsm_piv() when
 * FLASH_Queue_get_enabled() is true or to FLA_Trsm_piv_task() otherwise.
 *
 * Parameters:
 *   A  - passed through as given (not dereferenced with FLASH_OBJ_PTR_AT),
 *        so callers supply the object the task should operate on directly.
 *   B  - partitioned along its width.
 *   p  - never partitioned; dereferenced with FLASH_OBJ_PTR_AT on each pass.
 *
 * Returns FLA_SUCCESS unconditionally; the task's return value is not
 * inspected.
 */
FLA_Error FLASH_Trsm_piv ( FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  p 
)

References FLA_Cont_with_1x3_to_1x2(), FLA_Obj_width(), FLA_Part_1x2(), FLA_Repart_1x2_to_1x3(), FLA_Trsm_piv_task(), and FLASH_Queue_get_enabled().

Referenced by FLASH_LU_incpiv_var1(), and FLASH_LU_incpiv_var2().

00036 {
00037    FLA_Obj BL,    BR,       B0,  B1,  B2;
00038 
00039    FLA_Part_1x2( B,    &BL,  &BR,      0, FLA_LEFT );
00040    /* Iterate until BL spans the full width of B. */
00041    while ( FLA_Obj_width( BL ) < FLA_Obj_width( B ) )
00042    {
00043       FLA_Repart_1x2_to_1x3( BL,  /**/ BR,        &B0, /**/ &B1, &B2,
00044                              1, FLA_RIGHT );
00045 
00046       /*------------------------------------------------------------*/
00047       /* A is passed as given; B1 and p go through FLASH_OBJ_PTR_AT. */
00048       if ( FLASH_Queue_get_enabled( ) )
00049       {
00050          // Enqueue
00051          ENQUEUE_FLASH_Trsm_piv( A,
00052                                  *FLASH_OBJ_PTR_AT( B1 ),
00053                                  *FLASH_OBJ_PTR_AT( p ),
00054                                  NULL );
00055       }
00056       else
00057       {
00058          // Execute leaf
00059          FLA_Trsm_piv_task( A,
00060                             *FLASH_OBJ_PTR_AT( B1 ),
00061                             *FLASH_OBJ_PTR_AT( p ),
00062                             NULL );
00063       }
00064 
00065       /*------------------------------------------------------------*/
00066 
00067       FLA_Cont_with_1x3_to_1x2( &BL,  /**/ &BR,        B0, B1, /**/ B2,
00068                                 FLA_LEFT );
00069    }
00070    
00071    return FLA_SUCCESS;
00072 }


Generated on Mon Jul 6 05:45:56 2009 for libflame by  doxygen 1.5.9