Go to the source code of this file.
Functions | |
FLA_Error | FLA_SA_Apply_pivots (FLA_Obj C, FLA_Obj E, FLA_Obj p) |
FLA_Error | FLA_SA_LU_blk (FLA_Obj U, FLA_Obj D, FLA_Obj p, FLA_Obj L, dim_t nb_alg) |
FLA_Error | FLA_SA_LU_unb (FLA_Obj U, FLA_Obj D, FLA_Obj p, FLA_Obj L) |
FLA_Error | FLA_SA_FS_blk (FLA_Obj L, FLA_Obj D, FLA_Obj p, FLA_Obj C, FLA_Obj E, dim_t nb_alg) |
FLA_Error | FLASH_LU_incpiv_var1 (FLA_Obj A, FLA_Obj p, FLA_Obj L, dim_t nb_alg) |
FLA_Error | FLASH_LU_incpiv_var2 (FLA_Obj A, FLA_Obj p, FLA_Obj L, dim_t nb_alg) |
FLA_Error | FLASH_Trsm_piv (FLA_Obj A, FLA_Obj B, FLA_Obj p) |
FLA_Error | FLASH_SA_LU (FLA_Obj B, FLA_Obj C, FLA_Obj D, FLA_Obj E, FLA_Obj p, FLA_Obj L, dim_t nb_alg) |
FLA_Error | FLASH_SA_FS (FLA_Obj L, FLA_Obj D, FLA_Obj p, FLA_Obj C, FLA_Obj E, dim_t nb_alg) |
FLA_Error | FLASH_FS_incpiv_aux1 (FLA_Obj A, FLA_Obj p, FLA_Obj L, FLA_Obj b, dim_t nb_alg) |
FLA_Error | FLASH_FS_incpiv_aux2 (FLA_Obj L, FLA_Obj D, FLA_Obj p, FLA_Obj C, FLA_Obj E, dim_t nb_alg) |
References cblas_cswap(), cblas_dswap(), cblas_sswap(), cblas_zswap(), cswap(), dswap(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_ldim(), FLA_Obj_length(), FLA_Obj_width(), sswap(), and zswap().
Referenced by FLA_SA_FS_blk(), and FLA_SA_LU_blk().
00036 { 00037 FLA_Datatype datatype; 00038 int m_C, n_C, ldim_C; 00039 int m_E, ldim_E; 00040 int m_p; 00041 int i; 00042 int* buff_p; 00043 00044 if ( FLA_Obj_has_zero_dim( C ) ) return FLA_SUCCESS; 00045 00046 datatype = FLA_Obj_datatype( C ); 00047 00048 m_C = FLA_Obj_length( C ); 00049 n_C = FLA_Obj_width( C ); 00050 ldim_C = FLA_Obj_ldim( C ); 00051 00052 m_E = FLA_Obj_length( E ); 00053 ldim_E = FLA_Obj_ldim( E ); 00054 00055 m_p = FLA_Obj_length( p ); 00056 00057 buff_p = ( int * ) FLA_INT_PTR( p ); 00058 00059 00060 switch ( datatype ){ 00061 00062 case FLA_FLOAT: 00063 { 00064 float* buff_C = ( float * ) FLA_FLOAT_PTR( C ); 00065 float* buff_E = ( float * ) FLA_FLOAT_PTR( E ); 00066 00067 for ( i = 0; i < m_p; ++i ) 00068 { 00069 if ( buff_p[ i ] != 0 ) 00070 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00071 cblas_sswap( n_C, 00072 buff_C + 0*ldim_C + i, ldim_C, 00073 buff_E + 0*ldim_E + buff_p[ i ] - ( m_C - i ), ldim_E ); 00074 #else 00075 FLA_C2F( sswap ) ( &n_C, 00076 buff_C + 0*ldim_C + i, &ldim_C, 00077 buff_E + 0*ldim_E + buff_p[ i ] - ( m_C - i ), &ldim_E ); 00078 #endif 00079 } 00080 break; 00081 } 00082 00083 case FLA_DOUBLE: 00084 { 00085 double* buff_C = ( double * ) FLA_DOUBLE_PTR( C ); 00086 double* buff_E = ( double * ) FLA_DOUBLE_PTR( E ); 00087 00088 for ( i = 0; i < m_p; ++i ) 00089 { 00090 if ( buff_p[ i ] != 0 ) 00091 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00092 cblas_dswap( n_C, 00093 buff_C + 0*ldim_C + i, ldim_C, 00094 buff_E + 0*ldim_E + buff_p[ i ] - ( m_C - i ), ldim_E ); 00095 #else 00096 FLA_C2F( dswap ) ( &n_C, 00097 buff_C + 0*ldim_C + i, &ldim_C, 00098 buff_E + 0*ldim_E + buff_p[ i ] - ( m_C - i ), &ldim_E ); 00099 #endif 00100 } 00101 break; 00102 } 00103 00104 case FLA_COMPLEX: 00105 { 00106 scomplex* buff_C = ( scomplex * ) FLA_COMPLEX_PTR( C ); 00107 scomplex* buff_E = ( scomplex * ) FLA_COMPLEX_PTR( E ); 00108 00109 for ( i = 0; i < m_p; ++i ) 00110 { 00111 if ( buff_p[ i ] != 0 ) 00112 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00113 
cblas_cswap( n_C, 00114 buff_C + 0*ldim_C + i, ldim_C, 00115 buff_E + 0*ldim_E + buff_p[ i ] - ( m_C - i ), ldim_E ); 00116 #else 00117 FLA_C2F( cswap ) ( &n_C, 00118 buff_C + 0*ldim_C + i, &ldim_C, 00119 buff_E + 0*ldim_E + buff_p[ i ] - ( m_C - i ), &ldim_E ); 00120 #endif 00121 } 00122 break; 00123 } 00124 00125 case FLA_DOUBLE_COMPLEX: 00126 { 00127 dcomplex* buff_C = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( C ); 00128 dcomplex* buff_E = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( E ); 00129 00130 for ( i = 0; i < m_p; ++i ) 00131 { 00132 if ( buff_p[ i ] != 0 ) 00133 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00134 cblas_zswap( n_C, 00135 buff_C + 0*ldim_C + i, ldim_C, 00136 buff_E + 0*ldim_E + buff_p[ i ] - ( m_C - i ), ldim_E ); 00137 #else 00138 FLA_C2F( zswap ) ( &n_C, 00139 buff_C + 0*ldim_C + i, &ldim_C, 00140 buff_E + 0*ldim_E + buff_p[ i ] - ( m_C - i ), &ldim_E ); 00141 #endif 00142 } 00143 break; 00144 } 00145 00146 } 00147 00148 return FLA_SUCCESS; 00149 }
References FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Gemm_external(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_SA_Apply_pivots(), and FLA_Trsm_external().
Referenced by FLA_SA_FS_task(), and FLASH_FS_incpiv_aux2().
00038 { 00039 FLA_Obj LT, L0, 00040 LB, L1, 00041 L2; 00042 00043 FLA_Obj DL, DR, D0, D1, D2; 00044 00045 FLA_Obj pT, p0, 00046 pB, p1, 00047 p2; 00048 00049 FLA_Obj CT, C0, 00050 CB, C1, 00051 C2; 00052 00053 FLA_Obj L1_sqr, L1_rest; 00054 00055 dim_t b; 00056 00057 FLA_Part_2x1( L, <, 00058 &LB, 0, FLA_TOP ); 00059 00060 FLA_Part_1x2( D, &DL, &DR, 0, FLA_LEFT ); 00061 00062 FLA_Part_2x1( p, &pT, 00063 &pB, 0, FLA_TOP ); 00064 00065 FLA_Part_2x1( C, &CT, 00066 &CB, 0, FLA_TOP ); 00067 00068 while ( FLA_Obj_length( LT ) < FLA_Obj_length( L ) ) 00069 { 00070 b = min( FLA_Obj_length( LB ), nb_alg ); 00071 00072 FLA_Repart_2x1_to_3x1( LT, &L0, 00073 /* ** */ /* ** */ 00074 &L1, 00075 LB, &L2, b, FLA_BOTTOM ); 00076 00077 FLA_Repart_1x2_to_1x3( DL, /**/ DR, &D0, /**/ &D1, &D2, 00078 b, FLA_RIGHT ); 00079 00080 FLA_Repart_2x1_to_3x1( pT, &p0, 00081 /* ** */ /* ** */ 00082 &p1, 00083 pB, &p2, b, FLA_BOTTOM ); 00084 00085 FLA_Repart_2x1_to_3x1( CT, &C0, 00086 /* ** */ /* ** */ 00087 &C1, 00088 CB, &C2, b, FLA_BOTTOM ); 00089 00090 /*------------------------------------------------------------*/ 00091 00092 FLA_Part_1x2( L1, &L1_sqr, &L1_rest, b, FLA_LEFT ); 00093 00094 00095 FLA_SA_Apply_pivots( C1, 00096 E, p1 ); 00097 00098 FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR, 00099 FLA_NO_TRANSPOSE, FLA_UNIT_DIAG, 00100 FLA_ONE, L1_sqr, C1 ); 00101 00102 FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00103 FLA_MINUS_ONE, D1, C1, FLA_ONE, E ); 00104 00105 /*------------------------------------------------------------*/ 00106 00107 FLA_Cont_with_3x1_to_2x1( <, L0, 00108 L1, 00109 /* ** */ /* ** */ 00110 &LB, L2, FLA_TOP ); 00111 00112 FLA_Cont_with_1x3_to_1x2( &DL, /**/ &DR, D0, D1, /**/ D2, 00113 FLA_LEFT ); 00114 00115 FLA_Cont_with_3x1_to_2x1( &pT, p0, 00116 p1, 00117 /* ** */ /* ** */ 00118 &pB, p2, FLA_TOP ); 00119 00120 FLA_Cont_with_3x1_to_2x1( &CT, C0, 00121 C1, 00122 /* ** */ /* ** */ 00123 &CB, C2, FLA_TOP ); 00124 } 00125 00126 return FLA_SUCCESS; 00127 }
References FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Gemm_external(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_SA_Apply_pivots(), FLA_SA_LU_unb(), and FLA_Trsm_external().
Referenced by FLA_SA_LU_task().
00037 { 00038 FLA_Obj UTL, UTR, U00, U01, U02, 00039 UBL, UBR, U10, U11, U12, 00040 U20, U21, U22; 00041 00042 FLA_Obj DL, DR, D0, D1, D2; 00043 00044 FLA_Obj pT, p0, 00045 pB, p1, 00046 p2; 00047 00048 FLA_Obj LT, L0, 00049 LB, L1, 00050 L2; 00051 00052 FLA_Obj L1_sqr, L1_rest; 00053 00054 dim_t b; 00055 00056 FLA_Part_2x2( U, &UTL, &UTR, 00057 &UBL, &UBR, 0, 0, FLA_TL ); 00058 00059 FLA_Part_1x2( D, &DL, &DR, 0, FLA_LEFT ); 00060 00061 FLA_Part_2x1( p, &pT, 00062 &pB, 0, FLA_TOP ); 00063 00064 FLA_Part_2x1( L, <, 00065 &LB, 0, FLA_TOP ); 00066 00067 while ( FLA_Obj_length( UTL ) < FLA_Obj_length( U ) ) 00068 { 00069 b = min( FLA_Obj_length( UBR ), nb_alg ); 00070 00071 FLA_Repart_2x2_to_3x3( UTL, /**/ UTR, &U00, /**/ &U01, &U02, 00072 /* ************* */ /* ******************** */ 00073 &U10, /**/ &U11, &U12, 00074 UBL, /**/ UBR, &U20, /**/ &U21, &U22, 00075 b, b, FLA_BR ); 00076 00077 FLA_Repart_1x2_to_1x3( DL, /**/ DR, &D0, /**/ &D1, &D2, 00078 b, FLA_RIGHT ); 00079 00080 FLA_Repart_2x1_to_3x1( pT, &p0, 00081 /* ** */ /* ** */ 00082 &p1, 00083 pB, &p2, b, FLA_BOTTOM ); 00084 00085 FLA_Repart_2x1_to_3x1( LT, &L0, 00086 /* ** */ /* ** */ 00087 &L1, 00088 LB, &L2, b, FLA_BOTTOM ); 00089 00090 /*------------------------------------------------------------*/ 00091 00092 FLA_Part_1x2( L1, &L1_sqr, &L1_rest, b, FLA_LEFT ); 00093 00094 00095 FLA_SA_LU_unb( U11, 00096 D1, p1, L1_sqr ); 00097 00098 FLA_SA_Apply_pivots( U12, 00099 D2, p1 ); 00100 00101 FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR, 00102 FLA_NO_TRANSPOSE, FLA_UNIT_DIAG, 00103 FLA_ONE, L1_sqr, U12 ); 00104 00105 FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00106 FLA_MINUS_ONE, D1, U12, FLA_ONE, D2 ); 00107 00108 /*------------------------------------------------------------*/ 00109 00110 FLA_Cont_with_3x3_to_2x2( &UTL, /**/ &UTR, U00, U01, /**/ U02, 00111 U10, U11, /**/ U12, 00112 /* ************** */ /* ****************** */ 00113 &UBL, /**/ &UBR, U20, U21, /**/ U22, 00114 FLA_TL ); 00115 
00116 FLA_Cont_with_1x3_to_1x2( &DL, /**/ &DR, D0, D1, /**/ D2, 00117 FLA_LEFT ); 00118 00119 FLA_Cont_with_3x1_to_2x1( &pT, p0, 00120 p1, 00121 /* ** */ /* ** */ 00122 &pB, p2, FLA_TOP ); 00123 00124 FLA_Cont_with_3x1_to_2x1( <, L0, 00125 L1, 00126 /* ** */ /* ** */ 00127 &LB, L2, FLA_TOP ); 00128 } 00129 00130 return FLA_SUCCESS; 00131 }
References cblas_ccopy(), cblas_cgeru(), cblas_cscal(), cblas_cswap(), cblas_dcopy(), cblas_dger(), cblas_dscal(), cblas_dswap(), cblas_icamax(), cblas_idamax(), cblas_isamax(), cblas_izamax(), cblas_scopy(), cblas_sger(), cblas_sscal(), cblas_sswap(), cblas_zcopy(), cblas_zgeru(), cblas_zscal(), cblas_zswap(), CblasColMajor, ccopy(), cgeru(), cscal(), cswap(), dcopy(), dger(), dscal(), dswap(), FLA_Copy_external(), FLA_MINUS_ONE, FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_ldim(), FLA_Obj_length(), FLA_ONE, FLA_Triangularize(), icamax(), idamax(), dcomplex::imag, scomplex::imag, isamax(), izamax(), dcomplex::real, scomplex::real, scopy(), sger(), sscal(), sswap(), zcopy(), zgeru(), zscal(), and zswap().
Referenced by FLA_SA_LU_blk().
00038 { 00039 FLA_Datatype datatype; 00040 int m_U, ldim_U; 00041 int m_D, ldim_D; 00042 int ldim_L; 00043 int m_U_min_j, m_U_min_j_min_1; 00044 int j, ipiv; 00045 int* buff_p; 00046 int* buff_1_int; 00047 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00048 CBLAS_ORDER cblas_order = CblasColMajor; 00049 #endif 00050 00051 if ( FLA_Obj_has_zero_dim( U ) ) return FLA_SUCCESS; 00052 00053 datatype = FLA_Obj_datatype( U ); 00054 00055 m_U = FLA_Obj_length( U ); 00056 ldim_U = FLA_Obj_ldim( U ); 00057 00058 m_D = FLA_Obj_length( D ); 00059 ldim_D = FLA_Obj_ldim( D ); 00060 00061 ldim_L = FLA_Obj_ldim( L ); 00062 00063 FLA_Copy_external( U, L ); 00064 FLA_Triangularize( FLA_UPPER_TRIANGULAR, FLA_NONUNIT_DIAG, L ); 00065 00066 buff_p = ( int * ) FLA_INT_PTR( p ); 00067 buff_1_int = ( int * ) FLA_INT_PTR( FLA_ONE ); 00068 00069 switch ( datatype ){ 00070 00071 case FLA_FLOAT: 00072 { 00073 float* buff_U = ( float * ) FLA_FLOAT_PTR( U ); 00074 float* buff_D = ( float * ) FLA_FLOAT_PTR( D ); 00075 float* buff_L = ( float * ) FLA_FLOAT_PTR( L ); 00076 float* buff_minus1 = ( float * ) FLA_FLOAT_PTR( FLA_MINUS_ONE ); 00077 float L_tmp; 00078 float D_tmp; 00079 float d_inv_Ljj; 00080 00081 for ( j = 0; j < m_U; ++j ) 00082 { 00083 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00084 ipiv = cblas_isamax( m_D, 00085 buff_D + j*ldim_D + 0, 00086 *buff_1_int ); 00087 #else 00088 ipiv = FLA_C2F( isamax )( &m_D, 00089 buff_D + j*ldim_D + 0, 00090 buff_1_int ) - 1; 00091 #endif 00092 00093 L_tmp = buff_L[ j*ldim_L + j ]; 00094 D_tmp = buff_D[ j*ldim_D + ipiv ]; 00095 00096 if ( dabs( L_tmp ) < dabs( D_tmp ) ) 00097 { 00098 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00099 cblas_sswap( m_U, 00100 buff_L + 0*ldim_L + j, ldim_L, 00101 buff_D + 0*ldim_D + ipiv, ldim_D ); 00102 #else 00103 FLA_C2F( sswap )( &m_U, 00104 buff_L + 0*ldim_L + j, &ldim_L, 00105 buff_D + 0*ldim_D + ipiv, &ldim_D ); 00106 #endif 00107 00108 buff_p[ j ] = ipiv + m_U - j; 00109 } 00110 else 00111 { 00112 buff_p[ j ] = 0; 00113 } 00114 00115 d_inv_Ljj 
= 1.0F / buff_L[ j*ldim_L + j ]; 00116 00117 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00118 cblas_sscal( m_D, 00119 d_inv_Ljj, 00120 buff_D + j*ldim_D + 0, *buff_1_int ); 00121 #else 00122 FLA_C2F( sscal )( &m_D, 00123 &d_inv_Ljj, 00124 buff_D + j*ldim_D + 0, buff_1_int ); 00125 #endif 00126 00127 m_U_min_j_min_1 = m_U - j - 1; 00128 00129 if ( m_U_min_j_min_1 > 0 ) 00130 { 00131 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00132 cblas_sger( cblas_order, 00133 m_D, m_U_min_j_min_1, 00134 *buff_minus1, 00135 buff_D + j*ldim_D + 0, *buff_1_int, 00136 buff_L + (j+1)*ldim_L + j, ldim_L, 00137 buff_D + (j+1)*ldim_D + 0, ldim_D ); 00138 #else 00139 FLA_C2F( sger )( &m_D, &m_U_min_j_min_1, 00140 buff_minus1, 00141 buff_D + j*ldim_D + 0, buff_1_int, 00142 buff_L + (j+1)*ldim_L + j, &ldim_L, 00143 buff_D + (j+1)*ldim_D + 0, &ldim_D ); 00144 #endif 00145 } 00146 00147 m_U_min_j = m_U - j; 00148 00149 if ( m_U_min_j > 0 ) 00150 { 00151 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00152 cblas_scopy( m_U_min_j, 00153 buff_L + j*ldim_L + j, ldim_L, 00154 buff_U + j*ldim_U + j, ldim_U ); 00155 #else 00156 FLA_C2F( scopy )( &m_U_min_j, 00157 buff_L + j*ldim_L + j, &ldim_L, 00158 buff_U + j*ldim_U + j, &ldim_U ); 00159 #endif 00160 } 00161 } 00162 break; 00163 } 00164 00165 case FLA_DOUBLE: 00166 { 00167 double* buff_U = ( double * ) FLA_DOUBLE_PTR( U ); 00168 double* buff_D = ( double * ) FLA_DOUBLE_PTR( D ); 00169 double* buff_L = ( double * ) FLA_DOUBLE_PTR( L ); 00170 double* buff_minus1 = ( double * ) FLA_DOUBLE_PTR( FLA_MINUS_ONE ); 00171 double L_tmp; 00172 double D_tmp; 00173 double d_inv_Ljj; 00174 00175 for ( j = 0; j < m_U; ++j ) 00176 { 00177 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00178 ipiv = cblas_idamax( m_D, 00179 buff_D + j*ldim_D + 0, 00180 *buff_1_int ); 00181 #else 00182 ipiv = FLA_C2F( idamax )( &m_D, 00183 buff_D + j*ldim_D + 0, 00184 buff_1_int ) - 1; 00185 #endif 00186 00187 L_tmp = buff_L[ j*ldim_L + j ]; 00188 D_tmp = buff_D[ j*ldim_D + ipiv ]; 00189 00190 if ( dabs( L_tmp ) < dabs( D_tmp 
) ) 00191 { 00192 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00193 cblas_dswap( m_U, 00194 buff_L + 0*ldim_L + j, ldim_L, 00195 buff_D + 0*ldim_D + ipiv, ldim_D ); 00196 #else 00197 FLA_C2F( dswap )( &m_U, 00198 buff_L + 0*ldim_L + j, &ldim_L, 00199 buff_D + 0*ldim_D + ipiv, &ldim_D ); 00200 #endif 00201 00202 buff_p[ j ] = ipiv + m_U - j; 00203 } 00204 else 00205 { 00206 buff_p[ j ] = 0; 00207 } 00208 00209 d_inv_Ljj = 1.0 / buff_L[ j*ldim_L + j ]; 00210 00211 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00212 cblas_dscal( m_D, 00213 d_inv_Ljj, 00214 buff_D + j*ldim_D + 0, *buff_1_int ); 00215 #else 00216 FLA_C2F( dscal )( &m_D, 00217 &d_inv_Ljj, 00218 buff_D + j*ldim_D + 0, buff_1_int ); 00219 #endif 00220 00221 m_U_min_j_min_1 = m_U - j - 1; 00222 00223 if ( m_U_min_j_min_1 > 0 ) 00224 { 00225 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00226 cblas_dger( cblas_order, 00227 m_D, m_U_min_j_min_1, 00228 *buff_minus1, 00229 buff_D + j*ldim_D + 0, *buff_1_int, 00230 buff_L + (j+1)*ldim_L + j, ldim_L, 00231 buff_D + (j+1)*ldim_D + 0, ldim_D ); 00232 #else 00233 FLA_C2F( dger )( &m_D, &m_U_min_j_min_1, 00234 buff_minus1, 00235 buff_D + j*ldim_D + 0, buff_1_int, 00236 buff_L + (j+1)*ldim_L + j, &ldim_L, 00237 buff_D + (j+1)*ldim_D + 0, &ldim_D ); 00238 #endif 00239 } 00240 00241 m_U_min_j = m_U - j; 00242 00243 if ( m_U_min_j > 0 ) 00244 { 00245 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00246 cblas_dcopy( m_U_min_j, 00247 buff_L + j*ldim_L + j, ldim_L, 00248 buff_U + j*ldim_U + j, ldim_U ); 00249 #else 00250 FLA_C2F( dcopy )( &m_U_min_j, 00251 buff_L + j*ldim_L + j, &ldim_L, 00252 buff_U + j*ldim_U + j, &ldim_U ); 00253 #endif 00254 } 00255 } 00256 break; 00257 } 00258 00259 case FLA_COMPLEX: 00260 { 00261 scomplex* buff_U = ( scomplex * ) FLA_COMPLEX_PTR( U ); 00262 scomplex* buff_D = ( scomplex * ) FLA_COMPLEX_PTR( D ); 00263 scomplex* buff_L = ( scomplex * ) FLA_COMPLEX_PTR( L ); 00264 scomplex* buff_minus1 = ( scomplex * ) FLA_COMPLEX_PTR( FLA_MINUS_ONE ); 00265 scomplex L_tmp; 00266 scomplex D_tmp; 
00267 scomplex d_inv_Ljj; 00268 scomplex Ljj; 00269 float temp; 00270 00271 for ( j = 0; j < m_U; ++j ) 00272 { 00273 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00274 ipiv = cblas_icamax( m_D, 00275 buff_D + j*ldim_D + 0, 00276 *buff_1_int ); 00277 #else 00278 ipiv = FLA_C2F( icamax )( &m_D, 00279 buff_D + j*ldim_D + 0, 00280 buff_1_int ) - 1; 00281 #endif 00282 00283 L_tmp = buff_L[ j*ldim_L + j ]; 00284 D_tmp = buff_D[ j*ldim_D + ipiv ]; 00285 00286 if ( dabs( L_tmp.real + L_tmp.imag ) < dabs( D_tmp.real + D_tmp.imag ) ) 00287 { 00288 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00289 cblas_cswap( m_U, 00290 buff_L + 0*ldim_L + j, ldim_L, 00291 buff_D + 0*ldim_D + ipiv, ldim_D ); 00292 #else 00293 FLA_C2F( cswap )( &m_U, 00294 buff_L + 0*ldim_L + j, &ldim_L, 00295 buff_D + 0*ldim_D + ipiv, &ldim_D ); 00296 #endif 00297 00298 buff_p[ j ] = ipiv + m_U - j; 00299 } 00300 else 00301 { 00302 buff_p[ j ] = 0; 00303 } 00304 00305 Ljj = buff_L[ j*ldim_L + j ]; 00306 00307 // d_inv_Ljj = 1.0 / Ljj 00308 temp = 1.0F / ( Ljj.real * Ljj.real + 00309 Ljj.imag * Ljj.imag ); 00310 d_inv_Ljj.real = Ljj.real * temp; 00311 d_inv_Ljj.imag = Ljj.imag * -temp; 00312 00313 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00314 cblas_cscal( m_D, 00315 d_inv_Ljj, 00316 buff_D + j*ldim_D + 0, *buff_1_int ); 00317 #else 00318 FLA_C2F( cscal )( &m_D, 00319 &d_inv_Ljj, 00320 buff_D + j*ldim_D + 0, buff_1_int ); 00321 #endif 00322 00323 m_U_min_j_min_1 = m_U - j - 1; 00324 00325 if ( m_U_min_j_min_1 > 0 ) 00326 { 00327 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00328 cblas_cgeru( cblas_order, 00329 m_D, m_U_min_j_min_1, 00330 *buff_minus1, 00331 buff_D + j*ldim_D + 0, *buff_1_int, 00332 buff_L + (j+1)*ldim_L + j, ldim_L, 00333 buff_D + (j+1)*ldim_D + 0, ldim_D ); 00334 #else 00335 FLA_C2F( cgeru )( &m_D, &m_U_min_j_min_1, 00336 buff_minus1, 00337 buff_D + j*ldim_D + 0, buff_1_int, 00338 buff_L + (j+1)*ldim_L + j, &ldim_L, 00339 buff_D + (j+1)*ldim_D + 0, &ldim_D ); 00340 #endif 00341 } 00342 00343 m_U_min_j = m_U - j; 00344 00345 if ( 
m_U_min_j > 0 ) 00346 { 00347 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00348 cblas_ccopy( m_U_min_j, 00349 buff_L + j*ldim_L + j, ldim_L, 00350 buff_U + j*ldim_U + j, ldim_U ); 00351 #else 00352 FLA_C2F( ccopy )( &m_U_min_j, 00353 buff_L + j*ldim_L + j, &ldim_L, 00354 buff_U + j*ldim_U + j, &ldim_U ); 00355 #endif 00356 } 00357 } 00358 break; 00359 } 00360 00361 case FLA_DOUBLE_COMPLEX: 00362 { 00363 dcomplex* buff_U = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( U ); 00364 dcomplex* buff_D = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( D ); 00365 dcomplex* buff_L = ( dcomplex * ) FLA_COMPLEX_PTR( L ); 00366 dcomplex* buff_minus1 = ( dcomplex * ) FLA_COMPLEX_PTR( FLA_MINUS_ONE ); 00367 dcomplex L_tmp; 00368 dcomplex D_tmp; 00369 dcomplex d_inv_Ljj; 00370 dcomplex Ljj; 00371 double temp; 00372 00373 for ( j = 0; j < m_U; ++j ) 00374 { 00375 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00376 ipiv = cblas_izamax( m_D, 00377 buff_D + j*ldim_D + 0, 00378 *buff_1_int ); 00379 #else 00380 ipiv = FLA_C2F( izamax )( &m_D, 00381 buff_D + j*ldim_D + 0, 00382 buff_1_int ) - 1; 00383 #endif 00384 00385 L_tmp = buff_L[ j*ldim_L + j ]; 00386 D_tmp = buff_D[ j*ldim_D + ipiv ]; 00387 00388 if ( dabs( L_tmp.real + L_tmp.imag ) < dabs( D_tmp.real + D_tmp.imag ) ) 00389 { 00390 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00391 cblas_zswap( m_U, 00392 buff_L + 0*ldim_L + j, ldim_L, 00393 buff_D + 0*ldim_D + ipiv, ldim_D ); 00394 #else 00395 FLA_C2F( zswap )( &m_U, 00396 buff_L + 0*ldim_L + j, &ldim_L, 00397 buff_D + 0*ldim_D + ipiv, &ldim_D ); 00398 #endif 00399 00400 buff_p[ j ] = ipiv + m_U - j; 00401 } 00402 else 00403 { 00404 buff_p[ j ] = 0; 00405 } 00406 00407 Ljj = buff_L[ j*ldim_L + j ]; 00408 00409 // d_inv_Ljj = 1.0 / Ljj 00410 temp = 1.0 / ( Ljj.real * Ljj.real + 00411 Ljj.imag * Ljj.imag ); 00412 d_inv_Ljj.real = Ljj.real * temp; 00413 d_inv_Ljj.imag = Ljj.imag * -temp; 00414 00415 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00416 cblas_zscal( m_D, 00417 d_inv_Ljj, 00418 buff_D + j*ldim_D + 0, *buff_1_int ); 00419 #else 
00420 FLA_C2F( zscal )( &m_D, 00421 &d_inv_Ljj, 00422 buff_D + j*ldim_D + 0, buff_1_int ); 00423 #endif 00424 00425 m_U_min_j_min_1 = m_U - j - 1; 00426 00427 if ( m_U_min_j_min_1 > 0 ) 00428 { 00429 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00430 cblas_zgeru( cblas_order, 00431 m_D, m_U_min_j_min_1, 00432 *buff_minus1, 00433 buff_D + j*ldim_D + 0, *buff_1_int, 00434 buff_L + (j+1)*ldim_L + j, ldim_L, 00435 buff_D + (j+1)*ldim_D + 0, ldim_D ); 00436 #else 00437 FLA_C2F( zgeru )( &m_D, &m_U_min_j_min_1, 00438 buff_minus1, 00439 buff_D + j*ldim_D + 0, buff_1_int, 00440 buff_L + (j+1)*ldim_L + j, &ldim_L, 00441 buff_D + (j+1)*ldim_D + 0, &ldim_D ); 00442 #endif 00443 } 00444 00445 m_U_min_j = m_U - j; 00446 00447 if ( m_U_min_j > 0 ) 00448 { 00449 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00450 cblas_zcopy( m_U_min_j, 00451 buff_L + j*ldim_L + j, ldim_L, 00452 buff_U + j*ldim_U + j, ldim_U ); 00453 #else 00454 FLA_C2F( zcopy )( &m_U_min_j, 00455 buff_L + j*ldim_L + j, &ldim_L, 00456 buff_U + j*ldim_U + j, &ldim_U ); 00457 #endif 00458 } 00459 } 00460 break; 00461 } 00462 00463 } 00464 00465 return FLA_SUCCESS; 00466 }
References FLA_Apply_pivots(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Obj_length(), FLA_Obj_width(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Trsv_external(), and FLASH_FS_incpiv_aux2().
Referenced by FLASH_FS_incpiv().
00036 { 00037 FLA_Obj ATL, ATR, A00, A01, A02, 00038 ABL, ABR, A10, A11, A12, 00039 A20, A21, A22; 00040 00041 FLA_Obj pTL, pTR, p00, p01, p02, 00042 pBL, pBR, p10, p11, p12, 00043 p20, p21, p22; 00044 00045 FLA_Obj LTL, LTR, L00, L01, L02, 00046 LBL, LBR, L10, L11, L12, 00047 L20, L21, L22; 00048 00049 FLA_Obj bT, b0, 00050 bB, b1, 00051 b2; 00052 00053 FLA_Obj p11_conf, 00054 p11_rest; 00055 00056 FLA_Part_2x2( A, &ATL, &ATR, 00057 &ABL, &ABR, 0, 0, FLA_TL ); 00058 00059 FLA_Part_2x2( p, &pTL, &pTR, 00060 &pBL, &pBR, 0, 0, FLA_TL ); 00061 00062 FLA_Part_2x2( L, <L, <R, 00063 &LBL, &LBR, 0, 0, FLA_TL ); 00064 00065 FLA_Part_2x1( b, &bT, 00066 &bB, 0, FLA_TOP ); 00067 00068 while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) && 00069 FLA_Obj_width ( ATL ) < FLA_Obj_width ( A ) ) 00070 { 00071 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02, 00072 /* ************* */ /* ******************** */ 00073 &A10, /**/ &A11, &A12, 00074 ABL, /**/ ABR, &A20, /**/ &A21, &A22, 00075 1, 1, FLA_BR ); 00076 00077 FLA_Repart_2x2_to_3x3( pTL, /**/ pTR, &p00, /**/ &p01, &p02, 00078 /* ************* */ /* ******************** */ 00079 &p10, /**/ &p11, &p12, 00080 pBL, /**/ pBR, &p20, /**/ &p21, &p22, 00081 1, 1, FLA_BR ); 00082 00083 FLA_Repart_2x2_to_3x3( LTL, /**/ LTR, &L00, /**/ &L01, &L02, 00084 /* ************* */ /* ******************** */ 00085 &L10, /**/ &L11, &L12, 00086 LBL, /**/ LBR, &L20, /**/ &L21, &L22, 00087 1, 1, FLA_BR ); 00088 00089 FLA_Repart_2x1_to_3x1( bT, &b0, 00090 /* ** */ /* ** */ 00091 &b1, 00092 bB, &b2, 1, FLA_BOTTOM ); 00093 00094 /*------------------------------------------------------------*/ 00095 00096 FLA_Part_2x1( *FLASH_OBJ_PTR_AT( p11 ), &p11_conf, 00097 &p11_rest, 00098 FLA_Obj_length( *FLASH_OBJ_PTR_AT( b1 ) ), FLA_TOP ); 00099 00100 00101 FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, 00102 p11_conf, 00103 *FLASH_OBJ_PTR_AT( b1 ) ); 00104 00105 FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_UNIT_DIAG, 00106 
*FLASH_OBJ_PTR_AT( A11 ), 00107 *FLASH_OBJ_PTR_AT( b1 ) ); 00108 00109 FLASH_FS_incpiv_aux2( L21, 00110 A21, p21, b1, 00111 b2, nb_alg ); 00112 00113 /*------------------------------------------------------------*/ 00114 00115 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02, 00116 A10, A11, /**/ A12, 00117 /* ************** */ /* ****************** */ 00118 &ABL, /**/ &ABR, A20, A21, /**/ A22, 00119 FLA_TL ); 00120 00121 FLA_Cont_with_3x3_to_2x2( &pTL, /**/ &pTR, p00, p01, /**/ p02, 00122 p10, p11, /**/ p12, 00123 /* ************** */ /* ****************** */ 00124 &pBL, /**/ &pBR, p20, p21, /**/ p22, 00125 FLA_TL ); 00126 00127 FLA_Cont_with_3x3_to_2x2( <L, /**/ <R, L00, L01, /**/ L02, 00128 L10, L11, /**/ L12, 00129 /* ************** */ /* ****************** */ 00130 &LBL, /**/ &LBR, L20, L21, /**/ L22, 00131 FLA_TL ); 00132 00133 FLA_Cont_with_3x1_to_2x1( &bT, b0, 00134 b1, 00135 /* ** */ /* ** */ 00136 &bB, b2, FLA_TOP ); 00137 } 00138 00139 return FLA_SUCCESS; 00140 }
FLA_Error FLASH_FS_incpiv_aux2 (FLA_Obj L, FLA_Obj D, FLA_Obj p, FLA_Obj C, FLA_Obj E, dim_t nb_alg)
References FLA_Cont_with_3x1_to_2x1(), FLA_Obj_length(), FLA_Part_2x1(), FLA_Repart_2x1_to_3x1(), and FLA_SA_FS_blk().
Referenced by FLASH_FS_incpiv_aux1().
00038 { 00039 FLA_Obj LT, L0, 00040 LB, L1, 00041 L2; 00042 00043 FLA_Obj DT, D0, 00044 DB, D1, 00045 D2; 00046 00047 FLA_Obj pT, p0, 00048 pB, p1, 00049 p2; 00050 00051 FLA_Obj ET, E0, 00052 EB, E1, 00053 E2; 00054 00055 FLA_Part_2x1( L, <, 00056 &LB, 0, FLA_TOP ); 00057 00058 FLA_Part_2x1( D, &DT, 00059 &DB, 0, FLA_TOP ); 00060 00061 FLA_Part_2x1( p, &pT, 00062 &pB, 0, FLA_TOP ); 00063 00064 FLA_Part_2x1( E, &ET, 00065 &EB, 0, FLA_TOP ); 00066 00067 while ( FLA_Obj_length( DT ) < FLA_Obj_length( D ) ) 00068 { 00069 FLA_Repart_2x1_to_3x1( LT, &L0, 00070 /* ** */ /* ** */ 00071 &L1, 00072 LB, &L2, 1, FLA_BOTTOM ); 00073 00074 FLA_Repart_2x1_to_3x1( DT, &D0, 00075 /* ** */ /* ** */ 00076 &D1, 00077 DB, &D2, 1, FLA_BOTTOM ); 00078 00079 FLA_Repart_2x1_to_3x1( pT, &p0, 00080 /* ** */ /* ** */ 00081 &p1, 00082 pB, &p2, 1, FLA_BOTTOM ); 00083 00084 FLA_Repart_2x1_to_3x1( ET, &E0, 00085 /* ** */ /* ** */ 00086 &E1, 00087 EB, &E2, 1, FLA_BOTTOM ); 00088 00089 /*------------------------------------------------------------*/ 00090 00091 FLA_SA_FS_blk( *FLASH_OBJ_PTR_AT( L1 ), 00092 *FLASH_OBJ_PTR_AT( D1 ), 00093 *FLASH_OBJ_PTR_AT( p1 ), 00094 *FLASH_OBJ_PTR_AT( C ), 00095 *FLASH_OBJ_PTR_AT( E1 ), 00096 nb_alg ); 00097 00098 /*------------------------------------------------------------*/ 00099 00100 FLA_Cont_with_3x1_to_2x1( <, L0, 00101 L1, 00102 /* ** */ /* ** */ 00103 &LB, L2, FLA_TOP ); 00104 00105 FLA_Cont_with_3x1_to_2x1( &DT, D0, 00106 D1, 00107 /* ** */ /* ** */ 00108 &DB, D2, FLA_TOP ); 00109 00110 FLA_Cont_with_3x1_to_2x1( &pT, p0, 00111 p1, 00112 /* ** */ /* ** */ 00113 &pB, p2, FLA_TOP ); 00114 00115 FLA_Cont_with_3x1_to_2x1( &ET, E0, 00116 E1, 00117 /* ** */ /* ** */ 00118 &EB, E2, FLA_TOP ); 00119 } 00120 00121 return FLA_SUCCESS; 00122 }
References FLA_Cont_with_3x3_to_2x2(), FLA_LU_piv_task(), FLA_Obj_length(), FLA_Obj_width(), FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), FLASH_Queue_get_enabled(), FLASH_SA_LU(), and FLASH_Trsm_piv().
00036 { 00037 FLA_Obj ATL, ATR, A00, A01, A02, 00038 ABL, ABR, A10, A11, A12, 00039 A20, A21, A22; 00040 00041 FLA_Obj pTL, pTR, p00, p01, p02, 00042 pBL, pBR, p10, p11, p12, 00043 p20, p21, p22; 00044 00045 FLA_Obj LTL, LTR, L00, L01, L02, 00046 LBL, LBR, L10, L11, L12, 00047 L20, L21, L22; 00048 00049 FLA_Part_2x2( A, &ATL, &ATR, 00050 &ABL, &ABR, 0, 0, FLA_TL ); 00051 00052 FLA_Part_2x2( p, &pTL, &pTR, 00053 &pBL, &pBR, 0, 0, FLA_TL ); 00054 00055 FLA_Part_2x2( L, <L, <R, 00056 &LBL, &LBR, 0, 0, FLA_TL ); 00057 00058 while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) && 00059 FLA_Obj_width ( ATL ) < FLA_Obj_width ( A ) ) 00060 { 00061 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02, 00062 /* ************* */ /* ******************** */ 00063 &A10, /**/ &A11, &A12, 00064 ABL, /**/ ABR, &A20, /**/ &A21, &A22, 00065 1, 1, FLA_BR ); 00066 00067 FLA_Repart_2x2_to_3x3( pTL, /**/ pTR, &p00, /**/ &p01, &p02, 00068 /* ************* */ /* ******************** */ 00069 &p10, /**/ &p11, &p12, 00070 pBL, /**/ pBR, &p20, /**/ &p21, &p22, 00071 1, 1, FLA_BR ); 00072 00073 FLA_Repart_2x2_to_3x3( LTL, /**/ LTR, &L00, /**/ &L01, &L02, 00074 /* ************* */ /* ******************** */ 00075 &L10, /**/ &L11, &L12, 00076 LBL, /**/ LBR, &L20, /**/ &L21, &L22, 00077 1, 1, FLA_BR ); 00078 00079 /*------------------------------------------------------------*/ 00080 00081 if ( FLASH_Queue_get_enabled( ) ) 00082 { 00083 // Enqueue 00084 ENQUEUE_FLASH_LU_piv( *FLASH_OBJ_PTR_AT( A11 ), 00085 *FLASH_OBJ_PTR_AT( p11 ), 00086 NULL ); 00087 } 00088 else 00089 { 00090 // Execute leaf 00091 FLA_LU_piv_task( *FLASH_OBJ_PTR_AT( A11 ), 00092 *FLASH_OBJ_PTR_AT( p11 ), 00093 NULL ); 00094 } 00095 00096 FLASH_Trsm_piv( *FLASH_OBJ_PTR_AT( A11 ), A12, p11 ); 00097 00098 FLASH_SA_LU( A11, A12, 00099 A21, A22, p21, L21, nb_alg ); 00100 00101 /*------------------------------------------------------------*/ 00102 00103 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02, 00104 A10, 
A11, /**/ A12, 00105 /* ************** */ /* ****************** */ 00106 &ABL, /**/ &ABR, A20, A21, /**/ A22, 00107 FLA_TL ); 00108 00109 FLA_Cont_with_3x3_to_2x2( &pTL, /**/ &pTR, p00, p01, /**/ p02, 00110 p10, p11, /**/ p12, 00111 /* ************** */ /* ****************** */ 00112 &pBL, /**/ &pBR, p20, p21, /**/ p22, 00113 FLA_TL ); 00114 00115 FLA_Cont_with_3x3_to_2x2( <L, /**/ <R, L00, L01, /**/ L02, 00116 L10, L11, /**/ L12, 00117 /* ************** */ /* ****************** */ 00118 &LBL, /**/ &LBR, L20, L21, /**/ L22, 00119 FLA_TL ); 00120 } 00121 00122 return FLA_SUCCESS; 00123 }
References FLA_Cont_with_3x3_to_2x2(), FLA_LU_piv_copy_task(), FLA_Obj_create_conf_to(), FLA_Obj_free_task(), FLA_Obj_length(), FLA_Obj_width(), FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), FLASH_Queue_get_enabled(), FLASH_SA_LU(), and FLASH_Trsm_piv().
Referenced by FLASH_LU_incpiv().
00036 { 00037 FLA_Obj ATL, ATR, A00, A01, A02, 00038 ABL, ABR, A10, A11, A12, 00039 A20, A21, A22; 00040 00041 FLA_Obj pTL, pTR, p00, p01, p02, 00042 pBL, pBR, p10, p11, p12, 00043 p20, p21, p22; 00044 00045 FLA_Obj LTL, LTR, L00, L01, L02, 00046 LBL, LBR, L10, L11, L12, 00047 L20, L21, L22; 00048 00049 FLA_Obj U; 00050 00051 FLA_Part_2x2( A, &ATL, &ATR, 00052 &ABL, &ABR, 0, 0, FLA_TL ); 00053 00054 FLA_Part_2x2( p, &pTL, &pTR, 00055 &pBL, &pBR, 0, 0, FLA_TL ); 00056 00057 FLA_Part_2x2( L, <L, <R, 00058 &LBL, &LBR, 0, 0, FLA_TL ); 00059 00060 while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) && 00061 FLA_Obj_width ( ATL ) < FLA_Obj_width ( A ) ) 00062 { 00063 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02, 00064 /* ************* */ /* ******************** */ 00065 &A10, /**/ &A11, &A12, 00066 ABL, /**/ ABR, &A20, /**/ &A21, &A22, 00067 1, 1, FLA_BR ); 00068 00069 FLA_Repart_2x2_to_3x3( pTL, /**/ pTR, &p00, /**/ &p01, &p02, 00070 /* ************* */ /* ******************** */ 00071 &p10, /**/ &p11, &p12, 00072 pBL, /**/ pBR, &p20, /**/ &p21, &p22, 00073 1, 1, FLA_BR ); 00074 00075 FLA_Repart_2x2_to_3x3( LTL, /**/ LTR, &L00, /**/ &L01, &L02, 00076 /* ************* */ /* ******************** */ 00077 &L10, /**/ &L11, &L12, 00078 LBL, /**/ LBR, &L20, /**/ &L21, &L22, 00079 1, 1, FLA_BR ); 00080 00081 /*------------------------------------------------------------*/ 00082 00083 FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, *FLASH_OBJ_PTR_AT( A11 ), &U ); 00084 00085 00086 if ( FLASH_Queue_get_enabled( ) ) 00087 { 00088 // Enqueue 00089 ENQUEUE_FLASH_LU_piv_copy( *FLASH_OBJ_PTR_AT( A11 ), 00090 *FLASH_OBJ_PTR_AT( p11 ), 00091 U, NULL ); 00092 } 00093 else 00094 { 00095 // Execute leaf 00096 FLA_LU_piv_copy_task( *FLASH_OBJ_PTR_AT( A11 ), 00097 *FLASH_OBJ_PTR_AT( p11 ), 00098 U, NULL ); 00099 } 00100 00101 FLASH_Trsm_piv( U, A12, p11 ); 00102 00103 if ( FLASH_Queue_get_enabled( ) ) 00104 { 00105 // Enqueue 00106 ENQUEUE_FLASH_Obj_free( U, NULL ); 00107 } 00108 
else 00109 { 00110 // Execute leaf 00111 FLA_Obj_free_task( U, NULL ); 00112 } 00113 00114 FLASH_SA_LU( A11, A12, 00115 A21, A22, p21, L21, nb_alg ); 00116 00117 /*------------------------------------------------------------*/ 00118 00119 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02, 00120 A10, A11, /**/ A12, 00121 /* ************** */ /* ****************** */ 00122 &ABL, /**/ &ABR, A20, A21, /**/ A22, 00123 FLA_TL ); 00124 00125 FLA_Cont_with_3x3_to_2x2( &pTL, /**/ &pTR, p00, p01, /**/ p02, 00126 p10, p11, /**/ p12, 00127 /* ************** */ /* ****************** */ 00128 &pBL, /**/ &pBR, p20, p21, /**/ p22, 00129 FLA_TL ); 00130 00131 FLA_Cont_with_3x3_to_2x2( <L, /**/ <R, L00, L01, /**/ L02, 00132 L10, L11, /**/ L12, 00133 /* ************** */ /* ****************** */ 00134 &LBL, /**/ &LBR, L20, L21, /**/ L22, 00135 FLA_TL ); 00136 } 00137 00138 return FLA_SUCCESS; 00139 }
References FLA_Cont_with_1x3_to_1x2(), FLA_Obj_width(), FLA_Part_1x2(), FLA_Repart_1x2_to_1x3(), FLA_SA_FS_task(), and FLASH_Queue_get_enabled().
Referenced by FLASH_SA_LU().
00038 { 00039 FLA_Obj CL, CR, C0, C1, C2; 00040 00041 FLA_Obj EL, ER, E0, E1, E2; 00042 00043 FLA_Part_1x2( C, &CL, &CR, 0, FLA_LEFT ); 00044 00045 FLA_Part_1x2( E, &EL, &ER, 0, FLA_LEFT ); 00046 00047 while ( FLA_Obj_width( CL ) < FLA_Obj_width( C ) ) 00048 { 00049 FLA_Repart_1x2_to_1x3( CL, /**/ CR, &C0, /**/ &C1, &C2, 00050 1, FLA_RIGHT ); 00051 00052 FLA_Repart_1x2_to_1x3( EL, /**/ ER, &E0, /**/ &E1, &E2, 00053 1, FLA_RIGHT ); 00054 00055 /*------------------------------------------------------------*/ 00056 00057 if ( FLASH_Queue_get_enabled( ) ) 00058 { 00059 // Enqueue 00060 ENQUEUE_FLASH_SA_FS( *FLASH_OBJ_PTR_AT( L ), 00061 *FLASH_OBJ_PTR_AT( D ), 00062 *FLASH_OBJ_PTR_AT( p ), 00063 *FLASH_OBJ_PTR_AT( C1 ), 00064 *FLASH_OBJ_PTR_AT( E1 ), 00065 nb_alg, 00066 NULL ); 00067 } 00068 else 00069 { 00070 // Execute leaf 00071 FLA_SA_FS_task( *FLASH_OBJ_PTR_AT( L ), 00072 *FLASH_OBJ_PTR_AT( D ), 00073 *FLASH_OBJ_PTR_AT( p ), 00074 *FLASH_OBJ_PTR_AT( C1 ), 00075 *FLASH_OBJ_PTR_AT( E1 ), 00076 nb_alg, 00077 NULL ); 00078 } 00079 00080 /*------------------------------------------------------------*/ 00081 00082 FLA_Cont_with_1x3_to_1x2( &CL, /**/ &CR, C0, C1, /**/ C2, 00083 FLA_LEFT ); 00084 00085 FLA_Cont_with_1x3_to_1x2( &EL, /**/ &ER, E0, E1, /**/ E2, 00086 FLA_LEFT ); 00087 } 00088 00089 return FLA_SUCCESS; 00090 }
FLA_Error FLASH_SA_LU ( FLA_Obj B, FLA_Obj C, FLA_Obj D, FLA_Obj E, FLA_Obj p, FLA_Obj L, dim_t nb_alg )
References FLA_Cont_with_3x1_to_2x1(), FLA_Obj_length(), FLA_Part_2x1(), FLA_Repart_2x1_to_3x1(), FLA_SA_LU_task(), FLASH_Queue_get_enabled(), and FLASH_SA_FS().
Referenced by FLASH_LU_incpiv_var1(), and FLASH_LU_incpiv_var2().
00037 { 00038 FLA_Obj DT, D0, 00039 DB, D1, 00040 D2; 00041 00042 FLA_Obj ET, E0, 00043 EB, E1, 00044 E2; 00045 00046 FLA_Obj pT, p0, 00047 pB, p1, 00048 p2; 00049 00050 FLA_Obj LT, L0, 00051 LB, L1, 00052 L2; 00053 00054 FLA_Part_2x1( D, &DT, 00055 &DB, 0, FLA_TOP ); 00056 00057 FLA_Part_2x1( E, &ET, 00058 &EB, 0, FLA_TOP ); 00059 00060 FLA_Part_2x1( p, &pT, 00061 &pB, 0, FLA_TOP ); 00062 00063 FLA_Part_2x1( L, <, 00064 &LB, 0, FLA_TOP ); 00065 00066 while ( FLA_Obj_length( DT ) < FLA_Obj_length( D ) ) 00067 { 00068 FLA_Repart_2x1_to_3x1( DT, &D0, 00069 /* ** */ /* ** */ 00070 &D1, 00071 DB, &D2, 1, FLA_BOTTOM ); 00072 00073 FLA_Repart_2x1_to_3x1( ET, &E0, 00074 /* ** */ /* ** */ 00075 &E1, 00076 EB, &E2, 1, FLA_BOTTOM ); 00077 00078 FLA_Repart_2x1_to_3x1( pT, &p0, 00079 /* ** */ /* ** */ 00080 &p1, 00081 pB, &p2, 1, FLA_BOTTOM ); 00082 00083 FLA_Repart_2x1_to_3x1( LT, &L0, 00084 /* ** */ /* ** */ 00085 &L1, 00086 LB, &L2, 1, FLA_BOTTOM ); 00087 00088 /*------------------------------------------------------------*/ 00089 00090 if ( FLASH_Queue_get_enabled( ) ) 00091 { 00092 // Enqueue 00093 ENQUEUE_FLASH_SA_LU( *FLASH_OBJ_PTR_AT( B ), 00094 *FLASH_OBJ_PTR_AT( D1 ), 00095 *FLASH_OBJ_PTR_AT( p1 ), 00096 *FLASH_OBJ_PTR_AT( L1 ), 00097 nb_alg, 00098 NULL ); 00099 } 00100 else 00101 { 00102 // Execute leaf 00103 FLA_SA_LU_task( *FLASH_OBJ_PTR_AT( B ), 00104 *FLASH_OBJ_PTR_AT( D1 ), 00105 *FLASH_OBJ_PTR_AT( p1 ), 00106 *FLASH_OBJ_PTR_AT( L1 ), 00107 nb_alg, 00108 NULL ); 00109 } 00110 00111 FLASH_SA_FS( L1, 00112 D1, p1, C, 00113 E1, nb_alg ); 00114 00115 /*------------------------------------------------------------*/ 00116 00117 FLA_Cont_with_3x1_to_2x1( &DT, D0, 00118 D1, 00119 /* ** */ /* ** */ 00120 &DB, D2, FLA_TOP ); 00121 00122 FLA_Cont_with_3x1_to_2x1( &ET, E0, 00123 E1, 00124 /* ** */ /* ** */ 00125 &EB, E2, FLA_TOP ); 00126 00127 FLA_Cont_with_3x1_to_2x1( &pT, p0, 00128 p1, 00129 /* ** */ /* ** */ 00130 &pB, p2, FLA_TOP ); 00131 00132 
FLA_Cont_with_3x1_to_2x1( <, L0, 00133 L1, 00134 /* ** */ /* ** */ 00135 &LB, L2, FLA_TOP ); 00136 } 00137 00138 return FLA_SUCCESS; 00139 }
References FLA_Cont_with_1x3_to_1x2(), FLA_Obj_width(), FLA_Part_1x2(), FLA_Repart_1x2_to_1x3(), FLA_Trsm_piv_task(), and FLASH_Queue_get_enabled().
Referenced by FLASH_LU_incpiv_var1(), and FLASH_LU_incpiv_var2().
00036 { 00037 FLA_Obj BL, BR, B0, B1, B2; 00038 00039 FLA_Part_1x2( B, &BL, &BR, 0, FLA_LEFT ); 00040 00041 while ( FLA_Obj_width( BL ) < FLA_Obj_width( B ) ) 00042 { 00043 FLA_Repart_1x2_to_1x3( BL, /**/ BR, &B0, /**/ &B1, &B2, 00044 1, FLA_RIGHT ); 00045 00046 /*------------------------------------------------------------*/ 00047 00048 if ( FLASH_Queue_get_enabled( ) ) 00049 { 00050 // Enqueue 00051 ENQUEUE_FLASH_Trsm_piv( A, 00052 *FLASH_OBJ_PTR_AT( B1 ), 00053 *FLASH_OBJ_PTR_AT( p ), 00054 NULL ); 00055 } 00056 else 00057 { 00058 // Execute leaf 00059 FLA_Trsm_piv_task( A, 00060 *FLASH_OBJ_PTR_AT( B1 ), 00061 *FLASH_OBJ_PTR_AT( p ), 00062 NULL ); 00063 } 00064 00065 /*------------------------------------------------------------*/ 00066 00067 FLA_Cont_with_1x3_to_1x2( &BL, /**/ &BR, B0, B1, /**/ B2, 00068 FLA_LEFT ); 00069 } 00070 00071 return FLA_SUCCESS; 00072 }