Functions | |
FLA_Error | FLA_SA_LU_unb (FLA_Obj U, FLA_Obj D, FLA_Obj p, FLA_Obj L) |
References cblas_ccopy(), cblas_cgeru(), cblas_cscal(), cblas_cswap(), cblas_dcopy(), cblas_dger(), cblas_dscal(), cblas_dswap(), cblas_icamax(), cblas_idamax(), cblas_isamax(), cblas_izamax(), cblas_scopy(), cblas_sger(), cblas_sscal(), cblas_sswap(), cblas_zcopy(), cblas_zgeru(), cblas_zscal(), cblas_zswap(), CblasColMajor, ccopy(), cgeru(), cscal(), cswap(), dcopy(), dger(), dscal(), dswap(), FLA_Copy_external(), FLA_MINUS_ONE, FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_ldim(), FLA_Obj_length(), FLA_ONE, FLA_Triangularize(), icamax(), idamax(), dcomplex::imag, scomplex::imag, isamax(), izamax(), dcomplex::real, scomplex::real, scopy(), sger(), sscal(), sswap(), zcopy(), zgeru(), zscal(), and zswap().
Referenced by FLA_SA_LU_blk().
00038 { 00039 FLA_Datatype datatype; 00040 int m_U, ldim_U; 00041 int m_D, ldim_D; 00042 int ldim_L; 00043 int m_U_min_j, m_U_min_j_min_1; 00044 int j, ipiv; 00045 int* buff_p; 00046 int* buff_1_int; 00047 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00048 CBLAS_ORDER cblas_order = CblasColMajor; 00049 #endif 00050 00051 if ( FLA_Obj_has_zero_dim( U ) ) return FLA_SUCCESS; 00052 00053 datatype = FLA_Obj_datatype( U ); 00054 00055 m_U = FLA_Obj_length( U ); 00056 ldim_U = FLA_Obj_ldim( U ); 00057 00058 m_D = FLA_Obj_length( D ); 00059 ldim_D = FLA_Obj_ldim( D ); 00060 00061 ldim_L = FLA_Obj_ldim( L ); 00062 00063 FLA_Copy_external( U, L ); 00064 FLA_Triangularize( FLA_UPPER_TRIANGULAR, FLA_NONUNIT_DIAG, L ); 00065 00066 buff_p = ( int * ) FLA_INT_PTR( p ); 00067 buff_1_int = ( int * ) FLA_INT_PTR( FLA_ONE ); 00068 00069 switch ( datatype ){ 00070 00071 case FLA_FLOAT: 00072 { 00073 float* buff_U = ( float * ) FLA_FLOAT_PTR( U ); 00074 float* buff_D = ( float * ) FLA_FLOAT_PTR( D ); 00075 float* buff_L = ( float * ) FLA_FLOAT_PTR( L ); 00076 float* buff_minus1 = ( float * ) FLA_FLOAT_PTR( FLA_MINUS_ONE ); 00077 float L_tmp; 00078 float D_tmp; 00079 float d_inv_Ljj; 00080 00081 for ( j = 0; j < m_U; ++j ) 00082 { 00083 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00084 ipiv = cblas_isamax( m_D, 00085 buff_D + j*ldim_D + 0, 00086 *buff_1_int ); 00087 #else 00088 ipiv = FLA_C2F( isamax )( &m_D, 00089 buff_D + j*ldim_D + 0, 00090 buff_1_int ) - 1; 00091 #endif 00092 00093 L_tmp = buff_L[ j*ldim_L + j ]; 00094 D_tmp = buff_D[ j*ldim_D + ipiv ]; 00095 00096 if ( dabs( L_tmp ) < dabs( D_tmp ) ) 00097 { 00098 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00099 cblas_sswap( m_U, 00100 buff_L + 0*ldim_L + j, ldim_L, 00101 buff_D + 0*ldim_D + ipiv, ldim_D ); 00102 #else 00103 FLA_C2F( sswap )( &m_U, 00104 buff_L + 0*ldim_L + j, &ldim_L, 00105 buff_D + 0*ldim_D + ipiv, &ldim_D ); 00106 #endif 00107 00108 buff_p[ j ] = ipiv + m_U - j; 00109 } 00110 else 00111 { 00112 buff_p[ j ] = 0; 00113 } 00114 00115 d_inv_Ljj = 1.0F / buff_L[ j*ldim_L + j ]; 00116 00117 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00118 cblas_sscal( m_D, 00119 d_inv_Ljj, 00120 buff_D + j*ldim_D + 0, *buff_1_int ); 00121 #else 00122 FLA_C2F( sscal )( &m_D, 00123 &d_inv_Ljj, 00124 buff_D + j*ldim_D + 0, buff_1_int ); 00125 #endif 00126 00127 m_U_min_j_min_1 = m_U - j - 1; 00128 00129 if ( m_U_min_j_min_1 > 0 ) 00130 { 00131 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00132 cblas_sger( cblas_order, 00133 m_D, m_U_min_j_min_1, 00134 *buff_minus1, 00135 buff_D + j*ldim_D + 0, *buff_1_int, 00136 buff_L + (j+1)*ldim_L + j, ldim_L, 00137 buff_D + (j+1)*ldim_D + 0, ldim_D ); 00138 #else 00139 FLA_C2F( sger )( &m_D, &m_U_min_j_min_1, 00140 buff_minus1, 00141 buff_D + j*ldim_D + 0, buff_1_int, 00142 buff_L + (j+1)*ldim_L + j, &ldim_L, 00143 buff_D + (j+1)*ldim_D + 0, &ldim_D ); 00144 #endif 00145 } 00146 00147 m_U_min_j = m_U - j; 00148 00149 if ( m_U_min_j > 0 ) 00150 { 00151 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00152 cblas_scopy( m_U_min_j, 00153 buff_L + j*ldim_L + j, ldim_L, 00154 buff_U + j*ldim_U + j, ldim_U ); 00155 #else 00156 FLA_C2F( scopy )( &m_U_min_j, 00157 buff_L + j*ldim_L + j, &ldim_L, 00158 buff_U + j*ldim_U + j, &ldim_U ); 00159 #endif 00160 } 00161 } 00162 break; 00163 } 00164 00165 case FLA_DOUBLE: 00166 { 00167 double* buff_U = ( double * ) FLA_DOUBLE_PTR( U ); 00168 double* buff_D = ( double * ) FLA_DOUBLE_PTR( D ); 00169 double* buff_L = ( double * ) FLA_DOUBLE_PTR( L ); 00170 double* buff_minus1 = ( double * ) FLA_DOUBLE_PTR( FLA_MINUS_ONE ); 00171 double L_tmp; 00172 double D_tmp; 00173 double d_inv_Ljj; 00174 00175 for ( j = 0; j < m_U; ++j ) 00176 { 00177 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00178 ipiv = cblas_idamax( m_D, 00179 buff_D + j*ldim_D + 0, 00180 *buff_1_int ); 00181 #else 00182 ipiv = FLA_C2F( idamax )( &m_D, 00183 buff_D + j*ldim_D + 0, 00184 buff_1_int ) - 1; 00185 #endif 00186 00187 L_tmp = buff_L[ j*ldim_L + j ]; 00188 D_tmp = buff_D[ j*ldim_D + ipiv ]; 00189 00190 if ( dabs( L_tmp ) < dabs( D_tmp ) ) 00191 { 00192 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00193 cblas_dswap( m_U, 00194 buff_L + 0*ldim_L + j, ldim_L, 00195 buff_D + 0*ldim_D + ipiv, ldim_D ); 00196 #else 00197 FLA_C2F( dswap )( &m_U, 00198 buff_L + 0*ldim_L + j, &ldim_L, 00199 buff_D + 0*ldim_D + ipiv, &ldim_D ); 00200 #endif 00201 00202 buff_p[ j ] = ipiv + m_U - j; 00203 } 00204 else 00205 { 00206 buff_p[ j ] = 0; 00207 } 00208 00209 d_inv_Ljj = 1.0 / buff_L[ j*ldim_L + j ]; 00210 00211 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00212 cblas_dscal( m_D, 00213 d_inv_Ljj, 00214 buff_D + j*ldim_D + 0, *buff_1_int ); 00215 #else 00216 FLA_C2F( dscal )( &m_D, 00217 &d_inv_Ljj, 00218 buff_D + j*ldim_D + 0, buff_1_int ); 00219 #endif 00220 00221 m_U_min_j_min_1 = m_U - j - 1; 00222 00223 if ( m_U_min_j_min_1 > 0 ) 00224 { 00225 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00226 cblas_dger( cblas_order, 00227 m_D, m_U_min_j_min_1, 00228 *buff_minus1, 00229 buff_D + j*ldim_D + 0, *buff_1_int, 00230 buff_L + (j+1)*ldim_L + j, ldim_L, 00231 buff_D + (j+1)*ldim_D + 0, ldim_D ); 00232 #else 00233 FLA_C2F( dger )( &m_D, &m_U_min_j_min_1, 00234 buff_minus1, 00235 buff_D + j*ldim_D + 0, buff_1_int, 00236 buff_L + (j+1)*ldim_L + j, &ldim_L, 00237 buff_D + (j+1)*ldim_D + 0, &ldim_D ); 00238 #endif 00239 } 00240 00241 m_U_min_j = m_U - j; 00242 00243 if ( m_U_min_j > 0 ) 00244 { 00245 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00246 cblas_dcopy( m_U_min_j, 00247 buff_L + j*ldim_L + j, ldim_L, 00248 buff_U + j*ldim_U + j, ldim_U ); 00249 #else 00250 FLA_C2F( dcopy )( &m_U_min_j, 00251 buff_L + j*ldim_L + j, &ldim_L, 00252 buff_U + j*ldim_U + j, &ldim_U ); 00253 #endif 00254 } 00255 } 00256 break; 00257 } 00258 00259 case FLA_COMPLEX: 00260 { 00261 scomplex* buff_U = ( scomplex * ) FLA_COMPLEX_PTR( U ); 00262 scomplex* buff_D = ( scomplex * ) FLA_COMPLEX_PTR( D ); 00263 scomplex* buff_L = ( scomplex * ) FLA_COMPLEX_PTR( L ); 00264 scomplex* buff_minus1 = ( scomplex * ) FLA_COMPLEX_PTR( FLA_MINUS_ONE ); 00265 scomplex L_tmp; 00266 scomplex D_tmp; 00267 scomplex d_inv_Ljj; 00268 scomplex Ljj; 00269 float temp; 00270 00271 for ( j = 0; j < m_U; ++j ) 00272 { 00273 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00274 ipiv = cblas_icamax( m_D, 00275 buff_D + j*ldim_D + 0, 00276 *buff_1_int ); 00277 #else 00278 ipiv = FLA_C2F( icamax )( &m_D, 00279 buff_D + j*ldim_D + 0, 00280 buff_1_int ) - 1; 00281 #endif 00282 00283 L_tmp = buff_L[ j*ldim_L + j ]; 00284 D_tmp = buff_D[ j*ldim_D + ipiv ]; 00285 00286 if ( dabs( L_tmp.real + L_tmp.imag ) < dabs( D_tmp.real + D_tmp.imag ) ) 00287 { 00288 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00289 cblas_cswap( m_U, 00290 buff_L + 0*ldim_L + j, ldim_L, 00291 buff_D + 0*ldim_D + ipiv, ldim_D ); 00292 #else 00293 FLA_C2F( cswap )( &m_U, 00294 buff_L + 0*ldim_L + j, &ldim_L, 00295 buff_D + 0*ldim_D + ipiv, &ldim_D ); 00296 #endif 00297 00298 buff_p[ j ] = ipiv + m_U - j; 00299 } 00300 else 00301 { 00302 buff_p[ j ] = 0; 00303 } 00304 00305 Ljj = buff_L[ j*ldim_L + j ]; 00306 00307 // d_inv_Ljj = 1.0 / Ljj 00308 temp = 1.0F / ( Ljj.real * Ljj.real + 00309 Ljj.imag * Ljj.imag ); 00310 d_inv_Ljj.real = Ljj.real * temp; 00311 d_inv_Ljj.imag = Ljj.imag * -temp; 00312 00313 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00314 cblas_cscal( m_D, 00315 d_inv_Ljj, 00316 buff_D + j*ldim_D + 0, *buff_1_int ); 00317 #else 00318 FLA_C2F( cscal )( &m_D, 00319 &d_inv_Ljj, 00320 buff_D + j*ldim_D + 0, buff_1_int ); 00321 #endif 00322 00323 m_U_min_j_min_1 = m_U - j - 1; 00324 00325 if ( m_U_min_j_min_1 > 0 ) 00326 { 00327 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00328 cblas_cgeru( cblas_order, 00329 m_D, m_U_min_j_min_1, 00330 *buff_minus1, 00331 buff_D + j*ldim_D + 0, *buff_1_int, 00332 buff_L + (j+1)*ldim_L + j, ldim_L, 00333 buff_D + (j+1)*ldim_D + 0, ldim_D ); 00334 #else 00335 FLA_C2F( cgeru )( &m_D, &m_U_min_j_min_1, 00336 buff_minus1, 00337 buff_D + j*ldim_D + 0, buff_1_int, 00338 buff_L + (j+1)*ldim_L + j, &ldim_L, 00339 buff_D + (j+1)*ldim_D + 0, &ldim_D ); 00340 #endif 00341 } 00342 00343 m_U_min_j = m_U - j; 00344 00345 if ( m_U_min_j > 0 ) 00346 { 00347 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00348 cblas_ccopy( m_U_min_j, 00349 buff_L + j*ldim_L + j, ldim_L, 00350 buff_U + j*ldim_U + j, ldim_U ); 00351 #else 00352 FLA_C2F( ccopy )( &m_U_min_j, 00353 buff_L + j*ldim_L + j, &ldim_L, 00354 buff_U + j*ldim_U + j, &ldim_U ); 00355 #endif 00356 } 00357 } 00358 break; 00359 } 00360 00361 case FLA_DOUBLE_COMPLEX: 00362 { 00363 dcomplex* buff_U = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( U ); 00364 dcomplex* buff_D = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( D ); 00365 dcomplex* buff_L = ( dcomplex * ) FLA_COMPLEX_PTR( L ); 00366 dcomplex* buff_minus1 = ( dcomplex * ) FLA_COMPLEX_PTR( FLA_MINUS_ONE ); 00367 dcomplex L_tmp; 00368 dcomplex D_tmp; 00369 dcomplex d_inv_Ljj; 00370 dcomplex Ljj; 00371 double temp; 00372 00373 for ( j = 0; j < m_U; ++j ) 00374 { 00375 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00376 ipiv = cblas_izamax( m_D, 00377 buff_D + j*ldim_D + 0, 00378 *buff_1_int ); 00379 #else 00380 ipiv = FLA_C2F( izamax )( &m_D, 00381 buff_D + j*ldim_D + 0, 00382 buff_1_int ) - 1; 00383 #endif 00384 00385 L_tmp = buff_L[ j*ldim_L + j ]; 00386 D_tmp = buff_D[ j*ldim_D + ipiv ]; 00387 00388 if ( dabs( L_tmp.real + L_tmp.imag ) < dabs( D_tmp.real + D_tmp.imag ) ) 00389 { 00390 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00391 cblas_zswap( m_U, 00392 buff_L + 0*ldim_L + j, ldim_L, 00393 buff_D + 0*ldim_D + ipiv, ldim_D ); 00394 #else 00395 FLA_C2F( zswap )( &m_U, 00396 buff_L + 0*ldim_L + j, &ldim_L, 00397 buff_D + 0*ldim_D + ipiv, &ldim_D ); 00398 #endif 00399 00400 buff_p[ j ] = ipiv + m_U - j; 00401 } 00402 else 00403 { 00404 buff_p[ j ] = 0; 00405 } 00406 00407 Ljj = buff_L[ j*ldim_L + j ]; 00408 00409 // d_inv_Ljj = 1.0 / Ljj 00410 temp = 1.0 / ( Ljj.real * Ljj.real + 00411 Ljj.imag * Ljj.imag ); 00412 d_inv_Ljj.real = Ljj.real * temp; 00413 d_inv_Ljj.imag = Ljj.imag * -temp; 00414 00415 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00416 cblas_zscal( m_D, 00417 d_inv_Ljj, 00418 buff_D + j*ldim_D + 0, *buff_1_int ); 00419 #else 00420 FLA_C2F( zscal )( &m_D, 00421 &d_inv_Ljj, 00422 buff_D + j*ldim_D + 0, buff_1_int ); 00423 #endif 00424 00425 m_U_min_j_min_1 = m_U - j - 1; 00426 00427 if ( m_U_min_j_min_1 > 0 ) 00428 { 00429 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00430 cblas_zgeru( cblas_order, 00431 m_D, m_U_min_j_min_1, 00432 *buff_minus1, 00433 buff_D + j*ldim_D + 0, *buff_1_int, 00434 buff_L + (j+1)*ldim_L + j, ldim_L, 00435 buff_D + (j+1)*ldim_D + 0, ldim_D ); 00436 #else 00437 FLA_C2F( zgeru )( &m_D, &m_U_min_j_min_1, 00438 buff_minus1, 00439 buff_D + j*ldim_D + 0, buff_1_int, 00440 buff_L + (j+1)*ldim_L + j, &ldim_L, 00441 buff_D + (j+1)*ldim_D + 0, &ldim_D ); 00442 #endif 00443 } 00444 00445 m_U_min_j = m_U - j; 00446 00447 if ( m_U_min_j > 0 ) 00448 { 00449 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00450 cblas_zcopy( m_U_min_j, 00451 buff_L + j*ldim_L + j, ldim_L, 00452 buff_U + j*ldim_U + j, ldim_U ); 00453 #else 00454 FLA_C2F( zcopy )( &m_U_min_j, 00455 buff_L + j*ldim_L + j, &ldim_L, 00456 buff_U + j*ldim_U + j, &ldim_U ); 00457 #endif 00458 } 00459 } 00460 break; 00461 } 00462 00463 } 00464 00465 return FLA_SUCCESS; 00466 }