Functions | |
FLA_Error | FLA_Apply_househ2_UT_opt (FLA_Obj tau, FLA_Obj u2, FLA_Obj a1t, FLA_Obj A2) |
FLA_Error | FLA_Apply_househ2_UT_opt_float (int m_u2_A2, int n_a1t, float *tau, float *u2, int inc_u2, float *a1t, int inc_a1t, float *A2, int ldim_A2) |
FLA_Error | FLA_Apply_househ2_UT_opt_double (int m_u2_A2, int n_a1t, double *tau, double *u2, int inc_u2, double *a1t, int inc_a1t, double *A2, int ldim_A2) |
FLA_Error | FLA_Apply_househ2_UT_opt_scomplex (int m_u2_A2, int n_a1t, scomplex *tau, scomplex *u2, int inc_u2, scomplex *a1t, int inc_a1t, scomplex *A2, int ldim_A2) |
FLA_Error | FLA_Apply_househ2_UT_opt_dcomplex (int m_u2_A2, int n_a1t, dcomplex *tau, dcomplex *u2, int inc_u2, dcomplex *a1t, int inc_a1t, dcomplex *A2, int ldim_A2) |
References FLA_Apply_househ2_UT_check(), FLA_Apply_househ2_UT_opt_dcomplex(), FLA_Apply_househ2_UT_opt_double(), FLA_Apply_househ2_UT_opt_float(), FLA_Apply_househ2_UT_opt_scomplex(), FLA_Check_error_level(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_ldim(), FLA_Obj_length(), FLA_Obj_vector_inc(), and FLA_Obj_width().
00039 := / I - 1/tau / 1 \ ( 1 u2^H ) \ / a1t \ 00040 \ A2 / \ \ u2 / / \ A2 / 00041 00042 w = ( a1t + u2' * A2 ) / conj( tau ); 00043 00044 a1t = - w + a1t; 00045 A2 = - u2 * w + A2; 00046 */ 00047 { 00048 FLA_Datatype datatype; 00049 int m_u2_A2; 00050 int n_a1t; 00051 int inc_u2; 00052 int inc_a1t; 00053 int ldim_A2; 00054 00055 if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) 00056 FLA_Apply_househ2_UT_check( tau, u2, a1t, A2 ); 00057 00058 if ( FLA_Obj_has_zero_dim( a1t ) ) return FLA_SUCCESS; 00059 00060 datatype = FLA_Obj_datatype( A2 ); 00061 00062 m_u2_A2 = FLA_Obj_length( u2 ); 00063 n_a1t = FLA_Obj_width( a1t ); 00064 inc_u2 = FLA_Obj_vector_inc( u2 ); 00065 inc_a1t = FLA_Obj_vector_inc( a1t ); 00066 ldim_A2 = FLA_Obj_ldim( A2 ); 00067 00068 switch ( datatype ) 00069 { 00070 case FLA_FLOAT: 00071 { 00072 float* tau_p = ( float* ) FLA_FLOAT_PTR( tau ); 00073 float* u2_p = ( float* ) FLA_FLOAT_PTR( u2 ); 00074 float* a1t_p = ( float* ) FLA_FLOAT_PTR( a1t ); 00075 float* A2_p = ( float* ) FLA_FLOAT_PTR( A2 ); 00076 00077 FLA_Apply_househ2_UT_opt_float( m_u2_A2, n_a1t, 00078 tau_p, 00079 u2_p, inc_u2, 00080 a1t_p, inc_a1t, 00081 A2_p, ldim_A2 ); 00082 break; 00083 } 00084 00085 case FLA_DOUBLE: 00086 { 00087 double* tau_p = ( double* ) FLA_DOUBLE_PTR( tau ); 00088 double* u2_p = ( double* ) FLA_DOUBLE_PTR( u2 ); 00089 double* a1t_p = ( double* ) FLA_DOUBLE_PTR( a1t ); 00090 double* A2_p = ( double* ) FLA_DOUBLE_PTR( A2 ); 00091 00092 FLA_Apply_househ2_UT_opt_double( m_u2_A2, n_a1t, 00093 tau_p, 00094 u2_p, inc_u2, 00095 a1t_p, inc_a1t, 00096 A2_p, ldim_A2 ); 00097 break; 00098 } 00099 00100 case FLA_COMPLEX: 00101 { 00102 scomplex* tau_p = ( scomplex* ) FLA_COMPLEX_PTR( tau ); 00103 scomplex* u2_p = ( scomplex* ) FLA_COMPLEX_PTR( u2 ); 00104 scomplex* a1t_p = ( scomplex* ) FLA_COMPLEX_PTR( a1t ); 00105 scomplex* A2_p = ( scomplex* ) FLA_COMPLEX_PTR( A2 ); 00106 00107 FLA_Apply_househ2_UT_opt_scomplex( m_u2_A2, n_a1t, 00108 tau_p, 00109 u2_p, inc_u2, 00110 a1t_p, inc_a1t, 00111 A2_p, ldim_A2 ); 00112 break; 00113 } 00114 00115 case FLA_DOUBLE_COMPLEX: 00116 { 00117 dcomplex* tau_p = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( tau ); 00118 dcomplex* u2_p = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( u2 ); 00119 dcomplex* a1t_p = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( a1t ); 00120 dcomplex* A2_p = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A2 ); 00121 00122 FLA_Apply_househ2_UT_opt_dcomplex( m_u2_A2, n_a1t, 00123 tau_p, 00124 u2_p, inc_u2, 00125 a1t_p, inc_a1t, 00126 A2_p, ldim_A2 ); 00127 break; 00128 } 00129 } 00130 00131 return FLA_SUCCESS; 00132 }
FLA_Error FLA_Apply_househ2_UT_opt_dcomplex | ( | int | m_u2_A2, | |
int | n_a1t, | |||
dcomplex * | tau, | |||
dcomplex * | u2, | |||
int | inc_u2, | |||
dcomplex * | a1t, | |||
int | inc_a1t, | |||
dcomplex * | A2, | |||
int | ldim_A2 | |||
) |
References dscal(), FLA_free(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, dcomplex::imag, dcomplex::real, zaxpy(), zcopy(), zgemv(), zgeru(), and zscal().
Referenced by FLA_Apply_househ2_UT_opt(), FLA_QR_UT_Accum_T_opt_var1_dcomplex(), and FLA_QR_UT_UD_Accum_T_opt_var1_dcomplex().
00370 { 00371 dcomplex* one_p = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE ); 00372 dcomplex* minus_one_p = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE ); 00373 dcomplex tau_copy; 00374 double temp; 00375 char blas_trans = 'T'; 00376 int inc_w1t; 00377 int inc_u2_conj; 00378 int i_two = 2; 00379 dcomplex* u2_conj; 00380 00381 // FLA_Obj w1t; 00382 dcomplex* w1t; 00383 00384 // if ( FLA_Obj_has_zero_dim( a1t ) ) return FLA_SUCCESS; 00385 if ( n_a1t == 0 ) return FLA_SUCCESS; 00386 00387 // FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, a1t, &w1t ); 00388 w1t = FLA_malloc( n_a1t * sizeof( dcomplex ) ); 00389 inc_w1t = 1; 00390 00391 // // w1t = a1t; 00392 // FLA_Copy_external( a1t, w1t ); 00393 FLA_C2F( zcopy )( &n_a1t, 00394 a1t, &inc_a1t, 00395 w1t, &inc_w1t ); 00396 00397 // // w1t = w1t + u2' * A2; 00398 // // w1t = w1t + A2^T * conj(u2); 00399 // FLA_Gemvc_external( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A2, u2, FLA_ONE, w1t ); 00400 u2_conj = FLA_malloc( m_u2_A2 * sizeof( dcomplex ) ); 00401 inc_u2_conj = 1; 00402 00403 FLA_C2F( zcopy )( &m_u2_A2, 00404 u2, &inc_u2, 00405 u2_conj, &inc_u2_conj ); 00406 00407 FLA_C2F( dscal )( &m_u2_A2, 00408 &(minus_one_p->real), 00409 (( double* ) u2_conj ) + 1, &i_two ); 00410 00411 FLA_C2F( zgemv )( &blas_trans, 00412 &m_u2_A2, &n_a1t, 00413 one_p, 00414 A2, &ldim_A2, 00415 u2_conj, &inc_u2_conj, 00416 one_p, 00417 w1t, &inc_w1t ); 00418 00419 FLA_free( u2_conj ); 00420 00421 // // w1t = w1t / conj( tau ); 00422 // FLA_Inv_scalc_external( FLA_CONJUGATE, tau, w1t ); 00423 temp = 1.0 / ( tau->real * tau->real + 00424 tau->imag * tau->imag ); 00425 tau_copy.real = tau->real * temp; 00426 tau_copy.imag = tau->imag * temp; 00427 FLA_C2F( zscal )( &n_a1t, 00428 &tau_copy, 00429 w1t, &inc_w1t ); 00430 00431 // // a1t = - w1t + a1t; 00432 // FLA_Axpy_external( FLA_MINUS_ONE, w1t, a1t ); 00433 FLA_C2F( zaxpy )( &n_a1t, 00434 minus_one_p, 00435 w1t, &inc_w1t, 00436 a1t, &inc_a1t ); 00437 00438 // // A2 = - u2 * w1t + A2; 00439 // FLA_Ger_external( FLA_MINUS_ONE, u2, w1t, A2 ); 00440 FLA_C2F( zgeru )( &m_u2_A2, &n_a1t, 00441 minus_one_p, 00442 u2, &inc_u2, 00443 w1t, &inc_w1t, 00444 A2, &ldim_A2 ); 00445 00446 // FLA_Obj_free( &w1t ); 00447 FLA_free( w1t ); 00448 00449 return FLA_SUCCESS; 00450 }
FLA_Error FLA_Apply_househ2_UT_opt_double | ( | int | m_u2_A2, | |
int | n_a1t, | |||
double * | tau, | |||
double * | u2, | |||
int | inc_u2, | |||
double * | a1t, | |||
int | inc_a1t, | |||
double * | A2, | |||
int | ldim_A2 | |||
) |
References daxpy(), dcopy(), dgemv(), dger(), dscal(), FLA_free(), FLA_malloc(), FLA_MINUS_ONE, and FLA_ONE.
Referenced by FLA_Apply_househ2_UT_opt(), FLA_QR_UT_Accum_T_opt_var1_double(), and FLA_QR_UT_UD_Accum_T_opt_var1_double().
00210 { 00211 double* one_p = FLA_DOUBLE_PTR( FLA_ONE ); 00212 double* minus_one_p = FLA_DOUBLE_PTR( FLA_MINUS_ONE ); 00213 double tau_copy; 00214 char blas_trans = 'T'; 00215 int inc_w1t; 00216 00217 // FLA_Obj w1t; 00218 double* w1t; 00219 00220 // if ( FLA_Obj_has_zero_dim( a1t ) ) return FLA_SUCCESS; 00221 if ( n_a1t == 0 ) return FLA_SUCCESS; 00222 00223 // FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, a1t, &w1t ); 00224 w1t = FLA_malloc( n_a1t * sizeof( double ) ); 00225 inc_w1t = 1; 00226 00227 // // w1t = a1t; 00228 // FLA_Copy_external( a1t, w1t ); 00229 FLA_C2F( dcopy )( &n_a1t, 00230 a1t, &inc_a1t, 00231 w1t, &inc_w1t ); 00232 00233 // // w1t = w1t + u2' * A2; 00234 // // w1t = w1t + A2^T * conj(u2); 00235 // FLA_Gemvc_external( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A2, u2, FLA_ONE, w1t ); 00236 FLA_C2F( dgemv )( &blas_trans, 00237 &m_u2_A2, &n_a1t, 00238 one_p, 00239 A2, &ldim_A2, 00240 u2, &inc_u2, 00241 one_p, 00242 w1t, &inc_w1t ); 00243 00244 // // w1t = w1t / conj( tau ); 00245 // FLA_Inv_scalc_external( FLA_CONJUGATE, tau, w1t ); 00246 tau_copy = 1.0 / *tau; 00247 FLA_C2F( dscal )( &n_a1t, 00248 &tau_copy, 00249 w1t, &inc_w1t ); 00250 00251 // // a1t = - w1t + a1t; 00252 // FLA_Axpy_external( FLA_MINUS_ONE, w1t, a1t ); 00253 FLA_C2F( daxpy )( &n_a1t, 00254 minus_one_p, 00255 w1t, &inc_w1t, 00256 a1t, &inc_a1t ); 00257 00258 // // A2 = - u2 * w1t + A2; 00259 // FLA_Ger_external( FLA_MINUS_ONE, u2, w1t, A2 ); 00260 FLA_C2F( dger )( &m_u2_A2, &n_a1t, 00261 minus_one_p, 00262 u2, &inc_u2, 00263 w1t, &inc_w1t, 00264 A2, &ldim_A2 ); 00265 00266 // FLA_Obj_free( &w1t ); 00267 FLA_free( w1t ); 00268 00269 return FLA_SUCCESS; 00270 }
FLA_Error FLA_Apply_househ2_UT_opt_float | ( | int | m_u2_A2, | |
int | n_a1t, | |||
float * | tau, | |||
float * | u2, | |||
int | inc_u2, | |||
float * | a1t, | |||
int | inc_a1t, | |||
float * | A2, | |||
int | ldim_A2 | |||
) |
References FLA_free(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, saxpy(), scopy(), sgemv(), sger(), and sscal().
Referenced by FLA_Apply_househ2_UT_opt(), FLA_QR_UT_Accum_T_opt_var1_float(), and FLA_QR_UT_UD_Accum_T_opt_var1_float().
00140 { 00141 float* one_p = FLA_FLOAT_PTR( FLA_ONE ); 00142 float* minus_one_p = FLA_FLOAT_PTR( FLA_MINUS_ONE ); 00143 float tau_copy; 00144 char blas_trans = 'T'; 00145 int inc_w1t; 00146 00147 // FLA_Obj w1t; 00148 float* w1t; 00149 00150 // if ( FLA_Obj_has_zero_dim( a1t ) ) return FLA_SUCCESS; 00151 if ( n_a1t == 0 ) return FLA_SUCCESS; 00152 00153 // FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, a1t, &w1t ); 00154 w1t = FLA_malloc( n_a1t * sizeof( float ) ); 00155 inc_w1t = 1; 00156 00157 // // w1t = a1t; 00158 // FLA_Copy_external( a1t, w1t ); 00159 FLA_C2F( scopy )( &n_a1t, 00160 a1t, &inc_a1t, 00161 w1t, &inc_w1t ); 00162 00163 // // w1t = w1t + u2' * A2; 00164 // // w1t = w1t + A2^T * conj(u2); 00165 // FLA_Gemvc_external( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A2, u2, FLA_ONE, w1t ); 00166 FLA_C2F( sgemv )( &blas_trans, 00167 &m_u2_A2, &n_a1t, 00168 one_p, 00169 A2, &ldim_A2, 00170 u2, &inc_u2, 00171 one_p, 00172 w1t, &inc_w1t ); 00173 00174 // // w1t = w1t / conj( tau ); 00175 // FLA_Inv_scalc_external( FLA_CONJUGATE, tau, w1t ); 00176 tau_copy = 1.0F / *tau; 00177 FLA_C2F( sscal )( &n_a1t, 00178 &tau_copy, 00179 w1t, &inc_w1t ); 00180 00181 // // a1t = - w1t + a1t; 00182 // FLA_Axpy_external( FLA_MINUS_ONE, w1t, a1t ); 00183 FLA_C2F( saxpy )( &n_a1t, 00184 minus_one_p, 00185 w1t, &inc_w1t, 00186 a1t, &inc_a1t ); 00187 00188 // // A2 = - u2 * w1t + A2; 00189 // FLA_Ger_external( FLA_MINUS_ONE, u2, w1t, A2 ); 00190 FLA_C2F( sger )( &m_u2_A2, &n_a1t, 00191 minus_one_p, 00192 u2, &inc_u2, 00193 w1t, &inc_w1t, 00194 A2, &ldim_A2 ); 00195 00196 // FLA_Obj_free( &w1t ); 00197 FLA_free( w1t ); 00198 00199 return FLA_SUCCESS; 00200 }
FLA_Error FLA_Apply_househ2_UT_opt_scomplex | ( | int | m_u2_A2, | |
int | n_a1t, | |||
scomplex * | tau, | |||
scomplex * | u2, | |||
int | inc_u2, | |||
scomplex * | a1t, | |||
int | inc_a1t, | |||
scomplex * | A2, | |||
int | ldim_A2 | |||
) |
References caxpy(), ccopy(), cgemv(), cgeru(), cscal(), FLA_free(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, scomplex::imag, scomplex::real, and sscal().
Referenced by FLA_Apply_househ2_UT_opt(), FLA_QR_UT_Accum_T_opt_var1_scomplex(), and FLA_QR_UT_UD_Accum_T_opt_var1_scomplex().
00280 { 00281 scomplex* one_p = FLA_COMPLEX_PTR( FLA_ONE ); 00282 scomplex* minus_one_p = FLA_COMPLEX_PTR( FLA_MINUS_ONE ); 00283 scomplex tau_copy; 00284 float temp; 00285 char blas_trans = 'T'; 00286 int inc_w1t; 00287 int inc_u2_conj; 00288 int i_two = 2; 00289 scomplex* u2_conj; 00290 00291 // FLA_Obj w1t; 00292 scomplex* w1t; 00293 00294 // if ( FLA_Obj_has_zero_dim( a1t ) ) return FLA_SUCCESS; 00295 if ( n_a1t == 0 ) return FLA_SUCCESS; 00296 00297 // FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, a1t, &w1t ); 00298 w1t = FLA_malloc( n_a1t * sizeof( scomplex ) ); 00299 inc_w1t = 1; 00300 00301 // // w1t = a1t; 00302 // FLA_Copy_external( a1t, w1t ); 00303 FLA_C2F( ccopy )( &n_a1t, 00304 a1t, &inc_a1t, 00305 w1t, &inc_w1t ); 00306 00307 // // w1t = w1t + u2' * A2; 00308 // // w1t = w1t + A2^T * conj(u2); 00309 // FLA_Gemvc_external( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A2, u2, FLA_ONE, w1t ); 00310 u2_conj = FLA_malloc( m_u2_A2 * sizeof( scomplex ) ); 00311 inc_u2_conj = 1; 00312 00313 FLA_C2F( ccopy )( &m_u2_A2, 00314 u2, &inc_u2, 00315 u2_conj, &inc_u2_conj ); 00316 00317 FLA_C2F( sscal )( &m_u2_A2, 00318 &(minus_one_p->real), 00319 (( float* ) u2_conj ) + 1, &i_two ); 00320 00321 FLA_C2F( cgemv )( &blas_trans, 00322 &m_u2_A2, &n_a1t, 00323 one_p, 00324 A2, &ldim_A2, 00325 u2_conj, &inc_u2_conj, 00326 one_p, 00327 w1t, &inc_w1t ); 00328 00329 FLA_free( u2_conj ); 00330 00331 // // w1t = w1t / conj( tau ); 00332 // FLA_Inv_scalc_external( FLA_CONJUGATE, tau, w1t ); 00333 temp = 1.0F / ( tau->real * tau->real + 00334 tau->imag * tau->imag ); 00335 tau_copy.real = tau->real * temp; 00336 tau_copy.imag = tau->imag * temp; 00337 FLA_C2F( cscal )( &n_a1t, 00338 &tau_copy, 00339 w1t, &inc_w1t ); 00340 00341 // // a1t = - w1t + a1t; 00342 // FLA_Axpy_external( FLA_MINUS_ONE, w1t, a1t ); 00343 FLA_C2F( caxpy )( &n_a1t, 00344 minus_one_p, 00345 w1t, &inc_w1t, 00346 a1t, &inc_a1t ); 00347 00348 // // A2 = - u2 * w1t + A2; 00349 // FLA_Ger_external( FLA_MINUS_ONE, u2, w1t, A2 ); 00350 FLA_C2F( cgeru )( &m_u2_A2, &n_a1t, 00351 minus_one_p, 00352 u2, &inc_u2, 00353 w1t, &inc_w1t, 00354 A2, &ldim_A2 ); 00355 00356 // FLA_Obj_free( &w1t ); 00357 FLA_free( w1t ); 00358 00359 return FLA_SUCCESS; 00360 }