Functions | |
FLA_Error | FLA_Househ2_UT_opt (FLA_Obj chi_1, FLA_Obj x2, FLA_Obj tau) |
FLA_Error | FLA_Househ2_UT_opt_float (int m_x2, float *chi_1, float *x2, int inc_x2, float *tau) |
FLA_Error | FLA_Househ2_UT_opt_double (int m_x2, double *chi_1, double *x2, int inc_x2, double *tau) |
FLA_Error | FLA_Househ2_UT_opt_scomplex (int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau) |
FLA_Error | FLA_Househ2_UT_opt_dcomplex (int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau) |
References FLA_Househ2_UT_opt_dcomplex(), FLA_Househ2_UT_opt_double(), FLA_Househ2_UT_opt_float(), FLA_Househ2_UT_opt_scomplex(), FLA_Obj_datatype(), FLA_Obj_length(), and FLA_Obj_vector_inc().
00038 { 00039 FLA_Datatype datatype; 00040 int m_x2; 00041 int inc_x2; 00042 00043 datatype = FLA_Obj_datatype( x2 ); 00044 00045 m_x2 = FLA_Obj_length( x2 ); 00046 inc_x2 = FLA_Obj_vector_inc( x2 ); 00047 00048 switch ( datatype ) 00049 { 00050 case FLA_FLOAT: 00051 { 00052 float* chi_1_p = ( float* ) FLA_FLOAT_PTR( chi_1 ); 00053 float* x2_p = ( float* ) FLA_FLOAT_PTR( x2 ); 00054 float* tau_p = ( float* ) FLA_FLOAT_PTR( tau ); 00055 00056 FLA_Househ2_UT_opt_float( m_x2, 00057 chi_1_p, 00058 x2_p, inc_x2, 00059 tau_p ); 00060 break; 00061 } 00062 00063 case FLA_DOUBLE: 00064 { 00065 double* chi_1_p = ( double* ) FLA_DOUBLE_PTR( chi_1 ); 00066 double* x2_p = ( double* ) FLA_DOUBLE_PTR( x2 ); 00067 double* tau_p = ( double* ) FLA_DOUBLE_PTR( tau ); 00068 00069 FLA_Househ2_UT_opt_double( m_x2, 00070 chi_1_p, 00071 x2_p, inc_x2, 00072 tau_p ); 00073 break; 00074 } 00075 00076 case FLA_COMPLEX: 00077 { 00078 scomplex* chi_1_p = ( scomplex* ) FLA_COMPLEX_PTR( chi_1 ); 00079 scomplex* x2_p = ( scomplex* ) FLA_COMPLEX_PTR( x2 ); 00080 scomplex* tau_p = ( scomplex* ) FLA_COMPLEX_PTR( tau ); 00081 00082 FLA_Househ2_UT_opt_scomplex( m_x2, 00083 chi_1_p, 00084 x2_p, inc_x2, 00085 tau_p ); 00086 break; 00087 } 00088 00089 case FLA_DOUBLE_COMPLEX: 00090 { 00091 dcomplex* chi_1_p = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( chi_1 ); 00092 dcomplex* x2_p = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( x2 ); 00093 dcomplex* tau_p = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( tau ); 00094 00095 FLA_Househ2_UT_opt_dcomplex( m_x2, 00096 chi_1_p, 00097 x2_p, inc_x2, 00098 tau_p ); 00099 break; 00100 } 00101 } 00102 00103 return FLA_SUCCESS; 00104 }
FLA_Error FLA_Househ2_UT_opt_dcomplex | ( | int | m_x2, | |
dcomplex * | chi_1, | |||
dcomplex * | x2, | |||
int | inc_x2, | |||
dcomplex * | tau | |||
) |
References cblas_dznrm2(), cblas_zscal(), dznrm2(), FLA_ZERO, dcomplex::imag, dcomplex::real, and zscal().
Referenced by FLA_Househ2_UT_opt(), FLA_QR_UT_Accum_T_opt_var1_dcomplex(), and FLA_QR_UT_UD_Accum_T_opt_var1_dcomplex().
00531 { 00532 int i_one = 1; 00533 int i_two = 2; 00534 00535 if ( m_x2 < 0 ) 00536 { 00537 FLA_Check_error_code( FLA_EXPECTED_NONNEGATIVE_VALUE ); 00538 } 00539 00540 00541 { 00542 dcomplex *chi_1_p = ( dcomplex * ) chi_1; 00543 dcomplex *x2_buff = ( dcomplex * ) x2; 00544 dcomplex *tau_p = ( dcomplex * ) tau; 00545 dcomplex *zero_p = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO ); 00546 dcomplex one_half; 00547 dcomplex y[ 2 ]; 00548 dcomplex temp; 00549 dcomplex alpha; 00550 00551 one_half.real = 1.0 / 2.0; 00552 one_half.imag = 0.0; 00553 00554 /* 00555 Trivial tau, chi_1 if x_2 is 0 x 1. 00556 */ 00557 00558 if ( m_x2 == 0 ) 00559 { 00560 tau_p->real = one_half.real; 00561 tau_p->imag = one_half.imag; 00562 chi_1_p->real = -(chi_1_p->real); 00563 chi_1_p->imag = -(chi_1_p->imag); 00564 00565 return FLA_SUCCESS; 00566 } 00567 00568 /* 00569 Preserve chi_1 for later use. 00570 */ 00571 00572 alpha.real = chi_1_p->real; 00573 alpha.imag = chi_1_p->imag; 00574 00575 /* 00576 y = / chi_1 \ 00577 \ || x_2 ||_2 / ( notice: || y ||_2 = || x ||_2 ) 00578 */ 00579 00580 y[ 0 ].real = chi_1_p->real; 00581 y[ 0 ].imag = chi_1_p->imag; 00582 00583 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00584 y[ 1 ].real = cblas_dznrm2( m_x2, x2_buff, inc_x2 ); 00585 y[ 1 ].imag = 0.0; 00586 #else 00587 y[ 1 ].real = FLA_C2F( dznrm2 )( &m_x2, x2_buff, &inc_x2 ); 00588 y[ 1 ].imag = 0.0; 00589 #endif 00590 00591 /* 00592 Trivial tau, chi_1 if || x_2 ||_2 = 0. 00593 */ 00594 00595 if ( y[ 1 ].real == zero_p->real ) 00596 { 00597 tau_p->real = one_half.real; 00598 tau_p->imag = one_half.imag; 00599 chi_1_p->real = -(chi_1_p->real); 00600 chi_1_p->imag = -(chi_1_p->imag); 00601 00602 return FLA_SUCCESS; 00603 } 00604 00605 /* 00606 temp = || / chi_1 \ || 00607 || \ x_2 / ||_2 00608 */ 00609 00610 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00611 temp.real = cblas_dznrm2( i_two, y, i_one ); 00612 #else 00613 temp.real = FLA_C2F( dznrm2 )( &i_two, y, &i_one ); 00614 #endif 00615 00616 /* 00617 y = / chi_1 + sign( chi_1 ) * temp \ 00618 \ || x_2 ||_2 / 00619 00620 chi_1 = -sign( Re( chi_1 ) ) * temp 00621 */ 00622 00623 if ( sign( y[ 0 ].real ) < 1.0 ) 00624 { 00625 y[ 0 ].real -= temp.real; 00626 chi_1_p->real = temp.real; 00627 chi_1_p->imag = 0.0; 00628 } 00629 else 00630 { 00631 y[ 0 ].real += temp.real; 00632 chi_1_p->real = -temp.real; 00633 chi_1_p->imag = 0.0; 00634 } 00635 00636 /* 00637 Overwrite x_2 with x_2/eta_2 00638 */ 00639 00640 // y[ 0 ] = 1.0 / y[ 0 ]; 00641 00642 temp.real = 1.0 / ( y[ 0 ].real * y[ 0 ].real + 00643 y[ 0 ].imag * y[ 0 ].imag ); 00644 y[ 0 ].real = y[ 0 ].real * temp.real; 00645 y[ 0 ].imag = y[ 0 ].imag * -temp.real; 00646 00647 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00648 cblas_zscal( m_x2, y[ 0 ], x2_buff, inc_x2 ); 00649 #else 00650 FLA_C2F( zscal )( &m_x2, &y[ 0 ], x2_buff, &inc_x2 ); 00651 #endif 00652 00653 /* 00654 Recall that 00655 00656 alpha = original value of chi_1 00657 00658 and 00659 00660 -sign( Re( chi_1 ) ) * || x ||_2 00661 00662 where || x ||_2 = || / chi_1 \ || 00663 || \ x_2 / ||_2 00664 overwrote chi_1. 00665 */ 00666 00667 tau_p->real = ( chi_1_p->real - alpha.real ) / chi_1_p->real; 00668 tau_p->imag = ( 0.0 - alpha.imag ) / chi_1_p->real; 00669 00670 /* 00671 Invert tau. 00672 */ 00673 temp.real = 1.0 / ( tau_p->real * tau_p->real + 00674 tau_p->imag * tau_p->imag ); 00675 tau_p->real = tau_p->real * temp.real; 00676 tau_p->imag = tau_p->imag * -temp.real; 00677 } 00678 00679 return FLA_SUCCESS; 00680 }
FLA_Error FLA_Househ2_UT_opt_double | ( | int | m_x2, | |
double * | chi_1, | |||
double * | x2, | |||
int | inc_x2, | |||
double * | tau | |||
) |
References cblas_ddot(), cblas_dnrm2(), cblas_dscal(), ddot(), dnrm2(), dscal(), and FLA_ZERO.
Referenced by FLA_Househ2_UT_opt(), FLA_QR_UT_Accum_T_opt_var1_double(), and FLA_QR_UT_UD_Accum_T_opt_var1_double().
00241 { 00242 int i_one = 1; 00243 int i_two = 2; 00244 00245 if ( m_x2 < 0 ) 00246 { 00247 FLA_Check_error_code( FLA_EXPECTED_NONNEGATIVE_VALUE ); 00248 } 00249 00250 00251 { 00252 double *chi_1_p = ( double * ) chi_1; 00253 double *x2_buff = ( double * ) x2; 00254 double *tau_p = ( double * ) tau; 00255 double *zero_p = ( double * ) FLA_DOUBLE_PTR( FLA_ZERO ); 00256 double one_half = 1.0 / 2.0; 00257 double y[ 2 ]; 00258 double temp; 00259 00260 /* 00261 Trivial tau, chi_1 if x_2 is 0 x 1. 00262 */ 00263 00264 if ( m_x2 == 0 ) 00265 { 00266 *tau_p = one_half; 00267 *chi_1_p = -(*chi_1_p); 00268 00269 return FLA_SUCCESS; 00270 } 00271 00272 /* 00273 y = / chi_1 \ 00274 \ || x_2 ||_2 / ( notice: || y ||_2 = || x ||_2 ) 00275 */ 00276 00277 y[ 0 ] = *chi_1_p; 00278 00279 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00280 y[ 1 ] = cblas_dnrm2( m_x2, x2_buff, inc_x2 ); 00281 #else 00282 y[ 1 ] = FLA_C2F( dnrm2 )( &m_x2, x2_buff, &inc_x2 ); 00283 #endif 00284 00285 /* 00286 Trivial tau, chi_1 if || x_2 ||_2 = 0. 00287 */ 00288 00289 if ( y[ 1 ] == *zero_p ) 00290 { 00291 *tau_p = one_half; 00292 *chi_1_p = -(*chi_1_p); 00293 00294 return FLA_SUCCESS; 00295 } 00296 00297 /* 00298 temp = || / chi_1 \ || 00299 || \ x_2 / ||_2 00300 */ 00301 00302 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00303 temp = cblas_dnrm2( i_two, y, i_one ); 00304 #else 00305 temp = FLA_C2F( dnrm2 )( &i_two, y, &i_one ); 00306 #endif 00307 00308 /* 00309 y = / chi_1 + sign( chi_1 ) * temp \ 00310 \ || x_2 ||_2 / 00311 00312 chi_1 = -sign( chi_1 ) * temp 00313 */ 00314 00315 if ( sign( y[ 0 ] ) < 1.0 ) 00316 { 00317 y[ 0 ] -= temp; 00318 *chi_1_p = temp; 00319 } 00320 else 00321 { 00322 y[ 0 ] += temp; 00323 *chi_1_p = -temp; 00324 } 00325 00326 /* 00327 y = / 1 \ 00328 \ eta_2/eta_1 / ( normalize y to have "1" first entry ) 00329 */ 00330 00331 y[ 1 ] = y[ 1 ] / y[ 0 ]; 00332 00333 /* 00334 Overwrite x_2 with x_2/eta_2 00335 */ 00336 00337 y[ 0 ] = 1.0 / y[ 0 ]; 00338 00339 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00340 cblas_dscal( m_x2, y[ 0 ], x2_buff, inc_x2 ); 00341 #else 00342 FLA_C2F( dscal )( &m_x2, &y[ 0 ], x2_buff, &inc_x2 ); 00343 #endif 00344 00345 y[ 0 ] = 1.0; 00346 00347 /* 00348 temp = y^T y / 2 00349 */ 00350 00351 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00352 temp = cblas_ddot( i_two, y, i_one, y, i_one ); 00353 #else 00354 temp = FLA_C2F( ddot )( &i_two, y, &i_one, y, &i_one ); 00355 #endif 00356 00357 *tau_p = temp / 2.0; 00358 } 00359 00360 return FLA_SUCCESS; 00361 }
FLA_Error FLA_Househ2_UT_opt_float | ( | int | m_x2, | |
float * | chi_1, | |||
float * | x2, | |||
int | inc_x2, | |||
float * | tau | |||
) |
References cblas_sdot(), cblas_snrm2(), cblas_sscal(), FLA_ZERO, sdot(), snrm2(), and sscal().
Referenced by FLA_Househ2_UT_opt(), FLA_QR_UT_Accum_T_opt_var1_float(), and FLA_QR_UT_UD_Accum_T_opt_var1_float().
00111 { 00112 int i_one = 1; 00113 int i_two = 2; 00114 00115 if ( m_x2 < 0 ) 00116 { 00117 FLA_Check_error_code( FLA_EXPECTED_NONNEGATIVE_VALUE ); 00118 } 00119 00120 00121 { 00122 float *chi_1_p = ( float * ) chi_1; 00123 float *x2_buff = ( float * ) x2; 00124 float *tau_p = ( float * ) tau; 00125 float *zero_p = ( float * ) FLA_FLOAT_PTR( FLA_ZERO ); 00126 float one_half = 1.0F / 2.0F; 00127 float y[ 2 ]; 00128 float temp; 00129 00130 /* 00131 Trivial tau, chi_1 if x_2 is 0 x 1. 00132 */ 00133 00134 if ( m_x2 == 0 ) 00135 { 00136 *tau_p = one_half; 00137 *chi_1_p = -(*chi_1_p); 00138 00139 return FLA_SUCCESS; 00140 } 00141 00142 /* 00143 y = / chi_1 \ 00144 \ || x_2 ||_2 / ( notice: || y ||_2 = || x ||_2 ) 00145 */ 00146 00147 y[ 0 ] = *chi_1_p; 00148 00149 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00150 y[ 1 ] = cblas_snrm2( m_x2, x2_buff, inc_x2 ); 00151 #else 00152 y[ 1 ] = FLA_C2F( snrm2 )( &m_x2, x2_buff, &inc_x2 ); 00153 #endif 00154 00155 /* 00156 Trivial tau, chi_1 if || x_2 ||_2 = 0. 00157 */ 00158 00159 if ( y[ 1 ] == *zero_p ) 00160 { 00161 *tau_p = one_half; 00162 *chi_1_p = -(*chi_1_p); 00163 00164 return FLA_SUCCESS; 00165 } 00166 00167 /* 00168 temp = || / chi_1 \ || 00169 || \ x_2 / ||_2 00170 */ 00171 00172 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00173 temp = cblas_snrm2( i_two, y, i_one ); 00174 #else 00175 temp = FLA_C2F( snrm2 )( &i_two, y, &i_one ); 00176 #endif 00177 00178 /* 00179 y = / chi_1 + sign( chi_1 ) * temp \ 00180 \ || x_2 ||_2 / 00181 00182 chi_1 = -sign( chi_1 ) * temp 00183 */ 00184 00185 if ( sign( y[ 0 ] ) < 1.0F ) 00186 { 00187 y[ 0 ] -= temp; 00188 *chi_1_p = temp; 00189 } 00190 else 00191 { 00192 y[ 0 ] += temp; 00193 *chi_1_p = -temp; 00194 } 00195 00196 /* 00197 y = / 1 \ 00198 \ eta_2/eta_1 / ( normalize y to have "1" first entry ) 00199 */ 00200 00201 y[ 1 ] = y[ 1 ] / y[ 0 ]; 00202 00203 /* 00204 Overwrite x_2 with x_2/eta_2 00205 */ 00206 00207 y[ 0 ] = 1.0 / y[ 0 ]; 00208 00209 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00210 cblas_sscal( m_x2, y[ 0 ], x2_buff, inc_x2 ); 00211 #else 00212 FLA_C2F( sscal )( &m_x2, &y[ 0 ], x2_buff, &inc_x2 ); 00213 #endif 00214 00215 y[ 0 ] = 1.0F; 00216 00217 /* 00218 temp = y^T y / 2 00219 */ 00220 00221 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00222 temp = cblas_sdot( i_two, y, i_one, y, i_one ); 00223 #else 00224 temp = FLA_C2F( sdot )( &i_two, y, &i_one, y, &i_one ); 00225 #endif 00226 00227 *tau_p = temp / 2.0F; 00228 } 00229 00230 return FLA_SUCCESS; 00231 }
FLA_Error FLA_Househ2_UT_opt_scomplex | ( | int | m_x2, | |
scomplex * | chi_1, | |||
scomplex * | x2, | |||
int | inc_x2, | |||
scomplex * | tau | |||
) |
References cblas_cscal(), cblas_scnrm2(), cscal(), FLA_ZERO, scomplex::imag, scomplex::real, and scnrm2().
Referenced by FLA_Househ2_UT_opt(), FLA_QR_UT_Accum_T_opt_var1_scomplex(), and FLA_QR_UT_UD_Accum_T_opt_var1_scomplex().
00372 { 00373 int i_one = 1; 00374 int i_two = 2; 00375 00376 if ( m_x2 < 0 ) 00377 { 00378 FLA_Check_error_code( FLA_EXPECTED_NONNEGATIVE_VALUE ); 00379 } 00380 00381 00382 { 00383 scomplex *chi_1_p = ( scomplex * ) chi_1; 00384 scomplex *x2_buff = ( scomplex * ) x2; 00385 scomplex *tau_p = ( scomplex * ) tau; 00386 scomplex *zero_p = ( scomplex * ) FLA_COMPLEX_PTR( FLA_ZERO ); 00387 scomplex one_half; 00388 scomplex y[ 2 ]; 00389 scomplex temp; 00390 scomplex alpha; 00391 00392 one_half.real = 1.0F / 2.0F; 00393 one_half.imag = 0.0F; 00394 00395 /* 00396 Trivial tau, chi_1 if x_2 is 0 x 1. 00397 */ 00398 00399 if ( m_x2 == 0 ) 00400 { 00401 tau_p->real = one_half.real; 00402 tau_p->imag = one_half.imag; 00403 chi_1_p->real = -(chi_1_p->real); 00404 chi_1_p->imag = -(chi_1_p->imag); 00405 00406 return FLA_SUCCESS; 00407 } 00408 00409 /* 00410 Preserve chi_1 for later use. 00411 */ 00412 00413 alpha.real = chi_1_p->real; 00414 alpha.imag = chi_1_p->imag; 00415 00416 /* 00417 y = / chi_1 \ 00418 \ || x_2 ||_2 / ( notice: || y ||_2 = || x ||_2 ) 00419 */ 00420 00421 y[ 0 ].real = chi_1_p->real; 00422 y[ 0 ].imag = chi_1_p->imag; 00423 00424 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00425 y[ 1 ].real = cblas_scnrm2( m_x2, x2_buff, inc_x2 ); 00426 y[ 1 ].imag = 0.0F; 00427 #else 00428 y[ 1 ].real = FLA_C2F( scnrm2 )( &m_x2, x2_buff, &inc_x2 ); 00429 y[ 1 ].imag = 0.0F; 00430 #endif 00431 00432 /* 00433 Trivial tau, chi_1 if || x_2 ||_2 = 0. 00434 */ 00435 00436 if ( y[ 1 ].real == zero_p->real ) 00437 { 00438 tau_p->real = one_half.real; 00439 tau_p->imag = one_half.imag; 00440 chi_1_p->real = -(chi_1_p->real); 00441 chi_1_p->imag = -(chi_1_p->imag); 00442 00443 return FLA_SUCCESS; 00444 } 00445 00446 /* 00447 temp = || / chi_1 \ || 00448 || \ x_2 / ||_2 00449 */ 00450 00451 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00452 temp.real = cblas_scnrm2( i_two, y, i_one ); 00453 #else 00454 temp.real = FLA_C2F( scnrm2 )( &i_two, y, &i_one ); 00455 #endif 00456 00457 /* 00458 y = / chi_1 + sign( chi_1 ) * temp \ 00459 \ || x_2 ||_2 / 00460 00461 chi_1 = -sign( Re( chi_1 ) ) * temp 00462 */ 00463 00464 if ( sign( y[ 0 ].real ) < 1.0F ) 00465 { 00466 y[ 0 ].real -= temp.real; 00467 chi_1_p->real = temp.real; 00468 chi_1_p->imag = 0.0F; 00469 } 00470 else 00471 { 00472 y[ 0 ].real += temp.real; 00473 chi_1_p->real = -temp.real; 00474 chi_1_p->imag = 0.0F; 00475 } 00476 00477 /* 00478 Overwrite x_2 with x_2/eta_2 00479 */ 00480 00481 // y[ 0 ] = 1.0 / y[ 0 ]; 00482 00483 temp.real = 1.0F / ( y[ 0 ].real * y[ 0 ].real + 00484 y[ 0 ].imag * y[ 0 ].imag ); 00485 y[ 0 ].real = y[ 0 ].real * temp.real; 00486 y[ 0 ].imag = y[ 0 ].imag * -temp.real; 00487 00488 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00489 cblas_cscal( m_x2, y[ 0 ], x2_buff, inc_x2 ); 00490 #else 00491 FLA_C2F( cscal )( &m_x2, &y[ 0 ], x2_buff, &inc_x2 ); 00492 #endif 00493 00494 /* 00495 Recall that 00496 00497 alpha = original value of chi_1 00498 00499 and 00500 00501 -sign( Re( chi_1 ) ) * || x ||_2 00502 00503 where || x ||_2 = || / chi_1 \ || 00504 || \ x_2 / ||_2 00505 overwrote chi_1. 00506 */ 00507 00508 tau_p->real = ( chi_1_p->real - alpha.real ) / chi_1_p->real; 00509 tau_p->imag = ( 0.0F - alpha.imag ) / chi_1_p->real; 00510 00511 /* 00512 Invert tau. 00513 */ 00514 temp.real = 1.0F / ( tau_p->real * tau_p->real + 00515 tau_p->imag * tau_p->imag ); 00516 tau_p->real = tau_p->real * temp.real; 00517 tau_p->imag = tau_p->imag * -temp.real; 00518 } 00519 00520 return FLA_SUCCESS; 00521 }