Functions | |
FLA_Error | FLA_Househ2_UT (FLA_Obj chi_1, FLA_Obj x2, FLA_Obj tau) |
References cblas_cscal(), cblas_ddot(), cblas_dnrm2(), cblas_dscal(), cblas_dznrm2(), cblas_scnrm2(), cblas_sdot(), cblas_snrm2(), cblas_sscal(), cblas_zscal(), cscal(), ddot(), dnrm2(), dscal(), dznrm2(), FLA_Obj_datatype(), FLA_Obj_vector_dim(), FLA_Obj_vector_inc(), FLA_ZERO, dcomplex::imag, scomplex::imag, dcomplex::real, scomplex::real, scnrm2(), sdot(), snrm2(), sscal(), and zscal().
Referenced by FLA_QR_UT_Accum_T_unb_var1(), and FLA_QR_UT_UD_Accum_T_unb_var1().
00039 : 00040 \ u2 / 00041 00042 Compute u2 and tau such that 00043 ( I - 1 / tau / 1 \ ( 1 u2^T ) ) / chi_1 \ = / -+ || x ||_2 \ 00044 \ u2 / \ x2 / \ 0 / 00045 where 00046 x = / chi_1 \ 00047 \ x2 / 00048 tau = ( 1 + u2^T u2 ) / 2 00049 00050 Upon completion, -+ || x ||_2 has overwritten chi_1 00051 and u2 has overwritten x2 00052 */ 00053 { 00054 FLA_Datatype datatype; 00055 int x2_stride; 00056 int x2_length; 00057 int i_one = 1; 00058 int i_two = 2; 00059 00060 x2_length = FLA_Obj_vector_dim( x2 ); 00061 x2_stride = FLA_Obj_vector_inc( x2 ); 00062 datatype = FLA_Obj_datatype( chi_1 ); 00063 00064 if ( x2_length < 0 ) 00065 { 00066 /* 00067 Negative length of x_2 encountered. If Hess was invoked, check ilo, ihi 00068 for correct index base--may have to subtract 1 from Fortran ilo/ihi 00069 */ 00070 FLA_Check_error_code( FLA_EXPECTED_NONNEGATIVE_VALUE ); 00071 } 00072 00073 00074 switch ( datatype ) 00075 { 00076 00077 case FLA_FLOAT: 00078 { 00079 float *chi_1_p = ( float * ) FLA_FLOAT_PTR( chi_1 ); 00080 float *x2_buff = ( float * ) FLA_FLOAT_PTR( x2 ); 00081 float *tau_p = ( float * ) FLA_FLOAT_PTR( tau ); 00082 float *zero_p = ( float * ) FLA_FLOAT_PTR( FLA_ZERO ); 00083 float one_half = 1.0F / 2.0F; 00084 float y[ 2 ]; 00085 float temp; 00086 00087 /* 00088 Trivial tau, chi_1 if x_2 is 0 x 1. 00089 */ 00090 00091 if ( x2_length == 0 ) 00092 { 00093 *tau_p = one_half; 00094 *chi_1_p = -(*chi_1_p); 00095 00096 return FLA_SUCCESS; 00097 } 00098 00099 /* 00100 y = / chi_1 \ 00101 \ || x_2 ||_2 / ( notice: || y ||_2 = || x ||_2 ) 00102 */ 00103 00104 y[ 0 ] = *chi_1_p; 00105 00106 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00107 y[ 1 ] = cblas_snrm2( x2_length, x2_buff, x2_stride ); 00108 #else 00109 y[ 1 ] = FLA_C2F( snrm2 )( &x2_length, x2_buff, &x2_stride ); 00110 #endif 00111 00112 /* 00113 Trivial tau, chi_1 if || x_2 ||_2 = 0. 00114 */ 00115 00116 if ( y[ 1 ] == *zero_p ) 00117 { 00118 *tau_p = one_half; 00119 *chi_1_p = -(*chi_1_p); 00120 00121 return FLA_SUCCESS; 00122 } 00123 00124 /* 00125 temp = || / chi_1 \ || 00126 || \ x_2 / ||_2 00127 */ 00128 00129 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00130 temp = cblas_snrm2( i_two, y, i_one ); 00131 #else 00132 temp = FLA_C2F( snrm2 )( &i_two, y, &i_one ); 00133 #endif 00134 00135 /* 00136 y = / chi_1 + sign( chi_1 ) * temp \ 00137 \ || x_2 ||_2 / 00138 00139 chi_1 = -sign( chi_1 ) * temp 00140 */ 00141 00142 if ( sign( y[ 0 ] ) < 1.0F ) 00143 { 00144 y[ 0 ] -= temp; 00145 *chi_1_p = temp; 00146 } 00147 else 00148 { 00149 y[ 0 ] += temp; 00150 *chi_1_p = -temp; 00151 } 00152 00153 /* 00154 y = / 1 \ 00155 \ eta_2/eta_1 / ( normalize y to have "1" first entry ) 00156 */ 00157 00158 y[ 1 ] = y[ 1 ] / y[ 0 ]; 00159 00160 /* 00161 Overwrite x_2 with x_2/eta_2 00162 */ 00163 00164 y[ 0 ] = 1.0 / y[ 0 ]; 00165 00166 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00167 cblas_sscal( x2_length, y[ 0 ], x2_buff, x2_stride ); 00168 #else 00169 FLA_C2F( sscal )( &x2_length, &y[ 0 ], x2_buff, &x2_stride ); 00170 #endif 00171 00172 y[ 0 ] = 1.0F; 00173 00174 /* 00175 temp = y^T y / 2 00176 */ 00177 00178 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00179 temp = cblas_sdot( i_two, y, i_one, y, i_one ); 00180 #else 00181 temp = FLA_C2F( sdot )( &i_two, y, &i_one, y, &i_one ); 00182 #endif 00183 00184 *tau_p = temp / 2.0F; 00185 00186 break; 00187 } 00188 00189 case FLA_DOUBLE: 00190 { 00191 double *chi_1_p = ( double * ) FLA_DOUBLE_PTR( chi_1 ); 00192 double *x2_buff = ( double * ) FLA_DOUBLE_PTR( x2 ); 00193 double *tau_p = ( double * ) FLA_DOUBLE_PTR( tau ); 00194 double *zero_p = ( double * ) FLA_DOUBLE_PTR( FLA_ZERO ); 00195 double one_half = 1.0 / 2.0; 00196 double y[ 2 ]; 00197 double temp; 00198 00199 /* 00200 Trivial tau, chi_1 if x_2 is 0 x 1. 00201 */ 00202 00203 if ( x2_length == 0 ) 00204 { 00205 *tau_p = one_half; 00206 *chi_1_p = -(*chi_1_p); 00207 00208 return FLA_SUCCESS; 00209 } 00210 00211 /* 00212 y = / chi_1 \ 00213 \ || x_2 ||_2 / ( notice: || y ||_2 = || x ||_2 ) 00214 */ 00215 00216 y[ 0 ] = *chi_1_p; 00217 00218 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00219 y[ 1 ] = cblas_dnrm2( x2_length, x2_buff, x2_stride ); 00220 #else 00221 y[ 1 ] = FLA_C2F( dnrm2 )( &x2_length, x2_buff, &x2_stride ); 00222 #endif 00223 00224 /* 00225 Trivial tau, chi_1 if || x_2 ||_2 = 0. 00226 */ 00227 00228 if ( y[ 1 ] == *zero_p ) 00229 { 00230 *tau_p = one_half; 00231 *chi_1_p = -(*chi_1_p); 00232 00233 return FLA_SUCCESS; 00234 } 00235 00236 /* 00237 temp = || / chi_1 \ || 00238 || \ x_2 / ||_2 00239 */ 00240 00241 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00242 temp = cblas_dnrm2( i_two, y, i_one ); 00243 #else 00244 temp = FLA_C2F( dnrm2 )( &i_two, y, &i_one ); 00245 #endif 00246 00247 /* 00248 y = / chi_1 + sign( chi_1 ) * temp \ 00249 \ || x_2 ||_2 / 00250 00251 chi_1 = -sign( chi_1 ) * temp 00252 */ 00253 00254 if ( sign( y[ 0 ] ) < 1.0 ) 00255 { 00256 y[ 0 ] -= temp; 00257 *chi_1_p = temp; 00258 } 00259 else 00260 { 00261 y[ 0 ] += temp; 00262 *chi_1_p = -temp; 00263 } 00264 00265 /* 00266 y = / 1 \ 00267 \ eta_2/eta_1 / ( normalize y to have "1" first entry ) 00268 */ 00269 00270 y[ 1 ] = y[ 1 ] / y[ 0 ]; 00271 00272 /* 00273 Overwrite x_2 with x_2/eta_2 00274 */ 00275 00276 y[ 0 ] = 1.0 / y[ 0 ]; 00277 00278 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00279 cblas_dscal( x2_length, y[ 0 ], x2_buff, x2_stride ); 00280 #else 00281 FLA_C2F( dscal )( &x2_length, &y[ 0 ], x2_buff, &x2_stride ); 00282 #endif 00283 00284 y[ 0 ] = 1.0; 00285 00286 /* 00287 temp = y^T y / 2 00288 */ 00289 00290 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00291 temp = cblas_ddot( i_two, y, i_one, y, i_one ); 00292 #else 00293 temp = FLA_C2F( ddot )( &i_two, y, &i_one, y, &i_one ); 00294 #endif 00295 00296 *tau_p = temp / 2.0; 00297 00298 break; 00299 } 00300 00301 case FLA_COMPLEX: 00302 { 00303 scomplex *chi_1_p = ( scomplex * ) FLA_COMPLEX_PTR( chi_1 ); 00304 scomplex *x2_buff = ( scomplex * ) FLA_COMPLEX_PTR( x2 ); 00305 scomplex *tau_p = ( scomplex * ) FLA_COMPLEX_PTR( tau ); 00306 scomplex *zero_p = ( scomplex * ) FLA_COMPLEX_PTR( FLA_ZERO ); 00307 scomplex one_half; 00308 scomplex y[ 2 ]; 00309 scomplex temp; 00310 scomplex alpha; 00311 00312 one_half.real = 1.0F / 2.0F; 00313 one_half.imag = 0.0F; 00314 00315 /* 00316 Trivial tau, chi_1 if x_2 is 0 x 1. 00317 */ 00318 00319 if ( x2_length == 0 ) 00320 { 00321 tau_p->real = one_half.real; 00322 tau_p->imag = one_half.imag; 00323 chi_1_p->real = -(chi_1_p->real); 00324 chi_1_p->imag = -(chi_1_p->imag); 00325 00326 return FLA_SUCCESS; 00327 } 00328 00329 /* 00330 Preserve chi_1 for later use. 00331 */ 00332 00333 alpha.real = chi_1_p->real; 00334 alpha.imag = chi_1_p->imag; 00335 00336 /* 00337 y = / chi_1 \ 00338 \ || x_2 ||_2 / ( notice: || y ||_2 = || x ||_2 ) 00339 */ 00340 00341 y[ 0 ].real = chi_1_p->real; 00342 y[ 0 ].imag = chi_1_p->imag; 00343 00344 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00345 y[ 1 ].real = cblas_scnrm2( x2_length, x2_buff, x2_stride ); 00346 y[ 1 ].imag = 0.0F; 00347 #else 00348 y[ 1 ].real = FLA_C2F( scnrm2 )( &x2_length, x2_buff, &x2_stride ); 00349 y[ 1 ].imag = 0.0F; 00350 #endif 00351 00352 /* 00353 Trivial tau, chi_1 if || x_2 ||_2 = 0. 00354 */ 00355 00356 if ( y[ 1 ].real == zero_p->real ) 00357 { 00358 tau_p->real = one_half.real; 00359 tau_p->imag = one_half.imag; 00360 chi_1_p->real = -(chi_1_p->real); 00361 chi_1_p->imag = -(chi_1_p->imag); 00362 00363 return FLA_SUCCESS; 00364 } 00365 00366 /* 00367 temp = || / chi_1 \ || 00368 || \ x_2 / ||_2 00369 */ 00370 00371 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00372 temp.real = cblas_scnrm2( i_two, y, i_one ); 00373 #else 00374 temp.real = FLA_C2F( scnrm2 )( &i_two, y, &i_one ); 00375 #endif 00376 00377 /* 00378 y = / chi_1 + sign( chi_1 ) * temp \ 00379 \ || x_2 ||_2 / 00380 00381 chi_1 = -sign( Re( chi_1 ) ) * temp 00382 */ 00383 00384 if ( sign( y[ 0 ].real ) < 1.0F ) 00385 { 00386 y[ 0 ].real -= temp.real; 00387 chi_1_p->real = temp.real; 00388 chi_1_p->imag = 0.0F; 00389 } 00390 else 00391 { 00392 y[ 0 ].real += temp.real; 00393 chi_1_p->real = -temp.real; 00394 chi_1_p->imag = 0.0F; 00395 } 00396 00397 /* 00398 Overwrite x_2 with x_2/eta_2 00399 */ 00400 00401 // y[ 0 ] = 1.0 / y[ 0 ]; 00402 00403 temp.real = 1.0F / ( y[ 0 ].real * y[ 0 ].real + 00404 y[ 0 ].imag * y[ 0 ].imag ); 00405 y[ 0 ].real = y[ 0 ].real * temp.real; 00406 y[ 0 ].imag = y[ 0 ].imag * -temp.real; 00407 00408 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00409 cblas_cscal( x2_length, y[ 0 ], x2_buff, x2_stride ); 00410 #else 00411 FLA_C2F( cscal )( &x2_length, &y[ 0 ], x2_buff, &x2_stride ); 00412 #endif 00413 00414 /* 00415 Recall that 00416 00417 alpha = original value of chi_1 00418 00419 and 00420 00421 -sign( Re( chi_1 ) ) * || x ||_2 00422 00423 where || x ||_2 = || / chi_1 \ || 00424 || \ x_2 / ||_2 00425 overwrote chi_1. 00426 */ 00427 00428 tau_p->real = ( chi_1_p->real - alpha.real ) / chi_1_p->real; 00429 tau_p->imag = ( 0.0F - alpha.imag ) / chi_1_p->real; 00430 00431 /* 00432 Invert tau. 00433 */ 00434 temp.real = 1.0F / ( tau_p->real * tau_p->real + 00435 tau_p->imag * tau_p->imag ); 00436 tau_p->real = tau_p->real * temp.real; 00437 tau_p->imag = tau_p->imag * -temp.real; 00438 00439 break; 00440 } 00441 00442 case FLA_DOUBLE_COMPLEX: 00443 { 00444 dcomplex *chi_1_p = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( chi_1 ); 00445 dcomplex *x2_buff = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( x2 ); 00446 dcomplex *tau_p = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( tau ); 00447 dcomplex *zero_p = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO ); 00448 dcomplex one_half; 00449 dcomplex y[ 2 ]; 00450 dcomplex temp; 00451 dcomplex alpha; 00452 00453 one_half.real = 1.0 / 2.0; 00454 one_half.imag = 0.0; 00455 00456 /* 00457 Trivial tau, chi_1 if x_2 is 0 x 1. 00458 */ 00459 00460 if ( x2_length == 0 ) 00461 { 00462 tau_p->real = one_half.real; 00463 tau_p->imag = one_half.imag; 00464 chi_1_p->real = -(chi_1_p->real); 00465 chi_1_p->imag = -(chi_1_p->imag); 00466 00467 return FLA_SUCCESS; 00468 } 00469 00470 /* 00471 Preserve chi_1 for later use. 00472 */ 00473 00474 alpha.real = chi_1_p->real; 00475 alpha.imag = chi_1_p->imag; 00476 00477 /* 00478 y = / chi_1 \ 00479 \ || x_2 ||_2 / ( notice: || y ||_2 = || x ||_2 ) 00480 */ 00481 00482 y[ 0 ].real = chi_1_p->real; 00483 y[ 0 ].imag = chi_1_p->imag; 00484 00485 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00486 y[ 1 ].real = cblas_dznrm2( x2_length, x2_buff, x2_stride ); 00487 y[ 1 ].imag = 0.0; 00488 #else 00489 y[ 1 ].real = FLA_C2F( dznrm2 )( &x2_length, x2_buff, &x2_stride ); 00490 y[ 1 ].imag = 0.0; 00491 #endif 00492 00493 /* 00494 Trivial tau, chi_1 if || x_2 ||_2 = 0. 00495 */ 00496 00497 if ( y[ 1 ].real == zero_p->real ) 00498 { 00499 tau_p->real = one_half.real; 00500 tau_p->imag = one_half.imag; 00501 chi_1_p->real = -(chi_1_p->real); 00502 chi_1_p->imag = -(chi_1_p->imag); 00503 00504 return FLA_SUCCESS; 00505 } 00506 00507 /* 00508 temp = || / chi_1 \ || 00509 || \ x_2 / ||_2 00510 */ 00511 00512 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00513 temp.real = cblas_dznrm2( i_two, y, i_one ); 00514 #else 00515 temp.real = FLA_C2F( dznrm2 )( &i_two, y, &i_one ); 00516 #endif 00517 00518 /* 00519 y = / chi_1 + sign( chi_1 ) * temp \ 00520 \ || x_2 ||_2 / 00521 00522 chi_1 = -sign( Re( chi_1 ) ) * temp 00523 */ 00524 00525 if ( sign( y[ 0 ].real ) < 1.0 ) 00526 { 00527 y[ 0 ].real -= temp.real; 00528 chi_1_p->real = temp.real; 00529 chi_1_p->imag = 0.0; 00530 } 00531 else 00532 { 00533 y[ 0 ].real += temp.real; 00534 chi_1_p->real = -temp.real; 00535 chi_1_p->imag = 0.0; 00536 } 00537 00538 /* 00539 y = / 1 \ 00540 \ eta_2/eta_1 / ( normalize y to have "1" first entry ) 00541 */ 00542 00543 // y[ 1 ] = y[ 1 ] / y[ 0 ]; 00544 /* 00545 temp.real = y[ 1 ].real / ( y[ 0 ].real * y[ 0 ].real + 00546 y[ 0 ].imag * y[ 0 ].imag ); 00547 y[ 1 ].real = y[ 0 ].real * temp.real; 00548 y[ 1 ].imag = y[ 0 ].imag * -temp.real; 00549 */ 00550 /* 00551 Overwrite x_2 with x_2/eta_2 00552 */ 00553 00554 // y[ 0 ] = 1.0 / y[ 0 ]; 00555 00556 temp.real = 1.0 / ( y[ 0 ].real * y[ 0 ].real + 00557 y[ 0 ].imag * y[ 0 ].imag ); 00558 y[ 0 ].real = y[ 0 ].real * temp.real; 00559 y[ 0 ].imag = y[ 0 ].imag * -temp.real; 00560 00561 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00562 cblas_zscal( x2_length, y[ 0 ], x2_buff, x2_stride ); 00563 #else 00564 FLA_C2F( zscal )( &x2_length, &y[ 0 ], x2_buff, &x2_stride ); 00565 #endif 00566 00567 /* 00568 y[ 0 ].real = 1.0; 00569 y[ 0 ].imag = 0.0; 00570 */ 00571 00572 /* 00573 temp = y^H y 00574 */ 00575 00576 /* 00577 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00578 cblas_zdotc_sub( i_two, y, i_one, y, i_one, &temp ); 00579 #else 00580 FLA_F2C( fla_zdotc )( &i_two, y, &i_one, y, &i_one, &temp ); 00581 #endif 00582 00583 tau_p->real = temp.real / 2.0; 00584 tau_p->imag = -alpha.imag / 2.0; 00585 */ 00586 00587 /* 00588 Recall that 00589 00590 alpha = original value of chi_1 00591 00592 and 00593 00594 -sign( Re( chi_1 ) ) * || x ||_2 00595 00596 where || x ||_2 = || / chi_1 \ || 00597 || \ x_2 / ||_2 00598 overwrote chi_1. 00599 */ 00600 00601 tau_p->real = ( chi_1_p->real - alpha.real ) / chi_1_p->real; 00602 tau_p->imag = ( 0.0 - alpha.imag ) / chi_1_p->real; 00603 00604 /* 00605 Invert tau. 00606 */ 00607 temp.real = 1.0 / ( tau_p->real * tau_p->real + 00608 tau_p->imag * tau_p->imag ); 00609 tau_p->real = tau_p->real * temp.real; 00610 tau_p->imag = tau_p->imag * -temp.real; 00611 00612 break; 00613 } 00614 00615 } 00616 00617 return FLA_SUCCESS; 00618 }