FLA_util_lapack_prototypes_opt.h File Reference

(r)

Go to the source code of this file.

Functions

FLA_Error FLA_Househ2_UT_opt (FLA_Obj chi_1, FLA_Obj x2, FLA_Obj tau)
FLA_Error FLA_Househ2_UT_opt_float (int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
FLA_Error FLA_Househ2_UT_opt_double (int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
FLA_Error FLA_Househ2_UT_opt_scomplex (int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
FLA_Error FLA_Househ2_UT_opt_dcomplex (int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
FLA_Error FLA_Apply_househ2_UT_opt (FLA_Obj tau, FLA_Obj u2, FLA_Obj a1t, FLA_Obj A2)
FLA_Error FLA_Apply_househ2_UT_opt_float (int m_u2_A2, int n_a1t, float *tau, float *u2, int inc_u2, float *a1t, int inc_a1t, float *A2, int ldim_A2)
FLA_Error FLA_Apply_househ2_UT_opt_double (int m_u2_A2, int n_a1t, double *tau, double *u2, int inc_u2, double *a1t, int inc_a1t, double *A2, int ldim_A2)
FLA_Error FLA_Apply_househ2_UT_opt_scomplex (int m_u2_A2, int n_a1t, scomplex *tau, scomplex *u2, int inc_u2, scomplex *a1t, int inc_a1t, scomplex *A2, int ldim_A2)
FLA_Error FLA_Apply_househ2_UT_opt_dcomplex (int m_u2_A2, int n_a1t, dcomplex *tau, dcomplex *u2, int inc_u2, dcomplex *a1t, int inc_a1t, dcomplex *A2, int ldim_A2)


Function Documentation

FLA_Error FLA_Apply_househ2_UT_opt ( FLA_Obj  tau,
FLA_Obj  u2,
FLA_Obj  a1t,
FLA_Obj  A2 
)

References FLA_Apply_househ2_UT_check(), FLA_Apply_househ2_UT_opt_dcomplex(), FLA_Apply_househ2_UT_opt_double(), FLA_Apply_househ2_UT_opt_float(), FLA_Apply_househ2_UT_opt_scomplex(), FLA_Check_error_level(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_ldim(), FLA_Obj_length(), FLA_Obj_vector_inc(), and FLA_Obj_width().

00039              :=  / I - 1/tau / 1  \ ( 1  u2^H ) \ / a1t \ 
00040     \ A2  /      \           \ u2 /             / \ A2  / 
00041  
00042   w = ( a1t + u2' * A2 ) / conj( tau );
00043 
00044   a1t = - w + a1t;
00045   A2  = - u2 * w + A2;
00046 */
00047 {
00048   FLA_Datatype datatype;
00049   int          m_u2_A2;
00050   int          n_a1t;
00051   int          inc_u2;
00052   int          inc_a1t;
00053   int          ldim_A2;
00054 
00055   if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
00056     FLA_Apply_househ2_UT_check( tau, u2, a1t, A2 );
00057 
00058   if ( FLA_Obj_has_zero_dim( a1t ) ) return FLA_SUCCESS;
00059 
00060   datatype = FLA_Obj_datatype( A2 );
00061 
00062   m_u2_A2  = FLA_Obj_length( u2 );
00063   n_a1t    = FLA_Obj_width( a1t );
00064   inc_u2   = FLA_Obj_vector_inc( u2 );
00065   inc_a1t  = FLA_Obj_vector_inc( a1t );
00066   ldim_A2  = FLA_Obj_ldim( A2 );
00067 
00068   switch ( datatype )
00069   {
00070     case FLA_FLOAT:
00071     {
00072       float* tau_p = ( float* ) FLA_FLOAT_PTR( tau );
00073       float* u2_p  = ( float* ) FLA_FLOAT_PTR( u2 );
00074       float* a1t_p = ( float* ) FLA_FLOAT_PTR( a1t );
00075       float* A2_p  = ( float* ) FLA_FLOAT_PTR( A2 );
00076 
00077       FLA_Apply_househ2_UT_opt_float( m_u2_A2, n_a1t,
00078                                       tau_p,
00079                                       u2_p, inc_u2,
00080                                       a1t_p, inc_a1t,
00081                                       A2_p, ldim_A2 );
00082       break;
00083     }
00084 
00085     case FLA_DOUBLE:
00086     {
00087       double* tau_p = ( double* ) FLA_DOUBLE_PTR( tau );
00088       double* u2_p  = ( double* ) FLA_DOUBLE_PTR( u2 );
00089       double* a1t_p = ( double* ) FLA_DOUBLE_PTR( a1t );
00090       double* A2_p  = ( double* ) FLA_DOUBLE_PTR( A2 );
00091 
00092       FLA_Apply_househ2_UT_opt_double( m_u2_A2, n_a1t,
00093                                        tau_p,
00094                                        u2_p, inc_u2,
00095                                        a1t_p, inc_a1t,
00096                                        A2_p, ldim_A2 );
00097       break;
00098     }
00099 
00100     case FLA_COMPLEX:
00101     {
00102       scomplex* tau_p = ( scomplex* ) FLA_COMPLEX_PTR( tau );
00103       scomplex* u2_p  = ( scomplex* ) FLA_COMPLEX_PTR( u2 );
00104       scomplex* a1t_p = ( scomplex* ) FLA_COMPLEX_PTR( a1t );
00105       scomplex* A2_p  = ( scomplex* ) FLA_COMPLEX_PTR( A2 );
00106 
00107       FLA_Apply_househ2_UT_opt_scomplex( m_u2_A2, n_a1t,
00108                                          tau_p,
00109                                          u2_p, inc_u2,
00110                                          a1t_p, inc_a1t,
00111                                          A2_p, ldim_A2 );
00112       break;
00113     }
00114 
00115     case FLA_DOUBLE_COMPLEX:
00116     {
00117       dcomplex* tau_p = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( tau );
00118       dcomplex* u2_p  = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( u2 );
00119       dcomplex* a1t_p = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( a1t );
00120       dcomplex* A2_p  = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A2 );
00121 
00122       FLA_Apply_househ2_UT_opt_dcomplex( m_u2_A2, n_a1t,
00123                                          tau_p,
00124                                          u2_p, inc_u2,
00125                                          a1t_p, inc_a1t,
00126                                          A2_p, ldim_A2 );
00127       break;
00128     }
00129   }
00130 
00131   return FLA_SUCCESS;
00132 }

FLA_Error FLA_Apply_househ2_UT_opt_dcomplex ( int  m_u2_A2,
int  n_a1t,
dcomplex tau,
dcomplex u2,
int  inc_u2,
dcomplex a1t,
int  inc_a1t,
dcomplex A2,
int  ldim_A2 
)

References dscal(), FLA_free(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, dcomplex::imag, dcomplex::real, zaxpy(), zcopy(), zgemv(), zgeru(), and zscal().

Referenced by FLA_Apply_househ2_UT_opt(), FLA_QR_UT_Accum_T_opt_var1_dcomplex(), and FLA_QR_UT_UD_Accum_T_opt_var1_dcomplex().

00370 {
00371   dcomplex* one_p        = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
00372   dcomplex* minus_one_p  = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
00373   dcomplex  tau_copy;
00374   double    temp;
00375   char      blas_trans   = 'T';
00376   int       inc_w1t;
00377   int       inc_u2_conj;
00378   int       i_two        = 2;
00379   dcomplex* u2_conj;
00380 
00381   // FLA_Obj w1t;
00382   dcomplex* w1t;
00383 
00384   // if ( FLA_Obj_has_zero_dim( a1t ) ) return FLA_SUCCESS;
00385   if ( n_a1t == 0 ) return FLA_SUCCESS;
00386 
00387   // FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, a1t, &w1t );
00388   w1t = FLA_malloc( n_a1t * sizeof( dcomplex ) );
00389   inc_w1t = 1;
00390 
00391   // // w1t = a1t;
00392   // FLA_Copy_external( a1t, w1t );
00393   FLA_C2F( zcopy )( &n_a1t,
00394                     a1t, &inc_a1t, 
00395                     w1t, &inc_w1t ); 
00396 
00397   // // w1t = w1t + u2' * A2;
00398   // // w1t = w1t + A2^T * conj(u2);
00399   // FLA_Gemvc_external( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A2, u2, FLA_ONE, w1t );
00400   u2_conj = FLA_malloc( m_u2_A2 * sizeof( dcomplex ) );
00401   inc_u2_conj = 1;
00402 
00403   FLA_C2F( zcopy )( &m_u2_A2,
00404                     u2, &inc_u2, 
00405                     u2_conj, &inc_u2_conj ); 
00406 
00407   FLA_C2F( dscal )( &m_u2_A2,
00408                     &(minus_one_p->real),
00409                     (( double* ) u2_conj ) + 1, &i_two );
00410 
00411   FLA_C2F( zgemv )( &blas_trans,
00412                     &m_u2_A2, &n_a1t,
00413                     one_p,
00414                     A2, &ldim_A2,
00415                     u2_conj, &inc_u2_conj,
00416                     one_p,
00417                     w1t, &inc_w1t );
00418 
00419   FLA_free( u2_conj );
00420 
00421   // // w1t = w1t / conj( tau );
00422   // FLA_Inv_scalc_external( FLA_CONJUGATE, tau, w1t );
00423   temp = 1.0  / ( tau->real * tau->real + 
00424                   tau->imag * tau->imag );
00425   tau_copy.real = tau->real * temp;
00426   tau_copy.imag = tau->imag * temp;
00427   FLA_C2F( zscal )( &n_a1t,
00428                     &tau_copy,
00429                     w1t, &inc_w1t );
00430 
00431   // // a1t = - w1t + a1t;
00432   // FLA_Axpy_external( FLA_MINUS_ONE, w1t, a1t );
00433   FLA_C2F( zaxpy )( &n_a1t,
00434                     minus_one_p,
00435                     w1t, &inc_w1t,
00436                     a1t, &inc_a1t );
00437 
00438   // // A2 = - u2 * w1t + A2;
00439   // FLA_Ger_external( FLA_MINUS_ONE, u2, w1t, A2 );
00440   FLA_C2F( zgeru )( &m_u2_A2, &n_a1t,
00441                     minus_one_p,
00442                     u2, &inc_u2,
00443                     w1t, &inc_w1t,
00444                     A2, &ldim_A2 );
00445 
00446   // FLA_Obj_free( &w1t );
00447   FLA_free( w1t );
00448 
00449   return FLA_SUCCESS;
00450 }

FLA_Error FLA_Apply_househ2_UT_opt_double ( int  m_u2_A2,
int  n_a1t,
double *  tau,
double *  u2,
int  inc_u2,
double *  a1t,
int  inc_a1t,
double *  A2,
int  ldim_A2 
)

References daxpy(), dcopy(), dgemv(), dger(), dscal(), FLA_free(), FLA_malloc(), FLA_MINUS_ONE, and FLA_ONE.

Referenced by FLA_Apply_househ2_UT_opt(), FLA_QR_UT_Accum_T_opt_var1_double(), and FLA_QR_UT_UD_Accum_T_opt_var1_double().

00210 {
00211   double* one_p        = FLA_DOUBLE_PTR( FLA_ONE );
00212   double* minus_one_p  = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
00213   double  tau_copy;
00214   char    blas_trans   = 'T';
00215   int     inc_w1t;
00216 
00217   // FLA_Obj w1t;
00218   double* w1t;
00219 
00220   // if ( FLA_Obj_has_zero_dim( a1t ) ) return FLA_SUCCESS;
00221   if ( n_a1t == 0 ) return FLA_SUCCESS;
00222 
00223   // FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, a1t, &w1t );
00224   w1t = FLA_malloc( n_a1t * sizeof( double ) );
00225   inc_w1t = 1;
00226 
00227   // // w1t = a1t;
00228   // FLA_Copy_external( a1t, w1t );
00229   FLA_C2F( dcopy )( &n_a1t,
00230                     a1t, &inc_a1t, 
00231                     w1t, &inc_w1t ); 
00232 
00233   // // w1t = w1t + u2' * A2;
00234   // // w1t = w1t + A2^T * conj(u2);
00235   // FLA_Gemvc_external( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A2, u2, FLA_ONE, w1t );
00236   FLA_C2F( dgemv )( &blas_trans,
00237                     &m_u2_A2, &n_a1t,
00238                     one_p,
00239                     A2, &ldim_A2,
00240                     u2, &inc_u2,
00241                     one_p,
00242                     w1t, &inc_w1t );
00243 
00244   // // w1t = w1t / conj( tau );
00245   // FLA_Inv_scalc_external( FLA_CONJUGATE, tau, w1t );
00246   tau_copy = 1.0 / *tau;
00247   FLA_C2F( dscal )( &n_a1t,
00248                     &tau_copy,
00249                     w1t, &inc_w1t );
00250 
00251   // // a1t = - w1t + a1t;
00252   // FLA_Axpy_external( FLA_MINUS_ONE, w1t, a1t );
00253   FLA_C2F( daxpy )( &n_a1t,
00254                     minus_one_p,
00255                     w1t, &inc_w1t,
00256                     a1t, &inc_a1t );
00257 
00258   // // A2 = - u2 * w1t + A2;
00259   // FLA_Ger_external( FLA_MINUS_ONE, u2, w1t, A2 );
00260   FLA_C2F( dger )( &m_u2_A2, &n_a1t,
00261                    minus_one_p,
00262                    u2, &inc_u2,
00263                    w1t, &inc_w1t,
00264                    A2, &ldim_A2 );
00265 
00266   // FLA_Obj_free( &w1t );
00267   FLA_free( w1t );
00268 
00269   return FLA_SUCCESS;
00270 }

FLA_Error FLA_Apply_househ2_UT_opt_float ( int  m_u2_A2,
int  n_a1t,
float *  tau,
float *  u2,
int  inc_u2,
float *  a1t,
int  inc_a1t,
float *  A2,
int  ldim_A2 
)

References FLA_free(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, saxpy(), scopy(), sgemv(), sger(), and sscal().

Referenced by FLA_Apply_househ2_UT_opt(), FLA_QR_UT_Accum_T_opt_var1_float(), and FLA_QR_UT_UD_Accum_T_opt_var1_float().

00140 {
00141   float* one_p        = FLA_FLOAT_PTR( FLA_ONE );
00142   float* minus_one_p  = FLA_FLOAT_PTR( FLA_MINUS_ONE );
00143   float  tau_copy;
00144   char   blas_trans   = 'T';
00145   int    inc_w1t;
00146 
00147   // FLA_Obj w1t;
00148   float* w1t;
00149 
00150   // if ( FLA_Obj_has_zero_dim( a1t ) ) return FLA_SUCCESS;
00151   if ( n_a1t == 0 ) return FLA_SUCCESS;
00152 
00153   // FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, a1t, &w1t );
00154   w1t = FLA_malloc( n_a1t * sizeof( float ) );
00155   inc_w1t = 1;
00156 
00157   // // w1t = a1t;
00158   // FLA_Copy_external( a1t, w1t );
00159   FLA_C2F( scopy )( &n_a1t,
00160                     a1t, &inc_a1t, 
00161                     w1t, &inc_w1t ); 
00162 
00163   // // w1t = w1t + u2' * A2;
00164   // // w1t = w1t + A2^T * conj(u2);
00165   // FLA_Gemvc_external( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A2, u2, FLA_ONE, w1t );
00166   FLA_C2F( sgemv )( &blas_trans,
00167                     &m_u2_A2, &n_a1t,
00168                     one_p,
00169                     A2, &ldim_A2,
00170                     u2, &inc_u2,
00171                     one_p,
00172                     w1t, &inc_w1t );
00173 
00174   // // w1t = w1t / conj( tau );
00175   // FLA_Inv_scalc_external( FLA_CONJUGATE, tau, w1t );
00176   tau_copy = 1.0F / *tau;
00177   FLA_C2F( sscal )( &n_a1t,
00178                     &tau_copy,
00179                     w1t, &inc_w1t );
00180 
00181   // // a1t = - w1t + a1t;
00182   // FLA_Axpy_external( FLA_MINUS_ONE, w1t, a1t );
00183   FLA_C2F( saxpy )( &n_a1t,
00184                     minus_one_p,
00185                     w1t, &inc_w1t,
00186                     a1t, &inc_a1t );
00187 
00188   // // A2 = - u2 * w1t + A2;
00189   // FLA_Ger_external( FLA_MINUS_ONE, u2, w1t, A2 );
00190   FLA_C2F( sger )( &m_u2_A2, &n_a1t,
00191                    minus_one_p,
00192                    u2, &inc_u2,
00193                    w1t, &inc_w1t,
00194                    A2, &ldim_A2 );
00195 
00196   // FLA_Obj_free( &w1t );
00197   FLA_free( w1t );
00198 
00199   return FLA_SUCCESS;
00200 }

FLA_Error FLA_Apply_househ2_UT_opt_scomplex ( int  m_u2_A2,
int  n_a1t,
scomplex tau,
scomplex u2,
int  inc_u2,
scomplex a1t,
int  inc_a1t,
scomplex A2,
int  ldim_A2 
)

References caxpy(), ccopy(), cgemv(), cgeru(), cscal(), FLA_free(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, scomplex::imag, scomplex::real, and sscal().

Referenced by FLA_Apply_househ2_UT_opt(), FLA_QR_UT_Accum_T_opt_var1_scomplex(), and FLA_QR_UT_UD_Accum_T_opt_var1_scomplex().

00280 {
00281   scomplex* one_p        = FLA_COMPLEX_PTR( FLA_ONE );
00282   scomplex* minus_one_p  = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
00283   scomplex  tau_copy;
00284   float     temp;
00285   char      blas_trans   = 'T';
00286   int       inc_w1t;
00287   int       inc_u2_conj;
00288   int       i_two        = 2;
00289   scomplex* u2_conj;
00290 
00291   // FLA_Obj w1t;
00292   scomplex* w1t;
00293 
00294   // if ( FLA_Obj_has_zero_dim( a1t ) ) return FLA_SUCCESS;
00295   if ( n_a1t == 0 ) return FLA_SUCCESS;
00296 
00297   // FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, a1t, &w1t );
00298   w1t = FLA_malloc( n_a1t * sizeof( scomplex ) );
00299   inc_w1t = 1;
00300 
00301   // // w1t = a1t;
00302   // FLA_Copy_external( a1t, w1t );
00303   FLA_C2F( ccopy )( &n_a1t,
00304                     a1t, &inc_a1t, 
00305                     w1t, &inc_w1t ); 
00306 
00307   // // w1t = w1t + u2' * A2;
00308   // // w1t = w1t + A2^T * conj(u2);
00309   // FLA_Gemvc_external( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A2, u2, FLA_ONE, w1t );
00310   u2_conj = FLA_malloc( m_u2_A2 * sizeof( scomplex ) );
00311   inc_u2_conj = 1;
00312 
00313   FLA_C2F( ccopy )( &m_u2_A2,
00314                     u2, &inc_u2, 
00315                     u2_conj, &inc_u2_conj ); 
00316 
00317   FLA_C2F( sscal )( &m_u2_A2,
00318                     &(minus_one_p->real),
00319                     (( float* ) u2_conj ) + 1, &i_two );
00320 
00321   FLA_C2F( cgemv )( &blas_trans,
00322                     &m_u2_A2, &n_a1t,
00323                     one_p,
00324                     A2, &ldim_A2,
00325                     u2_conj, &inc_u2_conj,
00326                     one_p,
00327                     w1t, &inc_w1t );
00328 
00329   FLA_free( u2_conj );
00330 
00331   // // w1t = w1t / conj( tau );
00332   // FLA_Inv_scalc_external( FLA_CONJUGATE, tau, w1t );
00333   temp = 1.0F / ( tau->real * tau->real + 
00334                   tau->imag * tau->imag );
00335   tau_copy.real = tau->real * temp;
00336   tau_copy.imag = tau->imag * temp;
00337   FLA_C2F( cscal )( &n_a1t,
00338                     &tau_copy,
00339                     w1t, &inc_w1t );
00340 
00341   // // a1t = - w1t + a1t;
00342   // FLA_Axpy_external( FLA_MINUS_ONE, w1t, a1t );
00343   FLA_C2F( caxpy )( &n_a1t,
00344                     minus_one_p,
00345                     w1t, &inc_w1t,
00346                     a1t, &inc_a1t );
00347 
00348   // // A2 = - u2 * w1t + A2;
00349   // FLA_Ger_external( FLA_MINUS_ONE, u2, w1t, A2 );
00350   FLA_C2F( cgeru )( &m_u2_A2, &n_a1t,
00351                     minus_one_p,
00352                     u2, &inc_u2,
00353                     w1t, &inc_w1t,
00354                     A2, &ldim_A2 );
00355 
00356   // FLA_Obj_free( &w1t );
00357   FLA_free( w1t );
00358 
00359   return FLA_SUCCESS;
00360 }

FLA_Error FLA_Househ2_UT_opt ( FLA_Obj  chi_1,
FLA_Obj  x2,
FLA_Obj  tau 
)

References FLA_Househ2_UT_opt_dcomplex(), FLA_Househ2_UT_opt_double(), FLA_Househ2_UT_opt_float(), FLA_Househ2_UT_opt_scomplex(), FLA_Obj_datatype(), FLA_Obj_length(), and FLA_Obj_vector_inc().

00038 {
00039   FLA_Datatype datatype;
00040   int          m_x2;
00041   int          inc_x2;
00042 
00043   datatype = FLA_Obj_datatype( x2 );
00044 
00045   m_x2     = FLA_Obj_length( x2 );
00046   inc_x2   = FLA_Obj_vector_inc( x2 );
00047 
00048   switch ( datatype )
00049   {
00050     case FLA_FLOAT:
00051     {
00052       float* chi_1_p = ( float* ) FLA_FLOAT_PTR( chi_1 );
00053       float* x2_p    = ( float* ) FLA_FLOAT_PTR( x2 );
00054       float* tau_p   = ( float* ) FLA_FLOAT_PTR( tau );
00055 
00056       FLA_Househ2_UT_opt_float( m_x2,
00057                                 chi_1_p,
00058                                 x2_p, inc_x2,
00059                                 tau_p );
00060       break;
00061     }
00062 
00063     case FLA_DOUBLE:
00064     {
00065       double* chi_1_p = ( double* ) FLA_DOUBLE_PTR( chi_1 );
00066       double* x2_p    = ( double* ) FLA_DOUBLE_PTR( x2 );
00067       double* tau_p   = ( double* ) FLA_DOUBLE_PTR( tau );
00068 
00069       FLA_Househ2_UT_opt_double( m_x2,
00070                                  chi_1_p,
00071                                  x2_p, inc_x2,
00072                                  tau_p );
00073       break;
00074     }
00075 
00076     case FLA_COMPLEX:
00077     {
00078       scomplex* chi_1_p = ( scomplex* ) FLA_COMPLEX_PTR( chi_1 );
00079       scomplex* x2_p    = ( scomplex* ) FLA_COMPLEX_PTR( x2 );
00080       scomplex* tau_p   = ( scomplex* ) FLA_COMPLEX_PTR( tau );
00081 
00082       FLA_Househ2_UT_opt_scomplex( m_x2,
00083                                    chi_1_p,
00084                                    x2_p, inc_x2,
00085                                    tau_p );
00086       break;
00087     }
00088 
00089     case FLA_DOUBLE_COMPLEX:
00090     {
00091       dcomplex* chi_1_p = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( chi_1 );
00092       dcomplex* x2_p    = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( x2 );
00093       dcomplex* tau_p   = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( tau );
00094 
00095       FLA_Househ2_UT_opt_dcomplex( m_x2,
00096                                    chi_1_p,
00097                                    x2_p, inc_x2,
00098                                    tau_p );
00099       break;
00100     }
00101   }
00102 
00103   return FLA_SUCCESS;
00104 }

FLA_Error FLA_Househ2_UT_opt_dcomplex ( int  m_x2,
dcomplex chi_1,
dcomplex x2,
int  inc_x2,
dcomplex tau 
)

References cblas_dznrm2(), cblas_zscal(), dznrm2(), FLA_ZERO, dcomplex::imag, dcomplex::real, and zscal().

Referenced by FLA_Househ2_UT_opt(), FLA_QR_UT_Accum_T_opt_var1_dcomplex(), and FLA_QR_UT_UD_Accum_T_opt_var1_dcomplex().

00531 {
00532   int          i_one = 1;
00533   int          i_two = 2;
00534 
00535   if ( m_x2 < 0 )
00536   {
00537     FLA_Check_error_code( FLA_EXPECTED_NONNEGATIVE_VALUE );
00538   }
00539 
00540 
00541   {
00542     dcomplex *chi_1_p = ( dcomplex * ) chi_1;
00543     dcomplex *x2_buff = ( dcomplex * ) x2;
00544     dcomplex *tau_p   = ( dcomplex * ) tau;
00545     dcomplex *zero_p  = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
00546     dcomplex one_half;
00547     dcomplex y[ 2 ];
00548     dcomplex temp;
00549     dcomplex alpha;
00550 
00551     one_half.real = 1.0 / 2.0;
00552     one_half.imag = 0.0;
00553 
00554     /*
00555       Trivial tau, chi_1 if x_2 is 0 x 1.
00556     */
00557 
00558     if ( m_x2 == 0 )
00559     {
00560       tau_p->real   = one_half.real;
00561       tau_p->imag   = one_half.imag;
00562       chi_1_p->real = -(chi_1_p->real);
00563       chi_1_p->imag = -(chi_1_p->imag);
00564 
00565       return FLA_SUCCESS;
00566     }
00567 
00568     /*
00569       Preserve chi_1 for later use.
00570     */
00571 
00572     alpha.real = chi_1_p->real;
00573     alpha.imag = chi_1_p->imag;
00574 
00575     /*
00576       y = /    chi_1    \
00577           \ || x_2 ||_2 /  ( notice: || y ||_2 = || x ||_2 )
00578     */
00579 
00580     y[ 0 ].real = chi_1_p->real;
00581     y[ 0 ].imag = chi_1_p->imag;
00582 
00583 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00584     y[ 1 ].real = cblas_dznrm2( m_x2, x2_buff, inc_x2 );
00585     y[ 1 ].imag = 0.0;
00586 #else
00587     y[ 1 ].real = FLA_C2F( dznrm2 )( &m_x2, x2_buff, &inc_x2 );
00588     y[ 1 ].imag = 0.0;
00589 #endif
00590 
00591     /*
00592       Trivial tau, chi_1 if || x_2 ||_2 = 0.
00593     */
00594 
00595     if ( y[ 1 ].real == zero_p->real )
00596     {
00597       tau_p->real   = one_half.real;
00598       tau_p->imag   = one_half.imag;
00599       chi_1_p->real = -(chi_1_p->real);
00600       chi_1_p->imag = -(chi_1_p->imag);
00601 
00602       return FLA_SUCCESS;
00603     }
00604 
00605     /*
00606       temp  = || / chi_1 \ ||
00607               || \  x_2  / ||_2
00608     */
00609 
00610 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00611     temp.real = cblas_dznrm2( i_two, y, i_one );
00612 #else
00613     temp.real = FLA_C2F( dznrm2 )( &i_two, y, &i_one );
00614 #endif
00615 
00616     /*
00617       y = / chi_1 + sign( chi_1 ) * temp  \
00618           \      || x_2 ||_2              /
00619 
00620       chi_1 = -sign( Re( chi_1 ) ) * temp
00621     */
00622 
00623     if ( sign( y[ 0 ].real ) < 1.0 )
00624     {
00625       y[ 0 ].real   -=  temp.real;
00626       chi_1_p->real  =  temp.real;
00627       chi_1_p->imag  =  0.0;
00628     }
00629     else
00630     {
00631       y[ 0 ].real   +=  temp.real;
00632       chi_1_p->real  = -temp.real;
00633       chi_1_p->imag  =  0.0;
00634     }
00635 
00636     /*
00637       Overwrite x_2 with x_2/eta_2
00638     */
00639 
00640     // y[ 0 ] = 1.0 / y[ 0 ];
00641 
00642     temp.real = 1.0 / ( y[ 0 ].real * y[ 0 ].real +
00643                         y[ 0 ].imag * y[ 0 ].imag );
00644     y[ 0 ].real = y[ 0 ].real *  temp.real;
00645     y[ 0 ].imag = y[ 0 ].imag * -temp.real;
00646 
00647 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00648     cblas_zscal( m_x2, y[ 0 ], x2_buff, inc_x2 );
00649 #else
00650     FLA_C2F( zscal )( &m_x2, &y[ 0 ], x2_buff, &inc_x2 );
00651 #endif
00652 
00653     /*
00654       Recall that
00655 
00656         alpha = original value of chi_1
00657 
00658       and
00659 
00660         -sign( Re( chi_1 ) ) * || x ||_2
00661 
00662                          where || x ||_2 = || / chi_1 \ ||
00663                                            || \  x_2  / ||_2
00664       overwrote chi_1.             
00665     */
00666 
00667     tau_p->real = ( chi_1_p->real - alpha.real ) / chi_1_p->real;
00668     tau_p->imag = ( 0.0           - alpha.imag ) / chi_1_p->real;
00669 
00670     /*
00671       Invert tau.
00672     */
00673     temp.real = 1.0 / ( tau_p->real * tau_p->real +
00674                         tau_p->imag * tau_p->imag );
00675     tau_p->real = tau_p->real *  temp.real;
00676     tau_p->imag = tau_p->imag * -temp.real;
00677   }
00678 
00679   return FLA_SUCCESS;
00680 }

FLA_Error FLA_Househ2_UT_opt_double ( int  m_x2,
double *  chi_1,
double *  x2,
int  inc_x2,
double *  tau 
)

References cblas_ddot(), cblas_dnrm2(), cblas_dscal(), ddot(), dnrm2(), dscal(), and FLA_ZERO.

Referenced by FLA_Househ2_UT_opt(), FLA_QR_UT_Accum_T_opt_var1_double(), and FLA_QR_UT_UD_Accum_T_opt_var1_double().

00241 {
00242   int          i_one = 1;
00243   int          i_two = 2;
00244 
00245   if ( m_x2 < 0 )
00246   {
00247     FLA_Check_error_code( FLA_EXPECTED_NONNEGATIVE_VALUE );
00248   }
00249 
00250 
00251   {
00252     double *chi_1_p = ( double * ) chi_1;
00253     double *x2_buff = ( double * ) x2;
00254     double *tau_p   = ( double * ) tau;
00255     double *zero_p  = ( double * ) FLA_DOUBLE_PTR( FLA_ZERO );
00256     double one_half = 1.0 / 2.0;
00257     double y[ 2 ];
00258     double temp;
00259 
00260     /*
00261       Trivial tau, chi_1 if x_2 is 0 x 1.
00262     */
00263 
00264     if ( m_x2 == 0 )
00265     {
00266       *tau_p   = one_half;
00267       *chi_1_p = -(*chi_1_p);
00268 
00269       return FLA_SUCCESS;
00270     }
00271 
00272     /*
00273       y = /    chi_1    \
00274           \ || x_2 ||_2 /  ( notice: || y ||_2 = || x ||_2 )
00275     */
00276 
00277     y[ 0 ] = *chi_1_p;
00278 
00279 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00280     y[ 1 ] = cblas_dnrm2( m_x2, x2_buff, inc_x2 );
00281 #else
00282     y[ 1 ] = FLA_C2F( dnrm2 )( &m_x2, x2_buff, &inc_x2 );
00283 #endif
00284 
00285     /*
00286       Trivial tau, chi_1 if || x_2 ||_2 = 0.
00287     */
00288 
00289     if ( y[ 1 ] == *zero_p )
00290     {
00291       *tau_p   = one_half;
00292       *chi_1_p = -(*chi_1_p);
00293 
00294       return FLA_SUCCESS;
00295     }
00296 
00297     /*
00298       temp  = || / chi_1 \ ||
00299               || \  x_2  / ||_2
00300     */
00301 
00302 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00303     temp = cblas_dnrm2( i_two, y, i_one );
00304 #else
00305     temp = FLA_C2F( dnrm2 )( &i_two, y, &i_one );
00306 #endif
00307 
00308     /*
00309       y = / chi_1 + sign( chi_1 ) * temp  \
00310           \      || x_2 ||_2              /
00311 
00312       chi_1 = -sign( chi_1 ) * temp
00313     */
00314 
00315     if ( sign( y[ 0 ] ) < 1.0 )
00316     {
00317       y[ 0 ]  -= temp;
00318       *chi_1_p = temp;
00319     }
00320     else
00321     {
00322       y[ 0 ]  +=  temp;
00323       *chi_1_p = -temp;
00324     }
00325 
00326     /*
00327       y = /     1        \
00328           \ eta_2/eta_1  /  ( normalize y to have "1" first entry )
00329     */
00330 
00331     y[ 1 ] = y[ 1 ] / y[ 0 ];
00332 
00333     /*
00334       Overwrite x_2 with x_2/eta_2
00335     */
00336 
00337     y[ 0 ] = 1.0 / y[ 0 ];
00338 
00339 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00340     cblas_dscal( m_x2, y[ 0 ], x2_buff, inc_x2 );
00341 #else
00342     FLA_C2F( dscal )( &m_x2, &y[ 0 ], x2_buff, &inc_x2 );
00343 #endif
00344 
00345     y[ 0 ] = 1.0;
00346 
00347     /*
00348       temp = y^T y / 2
00349     */
00350 
00351 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00352     temp = cblas_ddot( i_two, y, i_one, y, i_one );
00353 #else
00354     temp = FLA_C2F( ddot )( &i_two, y, &i_one, y, &i_one );
00355 #endif
00356 
00357     *tau_p = temp / 2.0;
00358   }
00359 
00360   return FLA_SUCCESS;
00361 }

FLA_Error FLA_Househ2_UT_opt_float ( int  m_x2,
float *  chi_1,
float *  x2,
int  inc_x2,
float *  tau 
)

References cblas_sdot(), cblas_snrm2(), cblas_sscal(), FLA_ZERO, sdot(), snrm2(), and sscal().

Referenced by FLA_Househ2_UT_opt(), FLA_QR_UT_Accum_T_opt_var1_float(), and FLA_QR_UT_UD_Accum_T_opt_var1_float().

00111 {
00112   int          i_one = 1;
00113   int          i_two = 2;
00114 
00115   if ( m_x2 < 0 )
00116   {
00117     FLA_Check_error_code( FLA_EXPECTED_NONNEGATIVE_VALUE );
00118   }
00119 
00120 
00121   {
00122     float *chi_1_p = ( float * ) chi_1;
00123     float *x2_buff = ( float * ) x2;
00124     float *tau_p   = ( float * ) tau;
00125     float *zero_p  = ( float * ) FLA_FLOAT_PTR( FLA_ZERO );
00126     float one_half = 1.0F / 2.0F;
00127     float y[ 2 ];
00128     float temp;
00129 
00130     /*
00131       Trivial tau, chi_1 if x_2 is 0 x 1.
00132     */
00133 
00134     if ( m_x2 == 0 )
00135     {
00136       *tau_p   = one_half;
00137       *chi_1_p = -(*chi_1_p);
00138 
00139       return FLA_SUCCESS;
00140     }
00141 
00142     /*
00143       y = /    chi_1    \
00144           \ || x_2 ||_2 /  ( notice: || y ||_2 = || x ||_2 )
00145     */
00146 
00147     y[ 0 ] = *chi_1_p;
00148 
00149 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00150     y[ 1 ] = cblas_snrm2( m_x2, x2_buff, inc_x2 );
00151 #else
00152     y[ 1 ] = FLA_C2F( snrm2 )( &m_x2, x2_buff, &inc_x2 );
00153 #endif
00154 
00155     /*
00156       Trivial tau, chi_1 if || x_2 ||_2 = 0.
00157     */
00158 
00159     if ( y[ 1 ] == *zero_p )
00160     {
00161       *tau_p   = one_half;
00162       *chi_1_p = -(*chi_1_p);
00163 
00164       return FLA_SUCCESS;
00165     }
00166 
00167     /*
00168       temp  = || / chi_1 \ ||
00169               || \  x_2  / ||_2
00170     */
00171 
00172 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00173     temp = cblas_snrm2( i_two, y, i_one );
00174 #else
00175     temp = FLA_C2F( snrm2 )( &i_two, y, &i_one );
00176 #endif
00177 
00178     /*
00179       y = / chi_1 + sign( chi_1 ) * temp  \
00180           \      || x_2 ||_2              /
00181 
00182       chi_1 = -sign( chi_1 ) * temp
00183     */
00184 
00185     if ( sign( y[ 0 ] ) < 1.0F )
00186     {
00187       y[ 0 ]  -= temp;
00188       *chi_1_p = temp;
00189     }
00190     else
00191     {
00192       y[ 0 ]  +=  temp;
00193       *chi_1_p = -temp;
00194     }
00195 
00196     /*
00197       y = /     1        \
00198           \ eta_2/eta_1  /  ( normalize y to have "1" first entry )
00199     */
00200 
00201     y[ 1 ] = y[ 1 ] / y[ 0 ];
00202 
00203     /*
00204       Overwrite x_2 with x_2/eta_2
00205     */
00206 
00207     y[ 0 ] = 1.0 / y[ 0 ];
00208 
00209 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00210     cblas_sscal( m_x2, y[ 0 ], x2_buff, inc_x2 );
00211 #else
00212     FLA_C2F( sscal )( &m_x2, &y[ 0 ], x2_buff, &inc_x2 );
00213 #endif
00214 
00215     y[ 0 ] = 1.0F;
00216 
00217     /*
00218       temp = y^T y / 2
00219     */
00220 
00221 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00222     temp = cblas_sdot( i_two, y, i_one, y, i_one );
00223 #else
00224     temp = FLA_C2F( sdot )( &i_two, y, &i_one, y, &i_one );
00225 #endif
00226 
00227     *tau_p = temp / 2.0F;
00228   }
00229 
00230   return FLA_SUCCESS;
00231 }

FLA_Error FLA_Househ2_UT_opt_scomplex ( int  m_x2,
scomplex chi_1,
scomplex x2,
int  inc_x2,
scomplex tau 
)

References cblas_cscal(), cblas_scnrm2(), cscal(), FLA_ZERO, scomplex::imag, scomplex::real, and scnrm2().

Referenced by FLA_Househ2_UT_opt(), FLA_QR_UT_Accum_T_opt_var1_scomplex(), and FLA_QR_UT_UD_Accum_T_opt_var1_scomplex().

00372 {
00373   int          i_one = 1;
00374   int          i_two = 2;
00375 
00376   if ( m_x2 < 0 )
00377   {
00378     FLA_Check_error_code( FLA_EXPECTED_NONNEGATIVE_VALUE );
00379   }
00380 
00381 
00382   {
00383     scomplex *chi_1_p = ( scomplex * ) chi_1;
00384     scomplex *x2_buff = ( scomplex * ) x2;
00385     scomplex *tau_p   = ( scomplex * ) tau;
00386     scomplex *zero_p  = ( scomplex * ) FLA_COMPLEX_PTR( FLA_ZERO );
00387     scomplex one_half;
00388     scomplex y[ 2 ];
00389     scomplex temp;
00390     scomplex alpha;
00391 
00392     one_half.real = 1.0F / 2.0F;
00393     one_half.imag = 0.0F;
00394 
00395     /*
00396       Trivial tau, chi_1 if x_2 is 0 x 1.
00397     */
00398 
00399     if ( m_x2 == 0 )
00400     {
00401       tau_p->real   = one_half.real;
00402       tau_p->imag   = one_half.imag;
00403       chi_1_p->real = -(chi_1_p->real);
00404       chi_1_p->imag = -(chi_1_p->imag);
00405 
00406       return FLA_SUCCESS;
00407     }
00408 
00409     /*
00410       Preserve chi_1 for later use.
00411     */
00412 
00413     alpha.real = chi_1_p->real;
00414     alpha.imag = chi_1_p->imag;
00415 
00416     /*
00417       y = /    chi_1    \
00418           \ || x_2 ||_2 /  ( notice: || y ||_2 = || x ||_2 )
00419     */
00420 
00421     y[ 0 ].real = chi_1_p->real;
00422     y[ 0 ].imag = chi_1_p->imag;
00423 
00424 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00425     y[ 1 ].real = cblas_scnrm2( m_x2, x2_buff, inc_x2 );
00426     y[ 1 ].imag = 0.0F;
00427 #else
00428     y[ 1 ].real = FLA_C2F( scnrm2 )( &m_x2, x2_buff, &inc_x2 );
00429     y[ 1 ].imag = 0.0F;
00430 #endif
00431 
00432     /*
00433       Trivial tau, chi_1 if || x_2 ||_2 = 0.
00434     */
00435 
00436     if ( y[ 1 ].real == zero_p->real )
00437     {
00438       tau_p->real   = one_half.real;
00439       tau_p->imag   = one_half.imag;
00440       chi_1_p->real = -(chi_1_p->real);
00441       chi_1_p->imag = -(chi_1_p->imag);
00442 
00443       return FLA_SUCCESS;
00444     }
00445 
00446     /*
00447       temp  = || / chi_1 \ ||
00448               || \  x_2  / ||_2
00449     */
00450 
00451 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00452     temp.real = cblas_scnrm2( i_two, y, i_one );
00453 #else
00454     temp.real = FLA_C2F( scnrm2 )( &i_two, y, &i_one );
00455 #endif
00456 
00457     /*
00458       y = / chi_1 + sign( chi_1 ) * temp  \
00459           \      || x_2 ||_2              /
00460 
00461       chi_1 = -sign( Re( chi_1 ) ) * temp
00462     */
00463 
00464     if ( sign( y[ 0 ].real ) < 1.0F )
00465     {
00466       y[ 0 ].real   -=  temp.real;
00467       chi_1_p->real  =  temp.real;
00468       chi_1_p->imag  =  0.0F;
00469     }
00470     else
00471     {
00472       y[ 0 ].real   +=  temp.real;
00473       chi_1_p->real  = -temp.real;
00474       chi_1_p->imag  =  0.0F;
00475     }
00476 
00477     /*
00478       Overwrite x_2 with x_2/eta_2
00479     */
00480 
00481     // y[ 0 ] = 1.0 / y[ 0 ];
00482 
00483     temp.real = 1.0F / ( y[ 0 ].real * y[ 0 ].real +
00484                          y[ 0 ].imag * y[ 0 ].imag );
00485     y[ 0 ].real = y[ 0 ].real *  temp.real;
00486     y[ 0 ].imag = y[ 0 ].imag * -temp.real;
00487 
00488 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00489     cblas_cscal( m_x2, y[ 0 ], x2_buff, inc_x2 );
00490 #else
00491     FLA_C2F( cscal )( &m_x2, &y[ 0 ], x2_buff, &inc_x2 );
00492 #endif
00493 
00494     /*
00495       Recall that
00496 
00497         alpha = original value of chi_1
00498 
00499       and
00500 
00501         -sign( Re( chi_1 ) ) * || x ||_2
00502 
00503                          where || x ||_2 = || / chi_1 \ ||
00504                                            || \  x_2  / ||_2
00505       overwrote chi_1.             
00506     */
00507 
00508     tau_p->real = ( chi_1_p->real - alpha.real ) / chi_1_p->real;
00509     tau_p->imag = ( 0.0F          - alpha.imag ) / chi_1_p->real;
00510 
00511     /*
00512       Invert tau.
00513     */
00514     temp.real = 1.0F / ( tau_p->real * tau_p->real +
00515                          tau_p->imag * tau_p->imag );
00516     tau_p->real = tau_p->real *  temp.real;
00517     tau_p->imag = tau_p->imag * -temp.real;
00518   }
00519 
00520   return FLA_SUCCESS;
00521 }


Generated on Mon Jul 6 05:45:52 2009 for libflame by  doxygen 1.5.9