FLA_Apply_househ2_UT_opt.c File Reference

(r)


Functions

FLA_Error FLA_Apply_househ2_UT_opt (FLA_Obj tau, FLA_Obj u2, FLA_Obj a1t, FLA_Obj A2)
FLA_Error FLA_Apply_househ2_UT_opt_float (int m_u2_A2, int n_a1t, float *tau, float *u2, int inc_u2, float *a1t, int inc_a1t, float *A2, int ldim_A2)
FLA_Error FLA_Apply_househ2_UT_opt_double (int m_u2_A2, int n_a1t, double *tau, double *u2, int inc_u2, double *a1t, int inc_a1t, double *A2, int ldim_A2)
FLA_Error FLA_Apply_househ2_UT_opt_scomplex (int m_u2_A2, int n_a1t, scomplex *tau, scomplex *u2, int inc_u2, scomplex *a1t, int inc_a1t, scomplex *A2, int ldim_A2)
FLA_Error FLA_Apply_househ2_UT_opt_dcomplex (int m_u2_A2, int n_a1t, dcomplex *tau, dcomplex *u2, int inc_u2, dcomplex *a1t, int inc_a1t, dcomplex *A2, int ldim_A2)

Function Documentation

FLA_Error FLA_Apply_househ2_UT_opt ( FLA_Obj  tau,
FLA_Obj  u2,
FLA_Obj  a1t,
FLA_Obj  A2 
)

References FLA_Apply_househ2_UT_check(), FLA_Apply_househ2_UT_opt_dcomplex(), FLA_Apply_househ2_UT_opt_double(), FLA_Apply_househ2_UT_opt_float(), FLA_Apply_househ2_UT_opt_scomplex(), FLA_Check_error_level(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_ldim(), FLA_Obj_length(), FLA_Obj_vector_inc(), and FLA_Obj_width().

00039     / a1t \  :=  / I - 1/tau / 1  \ ( 1  u2^H ) \ / a1t \ 
00040     \ A2  /      \           \ u2 /             / \ A2  / 
00041  
00042   w = ( a1t + u2' * A2 ) / conj( tau );
00043 
00044   a1t = - w + a1t;
00045   A2  = - u2 * w + A2;
00046 */
00047 {  // Object-level entry point: reads dimensions, increments and buffers from the FLA_Obj operands and forwards them to the typed kernel selected by A2's datatype.
00048   FLA_Datatype datatype;
00049   int          m_u2_A2;
00050   int          n_a1t;
00051   int          inc_u2;
00052   int          inc_a1t;
00053   int          ldim_A2;
00054 
00055   if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )  // argument checks run only at or above the minimum checking level
00056     FLA_Apply_househ2_UT_check( tau, u2, a1t, A2 );
00057 
00058   if ( FLA_Obj_has_zero_dim( a1t ) ) return FLA_SUCCESS;  // an a1t with a zero dimension means there is nothing to update
00059 
00060   datatype = FLA_Obj_datatype( A2 );  // A2 alone picks the case below; tau, u2 and a1t are read through that same type's pointer macro
00061 
00062   m_u2_A2  = FLA_Obj_length( u2 );  // handed to the kernels as both the length of u2 and the row count of A2
00063   n_a1t    = FLA_Obj_width( a1t );  // handed to the kernels as both the length of a1t and the column count of A2
00064   inc_u2   = FLA_Obj_vector_inc( u2 );
00065   inc_a1t  = FLA_Obj_vector_inc( a1t );
00066   ldim_A2  = FLA_Obj_ldim( A2 );
00067 
00068   switch ( datatype )  // NOTE(review): the FLA_Error returned by each typed kernel below is discarded
00069   {
00070     case FLA_FLOAT:
00071     {
00072       float* tau_p = ( float* ) FLA_FLOAT_PTR( tau );
00073       float* u2_p  = ( float* ) FLA_FLOAT_PTR( u2 );
00074       float* a1t_p = ( float* ) FLA_FLOAT_PTR( a1t );
00075       float* A2_p  = ( float* ) FLA_FLOAT_PTR( A2 );
00076 
00077       FLA_Apply_househ2_UT_opt_float( m_u2_A2, n_a1t,
00078                                       tau_p,
00079                                       u2_p, inc_u2,
00080                                       a1t_p, inc_a1t,
00081                                       A2_p, ldim_A2 );
00082       break;
00083     }
00084 
00085     case FLA_DOUBLE:
00086     {
00087       double* tau_p = ( double* ) FLA_DOUBLE_PTR( tau );
00088       double* u2_p  = ( double* ) FLA_DOUBLE_PTR( u2 );
00089       double* a1t_p = ( double* ) FLA_DOUBLE_PTR( a1t );
00090       double* A2_p  = ( double* ) FLA_DOUBLE_PTR( A2 );
00091 
00092       FLA_Apply_househ2_UT_opt_double( m_u2_A2, n_a1t,
00093                                        tau_p,
00094                                        u2_p, inc_u2,
00095                                        a1t_p, inc_a1t,
00096                                        A2_p, ldim_A2 );
00097       break;
00098     }
00099 
00100     case FLA_COMPLEX:
00101     {
00102       scomplex* tau_p = ( scomplex* ) FLA_COMPLEX_PTR( tau );
00103       scomplex* u2_p  = ( scomplex* ) FLA_COMPLEX_PTR( u2 );
00104       scomplex* a1t_p = ( scomplex* ) FLA_COMPLEX_PTR( a1t );
00105       scomplex* A2_p  = ( scomplex* ) FLA_COMPLEX_PTR( A2 );
00106 
00107       FLA_Apply_househ2_UT_opt_scomplex( m_u2_A2, n_a1t,
00108                                          tau_p,
00109                                          u2_p, inc_u2,
00110                                          a1t_p, inc_a1t,
00111                                          A2_p, ldim_A2 );
00112       break;
00113     }
00114 
00115     case FLA_DOUBLE_COMPLEX:
00116     {
00117       dcomplex* tau_p = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( tau );
00118       dcomplex* u2_p  = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( u2 );
00119       dcomplex* a1t_p = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( a1t );
00120       dcomplex* A2_p  = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A2 );
00121 
00122       FLA_Apply_househ2_UT_opt_dcomplex( m_u2_A2, n_a1t,
00123                                          tau_p,
00124                                          u2_p, inc_u2,
00125                                          a1t_p, inc_a1t,
00126                                          A2_p, ldim_A2 );
00127       break;
00128     }
00129   }  // NOTE(review): no default case - any other datatype value skips the update and FLA_SUCCESS is still returned
00130 
00131   return FLA_SUCCESS;
00132 }

FLA_Error FLA_Apply_househ2_UT_opt_dcomplex ( int  m_u2_A2,
int  n_a1t,
dcomplex *  tau,
dcomplex *  u2,
int  inc_u2,
dcomplex *  a1t,
int  inc_a1t,
dcomplex *  A2,
int  ldim_A2 
)

References dscal(), FLA_free(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, dcomplex::imag, dcomplex::real, zaxpy(), zcopy(), zgemv(), zgeru(), and zscal().

Referenced by FLA_Apply_househ2_UT_opt(), FLA_QR_UT_Accum_T_opt_var1_dcomplex(), and FLA_QR_UT_UD_Accum_T_opt_var1_dcomplex().

00370 {  // Double-complex kernel: w1t = ( a1t + A2^T * conj( u2 ) ) / conj( tau ), then a1t -= w1t and A2 -= u2 * w1t, each step one BLAS call.
00371   dcomplex* one_p        = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
00372   dcomplex* minus_one_p  = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
00373   dcomplex  tau_copy;  // receives tau / |tau|^2, which equals 1 / conj( tau )
00374   double    temp;  // receives 1 / |tau|^2
00375   char      blas_trans   = 'T';  // plain transpose for zgemv; the conjugation of u2 is done by hand below
00376   int       inc_w1t;
00377   int       inc_u2_conj;
00378   int       i_two        = 2;  // stride, counted in doubles, used to step from one imaginary part of u2_conj to the next
00379   dcomplex* u2_conj;
00380 
00381   // FLA_Obj w1t;
00382   dcomplex* w1t;
00383 
00384   // if ( FLA_Obj_has_zero_dim( a1t ) ) return FLA_SUCCESS;
00385   if ( n_a1t == 0 ) return FLA_SUCCESS;  // NOTE(review): only n_a1t is tested; m_u2_A2 == 0 still reaches the allocation and BLAS calls below
00386 
00387   // FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, a1t, &w1t );
00388   w1t = FLA_malloc( n_a1t * sizeof( dcomplex ) );  // scratch row, released by FLA_free() at the end; NOTE(review): used without a NULL check - confirm FLA_malloc() handles allocation failure itself
00389   inc_w1t = 1;  // the scratch row is contiguous
00390 
00391   // // w1t = a1t;
00392   // FLA_Copy_external( a1t, w1t );
00393   FLA_C2F( zcopy )( &n_a1t,
00394                     a1t, &inc_a1t, 
00395                     w1t, &inc_w1t ); 
00396 
00397   // // w1t = w1t + u2' * A2;
00398   // // w1t = w1t + A2^T * conj(u2);
00399   // FLA_Gemvc_external( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A2, u2, FLA_ONE, w1t );
00400   u2_conj = FLA_malloc( m_u2_A2 * sizeof( dcomplex ) );  // contiguous scratch copy of u2 that is conjugated in place; freed right after zgemv
00401   inc_u2_conj = 1;
00402 
00403   FLA_C2F( zcopy )( &m_u2_A2,
00404                     u2, &inc_u2, 
00405                     u2_conj, &inc_u2_conj ); 
00406 
00407   FLA_C2F( dscal )( &m_u2_A2,  // real-typed scal over every second double starting at offset 1, i.e. the imaginary parts - assumes dcomplex is laid out as { real, imag } doubles (TODO confirm)
00408                     &(minus_one_p->real),  // real part of the FLA_MINUS_ONE constant, presumably -1.0, so u2_conj becomes conj( u2 )
00409                     (( double* ) u2_conj ) + 1, &i_two );
00410 
00411   FLA_C2F( zgemv )( &blas_trans,
00412                     &m_u2_A2, &n_a1t,
00413                     one_p,
00414                     A2, &ldim_A2,
00415                     u2_conj, &inc_u2_conj,
00416                     one_p,
00417                     w1t, &inc_w1t );
00418 
00419   FLA_free( u2_conj );
00420 
00421   // // w1t = w1t / conj( tau );
00422   // FLA_Inv_scalc_external( FLA_CONJUGATE, tau, w1t );
00423   temp = 1.0  / ( tau->real * tau->real + 
00424                   tau->imag * tau->imag );  // NOTE(review): no guard against tau == 0
00425   tau_copy.real = tau->real * temp;
00426   tau_copy.imag = tau->imag * temp;  // imaginary part keeps tau's sign: multiplying by tau / |tau|^2 is the division by conj( tau )
00427   FLA_C2F( zscal )( &n_a1t,
00428                     &tau_copy,
00429                     w1t, &inc_w1t );
00430 
00431   // // a1t = - w1t + a1t;
00432   // FLA_Axpy_external( FLA_MINUS_ONE, w1t, a1t );
00433   FLA_C2F( zaxpy )( &n_a1t,
00434                     minus_one_p,
00435                     w1t, &inc_w1t,
00436                     a1t, &inc_a1t );
00437 
00438   // // A2 = - u2 * w1t + A2;
00439   // FLA_Ger_external( FLA_MINUS_ONE, u2, w1t, A2 );
00440   FLA_C2F( zgeru )( &m_u2_A2, &n_a1t,  // zgeru rather than zgerc: w1t enters the rank-1 update unconjugated, and the original u2 (not u2_conj) is used
00441                     minus_one_p,
00442                     u2, &inc_u2,
00443                     w1t, &inc_w1t,
00444                     A2, &ldim_A2 );
00445 
00446   // FLA_Obj_free( &w1t );
00447   FLA_free( w1t );
00448 
00449   return FLA_SUCCESS;
00450 }

FLA_Error FLA_Apply_househ2_UT_opt_double ( int  m_u2_A2,
int  n_a1t,
double *  tau,
double *  u2,
int  inc_u2,
double *  a1t,
int  inc_a1t,
double *  A2,
int  ldim_A2 
)

References daxpy(), dcopy(), dgemv(), dger(), dscal(), FLA_free(), FLA_malloc(), FLA_MINUS_ONE, and FLA_ONE.

Referenced by FLA_Apply_househ2_UT_opt(), FLA_QR_UT_Accum_T_opt_var1_double(), and FLA_QR_UT_UD_Accum_T_opt_var1_double().

00210 {  // Real double-precision kernel: w1t = ( a1t + A2^T * u2 ) / tau, then a1t -= w1t and A2 -= u2 * w1t, each step one BLAS call.
00211   double* one_p        = FLA_DOUBLE_PTR( FLA_ONE );
00212   double* minus_one_p  = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
00213   double  tau_copy;  // receives 1 / tau, the factor handed to dscal
00214   char    blas_trans   = 'T';  // dgemv is asked for A2^T
00215   int     inc_w1t;
00216 
00217   // FLA_Obj w1t;
00218   double* w1t;
00219 
00220   // if ( FLA_Obj_has_zero_dim( a1t ) ) return FLA_SUCCESS;
00221   if ( n_a1t == 0 ) return FLA_SUCCESS;  // NOTE(review): only n_a1t is tested; m_u2_A2 == 0 still reaches the BLAS calls below
00222 
00223   // FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, a1t, &w1t );
00224   w1t = FLA_malloc( n_a1t * sizeof( double ) );  // scratch row, released by FLA_free() at the end; NOTE(review): used without a NULL check - confirm FLA_malloc() handles allocation failure itself
00225   inc_w1t = 1;  // the scratch row is contiguous
00226 
00227   // // w1t = a1t;
00228   // FLA_Copy_external( a1t, w1t );
00229   FLA_C2F( dcopy )( &n_a1t,
00230                     a1t, &inc_a1t, 
00231                     w1t, &inc_w1t ); 
00232 
00233   // // w1t = w1t + u2' * A2;
00234   // // w1t = w1t + A2^T * conj(u2);
00235   // FLA_Gemvc_external( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A2, u2, FLA_ONE, w1t );
00236   FLA_C2F( dgemv )( &blas_trans,  // real data: u2 is passed directly, no conjugated copy is made
00237                     &m_u2_A2, &n_a1t,
00238                     one_p,
00239                     A2, &ldim_A2,
00240                     u2, &inc_u2,
00241                     one_p,
00242                     w1t, &inc_w1t );
00243 
00244   // // w1t = w1t / conj( tau );
00245   // FLA_Inv_scalc_external( FLA_CONJUGATE, tau, w1t );
00246   tau_copy = 1.0 / *tau;  // tau is real, so conj( tau ) == tau and the division becomes a scale by the reciprocal; NOTE(review): no guard against *tau == 0
00247   FLA_C2F( dscal )( &n_a1t,
00248                     &tau_copy,
00249                     w1t, &inc_w1t );
00250 
00251   // // a1t = - w1t + a1t;
00252   // FLA_Axpy_external( FLA_MINUS_ONE, w1t, a1t );
00253   FLA_C2F( daxpy )( &n_a1t,
00254                     minus_one_p,
00255                     w1t, &inc_w1t,
00256                     a1t, &inc_a1t );
00257 
00258   // // A2 = - u2 * w1t + A2;
00259   // FLA_Ger_external( FLA_MINUS_ONE, u2, w1t, A2 );
00260   FLA_C2F( dger )( &m_u2_A2, &n_a1t,
00261                    minus_one_p,
00262                    u2, &inc_u2,
00263                    w1t, &inc_w1t,
00264                    A2, &ldim_A2 );
00265 
00266   // FLA_Obj_free( &w1t );
00267   FLA_free( w1t );
00268 
00269   return FLA_SUCCESS;
00270 }

FLA_Error FLA_Apply_househ2_UT_opt_float ( int  m_u2_A2,
int  n_a1t,
float *  tau,
float *  u2,
int  inc_u2,
float *  a1t,
int  inc_a1t,
float *  A2,
int  ldim_A2 
)

References FLA_free(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, saxpy(), scopy(), sgemv(), sger(), and sscal().

Referenced by FLA_Apply_househ2_UT_opt(), FLA_QR_UT_Accum_T_opt_var1_float(), and FLA_QR_UT_UD_Accum_T_opt_var1_float().

00140 {  // Real single-precision kernel: w1t = ( a1t + A2^T * u2 ) / tau, then a1t -= w1t and A2 -= u2 * w1t, each step one BLAS call.
00141   float* one_p        = FLA_FLOAT_PTR( FLA_ONE );
00142   float* minus_one_p  = FLA_FLOAT_PTR( FLA_MINUS_ONE );
00143   float  tau_copy;  // receives 1 / tau, the factor handed to sscal
00144   char   blas_trans   = 'T';  // sgemv is asked for A2^T
00145   int    inc_w1t;
00146 
00147   // FLA_Obj w1t;
00148   float* w1t;
00149 
00150   // if ( FLA_Obj_has_zero_dim( a1t ) ) return FLA_SUCCESS;
00151   if ( n_a1t == 0 ) return FLA_SUCCESS;  // NOTE(review): only n_a1t is tested; m_u2_A2 == 0 still reaches the BLAS calls below
00152 
00153   // FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, a1t, &w1t );
00154   w1t = FLA_malloc( n_a1t * sizeof( float ) );  // scratch row, released by FLA_free() at the end; NOTE(review): used without a NULL check - confirm FLA_malloc() handles allocation failure itself
00155   inc_w1t = 1;  // the scratch row is contiguous
00156 
00157   // // w1t = a1t;
00158   // FLA_Copy_external( a1t, w1t );
00159   FLA_C2F( scopy )( &n_a1t,
00160                     a1t, &inc_a1t, 
00161                     w1t, &inc_w1t ); 
00162 
00163   // // w1t = w1t + u2' * A2;
00164   // // w1t = w1t + A2^T * conj(u2);
00165   // FLA_Gemvc_external( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A2, u2, FLA_ONE, w1t );
00166   FLA_C2F( sgemv )( &blas_trans,  // real data: u2 is passed directly, no conjugated copy is made
00167                     &m_u2_A2, &n_a1t,
00168                     one_p,
00169                     A2, &ldim_A2,
00170                     u2, &inc_u2,
00171                     one_p,
00172                     w1t, &inc_w1t );
00173 
00174   // // w1t = w1t / conj( tau );
00175   // FLA_Inv_scalc_external( FLA_CONJUGATE, tau, w1t );
00176   tau_copy = 1.0F / *tau;  // tau is real, so conj( tau ) == tau and the division becomes a scale by the reciprocal; NOTE(review): no guard against *tau == 0
00177   FLA_C2F( sscal )( &n_a1t,
00178                     &tau_copy,
00179                     w1t, &inc_w1t );
00180 
00181   // // a1t = - w1t + a1t;
00182   // FLA_Axpy_external( FLA_MINUS_ONE, w1t, a1t );
00183   FLA_C2F( saxpy )( &n_a1t,
00184                     minus_one_p,
00185                     w1t, &inc_w1t,
00186                     a1t, &inc_a1t );
00187 
00188   // // A2 = - u2 * w1t + A2;
00189   // FLA_Ger_external( FLA_MINUS_ONE, u2, w1t, A2 );
00190   FLA_C2F( sger )( &m_u2_A2, &n_a1t,
00191                    minus_one_p,
00192                    u2, &inc_u2,
00193                    w1t, &inc_w1t,
00194                    A2, &ldim_A2 );
00195 
00196   // FLA_Obj_free( &w1t );
00197   FLA_free( w1t );
00198 
00199   return FLA_SUCCESS;
00200 }

FLA_Error FLA_Apply_househ2_UT_opt_scomplex ( int  m_u2_A2,
int  n_a1t,
scomplex *  tau,
scomplex *  u2,
int  inc_u2,
scomplex *  a1t,
int  inc_a1t,
scomplex *  A2,
int  ldim_A2 
)

References caxpy(), ccopy(), cgemv(), cgeru(), cscal(), FLA_free(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, scomplex::imag, scomplex::real, and sscal().

Referenced by FLA_Apply_househ2_UT_opt(), FLA_QR_UT_Accum_T_opt_var1_scomplex(), and FLA_QR_UT_UD_Accum_T_opt_var1_scomplex().

00280 {  // Single-complex kernel: w1t = ( a1t + A2^T * conj( u2 ) ) / conj( tau ), then a1t -= w1t and A2 -= u2 * w1t, each step one BLAS call.
00281   scomplex* one_p        = FLA_COMPLEX_PTR( FLA_ONE );
00282   scomplex* minus_one_p  = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
00283   scomplex  tau_copy;  // receives tau / |tau|^2, which equals 1 / conj( tau )
00284   float     temp;  // receives 1 / |tau|^2
00285   char      blas_trans   = 'T';  // plain transpose for cgemv; the conjugation of u2 is done by hand below
00286   int       inc_w1t;
00287   int       inc_u2_conj;
00288   int       i_two        = 2;  // stride, counted in floats, used to step from one imaginary part of u2_conj to the next
00289   scomplex* u2_conj;
00290 
00291   // FLA_Obj w1t;
00292   scomplex* w1t;
00293 
00294   // if ( FLA_Obj_has_zero_dim( a1t ) ) return FLA_SUCCESS;
00295   if ( n_a1t == 0 ) return FLA_SUCCESS;  // NOTE(review): only n_a1t is tested; m_u2_A2 == 0 still reaches the allocation and BLAS calls below
00296 
00297   // FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, a1t, &w1t );
00298   w1t = FLA_malloc( n_a1t * sizeof( scomplex ) );  // scratch row, released by FLA_free() at the end; NOTE(review): used without a NULL check - confirm FLA_malloc() handles allocation failure itself
00299   inc_w1t = 1;  // the scratch row is contiguous
00300 
00301   // // w1t = a1t;
00302   // FLA_Copy_external( a1t, w1t );
00303   FLA_C2F( ccopy )( &n_a1t,
00304                     a1t, &inc_a1t, 
00305                     w1t, &inc_w1t ); 
00306 
00307   // // w1t = w1t + u2' * A2;
00308   // // w1t = w1t + A2^T * conj(u2);
00309   // FLA_Gemvc_external( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A2, u2, FLA_ONE, w1t );
00310   u2_conj = FLA_malloc( m_u2_A2 * sizeof( scomplex ) );  // contiguous scratch copy of u2 that is conjugated in place; freed right after cgemv
00311   inc_u2_conj = 1;
00312 
00313   FLA_C2F( ccopy )( &m_u2_A2,
00314                     u2, &inc_u2, 
00315                     u2_conj, &inc_u2_conj ); 
00316 
00317   FLA_C2F( sscal )( &m_u2_A2,  // real-typed scal over every second float starting at offset 1, i.e. the imaginary parts - assumes scomplex is laid out as { real, imag } floats (TODO confirm)
00318                     &(minus_one_p->real),  // real part of the FLA_MINUS_ONE constant, presumably -1.0F, so u2_conj becomes conj( u2 )
00319                     (( float* ) u2_conj ) + 1, &i_two );
00320 
00321   FLA_C2F( cgemv )( &blas_trans,
00322                     &m_u2_A2, &n_a1t,
00323                     one_p,
00324                     A2, &ldim_A2,
00325                     u2_conj, &inc_u2_conj,
00326                     one_p,
00327                     w1t, &inc_w1t );
00328 
00329   FLA_free( u2_conj );
00330 
00331   // // w1t = w1t / conj( tau );
00332   // FLA_Inv_scalc_external( FLA_CONJUGATE, tau, w1t );
00333   temp = 1.0F / ( tau->real * tau->real + 
00334                   tau->imag * tau->imag );  // NOTE(review): no guard against tau == 0
00335   tau_copy.real = tau->real * temp;
00336   tau_copy.imag = tau->imag * temp;  // imaginary part keeps tau's sign: multiplying by tau / |tau|^2 is the division by conj( tau )
00337   FLA_C2F( cscal )( &n_a1t,
00338                     &tau_copy,
00339                     w1t, &inc_w1t );
00340 
00341   // // a1t = - w1t + a1t;
00342   // FLA_Axpy_external( FLA_MINUS_ONE, w1t, a1t );
00343   FLA_C2F( caxpy )( &n_a1t,
00344                     minus_one_p,
00345                     w1t, &inc_w1t,
00346                     a1t, &inc_a1t );
00347 
00348   // // A2 = - u2 * w1t + A2;
00349   // FLA_Ger_external( FLA_MINUS_ONE, u2, w1t, A2 );
00350   FLA_C2F( cgeru )( &m_u2_A2, &n_a1t,  // cgeru rather than cgerc: w1t enters the rank-1 update unconjugated, and the original u2 (not u2_conj) is used
00351                     minus_one_p,
00352                     u2, &inc_u2,
00353                     w1t, &inc_w1t,
00354                     A2, &ldim_A2 );
00355 
00356   // FLA_Obj_free( &w1t );
00357   FLA_free( w1t );
00358 
00359   return FLA_SUCCESS;
00360 }


Generated on Mon Jul 6 05:45:53 2009 for libflame by  doxygen 1.5.9