FLA_QR_UT_vars.h File Reference

(r)

Go to the source code of this file.

Functions

FLA_Error FLA_QR_UT_blk_var2 (FLA_Obj A, FLA_Obj T, fla_qrut_t *cntl)
FLA_Error FLA_QR_UT_Accum_T_unb_var1 (FLA_Obj A, FLA_Obj T)
FLA_Error FLA_QR_UT_Accum_T_blk_var1 (FLA_Obj A, FLA_Obj T, fla_qrut_t *cntl)
FLA_Error FLA_QR_UT_Accum_T_opt_var1 (FLA_Obj A, FLA_Obj T)
FLA_Error FLA_QR_UT_Accum_T_opt_var1_float (FLA_Obj A, FLA_Obj T)
FLA_Error FLA_QR_UT_Accum_T_opt_var1_double (FLA_Obj A, FLA_Obj T)
FLA_Error FLA_QR_UT_Accum_T_opt_var1_scomplex (FLA_Obj A, FLA_Obj T)
FLA_Error FLA_QR_UT_Accum_T_opt_var1_dcomplex (FLA_Obj A, FLA_Obj T)


Function Documentation

FLA_Error FLA_QR_UT_Accum_T_blk_var1 ( FLA_Obj  A,
FLA_Obj  T,
fla_qrut_t *  cntl 
)

References FLA_Axpy_external(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy_external(), FLA_Copyt_external(), FLA_Determine_blocksize(), FLA_Gemm_external(), FLA_Merge_2x1(), FLA_MINUS_ONE, FLA_Obj_min_dim(), FLA_ONE, FLA_Part_2x2(), FLA_QR_UT_internal(), FLA_Repart_2x2_to_3x3(), FLA_Trmm_external(), and FLA_Trsm_external().

Referenced by FLA_QR_UT_internal().

00036 { /* Blocked loop: factor the panel [A11; A21] via FLA_QR_UT_internal, update A12/A22 through workspace W12, then form T01. */
00037   FLA_Obj ATL,   ATR,      A00, A01, A02, 
00038           ABL,   ABR,      A10, A11, A12,
00039                            A20, A21, A22;
00040 
00041   FLA_Obj TTL,   TTR,      T00, T01, T02, 
00042           TBL,   TBR,      T10, T11, W12,
00043                            T20, T21, T22;
00044 
00045   FLA_Obj AB1; /* receives the merged view of A11 stacked on A21 */
00046 
00047   dim_t b; /* block size used for the current iteration */
00048 
00049   FLA_Part_2x2( A,    &ATL, &ATR,
00050                       &ABL, &ABR,     0, 0, FLA_TL );
00051 
00052   FLA_Part_2x2( T,    &TTL, &TTR,
00053                       &TBL, &TBR,     0, 0, FLA_TL );
00054 
00055   while ( FLA_Obj_min_dim( ABR ) > 0 ){ /* keep going while the unprocessed part ABR reports a positive min dimension */
00056 
00057     b = FLA_Determine_blocksize( ABR, FLA_BR, FLA_Cntl_blocksize( cntl ) ); /* blocksize taken from cntl, determined against ABR */
00058 
00059     FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00, /**/ &A01, &A02,
00060                         /* ************* */   /* ******************** */
00061                                                 &A10, /**/ &A11, &A12,
00062                            ABL, /**/ ABR,       &A20, /**/ &A21, &A22,
00063                            b, b, FLA_BR );
00064 
00065     FLA_Repart_2x2_to_3x3( TTL, /**/ TTR,       &T00, /**/ &T01, &T02,
00066                         /* ************* */   /* ******************** */
00067                                                 &T10, /**/ &T11, &W12, /* W12 is the (1,2) block of T's repartition, used below as workspace */
00068                            TBL, /**/ TBR,       &T20, /**/ &T21, &T22,
00069                            b, b, FLA_BR );
00070 
00071     /*------------------------------------------------------------*/
00072 
00073     /*
00074       [ U1, T11 ] = FLA_QR_UT_internal( [ A11
00075                                           A21 ], T11 );
00076     */
00077 
00078     FLA_Merge_2x1( A11,
00079                    A21,   &AB1 );
00080 
00081     FLA_QR_UT_internal( AB1, T11,
00082                         FLA_Cntl_sub_qrut( cntl ) ); /* panel is handled with the sub-control tree of cntl; return value is not checked */
00083 
00084     /*
00085       U11 = trilu( A11 );
00086       U21 = A21;
00087     
00088       W12 = triu( inv(T11) )' * ( U11' * A12 + U21' * A22 );
00089     */
00090 
00091     FLA_Copy_external( A12, W12 ); /* W12 = A12 */
00092     FLA_Trmm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR, 
00093                        FLA_CONJ_TRANSPOSE, FLA_UNIT_DIAG,
00094                        FLA_ONE, A11, W12 ); /* W12 = U11' * W12, reading only the unit lower triangle of A11 */
00095     FLA_Gemm_external( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
00096                        FLA_ONE, A21, A22, FLA_ONE, W12 ); /* W12 = W12 + A21' * A22 */
00097     FLA_Trsm_external( FLA_LEFT, FLA_UPPER_TRIANGULAR, 
00098                        FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG, 
00099                        FLA_ONE, T11, W12 ); /* W12 = inv( triu( T11 ) )' * W12 */
00100 
00101     /*
00102       A22 = A22 - U21 * W12;
00103       A12 = A12 - U11 * W12;
00104     */
00105 
00106     FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 
00107                        FLA_MINUS_ONE, A21, W12, FLA_ONE, A22 ); /* A22 = A22 - A21 * W12; must run before W12 is overwritten below */
00108     FLA_Trmm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR, 
00109                        FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
00110                        FLA_MINUS_ONE, A11, W12 ); /* W12 = -U11 * W12 (in place) */
00111     FLA_Axpy_external( FLA_ONE, W12, A12 ); /* A12 = A12 + W12, i.e. A12 - U11 * (old W12) */
00112 
00113     /*
00114       Update T
00115     
00116       T01 = A10' * U11 + A20' * U21;
00117     
00118       Recall: U11 = trilu( A11 );
00119               U21 = A21;
00120     */
00121 
00122     FLA_Copyt_external( FLA_CONJ_TRANSPOSE, A10, T01 ); /* T01 = A10' */
00123     FLA_Trmm_external( FLA_RIGHT, FLA_LOWER_TRIANGULAR,
00124                        FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
00125                        FLA_ONE, A11, T01 ); /* T01 = T01 * U11 */
00126     FLA_Gemm_external( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE, 
00127                        FLA_ONE, A20, A21, FLA_ONE, T01 ); /* T01 = T01 + A20' * A21 */
00128 
00129     /*------------------------------------------------------------*/
00130 
00131     FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00, A01, /**/ A02,
00132                                                      A10, A11, /**/ A12,
00133                             /* ************** */  /* ****************** */
00134                               &ABL, /**/ &ABR,       A20, A21, /**/ A22,
00135                               FLA_TL );
00136 
00137     FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR,       T00, T01, /**/ T02,
00138                                                      T10, T11, /**/ W12,
00139                             /* ************** */  /* ****************** */
00140                               &TBL, /**/ &TBR,       T20, T21, /**/ T22,
00141                               FLA_TL );
00142 
00143   }
00144 
00145   return FLA_SUCCESS; /* always reports success; no error path in this routine */
00146 }

FLA_Error FLA_QR_UT_Accum_T_opt_var1 ( FLA_Obj  A,
FLA_Obj  T 
)

References FLA_Obj_datatype(), FLA_QR_UT_Accum_T_opt_var1_dcomplex(), FLA_QR_UT_Accum_T_opt_var1_double(), FLA_QR_UT_Accum_T_opt_var1_float(), and FLA_QR_UT_Accum_T_opt_var1_scomplex().

Referenced by FLA_QR_UT_internal().

00036 { /* Dispatches to the datatype-specific FLA_QR_UT_Accum_T_opt_var1_* routine, selected by A's datatype. */
00037   FLA_Datatype datatype;
00038 
00039   datatype = FLA_Obj_datatype( A ); /* only A is queried; T's datatype is not checked here */
00040 
00041   switch ( datatype ) /* NOTE(review): no default case -- any other datatype does no work and still returns FLA_SUCCESS */
00042   {
00043     case FLA_FLOAT:
00044     FLA_QR_UT_Accum_T_opt_var1_float( A, T ); /* return value is discarded (same for the cases below) */
00045     break;
00046 
00047     case FLA_DOUBLE:
00048     FLA_QR_UT_Accum_T_opt_var1_double( A, T );
00049     break;
00050 
00051     case FLA_COMPLEX:
00052     FLA_QR_UT_Accum_T_opt_var1_scomplex( A, T );
00053     break;
00054 
00055     case FLA_DOUBLE_COMPLEX:
00056     FLA_QR_UT_Accum_T_opt_var1_dcomplex( A, T );
00057     break;
00058   }
00059 
00060   return FLA_SUCCESS;
00061 }

FLA_Error FLA_QR_UT_Accum_T_opt_var1_dcomplex ( FLA_Obj  A,
FLA_Obj  T 
)

References dscal(), FLA_Apply_househ2_UT_opt_dcomplex(), FLA_Househ2_UT_opt_dcomplex(), FLA_MINUS_ONE, FLA_Obj_ldim(), FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, dcomplex::real, zcopy(), and zgemv().

Referenced by FLA_QR_UT_Accum_T_opt_var1().

00297 { /* Pointer-based dcomplex variant: for each column i, compute a Householder transform, fill t01 in T, and apply the transform to the columns to the right. */
00298   dcomplex* buff_A  = FLA_DOUBLE_COMPLEX_PTR( A );
00299   dcomplex* buff_T  = FLA_DOUBLE_COMPLEX_PTR( T );
00300   dcomplex* buff_1  = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
00301   dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
00302 
00303   int       m_A     = FLA_Obj_length( A );
00304   int       n_AT    = FLA_Obj_width( A ); /* loop bound; taken from A only -- NOTE(review): name suggests T shares this width, confirm */
00305   int       ldim_A  = FLA_Obj_ldim( A );
00306   int       ldim_T  = FLA_Obj_ldim( T );
00307 
00308   char      trans   = 'C'; /* transpose flag handed to zgemv */
00309   int       inc_one = 1;
00310   int       inc_two = 2; /* stride for dscal: every second double of t01 */
00311   int       i;
00312 
00313   for ( i = 0; i < n_AT; ++i )
00314   { /* offsets below have the form col*ldim + row (read as column-major indexing with leading dimensions ldim_A / ldim_T) */
00315     dcomplex* a10t      = buff_A + (0  )*ldim_A + i; /* row i, starting at column 0 */
00316     dcomplex* A20       = buff_A + (0  )*ldim_A + i + 1; /* rows below i, starting at column 0 */
00317     dcomplex* alpha11   = buff_A + (i  )*ldim_A + i; /* diagonal element (i,i) */
00318     dcomplex* a21       = buff_A + (i  )*ldim_A + i + 1; /* column i, below the diagonal */
00319     dcomplex* a12t      = buff_A + (i+1)*ldim_A + i; /* row i, right of the diagonal */
00320     dcomplex* A22       = buff_A + (i+1)*ldim_A + i + 1; /* trailing part below and right of (i,i) */
00321 
00322     dcomplex* tau11     = buff_T + (i  )*ldim_T + i; /* diagonal element (i,i) of T */
00323     dcomplex* t01       = buff_T + (i  )*ldim_T; /* top of column i of T */
00324 
00325     int       m_ahead   = m_A  - i - 1; /* rows below the diagonal; NOTE(review): negative once i >= m_A, nothing here guards n_AT > m_A -- confirm callers */
00326     int       n_ahead   = n_AT - i - 1; /* columns to the right of column i */
00327     int       n_behind  = i; /* columns to the left of column i */
00328 
00329     /*------------------------------------------------------------*/
00330 
00331     // FLA_Househ2_UT( alpha11,
00332     //                 a21, tau11 );
00333     FLA_Househ2_UT_opt_dcomplex( m_ahead,
00334                                  alpha11,
00335                                  a21, inc_one,
00336                                  tau11 );
00337 
00338     if ( i > 0 ) /* n_behind is 0 on the first column, so there is no t01 to fill */
00339     {
00340       // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
00341       FLA_C2F( zcopy )( &n_behind,
00342                         a10t, &ldim_A,
00343                         t01,  &inc_one ); /* copy n_behind elements from a10t (stride ldim_A) into t01 (stride 1) */
00344       FLA_C2F( dscal )( &n_behind,
00345                         &(buff_m1->real),
00346                         (( double* ) t01 ) + 1, &inc_two ); /* scale every second double of t01, from index 1, by buff_m1->real; presumably negates the imaginary parts (conjugation) -- confirm dcomplex layout */
00347 
00348       // // t01 = a10t' + A20' * a21;
00349       // FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
00350       FLA_C2F( zgemv )( &trans,
00351                         &m_ahead, &n_behind,
00352                         buff_1,
00353                         A20, &ldim_A,
00354                         a21, &inc_one,
00355                         buff_1,
00356                         t01, &inc_one ); /* t01 = t01 + A20' * a21 (alpha and beta are both buff_1) */
00357     }
00358 
00359     if ( i < n_AT - 1 ) /* skipped on the last column, where n_ahead is 0 */
00360     {
00361       // FLA_Apply_househ2_UT( tau11, a21, a12t,
00362       //                                   A22 );
00363       FLA_Apply_househ2_UT_opt_dcomplex( m_ahead, n_ahead,
00364                                          tau11,
00365                                          a21, inc_one,
00366                                          a12t, ldim_A,
00367                                          A22, ldim_A );
00368     }
00369 
00370     /*------------------------------------------------------------*/
00371 
00372   }
00373 
00374   return FLA_SUCCESS; /* always reports success; helper results are not inspected */
00375 }

FLA_Error FLA_QR_UT_Accum_T_opt_var1_double ( FLA_Obj  A,
FLA_Obj  T 
)

References dcopy(), dgemv(), FLA_Apply_househ2_UT_opt_double(), FLA_Househ2_UT_opt_double(), FLA_Obj_ldim(), FLA_Obj_length(), FLA_Obj_width(), and FLA_ONE.

Referenced by FLA_QR_UT_Accum_T_opt_var1().

00140 { /* Pointer-based double variant: for each column i, compute a Householder transform, fill t01 in T, and apply the transform to the columns to the right. */
00141   double* buff_A  = FLA_DOUBLE_PTR( A );
00142   double* buff_T  = FLA_DOUBLE_PTR( T );
00143   double* buff_1  = FLA_DOUBLE_PTR( FLA_ONE );
00144 
00145   int     m_A     = FLA_Obj_length( A );
00146   int     n_AT    = FLA_Obj_width( A ); /* loop bound; taken from A only -- NOTE(review): name suggests T shares this width, confirm */
00147   int     ldim_A  = FLA_Obj_ldim( A );
00148   int     ldim_T  = FLA_Obj_ldim( T );
00149 
00150   char    trans   = 'T'; /* transpose flag handed to dgemv; for real data this matches the CONJ_TRANSPOSE of the commented FLA call */
00151   int     inc_one = 1;
00152   int     i;
00153 
00154   for ( i = 0; i < n_AT; ++i )
00155   { /* offsets below have the form col*ldim + row (read as column-major indexing with leading dimensions ldim_A / ldim_T) */
00156     double* a10t      = buff_A + (0  )*ldim_A + i; /* row i, starting at column 0 */
00157     double* A20       = buff_A + (0  )*ldim_A + i + 1; /* rows below i, starting at column 0 */
00158     double* alpha11   = buff_A + (i  )*ldim_A + i; /* diagonal element (i,i) */
00159     double* a21       = buff_A + (i  )*ldim_A + i + 1; /* column i, below the diagonal */
00160     double* a12t      = buff_A + (i+1)*ldim_A + i; /* row i, right of the diagonal */
00161     double* A22       = buff_A + (i+1)*ldim_A + i + 1; /* trailing part below and right of (i,i) */
00162 
00163     double* tau11     = buff_T + (i  )*ldim_T + i; /* diagonal element (i,i) of T */
00164     double* t01       = buff_T + (i  )*ldim_T; /* top of column i of T */
00165 
00166     int     m_ahead   = m_A  - i - 1; /* rows below the diagonal; NOTE(review): negative once i >= m_A, nothing here guards n_AT > m_A -- confirm callers */
00167     int     n_ahead   = n_AT - i - 1; /* columns to the right of column i */
00168     int     n_behind  = i; /* columns to the left of column i */
00169 
00170     /*------------------------------------------------------------*/
00171 
00172     // FLA_Househ2_UT( alpha11,
00173     //                 a21, tau11 );
00174     FLA_Househ2_UT_opt_double( m_ahead,
00175                                alpha11,
00176                                a21, inc_one,
00177                                tau11 );
00178 
00179     if ( i > 0 ) /* n_behind is 0 on the first column, so there is no t01 to fill */
00180     {
00181       // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
00182       FLA_C2F( dcopy )( &n_behind,
00183                         a10t, &ldim_A,
00184                         t01,  &inc_one ); /* copy n_behind elements from a10t (stride ldim_A) into t01 (stride 1) */
00185 
00186       // // t01 = a10t' + A20' * a21;
00187       // FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
00188       FLA_C2F( dgemv )( &trans,
00189                         &m_ahead, &n_behind,
00190                         buff_1,
00191                         A20, &ldim_A,
00192                         a21, &inc_one,
00193                         buff_1,
00194                         t01, &inc_one ); /* t01 = t01 + A20' * a21 (alpha and beta are both buff_1) */
00195     }
00196 
00197     if ( i < n_AT - 1 ) /* skipped on the last column, where n_ahead is 0 */
00198     {
00199       // FLA_Apply_househ2_UT( tau11, a21, a12t,
00200       //                                   A22 );
00201       FLA_Apply_househ2_UT_opt_double( m_ahead, n_ahead,
00202                                        tau11,
00203                                        a21, inc_one,
00204                                        a12t, ldim_A,
00205                                        A22, ldim_A );
00206     }
00207 
00208     /*------------------------------------------------------------*/
00209 
00210   }
00211 
00212   return FLA_SUCCESS; /* always reports success; helper results are not inspected */
00213 }

FLA_Error FLA_QR_UT_Accum_T_opt_var1_float ( FLA_Obj  A,
FLA_Obj  T 
)

References FLA_Apply_househ2_UT_opt_float(), FLA_Househ2_UT_opt_float(), FLA_Obj_ldim(), FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, scopy(), and sgemv().

Referenced by FLA_QR_UT_Accum_T_opt_var1().

00064 { /* Pointer-based float variant: for each column i, compute a Householder transform, fill t01 in T, and apply the transform to the columns to the right. */
00065   float* buff_A  = FLA_FLOAT_PTR( A );
00066   float* buff_T  = FLA_FLOAT_PTR( T );
00067   float* buff_1  = FLA_FLOAT_PTR( FLA_ONE );
00068 
00069   int    m_A     = FLA_Obj_length( A );
00070   int    n_AT    = FLA_Obj_width( A ); /* loop bound; taken from A only -- NOTE(review): name suggests T shares this width, confirm */
00071   int    ldim_A  = FLA_Obj_ldim( A );
00072   int    ldim_T  = FLA_Obj_ldim( T );
00073 
00074   char   trans   = 'T'; /* transpose flag handed to sgemv; for real data this matches the CONJ_TRANSPOSE of the commented FLA call */
00075   int    inc_one = 1;
00076   int    i;
00077 
00078   for ( i = 0; i < n_AT; ++i )
00079   { /* offsets below have the form col*ldim + row (read as column-major indexing with leading dimensions ldim_A / ldim_T) */
00080     float* a10t      = buff_A + (0  )*ldim_A + i; /* row i, starting at column 0 */
00081     float* A20       = buff_A + (0  )*ldim_A + i + 1; /* rows below i, starting at column 0 */
00082     float* alpha11   = buff_A + (i  )*ldim_A + i; /* diagonal element (i,i) */
00083     float* a21       = buff_A + (i  )*ldim_A + i + 1; /* column i, below the diagonal */
00084     float* a12t      = buff_A + (i+1)*ldim_A + i; /* row i, right of the diagonal */
00085     float* A22       = buff_A + (i+1)*ldim_A + i + 1; /* trailing part below and right of (i,i) */
00086 
00087     float* tau11     = buff_T + (i  )*ldim_T + i; /* diagonal element (i,i) of T */
00088     float* t01       = buff_T + (i  )*ldim_T; /* top of column i of T */
00089 
00090     int    m_ahead  = m_A  - i - 1; /* rows below the diagonal; NOTE(review): negative once i >= m_A, nothing here guards n_AT > m_A -- confirm callers */
00091     int    n_ahead  = n_AT - i - 1; /* columns to the right of column i */
00092     int    n_behind = i; /* columns to the left of column i */
00093 
00094     /*------------------------------------------------------------*/
00095 
00096     // FLA_Househ2_UT( alpha11,
00097     //                 a21, tau11 );
00098     FLA_Househ2_UT_opt_float( m_ahead,
00099                               alpha11,
00100                               a21, inc_one,
00101                               tau11 );
00102 
00103     if ( i > 0 ) /* n_behind is 0 on the first column, so there is no t01 to fill */
00104     {
00105       // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
00106       FLA_C2F( scopy )( &n_behind,
00107                         a10t, &ldim_A,
00108                         t01,  &inc_one ); /* copy n_behind elements from a10t (stride ldim_A) into t01 (stride 1) */
00109 
00110       // // t01 = a10t' + A20' * a21;
00111       // FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
00112       FLA_C2F( sgemv )( &trans,
00113                         &m_ahead, &n_behind,
00114                         buff_1,
00115                         A20, &ldim_A,
00116                         a21, &inc_one,
00117                         buff_1,
00118                         t01, &inc_one ); /* t01 = t01 + A20' * a21 (alpha and beta are both buff_1) */
00119     }
00120 
00121     if ( i < n_AT - 1 ) /* skipped on the last column, where n_ahead is 0 */
00122     {
00123       // FLA_Apply_househ2_UT( tau11, a21, a12t,
00124       //                                   A22 );
00125       FLA_Apply_househ2_UT_opt_float( m_ahead, n_ahead,
00126                                       tau11,
00127                                       a21, inc_one,
00128                                       a12t, ldim_A,
00129                                       A22, ldim_A );
00130     }
00131 
00132     /*------------------------------------------------------------*/
00133 
00134   }
00135 
00136   return FLA_SUCCESS; /* always reports success; helper results are not inspected */
00137 }

FLA_Error FLA_QR_UT_Accum_T_opt_var1_scomplex ( FLA_Obj  A,
FLA_Obj  T 
)

References ccopy(), cgemv(), FLA_Apply_househ2_UT_opt_scomplex(), FLA_Househ2_UT_opt_scomplex(), FLA_MINUS_ONE, FLA_Obj_ldim(), FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, scomplex::real, and sscal().

Referenced by FLA_QR_UT_Accum_T_opt_var1().

00216 { /* Pointer-based scomplex variant: for each column i, compute a Householder transform, fill t01 in T, and apply the transform to the columns to the right. */
00217   scomplex* buff_A  = FLA_COMPLEX_PTR( A );
00218   scomplex* buff_T  = FLA_COMPLEX_PTR( T );
00219   scomplex* buff_1  = FLA_COMPLEX_PTR( FLA_ONE );
00220   scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
00221 
00222   int       m_A     = FLA_Obj_length( A );
00223   int       n_AT    = FLA_Obj_width( A ); /* loop bound; taken from A only -- NOTE(review): name suggests T shares this width, confirm */
00224   int       ldim_A  = FLA_Obj_ldim( A );
00225   int       ldim_T  = FLA_Obj_ldim( T );
00226 
00227   char      trans   = 'C'; /* transpose flag handed to cgemv */
00228   int       inc_one = 1;
00229   int       inc_two = 2; /* stride for sscal: every second float of t01 */
00230   int       i;
00231 
00232   for ( i = 0; i < n_AT; ++i )
00233   { /* offsets below have the form col*ldim + row (read as column-major indexing with leading dimensions ldim_A / ldim_T) */
00234     scomplex* a10t      = buff_A + (0  )*ldim_A + i; /* row i, starting at column 0 */
00235     scomplex* A20       = buff_A + (0  )*ldim_A + i + 1; /* rows below i, starting at column 0 */
00236     scomplex* alpha11   = buff_A + (i  )*ldim_A + i; /* diagonal element (i,i) */
00237     scomplex* a21       = buff_A + (i  )*ldim_A + i + 1; /* column i, below the diagonal */
00238     scomplex* a12t      = buff_A + (i+1)*ldim_A + i; /* row i, right of the diagonal */
00239     scomplex* A22       = buff_A + (i+1)*ldim_A + i + 1; /* trailing part below and right of (i,i) */
00240 
00241     scomplex* tau11     = buff_T + (i  )*ldim_T + i; /* diagonal element (i,i) of T */
00242     scomplex* t01       = buff_T + (i  )*ldim_T; /* top of column i of T */
00243 
00244     int       m_ahead   = m_A  - i - 1; /* rows below the diagonal; NOTE(review): negative once i >= m_A, nothing here guards n_AT > m_A -- confirm callers */
00245     int       n_ahead   = n_AT - i - 1; /* columns to the right of column i */
00246     int       n_behind  = i; /* columns to the left of column i */
00247 
00248     /*------------------------------------------------------------*/
00249 
00250     // FLA_Househ2_UT( alpha11,
00251     //                 a21, tau11 );
00252     FLA_Househ2_UT_opt_scomplex( m_ahead,
00253                                  alpha11,
00254                                  a21, inc_one,
00255                                  tau11 );
00256 
00257     if ( i > 0 ) /* n_behind is 0 on the first column, so there is no t01 to fill */
00258     {
00259       // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
00260       FLA_C2F( ccopy )( &n_behind,
00261                         a10t, &ldim_A,
00262                         t01,  &inc_one ); /* copy n_behind elements from a10t (stride ldim_A) into t01 (stride 1) */
00263       FLA_C2F( sscal )( &n_behind,
00264                         &(buff_m1->real),
00265                         (( float* ) t01 ) + 1, &inc_two ); /* scale every second float of t01, from index 1, by buff_m1->real; presumably negates the imaginary parts (conjugation) -- confirm scomplex layout */
00266 
00267       // // t01 = a10t' + A20' * a21;
00268       // FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
00269       FLA_C2F( cgemv )( &trans,
00270                         &m_ahead, &n_behind,
00271                         buff_1,
00272                         A20, &ldim_A,
00273                         a21, &inc_one,
00274                         buff_1,
00275                         t01, &inc_one ); /* t01 = t01 + A20' * a21 (alpha and beta are both buff_1) */
00276     }
00277 
00278     if ( i < n_AT - 1 ) /* skipped on the last column, where n_ahead is 0 */
00279     {
00280       // FLA_Apply_househ2_UT( tau11, a21, a12t,
00281       //                                   A22 );
00282       FLA_Apply_househ2_UT_opt_scomplex( m_ahead, n_ahead,
00283                                          tau11,
00284                                          a21, inc_one,
00285                                          a12t, ldim_A,
00286                                          A22, ldim_A );
00287     }
00288 
00289     /*------------------------------------------------------------*/
00290 
00291   }
00292 
00293   return FLA_SUCCESS; /* always reports success; helper results are not inspected */
00294 }

FLA_Error FLA_QR_UT_Accum_T_unb_var1 ( FLA_Obj  A,
FLA_Obj  T 
)

References FLA_Apply_househ2_UT(), FLA_Cont_with_3x3_to_2x2(), FLA_Copyt_external(), FLA_Gemv_external(), FLA_Househ2_UT(), FLA_Obj_min_dim(), FLA_ONE, FLA_Part_2x2(), and FLA_Repart_2x2_to_3x3().

Referenced by FLA_QR_UT_internal().

00036 { /* Unblocked FLA_Obj-based variant: advances one row/column per iteration, computing a Householder transform, filling t01, and updating a12t/A22. */
00037   FLA_Obj ATL,   ATR,      A00,  a01,     A02, 
00038           ABL,   ABR,      a10t, alpha11, a12t,
00039                            A20,  a21,     A22;
00040 
00041   FLA_Obj TTL,   TTR,      T00,  t01,   T02, 
00042           TBL,   TBR,      t10t, tau11, t12t,
00043                            T20,  t21,   T22;
00044 
00045 
00046   FLA_Part_2x2( A,    &ATL, &ATR,
00047                       &ABL, &ABR,     0, 0, FLA_TL );
00048 
00049   FLA_Part_2x2( T,    &TTL, &TTR,
00050                       &TBL, &TBR,     0, 0, FLA_TL );
00051 
00052   while ( FLA_Obj_min_dim( ABR ) > 0 ){ /* keep going while the unprocessed part ABR reports a positive min dimension */
00053 
00054     FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00,  /**/ &a01,     &A02,
00055                         /* ************* */   /* ************************** */
00056                                                 &a10t, /**/ &alpha11, &a12t,
00057                            ABL, /**/ ABR,       &A20,  /**/ &a21,     &A22,
00058                            1, 1, FLA_BR ); /* expose a single row and column of A */
00059 
00060     FLA_Repart_2x2_to_3x3( TTL, /**/ TTR,       &T00,  /**/ &t01,   &T02,
00061                         /* ************* */   /* ************************ */
00062                                                 &t10t, /**/ &tau11, &t12t,
00063                            TBL, /**/ TBR,       &T20,  /**/ &t21,   &T22,
00064                            1, 1, FLA_BR ); /* T is partitioned in lockstep with A */
00065 
00066     /*------------------------------------------------------------*/
00067 
00068     // [ alpha11, ...
00069     //   a21, tau11 ] = FLA_Househ2_UT( alpha11, ...
00070     //                                  a21, tau11 );
00071     FLA_Househ2_UT( alpha11,
00072                     a21, tau11 );
00073 
00074     // t01 = a10t' + A20' * a21;
00075     FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 ); /* t01 = a10t' */
00076     FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 ); /* t01 = t01 + A20' * a21 */
00077 
00078     // [ a12t, ...
00079     //   A22 ] = FLA_Apply_househ2_UT( tau11, a21, a12t, ...
00080     //                                             A22 );
00081     FLA_Apply_househ2_UT( tau11, a21, a12t,
00082                                       A22 ); /* called on every iteration, with no explicit check for empty a12t / A22 */
00083 
00084     /*------------------------------------------------------------*/
00085 
00086     FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00,  a01,     /**/ A02,
00087                                                      a10t, alpha11, /**/ a12t,
00088                             /* ************** */  /* ************************ */
00089                               &ABL, /**/ &ABR,       A20,  a21,     /**/ A22,
00090                               FLA_TL );
00091 
00092     FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR,       T00,  t01,   /**/ T02,
00093                                                      t10t, tau11, /**/ t12t,
00094                             /* ************** */  /* ********************** */
00095                               &TBL, /**/ &TBR,       T20,  t21,   /**/ T22,
00096                               FLA_TL );
00097 
00098   }
00099 
00100   return FLA_SUCCESS; /* always reports success; helper results are not inspected */
00101 }

FLA_Error FLA_QR_UT_blk_var2 ( FLA_Obj  A,
FLA_Obj  T,
fla_qrut_t *  cntl 
)

References FLA_Axpy_external(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy_external(), FLA_Determine_blocksize(), FLA_Gemm_external(), FLA_Merge_2x1(), FLA_MINUS_ONE, FLA_Obj_min_dim(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_QR_UT_internal(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x2_to_3x3(), FLA_Trmm_external(), and FLA_Trsm_external().

Referenced by FLA_QR_UT_internal().

00036 { /* Blocked loop: factor the panel [A11; A21] via FLA_QR_UT_internal into T1T, then update A12/A22 using a sub-block of W12 as workspace. */
00037   FLA_Obj ATL,   ATR,      A00, A01, A02, 
00038           ABL,   ABR,      A10, A11, A12,
00039                            A20, A21, A22;
00040 
00041   FLA_Obj TL,    TR,       T0,  T1,  W12; /* T is split column-wise only; W12 is the part of T to the right of T1 */
00042 
00043   FLA_Obj W12TL,   W12TR, 
00044           W12BL,   W12BR;
00045 
00046   FLA_Obj T1T,   T2B,      AB1; /* T1T/T2B: top/bottom split of T1; AB1: merged view of A11 stacked on A21 */
00047 
00048   dim_t b; /* block size used for the current iteration */
00049 
00050   FLA_Part_2x2( A,    &ATL, &ATR,
00051                       &ABL, &ABR,     0, 0, FLA_TL );
00052 
00053   FLA_Part_1x2( T,    &TL,  &TR,      0, FLA_LEFT );
00054 
00055   while ( FLA_Obj_min_dim( ABR ) > 0 ){ /* keep going while the unprocessed part ABR reports a positive min dimension */
00056 
00057     b = FLA_Determine_blocksize( ABR, FLA_BR, FLA_Cntl_blocksize( cntl ) ); /* blocksize taken from cntl, determined against ABR */
00058 
00059     FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00, /**/ &A01, &A02,
00060                         /* ************* */   /* ******************** */
00061                                                 &A10, /**/ &A11, &A12,
00062                            ABL, /**/ ABR,       &A20, /**/ &A21, &A22,
00063                            b, b, FLA_BR );
00064 
00065     FLA_Repart_1x2_to_1x3( TL,  /**/ TR,        &T0, /**/ &T1, &W12,
00066                            b, FLA_RIGHT );
00067 
00068     /*------------------------------------------------------------*/
00069 
00070     /*
00071       T1T = FLA_Top_part( T1, b );
00072     */
00073 
00074     FLA_Part_2x1( T1,   &T1T, 
00075                         &T2B, b, FLA_TOP ); /* T2B is produced by the split but not used afterwards */
00076 
00077     /*
00078       [ U1, T1T ] = FLA_QR_UT_internal( [ A11
00079                                           A21 ], T1T );
00080     */
00081 
00082     FLA_Merge_2x1( A11,
00083                    A21,   &AB1 );
00084 
00085     FLA_QR_UT_internal( AB1, T1T, 
00086                         FLA_Cntl_sub_qrut( cntl ) ); /* panel is handled with the sub-control tree of cntl; return value is not checked */
00087 
00088 
00089     if ( FLA_Obj_width( A12 ) > 0 ) /* nothing to update when no columns lie to the right of the panel */
00090     {
00091       /*
00092         W12T  = FLA_Top_part( W12, b );
00093         W12TL = FLA_Left_part( W12T, FLA_Obj_width( A12 ) );
00094       */
00095 
00096       FLA_Part_2x2( W12,    &W12TL, &W12TR,
00097                             &W12BL, &W12BR,     b, FLA_Obj_width( A12 ), FLA_TL ); /* W12TL: top-left b x width(A12) corner of W12; the other three parts are unused */
00098 
00099       /*
00100         U11 = trilu( A11 );
00101         U21 = A21;
00102       
00103         W12TL = triu( inv(T1T) )' * ( U11' * A12 + U21' * A22 );
00104       */
00105 
00106       FLA_Copy_external( A12, W12TL ); /* W12TL = A12 */
00107       FLA_Trmm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR, 
00108                          FLA_CONJ_TRANSPOSE, FLA_UNIT_DIAG,
00109                          FLA_ONE, A11, W12TL ); /* W12TL = U11' * W12TL, reading only the unit lower triangle of A11 */
00110       FLA_Gemm_external( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
00111                          FLA_ONE, A21, A22, FLA_ONE, W12TL ); /* W12TL = W12TL + A21' * A22 */
00112       FLA_Trsm_external( FLA_LEFT, FLA_UPPER_TRIANGULAR, 
00113                          FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG, 
00114                          FLA_ONE, T1T, W12TL ); /* W12TL = inv( triu( T1T ) )' * W12TL */
00115 
00116       /*
00117         A22 = A22 - U21 * W12TL;
00118         A12 = A12 - U11 * W12TL;
00119       */
00120 
00121       FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 
00122                          FLA_MINUS_ONE, A21, W12TL, FLA_ONE, A22 ); /* A22 = A22 - A21 * W12TL; must run before W12TL is overwritten below */
00123       FLA_Trmm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR, 
00124                          FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
00125                          FLA_MINUS_ONE, A11, W12TL ); /* W12TL = -U11 * W12TL (in place) */
00126       FLA_Axpy_external( FLA_ONE, W12TL, A12 ); /* A12 = A12 + W12TL, i.e. A12 - U11 * (old W12TL) */
00127     }
00128 
00129     /*------------------------------------------------------------*/
00130 
00131     FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00, A01, /**/ A02,
00132                                                      A10, A11, /**/ A12,
00133                             /* ************** */  /* ****************** */
00134                               &ABL, /**/ &ABR,       A20, A21, /**/ A22,
00135                               FLA_TL );
00136 
00137     FLA_Cont_with_1x3_to_1x2( &TL,  /**/ &TR,        T0, T1, /**/ W12,
00138                               FLA_LEFT );
00139 
00140   }
00141 
00142   return FLA_SUCCESS; /* always reports success; no error path in this routine */
00143 }


Generated on Mon Jul 6 05:45:56 2009 for libflame by  doxygen 1.5.9