FLA_QR_UT_inc_blk_var2.c File Reference

(r)


Functions

FLA_Error FLA_QR_UT_inc_blk_var2 (FLA_Obj A, FLA_Obj TW, FLA_Obj U, fla_qrutinc_t *cntl)

Function Documentation

FLA_Error FLA_QR_UT_inc_blk_var2 ( FLA_Obj  A,
FLA_Obj  TW,
FLA_Obj  U,
fla_qrutinc_t * cntl 
)

References FLA_Apply_Q_UT_internal(), FLA_Apply_Q_UT_UD_internal(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Obj_min_dim(), FLA_Part_1x2(), FLA_Part_2x2(), FLA_QR_UT_copy_internal(), FLA_QR_UT_UD_internal(), FLA_Repart_1x2_to_1x3(), and FLA_Repart_2x2_to_3x3().

Referenced by FLASH_QR_UT_inc_opt1().

00036 {
00037   FLA_Obj ATL,   ATR,      A00, A01, A02, 
00038           ABL,   ABR,      A10, A11, A12,
00039                            A20, A21, A22;
00040 
00041   FLA_Obj TTL,   WTR,      T00, W01, W02, 
00042           TBL,   TBR,      T10, T11, W12,
00043                            T20, T21, T22;
00044 
00045   FLA_Obj UL,    UR,       U0,  U11,  U2;
00046 
00047   dim_t b;
00048 
00049   FLA_Part_2x2( A,    &ATL, &ATR,
00050                       &ABL, &ABR,     0, 0, FLA_TL );
00051 
00052   FLA_Part_2x2( TW,   &TTL, &WTR,
00053                       &TBL, &TBR,     0, 0, FLA_TL );
00054 
00055   FLA_Part_1x2( U,    &UL,  &UR,      0, FLA_LEFT );
00056 
00057   while ( FLA_Obj_min_dim( ABR ) > 0 ){
00058     /* Each iteration exposes the next block A11 (repartitioning by b in each dimension), factors it, and updates A12, A21 and A22. */
00059     b = FLA_Determine_blocksize( ABR, FLA_BR, FLA_Cntl_blocksize( cntl ) );
00060 
00061     FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00, /**/ &A01, &A02,
00062                         /* ************* */   /* ******************** */
00063                                                 &A10, /**/ &A11, &A12,
00064                            ABL, /**/ ABR,       &A20, /**/ &A21, &A22,
00065                            b, b, FLA_BR );
00066 
00067     FLA_Repart_2x2_to_3x3( TTL, /**/ WTR,       &T00, /**/ &W01, &W02,
00068                         /* ************* */   /* ******************** */
00069                                                 &T10, /**/ &T11, &W12,
00070                            TBL, /**/ TBR,       &T20, /**/ &T21, &T22,
00071                            b, b, FLA_BR );
00072 
00073     FLA_Repart_1x2_to_1x3( UL,  /**/ UR,        &U0, /**/ &U11, &U2,
00074                            b, FLA_RIGHT );
00075 
00076     /*------------------------------------------------------------*/
00077 
00078     /*
00079        U11 holds a copy of the factored A11 for FLA_Apply_Q_UT() to read.
00080        This avoids a false write-after-read dependency on A11, so that
00081        FLA_QR_UT_UD() may proceed while FLA_Apply_Q_UT() executes.
00082     */
00083 
00084 
00085     /*
00086        Perform a QR factorization (via UT transform) on A11:
00087      
00088          [ A11, T11 ] = QR_UT( A11, T11 );
00089 
00090        where T11 refers to a single storage block that refers to an
00091        nb_alg-by-b row-panel of upper triangular block Householder
00092        transforms. Here, b is the storage blocksize while nb_alg is
00093        the algorithmic blocksize used by the QR factorization.
00094        Typically nb_alg << b.
00095        
00096        After the factorization is complete, A11 is copied into U11.
00097      
00098     */
00099 
00100     FLA_QR_UT_copy_internal( A11, T11, U11,
00101                              FLA_Cntl_sub_qrut( cntl ) );
00102 
00103 
00104     /*
00105        Apply Q^H to A12 from the left:
00106      
00107          A12 = Q^H * A12
00108      
00109        where Q is formed from T11 and U11, the copy of the factored
00110        A11. Note that W12 refers to a row-panel of blocks where each
00111        block refers to an nb_alg-by-b row-panel of workspace.
00112     */
00113 
00114     FLA_Apply_Q_UT_internal( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_COLUMNWISE,
00115                              U11, T11, W12, A12,
00116                              FLA_Cntl_sub_apqut( cntl ) );
00117 
00118 
00119     /*
00120        Update QR factorization of A11 with each block of A21, storing
00121        block Householder transforms into corresponding blocks of T21.
00122      
00123          [ A11, ...
00124            A21, T21 ] = QR_UT_UD( A11, ...
00125                                   A21, T21 );
00126     */
00127 
00128     FLA_QR_UT_UD_internal( A11,
00129                            A21, T21, 
00130                            FLA_Cntl_sub_qrutud( cntl ) );
00131 
00132 
00133     /*
00134        Apply Q^H to A12 and A22 from the left:
00135      
00136            / A12 \ = Q^H * / A12 \
00137            \ A22 /         \ A22 / 
00138      
00139        where Q is formed from A21 and T21; W12 is again the workspace.
00140     */
00141 
00142     FLA_Apply_Q_UT_UD_internal( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_COLUMNWISE,
00143                                 A21, T21, W12, A12,
00144                                                A22,
00145                                 FLA_Cntl_sub_apqutud( cntl ) );
00146 
00147     /*------------------------------------------------------------*/
00148 
00149     FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00, A01, /**/ A02,
00150                                                      A10, A11, /**/ A12,
00151                             /* ************** */  /* ****************** */
00152                               &ABL, /**/ &ABR,       A20, A21, /**/ A22,
00153                               FLA_TL );
00154 
00155     FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &WTR,       T00, W01, /**/ W02,
00156                                                      T10, T11, /**/ W12,
00157                             /* ************** */  /* ****************** */
00158                               &TBL, /**/ &TBR,       T20, T21, /**/ T22,
00159                               FLA_TL );
00160 
00161     FLA_Cont_with_1x3_to_1x2( &UL,  /**/ &UR,        U0, U11, /**/ U2,
00162                               FLA_LEFT );
00163 
00164   }
00165 
00166   return FLA_SUCCESS;
00167 }


Generated on Mon Jul 6 05:45:56 2009 for libflame by  doxygen 1.5.9