Functions | |
FLA_Error | FLA_QR_UT_inc_blk_var2 (FLA_Obj A, FLA_Obj TW, FLA_Obj U, fla_qrutinc_t *cntl) |
FLA_Error FLA_QR_UT_inc_blk_var2( FLA_Obj A, FLA_Obj TW, FLA_Obj U, fla_qrutinc_t *cntl )
References FLA_Apply_Q_UT_internal(), FLA_Apply_Q_UT_UD_internal(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Obj_min_dim(), FLA_Part_1x2(), FLA_Part_2x2(), FLA_QR_UT_copy_internal(), FLA_QR_UT_UD_internal(), FLA_Repart_1x2_to_1x3(), and FLA_Repart_2x2_to_3x3().
Referenced by FLASH_QR_UT_inc_opt1().
00036 { 00037 FLA_Obj ATL, ATR, A00, A01, A02, 00038 ABL, ABR, A10, A11, A12, 00039 A20, A21, A22; 00040 00041 FLA_Obj TTL, WTR, T00, W01, W02, 00042 TBL, TBR, T10, T11, W12, 00043 T20, T21, T22; 00044 00045 FLA_Obj UL, UR, U0, U11, U2; 00046 00047 dim_t b; 00048 00049 FLA_Part_2x2( A, &ATL, &ATR, 00050 &ABL, &ABR, 0, 0, FLA_TL ); 00051 00052 FLA_Part_2x2( TW, &TTL, &WTR, 00053 &TBL, &TBR, 0, 0, FLA_TL ); 00054 00055 FLA_Part_1x2( U, &UL, &UR, 0, FLA_LEFT ); 00056 00057 while ( FLA_Obj_min_dim( ABR ) > 0 ){ 00058 00059 b = FLA_Determine_blocksize( ABR, FLA_BR, FLA_Cntl_blocksize( cntl ) ); 00060 00061 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02, 00062 /* ************* */ /* ******************** */ 00063 &A10, /**/ &A11, &A12, 00064 ABL, /**/ ABR, &A20, /**/ &A21, &A22, 00065 b, b, FLA_BR ); 00066 00067 FLA_Repart_2x2_to_3x3( TTL, /**/ WTR, &T00, /**/ &W01, &W02, 00068 /* ************* */ /* ******************** */ 00069 &T10, /**/ &T11, &W12, 00070 TBL, /**/ TBR, &T20, /**/ &T21, &T22, 00071 b, b, FLA_BR ); 00072 00073 FLA_Repart_1x2_to_1x3( UL, /**/ UR, &U0, /**/ &U11, &U2, 00074 b, FLA_RIGHT ); 00075 00076 /*------------------------------------------------------------*/ 00077 00078 /* 00079 Use U11 to hold a copy of A11 to avoid a false 00080 write-after-read dependency so that FLA_QR_UT_UD() may proceed 00081 while FLA_Apply_Q_UT() executes. 00082 */ 00083 00084 00085 /* 00086 Perform a QR factorization (via UT transform) on A11: 00087 00088 [ A11, T11 ] = QR_UT( A11, T11 ); 00089 00090 where T11 refers to a single storage block that refers to an 00091 nb_alg-by-b row-panel of upper triangular block Householder 00092 transforms. Here, b is the storage blocksize while nb_alg is 00093 the algorithmic blocksize used by the QR factorization. 00094 Typically nb_alg << b. 00095 00096 After the factorization is complete, A11 is copied into U11. 
00097 00098 */ 00099 00100 FLA_QR_UT_copy_internal( A11, T11, U11, 00101 FLA_Cntl_sub_qrut( cntl ) ); 00102 00103 00104 /* 00105 Apply Q^H to A12 from the left: 00106 00107 A12 = Q^H * A12 00108 00109 where Q is formed from A11 and T11. Note that W12 refers 00110 to a row-panel of blocks where each block refers to an 00111 nb_alg-by-b row-panel of workspace. 00112 */ 00113 00114 FLA_Apply_Q_UT_internal( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_COLUMNWISE, 00115 U11, T11, W12, A12, 00116 FLA_Cntl_sub_apqut( cntl ) ); 00117 00118 00119 /* 00120 Update QR factorization of A11 with each block of A21, storing 00121 block Householder transforms into corresponding blocks of T21. 00122 00123 [ A11, ... 00124 A21, T21 ] = QR_UT_UD( A11, ... 00125 A21, T21 ); 00126 */ 00127 00128 FLA_QR_UT_UD_internal( A11, 00129 A21, T21, 00130 FLA_Cntl_sub_qrutud( cntl ) ); 00131 00132 00133 /* 00134 Apply Q^H to A12 and A22 from the left: 00135 00136 / A12 \ = Q^H * / A12 \ 00137 \ A22 / \ A22 / 00138 00139 where Q is formed from A21 and T21. 00140 */ 00141 00142 FLA_Apply_Q_UT_UD_internal( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_COLUMNWISE, 00143 A21, T21, W12, A12, 00144 A22, 00145 FLA_Cntl_sub_apqutud( cntl ) ); 00146 00147 /*------------------------------------------------------------*/ 00148 00149 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02, 00150 A10, A11, /**/ A12, 00151 /* ************** */ /* ****************** */ 00152 &ABL, /**/ &ABR, A20, A21, /**/ A22, 00153 FLA_TL ); 00154 00155 FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &WTR, T00, W01, /**/ W02, 00156 T10, T11, /**/ W12, 00157 /* ************** */ /* ****************** */ 00158 &TBL, /**/ &TBR, T20, T21, /**/ T22, 00159 FLA_TL ); 00160 00161 FLA_Cont_with_1x3_to_1x2( &UL, /**/ &UR, U0, U11, /**/ U2, 00162 FLA_LEFT ); 00163 00164 } 00165 00166 return FLA_SUCCESS; 00167 }