Functions | |
FLA_Error | FLA_QR_UT_UD_blk_var1 (FLA_Obj U, FLA_Obj D, FLA_Obj T, fla_qrutud_t *cntl) |
FLA_Error FLA_QR_UT_UD_blk_var1 | ( | FLA_Obj | U, | |
FLA_Obj | D, | |||
FLA_Obj | T, | |||
fla_qrutud_t * | cntl | |||
) |
References FLA_Axpy_internal(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy_internal(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_min_dim(), FLA_Obj_set_to_scalar(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_QR_UT_UD_internal(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x2_to_3x3(), FLA_Trsm_internal(), and FLA_ZERO.
Referenced by FLA_QR_UT_UD_internal().
00037 { 00038 FLA_Obj UTL, UTR, U00, U01, U02, 00039 UBL, UBR, U10, U11, U12, 00040 U20, U21, U22; 00041 00042 FLA_Obj DL, DR, D0, D1, D2; 00043 00044 FLA_Obj TL, TR, T0, T1, W12; 00045 00046 FLA_Obj W12T, W12B; 00047 00048 FLA_Obj T1T, T2B; 00049 00050 dim_t b; 00051 00052 FLA_Part_2x2( U, &UTL, &UTR, 00053 &UBL, &UBR, 0, 0, FLA_TL ); 00054 00055 FLA_Part_1x2( D, &DL, &DR, 0, FLA_LEFT ); 00056 00057 FLA_Part_1x2( T, &TL, &TR, 0, FLA_LEFT ); 00058 00059 while ( FLA_Obj_min_dim( UBR ) > 0 ){ 00060 00061 b = FLA_Determine_blocksize( UBR, FLA_BR, FLA_Cntl_blocksize( cntl ) ); 00062 00063 FLA_Repart_2x2_to_3x3( UTL, /**/ UTR, &U00, /**/ &U01, &U02, 00064 /* ************* */ /* ******************** */ 00065 &U10, /**/ &U11, &U12, 00066 UBL, /**/ UBR, &U20, /**/ &U21, &U22, 00067 b, b, FLA_BR ); 00068 00069 FLA_Repart_1x2_to_1x3( DL, /**/ DR, &D0, /**/ &D1, &D2, 00070 b, FLA_RIGHT ); 00071 00072 FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, /**/ &T1, &W12, 00073 b, FLA_RIGHT ); 00074 00075 /*------------------------------------------------------------*/ 00076 00077 /* 00078 T1T = FLA_Top_part( T1, b ); 00079 */ 00080 00081 FLA_Part_2x1( T1, &T1T, 00082 &T2B, b, FLA_TOP ); 00083 00084 /* 00085 [ U11, ... 00086 D1, T1 ] = FLA_QR_UT_UD_internal( U11 00087 D1, T1T ); 00088 */ 00089 00090 FLA_QR_UT_UD_internal( U11, 00091 D1, T1T, 00092 FLA_Cntl_sub_qrutud( cntl ) ); 00093 00094 00095 if ( FLA_Obj_width( U12 ) > 0 ) 00096 { 00097 /* 00098 W12T = FLA_Top_part( W12, b ); 00099 */ 00100 00101 FLA_Part_2x1( W12, &W12T, 00102 &W12B, b, FLA_TOP ); 00103 00104 /* 00105 W12T = inv( triu( T1T ) )' * ( U12 + D1' * D2 ); 00106 */ 00107 00108 FLA_Copy_internal( U12, W12T, 00109 FLA_Cntl_sub_copy( cntl ) ); 00110 00111 FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE, 00112 FLA_ONE, D1, D2, FLA_ONE, W12T, 00113 FLA_Cntl_sub_gemm1( cntl ) ); 00114 00115 FLA_Trsm_internal( FLA_LEFT, FLA_UPPER_TRIANGULAR, 00116 FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG, 00117 FLA_ONE, T1T, W12T, 00118 FLA_Cntl_sub_trsm( cntl ) ); 00119 00120 /* 00121 U12 = U12 - W12T; 00122 D2 = D2 - D1 * W12T; 00123 */ 00124 00125 FLA_Axpy_internal( FLA_MINUS_ONE, W12T, U12, 00126 FLA_Cntl_sub_axpy( cntl ) ); 00127 00128 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00129 FLA_MINUS_ONE, D1, W12T, FLA_ONE, D2, 00130 FLA_Cntl_sub_gemm2( cntl ) ); 00131 00132 FLA_Obj_set_to_scalar( FLA_ZERO, W12T ); 00133 } 00134 00135 /*------------------------------------------------------------*/ 00136 00137 FLA_Cont_with_3x3_to_2x2( &UTL, /**/ &UTR, U00, U01, /**/ U02, 00138 U10, U11, /**/ U12, 00139 /* ************** */ /* ****************** */ 00140 &UBL, /**/ &UBR, U20, U21, /**/ U22, 00141 FLA_TL ); 00142 00143 FLA_Cont_with_1x3_to_1x2( &DL, /**/ &DR, D0, D1, /**/ D2, 00144 FLA_LEFT ); 00145 00146 FLA_Cont_with_1x3_to_1x2( &TL, /**/ &TR, T0, T1, /**/ W12, 00147 FLA_LEFT ); 00148 00149 } 00150 00151 return FLA_SUCCESS; 00152 }