Go to the source code of this file.
Functions | |
FLA_Error | FLA_Accum_T_UT_fc_unb_var1 (FLA_Obj A, FLA_Obj tau, FLA_Obj T) |
FLA_Error | FLA_Accum_T_UT_fc_opt_var1 (FLA_Obj A, FLA_Obj tau, FLA_Obj T) |
FLA_Error | FLA_Accum_T_UT_fc_opt_var1_float (FLA_Obj A, FLA_Obj tau, FLA_Obj T) |
FLA_Error | FLA_Accum_T_UT_fc_opt_var1_double (FLA_Obj A, FLA_Obj tau, FLA_Obj T) |
FLA_Error | FLA_Accum_T_UT_fc_opt_var1_scomplex (FLA_Obj A, FLA_Obj tau, FLA_Obj T) |
FLA_Error | FLA_Accum_T_UT_fc_opt_var1_dcomplex (FLA_Obj A, FLA_Obj tau, FLA_Obj T) |
References FLA_Accum_T_UT_fc_opt_var1_dcomplex(), FLA_Accum_T_UT_fc_opt_var1_double(), FLA_Accum_T_UT_fc_opt_var1_float(), FLA_Accum_T_UT_fc_opt_var1_scomplex(), and FLA_Obj_datatype().
Referenced by FLA_Accum_T_UT_internal().
00036 { 00037 FLA_Datatype datatype; 00038 00039 datatype = FLA_Obj_datatype( A ); 00040 00041 switch ( datatype ) 00042 { 00043 case FLA_FLOAT: 00044 FLA_Accum_T_UT_fc_opt_var1_float( A, tau, T ); 00045 break; 00046 00047 case FLA_DOUBLE: 00048 FLA_Accum_T_UT_fc_opt_var1_double( A, tau, T ); 00049 break; 00050 00051 case FLA_COMPLEX: 00052 FLA_Accum_T_UT_fc_opt_var1_scomplex( A, tau, T ); 00053 break; 00054 00055 case FLA_DOUBLE_COMPLEX: 00056 FLA_Accum_T_UT_fc_opt_var1_dcomplex( A, tau, T ); 00057 break; 00058 } 00059 00060 return FLA_SUCCESS; 00061 }
References dscal(), FLA_MINUS_ONE, FLA_Obj_ldim(), FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, dcomplex::real, zcopy(), and zgemv().
Referenced by FLA_Accum_T_UT_fc_opt_var1().
00249 { 00250 dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A ); 00251 dcomplex* buff_tau = FLA_DOUBLE_COMPLEX_PTR( tau ); 00252 dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T ); 00253 dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE ); 00254 dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE ); 00255 00256 int m_A = FLA_Obj_length( A ); 00257 int n_AT = FLA_Obj_width( A ); 00258 int ldim_A = FLA_Obj_ldim( A ); 00259 int ldim_T = FLA_Obj_ldim( T ); 00260 00261 char trans = 'C'; 00262 int inc_one = 1; 00263 int inc_two = 2; 00264 int i; 00265 00266 for ( i = 0; i < n_AT; ++i ) 00267 { 00268 dcomplex* a10t = buff_A + (0 )*ldim_A + i; 00269 dcomplex* A20 = buff_A + (0 )*ldim_A + i + 1; 00270 dcomplex* a21 = buff_A + (i )*ldim_A + i + 1; 00271 00272 dcomplex* tau1 = buff_tau + i; 00273 00274 dcomplex* tau11 = buff_T + (i )*ldim_T + i; 00275 dcomplex* t01 = buff_T + (i )*ldim_T; 00276 00277 int m_ahead = m_A - i - 1; 00278 int n_behind = i; 00279 00280 /*------------------------------------------------------------*/ 00281 00282 // FLA_Copy_external( tau1, tau11 ); 00283 *tau11 = *tau1; 00284 00285 if ( i > 0 ) 00286 { 00287 // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 ); 00288 FLA_C2F( zcopy )( &n_behind, 00289 a10t, &ldim_A, 00290 t01, &inc_one ); 00291 FLA_C2F( dscal )( &n_behind, 00292 &(buff_m1->real), 00293 (( double* ) t01 ) + 1, &inc_two ); 00294 00295 // // t01 = a10t' + A20' * a21; 00296 // FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 ); 00297 FLA_C2F( zgemv )( &trans, 00298 &m_ahead, &n_behind, 00299 buff_1, 00300 A20, &ldim_A, 00301 a21, &inc_one, 00302 buff_1, 00303 t01, &inc_one ); 00304 } 00305 00306 /*------------------------------------------------------------*/ 00307 00308 } 00309 00310 return FLA_SUCCESS; 00311 }
References dcopy(), dgemv(), FLA_Obj_ldim(), FLA_Obj_length(), FLA_Obj_width(), and FLA_ONE.
Referenced by FLA_Accum_T_UT_fc_opt_var1().
00124 { 00125 double* buff_A = FLA_DOUBLE_PTR( A ); 00126 double* buff_tau = FLA_DOUBLE_PTR( tau ); 00127 double* buff_T = FLA_DOUBLE_PTR( T ); 00128 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE ); 00129 00130 int m_A = FLA_Obj_length( A ); 00131 int n_AT = FLA_Obj_width( A ); 00132 int ldim_A = FLA_Obj_ldim( A ); 00133 int ldim_T = FLA_Obj_ldim( T ); 00134 00135 char trans = 'T'; 00136 int inc_one = 1; 00137 int i; 00138 00139 for ( i = 0; i < n_AT; ++i ) 00140 { 00141 double* a10t = buff_A + (0 )*ldim_A + i; 00142 double* A20 = buff_A + (0 )*ldim_A + i + 1; 00143 double* a21 = buff_A + (i )*ldim_A + i + 1; 00144 00145 double* tau1 = buff_tau + i; 00146 00147 double* tau11 = buff_T + (i )*ldim_T + i; 00148 double* t01 = buff_T + (i )*ldim_T; 00149 00150 int m_ahead = m_A - i - 1; 00151 int n_behind = i; 00152 00153 /*------------------------------------------------------------*/ 00154 00155 // FLA_Copy_external( tau1, tau11 ); 00156 *tau11 = *tau1; 00157 00158 if ( i > 0 ) 00159 { 00160 // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 ); 00161 FLA_C2F( dcopy )( &n_behind, 00162 a10t, &ldim_A, 00163 t01, &inc_one ); 00164 00165 // // t01 = a10t' + A20' * a21; 00166 // FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 ); 00167 FLA_C2F( dgemv )( &trans, 00168 &m_ahead, &n_behind, 00169 buff_1, 00170 A20, &ldim_A, 00171 a21, &inc_one, 00172 buff_1, 00173 t01, &inc_one ); 00174 } 00175 00176 /*------------------------------------------------------------*/ 00177 00178 } 00179 00180 return FLA_SUCCESS; 00181 }
References FLA_Obj_ldim(), FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, scopy(), and sgemv().
Referenced by FLA_Accum_T_UT_fc_opt_var1().
00064 { 00065 float* buff_A = FLA_FLOAT_PTR( A ); 00066 float* buff_tau = FLA_FLOAT_PTR( tau ); 00067 float* buff_T = FLA_FLOAT_PTR( T ); 00068 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE ); 00069 00070 int m_A = FLA_Obj_length( A ); 00071 int n_AT = FLA_Obj_width( A ); 00072 int ldim_A = FLA_Obj_ldim( A ); 00073 int ldim_T = FLA_Obj_ldim( T ); 00074 00075 char trans = 'T'; 00076 int inc_one = 1; 00077 int i; 00078 00079 for ( i = 0; i < n_AT; ++i ) 00080 { 00081 float* a10t = buff_A + (0 )*ldim_A + i; 00082 float* A20 = buff_A + (0 )*ldim_A + i + 1; 00083 float* a21 = buff_A + (i )*ldim_A + i + 1; 00084 00085 float* tau1 = buff_tau + i; 00086 00087 float* tau11 = buff_T + (i )*ldim_T + i; 00088 float* t01 = buff_T + (i )*ldim_T; 00089 00090 int m_ahead = m_A - i - 1; 00091 int n_behind = i; 00092 00093 /*------------------------------------------------------------*/ 00094 00095 // FLA_Copy_external( tau1, tau11 ); 00096 *tau11 = *tau1; 00097 00098 if ( i > 0 ) 00099 { 00100 // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 ); 00101 FLA_C2F( scopy )( &n_behind, 00102 a10t, &ldim_A, 00103 t01, &inc_one ); 00104 00105 // // t01 = a10t' + A20' * a21; 00106 // FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 ); 00107 FLA_C2F( sgemv )( &trans, 00108 &m_ahead, &n_behind, 00109 buff_1, 00110 A20, &ldim_A, 00111 a21, &inc_one, 00112 buff_1, 00113 t01, &inc_one ); 00114 } 00115 00116 /*------------------------------------------------------------*/ 00117 00118 } 00119 00120 return FLA_SUCCESS; 00121 }
References ccopy(), cgemv(), FLA_MINUS_ONE, FLA_Obj_ldim(), FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, scomplex::real, and sscal().
Referenced by FLA_Accum_T_UT_fc_opt_var1().
00184 { 00185 scomplex* buff_A = FLA_COMPLEX_PTR( A ); 00186 scomplex* buff_tau = FLA_COMPLEX_PTR( tau ); 00187 scomplex* buff_T = FLA_COMPLEX_PTR( T ); 00188 scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE ); 00189 scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE ); 00190 00191 int m_A = FLA_Obj_length( A ); 00192 int n_AT = FLA_Obj_width( A ); 00193 int ldim_A = FLA_Obj_ldim( A ); 00194 int ldim_T = FLA_Obj_ldim( T ); 00195 00196 char trans = 'C'; 00197 int inc_one = 1; 00198 int inc_two = 2; 00199 int i; 00200 00201 for ( i = 0; i < n_AT; ++i ) 00202 { 00203 scomplex* a10t = buff_A + (0 )*ldim_A + i; 00204 scomplex* A20 = buff_A + (0 )*ldim_A + i + 1; 00205 scomplex* a21 = buff_A + (i )*ldim_A + i + 1; 00206 00207 scomplex* tau1 = buff_tau + i; 00208 00209 scomplex* tau11 = buff_T + (i )*ldim_T + i; 00210 scomplex* t01 = buff_T + (i )*ldim_T; 00211 00212 int m_ahead = m_A - i - 1; 00213 int n_behind = i; 00214 00215 /*------------------------------------------------------------*/ 00216 00217 // FLA_Copy_external( tau1, tau11 ); 00218 *tau11 = *tau1; 00219 00220 if ( i > 0 ) 00221 { 00222 // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 ); 00223 FLA_C2F( ccopy )( &n_behind, 00224 a10t, &ldim_A, 00225 t01, &inc_one ); 00226 FLA_C2F( sscal )( &n_behind, 00227 &(buff_m1->real), 00228 (( float* ) t01 ) + 1, &inc_two ); 00229 00230 // // t01 = a10t' + A20' * a21; 00231 // FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 ); 00232 FLA_C2F( cgemv )( &trans, 00233 &m_ahead, &n_behind, 00234 buff_1, 00235 A20, &ldim_A, 00236 a21, &inc_one, 00237 buff_1, 00238 t01, &inc_one ); 00239 } 00240 00241 /*------------------------------------------------------------*/ 00242 00243 } 00244 00245 return FLA_SUCCESS; 00246 }
References FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy_external(), FLA_Copyt_external(), FLA_Gemv_external(), FLA_Obj_min_dim(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), and FLA_Repart_2x2_to_3x3().
00036 { 00037 FLA_Obj ATL, ATR, A00, a01, A02, 00038 ABL, ABR, a10t, alpha11, a12t, 00039 A20, a21, A22; 00040 00041 FLA_Obj tauT, tau0, 00042 tauB, tau1, 00043 tau2; 00044 00045 FLA_Obj TTL, TTR, T00, t01, T02, 00046 TBL, TBR, t10t, tau11, t12t, 00047 T20, t21, T22; 00048 00049 00050 FLA_Part_2x2( A, &ATL, &ATR, 00051 &ABL, &ABR, 0, 0, FLA_TL ); 00052 00053 FLA_Part_2x1( tau, &tauT, 00054 &tauB, 0, FLA_TOP ); 00055 00056 FLA_Part_2x2( T, &TTL, &TTR, 00057 &TBL, &TBR, 0, 0, FLA_TL ); 00058 00059 while ( FLA_Obj_min_dim( ABR ) > 0 ){ 00060 00061 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02, 00062 /* ************* */ /* ************************** */ 00063 &a10t, /**/ &alpha11, &a12t, 00064 ABL, /**/ ABR, &A20, /**/ &a21, &A22, 00065 1, 1, FLA_BR ); 00066 00067 FLA_Repart_2x1_to_3x1( tauT, &tau0, 00068 /* ** */ /* ** */ 00069 &tau1, 00070 tauB, &tau2, 1, FLA_BOTTOM ); 00071 00072 FLA_Repart_2x2_to_3x3( TTL, /**/ TTR, &T00, /**/ &t01, &T02, 00073 /* ************* */ /* ************************ */ 00074 &t10t, /**/ &tau11, &t12t, 00075 TBL, /**/ TBR, &T20, /**/ &t21, &T22, 00076 1, 1, FLA_BR ); 00077 00078 /*------------------------------------------------------------*/ 00079 00080 // tau11 = tau1; 00081 FLA_Copy_external( tau1, tau11 ); 00082 00083 // t01 = a10t' + A20' * a21; 00084 FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 ); 00085 FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 ); 00086 00087 /*------------------------------------------------------------*/ 00088 00089 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02, 00090 a10t, alpha11, /**/ a12t, 00091 /* ************** */ /* ************************ */ 00092 &ABL, /**/ &ABR, A20, a21, /**/ A22, 00093 FLA_TL ); 00094 00095 FLA_Cont_with_3x1_to_2x1( &tauT, tau0, 00096 tau1, 00097 /* ** */ /* ** */ 00098 &tauB, tau2, FLA_TOP ); 00099 00100 FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR, T00, t01, /**/ T02, 00101 t10t, tau11, /**/ t12t, 00102 /* ************** */ /* ********************** */ 00103 &TBL, /**/ &TBR, T20, t21, /**/ T22, 00104 FLA_TL ); 00105 00106 } 00107 00108 return FLA_SUCCESS; 00109 }