Functions | |
FLA_Error | FLA_Gemm_external (FLA_Trans transa, FLA_Trans transb, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C) |
void FLA_F2C() | fla_gemm_external_f (F_INT *transa, F_INT *transb, F_INT *alpha, F_INT *A, F_INT *B, F_INT *beta, F_INT *C, F_INT *IERROR) |
FLA_Error FLA_Gemm_external | ( | FLA_Trans | transa, | |
FLA_Trans | transb, | |||
FLA_Obj | alpha, | |||
FLA_Obj | A, | |||
FLA_Obj | B, | |||
FLA_Obj | beta, | |||
FLA_Obj | C | |||
) |
References cblas_cgemm(), cblas_dgemm(), cblas_sgemm(), cblas_zgemm(), CblasColMajor, cgemm(), dgemm(), FLA_Check_error_level(), FLA_Copyt_external(), FLA_Gemm_check(), FLA_Obj_create_conf_to(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_has_zero_dim(), FLA_Obj_ldim(), FLA_Obj_length(), FLA_Obj_width(), FLA_Param_map_to_blas_trans(), FLA_Scal_external(), sgemm(), and zgemm().
Referenced by FLA_Gemm(), fla_gemm_external_f(), FLA_Gemm_hh_task(), FLA_Gemm_hn_task(), FLA_Gemm_ht_task(), FLA_Gemm_nh_task(), FLA_Gemm_nn_task(), FLA_Gemm_nt_task(), FLA_Gemm_task(), FLA_Gemm_th_task(), FLA_Gemm_tn_task(), FLA_Gemm_tt_task(), FLA_Gemp(), FLA_Gepm(), FLA_Gepp(), FLA_LQ_UT_Accum_T_blk_var1(), FLA_LQ_UT_blk_var2(), FLA_QR_UT_Accum_T_blk_var1(), FLA_QR_UT_blk_var2(), FLA_SA_FS_blk(), and FLA_SA_LU_blk().
00036 { 00037 FLA_Datatype datatype; 00038 int k_AB; 00039 int m_A, n_A, ldim_A; 00040 int ldim_B; 00041 int m_C, n_C, ldim_C; 00042 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00043 CBLAS_ORDER cblas_order = CblasColMajor; 00044 CBLAS_TRANSPOSE blas_transa; 00045 CBLAS_TRANSPOSE blas_transb; 00046 #else 00047 char blas_transa; 00048 char blas_transb; 00049 #endif 00050 FLA_Obj A_copy; 00051 FLA_Obj B_copy; 00052 00053 if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) 00054 FLA_Gemm_check( transa, transb, alpha, A, B, beta, C ); 00055 00056 if ( FLA_Obj_has_zero_dim( C ) ) return FLA_SUCCESS; 00057 00058 if ( FLA_Obj_has_zero_dim( A ) || FLA_Obj_has_zero_dim( B ) ) 00059 { 00060 FLA_Scal_external( beta, C ); 00061 return FLA_SUCCESS; 00062 } 00063 00064 datatype = FLA_Obj_datatype( A ); 00065 00066 m_A = FLA_Obj_length( A ); 00067 n_A = FLA_Obj_width( A ); 00068 ldim_A = FLA_Obj_ldim( A ); 00069 00070 ldim_B = FLA_Obj_ldim( B ); 00071 00072 m_C = FLA_Obj_length( C ); 00073 n_C = FLA_Obj_width( C ); 00074 ldim_C = FLA_Obj_ldim( C ); 00075 00076 if ( transa == FLA_NO_TRANSPOSE || transa == FLA_CONJ_NO_TRANSPOSE ) 00077 k_AB = n_A; 00078 else 00079 k_AB = m_A; 00080 00081 if ( transa == FLA_CONJ_NO_TRANSPOSE ) 00082 { 00083 FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &A_copy ); 00084 FLA_Copyt_external( FLA_CONJ_NO_TRANSPOSE, A, A_copy ); 00085 00086 ldim_A = FLA_Obj_ldim( A_copy ); 00087 } 00088 else 00089 { 00090 A_copy = A; 00091 } 00092 00093 if ( transb == FLA_CONJ_NO_TRANSPOSE ) 00094 { 00095 FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, B, &B_copy ); 00096 FLA_Copyt_external( FLA_CONJ_NO_TRANSPOSE, B, B_copy ); 00097 00098 ldim_B = FLA_Obj_ldim( B_copy ); 00099 } 00100 else 00101 { 00102 B_copy = B; 00103 } 00104 00105 FLA_Param_map_to_blas_trans( transa, &blas_transa ); 00106 FLA_Param_map_to_blas_trans( transb, &blas_transb ); 00107 00108 00109 switch( datatype ){ 00110 00111 case FLA_FLOAT: 00112 { 00113 float *buff_A = ( float * ) FLA_FLOAT_PTR( A ); 00114 float *buff_B = ( float * ) FLA_FLOAT_PTR( B ); 00115 float *buff_C = ( float * ) FLA_FLOAT_PTR( C ); 00116 float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha ); 00117 float *buff_beta = ( float * ) FLA_FLOAT_PTR( beta ); 00118 00119 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00120 cblas_sgemm( cblas_order, 00121 blas_transa, 00122 blas_transb, 00123 m_C, 00124 n_C, 00125 k_AB, 00126 *buff_alpha, 00127 buff_A, ldim_A, 00128 buff_B, ldim_B, 00129 *buff_beta, 00130 buff_C, ldim_C ); 00131 #else 00132 FLA_C2F( sgemm )( &blas_transa, 00133 &blas_transb, 00134 &m_C, 00135 &n_C, 00136 &k_AB, 00137 buff_alpha, 00138 buff_A, &ldim_A, 00139 buff_B, &ldim_B, 00140 buff_beta, 00141 buff_C, &ldim_C ); 00142 #endif 00143 break; 00144 } 00145 00146 case FLA_DOUBLE: 00147 { 00148 double *buff_A = ( double * ) FLA_DOUBLE_PTR( A ); 00149 double *buff_B = ( double * ) FLA_DOUBLE_PTR( B ); 00150 double *buff_C = ( double * ) FLA_DOUBLE_PTR( C ); 00151 double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha ); 00152 double *buff_beta = ( double * ) FLA_DOUBLE_PTR( beta ); 00153 00154 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00155 cblas_dgemm( cblas_order, 00156 blas_transa, 00157 blas_transb, 00158 m_C, 00159 n_C, 00160 k_AB, 00161 *buff_alpha, 00162 buff_A, ldim_A, 00163 buff_B, ldim_B, 00164 *buff_beta, 00165 buff_C, ldim_C ); 00166 #else 00167 FLA_C2F( dgemm )( &blas_transa, 00168 &blas_transb, 00169 &m_C, 00170 &n_C, 00171 &k_AB, 00172 buff_alpha, 00173 buff_A, &ldim_A, 00174 buff_B, &ldim_B, 00175 buff_beta, 00176 buff_C, &ldim_C ); 00177 #endif 00178 break; 00179 } 00180 00181 case FLA_COMPLEX: 00182 { 00183 scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A_copy ); 00184 scomplex *buff_B = ( scomplex * ) FLA_COMPLEX_PTR( B_copy ); 00185 scomplex *buff_C = ( scomplex * ) FLA_COMPLEX_PTR( C ); 00186 scomplex *buff_alpha = ( scomplex * ) FLA_COMPLEX_PTR( alpha ); 00187 scomplex *buff_beta = ( scomplex * ) FLA_COMPLEX_PTR( beta ); 00188 00189 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00190 cblas_cgemm( cblas_order, 00191 blas_transa, 00192 blas_transb, 00193 m_C, 00194 n_C, 00195 k_AB, 00196 buff_alpha, 00197 buff_A, ldim_A, 00198 buff_B, ldim_B, 00199 buff_beta, 00200 buff_C, ldim_C ); 00201 #else 00202 FLA_C2F( cgemm )( &blas_transa, 00203 &blas_transb, 00204 &m_C, 00205 &n_C, 00206 &k_AB, 00207 buff_alpha, 00208 buff_A, &ldim_A, 00209 buff_B, &ldim_B, 00210 buff_beta, 00211 buff_C, &ldim_C ); 00212 #endif 00213 break; 00214 } 00215 00216 case FLA_DOUBLE_COMPLEX: 00217 { 00218 dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A_copy ); 00219 dcomplex *buff_B = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B_copy ); 00220 dcomplex *buff_C = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( C ); 00221 dcomplex *buff_alpha = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha ); 00222 dcomplex *buff_beta = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( beta ); 00223 00224 #ifdef FLA_ENABLE_CBLAS_INTERFACE 00225 cblas_zgemm( cblas_order, 00226 blas_transa, 00227 blas_transb, 00228 m_C, 00229 n_C, 00230 k_AB, 00231 buff_alpha, 00232 buff_A, ldim_A, 00233 buff_B, ldim_B, 00234 buff_beta, 00235 buff_C, ldim_C ); 00236 #else 00237 FLA_C2F( zgemm )( &blas_transa, 00238 &blas_transb, 00239 &m_C, 00240 &n_C, 00241 &k_AB, 00242 buff_alpha, 00243 buff_A, &ldim_A, 00244 buff_B, &ldim_B, 00245 buff_beta, 00246 buff_C, &ldim_C ); 00247 #endif 00248 break; 00249 } 00250 00251 } 00252 00253 if ( transa == FLA_CONJ_NO_TRANSPOSE ) 00254 FLA_Obj_free( &A_copy ); 00255 00256 if ( transb == FLA_CONJ_NO_TRANSPOSE ) 00257 FLA_Obj_free( &B_copy ); 00258 00259 return FLA_SUCCESS; 00260 }
void FLA_F2C() fla_gemm_external_f | ( | F_INT * | transa, | |
F_INT * | transb, | |||
F_INT * | alpha, | |||
F_INT * | A, | |||
F_INT * | B, | |||
F_INT * | beta, | |||
F_INT * | C, | |||
F_INT * | IERROR | |||
) |
References FLA_Gemm_external().
00264 { 00265 *IERROR = FLA_Gemm_external( *( ( FLA_Trans * ) transa ), 00266 *( ( FLA_Trans * ) transb ), 00267 *( ( FLA_Obj * ) alpha ), 00268 *( ( FLA_Obj * ) A ), 00269 *( ( FLA_Obj * ) B ), 00270 *( ( FLA_Obj * ) beta ), 00271 *( ( FLA_Obj * ) C ) ); 00272 }