FLA_Gemm_external.c File Reference

(r)


Functions

FLA_Error FLA_Gemm_external (FLA_Trans transa, FLA_Trans transb, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C)
void FLA_F2C() fla_gemm_external_f (F_INT *transa, F_INT *transb, F_INT *alpha, F_INT *A, F_INT *B, F_INT *beta, F_INT *C, F_INT *IERROR)

Function Documentation

FLA_Error FLA_Gemm_external ( FLA_Trans  transa,
FLA_Trans  transb,
FLA_Obj  alpha,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  beta,
FLA_Obj  C 
)

References cblas_cgemm(), cblas_dgemm(), cblas_sgemm(), cblas_zgemm(), CblasColMajor, cgemm(), dgemm(), FLA_Check_error_level(), FLA_Copyt_external(), FLA_Gemm_check(), FLA_Obj_create_conf_to(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_has_zero_dim(), FLA_Obj_ldim(), FLA_Obj_length(), FLA_Obj_width(), FLA_Param_map_to_blas_trans(), FLA_Scal_external(), sgemm(), and zgemm().

Referenced by FLA_Gemm(), fla_gemm_external_f(), FLA_Gemm_hh_task(), FLA_Gemm_hn_task(), FLA_Gemm_ht_task(), FLA_Gemm_nh_task(), FLA_Gemm_nn_task(), FLA_Gemm_nt_task(), FLA_Gemm_task(), FLA_Gemm_th_task(), FLA_Gemm_tn_task(), FLA_Gemm_tt_task(), FLA_Gemp(), FLA_Gepm(), FLA_Gepp(), FLA_LQ_UT_Accum_T_blk_var1(), FLA_LQ_UT_blk_var2(), FLA_QR_UT_Accum_T_blk_var1(), FLA_QR_UT_blk_var2(), FLA_SA_FS_blk(), and FLA_SA_LU_blk().

00036 {
00037   FLA_Datatype datatype;
00038   int          k_AB;
00039   int          m_A, n_A, ldim_A;
00040   int          ldim_B; 
00041   int          m_C, n_C, ldim_C;
00042 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00043   CBLAS_ORDER     cblas_order = CblasColMajor;
00044   CBLAS_TRANSPOSE blas_transa;
00045   CBLAS_TRANSPOSE blas_transb;
00046 #else
00047   char         blas_transa; 
00048   char         blas_transb;
00049 #endif
00050   FLA_Obj      A_copy;
00051   FLA_Obj      B_copy;
00052 
00053   if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) 
00054     FLA_Gemm_check( transa, transb, alpha, A, B, beta, C );
00055 
00056   if ( FLA_Obj_has_zero_dim( C ) ) return FLA_SUCCESS;
00057 
00058   if ( FLA_Obj_has_zero_dim( A ) || FLA_Obj_has_zero_dim( B ) )
00059   {
00060     FLA_Scal_external( beta, C );
00061     return FLA_SUCCESS;
00062   }
00063 
00064   datatype = FLA_Obj_datatype( A );
00065 
00066   m_A      = FLA_Obj_length( A );
00067   n_A      = FLA_Obj_width( A );
00068   ldim_A   = FLA_Obj_ldim( A );
00069 
00070   ldim_B   = FLA_Obj_ldim( B );
00071 
00072   m_C      = FLA_Obj_length( C );
00073   n_C      = FLA_Obj_width( C );
00074   ldim_C   = FLA_Obj_ldim( C );
00075 
00076   if ( transa == FLA_NO_TRANSPOSE || transa == FLA_CONJ_NO_TRANSPOSE )
00077     k_AB = n_A;
00078   else
00079     k_AB = m_A;
00080 
00081   if ( transa == FLA_CONJ_NO_TRANSPOSE )
00082   {
00083     FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &A_copy );
00084     FLA_Copyt_external( FLA_CONJ_NO_TRANSPOSE, A, A_copy );
00085 
00086     ldim_A = FLA_Obj_ldim( A_copy );
00087   }
00088   else
00089   {
00090     A_copy = A;
00091   }
00092 
00093   if ( transb == FLA_CONJ_NO_TRANSPOSE )
00094   {
00095     FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, B, &B_copy );
00096     FLA_Copyt_external( FLA_CONJ_NO_TRANSPOSE, B, B_copy );
00097 
00098     ldim_B = FLA_Obj_ldim( B_copy );
00099   }
00100   else
00101   {
00102     B_copy = B;
00103   }
00104 
00105   FLA_Param_map_to_blas_trans( transa, &blas_transa );
00106   FLA_Param_map_to_blas_trans( transb, &blas_transb );
00107 
00108 
00109   switch( datatype ){
00110 
00111   case FLA_FLOAT:
00112   {
00113     float *buff_A     = ( float * ) FLA_FLOAT_PTR( A );
00114     float *buff_B     = ( float * ) FLA_FLOAT_PTR( B );
00115     float *buff_C     = ( float * ) FLA_FLOAT_PTR( C );
00116     float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha );
00117     float *buff_beta  = ( float * ) FLA_FLOAT_PTR( beta );
00118 
00119 #ifdef FLA_ENABLE_CBLAS_INTERFACE
00120     cblas_sgemm( cblas_order,
00121                  blas_transa, 
00122                  blas_transb,
00123                  m_C,
00124                  n_C, 
00125                  k_AB, 
00126                  *buff_alpha,
00127                  buff_A, ldim_A, 
00128                  buff_B, ldim_B,
00129                  *buff_beta,
00130                  buff_C, ldim_C );
00131 #else
00132     FLA_C2F( sgemm )( &blas_transa, 
00133                       &blas_transb,
00134                       &m_C,
00135                       &n_C, 
00136                       &k_AB, 
00137                       buff_alpha,
00138                       buff_A, &ldim_A, 
00139                       buff_B, &ldim_B,
00140                       buff_beta,
00141                       buff_C, &ldim_C );
00142 #endif
00143     break;
00144   }
00145 
00146   case FLA_DOUBLE:
00147   {
00148     double *buff_A     = ( double * ) FLA_DOUBLE_PTR( A );
00149     double *buff_B     = ( double * ) FLA_DOUBLE_PTR( B );
00150     double *buff_C     = ( double * ) FLA_DOUBLE_PTR( C );
00151     double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha );
00152     double *buff_beta  = ( double * ) FLA_DOUBLE_PTR( beta );
00153                          
00154 #ifdef FLA_ENABLE_CBLAS_INTERFACE           
00155     cblas_dgemm( cblas_order,
00156                  blas_transa, 
00157                  blas_transb,
00158                  m_C,
00159                  n_C, 
00160                  k_AB, 
00161                  *buff_alpha,
00162                  buff_A, ldim_A, 
00163                  buff_B, ldim_B,
00164                  *buff_beta,
00165                  buff_C, ldim_C );
00166 #else
00167     FLA_C2F( dgemm )( &blas_transa, 
00168                       &blas_transb,
00169                       &m_C,
00170                       &n_C, 
00171                       &k_AB, 
00172                       buff_alpha,
00173                       buff_A, &ldim_A, 
00174                       buff_B, &ldim_B,
00175                       buff_beta,
00176                       buff_C, &ldim_C );
00177 #endif
00178     break;
00179   }
00180 
00181   case FLA_COMPLEX:
00182   {
00183     scomplex *buff_A     = ( scomplex * ) FLA_COMPLEX_PTR( A_copy );
00184     scomplex *buff_B     = ( scomplex * ) FLA_COMPLEX_PTR( B_copy );
00185     scomplex *buff_C     = ( scomplex * ) FLA_COMPLEX_PTR( C );
00186     scomplex *buff_alpha = ( scomplex * ) FLA_COMPLEX_PTR( alpha );
00187     scomplex *buff_beta  = ( scomplex * ) FLA_COMPLEX_PTR( beta );
00188                          
00189 #ifdef FLA_ENABLE_CBLAS_INTERFACE           
00190     cblas_cgemm( cblas_order,
00191                  blas_transa, 
00192                  blas_transb,
00193                  m_C,
00194                  n_C, 
00195                  k_AB, 
00196                  buff_alpha,
00197                  buff_A, ldim_A, 
00198                  buff_B, ldim_B,
00199                  buff_beta,
00200                  buff_C, ldim_C );
00201 #else
00202     FLA_C2F( cgemm )( &blas_transa, 
00203                       &blas_transb,
00204                       &m_C,
00205                       &n_C, 
00206                       &k_AB, 
00207                       buff_alpha,
00208                       buff_A, &ldim_A, 
00209                       buff_B, &ldim_B,
00210                       buff_beta,
00211                       buff_C, &ldim_C );
00212 #endif
00213     break;
00214   }
00215 
00216   case FLA_DOUBLE_COMPLEX:
00217   {
00218     dcomplex *buff_A     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A_copy );
00219     dcomplex *buff_B     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B_copy );
00220     dcomplex *buff_C     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( C );
00221     dcomplex *buff_alpha = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );
00222     dcomplex *buff_beta  = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( beta );
00223 
00224 #ifdef FLA_ENABLE_CBLAS_INTERFACE                                    
00225     cblas_zgemm( cblas_order,
00226                  blas_transa, 
00227                  blas_transb,
00228                  m_C,
00229                  n_C, 
00230                  k_AB, 
00231                  buff_alpha,
00232                  buff_A, ldim_A, 
00233                  buff_B, ldim_B,
00234                  buff_beta,
00235                  buff_C, ldim_C );
00236 #else
00237     FLA_C2F( zgemm )( &blas_transa, 
00238                       &blas_transb,
00239                       &m_C,
00240                       &n_C, 
00241                       &k_AB, 
00242                       buff_alpha,
00243                       buff_A, &ldim_A, 
00244                       buff_B, &ldim_B,
00245                       buff_beta,
00246                       buff_C, &ldim_C );
00247 #endif
00248     break;
00249   }
00250 
00251   }
00252 
00253   if ( transa == FLA_CONJ_NO_TRANSPOSE )
00254     FLA_Obj_free( &A_copy );
00255 
00256   if ( transb == FLA_CONJ_NO_TRANSPOSE )
00257     FLA_Obj_free( &B_copy );
00258  
00259   return FLA_SUCCESS;
00260 }

void FLA_F2C() fla_gemm_external_f ( F_INT *  transa,
F_INT *  transb,
F_INT *  alpha,
F_INT *  A,
F_INT *  B,
F_INT *  beta,
F_INT *  C,
F_INT *  IERROR 
)

References FLA_Gemm_external().

00264 {
00265   *IERROR = FLA_Gemm_external( *( ( FLA_Trans * ) transa ), 
00266                                *( ( FLA_Trans * ) transb ), 
00267                                *( ( FLA_Obj   * ) alpha  ), 
00268                                *( ( FLA_Obj   * ) A      ), 
00269                                *( ( FLA_Obj   * ) B      ),
00270                                *( ( FLA_Obj   * ) beta   ), 
00271                                *( ( FLA_Obj   * ) C      ) );
00272 }


Generated on Mon Jul 6 05:45:53 2009 for libflame by  doxygen 1.5.9