FLA_Gemm_internal.c File Reference

(r)


Functions

FLA_Error FLA_Gemm_internal (FLA_Trans transa, FLA_Trans transb, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_gemm_t *cntl)

Variables

fla_gemm_tflash_gemm_cntl_blas
fla_gemm_tflash_gemm_cntl_mm_op

Function Documentation

FLA_Error FLA_Gemm_internal ( FLA_Trans  transa,
FLA_Trans  transb,
FLA_Obj  alpha,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  beta,
FLA_Obj  C,
fla_gemm_t cntl 
)

References FLA_Check_error_level(), FLA_Gemm_hh(), FLA_Gemm_hn(), FLA_Gemm_ht(), FLA_Gemm_internal(), FLA_Gemm_internal_check(), FLA_Gemm_nh(), FLA_Gemm_nn(), FLA_Gemm_nt(), FLA_Gemm_th(), FLA_Gemm_tn(), FLA_Gemm_tt(), FLA_Obj_elemtype(), and FLASH_Queue_get_enabled().

Referenced by FLA_Apply_Q_UT_lhc_blk_var1(), FLA_Apply_Q_UT_UD_lhc_blk_var1(), FLA_Chol_l_blk_var2(), FLA_Chol_u_blk_var2(), FLA_Gemm(), FLA_Gemm_hh_blk_var1(), FLA_Gemm_hh_blk_var2(), FLA_Gemm_hh_blk_var3(), FLA_Gemm_hh_blk_var4(), FLA_Gemm_hh_blk_var5(), FLA_Gemm_hh_blk_var6(), FLA_Gemm_hn_blk_var1(), FLA_Gemm_hn_blk_var2(), FLA_Gemm_hn_blk_var3(), FLA_Gemm_hn_blk_var4(), FLA_Gemm_hn_blk_var5(), FLA_Gemm_hn_blk_var6(), FLA_Gemm_ht_blk_var1(), FLA_Gemm_ht_blk_var2(), FLA_Gemm_ht_blk_var3(), FLA_Gemm_ht_blk_var4(), FLA_Gemm_ht_blk_var5(), FLA_Gemm_ht_blk_var6(), FLA_Gemm_internal(), FLA_Gemm_nh_blk_var1(), FLA_Gemm_nh_blk_var2(), FLA_Gemm_nh_blk_var3(), FLA_Gemm_nh_blk_var4(), FLA_Gemm_nh_blk_var5(), FLA_Gemm_nh_blk_var6(), FLA_Gemm_nn_blk_var1(), FLA_Gemm_nn_blk_var2(), FLA_Gemm_nn_blk_var3(), FLA_Gemm_nn_blk_var4(), FLA_Gemm_nn_blk_var5(), FLA_Gemm_nn_blk_var6(), FLA_Gemm_nt_blk_var1(), FLA_Gemm_nt_blk_var2(), FLA_Gemm_nt_blk_var3(), FLA_Gemm_nt_blk_var4(), FLA_Gemm_nt_blk_var5(), FLA_Gemm_nt_blk_var6(), FLA_Gemm_th_blk_var1(), FLA_Gemm_th_blk_var2(), FLA_Gemm_th_blk_var3(), FLA_Gemm_th_blk_var4(), FLA_Gemm_th_blk_var5(), FLA_Gemm_th_blk_var6(), FLA_Gemm_tn_blk_var1(), FLA_Gemm_tn_blk_var2(), FLA_Gemm_tn_blk_var3(), FLA_Gemm_tn_blk_var4(), FLA_Gemm_tn_blk_var5(), FLA_Gemm_tn_blk_var6(), FLA_Gemm_tt_blk_var1(), FLA_Gemm_tt_blk_var2(), FLA_Gemm_tt_blk_var3(), FLA_Gemm_tt_blk_var4(), FLA_Gemm_tt_blk_var5(), FLA_Gemm_tt_blk_var6(), FLA_Hemm_ll_blk_var1(), FLA_Hemm_ll_blk_var2(), FLA_Hemm_ll_blk_var3(), FLA_Hemm_ll_blk_var4(), FLA_Hemm_ll_blk_var5(), FLA_Hemm_ll_blk_var6(), FLA_Hemm_ll_blk_var7(), FLA_Hemm_ll_blk_var8(), FLA_Hemm_lu_blk_var1(), FLA_Hemm_lu_blk_var2(), FLA_Hemm_lu_blk_var3(), FLA_Hemm_lu_blk_var4(), FLA_Hemm_lu_blk_var5(), FLA_Hemm_lu_blk_var6(), FLA_Hemm_lu_blk_var7(), FLA_Hemm_lu_blk_var8(), FLA_Hemm_rl_blk_var1(), FLA_Hemm_rl_blk_var2(), FLA_Hemm_rl_blk_var3(), FLA_Hemm_rl_blk_var4(), FLA_Hemm_rl_blk_var5(), FLA_Hemm_rl_blk_var6(), FLA_Hemm_rl_blk_var7(), FLA_Hemm_rl_blk_var8(), FLA_Hemm_ru_blk_var1(), FLA_Hemm_ru_blk_var2(), FLA_Hemm_ru_blk_var3(), FLA_Hemm_ru_blk_var4(), FLA_Hemm_ru_blk_var5(), FLA_Hemm_ru_blk_var6(), FLA_Hemm_ru_blk_var7(), FLA_Hemm_ru_blk_var8(), FLA_Her2k_lh_blk_var1(), FLA_Her2k_lh_blk_var2(), FLA_Her2k_lh_blk_var3(), FLA_Her2k_lh_blk_var4(), FLA_Her2k_lh_blk_var5(), FLA_Her2k_lh_blk_var6(), FLA_Her2k_lh_blk_var7(), FLA_Her2k_lh_blk_var8(), FLA_Her2k_ln_blk_var1(), FLA_Her2k_ln_blk_var2(), FLA_Her2k_ln_blk_var3(), FLA_Her2k_ln_blk_var4(), FLA_Her2k_ln_blk_var5(), FLA_Her2k_ln_blk_var6(), FLA_Her2k_ln_blk_var7(), FLA_Her2k_ln_blk_var8(), FLA_Her2k_uh_blk_var1(), FLA_Her2k_uh_blk_var2(), FLA_Her2k_uh_blk_var3(), FLA_Her2k_uh_blk_var4(), FLA_Her2k_uh_blk_var5(), FLA_Her2k_uh_blk_var6(), FLA_Her2k_uh_blk_var7(), FLA_Her2k_uh_blk_var8(), FLA_Her2k_un_blk_var1(), FLA_Her2k_un_blk_var2(), FLA_Her2k_un_blk_var3(), FLA_Her2k_un_blk_var4(), FLA_Her2k_un_blk_var5(), FLA_Her2k_un_blk_var6(), FLA_Her2k_un_blk_var7(), FLA_Her2k_un_blk_var8(), FLA_Herk_lh_blk_var1(), FLA_Herk_lh_blk_var2(), FLA_Herk_lh_blk_var3(), FLA_Herk_lh_blk_var4(), FLA_Herk_ln_blk_var1(), FLA_Herk_ln_blk_var2(), FLA_Herk_ln_blk_var3(), FLA_Herk_ln_blk_var4(), FLA_Herk_uh_blk_var1(), FLA_Herk_uh_blk_var2(), FLA_Herk_uh_blk_var3(), FLA_Herk_uh_blk_var4(), FLA_Herk_un_blk_var1(), FLA_Herk_un_blk_var2(), FLA_Herk_un_blk_var3(), FLA_Herk_un_blk_var4(), FLA_LU_nopiv_blk_var1(), FLA_LU_nopiv_blk_var2(), FLA_LU_nopiv_blk_var3(), FLA_LU_nopiv_blk_var4(), FLA_LU_nopiv_blk_var5(), FLA_LU_piv_blk_var3(), FLA_LU_piv_blk_var4(), FLA_LU_piv_blk_var5(), FLA_QR_UT_UD_blk_var1(), FLA_Sylv_nn_blk_var1(), FLA_Sylv_nn_blk_var10(), FLA_Sylv_nn_blk_var11(), FLA_Sylv_nn_blk_var12(), FLA_Sylv_nn_blk_var13(), FLA_Sylv_nn_blk_var14(), FLA_Sylv_nn_blk_var15(), FLA_Sylv_nn_blk_var16(), FLA_Sylv_nn_blk_var17(), FLA_Sylv_nn_blk_var18(), FLA_Sylv_nn_blk_var2(), FLA_Sylv_nn_blk_var3(), FLA_Sylv_nn_blk_var4(), FLA_Sylv_nn_blk_var5(), FLA_Sylv_nn_blk_var6(), FLA_Sylv_nn_blk_var7(), FLA_Sylv_nn_blk_var8(), FLA_Sylv_nn_blk_var9(), FLA_Sylv_nt_blk_var1(), FLA_Sylv_nt_blk_var10(), FLA_Sylv_nt_blk_var11(), FLA_Sylv_nt_blk_var12(), FLA_Sylv_nt_blk_var13(), FLA_Sylv_nt_blk_var14(), FLA_Sylv_nt_blk_var15(), FLA_Sylv_nt_blk_var16(), FLA_Sylv_nt_blk_var17(), FLA_Sylv_nt_blk_var18(), FLA_Sylv_nt_blk_var2(), FLA_Sylv_nt_blk_var3(), FLA_Sylv_nt_blk_var4(), FLA_Sylv_nt_blk_var5(), FLA_Sylv_nt_blk_var6(), FLA_Sylv_nt_blk_var7(), FLA_Sylv_nt_blk_var8(), FLA_Sylv_nt_blk_var9(), FLA_Sylv_tn_blk_var1(), FLA_Sylv_tn_blk_var10(), FLA_Sylv_tn_blk_var11(), FLA_Sylv_tn_blk_var12(), FLA_Sylv_tn_blk_var13(), FLA_Sylv_tn_blk_var14(), FLA_Sylv_tn_blk_var15(), FLA_Sylv_tn_blk_var16(), FLA_Sylv_tn_blk_var17(), FLA_Sylv_tn_blk_var18(), FLA_Sylv_tn_blk_var2(), FLA_Sylv_tn_blk_var3(), FLA_Sylv_tn_blk_var4(), FLA_Sylv_tn_blk_var5(), FLA_Sylv_tn_blk_var6(), FLA_Sylv_tn_blk_var7(), FLA_Sylv_tn_blk_var8(), FLA_Sylv_tn_blk_var9(), FLA_Sylv_tt_blk_var1(), FLA_Sylv_tt_blk_var10(), FLA_Sylv_tt_blk_var11(), FLA_Sylv_tt_blk_var12(), FLA_Sylv_tt_blk_var13(), FLA_Sylv_tt_blk_var14(), FLA_Sylv_tt_blk_var15(), FLA_Sylv_tt_blk_var16(), FLA_Sylv_tt_blk_var17(), FLA_Sylv_tt_blk_var18(), FLA_Sylv_tt_blk_var2(), FLA_Sylv_tt_blk_var3(), FLA_Sylv_tt_blk_var4(), FLA_Sylv_tt_blk_var5(), FLA_Sylv_tt_blk_var6(), FLA_Sylv_tt_blk_var7(), FLA_Sylv_tt_blk_var8(), FLA_Sylv_tt_blk_var9(), FLA_Symm_ll_blk_var1(), FLA_Symm_ll_blk_var2(), FLA_Symm_ll_blk_var3(), FLA_Symm_ll_blk_var4(), FLA_Symm_ll_blk_var5(), FLA_Symm_ll_blk_var6(), FLA_Symm_ll_blk_var7(), FLA_Symm_ll_blk_var8(), FLA_Symm_lu_blk_var1(), FLA_Symm_lu_blk_var2(), FLA_Symm_lu_blk_var3(), FLA_Symm_lu_blk_var4(), FLA_Symm_lu_blk_var5(), FLA_Symm_lu_blk_var6(), FLA_Symm_lu_blk_var7(), FLA_Symm_lu_blk_var8(), FLA_Symm_rl_blk_var1(), FLA_Symm_rl_blk_var2(), FLA_Symm_rl_blk_var3(), FLA_Symm_rl_blk_var4(), FLA_Symm_rl_blk_var5(), FLA_Symm_rl_blk_var6(), FLA_Symm_rl_blk_var7(), FLA_Symm_rl_blk_var8(), FLA_Symm_ru_blk_var1(), FLA_Symm_ru_blk_var2(), FLA_Symm_ru_blk_var3(), FLA_Symm_ru_blk_var4(), FLA_Symm_ru_blk_var5(), FLA_Symm_ru_blk_var6(), FLA_Symm_ru_blk_var7(), FLA_Symm_ru_blk_var8(), FLA_Syr2k_ln_blk_var1(), FLA_Syr2k_ln_blk_var2(), FLA_Syr2k_ln_blk_var3(), FLA_Syr2k_ln_blk_var4(), FLA_Syr2k_ln_blk_var5(), FLA_Syr2k_ln_blk_var6(), FLA_Syr2k_ln_blk_var7(), FLA_Syr2k_ln_blk_var8(), FLA_Syr2k_lt_blk_var1(), FLA_Syr2k_lt_blk_var2(), FLA_Syr2k_lt_blk_var3(), FLA_Syr2k_lt_blk_var4(), FLA_Syr2k_lt_blk_var5(), FLA_Syr2k_lt_blk_var6(), FLA_Syr2k_lt_blk_var7(), FLA_Syr2k_lt_blk_var8(), FLA_Syr2k_un_blk_var1(), FLA_Syr2k_un_blk_var2(), FLA_Syr2k_un_blk_var3(), FLA_Syr2k_un_blk_var4(), FLA_Syr2k_un_blk_var5(), FLA_Syr2k_un_blk_var6(), FLA_Syr2k_un_blk_var7(), FLA_Syr2k_un_blk_var8(), FLA_Syr2k_ut_blk_var1(), FLA_Syr2k_ut_blk_var2(), FLA_Syr2k_ut_blk_var3(), FLA_Syr2k_ut_blk_var4(), FLA_Syr2k_ut_blk_var5(), FLA_Syr2k_ut_blk_var6(), FLA_Syr2k_ut_blk_var7(), FLA_Syr2k_ut_blk_var8(), FLA_Syrk_ln_blk_var1(), FLA_Syrk_ln_blk_var2(), FLA_Syrk_ln_blk_var3(), FLA_Syrk_ln_blk_var4(), FLA_Syrk_lt_blk_var1(), FLA_Syrk_lt_blk_var2(), FLA_Syrk_lt_blk_var3(), FLA_Syrk_lt_blk_var4(), FLA_Syrk_un_blk_var1(), FLA_Syrk_un_blk_var2(), FLA_Syrk_un_blk_var3(), FLA_Syrk_un_blk_var4(), FLA_Syrk_ut_blk_var1(), FLA_Syrk_ut_blk_var2(), FLA_Syrk_ut_blk_var3(), FLA_Syrk_ut_blk_var4(), FLA_Trinv_l_blk_var3(), FLA_Trinv_l_blk_var4(), FLA_Trinv_u_blk_var3(), FLA_Trinv_u_blk_var4(), FLA_Trmm_llh_blk_var1(), FLA_Trmm_llh_blk_var2(), FLA_Trmm_lln_blk_var1(), FLA_Trmm_lln_blk_var2(), FLA_Trmm_llt_blk_var1(), FLA_Trmm_llt_blk_var2(), FLA_Trmm_luh_blk_var1(), FLA_Trmm_luh_blk_var2(), FLA_Trmm_lun_blk_var1(), FLA_Trmm_lun_blk_var2(), FLA_Trmm_lut_blk_var1(), FLA_Trmm_lut_blk_var2(), FLA_Trmm_rlh_blk_var1(), FLA_Trmm_rlh_blk_var2(), FLA_Trmm_rln_blk_var1(), FLA_Trmm_rln_blk_var2(), FLA_Trmm_rlt_blk_var1(), FLA_Trmm_rlt_blk_var2(), FLA_Trmm_ruh_blk_var1(), FLA_Trmm_ruh_blk_var2(), FLA_Trmm_run_blk_var1(), FLA_Trmm_run_blk_var2(), FLA_Trmm_rut_blk_var1(), FLA_Trmm_rut_blk_var2(), FLA_Trsm_llh_blk_var1(), FLA_Trsm_llh_blk_var2(), FLA_Trsm_lln_blk_var1(), FLA_Trsm_lln_blk_var2(), FLA_Trsm_llt_blk_var1(), FLA_Trsm_llt_blk_var2(), FLA_Trsm_luh_blk_var1(), FLA_Trsm_luh_blk_var2(), FLA_Trsm_lun_blk_var1(), FLA_Trsm_lun_blk_var2(), FLA_Trsm_lut_blk_var1(), FLA_Trsm_lut_blk_var2(), FLA_Trsm_rlh_blk_var1(), FLA_Trsm_rlh_blk_var2(), FLA_Trsm_rln_blk_var1(), FLA_Trsm_rln_blk_var2(), FLA_Trsm_rlt_blk_var1(), FLA_Trsm_rlt_blk_var2(), FLA_Trsm_ruh_blk_var1(), FLA_Trsm_ruh_blk_var2(), FLA_Trsm_run_blk_var1(), FLA_Trsm_run_blk_var2(), FLA_Trsm_rut_blk_var1(), FLA_Trsm_rut_blk_var2(), FLA_Ttmm_l_blk_var2(), FLA_Ttmm_u_blk_var2(), and FLASH_Gemm().

00039 {
00040     FLA_Error r_val = FLA_SUCCESS;
00041 
00042     if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
00043         FLA_Gemm_internal_check( transa, transb, alpha, A, B, beta, C, cntl );
00044 
00045     if      ( FLA_Cntl_matrix_type( cntl ) == FLA_HIER &&
00046               FLA_Obj_elemtype( A ) == FLA_MATRIX &&
00047               FLA_Cntl_variant( cntl ) == FLA_SUBPROBLEM )
00048     {
00049         // Recurse
00050         r_val = FLA_Gemm_internal( transa, 
00051                                    transb, 
00052                                    alpha, 
00053                                    *FLASH_OBJ_PTR_AT( A ), 
00054                                    *FLASH_OBJ_PTR_AT( B ), 
00055                                    beta, 
00056                                    *FLASH_OBJ_PTR_AT( C ), 
00057                                    flash_gemm_cntl_mm_op );
00058     }
00059     else if ( FLA_Cntl_matrix_type( cntl ) == FLA_HIER &&
00060               FLA_Obj_elemtype( A ) == FLA_SCALAR &&
00061               FLASH_Queue_get_enabled( ) )
00062     {
00063         // Enqueue
00064         ENQUEUE_FLASH_Gemm( transa, transb, alpha, A, B, beta, C, cntl );
00065     }
00066     else
00067     {
00068         if ( FLA_Cntl_matrix_type( cntl ) == FLA_HIER &&
00069              FLA_Obj_elemtype( A ) == FLA_SCALAR &&
00070              !FLASH_Queue_get_enabled( ) )
00071         {
00072             // Execute leaf
00073             cntl = flash_gemm_cntl_blas;
00074         }
00075 
00076         // Parameter combinations
00077         if      ( transa == FLA_NO_TRANSPOSE )
00078         {
00079             if      ( transb == FLA_NO_TRANSPOSE )
00080                 r_val = FLA_Gemm_nn( alpha, A, B, beta, C, cntl );
00081             else if ( transb == FLA_TRANSPOSE )
00082                 r_val = FLA_Gemm_nt( alpha, A, B, beta, C, cntl );
00083             else if ( transb == FLA_CONJ_TRANSPOSE )
00084                 r_val = FLA_Gemm_nh( alpha, A, B, beta, C, cntl );
00085         }
00086         else if ( transa == FLA_TRANSPOSE )
00087         {
00088             if      ( transb == FLA_NO_TRANSPOSE )
00089                 r_val = FLA_Gemm_tn( alpha, A, B, beta, C, cntl );
00090             else if ( transb == FLA_TRANSPOSE )
00091                 r_val = FLA_Gemm_tt( alpha, A, B, beta, C, cntl );
00092             else if ( transb == FLA_CONJ_TRANSPOSE )
00093                 r_val = FLA_Gemm_th( alpha, A, B, beta, C, cntl );
00094         }
00095         else if ( transa == FLA_CONJ_TRANSPOSE )
00096         {
00097             if      ( transb == FLA_NO_TRANSPOSE )
00098                 r_val = FLA_Gemm_hn( alpha, A, B, beta, C, cntl );
00099             else if ( transb == FLA_TRANSPOSE )
00100                 r_val = FLA_Gemm_ht( alpha, A, B, beta, C, cntl );
00101             else if ( transb == FLA_CONJ_TRANSPOSE )
00102                 r_val = FLA_Gemm_hh( alpha, A, B, beta, C, cntl );
00103         }
00104     }
00105 
00106     return r_val;
00107 }


Variable Documentation


Generated on Mon Jul 6 05:45:54 2009 for libflame by  doxygen 1.5.9