Functions | |
FLA_Error | FLA_Gemm_internal (FLA_Trans transa, FLA_Trans transb, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_gemm_t *cntl) |
Variables | |
fla_gemm_t * | flash_gemm_cntl_blas |
fla_gemm_t * | flash_gemm_cntl_mm_op |
FLA_Error FLA_Gemm_internal | ( | FLA_Trans | transa, | |
FLA_Trans | transb, | |||
FLA_Obj | alpha, | |||
FLA_Obj | A, | |||
FLA_Obj | B, | |||
FLA_Obj | beta, | |||
FLA_Obj | C, | |||
fla_gemm_t * | cntl | |||
) |
References FLA_Check_error_level(), FLA_Gemm_hh(), FLA_Gemm_hn(), FLA_Gemm_ht(), FLA_Gemm_internal(), FLA_Gemm_internal_check(), FLA_Gemm_nh(), FLA_Gemm_nn(), FLA_Gemm_nt(), FLA_Gemm_th(), FLA_Gemm_tn(), FLA_Gemm_tt(), FLA_Obj_elemtype(), and FLASH_Queue_get_enabled().
Referenced by FLA_Apply_Q_UT_lhc_blk_var1(), FLA_Apply_Q_UT_UD_lhc_blk_var1(), FLA_Chol_l_blk_var2(), FLA_Chol_u_blk_var2(), FLA_Gemm(), FLA_Gemm_hh_blk_var1(), FLA_Gemm_hh_blk_var2(), FLA_Gemm_hh_blk_var3(), FLA_Gemm_hh_blk_var4(), FLA_Gemm_hh_blk_var5(), FLA_Gemm_hh_blk_var6(), FLA_Gemm_hn_blk_var1(), FLA_Gemm_hn_blk_var2(), FLA_Gemm_hn_blk_var3(), FLA_Gemm_hn_blk_var4(), FLA_Gemm_hn_blk_var5(), FLA_Gemm_hn_blk_var6(), FLA_Gemm_ht_blk_var1(), FLA_Gemm_ht_blk_var2(), FLA_Gemm_ht_blk_var3(), FLA_Gemm_ht_blk_var4(), FLA_Gemm_ht_blk_var5(), FLA_Gemm_ht_blk_var6(), FLA_Gemm_internal(), FLA_Gemm_nh_blk_var1(), FLA_Gemm_nh_blk_var2(), FLA_Gemm_nh_blk_var3(), FLA_Gemm_nh_blk_var4(), FLA_Gemm_nh_blk_var5(), FLA_Gemm_nh_blk_var6(), FLA_Gemm_nn_blk_var1(), FLA_Gemm_nn_blk_var2(), FLA_Gemm_nn_blk_var3(), FLA_Gemm_nn_blk_var4(), FLA_Gemm_nn_blk_var5(), FLA_Gemm_nn_blk_var6(), FLA_Gemm_nt_blk_var1(), FLA_Gemm_nt_blk_var2(), FLA_Gemm_nt_blk_var3(), FLA_Gemm_nt_blk_var4(), FLA_Gemm_nt_blk_var5(), FLA_Gemm_nt_blk_var6(), FLA_Gemm_th_blk_var1(), FLA_Gemm_th_blk_var2(), FLA_Gemm_th_blk_var3(), FLA_Gemm_th_blk_var4(), FLA_Gemm_th_blk_var5(), FLA_Gemm_th_blk_var6(), FLA_Gemm_tn_blk_var1(), FLA_Gemm_tn_blk_var2(), FLA_Gemm_tn_blk_var3(), FLA_Gemm_tn_blk_var4(), FLA_Gemm_tn_blk_var5(), FLA_Gemm_tn_blk_var6(), FLA_Gemm_tt_blk_var1(), FLA_Gemm_tt_blk_var2(), FLA_Gemm_tt_blk_var3(), FLA_Gemm_tt_blk_var4(), FLA_Gemm_tt_blk_var5(), FLA_Gemm_tt_blk_var6(), FLA_Hemm_ll_blk_var1(), FLA_Hemm_ll_blk_var2(), FLA_Hemm_ll_blk_var3(), FLA_Hemm_ll_blk_var4(), FLA_Hemm_ll_blk_var5(), FLA_Hemm_ll_blk_var6(), FLA_Hemm_ll_blk_var7(), FLA_Hemm_ll_blk_var8(), FLA_Hemm_lu_blk_var1(), FLA_Hemm_lu_blk_var2(), FLA_Hemm_lu_blk_var3(), FLA_Hemm_lu_blk_var4(), FLA_Hemm_lu_blk_var5(), FLA_Hemm_lu_blk_var6(), FLA_Hemm_lu_blk_var7(), FLA_Hemm_lu_blk_var8(), FLA_Hemm_rl_blk_var1(), FLA_Hemm_rl_blk_var2(), FLA_Hemm_rl_blk_var3(), FLA_Hemm_rl_blk_var4(), FLA_Hemm_rl_blk_var5(), FLA_Hemm_rl_blk_var6(), FLA_Hemm_rl_blk_var7(), FLA_Hemm_rl_blk_var8(), FLA_Hemm_ru_blk_var1(), FLA_Hemm_ru_blk_var2(), FLA_Hemm_ru_blk_var3(), FLA_Hemm_ru_blk_var4(), FLA_Hemm_ru_blk_var5(), FLA_Hemm_ru_blk_var6(), FLA_Hemm_ru_blk_var7(), FLA_Hemm_ru_blk_var8(), FLA_Her2k_lh_blk_var1(), FLA_Her2k_lh_blk_var2(), FLA_Her2k_lh_blk_var3(), FLA_Her2k_lh_blk_var4(), FLA_Her2k_lh_blk_var5(), FLA_Her2k_lh_blk_var6(), FLA_Her2k_lh_blk_var7(), FLA_Her2k_lh_blk_var8(), FLA_Her2k_ln_blk_var1(), FLA_Her2k_ln_blk_var2(), FLA_Her2k_ln_blk_var3(), FLA_Her2k_ln_blk_var4(), FLA_Her2k_ln_blk_var5(), FLA_Her2k_ln_blk_var6(), FLA_Her2k_ln_blk_var7(), FLA_Her2k_ln_blk_var8(), FLA_Her2k_uh_blk_var1(), FLA_Her2k_uh_blk_var2(), FLA_Her2k_uh_blk_var3(), FLA_Her2k_uh_blk_var4(), FLA_Her2k_uh_blk_var5(), FLA_Her2k_uh_blk_var6(), FLA_Her2k_uh_blk_var7(), FLA_Her2k_uh_blk_var8(), FLA_Her2k_un_blk_var1(), FLA_Her2k_un_blk_var2(), FLA_Her2k_un_blk_var3(), FLA_Her2k_un_blk_var4(), FLA_Her2k_un_blk_var5(), FLA_Her2k_un_blk_var6(), FLA_Her2k_un_blk_var7(), FLA_Her2k_un_blk_var8(), FLA_Herk_lh_blk_var1(), FLA_Herk_lh_blk_var2(), FLA_Herk_lh_blk_var3(), FLA_Herk_lh_blk_var4(), FLA_Herk_ln_blk_var1(), FLA_Herk_ln_blk_var2(), FLA_Herk_ln_blk_var3(), FLA_Herk_ln_blk_var4(), FLA_Herk_uh_blk_var1(), FLA_Herk_uh_blk_var2(), FLA_Herk_uh_blk_var3(), FLA_Herk_uh_blk_var4(), FLA_Herk_un_blk_var1(), FLA_Herk_un_blk_var2(), FLA_Herk_un_blk_var3(), FLA_Herk_un_blk_var4(), FLA_LU_nopiv_blk_var1(), FLA_LU_nopiv_blk_var2(), FLA_LU_nopiv_blk_var3(), FLA_LU_nopiv_blk_var4(), FLA_LU_nopiv_blk_var5(), FLA_LU_piv_blk_var3(), FLA_LU_piv_blk_var4(), FLA_LU_piv_blk_var5(), FLA_QR_UT_UD_blk_var1(), FLA_Sylv_nn_blk_var1(), FLA_Sylv_nn_blk_var10(), FLA_Sylv_nn_blk_var11(), FLA_Sylv_nn_blk_var12(), FLA_Sylv_nn_blk_var13(), FLA_Sylv_nn_blk_var14(), FLA_Sylv_nn_blk_var15(), FLA_Sylv_nn_blk_var16(), FLA_Sylv_nn_blk_var17(), FLA_Sylv_nn_blk_var18(), FLA_Sylv_nn_blk_var2(), FLA_Sylv_nn_blk_var3(), FLA_Sylv_nn_blk_var4(), FLA_Sylv_nn_blk_var5(), FLA_Sylv_nn_blk_var6(), FLA_Sylv_nn_blk_var7(), FLA_Sylv_nn_blk_var8(), FLA_Sylv_nn_blk_var9(), FLA_Sylv_nt_blk_var1(), FLA_Sylv_nt_blk_var10(), FLA_Sylv_nt_blk_var11(), FLA_Sylv_nt_blk_var12(), FLA_Sylv_nt_blk_var13(), FLA_Sylv_nt_blk_var14(), FLA_Sylv_nt_blk_var15(), FLA_Sylv_nt_blk_var16(), FLA_Sylv_nt_blk_var17(), FLA_Sylv_nt_blk_var18(), FLA_Sylv_nt_blk_var2(), FLA_Sylv_nt_blk_var3(), FLA_Sylv_nt_blk_var4(), FLA_Sylv_nt_blk_var5(), FLA_Sylv_nt_blk_var6(), FLA_Sylv_nt_blk_var7(), FLA_Sylv_nt_blk_var8(), FLA_Sylv_nt_blk_var9(), FLA_Sylv_tn_blk_var1(), FLA_Sylv_tn_blk_var10(), FLA_Sylv_tn_blk_var11(), FLA_Sylv_tn_blk_var12(), FLA_Sylv_tn_blk_var13(), FLA_Sylv_tn_blk_var14(), FLA_Sylv_tn_blk_var15(), FLA_Sylv_tn_blk_var16(), FLA_Sylv_tn_blk_var17(), FLA_Sylv_tn_blk_var18(), FLA_Sylv_tn_blk_var2(), FLA_Sylv_tn_blk_var3(), FLA_Sylv_tn_blk_var4(), FLA_Sylv_tn_blk_var5(), FLA_Sylv_tn_blk_var6(), FLA_Sylv_tn_blk_var7(), FLA_Sylv_tn_blk_var8(), FLA_Sylv_tn_blk_var9(), FLA_Sylv_tt_blk_var1(), FLA_Sylv_tt_blk_var10(), FLA_Sylv_tt_blk_var11(), FLA_Sylv_tt_blk_var12(), FLA_Sylv_tt_blk_var13(), FLA_Sylv_tt_blk_var14(), FLA_Sylv_tt_blk_var15(), FLA_Sylv_tt_blk_var16(), FLA_Sylv_tt_blk_var17(), FLA_Sylv_tt_blk_var18(), FLA_Sylv_tt_blk_var2(), FLA_Sylv_tt_blk_var3(), FLA_Sylv_tt_blk_var4(), FLA_Sylv_tt_blk_var5(), FLA_Sylv_tt_blk_var6(), FLA_Sylv_tt_blk_var7(), FLA_Sylv_tt_blk_var8(), FLA_Sylv_tt_blk_var9(), FLA_Symm_ll_blk_var1(), FLA_Symm_ll_blk_var2(), FLA_Symm_ll_blk_var3(), FLA_Symm_ll_blk_var4(), FLA_Symm_ll_blk_var5(), FLA_Symm_ll_blk_var6(), FLA_Symm_ll_blk_var7(), FLA_Symm_ll_blk_var8(), FLA_Symm_lu_blk_var1(), FLA_Symm_lu_blk_var2(), FLA_Symm_lu_blk_var3(), FLA_Symm_lu_blk_var4(), FLA_Symm_lu_blk_var5(), FLA_Symm_lu_blk_var6(), FLA_Symm_lu_blk_var7(), FLA_Symm_lu_blk_var8(), FLA_Symm_rl_blk_var1(), FLA_Symm_rl_blk_var2(), FLA_Symm_rl_blk_var3(), FLA_Symm_rl_blk_var4(), FLA_Symm_rl_blk_var5(), FLA_Symm_rl_blk_var6(), FLA_Symm_rl_blk_var7(), FLA_Symm_rl_blk_var8(), FLA_Symm_ru_blk_var1(), FLA_Symm_ru_blk_var2(), FLA_Symm_ru_blk_var3(), FLA_Symm_ru_blk_var4(), FLA_Symm_ru_blk_var5(), FLA_Symm_ru_blk_var6(), FLA_Symm_ru_blk_var7(), FLA_Symm_ru_blk_var8(), FLA_Syr2k_ln_blk_var1(), FLA_Syr2k_ln_blk_var2(), FLA_Syr2k_ln_blk_var3(), FLA_Syr2k_ln_blk_var4(), FLA_Syr2k_ln_blk_var5(), FLA_Syr2k_ln_blk_var6(), FLA_Syr2k_ln_blk_var7(), FLA_Syr2k_ln_blk_var8(), FLA_Syr2k_lt_blk_var1(), FLA_Syr2k_lt_blk_var2(), FLA_Syr2k_lt_blk_var3(), FLA_Syr2k_lt_blk_var4(), FLA_Syr2k_lt_blk_var5(), FLA_Syr2k_lt_blk_var6(), FLA_Syr2k_lt_blk_var7(), FLA_Syr2k_lt_blk_var8(), FLA_Syr2k_un_blk_var1(), FLA_Syr2k_un_blk_var2(), FLA_Syr2k_un_blk_var3(), FLA_Syr2k_un_blk_var4(), FLA_Syr2k_un_blk_var5(), FLA_Syr2k_un_blk_var6(), FLA_Syr2k_un_blk_var7(), FLA_Syr2k_un_blk_var8(), FLA_Syr2k_ut_blk_var1(), FLA_Syr2k_ut_blk_var2(), FLA_Syr2k_ut_blk_var3(), FLA_Syr2k_ut_blk_var4(), FLA_Syr2k_ut_blk_var5(), FLA_Syr2k_ut_blk_var6(), FLA_Syr2k_ut_blk_var7(), FLA_Syr2k_ut_blk_var8(), FLA_Syrk_ln_blk_var1(), FLA_Syrk_ln_blk_var2(), FLA_Syrk_ln_blk_var3(), FLA_Syrk_ln_blk_var4(), FLA_Syrk_lt_blk_var1(), FLA_Syrk_lt_blk_var2(), FLA_Syrk_lt_blk_var3(), FLA_Syrk_lt_blk_var4(), FLA_Syrk_un_blk_var1(), FLA_Syrk_un_blk_var2(), FLA_Syrk_un_blk_var3(), FLA_Syrk_un_blk_var4(), FLA_Syrk_ut_blk_var1(), FLA_Syrk_ut_blk_var2(), FLA_Syrk_ut_blk_var3(), FLA_Syrk_ut_blk_var4(), FLA_Trinv_l_blk_var3(), FLA_Trinv_l_blk_var4(), FLA_Trinv_u_blk_var3(), FLA_Trinv_u_blk_var4(), FLA_Trmm_llh_blk_var1(), FLA_Trmm_llh_blk_var2(), FLA_Trmm_lln_blk_var1(), FLA_Trmm_lln_blk_var2(), FLA_Trmm_llt_blk_var1(), FLA_Trmm_llt_blk_var2(), FLA_Trmm_luh_blk_var1(), FLA_Trmm_luh_blk_var2(), FLA_Trmm_lun_blk_var1(), FLA_Trmm_lun_blk_var2(), FLA_Trmm_lut_blk_var1(), FLA_Trmm_lut_blk_var2(), FLA_Trmm_rlh_blk_var1(), FLA_Trmm_rlh_blk_var2(), FLA_Trmm_rln_blk_var1(), FLA_Trmm_rln_blk_var2(), FLA_Trmm_rlt_blk_var1(), FLA_Trmm_rlt_blk_var2(), FLA_Trmm_ruh_blk_var1(), FLA_Trmm_ruh_blk_var2(), FLA_Trmm_run_blk_var1(), FLA_Trmm_run_blk_var2(), FLA_Trmm_rut_blk_var1(), FLA_Trmm_rut_blk_var2(), FLA_Trsm_llh_blk_var1(), FLA_Trsm_llh_blk_var2(), FLA_Trsm_lln_blk_var1(), FLA_Trsm_lln_blk_var2(), FLA_Trsm_llt_blk_var1(), FLA_Trsm_llt_blk_var2(), FLA_Trsm_luh_blk_var1(), FLA_Trsm_luh_blk_var2(), FLA_Trsm_lun_blk_var1(), FLA_Trsm_lun_blk_var2(), FLA_Trsm_lut_blk_var1(), FLA_Trsm_lut_blk_var2(), FLA_Trsm_rlh_blk_var1(), FLA_Trsm_rlh_blk_var2(), FLA_Trsm_rln_blk_var1(), FLA_Trsm_rln_blk_var2(), FLA_Trsm_rlt_blk_var1(), FLA_Trsm_rlt_blk_var2(), FLA_Trsm_ruh_blk_var1(), FLA_Trsm_ruh_blk_var2(), FLA_Trsm_run_blk_var1(), FLA_Trsm_run_blk_var2(), FLA_Trsm_rut_blk_var1(), FLA_Trsm_rut_blk_var2(), FLA_Ttmm_l_blk_var2(), FLA_Ttmm_u_blk_var2(), and FLASH_Gemm().
00039 { 00040 FLA_Error r_val = FLA_SUCCESS; 00041 00042 if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) 00043 FLA_Gemm_internal_check( transa, transb, alpha, A, B, beta, C, cntl ); 00044 00045 if ( FLA_Cntl_matrix_type( cntl ) == FLA_HIER && 00046 FLA_Obj_elemtype( A ) == FLA_MATRIX && 00047 FLA_Cntl_variant( cntl ) == FLA_SUBPROBLEM ) 00048 { 00049 // Recurse 00050 r_val = FLA_Gemm_internal( transa, 00051 transb, 00052 alpha, 00053 *FLASH_OBJ_PTR_AT( A ), 00054 *FLASH_OBJ_PTR_AT( B ), 00055 beta, 00056 *FLASH_OBJ_PTR_AT( C ), 00057 flash_gemm_cntl_mm_op ); 00058 } 00059 else if ( FLA_Cntl_matrix_type( cntl ) == FLA_HIER && 00060 FLA_Obj_elemtype( A ) == FLA_SCALAR && 00061 FLASH_Queue_get_enabled( ) ) 00062 { 00063 // Enqueue 00064 ENQUEUE_FLASH_Gemm( transa, transb, alpha, A, B, beta, C, cntl ); 00065 } 00066 else 00067 { 00068 if ( FLA_Cntl_matrix_type( cntl ) == FLA_HIER && 00069 FLA_Obj_elemtype( A ) == FLA_SCALAR && 00070 !FLASH_Queue_get_enabled( ) ) 00071 { 00072 // Execute leaf 00073 cntl = flash_gemm_cntl_blas; 00074 } 00075 00076 // Parameter combinations 00077 if ( transa == FLA_NO_TRANSPOSE ) 00078 { 00079 if ( transb == FLA_NO_TRANSPOSE ) 00080 r_val = FLA_Gemm_nn( alpha, A, B, beta, C, cntl ); 00081 else if ( transb == FLA_TRANSPOSE ) 00082 r_val = FLA_Gemm_nt( alpha, A, B, beta, C, cntl ); 00083 else if ( transb == FLA_CONJ_TRANSPOSE ) 00084 r_val = FLA_Gemm_nh( alpha, A, B, beta, C, cntl ); 00085 } 00086 else if ( transa == FLA_TRANSPOSE ) 00087 { 00088 if ( transb == FLA_NO_TRANSPOSE ) 00089 r_val = FLA_Gemm_tn( alpha, A, B, beta, C, cntl ); 00090 else if ( transb == FLA_TRANSPOSE ) 00091 r_val = FLA_Gemm_tt( alpha, A, B, beta, C, cntl ); 00092 else if ( transb == FLA_CONJ_TRANSPOSE ) 00093 r_val = FLA_Gemm_th( alpha, A, B, beta, C, cntl ); 00094 } 00095 else if ( transa == FLA_CONJ_TRANSPOSE ) 00096 { 00097 if ( transb == FLA_NO_TRANSPOSE ) 00098 r_val = FLA_Gemm_hn( alpha, A, B, beta, C, cntl ); 00099 else if ( transb == FLA_TRANSPOSE ) 00100 r_val = FLA_Gemm_ht( alpha, A, B, beta, C, cntl ); 00101 else if ( transb == FLA_CONJ_TRANSPOSE ) 00102 r_val = FLA_Gemm_hh( alpha, A, B, beta, C, cntl ); 00103 } 00104 } 00105 00106 return r_val; 00107 }