Go to the source code of this file.
FLA_Error FLA_Syr2k_ln_blk_var1 | ( | FLA_Obj | alpha, | |
FLA_Obj | A, | |||
FLA_Obj | B, | |||
FLA_Obj | beta, | |||
FLA_Obj | C, | |||
fla_syr2k_t * | cntl | |||
) |
References FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_Obj_length(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), and FLA_Syr2k_internal().
Referenced by FLA_Syr2k_ln().
00038 { 00039 FLA_Obj AT, A0, 00040 AB, A1, 00041 A2; 00042 00043 FLA_Obj BT, B0, 00044 BB, B1, 00045 B2; 00046 00047 FLA_Obj CTL, CTR, C00, C01, C02, 00048 CBL, CBR, C10, C11, C12, 00049 C20, C21, C22; 00050 00051 dim_t b; 00052 00053 FLA_Part_2x1( A, &AT, 00054 &AB, 0, FLA_TOP ); 00055 00056 FLA_Part_2x1( B, &BT, 00057 &BB, 0, FLA_TOP ); 00058 00059 FLA_Part_2x2( C, &CTL, &CTR, 00060 &CBL, &CBR, 0, 0, FLA_TL ); 00061 00062 while ( FLA_Obj_length( AT ) < FLA_Obj_length( A ) ){ 00063 00064 b = FLA_Determine_blocksize( AB, FLA_BOTTOM, FLA_Cntl_blocksize( cntl ) ); 00065 00066 FLA_Repart_2x1_to_3x1( AT, &A0, 00067 /* ** */ /* ** */ 00068 &A1, 00069 AB, &A2, b, FLA_BOTTOM ); 00070 00071 FLA_Repart_2x1_to_3x1( BT, &B0, 00072 /* ** */ /* ** */ 00073 &B1, 00074 BB, &B2, b, FLA_BOTTOM ); 00075 00076 FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02, 00077 /* ************* */ /* ******************** */ 00078 &C10, /**/ &C11, &C12, 00079 CBL, /**/ CBR, &C20, /**/ &C21, &C22, 00080 b, b, FLA_BR ); 00081 00082 /*------------------------------------------------------------*/ 00083 00084 /* C10 = C10 + A1 * B0' */ 00085 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, 00086 alpha, A1, B0, beta, C10, 00087 FLA_Cntl_sub_gemm1( cntl ) ); 00088 00089 /* C10 = C10 + B1 * A0' */ 00090 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, 00091 alpha, B1, A0, FLA_ONE, C10, 00092 FLA_Cntl_sub_gemm2( cntl ) ); 00093 00094 /* C11 = C11 + A1 * B1' + B1 * A1' */ 00095 FLA_Syr2k_internal( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, 00096 alpha, A1, B1, beta, C11, 00097 FLA_Cntl_sub_syr2k( cntl ) ); 00098 00099 /*------------------------------------------------------------*/ 00100 00101 FLA_Cont_with_3x1_to_2x1( &AT, A0, 00102 A1, 00103 /* ** */ /* ** */ 00104 &AB, A2, FLA_TOP ); 00105 00106 FLA_Cont_with_3x1_to_2x1( &BT, B0, 00107 B1, 00108 /* ** */ /* ** */ 00109 &BB, B2, FLA_TOP ); 00110 00111 FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02, 00112 C10, C11, /**/ C12, 00113 /* ************** */ /* ****************** */ 00114 &CBL, /**/ &CBR, C20, C21, /**/ C22, 00115 FLA_TL ); 00116 00117 } 00118 00119 return FLA_SUCCESS; 00120 }
FLA_Error FLA_Syr2k_ln_blk_var10 | ( | FLA_Obj | alpha, | |
FLA_Obj | A, | |||
FLA_Obj | B, | |||
FLA_Obj | beta, | |||
FLA_Obj | C, | |||
fla_syr2k_t * | cntl | |||
) |
References FLA_Cont_with_1x3_to_1x2(), FLA_Determine_blocksize(), FLA_Obj_width(), FLA_Part_1x2(), FLA_Repart_1x2_to_1x3(), and FLA_Syr2k_internal().
Referenced by FLA_Syr2k_ln().
00038 { 00039 FLA_Obj AL, AR, A0, A1, A2; 00040 00041 FLA_Obj BL, BR, B0, B1, B2; 00042 00043 dim_t b; 00044 00045 FLA_Part_1x2( A, &AL, &AR, 0, FLA_RIGHT ); 00046 00047 FLA_Part_1x2( B, &BL, &BR, 0, FLA_RIGHT ); 00048 00049 while ( FLA_Obj_width( AR ) < FLA_Obj_width( A ) ){ 00050 00051 b = FLA_Determine_blocksize( AL, FLA_LEFT, FLA_Cntl_blocksize( cntl ) ); 00052 00053 FLA_Repart_1x2_to_1x3( AL, /**/ AR, &A0, &A1, /**/ &A2, 00054 b, FLA_LEFT ); 00055 00056 FLA_Repart_1x2_to_1x3( BL, /**/ BR, &B0, &B1, /**/ &B2, 00057 b, FLA_LEFT ); 00058 00059 /*------------------------------------------------------------*/ 00060 00061 /* C = C + A1 * B1' + B1 * A1' */ 00062 FLA_Syr2k_internal( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, 00063 alpha, A1, B1, beta, C, 00064 FLA_Cntl_sub_syr2k( cntl ) ); 00065 00066 /*------------------------------------------------------------*/ 00067 00068 FLA_Cont_with_1x3_to_1x2( &AL, /**/ &AR, A0, /**/ A1, A2, 00069 FLA_RIGHT ); 00070 00071 FLA_Cont_with_1x3_to_1x2( &BL, /**/ &BR, B0, /**/ B1, B2, 00072 FLA_RIGHT ); 00073 00074 } 00075 00076 return FLA_SUCCESS; 00077 }
FLA_Error FLA_Syr2k_ln_blk_var2 | ( | FLA_Obj | alpha, | |
FLA_Obj | A, | |||
FLA_Obj | B, | |||
FLA_Obj | beta, | |||
FLA_Obj | C, | |||
fla_syr2k_t * | cntl | |||
) |
References FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_Obj_length(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), and FLA_Syr2k_internal().
Referenced by FLA_Syr2k_ln().
00038 { 00039 FLA_Obj AT, A0, 00040 AB, A1, 00041 A2; 00042 00043 FLA_Obj BT, B0, 00044 BB, B1, 00045 B2; 00046 00047 FLA_Obj CTL, CTR, C00, C01, C02, 00048 CBL, CBR, C10, C11, C12, 00049 C20, C21, C22; 00050 00051 dim_t b; 00052 00053 FLA_Part_2x1( A, &AT, 00054 &AB, 0, FLA_TOP ); 00055 00056 FLA_Part_2x1( B, &BT, 00057 &BB, 0, FLA_TOP ); 00058 00059 FLA_Part_2x2( C, &CTL, &CTR, 00060 &CBL, &CBR, 0, 0, FLA_TL ); 00061 00062 while ( FLA_Obj_length( AT ) < FLA_Obj_length( A ) ){ 00063 00064 b = FLA_Determine_blocksize( AB, FLA_BOTTOM, FLA_Cntl_blocksize( cntl ) ); 00065 00066 FLA_Repart_2x1_to_3x1( AT, &A0, 00067 /* ** */ /* ** */ 00068 &A1, 00069 AB, &A2, b, FLA_BOTTOM ); 00070 00071 FLA_Repart_2x1_to_3x1( BT, &B0, 00072 /* ** */ /* ** */ 00073 &B1, 00074 BB, &B2, b, FLA_BOTTOM ); 00075 00076 FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02, 00077 /* ************* */ /* ******************** */ 00078 &C10, /**/ &C11, &C12, 00079 CBL, /**/ CBR, &C20, /**/ &C21, &C22, 00080 b, b, FLA_BR ); 00081 00082 /*------------------------------------------------------------*/ 00083 00084 /* C10 = C10 + B1 * A0' */ 00085 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, 00086 alpha, B1, A0, beta, C10, 00087 FLA_Cntl_sub_gemm1( cntl ) ); 00088 00089 /* C21 = C21 + A2 * B1' */ 00090 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, 00091 alpha, A2, B1, FLA_ONE, C21, 00092 FLA_Cntl_sub_gemm2( cntl ) ); 00093 00094 /* C11 = C11 + A1 * B1' + B1 * A1' */ 00095 FLA_Syr2k_internal( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, 00096 alpha, A1, B1, beta, C11, 00097 FLA_Cntl_sub_syr2k( cntl ) ); 00098 00099 /*------------------------------------------------------------*/ 00100 00101 FLA_Cont_with_3x1_to_2x1( &AT, A0, 00102 A1, 00103 /* ** */ /* ** */ 00104 &AB, A2, FLA_TOP ); 00105 00106 FLA_Cont_with_3x1_to_2x1( &BT, B0, 00107 B1, 00108 /* ** */ /* ** */ 00109 &BB, B2, FLA_TOP ); 00110 00111 FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02, 00112 C10, C11, /**/ C12, 00113 /* ************** */ /* ****************** */ 00114 &CBL, /**/ &CBR, C20, C21, /**/ C22, 00115 FLA_TL ); 00116 00117 } 00118 00119 return FLA_SUCCESS; 00120 }
FLA_Error FLA_Syr2k_ln_blk_var3 | ( | FLA_Obj | alpha, | |
FLA_Obj | A, | |||
FLA_Obj | B, | |||
FLA_Obj | beta, | |||
FLA_Obj | C, | |||
fla_syr2k_t * | cntl | |||
) |
References FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_Obj_length(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), and FLA_Syr2k_internal().
Referenced by FLA_Syr2k_ln().
00036 { 00037 FLA_Obj AT, A0, 00038 AB, A1, 00039 A2; 00040 00041 FLA_Obj BT, B0, 00042 BB, B1, 00043 B2; 00044 00045 FLA_Obj CTL, CTR, C00, C01, C02, 00046 CBL, CBR, C10, C11, C12, 00047 C20, C21, C22; 00048 00049 dim_t b; 00050 00051 FLA_Part_2x1( A, &AT, 00052 &AB, 0, FLA_TOP ); 00053 00054 FLA_Part_2x1( B, &BT, 00055 &BB, 0, FLA_TOP ); 00056 00057 FLA_Part_2x2( C, &CTL, &CTR, 00058 &CBL, &CBR, 0, 0, FLA_TL ); 00059 00060 while ( FLA_Obj_length( AT ) < FLA_Obj_length( A ) ){ 00061 00062 b = FLA_Determine_blocksize( AB, FLA_BOTTOM, FLA_Cntl_blocksize( cntl ) ); 00063 00064 FLA_Repart_2x1_to_3x1( AT, &A0, 00065 /* ** */ /* ** */ 00066 &A1, 00067 AB, &A2, b, FLA_BOTTOM ); 00068 00069 FLA_Repart_2x1_to_3x1( BT, &B0, 00070 /* ** */ /* ** */ 00071 &B1, 00072 BB, &B2, b, FLA_BOTTOM ); 00073 00074 FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02, 00075 /* ************* */ /* ******************** */ 00076 &C10, /**/ &C11, &C12, 00077 CBL, /**/ CBR, &C20, /**/ &C21, &C22, 00078 b, b, FLA_BR ); 00079 00080 /*------------------------------------------------------------*/ 00081 00082 /* C10 = C10 + A1 * B0' */ 00083 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, 00084 alpha, A1, B0, beta, C10, 00085 FLA_Cntl_sub_gemm1( cntl ) ); 00086 00087 /* C21 = C21 + B2 * A1' */ 00088 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, 00089 alpha, B2, A1, FLA_ONE, C21, 00090 FLA_Cntl_sub_gemm2( cntl ) ); 00091 00092 /* C11 = C11 + A1 * B1' + B1 * A1' */ 00093 FLA_Syr2k_internal( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, 00094 alpha, A1, B1, beta, C11, 00095 FLA_Cntl_sub_syr2k( cntl ) ); 00096 00097 /*------------------------------------------------------------*/ 00098 00099 FLA_Cont_with_3x1_to_2x1( &AT, A0, 00100 A1, 00101 /* ** */ /* ** */ 00102 &AB, A2, FLA_TOP ); 00103 00104 FLA_Cont_with_3x1_to_2x1( &BT, B0, 00105 B1, 00106 /* ** */ /* ** */ 00107 &BB, B2, FLA_TOP ); 00108 00109 FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02, 00110 C10, C11, /**/ C12, 00111 /* ************** */ /* ****************** */ 00112 &CBL, /**/ &CBR, C20, C21, /**/ C22, 00113 FLA_TL ); 00114 00115 } 00116 00117 return FLA_SUCCESS; 00118 }
FLA_Error FLA_Syr2k_ln_blk_var4 | ( | FLA_Obj | alpha, | |
FLA_Obj | A, | |||
FLA_Obj | B, | |||
FLA_Obj | beta, | |||
FLA_Obj | C, | |||
fla_syr2k_t * | cntl | |||
) |
References FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_Obj_length(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), and FLA_Syr2k_internal().
Referenced by FLA_Syr2k_ln().
00038 { 00039 FLA_Obj AT, A0, 00040 AB, A1, 00041 A2; 00042 00043 FLA_Obj BT, B0, 00044 BB, B1, 00045 B2; 00046 00047 FLA_Obj CTL, CTR, C00, C01, C02, 00048 CBL, CBR, C10, C11, C12, 00049 C20, C21, C22; 00050 00051 dim_t b; 00052 00053 FLA_Part_2x1( A, &AT, 00054 &AB, 0, FLA_TOP ); 00055 00056 FLA_Part_2x1( B, &BT, 00057 &BB, 0, FLA_TOP ); 00058 00059 FLA_Part_2x2( C, &CTL, &CTR, 00060 &CBL, &CBR, 0, 0, FLA_TL ); 00061 00062 while ( FLA_Obj_length( AT ) < FLA_Obj_length( A ) ){ 00063 00064 b = FLA_Determine_blocksize( AB, FLA_BOTTOM, FLA_Cntl_blocksize( cntl ) ); 00065 00066 FLA_Repart_2x1_to_3x1( AT, &A0, 00067 /* ** */ /* ** */ 00068 &A1, 00069 AB, &A2, b, FLA_BOTTOM ); 00070 00071 FLA_Repart_2x1_to_3x1( BT, &B0, 00072 /* ** */ /* ** */ 00073 &B1, 00074 BB, &B2, b, FLA_BOTTOM ); 00075 00076 FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02, 00077 /* ************* */ /* ******************** */ 00078 &C10, /**/ &C11, &C12, 00079 CBL, /**/ CBR, &C20, /**/ &C21, &C22, 00080 b, b, FLA_BR ); 00081 00082 /*------------------------------------------------------------*/ 00083 00084 /* C21 = C21 + A2 * B1' */ 00085 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, 00086 alpha, A2, B1, beta, C21, 00087 FLA_Cntl_sub_gemm1( cntl ) ); 00088 00089 /* C21 = C21 + B2 * A1' */ 00090 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, 00091 alpha, B2, A1, FLA_ONE, C21, 00092 FLA_Cntl_sub_gemm2( cntl ) ); 00093 00094 /* C11 = C11 + A1 * B1' + B1 * A1' */ 00095 FLA_Syr2k_internal( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, 00096 alpha, A1, B1, beta, C11, 00097 FLA_Cntl_sub_syr2k( cntl ) ); 00098 00099 /*------------------------------------------------------------*/ 00100 00101 FLA_Cont_with_3x1_to_2x1( &AT, A0, 00102 A1, 00103 /* ** */ /* ** */ 00104 &AB, A2, FLA_TOP ); 00105 00106 FLA_Cont_with_3x1_to_2x1( &BT, B0, 00107 B1, 00108 /* ** */ /* ** */ 00109 &BB, B2, FLA_TOP ); 00110 00111 FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02, 00112 C10, C11, /**/ C12, 00113 /* ************** */ /* ****************** */ 00114 &CBL, /**/ &CBR, C20, C21, /**/ C22, 00115 FLA_TL ); 00116 00117 } 00118 00119 return FLA_SUCCESS; 00120 }
FLA_Error FLA_Syr2k_ln_blk_var5 | ( | FLA_Obj | alpha, | |
FLA_Obj | A, | |||
FLA_Obj | B, | |||
FLA_Obj | beta, | |||
FLA_Obj | C, | |||
fla_syr2k_t * | cntl | |||
) |
References FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_Obj_length(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), and FLA_Syr2k_internal().
Referenced by FLA_Syr2k_ln().
00038 { 00039 FLA_Obj AT, A0, 00040 AB, A1, 00041 A2; 00042 00043 FLA_Obj BT, B0, 00044 BB, B1, 00045 B2; 00046 00047 FLA_Obj CTL, CTR, C00, C01, C02, 00048 CBL, CBR, C10, C11, C12, 00049 C20, C21, C22; 00050 00051 dim_t b; 00052 00053 FLA_Part_2x1( A, &AT, 00054 &AB, 0, FLA_BOTTOM ); 00055 00056 FLA_Part_2x1( B, &BT, 00057 &BB, 0, FLA_BOTTOM ); 00058 00059 FLA_Part_2x2( C, &CTL, &CTR, 00060 &CBL, &CBR, 0, 0, FLA_BR ); 00061 00062 while ( FLA_Obj_length( AB ) < FLA_Obj_length( A ) ){ 00063 00064 b = FLA_Determine_blocksize( AT, FLA_TOP, FLA_Cntl_blocksize( cntl ) ); 00065 00066 FLA_Repart_2x1_to_3x1( AT, &A0, 00067 &A1, 00068 /* ** */ /* ** */ 00069 AB, &A2, b, FLA_TOP ); 00070 00071 FLA_Repart_2x1_to_3x1( BT, &B0, 00072 &B1, 00073 /* ** */ /* ** */ 00074 BB, &B2, b, FLA_TOP ); 00075 00076 FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, &C01, /**/ &C02, 00077 &C10, &C11, /**/ &C12, 00078 /* ************* */ /* ******************** */ 00079 CBL, /**/ CBR, &C20, &C21, /**/ &C22, 00080 b, b, FLA_TL ); 00081 00082 /*------------------------------------------------------------*/ 00083 00084 /* C21 = C21 + A2 * B1' */ 00085 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, 00086 alpha, A2, B1, beta, C21, 00087 FLA_Cntl_sub_gemm1( cntl ) ); 00088 00089 /* C21 = C21 + B2 * A1' */ 00090 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, 00091 alpha, B2, A1, FLA_ONE, C21, 00092 FLA_Cntl_sub_gemm2( cntl ) ); 00093 00094 /* C11 = C11 + A1 * B1' + B1 * A1' */ 00095 FLA_Syr2k_internal( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, 00096 alpha, A1, B1, beta, C11, 00097 FLA_Cntl_sub_syr2k( cntl ) ); 00098 00099 /*------------------------------------------------------------*/ 00100 00101 FLA_Cont_with_3x1_to_2x1( &AT, A0, 00102 /* ** */ /* ** */ 00103 A1, 00104 &AB, A2, FLA_BOTTOM ); 00105 00106 FLA_Cont_with_3x1_to_2x1( &BT, B0, 00107 /* ** */ /* ** */ 00108 B1, 00109 &BB, B2, FLA_BOTTOM ); 00110 00111 FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, /**/ C01, C02, 00112 /* ************** */ /* ****************** */ 00113 C10, /**/ C11, C12, 00114 &CBL, /**/ &CBR, C20, /**/ C21, C22, 00115 FLA_BR ); 00116 00117 } 00118 00119 return FLA_SUCCESS; 00120 }
FLA_Error FLA_Syr2k_ln_blk_var6 | ( | FLA_Obj | alpha, | |
FLA_Obj | A, | |||
FLA_Obj | B, | |||
FLA_Obj | beta, | |||
FLA_Obj | C, | |||
fla_syr2k_t * | cntl | |||
) |
References FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_Obj_length(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), and FLA_Syr2k_internal().
Referenced by FLA_Syr2k_ln().
00038 { 00039 FLA_Obj AT, A0, 00040 AB, A1, 00041 A2; 00042 00043 FLA_Obj BT, B0, 00044 BB, B1, 00045 B2; 00046 00047 FLA_Obj CTL, CTR, C00, C01, C02, 00048 CBL, CBR, C10, C11, C12, 00049 C20, C21, C22; 00050 00051 dim_t b; 00052 00053 FLA_Part_2x1( A, &AT, 00054 &AB, 0, FLA_BOTTOM ); 00055 00056 FLA_Part_2x1( B, &BT, 00057 &BB, 0, FLA_BOTTOM ); 00058 00059 FLA_Part_2x2( C, &CTL, &CTR, 00060 &CBL, &CBR, 0, 0, FLA_BR ); 00061 00062 while ( FLA_Obj_length( AB ) < FLA_Obj_length( A ) ){ 00063 00064 b = FLA_Determine_blocksize( AT, FLA_TOP, FLA_Cntl_blocksize( cntl ) ); 00065 00066 FLA_Repart_2x1_to_3x1( AT, &A0, 00067 &A1, 00068 /* ** */ /* ** */ 00069 AB, &A2, b, FLA_TOP ); 00070 00071 FLA_Repart_2x1_to_3x1( BT, &B0, 00072 &B1, 00073 /* ** */ /* ** */ 00074 BB, &B2, b, FLA_TOP ); 00075 00076 FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, &C01, /**/ &C02, 00077 &C10, &C11, /**/ &C12, 00078 /* ************* */ /* ******************** */ 00079 CBL, /**/ CBR, &C20, &C21, /**/ &C22, 00080 b, b, FLA_TL ); 00081 00082 /*------------------------------------------------------------*/ 00083 00084 /* C10 = C10 + A1 * B0' */ 00085 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, 00086 alpha, A1, B0, beta, C10, 00087 FLA_Cntl_sub_gemm1( cntl ) ); 00088 00089 /* C21 = C21 + B2 * A1' */ 00090 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, 00091 alpha, B2, A1, FLA_ONE, C21, 00092 FLA_Cntl_sub_gemm2( cntl ) ); 00093 00094 /* C11 = C11 + A1 * B1' + B1 * A1' */ 00095 FLA_Syr2k_internal( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, 00096 alpha, A1, B1, beta, C11, 00097 FLA_Cntl_sub_syr2k( cntl ) ); 00098 00099 /*------------------------------------------------------------*/ 00100 00101 FLA_Cont_with_3x1_to_2x1( &AT, A0, 00102 /* ** */ /* ** */ 00103 A1, 00104 &AB, A2, FLA_BOTTOM ); 00105 00106 FLA_Cont_with_3x1_to_2x1( &BT, B0, 00107 /* ** */ /* ** */ 00108 B1, 00109 &BB, B2, FLA_BOTTOM ); 00110 00111 FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, /**/ C01, C02, 00112 /* ************** */ /* ****************** */ 00113 C10, /**/ C11, C12, 00114 &CBL, /**/ &CBR, C20, /**/ C21, C22, 00115 FLA_BR ); 00116 00117 } 00118 00119 return FLA_SUCCESS; 00120 }
FLA_Error FLA_Syr2k_ln_blk_var7 | ( | FLA_Obj | alpha, | |
FLA_Obj | A, | |||
FLA_Obj | B, | |||
FLA_Obj | beta, | |||
FLA_Obj | C, | |||
fla_syr2k_t * | cntl | |||
) |
References FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_Obj_length(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), and FLA_Syr2k_internal().
Referenced by FLA_Syr2k_ln().
00038 { 00039 FLA_Obj AT, A0, 00040 AB, A1, 00041 A2; 00042 00043 FLA_Obj BT, B0, 00044 BB, B1, 00045 B2; 00046 00047 FLA_Obj CTL, CTR, C00, C01, C02, 00048 CBL, CBR, C10, C11, C12, 00049 C20, C21, C22; 00050 00051 dim_t b; 00052 00053 FLA_Part_2x1( A, &AT, 00054 &AB, 0, FLA_BOTTOM ); 00055 00056 FLA_Part_2x1( B, &BT, 00057 &BB, 0, FLA_BOTTOM ); 00058 00059 FLA_Part_2x2( C, &CTL, &CTR, 00060 &CBL, &CBR, 0, 0, FLA_BR ); 00061 00062 while ( FLA_Obj_length( AB ) < FLA_Obj_length( A ) ){ 00063 00064 b = FLA_Determine_blocksize( AT, FLA_TOP, FLA_Cntl_blocksize( cntl ) ); 00065 00066 FLA_Repart_2x1_to_3x1( AT, &A0, 00067 &A1, 00068 /* ** */ /* ** */ 00069 AB, &A2, b, FLA_TOP ); 00070 00071 FLA_Repart_2x1_to_3x1( BT, &B0, 00072 &B1, 00073 /* ** */ /* ** */ 00074 BB, &B2, b, FLA_TOP ); 00075 00076 FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, &C01, /**/ &C02, 00077 &C10, &C11, /**/ &C12, 00078 /* ************* */ /* ******************** */ 00079 CBL, /**/ CBR, &C20, &C21, /**/ &C22, 00080 b, b, FLA_TL ); 00081 00082 /*------------------------------------------------------------*/ 00083 00084 /* C10 = C10 + B1 * A0' */ 00085 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, 00086 alpha, B1, A0, beta, C10, 00087 FLA_Cntl_sub_gemm1( cntl ) ); 00088 00089 /* C21 = C21 + A2 * B1' */ 00090 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, 00091 alpha, A2, B1, FLA_ONE, C21, 00092 FLA_Cntl_sub_gemm2( cntl ) ); 00093 00094 /* C11 = C11 + A1 * B1' + B1 * A1' */ 00095 FLA_Syr2k_internal( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, 00096 alpha, A1, B1, beta, C11, 00097 FLA_Cntl_sub_syr2k( cntl ) ); 00098 00099 /*------------------------------------------------------------*/ 00100 00101 FLA_Cont_with_3x1_to_2x1( &AT, A0, 00102 /* ** */ /* ** */ 00103 A1, 00104 &AB, A2, FLA_BOTTOM ); 00105 00106 FLA_Cont_with_3x1_to_2x1( &BT, B0, 00107 /* ** */ /* ** */ 00108 B1, 00109 &BB, B2, FLA_BOTTOM ); 00110 00111 FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, /**/ C01, C02, 00112 /* ************** */ /* ****************** */ 00113 C10, /**/ C11, C12, 00114 &CBL, /**/ &CBR, C20, /**/ C21, C22, 00115 FLA_BR ); 00116 00117 } 00118 00119 return FLA_SUCCESS; 00120 }
FLA_Error FLA_Syr2k_ln_blk_var8 | ( | FLA_Obj | alpha, | |
FLA_Obj | A, | |||
FLA_Obj | B, | |||
FLA_Obj | beta, | |||
FLA_Obj | C, | |||
fla_syr2k_t * | cntl | |||
) |
References FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_Obj_length(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), and FLA_Syr2k_internal().
Referenced by FLA_Syr2k_ln().
00038 { 00039 FLA_Obj AT, A0, 00040 AB, A1, 00041 A2; 00042 00043 FLA_Obj BT, B0, 00044 BB, B1, 00045 B2; 00046 00047 FLA_Obj CTL, CTR, C00, C01, C02, 00048 CBL, CBR, C10, C11, C12, 00049 C20, C21, C22; 00050 00051 dim_t b; 00052 00053 FLA_Part_2x1( A, &AT, 00054 &AB, 0, FLA_BOTTOM ); 00055 00056 FLA_Part_2x1( B, &BT, 00057 &BB, 0, FLA_BOTTOM ); 00058 00059 FLA_Part_2x2( C, &CTL, &CTR, 00060 &CBL, &CBR, 0, 0, FLA_BR ); 00061 00062 while ( FLA_Obj_length( AB ) < FLA_Obj_length( A ) ){ 00063 00064 b = FLA_Determine_blocksize( AT, FLA_TOP, FLA_Cntl_blocksize( cntl ) ); 00065 00066 FLA_Repart_2x1_to_3x1( AT, &A0, 00067 &A1, 00068 /* ** */ /* ** */ 00069 AB, &A2, b, FLA_TOP ); 00070 00071 FLA_Repart_2x1_to_3x1( BT, &B0, 00072 &B1, 00073 /* ** */ /* ** */ 00074 BB, &B2, b, FLA_TOP ); 00075 00076 FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, &C01, /**/ &C02, 00077 &C10, &C11, /**/ &C12, 00078 /* ************* */ /* ******************** */ 00079 CBL, /**/ CBR, &C20, &C21, /**/ &C22, 00080 b, b, FLA_TL ); 00081 00082 /*------------------------------------------------------------*/ 00083 00084 /* C10 = C10 + A1 * B0' */ 00085 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, 00086 alpha, A1, B0, beta, C10, 00087 FLA_Cntl_sub_gemm1( cntl ) ); 00088 00089 /* C10 = C10 + B1 * A0' */ 00090 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, 00091 alpha, B1, A0, FLA_ONE, C10, 00092 FLA_Cntl_sub_gemm2( cntl ) ); 00093 00094 /* C11 = C11 + A1 * B1' + B1 * A1' */ 00095 FLA_Syr2k_internal( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, 00096 alpha, A1, B1, beta, C11, 00097 FLA_Cntl_sub_syr2k( cntl ) ); 00098 00099 /*------------------------------------------------------------*/ 00100 00101 FLA_Cont_with_3x1_to_2x1( &AT, A0, 00102 /* ** */ /* ** */ 00103 A1, 00104 &AB, A2, FLA_BOTTOM ); 00105 00106 FLA_Cont_with_3x1_to_2x1( &BT, B0, 00107 /* ** */ /* ** */ 00108 B1, 00109 &BB, B2, FLA_BOTTOM ); 00110 00111 FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, /**/ C01, C02, 00112 /* ************** */ /* ****************** */ 00113 C10, /**/ C11, C12, 00114 &CBL, /**/ &CBR, C20, /**/ C21, C22, 00115 FLA_BR ); 00116 00117 } 00118 00119 return FLA_SUCCESS; 00120 }
FLA_Error FLA_Syr2k_ln_blk_var9 | ( | FLA_Obj | alpha, | |
FLA_Obj | A, | |||
FLA_Obj | B, | |||
FLA_Obj | beta, | |||
FLA_Obj | C, | |||
fla_syr2k_t * | cntl | |||
) |
References FLA_Cont_with_1x3_to_1x2(), FLA_Determine_blocksize(), FLA_Obj_width(), FLA_Part_1x2(), FLA_Repart_1x2_to_1x3(), and FLA_Syr2k_internal().
Referenced by FLA_Syr2k_ln().
00036 { 00037 FLA_Obj AL, AR, A0, A1, A2; 00038 00039 FLA_Obj BL, BR, B0, B1, B2; 00040 00041 dim_t b; 00042 00043 FLA_Part_1x2( A, &AL, &AR, 0, FLA_LEFT ); 00044 00045 FLA_Part_1x2( B, &BL, &BR, 0, FLA_LEFT ); 00046 00047 while ( FLA_Obj_width( AL ) < FLA_Obj_width( A ) ){ 00048 00049 b = FLA_Determine_blocksize( AR, FLA_RIGHT, FLA_Cntl_blocksize( cntl ) ); 00050 00051 FLA_Repart_1x2_to_1x3( AL, /**/ AR, &A0, /**/ &A1, &A2, 00052 b, FLA_RIGHT ); 00053 00054 FLA_Repart_1x2_to_1x3( BL, /**/ BR, &B0, /**/ &B1, &B2, 00055 b, FLA_RIGHT ); 00056 00057 /*------------------------------------------------------------*/ 00058 00059 /* C = C + A1 * B1' + B1 * A1' */ 00060 FLA_Syr2k_internal( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, 00061 alpha, A1, B1, beta, C, 00062 FLA_Cntl_sub_syr2k( cntl ) ); 00063 00064 /*------------------------------------------------------------*/ 00065 00066 FLA_Cont_with_1x3_to_1x2( &AL, /**/ &AR, A0, A1, /**/ A2, 00067 FLA_LEFT ); 00068 00069 FLA_Cont_with_1x3_to_1x2( &BL, /**/ &BR, B0, B1, /**/ B2, 00070 FLA_LEFT ); 00071 00072 } 00073 00074 return FLA_SUCCESS; 00075 }
References FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Dot2s_external(), FLA_Gemv_external(), FLA_Obj_length(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), and FLA_Repart_2x2_to_3x3().
00038 { 00039 FLA_Obj AT, A0, 00040 AB, a1t, 00041 A2; 00042 00043 FLA_Obj BT, B0, 00044 BB, b1t, 00045 B2; 00046 00047 FLA_Obj CTL, CTR, C00, c01, C02, 00048 CBL, CBR, c10t, gamma11, c12t, 00049 C20, c21, C22; 00050 00051 00052 FLA_Part_2x1( A, &AT, 00053 &AB, 0, FLA_TOP ); 00054 00055 FLA_Part_2x1( B, &BT, 00056 &BB, 0, FLA_TOP ); 00057 00058 FLA_Part_2x2( C, &CTL, &CTR, 00059 &CBL, &CBR, 0, 0, FLA_TL ); 00060 00061 while ( FLA_Obj_length( AT ) < FLA_Obj_length( A ) ){ 00062 00063 00064 FLA_Repart_2x1_to_3x1( AT, &A0, 00065 /* ** */ /* ** */ 00066 &a1t, 00067 AB, &A2, 1, FLA_BOTTOM ); 00068 00069 FLA_Repart_2x1_to_3x1( BT, &B0, 00070 /* ** */ /* ** */ 00071 &b1t, 00072 BB, &B2, 1, FLA_BOTTOM ); 00073 00074 FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &c01, &C02, 00075 /* ************* */ /* ************************** */ 00076 &c10t, /**/ &gamma11, &c12t, 00077 CBL, /**/ CBR, &C20, /**/ &c21, &C22, 00078 1, 1, FLA_BR ); 00079 00080 /*------------------------------------------------------------*/ 00081 00082 /* c10t = c10t + a1t * B0' */ 00083 FLA_Gemv_external( FLA_NO_TRANSPOSE, alpha, B0, a1t, beta, c10t ); 00084 00085 /* c10t = c10t + b1t * A0' */ 00086 FLA_Gemv_external( FLA_NO_TRANSPOSE, alpha, A0, b1t, FLA_ONE, c10t ); 00087 00088 /* gamma11 = gamma11 + a1t * b1t' + b1t * a1t' */ 00089 FLA_Dot2s_external( alpha, a1t, b1t, beta, gamma11 ); 00090 00091 /*------------------------------------------------------------*/ 00092 00093 FLA_Cont_with_3x1_to_2x1( &AT, A0, 00094 a1t, 00095 /* ** */ /* ** */ 00096 &AB, A2, FLA_TOP ); 00097 00098 FLA_Cont_with_3x1_to_2x1( &BT, B0, 00099 b1t, 00100 /* ** */ /* ** */ 00101 &BB, B2, FLA_TOP ); 00102 00103 FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, c01, /**/ C02, 00104 c10t, gamma11, /**/ c12t, 00105 /* ************** */ /* ************************ */ 00106 &CBL, /**/ &CBR, C20, c21, /**/ C22, 00107 FLA_TL ); 00108 00109 } 00110 00111 return FLA_SUCCESS; 00112 }
References FLA_Cont_with_1x3_to_1x2(), FLA_Obj_width(), FLA_Part_1x2(), FLA_Repart_1x2_to_1x3(), FLA_Scalr_external(), and FLA_Syr2_external().
00038 { 00039 FLA_Obj AL, AR, A0, a1t, A2; 00040 00041 FLA_Obj BL, BR, B0, b1t, B2; 00042 00043 FLA_Scalr_external( FLA_LOWER_TRIANGULAR, beta, C ); 00044 00045 FLA_Part_1x2( A, &AL, &AR, 0, FLA_RIGHT ); 00046 00047 FLA_Part_1x2( B, &BL, &BR, 0, FLA_RIGHT ); 00048 00049 while ( FLA_Obj_width( AR ) < FLA_Obj_width( A ) ){ 00050 00051 00052 FLA_Repart_1x2_to_1x3( AL, /**/ AR, &A0, &a1t, /**/ &A2, 00053 1, FLA_LEFT ); 00054 00055 FLA_Repart_1x2_to_1x3( BL, /**/ BR, &B0, &b1t, /**/ &B2, 00056 1, FLA_LEFT ); 00057 00058 /*------------------------------------------------------------*/ 00059 00060 /* C = C + a1t * b1t' + b1t * a1t' */ 00061 FLA_Syr2_external( FLA_LOWER_TRIANGULAR, alpha, a1t, b1t, C ); 00062 00063 /*------------------------------------------------------------*/ 00064 00065 FLA_Cont_with_1x3_to_1x2( &AL, /**/ &AR, A0, /**/ a1t, A2, 00066 FLA_RIGHT ); 00067 00068 FLA_Cont_with_1x3_to_1x2( &BL, /**/ &BR, B0, /**/ b1t, B2, 00069 FLA_RIGHT ); 00070 00071 } 00072 00073 return FLA_SUCCESS; 00074 }
References FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Dot2s_external(), FLA_Gemv_external(), FLA_Obj_length(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), and FLA_Repart_2x2_to_3x3().
00038 { 00039 FLA_Obj AT, A0, 00040 AB, a1t, 00041 A2; 00042 00043 FLA_Obj BT, B0, 00044 BB, b1t, 00045 B2; 00046 00047 FLA_Obj CTL, CTR, C00, c01, C02, 00048 CBL, CBR, c10t, gamma11, c12t, 00049 C20, c21, C22; 00050 00051 00052 FLA_Part_2x1( A, &AT, 00053 &AB, 0, FLA_TOP ); 00054 00055 FLA_Part_2x1( B, &BT, 00056 &BB, 0, FLA_TOP ); 00057 00058 FLA_Part_2x2( C, &CTL, &CTR, 00059 &CBL, &CBR, 0, 0, FLA_TL ); 00060 00061 while ( FLA_Obj_length( AT ) < FLA_Obj_length( A ) ){ 00062 00063 00064 FLA_Repart_2x1_to_3x1( AT, &A0, 00065 /* ** */ /* ** */ 00066 &a1t, 00067 AB, &A2, 1, FLA_BOTTOM ); 00068 00069 FLA_Repart_2x1_to_3x1( BT, &B0, 00070 /* ** */ /* ** */ 00071 &b1t, 00072 BB, &B2, 1, FLA_BOTTOM ); 00073 00074 FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &c01, &C02, 00075 /* ************* */ /* ************************** */ 00076 &c10t, /**/ &gamma11, &c12t, 00077 CBL, /**/ CBR, &C20, /**/ &c21, &C22, 00078 1, 1, FLA_BR ); 00079 00080 /*------------------------------------------------------------*/ 00081 00082 /* c10t = c10t + b1t * A0' */ 00083 FLA_Gemv_external( FLA_NO_TRANSPOSE, alpha, A0, b1t, beta, c10t ); 00084 00085 /* c21 = c21 + A2 * b1t' */ 00086 FLA_Gemv_external( FLA_NO_TRANSPOSE, alpha, A2, b1t, FLA_ONE, c21 ); 00087 00088 /* gamma11 = gamma11 + a1t * b1t' + b1t * a1t' */ 00089 FLA_Dot2s_external( alpha, a1t, b1t, beta, gamma11 ); 00090 00091 /*------------------------------------------------------------*/ 00092 00093 FLA_Cont_with_3x1_to_2x1( &AT, A0, 00094 a1t, 00095 /* ** */ /* ** */ 00096 &AB, A2, FLA_TOP ); 00097 00098 FLA_Cont_with_3x1_to_2x1( &BT, B0, 00099 b1t, 00100 /* ** */ /* ** */ 00101 &BB, B2, FLA_TOP ); 00102 00103 FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, c01, /**/ C02, 00104 c10t, gamma11, /**/ c12t, 00105 /* ************** */ /* ************************ */ 00106 &CBL, /**/ &CBR, C20, c21, /**/ C22, 00107 FLA_TL ); 00108 00109 } 00110 00111 return FLA_SUCCESS; 00112 }
References FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Dot2s_external(), FLA_Gemv_external(), FLA_Obj_length(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), and FLA_Repart_2x2_to_3x3().
00036 { 00037 FLA_Obj AT, A0, 00038 AB, a1t, 00039 A2; 00040 00041 FLA_Obj BT, B0, 00042 BB, b1t, 00043 B2; 00044 00045 FLA_Obj CTL, CTR, C00, c01, C02, 00046 CBL, CBR, c10t, gamma11, c12t, 00047 C20, c21, C22; 00048 00049 00050 FLA_Part_2x1( A, &AT, 00051 &AB, 0, FLA_TOP ); 00052 00053 FLA_Part_2x1( B, &BT, 00054 &BB, 0, FLA_TOP ); 00055 00056 FLA_Part_2x2( C, &CTL, &CTR, 00057 &CBL, &CBR, 0, 0, FLA_TL ); 00058 00059 while ( FLA_Obj_length( AT ) < FLA_Obj_length( A ) ){ 00060 00061 00062 FLA_Repart_2x1_to_3x1( AT, &A0, 00063 /* ** */ /* ** */ 00064 &a1t, 00065 AB, &A2, 1, FLA_BOTTOM ); 00066 00067 FLA_Repart_2x1_to_3x1( BT, &B0, 00068 /* ** */ /* ** */ 00069 &b1t, 00070 BB, &B2, 1, FLA_BOTTOM ); 00071 00072 FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &c01, &C02, 00073 /* ************* */ /* ************************** */ 00074 &c10t, /**/ &gamma11, &c12t, 00075 CBL, /**/ CBR, &C20, /**/ &c21, &C22, 00076 1, 1, FLA_BR ); 00077 00078 /*------------------------------------------------------------*/ 00079 00080 /* c10t = c10t + a1t * B0' */ 00081 FLA_Gemv_external( FLA_NO_TRANSPOSE, alpha, B0, a1t, beta, c10t ); 00082 00083 /* c21 = c21 + B2 * a1t' */ 00084 FLA_Gemv_external( FLA_NO_TRANSPOSE, alpha, B2, a1t, FLA_ONE, c21 ); 00085 00086 /* gamma11 = gamma11 + a1t * b1t' + b1t * a1t' */ 00087 FLA_Dot2s_external( alpha, a1t, b1t, beta, gamma11 ); 00088 00089 /*------------------------------------------------------------*/ 00090 00091 FLA_Cont_with_3x1_to_2x1( &AT, A0, 00092 a1t, 00093 /* ** */ /* ** */ 00094 &AB, A2, FLA_TOP ); 00095 00096 FLA_Cont_with_3x1_to_2x1( &BT, B0, 00097 b1t, 00098 /* ** */ /* ** */ 00099 &BB, B2, FLA_TOP ); 00100 00101 FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, c01, /**/ C02, 00102 c10t, gamma11, /**/ c12t, 00103 /* ************** */ /* ************************ */ 00104 &CBL, /**/ &CBR, C20, c21, /**/ C22, 00105 FLA_TL ); 00106 00107 } 00108 00109 return FLA_SUCCESS; 00110 }
References FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Dot2s_external(), FLA_Gemv_external(), FLA_Obj_length(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), and FLA_Repart_2x2_to_3x3().
00038 { 00039 FLA_Obj AT, A0, 00040 AB, a1t, 00041 A2; 00042 00043 FLA_Obj BT, B0, 00044 BB, b1t, 00045 B2; 00046 00047 FLA_Obj CTL, CTR, C00, c01, C02, 00048 CBL, CBR, c10t, gamma11, c12t, 00049 C20, c21, C22; 00050 00051 00052 FLA_Part_2x1( A, &AT, 00053 &AB, 0, FLA_TOP ); 00054 00055 FLA_Part_2x1( B, &BT, 00056 &BB, 0, FLA_TOP ); 00057 00058 FLA_Part_2x2( C, &CTL, &CTR, 00059 &CBL, &CBR, 0, 0, FLA_TL ); 00060 00061 while ( FLA_Obj_length( AT ) < FLA_Obj_length( A ) ){ 00062 00063 00064 FLA_Repart_2x1_to_3x1( AT, &A0, 00065 /* ** */ /* ** */ 00066 &a1t, 00067 AB, &A2, 1, FLA_BOTTOM ); 00068 00069 FLA_Repart_2x1_to_3x1( BT, &B0, 00070 /* ** */ /* ** */ 00071 &b1t, 00072 BB, &B2, 1, FLA_BOTTOM ); 00073 00074 FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &c01, &C02, 00075 /* ************* */ /* ************************** */ 00076 &c10t, /**/ &gamma11, &c12t, 00077 CBL, /**/ CBR, &C20, /**/ &c21, &C22, 00078 1, 1, FLA_BR ); 00079 00080 /*------------------------------------------------------------*/ 00081 00082 /* c21 = c21 + A2 * b1t' */ 00083 FLA_Gemv_external( FLA_NO_TRANSPOSE, alpha, A2, b1t, beta, c21 ); 00084 00085 /* c21 = c21 + B2 * a1t' */ 00086 FLA_Gemv_external( FLA_NO_TRANSPOSE, alpha, B2, a1t, FLA_ONE, c21 ); 00087 00088 /* gamma11 = gamma11 + a1t * b1t' + b1t * a1t' */ 00089 FLA_Dot2s_external( alpha, a1t, b1t, beta, gamma11 ); 00090 00091 /*------------------------------------------------------------*/ 00092 00093 FLA_Cont_with_3x1_to_2x1( &AT, A0, 00094 a1t, 00095 /* ** */ /* ** */ 00096 &AB, A2, FLA_TOP ); 00097 00098 FLA_Cont_with_3x1_to_2x1( &BT, B0, 00099 b1t, 00100 /* ** */ /* ** */ 00101 &BB, B2, FLA_TOP ); 00102 00103 FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, c01, /**/ C02, 00104 c10t, gamma11, /**/ c12t, 00105 /* ************** */ /* ************************ */ 00106 &CBL, /**/ &CBR, C20, c21, /**/ C22, 00107 FLA_TL ); 00108 00109 } 00110 00111 return FLA_SUCCESS; 00112 }
References FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Dot2s_external(), FLA_Gemv_external(), FLA_Obj_length(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), and FLA_Repart_2x2_to_3x3().
00038 { 00039 FLA_Obj AT, A0, 00040 AB, a1t, 00041 A2; 00042 00043 FLA_Obj BT, B0, 00044 BB, b1t, 00045 B2; 00046 00047 FLA_Obj CTL, CTR, C00, c01, C02, 00048 CBL, CBR, c10t, gamma11, c12t, 00049 C20, c21, C22; 00050 00051 00052 FLA_Part_2x1( A, &AT, 00053 &AB, 0, FLA_BOTTOM ); 00054 00055 FLA_Part_2x1( B, &BT, 00056 &BB, 0, FLA_BOTTOM ); 00057 00058 FLA_Part_2x2( C, &CTL, &CTR, 00059 &CBL, &CBR, 0, 0, FLA_BR ); 00060 00061 while ( FLA_Obj_length( AB ) < FLA_Obj_length( A ) ){ 00062 00063 00064 FLA_Repart_2x1_to_3x1( AT, &A0, 00065 &a1t, 00066 /* ** */ /* ** */ 00067 AB, &A2, 1, FLA_TOP ); 00068 00069 FLA_Repart_2x1_to_3x1( BT, &B0, 00070 &b1t, 00071 /* ** */ /* ** */ 00072 BB, &B2, 1, FLA_TOP ); 00073 00074 FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, &c01, /**/ &C02, 00075 &c10t, &gamma11, /**/ &c12t, 00076 /* ************* */ /* ************************** */ 00077 CBL, /**/ CBR, &C20, &c21, /**/ &C22, 00078 1, 1, FLA_TL ); 00079 00080 /*------------------------------------------------------------*/ 00081 00082 /* c21 = c21 + A2 * b1t' */ 00083 FLA_Gemv_external( FLA_NO_TRANSPOSE, alpha, A2, b1t, beta, c21 ); 00084 00085 /* c21 = c21 + B2 * a1t' */ 00086 FLA_Gemv_external( FLA_NO_TRANSPOSE, alpha, B2, a1t, FLA_ONE, c21 ); 00087 00088 /* gamma11 = gamma11 + a1t * b1t' + b1t * a1t' */ 00089 FLA_Dot2s_external( alpha, a1t, b1t, beta, gamma11 ); 00090 00091 /*------------------------------------------------------------*/ 00092 00093 FLA_Cont_with_3x1_to_2x1( &AT, A0, 00094 /* ** */ /* ** */ 00095 a1t, 00096 &AB, A2, FLA_BOTTOM ); 00097 00098 FLA_Cont_with_3x1_to_2x1( &BT, B0, 00099 /* ** */ /* ** */ 00100 b1t, 00101 &BB, B2, FLA_BOTTOM ); 00102 00103 FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, /**/ c01, C02, 00104 /* ************** */ /* ************************ */ 00105 c10t, /**/ gamma11, c12t, 00106 &CBL, /**/ &CBR, C20, /**/ c21, C22, 00107 FLA_BR ); 00108 00109 } 00110 00111 return FLA_SUCCESS; 00112 }
References FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Dot2s_external(), FLA_Gemv_external(), FLA_Obj_length(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), and FLA_Repart_2x2_to_3x3().
00038 { 00039 FLA_Obj AT, A0, 00040 AB, a1t, 00041 A2; 00042 00043 FLA_Obj BT, B0, 00044 BB, b1t, 00045 B2; 00046 00047 FLA_Obj CTL, CTR, C00, c01, C02, 00048 CBL, CBR, c10t, gamma11, c12t, 00049 C20, c21, C22; 00050 00051 00052 FLA_Part_2x1( A, &AT, 00053 &AB, 0, FLA_BOTTOM ); 00054 00055 FLA_Part_2x1( B, &BT, 00056 &BB, 0, FLA_BOTTOM ); 00057 00058 FLA_Part_2x2( C, &CTL, &CTR, 00059 &CBL, &CBR, 0, 0, FLA_BR ); 00060 00061 while ( FLA_Obj_length( AB ) < FLA_Obj_length( A ) ){ 00062 00063 00064 FLA_Repart_2x1_to_3x1( AT, &A0, 00065 &a1t, 00066 /* ** */ /* ** */ 00067 AB, &A2, 1, FLA_TOP ); 00068 00069 FLA_Repart_2x1_to_3x1( BT, &B0, 00070 &b1t, 00071 /* ** */ /* ** */ 00072 BB, &B2, 1, FLA_TOP ); 00073 00074 FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, &c01, /**/ &C02, 00075 &c10t, &gamma11, /**/ &c12t, 00076 /* ************* */ /* ************************** */ 00077 CBL, /**/ CBR, &C20, &c21, /**/ &C22, 00078 1, 1, FLA_TL ); 00079 00080 /*------------------------------------------------------------*/ 00081 00082 /* c10t = c10t + a1t * B0' */ 00083 FLA_Gemv_external( FLA_NO_TRANSPOSE, alpha, B0, a1t, beta, c10t ); 00084 00085 /* c21 = c21 + B2 * a1t' */ 00086 FLA_Gemv_external( FLA_NO_TRANSPOSE, alpha, B2, a1t, FLA_ONE, c21 ); 00087 00088 /* gamma11 = gamma11 + a1t * b1t' + b1t * a1t' */ 00089 FLA_Dot2s_external( alpha, a1t, b1t, beta, gamma11 ); 00090 00091 /*------------------------------------------------------------*/ 00092 00093 FLA_Cont_with_3x1_to_2x1( &AT, A0, 00094 /* ** */ /* ** */ 00095 a1t, 00096 &AB, A2, FLA_BOTTOM ); 00097 00098 FLA_Cont_with_3x1_to_2x1( &BT, B0, 00099 /* ** */ /* ** */ 00100 b1t, 00101 &BB, B2, FLA_BOTTOM ); 00102 00103 FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, /**/ c01, C02, 00104 /* ************** */ /* ************************ */ 00105 c10t, /**/ gamma11, c12t, 00106 &CBL, /**/ &CBR, C20, /**/ c21, C22, 00107 FLA_BR ); 00108 00109 } 00110 00111 return FLA_SUCCESS; 00112 }
References FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Dot2s_external(), FLA_Gemv_external(), FLA_Obj_length(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), and FLA_Repart_2x2_to_3x3().
00038 { 00039 FLA_Obj AT, A0, 00040 AB, a1t, 00041 A2; 00042 00043 FLA_Obj BT, B0, 00044 BB, b1t, 00045 B2; 00046 00047 FLA_Obj CTL, CTR, C00, c01, C02, 00048 CBL, CBR, c10t, gamma11, c12t, 00049 C20, c21, C22; 00050 00051 00052 FLA_Part_2x1( A, &AT, 00053 &AB, 0, FLA_BOTTOM ); 00054 00055 FLA_Part_2x1( B, &BT, 00056 &BB, 0, FLA_BOTTOM ); 00057 00058 FLA_Part_2x2( C, &CTL, &CTR, 00059 &CBL, &CBR, 0, 0, FLA_BR ); 00060 00061 while ( FLA_Obj_length( AB ) < FLA_Obj_length( A ) ){ 00062 00063 00064 FLA_Repart_2x1_to_3x1( AT, &A0, 00065 &a1t, 00066 /* ** */ /* ** */ 00067 AB, &A2, 1, FLA_TOP ); 00068 00069 FLA_Repart_2x1_to_3x1( BT, &B0, 00070 &b1t, 00071 /* ** */ /* ** */ 00072 BB, &B2, 1, FLA_TOP ); 00073 00074 FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, &c01, /**/ &C02, 00075 &c10t, &gamma11, /**/ &c12t, 00076 /* ************* */ /* ************************** */ 00077 CBL, /**/ CBR, &C20, &c21, /**/ &C22, 00078 1, 1, FLA_TL ); 00079 00080 /*------------------------------------------------------------*/ 00081 00082 /* c10t = c10t + b1t * A0' */ 00083 FLA_Gemv_external( FLA_NO_TRANSPOSE, alpha, A0, b1t, beta, c10t ); 00084 00085 /* c21 = c21 + A2 * b1t' */ 00086 FLA_Gemv_external( FLA_NO_TRANSPOSE, alpha, A2, b1t, FLA_ONE, c21 ); 00087 00088 /* gamma11 = gamma11 + a1t * b1t' + b1t * a1t' */ 00089 FLA_Dot2s_external( alpha, a1t, b1t, beta, gamma11 ); 00090 00091 /*------------------------------------------------------------*/ 00092 00093 FLA_Cont_with_3x1_to_2x1( &AT, A0, 00094 /* ** */ /* ** */ 00095 a1t, 00096 &AB, A2, FLA_BOTTOM ); 00097 00098 FLA_Cont_with_3x1_to_2x1( &BT, B0, 00099 /* ** */ /* ** */ 00100 b1t, 00101 &BB, B2, FLA_BOTTOM ); 00102 00103 FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, /**/ c01, C02, 00104 /* ************** */ /* ************************ */ 00105 c10t, /**/ gamma11, c12t, 00106 &CBL, /**/ &CBR, C20, /**/ c21, C22, 00107 FLA_BR ); 00108 00109 } 00110 00111 return FLA_SUCCESS; 00112 }
References FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Dot2s_external(), FLA_Gemv_external(), FLA_Obj_length(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), and FLA_Repart_2x2_to_3x3().
00038 { 00039 FLA_Obj AT, A0, 00040 AB, a1t, 00041 A2; 00042 00043 FLA_Obj BT, B0, 00044 BB, b1t, 00045 B2; 00046 00047 FLA_Obj CTL, CTR, C00, c01, C02, 00048 CBL, CBR, c10t, gamma11, c12t, 00049 C20, c21, C22; 00050 00051 00052 FLA_Part_2x1( A, &AT, 00053 &AB, 0, FLA_BOTTOM ); 00054 00055 FLA_Part_2x1( B, &BT, 00056 &BB, 0, FLA_BOTTOM ); 00057 00058 FLA_Part_2x2( C, &CTL, &CTR, 00059 &CBL, &CBR, 0, 0, FLA_BR ); 00060 00061 while ( FLA_Obj_length( AB ) < FLA_Obj_length( A ) ){ 00062 00063 00064 FLA_Repart_2x1_to_3x1( AT, &A0, 00065 &a1t, 00066 /* ** */ /* ** */ 00067 AB, &A2, 1, FLA_TOP ); 00068 00069 FLA_Repart_2x1_to_3x1( BT, &B0, 00070 &b1t, 00071 /* ** */ /* ** */ 00072 BB, &B2, 1, FLA_TOP ); 00073 00074 FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, &c01, /**/ &C02, 00075 &c10t, &gamma11, /**/ &c12t, 00076 /* ************* */ /* ************************** */ 00077 CBL, /**/ CBR, &C20, &c21, /**/ &C22, 00078 1, 1, FLA_TL ); 00079 00080 /*------------------------------------------------------------*/ 00081 00082 /* c10t = c10t + a1t * B0' */ 00083 FLA_Gemv_external( FLA_NO_TRANSPOSE, alpha, B0, a1t, beta, c10t ); 00084 00085 /* c10t = c10t + b1t * A0' */ 00086 FLA_Gemv_external( FLA_NO_TRANSPOSE, alpha, A0, b1t, FLA_ONE, c10t ); 00087 00088 /* gamma11 = gamma11 + a1t * b1t' + b1t * a1t' */ 00089 FLA_Dot2s_external( alpha, a1t, b1t, beta, gamma11 ); 00090 00091 /*------------------------------------------------------------*/ 00092 00093 FLA_Cont_with_3x1_to_2x1( &AT, A0, 00094 /* ** */ /* ** */ 00095 a1t, 00096 &AB, A2, FLA_BOTTOM ); 00097 00098 FLA_Cont_with_3x1_to_2x1( &BT, B0, 00099 /* ** */ /* ** */ 00100 b1t, 00101 &BB, B2, FLA_BOTTOM ); 00102 00103 FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, /**/ c01, C02, 00104 /* ************** */ /* ************************ */ 00105 c10t, /**/ gamma11, c12t, 00106 &CBL, /**/ &CBR, C20, /**/ c21, C22, 00107 FLA_BR ); 00108 00109 } 00110 00111 return FLA_SUCCESS; 00112 }
References FLA_Cont_with_1x3_to_1x2(), FLA_Obj_width(), FLA_Part_1x2(), FLA_Repart_1x2_to_1x3(), FLA_Scalr_external(), and FLA_Syr2_external().
00038 { 00039 FLA_Obj AL, AR, A0, a1t, A2; 00040 00041 FLA_Obj BL, BR, B0, b1t, B2; 00042 00043 FLA_Scalr_external( FLA_LOWER_TRIANGULAR, beta, C ); 00044 00045 FLA_Part_1x2( A, &AL, &AR, 0, FLA_LEFT ); 00046 00047 FLA_Part_1x2( B, &BL, &BR, 0, FLA_LEFT ); 00048 00049 while ( FLA_Obj_width( AL ) < FLA_Obj_width( A ) ){ 00050 00051 00052 FLA_Repart_1x2_to_1x3( AL, /**/ AR, &A0, /**/ &a1t, &A2, 00053 1, FLA_RIGHT ); 00054 00055 FLA_Repart_1x2_to_1x3( BL, /**/ BR, &B0, /**/ &b1t, &B2, 00056 1, FLA_RIGHT ); 00057 00058 /*------------------------------------------------------------*/ 00059 00060 /* C = C + a1t * b1t' + b1t * a1t' */ 00061 FLA_Syr2_external( FLA_LOWER_TRIANGULAR, alpha, a1t, b1t, C ); 00062 00063 /*------------------------------------------------------------*/ 00064 00065 FLA_Cont_with_1x3_to_1x2( &AL, /**/ &AR, A0, a1t, /**/ A2, 00066 FLA_LEFT ); 00067 00068 FLA_Cont_with_1x3_to_1x2( &BL, /**/ &BR, B0, b1t, /**/ B2, 00069 FLA_LEFT ); 00070 00071 } 00072 00073 return FLA_SUCCESS; 00074 }