Go to the source code of this file.
FLA_Error FLA_Sylv_nn_blk_var1 | ( | FLA_Obj | isgn, | |
FLA_Obj | A, | |||
FLA_Obj | B, | |||
FLA_Obj | C, | |||
FLA_Obj | scale, | |||
fla_sylv_t * | cntl | |||
) |
References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_nn().
00038 { 00039 FLA_Obj ATL, ATR, A00, A01, A02, 00040 ABL, ABR, A10, A11, A12, 00041 A20, A21, A22; 00042 00043 FLA_Obj BTL, BTR, B00, B01, B02, 00044 BBL, BBR, B10, B11, B12, 00045 B20, B21, B22; 00046 00047 FLA_Obj CTL, CTR, C00, C01, C02, 00048 CBL, CBR, C10, C11, C12, 00049 C20, C21, C22; 00050 00051 dim_t b; 00052 00053 FLA_Part_2x2( A, &ATL, &ATR, 00054 &ABL, &ABR, 0, 0, FLA_BR ); 00055 00056 FLA_Part_2x2( B, &BTL, &BTR, 00057 &BBL, &BBR, 0, 0, FLA_TL ); 00058 00059 FLA_Part_2x2( C, &CTL, &CTR, 00060 &CBL, &CBR, 0, 0, FLA_BL ); 00061 00062 while ( FLA_Obj_length( ABR ) < FLA_Obj_length( A ) ){ 00063 00064 b = FLA_Determine_blocksize( CTR, FLA_TR, FLA_Cntl_blocksize( cntl ) ); 00065 00066 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, &A01, /**/ &A02, 00067 &A10, &A11, /**/ &A12, 00068 /* ************* */ /* ******************** */ 00069 ABL, /**/ ABR, &A20, &A21, /**/ &A22, 00070 b, b, FLA_TL ); 00071 00072 FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02, 00073 /* ************* */ /* ******************** */ 00074 &B10, /**/ &B11, &B12, 00075 BBL, /**/ BBR, &B20, /**/ &B21, &B22, 00076 b, b, FLA_BR ); 00077 00078 FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02, 00079 &C10, /**/ &C11, &C12, 00080 /* ************* */ /* ******************** */ 00081 CBL, /**/ CBR, &C20, /**/ &C21, &C22, 00082 b, b, FLA_TR ); 00083 00084 // Loop Invariant: 00085 // CTL = CTL 00086 // CTR = CTR 00087 // CBL = sylv( ABR, BTL, CBL ) 00088 // CBR = CBR 00089 00090 /*------------------------------------------------------------*/ 00091 00092 // C10 = sylv( A11, B00, C10 - A12 * C20 ); 00093 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00094 FLA_MINUS_ONE, A12, C20, FLA_ONE, C10, 00095 FLA_Cntl_sub_gemm1( cntl ) ); 00096 00097 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00098 isgn, A11, B00, C10, scale, 00099 FLA_Cntl_sub_sylv1( cntl ) ); 00100 00101 // C21 = sylv( A22, B11, C21 -/+ C20 * B01 ); 00102 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00103 FLA_NEGATE( isgn ), C20, B01, FLA_ONE, C21, 00104 FLA_Cntl_sub_gemm2( cntl ) ); 00105 00106 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00107 isgn, A22, B11, C21, scale, 00108 FLA_Cntl_sub_sylv2( cntl ) ); 00109 00110 // C11 = sylv( A11, B11, C11 - A12 * C21 -/+ C10 * B01 ); 00111 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00112 FLA_NEGATE( isgn ), C10, B01, FLA_ONE, C11, 00113 FLA_Cntl_sub_gemm3( cntl ) ); 00114 00115 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00116 FLA_MINUS_ONE, A12, C21, FLA_ONE, C11, 00117 FLA_Cntl_sub_gemm4( cntl ) ); 00118 00119 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00120 isgn, A11, B11, C11, scale, 00121 FLA_Cntl_sub_sylv3( cntl ) ); 00122 00123 /*------------------------------------------------------------*/ 00124 00125 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, /**/ A01, A02, 00126 /* ************** */ /* ****************** */ 00127 A10, /**/ A11, A12, 00128 &ABL, /**/ &ABR, A20, /**/ A21, A22, 00129 FLA_BR ); 00130 00131 FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02, 00132 B10, B11, /**/ B12, 00133 /* ************** */ /* ****************** */ 00134 &BBL, /**/ &BBR, B20, B21, /**/ B22, 00135 FLA_TL ); 00136 00137 FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02, 00138 /* ************** */ /* ****************** */ 00139 C10, C11, /**/ C12, 00140 &CBL, /**/ &CBR, C20, C21, /**/ C22, 00141 FLA_BL ); 00142 00143 } 00144 00145 return FLA_SUCCESS; 00146 }
FLA_Error FLA_Sylv_nn_blk_var10 | ( | FLA_Obj | isgn, | |
FLA_Obj | A, | |||
FLA_Obj | B, | |||
FLA_Obj | C, | |||
FLA_Obj | scale, | |||
fla_sylv_t * | cntl | |||
) |
References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_nn().
00038 { 00039 FLA_Obj ATL, ATR, A00, A01, A02, 00040 ABL, ABR, A10, A11, A12, 00041 A20, A21, A22; 00042 00043 FLA_Obj BTL, BTR, B00, B01, B02, 00044 BBL, BBR, B10, B11, B12, 00045 B20, B21, B22; 00046 00047 FLA_Obj CTL, CTR, C00, C01, C02, 00048 CBL, CBR, C10, C11, C12, 00049 C20, C21, C22; 00050 00051 dim_t b; 00052 00053 FLA_Part_2x2( A, &ATL, &ATR, 00054 &ABL, &ABR, 0, 0, FLA_BR ); 00055 00056 FLA_Part_2x2( B, &BTL, &BTR, 00057 &BBL, &BBR, 0, 0, FLA_TL ); 00058 00059 FLA_Part_2x2( C, &CTL, &CTR, 00060 &CBL, &CBR, 0, 0, FLA_BL ); 00061 00062 while ( FLA_Obj_length( ABR ) < FLA_Obj_length( A ) ){ 00063 00064 b = FLA_Determine_blocksize( CTR, FLA_TR, FLA_Cntl_blocksize( cntl ) ); 00065 00066 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, &A01, /**/ &A02, 00067 &A10, &A11, /**/ &A12, 00068 /* ************* */ /* ******************** */ 00069 ABL, /**/ ABR, &A20, &A21, /**/ &A22, 00070 b, b, FLA_TL ); 00071 00072 FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02, 00073 /* ************* */ /* ******************** */ 00074 &B10, /**/ &B11, &B12, 00075 BBL, /**/ BBR, &B20, /**/ &B21, &B22, 00076 b, b, FLA_BR ); 00077 00078 FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02, 00079 &C10, /**/ &C11, &C12, 00080 /* ************* */ /* ******************** */ 00081 CBL, /**/ CBR, &C20, /**/ &C21, &C22, 00082 b, b, FLA_TR ); 00083 00084 // Loop Invariant: 00085 // CTL = sylv( ATL, BTL, CTL - ATR * sylv( ABR, BTL, CBL ) ) 00086 // CTR = CTR - sylv( ATL, BTL, CTL - ATR * sylv( ABR, BTL, CBL ) ) * BTR 00087 // CBL = sylv( ABR, BTL, CBL ) 00088 // CBR = CBR - sylv( ABR, BTL, CBL ) * BTR 00089 00090 /*------------------------------------------------------------*/ 00091 00092 // C21 = sylv( A22, B11, C21 ); 00093 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00094 isgn, A22, B11, C21, scale, 00095 FLA_Cntl_sub_sylv1( cntl ) ); 00096 00097 // C11 = sylv( A11, B11, C11 - A12 * C21 ); 00098 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00099 FLA_MINUS_ONE, A12, C21, FLA_ONE, C11, 00100 FLA_Cntl_sub_gemm1( cntl ) ); 00101 00102 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00103 isgn, A11, B11, C11, scale, 00104 FLA_Cntl_sub_sylv2( cntl ) ); 00105 00106 // C01 = sylv( A00, B11, C01 - A01 * C11 - A02 * C21 ); 00107 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00108 FLA_MINUS_ONE, A02, C21, FLA_ONE, C01, 00109 FLA_Cntl_sub_gemm2( cntl ) ); 00110 00111 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00112 FLA_MINUS_ONE, A01, C11, FLA_ONE, C01, 00113 FLA_Cntl_sub_gemm3( cntl ) ); 00114 00115 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00116 isgn, A00, B11, C01, scale, 00117 FLA_Cntl_sub_sylv3( cntl ) ); 00118 00119 // C02 = C02 -/+ C01 * B12; 00120 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00121 FLA_NEGATE( isgn ), C01, B12, FLA_ONE, C02, 00122 FLA_Cntl_sub_gemm4( cntl ) ); 00123 00124 // C12 = C12 -/+ C11 * B12; 00125 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00126 FLA_NEGATE( isgn ), C11, B12, FLA_ONE, C12, 00127 FLA_Cntl_sub_gemm5( cntl ) ); 00128 00129 // C22 = C22 -/+ C21 * B12; 00130 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00131 FLA_NEGATE( isgn ), C21, B12, FLA_ONE, C22, 00132 FLA_Cntl_sub_gemm6( cntl ) ); 00133 00134 /*------------------------------------------------------------*/ 00135 00136 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, /**/ A01, A02, 00137 /* ************** */ /* ****************** */ 00138 A10, /**/ A11, A12, 00139 &ABL, /**/ &ABR, A20, /**/ A21, A22, 00140 FLA_BR ); 00141 00142 FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02, 00143 B10, B11, /**/ B12, 00144 /* ************** */ /* ****************** */ 00145 &BBL, /**/ &BBR, B20, B21, /**/ B22, 00146 FLA_TL ); 00147 00148 FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02, 00149 /* ************** */ /* ****************** */ 00150 C10, C11, /**/ C12, 00151 &CBL, /**/ &CBR, C20, C21, /**/ C22, 00152 FLA_BL ); 00153 00154 } 00155 00156 return FLA_SUCCESS; 00157 }
FLA_Error FLA_Sylv_nn_blk_var11 | ( | FLA_Obj | isgn, | |
FLA_Obj | A, | |||
FLA_Obj | B, | |||
FLA_Obj | C, | |||
FLA_Obj | scale, | |||
fla_sylv_t * | cntl | |||
) |
References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_nn().
00038 { 00039 FLA_Obj ATL, ATR, A00, A01, A02, 00040 ABL, ABR, A10, A11, A12, 00041 A20, A21, A22; 00042 00043 FLA_Obj BTL, BTR, B00, B01, B02, 00044 BBL, BBR, B10, B11, B12, 00045 B20, B21, B22; 00046 00047 FLA_Obj CTL, CTR, C00, C01, C02, 00048 CBL, CBR, C10, C11, C12, 00049 C20, C21, C22; 00050 00051 dim_t b; 00052 00053 FLA_Part_2x2( A, &ATL, &ATR, 00054 &ABL, &ABR, 0, 0, FLA_BR ); 00055 00056 FLA_Part_2x2( B, &BTL, &BTR, 00057 &BBL, &BBR, 0, 0, FLA_TL ); 00058 00059 FLA_Part_2x2( C, &CTL, &CTR, 00060 &CBL, &CBR, 0, 0, FLA_BL ); 00061 00062 while ( FLA_Obj_length( ABR ) < FLA_Obj_length( A ) ){ 00063 00064 b = FLA_Determine_blocksize( CTR, FLA_TR, FLA_Cntl_blocksize( cntl ) ); 00065 00066 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, &A01, /**/ &A02, 00067 &A10, &A11, /**/ &A12, 00068 /* ************* */ /* ******************** */ 00069 ABL, /**/ ABR, &A20, &A21, /**/ &A22, 00070 b, b, FLA_TL ); 00071 00072 FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02, 00073 /* ************* */ /* ******************** */ 00074 &B10, /**/ &B11, &B12, 00075 BBL, /**/ BBR, &B20, /**/ &B21, &B22, 00076 b, b, FLA_BR ); 00077 00078 FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02, 00079 &C10, /**/ &C11, &C12, 00080 /* ************* */ /* ******************** */ 00081 CBL, /**/ CBR, &C20, /**/ &C21, &C22, 00082 b, b, FLA_TR ); 00083 00084 // Loop Invariant: 00085 // CTL = CTL - ATR * sylv( ABR, BTL, CBL ) 00086 // CTR = CTR - ATR * sylv( ABR, BBR, CBR - sylv( ABR, BTL, CBL ) * BTR ) 00087 // CBL = sylv( ABR, BTL, CBL ) 00088 // CBR = sylv( ABR, BBR, CBR - sylv( ABR, BTL, CBL ) * BTR ) 00089 00090 /*------------------------------------------------------------*/ 00091 00092 // C10 = sylv( A11, B00, C10 ); 00093 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00094 isgn, A11, B00, C10, scale, 00095 FLA_Cntl_sub_sylv1( cntl ) ); 00096 00097 // C00 = C00 - A01 * C10; 00098 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00099 FLA_MINUS_ONE, A01, C10, FLA_ONE, C00, 00100 FLA_Cntl_sub_gemm1( cntl ) ); 00101 00102 // C11 = sylv( A11, B11, C11 -/+ C10 * B01 ); 00103 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00104 FLA_NEGATE( isgn ), C10, B01, FLA_ONE, C11, 00105 FLA_Cntl_sub_gemm2( cntl ) ); 00106 00107 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00108 isgn, A11, B11, C11, scale, 00109 FLA_Cntl_sub_sylv2( cntl ) ); 00110 00111 // C01 = C01 - A01 * C11; 00112 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00113 FLA_MINUS_ONE, A01, C11, FLA_ONE, C01, 00114 FLA_Cntl_sub_gemm3( cntl ) ); 00115 00116 // C12 = sylv( A11, B22, C12 -/+ C10 * B02 -/+ C11 * B12 ); 00117 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00118 FLA_NEGATE( isgn ), C11, B12, FLA_ONE, C12, 00119 FLA_Cntl_sub_gemm4( cntl ) ); 00120 00121 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00122 FLA_NEGATE( isgn ), C10, B02, FLA_ONE, C12, 00123 FLA_Cntl_sub_gemm5( cntl ) ); 00124 00125 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00126 isgn, A11, B22, C12, scale, 00127 FLA_Cntl_sub_sylv3( cntl ) ); 00128 00129 // C02 = C02 - A01 * C12; 00130 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00131 FLA_MINUS_ONE, A01, C12, FLA_ONE, C02, 00132 FLA_Cntl_sub_gemm6( cntl ) ); 00133 00134 /*------------------------------------------------------------*/ 00135 00136 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, /**/ A01, A02, 00137 /* ************** */ /* ****************** */ 00138 A10, /**/ A11, A12, 00139 &ABL, /**/ &ABR, A20, /**/ A21, A22, 00140 FLA_BR ); 00141 00142 FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02, 00143 B10, B11, /**/ B12, 00144 /* ************** */ /* ****************** */ 00145 &BBL, /**/ &BBR, B20, B21, /**/ B22, 00146 FLA_TL ); 00147 00148 FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02, 00149 /* ************** */ /* ****************** */ 00150 C10, C11, /**/ C12, 00151 &CBL, /**/ &CBR, C20, C21, /**/ C22, 00152 FLA_BL ); 00153 00154 } 00155 00156 return FLA_SUCCESS; 00157 }
FLA_Error FLA_Sylv_nn_blk_var12 | ( | FLA_Obj | isgn, | |
FLA_Obj | A, | |||
FLA_Obj | B, | |||
FLA_Obj | C, | |||
FLA_Obj | scale, | |||
fla_sylv_t * | cntl | |||
) |
References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_nn().
00038 { 00039 FLA_Obj ATL, ATR, A00, A01, A02, 00040 ABL, ABR, A10, A11, A12, 00041 A20, A21, A22; 00042 00043 FLA_Obj BTL, BTR, B00, B01, B02, 00044 BBL, BBR, B10, B11, B12, 00045 B20, B21, B22; 00046 00047 FLA_Obj CTL, CTR, C00, C01, C02, 00048 CBL, CBR, C10, C11, C12, 00049 C20, C21, C22; 00050 00051 dim_t b; 00052 00053 FLA_Part_2x2( A, &ATL, &ATR, 00054 &ABL, &ABR, 0, 0, FLA_BR ); 00055 00056 FLA_Part_2x2( B, &BTL, &BTR, 00057 &BBL, &BBR, 0, 0, FLA_TL ); 00058 00059 FLA_Part_2x2( C, &CTL, &CTR, 00060 &CBL, &CBR, 0, 0, FLA_BL ); 00061 00062 while ( FLA_Obj_length( ABR ) < FLA_Obj_length( A ) ){ 00063 00064 b = FLA_Determine_blocksize( CTR, FLA_TR, FLA_Cntl_blocksize( cntl ) ); 00065 00066 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, &A01, /**/ &A02, 00067 &A10, &A11, /**/ &A12, 00068 /* ************* */ /* ******************** */ 00069 ABL, /**/ ABR, &A20, &A21, /**/ &A22, 00070 b, b, FLA_TL ); 00071 00072 FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02, 00073 /* ************* */ /* ******************** */ 00074 &B10, /**/ &B11, &B12, 00075 BBL, /**/ BBR, &B20, /**/ &B21, &B22, 00076 b, b, FLA_BR ); 00077 00078 FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02, 00079 &C10, /**/ &C11, &C12, 00080 /* ************* */ /* ******************** */ 00081 CBL, /**/ CBR, &C20, /**/ &C21, &C22, 00082 b, b, FLA_TR ); 00083 00084 // Loop Invariant: 00085 // CTL = sylv( ATL, BTL, CTL - ATR * sylv( ABR, BTL, CBL ) ) 00086 // CTR = CTR - sylv( ATL, BTL, CTL - ATR * sylv( ABR, BTL, CBL ) ) * BTR 00087 // CBL = sylv( ABR, BTL, CBL ) 00088 // CBR = sylv( ABR, BBR, CBR - sylv( ABR, BTL, CBL ) * BTR ) 00089 00090 /*------------------------------------------------------------*/ 00091 00092 // C11 = sylv( A11, B11, C11 - A12 * C21 ); 00093 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00094 FLA_MINUS_ONE, A12, C21, FLA_ONE, C11, 00095 FLA_Cntl_sub_gemm1( cntl ) ); 00096 00097 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00098 isgn, A11, B11, C11, scale, 00099 FLA_Cntl_sub_sylv1( cntl ) ); 00100 00101 // C01 = sylv( A00, B11, C01 - A01 * C11 - A02 * C21 ); 00102 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00103 FLA_MINUS_ONE, A02, C21, FLA_ONE, C01, 00104 FLA_Cntl_sub_gemm2( cntl ) ); 00105 00106 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00107 FLA_MINUS_ONE, A01, C11, FLA_ONE, C01, 00108 FLA_Cntl_sub_gemm3( cntl ) ); 00109 00110 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00111 isgn, A00, B11, C01, scale, 00112 FLA_Cntl_sub_sylv2( cntl ) ); 00113 00114 // C02 = C02 -/+ C01 * B12; 00115 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00116 FLA_NEGATE( isgn ), C01, B12, FLA_ONE, C02, 00117 FLA_Cntl_sub_gemm4( cntl ) ); 00118 00119 // C12 = sylv( A11, B22, C12 - A12 * C22 -/+ C11 * B12 ); 00120 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00121 FLA_NEGATE( isgn ), C11, B12, FLA_ONE, C12, 00122 FLA_Cntl_sub_gemm5( cntl ) ); 00123 00124 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00125 FLA_MINUS_ONE, A12, C22, FLA_ONE, C12, 00126 FLA_Cntl_sub_gemm6( cntl ) ); 00127 00128 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00129 isgn, A11, B22, C12, scale, 00130 FLA_Cntl_sub_sylv3( cntl ) ); 00131 00132 /*------------------------------------------------------------*/ 00133 00134 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, /**/ A01, A02, 00135 /* ************** */ /* ****************** */ 00136 A10, /**/ A11, A12, 00137 &ABL, /**/ &ABR, A20, /**/ A21, A22, 00138 FLA_BR ); 00139 00140 FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02, 00141 B10, B11, /**/ B12, 00142 /* ************** */ /* ****************** */ 00143 &BBL, /**/ &BBR, B20, B21, /**/ B22, 00144 FLA_TL ); 00145 00146 FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02, 00147 /* ************** */ /* ****************** */ 00148 C10, C11, /**/ C12, 00149 &CBL, /**/ &CBR, C20, C21, /**/ C22, 00150 FLA_BL ); 00151 00152 } 00153 00154 return FLA_SUCCESS; 00155 }
FLA_Error FLA_Sylv_nn_blk_var13 | ( | FLA_Obj | isgn, | |
FLA_Obj | A, | |||
FLA_Obj | B, | |||
FLA_Obj | C, | |||
FLA_Obj | scale, | |||
fla_sylv_t * | cntl | |||
) |
References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_nn().
00038 { 00039 FLA_Obj ATL, ATR, A00, A01, A02, 00040 ABL, ABR, A10, A11, A12, 00041 A20, A21, A22; 00042 00043 FLA_Obj BTL, BTR, B00, B01, B02, 00044 BBL, BBR, B10, B11, B12, 00045 B20, B21, B22; 00046 00047 FLA_Obj CTL, CTR, C00, C01, C02, 00048 CBL, CBR, C10, C11, C12, 00049 C20, C21, C22; 00050 00051 dim_t b; 00052 00053 FLA_Part_2x2( A, &ATL, &ATR, 00054 &ABL, &ABR, 0, 0, FLA_BR ); 00055 00056 FLA_Part_2x2( B, &BTL, &BTR, 00057 &BBL, &BBR, 0, 0, FLA_TL ); 00058 00059 FLA_Part_2x2( C, &CTL, &CTR, 00060 &CBL, &CBR, 0, 0, FLA_BL ); 00061 00062 while ( FLA_Obj_length( ABR ) < FLA_Obj_length( A ) ){ 00063 00064 b = FLA_Determine_blocksize( CTR, FLA_TR, FLA_Cntl_blocksize( cntl ) ); 00065 00066 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, &A01, /**/ &A02, 00067 &A10, &A11, /**/ &A12, 00068 /* ************* */ /* ******************** */ 00069 ABL, /**/ ABR, &A20, &A21, /**/ &A22, 00070 b, b, FLA_TL ); 00071 00072 FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02, 00073 /* ************* */ /* ******************** */ 00074 &B10, /**/ &B11, &B12, 00075 BBL, /**/ BBR, &B20, /**/ &B21, &B22, 00076 b, b, FLA_BR ); 00077 00078 FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02, 00079 &C10, /**/ &C11, &C12, 00080 /* ************* */ /* ******************** */ 00081 CBL, /**/ CBR, &C20, /**/ &C21, &C22, 00082 b, b, FLA_TR ); 00083 00084 // Loop Invariant: 00085 // CTL = sylv( ATL, BTL, CTL - ATR * sylv( ABR, BTL, CBL ) ) 00086 // CTR = CTR - ATR * sylv( ABR, BBR, CBR - sylv( ABR, BTL, CBL ) * BTR ) 00087 // CBL = sylv( ABR, BTL, CBL ) 00088 // CBR = sylv( ABR, BBR, CBR - sylv( ABR, BTL, CBL ) * BTR ) 00089 00090 /*------------------------------------------------------------*/ 00091 00092 // C11 = sylv( A11, B11, C11 -/+ C10 * B01 ); 00093 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00094 FLA_NEGATE( isgn ), C10, B01, FLA_ONE, C11, 00095 FLA_Cntl_sub_gemm1( cntl ) ); 00096 00097 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00098 isgn, A11, B11, C11, scale, 00099 FLA_Cntl_sub_sylv1( cntl ) ); 00100 00101 // C01 = sylv( A00, B11, C01 - A01 * C11 -/+ C00 * B01 ); 00102 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00103 FLA_NEGATE( isgn ), C00, B01, FLA_ONE, C01, 00104 FLA_Cntl_sub_gemm2( cntl ) ); 00105 00106 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00107 FLA_MINUS_ONE, A01, C11, FLA_ONE, C01, 00108 FLA_Cntl_sub_gemm3( cntl ) ); 00109 00110 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00111 isgn, A00, B11, C01, scale, 00112 FLA_Cntl_sub_sylv2( cntl ) ); 00113 00114 // C12 = sylv( A11, B22, C12 -/+ C10 * B02 -/+ C11 * B12 ); 00115 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00116 FLA_NEGATE( isgn ), C11, B12, FLA_ONE, C12, 00117 FLA_Cntl_sub_gemm4( cntl ) ); 00118 00119 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00120 FLA_NEGATE( isgn ), C10, B02, FLA_ONE, C12, 00121 FLA_Cntl_sub_gemm5( cntl ) ); 00122 00123 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00124 isgn, A11, B22, C12, scale, 00125 FLA_Cntl_sub_sylv3( cntl ) ); 00126 00127 // C02 = C02 - A01 * C12; 00128 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00129 FLA_MINUS_ONE, A01, C12, FLA_ONE, C02, 00130 FLA_Cntl_sub_gemm6( cntl ) ); 00131 00132 /*------------------------------------------------------------*/ 00133 00134 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, /**/ A01, A02, 00135 /* ************** */ /* ****************** */ 00136 A10, /**/ A11, A12, 00137 &ABL, /**/ &ABR, A20, /**/ A21, A22, 00138 FLA_BR ); 00139 00140 FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02, 00141 B10, B11, /**/ B12, 00142 /* ************** */ /* ****************** */ 00143 &BBL, /**/ &BBR, B20, B21, /**/ B22, 00144 FLA_TL ); 00145 00146 FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02, 00147 /* ************** */ /* ****************** */ 00148 C10, C11, /**/ C12, 00149 &CBL, /**/ &CBR, C20, C21, /**/ C22, 00150 FLA_BL ); 00151 00152 } 00153 00154 return FLA_SUCCESS; 00155 }
FLA_Error FLA_Sylv_nn_blk_var14 | ( | FLA_Obj | isgn, | |
FLA_Obj | A, | |||
FLA_Obj | B, | |||
FLA_Obj | C, | |||
FLA_Obj | scale, | |||
fla_sylv_t * | cntl | |||
) |
References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_nn().
00038 { 00039 FLA_Obj ATL, ATR, A00, A01, A02, 00040 ABL, ABR, A10, A11, A12, 00041 A20, A21, A22; 00042 00043 FLA_Obj BTL, BTR, B00, B01, B02, 00044 BBL, BBR, B10, B11, B12, 00045 B20, B21, B22; 00046 00047 FLA_Obj CTL, CTR, C00, C01, C02, 00048 CBL, CBR, C10, C11, C12, 00049 C20, C21, C22; 00050 00051 dim_t b; 00052 00053 FLA_Part_2x2( A, &ATL, &ATR, 00054 &ABL, &ABR, 0, 0, FLA_BR ); 00055 00056 FLA_Part_2x2( B, &BTL, &BTR, 00057 &BBL, &BBR, 0, 0, FLA_TL ); 00058 00059 FLA_Part_2x2( C, &CTL, &CTR, 00060 &CBL, &CBR, 0, 0, FLA_BL ); 00061 00062 while ( FLA_Obj_length( ABR ) < FLA_Obj_length( A ) ){ 00063 00064 b = FLA_Determine_blocksize( CTR, FLA_TR, FLA_Cntl_blocksize( cntl ) ); 00065 00066 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, &A01, /**/ &A02, 00067 &A10, &A11, /**/ &A12, 00068 /* ************* */ /* ******************** */ 00069 ABL, /**/ ABR, &A20, &A21, /**/ &A22, 00070 b, b, FLA_TL ); 00071 00072 FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02, 00073 /* ************* */ /* ******************** */ 00074 &B10, /**/ &B11, &B12, 00075 BBL, /**/ BBR, &B20, /**/ &B21, &B22, 00076 b, b, FLA_BR ); 00077 00078 FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02, 00079 &C10, /**/ &C11, &C12, 00080 /* ************* */ /* ******************** */ 00081 CBL, /**/ CBR, &C20, /**/ &C21, &C22, 00082 b, b, FLA_TR ); 00083 00084 // Loop Invariant: 00085 // CTL = sylv( ATL, BTL, CTL - ATR * sylv( ABR, BTL, CBL ) ) 00086 // CTR = CTR - ATR * sylv( ABR, BBR, CBR - sylv( ABR, BTL, CBL ) * BTR ) 00087 // - sylv( ATL, BTL, CTL - ATR * sylv( ABR, BTL, CBL ) ) * BTR 00088 // CBL = sylv( ABR, BTL, CBL ) 00089 // CBR = sylv( ABR, BBR, CBR - sylv( ABR, BTL, CBL ) * BTR ) 00090 00091 /*------------------------------------------------------------*/ 00092 00093 // C11 = sylv( A11, B11, C11 ); 00094 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00095 isgn, A11, B11, C11, scale, 00096 FLA_Cntl_sub_sylv1( cntl ) ); 00097 00098 // C01 = sylv( A00, B11, C01 - A01 * C11 ); 00099 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00100 FLA_MINUS_ONE, A01, C11, FLA_ONE, C01, 00101 FLA_Cntl_sub_gemm1( cntl ) ); 00102 00103 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00104 isgn, A00, B11, C01, scale, 00105 FLA_Cntl_sub_sylv2( cntl ) ); 00106 00107 // C12 = sylv( A11, B22, C12 -/+ C11 * B12 ); 00108 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00109 FLA_NEGATE( isgn ), C11, B12, FLA_ONE, C12, 00110 FLA_Cntl_sub_gemm2( cntl ) ); 00111 00112 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00113 isgn, A11, B22, C12, scale, 00114 FLA_Cntl_sub_sylv3( cntl ) ); 00115 00116 // C02 = C02 - A01 * C12 -/+ C01 * B12; 00117 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00118 FLA_NEGATE( isgn ), C01, B12, FLA_ONE, C02, 00119 FLA_Cntl_sub_gemm3( cntl ) ); 00120 00121 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00122 FLA_MINUS_ONE, A01, C12, FLA_ONE, C02, 00123 FLA_Cntl_sub_gemm4( cntl ) ); 00124 00125 /*------------------------------------------------------------*/ 00126 00127 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, /**/ A01, A02, 00128 /* ************** */ /* ****************** */ 00129 A10, /**/ A11, A12, 00130 &ABL, /**/ &ABR, A20, /**/ A21, A22, 00131 FLA_BR ); 00132 00133 FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02, 00134 B10, B11, /**/ B12, 00135 /* ************** */ /* ****************** */ 00136 &BBL, /**/ &BBR, B20, B21, /**/ B22, 00137 FLA_TL ); 00138 00139 FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02, 00140 /* ************** */ /* ****************** */ 00141 C10, C11, /**/ C12, 00142 &CBL, /**/ &CBR, C20, C21, /**/ C22, 00143 FLA_BL ); 00144 00145 } 00146 00147 return FLA_SUCCESS; 00148 }
FLA_Error FLA_Sylv_nn_blk_var15 | ( | FLA_Obj | isgn, | |
FLA_Obj | A, | |||
FLA_Obj | B, | |||
FLA_Obj | C, | |||
FLA_Obj | scale, | |||
fla_sylv_t * | cntl | |||
) |
References FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_nn().
00036 { 00037 FLA_Obj ATL, ATR, A00, A01, A02, 00038 ABL, ABR, A10, A11, A12, 00039 A20, A21, A22; 00040 00041 FLA_Obj CT, C0, 00042 CB, C1, 00043 C2; 00044 00045 dim_t b; 00046 00047 FLA_Part_2x2( A, &ATL, &ATR, 00048 &ABL, &ABR, 0, 0, FLA_BR ); 00049 00050 FLA_Part_2x1( C, &CT, 00051 &CB, 0, FLA_BOTTOM ); 00052 00053 while ( FLA_Obj_length( ABR ) < FLA_Obj_length( A ) ){ 00054 00055 b = FLA_Determine_blocksize( CT, FLA_TOP, FLA_Cntl_blocksize( cntl ) ); 00056 00057 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, &A01, /**/ &A02, 00058 &A10, &A11, /**/ &A12, 00059 /* ************* */ /* ******************** */ 00060 ABL, /**/ ABR, &A20, &A21, /**/ &A22, 00061 b, b, FLA_TL ); 00062 00063 FLA_Repart_2x1_to_3x1( CT, &C0, 00064 &C1, 00065 /* ** */ /* ** */ 00066 CB, &C2, b, FLA_TOP ); 00067 00068 // Loop Invariant: 00069 // CT = CT 00070 // CB = sylv( ABR, B, CB ) 00071 00072 /*------------------------------------------------------------*/ 00073 00074 // C1 = sylv( A11, B, C1 - A12 * C2 ); 00075 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00076 FLA_MINUS_ONE, A12, C2, FLA_ONE, C1, 00077 FLA_Cntl_sub_gemm1( cntl ) ); 00078 00079 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00080 isgn, A11, B, C1, scale, 00081 FLA_Cntl_sub_sylv1( cntl ) ); 00082 00083 /*------------------------------------------------------------*/ 00084 00085 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, /**/ A01, A02, 00086 /* ************** */ /* ****************** */ 00087 A10, /**/ A11, A12, 00088 &ABL, /**/ &ABR, A20, /**/ A21, A22, 00089 FLA_BR ); 00090 00091 FLA_Cont_with_3x1_to_2x1( &CT, C0, 00092 /* ** */ /* ** */ 00093 C1, 00094 &CB, C2, FLA_BOTTOM ); 00095 00096 } 00097 00098 return FLA_SUCCESS; 00099 }
FLA_Error FLA_Sylv_nn_blk_var16 | ( | FLA_Obj | isgn, | |
FLA_Obj | A, | |||
FLA_Obj | B, | |||
FLA_Obj | C, | |||
FLA_Obj | scale, | |||
fla_sylv_t * | cntl | |||
) |
References FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_nn().
00038 { 00039 FLA_Obj ATL, ATR, A00, A01, A02, 00040 ABL, ABR, A10, A11, A12, 00041 A20, A21, A22; 00042 00043 FLA_Obj CT, C0, 00044 CB, C1, 00045 C2; 00046 00047 dim_t b; 00048 00049 FLA_Part_2x2( A, &ATL, &ATR, 00050 &ABL, &ABR, 0, 0, FLA_BR ); 00051 00052 FLA_Part_2x1( C, &CT, 00053 &CB, 0, FLA_BOTTOM ); 00054 00055 while ( FLA_Obj_length( ABR ) < FLA_Obj_length( A ) ){ 00056 00057 b = FLA_Determine_blocksize( CT, FLA_TOP, FLA_Cntl_blocksize( cntl ) ); 00058 00059 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, &A01, /**/ &A02, 00060 &A10, &A11, /**/ &A12, 00061 /* ************* */ /* ******************** */ 00062 ABL, /**/ ABR, &A20, &A21, /**/ &A22, 00063 b, b, FLA_TL ); 00064 00065 FLA_Repart_2x1_to_3x1( CT, &C0, 00066 &C1, 00067 /* ** */ /* ** */ 00068 CB, &C2, b, FLA_TOP ); 00069 00070 // Loop Invariant: 00071 // CT = CT - ATR * sylv( ABR, B, CB ) 00072 // CB = sylv( ABR, B, CB ) 00073 00074 /*------------------------------------------------------------*/ 00075 00076 // C1 = sylv( A11, B, C1 ); 00077 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00078 isgn, A11, B, C1, scale, 00079 FLA_Cntl_sub_sylv1( cntl ) ); 00080 00081 // C0 = C0 - A01 * C1; 00082 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00083 FLA_MINUS_ONE, A01, C1, FLA_ONE, C0, 00084 FLA_Cntl_sub_gemm1( cntl ) ); 00085 00086 /*------------------------------------------------------------*/ 00087 00088 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, /**/ A01, A02, 00089 /* ************** */ /* ****************** */ 00090 A10, /**/ A11, A12, 00091 &ABL, /**/ &ABR, A20, /**/ A21, A22, 00092 FLA_BR ); 00093 00094 FLA_Cont_with_3x1_to_2x1( &CT, C0, 00095 /* ** */ /* ** */ 00096 C1, 00097 &CB, C2, FLA_BOTTOM ); 00098 00099 } 00100 00101 return FLA_SUCCESS; 00102 }
FLA_Error FLA_Sylv_nn_blk_var17 | ( | FLA_Obj | isgn, | |
FLA_Obj | A, | |||
FLA_Obj | B, | |||
FLA_Obj | C, | |||
FLA_Obj | scale, | |||
fla_sylv_t * | cntl | |||
) |
References FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_Obj_length(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_nn().
00036 { 00037 FLA_Obj BTL, BTR, B00, B01, B02, 00038 BBL, BBR, B10, B11, B12, 00039 B20, B21, B22; 00040 00041 FLA_Obj CL, CR, C0, C1, C2; 00042 00043 dim_t b; 00044 00045 FLA_Part_2x2( B, &BTL, &BTR, 00046 &BBL, &BBR, 0, 0, FLA_TL ); 00047 00048 FLA_Part_1x2( C, &CL, &CR, 0, FLA_LEFT ); 00049 00050 while ( FLA_Obj_length( BTL ) < FLA_Obj_length( B ) ){ 00051 00052 b = FLA_Determine_blocksize( CR, FLA_RIGHT, FLA_Cntl_blocksize( cntl ) ); 00053 00054 FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02, 00055 /* ************* */ /* ******************** */ 00056 &B10, /**/ &B11, &B12, 00057 BBL, /**/ BBR, &B20, /**/ &B21, &B22, 00058 b, b, FLA_BR ); 00059 00060 FLA_Repart_1x2_to_1x3( CL, /**/ CR, &C0, /**/ &C1, &C2, 00061 b, FLA_RIGHT ); 00062 00063 // Loop Invariant: 00064 // CL = 00065 // CR = 00066 00067 /*------------------------------------------------------------*/ 00068 00069 // C1 = sylv( A, B11, C1 -/+ C0 * B01 ); 00070 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00071 FLA_NEGATE( isgn ), C0, B01, FLA_ONE, C1, 00072 FLA_Cntl_sub_gemm1( cntl ) ); 00073 00074 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00075 isgn, A, B11, C1, scale, 00076 FLA_Cntl_sub_sylv1( cntl ) ); 00077 00078 /*------------------------------------------------------------*/ 00079 00080 FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02, 00081 B10, B11, /**/ B12, 00082 /* ************** */ /* ****************** */ 00083 &BBL, /**/ &BBR, B20, B21, /**/ B22, 00084 FLA_TL ); 00085 00086 FLA_Cont_with_1x3_to_1x2( &CL, /**/ &CR, C0, C1, /**/ C2, 00087 FLA_LEFT ); 00088 } 00089 00090 return FLA_SUCCESS; 00091 }
FLA_Error FLA_Sylv_nn_blk_var18 | ( | FLA_Obj | isgn, | |
FLA_Obj | A, | |||
FLA_Obj | B, | |||
FLA_Obj | C, | |||
FLA_Obj | scale, | |||
fla_sylv_t * | cntl | |||
) |
References FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_Obj_length(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_nn().
00036 { 00037 FLA_Obj BTL, BTR, B00, B01, B02, 00038 BBL, BBR, B10, B11, B12, 00039 B20, B21, B22; 00040 00041 FLA_Obj CL, CR, C0, C1, C2; 00042 00043 dim_t b; 00044 00045 FLA_Part_2x2( B, &BTL, &BTR, 00046 &BBL, &BBR, 0, 0, FLA_TL ); 00047 00048 FLA_Part_1x2( C, &CL, &CR, 0, FLA_LEFT ); 00049 00050 while ( FLA_Obj_length( BTL ) < FLA_Obj_length( B ) ){ 00051 00052 b = FLA_Determine_blocksize( CR, FLA_RIGHT, FLA_Cntl_blocksize( cntl ) ); 00053 00054 FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02, 00055 /* ************* */ /* ******************** */ 00056 &B10, /**/ &B11, &B12, 00057 BBL, /**/ BBR, &B20, /**/ &B21, &B22, 00058 b, b, FLA_BR ); 00059 00060 FLA_Repart_1x2_to_1x3( CL, /**/ CR, &C0, /**/ &C1, &C2, 00061 b, FLA_RIGHT ); 00062 00063 // Loop Invariant: 00064 // CL = 00065 // CR = 00066 00067 /*------------------------------------------------------------*/ 00068 00069 // C1 = sylv( A, B11, C1 ); 00070 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00071 isgn, A, B11, C1, scale, 00072 FLA_Cntl_sub_sylv1( cntl ) ); 00073 00074 // C2 = C2 -/+ C1 * B12; 00075 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00076 FLA_NEGATE( isgn ), C1, B12, FLA_ONE, C2, 00077 FLA_Cntl_sub_gemm1( cntl ) ); 00078 00079 /*------------------------------------------------------------*/ 00080 00081 FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02, 00082 B10, B11, /**/ B12, 00083 /* ************** */ /* ****************** */ 00084 &BBL, /**/ &BBR, B20, B21, /**/ B22, 00085 FLA_TL ); 00086 00087 FLA_Cont_with_1x3_to_1x2( &CL, /**/ &CR, C0, C1, /**/ C2, 00088 FLA_LEFT ); 00089 } 00090 00091 return FLA_SUCCESS; 00092 }
FLA_Error FLA_Sylv_nn_blk_var2 | ( | FLA_Obj | isgn, | |
FLA_Obj | A, | |||
FLA_Obj | B, | |||
FLA_Obj | C, | |||
FLA_Obj | scale, | |||
fla_sylv_t * | cntl | |||
) |
References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_nn().
00038 { 00039 FLA_Obj ATL, ATR, A00, A01, A02, 00040 ABL, ABR, A10, A11, A12, 00041 A20, A21, A22; 00042 00043 FLA_Obj BTL, BTR, B00, B01, B02, 00044 BBL, BBR, B10, B11, B12, 00045 B20, B21, B22; 00046 00047 FLA_Obj CTL, CTR, C00, C01, C02, 00048 CBL, CBR, C10, C11, C12, 00049 C20, C21, C22; 00050 00051 dim_t b; 00052 00053 FLA_Part_2x2( A, &ATL, &ATR, 00054 &ABL, &ABR, 0, 0, FLA_BR ); 00055 00056 FLA_Part_2x2( B, &BTL, &BTR, 00057 &BBL, &BBR, 0, 0, FLA_TL ); 00058 00059 FLA_Part_2x2( C, &CTL, &CTR, 00060 &CBL, &CBR, 0, 0, FLA_BL ); 00061 00062 while ( FLA_Obj_length( ABR ) < FLA_Obj_length( A ) ){ 00063 00064 b = FLA_Determine_blocksize( CTR, FLA_TR, FLA_Cntl_blocksize( cntl ) ); 00065 00066 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, &A01, /**/ &A02, 00067 &A10, &A11, /**/ &A12, 00068 /* ************* */ /* ******************** */ 00069 ABL, /**/ ABR, &A20, &A21, /**/ &A22, 00070 b, b, FLA_TL ); 00071 00072 FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02, 00073 /* ************* */ /* ******************** */ 00074 &B10, /**/ &B11, &B12, 00075 BBL, /**/ BBR, &B20, /**/ &B21, &B22, 00076 b, b, FLA_BR ); 00077 00078 FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02, 00079 &C10, /**/ &C11, &C12, 00080 /* ************* */ /* ******************** */ 00081 CBL, /**/ CBR, &C20, /**/ &C21, &C22, 00082 b, b, FLA_TR ); 00083 00084 // Loop Invariant: 00085 // CTL = CTL - ATR * sylv( ABR, BTL, CBL) 00086 // CTR = CTR 00087 // CBL = sylv( ABR, BTL, CBL ) 00088 // CBR = CBR 00089 00090 /*------------------------------------------------------------*/ 00091 00092 // C10 = sylv( A11, B00, C10 ); 00093 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00094 isgn, A11, B00, C10, scale, 00095 FLA_Cntl_sub_sylv1( cntl ) ); 00096 00097 // C00 = C00 - A01 * C10; 00098 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00099 FLA_MINUS_ONE, A01, C10, FLA_ONE, C00, 00100 FLA_Cntl_sub_gemm1( cntl ) ); 00101 00102 // C21 = sylv( A22, B11, C21 -/+ C20 * B01 ); 00103 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00104 FLA_NEGATE( isgn ), C20, B01, FLA_ONE, C21, 00105 FLA_Cntl_sub_gemm2( cntl ) ); 00106 00107 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00108 isgn, A22, B11, C21, scale, 00109 FLA_Cntl_sub_sylv2( cntl ) ); 00110 00111 // C11 = sylv( A11, B11, C11 - A12 * C21 -/+ C10 * B01 ); 00112 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00113 FLA_NEGATE( isgn ), C10, B01, FLA_ONE, C11, 00114 FLA_Cntl_sub_gemm3( cntl ) ); 00115 00116 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00117 FLA_MINUS_ONE, A12, C21, FLA_ONE, C11, 00118 FLA_Cntl_sub_gemm4( cntl ) ); 00119 00120 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00121 isgn, A11, B11, C11, scale, 00122 FLA_Cntl_sub_sylv3( cntl ) ); 00123 00124 // C01 = C01 - A01 * C11 - A02 * C21; 00125 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00126 FLA_MINUS_ONE, A02, C21, FLA_ONE, C01, 00127 FLA_Cntl_sub_gemm5( cntl ) ); 00128 00129 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00130 FLA_MINUS_ONE, A01, C11, FLA_ONE, C01, 00131 FLA_Cntl_sub_gemm6( cntl ) ); 00132 00133 /*------------------------------------------------------------*/ 00134 00135 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, /**/ A01, A02, 00136 /* ************** */ /* ****************** */ 00137 A10, /**/ A11, A12, 00138 &ABL, /**/ &ABR, A20, /**/ A21, A22, 00139 FLA_BR ); 00140 00141 FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02, 00142 B10, B11, /**/ B12, 00143 /* ************** */ /* ****************** */ 00144 &BBL, /**/ &BBR, B20, B21, /**/ B22, 00145 FLA_TL ); 00146 00147 FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02, 00148 /* ************** */ /* ****************** */ 00149 C10, C11, /**/ C12, 00150 &CBL, /**/ &CBR, C20, C21, /**/ C22, 00151 FLA_BL ); 00152 00153 } 00154 00155 return FLA_SUCCESS; 00156 }
FLA_Error FLA_Sylv_nn_blk_var3 | ( | FLA_Obj | isgn, | |
FLA_Obj | A, | |||
FLA_Obj | B, | |||
FLA_Obj | C, | |||
FLA_Obj | scale, | |||
fla_sylv_t * | cntl | |||
) |
References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_nn().
00038 { 00039 FLA_Obj ATL, ATR, A00, A01, A02, 00040 ABL, ABR, A10, A11, A12, 00041 A20, A21, A22; 00042 00043 FLA_Obj BTL, BTR, B00, B01, B02, 00044 BBL, BBR, B10, B11, B12, 00045 B20, B21, B22; 00046 00047 FLA_Obj CTL, CTR, C00, C01, C02, 00048 CBL, CBR, C10, C11, C12, 00049 C20, C21, C22; 00050 00051 dim_t b; 00052 00053 FLA_Part_2x2( A, &ATL, &ATR, 00054 &ABL, &ABR, 0, 0, FLA_BR ); 00055 00056 FLA_Part_2x2( B, &BTL, &BTR, 00057 &BBL, &BBR, 0, 0, FLA_TL ); 00058 00059 FLA_Part_2x2( C, &CTL, &CTR, 00060 &CBL, &CBR, 0, 0, FLA_BL ); 00061 00062 while ( FLA_Obj_length( ABR ) < FLA_Obj_length( A ) ){ 00063 00064 b = FLA_Determine_blocksize( CTR, FLA_TR, FLA_Cntl_blocksize( cntl ) ); 00065 00066 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, &A01, /**/ &A02, 00067 &A10, &A11, /**/ &A12, 00068 /* ************* */ /* ******************** */ 00069 ABL, /**/ ABR, &A20, &A21, /**/ &A22, 00070 b, b, FLA_TL ); 00071 00072 FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02, 00073 /* ************* */ /* ******************** */ 00074 &B10, /**/ &B11, &B12, 00075 BBL, /**/ BBR, &B20, /**/ &B21, &B22, 00076 b, b, FLA_BR ); 00077 00078 FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02, 00079 &C10, /**/ &C11, &C12, 00080 /* ************* */ /* ******************** */ 00081 CBL, /**/ CBR, &C20, /**/ &C21, &C22, 00082 b, b, FLA_TR ); 00083 00084 // Loop Invariant: 00085 // CTL = sylv( ATL, BTL, CTL - ATR * sylv( ABR, BTL, CBL ) ) 00086 // CTR = CTR 00087 // CBL = sylv( ABR, BTL, CBL ) 00088 // CBR = CBR 00089 00090 /*------------------------------------------------------------*/ 00091 00092 // C21 = sylv( A22, B11, C21 -/+ C20 * B01 ); 00093 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00094 FLA_NEGATE( isgn ), C20, B01, FLA_ONE, C21, 00095 FLA_Cntl_sub_gemm1( cntl ) ); 00096 00097 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00098 isgn, A22, B11, C21, scale, 00099 FLA_Cntl_sub_sylv1( cntl ) ); 00100 00101 // C11 = sylv( A11, B11, C11 - A12 * C21 -/+ C10 * B01 ); 00102 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00103 FLA_NEGATE( isgn ), C10, B01, FLA_ONE, C11, 00104 FLA_Cntl_sub_gemm2( cntl ) ); 00105 00106 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00107 FLA_MINUS_ONE, A12, C21, FLA_ONE, C11, 00108 FLA_Cntl_sub_gemm3( cntl ) ); 00109 00110 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00111 isgn, A11, B11, C11, scale, 00112 FLA_Cntl_sub_sylv2( cntl ) ); 00113 00114 // C01 = sylv( A00, B11, C01 - A01 * C11 - A02 * C21 -/+ C00 * B01 ); 00115 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00116 FLA_NEGATE( isgn ), C00, B01, FLA_ONE, C01, 00117 FLA_Cntl_sub_gemm4( cntl ) ); 00118 00119 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00120 FLA_MINUS_ONE, A02, C21, FLA_ONE, C01, 00121 FLA_Cntl_sub_gemm5( cntl ) ); 00122 00123 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00124 FLA_MINUS_ONE, A01, C11, FLA_ONE, C01, 00125 FLA_Cntl_sub_gemm6( cntl ) ); 00126 00127 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00128 isgn, A00, B11, C01, scale, 00129 FLA_Cntl_sub_sylv3( cntl ) ); 00130 00131 /*------------------------------------------------------------*/ 00132 00133 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, /**/ A01, A02, 00134 /* ************** */ /* ****************** */ 00135 A10, /**/ A11, A12, 00136 &ABL, /**/ &ABR, A20, /**/ A21, A22, 00137 FLA_BR ); 00138 00139 FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02, 00140 B10, B11, /**/ B12, 00141 /* ************** */ /* ****************** */ 00142 &BBL, /**/ &BBR, B20, B21, /**/ B22, 00143 FLA_TL ); 00144 00145 FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02, 00146 /* ************** */ /* ****************** */ 00147 C10, C11, /**/ C12, 00148 &CBL, /**/ &CBR, C20, C21, /**/ C22, 00149 FLA_BL ); 00150 00151 } 00152 00153 return FLA_SUCCESS; 00154 }
FLA_Error FLA_Sylv_nn_blk_var4 | ( | FLA_Obj | isgn, | |
FLA_Obj | A, | |||
FLA_Obj | B, | |||
FLA_Obj | C, | |||
FLA_Obj | scale, | |||
fla_sylv_t * | cntl | |||
) |
References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_nn().
00038 { 00039 FLA_Obj ATL, ATR, A00, A01, A02, 00040 ABL, ABR, A10, A11, A12, 00041 A20, A21, A22; 00042 00043 FLA_Obj BTL, BTR, B00, B01, B02, 00044 BBL, BBR, B10, B11, B12, 00045 B20, B21, B22; 00046 00047 FLA_Obj CTL, CTR, C00, C01, C02, 00048 CBL, CBR, C10, C11, C12, 00049 C20, C21, C22; 00050 00051 dim_t b; 00052 00053 FLA_Part_2x2( A, &ATL, &ATR, 00054 &ABL, &ABR, 0, 0, FLA_BR ); 00055 00056 FLA_Part_2x2( B, &BTL, &BTR, 00057 &BBL, &BBR, 0, 0, FLA_TL ); 00058 00059 FLA_Part_2x2( C, &CTL, &CTR, 00060 &CBL, &CBR, 0, 0, FLA_BL ); 00061 00062 while ( FLA_Obj_length( ABR ) < FLA_Obj_length( A ) ){ 00063 00064 b = FLA_Determine_blocksize( CTR, FLA_TR, FLA_Cntl_blocksize( cntl ) ); 00065 00066 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, &A01, /**/ &A02, 00067 &A10, &A11, /**/ &A12, 00068 /* ************* */ /* ******************** */ 00069 ABL, /**/ ABR, &A20, &A21, /**/ &A22, 00070 b, b, FLA_TL ); 00071 00072 FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02, 00073 /* ************* */ /* ******************** */ 00074 &B10, /**/ &B11, &B12, 00075 BBL, /**/ BBR, &B20, /**/ &B21, &B22, 00076 b, b, FLA_BR ); 00077 00078 FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02, 00079 &C10, /**/ &C11, &C12, 00080 /* ************* */ /* ******************** */ 00081 CBL, /**/ CBR, &C20, /**/ &C21, &C22, 00082 b, b, FLA_TR ); 00083 00084 // Loop Invariant: 00085 // CTL = CTL 00086 // CTR = CTR 00087 // CBL = sylv( ABR, BTL, CBL ) 00088 // CBR = CBR - sylv( ABR, BTL, CBL ) * BTR 00089 00090 /*------------------------------------------------------------*/ 00091 00092 // C10 = sylv( A11, B00, C10 - A12 * C20 ); 00093 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00094 FLA_MINUS_ONE, A12, C20, FLA_ONE, C10, 00095 FLA_Cntl_sub_gemm1( cntl ) ); 00096 00097 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00098 isgn, A11, B00, C10, scale, 00099 FLA_Cntl_sub_sylv1( cntl ) ); 00100 00101 // C21 = sylv( A22, B11, C21 ); 00102 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00103 isgn, A22, B11, C21, scale, 00104 FLA_Cntl_sub_sylv2( cntl ) ); 00105 00106 // C11 = sylv( A11, B11, C11 - A12 * C21 -/+ C10 * B01 ); 00107 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00108 FLA_NEGATE( isgn ), C10, B01, FLA_ONE, C11, 00109 FLA_Cntl_sub_gemm2( cntl ) ); 00110 00111 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00112 FLA_MINUS_ONE, A12, C21, FLA_ONE, C11, 00113 FLA_Cntl_sub_gemm3( cntl ) ); 00114 00115 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00116 isgn, A11, B11, C11, scale, 00117 FLA_Cntl_sub_sylv3( cntl ) ); 00118 00119 // C12 = C12 -/+ C10 * B02 -/+ C11 * B12; 00120 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00121 FLA_NEGATE( isgn ), C11, B12, FLA_ONE, C12, 00122 FLA_Cntl_sub_gemm4( cntl ) ); 00123 00124 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00125 FLA_NEGATE( isgn ), C10, B02, FLA_ONE, C12, 00126 FLA_Cntl_sub_gemm5( cntl ) ); 00127 00128 // C22 = C22 -/+ C21 * B12; 00129 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00130 FLA_NEGATE( isgn ), C21, B12, FLA_ONE, C22, 00131 FLA_Cntl_sub_gemm6( cntl ) ); 00132 00133 00134 /*------------------------------------------------------------*/ 00135 00136 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, /**/ A01, A02, 00137 /* ************** */ /* ****************** */ 00138 A10, /**/ A11, A12, 00139 &ABL, /**/ &ABR, A20, /**/ A21, A22, 00140 FLA_BR ); 00141 00142 FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02, 00143 B10, B11, /**/ B12, 00144 /* ************** */ /* ****************** */ 00145 &BBL, /**/ &BBR, B20, B21, /**/ B22, 00146 FLA_TL ); 00147 00148 FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02, 00149 /* ************** */ /* ****************** */ 00150 C10, C11, /**/ C12, 00151 &CBL, /**/ &CBR, C20, C21, /**/ C22, 00152 FLA_BL ); 00153 00154 } 00155 00156 return FLA_SUCCESS; 00157 }
FLA_Error FLA_Sylv_nn_blk_var5 | ( | FLA_Obj | isgn, | |
FLA_Obj | A, | |||
FLA_Obj | B, | |||
FLA_Obj | C, | |||
FLA_Obj | scale, | |||
fla_sylv_t * | cntl | |||
) |
References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_nn().
00038 { 00039 FLA_Obj ATL, ATR, A00, A01, A02, 00040 ABL, ABR, A10, A11, A12, 00041 A20, A21, A22; 00042 00043 FLA_Obj BTL, BTR, B00, B01, B02, 00044 BBL, BBR, B10, B11, B12, 00045 B20, B21, B22; 00046 00047 FLA_Obj CTL, CTR, C00, C01, C02, 00048 CBL, CBR, C10, C11, C12, 00049 C20, C21, C22; 00050 00051 dim_t b; 00052 00053 FLA_Part_2x2( A, &ATL, &ATR, 00054 &ABL, &ABR, 0, 0, FLA_BR ); 00055 00056 FLA_Part_2x2( B, &BTL, &BTR, 00057 &BBL, &BBR, 0, 0, FLA_TL ); 00058 00059 FLA_Part_2x2( C, &CTL, &CTR, 00060 &CBL, &CBR, 0, 0, FLA_BL ); 00061 00062 while ( FLA_Obj_length( ABR ) < FLA_Obj_length( A ) ){ 00063 00064 b = FLA_Determine_blocksize( CTR, FLA_TR, FLA_Cntl_blocksize( cntl ) ); 00065 00066 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, &A01, /**/ &A02, 00067 &A10, &A11, /**/ &A12, 00068 /* ************* */ /* ******************** */ 00069 ABL, /**/ ABR, &A20, &A21, /**/ &A22, 00070 b, b, FLA_TL ); 00071 00072 FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02, 00073 /* ************* */ /* ******************** */ 00074 &B10, /**/ &B11, &B12, 00075 BBL, /**/ BBR, &B20, /**/ &B21, &B22, 00076 b, b, FLA_BR ); 00077 00078 FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02, 00079 &C10, /**/ &C11, &C12, 00080 /* ************* */ /* ******************** */ 00081 CBL, /**/ CBR, &C20, /**/ &C21, &C22, 00082 b, b, FLA_TR ); 00083 00084 // Loop Invariant: 00085 // CTL = CTL 00086 // CTR = CTR 00087 // CBL = sylv( ABR, BTL, CBL ) 00088 // CBR = sylv( ABR, BBR, CBR - sylv( ABR, BTL, CBL ) * BTR ) 00089 00090 /*------------------------------------------------------------*/ 00091 00092 // C10 = sylv( A11, B00, C10 - A12 * C20 ); 00093 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00094 FLA_MINUS_ONE, A12, C20, FLA_ONE, C10, 00095 FLA_Cntl_sub_gemm1( cntl ) ); 00096 00097 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00098 isgn, A11, B00, C10, scale, 00099 FLA_Cntl_sub_sylv1( cntl ) ); 00100 00101 // C11 = sylv( A11, B11, C11 - A12 * C21 -/+ C10 * B01 ); 00102 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00103 FLA_NEGATE( isgn ), C10, B01, FLA_ONE, C11, 00104 FLA_Cntl_sub_gemm2( cntl ) ); 00105 00106 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00107 FLA_MINUS_ONE, A12, C21, FLA_ONE, C11, 00108 FLA_Cntl_sub_gemm3( cntl ) ); 00109 00110 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00111 isgn, A11, B11, C11, scale, 00112 FLA_Cntl_sub_sylv2( cntl ) ); 00113 00114 // C12 = sylv( A11, B22, C12 - A12 * C22 -/+ C10 * B02 -/+ C11 * B12 ); 00115 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00116 FLA_NEGATE( isgn ), C11, B12, FLA_ONE, C12, 00117 FLA_Cntl_sub_gemm4( cntl ) ); 00118 00119 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00120 FLA_NEGATE( isgn ), C10, B02, FLA_ONE, C12, 00121 FLA_Cntl_sub_gemm5( cntl ) ); 00122 00123 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00124 FLA_MINUS_ONE, A12, C22, FLA_ONE, C12, 00125 FLA_Cntl_sub_gemm6( cntl ) ); 00126 00127 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00128 isgn, A11, B22, C12, scale, 00129 FLA_Cntl_sub_sylv3( cntl ) ); 00130 00131 /*------------------------------------------------------------*/ 00132 00133 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, /**/ A01, A02, 00134 /* ************** */ /* ****************** */ 00135 A10, /**/ A11, A12, 00136 &ABL, /**/ &ABR, A20, /**/ A21, A22, 00137 FLA_BR ); 00138 00139 FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02, 00140 B10, B11, /**/ B12, 00141 /* ************** */ /* ****************** */ 00142 &BBL, /**/ &BBR, B20, B21, /**/ B22, 00143 FLA_TL ); 00144 00145 FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02, 00146 /* ************** */ /* ****************** */ 00147 C10, C11, /**/ C12, 00148 &CBL, /**/ &CBR, C20, C21, /**/ C22, 00149 FLA_BL ); 00150 00151 } 00152 00153 return FLA_SUCCESS; 00154 }
FLA_Error FLA_Sylv_nn_blk_var6 | ( | FLA_Obj | isgn, | |
FLA_Obj | A, | |||
FLA_Obj | B, | |||
FLA_Obj | C, | |||
FLA_Obj | scale, | |||
fla_sylv_t * | cntl | |||
) |
References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_nn().
00038 { 00039 FLA_Obj ATL, ATR, A00, A01, A02, 00040 ABL, ABR, A10, A11, A12, 00041 A20, A21, A22; 00042 00043 FLA_Obj BTL, BTR, B00, B01, B02, 00044 BBL, BBR, B10, B11, B12, 00045 B20, B21, B22; 00046 00047 FLA_Obj CTL, CTR, C00, C01, C02, 00048 CBL, CBR, C10, C11, C12, 00049 C20, C21, C22; 00050 00051 dim_t b; 00052 00053 FLA_Part_2x2( A, &ATL, &ATR, 00054 &ABL, &ABR, 0, 0, FLA_BR ); 00055 00056 FLA_Part_2x2( B, &BTL, &BTR, 00057 &BBL, &BBR, 0, 0, FLA_TL ); 00058 00059 FLA_Part_2x2( C, &CTL, &CTR, 00060 &CBL, &CBR, 0, 0, FLA_BL ); 00061 00062 while ( FLA_Obj_length( ABR ) < FLA_Obj_length( A ) ){ 00063 00064 b = FLA_Determine_blocksize( CTR, FLA_TR, FLA_Cntl_blocksize( cntl ) ); 00065 00066 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, &A01, /**/ &A02, 00067 &A10, &A11, /**/ &A12, 00068 /* ************* */ /* ******************** */ 00069 ABL, /**/ ABR, &A20, &A21, /**/ &A22, 00070 b, b, FLA_TL ); 00071 00072 FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02, 00073 /* ************* */ /* ******************** */ 00074 &B10, /**/ &B11, &B12, 00075 BBL, /**/ BBR, &B20, /**/ &B21, &B22, 00076 b, b, FLA_BR ); 00077 00078 FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02, 00079 &C10, /**/ &C11, &C12, 00080 /* ************* */ /* ******************** */ 00081 CBL, /**/ CBR, &C20, /**/ &C21, &C22, 00082 b, b, FLA_TR ); 00083 00084 // Loop Invariant: 00085 // CTL = CTL - ATR * sylv( ABR, BTL, CBL ) 00086 // CTR = CTR 00087 // CBL = sylv( ABR, BTL, CBL ) 00088 // CBR = CBR - sylv( ABR, BTL, CBL ) * BTR 00089 00090 /*------------------------------------------------------------*/ 00091 00092 // C10 = sylv( A11, B00, C10 ); 00093 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00094 isgn, A11, B00, C10, scale, 00095 FLA_Cntl_sub_sylv1( cntl ) ); 00096 00097 // C00 = C00 - A01 * C10; 00098 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00099 FLA_MINUS_ONE, A01, C10, FLA_ONE, C00, 00100 FLA_Cntl_sub_gemm1( cntl ) ); 00101 00102 // C21 = sylv( A22, B11, C21 ); 00103 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00104 isgn, A22, B11, C21, scale, 00105 FLA_Cntl_sub_sylv2( cntl ) ); 00106 00107 // C11 = sylv( A11, B11, C11 - A12 * C21 -/+ C10 * B01 ); 00108 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00109 FLA_NEGATE( isgn ), C10, B01, FLA_ONE, C11, 00110 FLA_Cntl_sub_gemm2( cntl ) ); 00111 00112 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00113 FLA_MINUS_ONE, A12, C21, FLA_ONE, C11, 00114 FLA_Cntl_sub_gemm3( cntl ) ); 00115 00116 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00117 isgn, A11, B11, C11, scale, 00118 FLA_Cntl_sub_sylv3( cntl ) ); 00119 00120 // C01 = C01 - A01 * C11 - A02 * C21; 00121 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00122 FLA_MINUS_ONE, A02, C21, FLA_ONE, C01, 00123 FLA_Cntl_sub_gemm4( cntl ) ); 00124 00125 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00126 FLA_MINUS_ONE, A01, C11, FLA_ONE, C01, 00127 FLA_Cntl_sub_gemm5( cntl ) ); 00128 00129 // C12 = C12 -/+ C10 * B02 -/+ C11 * B12; 00130 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00131 FLA_NEGATE( isgn ), C11, B12, FLA_ONE, C12, 00132 FLA_Cntl_sub_gemm6( cntl ) ); 00133 00134 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00135 FLA_NEGATE( isgn ), C10, B02, FLA_ONE, C12, 00136 FLA_Cntl_sub_gemm7( cntl ) ); 00137 00138 // C22 = C22 -/+ C21 * B12; 00139 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00140 FLA_NEGATE( isgn ), C21, B12, FLA_ONE, C22, 00141 FLA_Cntl_sub_gemm8( cntl ) ); 00142 00143 /*------------------------------------------------------------*/ 00144 00145 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, /**/ A01, A02, 00146 /* ************** */ /* ****************** */ 00147 A10, /**/ A11, A12, 00148 &ABL, /**/ &ABR, A20, /**/ A21, A22, 00149 FLA_BR ); 00150 00151 FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02, 00152 B10, B11, /**/ B12, 00153 /* ************** */ /* ****************** */ 00154 &BBL, /**/ &BBR, B20, B21, /**/ B22, 00155 FLA_TL ); 00156 00157 FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02, 00158 /* ************** */ /* ****************** */ 00159 C10, C11, /**/ C12, 00160 &CBL, /**/ &CBR, C20, C21, /**/ C22, 00161 FLA_BL ); 00162 00163 } 00164 00165 return FLA_SUCCESS; 00166 }
FLA_Error FLA_Sylv_nn_blk_var7 | ( | FLA_Obj | isgn, | |
FLA_Obj | A, | |||
FLA_Obj | B, | |||
FLA_Obj | C, | |||
FLA_Obj | scale, | |||
fla_sylv_t * | cntl | |||
) |
References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_nn().
00038 { 00039 FLA_Obj ATL, ATR, A00, A01, A02, 00040 ABL, ABR, A10, A11, A12, 00041 A20, A21, A22; 00042 00043 FLA_Obj BTL, BTR, B00, B01, B02, 00044 BBL, BBR, B10, B11, B12, 00045 B20, B21, B22; 00046 00047 FLA_Obj CTL, CTR, C00, C01, C02, 00048 CBL, CBR, C10, C11, C12, 00049 C20, C21, C22; 00050 00051 dim_t b; 00052 00053 FLA_Part_2x2( A, &ATL, &ATR, 00054 &ABL, &ABR, 0, 0, FLA_BR ); 00055 00056 FLA_Part_2x2( B, &BTL, &BTR, 00057 &BBL, &BBR, 0, 0, FLA_TL ); 00058 00059 FLA_Part_2x2( C, &CTL, &CTR, 00060 &CBL, &CBR, 0, 0, FLA_BL ); 00061 00062 while ( FLA_Obj_length( ABR ) < FLA_Obj_length( A ) ){ 00063 00064 b = FLA_Determine_blocksize( CTR, FLA_TR, FLA_Cntl_blocksize( cntl ) ); 00065 00066 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, &A01, /**/ &A02, 00067 &A10, &A11, /**/ &A12, 00068 /* ************* */ /* ******************** */ 00069 ABL, /**/ ABR, &A20, &A21, /**/ &A22, 00070 b, b, FLA_TL ); 00071 00072 FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02, 00073 /* ************* */ /* ******************** */ 00074 &B10, /**/ &B11, &B12, 00075 BBL, /**/ BBR, &B20, /**/ &B21, &B22, 00076 b, b, FLA_BR ); 00077 00078 FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02, 00079 &C10, /**/ &C11, &C12, 00080 /* ************* */ /* ******************** */ 00081 CBL, /**/ CBR, &C20, /**/ &C21, &C22, 00082 b, b, FLA_TR ); 00083 00084 // Loop Invariant: 00085 // CTL = sylv( ATL, BTL, CTL - ATR * sylv( ABR, BTL, CBL ) ) 00086 // CTR = CTR 00087 // CBL = sylv( ABR, BTL, CBL ) 00088 // CBR = CBR - sylv( ABR, BTL, CBL ) * BTR 00089 00090 /*------------------------------------------------------------*/ 00091 00092 // C21 = sylv( A22, B11, C21 ); 00093 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00094 isgn, A22, B11, C21, scale, 00095 FLA_Cntl_sub_sylv1( cntl ) ); 00096 00097 // C11 = sylv( A11, B11, C11 - A12 * C21 -/+ C10 * B01 ); 00098 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00099 FLA_NEGATE( isgn ), C10, B01, FLA_ONE, C11, 00100 FLA_Cntl_sub_gemm1( cntl ) ); 00101 00102 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00103 FLA_MINUS_ONE, A12, C21, FLA_ONE, C11, 00104 FLA_Cntl_sub_gemm2( cntl ) ); 00105 00106 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00107 isgn, A11, B11, C11, scale, 00108 FLA_Cntl_sub_sylv2( cntl ) ); 00109 00110 // C01 = sylv( A00, B11, C01 - A01 * C11 - A02 * C21 -/+ C00 * B01 ); 00111 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00112 FLA_NEGATE( isgn ), C00, B01, FLA_ONE, C01, 00113 FLA_Cntl_sub_gemm3( cntl ) ); 00114 00115 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00116 FLA_MINUS_ONE, A02, C21, FLA_ONE, C01, 00117 FLA_Cntl_sub_gemm4( cntl ) ); 00118 00119 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00120 FLA_MINUS_ONE, A01, C11, FLA_ONE, C01, 00121 FLA_Cntl_sub_gemm5( cntl ) ); 00122 00123 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00124 isgn, A00, B11, C01, scale, 00125 FLA_Cntl_sub_sylv3( cntl ) ); 00126 00127 // C12 = C12 -/+ C10 * B02 -/+ C11 * B12; 00128 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00129 FLA_NEGATE( isgn ), C11, B12, FLA_ONE, C12, 00130 FLA_Cntl_sub_gemm6( cntl ) ); 00131 00132 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00133 FLA_NEGATE( isgn ), C10, B02, FLA_ONE, C12, 00134 FLA_Cntl_sub_gemm7( cntl ) ); 00135 00136 // C22 = C22 -/+ C21 * B12; 00137 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00138 FLA_NEGATE( isgn ), C21, B12, FLA_ONE, C22, 00139 FLA_Cntl_sub_gemm8( cntl ) ); 00140 00141 /*------------------------------------------------------------*/ 00142 00143 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, /**/ A01, A02, 00144 /* ************** */ /* ****************** */ 00145 A10, /**/ A11, A12, 00146 &ABL, /**/ &ABR, A20, /**/ A21, A22, 00147 FLA_BR ); 00148 00149 FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02, 00150 B10, B11, /**/ B12, 00151 /* ************** */ /* ****************** */ 00152 &BBL, /**/ &BBR, B20, B21, /**/ B22, 00153 FLA_TL ); 00154 00155 FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02, 00156 /* ************** */ /* ****************** */ 00157 C10, C11, /**/ C12, 00158 &CBL, /**/ &CBR, C20, C21, /**/ C22, 00159 FLA_BL ); 00160 00161 } 00162 00163 return FLA_SUCCESS; 00164 }
FLA_Error FLA_Sylv_nn_blk_var8 | ( | FLA_Obj | isgn, | |
FLA_Obj | A, | |||
FLA_Obj | B, | |||
FLA_Obj | C, | |||
FLA_Obj | scale, | |||
fla_sylv_t * | cntl | |||
) |
References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_nn().
00038 { 00039 FLA_Obj ATL, ATR, A00, A01, A02, 00040 ABL, ABR, A10, A11, A12, 00041 A20, A21, A22; 00042 00043 FLA_Obj BTL, BTR, B00, B01, B02, 00044 BBL, BBR, B10, B11, B12, 00045 B20, B21, B22; 00046 00047 FLA_Obj CTL, CTR, C00, C01, C02, 00048 CBL, CBR, C10, C11, C12, 00049 C20, C21, C22; 00050 00051 dim_t b; 00052 00053 FLA_Part_2x2( A, &ATL, &ATR, 00054 &ABL, &ABR, 0, 0, FLA_BR ); 00055 00056 FLA_Part_2x2( B, &BTL, &BTR, 00057 &BBL, &BBR, 0, 0, FLA_TL ); 00058 00059 FLA_Part_2x2( C, &CTL, &CTR, 00060 &CBL, &CBR, 0, 0, FLA_BL ); 00061 00062 while ( FLA_Obj_length( ABR ) < FLA_Obj_length( A ) ){ 00063 00064 b = FLA_Determine_blocksize( CTR, FLA_TR, FLA_Cntl_blocksize( cntl ) ); 00065 00066 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, &A01, /**/ &A02, 00067 &A10, &A11, /**/ &A12, 00068 /* ************* */ /* ******************** */ 00069 ABL, /**/ ABR, &A20, &A21, /**/ &A22, 00070 b, b, FLA_TL ); 00071 00072 FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02, 00073 /* ************* */ /* ******************** */ 00074 &B10, /**/ &B11, &B12, 00075 BBL, /**/ BBR, &B20, /**/ &B21, &B22, 00076 b, b, FLA_BR ); 00077 00078 FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02, 00079 &C10, /**/ &C11, &C12, 00080 /* ************* */ /* ******************** */ 00081 CBL, /**/ CBR, &C20, /**/ &C21, &C22, 00082 b, b, FLA_TR ); 00083 00084 // Loop Invariant: 00085 // CTL = CTL - ATR * sylv( ABR, BTL, CBL ) 00086 // CTR = CTR 00087 // CBL = sylv( ABR, BTL, CBL ) 00088 // CBR = sylv( ABR, BBR, CBR - sylv( ABR, BTL, CBL ) * BTR ) 00089 00090 /*------------------------------------------------------------*/ 00091 00092 // C10 = sylv( A11, B00, C10 ); 00093 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00094 isgn, A11, B00, C10, scale, 00095 FLA_Cntl_sub_sylv1( cntl ) ); 00096 00097 // C00 = C00 - A01 * C10; 00098 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00099 FLA_MINUS_ONE, A01, C10, FLA_ONE, C00, 00100 FLA_Cntl_sub_gemm1( cntl ) ); 00101 00102 // C11 = sylv( A11, B11, C11 - A12 * C21 -/+ C10 * B01 ); 00103 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00104 FLA_NEGATE( isgn ), C10, B01, FLA_ONE, C11, 00105 FLA_Cntl_sub_gemm2( cntl ) ); 00106 00107 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00108 FLA_MINUS_ONE, A12, C21, FLA_ONE, C11, 00109 FLA_Cntl_sub_gemm3( cntl ) ); 00110 00111 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00112 isgn, A11, B11, C11, scale, 00113 FLA_Cntl_sub_sylv2( cntl ) ); 00114 00115 // C01 = C01 - A01 * C11 - A02 * C21; 00116 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00117 FLA_MINUS_ONE, A02, C21, FLA_ONE, C01, 00118 FLA_Cntl_sub_gemm4( cntl ) ); 00119 00120 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00121 FLA_MINUS_ONE, A01, C11, FLA_ONE, C01, 00122 FLA_Cntl_sub_gemm5( cntl ) ); 00123 00124 // C12 = sylv( A11, B22, C12 - A12 * C22 -/+ C10 * B02 -/+ C11 * B12 ); 00125 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00126 FLA_NEGATE( isgn ), C11, B12, FLA_ONE, C12, 00127 FLA_Cntl_sub_gemm6( cntl ) ); 00128 00129 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00130 FLA_NEGATE( isgn ), C10, B02, FLA_ONE, C12, 00131 FLA_Cntl_sub_gemm7( cntl ) ); 00132 00133 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00134 FLA_MINUS_ONE, A12, C22, FLA_ONE, C12, 00135 FLA_Cntl_sub_gemm8( cntl ) ); 00136 00137 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00138 isgn, A11, B22, C12, scale, 00139 FLA_Cntl_sub_sylv3( cntl ) ); 00140 00141 /*------------------------------------------------------------*/ 00142 00143 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, /**/ A01, A02, 00144 /* ************** */ /* ****************** */ 00145 A10, /**/ A11, A12, 00146 &ABL, /**/ &ABR, A20, /**/ A21, A22, 00147 FLA_BR ); 00148 00149 FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02, 00150 B10, B11, /**/ B12, 00151 /* ************** */ /* ****************** */ 00152 &BBL, /**/ &BBR, B20, B21, /**/ B22, 00153 FLA_TL ); 00154 00155 FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02, 00156 /* ************** */ /* ****************** */ 00157 C10, C11, /**/ C12, 00158 &CBL, /**/ &CBR, C20, C21, /**/ C22, 00159 FLA_BL ); 00160 00161 } 00162 00163 return FLA_SUCCESS; 00164 }
FLA_Error FLA_Sylv_nn_blk_var9 | ( | FLA_Obj | isgn, | |
FLA_Obj | A, | |||
FLA_Obj | B, | |||
FLA_Obj | C, | |||
FLA_Obj | scale, | |||
fla_sylv_t * | cntl | |||
) |
References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_nn().
00038 { 00039 FLA_Obj ATL, ATR, A00, A01, A02, 00040 ABL, ABR, A10, A11, A12, 00041 A20, A21, A22; 00042 00043 FLA_Obj BTL, BTR, B00, B01, B02, 00044 BBL, BBR, B10, B11, B12, 00045 B20, B21, B22; 00046 00047 FLA_Obj CTL, CTR, C00, C01, C02, 00048 CBL, CBR, C10, C11, C12, 00049 C20, C21, C22; 00050 00051 dim_t b; 00052 00053 FLA_Part_2x2( A, &ATL, &ATR, 00054 &ABL, &ABR, 0, 0, FLA_BR ); 00055 00056 FLA_Part_2x2( B, &BTL, &BTR, 00057 &BBL, &BBR, 0, 0, FLA_TL ); 00058 00059 FLA_Part_2x2( C, &CTL, &CTR, 00060 &CBL, &CBR, 0, 0, FLA_BL ); 00061 00062 while ( FLA_Obj_length( ABR ) < FLA_Obj_length( A ) ){ 00063 00064 b = FLA_Determine_blocksize( CTR, FLA_TR, FLA_Cntl_blocksize( cntl ) ); 00065 00066 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, &A01, /**/ &A02, 00067 &A10, &A11, /**/ &A12, 00068 /* ************* */ /* ******************** */ 00069 ABL, /**/ ABR, &A20, &A21, /**/ &A22, 00070 b, b, FLA_TL ); 00071 00072 FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02, 00073 /* ************* */ /* ******************** */ 00074 &B10, /**/ &B11, &B12, 00075 BBL, /**/ BBR, &B20, /**/ &B21, &B22, 00076 b, b, FLA_BR ); 00077 00078 FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02, 00079 &C10, /**/ &C11, &C12, 00080 /* ************* */ /* ******************** */ 00081 CBL, /**/ CBR, &C20, /**/ &C21, &C22, 00082 b, b, FLA_TR ); 00083 00084 // Loop Invariant: 00085 // CTL = sylv( ATL, BTL, CTL - ATR * sylv( ABR, BTL, CBL ) ) 00086 // CTR = CTR 00087 // CBL = sylv( ABR, BTL, CBL) 00088 // CBR = sylv( ABR, BBR, CBR - sylv( ABR, BTL, CBL ) * BTR ) 00089 00090 /*------------------------------------------------------------*/ 00091 00092 // C11 = sylv( A11, B11, C11 - A12 * C21 -/+ C10 * B01 ); 00093 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00094 FLA_NEGATE( isgn ), C10, B01, FLA_ONE, C11, 00095 FLA_Cntl_sub_gemm1( cntl ) ); 00096 00097 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00098 FLA_MINUS_ONE, A12, C21, FLA_ONE, C11, 00099 FLA_Cntl_sub_gemm2( cntl ) ); 00100 00101 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00102 isgn, A11, B11, C11, scale, 00103 FLA_Cntl_sub_sylv1( cntl ) ); 00104 00105 // C01 = sylv( A00, B11, C01 - A01 * C11 - A02 * C21 -/+ C00 * B01 ); 00106 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00107 FLA_NEGATE( isgn ), C00, B01, FLA_ONE, C01, 00108 FLA_Cntl_sub_gemm3( cntl ) ); 00109 00110 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00111 FLA_MINUS_ONE, A02, C21, FLA_ONE, C01, 00112 FLA_Cntl_sub_gemm4( cntl ) ); 00113 00114 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00115 FLA_MINUS_ONE, A01, C11, FLA_ONE, C01, 00116 FLA_Cntl_sub_gemm5( cntl ) ); 00117 00118 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00119 isgn, A00, B11, C01, scale, 00120 FLA_Cntl_sub_sylv2( cntl ) ); 00121 00122 // C12 = sylv( A11, B22, C12 - A12 * C22 -/+ C10 * B02 -/+ C11 * B12 ); 00123 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00124 FLA_NEGATE( isgn ), C11, B12, FLA_ONE, C12, 00125 FLA_Cntl_sub_gemm6( cntl ) ); 00126 00127 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00128 FLA_NEGATE( isgn ), C10, B02, FLA_ONE, C12, 00129 FLA_Cntl_sub_gemm7( cntl ) ); 00130 00131 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00132 FLA_MINUS_ONE, A12, C22, FLA_ONE, C12, 00133 FLA_Cntl_sub_gemm8( cntl ) ); 00134 00135 FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 00136 isgn, A11, B22, C12, scale, 00137 FLA_Cntl_sub_sylv3( cntl ) ); 00138 00139 /*------------------------------------------------------------*/ 00140 00141 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, /**/ A01, A02, 00142 /* ************** */ /* ****************** */ 00143 A10, /**/ A11, A12, 00144 &ABL, /**/ &ABR, A20, /**/ A21, A22, 00145 FLA_BR ); 00146 00147 FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02, 00148 B10, B11, /**/ B12, 00149 /* ************** */ /* ****************** */ 00150 &BBL, /**/ &BBR, B20, B21, /**/ B22, 00151 FLA_TL ); 00152 00153 FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02, 00154 /* ************** */ /* ****************** */ 00155 C10, C11, /**/ C12, 00156 &CBL, /**/ &CBR, C20, C21, /**/ C22, 00157 FLA_BL ); 00158 00159 } 00160 00161 return FLA_SUCCESS; 00162 }