Functions | |
FLA_Error | FLASH_Axpy_submatrix_to_global (FLA_Obj alpha, dim_t m, dim_t n, void *buffer, dim_t ldim, dim_t i, dim_t j, FLA_Obj H) |
FLA_Error | FLASH_Axpy_global_to_submatrix (FLA_Obj alpha, dim_t i, dim_t j, FLA_Obj H, dim_t m, dim_t n, void *buffer, dim_t ldim) |
FLA_Error | FLASH_Axpy_subobject_to_global (FLA_Obj alpha, FLA_Obj F, dim_t i, dim_t j, FLA_Obj H) |
FLA_Error | FLASH_Axpy_global_to_subobject (FLA_Obj alpha, dim_t i, dim_t j, FLA_Obj H, FLA_Obj F) |
FLA_Error | FLASH_Axpy_hierarchy (int direction, FLA_Obj alpha, FLA_Obj F, dim_t i, dim_t j, FLA_Obj *H) |
FLA_Error | FLASH_Axpy_hierarchy_r (int direction, FLA_Obj alpha, FLA_Obj F, dim_t i, dim_t j, FLA_Obj *H, FLASH_Obj_queue *queue) |
FLA_Error FLASH_Axpy_global_to_submatrix | ( | FLA_Obj | alpha, | |
dim_t | i, | |||
dim_t | j, | |||
FLA_Obj | H, | |||
dim_t | m, | |||
dim_t | n, | |||
void * | buffer, | |||
dim_t | ldim | |||
) |
References FLA_Check_consistent_object_datatype(), FLA_Check_error_level(), FLA_Check_if_scalar(), FLA_Check_submatrix_dims_and_offset(), FLA_Obj_attach_buffer(), FLA_Obj_create_without_buffer(), FLA_Obj_free_without_buffer(), FLASH_Axpy_hierarchy(), and FLASH_Obj_datatype().
00073 { 00074 FLA_Obj flat_matrix; 00075 FLA_Datatype datatype; 00076 FLA_Error e_val; 00077 00078 if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) 00079 { 00080 e_val = FLA_Check_if_scalar( alpha ); 00081 FLA_Check_error_code( e_val ); 00082 00083 e_val = FLA_Check_consistent_object_datatype( alpha, H ); 00084 FLA_Check_error_code( e_val ); 00085 00086 e_val = FLA_Check_submatrix_dims_and_offset( m, n, i, j, H ); 00087 FLA_Check_error_code( e_val ); 00088 } 00089 00090 // Acquire the datatype from the hierarchical matrix object. 00091 datatype = FLASH_Obj_datatype( H ); 00092 00093 // Create a temporary conventional matrix object of the requested datatype 00094 // and dimensions and attach the given buffer containing the incoming data. 00095 FLA_Obj_create_without_buffer( datatype, m, n, &flat_matrix ); 00096 FLA_Obj_attach_buffer( buffer, ldim, &flat_matrix ); 00097 00098 // Recurse through H, adding in the corresponding elements of flat_matrix, 00099 // starting at the (i,j) element offset. 00100 FLASH_Axpy_hierarchy( FLA_HIER_TO_FLAT, alpha, flat_matrix, i, j, &H ); 00101 00102 // Free the object (but don't free the buffer!). 00103 FLA_Obj_free_without_buffer( &flat_matrix ); 00104 00105 return FLA_SUCCESS; 00106 }
References FLASH_Axpy_hierarchy().
00118 { 00119 FLASH_Axpy_hierarchy( FLA_HIER_TO_FLAT, alpha, F, i, j, &H ); 00120 00121 return FLA_SUCCESS; 00122 }
FLA_Error FLASH_Axpy_hierarchy | ( | int | direction, | |
FLA_Obj | alpha, | |||
FLA_Obj | F, | |||
dim_t | i, | |||
dim_t | j, | |||
FLA_Obj * | H | |||
) |
References FLASH_Axpy_hierarchy(), FLASH_Axpy_hierarchy_r(), FLASH_Obj_exec(), FLASH_Obj_queue_s::head, FLASH_Obj_queue_s::n_tasks, and FLASH_Obj_queue_s::tail.
Referenced by FLASH_Axpy_global_to_submatrix(), FLASH_Axpy_global_to_subobject(), FLASH_Axpy_hierarchy(), FLASH_Axpy_submatrix_to_global(), FLASH_Axpy_subobject_to_global(), and FLASH_Obj_exec_parallel().
00126 { 00127 FLA_Error e_val; 00128 FLASH_Obj_queue queue; 00129 00130 queue.n_tasks = 0; 00131 queue.head = NULL; 00132 queue.tail = NULL; 00133 00134 e_val = FLASH_Axpy_hierarchy_r( direction, alpha, F, i, j, H, &queue ); 00135 00136 #ifdef FLA_ENABLE_MULTITHREADING 00137 FLASH_Obj_exec( ( void* ) FLASH_Axpy_hierarchy, &queue ); 00138 #endif 00139 return e_val; 00140 }
FLA_Error FLASH_Axpy_hierarchy_r | ( | int | direction, | |
FLA_Obj | alpha, | |||
FLA_Obj | F, | |||
dim_t | i, | |||
dim_t | j, | |||
FLA_Obj * | H, | |||
FLASH_Obj_queue * | queue | |||
) |
References FLA_Axpy_external(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Obj_buffer(), FLA_Obj_elemtype(), FLA_Obj_length(), FLA_Obj_width(), FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLASH_Axpy_hierarchy_r(), FLASH_Obj_push(), FLASH_Obj_scalar_length(), FLASH_Obj_scalar_width(), and FLASH_Queue_get_num_threads().
Referenced by FLASH_Axpy_hierarchy(), and FLASH_Axpy_hierarchy_r().
00144 { 00145 dim_t m_scal, n_scal; 00146 dim_t row_offset, col_offset; 00147 dim_t b_m, b_n; 00148 FLA_Obj* buffer; 00149 FLA_Obj* h11p; 00150 00151 FLA_Obj HTL, HTR, H00, H01, H02, 00152 HBL, HBR, H10, H11, H12, 00153 H20, H21, H22; 00154 00155 FLA_Obj F1T, F01, 00156 F1B, F11, 00157 F21; 00158 00159 FLA_Obj hT, h01, 00160 hB, h11, 00161 h21; 00162 00163 FLA_Obj FL, FR, F0, F1, F2; 00164 00165 FLA_Obj HBR_L, HBR_R, h0, h1, h2; 00166 00167 // Once we get down to a submatrix whose elements are scalars, we are down 00168 // to our base case. 00169 if ( FLA_Obj_elemtype( *H ) == FLA_SCALAR ) 00170 { 00171 dim_t m_flat, n_flat; 00172 00173 // Grab the length and width of the conventional matrix argument. 00174 m_flat = FLA_Obj_length( F ); 00175 n_flat = FLA_Obj_width( F ); 00176 00177 // Advance to the (i,j) offset of the hierarchical matrix. 00178 FLA_Part_2x2( *H, &HTL, &HTR, 00179 &HBL, &HBR, i, j, FLA_TL ); 00180 00181 // Repartition the destination submatrix, H, such that H11 is conformal 00182 // to dimensions of the flat conventional matrix F. 00183 FLA_Repart_2x2_to_3x3( HTL, /**/ HTR, &H00, /**/ &H01, &H02, 00184 /* ************* */ /* ******************** */ 00185 &H10, /**/ &H11, &H12, 00186 HBL, /**/ HBR, &H20, /**/ &H21, &H22, 00187 m_flat, n_flat, FLA_BR ); 00188 00189 #ifdef FLA_ENABLE_MULTITHREADING 00190 if ( FLASH_Queue_get_num_threads() > 1 ) 00191 { 00192 FLASH_Obj_push( direction, alpha, F, H11, queue ); 00193 } 00194 else 00195 #endif 00196 { 00197 // Depending on which top-level function invoked us, we either add the 00198 // the source data in the flat matrix to the leaf-level submatrix of the 00199 // hierarchical matrix, or add the data in the hierarchical submatrix to 00200 // the appropriate place in the flat matrix. 00201 if ( direction == FLA_FLAT_TO_HIER ) 00202 { 00203 FLA_Axpy_external( alpha, F, H11 ); 00204 } 00205 else if ( direction == FLA_HIER_TO_FLAT ) 00206 { 00207 FLA_Axpy_external( alpha, H11, F ); 00208 } 00209 } 00210 00211 // Return, since we're done for this leaf. 00212 return FLA_SUCCESS; 00213 } 00214 00215 // If the above conditional did not hold, then we proceed. 00216 00217 // Grab the buffer of H, whose elemtype we know must be FLA_MATRIX. The 00218 // elements of this buffer are objects. 00219 buffer = ( FLA_Obj* ) FLA_Obj_buffer( *H ); 00220 00221 // Extract the overall scalar dimensions of the first (top-left) matrix 00222 // referenced in the hierarchical matrix's buffer. 00223 m_scal = FLASH_Obj_scalar_length( buffer[0] ); 00224 n_scal = FLASH_Obj_scalar_width( buffer[0] ); 00225 00226 // The i and j offsets given to the current function are relative to the 00227 // entire matrix, which is represented by H. Since the elemtype of H is 00228 // FLA_MATRIX, we now compute the number of submatrix elements we will skip 00229 // in both the m and n dimensions to advance ourselves to the proper 00230 // hierarchical submatrix within H, the one that contains the destination 00231 // for the data (or at least the top-left most portion of the data) in the 00232 // raw buffer matrix. Notice that integer division is performed here. 00233 b_m = i / m_scal; 00234 b_n = j / n_scal; 00235 00236 // We use the indices computed above to advance through H such that the 00237 // (i,j) element represented by the (i,j) values passed to the current 00238 // function is situated at the top-left of HBR. 00239 FLA_Part_2x2( *H, &HTL, &HTR, 00240 &HBL, &HBR, b_m, b_n, FLA_TL ); 00241 00242 // Prepare to march laterally through HBR. 00243 FLA_Part_1x2( HBR, &HBR_L, &HBR_R, 0, FLA_LEFT); 00244 00245 // Prepare to march laterally through the conventional flat matrix. 00246 FLA_Part_1x2( F, &FL, &FR, 0, FLA_LEFT ); 00247 00248 // Iterate laterally through F. 00249 while ( FLA_Obj_width( FL ) < FLA_Obj_width( F ) ) 00250 { 00251 // Handle the initial edge case and "interior" cases differently. 00252 if ( FLA_Obj_width( FL ) == 0 ) 00253 { 00254 // Compute the width of the matrix that we will recurse into. 00255 // Since this is the first iteration of the while loop, this 00256 // blocksize may be smaller than subsequent full blocks if the 00257 // j offset passed to this routine is non-zero. 00258 b_n = n_scal - (j % n_scal); 00259 00260 // Reduce the blocksize if the width of F is smaller than the 00261 // value computed above. 00262 b_n = min( FLA_Obj_width( FR ), b_n ); 00263 00264 // If the j offset is non-zero, we must modulo it to the 00265 // hierarchical blocksize for the element submatrices of H 00266 // in preparation for our recursion. 00267 col_offset = j % n_scal; 00268 } 00269 else 00270 { 00271 // The width of the matrix into which we will recursve may be 00272 // assumed to be the same as the width of the element submatrices 00273 // of H. 00274 b_n = n_scal; 00275 00276 // Reduce the blocksize if the width of F is smaller than the 00277 // element submatrices. 00278 b_n = min( FLA_Obj_width( FR ), b_n ); 00279 00280 // The column offset will be zero because we will be starting from 00281 // the left edge of the submatrix. 00282 col_offset = 0; 00283 } 00284 00285 // Expose one column of hierarchical submatrices within HBR. 00286 FLA_Repart_1x2_to_1x3( HBR_L, /**/ HBR_R, &h0, /**/ &h1, &h2, 00287 1, FLA_RIGHT ); 00288 00289 // Expose one panel of F of width b_n, computed above. 00290 FLA_Repart_1x2_to_1x3( FL, /**/ FR, &F0, /**/ &F1, &F2, 00291 b_n, FLA_RIGHT ); 00292 00293 /*------------------------------------------------------------*/ 00294 00295 // Prepare to march vertically through h1 (the first submatrix column 00296 // of HBR). 00297 FLA_Part_2x1( h1, &hT, 00298 &hB, 0, FLA_TOP ); 00299 00300 // Prepare to march vertically through F. 00301 FLA_Part_2x1( F1, &F1T, 00302 &F1B, 0, FLA_TOP ); 00303 00304 // Iterate vertically through the current column-panel of F, F1. 00305 while ( FLA_Obj_length( F1T ) < FLA_Obj_length( F1 ) ) 00306 { 00307 00308 if ( FLA_Obj_length( F1T ) == 0 ) 00309 { 00310 // Compute the length of the matrix that we will recurse into. 00311 // Since this is the first iteration of the while loop, this 00312 // blocksize may be smaller than subsequent full blocks if the 00313 // i offset passed to this routine is non-zero. 00314 b_m = m_scal - (i % m_scal); 00315 00316 // Reduce the blocksize if the length of F is smaller than the 00317 // value computed above. 00318 b_m = min( FLA_Obj_length( F1B ), b_m ); 00319 00320 // If the i offset is non-zero, we must modulo it to the 00321 // hierarchical blocksize for the element submatrices of H 00322 // in preparation for our recursion. 00323 row_offset = i % m_scal; 00324 } 00325 else 00326 { 00327 // The length of the matrix into which we will recursve may be 00328 // assumed to be the same as the length of the element submatrices 00329 // of H. 00330 b_m = m_scal; 00331 00332 // Reduce the blocksize if the length of the first panel of 00333 // F is smaller than the the element submatrices. 00334 b_m = min( FLA_Obj_length( F1B ), b_m ); 00335 00336 // The column offset will be zero because we will be starting from 00337 // the top edge of the submatrix. 00338 row_offset = 0; 00339 } 00340 00341 // Expose one hierarchical submatrix within the current column of h1. 00342 FLA_Repart_2x1_to_3x1( hT, &h01, 00343 /* ** */ /* ** */ 00344 &h11, 00345 hB, &h21, 1, FLA_BOTTOM ); 00346 00347 // Expose one block of F1 (the current panel of F) of length b_m, 00348 // computed above. 00349 FLA_Repart_2x1_to_3x1( F1T, &F01, 00350 /* ** */ /* ** */ 00351 &F11, 00352 F1B, &F21, b_m, FLA_BOTTOM ); 00353 00354 /*------------------------------------------------------------*/ 00355 00356 // This is a pointer to the FLA_Obj element referenced by h11. 00357 h11p = ( FLA_Obj* ) FLA_Obj_buffer( h11 ); 00358 00359 // Recurse with new (i,j) offset parameters, a narrowed block of 00360 // F, and a corresponding hierarchical block of H. 00361 FLASH_Axpy_hierarchy_r( direction, alpha, F11, row_offset, col_offset, h11p, queue ); 00362 00363 /*------------------------------------------------------------*/ 00364 00365 FLA_Cont_with_3x1_to_2x1( &hT, h01, 00366 h11, 00367 /* ** */ /* ** */ 00368 &hB, h21, FLA_TOP ); 00369 00370 FLA_Cont_with_3x1_to_2x1( &F1T, F01, 00371 F11, 00372 /* ** */ /* ** */ 00373 &F1B, F21, FLA_TOP ); 00374 } 00375 00376 /*------------------------------------------------------------*/ 00377 00378 FLA_Cont_with_1x3_to_1x2( &FL, /**/ &FR, F0, F1, /**/ F2, 00379 FLA_LEFT ); 00380 00381 FLA_Cont_with_1x3_to_1x2( &HBR_L, /**/ &HBR_R, h0, h1, /**/ h2, 00382 FLA_LEFT ); 00383 } 00384 00385 return FLA_SUCCESS; 00386 00387 }
FLA_Error FLASH_Axpy_submatrix_to_global | ( | FLA_Obj | alpha, | |
dim_t | m, | |||
dim_t | n, | |||
void * | buffer, | |||
dim_t | ldim, | |||
dim_t | i, | |||
dim_t | j, | |||
FLA_Obj | H | |||
) |
References FLA_Check_consistent_object_datatype(), FLA_Check_error_level(), FLA_Check_if_scalar(), FLA_Check_submatrix_dims_and_offset(), FLA_Obj_attach_buffer(), FLA_Obj_create_without_buffer(), FLA_Obj_free_without_buffer(), FLASH_Axpy_hierarchy(), and FLASH_Obj_datatype().
00036 { 00037 FLA_Obj flat_matrix; 00038 FLA_Datatype datatype; 00039 FLA_Error e_val; 00040 00041 if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) 00042 { 00043 e_val = FLA_Check_if_scalar( alpha ); 00044 FLA_Check_error_code( e_val ); 00045 00046 e_val = FLA_Check_consistent_object_datatype( alpha, H ); 00047 FLA_Check_error_code( e_val ); 00048 00049 e_val = FLA_Check_submatrix_dims_and_offset( m, n, i, j, H ); 00050 FLA_Check_error_code( e_val ); 00051 } 00052 00053 // Acquire the datatype from the hierarchical matrix object. 00054 datatype = FLASH_Obj_datatype( H ); 00055 00056 // Create a temporary conventional matrix object of the requested datatype 00057 // and dimensions and attach the given buffer containing the incoming data. 00058 FLA_Obj_create_without_buffer( datatype, m, n, &flat_matrix ); 00059 FLA_Obj_attach_buffer( buffer, ldim, &flat_matrix ); 00060 00061 // Recurse through H, adding in the corresponding elements of flat_matrix, 00062 // starting at the (i,j) element offset. 00063 FLASH_Axpy_hierarchy( FLA_FLAT_TO_HIER, alpha, flat_matrix, i, j, &H ); 00064 00065 // Free the object (but don't free the buffer!). 00066 FLA_Obj_free_without_buffer( &flat_matrix ); 00067 00068 return FLA_SUCCESS; 00069 }
References FLASH_Axpy_hierarchy().
00110 { 00111 FLASH_Axpy_hierarchy( FLA_FLAT_TO_HIER, alpha, F, i, j, &H ); 00112 00113 return FLA_SUCCESS; 00114 }