FLASH_Axpy_other.c File Reference

(r)


Functions

FLA_Error FLASH_Axpy_submatrix_to_global (FLA_Obj alpha, dim_t m, dim_t n, void *buffer, dim_t ldim, dim_t i, dim_t j, FLA_Obj H)
FLA_Error FLASH_Axpy_global_to_submatrix (FLA_Obj alpha, dim_t i, dim_t j, FLA_Obj H, dim_t m, dim_t n, void *buffer, dim_t ldim)
FLA_Error FLASH_Axpy_subobject_to_global (FLA_Obj alpha, FLA_Obj F, dim_t i, dim_t j, FLA_Obj H)
FLA_Error FLASH_Axpy_global_to_subobject (FLA_Obj alpha, dim_t i, dim_t j, FLA_Obj H, FLA_Obj F)
FLA_Error FLASH_Axpy_hierarchy (int direction, FLA_Obj alpha, FLA_Obj F, dim_t i, dim_t j, FLA_Obj *H)
FLA_Error FLASH_Axpy_hierarchy_r (int direction, FLA_Obj alpha, FLA_Obj F, dim_t i, dim_t j, FLA_Obj *H, FLASH_Obj_queue *queue)

Function Documentation

FLA_Error FLASH_Axpy_global_to_submatrix ( FLA_Obj  alpha,
dim_t  i,
dim_t  j,
FLA_Obj  H,
dim_t  m,
dim_t  n,
void *  buffer,
dim_t  ldim 
)

References FLA_Check_consistent_object_datatype(), FLA_Check_error_level(), FLA_Check_if_scalar(), FLA_Check_submatrix_dims_and_offset(), FLA_Obj_attach_buffer(), FLA_Obj_create_without_buffer(), FLA_Obj_free_without_buffer(), FLASH_Axpy_hierarchy(), and FLASH_Obj_datatype().

00073 {
00074     FLA_Obj      flat_matrix;
00075     FLA_Datatype datatype;
00076     FLA_Error    e_val;
00077 
00078     if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
00079     {
00080         e_val = FLA_Check_if_scalar( alpha );
00081         FLA_Check_error_code( e_val );
00082 
00083         e_val = FLA_Check_consistent_object_datatype( alpha, H );
00084         FLA_Check_error_code( e_val );
00085 
00086         e_val = FLA_Check_submatrix_dims_and_offset( m, n, i, j, H );
00087         FLA_Check_error_code( e_val );
00088     }
00089 
00090     // Acquire the datatype from the hierarchical matrix object.
00091     datatype = FLASH_Obj_datatype( H );
00092 
00093     // Create a temporary conventional matrix object of the requested datatype
00094     // and dimensions and attach the given buffer containing the incoming data.
00095     FLA_Obj_create_without_buffer( datatype, m, n, &flat_matrix );
00096     FLA_Obj_attach_buffer( buffer, ldim, &flat_matrix );
00097 
00098     // Recurse through H, adding in the corresponding elements of flat_matrix,
00099     // starting at the (i,j) element offset.
00100     FLASH_Axpy_hierarchy( FLA_HIER_TO_FLAT, alpha, flat_matrix, i, j, &H );
00101 
00102     // Free the object (but don't free the buffer!).
00103     FLA_Obj_free_without_buffer( &flat_matrix );
00104 
00105     return FLA_SUCCESS;
00106 }

FLA_Error FLASH_Axpy_global_to_subobject ( FLA_Obj  alpha,
dim_t  i,
dim_t  j,
FLA_Obj  H,
FLA_Obj  F 
)

References FLASH_Axpy_hierarchy().

00118 {
00119     FLASH_Axpy_hierarchy( FLA_HIER_TO_FLAT, alpha, F, i, j, &H );
00120 
00121     return FLA_SUCCESS;
00122 }

FLA_Error FLASH_Axpy_hierarchy ( int  direction,
FLA_Obj  alpha,
FLA_Obj  F,
dim_t  i,
dim_t  j,
FLA_Obj H 
)

References FLASH_Axpy_hierarchy(), FLASH_Axpy_hierarchy_r(), FLASH_Obj_exec(), FLASH_Obj_queue_s::head, FLASH_Obj_queue_s::n_tasks, and FLASH_Obj_queue_s::tail.

Referenced by FLASH_Axpy_global_to_submatrix(), FLASH_Axpy_global_to_subobject(), FLASH_Axpy_hierarchy(), FLASH_Axpy_submatrix_to_global(), FLASH_Axpy_subobject_to_global(), and FLASH_Obj_exec_parallel().

00126 {
00127     FLA_Error e_val;
00128     FLASH_Obj_queue queue;
00129 
00130     queue.n_tasks = 0;
00131     queue.head = NULL;
00132     queue.tail = NULL;
00133 
00134     e_val = FLASH_Axpy_hierarchy_r( direction, alpha, F, i, j, H, &queue );
00135 
00136 #ifdef FLA_ENABLE_MULTITHREADING
00137     FLASH_Obj_exec( ( void* ) FLASH_Axpy_hierarchy, &queue );
00138 #endif
00139     return e_val;
00140 }

FLA_Error FLASH_Axpy_hierarchy_r ( int  direction,
FLA_Obj  alpha,
FLA_Obj  F,
dim_t  i,
dim_t  j,
FLA_Obj H,
FLASH_Obj_queue queue 
)

References FLA_Axpy_external(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Obj_buffer(), FLA_Obj_elemtype(), FLA_Obj_length(), FLA_Obj_width(), FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLASH_Axpy_hierarchy_r(), FLASH_Obj_push(), FLASH_Obj_scalar_length(), FLASH_Obj_scalar_width(), and FLASH_Queue_get_num_threads().

Referenced by FLASH_Axpy_hierarchy(), and FLASH_Axpy_hierarchy_r().

00144 {
00145     dim_t    m_scal, n_scal;
00146     dim_t    row_offset, col_offset;
00147     dim_t    b_m, b_n;
00148     FLA_Obj* buffer;
00149     FLA_Obj* h11p;
00150 
00151     FLA_Obj HTL,   HTR,    H00, H01, H02,
00152             HBL,   HBR,    H10, H11, H12,
00153                            H20, H21, H22;
00154 
00155     FLA_Obj F1T,             F01,
00156             F1B,             F11,
00157                              F21;
00158 
00159     FLA_Obj hT,              h01,
00160             hB,              h11,
00161                              h21;
00162 
00163     FLA_Obj FL,    FR,       F0,  F1,  F2;
00164 
00165     FLA_Obj HBR_L,  HBR_R,     h0,  h1,  h2;
00166 
00167     // Once we get down to a submatrix whose elements are scalars, we are down
00168     // to our base case.
00169     if ( FLA_Obj_elemtype( *H ) == FLA_SCALAR )
00170     {
00171         dim_t m_flat, n_flat;
00172 
00173         // Grab the length and width of the conventional matrix argument.
00174         m_flat = FLA_Obj_length( F );
00175         n_flat = FLA_Obj_width( F );
00176 
00177         // Advance to the (i,j) offset of the hierarchical matrix.
00178         FLA_Part_2x2( *H,   &HTL, &HTR,
00179                             &HBL, &HBR,     i, j, FLA_TL );
00180 
00181         // Repartition the destination submatrix, H, such that H11 is conformal
00182         // to dimensions of the flat conventional matrix F.
00183         FLA_Repart_2x2_to_3x3( HTL, /**/ HTR,       &H00, /**/ &H01, &H02,
00184                             /* ************* */   /* ******************** */
00185                                                     &H10, /**/ &H11, &H12,
00186                                HBL, /**/ HBR,       &H20, /**/ &H21, &H22,
00187                                m_flat, n_flat, FLA_BR );
00188 
00189 #ifdef FLA_ENABLE_MULTITHREADING
00190         if ( FLASH_Queue_get_num_threads() > 1 )
00191         {
00192             FLASH_Obj_push( direction, alpha, F, H11, queue );
00193         }   
00194         else
00195 #endif
00196         {
00197             // Depending on which top-level function invoked us, we either add the
00198             // the source data in the flat matrix to the leaf-level submatrix of the
00199             // hierarchical matrix, or add the data in the hierarchical submatrix to
00200             // the appropriate place in the flat matrix.
00201             if      ( direction == FLA_FLAT_TO_HIER )
00202             {
00203                 FLA_Axpy_external( alpha, F, H11 );
00204             }
00205             else if ( direction == FLA_HIER_TO_FLAT )
00206             {
00207                 FLA_Axpy_external( alpha, H11, F );
00208             }
00209         }
00210 
00211         // Return, since we're done for this leaf.
00212         return FLA_SUCCESS;
00213     }
00214 
00215     // If the above conditional did not hold, then we proceed.
00216 
00217     // Grab the buffer of H, whose elemtype we know must be FLA_MATRIX. The
00218     // elements of this buffer are objects.
00219     buffer = ( FLA_Obj* ) FLA_Obj_buffer( *H );
00220 
00221     // Extract the overall scalar dimensions of the first (top-left) matrix
00222     // referenced in the hierarchical matrix's buffer.
00223     m_scal = FLASH_Obj_scalar_length( buffer[0] );
00224     n_scal = FLASH_Obj_scalar_width( buffer[0] );
00225 
00226     // The i and j offsets given to the current function are relative to the
00227     // entire matrix, which is represented by H. Since the elemtype of H is
00228     // FLA_MATRIX, we now compute the number of submatrix elements we will skip
00229     // in both the m and n dimensions to advance ourselves to the proper
00230     // hierarchical submatrix within H, the one that contains the destination
00231     // for the data (or at least the top-left most portion of the data) in the
00232     // raw buffer matrix. Notice that integer division is performed here.
00233     b_m = i / m_scal;
00234     b_n = j / n_scal;
00235 
00236     // We use the indices computed above to advance through H such that the
00237     // (i,j) element represented by the (i,j) values passed to the current
00238     // function is situated at the top-left of HBR.
00239     FLA_Part_2x2( *H,   &HTL, &HTR,
00240                         &HBL, &HBR,     b_m, b_n, FLA_TL );
00241 
00242     // Prepare to march laterally through HBR.
00243     FLA_Part_1x2( HBR,     &HBR_L,  &HBR_R,    0, FLA_LEFT);
00244 
00245     // Prepare to march laterally through the conventional flat matrix.
00246     FLA_Part_1x2( F,       &FL,  &FR,          0, FLA_LEFT );
00247 
00248     // Iterate laterally through F.
00249     while ( FLA_Obj_width( FL ) < FLA_Obj_width( F ) )
00250     {
00251         // Handle the initial edge case and "interior" cases differently.
00252         if ( FLA_Obj_width( FL ) == 0 )
00253         {
00254             // Compute the width of the matrix that we will recurse into.
00255             // Since this is the first iteration of the while loop, this
00256             // blocksize may be smaller than subsequent full blocks if the
00257             // j offset passed to this routine is non-zero.
00258             b_n = n_scal - (j % n_scal);
00259 
00260             // Reduce the blocksize if the width of F is smaller than the
00261             // value computed above.
00262             b_n = min( FLA_Obj_width( FR ), b_n );
00263 
00264             // If the j offset is non-zero, we must modulo it to the
00265             // hierarchical blocksize for the element submatrices of H
00266             // in preparation for our recursion.
00267             col_offset = j % n_scal;
00268         }
00269         else
00270         {
00271             // The width of the matrix into which we will recursve may be
00272             // assumed to be the same as the width of the element submatrices
00273             // of H. 
00274             b_n = n_scal;
00275 
00276             // Reduce the blocksize if the width of F is smaller than the
00277             // element submatrices.
00278             b_n = min( FLA_Obj_width( FR ), b_n );
00279 
00280             // The column offset will be zero because we will be starting from
00281             // the left edge of the submatrix.
00282             col_offset = 0;
00283         }
00284 
00285         // Expose one column of hierarchical submatrices within HBR.
00286         FLA_Repart_1x2_to_1x3( HBR_L,  /**/ HBR_R,        &h0, /**/ &h1, &h2,
00287                                1, FLA_RIGHT );
00288 
00289         // Expose one panel of F of width b_n, computed above.
00290         FLA_Repart_1x2_to_1x3( FL,  /**/ FR,        &F0, /**/ &F1, &F2,
00291                                b_n, FLA_RIGHT );
00292 
00293         /*------------------------------------------------------------*/
00294 
00295         // Prepare to march vertically through h1 (the first submatrix column
00296         // of HBR).
00297         FLA_Part_2x1( h1,    &hT,
00298                              &hB,            0, FLA_TOP );
00299 
00300         // Prepare to march vertically through F.
00301         FLA_Part_2x1( F1,    &F1T,
00302                              &F1B,           0, FLA_TOP );
00303 
00304         // Iterate vertically through the current column-panel of F, F1.
00305         while ( FLA_Obj_length( F1T ) < FLA_Obj_length( F1 ) )
00306         {
00307 
00308             if ( FLA_Obj_length( F1T ) == 0 )
00309             {
00310                 // Compute the length of the matrix that we will recurse into.
00311                 // Since this is the first iteration of the while loop, this
00312                 // blocksize may be smaller than subsequent full blocks if the
00313                 // i offset passed to this routine is non-zero.
00314                 b_m = m_scal - (i % m_scal);
00315 
00316                 // Reduce the blocksize if the length of F is smaller than the
00317                 // value computed above.
00318                 b_m = min( FLA_Obj_length( F1B ), b_m );
00319 
00320                 // If the i offset is non-zero, we must modulo it to the
00321                 // hierarchical blocksize for the element submatrices of H
00322                 // in preparation for our recursion.
00323                 row_offset = i % m_scal;
00324             }
00325             else
00326             {
00327                 // The length of the matrix into which we will recursve may be
00328                 // assumed to be the same as the length of the element submatrices
00329                 // of H. 
00330                 b_m = m_scal;
00331 
00332                 // Reduce the blocksize if the length of the first panel of
00333                 // F is smaller than the the element submatrices.
00334                 b_m = min( FLA_Obj_length( F1B ), b_m );
00335 
00336                 // The column offset will be zero because we will be starting from
00337                 // the top edge of the submatrix.
00338                 row_offset = 0;
00339             }
00340 
00341             // Expose one hierarchical submatrix within the current column of h1.
00342             FLA_Repart_2x1_to_3x1( hT,                &h01,
00343                                 /* ** */            /* ** */
00344                                                       &h11,
00345                                    hB,                &h21,        1, FLA_BOTTOM );
00346 
00347             // Expose one block of F1 (the current panel of F) of length b_m,
00348             // computed above.
00349             FLA_Repart_2x1_to_3x1( F1T,               &F01,
00350                                 /* ** */            /* ** */
00351                                                       &F11,
00352                                    F1B,               &F21,      b_m, FLA_BOTTOM );
00353 
00354             /*------------------------------------------------------------*/
00355 
00356             // This is a pointer to the FLA_Obj element referenced by h11.
00357             h11p = ( FLA_Obj* ) FLA_Obj_buffer( h11 );
00358 
00359             // Recurse with new (i,j) offset parameters, a narrowed block of
00360             // F, and a corresponding hierarchical block of H.
00361             FLASH_Axpy_hierarchy_r( direction, alpha, F11, row_offset, col_offset, h11p, queue );
00362 
00363             /*------------------------------------------------------------*/
00364 
00365             FLA_Cont_with_3x1_to_2x1( &hT,                h01,
00366                                                           h11,
00367                                    /* ** */            /* ** */
00368                                       &hB,                h21,     FLA_TOP );
00369 
00370             FLA_Cont_with_3x1_to_2x1( &F1T,               F01,
00371                                                           F11,
00372                                     /* ** */           /* ** */
00373                                       &F1B,               F21,     FLA_TOP );
00374         }
00375 
00376         /*------------------------------------------------------------*/
00377 
00378         FLA_Cont_with_1x3_to_1x2( &FL,  /**/ &FR,        F0, F1, /**/ F2,
00379                                   FLA_LEFT );
00380 
00381         FLA_Cont_with_1x3_to_1x2( &HBR_L,  /**/ &HBR_R,        h0, h1, /**/ h2,
00382                                   FLA_LEFT );
00383     }
00384 
00385     return FLA_SUCCESS;
00386 
00387 }

FLA_Error FLASH_Axpy_submatrix_to_global ( FLA_Obj  alpha,
dim_t  m,
dim_t  n,
void *  buffer,
dim_t  ldim,
dim_t  i,
dim_t  j,
FLA_Obj  H 
)

References FLA_Check_consistent_object_datatype(), FLA_Check_error_level(), FLA_Check_if_scalar(), FLA_Check_submatrix_dims_and_offset(), FLA_Obj_attach_buffer(), FLA_Obj_create_without_buffer(), FLA_Obj_free_without_buffer(), FLASH_Axpy_hierarchy(), and FLASH_Obj_datatype().

00036 {
00037     FLA_Obj      flat_matrix;
00038     FLA_Datatype datatype;
00039     FLA_Error    e_val;
00040 
00041     if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
00042     {
00043         e_val = FLA_Check_if_scalar( alpha );
00044         FLA_Check_error_code( e_val );
00045 
00046         e_val = FLA_Check_consistent_object_datatype( alpha, H );
00047         FLA_Check_error_code( e_val );
00048 
00049         e_val = FLA_Check_submatrix_dims_and_offset( m, n, i, j, H );
00050         FLA_Check_error_code( e_val );
00051     }
00052 
00053     // Acquire the datatype from the hierarchical matrix object.
00054     datatype = FLASH_Obj_datatype( H );
00055 
00056     // Create a temporary conventional matrix object of the requested datatype
00057     // and dimensions and attach the given buffer containing the incoming data.
00058     FLA_Obj_create_without_buffer( datatype, m, n, &flat_matrix );
00059     FLA_Obj_attach_buffer( buffer, ldim, &flat_matrix );
00060 
00061     // Recurse through H, adding in the corresponding elements of flat_matrix,
00062     // starting at the (i,j) element offset.
00063     FLASH_Axpy_hierarchy( FLA_FLAT_TO_HIER, alpha, flat_matrix, i, j, &H );
00064 
00065     // Free the object (but don't free the buffer!).
00066     FLA_Obj_free_without_buffer( &flat_matrix );
00067 
00068     return FLA_SUCCESS;
00069 }

FLA_Error FLASH_Axpy_subobject_to_global ( FLA_Obj  alpha,
FLA_Obj  F,
dim_t  i,
dim_t  j,
FLA_Obj  H 
)

References FLASH_Axpy_hierarchy().

00110 {
00111     FLASH_Axpy_hierarchy( FLA_FLAT_TO_HIER, alpha, F, i, j, &H );
00112 
00113     return FLA_SUCCESS;
00114 }


Generated on Mon Jul 6 05:45:51 2009 for libflame by  doxygen 1.5.9