FLASH_Queue.c File Reference

(r)


Functions

void FLASH_Queue_begin (void)
void FLASH_Queue_end (void)
FLA_Error FLASH_Queue_enable (void)
FLA_Error FLASH_Queue_disable (void)
FLA_Bool FLASH_Queue_get_enabled (void)
void FLASH_Queue_set_num_threads (unsigned int n_threads)
unsigned int FLASH_Queue_get_num_threads (void)
void FLASH_Queue_init (void)
void FLASH_Queue_finalize (void)
int FLASH_Queue_get_num_tasks (void)
void FLASH_Queue_set_verbose_output (FLA_Bool verbose)
FLA_Bool FLASH_Queue_get_verbose_output (void)
void FLASH_Queue_set_sorting (FLA_Bool sorting)
FLA_Bool FLASH_Queue_get_sorting (void)
void FLASH_Queue_set_caching (FLA_Bool caching)
FLA_Bool FLASH_Queue_get_caching (void)
void FLASH_Queue_set_work_stealing (FLA_Bool work_stealing)
FLA_Bool FLASH_Queue_get_work_stealing (void)
void FLASH_Queue_set_data_affinity (FLASH_Data_aff data_affinity)
FLASH_Data_aff FLASH_Queue_get_data_affinity (void)
double FLASH_Queue_get_total_time (void)
double FLASH_Queue_get_parallel_time (void)
void FLASH_Queue_set_parallel_time (double dtime)
int FLASH_Queue_get_num_blocks (void)
void FLASH_Queue_set_block_size (int size)
int FLASH_Queue_get_block_size (void)
void FLASH_Queue_set_cache_size (int size)
int FLASH_Queue_get_cache_size (void)
void FLASH_Queue_set_cache_line_size (int size)
int FLASH_Queue_get_cache_line_size (void)
void FLASH_Queue_set_cores_per_cache (int cores)
int FLASH_Queue_get_cores_per_cache (void)
void FLASH_Queue_reset (void)
FLASH_Task * FLASH_Queue_get_head_task (void)
FLASH_Task * FLASH_Queue_get_tail_task (void)
void FLASH_Queue_push (void *func, void *cntl, char *name, int n_int_args, int n_fla_args, int n_input_args, int n_output_args,...)
FLASH_Task * FLASH_Task_alloc (void *func, void *cntl, char *name, int n_int_args, int n_fla_args, int n_input_args, int n_output_args)
void FLASH_Task_free (FLASH_Task *t)
void FLASH_Queue_exec_task (FLASH_Task *t)
void FLASH_Queue_verbose_output (void)
void FLASH_Queue_visualization (void)

Variables

FLASH_Queue _tq

Function Documentation

void FLASH_Queue_begin ( void   ) 

References FLA_Clock().

Referenced by FLASH_Apply_Q_UT(), FLASH_Apply_Q_UT_inc(), FLASH_Apply_Q_UT_UD(), FLASH_Chol(), FLASH_Gemm(), FLASH_Hemm(), FLASH_Her2k(), FLASH_Herk(), FLASH_LU_incpiv(), FLASH_LU_nopiv(), FLASH_QR_UT_inc_noopt(), FLASH_QR_UT_inc_opt1(), FLASH_QR_UT_UD(), FLASH_SPDinv(), FLASH_Sylv(), FLASH_Symm(), FLASH_Syr2k(), FLASH_Syrk(), FLASH_Trinv(), FLASH_Trmm(), FLASH_Trsm(), and FLASH_Ttmm().

00081 {
00082 #ifdef FLA_ENABLE_SUPERMATRIX
00083    if ( flash_queue_stack == 0 )
00084    {
00085       // Entering the outermost region: reset the parallel execution timer.
00086       flash_queue_parallel_time = 0.0;
00087 
00088       // Save the starting time; FLASH_Queue_end() turns it into elapsed time.
00089       flash_queue_total_time = FLA_Clock();
00090    }
00091 #endif
00092 
00093    // Push onto the stack (the "stack" is a nesting-depth counter).
00094    flash_queue_stack++;
00095 
00096    return;
00097 }

FLA_Error FLASH_Queue_disable ( void   ) 

Referenced by FLASH_Axpy(), FLASH_Copy(), FLASH_FS_incpiv(), FLASH_Gemv(), and FLASH_Trsv().

00158 {
00159 #ifdef FLA_ENABLE_SUPERMATRIX
00160    if ( flash_queue_stack == 0 )
00161    {
00162       // No parallel region has begun yet, so disabling is allowed.
00163       flash_queue_enabled = FALSE;   
00164       return FLA_SUCCESS;      
00165    }
00166    else
00167    {
00168       // The status cannot be changed inside a parallel region.
00169       return FLA_FAILURE;
00170    }
00171 #else
00172    // Allow disabling enqueuing even when SuperMatrix is not configured.
00173    flash_queue_enabled = FALSE;   
00174    return FLA_SUCCESS;
00175 #endif
00176 }

FLA_Error FLASH_Queue_enable ( void   ) 

Referenced by FLASH_Axpy(), FLASH_Copy(), FLASH_FS_incpiv(), FLASH_Gemv(), and FLASH_Trsv().

00131 {
00132 #ifdef FLA_ENABLE_SUPERMATRIX
00133    if ( flash_queue_stack == 0 )
00134    {
00135       // No parallel region has begun yet, so enabling is allowed.
00136       flash_queue_enabled = TRUE;   
00137       return FLA_SUCCESS;
00138    }
00139    else
00140    {
00141       // The status cannot be changed inside a parallel region.
00142       return FLA_FAILURE;
00143    }
00144 #else
00145    // SuperMatrix is not configured: report the error code and fail.
00146    FLA_Check_error_code( FLA_SUPERMATRIX_NOT_ENABLED );
00147    return FLA_FAILURE;
00148 #endif
00149 }

void FLASH_Queue_end ( void   ) 

References FLA_Clock(), and FLASH_Queue_exec().

Referenced by FLASH_Apply_Q_UT(), FLASH_Apply_Q_UT_inc(), FLASH_Apply_Q_UT_UD(), FLASH_Chol(), FLASH_Gemm(), FLASH_Hemm(), FLASH_Her2k(), FLASH_Herk(), FLASH_LU_incpiv(), FLASH_LU_nopiv(), FLASH_QR_UT_inc_noopt(), FLASH_QR_UT_inc_opt1(), FLASH_QR_UT_UD(), FLASH_SPDinv(), FLASH_Sylv(), FLASH_Symm(), FLASH_Syr2k(), FLASH_Syrk(), FLASH_Trinv(), FLASH_Trmm(), FLASH_Trsm(), and FLASH_Ttmm().

00106 {
00107    // Pop off the stack (the "stack" is a nesting-depth counter).
00108    flash_queue_stack--;
00109 
00110 #ifdef FLA_ENABLE_SUPERMATRIX
00111    if ( flash_queue_stack == 0 )
00112    {
00113       // Execute tasks when the outermost parallel region ends.
00114       FLASH_Queue_exec();
00115 
00116       // Replace the stored start time with the elapsed total execution time.
00117       flash_queue_total_time = FLA_Clock() - flash_queue_total_time;
00118    }
00119 #endif
00120 
00121    return;
00122 }

void FLASH_Queue_exec_task ( FLASH_Task *  t  ) 

References FLASH_Task_s::begin_time, FLASH_Task_s::cntl, FLASH_Task_s::end_time, FLA_Apply_Q_UT_task(), FLA_Apply_Q_UT_UD_task(), FLASH_Task_s::fla_arg, FLA_Axpy_task(), FLA_Chol_task(), FLA_Clock(), FLA_Copy_task(), FLA_Gemm_task(), FLA_Gemv_task(), FLA_Hemm_task(), FLA_Her2k_task(), FLA_Herk_task(), FLA_LU_nopiv_task(), FLA_LU_piv_copy_task(), FLA_LU_piv_task(), FLA_Obj_free_task(), FLA_QR_UT_copy_task(), FLA_QR_UT_task(), FLA_QR_UT_UD_task(), FLA_SA_FS_task(), FLA_SA_LU_task(), FLA_Sylv_task(), FLA_Symm_task(), FLA_Syr2k_task(), FLA_Syrk_task(), FLA_Trinv_task(), FLA_Trmm_task(), FLA_Trsm_piv_task(), FLA_Trsm_task(), FLA_Trsv_task(), FLA_Ttmm_task(), FLASH_Task_s::func, FLASH_Task_s::input_arg, FLASH_Task_s::int_arg, and FLASH_Task_s::output_arg.

Referenced by FLASH_Queue_exec_parallel_function(), and FLASH_Queue_exec_simulation().

00992 {
00993    // Define local function pointer types.
00994 
00995    // LAPACK-level
00996    typedef FLA_Error(*flash_lu_piv_p)(FLA_Obj A, FLA_Obj p, fla_lu_t* cntl);
00997    typedef FLA_Error(*flash_lu_piv_copy_p)(FLA_Obj A, FLA_Obj p, FLA_Obj U, fla_lu_t* cntl);
00998    typedef FLA_Error(*flash_trsm_piv_p)(FLA_Obj A, FLA_Obj C, FLA_Obj p, fla_trsm_t* cntl);
00999    typedef FLA_Error(*flash_sa_lu_p)(FLA_Obj U, FLA_Obj D, FLA_Obj p, FLA_Obj L, int nb_alg, fla_lu_t* cntl);
01000    typedef FLA_Error(*flash_sa_fs_p)(FLA_Obj L, FLA_Obj D, FLA_Obj p, FLA_Obj C, FLA_Obj E, int nb_alg, fla_lu_t* cntl);
01001    typedef FLA_Error(*flash_lu_nopiv_p)(FLA_Obj A, fla_lu_t* cntl);
01002    typedef FLA_Error(*flash_trinv_p)(FLA_Uplo uplo, FLA_Diag diag, FLA_Obj A, fla_trinv_t* cntl);
01003    typedef FLA_Error(*flash_ttmm_p)(FLA_Uplo uplo, FLA_Obj A, fla_ttmm_t* cntl);
01004    typedef FLA_Error(*flash_chol_p)(FLA_Uplo uplo, FLA_Obj A, fla_chol_t* cntl);
01005    typedef FLA_Error(*flash_sylv_p)(FLA_Trans transa, FLA_Trans transb, FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t* cntl);
01006    typedef FLA_Error(*flash_qrut_p)(FLA_Obj A, FLA_Obj T, fla_qrut_t* cntl);
01007    typedef FLA_Error(*flash_qrutc_p)(FLA_Obj A, FLA_Obj T, FLA_Obj U, fla_qrut_t* cntl);
01008    typedef FLA_Error(*flash_qrutud_p)(FLA_Obj B, FLA_Obj D, FLA_Obj T, fla_qrutud_t* cntl);
01009    typedef FLA_Error(*flash_apqut_p)(FLA_Side side, FLA_Trans trans, FLA_Store storev, FLA_Obj A, FLA_Obj T, FLA_Obj W, FLA_Obj B, fla_apqut_t* cntl);
01010    typedef FLA_Error(*flash_apqutud_p)(FLA_Side side, FLA_Trans trans, FLA_Store storev, FLA_Obj D, FLA_Obj T, FLA_Obj W, FLA_Obj C, FLA_Obj E, fla_apqutud_t* cntl);
01011 
01012    // Level-3 BLAS
01013    typedef FLA_Error(*flash_gemm_p)(FLA_Trans transa, FLA_Trans transb, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_gemm_t* cntl);
01014    typedef FLA_Error(*flash_hemm_p)(FLA_Side side, FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_hemm_t* cntl);
01015    typedef FLA_Error(*flash_herk_p)(FLA_Uplo uplo, FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj beta, FLA_Obj C, fla_herk_t* cntl);
01016    typedef FLA_Error(*flash_her2k_p)(FLA_Uplo uplo, FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_her2k_t* cntl);
01017    typedef FLA_Error(*flash_symm_p)(FLA_Side side, FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_symm_t* cntl);
01018    typedef FLA_Error(*flash_syrk_p)(FLA_Uplo uplo, FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj beta, FLA_Obj C, fla_syrk_t* cntl);
01019    typedef FLA_Error(*flash_syr2k_p)(FLA_Uplo uplo, FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_syr2k_t* cntl);
01020    typedef FLA_Error(*flash_trmm_p)(FLA_Side side, FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj C, fla_trmm_t* cntl);
01021    typedef FLA_Error(*flash_trsm_p)(FLA_Side side, FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj C, fla_trsm_t* cntl);
01022 
01023    // Level-2 BLAS
01024    typedef FLA_Error(*flash_gemv_p)(FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y, fla_gemv_t* cntl);
01025    typedef FLA_Error(*flash_trsv_p)(FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj A, FLA_Obj x, fla_trsv_t* cntl);
01026 
01027    // Level-1 BLAS
01028    typedef FLA_Error(*flash_axpy_p)(FLA_Obj alpha, FLA_Obj A, FLA_Obj B, fla_axpy_t* cntl);
01029    typedef FLA_Error(*flash_copy_p)(FLA_Obj A, FLA_Obj B, fla_copy_t* cntl);
01030 
01031    // Base
01032    typedef FLA_Error(*flash_obj_free_p)(FLA_Obj A, void* cntl);
01033 
01034 
01035    // Only execute task if it is not NULL.
01036    if ( t == NULL )
01037       return;
01038    
01039 #ifdef FLA_ENABLE_SUPERMATRIX_VISUALIZATION
01040    t->begin_time = FLA_Clock();
01041 #endif
01042 
01043    // Now "switch" between the various possible task functions.
01044 
01045    // FLA_LU_piv
01046    if ( t->func == (void *) FLA_LU_piv_task )
01047    {
01048       flash_lu_piv_p func;
01049       func = (flash_lu_piv_p) t->func;
01050 
01051       func(               t->output_arg[0],
01052                           t->fla_arg[0],
01053             ( fla_lu_t* ) t->cntl );
01054    }
01055    // FLA_LU_piv_copy
01056    else if ( t->func == (void *) FLA_LU_piv_copy_task )
01057    {
01058       flash_lu_piv_copy_p func;
01059       func = (flash_lu_piv_copy_p) t->func;
01060 
01061       func(               t->output_arg[0],
01062                           t->fla_arg[0],
01063                           t->output_arg[1],
01064             ( fla_lu_t* ) t->cntl );
01065    }
01066    // FLA_Trsm_piv
01067    else if ( t->func == (void *) FLA_Trsm_piv_task )
01068    {
01069       flash_trsm_piv_p func;
01070       func = (flash_trsm_piv_p) t->func;
01071 
01072       func(                 t->input_arg[0],
01073                             t->output_arg[0],
01074                             t->fla_arg[0],
01075             ( fla_trsm_t* ) t->cntl );
01076    }
01077    // FLA_SA_LU
01078    else if ( t->func == (void *) FLA_SA_LU_task )
01079    {
01080       flash_sa_lu_p func;
01081       func = (flash_sa_lu_p) t->func;
01082 
01083       func(               t->output_arg[1],
01084                           t->output_arg[0],
01085                           t->fla_arg[0],
01086                           t->fla_arg[1],
01087                           t->int_arg[0],
01088             ( fla_lu_t* ) t->cntl );
01089    }
01090    // FLA_SA_FS
01091    else if ( t->func == (void *) FLA_SA_FS_task )
01092    {
01093       flash_sa_fs_p func;
01094       func = (flash_sa_fs_p) t->func;
01095 
01096       func(               t->fla_arg[0],
01097                           t->input_arg[0],
01098                           t->fla_arg[1],                          
01099                           t->output_arg[1],
01100                           t->output_arg[0],
01101                           t->int_arg[0],
01102             ( fla_lu_t* ) t->cntl );
01103    }
01104    // FLA_LU_nopiv
01105    else if ( t->func == (void *) FLA_LU_nopiv_task )
01106    {
01107       flash_lu_nopiv_p func;
01108       func = (flash_lu_nopiv_p) t->func;
01109 
01110       func(               t->output_arg[0],
01111             ( fla_lu_t* ) t->cntl );
01112    }
01113    // FLA_Trinv
01114    else if ( t->func == (void *) FLA_Trinv_task )
01115    {
01116       flash_trinv_p func;
01117       func = (flash_trinv_p) t->func;
01118 
01119       func( ( FLA_Uplo     ) t->int_arg[0],
01120             ( FLA_Diag     ) t->int_arg[1],
01121                              t->output_arg[0],
01122             ( fla_trinv_t* ) t->cntl );
01123    }
01124    // FLA_Ttmm
01125    else if ( t->func == (void *) FLA_Ttmm_task )
01126    {      
01127       flash_ttmm_p func;
01128       func = (flash_ttmm_p) t->func;
01129 
01130       func( ( FLA_Uplo    ) t->int_arg[0],
01131                             t->output_arg[0],
01132             ( fla_ttmm_t* ) t->cntl );
01133    }
01134    // FLA_Chol
01135    else if ( t->func == (void *) FLA_Chol_task )
01136    {      
01137       flash_chol_p func;
01138       func = (flash_chol_p) t->func;
01139 
01140       func( ( FLA_Uplo    ) t->int_arg[0],
01141                             t->output_arg[0],
01142             ( fla_chol_t* ) t->cntl );
01143    }
01144    // FLA_Sylv
01145    else if ( t->func == (void *) FLA_Sylv_task )
01146    {      
01147       flash_sylv_p func;
01148       func = (flash_sylv_p) t->func;
01149 
01150       func( ( FLA_Trans   ) t->int_arg[0],
01151             ( FLA_Trans   ) t->int_arg[1],
01152                             t->fla_arg[0],
01153                             t->input_arg[0],
01154                             t->input_arg[1],
01155                             t->output_arg[0],
01156                             t->fla_arg[1],
01157             ( fla_sylv_t* ) t->cntl );
01158    }
01159    // FLA_QR_UT
01160    else if ( t->func == (void *) FLA_QR_UT_task )
01161    {      
01162       flash_qrut_p func;
01163       func = (flash_qrut_p) t->func;
01164 
01165       func(                 t->output_arg[0],
01166                             t->fla_arg[0],
01167             ( fla_qrut_t* ) t->cntl );
01168    }
01169    // FLA_QR_UT_copy
01170    else if ( t->func == (void *) FLA_QR_UT_copy_task )
01171    {      
01172       flash_qrutc_p func;
01173       func = (flash_qrutc_p) t->func;
01174 
01175       func(                 t->output_arg[0],
01176                             t->fla_arg[0],
01177                             t->output_arg[1],
01178             ( fla_qrut_t* ) t->cntl );
01179    }
01180    // FLA_QR_UT_UD
01181    else if ( t->func == (void *) FLA_QR_UT_UD_task )
01182    {      
01183       flash_qrutud_p func;
01184       func = (flash_qrutud_p) t->func;
01185 
01186       func(                 t->output_arg[1],
01187                             t->output_arg[0],
01188                             t->fla_arg[0],
01189           ( fla_qrutud_t* ) t->cntl );
01190    }
01191    // FLA_Apply_Q_UT
01192    else if ( t->func == (void *) FLA_Apply_Q_UT_task )
01193    {      
01194       flash_apqut_p func;
01195       func = (flash_apqut_p) t->func;
01196 
01197       func( ( FLA_Side    ) t->int_arg[0],
01198             ( FLA_Trans   ) t->int_arg[1],
01199             ( FLA_Store   ) t->int_arg[2],
01200                             t->input_arg[0],
01201                             t->fla_arg[0],
01202                             t->output_arg[1],
01203                             t->output_arg[0],
01204            ( fla_apqut_t* ) t->cntl );
01205    }
01206    // FLA_Apply_Q_UT_UD
01207    else if ( t->func == (void *) FLA_Apply_Q_UT_UD_task )
01208    {      
01209       flash_apqutud_p func;
01210       func = (flash_apqutud_p) t->func;
01211 
01212       func( ( FLA_Side    ) t->int_arg[0],
01213             ( FLA_Trans   ) t->int_arg[1],
01214             ( FLA_Store   ) t->int_arg[2],
01215                             t->input_arg[0],
01216                             t->fla_arg[0],
01217                             t->output_arg[2],
01218                             t->output_arg[1],
01219                             t->output_arg[0],
01220          ( fla_apqutud_t* ) t->cntl );
01221    }
01222    // FLA_Gemm
01223    else if ( t->func == (void *) FLA_Gemm_task )
01224    {
01225       flash_gemm_p func;
01226       func = (flash_gemm_p) t->func;
01227 
01228       func( ( FLA_Trans   ) t->int_arg[0],
01229             ( FLA_Trans   ) t->int_arg[1],
01230                             t->fla_arg[0],
01231                             t->input_arg[0],
01232                             t->input_arg[1],
01233                             t->fla_arg[1],
01234                             t->output_arg[0],
01235             ( fla_gemm_t* ) t->cntl );
01236    }
01237    // FLA_Hemm
01238    else if ( t->func == (void *) FLA_Hemm_task )
01239    {
01240       flash_hemm_p func;
01241       func = (flash_hemm_p) t->func;
01242       
01243       func( ( FLA_Side    ) t->int_arg[0],
01244             ( FLA_Uplo    ) t->int_arg[1],
01245                             t->fla_arg[0],
01246                             t->input_arg[0],
01247                             t->input_arg[1],
01248                             t->fla_arg[1],
01249                             t->output_arg[0],
01250             ( fla_hemm_t* ) t->cntl );
01251    }
01252    // FLA_Herk
01253    else if ( t->func == (void *) FLA_Herk_task )
01254    {
01255       flash_herk_p func;
01256       func = (flash_herk_p) t->func;
01257 
01258       func( ( FLA_Uplo    ) t->int_arg[0],
01259             ( FLA_Trans   ) t->int_arg[1],
01260                             t->fla_arg[0],
01261                             t->input_arg[0],
01262                             t->fla_arg[1],
01263                             t->output_arg[0],
01264             ( fla_herk_t* ) t->cntl );
01265    }
01266    // FLA_Her2k
01267    else if ( t->func == (void *) FLA_Her2k_task )
01268    {
01269       flash_her2k_p func;
01270       func = (flash_her2k_p) t->func;
01271 
01272       func( ( FLA_Uplo     ) t->int_arg[0],
01273             ( FLA_Trans    ) t->int_arg[1],
01274                              t->fla_arg[0],
01275                              t->input_arg[0],
01276                              t->input_arg[1],
01277                              t->fla_arg[1],
01278                              t->output_arg[0],
01279             ( fla_her2k_t* ) t->cntl );
01280    }
01281    // FLA_Symm
01282    else if ( t->func == (void *) FLA_Symm_task )
01283    {
01284       flash_symm_p func;
01285       func = (flash_symm_p) t->func;
01286       
01287       func( ( FLA_Side    ) t->int_arg[0],
01288             ( FLA_Uplo    ) t->int_arg[1],
01289                             t->fla_arg[0],
01290                             t->input_arg[0],
01291                             t->input_arg[1],
01292                             t->fla_arg[1],
01293                             t->output_arg[0],
01294             ( fla_symm_t* ) t->cntl );
01295    }
01296    // FLA_Syrk
01297    else if ( t->func == (void *) FLA_Syrk_task )
01298    {
01299       flash_syrk_p func;
01300       func = (flash_syrk_p) t->func;
01301 
01302       func( ( FLA_Uplo    ) t->int_arg[0],
01303             ( FLA_Trans   ) t->int_arg[1],
01304                             t->fla_arg[0],
01305                             t->input_arg[0],
01306                             t->fla_arg[1],
01307                             t->output_arg[0],
01308             ( fla_syrk_t* ) t->cntl );
01309    }
01310    // FLA_Syr2k
01311    else if ( t->func == (void *) FLA_Syr2k_task )
01312    {
01313       flash_syr2k_p func;
01314       func = (flash_syr2k_p) t->func;
01315 
01316       func( ( FLA_Uplo     ) t->int_arg[0],
01317             ( FLA_Trans    ) t->int_arg[1],
01318                              t->fla_arg[0],
01319                              t->input_arg[0],
01320                              t->input_arg[1],
01321                              t->fla_arg[1],
01322                              t->output_arg[0],
01323             ( fla_syr2k_t* ) t->cntl );
01324    }
01325    // FLA_Trmm
01326    else if ( t->func == (void *) FLA_Trmm_task )
01327    {
01328       flash_trmm_p func;
01329       func = (flash_trmm_p) t->func;
01330 
01331       func( ( FLA_Side    ) t->int_arg[0],
01332             ( FLA_Uplo    ) t->int_arg[1],
01333             ( FLA_Trans   ) t->int_arg[2],
01334             ( FLA_Diag    ) t->int_arg[3],
01335                             t->fla_arg[0],
01336                             t->input_arg[0],
01337                             t->output_arg[0],
01338             ( fla_trmm_t* ) t->cntl );
01339    }
01340    // FLA_Trsm
01341    else if ( t->func == (void *) FLA_Trsm_task )
01342    {
01343       flash_trsm_p func;
01344       func = (flash_trsm_p) t->func;
01345 
01346       func( ( FLA_Side    ) t->int_arg[0],
01347             ( FLA_Uplo    ) t->int_arg[1],
01348             ( FLA_Trans   ) t->int_arg[2],
01349             ( FLA_Diag    ) t->int_arg[3],
01350                             t->fla_arg[0],
01351                             t->input_arg[0],
01352                             t->output_arg[0],
01353             ( fla_trsm_t* ) t->cntl );
01354    }
01355    // FLA_Gemv
01356    else if ( t->func == (void *) FLA_Gemv_task )
01357    {
01358       flash_gemv_p func;
01359       func = (flash_gemv_p) t->func;
01360 
01361       func( ( FLA_Trans   ) t->int_arg[0],
01362                             t->fla_arg[0],
01363                             t->input_arg[0],
01364                             t->input_arg[1],
01365                             t->fla_arg[1],
01366                             t->output_arg[0],
01367             ( fla_gemv_t* ) t->cntl );
01368    }
01369    // FLA_Trsv
01370    else if ( t->func == (void *) FLA_Trsv_task )
01371    {
01372       flash_trsv_p func;
01373       func = (flash_trsv_p) t->func;
01374 
01375       func( ( FLA_Uplo    ) t->int_arg[0],
01376             ( FLA_Trans   ) t->int_arg[1],
01377             ( FLA_Diag    ) t->int_arg[2],
01378                             t->input_arg[0],
01379                             t->output_arg[0],
01380             ( fla_trsv_t* ) t->cntl );
01381    }
01382    // FLA_Axpy
01383    else if ( t->func == (void *) FLA_Axpy_task )
01384    {
01385       flash_axpy_p func;
01386       func = (flash_axpy_p) t->func;
01387 
01388       func(                 t->fla_arg[0],
01389                             t->input_arg[0],
01390                             t->output_arg[0],
01391             ( fla_axpy_t* ) t->cntl );
01392    }
01393    // FLA_Copy
01394    else if ( t->func == (void *) FLA_Copy_task )
01395    {
01396       flash_copy_p func;
01397       func = (flash_copy_p) t->func;
01398 
01399       func(                 t->input_arg[0],
01400                             t->output_arg[0],
01401             ( fla_copy_t* ) t->cntl );
01402    }
01403    // FLA_Obj_free
01404    else if ( t->func == (void *) FLA_Obj_free_task )
01405    {
01406       flash_obj_free_p func;
01407       func = (flash_obj_free_p) t->func;
01408 
01409       func(           t->output_arg[0],
01410             ( void* ) t->cntl );
01411    }
01412    else
01413    {
01414       FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
01415    }
01416 
01417 #ifdef FLA_ENABLE_SUPERMATRIX_VISUALIZATION
01418    t->end_time = FLA_Clock();
01419 #endif
01420    
01421    return;
01422 }

void FLASH_Queue_finalize ( void   ) 

Referenced by FLA_Finalize().

00272 {
00273    // Exit early if we're not already initialized.
00274    if ( flash_queue_initialized == FALSE )
00275       return;
00276 
00277    // Clear the initialized flag.
00278    flash_queue_initialized = FALSE;
00279 
00280    return;
00281 }

int FLASH_Queue_get_block_size ( void   ) 

00493 {
00494    return flash_queue_block_size;
00495 }

int FLASH_Queue_get_cache_line_size ( void   ) 

00541 {
00542    return flash_queue_cache_line_size;
00543 }

int FLASH_Queue_get_cache_size ( void   ) 

00517 {
00518    return flash_queue_cache_size;
00519 }

FLA_Bool FLASH_Queue_get_caching ( void   ) 

00362 { 
00363    return flash_queue_caching;
00364 }

int FLASH_Queue_get_cores_per_cache ( void   ) 

00565 {
00566    return flash_queue_cores_per_cache;
00567 }

FLASH_Data_aff FLASH_Queue_get_data_affinity ( void   ) 

Referenced by FLASH_Queue_exec(), FLASH_Queue_exec_parallel_function(), FLASH_Queue_exec_simulation(), and FLASH_Queue_init_tasks().

00410 { 
00411    return flash_queue_data_affinity;
00412 }

FLA_Bool FLASH_Queue_get_enabled ( void   ) 

FLASH_Task* FLASH_Queue_get_head_task ( void   ) 

int FLASH_Queue_get_num_blocks ( void   ) 

00467 {
00468    return flash_queue_n_write_blocks;
00469 }

int FLASH_Queue_get_num_tasks ( void   ) 

unsigned int FLASH_Queue_get_num_threads ( void   ) 

double FLASH_Queue_get_parallel_time ( void   ) 

00436 {
00437    // Only return time if out of parallel region.
00438    if ( flash_queue_stack == 0 )
00439       return flash_queue_parallel_time;
00440 
00441    return 0.0;
00442 }

FLA_Bool FLASH_Queue_get_sorting ( void   ) 

Referenced by FLASH_Queue_wait_enqueue().

00338 { 
00339    return flash_queue_sorting;
00340 }

FLASH_Task* FLASH_Queue_get_tail_task ( void   ) 

References FLASH_Queue_s::tail.

Referenced by FLASH_Queue_init_tasks().

00607 {
00608    return _tq.tail;
00609 }

double FLASH_Queue_get_total_time ( void   ) 

00421 {
00422    // Only return time if out of parallel region.
00423    if ( flash_queue_stack == 0 )
00424       return flash_queue_total_time;
00425 
00426    return 0.0;
00427 }

FLA_Bool FLASH_Queue_get_verbose_output ( void   ) 

Referenced by FLASH_Queue_exec(), and FLASH_Queue_exec_simulation().

00314 { 
00315    return flash_queue_verbose;
00316 }

FLA_Bool FLASH_Queue_get_work_stealing ( void   ) 

00386 {
00387    return flash_queue_work_stealing;
00388 }

void FLASH_Queue_init ( void   ) 

References FLASH_Queue_reset().

Referenced by FLA_Init().

00251 {
00252    // Exit early if we're already initialized.
00253    if ( flash_queue_initialized == TRUE )
00254       return;
00255    
00256    // Reset all the initial values.
00257    FLASH_Queue_reset();
00258 
00259    // Set the initialized flag.
00260    flash_queue_initialized = TRUE;
00261 
00262    return;
00263 }

void FLASH_Queue_push ( void *  func,
void *  cntl,
char *  name,
int  n_int_args,
int  n_fla_args,
int  n_input_args,
int  n_output_args,
  ... 
)

References FLA_Obj_view::base, FLASH_Task_s::dep_arg_head, FLASH_Task_s::dep_arg_tail, FLA_Obj_struct::first_task, FLASH_Task_s::fla_arg, FLA_free(), FLA_malloc(), FLASH_Task_alloc(), FLASH_Queue_s::head, FLASH_Task_s::input_arg, FLASH_Task_s::int_arg, FLASH_Task_s::n_dep_args, FLA_Obj_struct::n_read_blocks, FLA_Obj_struct::n_read_tasks, FLASH_Task_s::n_ready, FLASH_Queue_s::n_tasks, FLASH_Task_s::n_war_args, FLA_Obj_struct::n_write_blocks, FLASH_Dep_s::next_dep, FLASH_Task_s::next_task, FLASH_Task_s::order, FLASH_Task_s::output_arg, FLASH_Task_s::prev_task, FLASH_Task_s::queue, FLA_Obj_struct::read_task_head, FLA_Obj_struct::read_task_tail, FLASH_Queue_s::tail, FLASH_Dep_s::task, and FLA_Obj_struct::write_task.

00625 {
00626    int         i, j;
00627    va_list     var_arg_list;
00628    FLASH_Task* t;
00629    FLASH_Task* task;
00630    FLASH_Dep*  d;
00631    FLASH_Dep*  next_dep;
00632    FLA_Obj     obj;
00633 
00634    // Allocate a new FLASH_Task and populate its fields with appropriate values.
00635    t = FLASH_Task_alloc( func, cntl, name,
00636                          n_int_args, n_fla_args,
00637                          n_input_args, n_output_args );
00638    
00639    // Initialize variable argument environment. In case you're wondering, the
00640    // second argument in this macro invocation of va_start() is supposed to be
00641    // the parameter that immediately precedes the variable argument list
00642    // (i.e., the ... above).
00643    va_start( var_arg_list, n_output_args );
00644 
00645    // Extract the integer arguments.
00646    for ( i = 0; i < n_int_args; i++ )
00647       t->int_arg[i] = va_arg( var_arg_list, int );
00648    
00649    // Extract the FLA_Obj arguments.
00650    for ( i = 0; i < n_fla_args; i++ )
00651       t->fla_arg[i] = va_arg( var_arg_list, FLA_Obj );
00652 
00653    // Extract the input FLA_Obj arguments.
00654    for ( i = 0; i < n_input_args; i++ )
00655    {
00656       obj = va_arg( var_arg_list, FLA_Obj );
00657       t->input_arg[i] = obj;
00658 
00659       // Find dependence information.
00660       if ( obj.base->write_task == NULL )
00661       {
00662          t->n_ready--;
00663 
00664          // Add to number of blocks read if not written and not read before.
00665          if ( obj.base->n_read_tasks == 0 )
00666          {
00667             // Identify each read block with an id for freeing.
00668             obj.base->n_read_blocks = flash_queue_n_read_blocks;
00669 
00670             flash_queue_n_read_blocks++;            
00671          }
00672       }
00673       else
00674       { // Flow dependence: append this task to the last writer's dependent list.
00675          task = obj.base->write_task;
00676 
00677          d = (FLASH_Dep *) FLA_malloc( sizeof(FLASH_Dep) );
00678 
00679          d->task     = t;
00680          d->next_dep = NULL;
00681          
00682          if ( task->n_dep_args == 0 )
00683          {
00684             task->dep_arg_head = d;
00685             task->dep_arg_tail = d;
00686          }
00687          else
00688          {
00689             task->dep_arg_tail->next_dep = d;
00690             task->dep_arg_tail           = d;
00691          }
00692 
00693          task->n_dep_args++;
00694       }
00695 
00696       // Add this task to the object's list of read tasks if not already there.
00697       if ( obj.base->n_read_tasks == 0 ||
00698            obj.base->read_task_tail->task != t )
00699       { // Anti-dependence potentially.
00700          d = (FLASH_Dep *) FLA_malloc( sizeof(FLASH_Dep) );
00701 
00702          d->task     = t;
00703          d->next_dep = NULL;
00704 
00705          if ( obj.base->n_read_tasks == 0 )
00706          {
00707             obj.base->read_task_head = d;
00708             obj.base->read_task_tail = d;
00709          }
00710          else
00711          {
00712             obj.base->read_task_tail->next_dep = d;
00713             obj.base->read_task_tail           = d;
00714          }
00715          
00716          obj.base->n_read_tasks++;
00717       }      
00718    }
00719 
00720    // Extract the output FLA_Obj arguments.
00721    for ( i = 0; i < n_output_args; i++ )
00722    {
00723       obj = va_arg( var_arg_list, FLA_Obj );
00724       t->output_arg[i] = obj;
00725 
00726       // Assign tasks to threads with data affinity.
00727       if ( obj.base->write_task == NULL )
00728       {
00729          t->n_ready--;
00730 
00731          // Only assign data affinity to the first output block.
00732          if ( i == 0 )
00733             t->queue = flash_queue_n_write_blocks;
00734 
00735          // Save index in which this output block is first encountered.
00736          obj.base->n_write_blocks = flash_queue_n_write_blocks;
00737          obj.base->first_task     = t;
00738 
00739          // Count this block as written, since it was not written before.
00740          flash_queue_n_write_blocks++;
00741 
00742          // Add to number of blocks read if not written or read before.
00743          if ( obj.base->n_read_tasks == 0 )
00744          {
00745             // Identify each read block with an id for freeing.
00746             obj.base->n_read_blocks = flash_queue_n_read_blocks;
00747 
00748             flash_queue_n_read_blocks++;
00749          }
00750       }
00751       else
00752       { // Flow dependence potentially.
00753          // The last task to overwrite this block is not itself.
00754          if ( obj.base->write_task != t )
00755          {
00756             // Create dependency from task that last wrote the block.
00757             task = obj.base->write_task;
00758             
00759             d = (FLASH_Dep *) FLA_malloc( sizeof(FLASH_Dep) );
00760             
00761             d->task     = t;
00762             d->next_dep = NULL;
00763             
00764             if ( task->n_dep_args == 0 )
00765             {
00766                task->dep_arg_head = d;
00767                task->dep_arg_tail = d;
00768             }
00769             else
00770             {
00771                task->dep_arg_tail->next_dep = d;
00772                task->dep_arg_tail           = d;
00773             }
00774             
00775             task->n_dep_args++;
00776             
00777             // Only assign data affinity to the first output block.
00778             if ( i == 0 )
00779                t->queue = task->queue;
00780          }
00781          else
00782          {
00783             // No need to notify task twice for output block already seen.
00784             t->n_ready--;
00785          }
00786       }
00787       
00788       // Clear the read task list for the next set of reads and record the anti-dependences.
00789       d = obj.base->read_task_head;
00790 
00791       for ( j = 0; j < obj.base->n_read_tasks; j++ )
00792       {
00793          task     = d->task;
00794          next_dep = d->next_dep;
00795 
00796          // If the reading task is not the current task, add a dependence (the list node d is reused).
00797          if ( task != t )
00798          {
00799             d->task     = t;
00800             d->next_dep = NULL;
00801             
00802             if ( task->n_dep_args == 0 )
00803             {
00804                task->dep_arg_head = d;
00805                task->dep_arg_tail = d;
00806             }
00807             else
00808             {
00809                task->dep_arg_tail->next_dep = d;
00810                task->dep_arg_tail           = d;
00811             }
00812             
00813             task->n_dep_args++;
00814 
00815             t->n_war_args++;
00816          }  
00817          else
00818          {
00819             FLA_free( d );
00820          }
00821 
00822          d = next_dep;
00823       }
00824       
00825       obj.base->n_read_tasks   = 0;
00826       obj.base->read_task_head = NULL;
00827       obj.base->read_task_tail = NULL;
00828 
00829       // Record this task as the last to write to this block.
00830       obj.base->write_task = t;
00831    }      
00832 
00833    // Finalize the variable argument environment.
00834    va_end( var_arg_list );
00835   
00836    // Add the task to the tail of the queue (and the head if queue is empty).
00837    if ( _tq.n_tasks == 0 )
00838    {
00839       _tq.head = t;
00840       _tq.tail = t;
00841    }
00842    else
00843    {
00844       t->prev_task = _tq.tail;
00845       _tq.tail->next_task = t;
00846       _tq.tail            = t;
00847 
00848       // Determine the index of the task in the task queue.
00849       t->order = t->prev_task->order + 1;
00850    }
00851    
00852    // Increment the number of tasks.
00853    _tq.n_tasks++;
00854 
00855    return;
00856 }

void FLASH_Queue_reset ( void   ) 

References FLASH_Queue_s::head, FLASH_Queue_s::n_tasks, and FLASH_Queue_s::tail.

Referenced by FLASH_Queue_exec(), and FLASH_Queue_init().

00576 {  // Mark the global task queue _tq as empty and zero the block counters.
00577    // Clear the task count and the head/tail pointers of the FLASH_Queue
00578    _tq.n_tasks = 0;   // structure. No task is freed here; only bookkeeping is reset.
00579    _tq.head    = NULL;
00580    _tq.tail    = NULL;
00581 
00582    // Reset the number of read and write blocks.
00583    flash_queue_n_read_blocks  = 0;
00584    flash_queue_n_write_blocks = 0;
00585 
00586    return;
00587 }

void FLASH_Queue_set_block_size ( int  size  ) 

Referenced by FLASH_Obj_create_hierarchy().

00478 {  // Record `size` as the block size, but never lower the stored value.
00479    // Only adjust the block size if the new block is larger, so the stored
00480    if ( flash_queue_block_size < size )   // value is the largest size seen so far.
00481       flash_queue_block_size = size;
00482 
00483    return;
00484 }

void FLASH_Queue_set_cache_line_size ( int  size  ) 

00528 {  // Store `size` as the cache line size; the value is not validated here.
00529    flash_queue_cache_line_size = size;
00530 
00531    return;
00532 }

void FLASH_Queue_set_cache_size ( int  size  ) 

00504 {  // Store `size` as the cache size; the value is not validated here.
00505    flash_queue_cache_size = size;
00506 
00507    return;
00508 }

void FLASH_Queue_set_caching ( FLA_Bool  caching  ) 

00349 {  // Store the caching flag `caching` in the file-level setting.
00350    flash_queue_caching = caching; 
00351 
00352    return;
00353 }

void FLASH_Queue_set_cores_per_cache ( int  cores  ) 

00552 {  // Store `cores` as the number of cores per cache; not validated here.
00553    flash_queue_cores_per_cache = cores;
00554 
00555    return;
00556 }

void FLASH_Queue_set_data_affinity ( FLASH_Data_aff  data_affinity  ) 

00397 {  // Store the requested data affinity mode in the file-level setting.
00398    flash_queue_data_affinity = data_affinity; 
00399 
00400    return;
00401 }

void FLASH_Queue_set_num_threads ( unsigned int  n_threads  ) 

References FLA_Check_num_threads().

00201 {  // Validate `n_threads` and record it as the SuperMatrix thread count.
00202    FLA_Error e_val;
00203 
00204    // Verify that the number of threads is positive.
00205    e_val = FLA_Check_num_threads( n_threads );
00206    FLA_Check_error_code( e_val );   // NOTE(review): behavior on failure is defined elsewhere -- confirm.
00207 
00208    // Keep track of the number of threads internally.
00209    flash_queue_n_threads = n_threads;
00210 
00211 #if   FLA_MULTITHREADING_MODEL == FLA_OPENMP
00212 
00213    // No additional action is necessary to set the number of OpenMP threads
00214    // since setting the number of threads is handled at the parallel for loop
00215    // with a num_threads() clause. This gives the user more flexibility, since
00216    // the OMP_NUM_THREADS environment variable or the omp_set_num_threads()
00217    // function can still be used to set the global number of OpenMP threads
00218    // independently of the number of SuperMatrix threads.
00219    
00220 #elif FLA_MULTITHREADING_MODEL == FLA_PTHREADS
00221 
00222    // No additional action is necessary to set the number of pthreads
00223    // since setting the number of threads is handled entirely on our end.
00224 
00225 #endif
00226 
00227    return;
00228 }

void FLASH_Queue_set_parallel_time ( double  dtime  ) 

Referenced by FLASH_Queue_exec().

00454 {  // Add `dtime` to the running parallel-time total (accumulates; does not overwrite).
00455    flash_queue_parallel_time += dtime;
00456 
00457    return;
00458 }

void FLASH_Queue_set_sorting ( FLA_Bool  sorting  ) 

00325 {  // Store the sorting flag `sorting` in the file-level setting.
00326    flash_queue_sorting = sorting; 
00327 
00328    return;
00329 }

void FLASH_Queue_set_verbose_output ( FLA_Bool  verbose  ) 

00301 {  // Store the verbose-output flag `verbose` in the file-level setting.
00302    flash_queue_verbose = verbose;
00303 
00304    return;
00305 }

void FLASH_Queue_set_work_stealing ( FLA_Bool  work_stealing  ) 

00373 {  // Store the work-stealing flag `work_stealing` in the file-level setting.
00374    flash_queue_work_stealing = work_stealing;
00375 
00376    return;
00377 }

void FLASH_Queue_verbose_output ( void   ) 

References FLA_Obj_view::base, FLASH_Task_s::dep_arg_head, FLASH_Queue_get_head_task(), FLASH_Queue_get_num_tasks(), FLA_Obj_struct::id, FLASH_Task_s::input_arg, FLA_Obj_struct::m_index, FLASH_Task_s::n_dep_args, FLA_Obj_struct::n_index, FLASH_Task_s::n_input_args, FLASH_Task_s::n_output_args, FLASH_Task_s::name, FLASH_Dep_s::next_dep, FLASH_Task_s::next_task, FLASH_Task_s::order, FLASH_Task_s::output_arg, and FLASH_Dep_s::task.

Referenced by FLASH_Queue_exec().

01431 {  // Print one ';'-separated record per queued task to stdout, then a blank line.
01432    int         i, j;
01433    int         n_tasks = FLASH_Queue_get_num_tasks();
01434    FLASH_Task* t;
01435    FLASH_Dep*  d;
01436 
01437    // Grab the head of the task queue.
01438    t = FLASH_Queue_get_head_task();
01439 
01440    // Iterate over linked list of tasks, bounded by the task count.
01441    for ( i = 0; i < n_tasks; i++ )
01442    {
01443       printf( "%d;%s;", t->order, t->name );   // Record starts with "order;name;".
01444 
01445       printf( "In;" );   // Each input block is printed as "id[m_index,n_index];".
01446       for ( j = 0; j < t->n_input_args; j++ )
01447          printf( "%lu[%d,%d];", t->input_arg[j].base->id,
01448                  t->input_arg[j].base->m_index, 
01449                  t->input_arg[j].base->n_index );
01450 
01451       printf( "Out;" );   // Output blocks use the same "id[m_index,n_index];" form.
01452       for ( j = 0; j < t->n_output_args; j++ )
01453          printf( "%lu[%d,%d];", t->output_arg[j].base->id,
01454                  t->output_arg[j].base->m_index, 
01455                  t->output_arg[j].base->n_index );
01456 
01457       printf( "Dep" );   // Followed by ";order" for every entry in t's dep_arg list.
01458       d = t->dep_arg_head;
01459       for ( j = 0; j < t->n_dep_args; j++ )
01460       {
01461          printf( ";%d", d->task->order );
01462          d = d->next_dep;
01463       }
01464 
01465       printf( "\n" );
01466 
01467       // Go to the next task.
01468       t = t->next_task;
01469    }
01470 
01471    printf( "\n" );   // Blank line terminates the listing.
01472 
01473    return;
01474 }

void FLASH_Queue_visualization ( void   ) 

References FLA_Obj_view::base, FLASH_Task_s::begin_time, FLASH_Task_s::end_time, FLASH_Queue_get_head_task(), FLASH_Queue_get_num_tasks(), FLA_Obj_struct::id, FLASH_Task_s::input_arg, FLA_Obj_struct::m_index, FLA_Obj_struct::n_index, FLASH_Task_s::n_input_args, FLASH_Task_s::n_output_args, FLASH_Task_s::name, FLASH_Task_s::next_task, FLASH_Task_s::output_arg, and FLASH_Task_s::thread.

Referenced by FLASH_Queue_exec().

01485 {  // Print one ';'-separated record per queued task: name, thread, times, blocks.
01486    int         i, j;
01487    int         n_tasks = FLASH_Queue_get_num_tasks();
01488    FLASH_Task* t;
01489 
01490    // Grab the head of the task queue.
01491    t = FLASH_Queue_get_head_task();
01492    
01493    // Iterate over linked list of tasks, bounded by the task count.
01494    for ( i = 0; i < n_tasks; i++ )
01495    {
01496       printf( "%s;%d;%f;%f;", 
01497               t->name, t->thread, t->begin_time, t->end_time );   // "name;thread;begin;end;"
01498 
01499       printf( "In;" );   // Each input block is printed as "id[m_index,n_index];".
01500       for ( j = 0; j < t->n_input_args; j++ )
01501          printf( "%lu[%d,%d];", t->input_arg[j].base->id,
01502                  t->input_arg[j].base->m_index, 
01503                  t->input_arg[j].base->n_index );
01504       
01505       printf( "Out" );   // Output blocks are printed as ";id[m_index,n_index]" (leading separator).
01506       for ( j = 0; j < t->n_output_args; j++ )
01507          printf( ";%lu[%d,%d]", t->output_arg[j].base->id,
01508                  t->output_arg[j].base->m_index, 
01509                  t->output_arg[j].base->n_index );
01510       
01511       printf( "\n" );
01512       
01513       // Go to the next task.
01514       t = t->next_task;
01515    }
01516 
01517    return;
01518 }

FLASH_Task* FLASH_Task_alloc ( void *  func,
void *  cntl,
char *  name,
int  n_int_args,
int  n_fla_args,
int  n_input_args,
int  n_output_args 
)

References FLASH_Task_s::cache, FLASH_Task_s::cntl, FLASH_Task_s::dep_arg_head, FLASH_Task_s::dep_arg_tail, FLASH_Task_s::fla_arg, FLA_malloc(), FLASH_Task_s::func, FLASH_Task_s::height, FLASH_Task_s::hit, FLASH_Task_s::input_arg, FLASH_Task_s::int_arg, FLASH_Task_s::n_dep_args, FLASH_Task_s::n_fla_args, FLASH_Task_s::n_input_args, FLASH_Task_s::n_int_args, FLASH_Task_s::n_output_args, FLASH_Task_s::n_ready, FLASH_Task_s::n_war_args, FLASH_Task_s::name, FLASH_Task_s::next_task, FLASH_Task_s::next_wait, FLASH_Task_s::order, FLASH_Task_s::output_arg, FLASH_Task_s::prev_task, FLASH_Task_s::prev_wait, FLASH_Task_s::queue, and FLASH_Task_s::thread.

Referenced by FLASH_Queue_push().

00871 {  // Allocate a FLASH_Task plus its four argument arrays and initialize its fields.
00872    FLASH_Task* t;
00873 
00874    // Allocate space for the task structure t.
00875    t             = (FLASH_Task *) FLA_malloc( sizeof(FLASH_Task) );
00876    // NOTE(review): no FLA_malloc result is checked here; presumably FLA_malloc handles failure -- confirm.
00877    // Allocate space for the task's integer arguments.
00878    t->int_arg    = (int *) FLA_malloc( n_int_args * sizeof(int) );
00879 
00880    // Allocate space for the task's FLA_Obj arguments.
00881    t->fla_arg    = (FLA_Obj *) FLA_malloc( n_fla_args * sizeof(FLA_Obj) );
00882 
00883    // Allocate space for the task's input FLA_Obj arguments.
00884    t->input_arg  = (FLA_Obj *) FLA_malloc( n_input_args * sizeof(FLA_Obj) );
00885 
00886    // Allocate space for the task's output FLA_Obj arguments.
00887    t->output_arg = (FLA_Obj *) FLA_malloc( n_output_args * sizeof(FLA_Obj) );
00888    
00889    // Initialize the scheduling-related fields to zero / FALSE.
00890    t->n_ready       = 0;
00891    t->order         = 0;
00892    t->queue         = 0;
00893    t->height        = 0;
00894    t->thread        = 0;
00895    t->cache         = 0;
00896    t->hit           = FALSE;
00897    // Record the caller's arguments; the name pointer is stored, not copied.
00898    t->func          = func;
00899    t->cntl          = cntl;
00900    t->name          = name;
00901    t->n_int_args    = n_int_args;
00902    t->n_fla_args    = n_fla_args;
00903    t->n_input_args  = n_input_args;
00904    t->n_output_args = n_output_args;
00905    // Start with an empty dependence list and no queue or wait-list links.
00906    t->n_war_args    = 0;
00907    t->n_dep_args    = 0;
00908    t->dep_arg_head  = NULL;
00909    t->dep_arg_tail  = NULL;
00910    t->prev_task     = NULL;
00911    t->next_task     = NULL;
00912    t->prev_wait     = NULL;
00913    t->next_wait     = NULL;
00914    
00915    // Return a pointer to the initialized structure.
00916    return t;
00917 }

void FLASH_Task_free ( FLASH_Task *  t  ) 

References FLA_Obj_view::base, FLASH_Task_s::dep_arg_head, FLASH_Task_s::fla_arg, FLA_free(), FLA_Obj_free_task(), FLASH_Task_s::func, FLASH_Task_s::input_arg, FLASH_Task_s::int_arg, FLASH_Task_s::n_dep_args, FLASH_Task_s::n_input_args, FLASH_Task_s::n_output_args, FLA_Obj_struct::n_read_tasks, FLASH_Dep_s::next_dep, FLASH_Task_s::output_arg, FLA_Obj_struct::read_task_head, FLA_Obj_struct::read_task_tail, and FLA_Obj_struct::write_task.

Referenced by FLASH_Queue_exec(), and FLASH_Queue_exec_simulation().

00926 {  // Detach task t from the blocks it references, then free t and everything it owns.
00927    int        i, j, k;
00928    FLASH_Dep* d;
00929    FLASH_Dep* next_dep;
00930 
00931    // Do not touch the output blocks if this task is the one that frees them.
00932    if ( t->func != (void *) FLA_Obj_free_task )
00933    {
00934       // Clear the last write task in each output block.
00935       for ( i = 0; i < t->n_output_args; i++ )
00936          t->output_arg[i].base->write_task = NULL;
00937    }
00938 
00939    // Clear the last read tasks in each input block.
00940    for ( i = 0; i < t->n_input_args; i++ )
00941    {
00942       k = t->input_arg[i].base->n_read_tasks;   // Save count and head before resetting the block.
00943       d = t->input_arg[i].base->read_task_head;
00944 
00945       t->input_arg[i].base->n_read_tasks   = 0;
00946       t->input_arg[i].base->read_task_head = NULL;
00947       t->input_arg[i].base->read_task_tail = NULL;
00948       // Free every entry of the block's read list, not only entries that refer to t.
00949       for ( j = 0; j < k; j++ )
00950       {
00951          next_dep = d->next_dep;   // Read the link before the node is freed.
00952          FLA_free( d );
00953          d = next_dep;
00954       }
00955    }
00956 
00957    // Free the dep_arg field of t.
00958    d = t->dep_arg_head;
00959 
00960    for ( i = 0; i < t->n_dep_args; i++ )
00961    {
00962       next_dep = d->next_dep;   // Read the link before the node is freed.
00963       FLA_free( d );
00964       d = next_dep;
00965    }   
00966 
00967    // Free the int_arg field of t.
00968    FLA_free( t->int_arg );
00969    
00970    // Free the fla_arg field of t.
00971    FLA_free( t->fla_arg );
00972 
00973    // Free the input_arg field of t.
00974    FLA_free( t->input_arg );
00975 
00976    // Free the output_arg field of t.
00977    FLA_free( t->output_arg );
00978 
00979    // Finally, free the struct itself.
00980    FLA_free( t );
00981 
00982    return;
00983 }


Variable Documentation


Generated on Mon Jul 6 05:45:53 2009 for libflame by  doxygen 1.5.9