FLASH_Queue_main_prototypes.h File Reference

(r)

Go to the source code of this file.

Functions

void FLASH_Queue_begin (void)
void FLASH_Queue_end (void)
FLA_Error FLASH_Queue_enable (void)
FLA_Error FLASH_Queue_disable (void)
FLA_Bool FLASH_Queue_get_enabled (void)
void FLASH_Queue_set_num_threads (unsigned int n_threads)
unsigned int FLASH_Queue_get_num_threads (void)
void FLASH_Queue_init (void)
void FLASH_Queue_finalize (void)
int FLASH_Queue_get_num_tasks (void)
void FLASH_Queue_set_verbose_output (FLA_Bool verbose)
FLA_Bool FLASH_Queue_get_verbose_output (void)
void FLASH_Queue_set_sorting (FLA_Bool sorting)
FLA_Bool FLASH_Queue_get_sorting (void)
void FLASH_Queue_set_caching (FLA_Bool caching)
FLA_Bool FLASH_Queue_get_caching (void)
void FLASH_Queue_set_work_stealing (FLA_Bool work_stealing)
FLA_Bool FLASH_Queue_get_work_stealing (void)
void FLASH_Queue_set_data_affinity (FLASH_Data_aff data_affinity)
FLASH_Data_aff FLASH_Queue_get_data_affinity (void)
double FLASH_Queue_get_total_time (void)
double FLASH_Queue_get_parallel_time (void)
void FLASH_Queue_exec (void)
void FLASH_Queue_set_parallel_time (double dtime)
int FLASH_Queue_get_num_blocks (void)
void FLASH_Queue_set_block_size (int size)
int FLASH_Queue_get_block_size (void)
void FLASH_Queue_set_cache_size (int size)
int FLASH_Queue_get_cache_size (void)
void FLASH_Queue_set_cache_line_size (int size)
int FLASH_Queue_get_cache_line_size (void)
void FLASH_Queue_set_cores_per_cache (int cores)
int FLASH_Queue_get_cores_per_cache (void)
void FLASH_Queue_reset (void)
FLASH_Task * FLASH_Queue_get_head_task (void)
FLASH_Task * FLASH_Queue_get_tail_task (void)
void FLASH_Queue_push (void *func, void *cntl, char *name, int n_int_args, int n_fla_args, int n_input_args, int n_output_args,...)
FLASH_Task * FLASH_Task_alloc (void *func, void *cntl, char *name, int n_int_args, int n_fla_args, int n_input_args, int n_output_args)
void FLASH_Task_free (FLASH_Task *t)
void FLASH_Queue_exec_task (FLASH_Task *t)
void FLASH_Queue_verbose_output (void)
void FLASH_Queue_visualization (void)
void FLASH_Queue_init_tasks (void *arg)
void FLASH_Queue_wait_enqueue (FLASH_Task *t, void *arg)
FLASH_Task * FLASH_Queue_wait_dequeue (int queue, int thread, void *arg)
void FLASH_Queue_exec_parallel (void *arg)
void * FLASH_Queue_exec_parallel_function (void *arg)
FLASH_Task * FLASH_Task_update_dependencies (FLASH_Task *t, void *arg)
void FLASH_Task_free_parallel (FLASH_Task *t, void *arg)
void FLASH_Queue_exec_simulation (void *arg)


Function Documentation

void FLASH_Queue_begin ( void   ) 

References FLA_Clock().

Referenced by FLASH_Apply_Q_UT(), FLASH_Apply_Q_UT_inc(), FLASH_Apply_Q_UT_UD(), FLASH_Chol(), FLASH_Gemm(), FLASH_Hemm(), FLASH_Her2k(), FLASH_Herk(), FLASH_LU_incpiv(), FLASH_LU_nopiv(), FLASH_QR_UT_inc_noopt(), FLASH_QR_UT_inc_opt1(), FLASH_QR_UT_UD(), FLASH_SPDinv(), FLASH_Sylv(), FLASH_Symm(), FLASH_Syr2k(), FLASH_Syrk(), FLASH_Trinv(), FLASH_Trmm(), FLASH_Trsm(), and FLASH_Ttmm().

00081 {
00082 #ifdef FLA_ENABLE_SUPERMATRIX
00083    if ( flash_queue_stack == 0 )
00084    {
00085       // Reset the value of the parallel execution timer.
00086       flash_queue_parallel_time = 0.0;
00087 
00088       // Save the starting time for the total execution time.
00089       flash_queue_total_time = FLA_Clock();
00090    }
00091 #endif
00092 
00093    // Push onto the stack.
00094    flash_queue_stack++;
00095 
00096    return;
00097 }

FLA_Error FLASH_Queue_disable ( void   ) 

Referenced by FLASH_Axpy(), FLASH_Copy(), FLASH_FS_incpiv(), FLASH_Gemv(), and FLASH_Trsv().

00158 {
00159 #ifdef FLA_ENABLE_SUPERMATRIX
00160    if ( flash_queue_stack == 0 )
00161    {
00162       // Disable only if a parallel region has not begun yet.
00163       flash_queue_enabled = FALSE;   
00164       return FLA_SUCCESS;      
00165    }
00166    else
00167    {
00168       // Cannot change status during parallel region.
00169       return FLA_FAILURE;
00170    }
00171 #else
00172    // Allow disabling enqueuing even when SuperMatrix is not configured.
00173    flash_queue_enabled = FALSE;   
00174    return FLA_SUCCESS;
00175 #endif
00176 }

FLA_Error FLASH_Queue_enable ( void   ) 

Referenced by FLASH_Axpy(), FLASH_Copy(), FLASH_FS_incpiv(), FLASH_Gemv(), and FLASH_Trsv().

00131 {
00132 #ifdef FLA_ENABLE_SUPERMATRIX
00133    if ( flash_queue_stack == 0 )
00134    {
00135       // Enable only if a parallel region has not begun yet.
00136       flash_queue_enabled = TRUE;   
00137       return FLA_SUCCESS;
00138    }
00139    else
00140    {
00141       // Cannot change status during parallel region.
00142       return FLA_FAILURE;
00143    }
00144 #else
00145    // Raise an exception when SuperMatrix is not configured.
00146    FLA_Check_error_code( FLA_SUPERMATRIX_NOT_ENABLED );
00147    return FLA_FAILURE;
00148 #endif
00149 }

void FLASH_Queue_end ( void   ) 

References FLA_Clock(), and FLASH_Queue_exec().

Referenced by FLASH_Apply_Q_UT(), FLASH_Apply_Q_UT_inc(), FLASH_Apply_Q_UT_UD(), FLASH_Chol(), FLASH_Gemm(), FLASH_Hemm(), FLASH_Her2k(), FLASH_Herk(), FLASH_LU_incpiv(), FLASH_LU_nopiv(), FLASH_QR_UT_inc_noopt(), FLASH_QR_UT_inc_opt1(), FLASH_QR_UT_UD(), FLASH_SPDinv(), FLASH_Sylv(), FLASH_Symm(), FLASH_Syr2k(), FLASH_Syrk(), FLASH_Trinv(), FLASH_Trmm(), FLASH_Trsm(), and FLASH_Ttmm().

00106 {
00107    // Pop off the stack.
00108    flash_queue_stack--;
00109 
00110 #ifdef FLA_ENABLE_SUPERMATRIX
00111    if ( flash_queue_stack == 0 )
00112    {
00113       // Execute tasks upon leaving the outermost parallel region.
00114       FLASH_Queue_exec();
00115 
00116       // Find the total execution time.
00117       flash_queue_total_time = FLA_Clock() - flash_queue_total_time;
00118    }
00119 #endif
00120 
00121    return;
00122 }

void FLASH_Queue_exec ( void   ) 

References FLASH_Queue_variables::all_lock, FLASH_Queue_variables::dep_lock, FLA_Clock(), FLA_Lock_destroy(), FLA_Lock_init(), FLASH_Queue_exec_parallel(), FLASH_Queue_exec_simulation(), FLASH_Queue_get_data_affinity(), FLASH_Queue_get_head_task(), FLASH_Queue_get_num_tasks(), FLASH_Queue_get_num_threads(), FLASH_Queue_get_verbose_output(), FLASH_Queue_init_tasks(), FLASH_Queue_reset(), FLASH_Queue_set_parallel_time(), FLASH_Queue_verbose_output(), FLASH_Queue_visualization(), FLASH_Task_free(), FLASH_Queue_s::head, FLASH_Queue_s::n_tasks, FLASH_Task_s::next_task, FLASH_Queue_variables::pc, FLASH_Queue_variables::run_lock, FLASH_Queue_s::tail, FLASH_Queue_variables::wait_queue, and FLASH_Queue_variables::war_lock.

Referenced by FLASH_Queue_end().

00080 {
00081    FLA_Bool     verbose   = FLASH_Queue_get_verbose_output();
00082    int          n_tasks   = FLASH_Queue_get_num_tasks();
00083    int          n_threads = FLASH_Queue_get_num_threads();
00084    int          n_memory;
00085    int          i;
00086    double       dtime;
00087 
00088 #ifdef FLA_ENABLE_SUPERMATRIX_VISUALIZATION
00089    FLASH_Task*  t;
00090    FLASH_Task*  next;
00091 #endif
00092 
00093 #ifdef FLA_ENABLE_WINDOWS_BUILD
00094    FLA_Lock*    run_lock;
00095    FLA_Lock*    dep_lock;
00096    FLA_Lock*    war_lock;
00097    FLASH_Queue* wait_queue;
00098 #endif
00099 
00100    // All the necessary variables for the SuperMatrix mechanism.
00101    FLASH_Queue_vars args;
00102 
00103    // If the queue is empty, return early.
00104    if ( n_tasks == 0 )
00105       return;
00106 
00107    // Allocate different number of elements in arrays if using data affinity.
00108    n_memory = ( FLASH_Queue_get_data_affinity() == FLASH_QUEUE_AFFINITY_NONE ?
00109                 1 : n_threads );
00110 
00111 #ifdef FLA_ENABLE_MULTITHREADING
00112    // Allocate memory for array of locks and the waiting queue.
00113 #ifdef FLA_ENABLE_WINDOWS_BUILD
00114    run_lock = ( FLA_Lock* ) _alloca( n_memory  * sizeof( FLA_Lock ) );
00115    dep_lock = ( FLA_Lock* ) _alloca( n_threads * sizeof( FLA_Lock ) );
00116    war_lock = ( FLA_Lock* ) _alloca( n_threads * sizeof( FLA_Lock ) );
00117 #else
00118    FLA_Lock run_lock[n_memory];
00119    FLA_Lock dep_lock[n_threads];
00120    FLA_Lock war_lock[n_threads];
00121 #endif
00122 
00123    args.run_lock = run_lock;
00124    args.dep_lock = dep_lock;
00125    args.war_lock = war_lock;
00126 
00127    // Initialize the all lock.
00128    FLA_Lock_init( &(args.all_lock) );
00129    
00130    // Initialize the run lock for thread i.
00131    for ( i = 0; i < n_memory; i++ )
00132    {
00133       FLA_Lock_init( &(args.run_lock[i]) );
00134    }
00135 
00136    // Initialize the dep and war locks for thread i.
00137    for ( i = 0; i < n_threads; i++ )
00138    {
00139       FLA_Lock_init( &(args.dep_lock[i]) );
00140       FLA_Lock_init( &(args.war_lock[i]) );
00141    }
00142 #endif
00143 
00144    // Allocate memory for waiting queue.
00145 #ifdef FLA_ENABLE_WINDOWS_BUILD
00146    wait_queue = ( FLASH_Queue* ) _alloca( n_memory * sizeof( FLASH_Queue ) );
00147 #else
00148    FLASH_Queue wait_queue[n_memory];
00149 #endif
00150 
00151    args.wait_queue = wait_queue;
00152 
00153    for ( i = 0; i < n_memory; i++ )
00154    {
00155       args.wait_queue[i].n_tasks = 0;
00156       args.wait_queue[i].head = NULL;
00157       args.wait_queue[i].tail = NULL;
00158    }
00159 
00160    // Initialize the aggregate task counter.
00161    args.pc = 0;
00162 
00163    // Initialize tasks with critical information.
00164    FLASH_Queue_init_tasks( ( void* ) &args );
00165    
00166    // Display verbose output before freeing all tasks.
00167    if ( verbose )
00168       FLASH_Queue_verbose_output();
00169    
00170    // Start timing the parallel execution.
00171    dtime = FLA_Clock();
00172    
00173 #ifdef FLA_ENABLE_MULTITHREADING
00174    // Parallel Execution!
00175    FLASH_Queue_exec_parallel( ( void* ) &args );
00176 #else
00177    // Simulation!
00178    FLASH_Queue_exec_simulation( ( void* ) &args );
00179 #endif
00180    
00181    // End timing the parallel execution.
00182    dtime = FLA_Clock() - dtime;
00183    FLASH_Queue_set_parallel_time( dtime );
00184 
00185 #ifdef FLA_ENABLE_SUPERMATRIX_VISUALIZATION
00186    // Visualize all tasks.
00187    if ( !verbose )
00188       FLASH_Queue_visualization();
00189 
00190    // Now that we're done with the task array, flush the queue.
00191    t = FLASH_Queue_get_head_task();
00192 
00193    for ( i = 0; i < n_tasks; i++ )
00194    {
00195       // Obtain the next task.
00196       next = t->next_task;
00197 
00198       // Free the current task.
00199       FLASH_Task_free( t );
00200 
00201       // Move to the next task.
00202       t = next;      
00203    }
00204 #endif
00205 
00206 #ifdef FLA_ENABLE_MULTITHREADING   
00207    // Destroy the locks.
00208    FLA_Lock_destroy( &(args.all_lock) );
00209 
00210    for ( i = 0; i < n_memory; i++ )
00211    {
00212       FLA_Lock_destroy( &(args.run_lock[i]) );
00213    }
00214 
00215    for ( i = 0; i < n_threads; i++ )
00216    {
00217       FLA_Lock_destroy( &(args.dep_lock[i]) );
00218       FLA_Lock_destroy( &(args.war_lock[i]) );
00219    }
00220 #endif
00221 
00222    // Reset values for next call to FLASH_Queue_exec().
00223    FLASH_Queue_reset();
00224 
00225    return;
00226 }

void FLASH_Queue_exec_parallel ( void *  arg  ) 

References FLASH_Thread_s::args, FLA_Check_error_level(), FLA_Check_pthread_create_result(), FLA_Check_pthread_join_result(), FLASH_Queue_exec_parallel_function(), FLASH_Queue_get_num_threads(), and FLASH_Thread_s::id.

Referenced by FLASH_Queue_exec().

00429 {
00430    int   i;
00431    int   n_threads = FLASH_Queue_get_num_threads();
00432    void* (*thread_entry_point)( void* );
00433 
00434    // Allocate the thread structures array. Here, an array of FLASH_Thread
00435    // structures of length n_threads is allocated and the fields of each
00436    // structure set to appropriate values.
00437 #ifdef FLA_ENABLE_WINDOWS_BUILD
00438    FLASH_Thread* thread = ( FLASH_Thread* ) _alloca( n_threads * sizeof( FLASH_Thread ) );
00439 #else
00440    FLASH_Thread thread[n_threads];
00441 #endif
00442 
00443    // Initialize the thread structures array.
00444    for ( i = 0; i < n_threads; i++ )
00445    {
00446       // Save the thread's identifier.
00447       thread[i].id = i;
00448 
00449       // Save the pointer to the necessary variables with the thread.
00450       thread[i].args = arg;
00451 
00452       // The pthread object, if it was even compiled into the FLASH_Thread
00453       // structure, will be initialized by the pthread implementation when we
00454       // call pthread_create() and does not need to be touched at this time.
00455    }
00456 
00457    // Determine which function to send threads to.
00458    thread_entry_point = FLASH_Queue_exec_parallel_function;
00459 
00460 #if FLA_MULTITHREADING_MODEL == FLA_OPENMP
00461 
00462    // An OpenMP parallel for region spawns n_threads threads. Each thread
00463    // executes the work function with a different FLASH_Thread argument.
00464    // An implicit synchronization point exists at the end of the curly
00465    // brace scope.
00466    #pragma omp parallel for \
00467            private( i ) \
00468            shared( thread, n_threads, thread_entry_point ) \
00469            schedule( static, 1 ) \
00470            num_threads( n_threads )
00471    for ( i = 0; i < n_threads; ++i )
00472    {
00473       thread_entry_point( ( void* ) &thread[i] );
00474    }
00475 
00476 #elif FLA_MULTITHREADING_MODEL == FLA_PTHREADS
00477 
00478    // Create each POSIX thread needed in addition to the main thread.
00479    for ( i = 1; i < n_threads; i++ )
00480    {
00481       int pthread_e_val;
00482 
00483       // Create thread i with default attributes.
00484       pthread_e_val = pthread_create( &(thread[i].pthread_obj),
00485                                       NULL,
00486                                       thread_entry_point,
00487                                       ( void* ) &thread[i] );
00488 
00489       if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
00490       {
00491          FLA_Error e_val = FLA_Check_pthread_create_result( pthread_e_val );
00492          FLA_Check_error_code( e_val );
00493       }
00494    }
00495 
00496    // The main thread is assigned the role of thread 0. Here we manually
00497    // execute it as a worker thread.
00498    thread_entry_point( ( void* ) &thread[0] );
00499 
00500    // Wait for non-main threads to finish.
00501    for ( i = 1; i < n_threads; i++ )
00502    {
00503       // These two variables are declared local to this for loop since this
00504       // is the only place they are needed, and since they would show up as
00505       // unused variables if FLA_MULTITHREADING_MODEL != FLA_PTHREADS.
00506       // Strangely, the Intel compiler produces code that results in an
00507       // "unaligned access" runtime message if thread_status is declared as
00508       // an int. Declaring it as a long or void* appears to force the
00509       // compiler (not surprisingly) into aligning it to an 8-byte boundary.
00510       int   pthread_e_val;
00511       void* thread_status;
00512 
00513       // Wait for thread i to invoke its respective pthread_exit().
00514       // The return value passed to pthread_exit() is provided to us
00515       // via status, if one was given.
00516       pthread_e_val = pthread_join( thread[i].pthread_obj,
00517                                     ( void** ) &thread_status );
00518       
00519       if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
00520       {
00521          FLA_Error e_val = FLA_Check_pthread_join_result( pthread_e_val );
00522          FLA_Check_error_code( e_val );
00523       }
00524    }
00525    
00526 #endif
00527 
00528    return;
00529 }

void* FLASH_Queue_exec_parallel_function ( void *  arg  ) 

References FLASH_Thread_s::args, FLA_Lock_acquire(), FLA_Lock_release(), FLASH_Queue_exec_task(), FLASH_Queue_get_data_affinity(), FLASH_Queue_get_num_tasks(), FLASH_Queue_wait_dequeue(), FLASH_Task_free_parallel(), FLASH_Task_update_dependencies(), FLASH_Thread_s::id, and FLASH_Queue_variables::run_lock.

Referenced by FLASH_Queue_exec_parallel().

00546 {
00547    FLASH_Queue_vars* args;   
00548    int           i, queue;
00549    int           n_tasks   = FLASH_Queue_get_num_tasks();
00550    FLA_Bool      condition = TRUE;
00551    FLA_Bool      available;
00552    FLASH_Task*   t;
00553    FLASH_Thread* me;
00554    //cpu_set_t     cpu_set;
00555 
00556    // Interpret the thread argument as what it really is--a pointer to an
00557    // FLASH_Thread structure.
00558    me = ( FLASH_Thread* ) arg;
00559 
00560    // Extract the variables from the current thread.
00561    args = ( FLASH_Queue_vars* ) me->args;
00562 
00563    // Figure out the id of the current thread.
00564    i = me->id;
00565 
00566    // Use different queues depending on if using data affinity or not.
00567    if ( FLASH_Queue_get_data_affinity() != FLASH_QUEUE_AFFINITY_NONE )
00568    {
00569       queue = i;
00570    }
00571    else // No data affinity.
00572    {
00573       queue = 0;
00574    }
00575 
00576    // Set the CPU affinity; We want the current thread i to run only on CPU i.
00577    //CPU_ZERO( &cpu_set );
00578    //CPU_SET( i, &cpu_set );
00579    //sched_setaffinity( syscall( __NR_gettid ), sizeof(cpu_set_t), &cpu_set );
00580    
00581    // Loop until all the tasks have committed.
00582    while ( condition )
00583    {
00584       FLA_Lock_acquire( &(args->run_lock[queue]) ); // R ***
00585 
00586       // Obtain task to execute.
00587       t = FLASH_Queue_wait_dequeue( queue, i, ( void* ) args );
00588 
00589       FLA_Lock_release( &(args->run_lock[queue]) ); // R ***
00590 
00591       // Dequeued a task from the waiting queue.
00592       available = ( t != NULL );
00593 
00594       if ( available )
00595       {
00596          // Execute the task.
00597          FLASH_Queue_exec_task( t );         
00598 
00599          // Update task dependencies.
00600          FLASH_Task_update_dependencies( t, ( void* ) args );
00601 
00602 #ifndef FLA_ENABLE_SUPERMATRIX_VISUALIZATION
00603          // Free the task once it executes in parallel.
00604          FLASH_Task_free_parallel( t, ( void* ) args );
00605 #endif         
00606       }
00607 
00608       FLA_Lock_acquire( &(args->all_lock) ); // A ***
00609 
00610       // Increment program counter.
00611       if ( available )
00612          args->pc++;
00613 
00614       // Terminate loop.
00615       if ( args->pc >= n_tasks )
00616          condition = FALSE;
00617       
00618       FLA_Lock_release( &(args->all_lock) ); // A ***
00619    }
00620    
00621 #if FLA_MULTITHREADING_MODEL == FLA_PTHREADS
00622    // If this is a non-main thread, then exit with a zero (normal) error code.
00623    // The main thread cannot call pthread_exit() because this routine never
00624    // returns. The main thread must proceed so it can oversee the joining of
00625    // the exited non-main pthreads.
00626    if ( i != 0 )
00627       pthread_exit( ( void* ) NULL );
00628 #endif
00629 
00630    return ( void* ) NULL;
00631 }

void FLASH_Queue_exec_simulation ( void *  arg  ) 

References FLASH_Task_s::dep_arg_head, FLASH_Queue_exec_task(), FLASH_Queue_get_data_affinity(), FLASH_Queue_get_num_tasks(), FLASH_Queue_get_num_threads(), FLASH_Queue_get_verbose_output(), FLASH_Queue_wait_dequeue(), FLASH_Queue_wait_enqueue(), FLASH_Task_free(), FLASH_Task_s::n_dep_args, FLASH_Task_s::n_ready, FLASH_Task_s::name, FLASH_Dep_s::next_dep, FLASH_Queue_variables::pc, and FLASH_Dep_s::task.

Referenced by FLASH_Queue_exec().

00770 {
00771    FLASH_Queue_vars* args = ( FLASH_Queue_vars* ) arg;
00772    int         i, j;
00773    int         queue;
00774    int         n_stages  = 0;
00775    int         n_tasks   = FLASH_Queue_get_num_tasks();
00776    int         n_threads = FLASH_Queue_get_num_threads();
00777    FLA_Bool    verbose   = FLASH_Queue_get_verbose_output();
00778    FLASH_Task* task;
00779    FLASH_Task* t;
00780    FLASH_Dep*  d;
00781 
00782    // An array to hold tasks to be executed during the simulation.
00783 #ifdef FLA_ENABLE_WINDOWS_BUILD
00784    FLASH_Task** exec_array = ( FLASH_Task** ) _alloca( n_threads * sizeof( FLASH_Task* ) );
00785 #else
00786    FLASH_Task* exec_array[n_threads];
00787 #endif
00788 
00789    // Initialize all exec_array to NULL.
00790    for ( i = 0; i < n_threads; i++ )
00791       exec_array[i] = NULL;
00792    
00793    // Loop until all the tasks have committed.
00794    while ( args->pc < n_tasks )
00795    {
00796       for ( i = 0; i < n_threads; i++ )
00797       {
00798          // Update waiting queue with ready tasks.
00799          t = exec_array[i];
00800          
00801          if ( t != NULL )
00802          {
00803             // Check each dependent task.
00804             d = t->dep_arg_head;
00805             
00806             for ( j = 0; j < t->n_dep_args; j++ )
00807             {
00808                task = d->task;              
00809                task->n_ready--;
00810                
00811                // Place newly ready tasks on waiting queue.
00812                if ( task->n_ready == 0 )
00813                {
00814                   FLASH_Queue_wait_enqueue( task, arg );
00815                }
00816                
00817                // Go to the next dep.
00818                d = d->next_dep;
00819             }
00820 
00821 #ifndef FLA_ENABLE_SUPERMATRIX_VISUALIZATION
00822             // Free the task.
00823             FLASH_Task_free( t );
00824 #endif
00825          }
00826       }      
00827       
00828       n_stages++;
00829       if ( !verbose )
00830          printf( "%7d", n_stages );
00831       
00832       // Move ready tasks from the waiting queue to execution queue.
00833       for ( i = 0; i < n_threads; i++ )
00834       {
00835          // Use different queues depending on if using data affinity or not.
00836          if ( FLASH_Queue_get_data_affinity() != FLASH_QUEUE_AFFINITY_NONE )
00837          {
00838             queue = i;
00839          }
00840          else // No data affinity.
00841          {
00842             queue = 0;
00843          }
00844 
00845          t = FLASH_Queue_wait_dequeue( queue, i, arg );
00846          exec_array[i] = t;
00847          
00848          // Increment program counter.
00849          if ( t != NULL )
00850          {
00851             args->pc++;
00852          }
00853       }
00854 
00855       // Execute independent tasks.
00856       for ( i = 0; i < n_threads; i++ )
00857       {
00858          t = exec_array[i];
00859          FLASH_Queue_exec_task( t );
00860          
00861          if ( !verbose )
00862             printf( "%7s", ( t == NULL ? "     " : t->name ) );        
00863       }
00864       
00865       if ( !verbose ) 
00866          printf( "\n" );
00867    }
00868    
00869    if ( !verbose )
00870       printf( "\n" );
00871 
00872    return;
00873 }

void FLASH_Queue_exec_task ( FLASH_Task *  t  ) 

References FLASH_Task_s::begin_time, FLASH_Task_s::cntl, FLASH_Task_s::end_time, FLA_Apply_Q_UT_task(), FLA_Apply_Q_UT_UD_task(), FLASH_Task_s::fla_arg, FLA_Axpy_task(), FLA_Chol_task(), FLA_Clock(), FLA_Copy_task(), FLA_Gemm_task(), FLA_Gemv_task(), FLA_Hemm_task(), FLA_Her2k_task(), FLA_Herk_task(), FLA_LU_nopiv_task(), FLA_LU_piv_copy_task(), FLA_LU_piv_task(), FLA_Obj_free_task(), FLA_QR_UT_copy_task(), FLA_QR_UT_task(), FLA_QR_UT_UD_task(), FLA_SA_FS_task(), FLA_SA_LU_task(), FLA_Sylv_task(), FLA_Symm_task(), FLA_Syr2k_task(), FLA_Syrk_task(), FLA_Trinv_task(), FLA_Trmm_task(), FLA_Trsm_piv_task(), FLA_Trsm_task(), FLA_Trsv_task(), FLA_Ttmm_task(), FLASH_Task_s::func, FLASH_Task_s::input_arg, FLASH_Task_s::int_arg, and FLASH_Task_s::output_arg.

Referenced by FLASH_Queue_exec_parallel_function(), and FLASH_Queue_exec_simulation().

00992 {  // Runs one task: matches t->func against the known *_task routines and calls it with the task's stored arguments.
00993    // Define local function pointer types.
00994    // Each typedef below is the type t->func is cast to in the matching branch further down.
00995    // LAPACK-level
00996    typedef FLA_Error(*flash_lu_piv_p)(FLA_Obj A, FLA_Obj p, fla_lu_t* cntl);
00997    typedef FLA_Error(*flash_lu_piv_copy_p)(FLA_Obj A, FLA_Obj p, FLA_Obj U, fla_lu_t* cntl);
00998    typedef FLA_Error(*flash_trsm_piv_p)(FLA_Obj A, FLA_Obj C, FLA_Obj p, fla_trsm_t* cntl);
00999    typedef FLA_Error(*flash_sa_lu_p)(FLA_Obj U, FLA_Obj D, FLA_Obj p, FLA_Obj L, int nb_alg, fla_lu_t* cntl);
01000    typedef FLA_Error(*flash_sa_fs_p)(FLA_Obj L, FLA_Obj D, FLA_Obj p, FLA_Obj C, FLA_Obj E, int nb_alg, fla_lu_t* cntl);
01001    typedef FLA_Error(*flash_lu_nopiv_p)(FLA_Obj A, fla_lu_t* cntl);
01002    typedef FLA_Error(*flash_trinv_p)(FLA_Uplo uplo, FLA_Diag diag, FLA_Obj A, fla_trinv_t* cntl);
01003    typedef FLA_Error(*flash_ttmm_p)(FLA_Uplo uplo, FLA_Obj A, fla_ttmm_t* cntl);
01004    typedef FLA_Error(*flash_chol_p)(FLA_Uplo uplo, FLA_Obj A, fla_chol_t* cntl);
01005    typedef FLA_Error(*flash_sylv_p)(FLA_Trans transa, FLA_Trans transb, FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t* cntl);
01006    typedef FLA_Error(*flash_qrut_p)(FLA_Obj A, FLA_Obj T, fla_qrut_t* cntl);
01007    typedef FLA_Error(*flash_qrutc_p)(FLA_Obj A, FLA_Obj T, FLA_Obj U, fla_qrut_t* cntl);
01008    typedef FLA_Error(*flash_qrutud_p)(FLA_Obj B, FLA_Obj D, FLA_Obj T, fla_qrutud_t* cntl);
01009    typedef FLA_Error(*flash_apqut_p)(FLA_Side side, FLA_Trans trans, FLA_Store storev, FLA_Obj A, FLA_Obj T, FLA_Obj W, FLA_Obj B, fla_apqut_t* cntl);
01010    typedef FLA_Error(*flash_apqutud_p)(FLA_Side side, FLA_Trans trans, FLA_Store storev, FLA_Obj D, FLA_Obj T, FLA_Obj W, FLA_Obj C, FLA_Obj E, fla_apqutud_t* cntl);
01011 
01012    // Level-3 BLAS
01013    typedef FLA_Error(*flash_gemm_p)(FLA_Trans transa, FLA_Trans transb, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_gemm_t* cntl);
01014    typedef FLA_Error(*flash_hemm_p)(FLA_Side side, FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_hemm_t* cntl);
01015    typedef FLA_Error(*flash_herk_p)(FLA_Uplo uplo, FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj beta, FLA_Obj C, fla_herk_t* cntl);
01016    typedef FLA_Error(*flash_her2k_p)(FLA_Uplo uplo, FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_her2k_t* cntl);
01017    typedef FLA_Error(*flash_symm_p)(FLA_Side side, FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_symm_t* cntl);
01018    typedef FLA_Error(*flash_syrk_p)(FLA_Uplo uplo, FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj beta, FLA_Obj C, fla_syrk_t* cntl);
01019    typedef FLA_Error(*flash_syr2k_p)(FLA_Uplo uplo, FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_syr2k_t* cntl);
01020    typedef FLA_Error(*flash_trmm_p)(FLA_Side side, FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj C, fla_trmm_t* cntl);
01021    typedef FLA_Error(*flash_trsm_p)(FLA_Side side, FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj C, fla_trsm_t* cntl);
01022 
01023    // Level-2 BLAS
01024    typedef FLA_Error(*flash_gemv_p)(FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y, fla_gemv_t* cntl);
01025    typedef FLA_Error(*flash_trsv_p)(FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj A, FLA_Obj x, fla_trsv_t* cntl);
01026 
01027    // Level-1 BLAS
01028    typedef FLA_Error(*flash_axpy_p)(FLA_Obj alpha, FLA_Obj A, FLA_Obj B, fla_axpy_t* cntl);
01029    typedef FLA_Error(*flash_copy_p)(FLA_Obj A, FLA_Obj B, fla_copy_t* cntl);
01030 
01031    // Base
01032    typedef FLA_Error(*flash_obj_free_p)(FLA_Obj A, void* cntl);
01033 
01034 
01035    // Only execute task if it is not NULL.
01036    if ( t == NULL )
01037       return;
01038    
01039 #ifdef FLA_ENABLE_SUPERMATRIX_VISUALIZATION
01040    t->begin_time = FLA_Clock();  // Timestamp taken just before the dispatch below.
01041 #endif
01042 
01043    // Now "switch" between the various possible task functions.
01044    // The match is by function address; the first matching branch casts t->func and makes the call.
01045    // FLA_LU_piv
01046    if ( t->func == (void *) FLA_LU_piv_task )
01047    {
01048       flash_lu_piv_p func;
01049       func = (flash_lu_piv_p) t->func;
01050 
01051       func(               t->output_arg[0],
01052                           t->fla_arg[0],
01053             ( fla_lu_t* ) t->cntl );
01054    }
01055    // FLA_LU_piv_copy
01056    else if ( t->func == (void *) FLA_LU_piv_copy_task )
01057    {
01058       flash_lu_piv_copy_p func;
01059       func = (flash_lu_piv_copy_p) t->func;
01060 
01061       func(               t->output_arg[0],
01062                           t->fla_arg[0],
01063                           t->output_arg[1],
01064             ( fla_lu_t* ) t->cntl );
01065    }
01066    // FLA_Trsm_piv
01067    else if ( t->func == (void *) FLA_Trsm_piv_task )
01068    {
01069       flash_trsm_piv_p func;
01070       func = (flash_trsm_piv_p) t->func;
01071 
01072       func(                 t->input_arg[0],
01073                             t->output_arg[0],
01074                             t->fla_arg[0],
01075             ( fla_trsm_t* ) t->cntl );
01076    }
01077    // FLA_SA_LU
01078    else if ( t->func == (void *) FLA_SA_LU_task )
01079    {
01080       flash_sa_lu_p func;
01081       func = (flash_sa_lu_p) t->func;
01082       // Note the order: output_arg[1] is passed ahead of output_arg[0] (parameter names per flash_sa_lu_p).
01083       func(               t->output_arg[1],   // U
01084                           t->output_arg[0],   // D
01085                           t->fla_arg[0],      // p
01086                           t->fla_arg[1],      // L
01087                           t->int_arg[0],      // nb_alg
01088             ( fla_lu_t* ) t->cntl );
01089    }
01090    // FLA_SA_FS
01091    else if ( t->func == (void *) FLA_SA_FS_task )
01092    {
01093       flash_sa_fs_p func;
01094       func = (flash_sa_fs_p) t->func;
01095       // Note the order: output_arg[1] is passed ahead of output_arg[0] (parameter names per flash_sa_fs_p).
01096       func(               t->fla_arg[0],      // L
01097                           t->input_arg[0],    // D
01098                           t->fla_arg[1],      // p
01099                           t->output_arg[1],   // C
01100                           t->output_arg[0],   // E
01101                           t->int_arg[0],      // nb_alg
01102             ( fla_lu_t* ) t->cntl );
01103    }
01104    // FLA_LU_nopiv
01105    else if ( t->func == (void *) FLA_LU_nopiv_task )
01106    {
01107       flash_lu_nopiv_p func;
01108       func = (flash_lu_nopiv_p) t->func;
01109 
01110       func(               t->output_arg[0],
01111             ( fla_lu_t* ) t->cntl );
01112    }
01113    // FLA_Trinv
01114    else if ( t->func == (void *) FLA_Trinv_task )
01115    {
01116       flash_trinv_p func;
01117       func = (flash_trinv_p) t->func;
01118 
01119       func( ( FLA_Uplo     ) t->int_arg[0],
01120             ( FLA_Diag     ) t->int_arg[1],
01121                              t->output_arg[0],
01122             ( fla_trinv_t* ) t->cntl );
01123    }
01124    // FLA_Ttmm
01125    else if ( t->func == (void *) FLA_Ttmm_task )
01126    {      
01127       flash_ttmm_p func;
01128       func = (flash_ttmm_p) t->func;
01129 
01130       func( ( FLA_Uplo    ) t->int_arg[0],
01131                             t->output_arg[0],
01132             ( fla_ttmm_t* ) t->cntl );
01133    }
01134    // FLA_Chol
01135    else if ( t->func == (void *) FLA_Chol_task )
01136    {      
01137       flash_chol_p func;
01138       func = (flash_chol_p) t->func;
01139 
01140       func( ( FLA_Uplo    ) t->int_arg[0],
01141                             t->output_arg[0],
01142             ( fla_chol_t* ) t->cntl );
01143    }
01144    // FLA_Sylv
01145    else if ( t->func == (void *) FLA_Sylv_task )
01146    {      
01147       flash_sylv_p func;
01148       func = (flash_sylv_p) t->func;
01149 
01150       func( ( FLA_Trans   ) t->int_arg[0],
01151             ( FLA_Trans   ) t->int_arg[1],
01152                             t->fla_arg[0],
01153                             t->input_arg[0],
01154                             t->input_arg[1],
01155                             t->output_arg[0],
01156                             t->fla_arg[1],
01157             ( fla_sylv_t* ) t->cntl );
01158    }
01159    // FLA_QR_UT
01160    else if ( t->func == (void *) FLA_QR_UT_task )
01161    {      
01162       flash_qrut_p func;
01163       func = (flash_qrut_p) t->func;
01164 
01165       func(                 t->output_arg[0],
01166                             t->fla_arg[0],
01167             ( fla_qrut_t* ) t->cntl );
01168    }
01169    // FLA_QR_UT_copy
01170    else if ( t->func == (void *) FLA_QR_UT_copy_task )
01171    {      
01172       flash_qrutc_p func;
01173       func = (flash_qrutc_p) t->func;
01174 
01175       func(                 t->output_arg[0],
01176                             t->fla_arg[0],
01177                             t->output_arg[1],
01178             ( fla_qrut_t* ) t->cntl );
01179    }
01180    // FLA_QR_UT_UD
01181    else if ( t->func == (void *) FLA_QR_UT_UD_task )
01182    {      
01183       flash_qrutud_p func;
01184       func = (flash_qrutud_p) t->func;
01185       // Note the order: output_arg[1] is passed ahead of output_arg[0] (parameter names per flash_qrutud_p).
01186       func(                 t->output_arg[1],   // B
01187                             t->output_arg[0],   // D
01188                             t->fla_arg[0],      // T
01189           ( fla_qrutud_t* ) t->cntl );
01190    }
01191    // FLA_Apply_Q_UT
01192    else if ( t->func == (void *) FLA_Apply_Q_UT_task )
01193    {      
01194       flash_apqut_p func;
01195       func = (flash_apqut_p) t->func;
01196       // Note the order: output_arg[1] is passed ahead of output_arg[0] (parameter names per flash_apqut_p).
01197       func( ( FLA_Side    ) t->int_arg[0],
01198             ( FLA_Trans   ) t->int_arg[1],
01199             ( FLA_Store   ) t->int_arg[2],
01200                             t->input_arg[0],    // A
01201                             t->fla_arg[0],      // T
01202                             t->output_arg[1],   // W
01203                             t->output_arg[0],   // B
01204            ( fla_apqut_t* ) t->cntl );
01205    }
01206    // FLA_Apply_Q_UT_UD
01207    else if ( t->func == (void *) FLA_Apply_Q_UT_UD_task )
01208    {      
01209       flash_apqutud_p func;
01210       func = (flash_apqutud_p) t->func;
01211       // Note the order: the three output_arg entries are passed highest index first (parameter names per flash_apqutud_p).
01212       func( ( FLA_Side    ) t->int_arg[0],
01213             ( FLA_Trans   ) t->int_arg[1],
01214             ( FLA_Store   ) t->int_arg[2],
01215                             t->input_arg[0],    // D
01216                             t->fla_arg[0],      // T
01217                             t->output_arg[2],   // W
01218                             t->output_arg[1],   // C
01219                             t->output_arg[0],   // E
01220          ( fla_apqutud_t* ) t->cntl );
01221    }
01222    // FLA_Gemm
01223    else if ( t->func == (void *) FLA_Gemm_task )
01224    {
01225       flash_gemm_p func;
01226       func = (flash_gemm_p) t->func;
01227 
01228       func( ( FLA_Trans   ) t->int_arg[0],
01229             ( FLA_Trans   ) t->int_arg[1],
01230                             t->fla_arg[0],
01231                             t->input_arg[0],
01232                             t->input_arg[1],
01233                             t->fla_arg[1],
01234                             t->output_arg[0],
01235             ( fla_gemm_t* ) t->cntl );
01236    }
01237    // FLA_Hemm
01238    else if ( t->func == (void *) FLA_Hemm_task )
01239    {
01240       flash_hemm_p func;
01241       func = (flash_hemm_p) t->func;
01242       
01243       func( ( FLA_Side    ) t->int_arg[0],
01244             ( FLA_Uplo    ) t->int_arg[1],
01245                             t->fla_arg[0],
01246                             t->input_arg[0],
01247                             t->input_arg[1],
01248                             t->fla_arg[1],
01249                             t->output_arg[0],
01250             ( fla_hemm_t* ) t->cntl );
01251    }
01252    // FLA_Herk
01253    else if ( t->func == (void *) FLA_Herk_task )
01254    {
01255       flash_herk_p func;
01256       func = (flash_herk_p) t->func;
01257 
01258       func( ( FLA_Uplo    ) t->int_arg[0],
01259             ( FLA_Trans   ) t->int_arg[1],
01260                             t->fla_arg[0],
01261                             t->input_arg[0],
01262                             t->fla_arg[1],
01263                             t->output_arg[0],
01264             ( fla_herk_t* ) t->cntl );
01265    }
01266    // FLA_Her2k
01267    else if ( t->func == (void *) FLA_Her2k_task )
01268    {
01269       flash_her2k_p func;
01270       func = (flash_her2k_p) t->func;
01271 
01272       func( ( FLA_Uplo     ) t->int_arg[0],
01273             ( FLA_Trans    ) t->int_arg[1],
01274                              t->fla_arg[0],
01275                              t->input_arg[0],
01276                              t->input_arg[1],
01277                              t->fla_arg[1],
01278                              t->output_arg[0],
01279             ( fla_her2k_t* ) t->cntl );
01280    }
01281    // FLA_Symm
01282    else if ( t->func == (void *) FLA_Symm_task )
01283    {
01284       flash_symm_p func;
01285       func = (flash_symm_p) t->func;
01286       
01287       func( ( FLA_Side    ) t->int_arg[0],
01288             ( FLA_Uplo    ) t->int_arg[1],
01289                             t->fla_arg[0],
01290                             t->input_arg[0],
01291                             t->input_arg[1],
01292                             t->fla_arg[1],
01293                             t->output_arg[0],
01294             ( fla_symm_t* ) t->cntl );
01295    }
01296    // FLA_Syrk
01297    else if ( t->func == (void *) FLA_Syrk_task )
01298    {
01299       flash_syrk_p func;
01300       func = (flash_syrk_p) t->func;
01301 
01302       func( ( FLA_Uplo    ) t->int_arg[0],
01303             ( FLA_Trans   ) t->int_arg[1],
01304                             t->fla_arg[0],
01305                             t->input_arg[0],
01306                             t->fla_arg[1],
01307                             t->output_arg[0],
01308             ( fla_syrk_t* ) t->cntl );
01309    }
01310    // FLA_Syr2k
01311    else if ( t->func == (void *) FLA_Syr2k_task )
01312    {
01313       flash_syr2k_p func;
01314       func = (flash_syr2k_p) t->func;
01315 
01316       func( ( FLA_Uplo     ) t->int_arg[0],
01317             ( FLA_Trans    ) t->int_arg[1],
01318                              t->fla_arg[0],
01319                              t->input_arg[0],
01320                              t->input_arg[1],
01321                              t->fla_arg[1],
01322                              t->output_arg[0],
01323             ( fla_syr2k_t* ) t->cntl );
01324    }
01325    // FLA_Trmm
01326    else if ( t->func == (void *) FLA_Trmm_task )
01327    {
01328       flash_trmm_p func;
01329       func = (flash_trmm_p) t->func;
01330 
01331       func( ( FLA_Side    ) t->int_arg[0],
01332             ( FLA_Uplo    ) t->int_arg[1],
01333             ( FLA_Trans   ) t->int_arg[2],
01334             ( FLA_Diag    ) t->int_arg[3],
01335                             t->fla_arg[0],
01336                             t->input_arg[0],
01337                             t->output_arg[0],
01338             ( fla_trmm_t* ) t->cntl );
01339    }
01340    // FLA_Trsm
01341    else if ( t->func == (void *) FLA_Trsm_task )
01342    {
01343       flash_trsm_p func;
01344       func = (flash_trsm_p) t->func;
01345 
01346       func( ( FLA_Side    ) t->int_arg[0],
01347             ( FLA_Uplo    ) t->int_arg[1],
01348             ( FLA_Trans   ) t->int_arg[2],
01349             ( FLA_Diag    ) t->int_arg[3],
01350                             t->fla_arg[0],
01351                             t->input_arg[0],
01352                             t->output_arg[0],
01353             ( fla_trsm_t* ) t->cntl );
01354    }
01355    // FLA_Gemv
01356    else if ( t->func == (void *) FLA_Gemv_task )
01357    {
01358       flash_gemv_p func;
01359       func = (flash_gemv_p) t->func;
01360 
01361       func( ( FLA_Trans   ) t->int_arg[0],
01362                             t->fla_arg[0],
01363                             t->input_arg[0],
01364                             t->input_arg[1],
01365                             t->fla_arg[1],
01366                             t->output_arg[0],
01367             ( fla_gemv_t* ) t->cntl );
01368    }
01369    // FLA_Trsv
01370    else if ( t->func == (void *) FLA_Trsv_task )
01371    {
01372       flash_trsv_p func;
01373       func = (flash_trsv_p) t->func;
01374 
01375       func( ( FLA_Uplo    ) t->int_arg[0],
01376             ( FLA_Trans   ) t->int_arg[1],
01377             ( FLA_Diag    ) t->int_arg[2],
01378                             t->input_arg[0],
01379                             t->output_arg[0],
01380             ( fla_trsv_t* ) t->cntl );
01381    }
01382    // FLA_Axpy
01383    else if ( t->func == (void *) FLA_Axpy_task )
01384    {
01385       flash_axpy_p func;
01386       func = (flash_axpy_p) t->func;
01387 
01388       func(                 t->fla_arg[0],
01389                             t->input_arg[0],
01390                             t->output_arg[0],
01391             ( fla_axpy_t* ) t->cntl );
01392    }
01393    // FLA_Copy
01394    else if ( t->func == (void *) FLA_Copy_task )
01395    {
01396       flash_copy_p func;
01397       func = (flash_copy_p) t->func;
01398 
01399       func(                 t->input_arg[0],
01400                             t->output_arg[0],
01401             ( fla_copy_t* ) t->cntl );
01402    }
01403    // FLA_Obj_free
01404    else if ( t->func == (void *) FLA_Obj_free_task )
01405    {
01406       flash_obj_free_p func;
01407       func = (flash_obj_free_p) t->func;
01408 
01409       func(           t->output_arg[0],
01410             ( void* ) t->cntl );
01411    }
01412    else
01413    {
01414       FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );  // t->func matched none of the task routines above.
01415    }
01416 
01417 #ifdef FLA_ENABLE_SUPERMATRIX_VISUALIZATION
01418    t->end_time = FLA_Clock();  // Timestamp taken after the dispatched call (or the error check) returns.
01419 #endif
01420    
01421    return;
01422 }

void FLASH_Queue_finalize ( void   ) 

Referenced by FLA_Finalize().

00272 {  // Clears the initialized flag; does nothing when the flag is already FALSE.
00273    // Exit early if we're not already initialized.
00274    if ( flash_queue_initialized == FALSE )
00275       return;
00276 
00277    // Clear the initialized flag.
00278    flash_queue_initialized = FALSE;
00279    // Only the flag changes here; no other queue state is touched by this function.
00280    return;
00281 }

int FLASH_Queue_get_block_size ( void   ) 

00493 {
00494    return flash_queue_block_size;  // Written by FLASH_Queue_set_block_size(), which only ever raises it.
00495 }

int FLASH_Queue_get_cache_line_size ( void   ) 

00541 {
00542    return flash_queue_cache_line_size;  // Written by FLASH_Queue_set_cache_line_size().
00543 }

int FLASH_Queue_get_cache_size ( void   ) 

00517 {
00518    return flash_queue_cache_size;  // Written by FLASH_Queue_set_cache_size().
00519 }

FLA_Bool FLASH_Queue_get_caching ( void   ) 

00362 { 
00363    return flash_queue_caching;  // Written by FLASH_Queue_set_caching().
00364 }

int FLASH_Queue_get_cores_per_cache ( void   ) 

00565 {
00566    return flash_queue_cores_per_cache;  // Written by FLASH_Queue_set_cores_per_cache().
00567 }

FLASH_Data_aff FLASH_Queue_get_data_affinity ( void   ) 

Referenced by FLASH_Queue_exec(), FLASH_Queue_exec_parallel_function(), FLASH_Queue_exec_simulation(), and FLASH_Queue_init_tasks().

00410 { 
00411    return flash_queue_data_affinity;  // Written by FLASH_Queue_set_data_affinity().
00412 }

FLA_Bool FLASH_Queue_get_enabled ( void   ) 

FLASH_Task* FLASH_Queue_get_head_task ( void   ) 

int FLASH_Queue_get_num_blocks ( void   ) 

00467 {
00468    return flash_queue_n_write_blocks;  // The write-block counter (bumped in FLASH_Queue_push, zeroed in FLASH_Queue_reset), not the read-block one.
00469 }

int FLASH_Queue_get_num_tasks ( void   ) 

unsigned int FLASH_Queue_get_num_threads ( void   ) 

double FLASH_Queue_get_parallel_time ( void   ) 

00436 {
00437    // Only return time if out of parallel region.
00438    if ( flash_queue_stack == 0 )
00439       return flash_queue_parallel_time;
00440    // flash_queue_stack != 0: report zero rather than the stored time.
00441    return 0.0;
00442 }

FLA_Bool FLASH_Queue_get_sorting ( void   ) 

Referenced by FLASH_Queue_wait_enqueue().

00338 { 
00339    return flash_queue_sorting;  // Current value of the sorting flag.
00340 }

FLASH_Task* FLASH_Queue_get_tail_task ( void   ) 

References FLASH_Queue_s::tail.

Referenced by FLASH_Queue_init_tasks().

00607 {
00608    return _tq.tail;  // NULL after FLASH_Queue_reset(); otherwise the task most recently appended by FLASH_Queue_push().
00609 }

double FLASH_Queue_get_total_time ( void   ) 

00421 {
00422    // Only return time if out of parallel region.
00423    if ( flash_queue_stack == 0 )
00424       return flash_queue_total_time;
00425    // flash_queue_stack != 0: report zero rather than the stored time.
00426    return 0.0;
00427 }

FLA_Bool FLASH_Queue_get_verbose_output ( void   ) 

Referenced by FLASH_Queue_exec(), and FLASH_Queue_exec_simulation().

00314 { 
00315    return flash_queue_verbose;  // Current value of the verbose-output flag.
00316 }

FLA_Bool FLASH_Queue_get_work_stealing ( void   ) 

00386 {
00387    return flash_queue_work_stealing;  // Current value of the work-stealing flag.
00388 }

void FLASH_Queue_init ( void   ) 

References FLASH_Queue_reset().

Referenced by FLA_Init().

00251 {  // Resets the queue state once; further calls do nothing until FLASH_Queue_finalize() clears the flag.
00252    // Exit early if we're already initialized.
00253    if ( flash_queue_initialized == TRUE )
00254       return;
00255    
00256    // Reset all the initial values.
00257    FLASH_Queue_reset();
00258 
00259    // Set the initialized flag.
00260    flash_queue_initialized = TRUE;
00261 
00262    return;
00263 }

void FLASH_Queue_init_tasks ( void *  arg  ) 

References FLA_Obj_view::base, FLASH_Task_s::dep_arg_head, FLASH_Queue_get_data_affinity(), FLASH_Queue_get_head_task(), FLASH_Queue_get_num_tasks(), FLASH_Queue_get_num_threads(), FLASH_Queue_get_tail_task(), FLASH_Queue_wait_enqueue(), FLASH_Task_s::height, FLA_Obj_struct::m_index, FLASH_Task_s::n_dep_args, FLA_Obj_struct::n_index, FLASH_Task_s::n_input_args, FLASH_Task_s::n_output_args, FLASH_Task_s::n_ready, FLASH_Task_s::n_war_args, FLASH_Dep_s::next_dep, FLASH_Task_s::next_task, FLASH_Task_s::output_arg, FLASH_Task_s::prev_task, FLASH_Task_s::queue, and FLASH_Dep_s::task.

Referenced by FLASH_Queue_exec().

00235 {  // Gives every queued task a queue index and a DAG height, finalizes its n_ready count, then enqueues the tasks that are ready.
00236    int            i, j;
00237    int            n_tasks   = FLASH_Queue_get_num_tasks();
00238    int            n_threads = FLASH_Queue_get_num_threads();
00239    int            n_ready   = 0;
00240    int            length    = 0;
00241    int            width     = 0;
00242    int            height    = 0;
00243    FLASH_Data_aff data_aff  = FLASH_Queue_get_data_affinity();
00244    FLASH_Task*    t;
00245    FLASH_Dep*     d;
00246 
00247    // Find the 2D factorization of the number of threads.
00248    if ( data_aff == FLASH_QUEUE_AFFINITY_2D_BLOCK_CYCLIC )
00249    {
00250       int sq_rt = 0;
00251       while ( sq_rt * sq_rt <= n_threads ) sq_rt++;  // smallest sq_rt whose square exceeds n_threads
00252       sq_rt--;                                       // now the integer square root of n_threads
00253       while ( n_threads % sq_rt != 0 ) sq_rt--;      // largest divisor not above it; NOTE(review): n_threads == 0 would make sq_rt 0 here - confirm callers guarantee >= 1
00254       length = n_threads / sq_rt;                    // length * width == n_threads
00255       width  = sq_rt;     
00256    }
00257 
00258    // Grab the tail of the task queue.
00259    t = FLASH_Queue_get_tail_task();
00260    // Walk tail to head: FLASH_Queue_push links a task's dependents after it, so their heights are already set when it is visited.
00261    for ( i = n_tasks - 1; i >= 0; i-- )
00262    {
00263       // Determine data affinity.
00264       if ( data_aff == FLASH_QUEUE_AFFINITY_NONE )
00265       { // No data affinity
00266          t->queue = 0;
00267       }
00268       else if ( data_aff == FLASH_QUEUE_AFFINITY_2D_BLOCK_CYCLIC )
00269       { // Two-dimensional block cyclic
00270          t->queue = ( t->output_arg[0].base->m_index % length ) + 
00271                     ( t->output_arg[0].base->n_index % width  ) * length;
00272       }
00273       else
00274       { // Round-robin
00275          t->queue = t->queue % n_threads;  // t->queue was seeded in FLASH_Queue_push (write-block index, or the previous writer's queue).
00276       }
00277 
00278       // Determine the height of each task in the DAG.
00279       height = 0;
00280       d = t->dep_arg_head;
00281 
00282       // Take the maximum height of dependent tasks.
00283       for ( j = 0; j < t->n_dep_args; j++ )
00284       {
00285          height = max( height, d->task->height );
00286          d = d->next_dep;
00287       }
00288 
00289       t->height = height + 1;
00290 
00291       // Find all ready tasks.
00292       t->n_ready += t->n_input_args + t->n_output_args + t->n_war_args;
00293       // FLASH_Queue_push decremented n_ready for each argument needing no wait; presumably it starts at 0 in FLASH_Task_alloc - confirm.
00294       if ( t->n_ready == 0 )
00295       {
00296          // Save the number of ready and available tasks.
00297          n_ready++;
00298       }
00299 
00300       // Go to the previous task.
00301       t = t->prev_task;
00302    }
00303 
00304    // Grab the head of the task queue.
00305    t = FLASH_Queue_get_head_task();
00306    // Second pass, head to tail; the loop stops as soon as every ready task counted above has been enqueued.
00307    for ( i = 0; i < n_tasks && n_ready > 0; i++ )
00308    {
00309       if ( t->n_ready == 0 )
00310       {
00311          // Enqueue all the ready and available tasks.
00312          FLASH_Queue_wait_enqueue( t, arg );
00313 
00314          // Decrement the number of ready tasks left to be enqueued.
00315          n_ready--;
00316       }
00317 
00318       // Go to the next task.
00319       t = t->next_task;   
00320    }
00321 
00322    return;
00323 }

void FLASH_Queue_push ( void *  func,
void *  cntl,
char *  name,
int  n_int_args,
int  n_fla_args,
int  n_input_args,
int  n_output_args,
  ... 
)

References FLA_Obj_view::base, FLASH_Task_s::dep_arg_head, FLASH_Task_s::dep_arg_tail, FLA_Obj_struct::first_task, FLASH_Task_s::fla_arg, FLA_free(), FLA_malloc(), FLASH_Task_alloc(), FLASH_Queue_s::head, FLASH_Task_s::input_arg, FLASH_Task_s::int_arg, FLASH_Task_s::n_dep_args, FLA_Obj_struct::n_read_blocks, FLA_Obj_struct::n_read_tasks, FLASH_Task_s::n_ready, FLASH_Queue_s::n_tasks, FLASH_Task_s::n_war_args, FLA_Obj_struct::n_write_blocks, FLASH_Dep_s::next_dep, FLASH_Task_s::next_task, FLASH_Task_s::order, FLASH_Task_s::output_arg, FLASH_Task_s::prev_task, FLASH_Task_s::queue, FLA_Obj_struct::read_task_head, FLA_Obj_struct::read_task_tail, FLASH_Queue_s::tail, FLASH_Dep_s::task, and FLA_Obj_struct::write_task.

00625 {  // Creates a task for func, records its arguments, links it to the earlier tasks it depends on, and appends it to the tail of _tq.
00626    int         i, j;
00627    va_list     var_arg_list;
00628    FLASH_Task* t;
00629    FLASH_Task* task;
00630    FLASH_Dep*  d;
00631    FLASH_Dep*  next_dep;
00632    FLA_Obj     obj;
00633 
00634    // Allocate a new FLASH_Task and populate its fields with appropriate values.
00635    t = FLASH_Task_alloc( func, cntl, name,
00636                          n_int_args, n_fla_args,
00637                          n_input_args, n_output_args );
00638    
00639    // Initialize variable argument environment. In case you're wondering, the
00640    // second argument in this macro invocation of va_start() is supposed to be
00641    // the parameter that immediately precedes the variable argument list
00642    // (ie: the ... above ).
00643    va_start( var_arg_list, n_output_args );
00644    // The variadic arguments are consumed in this order: ints, FLA_Obj args, input FLA_Objs, output FLA_Objs.
00645    // Extract the integer arguments.
00646    for ( i = 0; i < n_int_args; i++ )
00647       t->int_arg[i] = va_arg( var_arg_list, int );
00648    
00649    // Extract the FLA_Obj arguments.
00650    for ( i = 0; i < n_fla_args; i++ )
00651       t->fla_arg[i] = va_arg( var_arg_list, FLA_Obj );
00652 
00653    // Extract the input FLA_Obj arguments.
00654    for ( i = 0; i < n_input_args; i++ )
00655    {
00656       obj = va_arg( var_arg_list, FLA_Obj );
00657       t->input_arg[i] = obj;
00658 
00659       // Find dependence information.
00660       if ( obj.base->write_task == NULL )
00661       {
00662          t->n_ready--;  // No task has written this block yet, so this input adds no wait.
00663 
00664          // Add to number of blocks read if not written and not read before.
00665          if ( obj.base->n_read_tasks == 0 )
00666          {
00667             // Identify each read block with an id for freeing.
00668             obj.base->n_read_blocks = flash_queue_n_read_blocks;
00669 
00670             flash_queue_n_read_blocks++;            
00671          }
00672       }
00673       else
00674       { // Flow dependence.
00675          task = obj.base->write_task;
00676          // NOTE(review): the FLA_malloc result is used unchecked; presumably FLA_malloc handles failure itself - confirm.
00677          d = (FLASH_Dep *) FLA_malloc( sizeof(FLASH_Dep) );
00678 
00679          d->task     = t;
00680          d->next_dep = NULL;
00681          
00682          if ( task->n_dep_args == 0 )
00683          {
00684             task->dep_arg_head = d;
00685             task->dep_arg_tail = d;
00686          }
00687          else
00688          {
00689             task->dep_arg_tail->next_dep = d;
00690             task->dep_arg_tail           = d;
00691          }
00692 
00693          task->n_dep_args++;
00694       }
00695 
00696       // Add task to the read task in the object if not already there.
00697       if ( obj.base->n_read_tasks == 0 ||
00698            obj.base->read_task_tail->task != t )
00699       { // Anti-dependence potentially.
00700          d = (FLASH_Dep *) FLA_malloc( sizeof(FLASH_Dep) );
00701 
00702          d->task     = t;
00703          d->next_dep = NULL;
00704 
00705          if ( obj.base->n_read_tasks == 0 )
00706          {
00707             obj.base->read_task_head = d;
00708             obj.base->read_task_tail = d;
00709          }
00710          else
00711          {
00712             obj.base->read_task_tail->next_dep = d;
00713             obj.base->read_task_tail           = d;
00714          }
00715          
00716          obj.base->n_read_tasks++;
00717       }      
00718    }
00719 
00720    // Extract the output FLA_Obj arguments.
00721    for ( i = 0; i < n_output_args; i++ )
00722    {
00723       obj = va_arg( var_arg_list, FLA_Obj );
00724       t->output_arg[i] = obj;
00725 
00726       // Assign tasks to threads with data affinity.
00727       if ( obj.base->write_task == NULL )
00728       {
00729          t->n_ready--;  // First writer of this block: nothing earlier to wait on for it.
00730 
00731          // Only assign data affinity to the first output block.
00732          if ( i == 0 )
00733             t->queue = flash_queue_n_write_blocks;
00734 
00735          // Save index in which this output block is first encountered.
00736          obj.base->n_write_blocks = flash_queue_n_write_blocks;
00737          obj.base->first_task     = t;
00738 
00739          // Number of blocks written if not written before.
00740          flash_queue_n_write_blocks++;
00741 
00742          // Add to number of blocks read if not written or read before.
00743          if ( obj.base->n_read_tasks == 0 )
00744          {
00745             // Identify each read block with an id for freeing.
00746             obj.base->n_read_blocks = flash_queue_n_read_blocks;
00747 
00748             flash_queue_n_read_blocks++;
00749          }
00750       }
00751       else
00752       { // Flow dependence potentially.
00753          // The last task to overwrite this block is not itself.
00754          if ( obj.base->write_task != t )
00755          {
00756             // Create dependency from task that last wrote the block.
00757             task = obj.base->write_task;
00758             
00759             d = (FLASH_Dep *) FLA_malloc( sizeof(FLASH_Dep) );
00760             
00761             d->task     = t;
00762             d->next_dep = NULL;
00763             
00764             if ( task->n_dep_args == 0 )
00765             {
00766                task->dep_arg_head = d;
00767                task->dep_arg_tail = d;
00768             }
00769             else
00770             {
00771                task->dep_arg_tail->next_dep = d;
00772                task->dep_arg_tail           = d;
00773             }
00774             
00775             task->n_dep_args++;
00776             
00777             // Only assign data affinity to the first output block.
00778             if ( i == 0 )
00779                t->queue = task->queue;
00780          }
00781          else
00782          {
00783             // No need to notify task twice for output block already seen.
00784             t->n_ready--;
00785          }
00786       }
00787       
00788       // Clear read task for next set of reads and record the anti-dependence.
00789       d = obj.base->read_task_head;
00790 
00791       for ( j = 0; j < obj.base->n_read_tasks; j++ )
00792       {
00793          task     = d->task;
00794          next_dep = d->next_dep;
00795          // next_dep is saved first because d is either re-linked or freed below.
00796          // If the last task to read is not the current task, add dependence.
00797          if ( task != t )
00798          {  // The reader-list node d is reused as a dependency node: reader task -> t.
00799             d->task     = t;
00800             d->next_dep = NULL;
00801             
00802             if ( task->n_dep_args == 0 )
00803             {
00804                task->dep_arg_head = d;
00805                task->dep_arg_tail = d;
00806             }
00807             else
00808             {
00809                task->dep_arg_tail->next_dep = d;
00810                task->dep_arg_tail           = d;
00811             }
00812             
00813             task->n_dep_args++;
00814 
00815             t->n_war_args++;
00816          }  
00817          else
00818          {
00819             FLA_free( d );  // t reading and writing the same block needs no dependency on itself.
00820          }
00821 
00822          d = next_dep;
00823       }
00824       
00825       obj.base->n_read_tasks   = 0;
00826       obj.base->read_task_head = NULL;
00827       obj.base->read_task_tail = NULL;
00828 
00829       // Record this task as the last to write to this block.
00830       obj.base->write_task = t;
00831    }      
00832 
00833    // Finalize the variable argument environment.
00834    va_end( var_arg_list );
00835   
00836    // Add the task to the tail of the queue (and the head if queue is empty).
00837    if ( _tq.n_tasks == 0 )
00838    {
00839       _tq.head = t;
00840       _tq.tail = t;  // NOTE(review): t->order is not assigned on this path; presumably FLASH_Task_alloc initializes it - confirm.
00841    }
00842    else
00843    {
00844       t->prev_task = _tq.tail;
00845       _tq.tail->next_task = t;
00846       _tq.tail            = t;
00847 
00848       // Determine the index of the task in the task queue.
00849       t->order = t->prev_task->order + 1;
00850    }
00851    
00852    // Increment the number of tasks.
00853    _tq.n_tasks++;
00854 
00855    return;
00856 }

void FLASH_Queue_reset ( void   ) 

References FLASH_Queue_s::head, FLASH_Queue_s::n_tasks, and FLASH_Queue_s::tail.

Referenced by FLASH_Queue_exec(), and FLASH_Queue_init().

// Return the global task queue _tq to its empty state (zero tasks, NULL head
// and tail) and zero the read/write block counters. Only counters and
// pointers are cleared; this routine itself frees no memory.
00576 {
00577    // Clear the other fields of the FLASH_Queue structure.
00578    _tq.n_tasks = 0;
00579    _tq.head    = NULL;
00580    _tq.tail    = NULL;
00581 
00582    // Reset the number of blocks.
00583    flash_queue_n_read_blocks  = 0;
00584    flash_queue_n_write_blocks = 0;
00585 
00586    return;
00587 }

void FLASH_Queue_set_block_size ( int  size  ) 

Referenced by FLASH_Obj_create_hierarchy().

// Record `size` as the queue's block size, but only when it exceeds the value
// already stored; smaller or equal values are ignored, so the stored value
// behaves as a running maximum over all calls.
00478 {
00479    // Only adjust the block size if the new block is larger.
00480    if ( flash_queue_block_size < size )
00481       flash_queue_block_size = size;
00482 
00483    return;
00484 }

void FLASH_Queue_set_cache_line_size ( int  size  ) 

// Store `size` in flash_queue_cache_line_size. The value is not validated
// here. NOTE(review): the expected unit is not visible in this routine.
00528 {
00529    flash_queue_cache_line_size = size;
00530 
00531    return;
00532 }

void FLASH_Queue_set_cache_size ( int  size  ) 

// Store `size` in flash_queue_cache_size. The value is not validated here.
// NOTE(review): the expected unit is not visible in this routine.
00504 {
00505    flash_queue_cache_size = size;
00506 
00507    return;
00508 }

void FLASH_Queue_set_caching ( FLA_Bool  caching  ) 

// Store the `caching` flag in flash_queue_caching; nothing else is done here.
00349 { 
00350    flash_queue_caching = caching; 
00351 
00352    return;
00353 }

void FLASH_Queue_set_cores_per_cache ( int  cores  ) 

// Store `cores` in flash_queue_cores_per_cache. The value is not validated
// here (zero or negative values are accepted as given).
00552 {
00553    flash_queue_cores_per_cache = cores;
00554 
00555    return;
00556 }

void FLASH_Queue_set_data_affinity ( FLASH_Data_aff  data_affinity  ) 

// Store `data_affinity` in flash_queue_data_affinity; nothing else is done
// here.
00397 { 
00398    flash_queue_data_affinity = data_affinity; 
00399 
00400    return;
00401 }

void FLASH_Queue_set_num_threads ( unsigned int  n_threads  ) 

References FLA_Check_num_threads().

// Validate `n_threads` with FLA_Check_num_threads(), hand the resulting code
// to FLA_Check_error_code(), and then record the count in
// flash_queue_n_threads. Neither multithreading-model branch below contains
// any code; both exist only to document why no further action is taken.
00201 {
00202    FLA_Error e_val;
00203 
00204    // Verify that the number of threads is positive. 
00205    e_val = FLA_Check_num_threads( n_threads );
00206    FLA_Check_error_code( e_val );
00207 
00208    // Keep track of the number of threads internally.
00209    flash_queue_n_threads = n_threads;
00210 
00211 #if   FLA_MULTITHREADING_MODEL == FLA_OPENMP
00212 
00213    // No additional action is necessary to set the number of OpenMP threads
00214    // since setting the number of threads is handled at the parallel for loop
00215    // with a num_threads() clause. This gives the user more flexibility since
00216    // he can use the OMP_NUM_THREADS environment variable or the
00217    // omp_set_num_threads() function to set the global number of OpenMP threads
00218    // independently of the number of SuperMatrix threads.
00219    
00220 #elif FLA_MULTITHREADING_MODEL == FLA_PTHREADS
00221 
00222    // No additional action is necessary to set the number of pthreads
00223    // since setting the number of threads is handled entirely on our end.
00224 
00225 #endif
00226 
00227    return;
00228 }

void FLASH_Queue_set_parallel_time ( double  dtime  ) 

Referenced by FLASH_Queue_exec().

// Add `dtime` to flash_queue_parallel_time.
// NOTE: despite the "set" in the name, the value is accumulated (+=) rather
// than overwritten, so repeated calls sum their arguments.
00454 {
00455    flash_queue_parallel_time += dtime;
00456 
00457    return;
00458 }

void FLASH_Queue_set_sorting ( FLA_Bool  sorting  ) 

// Store the `sorting` flag in flash_queue_sorting; nothing else is done here.
00325 { 
00326    flash_queue_sorting = sorting; 
00327 
00328    return;
00329 }

void FLASH_Queue_set_verbose_output ( FLA_Bool  verbose  ) 

// Store the `verbose` flag in flash_queue_verbose; nothing else is done here.
00301 { 
00302    flash_queue_verbose = verbose;
00303 
00304    return;
00305 }

void FLASH_Queue_set_work_stealing ( FLA_Bool  work_stealing  ) 

// Store the `work_stealing` flag in flash_queue_work_stealing; nothing else
// is done here.
00373 {
00374    flash_queue_work_stealing = work_stealing;
00375 
00376    return;
00377 }

void FLASH_Queue_verbose_output ( void   ) 

References FLA_Obj_view::base, FLASH_Task_s::dep_arg_head, FLASH_Queue_get_head_task(), FLASH_Queue_get_num_tasks(), FLA_Obj_struct::id, FLASH_Task_s::input_arg, FLA_Obj_struct::m_index, FLASH_Task_s::n_dep_args, FLA_Obj_struct::n_index, FLASH_Task_s::n_input_args, FLASH_Task_s::n_output_args, FLASH_Task_s::name, FLASH_Dep_s::next_dep, FLASH_Task_s::next_task, FLASH_Task_s::order, FLASH_Task_s::output_arg, and FLASH_Dep_s::task.

Referenced by FLASH_Queue_exec().

// Print the task queue to stdout, one line per task, in queue order:
//
//    <order>;<name>;In;<id>[<m>,<n>];...Out;<id>[<m>,<n>];...Dep;<order>;<order>...
//
// where each In/Out entry is a block's id and (m_index, n_index), and the Dep
// entries are the `order` values of the tasks on this task's dep_arg list.
// A blank line is printed after the last task.
//
// The walk visits exactly FLASH_Queue_get_num_tasks() nodes via next_task and
// exactly n_dep_args nodes via next_dep; the lists are not NULL-checked, so
// those counts must match the actual list lengths.
// NOTE(review): "%lu" assumes base->id is an unsigned long - confirm against
// the FLA_Obj_struct declaration.
01431 {
01432    int         i, j;
01433    int         n_tasks = FLASH_Queue_get_num_tasks();
01434    FLASH_Task* t;
01435    FLASH_Dep*  d;
01436 
01437    // Grab the head of the task queue.
01438    t = FLASH_Queue_get_head_task();
01439 
01440    // Iterate over linked list of tasks.
01441    for ( i = 0; i < n_tasks; i++ )
01442    {
01443       printf( "%d;%s;", t->order, t->name );
01444 
01445       printf( "In;" );
01446       for ( j = 0; j < t->n_input_args; j++ )
01447          printf( "%lu[%d,%d];", t->input_arg[j].base->id,
01448                  t->input_arg[j].base->m_index, 
01449                  t->input_arg[j].base->n_index );
01450 
01451       printf( "Out;" );
01452       for ( j = 0; j < t->n_output_args; j++ )
01453          printf( "%lu[%d,%d];", t->output_arg[j].base->id,
01454                  t->output_arg[j].base->m_index, 
01455                  t->output_arg[j].base->n_index );
01456 
01457       printf( "Dep" ); // Separator is emitted before each entry below.
01458       d = t->dep_arg_head;
01459       for ( j = 0; j < t->n_dep_args; j++ )
01460       {
01461          printf( ";%d", d->task->order );
01462          d = d->next_dep;
01463       }
01464 
01465       printf( "\n" );
01466 
01467       // Go to the next task.
01468       t = t->next_task;
01469    }
01470 
01471    printf( "\n" );
01472 
01473    return;
01474 }

void FLASH_Queue_visualization ( void   ) 

References FLA_Obj_view::base, FLASH_Task_s::begin_time, FLASH_Task_s::end_time, FLASH_Queue_get_head_task(), FLASH_Queue_get_num_tasks(), FLA_Obj_struct::id, FLASH_Task_s::input_arg, FLA_Obj_struct::m_index, FLA_Obj_struct::n_index, FLASH_Task_s::n_input_args, FLASH_Task_s::n_output_args, FLASH_Task_s::name, FLASH_Task_s::next_task, FLASH_Task_s::output_arg, and FLASH_Task_s::thread.

Referenced by FLASH_Queue_exec().

// Print per-task execution data to stdout, one line per task, in queue order:
//
//    <name>;<thread>;<begin_time>;<end_time>;In;<id>[<m>,<n>];...Out;<id>[<m>,<n>]...
//
// Input entries are each followed by ';', whereas output entries are each
// preceded by ';', so a line never ends with a trailing separator. Unlike
// FLASH_Queue_verbose_output(), no blank line is printed after the last task.
//
// The walk visits exactly FLASH_Queue_get_num_tasks() nodes via next_task
// without NULL checks.
// NOTE(review): "%lu" assumes base->id is an unsigned long, and "%f" assumes
// begin_time/end_time are floating point - confirm against the declarations.
01485 {
01486    int         i, j;
01487    int         n_tasks = FLASH_Queue_get_num_tasks();
01488    FLASH_Task* t;
01489 
01490    // Grab the head of the task queue.
01491    t = FLASH_Queue_get_head_task();
01492    
01493    // Iterate over linked list of tasks.
01494    for ( i = 0; i < n_tasks; i++ )
01495    {
01496       printf( "%s;%d;%f;%f;", 
01497               t->name, t->thread, t->begin_time, t->end_time );
01498 
01499       printf( "In;" );
01500       for ( j = 0; j < t->n_input_args; j++ )
01501          printf( "%lu[%d,%d];", t->input_arg[j].base->id,
01502                  t->input_arg[j].base->m_index, 
01503                  t->input_arg[j].base->n_index );
01504       
01505       printf( "Out" );
01506       for ( j = 0; j < t->n_output_args; j++ )
01507          printf( ";%lu[%d,%d]", t->output_arg[j].base->id,
01508                  t->output_arg[j].base->m_index, 
01509                  t->output_arg[j].base->n_index );
01510       
01511       printf( "\n" );
01512       
01513       // Go to the next task.
01514       t = t->next_task;
01515    }
01516 
01517    return;
01518 }

FLASH_Task* FLASH_Queue_wait_dequeue ( int  queue,
int  thread,
void *  arg 
)

References FLASH_Queue_s::head, FLASH_Queue_s::n_tasks, FLASH_Task_s::next_wait, FLASH_Task_s::prev_wait, FLASH_Queue_s::tail, FLASH_Task_s::thread, and FLASH_Queue_variables::wait_queue.

Referenced by FLASH_Queue_exec_parallel_function(), and FLASH_Queue_exec_simulation().

// Remove and return the task at the head of waiting queue `queue`, or return
// NULL when that queue holds no tasks.
//
//    queue  - index into args->wait_queue.
//    thread - id stored in the returned task's `thread` field.
//    arg    - pointer to the FLASH_Queue_vars holding the waiting queues.
//
// The returned task has its prev_wait/next_wait links cleared.
// NOTE(review): no lock is taken in this routine; presumably the caller
// serializes access to the waiting queue - confirm at the call sites.
00384 {
00385    FLASH_Queue_vars* args = ( FLASH_Queue_vars* ) arg;
00386    FLASH_Task* t = NULL;
00387 
00388    if ( args->wait_queue[queue].n_tasks > 0 )
00389    {
00390       // Dequeue the first task.
00391       t = args->wait_queue[queue].head;
00392 
00393       if ( args->wait_queue[queue].n_tasks == 1 )
00394       {
00395          // Clear the queue of its only task.
00396          args->wait_queue[queue].head = NULL;
00397          args->wait_queue[queue].tail = NULL;        
00398       }
00399       else
00400       {
00401          // Adjust pointers in waiting queue.
00402          args->wait_queue[queue].head = t->next_wait;
00403          args->wait_queue[queue].head->prev_wait = NULL;
00404       }
00405 
00406       // Save the executing thread.
00407       t->thread = thread;
00408 
00409       // Clear the task's waiting linked list pointers.
00410       t->prev_wait = NULL;
00411       t->next_wait = NULL;
00412 
00413       // Decrement number of tasks on waiting queue.
00414       args->wait_queue[queue].n_tasks--;     
00415    }
00416 
00417    return t;
00418 }

void FLASH_Queue_wait_enqueue ( FLASH_Task *  t,
void *  arg 
)

References FLASH_Queue_get_sorting(), FLASH_Queue_s::head, FLASH_Task_s::height, FLASH_Queue_s::n_tasks, FLASH_Task_s::next_wait, FLASH_Task_s::prev_wait, FLASH_Task_s::queue, FLASH_Queue_s::tail, and FLASH_Queue_variables::wait_queue.

Referenced by FLASH_Queue_exec_simulation(), FLASH_Queue_init_tasks(), and FLASH_Task_update_dependencies().

// Insert task `t` into the waiting queue selected by t->queue.
//
//    t   - task to insert; its prev_wait/next_wait must be NULL on entry,
//          because the empty-queue branch does not set them and the tail
//          test below relies on next_wait still being NULL when t is not
//          moved forward.
//    arg - pointer to the FLASH_Queue_vars holding the waiting queues.
//
// With sorting disabled, t is appended at the tail. With sorting enabled
// (FLASH_Queue_get_sorting()), t is moved from the tail toward the head past
// every task whose height is strictly smaller, and stops behind the first
// task whose height is >= its own; tasks of equal height therefore keep
// their arrival order.
// NOTE(review): no lock is taken in this routine; callers are expected to
// hold whatever lock protects the queue - confirm at the call sites.
00332 {  
00333    FLASH_Queue_vars* args = ( FLASH_Queue_vars* ) arg;
00334    int queue = t->queue;
00335 
00336    if ( args->wait_queue[queue].n_tasks == 0 )
00337    {
00338       args->wait_queue[queue].head = t;
00339       args->wait_queue[queue].tail = t;
00340    }
00341    else
00342    {
00343       t->prev_wait = args->wait_queue[queue].tail;
00344 
00345       // Insertion sort of tasks in waiting queue.
00346       if ( FLASH_Queue_get_sorting() )
00347       {
00348          while ( t->prev_wait != NULL )
00349          {
00350             if ( t->prev_wait->height >= t->height )
00351                break; // Predecessor is at least as tall: t goes right after it.
00352             
00353             t->next_wait = t->prev_wait;
00354             t->prev_wait = t->prev_wait->prev_wait;
00355          }         
00356       }
00357 
00358       // Checking if the task is the head of the waiting queue.      
00359       if ( t->prev_wait == NULL )
00360          args->wait_queue[queue].head = t;
00361       else
00362          t->prev_wait->next_wait = t;
00363 
00364       // Checking if the task is the tail of the waiting queue.
00365       if ( t->next_wait == NULL )
00366          args->wait_queue[queue].tail = t;
00367       else
00368          t->next_wait->prev_wait = t;
00369    }
00370 
00371    // Increment number of tasks on waiting queue.
00372    args->wait_queue[queue].n_tasks++;
00373 
00374    return;
00375 }

FLASH_Task* FLASH_Task_alloc ( void *  func,
void *  cntl,
char *  name,
int  n_int_args,
int  n_fla_args,
int  n_input_args,
int  n_output_args 
)

References FLASH_Task_s::cache, FLASH_Task_s::cntl, FLASH_Task_s::dep_arg_head, FLASH_Task_s::dep_arg_tail, FLASH_Task_s::fla_arg, FLA_malloc(), FLASH_Task_s::func, FLASH_Task_s::height, FLASH_Task_s::hit, FLASH_Task_s::input_arg, FLASH_Task_s::int_arg, FLASH_Task_s::n_dep_args, FLASH_Task_s::n_fla_args, FLASH_Task_s::n_input_args, FLASH_Task_s::n_int_args, FLASH_Task_s::n_output_args, FLASH_Task_s::n_ready, FLASH_Task_s::n_war_args, FLASH_Task_s::name, FLASH_Task_s::next_task, FLASH_Task_s::next_wait, FLASH_Task_s::order, FLASH_Task_s::output_arg, FLASH_Task_s::prev_task, FLASH_Task_s::prev_wait, FLASH_Task_s::queue, and FLASH_Task_s::thread.

Referenced by FLASH_Queue_push().

// Allocate and initialize a new task.
//
//    func, cntl, name - stored in the task as given; `name` is kept as a
//                       pointer and is not copied.
//    n_int_args       - element count of the int_arg array.
//    n_fla_args       - element count of the fla_arg array.
//    n_input_args     - element count of the input_arg array.
//    n_output_args    - element count of the output_arg array.
//
// Returns the new task. The four argument arrays are allocated but their
// contents are left uninitialized; every counter is zeroed, `hit` is FALSE,
// and all list links are NULL.
// NOTE(review): the results of FLA_malloc() are used without a check here;
// presumably FLA_malloc() deals with allocation failure itself - confirm.
00871 {
00872    FLASH_Task* t;
00873 
00874    // Allocate space for the task structure t.
00875    t             = (FLASH_Task *) FLA_malloc( sizeof(FLASH_Task) );
00876 
00877    // Allocate space for the task's integer arguments.
00878    t->int_arg    = (int *) FLA_malloc( n_int_args * sizeof(int) );
00879 
00880    // Allocate space for the task's FLA_Obj arguments.
00881    t->fla_arg    = (FLA_Obj *) FLA_malloc( n_fla_args * sizeof(FLA_Obj) );
00882 
00883    // Allocate space for the task's input FLA_Obj arguments.
00884    t->input_arg  = (FLA_Obj *) FLA_malloc( n_input_args * sizeof(FLA_Obj) );
00885 
00886    // Allocate space for the task's output FLA_Obj arguments.
00887    t->output_arg = (FLA_Obj *) FLA_malloc( n_output_args * sizeof(FLA_Obj) );
00888    
00889    // Initialize other fields of the structure.
00890    t->n_ready       = 0;
00891    t->order         = 0;
00892    t->queue         = 0;
00893    t->height        = 0;
00894    t->thread        = 0;
00895    t->cache         = 0;
00896    t->hit           = FALSE;
00897 
00898    t->func          = func;
00899    t->cntl          = cntl;
00900    t->name          = name;
00901    t->n_int_args    = n_int_args;
00902    t->n_fla_args    = n_fla_args;
00903    t->n_input_args  = n_input_args;
00904    t->n_output_args = n_output_args;
00905    
00906    t->n_war_args    = 0;
00907    t->n_dep_args    = 0;
00908    t->dep_arg_head  = NULL;
00909    t->dep_arg_tail  = NULL;
00910    t->prev_task     = NULL;
00911    t->next_task     = NULL;
00912    t->prev_wait     = NULL;
00913    t->next_wait     = NULL;
00914    
00915    // Return a pointer to the initialized structure.
00916    return t;
00917 }

void FLASH_Task_free ( FLASH_Task *  t  ) 

References FLA_Obj_view::base, FLASH_Task_s::dep_arg_head, FLASH_Task_s::fla_arg, FLA_free(), FLA_Obj_free_task(), FLASH_Task_s::func, FLASH_Task_s::input_arg, FLASH_Task_s::int_arg, FLASH_Task_s::n_dep_args, FLASH_Task_s::n_input_args, FLASH_Task_s::n_output_args, FLA_Obj_struct::n_read_tasks, FLASH_Dep_s::next_dep, FLASH_Task_s::output_arg, FLA_Obj_struct::read_task_head, FLA_Obj_struct::read_task_tail, and FLA_Obj_struct::write_task.

Referenced by FLASH_Queue_exec(), and FLASH_Queue_exec_simulation().

// Release task `t` and the bookkeeping it left on the blocks it touched.
//
// Steps, in order:
//   1. Unless t->func is FLA_Obj_free_task, set write_task to NULL on every
//      output block.
//   2. For every input block, detach its read-task list (count and head/tail
//      pointers reset) and free each FLASH_Dep node on it.
//   3. Free every FLASH_Dep node on t's own dep_arg list.
//   4. Free the int_arg, fla_arg, input_arg and output_arg arrays, then t.
//
// `t` must not be used after this call. No locking is performed here; see
// FLASH_Task_free_parallel() for the variant that takes a lock in step 2.
00926 {
00927    int        i, j, k;
00928    FLASH_Dep* d;
00929    FLASH_Dep* next_dep;
00930 
00931    // Do not clear if the block has been free'd.
00932    if ( t->func != (void *) FLA_Obj_free_task )
00933    {
00934       // Clearing the last write task in each output block.
00935       for ( i = 0; i < t->n_output_args; i++ )
00936          t->output_arg[i].base->write_task = NULL;
00937    }
00938 
00939    // Cleaning the last read tasks in each input block.
00940    for ( i = 0; i < t->n_input_args; i++ )
00941    {
00942       k = t->input_arg[i].base->n_read_tasks;
00943       d = t->input_arg[i].base->read_task_head;
00944 
00945       t->input_arg[i].base->n_read_tasks   = 0;
00946       t->input_arg[i].base->read_task_head = NULL;
00947       t->input_arg[i].base->read_task_tail = NULL;
00948 
00949       for ( j = 0; j < k; j++ )
00950       {
00951          next_dep = d->next_dep; // Save the link before the node is freed.
00952          FLA_free( d );
00953          d = next_dep;
00954       }
00955    }
00956 
00957    // Free the dep_arg field of t.
00958    d = t->dep_arg_head;
00959 
00960    for ( i = 0; i < t->n_dep_args; i++ )
00961    {
00962       next_dep = d->next_dep;
00963       FLA_free( d );
00964       d = next_dep;
00965    }   
00966 
00967    // Free the int_arg field of t.
00968    FLA_free( t->int_arg );
00969    
00970    // Free the fla_arg field of t.
00971    FLA_free( t->fla_arg );
00972 
00973    // Free the input_arg field of t.
00974    FLA_free( t->input_arg );
00975 
00976    // Free the output_arg field of t.
00977    FLA_free( t->output_arg );
00978 
00979    // Finally, free the struct itself.
00980    FLA_free( t );
00981 
00982    return;
00983 }

void FLASH_Task_free_parallel ( FLASH_Task *  t,
void *  arg 
)

References FLA_Obj_view::base, FLASH_Task_s::dep_arg_head, FLASH_Task_s::fla_arg, FLA_free(), FLA_Lock_acquire(), FLA_Lock_release(), FLA_Obj_free_task(), FLASH_Queue_get_num_threads(), FLASH_Task_s::func, FLASH_Task_s::input_arg, FLASH_Task_s::int_arg, FLASH_Task_s::n_dep_args, FLASH_Task_s::n_input_args, FLASH_Task_s::n_output_args, FLA_Obj_struct::n_read_blocks, FLA_Obj_struct::n_read_tasks, FLASH_Dep_s::next_dep, FLASH_Task_s::output_arg, FLA_Obj_struct::read_task_head, FLA_Obj_struct::read_task_tail, FLASH_Queue_variables::war_lock, and FLA_Obj_struct::write_task.

Referenced by FLASH_Queue_exec_parallel_function().

// Release task `t` and the bookkeeping it left on the blocks it touched,
// taking a lock while each input block's read-task list is detached.
//
//    t   - task to free; must not be used after this call.
//    arg - pointer to the FLASH_Queue_vars holding the war_lock array.
//
// Steps, in order:
//   1. Unless t->func is FLA_Obj_free_task, set write_task to NULL on every
//      output block (no lock is taken for this step).
//   2. For every input block, acquire war_lock[n_read_blocks % n_threads],
//      detach the block's read-task list (count and head/tail reset), release
//      the lock, and then free the detached FLASH_Dep nodes.
//   3. Free every FLASH_Dep node on t's own dep_arg list.
//   4. Free the int_arg, fla_arg, input_arg and output_arg arrays, then t.
00688 {
00689    FLASH_Queue_vars* args = ( FLASH_Queue_vars* ) arg;   
00690    int        i, j, k;
00691    int        thread;
00692    int        n_threads = FLASH_Queue_get_num_threads();
00693    FLASH_Dep* d;
00694    FLASH_Dep* next_dep;
00695 
00696    // Do not clear if the block has been free'd.
00697    if ( t->func != (void *) FLA_Obj_free_task )
00698    {   
00699       // Clearing the last write task in each output block.
00700       for ( i = 0; i < t->n_output_args; i++ )
00701          t->output_arg[i].base->write_task = NULL;
00702    }
00703 
00704    // Cleaning the last read tasks in each input block.
00705    for ( i = 0; i < t->n_input_args; i++ )
00706    {
00707       thread = t->input_arg[i].base->n_read_blocks % n_threads;
00708 
00709       FLA_Lock_acquire( &(args->war_lock[thread]) ); // W ***
00710 
00711       k = t->input_arg[i].base->n_read_tasks;
00712       d = t->input_arg[i].base->read_task_head;
00713 
00714       t->input_arg[i].base->n_read_tasks   = 0;
00715       t->input_arg[i].base->read_task_head = NULL;
00716       t->input_arg[i].base->read_task_tail = NULL;
00717 
00718       FLA_Lock_release( &(args->war_lock[thread]) ); // W ***
00719 
00720       for ( j = 0; j < k; j++ ) // The detached nodes are freed outside the lock.
00721       {
00722          next_dep = d->next_dep;
00723          FLA_free( d );
00724          d = next_dep;
00725       }
00726    }
00727 
00728    // Free the dep_arg field of t.
00729    d = t->dep_arg_head;
00730 
00731    for ( i = 0; i < t->n_dep_args; i++ )
00732    {
00733       next_dep = d->next_dep;
00734       FLA_free( d );
00735       d = next_dep;
00736    }
00737    
00738    // Free the int_arg field of t.
00739    FLA_free( t->int_arg );
00740    
00741    // Free the fla_arg field of t.
00742    FLA_free( t->fla_arg );
00743 
00744    // Free the input_arg field of t.
00745    FLA_free( t->input_arg );
00746 
00747    // Free the output_arg field of t.
00748    FLA_free( t->output_arg );
00749 
00750    // Finally, free the struct itself.
00751    FLA_free( t );
00752 
00753    return;
00754 }

FLASH_Task* FLASH_Task_update_dependencies ( FLASH_Task *  t,
void *  arg 
)

References FLASH_Task_s::dep_arg_head, FLASH_Queue_variables::dep_lock, FLA_Lock_acquire(), FLA_Lock_release(), FLASH_Queue_get_num_threads(), FLASH_Queue_wait_enqueue(), FLASH_Task_s::n_dep_args, FLASH_Task_s::n_ready, FLASH_Dep_s::next_dep, FLASH_Task_s::order, FLASH_Task_s::queue, FLASH_Queue_variables::run_lock, and FLASH_Dep_s::task.

Referenced by FLASH_Queue_exec_parallel_function().

// Notify every task on t's dep_arg list that `t` is done.
//
//    t   - task whose dependents are updated.
//    arg - pointer to the FLASH_Queue_vars holding the dep_lock and run_lock
//          arrays and the waiting queues.
//
// For each dependent task, n_ready is decremented while holding
// dep_lock[task->order % n_threads]. When the count reaches zero the task is
// placed on its waiting queue with FLASH_Queue_wait_enqueue() while holding
// run_lock[task->queue]. The two locks are never held at the same time.
//
// Always returns NULL.
00640 {
00641    FLASH_Queue_vars* args = ( FLASH_Queue_vars* ) arg;
00642    int         i, queue, thread;
00643    int         n_threads = FLASH_Queue_get_num_threads();
00644    FLA_Bool    available;
00645    FLASH_Task* task;
00646    FLASH_Dep*  d;
00647 
00648    // Check each dependent task.
00649    d = t->dep_arg_head;
00650    
00651    for ( i = 0; i < t->n_dep_args; i++ )
00652    {
00653       task   = d->task;
00654       queue  = task->queue;
00655       thread = task->order % n_threads; // Selects which dep_lock guards this task.
00656 
00657       FLA_Lock_acquire( &(args->dep_lock[thread]) ); // D ***
00658       
00659       task->n_ready--;
00660       available = ( task->n_ready == 0 );
00661       
00662       FLA_Lock_release( &(args->dep_lock[thread]) ); // D ***
00663       
00664       // Place newly ready tasks on sorted queue.
00665       if ( available )
00666       {
00667          FLA_Lock_acquire( &(args->run_lock[queue]) ); // R ***
00668          
00669          FLASH_Queue_wait_enqueue( task, arg );
00670          
00671          FLA_Lock_release( &(args->run_lock[queue]) ); // R ***
00672       }
00673       
00674       // Go to the next dep.
00675       d = d->next_dep;
00676    }
00677 
00678    return NULL;
00679 }


Generated on Mon Jul 6 05:45:53 2009 for libflame by  doxygen 1.5.9