FLASH_Obj_exec.c File Reference

(r)


Data Structures

struct  FLASH_Obj_variables

Typedefs

typedef struct FLASH_Obj_variables FLASH_Obj_vars

Functions

void FLASH_Obj_exec (void *func, FLASH_Obj_queue *queue)
void * FLASH_Obj_exec_parallel (void *arg)
void FLASH_Obj_push (int direction, FLA_Obj alpha, FLA_Obj F, FLA_Obj H, FLASH_Obj_queue *queue)

Typedef Documentation


Function Documentation

void FLASH_Obj_exec ( void *  func,
FLASH_Obj_queue *  queue 
)

References FLASH_Thread_s::args, FLA_Check_error_level(), FLA_Check_pthread_create_result(), FLA_Check_pthread_join_result(), FLA_Lock_destroy(), FLA_Lock_init(), FLASH_Obj_exec_parallel(), FLASH_Queue_get_num_threads(), FLASH_Obj_variables::func, FLASH_Thread_s::id, FLASH_Obj_variables::lock, FLASH_Obj_queue_s::n_tasks, and FLASH_Obj_variables::queue.

Referenced by FLASH_Axpy_hierarchy(), and FLASH_Copy_hierarchy().

00054 {  // Hands the tasks on queue to worker threads that run FLASH_Obj_exec_parallel().
00055    int i;
00056    int n_threads = FLASH_Queue_get_num_threads();
00057    void* (*thread_entry_point)( void* );
00058 #ifdef FLA_ENABLE_WINDOWS_BUILD
00059    FLASH_Thread* thread;
00060 #endif
00061 
00062    // The structure that holds the execution variables shared by all threads.
00063    FLASH_Obj_vars args;
00064    
00065    // Return if there are no tasks.
00066    if ( queue->n_tasks == 0 )
00067       return;
00068 
00069    // Determine which function to send threads to.
00070    thread_entry_point = FLASH_Obj_exec_parallel;
00071 
00072    // Save the queue and function pointer.
00073    args.func  = func;
00074    args.queue = queue;
00075 
00076    // Initialize the lock.
00077    FLA_Lock_init( &(args.lock) );
00078 
00079    // Allocate the thread structures array of length n_threads on the stack:
00080    // via _alloca() on Windows builds, or as a variable-length array
00081    // otherwise. The fields of each structure are set by the loop below.
00082 #ifdef FLA_ENABLE_WINDOWS_BUILD
00083    thread = ( FLASH_Thread* ) _alloca( n_threads * sizeof( FLASH_Thread ) );
00084 #else
00085    FLASH_Thread thread[n_threads];
00086 #endif
00087 
00088    // Initialize the thread structures array.
00089    for ( i = 0; i < n_threads; i++ )
00090    {
00091       // Save the thread's identifier.
00092       thread[i].id = i;
00093 
00094       // Every thread receives a pointer to the same shared args structure.
00095       thread[i].args = ( void* ) &args;
00096 
00097       // The pthread object, if it was even compiled into the FLASH_Thread
00098       // structure, will be initialized by the pthread implementation when we
00099       // call pthread_create() and does not need to be touched at this time.
00100    }
00101 
00102 #if FLA_MULTITHREADING_MODEL == FLA_OPENMP
00103 
00104    // An OpenMP parallel for region spawns n_threads threads. Each thread
00105    // executes the work function with a different FLASH_Thread argument.
00106    // An implicit synchronization point exists at the end of the curly
00107    // brace scope.
00108    #pragma omp parallel for \
00109            private( i ) \
00110            shared( thread, n_threads, thread_entry_point ) \
00111            schedule( static, 1 ) \
00112            num_threads( n_threads )
00113    for ( i = 0; i < n_threads; ++i )
00114    {
00115       thread_entry_point( ( void* ) &thread[i] );
00116    }
00117 
00118 #elif FLA_MULTITHREADING_MODEL == FLA_PTHREADS
00119 
00120    // Create each POSIX thread needed in addition to the main thread.
00121    for ( i = 1; i < n_threads; i++ )
00122    {
00123       int pthread_e_val;
00124 
00125       // Create thread i with default attributes.
00126       pthread_e_val = pthread_create( &(thread[i].pthread_obj),
00127                                       NULL,
00128                                       thread_entry_point,
00129                                       ( void* ) &thread[i] );
00130       if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
00131       {
00132          FLA_Error e_val = FLA_Check_pthread_create_result( pthread_e_val );
00133          FLA_Check_error_code( e_val );
00134       }
00135    }
00136 
00137    // The main thread is assigned the role of thread 0. Here we manually
00138    // execute it as a worker thread.
00139    thread_entry_point( ( void* ) &thread[0] );
00140 
00141    // Wait for non-main threads to finish.
00142    for ( i = 1; i < n_threads; i++ )
00143    {
00144       // These two variables are declared local to this for loop since this
00145       // is the only place they are needed, and since at function scope they
00146       // would be unused variables when FLA_MULTITHREADING_MODEL != FLA_PTHREADS.
00147       // Strangely, the Intel compiler produces code that results in an
00148       // "unaligned access" runtime message if thread_status is declared as
00149       // an int. Declaring it as a long or void* appears to force the
00150       // compiler (not surprisingly) into aligning it to an 8-byte boundary.
00151       int   pthread_e_val;
00152       void* thread_status;
00153 
00154       // Wait for thread i to invoke its respective pthread_exit().
00155       // The return value passed to pthread_exit() is provided to us
00156       // via thread_status, if one was given.
00157       pthread_e_val = pthread_join( thread[i].pthread_obj,
00158                                     ( void** ) &thread_status );
00159 
00160       if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
00161       {
00162          FLA_Error e_val = FLA_Check_pthread_join_result( pthread_e_val );
00163          FLA_Check_error_code( e_val );
00164       }
00165    }
00166 
00167 #endif
00168 
00169    // Destroy the lock.
00170    FLA_Lock_destroy( &(args.lock) );
00171 
00172    return;
00173 }

void* FLASH_Obj_exec_parallel ( void *  arg  ) 

References FLASH_Obj_task_s::alpha, FLASH_Thread_s::args, FLASH_Obj_task_s::direction, FLASH_Obj_task_s::F, FLA_Axpy_external(), FLA_Copy_external(), FLA_free(), FLA_Lock_acquire(), FLA_Lock_release(), FLASH_Axpy_hierarchy(), FLASH_Copy_hierarchy(), FLASH_Obj_variables::func, FLASH_Obj_task_s::H, FLASH_Obj_queue_s::head, FLASH_Thread_s::id, FLASH_Obj_variables::lock, FLASH_Obj_queue_s::n_tasks, FLASH_Obj_task_s::next_task, FLASH_Obj_task_s::prev_task, FLASH_Obj_variables::queue, and FLASH_Obj_queue_s::tail.

Referenced by FLASH_Obj_exec().

00177 {  // Worker loop: repeatedly dequeues one task under the lock, then executes and frees it.
00178    FLASH_Thread*   me;
00179    FLASH_Obj_vars* args;
00180    FLASH_Obj_task* t;
00181    FLA_Bool        condition = TRUE;
00182 
00183    // Interpret the thread argument as what it really is--a pointer to an
00184    // FLASH_Thread structure.
00185    me = ( FLASH_Thread* ) arg;
00186 
00187    // Extract the variables from the current thread.
00188    args = ( FLASH_Obj_vars* ) me->args;
00189    
00190    // Loop until no tasks remain on the queue.
00191    while ( condition )
00192    {
00193       t = NULL;
00194 
00195       FLA_Lock_acquire( &(args->lock) ); // Begin critical section: the queue is accessed only under this lock.
00196 
00197       // If there are no more tasks, stop execution.
00198       if ( args->queue->n_tasks == 0 )
00199       {
00200          condition = FALSE;
00201       }
00202       else
00203       {
00204          // Dequeue the first task.
00205          t = args->queue->head;
00206 
00207          if ( args->queue->n_tasks == 1 )
00208          {
00209             // Clear the queue of its only task.
00210             args->queue->head = NULL;
00211             args->queue->tail = NULL;
00212          }
00213          else
00214          {
00215             // Adjust pointers in queue.
00216             args->queue->head = t->next_task;
00217             args->queue->head->prev_task = NULL;
00218          }
00219 
00220          // Decrement number of tasks on queue.
00221          args->queue->n_tasks--;
00222       }
00223 
00224       FLA_Lock_release( &(args->lock) ); // End critical section.
00225 
00226       // Execute the task, if one was dequeued (an unrecognized direction is silently a no-op).
00227       if ( t != NULL )
00228       {
00229          // FLASH_Axpy
00230          if      ( args->func == (void *) FLASH_Axpy_hierarchy )
00231          {
00232             if      ( t->direction == FLA_FLAT_TO_HIER )
00233             {
00234                FLA_Axpy_external( t->alpha, t->F, t->H );
00235             }
00236             else if ( t->direction == FLA_HIER_TO_FLAT )
00237             {
00238                FLA_Axpy_external( t->alpha, t->H, t->F );
00239             }
00240          }
00241          // FLASH_Copy
00242          else if ( args->func == (void *) FLASH_Copy_hierarchy )
00243          {
00244             if      ( t->direction == FLA_FLAT_TO_HIER )
00245             {
00246                FLA_Copy_external( t->F, t->H );
00247             }
00248             else if ( t->direction == FLA_HIER_TO_FLAT )
00249             {
00250                FLA_Copy_external( t->H, t->F );
00251             }
00252          }
00253          else
00254          {
00255             FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED ); // func is neither of the two handled operations.
00256          }
00257 
00258          // Free the task structure; this happens outside the lock.
00259          FLA_free( t );
00260       }
00261    }
00262 
00263 #if FLA_MULTITHREADING_MODEL == FLA_PTHREADS
00264    // If this is a non-main thread, then exit with a zero (normal) error code.
00265    // The main thread (id 0) must not call pthread_exit() because
00266    // pthread_exit() never returns. The main thread must proceed so it can
00267    // oversee the joining of the exited non-main pthreads.
00268    if ( me->id != 0 )
00269       pthread_exit( ( void* ) NULL );
00270 #endif
00271 
00272    return ( void* ) NULL;
00273 }

void FLASH_Obj_push ( int  direction,
FLA_Obj  alpha,
FLA_Obj  F,
FLA_Obj  H,
FLASH_Obj_queue *  queue 
)

References FLASH_Obj_task_s::alpha, FLASH_Obj_task_s::direction, FLASH_Obj_task_s::F, FLA_malloc(), FLASH_Obj_task_s::H, FLASH_Obj_queue_s::head, FLASH_Obj_queue_s::n_tasks, FLASH_Obj_task_s::next_task, FLASH_Obj_task_s::prev_task, and FLASH_Obj_queue_s::tail.

Referenced by FLASH_Axpy_hierarchy_r(), and FLASH_Copy_hierarchy_r().

00278 {  // Allocates a task for the given operands and appends it to the tail of queue.
00279    FLASH_Obj_task* t = (FLASH_Obj_task *) FLA_malloc( sizeof(FLASH_Obj_task) );
00280 
00281    // Initialize the task elements. NOTE(review): t is not NULL-checked here; confirm FLA_malloc() cannot return NULL.
00282    t->direction = direction;
00283    t->alpha     = alpha;
00284    t->F         = F;
00285    t->H         = H;
00286    t->prev_task = NULL;
00287    t->next_task = NULL;
00288   
00289    // Add the task to the tail of the queue (and the head if queue is empty).
00290    // No lock is taken in this routine.
00291    {
00292       queue->head = t;
00293       queue->tail = t;
00294    }
00295    else
00296    {
00297       t->prev_task = queue->tail;
00298       queue->tail->next_task = t;
00299       queue->tail            = t;
00300    }
00301 
00302    // Increment the number of tasks.
00303    queue->n_tasks++;
00304 
00305    return;
00306 }


Generated on Mon Jul 6 05:45:51 2009 for libflame by  doxygen 1.5.9