Data Structures | |
struct | FLASH_Obj_variables |
Typedefs | |
typedef struct FLASH_Obj_variables | FLASH_Obj_vars |
Functions | |
void | FLASH_Obj_exec (void *func, FLASH_Obj_queue *queue) |
void * | FLASH_Obj_exec_parallel (void *arg) |
void | FLASH_Obj_push (int direction, FLA_Obj alpha, FLA_Obj F, FLA_Obj H, FLASH_Obj_queue *queue) |
typedef struct FLASH_Obj_variables FLASH_Obj_vars |
void FLASH_Obj_exec( void * func, FLASH_Obj_queue * queue )
References FLASH_Thread_s::args, FLA_Check_error_level(), FLA_Check_pthread_create_result(), FLA_Check_pthread_join_result(), FLA_Lock_destroy(), FLA_Lock_init(), FLASH_Obj_exec_parallel(), FLASH_Queue_get_num_threads(), FLASH_Obj_variables::func, FLASH_Thread_s::id, FLASH_Obj_variables::lock, FLASH_Obj_queue_s::n_tasks, and FLASH_Obj_variables::queue.
Referenced by FLASH_Axpy_hierarchy(), and FLASH_Copy_hierarchy().
00054 { 00055 int i; 00056 int n_threads = FLASH_Queue_get_num_threads(); 00057 void* (*thread_entry_point)( void* ); 00058 #ifdef FLA_ENABLE_WINDOWS_BUILD 00059 FLASH_Thread* thread; 00060 #endif 00061 00062 // The structre to save all the execution variables. 00063 FLASH_Obj_vars args; 00064 00065 // Return if there are no tasks. 00066 if ( queue->n_tasks == 0 ) 00067 return; 00068 00069 // Determine which function to send threads to. 00070 thread_entry_point = FLASH_Obj_exec_parallel; 00071 00072 // Save the queue and function pointer. 00073 args.func = func; 00074 args.queue = queue; 00075 00076 // Initialize the lock. 00077 FLA_Lock_init( &(args.lock) ); 00078 00079 // Allocate the thread structures array. Here, an array of FLASH_Thread 00080 // structures of length n_threads is allocated and the fields of each 00081 // structure set to appropriate values. 00082 #ifdef FLA_ENABLE_WINDOWS_BUILD 00083 thread = ( FLASH_Thread* ) _alloca( n_threads * sizeof( FLASH_Thread ) ); 00084 #else 00085 FLASH_Thread thread[n_threads]; 00086 #endif 00087 00088 // Initialize the thread structures array. 00089 for ( i = 0; i < n_threads; i++ ) 00090 { 00091 // Save the thread's identifier. 00092 thread[i].id = i; 00093 00094 // Save the pointer to the necessary variables with the thread. 00095 thread[i].args = ( void* ) &args; 00096 00097 // The pthread object, if it was even compiled into the FLASH_Thread 00098 // structure, will be initialized by the pthread implementation when we 00099 // call pthread_create() and does not need to be touched at this time. 00100 } 00101 00102 #if FLA_MULTITHREADING_MODEL == FLA_OPENMP 00103 00104 // An OpenMP parallel for region spawns n_threads threads. Each thread 00105 // executes the work function with a different FLASH_Thread argument. 00106 // An implicit synchronization point exists at the end of the curly 00107 // brace scope. 
00108 #pragma omp parallel for \ 00109 private( i ) \ 00110 shared( thread, n_threads, thread_entry_point ) \ 00111 schedule( static, 1 ) \ 00112 num_threads( n_threads ) 00113 for ( i = 0; i < n_threads; ++i ) 00114 { 00115 thread_entry_point( ( void* ) &thread[i] ); 00116 } 00117 00118 #elif FLA_MULTITHREADING_MODEL == FLA_PTHREADS 00119 00120 // Create each POSIX thread needed in addition to the main thread. 00121 for ( i = 1; i < n_threads; i++ ) 00122 { 00123 int pthread_e_val; 00124 00125 // Create thread i with default attributes. 00126 pthread_e_val = pthread_create( &(thread[i].pthread_obj), 00127 NULL, 00128 thread_entry_point, 00129 ( void* ) &thread[i] ); 00130 if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) 00131 { 00132 FLA_Error e_val = FLA_Check_pthread_create_result( pthread_e_val ); 00133 FLA_Check_error_code( e_val ); 00134 } 00135 } 00136 00137 // The main thread is assigned the role of thread 0. Here we manually 00138 // execute it as a worker thread. 00139 thread_entry_point( ( void* ) &thread[0] ); 00140 00141 // Wait for non-main threads to finish. 00142 for ( i = 1; i < n_threads; i++ ) 00143 { 00144 // These two variables are declared local to this for loop since this 00145 // is the only place they are needed, and since they would show up as 00146 // unused variables if FLA_MULTITHREADING_MODEL == FLA_PTHREADS. 00147 // Strangely, the Intel compiler produces code that results in an 00148 // "unaligned access" runtime message if thread_status is declared as 00149 // an int. Declaring it as a long or void* appears to force the 00150 // compiler (not surprisingly) into aligning it to an 8-byte boundary. 00151 int pthread_e_val; 00152 void* thread_status; 00153 00154 // Wait for thread i to invoke its respective pthread_exit(). 00155 // The return value passed to pthread_exit() is provided to us 00156 // via status, if one was given. 
00157 pthread_e_val = pthread_join( thread[i].pthread_obj, 00158 ( void** ) &thread_status ); 00159 00160 if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) 00161 { 00162 FLA_Error e_val = FLA_Check_pthread_join_result( pthread_e_val ); 00163 FLA_Check_error_code( e_val ); 00164 } 00165 } 00166 00167 #endif 00168 00169 // Destroy the lock. 00170 FLA_Lock_destroy( &(args.lock) ); 00171 00172 return; 00173 }
void* FLASH_Obj_exec_parallel( void * arg )
References FLASH_Obj_task_s::alpha, FLASH_Thread_s::args, FLASH_Obj_task_s::direction, FLASH_Obj_task_s::F, FLA_Axpy_external(), FLA_Copy_external(), FLA_free(), FLA_Lock_acquire(), FLA_Lock_release(), FLASH_Axpy_hierarchy(), FLASH_Copy_hierarchy(), FLASH_Obj_variables::func, FLASH_Obj_task_s::H, FLASH_Obj_queue_s::head, FLASH_Thread_s::id, FLASH_Obj_variables::lock, FLASH_Obj_queue_s::n_tasks, FLASH_Obj_task_s::next_task, FLASH_Obj_task_s::prev_task, FLASH_Obj_variables::queue, and FLASH_Obj_queue_s::tail.
Referenced by FLASH_Obj_exec().
00177 { 00178 FLASH_Thread* me; 00179 FLASH_Obj_vars* args; 00180 FLASH_Obj_task* t; 00181 FLA_Bool condition = TRUE; 00182 00183 // Interpret the thread argument as what it really is--a pointer to an 00184 // FLASH_Thread structure. 00185 me = ( FLASH_Thread* ) arg; 00186 00187 // Extract the variables from the current thread. 00188 args = ( FLASH_Obj_vars* ) me->args; 00189 00190 // Loop until all the tasks have committed. 00191 while ( condition ) 00192 { 00193 t = NULL; 00194 00195 FLA_Lock_acquire( &(args->lock) ); // L *** 00196 00197 // If there are no more tasks, stop execution. 00198 if ( args->queue->n_tasks == 0 ) 00199 { 00200 condition = FALSE; 00201 } 00202 else 00203 { 00204 // Dequeue the first task. 00205 t = args->queue->head; 00206 00207 if ( args->queue->n_tasks == 1 ) 00208 { 00209 // Clear the queue of its only task. 00210 args->queue->head = NULL; 00211 args->queue->tail = NULL; 00212 } 00213 else 00214 { 00215 // Adjust pointers in queue. 00216 args->queue->head = t->next_task; 00217 args->queue->head->prev_task = NULL; 00218 } 00219 00220 // Decrement number of tasks on queue. 00221 args->queue->n_tasks--; 00222 } 00223 00224 FLA_Lock_release( &(args->lock) ); // L *** 00225 00226 // Execute the task. 
00227 if ( t != NULL ) 00228 { 00229 // FLASH_Axpy 00230 if ( args->func == (void *) FLASH_Axpy_hierarchy ) 00231 { 00232 if ( t->direction == FLA_FLAT_TO_HIER ) 00233 { 00234 FLA_Axpy_external( t->alpha, t->F, t->H ); 00235 } 00236 else if ( t->direction == FLA_HIER_TO_FLAT ) 00237 { 00238 FLA_Axpy_external( t->alpha, t->H, t->F ); 00239 } 00240 } 00241 // FLASH_Copy 00242 else if ( args->func == (void *) FLASH_Copy_hierarchy ) 00243 { 00244 if ( t->direction == FLA_FLAT_TO_HIER ) 00245 { 00246 FLA_Copy_external( t->F, t->H ); 00247 } 00248 else if ( t->direction == FLA_HIER_TO_FLAT ) 00249 { 00250 FLA_Copy_external( t->H, t->F ); 00251 } 00252 } 00253 else 00254 { 00255 FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED ); 00256 } 00257 00258 // Free the task structure in parallel. 00259 FLA_free( t ); 00260 } 00261 } 00262 00263 #if FLA_MULTITHREADING_MODEL == FLA_PTHREADS 00264 // If this is a non-main thread, then exit with a zero (normal) error code. 00265 // The main thread cannot call pthread_exit() because this routine never 00266 // returns. The main thread must proceed so it can oversee the joining of 00267 // the exited non-main pthreads. 00268 if ( me->id != 0 ) 00269 pthread_exit( ( void* ) NULL ); 00270 #endif 00271 00272 return ( void* ) NULL; 00273 }
void FLASH_Obj_push( int direction, FLA_Obj alpha, FLA_Obj F, FLA_Obj H, FLASH_Obj_queue * queue )
References FLASH_Obj_task_s::alpha, FLASH_Obj_task_s::direction, FLASH_Obj_task_s::F, FLA_malloc(), FLASH_Obj_task_s::H, FLASH_Obj_queue_s::head, FLASH_Obj_queue_s::n_tasks, FLASH_Obj_task_s::next_task, FLASH_Obj_task_s::prev_task, and FLASH_Obj_queue_s::tail.
Referenced by FLASH_Axpy_hierarchy_r(), and FLASH_Copy_hierarchy_r().
00278 { 00279 FLASH_Obj_task* t = (FLASH_Obj_task *) FLA_malloc( sizeof(FLASH_Obj_task) ); 00280 00281 // Initialize the task elements. 00282 t->direction = direction; 00283 t->alpha = alpha; 00284 t->F = F; 00285 t->H = H; 00286 t->prev_task = NULL; 00287 t->next_task = NULL; 00288 00289 // Add the task to the tail of the queue (and the head if queue is empty). 00290 if ( queue->n_tasks == 0 ) 00291 { 00292 queue->head = t; 00293 queue->tail = t; 00294 } 00295 else 00296 { 00297 t->prev_task = queue->tail; 00298 queue->tail->next_task = t; 00299 queue->tail = t; 00300 } 00301 00302 // Increment the number of tasks. 00303 queue->n_tasks++; 00304 00305 return; 00306 }