Go to the source code of this file.
void FLASH_Queue_begin | ( | void | ) |
References FLA_Clock().
Referenced by FLASH_Apply_Q_UT(), FLASH_Apply_Q_UT_inc(), FLASH_Apply_Q_UT_UD(), FLASH_Chol(), FLASH_Gemm(), FLASH_Hemm(), FLASH_Her2k(), FLASH_Herk(), FLASH_LU_incpiv(), FLASH_LU_nopiv(), FLASH_QR_UT_inc_noopt(), FLASH_QR_UT_inc_opt1(), FLASH_QR_UT_UD(), FLASH_SPDinv(), FLASH_Sylv(), FLASH_Symm(), FLASH_Syr2k(), FLASH_Syrk(), FLASH_Trinv(), FLASH_Trmm(), FLASH_Trsm(), and FLASH_Ttmm().
00081 { 00082 #ifdef FLA_ENABLE_SUPERMATRIX 00083 if ( flash_queue_stack == 0 ) 00084 { 00085 // Reset the value of the parallel execution timer. 00086 flash_queue_parallel_time = 0.0; 00087 00088 // Save the starting time for the total execution time. 00089 flash_queue_total_time = FLA_Clock(); 00090 } 00091 #endif 00092 00093 // Push onto the stack. 00094 flash_queue_stack++; 00095 00096 return; 00097 }
FLA_Error FLASH_Queue_disable | ( | void | ) |
Referenced by FLASH_Axpy(), FLASH_Copy(), FLASH_FS_incpiv(), FLASH_Gemv(), and FLASH_Trsv().
00158 { 00159 #ifdef FLA_ENABLE_SUPERMATRIX 00160 if ( flash_queue_stack == 0 ) 00161 { 00162 // Disable if not begin parallel region yet. 00163 flash_queue_enabled = FALSE; 00164 return FLA_SUCCESS; 00165 } 00166 else 00167 { 00168 // Cannot change status during parallel region. 00169 return FLA_FAILURE; 00170 } 00171 #else 00172 // Allow disabling enqueuing even when SuperMatrix is not configured. 00173 flash_queue_enabled = FALSE; 00174 return FLA_SUCCESS; 00175 #endif 00176 }
FLA_Error FLASH_Queue_enable | ( | void | ) |
Referenced by FLASH_Axpy(), FLASH_Copy(), FLASH_FS_incpiv(), FLASH_Gemv(), and FLASH_Trsv().
00131 { 00132 #ifdef FLA_ENABLE_SUPERMATRIX 00133 if ( flash_queue_stack == 0 ) 00134 { 00135 // Enable if not begin parallel region yet. 00136 flash_queue_enabled = TRUE; 00137 return FLA_SUCCESS; 00138 } 00139 else 00140 { 00141 // Cannot change status during parallel region. 00142 return FLA_FAILURE; 00143 } 00144 #else 00145 // Raise an exception when SuperMatrix is not configured. 00146 FLA_Check_error_code( FLA_SUPERMATRIX_NOT_ENABLED ); 00147 return FLA_FAILURE; 00148 #endif 00149 }
void FLASH_Queue_end | ( | void | ) |
References FLA_Clock(), and FLASH_Queue_exec().
Referenced by FLASH_Apply_Q_UT(), FLASH_Apply_Q_UT_inc(), FLASH_Apply_Q_UT_UD(), FLASH_Chol(), FLASH_Gemm(), FLASH_Hemm(), FLASH_Her2k(), FLASH_Herk(), FLASH_LU_incpiv(), FLASH_LU_nopiv(), FLASH_QR_UT_inc_noopt(), FLASH_QR_UT_inc_opt1(), FLASH_QR_UT_UD(), FLASH_SPDinv(), FLASH_Sylv(), FLASH_Symm(), FLASH_Syr2k(), FLASH_Syrk(), FLASH_Trinv(), FLASH_Trmm(), FLASH_Trsm(), and FLASH_Ttmm().
00106 { 00107 // Pop off the stack. 00108 flash_queue_stack--; 00109 00110 #ifdef FLA_ENABLE_SUPERMATRIX 00111 if ( flash_queue_stack == 0 ) 00112 { 00113 // Execute tasks if encounter the outermost parallel region. 00114 FLASH_Queue_exec(); 00115 00116 // Find the total execution time. 00117 flash_queue_total_time = FLA_Clock() - flash_queue_total_time; 00118 } 00119 #endif 00120 00121 return; 00122 }
void FLASH_Queue_exec | ( | void | ) |
References FLASH_Queue_variables::all_lock, FLASH_Queue_variables::dep_lock, FLA_Clock(), FLA_Lock_destroy(), FLA_Lock_init(), FLASH_Queue_exec_parallel(), FLASH_Queue_exec_simulation(), FLASH_Queue_get_data_affinity(), FLASH_Queue_get_head_task(), FLASH_Queue_get_num_tasks(), FLASH_Queue_get_num_threads(), FLASH_Queue_get_verbose_output(), FLASH_Queue_init_tasks(), FLASH_Queue_reset(), FLASH_Queue_set_parallel_time(), FLASH_Queue_verbose_output(), FLASH_Queue_visualization(), FLASH_Task_free(), FLASH_Queue_s::head, FLASH_Queue_s::n_tasks, FLASH_Task_s::next_task, FLASH_Queue_variables::pc, FLASH_Queue_variables::run_lock, FLASH_Queue_s::tail, FLASH_Queue_variables::wait_queue, and FLASH_Queue_variables::war_lock.
Referenced by FLASH_Queue_end().
00080 { 00081 FLA_Bool verbose = FLASH_Queue_get_verbose_output(); 00082 int n_tasks = FLASH_Queue_get_num_tasks(); 00083 int n_threads = FLASH_Queue_get_num_threads(); 00084 int n_memory; 00085 int i; 00086 double dtime; 00087 00088 #ifdef FLA_ENABLE_SUPERMATRIX_VISUALIZATION 00089 FLASH_Task* t; 00090 FLASH_Task* next; 00091 #endif 00092 00093 #ifdef FLA_ENABLE_WINDOWS_BUILD 00094 FLA_Lock* run_lock; 00095 FLA_Lock* dep_lock; 00096 FLA_Lock* war_lock; 00097 FLASH_Queue* wait_queue; 00098 #endif 00099 00100 // All the necessary variables for the SuperMatrix mechanism. 00101 FLASH_Queue_vars args; 00102 00103 // If the queue is empty, return early. 00104 if ( n_tasks == 0 ) 00105 return; 00106 00107 // Allocate different number of elements in arrays if using data affinity. 00108 n_memory = ( FLASH_Queue_get_data_affinity() == FLASH_QUEUE_AFFINITY_NONE ? 00109 1 : n_threads ); 00110 00111 #ifdef FLA_ENABLE_MULTITHREADING 00112 // Allocate memory for array of locks and the waiting queue. 00113 #ifdef FLA_ENABLE_WINDOWS_BUILD 00114 run_lock = ( FLA_Lock* ) _alloca( n_memory * sizeof( FLA_Lock ) ); 00115 dep_lock = ( FLA_Lock* ) _alloca( n_threads * sizeof( FLA_Lock ) ); 00116 war_lock = ( FLA_Lock* ) _alloca( n_threads * sizeof( FLA_Lock ) ); 00117 #else 00118 FLA_Lock run_lock[n_memory]; 00119 FLA_Lock dep_lock[n_threads]; 00120 FLA_Lock war_lock[n_threads]; 00121 #endif 00122 00123 args.run_lock = run_lock; 00124 args.dep_lock = dep_lock; 00125 args.war_lock = war_lock; 00126 00127 // Initialize the all lock. 00128 FLA_Lock_init( &(args.all_lock) ); 00129 00130 // Initialize the run lock for thread i. 00131 for ( i = 0; i < n_memory; i++ ) 00132 { 00133 FLA_Lock_init( &(args.run_lock[i]) ); 00134 } 00135 00136 // Initialize the dep and war locks for thread i. 00137 for ( i = 0; i < n_threads; i++ ) 00138 { 00139 FLA_Lock_init( &(args.dep_lock[i]) ); 00140 FLA_Lock_init( &(args.war_lock[i]) ); 00141 } 00142 #endif 00143 00144 // Allocate memory for waiting queue. 00145 #ifdef FLA_ENABLE_WINDOWS_BUILD 00146 wait_queue = ( FLASH_Queue* ) _alloca( n_memory * sizeof( FLASH_Queue ) ); 00147 #else 00148 FLASH_Queue wait_queue[n_memory]; 00149 #endif 00150 00151 args.wait_queue = wait_queue; 00152 00153 for ( i = 0; i < n_memory; i++ ) 00154 { 00155 args.wait_queue[i].n_tasks = 0; 00156 args.wait_queue[i].head = NULL; 00157 args.wait_queue[i].tail = NULL; 00158 } 00159 00160 // Initialize the aggregate task counter. 00161 args.pc = 0; 00162 00163 // Initialize tasks with critical information. 00164 FLASH_Queue_init_tasks( ( void* ) &args ); 00165 00166 // Display verbose output before free all tasks. 00167 if ( verbose ) 00168 FLASH_Queue_verbose_output(); 00169 00170 // Start timing the parallel execution. 00171 dtime = FLA_Clock(); 00172 00173 #ifdef FLA_ENABLE_MULTITHREADING 00174 // Parallel Execution! 00175 FLASH_Queue_exec_parallel( ( void* ) &args ); 00176 #else 00177 // Simulation! 00178 FLASH_Queue_exec_simulation( ( void* ) &args ); 00179 #endif 00180 00181 // End timing the parallel execution. 00182 dtime = FLA_Clock() - dtime; 00183 FLASH_Queue_set_parallel_time( dtime ); 00184 00185 #ifdef FLA_ENABLE_SUPERMATRIX_VISUALIZATION 00186 // Visualize all tasks. 00187 if ( !verbose ) 00188 FLASH_Queue_visualization(); 00189 00190 // Now that we're done with the task array, flush the queue. 00191 t = FLASH_Queue_get_head_task(); 00192 00193 for ( i = 0; i < n_tasks; i++ ) 00194 { 00195 // Obtain the next task. 00196 next = t->next_task; 00197 00198 // Free the current task. 00199 FLASH_Task_free( t ); 00200 00201 // Move to the next task. 00202 t = next; 00203 } 00204 #endif 00205 00206 #ifdef FLA_ENABLE_MULTITHREADING 00207 // Destroy the locks. 00208 FLA_Lock_destroy( &(args.all_lock) ); 00209 00210 for ( i = 0; i < n_memory; i++ ) 00211 { 00212 FLA_Lock_destroy( &(args.run_lock[i]) ); 00213 } 00214 00215 for ( i = 0; i < n_threads; i++ ) 00216 { 00217 FLA_Lock_destroy( &(args.dep_lock[i]) ); 00218 FLA_Lock_destroy( &(args.war_lock[i]) ); 00219 } 00220 #endif 00221 00222 // Reset values for next call to FLASH_Queue_exec(). 00223 FLASH_Queue_reset(); 00224 00225 return; 00226 }
void FLASH_Queue_exec_parallel | ( | void * | arg | ) |
References FLASH_Thread_s::args, FLA_Check_error_level(), FLA_Check_pthread_create_result(), FLA_Check_pthread_join_result(), FLASH_Queue_exec_parallel_function(), FLASH_Queue_get_num_threads(), and FLASH_Thread_s::id.
Referenced by FLASH_Queue_exec().
00429 { 00430 int i; 00431 int n_threads = FLASH_Queue_get_num_threads(); 00432 void* (*thread_entry_point)( void* ); 00433 00434 // Allocate the thread structures array. Here, an array of FLASH_Thread 00435 // structures of length n_threads is allocated and the fields of each 00436 // structure set to appropriate values. 00437 #ifdef FLA_ENABLE_WINDOWS_BUILD 00438 FLASH_Thread* thread = ( FLASH_Thread* ) _alloca( n_threads * sizeof( FLASH_Thread ) ); 00439 #else 00440 FLASH_Thread thread[n_threads]; 00441 #endif 00442 00443 // Initialize the thread structures array. 00444 for ( i = 0; i < n_threads; i++ ) 00445 { 00446 // Save the thread's identifier. 00447 thread[i].id = i; 00448 00449 // Save the pointer to the necessary variables with the thread. 00450 thread[i].args = arg; 00451 00452 // The pthread object, if it was even compiled into the FLASH_Thread 00453 // structure, will be initialized by the pthread implementation when we 00454 // call pthread_create() and does not need to be touched at this time. 00455 } 00456 00457 // Determine which function to send threads to. 00458 thread_entry_point = FLASH_Queue_exec_parallel_function; 00459 00460 #if FLA_MULTITHREADING_MODEL == FLA_OPENMP 00461 00462 // An OpenMP parallel for region spawns n_threads threads. Each thread 00463 // executes the work function with a different FLASH_Thread argument. 00464 // An implicit synchronization point exists at the end of the curly 00465 // brace scope. 00466 #pragma omp parallel for \ 00467 private( i ) \ 00468 shared( thread, n_threads, thread_entry_point ) \ 00469 schedule( static, 1 ) \ 00470 num_threads( n_threads ) 00471 for ( i = 0; i < n_threads; ++i ) 00472 { 00473 thread_entry_point( ( void* ) &thread[i] ); 00474 } 00475 00476 #elif FLA_MULTITHREADING_MODEL == FLA_PTHREADS 00477 00478 // Create each POSIX thread needed in addition to the main thread. 00479 for ( i = 1; i < n_threads; i++ ) 00480 { 00481 int pthread_e_val; 00482 00483 // Create thread i with default attributes. 00484 pthread_e_val = pthread_create( &(thread[i].pthread_obj), 00485 NULL, 00486 thread_entry_point, 00487 ( void* ) &thread[i] ); 00488 00489 if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) 00490 { 00491 FLA_Error e_val = FLA_Check_pthread_create_result( pthread_e_val ); 00492 FLA_Check_error_code( e_val ); 00493 } 00494 } 00495 00496 // The main thread is assigned the role of thread 0. Here we manually 00497 // execute it as a worker thread. 00498 thread_entry_point( ( void* ) &thread[0] ); 00499 00500 // Wait for non-main threads to finish. 00501 for ( i = 1; i < n_threads; i++ ) 00502 { 00503 // These two variables are declared local to this for loop since this 00504 // is the only place they are needed, and since they would show up as 00505 // unused variables if FLA_MULTITHREADING_MODEL == FLA_PTHREADS. 00506 // Strangely, the Intel compiler produces code that results in an 00507 // "unaligned access" runtime message if thread_status is declared as 00508 // an int. Declaring it as a long or void* appears to force the 00509 // compiler (not surprisingly) into aligning it to an 8-byte boundary. 00510 int pthread_e_val; 00511 void* thread_status; 00512 00513 // Wait for thread i to invoke its respective pthread_exit(). 00514 // The return value passed to pthread_exit() is provided to us 00515 // via status, if one was given. 00516 pthread_e_val = pthread_join( thread[i].pthread_obj, 00517 ( void** ) &thread_status ); 00518 00519 if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) 00520 { 00521 FLA_Error e_val = FLA_Check_pthread_join_result( pthread_e_val ); 00522 FLA_Check_error_code( e_val ); 00523 } 00524 } 00525 00526 #endif 00527 00528 return; 00529 }
void* FLASH_Queue_exec_parallel_function | ( | void * | arg | ) |
References FLASH_Thread_s::args, FLA_Lock_acquire(), FLA_Lock_release(), FLASH_Queue_exec_task(), FLASH_Queue_get_data_affinity(), FLASH_Queue_get_num_tasks(), FLASH_Queue_wait_dequeue(), FLASH_Task_free_parallel(), FLASH_Task_update_dependencies(), FLASH_Thread_s::id, and FLASH_Queue_variables::run_lock.
Referenced by FLASH_Queue_exec_parallel().
00546 { 00547 FLASH_Queue_vars* args; 00548 int i, queue; 00549 int n_tasks = FLASH_Queue_get_num_tasks(); 00550 FLA_Bool condition = TRUE; 00551 FLA_Bool available; 00552 FLASH_Task* t; 00553 FLASH_Thread* me; 00554 //cpu_set_t cpu_set; 00555 00556 // Interpret the thread argument as what it really is--a pointer to an 00557 // FLASH_Thread structure. 00558 me = ( FLASH_Thread* ) arg; 00559 00560 // Extract the variables from the current thread. 00561 args = ( FLASH_Queue_vars* ) me->args; 00562 00563 // Figure out the id of the current thread. 00564 i = me->id; 00565 00566 // Use different queues depending on if using data affinity or not. 00567 if ( FLASH_Queue_get_data_affinity() != FLASH_QUEUE_AFFINITY_NONE ) 00568 { 00569 queue = i; 00570 } 00571 else // No data affinity. 00572 { 00573 queue = 0; 00574 } 00575 00576 // Set the CPU affinity; We want the current thread i to run only on CPU i. 00577 //CPU_ZERO( &cpu_set ); 00578 //CPU_SET( i, &cpu_set ); 00579 //sched_setaffinity( syscall( __NR_gettid ), sizeof(cpu_set_t), &cpu_set ); 00580 00581 // Loop until all the tasks have committed. 00582 while ( condition ) 00583 { 00584 FLA_Lock_acquire( &(args->run_lock[queue]) ); // R *** 00585 00586 // Obtain task to execute. 00587 t = FLASH_Queue_wait_dequeue( queue, i, ( void* ) args ); 00588 00589 FLA_Lock_release( &(args->run_lock[queue]) ); // R *** 00590 00591 // Dequeued a task from the waiting queue. 00592 available = ( t != NULL ); 00593 00594 if ( available ) 00595 { 00596 // Execute the task. 00597 FLASH_Queue_exec_task( t ); 00598 00599 // Update task dependencies. 00600 FLASH_Task_update_dependencies( t, ( void* ) args ); 00601 00602 #ifndef FLA_ENABLE_SUPERMATRIX_VISUALIZATION 00603 // Free the task once it executes in parallel. 00604 FLASH_Task_free_parallel( t, ( void* ) args ); 00605 #endif 00606 } 00607 00608 FLA_Lock_acquire( &(args->all_lock) ); // A *** 00609 00610 // Increment program counter. 00611 if ( available ) 00612 args->pc++; 00613 00614 // Terminate loop. 00615 if ( args->pc >= n_tasks ) 00616 condition = FALSE; 00617 00618 FLA_Lock_release( &(args->all_lock) ); // A *** 00619 } 00620 00621 #if FLA_MULTITHREADING_MODEL == FLA_PTHREADS 00622 // If this is a non-main thread, then exit with a zero (normal) error code. 00623 // The main thread cannot call pthread_exit() because this routine never 00624 // returns. The main thread must proceed so it can oversee the joining of 00625 // the exited non-main pthreads. 00626 if ( i != 0 ) 00627 pthread_exit( ( void* ) NULL ); 00628 #endif 00629 00630 return ( void* ) NULL; 00631 }
void FLASH_Queue_exec_simulation | ( | void * | arg | ) |
References FLASH_Task_s::dep_arg_head, FLASH_Queue_exec_task(), FLASH_Queue_get_data_affinity(), FLASH_Queue_get_num_tasks(), FLASH_Queue_get_num_threads(), FLASH_Queue_get_verbose_output(), FLASH_Queue_wait_dequeue(), FLASH_Queue_wait_enqueue(), FLASH_Task_free(), FLASH_Task_s::n_dep_args, FLASH_Task_s::n_ready, FLASH_Task_s::name, FLASH_Dep_s::next_dep, FLASH_Queue_variables::pc, and FLASH_Dep_s::task.
Referenced by FLASH_Queue_exec().
00770 { 00771 FLASH_Queue_vars* args = ( FLASH_Queue_vars* ) arg; 00772 int i, j; 00773 int queue; 00774 int n_stages = 0; 00775 int n_tasks = FLASH_Queue_get_num_tasks(); 00776 int n_threads = FLASH_Queue_get_num_threads(); 00777 FLA_Bool verbose = FLASH_Queue_get_verbose_output(); 00778 FLASH_Task* task; 00779 FLASH_Task* t; 00780 FLASH_Dep* d; 00781 00782 // An array to hold tasks to be executed during of simulation. 00783 #ifdef FLA_ENABLE_WINDOWS_BUILD 00784 FLASH_Task** exec_array = ( FLASH_Task** ) _alloca( n_threads * sizeof( FLASH_Task* ) ); 00785 #else 00786 FLASH_Task* exec_array[n_threads]; 00787 #endif 00788 00789 // Initialize all exec_array to NULL. 00790 for ( i = 0; i < n_threads; i++ ) 00791 exec_array[i] = NULL; 00792 00793 // Loop until all the tasks have committed. 00794 while ( args->pc < n_tasks ) 00795 { 00796 for ( i = 0; i < n_threads; i++ ) 00797 { 00798 // Update waiting queue with ready tasks. 00799 t = exec_array[i]; 00800 00801 if ( t != NULL ) 00802 { 00803 // Check each dependent task. 00804 d = t->dep_arg_head; 00805 00806 for ( j = 0; j < t->n_dep_args; j++ ) 00807 { 00808 task = d->task; 00809 task->n_ready--; 00810 00811 // Place newly ready tasks on waiting queue. 00812 if ( task->n_ready == 0 ) 00813 { 00814 FLASH_Queue_wait_enqueue( task, arg ); 00815 } 00816 00817 // Go to the next dep. 00818 d = d->next_dep; 00819 } 00820 00821 #ifndef FLA_ENABLE_SUPERMATRIX_VISUALIZATION 00822 // Free the task. 00823 FLASH_Task_free( t ); 00824 #endif 00825 } 00826 } 00827 00828 n_stages++; 00829 if ( !verbose ) 00830 printf( "%7d", n_stages ); 00831 00832 // Move ready tasks from the waiting queue to execution queue. 00833 for ( i = 0; i < n_threads; i++ ) 00834 { 00835 // Use different queues depending on if using data affinity or not. 00836 if ( FLASH_Queue_get_data_affinity() != FLASH_QUEUE_AFFINITY_NONE ) 00837 { 00838 queue = i; 00839 } 00840 else // No data affinity. 00841 { 00842 queue = 0; 00843 } 00844 00845 t = FLASH_Queue_wait_dequeue( queue, i, arg ); 00846 exec_array[i] = t; 00847 00848 // Increment program counter. 00849 if ( t != NULL ) 00850 { 00851 args->pc++; 00852 } 00853 } 00854 00855 // Execute independent tasks. 00856 for ( i = 0; i < n_threads; i++ ) 00857 { 00858 t = exec_array[i]; 00859 FLASH_Queue_exec_task( t ); 00860 00861 if ( !verbose ) 00862 printf( "%7s", ( t == NULL ? " " : t->name ) ); 00863 } 00864 00865 if ( !verbose ) 00866 printf( "\n" ); 00867 } 00868 00869 if ( !verbose ) 00870 printf( "\n" ); 00871 00872 return; 00873 }
void FLASH_Queue_exec_task | ( | FLASH_Task * | t | ) |
References FLASH_Task_s::begin_time, FLASH_Task_s::cntl, FLASH_Task_s::end_time, FLA_Apply_Q_UT_task(), FLA_Apply_Q_UT_UD_task(), FLASH_Task_s::fla_arg, FLA_Axpy_task(), FLA_Chol_task(), FLA_Clock(), FLA_Copy_task(), FLA_Gemm_task(), FLA_Gemv_task(), FLA_Hemm_task(), FLA_Her2k_task(), FLA_Herk_task(), FLA_LU_nopiv_task(), FLA_LU_piv_copy_task(), FLA_LU_piv_task(), FLA_Obj_free_task(), FLA_QR_UT_copy_task(), FLA_QR_UT_task(), FLA_QR_UT_UD_task(), FLA_SA_FS_task(), FLA_SA_LU_task(), FLA_Sylv_task(), FLA_Symm_task(), FLA_Syr2k_task(), FLA_Syrk_task(), FLA_Trinv_task(), FLA_Trmm_task(), FLA_Trsm_piv_task(), FLA_Trsm_task(), FLA_Trsv_task(), FLA_Ttmm_task(), FLASH_Task_s::func, FLASH_Task_s::input_arg, FLASH_Task_s::int_arg, and FLASH_Task_s::output_arg.
Referenced by FLASH_Queue_exec_parallel_function(), and FLASH_Queue_exec_simulation().
00992 { 00993 // Define local function pointer types. 00994 00995 // LAPACK-level 00996 typedef FLA_Error(*flash_lu_piv_p)(FLA_Obj A, FLA_Obj p, fla_lu_t* cntl); 00997 typedef FLA_Error(*flash_lu_piv_copy_p)(FLA_Obj A, FLA_Obj p, FLA_Obj U, fla_lu_t* cntl); 00998 typedef FLA_Error(*flash_trsm_piv_p)(FLA_Obj A, FLA_Obj C, FLA_Obj p, fla_trsm_t* cntl); 00999 typedef FLA_Error(*flash_sa_lu_p)(FLA_Obj U, FLA_Obj D, FLA_Obj p, FLA_Obj L, int nb_alg, fla_lu_t* cntl); 01000 typedef FLA_Error(*flash_sa_fs_p)(FLA_Obj L, FLA_Obj D, FLA_Obj p, FLA_Obj C, FLA_Obj E, int nb_alg, fla_lu_t* cntl); 01001 typedef FLA_Error(*flash_lu_nopiv_p)(FLA_Obj A, fla_lu_t* cntl); 01002 typedef FLA_Error(*flash_trinv_p)(FLA_Uplo uplo, FLA_Diag diag, FLA_Obj A, fla_trinv_t* cntl); 01003 typedef FLA_Error(*flash_ttmm_p)(FLA_Uplo uplo, FLA_Obj A, fla_ttmm_t* cntl); 01004 typedef FLA_Error(*flash_chol_p)(FLA_Uplo uplo, FLA_Obj A, fla_chol_t* cntl); 01005 typedef FLA_Error(*flash_sylv_p)(FLA_Trans transa, FLA_Trans transb, FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t* cntl); 01006 typedef FLA_Error(*flash_qrut_p)(FLA_Obj A, FLA_Obj T, fla_qrut_t* cntl); 01007 typedef FLA_Error(*flash_qrutc_p)(FLA_Obj A, FLA_Obj T, FLA_Obj U, fla_qrut_t* cntl); 01008 typedef FLA_Error(*flash_qrutud_p)(FLA_Obj B, FLA_Obj D, FLA_Obj T, fla_qrutud_t* cntl); 01009 typedef FLA_Error(*flash_apqut_p)(FLA_Side side, FLA_Trans trans, FLA_Store storev, FLA_Obj A, FLA_Obj T, FLA_Obj W, FLA_Obj B, fla_apqut_t* cntl); 01010 typedef FLA_Error(*flash_apqutud_p)(FLA_Side side, FLA_Trans trans, FLA_Store storev, FLA_Obj D, FLA_Obj T, FLA_Obj W, FLA_Obj C, FLA_Obj E, fla_apqutud_t* cntl); 01011 01012 // Level-3 BLAS 01013 typedef FLA_Error(*flash_gemm_p)(FLA_Trans transa, FLA_Trans transb, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_gemm_t* cntl); 01014 typedef FLA_Error(*flash_hemm_p)(FLA_Side side, FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_hemm_t* cntl); 01015 typedef FLA_Error(*flash_herk_p)(FLA_Uplo uplo, FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj beta, FLA_Obj C, fla_herk_t* cntl); 01016 typedef FLA_Error(*flash_her2k_p)(FLA_Uplo uplo, FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_her2k_t* cntl); 01017 typedef FLA_Error(*flash_symm_p)(FLA_Side side, FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_symm_t* cntl); 01018 typedef FLA_Error(*flash_syrk_p)(FLA_Uplo uplo, FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj beta, FLA_Obj C, fla_syrk_t* cntl); 01019 typedef FLA_Error(*flash_syr2k_p)(FLA_Uplo uplo, FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_syr2k_t* cntl); 01020 typedef FLA_Error(*flash_trmm_p)(FLA_Side side, FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj C, fla_trmm_t* cntl); 01021 typedef FLA_Error(*flash_trsm_p)(FLA_Side side, FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj C, fla_trsm_t* cntl); 01022 01023 // Level-2 BLAS 01024 typedef FLA_Error(*flash_gemv_p)(FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y, fla_gemv_t* cntl); 01025 typedef FLA_Error(*flash_trsv_p)(FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj A, FLA_Obj x, fla_trsv_t* cntl); 01026 01027 // Level-1 BLAS 01028 typedef FLA_Error(*flash_axpy_p)(FLA_Obj alpha, FLA_Obj A, FLA_Obj B, fla_axpy_t* cntl); 01029 typedef FLA_Error(*flash_copy_p)(FLA_Obj A, FLA_Obj B, fla_copy_t* cntl); 01030 01031 // Base 01032 typedef FLA_Error(*flash_obj_free_p)(FLA_Obj A, void* cntl); 01033 01034 01035 // Only execute task if it is not NULL. 01036 if ( t == NULL ) 01037 return; 01038 01039 #ifdef FLA_ENABLE_SUPERMATRIX_VISUALIZATION 01040 t->begin_time = FLA_Clock(); 01041 #endif 01042 01043 // Now "switch" between the various possible task functions. 01044 01045 // FLA_LU_piv 01046 if ( t->func == (void *) FLA_LU_piv_task ) 01047 { 01048 flash_lu_piv_p func; 01049 func = (flash_lu_piv_p) t->func; 01050 01051 func( t->output_arg[0], 01052 t->fla_arg[0], 01053 ( fla_lu_t* ) t->cntl ); 01054 } 01055 // FLA_LU_piv_copy 01056 else if ( t->func == (void *) FLA_LU_piv_copy_task ) 01057 { 01058 flash_lu_piv_copy_p func; 01059 func = (flash_lu_piv_copy_p) t->func; 01060 01061 func( t->output_arg[0], 01062 t->fla_arg[0], 01063 t->output_arg[1], 01064 ( fla_lu_t* ) t->cntl ); 01065 } 01066 // FLA_Trsm_piv 01067 else if ( t->func == (void *) FLA_Trsm_piv_task ) 01068 { 01069 flash_trsm_piv_p func; 01070 func = (flash_trsm_piv_p) t->func; 01071 01072 func( t->input_arg[0], 01073 t->output_arg[0], 01074 t->fla_arg[0], 01075 ( fla_trsm_t* ) t->cntl ); 01076 } 01077 // FLA_SA_LU 01078 else if ( t->func == (void *) FLA_SA_LU_task ) 01079 { 01080 flash_sa_lu_p func; 01081 func = (flash_sa_lu_p) t->func; 01082 01083 func( t->output_arg[1], 01084 t->output_arg[0], 01085 t->fla_arg[0], 01086 t->fla_arg[1], 01087 t->int_arg[0], 01088 ( fla_lu_t* ) t->cntl ); 01089 } 01090 // FLA_SA_FS 01091 else if ( t->func == (void *) FLA_SA_FS_task ) 01092 { 01093 flash_sa_fs_p func; 01094 func = (flash_sa_fs_p) t->func; 01095 01096 func( t->fla_arg[0], 01097 t->input_arg[0], 01098 t->fla_arg[1], 01099 t->output_arg[1], 01100 t->output_arg[0], 01101 t->int_arg[0], 01102 ( fla_lu_t* ) t->cntl ); 01103 } 01104 // FLA_LU_nopiv 01105 else if ( t->func == (void *) FLA_LU_nopiv_task ) 01106 { 01107 flash_lu_nopiv_p func; 01108 func = (flash_lu_nopiv_p) t->func; 01109 01110 func( t->output_arg[0], 01111 ( fla_lu_t* ) t->cntl ); 01112 } 01113 // FLA_Trinv 01114 else if ( t->func == (void *) FLA_Trinv_task ) 01115 { 01116 flash_trinv_p func; 01117 func = (flash_trinv_p) t->func; 01118 01119 func( ( FLA_Uplo ) t->int_arg[0], 01120 ( FLA_Diag ) t->int_arg[1], 01121 t->output_arg[0], 01122 ( fla_trinv_t* ) t->cntl ); 01123 } 01124 // FLA_Ttmm 01125 else if ( t->func == (void *) FLA_Ttmm_task ) 01126 { 01127 flash_ttmm_p func; 01128 func = (flash_ttmm_p) t->func; 01129 01130 func( ( FLA_Uplo ) t->int_arg[0], 01131 t->output_arg[0], 01132 ( fla_ttmm_t* ) t->cntl ); 01133 } 01134 // FLA_Chol 01135 else if ( t->func == (void *) FLA_Chol_task ) 01136 { 01137 flash_chol_p func; 01138 func = (flash_chol_p) t->func; 01139 01140 func( ( FLA_Uplo ) t->int_arg[0], 01141 t->output_arg[0], 01142 ( fla_chol_t* ) t->cntl ); 01143 } 01144 // FLA_Sylv 01145 else if ( t->func == (void *) FLA_Sylv_task ) 01146 { 01147 flash_sylv_p func; 01148 func = (flash_sylv_p) t->func; 01149 01150 func( ( FLA_Trans ) t->int_arg[0], 01151 ( FLA_Trans ) t->int_arg[1], 01152 t->fla_arg[0], 01153 t->input_arg[0], 01154 t->input_arg[1], 01155 t->output_arg[0], 01156 t->fla_arg[1], 01157 ( fla_sylv_t* ) t->cntl ); 01158 } 01159 // FLA_QR_UT 01160 else if ( t->func == (void *) FLA_QR_UT_task ) 01161 { 01162 flash_qrut_p func; 01163 func = (flash_qrut_p) t->func; 01164 01165 func( t->output_arg[0], 01166 t->fla_arg[0], 01167 ( fla_qrut_t* ) t->cntl ); 01168 } 01169 // FLA_QR_UT_copy 01170 else if ( t->func == (void *) FLA_QR_UT_copy_task ) 01171 { 01172 flash_qrutc_p func; 01173 func = (flash_qrutc_p) t->func; 01174 01175 func( t->output_arg[0], 01176 t->fla_arg[0], 01177 t->output_arg[1], 01178 ( fla_qrut_t* ) t->cntl ); 01179 } 01180 // FLA_QR_UT_UD 01181 else if ( t->func == (void *) FLA_QR_UT_UD_task ) 01182 { 01183 flash_qrutud_p func; 01184 func = (flash_qrutud_p) t->func; 01185 01186 func( t->output_arg[1], 01187 t->output_arg[0], 01188 t->fla_arg[0], 01189 ( fla_qrutud_t* ) t->cntl ); 01190 } 01191 // FLA_Apply_Q_UT 01192 else if ( t->func == (void *) FLA_Apply_Q_UT_task ) 01193 { 01194 flash_apqut_p func; 01195 func = (flash_apqut_p) t->func; 01196 01197 func( ( FLA_Side ) t->int_arg[0], 01198 ( FLA_Trans ) t->int_arg[1], 01199 ( FLA_Store ) t->int_arg[2], 01200 t->input_arg[0], 01201 t->fla_arg[0], 01202 t->output_arg[1], 01203 t->output_arg[0], 01204 ( fla_apqut_t* ) t->cntl ); 01205 } 01206 // FLA_Apply_Q_UT_UD 01207 else if ( t->func == (void *) FLA_Apply_Q_UT_UD_task ) 01208 { 01209 flash_apqutud_p func; 01210 func = (flash_apqutud_p) t->func; 01211 01212 func( ( FLA_Side ) t->int_arg[0], 01213 ( FLA_Trans ) t->int_arg[1], 01214 ( FLA_Store ) t->int_arg[2], 01215 t->input_arg[0], 01216 t->fla_arg[0], 01217 t->output_arg[2], 01218 t->output_arg[1], 01219 t->output_arg[0], 01220 ( fla_apqutud_t* ) t->cntl ); 01221 } 01222 // FLA_Gemm 01223 else if ( t->func == (void *) FLA_Gemm_task ) 01224 { 01225 flash_gemm_p func; 01226 func = (flash_gemm_p) t->func; 01227 01228 func( ( FLA_Trans ) t->int_arg[0], 01229 ( FLA_Trans ) t->int_arg[1], 01230 t->fla_arg[0], 01231 t->input_arg[0], 01232 t->input_arg[1], 01233 t->fla_arg[1], 01234 t->output_arg[0], 01235 ( fla_gemm_t* ) t->cntl ); 01236 } 01237 // FLA_Hemm 01238 else if ( t->func == (void *) FLA_Hemm_task ) 01239 { 01240 flash_hemm_p func; 01241 func = (flash_hemm_p) t->func; 01242 01243 func( ( FLA_Side ) t->int_arg[0], 01244 ( FLA_Uplo ) t->int_arg[1], 01245 t->fla_arg[0], 01246 t->input_arg[0], 01247 t->input_arg[1], 01248 t->fla_arg[1], 01249 t->output_arg[0], 01250 ( fla_hemm_t* ) t->cntl ); 01251 } 01252 // FLA_Herk 01253 else if ( t->func == (void *) FLA_Herk_task ) 01254 { 01255 flash_herk_p func; 01256 func = (flash_herk_p) t->func; 01257 01258 func( ( FLA_Uplo ) t->int_arg[0], 01259 ( FLA_Trans ) t->int_arg[1], 01260 t->fla_arg[0], 01261 t->input_arg[0], 01262 t->fla_arg[1], 01263 t->output_arg[0], 01264 ( fla_herk_t* ) t->cntl ); 01265 } 01266 // FLA_Her2k 01267 else if ( t->func == (void *) FLA_Her2k_task ) 01268 { 01269 flash_her2k_p func; 01270 func = (flash_her2k_p) t->func; 01271 01272 func( ( FLA_Uplo ) t->int_arg[0], 01273 ( FLA_Trans ) t->int_arg[1], 01274 t->fla_arg[0], 01275 t->input_arg[0], 01276 t->input_arg[1], 01277 t->fla_arg[1], 01278 t->output_arg[0], 01279 ( fla_her2k_t* ) t->cntl ); 01280 } 01281 // FLA_Symm 01282 else if ( t->func == (void *) FLA_Symm_task ) 01283 { 01284 flash_symm_p func; 01285 func = (flash_symm_p) t->func; 01286 01287 func( ( FLA_Side ) t->int_arg[0], 01288 ( FLA_Uplo ) t->int_arg[1], 01289 t->fla_arg[0], 01290 t->input_arg[0], 01291 t->input_arg[1], 01292 t->fla_arg[1], 01293 t->output_arg[0], 01294 ( fla_symm_t* ) t->cntl ); 01295 } 01296 // FLA_Syrk 01297 else if ( t->func == (void *) FLA_Syrk_task ) 01298 { 01299 flash_syrk_p func; 01300 func = (flash_syrk_p) t->func; 01301 01302 func( ( FLA_Uplo ) t->int_arg[0], 01303 ( FLA_Trans ) t->int_arg[1], 01304 t->fla_arg[0], 01305 t->input_arg[0], 01306 t->fla_arg[1], 01307 t->output_arg[0], 01308 ( fla_syrk_t* ) t->cntl ); 01309 } 01310 // FLA_Syr2k 01311 else if ( t->func == (void *) FLA_Syr2k_task ) 01312 { 01313 flash_syr2k_p func; 01314 func = (flash_syr2k_p) t->func; 01315 01316 func( ( FLA_Uplo ) t->int_arg[0], 01317 ( FLA_Trans ) t->int_arg[1], 01318 t->fla_arg[0], 01319 t->input_arg[0], 01320 t->input_arg[1], 01321 t->fla_arg[1], 01322 t->output_arg[0], 01323 ( fla_syr2k_t* ) t->cntl ); 01324 } 01325 // FLA_Trmm 01326 else if ( t->func == (void *) FLA_Trmm_task ) 01327 { 01328 flash_trmm_p func; 01329 func = (flash_trmm_p) t->func; 01330 01331 func( ( FLA_Side ) t->int_arg[0], 01332 ( FLA_Uplo ) t->int_arg[1], 01333 ( FLA_Trans ) t->int_arg[2], 01334 ( FLA_Diag ) t->int_arg[3], 01335 t->fla_arg[0], 01336 t->input_arg[0], 01337 t->output_arg[0], 01338 ( fla_trmm_t* ) t->cntl ); 01339 } 01340 // FLA_Trsm 01341 else if ( t->func == (void *) FLA_Trsm_task ) 01342 { 01343 flash_trsm_p func; 01344 func = (flash_trsm_p) t->func; 01345 01346 func( ( FLA_Side ) t->int_arg[0], 01347 ( FLA_Uplo ) t->int_arg[1], 01348 ( FLA_Trans ) t->int_arg[2], 01349 ( FLA_Diag ) t->int_arg[3], 01350 t->fla_arg[0], 01351 t->input_arg[0], 01352 t->output_arg[0], 01353 ( fla_trsm_t* ) t->cntl ); 01354 } 01355 // FLA_Gemv 01356 else if ( t->func == (void *) FLA_Gemv_task ) 01357 { 01358 flash_gemv_p func; 01359 func = (flash_gemv_p) t->func; 01360 01361 func( ( FLA_Trans ) t->int_arg[0], 01362 t->fla_arg[0], 01363 t->input_arg[0], 01364 t->input_arg[1], 01365 t->fla_arg[1], 01366 t->output_arg[0], 01367 ( fla_gemv_t* ) t->cntl ); 01368 } 01369 // FLA_Trsv 01370 else if ( t->func == (void *) FLA_Trsv_task ) 01371 { 01372 flash_trsv_p func; 01373 func = (flash_trsv_p) t->func; 01374 01375 func( ( FLA_Uplo ) t->int_arg[0], 01376 ( FLA_Trans ) t->int_arg[1], 01377 ( FLA_Diag ) t->int_arg[2], 01378 t->input_arg[0], 01379 t->output_arg[0], 01380 ( fla_trsv_t* ) t->cntl ); 01381 } 01382 // FLA_Axpy 01383 else if ( t->func == (void *) FLA_Axpy_task ) 01384 { 01385 flash_axpy_p func; 01386 func = (flash_axpy_p) t->func; 01387 01388 func( t->fla_arg[0], 01389 t->input_arg[0], 01390 t->output_arg[0], 01391 ( fla_axpy_t* ) t->cntl ); 01392 } 01393 // FLA_Copy 01394 else if ( t->func == (void *) FLA_Copy_task ) 01395 { 01396 flash_copy_p func; 01397 func = (flash_copy_p) t->func; 01398 01399 func( t->input_arg[0], 01400 t->output_arg[0], 01401 ( fla_copy_t* ) t->cntl ); 01402 } 01403 // FLA_Obj_free 01404 else if ( t->func == (void *) FLA_Obj_free_task ) 01405 { 01406 flash_obj_free_p func; 01407 func = (flash_obj_free_p) t->func; 01408 01409 func( t->output_arg[0], 01410 ( void* ) t->cntl ); 01411 } 01412 else 01413 { 01414 FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED ); 01415 } 01416 01417 #ifdef FLA_ENABLE_SUPERMATRIX_VISUALIZATION 01418 t->end_time = FLA_Clock(); 01419 #endif 01420 01421 return; 01422 }
void FLASH_Queue_finalize | ( | void | ) |
Referenced by FLA_Finalize().
00272 { 00273 // Exit early if we're not already initialized. 00274 if ( flash_queue_initialized == FALSE ) 00275 return; 00276 00277 // Clear the initialized flag. 00278 flash_queue_initialized = FALSE; 00279 00280 return; 00281 }
int FLASH_Queue_get_cache_line_size | ( | void | ) |
FLA_Bool FLASH_Queue_get_caching | ( | void | ) |
int FLASH_Queue_get_cores_per_cache | ( | void | ) |
FLASH_Data_aff FLASH_Queue_get_data_affinity | ( | void | ) |
FLA_Bool FLASH_Queue_get_enabled | ( | void | ) |
Referenced by FLA_Apply_Q_UT_internal(), FLA_Apply_Q_UT_UD_internal(), FLA_Axpy_internal(), FLA_Chol_internal(), FLA_Copy_internal(), FLA_Gemm_internal(), FLA_Gemv_internal(), FLA_Hemm_internal(), FLA_Her2k_internal(), FLA_Herk_internal(), FLA_LU_nopiv_internal(), FLA_QR_UT_copy_internal(), FLA_QR_UT_inc_free_U(), FLA_QR_UT_internal(), FLA_QR_UT_UD_internal(), FLA_Sylv_internal(), FLA_Symm_internal(), FLA_Syr2k_internal(), FLA_Syrk_internal(), FLA_Trinv_internal(), FLA_Trmm_internal(), FLA_Trsm_internal(), FLA_Trsv_internal(), FLA_Ttmm_internal(), FLASH_Axpy(), FLASH_Copy(), FLASH_FS_incpiv(), FLASH_Gemv(), FLASH_LU_incpiv_var1(), FLASH_LU_incpiv_var2(), FLASH_SA_FS(), FLASH_SA_LU(), FLASH_Trsm_piv(), and FLASH_Trsv().
00185 { 00186 // Return if enabled, but always false if SuperMatrix is not configured. 00187 #ifdef FLA_ENABLE_SUPERMATRIX 00188 return flash_queue_enabled; 00189 #else 00190 return FALSE; 00191 #endif 00192 }
FLASH_Task* FLASH_Queue_get_head_task | ( | void | ) |
References FLASH_Queue_s::head.
Referenced by FLASH_Queue_exec(), FLASH_Queue_init_tasks(), FLASH_Queue_verbose_output(), and FLASH_Queue_visualization().
int FLASH_Queue_get_num_tasks | ( | void | ) |
unsigned int FLASH_Queue_get_num_threads | ( | void | ) |
double FLASH_Queue_get_parallel_time | ( | void | ) |
FLA_Bool FLASH_Queue_get_sorting | ( | void | ) |
FLASH_Task* FLASH_Queue_get_tail_task | ( | void | ) |
double FLASH_Queue_get_total_time | ( | void | ) |
FLA_Bool FLASH_Queue_get_verbose_output | ( | void | ) |
FLA_Bool FLASH_Queue_get_work_stealing | ( | void | ) |
void FLASH_Queue_init | ( | void | ) |
References FLASH_Queue_reset().
Referenced by FLA_Init().
00251 { 00252 // Exit early if we're already initialized. 00253 if ( flash_queue_initialized == TRUE ) 00254 return; 00255 00256 // Reset all the initial values. 00257 FLASH_Queue_reset(); 00258 00259 // Set the initialized flag. 00260 flash_queue_initialized = TRUE; 00261 00262 return; 00263 }
void FLASH_Queue_init_tasks | ( | void * | arg | ) |
References FLA_Obj_view::base, FLASH_Task_s::dep_arg_head, FLASH_Queue_get_data_affinity(), FLASH_Queue_get_head_task(), FLASH_Queue_get_num_tasks(), FLASH_Queue_get_num_threads(), FLASH_Queue_get_tail_task(), FLASH_Queue_wait_enqueue(), FLASH_Task_s::height, FLA_Obj_struct::m_index, FLASH_Task_s::n_dep_args, FLA_Obj_struct::n_index, FLASH_Task_s::n_input_args, FLASH_Task_s::n_output_args, FLASH_Task_s::n_ready, FLASH_Task_s::n_war_args, FLASH_Dep_s::next_dep, FLASH_Task_s::next_task, FLASH_Task_s::output_arg, FLASH_Task_s::prev_task, FLASH_Task_s::queue, and FLASH_Dep_s::task.
Referenced by FLASH_Queue_exec().
00235 { 00236 int i, j; 00237 int n_tasks = FLASH_Queue_get_num_tasks(); 00238 int n_threads = FLASH_Queue_get_num_threads(); 00239 int n_ready = 0; 00240 int length = 0; 00241 int width = 0; 00242 int height = 0; 00243 FLASH_Data_aff data_aff = FLASH_Queue_get_data_affinity(); 00244 FLASH_Task* t; 00245 FLASH_Dep* d; 00246 00247 // Find the 2D factorization of the number of threads. 00248 if ( data_aff == FLASH_QUEUE_AFFINITY_2D_BLOCK_CYCLIC ) 00249 { 00250 int sq_rt = 0; 00251 while ( sq_rt * sq_rt <= n_threads ) sq_rt++; 00252 sq_rt--; 00253 while ( n_threads % sq_rt != 0 ) sq_rt--; 00254 length = n_threads / sq_rt; 00255 width = sq_rt; 00256 } 00257 00258 // Grab the tail of the task queue. 00259 t = FLASH_Queue_get_tail_task(); 00260 00261 for ( i = n_tasks - 1; i >= 0; i-- ) 00262 { 00263 // Determine data affinity. 00264 if ( data_aff == FLASH_QUEUE_AFFINITY_NONE ) 00265 { // No data affinity 00266 t->queue = 0; 00267 } 00268 else if ( data_aff == FLASH_QUEUE_AFFINITY_2D_BLOCK_CYCLIC ) 00269 { // Two-dimensional block cyclic 00270 t->queue = ( t->output_arg[0].base->m_index % length ) + 00271 ( t->output_arg[0].base->n_index % width ) * length; 00272 } 00273 else 00274 { // Round-robin 00275 t->queue = t->queue % n_threads; 00276 } 00277 00278 // Determine the height of each task in the DAG. 00279 height = 0; 00280 d = t->dep_arg_head; 00281 00282 // Take the maximum height of dependent tasks. 00283 for ( j = 0; j < t->n_dep_args; j++ ) 00284 { 00285 height = max( height, d->task->height ); 00286 d = d->next_dep; 00287 } 00288 00289 t->height = height + 1; 00290 00291 // Find all ready tasks. 00292 t->n_ready += t->n_input_args + t->n_output_args + t->n_war_args; 00293 00294 if ( t->n_ready == 0 ) 00295 { 00296 // Save the number of ready and available tasks. 00297 n_ready++; 00298 } 00299 00300 // Go to the previous task. 00301 t = t->prev_task; 00302 } 00303 00304 // Grab the head of the task queue. 00305 t = FLASH_Queue_get_head_task(); 00306 00307 for ( i = 0; i < n_tasks && n_ready > 0; i++ ) 00308 { 00309 if ( t->n_ready == 0 ) 00310 { 00311 // Enqueue all the ready and available tasks. 00312 FLASH_Queue_wait_enqueue( t, arg ); 00313 00314 // Decrement the number of ready tasks left to be enqueued. 00315 n_ready--; 00316 } 00317 00318 // Go to the next task. 00319 t = t->next_task; 00320 } 00321 00322 return; 00323 }
void FLASH_Queue_push | ( | void * | func, | |
void * | cntl, | |||
char * | name, | |||
int | n_int_args, | |||
int | n_fla_args, | |||
int | n_input_args, | |||
int | n_output_args, | |||
... | ||||
) |
References FLA_Obj_view::base, FLASH_Task_s::dep_arg_head, FLASH_Task_s::dep_arg_tail, FLA_Obj_struct::first_task, FLASH_Task_s::fla_arg, FLA_free(), FLA_malloc(), FLASH_Task_alloc(), FLASH_Queue_s::head, FLASH_Task_s::input_arg, FLASH_Task_s::int_arg, FLASH_Task_s::n_dep_args, FLA_Obj_struct::n_read_blocks, FLA_Obj_struct::n_read_tasks, FLASH_Task_s::n_ready, FLASH_Queue_s::n_tasks, FLASH_Task_s::n_war_args, FLA_Obj_struct::n_write_blocks, FLASH_Dep_s::next_dep, FLASH_Task_s::next_task, FLASH_Task_s::order, FLASH_Task_s::output_arg, FLASH_Task_s::prev_task, FLASH_Task_s::queue, FLA_Obj_struct::read_task_head, FLA_Obj_struct::read_task_tail, FLASH_Queue_s::tail, FLASH_Dep_s::task, and FLA_Obj_struct::write_task.
00625 { 00626 int i, j; 00627 va_list var_arg_list; 00628 FLASH_Task* t; 00629 FLASH_Task* task; 00630 FLASH_Dep* d; 00631 FLASH_Dep* next_dep; 00632 FLA_Obj obj; 00633 00634 // Allocate a new FLA_Task and populate its fields with appropriate values. 00635 t = FLASH_Task_alloc( func, cntl, name, 00636 n_int_args, n_fla_args, 00637 n_input_args, n_output_args ); 00638 00639 // Initialize variable argument environment. In case you're wondering, the 00640 // second argument in this macro invocation of va_start() is supposed to be 00641 // the parameter that immediately preceeds the variable argument list 00642 // (ie: the ... above ). 00643 va_start( var_arg_list, n_output_args ); 00644 00645 // Extract the integer arguments. 00646 for ( i = 0; i < n_int_args; i++ ) 00647 t->int_arg[i] = va_arg( var_arg_list, int ); 00648 00649 // Extract the FLA_Obj arguments. 00650 for ( i = 0; i < n_fla_args; i++ ) 00651 t->fla_arg[i] = va_arg( var_arg_list, FLA_Obj ); 00652 00653 // Extract the input FLA_Obj arguments. 00654 for ( i = 0; i < n_input_args; i++ ) 00655 { 00656 obj = va_arg( var_arg_list, FLA_Obj ); 00657 t->input_arg[i] = obj; 00658 00659 // Find dependence information. 00660 if ( obj.base->write_task == NULL ) 00661 { 00662 t->n_ready--; 00663 00664 // Add to number of blocks read if not written and not read before. 00665 if ( obj.base->n_read_tasks == 0 ) 00666 { 00667 // Identify each read block with an id for freeing. 00668 obj.base->n_read_blocks = flash_queue_n_read_blocks; 00669 00670 flash_queue_n_read_blocks++; 00671 } 00672 } 00673 else 00674 { // Flow dependence. 00675 task = obj.base->write_task; 00676 00677 d = (FLASH_Dep *) FLA_malloc( sizeof(FLASH_Dep) ); 00678 00679 d->task = t; 00680 d->next_dep = NULL; 00681 00682 if ( task->n_dep_args == 0 ) 00683 { 00684 task->dep_arg_head = d; 00685 task->dep_arg_tail = d; 00686 } 00687 else 00688 { 00689 task->dep_arg_tail->next_dep = d; 00690 task->dep_arg_tail = d; 00691 } 00692 00693 task->n_dep_args++; 00694 } 00695 00696 // Add task to the read task in the object if not already there. 00697 if ( obj.base->n_read_tasks == 0 || 00698 obj.base->read_task_tail->task != t ) 00699 { // Anti-dependence potentially. 00700 d = (FLASH_Dep *) FLA_malloc( sizeof(FLASH_Dep) ); 00701 00702 d->task = t; 00703 d->next_dep = NULL; 00704 00705 if ( obj.base->n_read_tasks == 0 ) 00706 { 00707 obj.base->read_task_head = d; 00708 obj.base->read_task_tail = d; 00709 } 00710 else 00711 { 00712 obj.base->read_task_tail->next_dep = d; 00713 obj.base->read_task_tail = d; 00714 } 00715 00716 obj.base->n_read_tasks++; 00717 } 00718 } 00719 00720 // Extract the output FLA_Obj arguments. 00721 for ( i = 0; i < n_output_args; i++ ) 00722 { 00723 obj = va_arg( var_arg_list, FLA_Obj ); 00724 t->output_arg[i] = obj; 00725 00726 // Assign tasks to threads with data affinity. 00727 if ( obj.base->write_task == NULL ) 00728 { 00729 t->n_ready--; 00730 00731 // Only assign data affinity to the first output block. 00732 if ( i == 0 ) 00733 t->queue = flash_queue_n_write_blocks; 00734 00735 // Save index in which this output block is first encountered. 00736 obj.base->n_write_blocks = flash_queue_n_write_blocks; 00737 obj.base->first_task = t; 00738 00739 // Number of blocks written if not written before. 00740 flash_queue_n_write_blocks++; 00741 00742 // Add to number of blocks read if not written or read before. 00743 if ( obj.base->n_read_tasks == 0 ) 00744 { 00745 // Identify each read block with an id for freeing. 00746 obj.base->n_read_blocks = flash_queue_n_read_blocks; 00747 00748 flash_queue_n_read_blocks++; 00749 } 00750 } 00751 else 00752 { // Flow dependence potentially. 00753 // The last task to overwrite this block is not itself. 00754 if ( obj.base->write_task != t ) 00755 { 00756 // Create dependency from task that last wrote the block. 00757 task = obj.base->write_task; 00758 00759 d = (FLASH_Dep *) FLA_malloc( sizeof(FLASH_Dep) ); 00760 00761 d->task = t; 00762 d->next_dep = NULL; 00763 00764 if ( task->n_dep_args == 0 ) 00765 { 00766 task->dep_arg_head = d; 00767 task->dep_arg_tail = d; 00768 } 00769 else 00770 { 00771 task->dep_arg_tail->next_dep = d; 00772 task->dep_arg_tail = d; 00773 } 00774 00775 task->n_dep_args++; 00776 00777 // Only assign data affinity to the first output block. 00778 if ( i == 0 ) 00779 t->queue = task->queue; 00780 } 00781 else 00782 { 00783 // No need to notify task twice for output block already seen. 00784 t->n_ready--; 00785 } 00786 } 00787 00788 // Clear read task for next set of reads and record the anti-dependence. 00789 d = obj.base->read_task_head; 00790 00791 for ( j = 0; j < obj.base->n_read_tasks; j++ ) 00792 { 00793 task = d->task; 00794 next_dep = d->next_dep; 00795 00796 // If the last task to read is not the current task, add dependence. 00797 if ( task != t ) 00798 { 00799 d->task = t; 00800 d->next_dep = NULL; 00801 00802 if ( task->n_dep_args == 0 ) 00803 { 00804 task->dep_arg_head = d; 00805 task->dep_arg_tail = d; 00806 } 00807 else 00808 { 00809 task->dep_arg_tail->next_dep = d; 00810 task->dep_arg_tail = d; 00811 } 00812 00813 task->n_dep_args++; 00814 00815 t->n_war_args++; 00816 } 00817 else 00818 { 00819 FLA_free( d ); 00820 } 00821 00822 d = next_dep; 00823 } 00824 00825 obj.base->n_read_tasks = 0; 00826 obj.base->read_task_head = NULL; 00827 obj.base->read_task_tail = NULL; 00828 00829 // Record this task as the last to write to this block. 00830 obj.base->write_task = t; 00831 } 00832 00833 // Finalize the variable argument environment. 00834 va_end( var_arg_list ); 00835 00836 // Add the task to the tail of the queue (and the head if queue is empty). 00837 if ( _tq.n_tasks == 0 ) 00838 { 00839 _tq.head = t; 00840 _tq.tail = t; 00841 } 00842 else 00843 { 00844 t->prev_task = _tq.tail; 00845 _tq.tail->next_task = t; 00846 _tq.tail = t; 00847 00848 // Determine the index of the task in the task queue. 00849 t->order = t->prev_task->order + 1; 00850 } 00851 00852 // Increment the number of tasks. 00853 _tq.n_tasks++; 00854 00855 return; 00856 }
void FLASH_Queue_reset | ( | void | ) |
References FLASH_Queue_s::head, FLASH_Queue_s::n_tasks, and FLASH_Queue_s::tail.
Referenced by FLASH_Queue_exec(), and FLASH_Queue_init().
00576 { 00577 // Clear the other fields of the FLASH_Queue structure. 00578 _tq.n_tasks = 0; 00579 _tq.head = NULL; 00580 _tq.tail = NULL; 00581 00582 // Reset the number of blocks. 00583 flash_queue_n_read_blocks = 0; 00584 flash_queue_n_write_blocks = 0; 00585 00586 return; 00587 }
void FLASH_Queue_set_block_size | ( | int | size | ) |
Referenced by FLASH_Obj_create_hierarchy().
00478 { 00479 // Only adjust the block size if the new block is larger. 00480 if ( flash_queue_block_size < size ) 00481 flash_queue_block_size = size; 00482 00483 return; 00484 }
void FLASH_Queue_set_cache_line_size | ( | int | size | ) |
void FLASH_Queue_set_cache_size | ( | int | size | ) |
void FLASH_Queue_set_caching | ( | FLA_Bool | caching | ) |
void FLASH_Queue_set_cores_per_cache | ( | int | cores | ) |
void FLASH_Queue_set_data_affinity | ( | FLASH_Data_aff | data_affinity | ) |
void FLASH_Queue_set_num_threads | ( | unsigned int | n_threads | ) |
References FLA_Check_num_threads().
00201 { 00202 FLA_Error e_val; 00203 00204 // Verify that the number of threads is positive. 00205 e_val = FLA_Check_num_threads( n_threads ); 00206 FLA_Check_error_code( e_val ); 00207 00208 // Keep track of the number of threads internally. 00209 flash_queue_n_threads = n_threads; 00210 00211 #if FLA_MULTITHREADING_MODEL == FLA_OPENMP 00212 00213 // No additional action is necessary to set the number of OpenMP threads 00214 // since setting the number of threads is handled at the parallel for loop 00215 // with a num_threads() clause. This gives the user more flexibility since 00216 // he can use the OMP_NUM_THREADS environment variable or the 00217 // omp_set_num_threads() function to set the global number of OpenMP threads 00218 // independently of the number of SuperMatrix threads. 00219 00220 #elif FLA_MULTITHREADING_MODEL == FLA_PTHREADS 00221 00222 // No additional action is necessary to set the number of pthreads 00223 // since setting the number of threads is handled entirely on our end. 00224 00225 #endif 00226 00227 return; 00228 }
void FLASH_Queue_set_parallel_time | ( | double | dtime | ) |
void FLASH_Queue_set_sorting | ( | FLA_Bool | sorting | ) |
void FLASH_Queue_set_verbose_output | ( | FLA_Bool | verbose | ) |
void FLASH_Queue_set_work_stealing | ( | FLA_Bool | work_stealing | ) |
void FLASH_Queue_verbose_output | ( | void | ) |
References FLA_Obj_view::base, FLASH_Task_s::dep_arg_head, FLASH_Queue_get_head_task(), FLASH_Queue_get_num_tasks(), FLA_Obj_struct::id, FLASH_Task_s::input_arg, FLA_Obj_struct::m_index, FLASH_Task_s::n_dep_args, FLA_Obj_struct::n_index, FLASH_Task_s::n_input_args, FLASH_Task_s::n_output_args, FLASH_Task_s::name, FLASH_Dep_s::next_dep, FLASH_Task_s::next_task, FLASH_Task_s::order, FLASH_Task_s::output_arg, and FLASH_Dep_s::task.
Referenced by FLASH_Queue_exec().
01431 { 01432 int i, j; 01433 int n_tasks = FLASH_Queue_get_num_tasks(); 01434 FLASH_Task* t; 01435 FLASH_Dep* d; 01436 01437 // Grab the head of the task queue. 01438 t = FLASH_Queue_get_head_task(); 01439 01440 // Iterate over linked list of tasks. 01441 for ( i = 0; i < n_tasks; i++ ) 01442 { 01443 printf( "%d;%s;", t->order, t->name ); 01444 01445 printf( "In;" ); 01446 for ( j = 0; j < t->n_input_args; j++ ) 01447 printf( "%lu[%d,%d];", t->input_arg[j].base->id, 01448 t->input_arg[j].base->m_index, 01449 t->input_arg[j].base->n_index ); 01450 01451 printf( "Out;" ); 01452 for ( j = 0; j < t->n_output_args; j++ ) 01453 printf( "%lu[%d,%d];", t->output_arg[j].base->id, 01454 t->output_arg[j].base->m_index, 01455 t->output_arg[j].base->n_index ); 01456 01457 printf( "Dep" ); 01458 d = t->dep_arg_head; 01459 for ( j = 0; j < t->n_dep_args; j++ ) 01460 { 01461 printf( ";%d", d->task->order ); 01462 d = d->next_dep; 01463 } 01464 01465 printf( "\n" ); 01466 01467 // Go to the next task. 01468 t = t->next_task; 01469 } 01470 01471 printf( "\n" ); 01472 01473 return; 01474 }
void FLASH_Queue_visualization | ( | void | ) |
References FLA_Obj_view::base, FLASH_Task_s::begin_time, FLASH_Task_s::end_time, FLASH_Queue_get_head_task(), FLASH_Queue_get_num_tasks(), FLA_Obj_struct::id, FLASH_Task_s::input_arg, FLA_Obj_struct::m_index, FLA_Obj_struct::n_index, FLASH_Task_s::n_input_args, FLASH_Task_s::n_output_args, FLASH_Task_s::name, FLASH_Task_s::next_task, FLASH_Task_s::output_arg, and FLASH_Task_s::thread.
Referenced by FLASH_Queue_exec().
01485 { 01486 int i, j; 01487 int n_tasks = FLASH_Queue_get_num_tasks(); 01488 FLASH_Task* t; 01489 01490 // Grab the head of the task queue. 01491 t = FLASH_Queue_get_head_task(); 01492 01493 // Iterate over linked list of tasks. 01494 for ( i = 0; i < n_tasks; i++ ) 01495 { 01496 printf( "%s;%d;%f;%f;", 01497 t->name, t->thread, t->begin_time, t->end_time ); 01498 01499 printf( "In;" ); 01500 for ( j = 0; j < t->n_input_args; j++ ) 01501 printf( "%lu[%d,%d];", t->input_arg[j].base->id, 01502 t->input_arg[j].base->m_index, 01503 t->input_arg[j].base->n_index ); 01504 01505 printf( "Out" ); 01506 for ( j = 0; j < t->n_output_args; j++ ) 01507 printf( ";%lu[%d,%d]", t->output_arg[j].base->id, 01508 t->output_arg[j].base->m_index, 01509 t->output_arg[j].base->n_index ); 01510 01511 printf( "\n" ); 01512 01513 // Go to the next task. 01514 t = t->next_task; 01515 } 01516 01517 return; 01518 }
FLASH_Task* FLASH_Queue_wait_dequeue | ( | int | queue, | |
int | thread, | |||
void * | arg | |||
) |
References FLASH_Queue_s::head, FLASH_Queue_s::n_tasks, FLASH_Task_s::next_wait, FLASH_Task_s::prev_wait, FLASH_Queue_s::tail, FLASH_Task_s::thread, and FLASH_Queue_variables::wait_queue.
Referenced by FLASH_Queue_exec_parallel_function(), and FLASH_Queue_exec_simulation().
00384 { 00385 FLASH_Queue_vars* args = ( FLASH_Queue_vars* ) arg; 00386 FLASH_Task* t = NULL; 00387 00388 if ( args->wait_queue[queue].n_tasks > 0 ) 00389 { 00390 // Dequeue the first task. 00391 t = args->wait_queue[queue].head; 00392 00393 if ( args->wait_queue[queue].n_tasks == 1 ) 00394 { 00395 // Clear the queue of its only task. 00396 args->wait_queue[queue].head = NULL; 00397 args->wait_queue[queue].tail = NULL; 00398 } 00399 else 00400 { 00401 // Adjust pointers in waiting queue. 00402 args->wait_queue[queue].head = t->next_wait; 00403 args->wait_queue[queue].head->prev_wait = NULL; 00404 } 00405 00406 // Save the executing thread. 00407 t->thread = thread; 00408 00409 // Clear the task's waiting linked list pointers. 00410 t->prev_wait = NULL; 00411 t->next_wait = NULL; 00412 00413 // Decrement number of tasks on waiting queue. 00414 args->wait_queue[queue].n_tasks--; 00415 } 00416 00417 return t; 00418 }
void FLASH_Queue_wait_enqueue | ( | FLASH_Task * | t, | |
void * | arg | |||
) |
References FLASH_Queue_get_sorting(), FLASH_Queue_s::head, FLASH_Task_s::height, FLASH_Queue_s::n_tasks, FLASH_Task_s::next_wait, FLASH_Task_s::prev_wait, FLASH_Task_s::queue, FLASH_Queue_s::tail, and FLASH_Queue_variables::wait_queue.
Referenced by FLASH_Queue_exec_simulation(), FLASH_Queue_init_tasks(), and FLASH_Task_update_dependencies().
00332 { 00333 FLASH_Queue_vars* args = ( FLASH_Queue_vars* ) arg; 00334 int queue = t->queue; 00335 00336 if ( args->wait_queue[queue].n_tasks == 0 ) 00337 { 00338 args->wait_queue[queue].head = t; 00339 args->wait_queue[queue].tail = t; 00340 } 00341 else 00342 { 00343 t->prev_wait = args->wait_queue[queue].tail; 00344 00345 // Insertion sort of tasks in waiting queue. 00346 if ( FLASH_Queue_get_sorting() ) 00347 { 00348 while ( t->prev_wait != NULL ) 00349 { 00350 if ( t->prev_wait->height >= t->height ) 00351 break; 00352 00353 t->next_wait = t->prev_wait; 00354 t->prev_wait = t->prev_wait->prev_wait; 00355 } 00356 } 00357 00358 // Checking if the task is the head of the waiting queue. 00359 if ( t->prev_wait == NULL ) 00360 args->wait_queue[queue].head = t; 00361 else 00362 t->prev_wait->next_wait = t; 00363 00364 // Checking if the task is the tail of the waiting queue. 00365 if ( t->next_wait == NULL ) 00366 args->wait_queue[queue].tail = t; 00367 else 00368 t->next_wait->prev_wait = t; 00369 } 00370 00371 // Increment number of tasks on waiting queue. 00372 args->wait_queue[queue].n_tasks++; 00373 00374 return; 00375 }
FLASH_Task* FLASH_Task_alloc | ( | void * | func, | |
void * | cntl, | |||
char * | name, | |||
int | n_int_args, | |||
int | n_fla_args, | |||
int | n_input_args, | |||
int | n_output_args | |||
) |
References FLASH_Task_s::cache, FLASH_Task_s::cntl, FLASH_Task_s::dep_arg_head, FLASH_Task_s::dep_arg_tail, FLASH_Task_s::fla_arg, FLA_malloc(), FLASH_Task_s::func, FLASH_Task_s::height, FLASH_Task_s::hit, FLASH_Task_s::input_arg, FLASH_Task_s::int_arg, FLASH_Task_s::n_dep_args, FLASH_Task_s::n_fla_args, FLASH_Task_s::n_input_args, FLASH_Task_s::n_int_args, FLASH_Task_s::n_output_args, FLASH_Task_s::n_ready, FLASH_Task_s::n_war_args, FLASH_Task_s::name, FLASH_Task_s::next_task, FLASH_Task_s::next_wait, FLASH_Task_s::order, FLASH_Task_s::output_arg, FLASH_Task_s::prev_task, FLASH_Task_s::prev_wait, FLASH_Task_s::queue, and FLASH_Task_s::thread.
Referenced by FLASH_Queue_push().
00871 { 00872 FLASH_Task* t; 00873 00874 // Allocate space for the task structure t. 00875 t = (FLASH_Task *) FLA_malloc( sizeof(FLASH_Task) ); 00876 00877 // Allocate space for the task's integer arguments. 00878 t->int_arg = (int *) FLA_malloc( n_int_args * sizeof(int) ); 00879 00880 // Allocate space for the task's FLA_Obj arguments. 00881 t->fla_arg = (FLA_Obj *) FLA_malloc( n_fla_args * sizeof(FLA_Obj) ); 00882 00883 // Allocate space for the task's input FLA_Obj arguments. 00884 t->input_arg = (FLA_Obj *) FLA_malloc( n_input_args * sizeof(FLA_Obj) ); 00885 00886 // Allocate space for the task's output FLA_Obj arguments. 00887 t->output_arg = (FLA_Obj *) FLA_malloc( n_output_args * sizeof(FLA_Obj) ); 00888 00889 // Initialize other fields of the structure. 00890 t->n_ready = 0; 00891 t->order = 0; 00892 t->queue = 0; 00893 t->height = 0; 00894 t->thread = 0; 00895 t->cache = 0; 00896 t->hit = FALSE; 00897 00898 t->func = func; 00899 t->cntl = cntl; 00900 t->name = name; 00901 t->n_int_args = n_int_args; 00902 t->n_fla_args = n_fla_args; 00903 t->n_input_args = n_input_args; 00904 t->n_output_args = n_output_args; 00905 00906 t->n_war_args = 0; 00907 t->n_dep_args = 0; 00908 t->dep_arg_head = NULL; 00909 t->dep_arg_tail = NULL; 00910 t->prev_task = NULL; 00911 t->next_task = NULL; 00912 t->prev_wait = NULL; 00913 t->next_wait = NULL; 00914 00915 // Return a pointer to the initialized structure. 00916 return t; 00917 }
void FLASH_Task_free | ( | FLASH_Task * | t | ) |
References FLA_Obj_view::base, FLASH_Task_s::dep_arg_head, FLASH_Task_s::fla_arg, FLA_free(), FLA_Obj_free_task(), FLASH_Task_s::func, FLASH_Task_s::input_arg, FLASH_Task_s::int_arg, FLASH_Task_s::n_dep_args, FLASH_Task_s::n_input_args, FLASH_Task_s::n_output_args, FLA_Obj_struct::n_read_tasks, FLASH_Dep_s::next_dep, FLASH_Task_s::output_arg, FLA_Obj_struct::read_task_head, FLA_Obj_struct::read_task_tail, and FLA_Obj_struct::write_task.
Referenced by FLASH_Queue_exec(), and FLASH_Queue_exec_simulation().
00926 { 00927 int i, j, k; 00928 FLASH_Dep* d; 00929 FLASH_Dep* next_dep; 00930 00931 // Do not clear if the block has been free'd. 00932 if ( t->func != (void *) FLA_Obj_free_task ) 00933 { 00934 // Clearing the last write task in each output block. 00935 for ( i = 0; i < t->n_output_args; i++ ) 00936 t->output_arg[i].base->write_task = NULL; 00937 } 00938 00939 // Cleaning the last read tasks in each input block. 00940 for ( i = 0; i < t->n_input_args; i++ ) 00941 { 00942 k = t->input_arg[i].base->n_read_tasks; 00943 d = t->input_arg[i].base->read_task_head; 00944 00945 t->input_arg[i].base->n_read_tasks = 0; 00946 t->input_arg[i].base->read_task_head = NULL; 00947 t->input_arg[i].base->read_task_tail = NULL; 00948 00949 for ( j = 0; j < k; j++ ) 00950 { 00951 next_dep = d->next_dep; 00952 FLA_free( d ); 00953 d = next_dep; 00954 } 00955 } 00956 00957 // Free the dep_arg field of t. 00958 d = t->dep_arg_head; 00959 00960 for ( i = 0; i < t->n_dep_args; i++ ) 00961 { 00962 next_dep = d->next_dep; 00963 FLA_free( d ); 00964 d = next_dep; 00965 } 00966 00967 // Free the int_arg field of t. 00968 FLA_free( t->int_arg ); 00969 00970 // Free the fla_arg field of t. 00971 FLA_free( t->fla_arg ); 00972 00973 // Free the input_arg field of t. 00974 FLA_free( t->input_arg ); 00975 00976 // Free the output_arg field of t. 00977 FLA_free( t->output_arg ); 00978 00979 // Finally, free the struct itself. 00980 FLA_free( t ); 00981 00982 return; 00983 }
void FLASH_Task_free_parallel | ( | FLASH_Task * | t, | |
void * | arg | |||
) |
References FLA_Obj_view::base, FLASH_Task_s::dep_arg_head, FLASH_Task_s::fla_arg, FLA_free(), FLA_Lock_acquire(), FLA_Lock_release(), FLA_Obj_free_task(), FLASH_Queue_get_num_threads(), FLASH_Task_s::func, FLASH_Task_s::input_arg, FLASH_Task_s::int_arg, FLASH_Task_s::n_dep_args, FLASH_Task_s::n_input_args, FLASH_Task_s::n_output_args, FLA_Obj_struct::n_read_blocks, FLA_Obj_struct::n_read_tasks, FLASH_Dep_s::next_dep, FLASH_Task_s::output_arg, FLA_Obj_struct::read_task_head, FLA_Obj_struct::read_task_tail, FLASH_Queue_variables::war_lock, and FLA_Obj_struct::write_task.
Referenced by FLASH_Queue_exec_parallel_function().
00688 { 00689 FLASH_Queue_vars* args = ( FLASH_Queue_vars* ) arg; 00690 int i, j, k; 00691 int thread; 00692 int n_threads = FLASH_Queue_get_num_threads(); 00693 FLASH_Dep* d; 00694 FLASH_Dep* next_dep; 00695 00696 // Do not clear if the block has been free'd. 00697 if ( t->func != (void *) FLA_Obj_free_task ) 00698 { 00699 // Clearing the last write task in each output block. 00700 for ( i = 0; i < t->n_output_args; i++ ) 00701 t->output_arg[i].base->write_task = NULL; 00702 } 00703 00704 // Cleaning the last read tasks in each input block. 00705 for ( i = 0; i < t->n_input_args; i++ ) 00706 { 00707 thread = t->input_arg[i].base->n_read_blocks % n_threads; 00708 00709 FLA_Lock_acquire( &(args->war_lock[thread]) ); // W *** 00710 00711 k = t->input_arg[i].base->n_read_tasks; 00712 d = t->input_arg[i].base->read_task_head; 00713 00714 t->input_arg[i].base->n_read_tasks = 0; 00715 t->input_arg[i].base->read_task_head = NULL; 00716 t->input_arg[i].base->read_task_tail = NULL; 00717 00718 FLA_Lock_release( &(args->war_lock[thread]) ); // W *** 00719 00720 for ( j = 0; j < k; j++ ) 00721 { 00722 next_dep = d->next_dep; 00723 FLA_free( d ); 00724 d = next_dep; 00725 } 00726 } 00727 00728 // Free the dep_arg field of t. 00729 d = t->dep_arg_head; 00730 00731 for ( i = 0; i < t->n_dep_args; i++ ) 00732 { 00733 next_dep = d->next_dep; 00734 FLA_free( d ); 00735 d = next_dep; 00736 } 00737 00738 // Free the int_arg field of t. 00739 FLA_free( t->int_arg ); 00740 00741 // Free the fla_arg field of t. 00742 FLA_free( t->fla_arg ); 00743 00744 // Free the input_arg field of t. 00745 FLA_free( t->input_arg ); 00746 00747 // Free the output_arg field of t. 00748 FLA_free( t->output_arg ); 00749 00750 // Finally, free the struct itself. 00751 FLA_free( t ); 00752 00753 return; 00754 }
FLASH_Task* FLASH_Task_update_dependencies | ( | FLASH_Task * | t, | |
void * | arg | |||
) |
References FLASH_Task_s::dep_arg_head, FLASH_Queue_variables::dep_lock, FLA_Lock_acquire(), FLA_Lock_release(), FLASH_Queue_get_num_threads(), FLASH_Queue_wait_enqueue(), FLASH_Task_s::n_dep_args, FLASH_Task_s::n_ready, FLASH_Dep_s::next_dep, FLASH_Task_s::order, FLASH_Task_s::queue, FLASH_Queue_variables::run_lock, and FLASH_Dep_s::task.
Referenced by FLASH_Queue_exec_parallel_function().
00640 { 00641 FLASH_Queue_vars* args = ( FLASH_Queue_vars* ) arg; 00642 int i, queue, thread; 00643 int n_threads = FLASH_Queue_get_num_threads(); 00644 FLA_Bool available; 00645 FLASH_Task* task; 00646 FLASH_Dep* d; 00647 00648 // Check each dependent task. 00649 d = t->dep_arg_head; 00650 00651 for ( i = 0; i < t->n_dep_args; i++ ) 00652 { 00653 task = d->task; 00654 queue = task->queue; 00655 thread = task->order % n_threads; 00656 00657 FLA_Lock_acquire( &(args->dep_lock[thread]) ); // D *** 00658 00659 task->n_ready--; 00660 available = ( task->n_ready == 0 ); 00661 00662 FLA_Lock_release( &(args->dep_lock[thread]) ); // D *** 00663 00664 // Place newly ready tasks on sorted queue. 00665 if ( available ) 00666 { 00667 FLA_Lock_acquire( &(args->run_lock[queue]) ); // R *** 00668 00669 FLASH_Queue_wait_enqueue( task, arg ); 00670 00671 FLA_Lock_release( &(args->run_lock[queue]) ); // R *** 00672 } 00673 00674 // Go to the next dep. 00675 d = d->next_dep; 00676 } 00677 00678 return NULL; 00679 }