LLVM OpenMP* Runtime Library
kmp_tasking.c
1 /*
2  * kmp_tasking.c -- OpenMP 3.0 tasking support.
3  */
4 
5 
6 //===----------------------------------------------------------------------===//
7 //
8 // The LLVM Compiler Infrastructure
9 //
10 // This file is dual licensed under the MIT and the University of Illinois Open
11 // Source Licenses. See LICENSE.txt for details.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 
16 #include "kmp.h"
17 #include "kmp_i18n.h"
18 #include "kmp_itt.h"
19 #include "kmp_wait_release.h"
20 
21 #if OMPT_SUPPORT
22 #include "ompt-specific.h"
23 #endif
24 
25 
26 /* ------------------------------------------------------------------------ */
27 /* ------------------------------------------------------------------------ */
28 
29 
30 /* forward declaration */
31 static void __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr );
32 static void __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data );
33 static int __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team );
34 
35 #ifdef OMP_41_ENABLED
36 static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask );
37 #endif
38 
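// __kmp_null_resume_wrapper: wake a thread sleeping on a wait flag by
// dispatching to the resume routine that matches the flag's dynamic type
// (32-bit, 64-bit, or oncore).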
39 static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
40  switch (((kmp_flag_64 *)flag)->get_type()) {
41  case flag32: __kmp_resume_32(gtid, NULL); break;
42  case flag64: __kmp_resume_64(gtid, NULL); break;
43  case flag_oncore: __kmp_resume_oncore(gtid, NULL); break;
44  }
45 }
46 
47 #ifdef BUILD_TIED_TASK_STACK
48 
49 //---------------------------------------------------------------------------
50 // __kmp_trace_task_stack: print the tied tasks from the task stack in order
51 // from top to bottom
52 //
53 // gtid: global thread identifier for thread containing stack
54 // thread_data: thread data for task team thread containing stack
55 // threshold: value above which the trace statement triggers
56 // location: string identifying call site of this function (for trace)
57 
58 static void
59 __kmp_trace_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data, int threshold, char *location )
60 {
61  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
62  kmp_taskdata_t **stack_top = task_stack -> ts_top;
63  kmp_int32 entries = task_stack -> ts_entries;
64  kmp_taskdata_t *tied_task;
65 
66  KA_TRACE(threshold, ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
67  "first_block = %p, stack_top = %p \n",
68  location, gtid, entries, task_stack->ts_first_block, stack_top ) );
69 
70  KMP_DEBUG_ASSERT( stack_top != NULL );
71  KMP_DEBUG_ASSERT( entries > 0 );
72 
73  while ( entries != 0 )
74  {
75  KMP_DEBUG_ASSERT( stack_top != & task_stack->ts_first_block.sb_block[0] );
76  // fix up ts_top if we need to pop from previous block
77  if ( ( entries & TASK_STACK_INDEX_MASK ) == 0 )
78  {
79  kmp_stack_block_t *stack_block = (kmp_stack_block_t *) (stack_top) ;
80 
81  stack_block = stack_block -> sb_prev;
82  stack_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
83  }
84 
85  // finish bookkeeping
86  stack_top--;
87  entries--;
88 
89  tied_task = * stack_top;
90 
91  KMP_DEBUG_ASSERT( tied_task != NULL );
92  KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
93 
94  KA_TRACE(threshold, ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
95  "stack_top=%p, tied_task=%p\n",
96  location, gtid, entries, stack_top, tied_task ) );
97  }
98  KMP_DEBUG_ASSERT( stack_top == & task_stack->ts_first_block.sb_block[0] );
99 
100  KA_TRACE(threshold, ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
101  location, gtid ) );
102 }
103 
104 //---------------------------------------------------------------------------
105 // __kmp_init_task_stack: initialize the task stack for the first time
106 // after a thread_data structure is created.
107 // It should not be necessary to do this again (assuming the stack works).
108 //
109 // gtid: global thread identifier of calling thread
110 // thread_data: thread data for task team thread containing stack
111 
112 static void
113 __kmp_init_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
114 {
115  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
116  kmp_stack_block_t *first_block;
117 
118  // set up the first block of the stack
119  first_block = & task_stack -> ts_first_block;
120  task_stack -> ts_top = (kmp_taskdata_t **) first_block;
121  memset( (void *) first_block, '\0', TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));
122 
123  // initialize the stack to be empty
124  task_stack -> ts_entries = TASK_STACK_EMPTY;
125  first_block -> sb_next = NULL;
126  first_block -> sb_prev = NULL;
127 }
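// Note: the tied-task stack is a linked list of fixed-size blocks of
// TASK_STACK_BLOCK_SIZE entries; ts_top always points at the next free slot, and
// a new block is linked in when the current block fills up (see
// __kmp_push_task_stack below).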
128 
129 
130 //---------------------------------------------------------------------------
131 // __kmp_free_task_stack: free the task stack when thread_data is destroyed.
132 //
133 // gtid: global thread identifier for calling thread
134 // thread_data: thread info for thread containing stack
135 
136 static void
137 __kmp_free_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
138 {
139  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
140  kmp_stack_block_t *stack_block = & task_stack -> ts_first_block;
141 
142  KMP_DEBUG_ASSERT( task_stack -> ts_entries == TASK_STACK_EMPTY );
143  // free from the second block of the stack
144  while ( stack_block != NULL ) {
145  kmp_stack_block_t *next_block = (stack_block) ? stack_block -> sb_next : NULL;
146 
147  stack_block -> sb_next = NULL;
148  stack_block -> sb_prev = NULL;
149  if (stack_block != & task_stack -> ts_first_block) {
150  __kmp_thread_free( __kmp_threads[ gtid ], stack_block ); // free the block, if not the first
151  }
152  stack_block = next_block;
153  }
154  // initialize the stack to be empty
155  task_stack -> ts_entries = 0;
156  task_stack -> ts_top = NULL;
157 }
158 
159 
160 //---------------------------------------------------------------------------
161 // __kmp_push_task_stack: Push the tied task onto the task stack.
162 // Grow the stack if necessary by allocating another block.
163 //
164 // gtid: global thread identifier for calling thread
165 // thread: thread info for thread containing stack
166 // tied_task: the task to push on the stack
167 
168 static void
169 __kmp_push_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t * tied_task )
170 {
171  // GEH - need to consider what to do if tt_threads_data not allocated yet
172  kmp_thread_data_t *thread_data = & thread -> th.th_task_team ->
173  tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
174  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
175 
176  if ( tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser ) {
177  return; // Don't push anything on stack if team or team tasks are serialized
178  }
179 
180  KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
181  KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
182 
183  KA_TRACE(20, ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
184  gtid, thread, tied_task ) );
185  // Store entry
186  * (task_stack -> ts_top) = tied_task;
187 
188  // Do bookkeeping for next push
189  task_stack -> ts_top++;
190  task_stack -> ts_entries++;
191 
192  if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
193  {
194  // Find beginning of this task block
195  kmp_stack_block_t *stack_block =
196  (kmp_stack_block_t *) (task_stack -> ts_top - TASK_STACK_BLOCK_SIZE);
197 
198  // Check if we already have a block
199  if ( stack_block -> sb_next != NULL )
200  { // reset ts_top to beginning of next block
201  task_stack -> ts_top = & stack_block -> sb_next -> sb_block[0];
202  }
203  else
204  { // Alloc new block and link it up
205  kmp_stack_block_t *new_block = (kmp_stack_block_t *)
206  __kmp_thread_calloc(thread, sizeof(kmp_stack_block_t));
207 
208  task_stack -> ts_top = & new_block -> sb_block[0];
209  stack_block -> sb_next = new_block;
210  new_block -> sb_prev = stack_block;
211  new_block -> sb_next = NULL;
212 
213  KA_TRACE(30, ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
214  gtid, tied_task, new_block ) );
215  }
216  }
217  KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
218 }
219 
220 //---------------------------------------------------------------------------
221 // __kmp_pop_task_stack: Pop the tied task from the task stack. Don't return
222 // the task, just check to make sure it matches the ending task passed in.
223 //
224 // gtid: global thread identifier for the calling thread
225 // thread: thread info structure containing stack
226 // tied_task: the task popped off the stack
227 // ending_task: the task that is ending (should match popped task)
228 
229 static void
230 __kmp_pop_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t *ending_task )
231 {
232  // GEH - need to consider what to do if tt_threads_data not allocated yet
233  kmp_thread_data_t *thread_data = & thread -> th.th_task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
234  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
235  kmp_taskdata_t *tied_task;
236 
237  if ( ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser ) {
238  return; // Don't pop anything from stack if team or team tasks are serialized
239  }
240 
241  KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
242  KMP_DEBUG_ASSERT( task_stack -> ts_entries > 0 );
243 
244  KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid, thread ) );
245 
246  // fix up ts_top if we need to pop from previous block
247  if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
248  {
249  kmp_stack_block_t *stack_block =
250  (kmp_stack_block_t *) (task_stack -> ts_top) ;
251 
252  stack_block = stack_block -> sb_prev;
253  task_stack -> ts_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
254  }
255 
256  // finish bookkeeping
257  task_stack -> ts_top--;
258  task_stack -> ts_entries--;
259 
260  tied_task = * (task_stack -> ts_top );
261 
262  KMP_DEBUG_ASSERT( tied_task != NULL );
263  KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
264  KMP_DEBUG_ASSERT( tied_task == ending_task ); // If we built the stack correctly
265 
266  KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
267  return;
268 }
269 #endif /* BUILD_TIED_TASK_STACK */
270 
271 //---------------------------------------------------
272 // __kmp_push_task: Add a task to the thread's deque
273 
274 static kmp_int32
275 __kmp_push_task(kmp_int32 gtid, kmp_task_t * task )
276 {
277  kmp_info_t * thread = __kmp_threads[ gtid ];
278  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
279  kmp_task_team_t * task_team = thread->th.th_task_team;
280  kmp_int32 tid = __kmp_tid_from_gtid( gtid );
281  kmp_thread_data_t * thread_data;
282 
283  KA_TRACE(20, ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata ) );
284 
285  // The first check avoids building task_team thread data if serialized
286  if ( taskdata->td_flags.task_serial ) {
287  KA_TRACE(20, ( "__kmp_push_task: T#%d team serialized; returning TASK_NOT_PUSHED for task %p\n",
288  gtid, taskdata ) );
289  return TASK_NOT_PUSHED;
290  }
291 
292  // Now that serialized tasks have returned, we can assume that we are not in immediate exec mode
293  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
294  if ( ! KMP_TASKING_ENABLED(task_team) ) {
295  __kmp_enable_tasking( task_team, thread );
296  }
297  KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_found_tasks) == TRUE );
298  KMP_DEBUG_ASSERT( TCR_PTR(task_team -> tt.tt_threads_data) != NULL );
299 
300  // Find tasking deque specific to encountering thread
301  thread_data = & task_team -> tt.tt_threads_data[ tid ];
302 
303  // No lock needed since only owner can allocate
304  if (thread_data -> td.td_deque == NULL ) {
305  __kmp_alloc_task_deque( thread, thread_data );
306  }
307 
308  // Check if deque is full
309  if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
310  {
311  KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full; returning TASK_NOT_PUSHED for task %p\n",
312  gtid, taskdata ) );
313  return TASK_NOT_PUSHED;
314  }
315 
316  // Lock the deque for the task push operation
317  __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
318 
319 #if OMP_41_ENABLED
320  // Need to recheck as we can get a proxy task from a thread outside of OpenMP
321  if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
322  {
323  __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
324  KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full on 2nd check; returning TASK_NOT_PUSHED for task %p\n",
325  gtid, taskdata ) );
326  return TASK_NOT_PUSHED;
327  }
328 #else
329  // Must have room since no thread other than the calling thread can add tasks
330  KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) < TASK_DEQUE_SIZE );
331 #endif
332 
333  thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata; // Push taskdata
334  // Wrap index.
335  thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK;
336  TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1); // Adjust task count
337 
338  __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
339 
340  KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
341  "task=%p ntasks=%d head=%u tail=%u\n",
342  gtid, taskdata, thread_data->td.td_deque_ntasks,
343  thread_data->td.td_deque_tail, thread_data->td.td_deque_head) );
344 
345  return TASK_SUCCESSFULLY_PUSHED;
346 }
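// Illustrative note (not part of the runtime proper): the per-thread deque used
// above is a fixed-size ring buffer.  Assuming TASK_DEQUE_SIZE is a power of
// two and TASK_DEQUE_MASK == TASK_DEQUE_SIZE - 1, the indices wrap with a mask:
//
//     tail = ( tail + 1 ) & TASK_DEQUE_MASK;   // owner pushes at the tail
//     head = ( head + 1 ) & TASK_DEQUE_MASK;   // steals come off the head
//
// The owning thread pushes (and later removes its own work) at the tail end,
// while other threads steal from the head; see __kmp_remove_my_task below.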
347 
348 
349 //-----------------------------------------------------------------------------------------
350 // __kmp_pop_current_task_from_thread: set up current task from called thread when team ends
351 // this_thr: thread structure to set current_task in.
352 
353 void
354 __kmp_pop_current_task_from_thread( kmp_info_t *this_thr )
355 {
356  KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(enter): T#%d this_thread=%p, curtask=%p, "
357  "curtask_parent=%p\n",
358  0, this_thr, this_thr -> th.th_current_task,
359  this_thr -> th.th_current_task -> td_parent ) );
360 
361  this_thr -> th.th_current_task = this_thr -> th.th_current_task -> td_parent;
362 
363  KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(exit): T#%d this_thread=%p, curtask=%p, "
364  "curtask_parent=%p\n",
365  0, this_thr, this_thr -> th.th_current_task,
366  this_thr -> th.th_current_task -> td_parent ) );
367 }
368 
369 
370 //---------------------------------------------------------------------------------------
371 // __kmp_push_current_task_to_thread: set up current task in called thread for a new team
372 // this_thr: thread structure to set up
373 // team: team for implicit task data
374 // tid: thread within team to set up
375 
376 void
377 __kmp_push_current_task_to_thread( kmp_info_t *this_thr, kmp_team_t *team, int tid )
378 {
379  // The current task of the thread is the parent of the just-created implicit tasks of the new team
380  KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p curtask=%p "
381  "parent_task=%p\n",
382  tid, this_thr, this_thr->th.th_current_task,
383  team->t.t_implicit_task_taskdata[tid].td_parent ) );
384 
385  KMP_DEBUG_ASSERT (this_thr != NULL);
386 
387  if( tid == 0 ) {
388  if( this_thr->th.th_current_task != & team -> t.t_implicit_task_taskdata[ 0 ] ) {
389  team -> t.t_implicit_task_taskdata[ 0 ].td_parent = this_thr->th.th_current_task;
390  this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ 0 ];
391  }
392  } else {
393  team -> t.t_implicit_task_taskdata[ tid ].td_parent = team -> t.t_implicit_task_taskdata[ 0 ].td_parent;
394  this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ tid ];
395  }
396 
397  KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p curtask=%p "
398  "parent_task=%p\n",
399  tid, this_thr, this_thr->th.th_current_task,
400  team->t.t_implicit_task_taskdata[tid].td_parent ) );
401 }
402 
403 
404 //----------------------------------------------------------------------
405 // __kmp_task_start: bookkeeping for a task starting execution
406 // GTID: global thread id of calling thread
407 // task: task starting execution
408 // current_task: task suspending
409 
410 static void
411 __kmp_task_start( kmp_int32 gtid, kmp_task_t * task, kmp_taskdata_t * current_task )
412 {
413  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
414  kmp_info_t * thread = __kmp_threads[ gtid ];
415 
416  KA_TRACE(10, ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
417  gtid, taskdata, current_task) );
418 
419  KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
420 
421  // mark currently executing task as suspended
422  // TODO: GEH - make sure root team implicit task is initialized properly.
423  // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 );
424  current_task -> td_flags.executing = 0;
425 
426  // Add task to stack if tied
427 #ifdef BUILD_TIED_TASK_STACK
428  if ( taskdata -> td_flags.tiedness == TASK_TIED )
429  {
430  __kmp_push_task_stack( gtid, thread, taskdata );
431  }
432 #endif /* BUILD_TIED_TASK_STACK */
433 
434  // mark starting task as executing and as current task
435  thread -> th.th_current_task = taskdata;
436 
437  KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 0 );
438  KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 0 );
439  taskdata -> td_flags.started = 1;
440  taskdata -> td_flags.executing = 1;
441  KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
442  KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
443 
444  // GEH TODO: shouldn't we pass some sort of location identifier here?
445  // APT: yes, we will pass location here.
446  // need to store current thread state (in a thread or taskdata structure)
447  // before setting work_state, otherwise wrong state is set after end of task
448 
449  KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n",
450  gtid, taskdata ) );
451 
452 #if OMPT_SUPPORT
453  if ((ompt_status == ompt_status_track_callback) &&
454  ompt_callbacks.ompt_callback(ompt_event_task_begin)) {
455  kmp_taskdata_t *parent = taskdata->td_parent;
456  ompt_callbacks.ompt_callback(ompt_event_task_begin)(
457  parent ? parent->ompt_task_info.task_id : ompt_task_id_none,
458  parent ? &(parent->ompt_task_info.frame) : NULL,
459  taskdata->ompt_task_info.task_id,
460  taskdata->ompt_task_info.function);
461  }
462 #endif
463 
464  return;
465 }
466 
467 
468 //----------------------------------------------------------------------
469 // __kmpc_omp_task_begin_if0: report that a given serialized task has started execution
470 // loc_ref: source location information; points to beginning of task block.
471 // gtid: global thread number.
472 // task: task thunk for the started task.
473 
474 void
475 __kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
476 {
477  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
478  kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
479 
480  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p current_task=%p\n",
481  gtid, loc_ref, taskdata, current_task ) );
482 
483  taskdata -> td_flags.task_serial = 1; // Execute this task immediately, not deferred.
484  __kmp_task_start( gtid, task, current_task );
485 
486  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n",
487  gtid, loc_ref, taskdata ) );
488 
489  return;
490 }
491 
492 #ifdef TASK_UNUSED
493 //----------------------------------------------------------------------
494 // __kmpc_omp_task_begin: report that a given task has started execution
495 // NEVER GENERATED BY COMPILER, DEPRECATED!!!
496 
497 void
498 __kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
499 {
500  kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
501 
502  KA_TRACE(10, ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
503  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task ) );
504 
505  __kmp_task_start( gtid, task, current_task );
506 
507  KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n",
508  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
509 
510  return;
511 }
512 #endif // TASK_UNUSED
513 
514 
515 //-------------------------------------------------------------------------------------
516 // __kmp_free_task: free the current task space and the space for shareds
517 // gtid: Global thread ID of calling thread
518 // taskdata: task to free
519 // thread: thread data structure of caller
520 
521 static void
522 __kmp_free_task( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
523 {
524  KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n",
525  gtid, taskdata) );
526 
527  // Check to make sure all flags and counters have the correct values
528  KMP_DEBUG_ASSERT( taskdata->td_flags.tasktype == TASK_EXPLICIT );
529  KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 );
530  KMP_DEBUG_ASSERT( taskdata->td_flags.complete == 1 );
531  KMP_DEBUG_ASSERT( taskdata->td_flags.freed == 0 );
532  KMP_DEBUG_ASSERT( TCR_4(taskdata->td_allocated_child_tasks) == 0 || taskdata->td_flags.task_serial == 1);
533  KMP_DEBUG_ASSERT( TCR_4(taskdata->td_incomplete_child_tasks) == 0 );
534 
535  taskdata->td_flags.freed = 1;
536  // deallocate the taskdata and shared variable blocks associated with this task
537  #if USE_FAST_MEMORY
538  __kmp_fast_free( thread, taskdata );
539  #else /* ! USE_FAST_MEMORY */
540  __kmp_thread_free( thread, taskdata );
541  #endif
542 
543  KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n",
544  gtid, taskdata) );
545 }
546 
547 //-------------------------------------------------------------------------------------
548 // __kmp_free_task_and_ancestors: free the current task and ancestors without children
549 //
550 // gtid: Global thread ID of calling thread
551 // taskdata: task to free
552 // thread: thread data structure of caller
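//
// Note: td_allocated_child_tasks counts the task itself plus each child that has
// been allocated, so a task can be freed only once that count reaches zero.
// Freeing a child may in turn allow its parent (and further ancestors) to be
// freed, which is why this routine walks up the parent chain.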
553 
554 static void
555 __kmp_free_task_and_ancestors( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
556 {
557  kmp_int32 children = 0;
558  kmp_int32 team_or_tasking_serialized = taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser;
559 
560  KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
561 
562  if ( !team_or_tasking_serialized ) {
563  children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
564  KMP_DEBUG_ASSERT( children >= 0 );
565  }
566 
567  // Now, go up the ancestor tree to see if any ancestors can now be freed.
568  while ( children == 0 )
569  {
570  kmp_taskdata_t * parent_taskdata = taskdata -> td_parent;
571 
572  KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
573  "and freeing itself\n", gtid, taskdata) );
574 
575  // --- Deallocate my ancestor task ---
576  __kmp_free_task( gtid, taskdata, thread );
577 
578  taskdata = parent_taskdata;
579 
580  // Stop checking ancestors at implicit task or if tasking serialized
581  // instead of walking up ancestor tree to avoid premature deallocation of ancestors.
582  if ( team_or_tasking_serialized || taskdata -> td_flags.tasktype == TASK_IMPLICIT )
583  return;
584 
585  if ( !team_or_tasking_serialized ) {
586  // Predecrement simulated by "- 1" calculation
587  children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
588  KMP_DEBUG_ASSERT( children >= 0 );
589  }
590  }
591 
592  KA_TRACE(20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
593  "not freeing it yet\n", gtid, taskdata, children) );
594 }
595 
596 //---------------------------------------------------------------------
597 // __kmp_task_finish: bookkeeping to do when a task finishes execution
598 // gtid: global thread ID for calling thread
599 // task: task to be finished
600 // resumed_task: task to be resumed. (may be NULL if task is serialized)
601 
602 static void
603 __kmp_task_finish( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_task )
604 {
605  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
606  kmp_info_t * thread = __kmp_threads[ gtid ];
607  kmp_int32 children = 0;
608 
609 #if OMPT_SUPPORT
610  if ((ompt_status == ompt_status_track_callback) &&
611  ompt_callbacks.ompt_callback(ompt_event_task_end)) {
612  kmp_taskdata_t *parent = taskdata->td_parent;
613  ompt_callbacks.ompt_callback(ompt_event_task_end)(
614  taskdata->ompt_task_info.task_id);
615  }
616 #endif
617 
618  KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming task %p\n",
619  gtid, taskdata, resumed_task) );
620 
621  KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
622 
623  // Pop task from stack if tied
624 #ifdef BUILD_TIED_TASK_STACK
625  if ( taskdata -> td_flags.tiedness == TASK_TIED )
626  {
627  __kmp_pop_task_stack( gtid, thread, taskdata );
628  }
629 #endif /* BUILD_TIED_TASK_STACK */
630 
631  KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
632  taskdata -> td_flags.complete = 1; // mark the task as completed
633  KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 1 );
634  KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
635 
636  // Only need to keep track of count if team parallel and tasking not serialized
637  if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
638  // Predecrement simulated by "- 1" calculation
639  children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
640  KMP_DEBUG_ASSERT( children >= 0 );
641 #if OMP_40_ENABLED
642  if ( taskdata->td_taskgroup )
643  KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
644  __kmp_release_deps(gtid,taskdata);
645 #endif
646  }
647 
648  // td_flags.executing must be marked as 0 after __kmp_release_deps has been called
649  // Otherwise, if a task is executed immediately from the release_deps code
650  // the flag will be reset to 1 again by this same function
651  KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 1 );
652  taskdata -> td_flags.executing = 0; // suspend the finishing task
653 
654  KA_TRACE(20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
655  gtid, taskdata, children) );
656 
657 #if OMP_40_ENABLED
658  /* If the task's destructor thunk flag has been set, we need to invoke the
659  destructor thunk that has been generated by the compiler.
660  The code is placed here because, at this point, other tasks might already have
661  been released, so the destructor invocations can overlap with work in those
662  released tasks. The OpenMP spec is not specific on when the destructors are
663  invoked, so we are free to choose.
664  */
665  if (taskdata->td_flags.destructors_thunk) {
666  kmp_routine_entry_t destr_thunk = task->destructors;
667  KMP_ASSERT(destr_thunk);
668  destr_thunk(gtid, task);
669  }
670 #endif // OMP_40_ENABLED
671 
672  // bookkeeping for resuming task:
673  // GEH - note tasking_ser => task_serial
674  KMP_DEBUG_ASSERT( (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
675  taskdata->td_flags.task_serial);
676  if ( taskdata->td_flags.task_serial )
677  {
678  if (resumed_task == NULL) {
679  resumed_task = taskdata->td_parent; // In a serialized task, the resumed task is the parent
680  }
681  else {
682  // verify resumed task passed in points to parent
683  KMP_DEBUG_ASSERT( resumed_task == taskdata->td_parent );
684  }
685  }
686  else {
687  KMP_DEBUG_ASSERT( resumed_task != NULL ); // verify that resumed task is passed as argument
688  }
689 
690  // Free this task and then ancestor tasks if they have no children.
691  __kmp_free_task_and_ancestors(gtid, taskdata, thread);
692 
693  // FIXME johnmc: I think this statement should be before the last one so if an
694  // asynchronous inquiry peers into the runtime system it doesn't see the freed
695  // task as the current task
696  __kmp_threads[ gtid ] -> th.th_current_task = resumed_task; // restore current_task
697 
698  // TODO: GEH - make sure root team implicit task is initialized properly.
699  // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 );
700  resumed_task->td_flags.executing = 1; // resume previous task
701 
702  KA_TRACE(10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
703  gtid, taskdata, resumed_task) );
704 
705  return;
706 }
707 
708 //---------------------------------------------------------------------
709 // __kmpc_omp_task_complete_if0: report that a task has completed execution
710 // loc_ref: source location information; points to end of task block.
711 // gtid: global thread number.
712 // task: task thunk for the completed task.
713 
714 void
715 __kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
716 {
717  KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
718  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
719 
720  __kmp_task_finish( gtid, task, NULL ); // this routine will provide task to resume
721 
722  KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
723  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
724 
725  return;
726 }
727 
728 #ifdef TASK_UNUSED
729 //---------------------------------------------------------------------
730 // __kmpc_omp_task_complete: report that a task has completed execution
731 // NEVER GENERATED BY COMPILER, DEPRECATED!!!
732 
733 void
734 __kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
735 {
736  KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n",
737  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
738 
739  __kmp_task_finish( gtid, task, NULL ); // Not sure how to find task to resume
740 
741  KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n",
742  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
743  return;
744 }
745 #endif // TASK_UNUSED
746 
747 
748 #if OMPT_SUPPORT
749 //----------------------------------------------------------------------------------------------------
750 // __kmp_task_init_ompt:
751 // Initialize OMPT fields maintained by a task. Since the serial task is initialized before
752 // ompt_initialize is called, we do not yet know whether OMPT will be used when the
753 // serial task is initialized. This function provides the support
754 // needed to initialize OMPT for the serial task after the fact.
755 
756 void
757 __kmp_task_init_ompt( kmp_taskdata_t * task, int tid )
758 {
759  task->ompt_task_info.task_id = __ompt_task_id_new(tid);
760  task->ompt_task_info.function = NULL;
761  task->ompt_task_info.frame.exit_runtime_frame = NULL;
762  task->ompt_task_info.frame.reenter_runtime_frame = NULL;
763 }
764 #endif
765 
766 
767 //----------------------------------------------------------------------------------------------------
768 // __kmp_init_implicit_task: Initialize the appropriate fields in the implicit task for a given thread
769 //
770 // loc_ref: reference to source location of parallel region
771 // this_thr: thread data structure corresponding to implicit task
772 // team: team for this_thr
773 // tid: thread id of given thread within team
774 // set_curr_task: TRUE if need to push current task to thread
775 // NOTE: Routine does not set up the implicit task ICVS. This is assumed to have already been done elsewhere.
776 // TODO: Get better loc_ref. Value passed in may be NULL
777 
778 void
779 __kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *team, int tid, int set_curr_task )
780 {
781  kmp_taskdata_t * task = & team->t.t_implicit_task_taskdata[ tid ];
782 
783  KF_TRACE(10, ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
784  tid, team, task, set_curr_task ? "TRUE" : "FALSE" ) );
785 
786  task->td_task_id = KMP_GEN_TASK_ID();
787  task->td_team = team;
788 // task->td_parent = NULL; // fix for CQ230101 (broken parent task info in debugger)
789  task->td_ident = loc_ref;
790  task->td_taskwait_ident = NULL;
791  task->td_taskwait_counter = 0;
792  task->td_taskwait_thread = 0;
793 
794  task->td_flags.tiedness = TASK_TIED;
795  task->td_flags.tasktype = TASK_IMPLICIT;
796 #if OMP_41_ENABLED
797  task->td_flags.proxy = TASK_FULL;
798 #endif
799 
800  // All implicit tasks are executed immediately, not deferred
801  task->td_flags.task_serial = 1;
802  task->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
803  task->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
804 
805  task->td_flags.started = 1;
806  task->td_flags.executing = 1;
807  task->td_flags.complete = 0;
808  task->td_flags.freed = 0;
809 
810 #if OMP_40_ENABLED
811  task->td_dephash = NULL;
812  task->td_depnode = NULL;
813 #endif
814 
815  if (set_curr_task) { // only do this initialization the first time a thread is created
816  task->td_incomplete_child_tasks = 0;
817  task->td_allocated_child_tasks = 0; // Not used because do not need to deallocate implicit task
818 #if OMP_40_ENABLED
819  task->td_taskgroup = NULL; // An implicit task does not have taskgroup
820 #endif
821  __kmp_push_current_task_to_thread( this_thr, team, tid );
822  } else {
823  KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
824  KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
825  }
826 
827 #if OMPT_SUPPORT
828  __kmp_task_init_ompt(task, tid);
829 #endif
830 
831  KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n",
832  tid, team, task ) );
833 }
834 
835 // Round up a size to the next multiple of val, where val is a power of two
836 // Used to insert padding between structures co-allocated using a single malloc() call
837 static size_t
838 __kmp_round_up_to_val( size_t size, size_t val ) {
839  if ( size & ( val - 1 ) ) {
840  size &= ~ ( val - 1 );
841  if ( size <= KMP_SIZE_T_MAX - val ) {
842  size += val; // Round up if there is no overflow.
843  }; // if
844  }; // if
845  return size;
846 } // __kmp_round_up_to_val
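// Worked example (illustrative): with val == 8, a size of 60 is rounded up to
// 64, while a size that is already a multiple of 8, such as 64, is returned
// unchanged.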
847 
848 
849 //---------------------------------------------------------------------------------
850 // __kmp_task_alloc: Allocate the taskdata and task data structures for a task
851 //
852 // loc_ref: source location information
853 // gtid: global thread number.
854 // flags: include tiedness & task type (explicit vs. implicit) of the ''new'' task encountered.
855 // Converted from kmp_int32 to kmp_tasking_flags_t in routine.
856 // sizeof_kmp_task_t: Size in bytes of kmp_task_t data structure including private vars accessed in task.
857 // sizeof_shareds: Size in bytes of array of pointers to shared vars accessed in task.
858 // task_entry: Pointer to task code entry point generated by compiler.
859 // returns: a pointer to the allocated kmp_task_t structure (task).
860 
861 kmp_task_t *
862 __kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags,
863  size_t sizeof_kmp_task_t, size_t sizeof_shareds,
864  kmp_routine_entry_t task_entry )
865 {
866  kmp_task_t *task;
867  kmp_taskdata_t *taskdata;
868  kmp_info_t *thread = __kmp_threads[ gtid ];
869  kmp_team_t *team = thread->th.th_team;
870  kmp_taskdata_t *parent_task = thread->th.th_current_task;
871  size_t shareds_offset;
872 
873  KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
874  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
875  gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
876  sizeof_shareds, task_entry) );
877 
878  if ( parent_task->td_flags.final ) {
879  if (flags->merged_if0) {
880  }
881  flags->final = 1;
882  }
883 
884 #if OMP_41_ENABLED
885  if ( flags->proxy == TASK_PROXY ) {
886  flags->tiedness = TASK_UNTIED;
887  flags->merged_if0 = 1;
888 
889  /* are we running in a sequential parallel or tskm_immediate_exec... we need tasking support enabled */
890  if ( (thread->th.th_task_team) == NULL ) {
891  /* This should only happen if the team is serialized;
892  set up a task team and propagate it to the thread
893  */
894  KMP_DEBUG_ASSERT(team->t.t_serialized);
895  KA_TRACE(30,("T#%d creating task team in __kmp_task_alloc for proxy task\n", gtid));
896  __kmp_task_team_setup(thread,team,0,1); // 0,1 indicates only setup the current team regardless of nthreads
897  thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
898  }
899  kmp_task_team_t * task_team = thread->th.th_task_team;
900 
901  /* tasking must be enabled now as the task might not be pushed */
902  if ( !KMP_TASKING_ENABLED( task_team ) ) {
903  KA_TRACE(30,("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
904  __kmp_enable_tasking( task_team, thread );
905  kmp_int32 tid = thread->th.th_info.ds.ds_tid;
906  kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
907  // No lock needed since only owner can allocate
908  if (thread_data -> td.td_deque == NULL ) {
909  __kmp_alloc_task_deque( thread, thread_data );
910  }
911  }
912 
913  if ( task_team->tt.tt_found_proxy_tasks == FALSE )
914  TCW_4(task_team -> tt.tt_found_proxy_tasks, TRUE);
915  }
916 #endif
917 
918  // Calculate shared structure offset including padding after kmp_task_t struct
919  // to align pointers in shared struct
920  shareds_offset = sizeof( kmp_taskdata_t ) + sizeof_kmp_task_t;
921  shareds_offset = __kmp_round_up_to_val( shareds_offset, sizeof( void * ));
922 
923  // Allocate a kmp_taskdata_t block and a kmp_task_t block.
924  KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n",
925  gtid, shareds_offset) );
926  KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n",
927  gtid, sizeof_shareds) );
928 
929  // Avoid double allocation here by combining shareds with taskdata
930  #if USE_FAST_MEMORY
931  taskdata = (kmp_taskdata_t *) __kmp_fast_allocate( thread, shareds_offset + sizeof_shareds );
932  #else /* ! USE_FAST_MEMORY */
933  taskdata = (kmp_taskdata_t *) __kmp_thread_malloc( thread, shareds_offset + sizeof_shareds );
934  #endif /* USE_FAST_MEMORY */
935 
936  task = KMP_TASKDATA_TO_TASK(taskdata);
937 
938  // Make sure task & taskdata are aligned appropriately
939 #if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD
940  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(double)-1) ) == 0 );
941  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(double)-1) ) == 0 );
942 #else
943  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(_Quad)-1) ) == 0 );
944  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(_Quad)-1) ) == 0 );
945 #endif
946  if (sizeof_shareds > 0) {
947  // Avoid double allocation here by combining shareds with taskdata
948  task->shareds = & ((char *) taskdata)[ shareds_offset ];
949  // Make sure shareds struct is aligned to pointer size
950  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task->shareds) & (sizeof(void *)-1) ) == 0 );
951  } else {
952  task->shareds = NULL;
953  }
954  task->routine = task_entry;
955  task->part_id = 0; // AC: Always start with 0 part id
956 
957  taskdata->td_task_id = KMP_GEN_TASK_ID();
958  taskdata->td_team = team;
959  taskdata->td_alloc_thread = thread;
960  taskdata->td_parent = parent_task;
961  taskdata->td_level = parent_task->td_level + 1; // increment nesting level
962  taskdata->td_ident = loc_ref;
963  taskdata->td_taskwait_ident = NULL;
964  taskdata->td_taskwait_counter = 0;
965  taskdata->td_taskwait_thread = 0;
966  KMP_DEBUG_ASSERT( taskdata->td_parent != NULL );
967 #if OMP_41_ENABLED
968  // avoid copying icvs for proxy tasks
969  if ( flags->proxy == TASK_FULL )
970 #endif
971  copy_icvs( &taskdata->td_icvs, &taskdata->td_parent->td_icvs );
972 
973  taskdata->td_flags.tiedness = flags->tiedness;
974  taskdata->td_flags.final = flags->final;
975  taskdata->td_flags.merged_if0 = flags->merged_if0;
976 #if OMP_40_ENABLED
977  taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
978 #endif // OMP_40_ENABLED
979 #if OMP_41_ENABLED
980  taskdata->td_flags.proxy = flags->proxy;
981 #endif
982  taskdata->td_flags.tasktype = TASK_EXPLICIT;
983 
984  // GEH - TODO: fix this to copy parent task's value of tasking_ser flag
985  taskdata->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
986 
987  // GEH - TODO: fix this to copy parent task's value of team_serial flag
988  taskdata->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
989 
990  // GEH - Note we serialize the task if the team is serialized to make sure implicit parallel region
991  // tasks are not left until program termination to execute. Also, it helps locality to execute
992  // immediately.
993  taskdata->td_flags.task_serial = ( parent_task->td_flags.final
994  || taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser );
995 
996  taskdata->td_flags.started = 0;
997  taskdata->td_flags.executing = 0;
998  taskdata->td_flags.complete = 0;
999  taskdata->td_flags.freed = 0;
1000 
1001  taskdata->td_flags.native = flags->native;
1002 
1003  taskdata->td_incomplete_child_tasks = 0;
1004  taskdata->td_allocated_child_tasks = 1; // start at one because counts current task and children
1005 #if OMP_40_ENABLED
1006  taskdata->td_taskgroup = parent_task->td_taskgroup; // task inherits the taskgroup from the parent task
1007  taskdata->td_dephash = NULL;
1008  taskdata->td_depnode = NULL;
1009 #endif
1010 
1011  // Only need to keep track of child task counts if team parallel and tasking not serialized or if it is a proxy task
1012 #if OMP_41_ENABLED
1013  if ( flags->proxy == TASK_PROXY || !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
1014 #else
1015  if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
1016 #endif
1017  {
1018  KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
1019 #if OMP_40_ENABLED
1020  if ( parent_task->td_taskgroup )
1021  KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
1022 #endif
1023  // Only need to keep track of allocated child tasks for explicit tasks since implicit not deallocated
1024  if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT ) {
1025  KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
1026  }
1027  }
1028 
1029  KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
1030  gtid, taskdata, taskdata->td_parent) );
1031 
1032 #if OMPT_SUPPORT
1033  if (ompt_status & ompt_status_track) {
1034  taskdata->ompt_task_info.task_id = __ompt_task_id_new(gtid);
1035  taskdata->ompt_task_info.function = (void*) task_entry;
1036  taskdata->ompt_task_info.frame.exit_runtime_frame = NULL;
1037  taskdata->ompt_task_info.frame.reenter_runtime_frame = NULL;
1038  }
1039 #endif
1040 
1041  return task;
1042 }
1043 
1044 
1045 kmp_task_t *
1046 __kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
1047  size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1048  kmp_routine_entry_t task_entry )
1049 {
1050  kmp_task_t *retval;
1051  kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags;
1052 
1053  input_flags->native = FALSE;
1054  // __kmp_task_alloc() sets up all other runtime flags
1055 
1056 #if OMP_41_ENABLED
1057  KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s) "
1058  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
1059  gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
1060  input_flags->proxy ? "proxy" : "",
1061  sizeof_kmp_task_t, sizeof_shareds, task_entry) );
1062 #else
1063  KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) "
1064  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
1065  gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
1066  sizeof_kmp_task_t, sizeof_shareds, task_entry) );
1067 #endif
1068 
1069  retval = __kmp_task_alloc( loc_ref, gtid, input_flags, sizeof_kmp_task_t,
1070  sizeof_shareds, task_entry );
1071 
1072  KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval) );
1073 
1074  return retval;
1075 }
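// Illustrative sketch (simplified; not actual compiler output) of the calls a
// compiler typically emits for "#pragma omp task", assuming the low bit of the
// flags word selects tied (1) versus untied (0) as laid out in
// kmp_tasking_flags_t:
//
//     kmp_task_t *t = __kmpc_omp_task_alloc( &loc, gtid, 1 /* tied */,
//                         sizeof( kmp_task_t ) + private_size /* hypothetical */,
//                         shareds_size /* hypothetical */, &task_entry );
//     /* ... copy firstprivate data into t and fill in t->shareds ... */
//     __kmpc_omp_task( &loc, gtid, t );
//
// task_entry has the kmp_routine_entry_t signature and is eventually invoked as
// (*task->routine)( gtid, task ) by __kmp_invoke_task.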
1076 
1077 //-----------------------------------------------------------
1078 // __kmp_invoke_task: invoke the specified task
1079 //
1080 // gtid: global thread ID of caller
1081 // task: the task to invoke
1082 // current_task: the task to resume after task invocation
1083 
1084 static void
1085 __kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_task )
1086 {
1087  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
1088 #if OMP_40_ENABLED
1089  int discard = 0 /* false */;
1090 #endif
1091  KA_TRACE(30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
1092  gtid, taskdata, current_task) );
1093 
1094 #if OMP_41_ENABLED
1095  if ( taskdata->td_flags.proxy == TASK_PROXY &&
1096  taskdata->td_flags.complete == 1)
1097  {
1098  // This is a proxy task that was already completed but it needs to run
1099  // its bottom-half finish
1100  KA_TRACE(30, ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
1101  gtid, taskdata) );
1102 
1103  __kmp_bottom_half_finish_proxy(gtid,task);
1104 
1105  KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for proxy task %p, resuming task %p\n", gtid, taskdata, current_task) );
1106 
1107  return;
1108  }
1109 #endif
1110 
1111 #if OMP_41_ENABLED
1112  // Proxy tasks are not handled by the runtime
1113  if ( taskdata->td_flags.proxy != TASK_PROXY )
1114 #endif
1115  __kmp_task_start( gtid, task, current_task );
1116 
1117 #if OMPT_SUPPORT
1118  ompt_thread_info_t oldInfo;
1119  kmp_info_t * thread;
1120  if (ompt_status & ompt_status_track) {
1121  // Store the threads states and restore them after the task
1122  thread = __kmp_threads[ gtid ];
1123  oldInfo = thread->th.ompt_thread_info;
1124  thread->th.ompt_thread_info.wait_id = 0;
1125  thread->th.ompt_thread_info.state = ompt_state_work_parallel;
1126  taskdata->ompt_task_info.frame.exit_runtime_frame = __builtin_frame_address(0);
1127  }
1128 #endif
1129 
1130 #if OMP_40_ENABLED
1131  // TODO: cancel tasks if the parallel region has also been cancelled
1132  // TODO: check if this sequence can be hoisted above __kmp_task_start
1133  // if cancellation has been enabled for this run ...
1134  if (__kmp_omp_cancellation) {
1135  kmp_info_t *this_thr = __kmp_threads [ gtid ];
1136  kmp_team_t * this_team = this_thr->th.th_team;
1137  kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
1138  if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) {
1139  // this task belongs to a task group and we need to cancel it
1140  discard = 1 /* true */;
1141  }
1142  }
1143 
1144  //
1145  // Invoke the task routine and pass in relevant data.
1146  // Thunks generated by gcc take a different argument list.
1147  //
1148  if (!discard) {
1149 #endif // OMP_40_ENABLED
1150 #ifdef KMP_GOMP_COMPAT
1151  if (taskdata->td_flags.native) {
1152  ((void (*)(void *))(*(task->routine)))(task->shareds);
1153  }
1154  else
1155 #endif /* KMP_GOMP_COMPAT */
1156  {
1157  (*(task->routine))(gtid, task);
1158  }
1159 #if OMP_40_ENABLED
1160  }
1161 #endif // OMP_40_ENABLED
1162 
1163 
1164 #if OMPT_SUPPORT
1165  if (ompt_status & ompt_status_track) {
1166  thread->th.ompt_thread_info = oldInfo;
1167  taskdata->ompt_task_info.frame.exit_runtime_frame = 0;
1168  }
1169 #endif
1170 
1171 #if OMP_41_ENABLED
1172  // Proxy tasks are not handled by the runtime
1173  if ( taskdata->td_flags.proxy != TASK_PROXY )
1174 #endif
1175  __kmp_task_finish( gtid, task, current_task );
1176 
1177  KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
1178  gtid, taskdata, current_task) );
1179  return;
1180 }
1181 
1182 //-----------------------------------------------------------------------
1183 // __kmpc_omp_task_parts: Schedule a thread-switchable task for execution
1184 //
1185 // loc_ref: location of original task pragma (ignored)
1186 // gtid: Global Thread ID of encountering thread
1187 // new_task: task thunk allocated by __kmp_omp_task_alloc() for the ''new task''
1188 // Returns:
1189 // TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1190 // TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1191 
1192 kmp_int32
1193 __kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1194 {
1195  kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1196 
1197  KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n",
1198  gtid, loc_ref, new_taskdata ) );
1199 
1200  /* Should we execute the new task or queue it? For now, let's just always try to
1201  queue it. If the queue fills up, then we'll execute it. */
1202 
1203  if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1204  { // Execute this task immediately
1205  kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1206  new_taskdata->td_flags.task_serial = 1;
1207  __kmp_invoke_task( gtid, new_task, current_task );
1208  }
1209 
1210  KA_TRACE(10, ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
1211  "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref,
1212  new_taskdata ) );
1213 
1214  return TASK_CURRENT_NOT_QUEUED;
1215 }
1216 
1217 //---------------------------------------------------------------------
1218 // __kmp_omp_task: Schedule a non-thread-switchable task for execution
1219 // gtid: Global Thread ID of encountering thread
1220 // new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
1221 // serialize_immediate: if TRUE then if the task is executed immediately its execution will be serialized
1222 // returns:
1223 //
1224 // TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1225 // TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1226 kmp_int32
1227 __kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate )
1228 {
1229  kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1230 
1231 #if OMPT_SUPPORT
1232  if (ompt_status & ompt_status_track) {
1233  new_taskdata->ompt_task_info.frame.reenter_runtime_frame =
1234  __builtin_frame_address(0);
1235  }
1236 #endif
1237 
1238  /* Should we execute the new task or queue it? For now, let's just always try to
1239  queue it. If the queue fills up, then we'll execute it. */
1240 #if OMP_41_ENABLED
1241  if ( new_taskdata->td_flags.proxy == TASK_PROXY || __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1242 #else
1243  if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1244 #endif
1245  { // Execute this task immediately
1246  kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1247  if ( serialize_immediate )
1248  new_taskdata -> td_flags.task_serial = 1;
1249  __kmp_invoke_task( gtid, new_task, current_task );
1250  }
1251 
1252 #if OMPT_SUPPORT
1253  if (ompt_status & ompt_status_track) {
1254  new_taskdata->ompt_task_info.frame.reenter_runtime_frame = 0;
1255  }
1256 #endif
1257 
1258  return TASK_CURRENT_NOT_QUEUED;
1259 }
1260 
1261 //---------------------------------------------------------------------
1262 // __kmpc_omp_task: Wrapper around __kmp_omp_task to schedule a non-thread-switchable task from
1263 // the parent thread only!
1264 // loc_ref: location of original task pragma (ignored)
1265 // gtid: Global Thread ID of encountering thread
1266 // new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
1267 // returns:
1268 //
1269 // TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1270 // TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1271 
1272 kmp_int32
1273 __kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1274 {
1275  kmp_taskdata_t * new_taskdata;
1276  kmp_int32 res;
1277 
1278  new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1279  KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n",
1280  gtid, loc_ref, new_taskdata ) );
1281 
1282  res = __kmp_omp_task(gtid,new_task,true);
1283 
1284  KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
1285  gtid, loc_ref, new_taskdata ) );
1286  return res;
1287 }
1288 
1289 //-------------------------------------------------------------------------------------
1290 // __kmpc_omp_taskwait: Wait until all tasks generated by the current task are complete
1291 
1292 kmp_int32
1293 __kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid )
1294 {
1295  kmp_taskdata_t * taskdata;
1296  kmp_info_t * thread;
1297  int thread_finished = FALSE;
1298 
1299  KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n",
1300  gtid, loc_ref) );
1301 
1302  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1303  // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1304 
1305  thread = __kmp_threads[ gtid ];
1306  taskdata = thread -> th.th_current_task;
1307 #if USE_ITT_BUILD
1308  // Note: These values are used by ITT events as well.
1309 #endif /* USE_ITT_BUILD */
1310  taskdata->td_taskwait_counter += 1;
1311  taskdata->td_taskwait_ident = loc_ref;
1312  taskdata->td_taskwait_thread = gtid + 1;
1313 
1314 #if USE_ITT_BUILD
1315  void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1316  if ( itt_sync_obj != NULL )
1317  __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1318 #endif /* USE_ITT_BUILD */
1319 
1320 #if OMP_41_ENABLED
1321  if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
1322 #else
1323  if ( ! taskdata->td_flags.team_serial )
1324 #endif
1325  {
1326  // GEH: if team serialized, avoid reading the volatile variable below.
1327  kmp_flag_32 flag(&(taskdata->td_incomplete_child_tasks), 0U);
1328  while ( TCR_4(taskdata -> td_incomplete_child_tasks) != 0 ) {
1329  flag.execute_tasks(thread, gtid, FALSE, &thread_finished
1330  USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
1331  }
1332  }
1333 #if USE_ITT_BUILD
1334  if ( itt_sync_obj != NULL )
1335  __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1336 #endif /* USE_ITT_BUILD */
1337 
1338  // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1339  taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1340  }
1341 
1342  KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
1343  "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1344 
1345  return TASK_CURRENT_NOT_QUEUED;
1346 }
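// Illustrative note: a "#pragma omp taskwait" construct lowers to a single call,
//
//     __kmpc_omp_taskwait( &loc, gtid );
//
// which executes queued tasks while waiting for the current task's
// td_incomplete_child_tasks count to drain to zero.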
1347 
1348 
1349 //-------------------------------------------------
1350 // __kmpc_omp_taskyield: switch to a different task
1351 
1352 kmp_int32
1353 __kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part )
1354 {
1355  kmp_taskdata_t * taskdata;
1356  kmp_info_t * thread;
1357  int thread_finished = FALSE;
1358 
1359  KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
1360  gtid, loc_ref, end_part) );
1361 
1362  if ( __kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel ) {
1363  // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1364 
1365  thread = __kmp_threads[ gtid ];
1366  taskdata = thread -> th.th_current_task;
1367  // Should we model this as a task wait or not?
1368 #if USE_ITT_BUILD
1369  // Note: These values are used by ITT events as well.
1370 #endif /* USE_ITT_BUILD */
1371  taskdata->td_taskwait_counter += 1;
1372  taskdata->td_taskwait_ident = loc_ref;
1373  taskdata->td_taskwait_thread = gtid + 1;
1374 
1375 #if USE_ITT_BUILD
1376  void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1377  if ( itt_sync_obj != NULL )
1378  __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1379 #endif /* USE_ITT_BUILD */
1380  if ( ! taskdata->td_flags.team_serial ) {
1381  kmp_task_team_t * task_team = thread->th.th_task_team;
1382  if (task_team != NULL) {
1383  if (KMP_TASKING_ENABLED(task_team)) {
1384  __kmp_execute_tasks_32( thread, gtid, NULL, FALSE, &thread_finished
1385  USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
1386  }
1387  }
1388  }
1389 #if USE_ITT_BUILD
1390  if ( itt_sync_obj != NULL )
1391  __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1392 #endif /* USE_ITT_BUILD */
1393 
1394  // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1395  taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1396  }
1397 
1398  KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
1399  "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1400 
1401  return TASK_CURRENT_NOT_QUEUED;
1402 }
1403 
1404 
1405 #if OMP_40_ENABLED
1406 //-------------------------------------------------------------------------------------
1407 // __kmpc_taskgroup: Start a new taskgroup
1408 
1409 void
1410 __kmpc_taskgroup( ident_t* loc, int gtid )
1411 {
1412  kmp_info_t * thread = __kmp_threads[ gtid ];
1413  kmp_taskdata_t * taskdata = thread->th.th_current_task;
1414  kmp_taskgroup_t * tg_new =
1415  (kmp_taskgroup_t *)__kmp_thread_malloc( thread, sizeof( kmp_taskgroup_t ) );
1416  KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new) );
1417  tg_new->count = 0;
1418  tg_new->cancel_request = cancel_noreq;
1419  tg_new->parent = taskdata->td_taskgroup;
1420  taskdata->td_taskgroup = tg_new;
1421 }
1422 
1423 
1424 //-------------------------------------------------------------------------------------
1425 // __kmpc_end_taskgroup: Wait until all tasks generated by the current task
1426 // and its descendants are complete
1427 
1428 void
1429 __kmpc_end_taskgroup( ident_t* loc, int gtid )
1430 {
1431  kmp_info_t * thread = __kmp_threads[ gtid ];
1432  kmp_taskdata_t * taskdata = thread->th.th_current_task;
1433  kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
1434  int thread_finished = FALSE;
1435 
1436  KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc) );
1437  KMP_DEBUG_ASSERT( taskgroup != NULL );
1438 
1439  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1440 #if USE_ITT_BUILD
1441  // For ITT the taskgroup wait is similar to taskwait until we need to distinguish them
1442  void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1443  if ( itt_sync_obj != NULL )
1444  __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1445 #endif /* USE_ITT_BUILD */
1446 
1447 #if OMP_41_ENABLED
1448  if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
1449 #else
1450  if ( ! taskdata->td_flags.team_serial )
1451 #endif
1452  {
1453  kmp_flag_32 flag(&(taskgroup->count), 0U);
1454  while ( TCR_4(taskgroup->count) != 0 ) {
1455  flag.execute_tasks(thread, gtid, FALSE, &thread_finished
1456  USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
1457  }
1458  }
1459 
1460 #if USE_ITT_BUILD
1461  if ( itt_sync_obj != NULL )
1462  __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1463 #endif /* USE_ITT_BUILD */
1464  }
1465  KMP_DEBUG_ASSERT( taskgroup->count == 0 );
1466 
1467  // Restore parent taskgroup for the current task
1468  taskdata->td_taskgroup = taskgroup->parent;
1469  __kmp_thread_free( thread, taskgroup );
1470 
1471  KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", gtid, taskdata) );
1472 }
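// ---------------------------------------------------------------------------
// Editor's note: illustrative sketch only, not part of the runtime. It shows
// how a "#pragma omp taskgroup" region is expected to bracket its structured
// block with the two entry points above; the function name is invented for
// illustration.
#if 0
void example_taskgroup_lowering( ident_t *loc, kmp_int32 gtid )
{
    __kmpc_taskgroup( loc, gtid );      // push a fresh kmp_taskgroup_t onto the current task
    // ... generate tasks; each one is counted in the taskgroup's count field ...
    __kmpc_end_taskgroup( loc, gtid );  // execute/steal tasks until count reaches 0, then pop
}
#endif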
1473 #endif
1474 
1475 
1476 //------------------------------------------------------
1477 // __kmp_remove_my_task: remove a task from my own deque
1478 
1479 static kmp_task_t *
1480 __kmp_remove_my_task( kmp_info_t * thread, kmp_int32 gtid, kmp_task_team_t *task_team,
1481  kmp_int32 is_constrained )
1482 {
1483  kmp_task_t * task;
1484  kmp_taskdata_t * taskdata;
1485  kmp_thread_data_t *thread_data;
1486  kmp_uint32 tail;
1487 
1488  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1489  KMP_DEBUG_ASSERT( task_team -> tt.tt_threads_data != NULL ); // Caller should check this condition
1490 
1491  thread_data = & task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
1492 
1493  KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
1494  gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1495  thread_data->td.td_deque_tail) );
1496 
1497  if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1498  KA_TRACE(10, ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1499  gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1500  thread_data->td.td_deque_tail) );
1501  return NULL;
1502  }
1503 
1504  __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
1505 
1506  if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1507  __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1508  KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1509  gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1510  thread_data->td.td_deque_tail) );
1511  return NULL;
1512  }
1513 
1514  tail = ( thread_data -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index.
1515  taskdata = thread_data -> td.td_deque[ tail ];
1516 
1517  if (is_constrained) {
1518  // we need to check if the candidate obeys task scheduling constraint:
1519  // only child of current task can be scheduled
1520  kmp_taskdata_t * current = thread->th.th_current_task;
1521  kmp_int32 level = current->td_level;
1522  kmp_taskdata_t * parent = taskdata->td_parent;
1523  while ( parent != current && parent->td_level > level ) {
1524  parent = parent->td_parent; // check generation up to the level of the current task
1525  KMP_DEBUG_ASSERT(parent != NULL);
1526  }
1527  if ( parent != current ) {
1528  // If the tail task is not a child, then no other children can appear in the deque.
1529  __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1530  KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1531  gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1532  thread_data->td.td_deque_tail) );
1533  return NULL;
1534  }
1535  }
1536 
1537  thread_data -> td.td_deque_tail = tail;
1538  TCW_4(thread_data -> td.td_deque_ntasks, thread_data -> td.td_deque_ntasks - 1);
1539 
1540  __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock );
1541 
1542  KA_TRACE(10, ("__kmp_remove_my_task(exit #4): T#%d task %p removed: ntasks=%d head=%u tail=%u\n",
1543  gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1544  thread_data->td.td_deque_tail) );
1545 
1546  task = KMP_TASKDATA_TO_TASK( taskdata );
1547  return task;
1548 }
1549 
1550 
1551 //-----------------------------------------------------------
1552 // __kmp_steal_task: remove a task from another thread's deque
1553 // Assumes that the calling thread has already checked the existence of the
1554 // task_team thread_data before calling this routine.
1555 
1556 static kmp_task_t *
1557 __kmp_steal_task( kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team,
1558  volatile kmp_uint32 *unfinished_threads, int *thread_finished,
1559  kmp_int32 is_constrained )
1560 {
1561  kmp_task_t * task;
1562  kmp_taskdata_t * taskdata;
1563  kmp_thread_data_t *victim_td, *threads_data;
1564  kmp_int32 victim_tid;
1565 
1566  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1567 
1568  threads_data = task_team -> tt.tt_threads_data;
1569  KMP_DEBUG_ASSERT( threads_data != NULL ); // Caller should check this condition
1570 
1571  victim_tid = victim->th.th_info.ds.ds_tid;
1572  victim_td = & threads_data[ victim_tid ];
1573 
1574  KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: task_team=%p ntasks=%d "
1575  "head=%u tail=%u\n",
1576  gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1577  victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1578 
1579  if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) || // Caller should not check this condition
1580  (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1581  {
1582  KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: task_team=%p "
1583  "ntasks=%d head=%u tail=%u\n",
1584  gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1585  victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1586  return NULL;
1587  }
1588 
1589  __kmp_acquire_bootstrap_lock( & victim_td -> td.td_deque_lock );
1590 
1591  // Check again after we acquire the lock
1592  if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) ||
1593  (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1594  {
1595  __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1596  KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1597  "ntasks=%d head=%u tail=%u\n",
1598  gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1599  victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1600  return NULL;
1601  }
1602 
1603  KMP_DEBUG_ASSERT( victim_td -> td.td_deque != NULL );
1604 
1605  if ( !is_constrained ) {
1606  taskdata = victim_td -> td.td_deque[ victim_td -> td.td_deque_head ];
1607  // Bump head pointer and Wrap.
1608  victim_td -> td.td_deque_head = ( victim_td -> td.td_deque_head + 1 ) & TASK_DEQUE_MASK;
1609  } else {
1610  // While we have postponed tasks let's steal from tail of the deque (smaller tasks)
1611  kmp_int32 tail = ( victim_td -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index.
1612  taskdata = victim_td -> td.td_deque[ tail ];
1613  // we need to check if the candidate obeys task scheduling constraint:
1614  // only child of current task can be scheduled
1615  kmp_taskdata_t * current = __kmp_threads[ gtid ]->th.th_current_task;
1616  kmp_int32 level = current->td_level;
1617  kmp_taskdata_t * parent = taskdata->td_parent;
1618  while ( parent != current && parent->td_level > level ) {
1619  parent = parent->td_parent; // check generation up to the level of the current task
1620  KMP_DEBUG_ASSERT(parent != NULL);
1621  }
1622  if ( parent != current ) {
1623  // If the tail task is not a child, then no other children can appear in the deque (?).
1624  __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1625  KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d could not steal from T#%d: task_team=%p "
1626  "ntasks=%d head=%u tail=%u\n",
1627  gtid, __kmp_gtid_from_thread( threads_data[victim_tid].td.td_thr ),
1628  task_team, victim_td->td.td_deque_ntasks,
1629  victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1630  return NULL;
1631  }
1632  victim_td -> td.td_deque_tail = tail;
1633  }
1634  if (*thread_finished) {
1635  // We need to un-mark this victim as a finished victim. This must be done before
1636  // releasing the lock, or else other threads (starting with the master victim)
1637  // might be prematurely released from the barrier!!!
1638  kmp_uint32 count;
1639 
1640  count = KMP_TEST_THEN_INC32( (kmp_int32 *)unfinished_threads );
1641 
1642  KA_TRACE(20, ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
1643  gtid, count + 1, task_team) );
1644 
1645  *thread_finished = FALSE;
1646  }
1647  TCW_4(victim_td -> td.td_deque_ntasks, TCR_4(victim_td -> td.td_deque_ntasks) - 1);
1648 
1649  __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1650 
1651  KA_TRACE(10, ("__kmp_steal_task(exit #4): T#%d stole task %p from T#%d: task_team=%p "
1652  "ntasks=%d head=%u tail=%u\n",
1653  gtid, taskdata, __kmp_gtid_from_thread( victim ), task_team,
1654  victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
1655  victim_td->td.td_deque_tail) );
1656 
1657  task = KMP_TASKDATA_TO_TASK( taskdata );
1658  return task;
1659 }
1660 
1661 
1662 //-----------------------------------------------------------------------------
1663 // __kmp_execute_tasks_template: Choose and execute tasks until either the condition
1664 // is satisfied (return true) or there are none left (return false).
1665 // final_spin is TRUE if this is the spin at the release barrier.
1666 // thread_finished indicates whether the thread is finished executing all
1667 // the tasks it has on its deque, and is at the release barrier.
1668 // spinner is the location on which to spin.
1669 // spinner == NULL means only execute a single task and return.
1670 // checker is the value to check to terminate the spin.
1671 template <class C>
1672 static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
1673  int *thread_finished
1674  USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1675 {
1676  kmp_task_team_t * task_team;
1677  kmp_thread_data_t * threads_data;
1678  kmp_task_t * task;
1679  kmp_taskdata_t * current_task = thread -> th.th_current_task;
1680  volatile kmp_uint32 * unfinished_threads;
1681  kmp_int32 nthreads, last_stolen, k, tid;
1682 
1683  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1684  KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] );
1685 
1686  task_team = thread -> th.th_task_team;
1687  KMP_DEBUG_ASSERT( task_team != NULL );
1688 
1689  KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d *thread_finished=%d\n",
1690  gtid, final_spin, *thread_finished) );
1691 
1692  threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
1693  KMP_DEBUG_ASSERT( threads_data != NULL );
1694 
1695  nthreads = task_team -> tt.tt_nproc;
1696  unfinished_threads = &(task_team -> tt.tt_unfinished_threads);
1697 #if OMP_41_ENABLED
1698  KMP_DEBUG_ASSERT( nthreads > 1 || task_team->tt.tt_found_proxy_tasks);
1699 #else
1700  KMP_DEBUG_ASSERT( nthreads > 1 );
1701 #endif
1702  KMP_DEBUG_ASSERT( TCR_4((int)*unfinished_threads) >= 0 );
1703 
1704  // Choose tasks from our own work queue.
1705  start:
1706  while (( task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained )) != NULL ) {
1707 #if USE_ITT_BUILD && USE_ITT_NOTIFY
1708  if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1709  if ( itt_sync_obj == NULL ) {
1710  // we are at fork barrier where we could not get the object reliably
1711  itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1712  }
1713  __kmp_itt_task_starting( itt_sync_obj );
1714  }
1715 #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1716  __kmp_invoke_task( gtid, task, current_task );
1717 #if USE_ITT_BUILD
1718  if ( itt_sync_obj != NULL )
1719  __kmp_itt_task_finished( itt_sync_obj );
1720 #endif /* USE_ITT_BUILD */
1721 
1722  // If this thread is only partway through the barrier and the condition
1723  // is met, then return now, so that the barrier gather/release pattern can proceed.
1724  // If this thread is in the last spin loop in the barrier, waiting to be
1725  // released, we know that the termination condition will not be satisfied,
1726  // so don't waste any cycles checking it.
1727  if (flag == NULL || (!final_spin && flag->done_check())) {
1728  KA_TRACE(15, ("__kmp_execute_tasks_template(exit #1): T#%d spin condition satisfied\n", gtid) );
1729  return TRUE;
1730  }
1731  KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1732  }
1733 
1734  // This thread's work queue is empty. If we are in the final spin loop
1735  // of the barrier, check and see if the termination condition is satisfied.
1736 #if OMP_41_ENABLED
1737  // The work queue may be empty but there might be proxy tasks still executing
1738  if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
1739 #else
1740  if (final_spin)
1741 #endif
1742  {
1743  // First, decrement the #unfinished threads, if that has not already
1744  // been done. This decrement might be to the spin location, and
1745  // result in the termination condition being satisfied.
1746  if (! *thread_finished) {
1747  kmp_uint32 count;
1748 
1749  count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
1750  KA_TRACE(20, ("__kmp_execute_tasks_template(dec #1): T#%d dec unfinished_threads to %d task_team=%p\n",
1751  gtid, count, task_team) );
1752  *thread_finished = TRUE;
1753  }
1754 
1755  // It is now unsafe to reference thread->th.th_team !!!
1756  // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1757  // thread to pass through the barrier, where it might reset each thread's
1758  // th.th_team field for the next parallel region.
1759  // If we can steal more work, we know that this has not happened yet.
1760  if (flag != NULL && flag->done_check()) {
1761  KA_TRACE(15, ("__kmp_execute_tasks_template(exit #2): T#%d spin condition satisfied\n", gtid) );
1762  return TRUE;
1763  }
1764  }
1765 
1766 #if OMP_41_ENABLED
1767  // check if there are other threads to steal from, otherwise go back
1768  if ( nthreads == 1 )
1769  goto start;
1770 #endif
1771 
1772  // Try to steal from the last place I stole from successfully.
1773  tid = thread -> th.th_info.ds.ds_tid;//__kmp_tid_from_gtid( gtid );
1774  last_stolen = threads_data[ tid ].td.td_deque_last_stolen;
1775 
1776  if (last_stolen != -1) {
1777  kmp_info_t *other_thread = threads_data[last_stolen].td.td_thr;
1778 
1779  while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
1780  thread_finished, is_constrained )) != NULL)
1781  {
1782 #if USE_ITT_BUILD && USE_ITT_NOTIFY
1783  if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1784  if ( itt_sync_obj == NULL ) {
1785  // we are at fork barrier where we could not get the object reliably
1786  itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1787  }
1788  __kmp_itt_task_starting( itt_sync_obj );
1789  }
1790 #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1791  __kmp_invoke_task( gtid, task, current_task );
1792 #if USE_ITT_BUILD
1793  if ( itt_sync_obj != NULL )
1794  __kmp_itt_task_finished( itt_sync_obj );
1795 #endif /* USE_ITT_BUILD */
1796 
1797  // Check to see if this thread can proceed.
1798  if (flag == NULL || (!final_spin && flag->done_check())) {
1799  KA_TRACE(15, ("__kmp_execute_tasks_template(exit #3): T#%d spin condition satisfied\n",
1800  gtid) );
1801  return TRUE;
1802  }
1803 
1804  KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1805  // If the execution of the stolen task resulted in more tasks being
1806  // placed on our run queue, then restart the whole process.
1807  if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
1808  KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
1809  gtid) );
1810  goto start;
1811  }
1812  }
1813 
1814  // Don't give priority to stealing from this thread anymore.
1815  threads_data[ tid ].td.td_deque_last_stolen = -1;
1816 
1817  // The victim's work queue is empty. If we are in the final spin loop
1818  // of the barrier, check and see if the termination condition is satisfied.
1819 #if OMP_41_ENABLED
1820  // The work queue may be empty but there might be proxy tasks still executing
1821  if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
1822 #else
1823  if (final_spin)
1824 #endif
1825  {
1826  // First, decrement the #unfinished threads, if that has not already
1827  // been done. This decrement might be to the spin location, and
1828  // result in the termination condition being satisfied.
1829  if (! *thread_finished) {
1830  kmp_uint32 count;
1831 
1832  count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
1833  KA_TRACE(20, ("__kmp_execute_tasks_template(dec #2): T#%d dec unfinished_threads to %d "
1834  "task_team=%p\n", gtid, count, task_team) );
1835  *thread_finished = TRUE;
1836  }
1837 
1838  // If __kmp_tasking_mode != tskm_immediate_exec
1839  // then it is now unsafe to reference thread->th.th_team !!!
1840  // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1841  // thread to pass through the barrier, where it might reset each thread's
1842  // th.th_team field for the next parallel region.
1843  // If we can steal more work, we know that this has not happened yet.
1844  if (flag != NULL && flag->done_check()) {
1845  KA_TRACE(15, ("__kmp_execute_tasks_template(exit #4): T#%d spin condition satisfied\n",
1846  gtid) );
1847  return TRUE;
1848  }
1849  }
1850  }
1851 
1852  // Find a different thread to steal work from. Pick a random thread.
1853  // My initial plan was to cycle through all the threads, and only return
1854  // if we tried to steal from every thread, and failed. Arch says that's
1855  // not such a great idea.
1856  // GEH - need yield code in this loop for throughput library mode?
1857  new_victim:
1858  k = __kmp_get_random( thread ) % (nthreads - 1);
1859  if ( k >= thread -> th.th_info.ds.ds_tid ) {
1860  ++k; // Adjusts random distribution to exclude self
1861  }
1862  {
1863  kmp_info_t *other_thread = threads_data[k].td.td_thr;
1864  int first;
1865 
1866  // There is a slight chance that __kmp_enable_tasking() did not wake up
1867  // all threads waiting at the barrier. If this thread is sleeping, then
1868  // wake it up. Since we were going to pay the cache miss penalty
1869  // for referencing another thread's kmp_info_t struct anyway, the check
1870  // shouldn't cost too much performance at this point.
1871  // In extra barrier mode, threads do not sleep at the separate tasking
1872  // barrier, so this isn't a problem.
1873  if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
1874  (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
1875  (TCR_PTR(other_thread->th.th_sleep_loc) != NULL))
1876  {
1877  __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread), other_thread->th.th_sleep_loc);
1878  // A sleeping thread should not have any tasks on its queue.
1879  // There is a slight possibility that it resumes, steals a task from
1880  // another thread, which spawns more tasks, all in the time that it takes
1881  // this thread to check => don't write an assertion that the victim's
1882  // queue is empty. Try stealing from a different thread.
1883  goto new_victim;
1884  }
1885 
1886  // Now try to steal work from the selected thread
1887  first = TRUE;
1888  while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
1889  thread_finished, is_constrained )) != NULL)
1890  {
1891 #if USE_ITT_BUILD && USE_ITT_NOTIFY
1892  if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1893  if ( itt_sync_obj == NULL ) {
1894  // we are at fork barrier where we could not get the object reliably
1895  itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1896  }
1897  __kmp_itt_task_starting( itt_sync_obj );
1898  }
1899 #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1900  __kmp_invoke_task( gtid, task, current_task );
1901 #if USE_ITT_BUILD
1902  if ( itt_sync_obj != NULL )
1903  __kmp_itt_task_finished( itt_sync_obj );
1904 #endif /* USE_ITT_BUILD */
1905 
1906  // Try stealing from this victim again, in the future.
1907  if (first) {
1908  threads_data[ tid ].td.td_deque_last_stolen = k;
1909  first = FALSE;
1910  }
1911 
1912  // Check to see if this thread can proceed.
1913  if (flag == NULL || (!final_spin && flag->done_check())) {
1914  KA_TRACE(15, ("__kmp_execute_tasks_template(exit #5): T#%d spin condition satisfied\n",
1915  gtid) );
1916  return TRUE;
1917  }
1918  KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1919 
1920  // If the execution of the stolen task resulted in more tasks being
1921  // placed on our run queue, then restart the whole process.
1922  if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
1923  KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
1924  gtid) );
1925  goto start;
1926  }
1927  }
1928 
1929  // The victim's work queue is empty. If we are in the final spin loop
1930  // of the barrier, check and see if the termination condition is satisfied.
1931  // Going on and finding a new victim to steal from is expensive, as it
1932  // involves a lot of cache misses, so we definitely want to re-check the
1933  // termination condition before doing that.
1934 #if OMP_41_ENABLED
1935  // The work queue may be empty but there might be proxy tasks still executing
1936  if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
1937 #else
1938  if (final_spin)
1939 #endif
1940  {
1941  // First, decrement the #unfinished threads, if that has not already
1942  // been done. This decrement might be to the spin location, and
1943  // result in the termination condition being satisfied.
1944  if (! *thread_finished) {
1945  kmp_uint32 count;
1946 
1947  count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
1948  KA_TRACE(20, ("__kmp_execute_tasks_template(dec #3): T#%d dec unfinished_threads to %d; "
1949  "task_team=%p\n",
1950  gtid, count, task_team) );
1951  *thread_finished = TRUE;
1952  }
1953 
1954  // If __kmp_tasking_mode != tskm_immediate_exec,
1955  // then it is now unsafe to reference thread->th.th_team !!!
1956  // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1957  // thread to pass through the barrier, where it might reset each thread's
1958  // th.th_team field for the next parallel region.
1959  // If we can steal more work, we know that this has not happened yet.
1960  if (flag != NULL && flag->done_check()) {
1961  KA_TRACE(15, ("__kmp_execute_tasks_template(exit #6): T#%d spin condition satisfied\n", gtid) );
1962  return TRUE;
1963  }
1964  }
1965  }
1966 
1967  KA_TRACE(15, ("__kmp_execute_tasks_template(exit #7): T#%d can't find work\n", gtid) );
1968  return FALSE;
1969 }
1970 
1971 int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
1972  int *thread_finished
1973  USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1974 {
1975  return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
1976  USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1977 }
1978 
1979 int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
1980  int *thread_finished
1981  USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1982 {
1983  return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
1984  USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1985 }
1986 
1987 int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
1988  int *thread_finished
1989  USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1990 {
1991  return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
1992  USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1993 }
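// ---------------------------------------------------------------------------
// Editor's note: illustrative sketch only, not part of the runtime. It mirrors
// the caller pattern used earlier in this file (e.g. __kmpc_omp_taskwait and
// __kmpc_end_taskgroup): wrap a counter in a kmp_flag_32 with checker 0 and
// keep executing/stealing tasks until the counter drains. The helper name and
// the counter parameter are invented for illustration.
#if 0
static void example_drain_counter( kmp_info_t *thread, kmp_int32 gtid,
                                   volatile kmp_uint32 *counter )
{
    int thread_finished = FALSE;
    kmp_flag_32 flag( counter, 0U );            // done_check() is true when *counter == 0
    while ( TCR_4( *counter ) != 0 ) {
        flag.execute_tasks( thread, gtid, FALSE, &thread_finished
                            USE_ITT_BUILD_ARG(NULL), __kmp_task_stealing_constraint );
    }
}
#endif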
1994 
1995 
1996 
1997 //-----------------------------------------------------------------------------
1998 // __kmp_enable_tasking: Allocate task team and resume threads sleeping at the
1999 // next barrier so they can assist in executing enqueued tasks.
2000 // First thread in allocates the task team atomically.
2001 
2002 static void
2003 __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr )
2004 {
2005  kmp_team_t *team;
2006  kmp_thread_data_t *threads_data;
2007  int nthreads, i, is_init_thread;
2008 
2009  KA_TRACE( 10, ( "__kmp_enable_tasking(enter): T#%d\n",
2010  __kmp_gtid_from_thread( this_thr ) ) );
2011 
2012  team = this_thr->th.th_team;
2013  KMP_DEBUG_ASSERT(task_team != NULL);
2014  KMP_DEBUG_ASSERT(team != NULL);
2015 
2016  nthreads = task_team->tt.tt_nproc;
2017  KMP_DEBUG_ASSERT(nthreads > 0);
2018  KMP_DEBUG_ASSERT(nthreads == team->t.t_nproc);
2019 
2020  // Allocate or increase the size of threads_data if necessary
2021  is_init_thread = __kmp_realloc_task_threads_data( this_thr, task_team );
2022 
2023  if (!is_init_thread) {
2024  // Some other thread already set up the array.
2025  KA_TRACE( 20, ( "__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
2026  __kmp_gtid_from_thread( this_thr ) ) );
2027  return;
2028  }
2029  threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
2030  KMP_DEBUG_ASSERT( threads_data != NULL );
2031 
2032  if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
2033  ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) )
2034  {
2035  // Release any threads sleeping at the barrier, so that they can steal
2036  // tasks and execute them. In extra barrier mode, threads do not sleep
2037  // at the separate tasking barrier, so this isn't a problem.
2038  for (i = 0; i < nthreads; i++) {
2039  volatile void *sleep_loc;
2040  kmp_info_t *thread = threads_data[i].td.td_thr;
2041 
2042  if (i == this_thr->th.th_info.ds.ds_tid) {
2043  continue;
2044  }
2045  // Since we haven't locked the thread's suspend mutex lock at this
2046  // point, there is a small window where a thread might be putting
2047  // itself to sleep, but hasn't set the th_sleep_loc field yet.
2048  // To work around this, __kmp_execute_tasks_template() periodically checks
2049  // to see if other threads are sleeping (using the same random
2050  // mechanism that is used for task stealing) and awakens them if
2051  // they are.
2052  if ( ( sleep_loc = TCR_PTR( thread -> th.th_sleep_loc) ) != NULL )
2053  {
2054  KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d waking up thread T#%d\n",
2055  __kmp_gtid_from_thread( this_thr ),
2056  __kmp_gtid_from_thread( thread ) ) );
2057  __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
2058  }
2059  else {
2060  KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
2061  __kmp_gtid_from_thread( this_thr ),
2062  __kmp_gtid_from_thread( thread ) ) );
2063  }
2064  }
2065  }
2066 
2067  KA_TRACE( 10, ( "__kmp_enable_tasking(exit): T#%d\n",
2068  __kmp_gtid_from_thread( this_thr ) ) );
2069 }
2070 
2071 
2072 /* ------------------------------------------------------------------------ */
2073 /* // TODO: Check the comment consistency
2074  * Utility routines for "task teams". A task team (kmp_task_team_t) is kind
2075  * of like a shadow of the kmp_team_t data struct, with a different lifetime.
2076  * After a child thread checks into a barrier and calls __kmp_release() from
2077  * the particular variant of __kmp_<barrier_kind>_barrier_gather(), it can no
2078  * longer assume that the kmp_team_t structure is intact (at any moment, the
2079  * master thread may exit the barrier code and free the team data structure,
2080  * and return the threads to the thread pool).
2081  *
2082  * This does not work with the tasking code, as the thread is still
2083  * expected to participate in the execution of any tasks that may have been
2084  * spawned by a member of the team, and the thread still needs access to
2085  * each of the other threads in the team, so that it can steal work from them.
2086  *
2087  * Enter the existence of the kmp_task_team_t struct. It employs a reference
2088  * counting mechanism, and is allocated by the master thread before calling
2089  * __kmp_<barrier_kind>_release, and then is released by the last thread to
2090  * exit __kmp_<barrier_kind>_release at the next barrier. I.e. the lifetimes
2091  * of the kmp_task_team_t structs for consecutive barriers can overlap
2092  * (and will, unless the master thread is the last thread to exit the barrier
2093  * release phase, which is not typical).
2094  *
2095  * The existence of such a struct is useful outside the context of tasking,
2096  * but for now, I'm trying to keep it specific to the OMP_30_ENABLED macro,
2097  * so that any performance differences show up when comparing the 2.5 vs. 3.0
2098  * libraries.
2099  *
2100  * We currently use the existence of the threads array as an indicator that
2101  * tasks were spawned since the last barrier. If the structure is to be
2102  * useful outside the context of tasking, then this will have to change, but
2103  * not setting the field minimizes the performance impact of tasking on
2104  * barriers, when no explicit tasks were spawned (pushed, actually).
2105  */
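// ---------------------------------------------------------------------------
// Editor's note: illustrative sketch only, not part of the runtime. It
// condenses the reference-counting lifecycle described above, using the
// allocation and unref routines defined later in this file; the function name
// is invented and the barrier code that really drives this is elided.
#if 0
static void example_task_team_lifetime( kmp_info_t *master, kmp_team_t *team )
{
    // Master allocates (or recycles) the struct before the release phase;
    // tt_ref_ct starts at nthreads - 1 because the master holds no reference.
    kmp_task_team_t *task_team = __kmp_allocate_task_team( master, team );

    // ... workers cache the pointer in th.th_task_team and execute/steal
    //     tasks through it while spinning in the barrier ...

    // Each worker eventually drops its reference; the thread that takes the
    // count to zero returns the struct to the global free list:
    //     __kmp_unref_task_team( task_team, worker );
}
#endif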
2106 
2107 
2108 static kmp_task_team_t *__kmp_free_task_teams = NULL; // Free list for task_team data structures
2109 // Lock for task team data structures
2110 static kmp_bootstrap_lock_t __kmp_task_team_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_task_team_lock );
2111 
2112 
2113 //------------------------------------------------------------------------------
2114 // __kmp_alloc_task_deque:
2115 // Allocates a task deque for a particular thread, and initializes the necessary
2116 // data structures relating to the deque. This only happens once per thread
2117 // per task team since task teams are recycled.
2118 // No lock is needed during allocation since each thread allocates its own
2119 // deque.
2120 
2121 static void
2122 __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data )
2123 {
2124  __kmp_init_bootstrap_lock( & thread_data -> td.td_deque_lock );
2125  KMP_DEBUG_ASSERT( thread_data -> td.td_deque == NULL );
2126 
2127  // Initialize last stolen task field to "none"
2128  thread_data -> td.td_deque_last_stolen = -1;
2129 
2130  KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) == 0 );
2131  KMP_DEBUG_ASSERT( thread_data -> td.td_deque_head == 0 );
2132  KMP_DEBUG_ASSERT( thread_data -> td.td_deque_tail == 0 );
2133 
2134  KE_TRACE( 10, ( "__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
2135  __kmp_gtid_from_thread( thread ), TASK_DEQUE_SIZE, thread_data ) );
2136  // Allocate space for task deque, and zero the deque
2137  // Cannot use __kmp_thread_calloc() because threads not around for
2138  // kmp_reap_task_team( ).
2139  thread_data -> td.td_deque = (kmp_taskdata_t **)
2140  __kmp_allocate( TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
2141 }
2142 
2143 
2144 //------------------------------------------------------------------------------
2145 // __kmp_free_task_deque:
2146 // Deallocates a task deque for a particular thread.
2147 // Happens at library deallocation so don't need to reset all thread data fields.
2148 
2149 static void
2150 __kmp_free_task_deque( kmp_thread_data_t *thread_data )
2151 {
2152  __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
2153 
2154  if ( thread_data -> td.td_deque != NULL ) {
2155  TCW_4(thread_data -> td.td_deque_ntasks, 0);
2156  __kmp_free( thread_data -> td.td_deque );
2157  thread_data -> td.td_deque = NULL;
2158  }
2159  __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
2160 
2161 #ifdef BUILD_TIED_TASK_STACK
2162  // GEH: Figure out what to do here for td_susp_tied_tasks
2163  if ( thread_data -> td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY ) {
2164  __kmp_free_task_stack( __kmp_thread_from_gtid( gtid ), thread_data );
2165  }
2166 #endif // BUILD_TIED_TASK_STACK
2167 }
2168 
2169 
2170 //------------------------------------------------------------------------------
2171 // __kmp_realloc_task_threads_data:
2172 // Allocates a threads_data array for a task team, either by allocating an initial
2173 // array or enlarging an existing array. Only the first thread to get the lock
2174 // allocs or enlarges the array and re-initializes the array elements.
2175 // That thread returns "TRUE", the rest return "FALSE".
2176 // Assumes that the new array size is given by task_team -> tt.tt_nproc.
2177 // The current size is given by task_team -> tt.tt_max_threads.
2178 
2179 static int
2180 __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team )
2181 {
2182  kmp_thread_data_t ** threads_data_p;
2183  kmp_int32 nthreads, maxthreads;
2184  int is_init_thread = FALSE;
2185 
2186  if ( TCR_4(task_team -> tt.tt_found_tasks) ) {
2187  // Already reallocated and initialized.
2188  return FALSE;
2189  }
2190 
2191  threads_data_p = & task_team -> tt.tt_threads_data;
2192  nthreads = task_team -> tt.tt_nproc;
2193  maxthreads = task_team -> tt.tt_max_threads;
2194 
2195  // All threads must lock when they encounter the first task of the implicit task
2196  // region to make sure threads_data fields are (re)initialized before they are used.
2197  __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2198 
2199  if ( ! TCR_4(task_team -> tt.tt_found_tasks) ) {
2200  // first thread to enable tasking
2201  kmp_team_t *team = thread -> th.th_team;
2202  int i;
2203 
2204  is_init_thread = TRUE;
2205  if ( maxthreads < nthreads ) {
2206 
2207  if ( *threads_data_p != NULL ) {
2208  kmp_thread_data_t *old_data = *threads_data_p;
2209  kmp_thread_data_t *new_data = NULL;
2210 
2211  KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d reallocating "
2212  "threads data for task_team %p, new_size = %d, old_size = %d\n",
2213  __kmp_gtid_from_thread( thread ), task_team,
2214  nthreads, maxthreads ) );
2215  // Reallocate threads_data to have more elements than current array
2216  // Cannot use __kmp_thread_realloc() because threads not around for
2217  // kmp_reap_task_team( ). Note all new array entries are initialized
2218  // to zero by __kmp_allocate().
2219  new_data = (kmp_thread_data_t *)
2220  __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
2221  // copy old data to new data
2222  KMP_MEMCPY_S( (void *) new_data, nthreads * sizeof(kmp_thread_data_t),
2223  (void *) old_data,
2224  maxthreads * sizeof(kmp_taskdata_t *) );
2225 
2226 #ifdef BUILD_TIED_TASK_STACK
2227  // GEH: Figure out if this is the right thing to do
2228  for (i = maxthreads; i < nthreads; i++) {
2229  kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2230  __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2231  }
2232 #endif // BUILD_TIED_TASK_STACK
2233  // Install the new data and free the old data
2234  (*threads_data_p) = new_data;
2235  __kmp_free( old_data );
2236  }
2237  else {
2238  KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d allocating "
2239  "threads data for task_team %p, size = %d\n",
2240  __kmp_gtid_from_thread( thread ), task_team, nthreads ) );
2241  // Make the initial allocate for threads_data array, and zero entries
2242  // Cannot use __kmp_thread_calloc() because threads not around for
2243  // kmp_reap_task_team( ).
2244  *threads_data_p = (kmp_thread_data_t *)
2245  __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
2246 #ifdef BUILD_TIED_TASK_STACK
2247  // GEH: Figure out if this is the right thing to do
2248  for (i = 0; i < nthreads; i++) {
2249  kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2250  __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2251  }
2252 #endif // BUILD_TIED_TASK_STACK
2253  }
2254  task_team -> tt.tt_max_threads = nthreads;
2255  }
2256  else {
2257  // If array has (more than) enough elements, go ahead and use it
2258  KMP_DEBUG_ASSERT( *threads_data_p != NULL );
2259  }
2260 
2261  // initialize threads_data pointers back to thread_info structures
2262  for (i = 0; i < nthreads; i++) {
2263  kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2264  thread_data -> td.td_thr = team -> t.t_threads[i];
2265 
2266  if ( thread_data -> td.td_deque_last_stolen >= nthreads) {
2267  // The last stolen field survives across teams / barrier, and the number
2268  // of threads may have changed. It's possible (likely?) that a new
2269  // parallel region will exhibit the same behavior as the previous region.
2270  thread_data -> td.td_deque_last_stolen = -1;
2271  }
2272  }
2273 
2274  KMP_MB();
2275  TCW_SYNC_4(task_team -> tt.tt_found_tasks, TRUE);
2276  }
2277 
2278  __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2279  return is_init_thread;
2280 }
2281 
2282 
2283 //------------------------------------------------------------------------------
2284 // __kmp_free_task_threads_data:
2285 // Deallocates a threads_data array for a task team, including any attached
2286 // tasking deques. Only occurs at library shutdown.
2287 
2288 static void
2289 __kmp_free_task_threads_data( kmp_task_team_t *task_team )
2290 {
2291  __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2292  if ( task_team -> tt.tt_threads_data != NULL ) {
2293  int i;
2294  for (i = 0; i < task_team->tt.tt_max_threads; i++ ) {
2295  __kmp_free_task_deque( & task_team -> tt.tt_threads_data[i] );
2296  }
2297  __kmp_free( task_team -> tt.tt_threads_data );
2298  task_team -> tt.tt_threads_data = NULL;
2299  }
2300  __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2301 }
2302 
2303 
2304 //------------------------------------------------------------------------------
2305 // __kmp_allocate_task_team:
2306 // Allocates a task team associated with a specific team, taking it from
2307 // the global task team free list if possible. Also initializes data structures.
2308 
2309 static kmp_task_team_t *
2310 __kmp_allocate_task_team( kmp_info_t *thread, kmp_team_t *team )
2311 {
2312  kmp_task_team_t *task_team = NULL;
2313  int nthreads;
2314 
2315  KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d entering; team = %p\n",
2316  (thread ? __kmp_gtid_from_thread( thread ) : -1), team ) );
2317 
2318  if (TCR_PTR(__kmp_free_task_teams) != NULL) {
2319  // Take a task team from the task team pool
2320  __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2321  if (__kmp_free_task_teams != NULL) {
2322  task_team = __kmp_free_task_teams;
2323  TCW_PTR(__kmp_free_task_teams, task_team -> tt.tt_next);
2324  task_team -> tt.tt_next = NULL;
2325  }
2326  __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2327  }
2328 
2329  if (task_team == NULL) {
2330  KE_TRACE( 10, ( "__kmp_allocate_task_team: T#%d allocating "
2331  "task team for team %p\n",
2332  __kmp_gtid_from_thread( thread ), team ) );
2333  // Allocate a new task team if one is not available.
2334  // Cannot use __kmp_thread_malloc() because threads not around for
2335  // kmp_reap_task_team( ).
2336  task_team = (kmp_task_team_t *) __kmp_allocate( sizeof(kmp_task_team_t) );
2337  __kmp_init_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2338  //task_team -> tt.tt_threads_data = NULL; // AC: __kmp_allocate zeroes returned memory
2339  //task_team -> tt.tt_max_threads = 0;
2340  //task_team -> tt.tt_next = NULL;
2341  }
2342 
2343  TCW_4(task_team -> tt.tt_found_tasks, FALSE);
2344 #if OMP_41_ENABLED
2345  TCW_4(task_team -> tt.tt_found_proxy_tasks, FALSE);
2346 #endif
2347  task_team -> tt.tt_nproc = nthreads = team->t.t_nproc;
2348 
2349  TCW_4( task_team -> tt.tt_unfinished_threads, nthreads );
2350  TCW_4( task_team -> tt.tt_active, TRUE );
2351  TCW_4( task_team -> tt.tt_ref_ct, nthreads - 1);
2352 
2353  KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d exiting; task_team = %p\n",
2354  (thread ? __kmp_gtid_from_thread( thread ) : -1), task_team ) );
2355  return task_team;
2356 }
2357 
2358 
2359 //------------------------------------------------------------------------------
2360 // __kmp_free_task_team:
2361 // Frees the task team associated with a specific thread, and adds it
2362 // to the global task team free list.
2363 //
2364 
2365 static void
2366 __kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team )
2367 {
2368  KA_TRACE( 20, ( "__kmp_free_task_team: T#%d task_team = %p\n",
2369  thread ? __kmp_gtid_from_thread( thread ) : -1, task_team ) );
2370 
2371  KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_ref_ct) == 0 );
2372 
2373  // Put task team back on free list
2374  __kmp_acquire_bootstrap_lock( & __kmp_task_team_lock );
2375 
2376  KMP_DEBUG_ASSERT( task_team -> tt.tt_next == NULL );
2377  task_team -> tt.tt_next = __kmp_free_task_teams;
2378  TCW_4(task_team -> tt.tt_found_tasks, FALSE);
2379  TCW_PTR(__kmp_free_task_teams, task_team);
2380 
2381  __kmp_release_bootstrap_lock( & __kmp_task_team_lock );
2382 }
2383 
2384 
2385 //------------------------------------------------------------------------------
2386 // __kmp_reap_task_teams:
2387 // Free all the task teams on the task team free list.
2388 // Should only be done during library shutdown.
2389 // Cannot do anything that needs a thread structure or gtid since they are already gone.
2390 
2391 void
2392 __kmp_reap_task_teams( void )
2393 {
2394  kmp_task_team_t *task_team;
2395 
2396  if ( TCR_PTR(__kmp_free_task_teams) != NULL ) {
2397  // Free all task_teams on the free list
2398  __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2399  while ( ( task_team = __kmp_free_task_teams ) != NULL ) {
2400  __kmp_free_task_teams = task_team -> tt.tt_next;
2401  task_team -> tt.tt_next = NULL;
2402 
2403  // Free threads_data if necessary
2404  if ( task_team -> tt.tt_threads_data != NULL ) {
2405  __kmp_free_task_threads_data( task_team );
2406  }
2407  __kmp_free( task_team );
2408  }
2409  __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2410  }
2411 }
2412 
2413 
2414 //------------------------------------------------------------------------------
2415 // __kmp_unref_task_team:
2416 // Removes one thread from referencing the task team structure by
2417 // decreasing the reference count, and deallocates the task team if there
2418 // are no more references to it.
2419 //
2420 void
2421 __kmp_unref_task_team( kmp_task_team_t *task_team, kmp_info_t *thread )
2422 {
2423  kmp_uint ref_ct;
2424 
2425  ref_ct = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& task_team->tt.tt_ref_ct) ) - 1;
2426 
2427  KA_TRACE( 20, ( "__kmp_unref_task_team: T#%d task_team = %p ref_ct = %d\n",
2428  __kmp_gtid_from_thread( thread ), task_team, ref_ct ) );
2429 
2430 
2431  if ( ref_ct == 0 ) {
2432  __kmp_free_task_team( thread, task_team );
2433  }
2434 
2435  TCW_PTR( *((volatile kmp_task_team_t **)(&thread->th.th_task_team)), NULL );
2436 }
2437 
2438 
2439 //------------------------------------------------------------------------------
2440 // __kmp_wait_to_unref_task_teams:
2441 // Some threads could still be in the fork barrier release code, possibly
2442 // trying to steal tasks. Wait for each thread to unreference its task team.
2443 //
2444 void
2445 __kmp_wait_to_unref_task_teams(void)
2446 {
2447  kmp_info_t *thread;
2448  kmp_uint32 spins;
2449  int done;
2450 
2451  KMP_INIT_YIELD( spins );
2452 
2453 
2454  for (;;) {
2455  done = TRUE;
2456 
2457  // TODO: GEH - this may be wrong because some sync would be necessary
2458  // in case threads are added to the pool during the traversal.
2459  // Need to verify that lock for thread pool is held when calling
2460  // this routine.
2461  for (thread = (kmp_info_t *)__kmp_thread_pool;
2462  thread != NULL;
2463  thread = thread->th.th_next_pool)
2464  {
2465 #if KMP_OS_WINDOWS
2466  DWORD exit_val;
2467 #endif
2468  if ( TCR_PTR(thread->th.th_task_team) == NULL ) {
2469  KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
2470  __kmp_gtid_from_thread( thread ) ) );
2471  continue;
2472  }
2473 #if KMP_OS_WINDOWS
2474  // TODO: GEH - add this check for Linux* OS / OS X* as well?
2475  if (!__kmp_is_thread_alive(thread, &exit_val)) {
2476  if (TCR_PTR(thread->th.th_task_team) != NULL) {
2477  __kmp_unref_task_team( thread->th.th_task_team, thread );
2478  }
2479  continue;
2480  }
2481 #endif
2482 
2483  done = FALSE; // Because th_task_team pointer is not NULL for this thread
2484 
2485  KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to unreference task_team\n",
2486  __kmp_gtid_from_thread( thread ) ) );
2487 
2488  if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
2489  volatile void *sleep_loc;
2490  // If the thread is sleeping, awaken it.
2491  if ( ( sleep_loc = TCR_PTR( thread->th.th_sleep_loc) ) != NULL ) {
2492  KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
2493  __kmp_gtid_from_thread( thread ), __kmp_gtid_from_thread( thread ) ) );
2494  __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
2495  }
2496  }
2497  }
2498  if (done) {
2499  break;
2500  }
2501 
2502  // If we are oversubscribed,
2503  // or have waited a bit (and library mode is throughput), yield.
2504  // Pause is in the following code.
2505  KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
2506  KMP_YIELD_SPIN( spins ); // Yields only if KMP_LIBRARY=throughput
2507  }
2508 
2509 
2510 }
2511 
2512 
2513 //------------------------------------------------------------------------------
2514 // __kmp_task_team_setup: Create a task_team for the current team, but use
2515 // an already created, unused one if it already exists.
2516 // This may be called by any thread, but only for teams with # threads >1.
2517 void
2518 __kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team, int both, int always )
2519 {
2520  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2521 
2522  if ( ( team->t.t_task_team[this_thr->th.th_task_state] == NULL ) && ( always || team->t.t_nproc > 1 ) ) {
2523  // Allocate a new task team, which will be propagated to
2524  // all of the worker threads after the barrier. As they
2525  // spin in the barrier release phase, they will continue
2526  // to use the previous task team struct, until they receive
2527  // the signal to stop checking for tasks (they can't safely
2528  // reference the kmp_team_t struct, which could be reallocated
2529  // by the master thread).
2530  team->t.t_task_team[this_thr->th.th_task_state] = __kmp_allocate_task_team( this_thr, team );
2531  KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p for team %d\n",
2532  __kmp_gtid_from_thread(this_thr), team->t.t_task_team[this_thr->th.th_task_state],
2533  ((team != NULL) ? team->t.t_id : -1)) );
2534  }
2535  //else
2536  // All threads have reported in, and no tasks were spawned
2537  // for this release->gather region. Leave the old task
2538  // team struct in place for the upcoming region. No task
2539  // teams are formed for serialized teams.
2540  if (both) {
2541  int other_team = 1 - this_thr->th.th_task_state;
2542  if ( ( team->t.t_task_team[other_team] == NULL ) && ( team->t.t_nproc > 1 ) ) { // setup other team as well
2543  team->t.t_task_team[other_team] = __kmp_allocate_task_team( this_thr, team );
2544  KA_TRACE( 20, ( "__kmp_task_team_setup: Master T#%d created new task_team %p for team %d\n",
2545  __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
2546  ((team != NULL) ? team->t.t_id : -1)) );
2547  }
2548  }
2549 }
2550 
2551 
2552 //------------------------------------------------------------------------------
2553 // __kmp_task_team_sync: Propagation of task team data from team to threads
2554 // which happens just after the release phase of a team barrier. This may be
2555 // called by any thread, but only for teams with # threads > 1.
2556 
2557 void
2558 __kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team )
2559 {
2560  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2561 
2562  // In case this thread never saw that the task team was no longer active, unref/deallocate it now.
2563  if ( this_thr->th.th_task_team != NULL ) {
2564  if ( ! TCR_SYNC_4( this_thr->th.th_task_team->tt.tt_active ) ) {
2565  KMP_DEBUG_ASSERT( ! KMP_MASTER_TID( __kmp_tid_from_gtid( __kmp_gtid_from_thread( this_thr ) ) ) );
2566  __kmp_unref_task_team( this_thr->th.th_task_team, this_thr );
2567  } else { // We are re-using a task team that was never enabled.
2568  KMP_DEBUG_ASSERT(this_thr->th.th_task_team == team->t.t_task_team[this_thr->th.th_task_state]);
2569  }
2570  }
2571 
2572  // Toggle the th_task_state field, to switch which task_team this thread refers to
2573  this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
2574  // It is now safe to propagate the task team pointer from the team struct to the current thread.
2575  TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team[this_thr->th.th_task_state]);
2576  KA_TRACE( 20, ( "__kmp_task_team_sync: Thread T#%d task team assigned pointer (%p) from Team #%d task team\n",
2577  __kmp_gtid_from_thread( this_thr ), &this_thr->th.th_task_team,
2578  this_thr->th.th_task_team, ((team != NULL) ? (team->t.t_id) : -1) ) );
2579 }
2580 
2581 
2582 //------------------------------------------------------------------------------
2583 // __kmp_task_team_wait: Master thread waits for outstanding tasks after the
2584 // barrier gather phase. Only called by master thread if #threads in team > 1 or if proxy tasks were created
2585 void
2586 __kmp_task_team_wait( kmp_info_t *this_thr, kmp_team_t *team
2587  USE_ITT_BUILD_ARG(void * itt_sync_obj)
2588  )
2589 {
2590  kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];
2591 
2592  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2593  KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team );
2594 
2595  if ( ( task_team != NULL ) && KMP_TASKING_ENABLED(task_team) ) {
2596  KA_TRACE( 20, ( "__kmp_task_team_wait: Master T#%d waiting for all tasks: task_team = %p\n",
2597  __kmp_gtid_from_thread( this_thr ), task_team ) );
2598  // All worker threads might have dropped through to the release phase, but could still
2599  // be executing tasks. Wait here for all tasks to complete. To avoid memory contention,
2600  // only the master thread checks for the termination condition.
2601  kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U);
2602  flag.wait(this_thr, TRUE
2603  USE_ITT_BUILD_ARG(itt_sync_obj));
2604 
2605  // Kill the old task team, so that the worker threads will stop referencing it while spinning.
2606  // They will deallocate it when the reference count reaches zero.
2607  // The master thread is not included in the ref count.
2608  KA_TRACE( 20, ( "__kmp_task_team_wait: Master T#%d deactivating task_team %p\n",
2609  __kmp_gtid_from_thread( this_thr ), task_team ) );
2610 #if OMP_41_ENABLED
2611  KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 || task_team->tt.tt_found_proxy_tasks == TRUE );
2612  TCW_SYNC_4( task_team->tt.tt_found_proxy_tasks, FALSE );
2613 #else
2614  KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 );
2615 #endif
2616  TCW_SYNC_4( task_team->tt.tt_active, FALSE );
2617  KMP_MB();
2618 
2619  TCW_PTR(this_thr->th.th_task_team, NULL);
2620  team->t.t_task_team[this_thr->th.th_task_state] = NULL;
2621  }
2622 }
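// ---------------------------------------------------------------------------
// Editor's note: illustrative sketch only, not part of the runtime. It is a
// rough per-barrier ordering of the three routines above, reconstructed only
// from their header comments (wait at the gather phase, setup for the next
// region, sync just after the release phase); the real call sites live in the
// barrier code, not in this file, and the function name is invented.
#if 0
static void example_task_team_barrier_hooks( kmp_info_t *this_thr, kmp_team_t *team, int is_master )
{
    // ... barrier gather phase completes ...
    if ( is_master ) {
        __kmp_task_team_wait( this_thr, team
                              USE_ITT_BUILD_ARG(NULL) );  // drain outstanding tasks, deactivate
        __kmp_task_team_setup( this_thr, team, 0, 0 );    // task team for the upcoming region
    }
    // ... barrier release phase ...
    __kmp_task_team_sync( this_thr, team );  // every thread: toggle th_task_state, pick up pointer
}
#endif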
2623 
2624 
2625 //------------------------------------------------------------------------------
2626 // __kmp_tasking_barrier:
2627 // Internal function to execute all tasks prior to a regular barrier or a
2628 // join barrier. It is a full barrier itself, which unfortunately turns
2629 // regular barriers into double barriers and join barriers into 1 1/2
2630 // barriers.
2631 // This routine may only be called when __kmp_tasking_mode == tskm_extra_barrier.
2632 
2633 void
2634 __kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid )
2635 {
2636  volatile kmp_uint32 *spin = &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads;
2637  int flag = FALSE;
2638  KMP_DEBUG_ASSERT( __kmp_tasking_mode == tskm_extra_barrier );
2639 
2640 #if USE_ITT_BUILD
2641  KMP_FSYNC_SPIN_INIT( spin, (kmp_uint32*) NULL );
2642 #endif /* USE_ITT_BUILD */
2643  kmp_flag_32 spin_flag(spin, 0U);
2644  while (! spin_flag.execute_tasks(thread, gtid, TRUE, &flag
2645  USE_ITT_BUILD_ARG(NULL), 0 ) ) {
2646 #if USE_ITT_BUILD
2647  // TODO: What about itt_sync_obj??
2648  KMP_FSYNC_SPIN_PREPARE( spin );
2649 #endif /* USE_ITT_BUILD */
2650 
2651  if( TCR_4(__kmp_global.g.g_done) ) {
2652  if( __kmp_global.g.g_abort )
2653  __kmp_abort_thread( );
2654  break;
2655  }
2656  KMP_YIELD( TRUE ); // GH: We always yield here
2657  }
2658 #if USE_ITT_BUILD
2659  KMP_FSYNC_SPIN_ACQUIRED( (void*) spin );
2660 #endif /* USE_ITT_BUILD */
2661 }
2662 
2663 
2664 #if OMP_41_ENABLED
2665 
2666 /* __kmp_give_task puts a task into a given thread queue if:
2667  - the queue for that thread has been created
2668  - there's space in that queue
2669 
2670  Because of this, __kmp_push_task needs to check if there's space after getting the lock
2671  */
2672 static bool __kmp_give_task ( kmp_info_t *thread, kmp_int32 tid, kmp_task_t * task )
2673 {
2674  kmp_task_team_t * task_team = thread->th.th_task_team;
2675  kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
2676  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
2677  bool result = false;
2678 
2679  KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n", taskdata, tid ) );
2680 
2681  // assert tasking is enabled? what if not?
2682  KMP_DEBUG_ASSERT( task_team != NULL );
2683 
2684  if (thread_data -> td.td_deque == NULL ) {
2685  // There's no queue in this thread, go find another one
2686  // We're guaranteed that at least one thread has a queue
2687  KA_TRACE(30, ("__kmp_give_task: thread %d has no queue while giving task %p.\n", tid, taskdata ) );
2688  return result;
2689  }
2690 
2691  if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
2692  {
2693  KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
2694  return result;
2695  }
2696 
2697  __kmp_acquire_bootstrap_lock( & thread_data-> td.td_deque_lock );
2698 
2699  if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
2700  {
2701  KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
2702  goto release_and_exit;
2703  }
2704 
2705  thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata;
2706  // Wrap index.
2707  thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK;
2708  TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1);
2709 
2710  result = true;
2711  KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n", taskdata, tid ) );
2712 
2713 release_and_exit:
2714  __kmp_release_bootstrap_lock( & thread_data-> td.td_deque_lock );
2715 
2716  return result;
2717 }
2718 
2719 
2720 /* The finish of a proxy task is divided into two pieces:
2721  - the top half is the part that can be done from a thread outside the team
2722  - the bottom half must be run from a thread within the team
2723 
2724  In order to run the bottom half the task gets queued back into one of the threads of the team.
2725  Once the td_incomplete_child_tasks counter of the parent is decremented the threads can leave the barriers.
2726  So, the bottom half needs to be queued before the counter is decremented. The top half is therefore divided into two parts:
2727  - things that can be run before queuing the bottom half
2728  - things that must be run after queuing the bottom half
2729 
2730  This creates a second race as the bottom half can free the task before the second top half is executed. To avoid this
2731  we use the td_incomplete_child_tasks counter of the proxy task to synchronize the top and bottom half.
2732 */
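The ordering described in the comment above is easier to follow in isolation, before reading the three helpers below. The following is a minimal standalone sketch, not runtime code: it uses C11 <stdatomic.h> and a toy struct as stand-ins for the KMP_TEST_THEN_* / TCR_4 macros and kmp_taskdata_t, but it mirrors how the "imaginary child" counter makes the bottom half wait for the second top half before freeing the task.

#include <stdatomic.h>
#include <stdbool.h>

typedef struct toy_proxy_task {
    atomic_int incomplete_children;   /* stand-in for td_incomplete_child_tasks */
    bool       complete;              /* stand-in for td_flags.complete */
} toy_proxy_task_t;

static void toy_first_top_half( toy_proxy_task_t *t )
{
    t->complete = true;
    /* the "imaginary child" keeps the bottom half from freeing the task early */
    atomic_fetch_add( &t->incomplete_children, 1 );
}

static void toy_second_top_half( toy_proxy_task_t *t )
{
    /* remove the imaginary child; the bottom half may now free the task */
    atomic_fetch_sub( &t->incomplete_children, 1 );
}

static void toy_bottom_half( toy_proxy_task_t *t )
{
    /* spin until the second top half has run (should happen quickly) */
    while ( atomic_load( &t->incomplete_children ) > 0 )
        ;
    /* ... release dependences and free the task here ... */
}
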
2733 
2734 static void __kmp_first_top_half_finish_proxy( kmp_taskdata_t * taskdata )
2735 {
2736  KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
2737  KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
2738  KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
2739  KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
2740 
2741  taskdata -> td_flags.complete = 1; // mark the task as completed
2742 
2743  if ( taskdata->td_taskgroup )
2744  KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
2745 
2746  // Create an imaginary child for this task so the bottom half cannot release the task before we have completed the second top half
2747  TCR_4(taskdata->td_incomplete_child_tasks++);
2748 }
2749 
2750 static void __kmp_second_top_half_finish_proxy( kmp_taskdata_t * taskdata )
2751 {
2752  kmp_int32 children = 0;
2753 
2754  // Predecrement simulated by "- 1" calculation
2755  children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
2756  KMP_DEBUG_ASSERT( children >= 0 );
2757 
2758  // Remove the imaginary child
2759  TCR_4(taskdata->td_incomplete_child_tasks--);
2760 }
2761 
2762 static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask )
2763 {
2764  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2765  kmp_info_t * thread = __kmp_threads[ gtid ];
2766 
2767  KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
2768  KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 1 ); // top half must run before bottom half
2769 
2770  // We need to wait to make sure the top half is finished
2771  // Spinning here should be ok as this should happen quickly
2772  while ( TCR_4(taskdata->td_incomplete_child_tasks) > 0 ) ;
2773 
2774  __kmp_release_deps(gtid,taskdata);
2775  __kmp_free_task_and_ancestors(gtid, taskdata, thread);
2776 }
2777 
2785 void __kmpc_proxy_task_completed( kmp_int32 gtid, kmp_task_t *ptask )
2786 {
2787  KMP_DEBUG_ASSERT( ptask != NULL );
2788  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2789  KA_TRACE(10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n", gtid, taskdata ) );
2790 
2791  KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );
2792 
2793  __kmp_first_top_half_finish_proxy(taskdata);
2794  __kmp_second_top_half_finish_proxy(taskdata);
2795  __kmp_bottom_half_finish_proxy(gtid,ptask);
2796 
2797  KA_TRACE(10, ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n", gtid, taskdata ) );
2798 }
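
As a usage illustration only (not code from this file): an asynchronous-completion callback that happens to run on a thread inside the team, and therefore has a gtid, can finish the proxy task synchronously through this entry point. The callback name, the way the kmp_task_t pointer is carried in the user data, and the use of __kmp_get_gtid() are assumptions for the sketch.

/* Hypothetical completion callback running on a thread that belongs to the team. */
static void my_offload_done_inside_team( void *data )
{
    kmp_task_t *ptask = (kmp_task_t *) data;      /* saved when the proxy task was created */
    kmp_int32 gtid = __kmp_get_gtid();            /* this thread is part of the team */
    __kmpc_proxy_task_completed( gtid, ptask );   /* both top halves and the bottom half run here */
}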
2799 
2806 void __kmpc_proxy_task_completed_ooo ( kmp_task_t *ptask )
2807 {
2808  KMP_DEBUG_ASSERT( ptask != NULL );
2809  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2810 
2811  KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n", taskdata ) );
2812 
2813  KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );
2814 
2815  __kmp_first_top_half_finish_proxy(taskdata);
2816 
2817  // Enqueue the task so that a thread within the corresponding team completes the bottom half
2818  kmp_team_t * team = taskdata->td_team;
2819  kmp_int32 nthreads = team->t.t_nproc;
2820  kmp_info_t *thread;
2821  kmp_int32 k = 0;
2822 
2823  do {
2824  //This should be similar to k = __kmp_get_random( thread ) % nthreads but we cannot use __kmp_get_random here
2825  //For now we're just linearly trying to find a thread
2826  k = (k+1) % nthreads;
2827  thread = team->t.t_threads[k];
2828  } while ( !__kmp_give_task( thread, k, ptask ) );
2829 
2830  __kmp_second_top_half_finish_proxy(taskdata);
2831 
2832  KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n", taskdata ) );
2833 }
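
By contrast, a completion callback running on a thread outside the team (for example a device driver's helper thread) has no gtid, so it would use this out-of-order entry point; the bottom half is then handed back to a team thread through __kmp_give_task as shown above. Again, the callback name and data-passing convention below are assumptions, not part of this file.

/* Hypothetical completion callback running on a thread outside the team. */
static void my_offload_done_outside_team( void *data )
{
    kmp_task_t *ptask = (kmp_task_t *) data;   /* saved when the proxy task was created */
    __kmpc_proxy_task_completed_ooo( ptask );  /* bottom half is queued to a team thread */
}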
2834 
2835 #endif