LLVM OpenMP* Runtime Library
kmp_wait_release.h
/*
 * kmp_wait_release.h -- Wait/Release implementation
 */

//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#ifndef KMP_WAIT_RELEASE_H
#define KMP_WAIT_RELEASE_H

#include "kmp.h"
#include "kmp_itt.h"

// The flag_type describes the storage used for the flag.
enum flag_type {
    flag32,        // 32-bit flags
    flag64,        // 64-bit flags
    flag_oncore    // special 64-bit flag for the hierarchical on-core barrier
};

// Base class for wait/release flags.
template <typename P>
class kmp_flag {
    volatile P * loc;  // Pointer to the flag storage that is modified by another thread
    flag_type t;       // "Type" of the flag in loc
 public:
    typedef P flag_t;
    kmp_flag(volatile P *p, flag_type ft) : loc(p), t(ft) {}
    // Return the pointer to the actual flag storage.
    volatile P * get() { return loc; }
    // Return the type of the flag.
    flag_type get_type() { return t; }
    // Derived classes must provide the following:
    /*
    kmp_info_t * get_waiter(kmp_uint32 i);
    kmp_uint32 get_num_waiters();
    bool done_check();
    bool done_check_val(P old_loc);
    bool notdone_check();
    P internal_release();
    P set_sleeping();
    P unset_sleeping();
    bool is_sleeping();
    bool is_sleeping_val(P old_loc);
    */
};

/* Spin wait loop that first does pause, then yield, then sleep. A thread that calls __kmp_wait_*
   must make certain that another thread calls __kmp_release to wake it back up to prevent deadlocks! */
template <class C>
static inline void __kmp_wait_template(kmp_info_t *this_thr, C *flag, int final_spin
                                        USE_ITT_BUILD_ARG(void * itt_sync_obj) )
{
    // NOTE: We may not belong to a team at this point.
    volatile typename C::flag_t *spin = flag->get();
    kmp_uint32 spins;
    kmp_uint32 hibernate;
    int th_gtid;
    int tasks_completed = FALSE;

    KMP_FSYNC_SPIN_INIT(spin, NULL);
    if (flag->done_check()) {
        KMP_FSYNC_SPIN_ACQUIRED(spin);
        return;
    }
    th_gtid = this_thr->th.th_info.ds.ds_gtid;
    KA_TRACE(20, ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));

#if OMPT_SUPPORT && OMPT_BLAME
    if (ompt_status == ompt_status_track_callback) {
        if (this_thr->th.ompt_thread_info.state == ompt_state_idle) {
            if (ompt_callbacks.ompt_callback(ompt_event_idle_begin)) {
                ompt_callbacks.ompt_callback(ompt_event_idle_begin)(th_gtid + 1);
            }
        } else if (ompt_callbacks.ompt_callback(ompt_event_wait_barrier_begin)) {
            KMP_DEBUG_ASSERT(this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier ||
                             this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit ||
                             this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_explicit);

            ompt_lw_taskteam_t* team = this_thr->th.th_team->t.ompt_serialized_team_info;
            ompt_parallel_id_t pId;
            ompt_task_id_t tId;
            if (team) {
                pId = team->ompt_team_info.parallel_id;
                tId = team->ompt_task_info.task_id;
            } else {
                pId = this_thr->th.th_team->t.ompt_team_info.parallel_id;
                tId = this_thr->th.th_current_task->ompt_task_info.task_id;
            }
            ompt_callbacks.ompt_callback(ompt_event_wait_barrier_begin)(pId, tId);
        }
    }
#endif

    // Setup for waiting
    KMP_INIT_YIELD(spins);

    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
        // The worker threads cannot rely on the team struct existing at this point.
        // Use the bt values cached in the thread struct instead.
#ifdef KMP_ADJUST_BLOCKTIME
        if (__kmp_zero_bt && !this_thr->th.th_team_bt_set)
            // Force immediate suspend if not set by user and more threads than available procs
            hibernate = 0;
        else
            hibernate = this_thr->th.th_team_bt_intervals;
#else
        hibernate = this_thr->th.th_team_bt_intervals;
#endif /* KMP_ADJUST_BLOCKTIME */

        /* If the blocktime is nonzero, we want to make sure that we spin wait for the entirety
           of the specified #intervals, plus up to one interval more. This increment makes
           certain that this thread doesn't go to sleep too soon. */
        if (hibernate != 0)
            hibernate++;

        // Add in the current time value.
        hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
        KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
                      th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
                      hibernate - __kmp_global.g.g_time.dt.t_value));
    }
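    /* Worked example (illustrative numbers only): if th_team_bt_intervals is 5 and the global
       tick counter __kmp_global.g.g_time.dt.t_value currently reads 100, then
       hibernate = 5 + 1 + 100 = 106; the spin loop below keeps spinning/yielding until the
       tick counter reaches 106, and only then calls flag->suspend(). */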
    KMP_MB();

    // Main wait spin loop
    while (flag->notdone_check()) {
        int in_pool;

        /* If the task team is NULL, it means one of 3 things:
           1) A newly-created thread is first being released by __kmp_fork_barrier(), and
              its task team has not been set up yet.
           2) All tasks have been executed to completion, this thread has decremented the task
              team's ref ct and possibly deallocated it, and should no longer reference it.
           3) Tasking is off for this region. This could be because we are in a serialized region
              (perhaps the outer one), or else tasking was manually disabled (KMP_TASKING=0). */
        kmp_task_team_t * task_team = NULL;
        if (__kmp_tasking_mode != tskm_immediate_exec) {
            task_team = this_thr->th.th_task_team;
            if (task_team != NULL) {
                if (!TCR_SYNC_4(task_team->tt.tt_active)) {
                    KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
                    __kmp_unref_task_team(task_team, this_thr);
                } else if (KMP_TASKING_ENABLED(task_team)) {
                    flag->execute_tasks(this_thr, th_gtid, final_spin, &tasks_completed
                                        USE_ITT_BUILD_ARG(itt_sync_obj), 0);
                }
            } // if
        } // if

        KMP_FSYNC_SPIN_PREPARE(spin);
        if (TCR_4(__kmp_global.g.g_done)) {
            if (__kmp_global.g.g_abort)
                __kmp_abort_thread();
            break;
        }

        // If we are oversubscribed, or have waited a bit (and KMP_LIBRARY=throughput), then yield
        KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
        // TODO: Should it be number of cores instead of thread contexts? Like:
        // KMP_YIELD(TCR_4(__kmp_nth) > __kmp_ncores);
        // Need performance improvement data to make the change...
        KMP_YIELD_SPIN(spins);

        // Check if this thread was transferred from a team
        // to the thread pool (or vice-versa) while spinning.
        in_pool = !!TCR_4(this_thr->th.th_in_pool);
        if (in_pool != !!this_thr->th.th_active_in_pool) {
            if (in_pool) { // Recently transferred from team to pool
                KMP_TEST_THEN_INC32((kmp_int32 *)&__kmp_thread_pool_active_nth);
                this_thr->th.th_active_in_pool = TRUE;
                /* Here, we cannot assert that:
                   KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) <= __kmp_thread_pool_nth);
                   __kmp_thread_pool_nth is inc/dec'd by the master thread while the fork/join
                   lock is held, whereas __kmp_thread_pool_active_nth is inc/dec'd asynchronously
                   by the workers. The two can get out of sync for brief periods of time. */
            }
            else { // Recently transferred from pool to team
                KMP_TEST_THEN_DEC32((kmp_int32 *) &__kmp_thread_pool_active_nth);
                KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
                this_thr->th.th_active_in_pool = FALSE;
            }
        }

        // Don't suspend if KMP_BLOCKTIME is set to "infinite"
        if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
            continue;

        // Don't suspend if there is a likelihood of new tasks being spawned.
        if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
            continue;

        // If we have waited a bit more, fall asleep
        if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
            continue;

        KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));

        flag->suspend(th_gtid);

        if (TCR_4(__kmp_global.g.g_done)) {
            if (__kmp_global.g.g_abort)
                __kmp_abort_thread();
            break;
        }
        // TODO: If thread is done with work and times out, disband/free
    }

#if OMPT_SUPPORT && OMPT_BLAME
    if (ompt_status == ompt_status_track_callback) {
        if (this_thr->th.ompt_thread_info.state == ompt_state_idle) {
            if (ompt_callbacks.ompt_callback(ompt_event_idle_end)) {
                ompt_callbacks.ompt_callback(ompt_event_idle_end)(th_gtid + 1);
            }
        } else if (ompt_callbacks.ompt_callback(ompt_event_wait_barrier_end)) {
            KMP_DEBUG_ASSERT(this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier ||
                             this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit ||
                             this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_explicit);

            ompt_lw_taskteam_t* team = this_thr->th.th_team->t.ompt_serialized_team_info;
            ompt_parallel_id_t pId;
            ompt_task_id_t tId;
            if (team) {
                pId = team->ompt_team_info.parallel_id;
                tId = team->ompt_task_info.task_id;
            } else {
                pId = this_thr->th.th_team->t.ompt_team_info.parallel_id;
                tId = this_thr->th.th_current_task->ompt_task_info.task_id;
            }
            ompt_callbacks.ompt_callback(ompt_event_wait_barrier_end)(pId, tId);
        }
    }
#endif

    KMP_FSYNC_SPIN_ACQUIRED(spin);
}

/* Release any threads specified as waiting on the flag by releasing the flag and resuming
   the waiting thread if indicated by the sleep bit(s). A thread that calls __kmp_wait_template
   must call this function to wake up the potentially sleeping thread and prevent deadlocks! */
template <class C>
static inline void __kmp_release_template(C *flag)
{
#ifdef KMP_DEBUG
    // FIX ME
    kmp_info_t * wait_thr = flag->get_waiter(0);
    int target_gtid = wait_thr->th.th_info.ds.ds_gtid;
    int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
#endif
    KF_TRACE(20, ("__kmp_release: T#%d releasing T#%d spin(%p)\n", gtid, target_gtid, flag->get()));
    KMP_DEBUG_ASSERT(flag->get());
    KMP_FSYNC_RELEASING(flag->get());

    typename C::flag_t old_spin = flag->internal_release();

    KF_TRACE(100, ("__kmp_release: T#%d old spin(%p)=%d, set new spin=%d\n",
                   gtid, flag->get(), old_spin, *(flag->get())));

    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
        // Only need to check sleep stuff if infinite block time not set
        if (flag->is_sleeping_val(old_spin)) {
            for (unsigned int i=0; i<flag->get_num_waiters(); ++i) {
                kmp_info_t * waiter = flag->get_waiter(i);
                int wait_gtid = waiter->th.th_info.ds.ds_gtid;
                // Wake up thread if needed
                KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep spin(%p) set\n",
                              gtid, wait_gtid, flag->get()));
                flag->resume(wait_gtid);
            }
        } else {
            KF_TRACE(50, ("__kmp_release: T#%d don't wake up thread T#%d since sleep spin(%p) not set\n",
                          gtid, target_gtid, flag->get()));
        }
    }
}
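
/* Note on the sleep-bit handshake above (illustrative walk-through, not additional API):
   a waiter that decides to block first ORs KMP_BARRIER_SLEEP_STATE into the flag word from
   the suspend path (via set_sleeping(), defined by the flag classes below) before actually
   sleeping. internal_release() returns the flag's previous value, so if that bit is visible
   in old_spin the releaser knows a waiter may be asleep (or about to be) and calls resume()
   for every registered waiter; if the bit is clear, the waiter is still spinning and no
   wake-up syscall is needed. */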

template <typename FlagType>
struct flag_traits {};

template <>
struct flag_traits<kmp_uint32> {
    typedef kmp_uint32 flag_t;
    static const flag_type t = flag32;
    static inline flag_t tcr(flag_t f) { return TCR_4(f); }
    static inline flag_t test_then_add4(volatile flag_t *f) { return KMP_TEST_THEN_ADD4_32((volatile kmp_int32 *)f); }
    static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_OR32((volatile kmp_int32 *)f, v); }
    static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_AND32((volatile kmp_int32 *)f, v); }
};

template <>
struct flag_traits<kmp_uint64> {
    typedef kmp_uint64 flag_t;
    static const flag_type t = flag64;
    static inline flag_t tcr(flag_t f) { return TCR_8(f); }
    static inline flag_t test_then_add4(volatile flag_t *f) { return KMP_TEST_THEN_ADD4_64((volatile kmp_int64 *)f); }
    static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_OR64((volatile kmp_int64 *)f, v); }
    static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_AND64((volatile kmp_int64 *)f, v); }
};

template <typename FlagType>
class kmp_basic_flag : public kmp_flag<FlagType> {
    typedef flag_traits<FlagType> traits_type;
    FlagType checker;                 // Value the flag is compared against to check for release
    kmp_info_t * waiting_threads[1];  // Threads sleeping on this flag
    kmp_uint32 num_waiting_threads;   // Number of threads sleeping on this flag
public:
    kmp_basic_flag(volatile FlagType *p) : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
    kmp_basic_flag(volatile FlagType *p, kmp_info_t *thr) : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
        waiting_threads[0] = thr;
    }
    kmp_basic_flag(volatile FlagType *p, FlagType c) : kmp_flag<FlagType>(p, traits_type::t), checker(c), num_waiting_threads(0) {}
    // Return the waiting thread at index i.
    kmp_info_t * get_waiter(kmp_uint32 i) {
        KMP_DEBUG_ASSERT(i<num_waiting_threads);
        return waiting_threads[i];
    }
    kmp_uint32 get_num_waiters() { return num_waiting_threads; }
    // Register thr as the single waiter on this flag.
    void set_waiter(kmp_info_t *thr) {
        waiting_threads[0] = thr;
        num_waiting_threads = 1;
    }
    // true iff the flag has reached the checker value, i.e. has been released.
    bool done_check() { return traits_type::tcr(*(this->get())) == checker; }
    bool done_check_val(FlagType old_loc) { return old_loc == checker; }
    bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
    // Release the flag; returns the flag's value before the bump.
    FlagType internal_release() {
        return traits_type::test_then_add4((volatile FlagType *)this->get());
    }
    // Set/clear the sleep bit; both return the previous flag value.
    FlagType set_sleeping() {
        return traits_type::test_then_or((volatile FlagType *)this->get(), KMP_BARRIER_SLEEP_STATE);
    }
    FlagType unset_sleeping() {
        return traits_type::test_then_and((volatile FlagType *)this->get(), ~KMP_BARRIER_SLEEP_STATE);
    }
    bool is_sleeping_val(FlagType old_loc) { return old_loc & KMP_BARRIER_SLEEP_STATE; }
    bool is_sleeping() { return is_sleeping_val(*(this->get())); }
};

class kmp_flag_32 : public kmp_basic_flag<kmp_uint32> {
public:
    kmp_flag_32(volatile kmp_uint32 *p) : kmp_basic_flag<kmp_uint32>(p) {}
    kmp_flag_32(volatile kmp_uint32 *p, kmp_info_t *thr) : kmp_basic_flag<kmp_uint32>(p, thr) {}
    kmp_flag_32(volatile kmp_uint32 *p, kmp_uint32 c) : kmp_basic_flag<kmp_uint32>(p, c) {}
    void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
    void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
    int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished
                      USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) {
        return __kmp_execute_tasks_32(this_thr, gtid, this, final_spin, thread_finished
                                      USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
    }
    void wait(kmp_info_t *this_thr, int final_spin
              USE_ITT_BUILD_ARG(void * itt_sync_obj)) {
        __kmp_wait_template(this_thr, this, final_spin
                            USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    void release() { __kmp_release_template(this); }
};

class kmp_flag_64 : public kmp_basic_flag<kmp_uint64> {
public:
    kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag<kmp_uint64>(p) {}
    kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr) : kmp_basic_flag<kmp_uint64>(p, thr) {}
    kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c) : kmp_basic_flag<kmp_uint64>(p, c) {}
    void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
    void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
    int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished
                      USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) {
        return __kmp_execute_tasks_64(this_thr, gtid, this, final_spin, thread_finished
                                      USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
    }
    void wait(kmp_info_t *this_thr, int final_spin
              USE_ITT_BUILD_ARG(void * itt_sync_obj)) {
        __kmp_wait_template(this_thr, this, final_spin
                            USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    void release() { __kmp_release_template(this); }
};
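
/* Usage sketch (illustrative only; 'go', 'this_thr' and 'waiter_thr' are hypothetical locals,
   but the flag API is the one defined above): a waiter and its releaser construct flags over
   the same 64-bit location.

       volatile kmp_uint64 go = 0;   // shared go-flag in its initial (non-released) state

       // Waiting thread: spin/yield/sleep until 'go' reaches KMP_BARRIER_STATE_BUMP.
       kmp_flag_64 wait_flag(&go, (kmp_uint64)KMP_BARRIER_STATE_BUMP);
       wait_flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(NULL));

       // Releasing thread: bump the flag and wake the waiter if its sleep bit was set.
       kmp_flag_64 release_flag(&go, waiter_thr);
       release_flag.release();
*/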

// Hierarchical 64-bit on-core barrier instantiation
class kmp_flag_oncore : public kmp_flag<kmp_uint64> {
    kmp_uint64 checker;               // Value the flag byte is compared against
    kmp_info_t * waiting_threads[1];  // Threads sleeping on this flag
    kmp_uint32 num_waiting_threads;   // Number of threads sleeping on this flag
    kmp_uint32 offset;                // Byte of the flag that is of interest for this operation
    bool flag_switch;                 // Indicates a switch in flag location
    enum barrier_type bt;             // Barrier type
    kmp_info_t * this_thr;            // Thread that may be redirected to a different flag location
#if USE_ITT_BUILD
    void *itt_sync_obj;               // ITT object to pass to the new flag location
#endif
    unsigned char& byteref(volatile kmp_uint64* loc, size_t offset) { return ((unsigned char *)loc)[offset]; }
public:
    kmp_flag_oncore(volatile kmp_uint64 *p)
        : kmp_flag<kmp_uint64>(p, flag_oncore), num_waiting_threads(0), flag_switch(false) {}
    kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
        : kmp_flag<kmp_uint64>(p, flag_oncore), num_waiting_threads(0), offset(idx), flag_switch(false) {}
    kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx, enum barrier_type bar_t,
                    kmp_info_t * thr
#if USE_ITT_BUILD
                    , void *itt
#endif
                    )
        : kmp_flag<kmp_uint64>(p, flag_oncore), checker(c), num_waiting_threads(0), offset(idx),
          flag_switch(false), bt(bar_t), this_thr(thr)
#if USE_ITT_BUILD
        , itt_sync_obj(itt)
#endif
        {}
    kmp_info_t * get_waiter(kmp_uint32 i) {
        KMP_DEBUG_ASSERT(i<num_waiting_threads);
        return waiting_threads[i];
    }
    kmp_uint32 get_num_waiters() { return num_waiting_threads; }
    void set_waiter(kmp_info_t *thr) {
        waiting_threads[0] = thr;
        num_waiting_threads = 1;
    }
    bool done_check_val(kmp_uint64 old_loc) { return byteref(&old_loc,offset) == checker; }
    bool done_check() { return done_check_val(*get()); }
    bool notdone_check() {
        // Calculate flag_switch
        if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
            flag_switch = true;
        if (byteref(get(),offset) != 1 && !flag_switch)
            return true;
        else if (flag_switch) {
            this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
            kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go, (kmp_uint64)KMP_BARRIER_STATE_BUMP);
            __kmp_wait_64(this_thr, &flag, TRUE
#if USE_ITT_BUILD
                          , itt_sync_obj
#endif
                          );
        }
        return false;
    }
    kmp_uint64 internal_release() {
        kmp_uint64 old_val;
        if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
            old_val = *get();
            byteref(get(),offset) = 1;
        }
        else {
            kmp_uint64 mask=0;
            byteref(&mask,offset) = 1;
            old_val = KMP_TEST_THEN_OR64((volatile kmp_int64 *)get(), mask);
        }
        return old_val;
    }
    kmp_uint64 set_sleeping() {
        return KMP_TEST_THEN_OR64((kmp_int64 volatile *)get(), KMP_BARRIER_SLEEP_STATE);
    }
    kmp_uint64 unset_sleeping() {
        return KMP_TEST_THEN_AND64((kmp_int64 volatile *)get(), ~KMP_BARRIER_SLEEP_STATE);
    }
    bool is_sleeping_val(kmp_uint64 old_loc) { return old_loc & KMP_BARRIER_SLEEP_STATE; }
    bool is_sleeping() { return is_sleeping_val(*get()); }
    void wait(kmp_info_t *this_thr, int final_spin
              USE_ITT_BUILD_ARG(void * itt_sync_obj)) {
        __kmp_wait_template(this_thr, this, final_spin
                            USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    void release() { __kmp_release_template(this); }
    void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
    void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
    int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished
                      USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) {
        return __kmp_execute_tasks_oncore(this_thr, gtid, this, final_spin, thread_finished
                                          USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
    }
};
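
/* Layout sketch for the on-core flag (illustrative): the 64-bit location is treated as up to
   eight one-byte sub-flags, and 'offset' selects the byte belonging to this operation. For
   example, with offset == 2 on a little-endian target, internal_release() either stores 1
   directly into byte 2 or atomically ORs in a mask whose only nonzero byte is byte 2
   (0x0000000000010000), leaving the other bytes untouched; done_check() then inspects only
   that byte and compares it to 'checker'. */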

#endif // KMP_WAIT_RELEASE_H