LLVM OpenMP* Runtime Library
kmp_wait_release.h
1 /*
2  * kmp_wait_release.h -- Wait/Release implementation
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef KMP_WAIT_RELEASE_H
14 #define KMP_WAIT_RELEASE_H
15 
16 #include "kmp.h"
17 #include "kmp_itt.h"
18 #include "kmp_stats.h"
19 #if OMPT_SUPPORT
20 #include "ompt-specific.h"
21 #endif
22 
36 struct flag_properties {
37  unsigned int type : 16;
38  unsigned int reserved : 16;
39 };
40 
41 template <enum flag_type FlagType> struct flag_traits {};
42 
43 template <> struct flag_traits<flag32> {
44  typedef kmp_uint32 flag_t;
45  static const flag_type t = flag32;
46  static inline flag_t tcr(flag_t f) { return TCR_4(f); }
47  static inline flag_t test_then_add4(volatile flag_t *f) {
48  return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
49  }
50  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
51  return KMP_TEST_THEN_OR32(f, v);
52  }
53  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
54  return KMP_TEST_THEN_AND32(f, v);
55  }
56 };
57 
58 template <> struct flag_traits<atomic_flag64> {
59  typedef kmp_uint64 flag_t;
60  static const flag_type t = atomic_flag64;
61  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
62  static inline flag_t test_then_add4(volatile flag_t *f) {
63  return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
64  }
65  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
66  return KMP_TEST_THEN_OR64(f, v);
67  }
68  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
69  return KMP_TEST_THEN_AND64(f, v);
70  }
71 };
72 
73 template <> struct flag_traits<flag64> {
74  typedef kmp_uint64 flag_t;
75  static const flag_type t = flag64;
76  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
77  static inline flag_t test_then_add4(volatile flag_t *f) {
78  return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
79  }
80  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
81  return KMP_TEST_THEN_OR64(f, v);
82  }
83  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
84  return KMP_TEST_THEN_AND64(f, v);
85  }
86 };
87 
88 template <> struct flag_traits<flag_oncore> {
89  typedef kmp_uint64 flag_t;
90  static const flag_type t = flag_oncore;
91  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
92  static inline flag_t test_then_add4(volatile flag_t *f) {
93  return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
94  }
95  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
96  return KMP_TEST_THEN_OR64(f, v);
97  }
98  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
99  return KMP_TEST_THEN_AND64(f, v);
100  }
101 };
102 
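The flag_traits specializations above give each flag_type a uniform vocabulary over its storage word: a tcr load plus the test_then_add4 / test_then_or / test_then_and read-modify-write primitives. A minimal sketch of generic code written only against these traits; the helper name is illustrative and not part of this header, and the add-by-4 simply mirrors what internal_release does below (4 being KMP_BARRIER_STATE_BUMP):

   template <flag_type FlagType>
   static inline typename flag_traits<FlagType>::flag_t
   example_bump_and_read(volatile typename flag_traits<FlagType>::flag_t *f) {
     typedef flag_traits<FlagType> traits;
     (void)traits::test_then_add4(f); // atomically add 4 (one release "bump")
     return traits::tcr(*f);          // read the flag back with a volatile-safe load
   }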
104 template <flag_type FlagType> class kmp_flag {
105 protected:
106  flag_properties t;
107  kmp_info_t *waiting_threads[1];
108  kmp_uint32 num_waiting_threads;
109  std::atomic<bool> *sleepLoc;
110 
111 public:
112  typedef flag_traits<FlagType> traits_type;
113  kmp_flag() : t({FlagType, 0U}), num_waiting_threads(0), sleepLoc(nullptr) {}
114  kmp_flag(int nwaiters)
115  : t({FlagType, 0U}), num_waiting_threads(nwaiters), sleepLoc(nullptr) {}
116  kmp_flag(std::atomic<bool> *sloc)
117  : t({FlagType, 0U}), num_waiting_threads(0), sleepLoc(sloc) {}
119  flag_type get_type() { return (flag_type)(t.type); }
120 
123  kmp_info_t *get_waiter(kmp_uint32 i) {
124  KMP_DEBUG_ASSERT(i < num_waiting_threads);
125  return waiting_threads[i];
126  }
128  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
131  void set_waiter(kmp_info_t *thr) {
132  waiting_threads[0] = thr;
133  num_waiting_threads = 1;
134  }
135  enum barrier_type get_bt() { return bs_last_barrier; }
136 };
137 
139 template <typename PtrType, flag_type FlagType, bool Sleepable>
140 class kmp_flag_native : public kmp_flag<FlagType> {
141 protected:
142  volatile PtrType *loc;
143  PtrType checker;
144  typedef flag_traits<FlagType> traits_type;
145 
146 public:
147  typedef PtrType flag_t;
148  kmp_flag_native(volatile PtrType *p) : kmp_flag<FlagType>(), loc(p) {}
149  kmp_flag_native(volatile PtrType *p, kmp_info_t *thr)
150  : kmp_flag<FlagType>(1), loc(p) {
151  this->waiting_threads[0] = thr;
152  }
153  kmp_flag_native(volatile PtrType *p, PtrType c)
154  : kmp_flag<FlagType>(), loc(p), checker(c) {}
155  kmp_flag_native(volatile PtrType *p, PtrType c, std::atomic<bool> *sloc)
156  : kmp_flag<FlagType>(sloc), loc(p), checker(c) {}
157  virtual ~kmp_flag_native() {}
158  void *operator new(size_t size) { return __kmp_allocate(size); }
159  void operator delete(void *p) { __kmp_free(p); }
160  volatile PtrType *get() { return loc; }
161  void *get_void_p() { return RCAST(void *, CCAST(PtrType *, loc)); }
162  void set(volatile PtrType *new_loc) { loc = new_loc; }
163  PtrType load() { return *loc; }
164  void store(PtrType val) { *loc = val; }
166  virtual bool done_check() {
167  if (Sleepable && !(this->sleepLoc))
168  return (traits_type::tcr(*(this->get())) & ~KMP_BARRIER_SLEEP_STATE) ==
169  checker;
170  else
171  return traits_type::tcr(*(this->get())) == checker;
172  }
175  virtual bool done_check_val(PtrType old_loc) { return old_loc == checker; }
181  virtual bool notdone_check() {
182  return traits_type::tcr(*(this->get())) != checker;
183  }
186  void internal_release() {
187  (void)traits_type::test_then_add4((volatile PtrType *)this->get());
188  }
192  PtrType set_sleeping() {
193  if (this->sleepLoc) {
194  this->sleepLoc->store(true);
195  return *(this->get());
196  }
197  return traits_type::test_then_or((volatile PtrType *)this->get(),
198  KMP_BARRIER_SLEEP_STATE);
199  }
203  void unset_sleeping() {
204  if (this->sleepLoc) {
205  this->sleepLoc->store(false);
206  return;
207  }
208  traits_type::test_then_and((volatile PtrType *)this->get(),
209  ~KMP_BARRIER_SLEEP_STATE);
210  }
213  bool is_sleeping_val(PtrType old_loc) {
214  if (this->sleepLoc)
215  return this->sleepLoc->load();
216  return old_loc & KMP_BARRIER_SLEEP_STATE;
217  }
219  bool is_sleeping() {
220  if (this->sleepLoc)
221  return this->sleepLoc->load();
222  return is_sleeping_val(*(this->get()));
223  }
224  bool is_any_sleeping() {
225  if (this->sleepLoc)
226  return this->sleepLoc->load();
227  return is_sleeping_val(*(this->get()));
228  }
229  kmp_uint8 *get_stolen() { return NULL; }
230 };
231 
233 template <typename PtrType, flag_type FlagType, bool Sleepable>
234 class kmp_flag_atomic : public kmp_flag<FlagType> {
235 protected:
236  std::atomic<PtrType> *loc;
237  PtrType checker;
238 public:
239  typedef flag_traits<FlagType> traits_type;
240  typedef PtrType flag_t;
241  kmp_flag_atomic(std::atomic<PtrType> *p) : kmp_flag<FlagType>(), loc(p) {}
242  kmp_flag_atomic(std::atomic<PtrType> *p, kmp_info_t *thr)
243  : kmp_flag<FlagType>(1), loc(p) {
244  this->waiting_threads[0] = thr;
245  }
246  kmp_flag_atomic(std::atomic<PtrType> *p, PtrType c)
247  : kmp_flag<FlagType>(), loc(p), checker(c) {}
248  kmp_flag_atomic(std::atomic<PtrType> *p, PtrType c, std::atomic<bool> *sloc)
249  : kmp_flag<FlagType>(sloc), loc(p), checker(c) {}
251  std::atomic<PtrType> *get() { return loc; }
253  void *get_void_p() { return RCAST(void *, loc); }
255  void set(std::atomic<PtrType> *new_loc) { loc = new_loc; }
257  PtrType load() { return loc->load(std::memory_order_acquire); }
259  void store(PtrType val) { loc->store(val, std::memory_order_release); }
261  bool done_check() {
262  if (Sleepable && !(this->sleepLoc))
263  return (this->load() & ~KMP_BARRIER_SLEEP_STATE) == checker;
264  else
265  return this->load() == checker;
266  }
269  bool done_check_val(PtrType old_loc) { return old_loc == checker; }
275  bool notdone_check() { return this->load() != checker; }
278  void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
282  PtrType set_sleeping() {
283  if (this->sleepLoc) {
284  this->sleepLoc->store(true);
285  return *(this->get());
286  }
287  return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE);
288  }
292  void unset_sleeping() {
293  if (this->sleepLoc) {
294  this->sleepLoc->store(false);
295  return;
296  }
297  KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE);
298  }
301  bool is_sleeping_val(PtrType old_loc) {
302  if (this->sleepLoc)
303  return this->sleepLoc->load();
304  return old_loc & KMP_BARRIER_SLEEP_STATE;
305  }
307  bool is_sleeping() {
308  if (this->sleepLoc)
309  return this->sleepLoc->load();
310  return is_sleeping_val(this->load());
311  }
312  bool is_any_sleeping() {
313  if (this->sleepLoc)
314  return this->sleepLoc->load();
315  return is_sleeping_val(this->load());
316  }
317  kmp_uint8 *get_stolen() { return NULL; }
318 };
319 
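When a flag is Sleepable and has no separate sleepLoc, the sleep bit is stored in the same word as the flag value, which is why done_check in both classes above masks KMP_BARRIER_SLEEP_STATE out before comparing against the checker. A small hedged sketch of that encoding; the function and values are illustrative only and assume the usual KMP_BARRIER_SLEEP_STATE / KMP_BARRIER_STATE_BUMP definitions from kmp.h:

   static inline void example_sleep_bit_encoding() {
     kmp_uint64 checker = KMP_BARRIER_STATE_BUMP;          // value a releaser produces
     kmp_uint64 value = checker | KMP_BARRIER_SLEEP_STATE; // waiter has marked itself sleeping
     bool done_sleepable = ((value & ~KMP_BARRIER_SLEEP_STATE) == checker); // true
     bool done_plain = (value == checker);                 // false: sleep bit still set
     (void)done_sleepable;
     (void)done_plain;
   }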
320 #if OMPT_SUPPORT
321 OMPT_NOINLINE
322 static void __ompt_implicit_task_end(kmp_info_t *this_thr,
323  ompt_state_t ompt_state,
324  ompt_data_t *tId) {
325  int ds_tid = this_thr->th.th_info.ds.ds_tid;
326  if (ompt_state == ompt_state_wait_barrier_implicit) {
327  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
328 #if OMPT_OPTIONAL
329  void *codeptr = NULL;
330  if (ompt_enabled.ompt_callback_sync_region_wait) {
331  ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
332  ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
333  codeptr);
334  }
335  if (ompt_enabled.ompt_callback_sync_region) {
336  ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
337  ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
338  codeptr);
339  }
340 #endif
341  if (!KMP_MASTER_TID(ds_tid)) {
342  if (ompt_enabled.ompt_callback_implicit_task) {
343  int flags = this_thr->th.ompt_thread_info.parallel_flags;
344  flags = (flags & ompt_parallel_league) ? ompt_task_initial
345  : ompt_task_implicit;
346  ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
347  ompt_scope_end, NULL, tId, 0, ds_tid, flags);
348  }
349  // return to idle state
350  this_thr->th.ompt_thread_info.state = ompt_state_idle;
351  } else {
352  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
353  }
354  }
355 }
356 #endif
357 
358 /* Spin wait loop that first does pause/yield, then sleep. A thread that calls
359  __kmp_wait_* must make certain that another thread calls __kmp_release
360  to wake it back up to prevent deadlocks!
361 
362  NOTE: We may not belong to a team at this point. */
363 template <class C, bool final_spin, bool Cancellable = false,
364  bool Sleepable = true>
365 static inline bool
366 __kmp_wait_template(kmp_info_t *this_thr,
367  C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
368 #if USE_ITT_BUILD && USE_ITT_NOTIFY
369  volatile void *spin = flag->get();
370 #endif
371  kmp_uint32 spins;
372  int th_gtid;
373  int tasks_completed = FALSE;
374 #if !KMP_USE_MONITOR
375  kmp_uint64 poll_count;
376  kmp_uint64 hibernate_goal;
377 #else
378  kmp_uint32 hibernate;
379 #endif
380  kmp_uint64 time;
381 
382  KMP_FSYNC_SPIN_INIT(spin, NULL);
383  if (flag->done_check()) {
384  KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
385  return false;
386  }
387  th_gtid = this_thr->th.th_info.ds.ds_gtid;
388  if (Cancellable) {
389  kmp_team_t *team = this_thr->th.th_team;
390  if (team && team->t.t_cancel_request == cancel_parallel)
391  return true;
392  }
393 #if KMP_OS_UNIX
394  if (final_spin)
395  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
396 #endif
397  KA_TRACE(20,
398  ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
399 #if KMP_STATS_ENABLED
400  stats_state_e thread_state = KMP_GET_THREAD_STATE();
401 #endif
402 
403 /* OMPT Behavior:
404 THIS function is called from
405  __kmp_barrier (2 times) (implicit or explicit barrier in parallel regions)
406  these have join / fork behavior
407 
408  In these cases, we don't change the state or trigger events in THIS
409 function.
410  Events are triggered in the calling code (__kmp_barrier):
411 
412  state := ompt_state_overhead
413  barrier-begin
414  barrier-wait-begin
415  state := ompt_state_wait_barrier
416  call join-barrier-implementation (finally arrive here)
417  {}
418  call fork-barrier-implementation (finally arrive here)
419  {}
420  state := ompt_state_overhead
421  barrier-wait-end
422  barrier-end
423  state := ompt_state_work_parallel
424 
425 
426  __kmp_fork_barrier (after thread creation, before executing implicit task)
427  call fork-barrier-implementation (finally arrive here)
428  {} // worker arrive here with state = ompt_state_idle
429 
430 
431  __kmp_join_barrier (implicit barrier at end of parallel region)
432  state := ompt_state_barrier_implicit
433  barrier-begin
434  barrier-wait-begin
435  call join-barrier-implementation (finally arrive here
436 final_spin=FALSE)
437  {
438  }
439  __kmp_fork_barrier (implicit barrier at end of parallel region)
440  call fork-barrier-implementation (finally arrive here final_spin=TRUE)
441 
442  Worker after task-team is finished:
443  barrier-wait-end
444  barrier-end
445  implicit-task-end
446  idle-begin
447  state := ompt_state_idle
448 
449  Before leaving, if state = ompt_state_idle
450  idle-end
451  state := ompt_state_overhead
452 */
453 #if OMPT_SUPPORT
454  ompt_state_t ompt_entry_state;
455  ompt_data_t *tId;
456  if (ompt_enabled.enabled) {
457  ompt_entry_state = this_thr->th.ompt_thread_info.state;
458  if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit ||
459  KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
460  ompt_lw_taskteam_t *team = NULL;
461  if (this_thr->th.th_team)
462  team = this_thr->th.th_team->t.ompt_serialized_team_info;
463  if (team) {
464  tId = &(team->ompt_task_info.task_data);
465  } else {
466  tId = OMPT_CUR_TASK_DATA(this_thr);
467  }
468  } else {
469  tId = &(this_thr->th.ompt_thread_info.task_data);
470  }
471  if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec ||
472  this_thr->th.th_task_team == NULL)) {
473  // implicit task is done. Either no taskqueue, or task-team finished
474  __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
475  }
476  }
477 #endif
478 
479  KMP_INIT_YIELD(spins); // Setup for waiting
480  KMP_INIT_BACKOFF(time);
481 
482  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ||
483  __kmp_pause_status == kmp_soft_paused) {
484 #if KMP_USE_MONITOR
485 // The worker threads cannot rely on the team struct existing at this point.
486 // Use the bt values cached in the thread struct instead.
487 #ifdef KMP_ADJUST_BLOCKTIME
488  if (__kmp_pause_status == kmp_soft_paused ||
489  (__kmp_zero_bt && !this_thr->th.th_team_bt_set))
490  // Force immediate suspend if not set by user and more threads than
491  // available procs
492  hibernate = 0;
493  else
494  hibernate = this_thr->th.th_team_bt_intervals;
495 #else
496  hibernate = this_thr->th.th_team_bt_intervals;
497 #endif /* KMP_ADJUST_BLOCKTIME */
498 
499  /* If the blocktime is nonzero, we want to make sure that we spin wait for
500  the entirety of the specified #intervals, plus up to one interval more.
501  This increment makes certain that this thread doesn't go to sleep too
502  soon. */
503  if (hibernate != 0)
504  hibernate++;
505 
506  // Add in the current time value.
507  hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
508  KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
509  th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
510  hibernate - __kmp_global.g.g_time.dt.t_value));
511 #else
512  if (__kmp_pause_status == kmp_soft_paused) {
513  // Force immediate suspend
514  hibernate_goal = KMP_NOW();
515  } else
516  hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals;
517  poll_count = 0;
518  (void)poll_count;
519 #endif // KMP_USE_MONITOR
520  }
521 
522  KMP_MB();
523 
524  // Main wait spin loop
525  while (flag->notdone_check()) {
526  kmp_task_team_t *task_team = NULL;
527  if (__kmp_tasking_mode != tskm_immediate_exec) {
528  task_team = this_thr->th.th_task_team;
529  /* If the thread's task team pointer is NULL, it means one of 3 things:
530  1) A newly-created thread is first being released by
531  __kmp_fork_barrier(), and its task team has not been set up yet.
532  2) All tasks have been executed to completion.
533  3) Tasking is off for this region. This could be because we are in a
534  serialized region (perhaps the outer one), or else tasking was manually
535  disabled (KMP_TASKING=0). */
536  if (task_team != NULL) {
537  if (TCR_SYNC_4(task_team->tt.tt_active)) {
538  if (KMP_TASKING_ENABLED(task_team)) {
539  flag->execute_tasks(
540  this_thr, th_gtid, final_spin,
541  &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
542  } else
543  this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
544  } else {
545  KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
546 #if OMPT_SUPPORT
547  // task-team is done now, other cases should be caught above
548  if (final_spin && ompt_enabled.enabled)
549  __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
550 #endif
551  this_thr->th.th_task_team = NULL;
552  this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
553  }
554  } else {
555  this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
556  } // if
557  } // if
558 
559  KMP_FSYNC_SPIN_PREPARE(CCAST(void *, spin));
560  if (TCR_4(__kmp_global.g.g_done)) {
561  if (__kmp_global.g.g_abort)
562  __kmp_abort_thread();
563  break;
564  }
565 
566  // If we are oversubscribed, or have waited a bit (and
567  // KMP_LIBRARY=throughput), then yield
568  KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
569 
570 #if KMP_STATS_ENABLED
571  // Check if thread has been signalled to idle state
572  // This indicates that the logical "join-barrier" has finished
573  if (this_thr->th.th_stats->isIdle() &&
574  KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
575  KMP_SET_THREAD_STATE(IDLE);
576  KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
577  }
578 #endif
579  // Check if the barrier surrounding this wait loop has been cancelled
580  if (Cancellable) {
581  kmp_team_t *team = this_thr->th.th_team;
582  if (team && team->t.t_cancel_request == cancel_parallel)
583  break;
584  }
585 
586  // For a hidden helper thread, a NULL task_team means the main thread has
587  // not yet released the barrier. We must not simply wait here: if the main
588  // thread releases all child barriers while the hidden helper threads are
589  // still sleeping, the follow-up setup (such as task team sync) is never
590  // performed and this thread is left without a task team. Usually that is
591  // harmless. However, there is a corner case: if the first task encountered
592  // is an untied task, the check in __kmp_task_alloc crashes because it
593  // dereferences the task team pointer without checking it for nullptr,
594  // presumably relying on the assumption that the pointer is always set by
595  // this point.
596  if (task_team && KMP_HIDDEN_HELPER_WORKER_THREAD(th_gtid) &&
597  !TCR_4(__kmp_hidden_helper_team_done)) {
598  // If there are still hidden helper tasks to be executed, the hidden helper
599  // thread will not enter a waiting state.
600  if (KMP_ATOMIC_LD_ACQ(&__kmp_unexecuted_hidden_helper_tasks) == 0) {
601  __kmp_hidden_helper_worker_thread_wait();
602  }
603  continue;
604  }
605 
606  // Don't suspend if KMP_BLOCKTIME is set to "infinite"
607  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
608  __kmp_pause_status != kmp_soft_paused)
609  continue;
610 
611  // Don't suspend if there is a likelihood of new tasks being spawned.
612  if (task_team != NULL && TCR_4(task_team->tt.tt_found_tasks) &&
613  !__kmp_wpolicy_passive)
614  continue;
615 
616 #if KMP_USE_MONITOR
617  // If we have waited a bit more, fall asleep
618  if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
619  continue;
620 #else
621  if (KMP_BLOCKING(hibernate_goal, poll_count++))
622  continue;
623 #endif
624  // Don't suspend if wait loop designated non-sleepable
625  // in template parameters
626  if (!Sleepable)
627  continue;
628 
629 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
630  if (__kmp_mwait_enabled || __kmp_umwait_enabled) {
631  KF_TRACE(50, ("__kmp_wait_sleep: T#%d using monitor/mwait\n", th_gtid));
632  flag->mwait(th_gtid);
633  } else {
634 #endif
635  KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
636 #if KMP_OS_UNIX
637  if (final_spin)
638  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
639 #endif
640  flag->suspend(th_gtid);
641 #if KMP_OS_UNIX
642  if (final_spin)
643  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
644 #endif
645 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
646  }
647 #endif
648 
649  if (TCR_4(__kmp_global.g.g_done)) {
650  if (__kmp_global.g.g_abort)
651  __kmp_abort_thread();
652  break;
653  } else if (__kmp_tasking_mode != tskm_immediate_exec &&
654  this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
655  this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
656  }
657  // TODO: If thread is done with work and times out, disband/free
658  }
659 
660 #if OMPT_SUPPORT
661  ompt_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
662  if (ompt_enabled.enabled && ompt_exit_state != ompt_state_undefined) {
663 #if OMPT_OPTIONAL
664  if (final_spin) {
665  __ompt_implicit_task_end(this_thr, ompt_exit_state, tId);
666  ompt_exit_state = this_thr->th.ompt_thread_info.state;
667  }
668 #endif
669  if (ompt_exit_state == ompt_state_idle) {
670  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
671  }
672  }
673 #endif
674 #if KMP_STATS_ENABLED
675  // If we were put into idle state, pop that off the state stack
676  if (KMP_GET_THREAD_STATE() == IDLE) {
677  KMP_POP_PARTITIONED_TIMER();
678  KMP_SET_THREAD_STATE(thread_state);
679  this_thr->th.th_stats->resetIdleFlag();
680  }
681 #endif
682 
683 #if KMP_OS_UNIX
684  if (final_spin)
685  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
686 #endif
687  KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
688  if (Cancellable) {
689  kmp_team_t *team = this_thr->th.th_team;
690  if (team && team->t.t_cancel_request == cancel_parallel) {
691  if (tasks_completed) {
692  // undo the previous decrement of unfinished_threads so that the
693  // thread can decrement at the join barrier with no problem
694  kmp_task_team_t *task_team = this_thr->th.th_task_team;
695  std::atomic<kmp_int32> *unfinished_threads =
696  &(task_team->tt.tt_unfinished_threads);
697  KMP_ATOMIC_INC(unfinished_threads);
698  }
699  return true;
700  }
701  }
702  return false;
703 }
704 
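Taken together, the wait loop above and the release path below expect their flag parameter C to provide a small fixed interface; the calls they make define an implicit concept that every flag class later in this header satisfies. A sketch of that concept, written out only for orientation (this struct does not exist in the runtime, and the ITT argument of execute_tasks is omitted):

   struct example_flag_concept {
     bool done_check();         // true once the release condition holds
     bool notdone_check();      // spin-loop predicate: keep waiting while true
     void internal_release();   // flip the flag so waiters observe done_check()
     void suspend(int th_gtid); // block the thread on this flag
     void mwait(int th_gtid);   // block via monitor/mwait where supported
     void resume(int th_gtid);  // wake a thread blocked in suspend()
     int execute_tasks(kmp_info_t *thr, kmp_int32 gtid, int final_spin,
                       int *thread_finished, kmp_int32 is_constrained);
     kmp_info_t *get_waiter(kmp_uint32 i); // waiter bookkeeping used by release
     kmp_uint32 get_num_waiters();
     bool is_any_sleeping();
     flag_type get_type();
   };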
705 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
706 // Set up a monitor on the flag variable causing the calling thread to wait in
707 // a less active state until the flag variable is modified.
708 template <class C>
709 static inline void __kmp_mwait_template(int th_gtid, C *flag) {
710  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_mwait);
711  kmp_info_t *th = __kmp_threads[th_gtid];
712 
713  KF_TRACE(30, ("__kmp_mwait_template: T#%d enter for flag = %p\n", th_gtid,
714  flag->get()));
715 
716  // User-level mwait is available
717  KMP_DEBUG_ASSERT(__kmp_mwait_enabled || __kmp_umwait_enabled);
718 
719  __kmp_suspend_initialize_thread(th);
720  __kmp_lock_suspend_mx(th);
721 
722  volatile void *spin = flag->get();
723  void *cacheline = (void *)(kmp_uintptr_t(spin) & ~(CACHE_LINE - 1));
724 
725  if (!flag->done_check()) {
726  // Mark thread as no longer active
727  th->th.th_active = FALSE;
728  if (th->th.th_active_in_pool) {
729  th->th.th_active_in_pool = FALSE;
730  KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
731  KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
732  }
733  flag->set_sleeping();
734  KF_TRACE(50, ("__kmp_mwait_template: T#%d calling monitor\n", th_gtid));
735 #if KMP_HAVE_UMWAIT
736  if (__kmp_umwait_enabled) {
737  __kmp_umonitor(cacheline);
738  }
739 #elif KMP_HAVE_MWAIT
740  if (__kmp_mwait_enabled) {
741  __kmp_mm_monitor(cacheline, 0, 0);
742  }
743 #endif
744  // To avoid a race, check flag between 'monitor' and 'mwait'. A write to
745  // the address could happen after the last time we checked and before
746  // monitoring started, in which case monitor can't detect the change.
747  if (flag->done_check())
748  flag->unset_sleeping();
749  else {
750  // if flag changes here, wake-up happens immediately
751  TCW_PTR(th->th.th_sleep_loc, (void *)flag);
752  th->th.th_sleep_loc_type = flag->get_type();
753  __kmp_unlock_suspend_mx(th);
754  KF_TRACE(50, ("__kmp_mwait_template: T#%d calling mwait\n", th_gtid));
755 #if KMP_HAVE_UMWAIT
756  if (__kmp_umwait_enabled) {
757  __kmp_umwait(1, 100); // to do: enable ctrl via hints, backoff counter
758  }
759 #elif KMP_HAVE_MWAIT
760  if (__kmp_mwait_enabled) {
761  __kmp_mm_mwait(0, __kmp_mwait_hints);
762  }
763 #endif
764  KF_TRACE(50, ("__kmp_mwait_template: T#%d mwait done\n", th_gtid));
765  __kmp_lock_suspend_mx(th);
766  // Clean up sleep info; doesn't matter how/why this thread stopped waiting
767  if (flag->is_sleeping())
768  flag->unset_sleeping();
769  TCW_PTR(th->th.th_sleep_loc, NULL);
770  th->th.th_sleep_loc_type = flag_unset;
771  }
772  // Mark thread as active again
773  th->th.th_active = TRUE;
774  if (TCR_4(th->th.th_in_pool)) {
775  KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
776  th->th.th_active_in_pool = TRUE;
777  }
778  } // Drop out to main wait loop to check flag, handle tasks, etc.
779  __kmp_unlock_suspend_mx(th);
780  KF_TRACE(30, ("__kmp_mwait_template: T#%d exit\n", th_gtid));
781 }
782 #endif // KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
783 
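The ordering in __kmp_mwait_template, arm the monitor, re-check the flag, and only then mwait, is what closes the race described in the comment above: a release that lands between the last check and arming the monitor is caught by the re-check instead of being slept through. A condensed sketch of the same pattern in isolation, assuming the umwait path is enabled; the function name and the predicate callback are illustrative:

   static inline void example_monitored_wait(void *cacheline, bool (*done)(void)) {
     __kmp_umonitor(cacheline); // start monitoring the flag's cache line
     if (done())
       return;                  // the release already happened; do not sleep
     __kmp_umwait(1, 100);      // otherwise wait; a write to the line wakes us up
   }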
784 /* Release any threads specified as waiting on the flag by releasing the flag
785  and resuming the waiting thread if indicated by the sleep bit(s). A thread that
786  calls __kmp_wait_template must call this function to wake up the potentially
787  sleeping thread and prevent deadlocks! */
788 template <class C> static inline void __kmp_release_template(C *flag) {
789 #ifdef KMP_DEBUG
790  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
791 #endif
792  KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%x)\n", gtid, flag->get()));
793  KMP_DEBUG_ASSERT(flag->get());
794  KMP_FSYNC_RELEASING(flag->get_void_p());
795 
796  flag->internal_release();
797 
798  KF_TRACE(100, ("__kmp_release: T#%d set new spin=%d\n", gtid, flag->get(),
799  flag->load()));
800 
801  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
802  // Only need to check sleep stuff if infinite block time not set.
803  // Are *any* threads waiting on flag sleeping?
804  if (flag->is_any_sleeping()) {
805  for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
806  // if sleeping waiter exists at i, sets current_waiter to i inside flag
807  kmp_info_t *waiter = flag->get_waiter(i);
808  if (waiter) {
809  int wait_gtid = waiter->th.th_info.ds.ds_gtid;
810  // Wake up thread if needed
811  KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep "
812  "flag(%p) set\n",
813  gtid, wait_gtid, flag->get()));
814  flag->resume(wait_gtid); // unsets flag's current_waiter when done
815  }
816  }
817  }
818  }
819 }
820 
821 template <bool Cancellable, bool Sleepable>
822 class kmp_flag_32 : public kmp_flag_atomic<kmp_uint32, flag32, Sleepable> {
823 public:
824  kmp_flag_32(std::atomic<kmp_uint32> *p)
825  : kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p) {}
826  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_info_t *thr)
827  : kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p, thr) {}
828  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c)
829  : kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p, c) {}
830  void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
831 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
832  void mwait(int th_gtid) { __kmp_mwait_32(th_gtid, this); }
833 #endif
834  void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
835  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
836  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
837  kmp_int32 is_constrained) {
838  return __kmp_execute_tasks_32(
839  this_thr, gtid, this, final_spin,
840  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
841  }
842  bool wait(kmp_info_t *this_thr,
843  int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
844  if (final_spin)
845  return __kmp_wait_template<kmp_flag_32, TRUE, Cancellable, Sleepable>(
846  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
847  else
848  return __kmp_wait_template<kmp_flag_32, FALSE, Cancellable, Sleepable>(
849  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
850  }
851  void release() { __kmp_release_template(this); }
852  flag_type get_ptr_type() { return flag32; }
853 };
854 
855 template <bool Cancellable, bool Sleepable>
856 class kmp_flag_64 : public kmp_flag_native<kmp_uint64, flag64, Sleepable> {
857 public:
858  kmp_flag_64(volatile kmp_uint64 *p)
859  : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p) {}
860  kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
861  : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, thr) {}
862  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
863  : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, c) {}
864  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c, std::atomic<bool> *loc)
865  : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, c, loc) {}
866  void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
867 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
868  void mwait(int th_gtid) { __kmp_mwait_64(th_gtid, this); }
869 #endif
870  void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
871  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
872  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
873  kmp_int32 is_constrained) {
874  return __kmp_execute_tasks_64(
875  this_thr, gtid, this, final_spin,
876  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
877  }
878  bool wait(kmp_info_t *this_thr,
879  int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
880  if (final_spin)
881  return __kmp_wait_template<kmp_flag_64, TRUE, Cancellable, Sleepable>(
882  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
883  else
884  return __kmp_wait_template<kmp_flag_64, FALSE, Cancellable, Sleepable>(
885  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
886  }
887  void release() { __kmp_release_template(this); }
888  flag_type get_ptr_type() { return flag64; }
889 };
890 
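A hedged end-to-end sketch of how a typed flag pairs a waiter with a releaser, assuming a shared 64-bit go location and a kmp_info_t * for the waiting thread; both functions are illustrative and not part of the runtime (the real callers are the barrier and tasking code):

   // Waiter: spin / yield / optionally sleep until *go reaches the checker.
   void example_wait_on_go(kmp_info_t *this_thr, volatile kmp_uint64 *go) {
     kmp_flag_64<> flag(go, (kmp_uint64)KMP_BARRIER_STATE_BUMP);
     flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(NULL));
   }

   // Releaser: bump the same location and wake the waiter if it went to sleep.
   void example_release_go(volatile kmp_uint64 *go, kmp_info_t *waiter) {
     kmp_flag_64<> flag(go, waiter); // registers 'waiter' so release can resume it
     flag.release();                 // internal_release() + resume() if sleeping
   }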
891 template <bool Cancellable, bool Sleepable>
892 class kmp_atomic_flag_64
893  : public kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable> {
894 public:
895  kmp_atomic_flag_64(std::atomic<kmp_uint64> *p)
896  : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p) {}
897  kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_info_t *thr)
898  : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, thr) {}
899  kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_uint64 c)
900  : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, c) {}
901  kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_uint64 c,
902  std::atomic<bool> *loc)
903  : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, c, loc) {}
904  void suspend(int th_gtid) { __kmp_atomic_suspend_64(th_gtid, this); }
905  void mwait(int th_gtid) { __kmp_atomic_mwait_64(th_gtid, this); }
906  void resume(int th_gtid) { __kmp_atomic_resume_64(th_gtid, this); }
907  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
908  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
909  kmp_int32 is_constrained) {
910  return __kmp_atomic_execute_tasks_64(
911  this_thr, gtid, this, final_spin,
912  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
913  }
914  bool wait(kmp_info_t *this_thr,
915  int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
916  if (final_spin)
917  return __kmp_wait_template<kmp_atomic_flag_64, TRUE, Cancellable,
918  Sleepable>(
919  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
920  else
921  return __kmp_wait_template<kmp_atomic_flag_64, FALSE, Cancellable,
922  Sleepable>(
923  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
924  }
925  void release() { __kmp_release_template(this); }
926  flag_type get_ptr_type() { return atomic_flag64; }
927 };
928 
929 // Hierarchical 64-bit on-core barrier instantiation
930 class kmp_flag_oncore : public kmp_flag_native<kmp_uint64, flag_oncore, false> {
931  kmp_uint32 offset;
932  bool flag_switch;
933  enum barrier_type bt;
934  kmp_info_t *this_thr;
935 #if USE_ITT_BUILD
936  void *itt_sync_obj;
937 #endif
938  unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
939  return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset];
940  }
941 
942 public:
943  kmp_flag_oncore(volatile kmp_uint64 *p)
944  : kmp_flag_native<kmp_uint64, flag_oncore, false>(p), flag_switch(false) {
945  }
946  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
947  : kmp_flag_native<kmp_uint64, flag_oncore, false>(p), offset(idx),
948  flag_switch(false),
949  bt(bs_last_barrier) USE_ITT_BUILD_ARG(itt_sync_obj(nullptr)) {}
950  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
951  enum barrier_type bar_t,
952  kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt))
953  : kmp_flag_native<kmp_uint64, flag_oncore, false>(p, c), offset(idx),
954  flag_switch(false), bt(bar_t),
955  this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {}
956  virtual ~kmp_flag_oncore() override {}
957  void *operator new(size_t size) { return __kmp_allocate(size); }
958  void operator delete(void *p) { __kmp_free(p); }
959  bool done_check_val(kmp_uint64 old_loc) override {
960  return byteref(&old_loc, offset) == checker;
961  }
962  bool done_check() override { return done_check_val(*get()); }
963  bool notdone_check() override {
964  // Calculate flag_switch
965  if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
966  flag_switch = true;
967  if (byteref(get(), offset) != 1 && !flag_switch)
968  return true;
969  else if (flag_switch) {
970  this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
971  kmp_flag_64<> flag(&this_thr->th.th_bar[bt].bb.b_go,
972  (kmp_uint64)KMP_BARRIER_STATE_BUMP);
973  __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
974  }
975  return false;
976  }
977  void internal_release() {
978  // Other threads can write their own bytes simultaneously.
979  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
980  byteref(get(), offset) = 1;
981  } else {
982  kmp_uint64 mask = 0;
983  byteref(&mask, offset) = 1;
984  KMP_TEST_THEN_OR64(get(), mask);
985  }
986  }
987  void wait(kmp_info_t *this_thr, int final_spin) {
988  if (final_spin)
989  __kmp_wait_template<kmp_flag_oncore, TRUE>(
990  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
991  else
992  __kmp_wait_template<kmp_flag_oncore, FALSE>(
993  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
994  }
995  void release() { __kmp_release_template(this); }
996  void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
997 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
998  void mwait(int th_gtid) { __kmp_mwait_oncore(th_gtid, this); }
999 #endif
1000  void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
1001  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
1002  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
1003  kmp_int32 is_constrained) {
1004 #if OMPD_SUPPORT
1005  int ret = __kmp_execute_tasks_oncore(
1006  this_thr, gtid, this, final_spin,
1007  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1008  if (ompd_state & OMPD_ENABLE_BP)
1009  ompd_bp_task_end();
1010  return ret;
1011 #else
1012  return __kmp_execute_tasks_oncore(
1013  this_thr, gtid, this, final_spin,
1014  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1015 #endif
1016  }
1017  enum barrier_type get_bt() { return bt; }
1018  flag_type get_ptr_type() { return flag_oncore; }
1019 };
1020 
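In the on-core flag, several threads share a single 64-bit word and each owns one byte of it, selected by offset; done_check compares only that byte against the checker, and internal_release publishes only that byte. A small sketch of the atomic byte publish, mirroring the non-infinite-blocktime branch of internal_release above (the function name is illustrative):

   static inline void example_oncore_checkin(volatile kmp_uint64 *shared,
                                             kmp_uint32 offset) {
     kmp_uint64 mask = 0;
     (RCAST(unsigned char *, &mask))[offset] = 1; // set only this thread's byte
     KMP_TEST_THEN_OR64(shared, mask); // merge without disturbing the other bytes
   }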
1021 static inline void __kmp_null_resume_wrapper(kmp_info_t *thr) {
1022  int gtid = __kmp_gtid_from_thread(thr);
1023  void *flag = CCAST(void *, thr->th.th_sleep_loc);
1024  flag_type type = thr->th.th_sleep_loc_type;
1025  if (!flag)
1026  return;
1027  // Attempt to wake up a thread: examine its type and call appropriate template
1028  switch (type) {
1029  case flag32:
1030  __kmp_resume_32(gtid, RCAST(kmp_flag_32<> *, flag));
1031  break;
1032  case flag64:
1033  __kmp_resume_64(gtid, RCAST(kmp_flag_64<> *, flag));
1034  break;
1035  case atomic_flag64:
1036  __kmp_atomic_resume_64(gtid, RCAST(kmp_atomic_flag_64<> *, flag));
1037  break;
1038  case flag_oncore:
1039  __kmp_resume_oncore(gtid, RCAST(kmp_flag_oncore *, flag));
1040  break;
1041 #ifdef KMP_DEBUG
1042  case flag_unset:
1043  KF_TRACE(100, ("__kmp_null_resume_wrapper: flag type %d is unset\n", type));
1044  break;
1045  default:
1046  KF_TRACE(100, ("__kmp_null_resume_wrapper: flag type %d does not match any "
1047  "known flag type\n",
1048  type));
1049 #endif
1050  }
1051 }
1052 
1057 #endif // KMP_WAIT_RELEASE_H