LLVM OpenMP* Runtime Library
kmp_wait_release.h
1 /*
2  * kmp_wait_release.h -- Wait/Release implementation
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef KMP_WAIT_RELEASE_H
14 #define KMP_WAIT_RELEASE_H
15 
16 #include "kmp.h"
17 #include "kmp_itt.h"
18 #include "kmp_stats.h"
19 #if OMPT_SUPPORT
20 #include "ompt-specific.h"
21 #endif
22 
36 struct flag_properties {
37  unsigned int type : 16;
38  unsigned int reserved : 16;
39 };
40 
41 template <enum flag_type FlagType> struct flag_traits {};
42 
43 template <> struct flag_traits<flag32> {
44  typedef kmp_uint32 flag_t;
45  static const flag_type t = flag32;
46  static inline flag_t tcr(flag_t f) { return TCR_4(f); }
47  static inline flag_t test_then_add4(volatile flag_t *f) {
48  return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
49  }
50  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
51  return KMP_TEST_THEN_OR32(f, v);
52  }
53  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
54  return KMP_TEST_THEN_AND32(f, v);
55  }
56 };
57 
58 template <> struct flag_traits<atomic_flag64> {
59  typedef kmp_uint64 flag_t;
60  static const flag_type t = atomic_flag64;
61  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
62  static inline flag_t test_then_add4(volatile flag_t *f) {
63  return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
64  }
65  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
66  return KMP_TEST_THEN_OR64(f, v);
67  }
68  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
69  return KMP_TEST_THEN_AND64(f, v);
70  }
71 };
72 
73 template <> struct flag_traits<flag64> {
74  typedef kmp_uint64 flag_t;
75  static const flag_type t = flag64;
76  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
77  static inline flag_t test_then_add4(volatile flag_t *f) {
78  return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
79  }
80  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
81  return KMP_TEST_THEN_OR64(f, v);
82  }
83  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
84  return KMP_TEST_THEN_AND64(f, v);
85  }
86 };
87 
88 template <> struct flag_traits<flag_oncore> {
89  typedef kmp_uint64 flag_t;
90  static const flag_type t = flag_oncore;
91  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
92  static inline flag_t test_then_add4(volatile flag_t *f) {
93  return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
94  }
95  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
96  return KMP_TEST_THEN_OR64(f, v);
97  }
98  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
99  return KMP_TEST_THEN_AND64(f, v);
100  }
101 };
102 
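// --- Illustrative aside (not part of kmp_wait_release.h) ---------------------
// The flag_traits specializations above map each flag_type to its storage
// width and to the matching KMP_TEST_THEN_* primitives, so generic code can
// stay width-agnostic. A minimal sketch of that dispatch; __sketch_bump_flag
// is a hypothetical helper, not a runtime function:
template <flag_type FlagType>
static inline typename flag_traits<FlagType>::flag_t
__sketch_bump_flag(volatile typename flag_traits<FlagType>::flag_t *f) {
  // Resolves to KMP_TEST_THEN_ADD4_32 or KMP_TEST_THEN_ADD4_64 at compile time.
  return flag_traits<FlagType>::test_then_add4(f);
}
// -----------------------------------------------------------------------------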
104 template <flag_type FlagType> class kmp_flag {
105 protected:
106  flag_properties t;
107  kmp_info_t *waiting_threads[1];
108  kmp_uint32 num_waiting_threads;
109  std::atomic<bool> *sleepLoc;
110 
111 public:
112  typedef flag_traits<FlagType> traits_type;
113  kmp_flag() : t({FlagType, 0U}), num_waiting_threads(0), sleepLoc(nullptr) {}
114  kmp_flag(int nwaiters)
115  : t({FlagType, 0U}), num_waiting_threads(nwaiters), sleepLoc(nullptr) {}
116  kmp_flag(std::atomic<bool> *sloc)
117  : t({FlagType, 0U}), num_waiting_threads(0), sleepLoc(sloc) {}
119  flag_type get_type() { return (flag_type)(t.type); }
120 
123  kmp_info_t *get_waiter(kmp_uint32 i) {
124  KMP_DEBUG_ASSERT(i < num_waiting_threads);
125  return waiting_threads[i];
126  }
128  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
131  void set_waiter(kmp_info_t *thr) {
132  waiting_threads[0] = thr;
134  }
135  enum barrier_type get_bt() { return bs_last_barrier; }
136 };
137 
139 template <typename PtrType, flag_type FlagType, bool Sleepable>
140 class kmp_flag_native : public kmp_flag<FlagType> {
141 protected:
142  volatile PtrType *loc;
143  PtrType checker;
144  typedef flag_traits<FlagType> traits_type;
145 
146 public:
147  typedef PtrType flag_t;
148  kmp_flag_native(volatile PtrType *p) : kmp_flag<FlagType>(), loc(p) {}
149  kmp_flag_native(volatile PtrType *p, kmp_info_t *thr)
150  : kmp_flag<FlagType>(1), loc(p) {
151  this->waiting_threads[0] = thr;
152  }
153  kmp_flag_native(volatile PtrType *p, PtrType c)
154  : kmp_flag<FlagType>(), loc(p), checker(c) {}
155  kmp_flag_native(volatile PtrType *p, PtrType c, std::atomic<bool> *sloc)
156  : kmp_flag<FlagType>(sloc), loc(p), checker(c) {}
157  volatile PtrType *get() { return loc; }
158  void *get_void_p() { return RCAST(void *, CCAST(PtrType *, loc)); }
159  void set(volatile PtrType *new_loc) { loc = new_loc; }
160  PtrType load() { return *loc; }
161  void store(PtrType val) { *loc = val; }
163  virtual bool done_check() {
164  if (Sleepable && !(this->sleepLoc))
165  return (traits_type::tcr(*(this->get())) & ~KMP_BARRIER_SLEEP_STATE) ==
166  checker;
167  else
168  return traits_type::tcr(*(this->get())) == checker;
169  }
172  virtual bool done_check_val(PtrType old_loc) { return old_loc == checker; }
178  virtual bool notdone_check() {
179  return traits_type::tcr(*(this->get())) != checker;
180  }
183  void internal_release() {
184  (void)traits_type::test_then_add4((volatile PtrType *)this->get());
185  }
189  PtrType set_sleeping() {
190  if (this->sleepLoc) {
191  this->sleepLoc->store(true);
192  return *(this->get());
193  }
194  return traits_type::test_then_or((volatile PtrType *)this->get(),
195  KMP_BARRIER_SLEEP_STATE);
196  }
200  void unset_sleeping() {
201  if (this->sleepLoc) {
202  this->sleepLoc->store(false);
203  return;
204  }
205  traits_type::test_then_and((volatile PtrType *)this->get(),
206  ~KMP_BARRIER_SLEEP_STATE);
207  }
210  bool is_sleeping_val(PtrType old_loc) {
211  if (this->sleepLoc)
212  return this->sleepLoc->load();
213  return old_loc & KMP_BARRIER_SLEEP_STATE;
214  }
216  bool is_sleeping() {
217  if (this->sleepLoc)
218  return this->sleepLoc->load();
219  return is_sleeping_val(*(this->get()));
220  }
221  bool is_any_sleeping() {
222  if (this->sleepLoc)
223  return this->sleepLoc->load();
224  return is_sleeping_val(*(this->get()));
225  }
226  kmp_uint8 *get_stolen() { return NULL; }
227 };
228 
230 template <typename PtrType, flag_type FlagType, bool Sleepable>
231 class kmp_flag_atomic : public kmp_flag<FlagType> {
232 protected:
233  std::atomic<PtrType> *loc;
234  PtrType checker;
235 public:
236  typedef flag_traits<FlagType> traits_type;
237  typedef PtrType flag_t;
238  kmp_flag_atomic(std::atomic<PtrType> *p) : kmp_flag<FlagType>(), loc(p) {}
239  kmp_flag_atomic(std::atomic<PtrType> *p, kmp_info_t *thr)
240  : kmp_flag<FlagType>(1), loc(p) {
241  this->waiting_threads[0] = thr;
242  }
243  kmp_flag_atomic(std::atomic<PtrType> *p, PtrType c)
244  : kmp_flag<FlagType>(), loc(p), checker(c) {}
245  kmp_flag_atomic(std::atomic<PtrType> *p, PtrType c, std::atomic<bool> *sloc)
246  : kmp_flag<FlagType>(sloc), loc(p), checker(c) {}
248  std::atomic<PtrType> *get() { return loc; }
250  void *get_void_p() { return RCAST(void *, loc); }
252  void set(std::atomic<PtrType> *new_loc) { loc = new_loc; }
254  PtrType load() { return loc->load(std::memory_order_acquire); }
256  void store(PtrType val) { loc->store(val, std::memory_order_release); }
258  bool done_check() {
259  if (Sleepable && !(this->sleepLoc))
260  return (this->load() & ~KMP_BARRIER_SLEEP_STATE) == checker;
261  else
262  return this->load() == checker;
263  }
266  bool done_check_val(PtrType old_loc) { return old_loc == checker; }
272  bool notdone_check() { return this->load() != checker; }
275  void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
279  PtrType set_sleeping() {
280  if (this->sleepLoc) {
281  this->sleepLoc->store(true);
282  return *(this->get());
283  }
284  return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE);
285  }
289  void unset_sleeping() {
290  if (this->sleepLoc) {
291  this->sleepLoc->store(false);
292  return;
293  }
294  KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE);
295  }
298  bool is_sleeping_val(PtrType old_loc) {
299  if (this->sleepLoc)
300  return this->sleepLoc->load();
301  return old_loc & KMP_BARRIER_SLEEP_STATE;
302  }
304  bool is_sleeping() {
305  if (this->sleepLoc)
306  return this->sleepLoc->load();
307  return is_sleeping_val(this->load());
308  }
309  bool is_any_sleeping() {
310  if (this->sleepLoc)
311  return this->sleepLoc->load();
312  return is_sleeping_val(this->load());
313  }
314  kmp_uint8 *get_stolen() { return NULL; }
315 };
316 
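// --- Illustrative aside (not part of kmp_wait_release.h) ---------------------
// Sleep-bit protocol used by set_sleeping()/unset_sleeping()/is_sleeping_val()
// above: a waiter publishes its intent to sleep by OR-ing KMP_BARRIER_SLEEP_STATE
// into the flag word (or by setting the separate sleepLoc boolean), and
// done_check() masks that bit back out when Sleepable is true. A hedged sketch
// of the waiter-side ordering; __sketch_try_sleep is hypothetical:
template <typename FlagT>
static inline bool __sketch_try_sleep(FlagT &f) {
  typename FlagT::flag_t old_val = f.set_sleeping(); // publish intent to sleep
  if (f.done_check_val(old_val & ~KMP_BARRIER_SLEEP_STATE)) {
    f.unset_sleeping(); // flag was released while we were setting the bit
    return false;       // caller must not suspend
  }
  return true; // caller may suspend; the releaser will see the sleep bit
}
// -----------------------------------------------------------------------------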
317 #if OMPT_SUPPORT
318 OMPT_NOINLINE
319 static void __ompt_implicit_task_end(kmp_info_t *this_thr,
320  ompt_state_t ompt_state,
321  ompt_data_t *tId) {
322  int ds_tid = this_thr->th.th_info.ds.ds_tid;
323  if (ompt_state == ompt_state_wait_barrier_implicit) {
324  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
325 #if OMPT_OPTIONAL
326  void *codeptr = NULL;
327  if (ompt_enabled.ompt_callback_sync_region_wait) {
328  ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
329  ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
330  codeptr);
331  }
332  if (ompt_enabled.ompt_callback_sync_region) {
333  ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
334  ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
335  codeptr);
336  }
337 #endif
338  if (!KMP_MASTER_TID(ds_tid)) {
339  if (ompt_enabled.ompt_callback_implicit_task) {
340  int flags = this_thr->th.ompt_thread_info.parallel_flags;
341  flags = (flags & ompt_parallel_league) ? ompt_task_initial
342  : ompt_task_implicit;
343  ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
344  ompt_scope_end, NULL, tId, 0, ds_tid, flags);
345  }
346  // return to idle state
347  this_thr->th.ompt_thread_info.state = ompt_state_idle;
348  } else {
349  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
350  }
351  }
352 }
353 #endif
354 
355 /* Spin wait loop that first does pause/yield, then sleep. A thread that calls
356  __kmp_wait_* must make certain that another thread calls __kmp_release
357  to wake it back up to prevent deadlocks!
358 
359  NOTE: We may not belong to a team at this point. */
360 template <class C, bool final_spin, bool Cancellable = false,
361  bool Sleepable = true>
362 static inline bool
363 __kmp_wait_template(kmp_info_t *this_thr,
364  C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
365 #if USE_ITT_BUILD && USE_ITT_NOTIFY
366  volatile void *spin = flag->get();
367 #endif
368  kmp_uint32 spins;
369  int th_gtid;
370  int tasks_completed = FALSE;
371 #if !KMP_USE_MONITOR
372  kmp_uint64 poll_count;
373  kmp_uint64 hibernate_goal;
374 #else
375  kmp_uint32 hibernate;
376 #endif
377 
378  KMP_FSYNC_SPIN_INIT(spin, NULL);
379  if (flag->done_check()) {
380  KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
381  return false;
382  }
383  th_gtid = this_thr->th.th_info.ds.ds_gtid;
384  if (Cancellable) {
385  kmp_team_t *team = this_thr->th.th_team;
386  if (team && team->t.t_cancel_request == cancel_parallel)
387  return true;
388  }
389 #if KMP_OS_UNIX
390  if (final_spin)
391  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
392 #endif
393  KA_TRACE(20,
394  ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
395 #if KMP_STATS_ENABLED
396  stats_state_e thread_state = KMP_GET_THREAD_STATE();
397 #endif
398 
399 /* OMPT Behavior:
400 THIS function is called from
401  __kmp_barrier (2 times) (implicit or explicit barrier in parallel regions)
402  these have join / fork behavior
403 
404  In these cases, we don't change the state or trigger events in THIS
405 function.
406  Events are triggered in the calling code (__kmp_barrier):
407 
408  state := ompt_state_overhead
409  barrier-begin
410  barrier-wait-begin
411  state := ompt_state_wait_barrier
412  call join-barrier-implementation (finally arrive here)
413  {}
414  call fork-barrier-implementation (finally arrive here)
415  {}
416  state := ompt_state_overhead
417  barrier-wait-end
418  barrier-end
419  state := ompt_state_work_parallel
420 
421 
422  __kmp_fork_barrier (after thread creation, before executing implicit task)
423  call fork-barrier-implementation (finally arrive here)
424  {} // worker arrive here with state = ompt_state_idle
425 
426 
427  __kmp_join_barrier (implicit barrier at end of parallel region)
428  state := ompt_state_barrier_implicit
429  barrier-begin
430  barrier-wait-begin
431  call join-barrier-implementation (finally arrive here
432 final_spin=FALSE)
433  {
434  }
435  __kmp_fork_barrier (implicit barrier at end of parallel region)
436  call fork-barrier-implementation (finally arrive here final_spin=TRUE)
437 
438  Worker after task-team is finished:
439  barrier-wait-end
440  barrier-end
441  implicit-task-end
442  idle-begin
443  state := ompt_state_idle
444 
445  Before leaving, if state = ompt_state_idle
446  idle-end
447  state := ompt_state_overhead
448 */
449 #if OMPT_SUPPORT
450  ompt_state_t ompt_entry_state;
451  ompt_data_t *tId;
452  if (ompt_enabled.enabled) {
453  ompt_entry_state = this_thr->th.ompt_thread_info.state;
454  if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit ||
455  KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
456  ompt_lw_taskteam_t *team = NULL;
457  if (this_thr->th.th_team)
458  team = this_thr->th.th_team->t.ompt_serialized_team_info;
459  if (team) {
460  tId = &(team->ompt_task_info.task_data);
461  } else {
462  tId = OMPT_CUR_TASK_DATA(this_thr);
463  }
464  } else {
465  tId = &(this_thr->th.ompt_thread_info.task_data);
466  }
467  if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec ||
468  this_thr->th.th_task_team == NULL)) {
469  // implicit task is done. Either no taskqueue, or task-team finished
470  __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
471  }
472  }
473 #endif
474 
475  KMP_INIT_YIELD(spins); // Setup for waiting
476 
477  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ||
478  __kmp_pause_status == kmp_soft_paused) {
479 #if KMP_USE_MONITOR
480 // The worker threads cannot rely on the team struct existing at this point.
481 // Use the bt values cached in the thread struct instead.
482 #ifdef KMP_ADJUST_BLOCKTIME
483  if (__kmp_pause_status == kmp_soft_paused ||
484  (__kmp_zero_bt && !this_thr->th.th_team_bt_set))
485  // Force immediate suspend if not set by user and more threads than
486  // available procs
487  hibernate = 0;
488  else
489  hibernate = this_thr->th.th_team_bt_intervals;
490 #else
491  hibernate = this_thr->th.th_team_bt_intervals;
492 #endif /* KMP_ADJUST_BLOCKTIME */
493 
494  /* If the blocktime is nonzero, we want to make sure that we spin wait for
495  the entirety of the specified #intervals, plus up to one interval more.
496  This increment makes certain that this thread doesn't go to sleep too
497  soon. */
498  if (hibernate != 0)
499  hibernate++;
500 
501  // Add in the current time value.
502  hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
503  KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
504  th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
505  hibernate - __kmp_global.g.g_time.dt.t_value));
506 #else
507  if (__kmp_pause_status == kmp_soft_paused) {
508  // Force immediate suspend
509  hibernate_goal = KMP_NOW();
510  } else
511  hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals;
512  poll_count = 0;
513  (void)poll_count;
514 #endif // KMP_USE_MONITOR
515  }
516 
517  KMP_MB();
518 
519  // Main wait spin loop
520  while (flag->notdone_check()) {
521  kmp_task_team_t *task_team = NULL;
522  if (__kmp_tasking_mode != tskm_immediate_exec) {
523  task_team = this_thr->th.th_task_team;
524  /* If the thread's task team pointer is NULL, it means one of 3 things:
525  1) A newly-created thread is first being released by
526  __kmp_fork_barrier(), and its task team has not been set up yet.
527  2) All tasks have been executed to completion.
528  3) Tasking is off for this region. This could be because we are in a
529  serialized region (perhaps the outer one), or else tasking was manually
530  disabled (KMP_TASKING=0). */
531  if (task_team != NULL) {
532  if (TCR_SYNC_4(task_team->tt.tt_active)) {
533  if (KMP_TASKING_ENABLED(task_team)) {
534  flag->execute_tasks(
535  this_thr, th_gtid, final_spin,
536  &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
537  } else
538  this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
539  } else {
540  KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
541 #if OMPT_SUPPORT
542  // task-team is done now; other cases should be caught above
543  if (final_spin && ompt_enabled.enabled)
544  __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
545 #endif
546  this_thr->th.th_task_team = NULL;
547  this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
548  }
549  } else {
550  this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
551  } // if
552  } // if
553 
554  KMP_FSYNC_SPIN_PREPARE(CCAST(void *, spin));
555  if (TCR_4(__kmp_global.g.g_done)) {
556  if (__kmp_global.g.g_abort)
557  __kmp_abort_thread();
558  break;
559  }
560 
561  // If we are oversubscribed, or have waited a bit (and
562  // KMP_LIBRARY=throughput), then yield
563  KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
564 
565 #if KMP_STATS_ENABLED
566  // Check if thread has been signalled to idle state
567  // This indicates that the logical "join-barrier" has finished
568  if (this_thr->th.th_stats->isIdle() &&
569  KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
570  KMP_SET_THREAD_STATE(IDLE);
571  KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
572  }
573 #endif
574  // Check if the barrier surrounding this wait loop has been cancelled
575  if (Cancellable) {
576  kmp_team_t *team = this_thr->th.th_team;
577  if (team && team->t.t_cancel_request == cancel_parallel)
578  break;
579  }
580 
581  // For a hidden helper thread, a NULL task_team means the main thread has
582  // not yet released the barrier. We cannot simply sleep here: by the time
583  // the main thread releases all child barriers, this thread would still be
584  // asleep, so the follow-up setup (such as task team sync) would be skipped
585  // and this thread would be left without a task team. Usually that is
586  // harmless, but there is a corner case: when the first task encountered is
587  // an untied task, the check in __kmp_task_alloc crashes because it uses
588  // the task team pointer without first checking whether it is nullptr,
589  // presumably on the assumption that a task team is always present by that
590  // point.
591  if (task_team && KMP_HIDDEN_HELPER_WORKER_THREAD(th_gtid) &&
592  !TCR_4(__kmp_hidden_helper_team_done)) {
593  // If there are still hidden helper tasks to be executed, the hidden helper
594  // thread will not enter a waiting state.
595  if (KMP_ATOMIC_LD_ACQ(&__kmp_unexecuted_hidden_helper_tasks) == 0) {
596  __kmp_hidden_helper_worker_thread_wait();
597  }
598  continue;
599  }
600 
601  // Don't suspend if KMP_BLOCKTIME is set to "infinite"
602  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
603  __kmp_pause_status != kmp_soft_paused)
604  continue;
605 
606  // Don't suspend if there is a likelihood of new tasks being spawned.
607  if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
608  continue;
609 
610 #if KMP_USE_MONITOR
611  // If we have waited a bit more, fall asleep
612  if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
613  continue;
614 #else
615  if (KMP_BLOCKING(hibernate_goal, poll_count++))
616  continue;
617 #endif
618  // Don't suspend if wait loop designated non-sleepable
619  // in template parameters
620  if (!Sleepable)
621  continue;
622 
623  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
624  __kmp_pause_status != kmp_soft_paused)
625  continue;
626 
627 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
628  if (__kmp_mwait_enabled || __kmp_umwait_enabled) {
629  KF_TRACE(50, ("__kmp_wait_sleep: T#%d using monitor/mwait\n", th_gtid));
630  flag->mwait(th_gtid);
631  } else {
632 #endif
633  KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
634 #if KMP_OS_UNIX
635  if (final_spin)
636  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
637 #endif
638  flag->suspend(th_gtid);
639 #if KMP_OS_UNIX
640  if (final_spin)
641  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
642 #endif
643 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
644  }
645 #endif
646 
647  if (TCR_4(__kmp_global.g.g_done)) {
648  if (__kmp_global.g.g_abort)
649  __kmp_abort_thread();
650  break;
651  } else if (__kmp_tasking_mode != tskm_immediate_exec &&
652  this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
653  this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
654  }
655  // TODO: If thread is done with work and times out, disband/free
656  }
657 
658 #if OMPT_SUPPORT
659  ompt_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
660  if (ompt_enabled.enabled && ompt_exit_state != ompt_state_undefined) {
661 #if OMPT_OPTIONAL
662  if (final_spin) {
663  __ompt_implicit_task_end(this_thr, ompt_exit_state, tId);
664  ompt_exit_state = this_thr->th.ompt_thread_info.state;
665  }
666 #endif
667  if (ompt_exit_state == ompt_state_idle) {
668  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
669  }
670  }
671 #endif
672 #if KMP_STATS_ENABLED
673  // If we were put into idle state, pop that off the state stack
674  if (KMP_GET_THREAD_STATE() == IDLE) {
675  KMP_POP_PARTITIONED_TIMER();
676  KMP_SET_THREAD_STATE(thread_state);
677  this_thr->th.th_stats->resetIdleFlag();
678  }
679 #endif
680 
681 #if KMP_OS_UNIX
682  if (final_spin)
683  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
684 #endif
685  KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
686  if (Cancellable) {
687  kmp_team_t *team = this_thr->th.th_team;
688  if (team && team->t.t_cancel_request == cancel_parallel) {
689  if (tasks_completed) {
690  // undo the previous decrement of unfinished_threads so that the
691  // thread can decrement at the join barrier with no problem
692  kmp_task_team_t *task_team = this_thr->th.th_task_team;
693  std::atomic<kmp_int32> *unfinished_threads =
694  &(task_team->tt.tt_unfinished_threads);
695  KMP_ATOMIC_INC(unfinished_threads);
696  }
697  return true;
698  }
699  }
700  return false;
701 }
702 
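// --- Illustrative aside (not part of kmp_wait_release.h) ---------------------
// When instantiated with Cancellable=true, __kmp_wait_template above returns
// true if cancel_parallel was requested for the team, and false on a normal
// release. A hedged sketch of how a caller would consume that result;
// __sketch_cancellable_wait is a hypothetical helper, not a runtime function:
template <class C>
static inline bool __sketch_cancellable_wait(kmp_info_t *this_thr, C *flag) {
  bool cancelled =
      __kmp_wait_template<C, TRUE /*final_spin*/, true /*Cancellable*/,
                          false /*Sleepable*/>(
          this_thr, flag USE_ITT_BUILD_ARG(NULL));
  // On cancellation the caller must skip its normal post-barrier work.
  return cancelled;
}
// -----------------------------------------------------------------------------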
703 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
704 // Set up a monitor on the flag variable causing the calling thread to wait in
705 // a less active state until the flag variable is modified.
706 template <class C>
707 static inline void __kmp_mwait_template(int th_gtid, C *flag) {
708  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_mwait);
709  kmp_info_t *th = __kmp_threads[th_gtid];
710 
711  KF_TRACE(30, ("__kmp_mwait_template: T#%d enter for flag = %p\n", th_gtid,
712  flag->get()));
713 
714  // User-level mwait is available
715  KMP_DEBUG_ASSERT(__kmp_mwait_enabled || __kmp_umwait_enabled);
716 
717  __kmp_suspend_initialize_thread(th);
718  __kmp_lock_suspend_mx(th);
719 
720  volatile void *spin = flag->get();
721  void *cacheline = (void *)(kmp_uintptr_t(spin) & ~(CACHE_LINE - 1));
722 
723  if (!flag->done_check()) {
724  // Mark thread as no longer active
725  th->th.th_active = FALSE;
726  if (th->th.th_active_in_pool) {
727  th->th.th_active_in_pool = FALSE;
728  KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
729  KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
730  }
731  flag->set_sleeping();
732  KF_TRACE(50, ("__kmp_mwait_template: T#%d calling monitor\n", th_gtid));
733 #if KMP_HAVE_UMWAIT
734  if (__kmp_umwait_enabled) {
735  __kmp_umonitor(cacheline);
736  }
737 #elif KMP_HAVE_MWAIT
738  if (__kmp_mwait_enabled) {
739  __kmp_mm_monitor(cacheline, 0, 0);
740  }
741 #endif
742  // To avoid a race, check flag between 'monitor' and 'mwait'. A write to
743  // the address could happen after the last time we checked and before
744  // monitoring started, in which case monitor can't detect the change.
745  if (flag->done_check())
746  flag->unset_sleeping();
747  else {
748  // if flag changes here, wake-up happens immediately
749  TCW_PTR(th->th.th_sleep_loc, (void *)flag);
750  th->th.th_sleep_loc_type = flag->get_type();
751  __kmp_unlock_suspend_mx(th);
752  KF_TRACE(50, ("__kmp_mwait_template: T#%d calling mwait\n", th_gtid));
753 #if KMP_HAVE_UMWAIT
754  if (__kmp_umwait_enabled) {
755  __kmp_umwait(1, 100); // to do: enable ctrl via hints, backoff counter
756  }
757 #elif KMP_HAVE_MWAIT
758  if (__kmp_mwait_enabled) {
759  __kmp_mm_mwait(0, __kmp_mwait_hints);
760  }
761 #endif
762  KF_TRACE(50, ("__kmp_mwait_template: T#%d mwait done\n", th_gtid));
763  __kmp_lock_suspend_mx(th);
764  // Clean up sleep info; doesn't matter how/why this thread stopped waiting
765  if (flag->is_sleeping())
766  flag->unset_sleeping();
767  TCW_PTR(th->th.th_sleep_loc, NULL);
768  th->th.th_sleep_loc_type = flag_unset;
769  }
770  // Mark thread as active again
771  th->th.th_active = TRUE;
772  if (TCR_4(th->th.th_in_pool)) {
773  KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
774  th->th.th_active_in_pool = TRUE;
775  }
776  } // Drop out to main wait loop to check flag, handle tasks, etc.
777  __kmp_unlock_suspend_mx(th);
778  KF_TRACE(30, ("__kmp_mwait_template: T#%d exit\n", th_gtid));
779 }
780 #endif // KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
781 
782 /* Release any threads specified as waiting on the flag by releasing the flag
783  and resuming the waiting thread if indicated by the sleep bit(s). For every
784  thread that calls __kmp_wait_template, another thread must call this function
785  to wake up the potentially sleeping thread and prevent deadlocks! */
786 template <class C> static inline void __kmp_release_template(C *flag) {
787 #ifdef KMP_DEBUG
788  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
789 #endif
790  KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%x)\n", gtid, flag->get()));
791  KMP_DEBUG_ASSERT(flag->get());
792  KMP_FSYNC_RELEASING(flag->get_void_p());
793 
794  flag->internal_release();
795 
796  KF_TRACE(100, ("__kmp_release: T#%d set new spin=%d\n", gtid, flag->get(),
797  flag->load()));
798 
799  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
800  // Only need to check sleep stuff if infinite block time not set.
801  // Are *any* threads waiting on flag sleeping?
802  if (flag->is_any_sleeping()) {
803  for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
804  // if sleeping waiter exists at i, sets current_waiter to i inside flag
805  kmp_info_t *waiter = flag->get_waiter(i);
806  if (waiter) {
807  int wait_gtid = waiter->th.th_info.ds.ds_gtid;
808  // Wake up thread if needed
809  KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep "
810  "flag(%p) set\n",
811  gtid, wait_gtid, flag->get()));
812  flag->resume(wait_gtid); // unsets flag's current_waiter when done
813  }
814  }
815  }
816  }
817 }
818 
819 template <bool Cancellable, bool Sleepable>
820 class kmp_flag_32 : public kmp_flag_atomic<kmp_uint32, flag32, Sleepable> {
821 public:
822  kmp_flag_32(std::atomic<kmp_uint32> *p)
823  : kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p) {}
824  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_info_t *thr)
825  : kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p, thr) {}
826  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c)
827  : kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p, c) {}
828  void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
829 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
830  void mwait(int th_gtid) { __kmp_mwait_32(th_gtid, this); }
831 #endif
832  void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
833  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
834  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
835  kmp_int32 is_constrained) {
836  return __kmp_execute_tasks_32(
837  this_thr, gtid, this, final_spin,
838  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
839  }
840  bool wait(kmp_info_t *this_thr,
841  int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
842  if (final_spin)
843  return __kmp_wait_template<kmp_flag_32, TRUE, Cancellable, Sleepable>(
844  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
845  else
846  return __kmp_wait_template<kmp_flag_32, FALSE, Cancellable, Sleepable>(
847  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
848  }
849  void release() { __kmp_release_template(this); }
850  flag_type get_ptr_type() { return flag32; }
851 };
852 
853 template <bool Cancellable, bool Sleepable>
854 class kmp_flag_64 : public kmp_flag_native<kmp_uint64, flag64, Sleepable> {
855 public:
856  kmp_flag_64(volatile kmp_uint64 *p)
857  : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p) {}
858  kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
859  : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, thr) {}
860  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
861  : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, c) {}
862  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c, std::atomic<bool> *loc)
863  : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, c, loc) {}
864  void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
865 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
866  void mwait(int th_gtid) { __kmp_mwait_64(th_gtid, this); }
867 #endif
868  void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
869  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
870  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
871  kmp_int32 is_constrained) {
872  return __kmp_execute_tasks_64(
873  this_thr, gtid, this, final_spin,
874  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
875  }
876  bool wait(kmp_info_t *this_thr,
877  int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
878  if (final_spin)
879  return __kmp_wait_template<kmp_flag_64, TRUE, Cancellable, Sleepable>(
880  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
881  else
882  return __kmp_wait_template<kmp_flag_64, FALSE, Cancellable, Sleepable>(
883  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
884  }
885  void release() { __kmp_release_template(this); }
886  flag_type get_ptr_type() { return flag64; }
887 };
888 
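// --- Illustrative aside (not part of kmp_wait_release.h) ---------------------
// The wait/release pairing that the comment above __kmp_wait_template requires,
// shown on a 64-bit "go" word. The waiter blocks until the word reaches the
// checker value; some other thread must bump the word and wake any sleeper.
// Both helpers below are hypothetical sketches, not runtime functions:
static inline void __sketch_wait_on_go(kmp_info_t *this_thr,
                                       volatile kmp_uint64 *go_word,
                                       kmp_uint64 go_value) {
  kmp_flag_64<> flag(go_word, go_value); // checker = go_value
  flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(NULL)); // spin, then maybe sleep
}
static inline void __sketch_release_go(kmp_info_t *waiter,
                                       volatile kmp_uint64 *go_word) {
  kmp_flag_64<> flag(go_word, waiter); // remember the waiter to resume
  flag.release(); // internal_release() bumps *go_word, then resumes a sleeper
}
// -----------------------------------------------------------------------------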
889 template <bool Cancellable, bool Sleepable>
890 class kmp_atomic_flag_64
891  : public kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable> {
892 public:
893  kmp_atomic_flag_64(std::atomic<kmp_uint64> *p)
894  : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p) {}
895  kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_info_t *thr)
896  : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, thr) {}
897  kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_uint64 c)
898  : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, c) {}
899  kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_uint64 c,
900  std::atomic<bool> *loc)
901  : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, c, loc) {}
902  void suspend(int th_gtid) { __kmp_atomic_suspend_64(th_gtid, this); }
903  void mwait(int th_gtid) { __kmp_atomic_mwait_64(th_gtid, this); }
904  void resume(int th_gtid) { __kmp_atomic_resume_64(th_gtid, this); }
905  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
906  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
907  kmp_int32 is_constrained) {
908  return __kmp_atomic_execute_tasks_64(
909  this_thr, gtid, this, final_spin,
910  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
911  }
912  bool wait(kmp_info_t *this_thr,
913  int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
914  if (final_spin)
915  return __kmp_wait_template<kmp_atomic_flag_64, TRUE, Cancellable,
916  Sleepable>(
917  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
918  else
919  return __kmp_wait_template<kmp_atomic_flag_64, FALSE, Cancellable,
920  Sleepable>(
921  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
922  }
923  void release() { __kmp_release_template(this); }
924  flag_type get_ptr_type() { return atomic_flag64; }
925 };
926 
927 // Hierarchical 64-bit on-core barrier instantiation
928 class kmp_flag_oncore : public kmp_flag_native<kmp_uint64, flag_oncore, false> {
929  kmp_uint32 offset;
930  bool flag_switch;
931  enum barrier_type bt;
932  kmp_info_t *this_thr;
933 #if USE_ITT_BUILD
934  void *itt_sync_obj;
935 #endif
936  unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
937  return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset];
938  }
939 
940 public:
941  kmp_flag_oncore(volatile kmp_uint64 *p)
942  : kmp_flag_native<kmp_uint64, flag_oncore, false>(p), flag_switch(false) {
943  }
944  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
945  : kmp_flag_native<kmp_uint64, flag_oncore, false>(p), offset(idx),
946  flag_switch(false), bt(bs_last_barrier), itt_sync_obj(nullptr) {}
947  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
948  enum barrier_type bar_t,
949  kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt))
950  : kmp_flag_native<kmp_uint64, flag_oncore, false>(p, c), offset(idx),
951  flag_switch(false), bt(bar_t),
952  this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {}
953  bool done_check_val(kmp_uint64 old_loc) {
954  return byteref(&old_loc, offset) == checker;
955  }
956  bool done_check() { return done_check_val(*get()); }
957  bool notdone_check() {
958  // Calculate flag_switch
959  if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
960  flag_switch = true;
961  if (byteref(get(), offset) != 1 && !flag_switch)
962  return true;
963  else if (flag_switch) {
964  this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
965  kmp_flag_64<> flag(&this_thr->th.th_bar[bt].bb.b_go,
966  (kmp_uint64)KMP_BARRIER_STATE_BUMP);
967  __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
968  }
969  return false;
970  }
971  void internal_release() {
972  // Other threads can write their own bytes simultaneously.
973  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
974  byteref(get(), offset) = 1;
975  } else {
976  kmp_uint64 mask = 0;
977  byteref(&mask, offset) = 1;
978  KMP_TEST_THEN_OR64(get(), mask);
979  }
980  }
981  void wait(kmp_info_t *this_thr, int final_spin) {
982  if (final_spin)
983  __kmp_wait_template<kmp_flag_oncore, TRUE>(
984  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
985  else
986  __kmp_wait_template<kmp_flag_oncore, FALSE>(
987  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
988  }
989  void release() { __kmp_release_template(this); }
990  void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
991 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
992  void mwait(int th_gtid) { __kmp_mwait_oncore(th_gtid, this); }
993 #endif
994  void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
995  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
996  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
997  kmp_int32 is_constrained) {
998 #if OMPD_SUPPORT
999  int ret = __kmp_execute_tasks_oncore(
1000  this_thr, gtid, this, final_spin,
1001  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1002  if (ompd_state & OMPD_ENABLE_BP)
1003  ompd_bp_task_end();
1004  return ret;
1005 #else
1006  return __kmp_execute_tasks_oncore(
1007  this_thr, gtid, this, final_spin,
1008  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1009 #endif
1010  }
1011  enum barrier_type get_bt() { return bt; }
1012  flag_type get_ptr_type() { return flag_oncore; }
1013 };
1014 
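// --- Illustrative aside (not part of kmp_wait_release.h) ---------------------
// kmp_flag_oncore packs one release byte per thread of a core into a single
// 64-bit word; internal_release() above therefore sets only this thread's byte
// (via a one-byte mask) because sibling threads may be writing their own bytes
// concurrently. A hedged sketch of that packing, assuming little-endian byte
// order so that shifting matches byteref()'s in-memory indexing;
// __sketch_oncore_byte_released is hypothetical:
static inline bool __sketch_oncore_byte_released(kmp_uint64 flag_word,
                                                 kmp_uint32 byte_offset) {
  // Byte 'byte_offset' of the word becomes 1 once that thread is released.
  return ((flag_word >> (8 * byte_offset)) & 0xFFu) == 1u;
}
// -----------------------------------------------------------------------------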
1015 static inline void __kmp_null_resume_wrapper(kmp_info_t *thr) {
1016  int gtid = __kmp_gtid_from_thread(thr);
1017  void *flag = CCAST(void *, thr->th.th_sleep_loc);
1018  flag_type type = thr->th.th_sleep_loc_type;
1019  if (!flag)
1020  return;
1021  // Attempt to wake up a thread: examine its type and call appropriate template
1022  switch (type) {
1023  case flag32:
1024  __kmp_resume_32(gtid, RCAST(kmp_flag_32<> *, flag));
1025  break;
1026  case flag64:
1027  __kmp_resume_64(gtid, RCAST(kmp_flag_64<> *, flag));
1028  break;
1029  case atomic_flag64:
1030  __kmp_atomic_resume_64(gtid, RCAST(kmp_atomic_flag_64<> *, flag));
1031  break;
1032  case flag_oncore:
1033  __kmp_resume_oncore(gtid, RCAST(kmp_flag_oncore *, flag));
1034  break;
1035 #ifdef KMP_DEBUG
1036  case flag_unset:
1037  KF_TRACE(100, ("__kmp_null_resume_wrapper: flag type %d is unset\n", type));
1038  break;
1039  default:
1040  KF_TRACE(100, ("__kmp_null_resume_wrapper: flag type %d does not match any "
1041  "known flag type\n",
1042  type));
1043 #endif
1044  }
1045 }
1046 
1051 #endif // KMP_WAIT_RELEASE_H