LLVM OpenMP* Runtime Library
kmp_barrier.h
1 /*
2  * kmp_barrier.h
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef KMP_BARRIER_H
14 #define KMP_BARRIER_H
15 
16 #include "kmp.h"
17 
18 // Align to four cache lines: the MLC tends to prefetch the next or previous
19 // cache line, creating a possible fake conflict between cores, so padding to
20 // four lines is the only way to guarantee that no such prefetch can happen.
21 #ifndef KMP_FOURLINE_ALIGN_CACHE // keep a definition supplied earlier, if any
22 #define KMP_FOURLINE_ALIGN_CACHE KMP_ALIGN(4 * CACHE_LINE)
23 #endif
24 
25 #define KMP_OPTIMIZE_FOR_REDUCTIONS 0 // defined as 0 here; not referenced elsewhere in this header
26 
27 class distributedBarrier {
28  struct flags_s {
29  kmp_uint32 volatile KMP_FOURLINE_ALIGN_CACHE stillNeed;
30  };
31 
32  struct go_s {
33  std::atomic<kmp_uint64> KMP_FOURLINE_ALIGN_CACHE go;
34  };
35 
36  struct iter_s {
37  kmp_uint64 volatile KMP_FOURLINE_ALIGN_CACHE iter;
38  };
39 
40  struct sleep_s {
41  std::atomic<bool> KMP_FOURLINE_ALIGN_CACHE sleep;
42  };
43 
44  void init(size_t nthr);
45  void resize(size_t nthr);
46  void computeGo(size_t n);
47  void computeVarsForN(size_t n);
48 
49 public:
50  enum {
51  MAX_ITERS = 3,
52  MAX_GOS = 8,
53  IDEAL_GOS = 4,
54  IDEAL_CONTENTION = 16,
55  };
56 
57  flags_s *flags[MAX_ITERS];
58  go_s *go;
59  iter_s *iter;
60  sleep_s *sleep;
61 
62  size_t KMP_ALIGN_CACHE num_threads; // number of threads in barrier
63  size_t KMP_ALIGN_CACHE max_threads; // size of arrays in data structure
64  // number of go signals each requiring one write per iteration
65  size_t KMP_ALIGN_CACHE num_gos;
66  // number of groups of gos
67  size_t KMP_ALIGN_CACHE num_groups;
68  // threads per go signal
69  size_t KMP_ALIGN_CACHE threads_per_go;
70  bool KMP_ALIGN_CACHE fix_threads_per_go;
71  // threads per group
72  size_t KMP_ALIGN_CACHE threads_per_group;
73  // number of go signals in a group
74  size_t KMP_ALIGN_CACHE gos_per_group;
75  void *team_icvs;
76 
77  distributedBarrier() = delete;
78  ~distributedBarrier() = delete;
79 
80  // Used instead of constructor to create aligned data
81  static distributedBarrier *allocate(int nThreads) {
82  distributedBarrier *d = (distributedBarrier *)_mm_malloc(
83  sizeof(distributedBarrier), 4 * CACHE_LINE);
84  d->num_threads = 0;
85  d->max_threads = 0;
86  for (int i = 0; i < MAX_ITERS; ++i)
87  d->flags[i] = NULL;
88  d->go = NULL;
89  d->iter = NULL;
90  d->sleep = NULL;
91  d->team_icvs = NULL;
92  d->fix_threads_per_go = false;
93  // calculate gos and groups ONCE on base size
94  d->computeGo(nThreads);
95  d->init(nThreads);
96  return d;
97  }
98 
99  static void deallocate(distributedBarrier *db) { _mm_free(db); }
100 
101  void update_num_threads(size_t nthr) { init(nthr); }
102 
103  bool need_resize(size_t new_nthr) { return (new_nthr > max_threads); }
104  size_t get_num_threads() { return num_threads; }
105  kmp_uint64 go_release();
106  void go_reset();
107 };
108 
109 #endif // KMP_BARRIER_H