#ifndef KMP_FOURLINE_ALIGN_CACHE
// Align each flag to its own four-cache-line region so threads spinning on
// neighboring flags do not false-share cache lines.
#define KMP_FOURLINE_ALIGN_CACHE KMP_ALIGN(4 * CACHE_LINE)
#endif
#define KMP_OPTIMIZE_FOR_REDUCTIONS 0

class distributedBarrier {
  // One struct per flag type so the alignment macro isolates each instance.
  struct flags_s {
    kmp_uint32 volatile KMP_FOURLINE_ALIGN_CACHE stillNeed;
  };
  struct go_s {
    std::atomic<kmp_uint64> KMP_FOURLINE_ALIGN_CACHE go;
  };
  struct iter_s {
    kmp_uint64 volatile KMP_FOURLINE_ALIGN_CACHE iter;
  };
  struct sleep_s {
    std::atomic<bool> KMP_FOURLINE_ALIGN_CACHE sleep;
  };
  // Set up flag arrays and layout for nthr threads, resizing if needed.
  void init(size_t nthr);
  void resize(size_t nthr);
  // Pick the number of go signals/groups for n threads, then derive the
  // dependent layout fields.
  void computeGo(size_t n);
  void computeVarsForN(size_t n);
public:
  enum {
    MAX_ITERS = 3,
    MAX_GOS = 8,
    IDEAL_GOS = 4,
    IDEAL_CONTENTION = 16,
  };
  flags_s *flags[MAX_ITERS];
  go_s *go;
  iter_s *iter;
  sleep_s *sleep;
  size_t KMP_ALIGN_CACHE num_threads; // number of threads in barrier
  size_t KMP_ALIGN_CACHE max_threads; // size of arrays in data structure
  // number of go signals each requiring one write per iteration
  size_t KMP_ALIGN_CACHE num_gos;
  // number of groups of gos
  size_t KMP_ALIGN_CACHE num_groups;
  // threads per go signal
  size_t KMP_ALIGN_CACHE threads_per_go;
  bool KMP_ALIGN_CACHE fix_threads_per_go;
  // threads per group
  size_t KMP_ALIGN_CACHE threads_per_group;
  // number of go signals in a group
  size_t KMP_ALIGN_CACHE gos_per_group;
  // Construction/destruction go through allocate()/deallocate() so the
  // object can be given four-cache-line alignment.
  distributedBarrier() = delete;
  ~distributedBarrier() = delete;
  // Used instead of a constructor to create aligned data.
  static distributedBarrier *allocate(int nThreads) {
    distributedBarrier *d = (distributedBarrier *)_mm_malloc(
        sizeof(distributedBarrier), 4 * CACHE_LINE);
    d->num_threads = 0;
    d->max_threads = 0;
    for (int i = 0; i < MAX_ITERS; ++i)
      d->flags[i] = NULL;
    d->go = NULL;
    d->iter = NULL;
    d->sleep = NULL;
    d->fix_threads_per_go = false;
    // Compute the go-signal/group layout, then size and initialize the
    // flag arrays for the initial thread count.
    d->computeGo(nThreads);
    d->init(nThreads);
    return d;
  }
  static void deallocate(distributedBarrier *db) { _mm_free(db); }
  void update_num_threads(size_t nthr) { init(nthr); }

  bool need_resize(size_t new_nthr) { return (new_nthr > max_threads); }
  size_t get_num_threads() { return num_threads; }
  kmp_uint64 go_release();
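  // Usage sketch (illustrative only, not part of this header): a caller
  // creates and destroys the barrier through the static helpers, since the
  // constructor and destructor are deleted to force aligned storage.
  // nproc/new_nproc below are hypothetical thread counts from the caller.
  //
  //   distributedBarrier *b = distributedBarrier::allocate(nproc);
  //   if (b->need_resize(new_nproc))
  //     b->update_num_threads(new_nproc);
  //   ...
  //   distributedBarrier::deallocate(b);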