LLVM OpenMP* Runtime Library
kmp_affinity.h
/*
 * kmp_affinity.h -- header for affinity management
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_AFFINITY_H
#define KMP_AFFINITY_H

#include "kmp.h"
#include "kmp_os.h"

#if KMP_AFFINITY_SUPPORTED
#if KMP_USE_HWLOC
class KMPHwlocAffinity : public KMPAffinity {
public:
  class Mask : public KMPAffinity::Mask {
    hwloc_cpuset_t mask;

  public:
    Mask() {
      mask = hwloc_bitmap_alloc();
      this->zero();
    }
    ~Mask() { hwloc_bitmap_free(mask); }
    void set(int i) override { hwloc_bitmap_set(mask, i); }
    bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
    void clear(int i) override { hwloc_bitmap_clr(mask, i); }
    void zero() override { hwloc_bitmap_zero(mask); }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      hwloc_bitmap_copy(mask, convert->mask);
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_and(mask, mask, convert->mask);
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_or(mask, mask, convert->mask);
    }
    void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
    int begin() const override { return hwloc_bitmap_first(mask); }
    int end() const override { return -1; }
    int next(int previous) const override {
      return hwloc_bitmap_next(mask, previous);
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      int retval =
          hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
      int retval =
          hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int get_proc_group() const override {
      int group = -1;
#if KMP_OS_WINDOWS
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        // On Windows, the long type is always 32 bits
        unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
        unsigned long second_32_bits =
            hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
        if (first_32_bits == 0 && second_32_bits == 0) {
          continue;
        }
        if (group >= 0) {
          return -1;
        }
        group = i;
      }
#endif /* KMP_OS_WINDOWS */
      return group;
    }
  };
  void determine_capable(const char *var) override {
    const hwloc_topology_support *topology_support;
    if (__kmp_hwloc_topology == NULL) {
      if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
      }
      if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
      }
    }
    topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
    // Is the system capable of setting/getting this thread's affinity?
    // Also, is topology discovery possible? (pu indicates ability to discover
    // processing units). And finally, were there no errors when calling any
    // hwloc_* API functions?
    if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
        topology_support->cpubind->get_thisthread_cpubind &&
        topology_support->discovery->pu && !__kmp_hwloc_error) {
      // enables affinity according to KMP_AFFINITY_CAPABLE() macro
      KMP_AFFINITY_ENABLE(TRUE);
    } else {
      // indicate that hwloc didn't work and disable affinity
      __kmp_hwloc_error = TRUE;
      KMP_AFFINITY_DISABLE();
    }
  }
  void bind_thread(int which) override {
    KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                "Illegal set affinity operation when not capable");
    KMPAffinity::Mask *mask;
    KMP_CPU_ALLOC_ON_STACK(mask);
    KMP_CPU_ZERO(mask);
    KMP_CPU_SET(which, mask);
    __kmp_set_system_affinity(mask, TRUE);
    KMP_CPU_FREE_FROM_STACK(mask);
  }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    delete[] hwloc_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    return &(hwloc_array[index]);
  }
  api_type get_api_type() const override { return HWLOC; }
};
#endif /* KMP_USE_HWLOC */
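
/* Illustrative sketch (not part of this header): every KMPAffinity::Mask
   implementation exposes the same iteration protocol -- begin() yields the
   first set bit, next(prev) yields the one after prev, and end() is the
   sentinel. For the hwloc mask the sentinel is -1 (hwloc_bitmap_next() returns
   -1 once the bitmap is exhausted); for the native masks below it is the total
   number of bits in the mask. Assuming the runtime's global
   __kmp_affinity_dispatch pointer (declared elsewhere in the runtime) and an
   already-capable affinity setup, a caller could walk the current thread's
   mask like this:

     KMPAffinity::Mask *m = __kmp_affinity_dispatch->allocate_mask();
     m->get_system_affinity(TRUE); // fill with this thread's current affinity
     for (int i = m->begin(); i != m->end(); i = m->next(i))
       printf("logical CPU %d is in the mask\n", i);
     __kmp_affinity_dispatch->deallocate_mask(m);

   The KMP_CPU_* macros used in bind_thread() above wrap these same calls. */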

#if KMP_OS_LINUX || KMP_OS_FREEBSD
#if KMP_OS_LINUX
/* On some of the older OS's that we build on, these constants aren't present
   in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
   all systems of the same arch where they are defined, and they cannot
   change: they are set in stone forever. */
#include <sys/syscall.h>
#if KMP_ARCH_X86 || KMP_ARCH_ARM
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 241
#elif __NR_sched_setaffinity != 241
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 242
#elif __NR_sched_getaffinity != 242
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_AARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_X86_64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 203
#elif __NR_sched_setaffinity != 203
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 204
#elif __NR_sched_getaffinity != 204
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_PPC64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 222
#elif __NR_sched_setaffinity != 222
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 223
#elif __NR_sched_getaffinity != 223
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 4239
#elif __NR_sched_setaffinity != 4239
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 4240
#elif __NR_sched_getaffinity != 4240
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 5195
#elif __NR_sched_setaffinity != 5195
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 5196
#elif __NR_sched_getaffinity != 5196
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#else
#error Unknown or unsupported architecture
#endif /* KMP_ARCH_* */
#elif KMP_OS_FREEBSD
#include <pthread.h>
#include <pthread_np.h>
#endif
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef unsigned char mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;

  public:
    mask_t *mask;
    Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_affin_mask_size * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
#if KMP_OS_LINUX
      int retval =
          syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
#elif KMP_OS_FREEBSD
      int r = pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size,
                                     reinterpret_cast<cpuset_t *>(mask));
      int retval = (r == 0 ? 0 : -1);
#endif
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
#if KMP_OS_LINUX
      int retval =
          syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
#elif KMP_OS_FREEBSD
      int r = pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size,
                                     reinterpret_cast<cpuset_t *>(mask));
      int retval = (r == 0 ? 0 : -1);
#endif
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override {
    KMPNativeAffinity::Mask *retval = new Mask();
    return retval;
  }
  void deallocate_mask(KMPAffinity::Mask *m) override {
    KMPNativeAffinity::Mask *native_mask =
        static_cast<KMPNativeAffinity::Mask *>(m);
    delete native_mask;
  }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *linux_array = static_cast<Mask *>(array);
    delete[] linux_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *linux_array = static_cast<Mask *>(array);
    return &(linux_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_LINUX || KMP_OS_FREEBSD */
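
/* Worked example (illustrative, not part of the runtime): the native mask is a
   flat array of mask_t (unsigned char on Linux/FreeBSD, so BITS_PER_MASK_T is
   8). Logical CPU i lives in element i / BITS_PER_MASK_T at bit position
   i % BITS_PER_MASK_T. For CPU 11:

     11 / 8 == 1   // byte index 1
     11 % 8 == 3   // bit 3 within that byte

   so set(11) performs mask[1] |= (mask_t)1 << 3, and is_set(11) / clear(11)
   test or clear that same bit. __kmp_affin_mask_size is the size of the array
   in bytes, which is why end() is __kmp_affin_mask_size * BITS_PER_MASK_T. */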

#if KMP_OS_WINDOWS
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef ULONG_PTR mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    mask_t *mask;

  public:
    Mask() {
      mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
    }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int set_system_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups > 1) {
        // Check for a valid mask.
        GROUP_AFFINITY ga;
        int group = get_proc_group();
        if (group < 0) {
          if (abort_on_error) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
          }
          return -1;
        }
        // Transform the bit vector into a GROUP_AFFINITY struct
        // and make the system call to set affinity.
        ga.Group = group;
        ga.Mask = mask[group];
        ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;

        KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
        if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      } else {
        if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
    int get_system_affinity(bool abort_on_error) override {
      if (__kmp_num_proc_groups > 1) {
        this->zero();
        GROUP_AFFINITY ga;
        KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
        if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
            (ga.Mask == 0)) {
          return -1;
        }
        mask[ga.Group] = ga.Mask;
      } else {
        mask_t newMask, sysMask, retval;
        if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
        if (!retval) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
        if (!newMask) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
        }
        *mask = retval;
      }
      return 0;
    }
    int get_proc_group() const override {
      int group = -1;
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        if (mask[i] == 0)
          continue;
        if (group >= 0)
          return -1;
        group = i;
      }
      return group;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *windows_array = static_cast<Mask *>(array);
    delete[] windows_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *windows_array = static_cast<Mask *>(array);
    return &(windows_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_WINDOWS */
#endif /* KMP_AFFINITY_SUPPORTED */
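
/* Background note with a small sketch (illustrative, not part of the runtime):
   on Windows systems with more than 64 logical processors, CPUs are
   partitioned into processor groups of up to 64, and a thread's affinity can
   only name CPUs from a single group. That is why get_proc_group() returns -1
   when the mask has bits set in more than one group, and why
   set_system_affinity() rejects such a mask. Assuming a hypothetical mask
   object m on a 2-group machine (mask_t is 64 bits wide):

     m->zero();
     m->set(3);                    // group 0, bit 3
     int g = m->get_proc_group();  // g == 0, the mask can be applied
     m->set(64 + 5);               // also set group 1, bit 5
     g = m->get_proc_group();      // g == -1, the mask spans two groups

   The multi-group path in set_system_affinity() packs the single group's
   64-bit word and the group number into a GROUP_AFFINITY struct for
   SetThreadGroupAffinity(). */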

class Address {
public:
  static const unsigned maxDepth = 32;
  unsigned labels[maxDepth];
  unsigned childNums[maxDepth];
  unsigned depth;
  unsigned leader;
  Address(unsigned _depth) : depth(_depth), leader(FALSE) {}
  Address &operator=(const Address &b) {
    depth = b.depth;
    for (unsigned i = 0; i < depth; i++) {
      labels[i] = b.labels[i];
      childNums[i] = b.childNums[i];
    }
    leader = FALSE;
    return *this;
  }
  bool operator==(const Address &b) const {
    if (depth != b.depth)
      return false;
    for (unsigned i = 0; i < depth; i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool isClose(const Address &b, int level) const {
    if (depth != b.depth)
      return false;
    if ((unsigned)level >= depth)
      return true;
    for (unsigned i = 0; i < (depth - level); i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool operator!=(const Address &b) const { return !operator==(b); }
  void print() const {
    unsigned i;
    printf("Depth: %u --- ", depth);
    for (i = 0; i < depth; i++) {
      printf("%u ", labels[i]);
    }
  }
};
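
/* Worked example (illustrative): an Address records one hardware thread's
   position in the topology, outermost level first. On a machine described as
   package/core/thread, the second hardware thread of core 3 in package 1 would
   have depth == 3 and labels == {1, 3, 1}. isClose(b, level) ignores the
   innermost `level` entries, so with level == 1 two Addresses on different
   threads of the same core still compare as close:

     Address a(3), b(3);
     a.labels[0] = 1; a.labels[1] = 3; a.labels[2] = 0;
     b.labels[0] = 1; b.labels[1] = 3; b.labels[2] = 1;
     // a == b          -> false (labels differ at the thread level)
     // a.isClose(b, 1) -> true  (same package and core)

   The exact meaning of each label is whatever topology detection assigned;
   the package/core/thread layout here is only an assumed example. */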

class AddrUnsPair {
public:
  Address first;
  unsigned second;
  AddrUnsPair(Address _first, unsigned _second)
      : first(_first), second(_second) {}
  AddrUnsPair &operator=(const AddrUnsPair &b) {
    first = b.first;
    second = b.second;
    return *this;
  }
  void print() const {
    printf("first = ");
    first.print();
    printf(" --- second = %u", second);
  }
  bool operator==(const AddrUnsPair &b) const {
    if (first != b.first)
      return false;
    if (second != b.second)
      return false;
    return true;
  }
  bool operator!=(const AddrUnsPair &b) const { return !operator==(b); }
};

static int __kmp_affinity_cmp_Address_labels(const void *a, const void *b) {
  const Address *aa = &(((const AddrUnsPair *)a)->first);
  const Address *bb = &(((const AddrUnsPair *)b)->first);
  unsigned depth = aa->depth;
  unsigned i;
  KMP_DEBUG_ASSERT(depth == bb->depth);
  for (i = 0; i < depth; i++) {
    if (aa->labels[i] < bb->labels[i])
      return -1;
    if (aa->labels[i] > bb->labels[i])
      return 1;
  }
  return 0;
}
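
/* Usage sketch (illustrative): the comparator orders AddrUnsPair entries
   lexicographically by their topology labels, outermost level first, and it
   assumes every entry has the same depth (see the KMP_DEBUG_ASSERT above).
   hierarchy_info::init() below uses it exactly this way; a standalone caller
   with a hypothetical table would do the same:

     AddrUnsPair *table = ...; // num_addrs entries, all with equal depth
     qsort(table, num_addrs, sizeof(*table), __kmp_affinity_cmp_Address_labels);

   After the sort, all entries of one package are contiguous, within that all
   entries of one core are contiguous, and so on down the hierarchy. */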

/* A structure for holding machine-specific hierarchy info to be computed once
   at init. This structure represents a mapping of threads to the actual
   machine hierarchy, or to our best guess at what the hierarchy might be, for
   the purpose of performing an efficient barrier. In the worst case, when
   there is no machine hierarchy information, it produces a tree suitable for
   a barrier, similar to the tree used in the hyper barrier. */
class hierarchy_info {
public:
  /* Good default values for number of leaves and branching factor, given no
     affinity information. Behaves a bit like hyper barrier. */
  static const kmp_uint32 maxLeaves = 4;
  static const kmp_uint32 minBranch = 4;

  // Number of levels allocated in numPerLevel/skipPerLevel; init() starts it
  // at 7 and resize() grows it when the hierarchy must be expanded.
  kmp_uint32 maxLevels;

  // Number of levels currently in use by the hierarchy (set by init/resize).
  kmp_uint32 depth;
  kmp_uint32 base_num_threads;
  enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
  volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
  // 2=initialization in progress
  volatile kmp_int8 resizing; // 0=not resizing, 1=resizing

  // numPerLevel[i] is how many level-i nodes share a common parent (level 0 is
  // the leaves); skipPerLevel[i] is the number of leaves spanned by one node
  // at level i (the running product of the numPerLevel entries below it).
  kmp_uint32 *numPerLevel;
  kmp_uint32 *skipPerLevel;
  void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
    int hier_depth = adr2os[0].first.depth;
    int level = 0;
    for (int i = hier_depth - 1; i >= 0; --i) {
      int max = -1;
      for (int j = 0; j < num_addrs; ++j) {
        int next = adr2os[j].first.childNums[i];
        if (next > max)
          max = next;
      }
      numPerLevel[level] = max + 1;
      ++level;
    }
  }
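
  /* Worked example (illustrative): deriveLevels() walks the address hierarchy
     from the innermost level outward and records, for each level, the largest
     childNums value seen plus one. Assuming four addresses whose childNums
     arrays are {0,0,0}, {0,0,1}, {0,1,0}, {0,1,1} (one package, two cores, two
     threads per core), the loop visits i = 2, 1, 0 and produces
     numPerLevel = {2, 2, 1}: two threads per core, two cores per package, one
     package. The remaining entries keep the value 1 assigned in init(). */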

  hierarchy_info()
      : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}

  void fini() {
    if (!uninitialized && numPerLevel) {
      __kmp_free(numPerLevel);
      numPerLevel = NULL;
      uninitialized = not_initialized;
    }
  }

  void init(AddrUnsPair *adr2os, int num_addrs) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
        &uninitialized, not_initialized, initializing);
    if (bool_result == 0) { // Wait for initialization
      while (TCR_1(uninitialized) != initialized)
        KMP_CPU_PAUSE();
      return;
    }
    KMP_DEBUG_ASSERT(bool_result == 1);

    /* Data fields are initialized explicitly here to avoid using dirty values
       observed when a static library is re-initialized multiple times (e.g.
       when a non-OpenMP thread repeatedly launches/joins a thread that uses
       OpenMP). */
    depth = 1;
    resizing = 0;
    maxLevels = 7;
    numPerLevel =
        (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
    skipPerLevel = &(numPerLevel[maxLevels]);
    for (kmp_uint32 i = 0; i < maxLevels;
         ++i) { // init numPerLevel[*] to 1 item per level
      numPerLevel[i] = 1;
      skipPerLevel[i] = 1;
    }

    // Sort table by physical ID
    if (adr2os) {
      qsort(adr2os, num_addrs, sizeof(*adr2os),
            __kmp_affinity_cmp_Address_labels);
      deriveLevels(adr2os, num_addrs);
    } else {
      numPerLevel[0] = maxLeaves;
      numPerLevel[1] = num_addrs / maxLeaves;
      if (num_addrs % maxLeaves)
        numPerLevel[1]++;
    }

    base_num_threads = num_addrs;
    for (int i = maxLevels - 1; i >= 0;
         --i) // count non-empty levels to get depth
      if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
        depth++;

    kmp_uint32 branch = minBranch;
    if (numPerLevel[0] == 1)
      branch = num_addrs / maxLeaves;
    if (branch < minBranch)
      branch = minBranch;
    for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
      while (numPerLevel[d] > branch ||
             (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
        if (numPerLevel[d] & 1)
          numPerLevel[d]++;
        numPerLevel[d] = numPerLevel[d] >> 1;
        if (numPerLevel[d + 1] == 1)
          depth++;
        numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
      }
      if (numPerLevel[0] == 1) {
        branch = branch >> 1;
        if (branch < 4)
          branch = minBranch;
      }
    }

    for (kmp_uint32 i = 1; i < depth; ++i)
      skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
    // Fill in hierarchy in the case of oversubscription
    for (kmp_uint32 i = depth; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    uninitialized = initialized; // One writer
  }

  // Resize the hierarchy if nproc changes to something larger than before
  void resize(kmp_uint32 nproc) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    while (bool_result == 0) { // someone else is trying to resize
      KMP_CPU_PAUSE();
      if (nproc <= base_num_threads) // happy with other thread's resize
        return;
      else // try to resize
        bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    }
    KMP_DEBUG_ASSERT(bool_result != 0);
    if (nproc <= base_num_threads)
      return; // happy with other thread's resize

    // Calculate new maxLevels
    kmp_uint32 old_sz = skipPerLevel[depth - 1];
    kmp_uint32 incs = 0, old_maxLevels = maxLevels;
    // First see if old maxLevels is enough to contain new size
    for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];
      numPerLevel[i - 1] *= 2;
      old_sz *= 2;
      depth++;
    }
    if (nproc > old_sz) { // Not enough space, need to expand hierarchy
      while (nproc > old_sz) {
        old_sz *= 2;
        incs++;
        depth++;
      }
      maxLevels += incs;

      // Resize arrays
      kmp_uint32 *old_numPerLevel = numPerLevel;
      kmp_uint32 *old_skipPerLevel = skipPerLevel;
      numPerLevel = skipPerLevel = NULL;
      numPerLevel =
          (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
      skipPerLevel = &(numPerLevel[maxLevels]);

      // Copy old elements from old arrays
      for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
        numPerLevel[i] = old_numPerLevel[i];
        skipPerLevel[i] = old_skipPerLevel[i];
      }

      // Init new elements in arrays to 1
      for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
        numPerLevel[i] = 1;
        skipPerLevel[i] = 1;
      }

      // Free old arrays
      __kmp_free(old_numPerLevel);
    }

    // Fill in oversubscription levels of hierarchy
    for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    base_num_threads = nproc;
    resizing = 0; // One writer
  }
};
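
/* Worked example (illustrative): with no address table (adr2os == NULL) and
   num_addrs == 16, init() builds the fallback tree: numPerLevel[0] = maxLeaves
   = 4 and numPerLevel[1] = 16 / 4 = 4, so numPerLevel = {4, 4, 1, 1, 1, 1, 1}
   and depth = 3. The running products give skipPerLevel = {1, 4, 16, ...}, and
   the levels at or above depth are pre-doubled for oversubscription
   (32, 64, ...). A later resize(32) only doubles existing levels
   (numPerLevel[2] becomes 2, skipPerLevel[3] stays 32, depth becomes 4)
   rather than reallocating, because maxLevels == 7 still has room. */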
#endif // KMP_AFFINITY_H