#include "kmp_stats.h"
#include "kmp_wait_release.h"
#include "kmp_taskdeps.h"
#include "ompt-specific.h"
#include "tsan_annotations.h"

static void __kmp_enable_tasking(kmp_task_team_t *task_team,
                                 kmp_info_t *this_thr);
static void __kmp_alloc_task_deque(kmp_info_t *thread,
                                   kmp_thread_data_t *thread_data);
static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
                                           kmp_task_team_t *task_team);
static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask);
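// Debug-only stack of suspended tied tasks. The routines guarded by
// BUILD_TIED_TASK_STACK below trace, initialize, free, push and pop a
// per-thread stack of tied kmp_taskdata_t pointers; they are compiled only
// when BUILD_TIED_TASK_STACK is defined.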
#ifdef BUILD_TIED_TASK_STACK

static void __kmp_trace_task_stack(kmp_int32 gtid,
                                   kmp_thread_data_t *thread_data,
                                   int threshold, char *location) {
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_taskdata_t **stack_top = task_stack->ts_top;
  kmp_int32 entries = task_stack->ts_entries;
  kmp_taskdata_t *tied_task;

  KA_TRACE(
      threshold,
      ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
       "first_block = %p, stack_top = %p \n",
       location, gtid, entries, task_stack->ts_first_block, stack_top));

  KMP_DEBUG_ASSERT(stack_top != NULL);
  KMP_DEBUG_ASSERT(entries > 0);

  while (entries != 0) {
    KMP_DEBUG_ASSERT(stack_top != &task_stack->ts_first_block.sb_block[0]);
    // fix up ts_top if we need to pop from previous block
    if (entries & TASK_STACK_INDEX_MASK == 0) {
      kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(stack_top);

      stack_block = stack_block->sb_prev;
      stack_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
    }

    // finish bookkeeping
    stack_top--;
    entries--;

    tied_task = *stack_top;

    KMP_DEBUG_ASSERT(tied_task != NULL);
    KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);

    KA_TRACE(threshold,
             ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
              "stack_top=%p, tied_task=%p\n",
              location, gtid, entries, stack_top, tied_task));
  }
  KMP_DEBUG_ASSERT(stack_top == &task_stack->ts_first_block.sb_block[0]);

  KA_TRACE(threshold,
           ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
            location, gtid));
}
static void __kmp_init_task_stack(kmp_int32 gtid,
                                  kmp_thread_data_t *thread_data) {
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_stack_block_t *first_block;

  // set up the first block of the stack
  first_block = &task_stack->ts_first_block;
  task_stack->ts_top = (kmp_taskdata_t **)first_block;
  memset((void *)first_block, '\0',
         TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));

  // initialize the stack to be empty
  task_stack->ts_entries = TASK_STACK_EMPTY;
  first_block->sb_next = NULL;
  first_block->sb_prev = NULL;
}
static void __kmp_free_task_stack(kmp_int32 gtid,
                                  kmp_thread_data_t *thread_data) {
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_stack_block_t *stack_block = &task_stack->ts_first_block;

  KMP_DEBUG_ASSERT(task_stack->ts_entries == TASK_STACK_EMPTY);
  // free from the second block of the stack
  while (stack_block != NULL) {
    kmp_stack_block_t *next_block = (stack_block) ? stack_block->sb_next : NULL;

    stack_block->sb_next = NULL;
    stack_block->sb_prev = NULL;
    if (stack_block != &task_stack->ts_first_block) {
      __kmp_thread_free(thread,
                        stack_block); // free the block, if not the first
    }
    stack_block = next_block;
  }
  // initialize the stack to be empty
  task_stack->ts_entries = 0;
  task_stack->ts_top = NULL;
}
static void __kmp_push_task_stack(kmp_int32 gtid, kmp_info_t *thread,
                                  kmp_taskdata_t *tied_task) {
  kmp_thread_data_t *thread_data =
      &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;

  if (tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser) {
    // Don't push anything on stack if team or team tasks are serialized
    return;
  }

  KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
  KMP_DEBUG_ASSERT(task_stack->ts_top != NULL);

  KA_TRACE(20,
           ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
            gtid, thread, tied_task));

  // Store entry
  *(task_stack->ts_top) = tied_task;

  // Do bookkeeping for next push
  task_stack->ts_top++;
  task_stack->ts_entries++;

  if (task_stack->ts_entries & TASK_STACK_INDEX_MASK == 0) {
    // Find beginning of this task block
    kmp_stack_block_t *stack_block =
        (kmp_stack_block_t *)(task_stack->ts_top - TASK_STACK_BLOCK_SIZE);

    // Check if we already have a block
    if (stack_block->sb_next !=
        NULL) { // reset ts_top to beginning of next block
      task_stack->ts_top = &stack_block->sb_next->sb_block[0];
    } else { // Alloc new block and link it up
      kmp_stack_block_t *new_block = (kmp_stack_block_t *)__kmp_thread_calloc(
          thread, sizeof(kmp_stack_block_t));

      task_stack->ts_top = &new_block->sb_block[0];
      stack_block->sb_next = new_block;
      new_block->sb_prev = stack_block;
      new_block->sb_next = NULL;

      KA_TRACE(
          30,
          ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
           gtid, tied_task, new_block));
    }
  }
  KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid,
                tied_task));
}
static void __kmp_pop_task_stack(kmp_int32 gtid, kmp_info_t *thread,
                                 kmp_taskdata_t *ending_task) {
  kmp_thread_data_t *thread_data =
      &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_taskdata_t *tied_task;

  if (ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser) {
    // Don't pop anything from stack if team or team tasks are serialized
    return;
  }

  KMP_DEBUG_ASSERT(task_stack->ts_top != NULL);
  KMP_DEBUG_ASSERT(task_stack->ts_entries > 0);

  KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid,
                thread));

  // fix up ts_top if we need to pop from previous block
  if (task_stack->ts_entries & TASK_STACK_INDEX_MASK == 0) {
    kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(task_stack->ts_top);

    stack_block = stack_block->sb_prev;
    task_stack->ts_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
  }

  // finish bookkeeping
  task_stack->ts_top--;
  task_stack->ts_entries--;

  tied_task = *(task_stack->ts_top);

  KMP_DEBUG_ASSERT(tied_task != NULL);
  KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
  KMP_DEBUG_ASSERT(tied_task == ending_task);

  KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid,
                tied_task));
}
#endif // BUILD_TIED_TASK_STACK
static bool __kmp_task_is_allowed(int gtid, const kmp_int32 is_constrained,
                                  const kmp_taskdata_t *tasknew,
                                  const kmp_taskdata_t *taskcurr) {
  if (is_constrained && (tasknew->td_flags.tiedness == TASK_TIED)) {
    // Check that the candidate obeys the Task Scheduling Constraint (TSC):
    // only a descendant of all deferred tied tasks may be scheduled; checking
    // the last one is enough since it is a descendant of all the others.
    kmp_taskdata_t *current = taskcurr->td_last_tied;
    KMP_DEBUG_ASSERT(current != NULL);
    // check if the last tied task is not suspended on barrier
    if (current->td_flags.tasktype == TASK_EXPLICIT ||
        current->td_taskwait_thread > 0) { // <= 0 on barrier
      kmp_int32 level = current->td_level;
      kmp_taskdata_t *parent = tasknew->td_parent;
      while (parent != current && parent->td_level > level) {
        // check generation up to the level of the current task
        parent = parent->td_parent;
        KMP_DEBUG_ASSERT(parent != NULL);
      }
      if (parent != current)
        return false;
    }
  }
  // Check mutexinoutset dependencies; acquire the locks
  kmp_depnode_t *node = tasknew->td_depnode;
  if (UNLIKELY(node && (node->dn.mtx_num_locks > 0))) {
    for (int i = 0; i < node->dn.mtx_num_locks; ++i) {
      KMP_DEBUG_ASSERT(node->dn.mtx_locks[i] != NULL);
      if (__kmp_test_lock(node->dn.mtx_locks[i], gtid))
        continue;
      // could not get the lock, release previously acquired locks
      for (int j = i - 1; j >= 0; --j)
        __kmp_release_lock(node->dn.mtx_locks[j], gtid);
      return false;
    }
    // negative num_locks means all locks were acquired successfully
    node->dn.mtx_num_locks = -node->dn.mtx_num_locks;
  }
  return true;
}
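// __kmp_realloc_task_deque: double the size of a thread's full task deque,
// copying the existing entries so that the head is re-based at index 0.
// Callers in this file take td_deque_lock before invoking it.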
static void __kmp_realloc_task_deque(kmp_info_t *thread,
                                     kmp_thread_data_t *thread_data) {
  kmp_int32 size = TASK_DEQUE_SIZE(thread_data->td);
  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) == size);
  kmp_int32 new_size = 2 * size;

  KE_TRACE(10, ("__kmp_realloc_task_deque: T#%d reallocating deque[from %d to "
                "%d] for thread_data %p\n",
                __kmp_gtid_from_thread(thread), size, new_size, thread_data));

  kmp_taskdata_t **new_deque =
      (kmp_taskdata_t **)__kmp_allocate(new_size * sizeof(kmp_taskdata_t *));

  int i, j;
  for (i = thread_data->td.td_deque_head, j = 0; j < size;
       i = (i + 1) & TASK_DEQUE_MASK(thread_data->td), j++)
    new_deque[j] = thread_data->td.td_deque[i];

  __kmp_free(thread_data->td.td_deque);

  thread_data->td.td_deque_head = 0;
  thread_data->td.td_deque_tail = size;
  thread_data->td.td_deque = new_deque;
  thread_data->td.td_deque_size = new_size;
}
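// __kmp_push_task: try to add a task to the encountering thread's deque.
// Serialized tasks are not queued (TASK_NOT_PUSHED is returned so the caller
// executes them immediately); a full deque is either grown or, when task
// throttling allows immediate execution, also reported as TASK_NOT_PUSHED.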
static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);

  // If we encounter a hidden helper task, and the current thread is not a
  // hidden helper thread, hand the task to the shadow hidden helper thread.
  if (taskdata->td_flags.hidden_helper && !KMP_HIDDEN_HELPER_THREAD(gtid)) {
    gtid = KMP_GTID_TO_SHADOW_GTID(gtid);
    thread = __kmp_threads[gtid];
  }

  kmp_task_team_t *task_team = thread->th.th_task_team;
  kmp_int32 tid = __kmp_tid_from_gtid(gtid);
  kmp_thread_data_t *thread_data;

  KA_TRACE(20,
           ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata));

  if (UNLIKELY(taskdata->td_flags.tiedness == TASK_UNTIED)) {
    // untied task needs to increment counter so that the task structure is not
    // freed prematurely
    kmp_int32 counter = 1 + KMP_ATOMIC_INC(&taskdata->td_untied_count);
    KMP_DEBUG_USE_VAR(counter);
    KA_TRACE(
        20,
        ("__kmp_push_task: T#%d untied_count (%d) incremented for task %p\n",
         gtid, counter, taskdata));
  }

  // The first check avoids building task_team thread data if serialized
  if (UNLIKELY(taskdata->td_flags.task_serial)) {
    KA_TRACE(20, ("__kmp_push_task: T#%d team serialized; returning "
                  "TASK_NOT_PUSHED for task %p\n",
                  gtid, taskdata));
    return TASK_NOT_PUSHED;
  }

  // Now that serialized tasks have returned, we can assume that we are not in
  // immediate exec mode
  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  if (UNLIKELY(!KMP_TASKING_ENABLED(task_team))) {
    __kmp_enable_tasking(task_team, thread);
  }
  KMP_DEBUG_ASSERT(TCR_4(task_team->tt.tt_found_tasks) == TRUE);
  KMP_DEBUG_ASSERT(TCR_PTR(task_team->tt.tt_threads_data) != NULL);

  // Find tasking deque specific to encountering thread
  thread_data = &task_team->tt.tt_threads_data[tid];

  // No lock needed since only the owner can allocate
  if (UNLIKELY(thread_data->td.td_deque == NULL)) {
    __kmp_alloc_task_deque(thread, thread_data);
  }

  int locked = 0;
  // Check if deque is full
  if (TCR_4(thread_data->td.td_deque_ntasks) >=
      TASK_DEQUE_SIZE(thread_data->td)) {
    if (__kmp_enable_task_throttling &&
        __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
                              thread->th.th_current_task)) {
      KA_TRACE(20, ("__kmp_push_task: T#%d deque is full; returning "
                    "TASK_NOT_PUSHED for task %p\n",
                    gtid, taskdata));
      return TASK_NOT_PUSHED;
    } else {
      __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
      locked = 1;
      if (TCR_4(thread_data->td.td_deque_ntasks) >=
          TASK_DEQUE_SIZE(thread_data->td)) {
        // expand deque to push the task which is not allowed to execute
        __kmp_realloc_task_deque(thread, thread_data);
      }
    }
  }
  // Lock the deque for the task push operation
  if (!locked) {
    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
    // Need to recheck as we can get a proxy task from thread outside of OpenMP
    if (TCR_4(thread_data->td.td_deque_ntasks) >=
        TASK_DEQUE_SIZE(thread_data->td)) {
      if (__kmp_enable_task_throttling &&
          __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
                                thread->th.th_current_task)) {
        __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
        KA_TRACE(20, ("__kmp_push_task: T#%d deque is full on 2nd check; "
                      "returning TASK_NOT_PUSHED for task %p\n",
                      gtid, taskdata));
        return TASK_NOT_PUSHED;
      } else {
        // expand deque to push the task which is not allowed to execute
        __kmp_realloc_task_deque(thread, thread_data);
      }
    }
  }
  // Must have room since no thread can add tasks but the calling thread
  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) <
                   TASK_DEQUE_SIZE(thread_data->td));

  thread_data->td.td_deque[thread_data->td.td_deque_tail] =
      taskdata; // Push taskdata
  // Wrap index.
  thread_data->td.td_deque_tail =
      (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
  TCW_4(thread_data->td.td_deque_ntasks,
        TCR_4(thread_data->td.td_deque_ntasks) + 1); // Adjust task count
  KMP_FSYNC_RELEASING(thread->th.th_current_task); // releasing self
  KMP_FSYNC_RELEASING(taskdata); // releasing child
  KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
                "task=%p ntasks=%d head=%u tail=%u\n",
                gtid, taskdata, thread_data->td.td_deque_ntasks,
                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));

  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);

  // Signal one worker thread to execute the task
  if (taskdata->td_flags.hidden_helper) {
    // Wake hidden helper threads up if they are sleeping
    __kmp_hidden_helper_worker_thread_signal();
  }

  return TASK_SUCCESSFULLY_PUSHED;
}
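// __kmp_pop_current_task_from_thread / __kmp_push_current_task_to_thread:
// maintain th_current_task when a thread leaves or enters a team, making the
// thread's current task the parent of the team's implicit tasks.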
void __kmp_pop_current_task_from_thread(kmp_info_t *this_thr) {
  KF_TRACE(10, ("__kmp_pop_current_task_from_thread(enter): T#%d "
                "this_thread=%p, curtask=%p, "
                "curtask_parent=%p\n",
                0, this_thr, this_thr->th.th_current_task,
                this_thr->th.th_current_task->td_parent));

  this_thr->th.th_current_task = this_thr->th.th_current_task->td_parent;

  KF_TRACE(10, ("__kmp_pop_current_task_from_thread(exit): T#%d "
                "this_thread=%p, curtask=%p, "
                "curtask_parent=%p\n",
                0, this_thr, this_thr->th.th_current_task,
                this_thr->th.th_current_task->td_parent));
}

void __kmp_push_current_task_to_thread(kmp_info_t *this_thr, kmp_team_t *team,
                                       int tid) {
  // the current task of the thread becomes the parent of the team's
  // just-created implicit tasks
  KF_TRACE(10, ("__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p "
                "curtask=%p parent_task=%p\n",
                tid, this_thr, this_thr->th.th_current_task,
                team->t.t_implicit_task_taskdata[tid].td_parent));

  KMP_DEBUG_ASSERT(this_thr != NULL);

  if (tid == 0) {
    if (this_thr->th.th_current_task != &team->t.t_implicit_task_taskdata[0]) {
      team->t.t_implicit_task_taskdata[0].td_parent =
          this_thr->th.th_current_task;
      this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[0];
    }
  } else {
    team->t.t_implicit_task_taskdata[tid].td_parent =
        team->t.t_implicit_task_taskdata[0].td_parent;
    this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[tid];
  }

  KF_TRACE(10, ("__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p "
                "curtask=%p parent_task=%p\n",
                tid, this_thr, this_thr->th.th_current_task,
                team->t.t_implicit_task_taskdata[tid].td_parent));
}
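// __kmp_task_start: mark the starting task as executing, suspend the current
// task, and make the new task the thread's current task.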
static void __kmp_task_start(kmp_int32 gtid, kmp_task_t *task,
                             kmp_taskdata_t *current_task) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_info_t *thread = __kmp_threads[gtid];

  KA_TRACE(10,
           ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
            gtid, taskdata, current_task));

  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);

  // mark the currently executing task as suspended
  current_task->td_flags.executing = 0;

// Add task to stack if tied
#ifdef BUILD_TIED_TASK_STACK
  if (taskdata->td_flags.tiedness == TASK_TIED) {
    __kmp_push_task_stack(gtid, thread, taskdata);
  }
#endif /* BUILD_TIED_TASK_STACK */

  // mark the starting task as executing and as the current task
  thread->th.th_current_task = taskdata;

  KMP_DEBUG_ASSERT(taskdata->td_flags.started == 0 ||
                   taskdata->td_flags.tiedness == TASK_UNTIED);
  KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0 ||
                   taskdata->td_flags.tiedness == TASK_UNTIED);
  taskdata->td_flags.started = 1;
  taskdata->td_flags.executing = 1;
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);

  KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n", gtid, taskdata));
}
#if OMPT_SUPPORT
static inline void __ompt_task_init(kmp_taskdata_t *task, int tid) {
  // The calls to __ompt_task_init already have the ompt_enabled condition.
  task->ompt_task_info.task_data.value = 0;
  task->ompt_task_info.frame.exit_frame = ompt_data_none;
  task->ompt_task_info.frame.enter_frame = ompt_data_none;
  task->ompt_task_info.frame.exit_frame_flags =
      ompt_frame_runtime | ompt_frame_framepointer;
  task->ompt_task_info.frame.enter_frame_flags =
      ompt_frame_runtime | ompt_frame_framepointer;
}

static inline void __ompt_task_start(kmp_task_t *task,
                                     kmp_taskdata_t *current_task,
                                     kmp_int32 gtid) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  ompt_task_status_t status = ompt_task_switch;
  if (__kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded) {
    status = ompt_task_yield;
    __kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded = 0;
  }
  // let OMPT know that we're about to run this task
  if (ompt_enabled.ompt_callback_task_schedule) {
    ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
        &(current_task->ompt_task_info.task_data), status,
        &(taskdata->ompt_task_info.task_data));
  }
  taskdata->ompt_task_info.scheduling_parent = current_task;
}

static inline void __ompt_task_finish(kmp_task_t *task,
                                      kmp_taskdata_t *resumed_task,
                                      ompt_task_status_t status) {
  if (ompt_enabled.ompt_callback_task_schedule) {
    kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
    if (__kmp_omp_cancellation && taskdata->td_taskgroup &&
        taskdata->td_taskgroup->cancel_request == cancel_taskgroup) {
      status = ompt_task_cancel;
    }

    // let OMPT know that we're returning to the callee task
    ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
        &(taskdata->ompt_task_info.task_data), status,
        (resumed_task ? &(resumed_task->ompt_task_info.task_data) : NULL));
  }
}
#endif // OMPT_SUPPORT
615 static void __kmpc_omp_task_begin_if0_template(
ident_t *loc_ref, kmp_int32 gtid,
618 void *return_address) {
619 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
620 kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
622 KA_TRACE(10, (
"__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p "
624 gtid, loc_ref, taskdata, current_task));
626 if (UNLIKELY(taskdata->td_flags.tiedness == TASK_UNTIED)) {
629 kmp_int32 counter = 1 + KMP_ATOMIC_INC(&taskdata->td_untied_count);
630 KMP_DEBUG_USE_VAR(counter);
631 KA_TRACE(20, (
"__kmpc_omp_task_begin_if0: T#%d untied_count (%d) "
632 "incremented for task %p\n",
633 gtid, counter, taskdata));
636 taskdata->td_flags.task_serial =
638 __kmp_task_start(gtid, task, current_task);
642 if (current_task->ompt_task_info.frame.enter_frame.ptr == NULL) {
643 current_task->ompt_task_info.frame.enter_frame.ptr =
644 taskdata->ompt_task_info.frame.exit_frame.ptr = frame_address;
645 current_task->ompt_task_info.frame.enter_frame_flags =
646 taskdata->ompt_task_info.frame.exit_frame_flags =
647 ompt_frame_application | ompt_frame_framepointer;
649 if (ompt_enabled.ompt_callback_task_create) {
650 ompt_task_info_t *parent_info = &(current_task->ompt_task_info);
651 ompt_callbacks.ompt_callback(ompt_callback_task_create)(
652 &(parent_info->task_data), &(parent_info->frame),
653 &(taskdata->ompt_task_info.task_data),
654 ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(taskdata), 0,
657 __ompt_task_start(task, current_task, gtid);
661 KA_TRACE(10, (
"__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n", gtid,
667 static void __kmpc_omp_task_begin_if0_ompt(
ident_t *loc_ref, kmp_int32 gtid,
670 void *return_address) {
671 __kmpc_omp_task_begin_if0_template<true>(loc_ref, gtid, task, frame_address,
682 void __kmpc_omp_task_begin_if0(
ident_t *loc_ref, kmp_int32 gtid,
685 if (UNLIKELY(ompt_enabled.enabled)) {
686 OMPT_STORE_RETURN_ADDRESS(gtid);
687 __kmpc_omp_task_begin_if0_ompt(loc_ref, gtid, task,
688 OMPT_GET_FRAME_ADDRESS(1),
689 OMPT_LOAD_RETURN_ADDRESS(gtid));
693 __kmpc_omp_task_begin_if0_template<false>(loc_ref, gtid, task, NULL, NULL);
699 void __kmpc_omp_task_begin(
ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task) {
700 kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
704 (
"__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
705 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task));
707 __kmp_task_start(gtid, task, current_task);
709 KA_TRACE(10, (
"__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n", gtid,
710 loc_ref, KMP_TASK_TO_TASKDATA(task)));
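// __kmp_free_task: release the taskdata structure and the shared-variable
// block of a completed task once all of its children have been freed.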
static void __kmp_free_task(kmp_int32 gtid, kmp_taskdata_t *taskdata,
                            kmp_info_t *thread) {
  KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n", gtid,
                taskdata));

  // Check to make sure all flags and counters have the correct values
  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
  KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0);
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 1);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
  KMP_DEBUG_ASSERT(taskdata->td_allocated_child_tasks == 0 ||
                   taskdata->td_flags.task_serial == 1);
  KMP_DEBUG_ASSERT(taskdata->td_incomplete_child_tasks == 0);

  taskdata->td_flags.freed = 1;
  ANNOTATE_HAPPENS_BEFORE(taskdata);
// deallocate the taskdata and shared variable blocks associated with this task
#if USE_FAST_MEMORY
  __kmp_fast_free(thread, taskdata);
#else /* ! USE_FAST_MEMORY */
  __kmp_thread_free(thread, taskdata);
#endif

  KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n", gtid, taskdata));
}
751 static void __kmp_free_task_and_ancestors(kmp_int32 gtid,
752 kmp_taskdata_t *taskdata,
753 kmp_info_t *thread) {
756 kmp_int32 team_serial =
757 (taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) &&
758 !taskdata->td_flags.proxy;
759 KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
761 kmp_int32 children = KMP_ATOMIC_DEC(&taskdata->td_allocated_child_tasks) - 1;
762 KMP_DEBUG_ASSERT(children >= 0);
765 while (children == 0) {
766 kmp_taskdata_t *parent_taskdata = taskdata->td_parent;
768 KA_TRACE(20, (
"__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
769 "and freeing itself\n",
773 __kmp_free_task(gtid, taskdata, thread);
775 taskdata = parent_taskdata;
781 if (taskdata->td_flags.tasktype == TASK_IMPLICIT) {
782 if (taskdata->td_dephash) {
783 int children = KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks);
784 kmp_tasking_flags_t flags_old = taskdata->td_flags;
785 if (children == 0 && flags_old.complete == 1) {
786 kmp_tasking_flags_t flags_new = flags_old;
787 flags_new.complete = 0;
788 if (KMP_COMPARE_AND_STORE_ACQ32(
789 RCAST(kmp_int32 *, &taskdata->td_flags),
790 *RCAST(kmp_int32 *, &flags_old),
791 *RCAST(kmp_int32 *, &flags_new))) {
792 KA_TRACE(100, (
"__kmp_free_task_and_ancestors: T#%d cleans "
793 "dephash of implicit task %p\n",
796 __kmp_dephash_free_entries(thread, taskdata->td_dephash);
803 children = KMP_ATOMIC_DEC(&taskdata->td_allocated_child_tasks) - 1;
804 KMP_DEBUG_ASSERT(children >= 0);
808 20, (
"__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
809 "not freeing it yet\n",
810 gtid, taskdata, children));
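// __kmp_task_finish: bookkeeping when a task body returns: pop the task from
// the tied-task stack if needed, decrement parent and taskgroup counters,
// release dependences, handle detachable/proxy completion, and resume
// resumed_task.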
823 static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
824 kmp_taskdata_t *resumed_task) {
825 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
826 kmp_info_t *thread = __kmp_threads[gtid];
827 kmp_task_team_t *task_team =
828 thread->th.th_task_team;
829 kmp_int32 children = 0;
831 KA_TRACE(10, (
"__kmp_task_finish(enter): T#%d finishing task %p and resuming "
833 gtid, taskdata, resumed_task));
835 KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
838 #ifdef BUILD_TIED_TASK_STACK
839 if (taskdata->td_flags.tiedness == TASK_TIED) {
840 __kmp_pop_task_stack(gtid, thread, taskdata);
844 if (UNLIKELY(taskdata->td_flags.tiedness == TASK_UNTIED)) {
847 kmp_int32 counter = KMP_ATOMIC_DEC(&taskdata->td_untied_count) - 1;
850 (
"__kmp_task_finish: T#%d untied_count (%d) decremented for task %p\n",
851 gtid, counter, taskdata));
855 if (resumed_task == NULL) {
856 KMP_DEBUG_ASSERT(taskdata->td_flags.task_serial);
857 resumed_task = taskdata->td_parent;
860 thread->th.th_current_task = resumed_task;
861 resumed_task->td_flags.executing = 1;
862 KA_TRACE(10, (
"__kmp_task_finish(exit): T#%d partially done task %p, "
863 "resuming task %p\n",
864 gtid, taskdata, resumed_task));
872 (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
873 taskdata->td_flags.task_serial);
874 if (taskdata->td_flags.task_serial) {
875 if (resumed_task == NULL) {
876 resumed_task = taskdata->td_parent;
880 KMP_DEBUG_ASSERT(resumed_task !=
890 if (UNLIKELY(taskdata->td_flags.destructors_thunk)) {
891 kmp_routine_entry_t destr_thunk = task->data1.destructors;
892 KMP_ASSERT(destr_thunk);
893 destr_thunk(gtid, task);
896 KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
897 KMP_DEBUG_ASSERT(taskdata->td_flags.started == 1);
898 KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
901 if (UNLIKELY(taskdata->td_flags.detachable == TASK_DETACHABLE)) {
902 if (taskdata->td_allow_completion_event.type ==
903 KMP_EVENT_ALLOW_COMPLETION) {
905 __kmp_acquire_tas_lock(&taskdata->td_allow_completion_event.lock, gtid);
906 if (taskdata->td_allow_completion_event.type ==
907 KMP_EVENT_ALLOW_COMPLETION) {
909 KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1);
910 taskdata->td_flags.executing = 0;
917 __ompt_task_finish(task, resumed_task, ompt_task_detach);
923 taskdata->td_flags.proxy = TASK_PROXY;
926 __kmp_release_tas_lock(&taskdata->td_allow_completion_event.lock, gtid);
931 taskdata->td_flags.complete = 1;
936 __ompt_task_finish(task, resumed_task, ompt_task_complete);
941 if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) ||
942 taskdata->td_flags.detachable == TASK_DETACHABLE ||
943 taskdata->td_flags.hidden_helper) {
946 KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks) - 1;
947 KMP_DEBUG_ASSERT(children >= 0);
948 if (taskdata->td_taskgroup)
949 KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);
950 __kmp_release_deps(gtid, taskdata);
951 }
else if (task_team && task_team->tt.tt_found_proxy_tasks) {
954 __kmp_release_deps(gtid, taskdata);
960 KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1);
961 taskdata->td_flags.executing = 0;
965 20, (
"__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
966 gtid, taskdata, children));
972 thread->th.th_current_task = resumed_task;
974 __kmp_free_task_and_ancestors(gtid, taskdata, thread);
978 resumed_task->td_flags.executing = 1;
981 10, (
"__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
982 gtid, taskdata, resumed_task));
988 static void __kmpc_omp_task_complete_if0_template(
ident_t *loc_ref,
991 KA_TRACE(10, (
"__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
992 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));
993 KMP_DEBUG_ASSERT(gtid >= 0);
995 __kmp_task_finish<ompt>(gtid, task, NULL);
997 KA_TRACE(10, (
"__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
998 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));
1002 ompt_frame_t *ompt_frame;
1003 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1004 ompt_frame->enter_frame = ompt_data_none;
1005 ompt_frame->enter_frame_flags =
1006 ompt_frame_runtime | ompt_frame_framepointer;
1015 void __kmpc_omp_task_complete_if0_ompt(
ident_t *loc_ref, kmp_int32 gtid,
1017 __kmpc_omp_task_complete_if0_template<true>(loc_ref, gtid, task);
1026 void __kmpc_omp_task_complete_if0(
ident_t *loc_ref, kmp_int32 gtid,
1029 if (UNLIKELY(ompt_enabled.enabled)) {
1030 __kmpc_omp_task_complete_if0_ompt(loc_ref, gtid, task);
1034 __kmpc_omp_task_complete_if0_template<false>(loc_ref, gtid, task);
1040 void __kmpc_omp_task_complete(
ident_t *loc_ref, kmp_int32 gtid,
1042 KA_TRACE(10, (
"__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n", gtid,
1043 loc_ref, KMP_TASK_TO_TASKDATA(task)));
1045 __kmp_task_finish<false>(gtid, task,
1048 KA_TRACE(10, (
"__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n", gtid,
1049 loc_ref, KMP_TASK_TO_TASKDATA(task)));
1065 void __kmp_init_implicit_task(
ident_t *loc_ref, kmp_info_t *this_thr,
1066 kmp_team_t *team,
int tid,
int set_curr_task) {
1067 kmp_taskdata_t *task = &team->t.t_implicit_task_taskdata[tid];
1071 (
"__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
1072 tid, team, task, set_curr_task ?
"TRUE" :
"FALSE"));
1074 task->td_task_id = KMP_GEN_TASK_ID();
1075 task->td_team = team;
1078 task->td_ident = loc_ref;
1079 task->td_taskwait_ident = NULL;
1080 task->td_taskwait_counter = 0;
1081 task->td_taskwait_thread = 0;
1083 task->td_flags.tiedness = TASK_TIED;
1084 task->td_flags.tasktype = TASK_IMPLICIT;
1085 task->td_flags.proxy = TASK_FULL;
1088 task->td_flags.task_serial = 1;
1089 task->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);
1090 task->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0;
1092 task->td_flags.started = 1;
1093 task->td_flags.executing = 1;
1094 task->td_flags.complete = 0;
1095 task->td_flags.freed = 0;
1097 task->td_depnode = NULL;
1098 task->td_last_tied = task;
1099 task->td_allow_completion_event.type = KMP_EVENT_UNINITIALIZED;
1101 if (set_curr_task) {
1102 KMP_ATOMIC_ST_REL(&task->td_incomplete_child_tasks, 0);
1104 KMP_ATOMIC_ST_REL(&task->td_allocated_child_tasks, 0);
1105 task->td_taskgroup = NULL;
1106 task->td_dephash = NULL;
1107 __kmp_push_current_task_to_thread(this_thr, team, tid);
1109 KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
1110 KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
1114 if (UNLIKELY(ompt_enabled.enabled))
1115 __ompt_task_init(task, tid);
1118 KF_TRACE(10, (
"__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n", tid,
1127 void __kmp_finish_implicit_task(kmp_info_t *thread) {
1128 kmp_taskdata_t *task = thread->th.th_current_task;
1129 if (task->td_dephash) {
1131 task->td_flags.complete = 1;
1132 children = KMP_ATOMIC_LD_ACQ(&task->td_incomplete_child_tasks);
1133 kmp_tasking_flags_t flags_old = task->td_flags;
1134 if (children == 0 && flags_old.complete == 1) {
1135 kmp_tasking_flags_t flags_new = flags_old;
1136 flags_new.complete = 0;
1137 if (KMP_COMPARE_AND_STORE_ACQ32(RCAST(kmp_int32 *, &task->td_flags),
1138 *RCAST(kmp_int32 *, &flags_old),
1139 *RCAST(kmp_int32 *, &flags_new))) {
1140 KA_TRACE(100, (
"__kmp_finish_implicit_task: T#%d cleans "
1141 "dephash of implicit task %p\n",
1142 thread->th.th_info.ds.ds_gtid, task));
1143 __kmp_dephash_free_entries(thread, task->td_dephash);
1153 void __kmp_free_implicit_task(kmp_info_t *thread) {
1154 kmp_taskdata_t *task = thread->th.th_current_task;
1155 if (task && task->td_dephash) {
1156 __kmp_dephash_free(thread, task->td_dephash);
1157 task->td_dephash = NULL;
static size_t __kmp_round_up_to_val(size_t size, size_t val) {
  if (size & (val - 1)) {
    size &= ~(val - 1);
    if (size <= KMP_SIZE_T_MAX - val) {
      size += val; // Round up if there is no overflow.
    }
  }
  return size;
}
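// __kmp_task_alloc: allocate the taskdata and the task's shared-variable block
// in a single allocation, initialize the task's flags and counters, and wire
// the new task to its parent, taskgroup and task team.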
1185 kmp_task_t *__kmp_task_alloc(
ident_t *loc_ref, kmp_int32 gtid,
1186 kmp_tasking_flags_t *flags,
1187 size_t sizeof_kmp_task_t,
size_t sizeof_shareds,
1188 kmp_routine_entry_t task_entry) {
1190 kmp_taskdata_t *taskdata;
1191 kmp_info_t *thread = __kmp_threads[gtid];
1192 kmp_info_t *encountering_thread = thread;
1193 kmp_team_t *team = thread->th.th_team;
1194 kmp_taskdata_t *parent_task = thread->th.th_current_task;
1195 size_t shareds_offset;
1197 if (UNLIKELY(!TCR_4(__kmp_init_middle)))
1198 __kmp_middle_initialize();
1200 if (flags->hidden_helper) {
1201 if (__kmp_enable_hidden_helper) {
1202 if (!TCR_4(__kmp_init_hidden_helper))
1203 __kmp_hidden_helper_initialize();
1208 if (!KMP_HIDDEN_HELPER_THREAD(gtid)) {
1209 thread = __kmp_threads[KMP_GTID_TO_SHADOW_GTID(gtid)];
1215 flags->hidden_helper = FALSE;
1219 KA_TRACE(10, (
"__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
1220 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
1221 gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
1222 sizeof_shareds, task_entry));
1224 KMP_DEBUG_ASSERT(parent_task);
1225 if (parent_task->td_flags.final) {
1226 if (flags->merged_if0) {
1231 if (flags->tiedness == TASK_UNTIED && !team->t.t_serialized) {
1236 encountering_thread->th.th_task_team->tt.tt_untied_task_encountered, 1);
1242 if (UNLIKELY(flags->proxy == TASK_PROXY ||
1243 flags->detachable == TASK_DETACHABLE || flags->hidden_helper)) {
1244 if (flags->proxy == TASK_PROXY) {
1245 flags->tiedness = TASK_UNTIED;
1246 flags->merged_if0 = 1;
1250 if ((encountering_thread->th.th_task_team) == NULL) {
1253 KMP_DEBUG_ASSERT(team->t.t_serialized);
1255 (
"T#%d creating task team in __kmp_task_alloc for proxy task\n",
1257 __kmp_task_team_setup(
1258 encountering_thread, team,
1260 encountering_thread->th.th_task_team =
1261 team->t.t_task_team[encountering_thread->th.th_task_state];
1263 kmp_task_team_t *task_team = encountering_thread->th.th_task_team;
1266 if (!KMP_TASKING_ENABLED(task_team)) {
1269 (
"T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
1270 __kmp_enable_tasking(task_team, encountering_thread);
1271 kmp_int32 tid = encountering_thread->th.th_info.ds.ds_tid;
1272 kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];
1274 if (thread_data->td.td_deque == NULL) {
1275 __kmp_alloc_task_deque(encountering_thread, thread_data);
1279 if ((flags->proxy == TASK_PROXY || flags->detachable == TASK_DETACHABLE) &&
1280 task_team->tt.tt_found_proxy_tasks == FALSE)
1281 TCW_4(task_team->tt.tt_found_proxy_tasks, TRUE);
1282 if (flags->hidden_helper &&
1283 task_team->tt.tt_hidden_helper_task_encountered == FALSE)
1284 TCW_4(task_team->tt.tt_hidden_helper_task_encountered, TRUE);
1289 shareds_offset =
sizeof(kmp_taskdata_t) + sizeof_kmp_task_t;
1290 shareds_offset = __kmp_round_up_to_val(shareds_offset,
sizeof(
void *));
1293 KA_TRACE(30, (
"__kmp_task_alloc: T#%d First malloc size: %ld\n", gtid,
1295 KA_TRACE(30, (
"__kmp_task_alloc: T#%d Second malloc size: %ld\n", gtid,
1300 taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(
1301 encountering_thread, shareds_offset + sizeof_shareds);
1303 taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(
1304 encountering_thread, shareds_offset + sizeof_shareds);
1306 ANNOTATE_HAPPENS_AFTER(taskdata);
1308 task = KMP_TASKDATA_TO_TASK(taskdata);
1311 #if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD
1312 KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (
sizeof(
double) - 1)) == 0);
1313 KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (
sizeof(
double) - 1)) == 0);
1315 KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (
sizeof(_Quad) - 1)) == 0);
1316 KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (
sizeof(_Quad) - 1)) == 0);
1318 if (sizeof_shareds > 0) {
1320 task->shareds = &((
char *)taskdata)[shareds_offset];
1322 KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (
sizeof(
void *) - 1)) ==
1325 task->shareds = NULL;
1327 task->routine = task_entry;
1330 taskdata->td_task_id = KMP_GEN_TASK_ID();
1331 taskdata->td_team = thread->th.th_team;
1332 taskdata->td_alloc_thread = encountering_thread;
1333 taskdata->td_parent = parent_task;
1334 taskdata->td_level = parent_task->td_level + 1;
1335 KMP_ATOMIC_ST_RLX(&taskdata->td_untied_count, 0);
1336 taskdata->td_ident = loc_ref;
1337 taskdata->td_taskwait_ident = NULL;
1338 taskdata->td_taskwait_counter = 0;
1339 taskdata->td_taskwait_thread = 0;
1340 KMP_DEBUG_ASSERT(taskdata->td_parent != NULL);
1342 if (flags->proxy == TASK_FULL)
1343 copy_icvs(&taskdata->td_icvs, &taskdata->td_parent->td_icvs);
1345 taskdata->td_flags.tiedness = flags->tiedness;
1346 taskdata->td_flags.final = flags->final;
1347 taskdata->td_flags.merged_if0 = flags->merged_if0;
1348 taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
1349 taskdata->td_flags.proxy = flags->proxy;
1350 taskdata->td_flags.detachable = flags->detachable;
1351 taskdata->td_flags.hidden_helper = flags->hidden_helper;
1352 taskdata->encountering_gtid = gtid;
1353 taskdata->td_task_team = thread->th.th_task_team;
1354 taskdata->td_size_alloc = shareds_offset + sizeof_shareds;
1355 taskdata->td_flags.tasktype = TASK_EXPLICIT;
1358 taskdata->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);
1361 taskdata->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0;
1367 taskdata->td_flags.task_serial =
1368 (parent_task->td_flags.final || taskdata->td_flags.team_serial ||
1369 taskdata->td_flags.tasking_ser || flags->merged_if0);
1371 taskdata->td_flags.started = 0;
1372 taskdata->td_flags.executing = 0;
1373 taskdata->td_flags.complete = 0;
1374 taskdata->td_flags.freed = 0;
1376 taskdata->td_flags.native = flags->native;
1378 KMP_ATOMIC_ST_RLX(&taskdata->td_incomplete_child_tasks, 0);
1380 KMP_ATOMIC_ST_RLX(&taskdata->td_allocated_child_tasks, 1);
1381 taskdata->td_taskgroup =
1382 parent_task->td_taskgroup;
1383 taskdata->td_dephash = NULL;
1384 taskdata->td_depnode = NULL;
1385 if (flags->tiedness == TASK_UNTIED)
1386 taskdata->td_last_tied = NULL;
1388 taskdata->td_last_tied = taskdata;
1389 taskdata->td_allow_completion_event.type = KMP_EVENT_UNINITIALIZED;
1391 if (UNLIKELY(ompt_enabled.enabled))
1392 __ompt_task_init(taskdata, gtid);
1396 if (flags->proxy == TASK_PROXY || flags->detachable == TASK_DETACHABLE ||
1397 flags->hidden_helper ||
1398 !(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) {
1399 KMP_ATOMIC_INC(&parent_task->td_incomplete_child_tasks);
1400 if (parent_task->td_taskgroup)
1401 KMP_ATOMIC_INC(&parent_task->td_taskgroup->count);
1404 if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT) {
1405 KMP_ATOMIC_INC(&taskdata->td_parent->td_allocated_child_tasks);
1409 if (flags->hidden_helper) {
1410 taskdata->td_flags.task_serial = FALSE;
1412 KMP_ATOMIC_INC(&__kmp_unexecuted_hidden_helper_tasks);
1415 KA_TRACE(20, (
"__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
1416 gtid, taskdata, taskdata->td_parent));
1417 ANNOTATE_HAPPENS_BEFORE(task);
kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
                                  kmp_int32 flags, size_t sizeof_kmp_task_t,
                                  size_t sizeof_shareds,
                                  kmp_routine_entry_t task_entry) {
  kmp_task_t *retval;
  kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *)&flags;
  __kmp_assert_valid_gtid(gtid);
  input_flags->native = FALSE;
  // __kmp_task_alloc() sets up all other runtime flags
  KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s %s) "
                "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                gtid, loc_ref, input_flags->tiedness ? "tied  " : "untied",
                input_flags->proxy ? "proxy" : "",
                input_flags->detachable ? "detachable" : "", sizeof_kmp_task_t,
                sizeof_shareds, task_entry));

  retval = __kmp_task_alloc(loc_ref, gtid, input_flags, sizeof_kmp_task_t,
                            sizeof_shareds, task_entry);

  KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval));

  return retval;
}
kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
                                         kmp_int32 flags,
                                         size_t sizeof_kmp_task_t,
                                         size_t sizeof_shareds,
                                         kmp_routine_entry_t task_entry,
                                         kmp_int64 device_id) {
  if (__kmp_enable_hidden_helper) {
    auto &input_flags = reinterpret_cast<kmp_tasking_flags_t &>(flags);
    input_flags.hidden_helper = TRUE;
  }

  return __kmpc_omp_task_alloc(loc_ref, gtid, flags, sizeof_kmp_task_t,
                               sizeof_shareds, task_entry);
}
1476 kmp_task_t *new_task, kmp_int32 naffins,
1477 kmp_task_affinity_info_t *affin_list) {
1486 static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
1487 kmp_taskdata_t *current_task) {
1488 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
1492 30, (
"__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
1493 gtid, taskdata, current_task));
1494 KMP_DEBUG_ASSERT(task);
1495 if (UNLIKELY(taskdata->td_flags.proxy == TASK_PROXY &&
1496 taskdata->td_flags.complete == 1)) {
1501 (
"__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
1504 __kmp_bottom_half_finish_proxy(gtid, task);
1506 KA_TRACE(30, (
"__kmp_invoke_task(exit): T#%d completed bottom finish for "
1507 "proxy task %p, resuming task %p\n",
1508 gtid, taskdata, current_task));
1516 ompt_thread_info_t oldInfo;
1517 if (UNLIKELY(ompt_enabled.enabled)) {
1519 thread = __kmp_threads[gtid];
1520 oldInfo = thread->th.ompt_thread_info;
1521 thread->th.ompt_thread_info.wait_id = 0;
1522 thread->th.ompt_thread_info.state = (thread->th.th_team_serialized)
1523 ? ompt_state_work_serial
1524 : ompt_state_work_parallel;
1525 taskdata->ompt_task_info.frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1530 if (taskdata->td_flags.hidden_helper) {
1532 KMP_ASSERT(KMP_HIDDEN_HELPER_THREAD(gtid));
1533 KMP_ATOMIC_DEC(&__kmp_unexecuted_hidden_helper_tasks);
1537 if (taskdata->td_flags.proxy != TASK_PROXY) {
1538 ANNOTATE_HAPPENS_AFTER(task);
1539 __kmp_task_start(gtid, task, current_task);
1545 if (UNLIKELY(__kmp_omp_cancellation)) {
1546 thread = __kmp_threads[gtid];
1547 kmp_team_t *this_team = thread->th.th_team;
1548 kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
1549 if ((taskgroup && taskgroup->cancel_request) ||
1550 (this_team->t.t_cancel_request == cancel_parallel)) {
1551 #if OMPT_SUPPORT && OMPT_OPTIONAL
1552 ompt_data_t *task_data;
1553 if (UNLIKELY(ompt_enabled.ompt_callback_cancel)) {
1554 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL);
1555 ompt_callbacks.ompt_callback(ompt_callback_cancel)(
1557 ((taskgroup && taskgroup->cancel_request) ? ompt_cancel_taskgroup
1558 : ompt_cancel_parallel) |
1559 ompt_cancel_discarded_task,
1572 if (taskdata->td_flags.tiedness == TASK_UNTIED) {
1573 taskdata->td_last_tied = current_task->td_last_tied;
1574 KMP_DEBUG_ASSERT(taskdata->td_last_tied);
1576 #if KMP_STATS_ENABLED
1578 switch (KMP_GET_THREAD_STATE()) {
1579 case FORK_JOIN_BARRIER:
1580 KMP_PUSH_PARTITIONED_TIMER(OMP_task_join_bar);
1583 KMP_PUSH_PARTITIONED_TIMER(OMP_task_plain_bar);
1586 KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskyield);
1589 KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskwait);
1592 KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskgroup);
1595 KMP_PUSH_PARTITIONED_TIMER(OMP_task_immediate);
1602 if (UNLIKELY(ompt_enabled.enabled))
1603 __ompt_task_start(task, current_task, gtid);
1607 if (ompd_state & OMPD_ENABLE_BP)
1608 ompd_bp_task_begin();
1611 #if USE_ITT_BUILD && USE_ITT_NOTIFY
1612 kmp_uint64 cur_time;
1613 kmp_int32 kmp_itt_count_task =
1614 __kmp_forkjoin_frames_mode == 3 && !taskdata->td_flags.task_serial &&
1615 current_task->td_flags.tasktype == TASK_IMPLICIT;
1616 if (kmp_itt_count_task) {
1617 thread = __kmp_threads[gtid];
1619 if (thread->th.th_bar_arrive_time)
1620 cur_time = __itt_get_timestamp();
1622 kmp_itt_count_task = 0;
1624 KMP_FSYNC_ACQUIRED(taskdata);
1627 #ifdef KMP_GOMP_COMPAT
1628 if (taskdata->td_flags.native) {
1629 ((void (*)(
void *))(*(task->routine)))(task->shareds);
1633 (*(task->routine))(gtid, task);
1635 KMP_POP_PARTITIONED_TIMER();
1637 #if USE_ITT_BUILD && USE_ITT_NOTIFY
1638 if (kmp_itt_count_task) {
1640 thread->th.th_bar_arrive_time += (__itt_get_timestamp() - cur_time);
1642 KMP_FSYNC_CANCEL(taskdata);
1643 KMP_FSYNC_RELEASING(taskdata->td_parent);
1648 if (ompd_state & OMPD_ENABLE_BP)
1653 if (taskdata->td_flags.proxy != TASK_PROXY) {
1654 ANNOTATE_HAPPENS_BEFORE(taskdata->td_parent);
1656 if (UNLIKELY(ompt_enabled.enabled)) {
1657 thread->th.ompt_thread_info = oldInfo;
1658 if (taskdata->td_flags.tiedness == TASK_TIED) {
1659 taskdata->ompt_task_info.frame.exit_frame = ompt_data_none;
1661 __kmp_task_finish<true>(gtid, task, current_task);
1664 __kmp_task_finish<false>(gtid, task, current_task);
1669 (
"__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
1670 gtid, taskdata, current_task));
1684 kmp_int32 __kmpc_omp_task_parts(
ident_t *loc_ref, kmp_int32 gtid,
1685 kmp_task_t *new_task) {
1686 kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1688 KA_TRACE(10, (
"__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n", gtid,
1689 loc_ref, new_taskdata));
1692 kmp_taskdata_t *parent;
1693 if (UNLIKELY(ompt_enabled.enabled)) {
1694 parent = new_taskdata->td_parent;
1695 if (ompt_enabled.ompt_callback_task_create) {
1696 ompt_callbacks.ompt_callback(ompt_callback_task_create)(
1697 &(parent->ompt_task_info.task_data), &(parent->ompt_task_info.frame),
1698 &(new_taskdata->ompt_task_info.task_data), ompt_task_explicit, 0,
1699 OMPT_GET_RETURN_ADDRESS(0));
1707 if (__kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED)
1709 kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
1710 new_taskdata->td_flags.task_serial = 1;
1711 __kmp_invoke_task(gtid, new_task, current_task);
1716 (
"__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
1717 "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n",
1718 gtid, loc_ref, new_taskdata));
1720 ANNOTATE_HAPPENS_BEFORE(new_task);
1722 if (UNLIKELY(ompt_enabled.enabled)) {
1723 parent->ompt_task_info.frame.enter_frame = ompt_data_none;
1726 return TASK_CURRENT_NOT_QUEUED;
1740 kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
1741 bool serialize_immediate) {
1742 kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1746 if (new_taskdata->td_flags.proxy == TASK_PROXY ||
1747 __kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED)
1749 kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
1750 if (serialize_immediate)
1751 new_taskdata->td_flags.task_serial = 1;
1752 __kmp_invoke_task(gtid, new_task, current_task);
1755 ANNOTATE_HAPPENS_BEFORE(new_task);
1756 return TASK_CURRENT_NOT_QUEUED;
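// __kmpc_omp_task: entry point for a deferrable 'task' construct; notifies
// OMPT, queues the task via __kmp_omp_task (serializing it if the push fails),
// and returns TASK_CURRENT_NOT_QUEUED.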
1771 kmp_int32 __kmpc_omp_task(
ident_t *loc_ref, kmp_int32 gtid,
1772 kmp_task_t *new_task) {
1774 KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);
1776 #if KMP_DEBUG || OMPT_SUPPORT
1777 kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1779 KA_TRACE(10, (
"__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref,
1781 __kmp_assert_valid_gtid(gtid);
1784 kmp_taskdata_t *parent = NULL;
1785 if (UNLIKELY(ompt_enabled.enabled)) {
1786 if (!new_taskdata->td_flags.started) {
1787 OMPT_STORE_RETURN_ADDRESS(gtid);
1788 parent = new_taskdata->td_parent;
1789 if (!parent->ompt_task_info.frame.enter_frame.ptr) {
1790 parent->ompt_task_info.frame.enter_frame.ptr =
1791 OMPT_GET_FRAME_ADDRESS(0);
1793 if (ompt_enabled.ompt_callback_task_create) {
1794 ompt_callbacks.ompt_callback(ompt_callback_task_create)(
1795 &(parent->ompt_task_info.task_data),
1796 &(parent->ompt_task_info.frame),
1797 &(new_taskdata->ompt_task_info.task_data),
1798 ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0,
1799 OMPT_LOAD_RETURN_ADDRESS(gtid));
1804 __ompt_task_finish(new_task,
1805 new_taskdata->ompt_task_info.scheduling_parent,
1807 new_taskdata->ompt_task_info.frame.exit_frame = ompt_data_none;
1812 res = __kmp_omp_task(gtid, new_task,
true);
1814 KA_TRACE(10, (
"__kmpc_omp_task(exit): T#%d returning "
1815 "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
1816 gtid, loc_ref, new_taskdata));
1818 if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) {
1819 parent->ompt_task_info.frame.enter_frame = ompt_data_none;
1838 kmp_int32 __kmp_omp_taskloop_task(
ident_t *loc_ref, kmp_int32 gtid,
1839 kmp_task_t *new_task,
void *codeptr_ra) {
1841 KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);
1843 #if KMP_DEBUG || OMPT_SUPPORT
1844 kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1846 KA_TRACE(10, (
"__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref,
1850 kmp_taskdata_t *parent = NULL;
1851 if (UNLIKELY(ompt_enabled.enabled && !new_taskdata->td_flags.started)) {
1852 parent = new_taskdata->td_parent;
1853 if (!parent->ompt_task_info.frame.enter_frame.ptr)
1854 parent->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1855 if (ompt_enabled.ompt_callback_task_create) {
1856 ompt_callbacks.ompt_callback(ompt_callback_task_create)(
1857 &(parent->ompt_task_info.task_data), &(parent->ompt_task_info.frame),
1858 &(new_taskdata->ompt_task_info.task_data),
1859 ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0,
1865 res = __kmp_omp_task(gtid, new_task,
true);
1867 KA_TRACE(10, (
"__kmpc_omp_task(exit): T#%d returning "
1868 "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
1869 gtid, loc_ref, new_taskdata));
1871 if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) {
1872 parent->ompt_task_info.frame.enter_frame = ompt_data_none;
1878 template <
bool ompt>
1879 static kmp_int32 __kmpc_omp_taskwait_template(
ident_t *loc_ref, kmp_int32 gtid,
1880 void *frame_address,
1881 void *return_address) {
1882 kmp_taskdata_t *taskdata =
nullptr;
1884 int thread_finished = FALSE;
1885 KMP_SET_THREAD_STATE_BLOCK(TASKWAIT);
1887 KA_TRACE(10, (
"__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref));
1888 KMP_DEBUG_ASSERT(gtid >= 0);
1890 if (__kmp_tasking_mode != tskm_immediate_exec) {
1891 thread = __kmp_threads[gtid];
1892 taskdata = thread->th.th_current_task;
1894 #if OMPT_SUPPORT && OMPT_OPTIONAL
1895 ompt_data_t *my_task_data;
1896 ompt_data_t *my_parallel_data;
1899 my_task_data = &(taskdata->ompt_task_info.task_data);
1900 my_parallel_data = OMPT_CUR_TEAM_DATA(thread);
1902 taskdata->ompt_task_info.frame.enter_frame.ptr = frame_address;
1904 if (ompt_enabled.ompt_callback_sync_region) {
1905 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
1906 ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data,
1907 my_task_data, return_address);
1910 if (ompt_enabled.ompt_callback_sync_region_wait) {
1911 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
1912 ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data,
1913 my_task_data, return_address);
1923 taskdata->td_taskwait_counter += 1;
1924 taskdata->td_taskwait_ident = loc_ref;
1925 taskdata->td_taskwait_thread = gtid + 1;
1928 void *itt_sync_obj = NULL;
1930 KMP_ITT_TASKWAIT_STARTING(itt_sync_obj);
1935 !taskdata->td_flags.team_serial && !taskdata->td_flags.final;
1937 must_wait = must_wait || (thread->th.th_task_team != NULL &&
1938 thread->th.th_task_team->tt.tt_found_proxy_tasks);
1942 (__kmp_enable_hidden_helper && thread->th.th_task_team != NULL &&
1943 thread->th.th_task_team->tt.tt_hidden_helper_task_encountered);
1946 kmp_flag_32<false, false> flag(
1947 RCAST(std::atomic<kmp_uint32> *,
1948 &(taskdata->td_incomplete_child_tasks)),
1950 while (KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks) != 0) {
1951 flag.execute_tasks(thread, gtid, FALSE,
1952 &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
1953 __kmp_task_stealing_constraint);
1957 KMP_ITT_TASKWAIT_FINISHED(itt_sync_obj);
1958 KMP_FSYNC_ACQUIRED(taskdata);
1963 taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
1965 #if OMPT_SUPPORT && OMPT_OPTIONAL
1967 if (ompt_enabled.ompt_callback_sync_region_wait) {
1968 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
1969 ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data,
1970 my_task_data, return_address);
1972 if (ompt_enabled.ompt_callback_sync_region) {
1973 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
1974 ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data,
1975 my_task_data, return_address);
1977 taskdata->ompt_task_info.frame.enter_frame = ompt_data_none;
1981 ANNOTATE_HAPPENS_AFTER(taskdata);
1984 KA_TRACE(10, (
"__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
1985 "returning TASK_CURRENT_NOT_QUEUED\n",
1988 return TASK_CURRENT_NOT_QUEUED;
1991 #if OMPT_SUPPORT && OMPT_OPTIONAL
1993 static kmp_int32 __kmpc_omp_taskwait_ompt(
ident_t *loc_ref, kmp_int32 gtid,
1994 void *frame_address,
1995 void *return_address) {
1996 return __kmpc_omp_taskwait_template<true>(loc_ref, gtid, frame_address,
2003 kmp_int32 __kmpc_omp_taskwait(
ident_t *loc_ref, kmp_int32 gtid) {
2004 #if OMPT_SUPPORT && OMPT_OPTIONAL
2005 if (UNLIKELY(ompt_enabled.enabled)) {
2006 OMPT_STORE_RETURN_ADDRESS(gtid);
2007 return __kmpc_omp_taskwait_ompt(loc_ref, gtid, OMPT_GET_FRAME_ADDRESS(0),
2008 OMPT_LOAD_RETURN_ADDRESS(gtid));
2011 return __kmpc_omp_taskwait_template<false>(loc_ref, gtid, NULL, NULL);
2015 kmp_int32 __kmpc_omp_taskyield(
ident_t *loc_ref, kmp_int32 gtid,
int end_part) {
2016 kmp_taskdata_t *taskdata = NULL;
2018 int thread_finished = FALSE;
2021 KMP_SET_THREAD_STATE_BLOCK(TASKYIELD);
2023 KA_TRACE(10, (
"__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
2024 gtid, loc_ref, end_part));
2025 __kmp_assert_valid_gtid(gtid);
2027 if (__kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel) {
2028 thread = __kmp_threads[gtid];
2029 taskdata = thread->th.th_current_task;
2036 taskdata->td_taskwait_counter += 1;
2037 taskdata->td_taskwait_ident = loc_ref;
2038 taskdata->td_taskwait_thread = gtid + 1;
2041 void *itt_sync_obj = NULL;
2043 KMP_ITT_TASKWAIT_STARTING(itt_sync_obj);
2046 if (!taskdata->td_flags.team_serial) {
2047 kmp_task_team_t *task_team = thread->th.th_task_team;
2048 if (task_team != NULL) {
2049 if (KMP_TASKING_ENABLED(task_team)) {
2051 if (UNLIKELY(ompt_enabled.enabled))
2052 thread->th.ompt_thread_info.ompt_task_yielded = 1;
2054 __kmp_execute_tasks_32(
2055 thread, gtid, (kmp_flag_32<> *)NULL, FALSE,
2056 &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
2057 __kmp_task_stealing_constraint);
2059 if (UNLIKELY(ompt_enabled.enabled))
2060 thread->th.ompt_thread_info.ompt_task_yielded = 0;
2066 KMP_ITT_TASKWAIT_FINISHED(itt_sync_obj);
2071 taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
2074 KA_TRACE(10, (
"__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
2075 "returning TASK_CURRENT_NOT_QUEUED\n",
2078 return TASK_CURRENT_NOT_QUEUED;
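// Task reduction support: the routines below set up, look up and finalize
// per-thread private copies of 'task_reduction' / 'in_reduction' items,
// stored in the enclosing taskgroup's reduce_data array.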
2099 unsigned reserved31 : 31;
2179 template <
typename T>
2180 void *__kmp_task_reduction_init(
int gtid,
int num, T *data) {
2181 __kmp_assert_valid_gtid(gtid);
2182 kmp_info_t *thread = __kmp_threads[gtid];
2183 kmp_taskgroup_t *tg = thread->th.th_current_task->td_taskgroup;
2184 kmp_uint32 nth = thread->th.th_team_nproc;
2188 KMP_ASSERT(tg != NULL);
2189 KMP_ASSERT(data != NULL);
2190 KMP_ASSERT(num > 0);
2192 KA_TRACE(10, (
"__kmpc_task_reduction_init: T#%d, tg %p, exiting nth=1\n",
2196 KA_TRACE(10, (
"__kmpc_task_reduction_init: T#%d, taskgroup %p, #items %d\n",
2200 for (
int i = 0; i < num; ++i) {
2201 size_t size = data[i].reduce_size - 1;
2203 size += CACHE_LINE - size % CACHE_LINE;
2204 KMP_ASSERT(data[i].reduce_comb != NULL);
2207 arr[i].
flags = data[i].flags;
2211 __kmp_assign_orig<T>(arr[i], data[i]);
2212 if (!arr[i].flags.lazy_priv) {
2215 arr[i].
reduce_pend = (
char *)(arr[i].reduce_priv) + nth * size;
2216 if (arr[i].reduce_init != NULL) {
2218 for (
size_t j = 0; j < nth; ++j) {
2219 __kmp_call_init<T>(arr[i], j * size);
2226 arr[i].
reduce_priv = __kmp_allocate(nth *
sizeof(
void *));
2229 tg->reduce_data = (
void *)arr;
2230 tg->reduce_num_data = num;
2269 template <
typename T>
2270 void __kmp_task_reduction_init_copy(kmp_info_t *thr,
int num, T *data,
2271 kmp_taskgroup_t *tg,
void *reduce_data) {
2273 KA_TRACE(20, (
"__kmp_task_reduction_init_copy: Th %p, init taskgroup %p,"
2275 thr, tg, reduce_data));
2280 for (
int i = 0; i < num; ++i) {
2283 tg->reduce_data = (
void *)arr;
2284 tg->reduce_num_data = num;
2297 __kmp_assert_valid_gtid(gtid);
2298 kmp_info_t *thread = __kmp_threads[gtid];
2299 kmp_int32 nth = thread->th.th_team_nproc;
2303 kmp_taskgroup_t *tg = (kmp_taskgroup_t *)tskgrp;
2305 tg = thread->th.th_current_task->td_taskgroup;
2306 KMP_ASSERT(tg != NULL);
2308 kmp_int32 num = tg->reduce_num_data;
2309 kmp_int32 tid = thread->th.th_info.ds.ds_tid;
2311 KMP_ASSERT(data != NULL);
2312 while (tg != NULL) {
2313 for (
int i = 0; i < num; ++i) {
2314 if (!arr[i].flags.lazy_priv) {
2315 if (data == arr[i].reduce_shar ||
2316 (data >= arr[i].reduce_priv && data < arr[i].reduce_pend))
2317 return (
char *)(arr[i].
reduce_priv) + tid * arr[i].reduce_size;
2320 void **p_priv = (
void **)(arr[i].reduce_priv);
2321 if (data == arr[i].reduce_shar)
2324 for (
int j = 0; j < nth; ++j)
2325 if (data == p_priv[j])
2329 if (p_priv[tid] == NULL) {
2331 p_priv[tid] = __kmp_allocate(arr[i].reduce_size);
2332 if (arr[i].reduce_init != NULL) {
2333 if (arr[i].reduce_orig != NULL) {
2335 p_priv[tid], arr[i].reduce_orig);
2337 ((void (*)(
void *))arr[i].
reduce_init)(p_priv[tid]);
2346 num = tg->reduce_num_data;
2348 KMP_ASSERT2(0,
"Unknown task reduction item");
static void __kmp_task_reduction_fini(kmp_info_t *th, kmp_taskgroup_t *tg) {
  kmp_int32 nth = th->th.th_team_nproc;
  KMP_DEBUG_ASSERT(nth > 1); // should not be called if nth == 1
  kmp_taskred_data_t *arr = (kmp_taskred_data_t *)tg->reduce_data;
  kmp_int32 num = tg->reduce_num_data;
  for (int i = 0; i < num; ++i) {
    void *sh_data = arr[i].reduce_shar;
    void (*f_fini)(void *) = (void (*)(void *))(arr[i].reduce_fini);
    void (*f_comb)(void *, void *) =
        (void (*)(void *, void *))(arr[i].reduce_comb);
    if (!arr[i].flags.lazy_priv) {
      void *pr_data = arr[i].reduce_priv;
      size_t size = arr[i].reduce_size;
      for (int j = 0; j < nth; ++j) {
        void *priv_data = (char *)pr_data + j * size;
        f_comb(sh_data, priv_data); // combine results
        if (f_fini)
          f_fini(priv_data); // finalize if needed
      }
    } else {
      void **pr_data = (void **)(arr[i].reduce_priv);
      for (int j = 0; j < nth; ++j) {
        if (pr_data[j] != NULL) {
          f_comb(sh_data, pr_data[j]); // combine results
          if (f_fini)
            f_fini(pr_data[j]); // finalize if needed
          __kmp_free(pr_data[j]);
        }
      }
    }
    __kmp_free(arr[i].reduce_priv);
  }
  __kmp_thread_free(th, arr);
  tg->reduce_data = NULL;
  tg->reduce_num_data = 0;
}
2394 static void __kmp_task_reduction_clean(kmp_info_t *th, kmp_taskgroup_t *tg) {
2395 __kmp_thread_free(th, tg->reduce_data);
2396 tg->reduce_data = NULL;
2397 tg->reduce_num_data = 0;
2400 template <
typename T>
2401 void *__kmp_task_reduction_modifier_init(
ident_t *loc,
int gtid,
int is_ws,
2403 __kmp_assert_valid_gtid(gtid);
2404 kmp_info_t *thr = __kmp_threads[gtid];
2405 kmp_int32 nth = thr->th.th_team_nproc;
2406 __kmpc_taskgroup(loc, gtid);
2409 (
"__kmpc_reduction_modifier_init: T#%d, tg %p, exiting nth=1\n",
2410 gtid, thr->th.th_current_task->td_taskgroup));
2411 return (
void *)thr->th.th_current_task->td_taskgroup;
2413 kmp_team_t *team = thr->th.th_team;
2415 kmp_taskgroup_t *tg;
2416 reduce_data = KMP_ATOMIC_LD_RLX(&team->t.t_tg_reduce_data[is_ws]);
2417 if (reduce_data == NULL &&
2418 __kmp_atomic_compare_store(&team->t.t_tg_reduce_data[is_ws], reduce_data,
2421 KMP_DEBUG_ASSERT(reduce_data == NULL);
2423 tg = (kmp_taskgroup_t *)__kmp_task_reduction_init<T>(gtid, num, data);
2427 KMP_DEBUG_ASSERT(KMP_ATOMIC_LD_RLX(&team->t.t_tg_fini_counter[0]) == 0);
2428 KMP_DEBUG_ASSERT(KMP_ATOMIC_LD_RLX(&team->t.t_tg_fini_counter[1]) == 0);
2429 KMP_ATOMIC_ST_REL(&team->t.t_tg_reduce_data[is_ws], reduce_data);
2432 (reduce_data = KMP_ATOMIC_LD_ACQ(&team->t.t_tg_reduce_data[is_ws])) ==
2436 KMP_DEBUG_ASSERT(reduce_data > (
void *)1);
2437 tg = thr->th.th_current_task->td_taskgroup;
2438 __kmp_task_reduction_init_copy<T>(thr, num, data, tg, reduce_data);
2460 int num,
void *data) {
2461 return __kmp_task_reduction_modifier_init(loc, gtid, is_ws, num,
2481 return __kmp_task_reduction_modifier_init(loc, gtid, is_ws, num,
2494 __kmpc_end_taskgroup(loc, gtid);
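// __kmpc_taskgroup: push a new taskgroup descriptor onto the current task and
// notify OMPT (sync_region begin) when the tool has registered a callback.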
void __kmpc_taskgroup(ident_t *loc, int gtid) {
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *taskdata = thread->th.th_current_task;
  kmp_taskgroup_t *tg_new =
      (kmp_taskgroup_t *)__kmp_thread_malloc(thread, sizeof(kmp_taskgroup_t));
  KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new));
  KMP_ATOMIC_ST_RLX(&tg_new->count, 0);
  KMP_ATOMIC_ST_RLX(&tg_new->cancel_request, cancel_noreq);
  tg_new->parent = taskdata->td_taskgroup;
  tg_new->reduce_data = NULL;
  tg_new->reduce_num_data = 0;
  tg_new->gomp_data = NULL;
  taskdata->td_taskgroup = tg_new;

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) {
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    kmp_team_t *team = thread->th.th_team;
    ompt_data_t my_task_data = taskdata->ompt_task_info.task_data;
    ompt_data_t my_parallel_data = team->t.ompt_team_info.parallel_data;

    ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
        ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data),
        &(my_task_data), codeptr);
  }
#endif
}
2532 void __kmpc_end_taskgroup(ident_t *loc, int gtid) {
2533 __kmp_assert_valid_gtid(gtid);
2534 kmp_info_t *thread = __kmp_threads[gtid];
2535 kmp_taskdata_t *taskdata = thread->th.th_current_task;
2536 kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
2537 int thread_finished = FALSE;
2539 #if OMPT_SUPPORT && OMPT_OPTIONAL
2541 ompt_data_t my_task_data;
2542 ompt_data_t my_parallel_data;
2543 void *codeptr = nullptr;
2544 if (UNLIKELY(ompt_enabled.enabled)) {
2545 team = thread->th.th_team;
2546 my_task_data = taskdata->ompt_task_info.task_data;
2548 my_parallel_data = team->t.ompt_team_info.parallel_data;
2549 codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2551 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2555 KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc));
2556 KMP_DEBUG_ASSERT(taskgroup != NULL);
2557 KMP_SET_THREAD_STATE_BLOCK(TASKGROUP);
2559 if (__kmp_tasking_mode != tskm_immediate_exec) {
2561 taskdata->td_taskwait_counter += 1;
2562 taskdata->td_taskwait_ident = loc;
2563 taskdata->td_taskwait_thread = gtid + 1;
2567 void *itt_sync_obj = NULL;
2569 KMP_ITT_TASKWAIT_STARTING(itt_sync_obj);
2573 #if OMPT_SUPPORT && OMPT_OPTIONAL
2574 if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)) {
2575 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
2576 ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data),
2577 &(my_task_data), codeptr);
2581 if (!taskdata->td_flags.team_serial ||
2582 (thread->th.th_task_team != NULL &&
2583 thread->th.th_task_team->tt.tt_found_proxy_tasks)) {
2584 kmp_flag_32<false, false> flag(
2585 RCAST(std::atomic<kmp_uint32> *, &(taskgroup->count)), 0U);
2586 while (KMP_ATOMIC_LD_ACQ(&taskgroup->count) != 0) {
2587 flag.execute_tasks(thread, gtid, FALSE,
2588 &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
2589 __kmp_task_stealing_constraint);
2592 taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
2594 #if OMPT_SUPPORT && OMPT_OPTIONAL
2595 if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)) {
2596 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
2597 ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data),
2598 &(my_task_data), codeptr);
2603 KMP_ITT_TASKWAIT_FINISHED(itt_sync_obj);
2604 KMP_FSYNC_ACQUIRED(taskdata);
2607 KMP_DEBUG_ASSERT(taskgroup->count == 0);
2609 if (taskgroup->reduce_data != NULL &&
2610 !taskgroup->gomp_data) {
2613 kmp_team_t *t = thread->th.th_team;
2617 if ((reduce_data = KMP_ATOMIC_LD_ACQ(&t->t.t_tg_reduce_data[0])) != NULL &&
2620 cnt = KMP_ATOMIC_INC(&t->t.t_tg_fini_counter[0]);
2621 if (cnt == thread->th.th_team_nproc - 1) {
2624 __kmp_task_reduction_fini(thread, taskgroup);
2627 __kmp_thread_free(thread, reduce_data);
2628 KMP_ATOMIC_ST_REL(&t->t.t_tg_reduce_data[0], NULL);
2629 KMP_ATOMIC_ST_REL(&t->t.t_tg_fini_counter[0], 0);
2633 __kmp_task_reduction_clean(thread, taskgroup);
2635 } else if ((reduce_data = KMP_ATOMIC_LD_ACQ(&t->t.t_tg_reduce_data[1])) !=
2639 cnt = KMP_ATOMIC_INC(&t->t.t_tg_fini_counter[1]);
2640 if (cnt == thread->th.th_team_nproc - 1) {
2642 __kmp_task_reduction_fini(thread, taskgroup);
2645 __kmp_thread_free(thread, reduce_data);
2646 KMP_ATOMIC_ST_REL(&t->t.t_tg_reduce_data[1], NULL);
2647 KMP_ATOMIC_ST_REL(&t->t.t_tg_fini_counter[1], 0);
2651 __kmp_task_reduction_clean(thread, taskgroup);
2655 __kmp_task_reduction_fini(thread, taskgroup);
2659 taskdata->td_taskgroup = taskgroup->parent;
2660 __kmp_thread_free(thread, taskgroup);
2662 KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n",
2664 ANNOTATE_HAPPENS_AFTER(taskdata);
2666 #if OMPT_SUPPORT && OMPT_OPTIONAL
2667 if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) {
2668 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
2669 ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data),
2670 &(my_task_data), codeptr);
2676 static kmp_task_t *__kmp_remove_my_task(kmp_info_t *thread, kmp_int32 gtid,
2677 kmp_task_team_t *task_team,
2678 kmp_int32 is_constrained) {
2680 kmp_taskdata_t *taskdata;
2681 kmp_thread_data_t *thread_data;
2684 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
2685 KMP_DEBUG_ASSERT(task_team->tt.tt_threads_data !=
2688 thread_data = &task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
2690 KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
2691 gtid, thread_data->td.td_deque_ntasks,
2692 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
2694 if (TCR_4(thread_data->td.td_deque_ntasks) == 0) {
2696 ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: "
2697 "ntasks=%d head=%u tail=%u\n",
2698 gtid, thread_data->td.td_deque_ntasks,
2699 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
2703 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
2705 if (TCR_4(thread_data->td.td_deque_ntasks) == 0) {
2706 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
2708 ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: "
2709 "ntasks=%d head=%u tail=%u\n",
2710 gtid, thread_data->td.td_deque_ntasks,
2711 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
2715 tail = (thread_data->td.td_deque_tail - 1) &
2716 TASK_DEQUE_MASK(thread_data->td);
2717 taskdata = thread_data->td.td_deque[tail];
2719 if (!__kmp_task_is_allowed(gtid, is_constrained, taskdata,
2720 thread->th.th_current_task)) {
2722 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
2724 ("__kmp_remove_my_task(exit #3): T#%d TSC blocks tail task: "
2725 "ntasks=%d head=%u tail=%u\n",
2726 gtid, thread_data->td.td_deque_ntasks,
2727 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
2731 thread_data->td.td_deque_tail = tail;
2732 TCW_4(thread_data->td.td_deque_ntasks, thread_data->td.td_deque_ntasks - 1);
2734 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
2736 KA_TRACE(10, ("__kmp_remove_my_task(exit #4): T#%d task %p removed: "
2737 "ntasks=%d head=%u tail=%u\n",
2738 gtid, taskdata, thread_data->td.td_deque_ntasks,
2739 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
2741 task = KMP_TASKDATA_TO_TASK(taskdata);
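// Descriptive note: the owner pops from the *tail* of its own deque (newest
// task first), while __kmp_steal_task below takes from the *head* (oldest
// task first). Both ends wrap their indices with TASK_DEQUE_MASK, so the
// deque size must remain a power of two:
//
//   tail = (tail - 1) & TASK_DEQUE_MASK(td);   // owner pop, as above
//   head = (head + 1) & TASK_DEQUE_MASK(td);   // thief steal, see below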
2748 static kmp_task_t *__kmp_steal_task(kmp_info_t *victim_thr, kmp_int32 gtid,
2749 kmp_task_team_t *task_team,
2750 std::atomic<kmp_int32> *unfinished_threads,
2751 int *thread_finished,
2752 kmp_int32 is_constrained) {
2754 kmp_taskdata_t *taskdata;
2755 kmp_taskdata_t *current;
2756 kmp_thread_data_t *victim_td, *threads_data;
2758 kmp_int32 victim_tid;
2760 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
2762 threads_data = task_team->tt.tt_threads_data;
2763 KMP_DEBUG_ASSERT(threads_data != NULL);
2765 victim_tid = victim_thr->th.th_info.ds.ds_tid;
2766 victim_td = &threads_data[victim_tid];
2768 KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: "
2769 "task_team=%p ntasks=%d head=%u tail=%u\n",
2770 gtid, __kmp_gtid_from_thread(victim_thr), task_team,
2771 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
2772 victim_td->td.td_deque_tail));
2774 if (TCR_4(victim_td->td.td_deque_ntasks) == 0) {
2775 KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: "
2776 "task_team=%p ntasks=%d head=%u tail=%u\n",
2777 gtid, __kmp_gtid_from_thread(victim_thr), task_team,
2778 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
2779 victim_td->td.td_deque_tail));
2783 __kmp_acquire_bootstrap_lock(&victim_td->td.td_deque_lock);
2785 int ntasks = TCR_4(victim_td->td.td_deque_ntasks);
2788 __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
2789 KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: "
2790 "task_team=%p ntasks=%d head=%u tail=%u\n",
2791 gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
2792 victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
2796 KMP_DEBUG_ASSERT(victim_td->td.td_deque != NULL);
2797 current = __kmp_threads[gtid]->th.th_current_task;
2798 taskdata = victim_td->td.td_deque[victim_td->td.td_deque_head];
2799 if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
2801 victim_td->td.td_deque_head =
2802 (victim_td->td.td_deque_head + 1) & TASK_DEQUE_MASK(victim_td->td);
2804 if (!task_team->tt.tt_untied_task_encountered) {
2806 __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
2807 KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d could not steal from "
2808 "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n",
2809 gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
2810 victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
2815 target = victim_td->td.td_deque_head;
2817 for (i = 1; i < ntasks; ++i) {
2818 target = (target + 1) & TASK_DEQUE_MASK(victim_td->td);
2819 taskdata = victim_td->td.td_deque[target];
2820 if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
2826 if (taskdata == NULL) {
2828 __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
2829 KA_TRACE(10, ("__kmp_steal_task(exit #4): T#%d could not steal from "
2830 "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n",
2831 gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
2832 victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
2836 for (i = i + 1; i < ntasks; ++i) {
2838 target = (target + 1) & TASK_DEQUE_MASK(victim_td->td);
2839 victim_td->td.td_deque[prev] = victim_td->td.td_deque[target];
2843 victim_td->td.td_deque_tail ==
2844 (kmp_uint32)((target + 1) & TASK_DEQUE_MASK(victim_td->td)));
2845 victim_td->td.td_deque_tail = target;
2847 if (*thread_finished) {
2853 count = KMP_ATOMIC_INC(unfinished_threads);
2857 ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
2858 gtid, count + 1, task_team));
2860 *thread_finished = FALSE;
2862 TCW_4(victim_td->td.td_deque_ntasks, ntasks - 1);
2864 __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
2868 ("__kmp_steal_task(exit #5): T#%d stole task %p from T#%d: "
2869 "task_team=%p ntasks=%d head=%u tail=%u\n",
2870 gtid, taskdata, __kmp_gtid_from_thread(victim_thr), task_team,
2871 ntasks, victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
2873 task = KMP_TASKDATA_TO_TASK(taskdata);
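// Descriptive note: a steal normally takes the head task, but once untied
// tasks have been seen the thief also scans the rest of the victim's deque
// for a task permitted by the task scheduling constraint and compacts the
// remaining entries toward the head. If the stealing thread had already
// declared itself finished, it re-increments tt_unfinished_threads before
// resuming work so the termination count stays correct.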
2887 static inline int __kmp_execute_tasks_template(
2888 kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
2889 int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
2890 kmp_int32 is_constrained) {
2891 kmp_task_team_t *task_team = thread->th.th_task_team;
2892 kmp_thread_data_t *threads_data;
2894 kmp_info_t *other_thread;
2895 kmp_taskdata_t *current_task = thread->th.th_current_task;
2896 std::atomic<kmp_int32> *unfinished_threads;
2897 kmp_int32 nthreads, victim_tid = -2, use_own_tasks = 1, new_victim = 0,
2898 tid = thread->th.th_info.ds.ds_tid;
2900 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
2901 KMP_DEBUG_ASSERT(thread == __kmp_threads[gtid]);
2903 if (task_team == NULL || current_task == NULL)
2906 KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d "
2907 "*thread_finished=%d\n",
2908 gtid, final_spin, *thread_finished));
2910 thread->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
2911 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);
2913 KMP_DEBUG_ASSERT(threads_data != NULL);
2915 nthreads = task_team->tt.tt_nproc;
2916 unfinished_threads = &(task_team->tt.tt_unfinished_threads);
2917 KMP_DEBUG_ASSERT(nthreads > 1 || task_team->tt.tt_found_proxy_tasks ||
2918 task_team->tt.tt_hidden_helper_task_encountered);
2919 KMP_DEBUG_ASSERT(*unfinished_threads >= 0);
2925 if (use_own_tasks) {
2926 task = __kmp_remove_my_task(thread, gtid, task_team, is_constrained);
2928 if ((task == NULL) && (nthreads > 1)) {
2932 if (victim_tid == -2) {
2933 victim_tid = threads_data[tid].td.td_deque_last_stolen;
2936 other_thread = threads_data[victim_tid].td.td_thr;
2938 if (victim_tid != -1) {
2940 } else if (!new_victim) {
2946 victim_tid = __kmp_get_random(thread) % (nthreads - 1);
2947 if (victim_tid >= tid) {
2951 other_thread = threads_data[victim_tid].td.td_thr;
2961 if ((__kmp_tasking_mode == tskm_task_teams) &&
2962 (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
2963 (TCR_PTR(CCAST(void *, other_thread->th.th_sleep_loc)) !=
2966 __kmp_null_resume_wrapper(other_thread);
2979 task = __kmp_steal_task(other_thread, gtid, task_team,
2980 unfinished_threads, thread_finished,
2984 if (threads_data[tid].td.td_deque_last_stolen != victim_tid) {
2985 threads_data[tid].td.td_deque_last_stolen = victim_tid;
2992 KMP_CHECK_UPDATE(threads_data[tid].td.td_deque_last_stolen, -1);
3001 #if USE_ITT_BUILD && USE_ITT_NOTIFY
3002 if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
3003 if (itt_sync_obj == NULL) {
3005 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
3007 __kmp_itt_task_starting(itt_sync_obj);
3010 __kmp_invoke_task(gtid, task, current_task);
3012 if (itt_sync_obj != NULL)
3013 __kmp_itt_task_finished(itt_sync_obj);
3020 if (flag == NULL || (!final_spin && flag->done_check())) {
3023 ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
3027 if (thread->th.th_task_team == NULL) {
3030 KMP_YIELD(__kmp_library == library_throughput);
3033 if (!use_own_tasks && TCR_4(threads_data[tid].td.td_deque_ntasks) != 0) {
3034 KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned "
3035 "other tasks, restart\n",
3046 KMP_ATOMIC_LD_ACQ(&current_task->td_incomplete_child_tasks) == 0) {
3050 if (!*thread_finished) {
3053 count = KMP_ATOMIC_DEC(unfinished_threads) - 1;
3054 KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d dec "
3055 "unfinished_threads to %d task_team=%p\n",
3056 gtid, count, task_team));
3057 *thread_finished = TRUE;
3065 if (flag != NULL && flag->done_check()) {
3068 ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
3076 if (thread->th.th_task_team == NULL) {
3078 ("__kmp_execute_tasks_template: T#%d no more tasks\n", gtid));
3084 if (nthreads == 1 &&
3085 KMP_ATOMIC_LD_ACQ(&current_task->td_incomplete_child_tasks))
3089 ("__kmp_execute_tasks_template: T#%d can't find work\n", gtid));
3095 template <bool C, bool S>
3096 int __kmp_execute_tasks_32(
3097 kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32<C, S> *flag, int final_spin,
3098 int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
3099 kmp_int32 is_constrained) {
3100 return __kmp_execute_tasks_template(
3101 thread, gtid, flag, final_spin,
3102 thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
3105 template <bool C, bool S>
3106 int __kmp_execute_tasks_64(
3107 kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64<C, S> *flag, int final_spin,
3108 int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
3109 kmp_int32 is_constrained) {
3110 return __kmp_execute_tasks_template(
3111 thread, gtid, flag, final_spin,
3112 thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
3115 template <bool C, bool S>
3116 int __kmp_atomic_execute_tasks_64(
3117 kmp_info_t *thread, kmp_int32 gtid, kmp_atomic_flag_64<C, S> *flag,
3118 int final_spin, int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
3119 kmp_int32 is_constrained) {
3120 return __kmp_execute_tasks_template(
3121 thread, gtid, flag, final_spin,
3122 thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
3125 int __kmp_execute_tasks_oncore(
3126 kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
3127 int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
3128 kmp_int32 is_constrained) {
3129 return __kmp_execute_tasks_template(
3130 thread, gtid, flag, final_spin,
3131 thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
3135 __kmp_execute_tasks_32<false, false>(kmp_info_t *, kmp_int32,
3136 kmp_flag_32<false, false> *, int,
3137 int *USE_ITT_BUILD_ARG(void *), kmp_int32);
3139 template int __kmp_execute_tasks_64<false, true>(kmp_info_t *, kmp_int32,
3140 kmp_flag_64<false, true> *,
3142 int *USE_ITT_BUILD_ARG(void *),
3145 template int __kmp_execute_tasks_64<true, false>(kmp_info_t *, kmp_int32,
3146 kmp_flag_64<true, false> *,
3148 int *USE_ITT_BUILD_ARG(void *),
3151 template int __kmp_atomic_execute_tasks_64<false, true>(
3152 kmp_info_t *, kmp_int32, kmp_atomic_flag_64<false, true> *, int,
3153 int *USE_ITT_BUILD_ARG(void *), kmp_int32);
3155 template int __kmp_atomic_execute_tasks_64<true, false>(
3156 kmp_info_t *, kmp_int32, kmp_atomic_flag_64<true, false> *, int,
3157 int *USE_ITT_BUILD_ARG(void *), kmp_int32);
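// Descriptive note on __kmp_execute_tasks_template above: the scheduling loop
// drains the thread's own deque first, then retries its last successful
// victim, and only then picks a random victim, waking it when it sleeps and
// block times are finite. It exits when the flag's done_check() fires, when a
// final spin finds no incomplete children of the current task, or when the
// thread's task team disappears.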
3162 static void __kmp_enable_tasking(kmp_task_team_t *task_team,
3163 kmp_info_t *this_thr) {
3164 kmp_thread_data_t *threads_data;
3165 int nthreads, i, is_init_thread;
3167 KA_TRACE(10, ("__kmp_enable_tasking(enter): T#%d\n",
3168 __kmp_gtid_from_thread(this_thr)));
3170 KMP_DEBUG_ASSERT(task_team != NULL);
3171 KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL);
3173 nthreads = task_team->tt.tt_nproc;
3174 KMP_DEBUG_ASSERT(nthreads > 0);
3175 KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc);
3178 is_init_thread = __kmp_realloc_task_threads_data(this_thr, task_team);
3180 if (!is_init_thread) {
3184 ("__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
3185 __kmp_gtid_from_thread(this_thr)));
3188 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);
3189 KMP_DEBUG_ASSERT(threads_data != NULL);
3191 if (__kmp_tasking_mode == tskm_task_teams &&
3192 (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME)) {
3196 for (i = 0; i < nthreads; i++) {
3198 kmp_info_t *thread = threads_data[i].td.td_thr;
3200 if (i == this_thr->th.th_info.ds.ds_tid) {
3209 if ((sleep_loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc))) !=
3211 KF_TRACE(50, ("__kmp_enable_tasking: T#%d waking up thread T#%d\n",
3212 __kmp_gtid_from_thread(this_thr),
3213 __kmp_gtid_from_thread(thread)));
3214 __kmp_null_resume_wrapper(thread);
3216 KF_TRACE(50, ("__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
3217 __kmp_gtid_from_thread(this_thr),
3218 __kmp_gtid_from_thread(thread)));
3223 KA_TRACE(10, ("__kmp_enable_tasking(exit): T#%d\n",
3224 __kmp_gtid_from_thread(this_thr)));
3257 static kmp_task_team_t *__kmp_free_task_teams = NULL;
3260 kmp_bootstrap_lock_t __kmp_task_team_lock =
3261 KMP_BOOTSTRAP_LOCK_INITIALIZER(__kmp_task_team_lock);
3268 static void __kmp_alloc_task_deque(kmp_info_t *thread,
3269 kmp_thread_data_t *thread_data) {
3270 __kmp_init_bootstrap_lock(&thread_data->td.td_deque_lock);
3271 KMP_DEBUG_ASSERT(thread_data->td.td_deque == NULL);
3274 thread_data->td.td_deque_last_stolen = -1;
3276 KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) == 0);
3277 KMP_DEBUG_ASSERT(thread_data->td.td_deque_head == 0);
3278 KMP_DEBUG_ASSERT(thread_data->td.td_deque_tail == 0);
3282 ("__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
3283 __kmp_gtid_from_thread(thread), INITIAL_TASK_DEQUE_SIZE, thread_data));
3287 thread_data->td.td_deque = (kmp_taskdata_t **)__kmp_allocate(
3288 INITIAL_TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
3289 thread_data->td.td_deque_size = INITIAL_TASK_DEQUE_SIZE;
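// Descriptive note: INITIAL_TASK_DEQUE_SIZE is a power of two, so
// TASK_DEQUE_MASK(td) turns head/tail arithmetic into a bitwise AND.
// A minimal sketch with an assumed size of 256:
//
//   // size = 256  ->  mask = 0xff
//   // next = (tail + 1) & mask;   // wraps from 255 back to 0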
3295 static void __kmp_free_task_deque(kmp_thread_data_t *thread_data) {
3296 if (thread_data->td.td_deque != NULL) {
3297 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
3298 TCW_4(thread_data->td.td_deque_ntasks, 0);
3299 __kmp_free(thread_data->td.td_deque);
3300 thread_data->td.td_deque = NULL;
3301 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
3304 #ifdef BUILD_TIED_TASK_STACK
3306 if (thread_data->td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY) {
3307 __kmp_free_task_stack(__kmp_thread_from_gtid(gtid), thread_data);
3319 static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
3320 kmp_task_team_t *task_team) {
3321 kmp_thread_data_t **threads_data_p;
3322 kmp_int32 nthreads, maxthreads;
3323 int is_init_thread = FALSE;
3325 if (TCR_4(task_team->tt.tt_found_tasks)) {
3330 threads_data_p = &task_team->tt.tt_threads_data;
3331 nthreads = task_team->tt.tt_nproc;
3332 maxthreads = task_team->tt.tt_max_threads;
3337 __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);
3339 if (!TCR_4(task_team->tt.tt_found_tasks)) {
3341 kmp_team_t *team = thread->th.th_team;
3344 is_init_thread = TRUE;
3345 if (maxthreads < nthreads) {
3347 if (*threads_data_p != NULL) {
3348 kmp_thread_data_t *old_data = *threads_data_p;
3349 kmp_thread_data_t *new_data = NULL;
3353 ("__kmp_realloc_task_threads_data: T#%d reallocating "
3354 "threads data for task_team %p, new_size = %d, old_size = %d\n",
3355 __kmp_gtid_from_thread(thread), task_team, nthreads, maxthreads));
3360 new_data = (kmp_thread_data_t *)__kmp_allocate(
3361 nthreads * sizeof(kmp_thread_data_t));
3363 KMP_MEMCPY_S((void *)new_data, nthreads * sizeof(kmp_thread_data_t),
3364 (void *)old_data, maxthreads * sizeof(kmp_thread_data_t));
3366 #ifdef BUILD_TIED_TASK_STACK
3368 for (i = maxthreads; i < nthreads; i++) {
3369 kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
3370 __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data);
3374 (*threads_data_p) = new_data;
3375 __kmp_free(old_data);
3377 KE_TRACE(10, ("__kmp_realloc_task_threads_data: T#%d allocating "
3378 "threads data for task_team %p, size = %d\n",
3379 __kmp_gtid_from_thread(thread), task_team, nthreads));
3383 ANNOTATE_IGNORE_WRITES_BEGIN();
3384 *threads_data_p = (kmp_thread_data_t *)__kmp_allocate(
3385 nthreads * sizeof(kmp_thread_data_t));
3386 ANNOTATE_IGNORE_WRITES_END();
3387 #ifdef BUILD_TIED_TASK_STACK
3389 for (i = 0; i < nthreads; i++) {
3390 kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
3391 __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data);
3395 task_team->tt.tt_max_threads = nthreads;
3398 KMP_DEBUG_ASSERT(*threads_data_p != NULL);
3402 for (i = 0; i < nthreads; i++) {
3403 kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
3404 thread_data->td.td_thr = team->t.t_threads[i];
3406 if (thread_data->td.td_deque_last_stolen >= nthreads) {
3410 thread_data->td.td_deque_last_stolen = -1;
3415 TCW_SYNC_4(task_team->tt.tt_found_tasks, TRUE);
3418 __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
3419 return is_init_thread;
3425 static void __kmp_free_task_threads_data(kmp_task_team_t *task_team) {
3426 __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);
3427 if (task_team->tt.tt_threads_data != NULL) {
3429 for (i = 0; i < task_team->tt.tt_max_threads; i++) {
3430 __kmp_free_task_deque(&task_team->tt.tt_threads_data[i]);
3432 __kmp_free(task_team->tt.tt_threads_data);
3433 task_team->tt.tt_threads_data = NULL;
3435 __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
3442 static kmp_task_team_t *__kmp_allocate_task_team(kmp_info_t *thread,
3444 kmp_task_team_t *task_team = NULL;
3447 KA_TRACE(20, ("__kmp_allocate_task_team: T#%d entering; team = %p\n",
3448 (thread ? __kmp_gtid_from_thread(thread) : -1), team));
3450 if (TCR_PTR(__kmp_free_task_teams) != NULL) {
3452 __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
3453 if (__kmp_free_task_teams != NULL) {
3454 task_team = __kmp_free_task_teams;
3455 TCW_PTR(__kmp_free_task_teams, task_team->tt.tt_next);
3456 task_team->tt.tt_next = NULL;
3458 __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
3461 if (task_team == NULL) {
3462 KE_TRACE(10, ("__kmp_allocate_task_team: T#%d allocating "
3463 "task team for team %p\n",
3464 __kmp_gtid_from_thread(thread), team));
3467 task_team = (kmp_task_team_t *)__kmp_allocate(sizeof(kmp_task_team_t));
3468 __kmp_init_bootstrap_lock(&task_team->tt.tt_threads_lock);
3469 #if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
3472 __itt_suppress_mark_range(
3473 __itt_suppress_range, __itt_suppress_threading_errors,
3474 &task_team->tt.tt_found_tasks, sizeof(task_team->tt.tt_found_tasks));
3475 __itt_suppress_mark_range(__itt_suppress_range,
3476 __itt_suppress_threading_errors,
3477 CCAST(kmp_uint32 *, &task_team->tt.tt_active),
3478 sizeof(task_team->tt.tt_active));
3486 TCW_4(task_team->tt.tt_found_tasks, FALSE);
3487 TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
3488 task_team->tt.tt_nproc = nthreads = team->t.t_nproc;
3490 KMP_ATOMIC_ST_REL(&task_team->tt.tt_unfinished_threads, nthreads);
3491 TCW_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE);
3492 TCW_4(task_team->tt.tt_active, TRUE);
3494 KA_TRACE(20, ("__kmp_allocate_task_team: T#%d exiting; task_team = %p "
3495 "unfinished_threads init'd to %d\n",
3496 (thread ? __kmp_gtid_from_thread(thread) : -1), task_team,
3497 KMP_ATOMIC_LD_RLX(&task_team->tt.tt_unfinished_threads)));
3504 void __kmp_free_task_team(kmp_info_t *thread, kmp_task_team_t *task_team) {
3505 KA_TRACE(20, ("__kmp_free_task_team: T#%d task_team = %p\n",
3506 thread ? __kmp_gtid_from_thread(thread) : -1, task_team));
3509 __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
3511 KMP_DEBUG_ASSERT(task_team->tt.tt_next == NULL);
3512 task_team->tt.tt_next = __kmp_free_task_teams;
3513 TCW_PTR(__kmp_free_task_teams, task_team);
3515 __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
3523 void __kmp_reap_task_teams(void) {
3524 kmp_task_team_t *task_team;
3526 if (TCR_PTR(__kmp_free_task_teams) != NULL) {
3528 __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
3529 while ((task_team = __kmp_free_task_teams) != NULL) {
3530 __kmp_free_task_teams = task_team->tt.tt_next;
3531 task_team->tt.tt_next = NULL;
3534 if (task_team->tt.tt_threads_data != NULL) {
3535 __kmp_free_task_threads_data(task_team);
3537 __kmp_free(task_team);
3539 __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
3546 void __kmp_wait_to_unref_task_teams(void) {
3551 KMP_INIT_YIELD(spins);
3559 for (thread = CCAST(kmp_info_t *, __kmp_thread_pool); thread != NULL;
3560 thread = thread->th.th_next_pool) {
3564 if (TCR_PTR(thread->th.th_task_team) == NULL) {
3565 KA_TRACE(10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
3566 __kmp_gtid_from_thread(thread)));
3571 if (!__kmp_is_thread_alive(thread, &exit_val)) {
3572 thread->th.th_task_team = NULL;
3579 KA_TRACE(10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to "
3580 "unreference task_team\n",
3581 __kmp_gtid_from_thread(thread)));
3583 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
3586 if ((sleep_loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc))) !=
3590 ("__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
3591 __kmp_gtid_from_thread(thread), __kmp_gtid_from_thread(thread)));
3592 __kmp_null_resume_wrapper(thread);
3601 KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
3607 void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team, int always) {
3608 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
3614 if (team->t.t_task_team[this_thr->th.th_task_state] == NULL &&
3615 (always || team->t.t_nproc > 1)) {
3616 team->t.t_task_team[this_thr->th.th_task_state] =
3617 __kmp_allocate_task_team(this_thr, team);
3618 KA_TRACE(20, ("__kmp_task_team_setup: Primary T#%d created new task_team %p"
3619 " for team %d at parity=%d\n",
3620 __kmp_gtid_from_thread(this_thr),
3621 team->t.t_task_team[this_thr->th.th_task_state], team->t.t_id,
3622 this_thr->th.th_task_state));
3632 if (team->t.t_nproc > 1) {
3633 int other_team = 1 - this_thr->th.th_task_state;
3634 KMP_DEBUG_ASSERT(other_team >= 0 && other_team < 2);
3635 if (team->t.t_task_team[other_team] == NULL) {
3636 team->t.t_task_team[other_team] =
3637 __kmp_allocate_task_team(this_thr, team);
3638 KA_TRACE(20, ("__kmp_task_team_setup: Primary T#%d created second new "
3639 "task_team %p for team %d at parity=%d\n",
3640 __kmp_gtid_from_thread(this_thr),
3641 team->t.t_task_team[other_team], team->t.t_id, other_team));
3644 kmp_task_team_t *task_team = team->t.t_task_team[other_team];
3645 if (!task_team->tt.tt_active ||
3646 team->t.t_nproc != task_team->tt.tt_nproc) {
3647 TCW_4(task_team->tt.tt_nproc, team->t.t_nproc);
3648 TCW_4(task_team->tt.tt_found_tasks, FALSE);
3649 TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
3650 KMP_ATOMIC_ST_REL(&task_team->tt.tt_unfinished_threads,
3652 TCW_4(task_team->tt.tt_active, TRUE);
3656 KA_TRACE(20, ("__kmp_task_team_setup: Primary T#%d reset next task_team "
3657 "%p for team %d at parity=%d\n",
3658 __kmp_gtid_from_thread(this_thr),
3659 team->t.t_task_team[other_team], team->t.t_id, other_team));
3667 if (this_thr == __kmp_hidden_helper_main_thread) {
3668 for (int i = 0; i < 2; ++i) {
3669 kmp_task_team_t *task_team = team->t.t_task_team[i];
3670 if (KMP_TASKING_ENABLED(task_team)) {
3673 __kmp_enable_tasking(task_team, this_thr);
3674 for (int j = 0; j < task_team->tt.tt_nproc; ++j) {
3675 kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[j];
3676 if (thread_data->td.td_deque == NULL) {
3677 __kmp_alloc_task_deque(__kmp_hidden_helper_threads[j], thread_data);
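// Descriptive note: a team keeps two task teams, t_task_team[0] and
// t_task_team[1], and threads alternate between them through th_task_state,
// so a barrier can drain the current task team while the next region already
// enqueues into the other one. __kmp_task_team_sync below flips that parity
// and re-points th_task_team accordingly.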
3687 void __kmp_task_team_sync(kmp_info_t *this_thr, kmp_team_t *team) {
3688 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
3692 this_thr->th.th_task_state = (kmp_uint8)(1 - this_thr->th.th_task_state);
3696 TCW_PTR(this_thr->th.th_task_team,
3697 team->t.t_task_team[this_thr->th.th_task_state]);
3699 ("__kmp_task_team_sync: Thread T#%d task team switched to task_team "
3700 "%p from Team #%d (parity=%d)\n",
3701 __kmp_gtid_from_thread(this_thr), this_thr->th.th_task_team,
3702 team->t.t_id, this_thr->th.th_task_state));
3712 void __kmp_task_team_wait(
3713 kmp_info_t *this_thr,
3714 kmp_team_t *team USE_ITT_BUILD_ARG(void *itt_sync_obj), int wait) {
3715 kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];
3717 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
3718 KMP_DEBUG_ASSERT(task_team == this_thr->th.th_task_team);
3720 if ((task_team != NULL) && KMP_TASKING_ENABLED(task_team)) {
3722 KA_TRACE(20, ("__kmp_task_team_wait: Primary T#%d waiting for all tasks "
3723 "(for unfinished_threads to reach 0) on task_team = %p\n",
3724 __kmp_gtid_from_thread(this_thr), task_team));
3728 kmp_flag_32<false, false> flag(
3729 RCAST(std::atomic<kmp_uint32> *,
3730 &task_team->tt.tt_unfinished_threads),
3732 flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
3738 ("__kmp_task_team_wait: Primary T#%d deactivating task_team %p: "
3739 "setting active to false, setting local and team's pointer to NULL\n",
3740 __kmp_gtid_from_thread(this_thr), task_team));
3741 KMP_DEBUG_ASSERT(task_team->tt.tt_nproc > 1 ||
3742 task_team->tt.tt_found_proxy_tasks == TRUE);
3743 TCW_SYNC_4(task_team->tt.tt_found_proxy_tasks, FALSE);
3744 KMP_CHECK_UPDATE(task_team->tt.tt_untied_task_encountered, 0);
3745 TCW_SYNC_4(task_team->tt.tt_active, FALSE);
3748 TCW_PTR(this_thr->th.th_task_team, NULL);
3757 void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread, int gtid) {
3758 std::atomic<kmp_uint32> *spin = RCAST(
3759 std::atomic<kmp_uint32> *,
3760 &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads);
3762 KMP_DEBUG_ASSERT(__kmp_tasking_mode == tskm_extra_barrier);
3765 KMP_FSYNC_SPIN_INIT(spin, NULL);
3767 kmp_flag_32<false, false> spin_flag(spin, 0U);
3768 while (!spin_flag.execute_tasks(thread, gtid, TRUE,
3769 &flag USE_ITT_BUILD_ARG(NULL), 0)) {
3772 KMP_FSYNC_SPIN_PREPARE(RCAST(void *, spin));
3775 if (TCR_4(__kmp_global.g.g_done)) {
3776 if (__kmp_global.g.g_abort)
3777 __kmp_abort_thread();
3783 KMP_FSYNC_SPIN_ACQUIRED(RCAST(void *, spin));
3792 static bool __kmp_give_task(kmp_info_t *thread, kmp_int32 tid, kmp_task_t *task, kmp_int32 pass) {
3794 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
3795 kmp_task_team_t *task_team = taskdata->td_task_team;
3797 KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n",
3801 KMP_DEBUG_ASSERT(task_team != NULL);
3803 bool result = false;
3804 kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];
3806 if (thread_data->td.td_deque == NULL) {
3810 ("__kmp_give_task: thread %d has no queue while giving task %p.\n",
3815 if (TCR_4(thread_data->td.td_deque_ntasks) >=
3816 TASK_DEQUE_SIZE(thread_data->td)) {
3819 ("__kmp_give_task: queue is full while giving task %p to thread %d.\n",
3824 if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass)
3827 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
3828 if (TCR_4(thread_data->td.td_deque_ntasks) >=
3829 TASK_DEQUE_SIZE(thread_data->td)) {
3831 __kmp_realloc_task_deque(thread, thread_data);
3836 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
3838 if (TCR_4(thread_data->td.td_deque_ntasks) >=
3839 TASK_DEQUE_SIZE(thread_data->td)) {
3840 KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to "
3846 if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass)
3847 goto release_and_exit;
3849 __kmp_realloc_task_deque(thread, thread_data);
3855 thread_data->td.td_deque[thread_data->td.td_deque_tail] = taskdata;
3857 thread_data->td.td_deque_tail =
3858 (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
3859 TCW_4(thread_data->td.td_deque_ntasks,
3860 TCR_4(thread_data->td.td_deque_ntasks) + 1);
3863 KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n",
3867 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
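// Descriptive note: the target deque is only grown while TASK_DEQUE_SIZE
// divided by INITIAL_TASK_DEQUE_SIZE is still below the caller's pass count,
// so the retry loop in __kmp_proxy_task_completed_ooo() below doubles a full
// deque at most once per pass instead of growing it without bound for one
// stubborn target thread.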
3888 static void __kmp_first_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
3889 KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
3890 KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
3891 KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
3892 KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
3894 taskdata->td_flags.complete = 1;
3896 if (taskdata->td_taskgroup)
3897 KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);
3901 KMP_ATOMIC_INC(&taskdata->td_incomplete_child_tasks);
3904 static void __kmp_second_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
3905 kmp_int32 children = 0;
3909 KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks) - 1;
3910 KMP_DEBUG_ASSERT(children >= 0);
3913 KMP_ATOMIC_DEC(&taskdata->td_incomplete_child_tasks);
3916 static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask) {
3917 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
3918 kmp_info_t *thread = __kmp_threads[gtid];
3920 KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
3921 KMP_DEBUG_ASSERT(taskdata->td_flags.complete ==
3926 while (KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks) > 0)
3929 __kmp_release_deps(gtid, taskdata);
3930 __kmp_free_task_and_ancestors(gtid, taskdata, thread);
3942 KMP_DEBUG_ASSERT(ptask != NULL);
3943 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
3945 10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n",
3947 __kmp_assert_valid_gtid(gtid);
3948 KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
3950 __kmp_first_top_half_finish_proxy(taskdata);
3951 __kmp_second_top_half_finish_proxy(taskdata);
3952 __kmp_bottom_half_finish_proxy(gtid, ptask);
3955 ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n",
3967 KMP_DEBUG_ASSERT(ptask != NULL);
3968 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
3972 ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n",
3975 KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
3977 __kmp_first_top_half_finish_proxy(taskdata);
3981 kmp_team_t *team = taskdata->td_team;
3982 kmp_int32 nthreads = team->t.t_nproc;
3987 kmp_int32 start_k = 0;
3989 kmp_int32 k = start_k;
3993 thread = team->t.t_threads[k];
3994 k = (k + 1) % nthreads;
4000 } while (!__kmp_give_task(thread, k, ptask, pass));
4002 __kmp_second_top_half_finish_proxy(taskdata);
4006 ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n",
4010 kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, int gtid, kmp_task_t *task) {
4012 kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task);
4013 if (td->td_allow_completion_event.type == KMP_EVENT_UNINITIALIZED) {
4014 td->td_allow_completion_event.type = KMP_EVENT_ALLOW_COMPLETION;
4015 td->td_allow_completion_event.ed.task = task;
4016 __kmp_init_tas_lock(&td->td_allow_completion_event.lock);
4018 return &td->td_allow_completion_event;
4021 void __kmp_fulfill_event(kmp_event_t *event) {
4022 if (event->type == KMP_EVENT_ALLOW_COMPLETION) {
4023 kmp_task_t *ptask = event->ed.task;
4024 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
4025 bool detached = false;
4026 int gtid = __kmp_get_gtid();
4031 __kmp_acquire_tas_lock(&event->lock, gtid);
4032 if (taskdata->td_flags.proxy == TASK_PROXY) {
4038 if (UNLIKELY(ompt_enabled.enabled))
4039 __ompt_task_finish(ptask, NULL, ompt_task_early_fulfill);
4042 event->type = KMP_EVENT_UNINITIALIZED;
4043 __kmp_release_tas_lock(&event->lock, gtid);
4049 if (UNLIKELY(ompt_enabled.enabled))
4050 __ompt_task_finish(ptask, NULL, ompt_task_late_fulfill);
4054 kmp_team_t *team = taskdata->td_team;
4055 kmp_info_t *thread = __kmp_get_thread();
4056 if (thread->th.th_team == team) {
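// Illustrative sketch, not part of the runtime sources: __kmp_fulfill_event
// and the proxy-completion helpers above back the OpenMP 5.0 detach clause.
// A hypothetical use, with start_async_write standing in for any
// asynchronous engine:
//
//   omp_event_handle_t ev;
//   #pragma omp task detach(ev)
//   start_async_write(fd, buf, nbytes, ev);  // returns before the write ends
//   // The task only completes after some thread, e.g. the completion
//   // callback of the asynchronous engine, calls:
//   //   omp_fulfill_event(ev);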
4074 kmp_task_t *__kmp_task_dup_alloc(kmp_info_t *thread, kmp_task_t *task_src) {
4076 kmp_taskdata_t *taskdata;
4077 kmp_taskdata_t *taskdata_src = KMP_TASK_TO_TASKDATA(task_src);
4078 kmp_taskdata_t *parent_task = taskdata_src->td_parent;
4079 size_t shareds_offset;
4082 KA_TRACE(10, ("__kmp_task_dup_alloc(enter): Th %p, source task %p\n", thread,
4084 KMP_DEBUG_ASSERT(taskdata_src->td_flags.proxy ==
4086 KMP_DEBUG_ASSERT(taskdata_src->td_flags.tasktype == TASK_EXPLICIT);
4087 task_size = taskdata_src->td_size_alloc;
4090 KA_TRACE(30, ("__kmp_task_dup_alloc: Th %p, malloc size %ld\n", thread,
4093 taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, task_size);
4095 taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, task_size);
4097 KMP_MEMCPY(taskdata, taskdata_src, task_size);
4099 task = KMP_TASKDATA_TO_TASK(taskdata);
4102 taskdata->td_task_id = KMP_GEN_TASK_ID();
4103 if (task->shareds != NULL) {
4104 shareds_offset = (char *)task_src->shareds - (char *)taskdata_src;
4105 task->shareds = &((char *)taskdata)[shareds_offset];
4106 KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) ==
4109 taskdata->td_alloc_thread = thread;
4110 taskdata->td_parent = parent_task;
4112 taskdata->td_taskgroup = parent_task->td_taskgroup;
4115 if (taskdata->td_flags.tiedness == TASK_TIED)
4116 taskdata->td_last_tied = taskdata;
4120 if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) {
4121 KMP_ATOMIC_INC(&parent_task->td_incomplete_child_tasks);
4122 if (parent_task->td_taskgroup)
4123 KMP_ATOMIC_INC(&parent_task->td_taskgroup->count);
4126 if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT)
4127 KMP_ATOMIC_INC(&taskdata->td_parent->td_allocated_child_tasks);
4131 ("__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n",
4132 thread, taskdata, taskdata->td_parent));
4134 if (UNLIKELY(ompt_enabled.enabled))
4135 __ompt_task_init(taskdata, thread->th.th_info.ds.ds_gtid);
4144 typedef void (*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32);
4146 KMP_BUILD_ASSERT(sizeof(long) == 4 || sizeof(long) == 8);
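// Illustrative sketch, not part of the runtime sources: the taskloop
// machinery below (kmp_taskloop_bounds_t, __kmp_taskloop_linear,
// __kmp_taskloop_recur and the __kmpc_taskloop* entry points) implements a
// user-level construct along these lines; the loop itself is hypothetical.
//
//   #pragma omp taskloop grainsize(64)
//   for (long i = 0; i < n; ++i)
//     c[i] = a[i] + b[i];
//
//   // num_tasks(4) selects the other sched path, and the strict variant of
//   // grainsize/num_tasks is carried by the `modifier` argument of
//   // __kmpc_taskloop_5.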
4151 class kmp_taskloop_bounds_t {
4153 const kmp_taskdata_t *taskdata;
4154 size_t lower_offset;
4155 size_t upper_offset;
4158 kmp_taskloop_bounds_t(kmp_task_t *_task, kmp_uint64 *lb, kmp_uint64 *ub)
4159 : task(_task), taskdata(KMP_TASK_TO_TASKDATA(task)),
4160 lower_offset((char *)lb - (char *)task),
4161 upper_offset((char *)ub - (char *)task) {
4162 KMP_DEBUG_ASSERT((char *)lb > (char *)_task);
4163 KMP_DEBUG_ASSERT((char *)ub > (char *)_task);
4165 kmp_taskloop_bounds_t(kmp_task_t *_task, const kmp_taskloop_bounds_t &bounds)
4166 : task(_task), taskdata(KMP_TASK_TO_TASKDATA(_task)),
4167 lower_offset(bounds.lower_offset), upper_offset(bounds.upper_offset) {}
4168 size_t get_lower_offset() const { return lower_offset; }
4169 size_t get_upper_offset() const { return upper_offset; }
4170 kmp_uint64 get_lb() const {
4172 #if defined(KMP_GOMP_COMPAT)
4174 if (!taskdata->td_flags.native) {
4175 retval = *(kmp_int64 *)((char *)task + lower_offset);
4178 if (taskdata->td_size_loop_bounds == 4) {
4179 kmp_int32 *lb = RCAST(kmp_int32 *, task->shareds);
4180 retval = (kmp_int64)*lb;
4182 kmp_int64 *lb = RCAST(kmp_int64 *, task->shareds);
4183 retval = (kmp_int64)*lb;
4188 retval = *(kmp_int64 *)((char *)task + lower_offset);
4192 kmp_uint64 get_ub() const {
4194 #if defined(KMP_GOMP_COMPAT)
4196 if (!taskdata->td_flags.native) {
4197 retval = *(kmp_int64 *)((char *)task + upper_offset);
4200 if (taskdata->td_size_loop_bounds == 4) {
4201 kmp_int32 *ub = RCAST(kmp_int32 *, task->shareds) + 1;
4202 retval = (kmp_int64)*ub;
4204 kmp_int64 *ub = RCAST(kmp_int64 *, task->shareds) + 1;
4205 retval = (kmp_int64)*ub;
4209 retval = *(kmp_int64 *)((char *)task + upper_offset);
4213 void set_lb(kmp_uint64 lb) {
4214 #if defined(KMP_GOMP_COMPAT)
4216 if (!taskdata->td_flags.native) {
4217 *(kmp_uint64 *)((char *)task + lower_offset) = lb;
4220 if (taskdata->td_size_loop_bounds == 4) {
4221 kmp_uint32 *lower = RCAST(kmp_uint32 *, task->shareds);
4222 *lower = (kmp_uint32)lb;
4224 kmp_uint64 *lower = RCAST(kmp_uint64 *, task->shareds);
4225 *lower = (kmp_uint64)lb;
4229 *(kmp_uint64 *)((char *)task + lower_offset) = lb;
4232 void set_ub(kmp_uint64 ub) {
4233 #if defined(KMP_GOMP_COMPAT)
4235 if (!taskdata->td_flags.native) {
4236 *(kmp_uint64 *)((char *)task + upper_offset) = ub;
4239 if (taskdata->td_size_loop_bounds == 4) {
4240 kmp_uint32 *upper = RCAST(kmp_uint32 *, task->shareds) + 1;
4241 *upper = (kmp_uint32)ub;
4243 kmp_uint64 *upper = RCAST(kmp_uint64 *, task->shareds) + 1;
4244 *upper = (kmp_uint64)ub;
4248 *(kmp_uint64 *)((char *)task + upper_offset) = ub;
4269 void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
4270 kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
4271 kmp_uint64 ub_glob, kmp_uint64 num_tasks,
4272 kmp_uint64 grainsize, kmp_uint64 extras,
4273 kmp_int64 last_chunk, kmp_uint64 tc,
4279 KMP_TIME_PARTITIONED_BLOCK(OMP_taskloop_scheduling);
4280 p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
4282 kmp_taskloop_bounds_t task_bounds(task, lb, ub);
4283 kmp_uint64 lower = task_bounds.get_lb();
4284 kmp_uint64 upper = task_bounds.get_ub();
4286 kmp_info_t *thread = __kmp_threads[gtid];
4287 kmp_taskdata_t *current_task = thread->th.th_current_task;
4288 kmp_task_t *next_task;
4289 kmp_int32 lastpriv = 0;
4291 KMP_DEBUG_ASSERT(tc == num_tasks * grainsize +
4292 (last_chunk < 0 ? last_chunk : extras));
4293 KMP_DEBUG_ASSERT(num_tasks > extras);
4294 KMP_DEBUG_ASSERT(num_tasks > 0);
4295 KA_TRACE(20, ("__kmp_taskloop_linear: T#%d: %lld tasks, grainsize %lld, "
4296 "extras %lld, last_chunk %lld, i=%lld,%lld(%d)%lld, dup %p\n",
4297 gtid, num_tasks, grainsize, extras, last_chunk, lower, upper,
4298 ub_glob, st, task_dup));
4301 for (i = 0; i < num_tasks; ++i) {
4302 kmp_uint64 chunk_minus_1;
4304 chunk_minus_1 = grainsize - 1;
4306 chunk_minus_1 = grainsize;
4309 upper = lower + st * chunk_minus_1;
4313 if (i == num_tasks - 1) {
4316 KMP_DEBUG_ASSERT(upper == *ub);
4317 if (upper == ub_glob)
4319 } else if (st > 0) {
4320 KMP_DEBUG_ASSERT((kmp_uint64)st > *ub - upper);
4321 if ((kmp_uint64)st > ub_glob - upper)
4324 KMP_DEBUG_ASSERT(upper + st < *ub);
4325 if (upper - ub_glob < (kmp_uint64)(-st))
4329 next_task = __kmp_task_dup_alloc(thread, task);
4330 kmp_taskdata_t *next_taskdata = KMP_TASK_TO_TASKDATA(next_task);
4331 kmp_taskloop_bounds_t next_task_bounds =
4332 kmp_taskloop_bounds_t(next_task, task_bounds);
4335 next_task_bounds.set_lb(lower);
4336 if (next_taskdata->td_flags.native) {
4337 next_task_bounds.set_ub(upper + (st > 0 ? 1 : -1));
4339 next_task_bounds.set_ub(upper);
4341 if (ptask_dup != NULL)
4343 ptask_dup(next_task, task, lastpriv);
4345 ("__kmp_taskloop_linear: T#%d; task #%llu: task %p: lower %lld, "
4346 "upper %lld stride %lld, (offsets %p %p)\n",
4347 gtid, i, next_task, lower, upper, st,
4348 next_task_bounds.get_lower_offset(),
4349 next_task_bounds.get_upper_offset()));
4351 __kmp_omp_taskloop_task(NULL, gtid, next_task,
4354 __kmp_omp_task(gtid, next_task, true);
4359 __kmp_task_start(gtid, task, current_task);
4361 __kmp_task_finish<false>(gtid, task, current_task);
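// Worked example for the chunking above, with illustrative values and no
// strict modifier (so last_chunk stays 0): tc = 10 iterations and
// num_tasks = 3 give grainsize = 10 / 3 = 3 and extras = 10 % 3 = 1, so the
// generated tasks cover 4, 3 and 3 iterations. The first `extras` tasks take
// grainsize + 1 iterations (chunk_minus_1 == grainsize), the remaining ones
// exactly grainsize, which satisfies the
// KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras) check.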
4366 typedef struct __taskloop_params {
4373 kmp_uint64 num_tasks;
4374 kmp_uint64 grainsize;
4376 kmp_int64 last_chunk;
4378 kmp_uint64 num_t_min;
4382 } __taskloop_params_t;
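// Descriptive note: __kmp_taskloop_recur below halves the remaining task
// count, wraps the upper half in a __kmp_taskloop_task carrying the parameter
// block above so another thread can keep splitting it, and recurses on the
// lower half until the count drops to num_t_min, at which point
// __kmp_taskloop_linear emits the leaf tasks.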
4384 void __kmp_taskloop_recur(ident_t *, int, kmp_task_t *, kmp_uint64 *,
4385 kmp_uint64 *, kmp_int64, kmp_uint64, kmp_uint64,
4386 kmp_uint64, kmp_uint64, kmp_int64, kmp_uint64,
4394 int __kmp_taskloop_task(int gtid, void *ptask) {
4395 __taskloop_params_t *p =
4396 (__taskloop_params_t *)((kmp_task_t *)ptask)->shareds;
4397 kmp_task_t *task = p->task;
4398 kmp_uint64 *lb = p->lb;
4399 kmp_uint64 *ub = p->ub;
4400 void *task_dup = p->task_dup;
4402 kmp_int64 st = p->st;
4403 kmp_uint64 ub_glob = p->ub_glob;
4404 kmp_uint64 num_tasks = p->num_tasks;
4405 kmp_uint64 grainsize = p->grainsize;
4406 kmp_uint64 extras = p->extras;
4407 kmp_int64 last_chunk = p->last_chunk;
4408 kmp_uint64 tc = p->tc;
4409 kmp_uint64 num_t_min = p->num_t_min;
4411 void *codeptr_ra = p->codeptr_ra;
4414 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
4415 KMP_DEBUG_ASSERT(task != NULL);
4417 ("__kmp_taskloop_task: T#%d, task %p: %lld tasks, grainsize"
4418 " %lld, extras %lld, last_chunk %lld, i=%lld,%lld(%d), dup %p\n",
4419 gtid, taskdata, num_tasks, grainsize, extras, last_chunk, *lb, *ub,
4422 KMP_DEBUG_ASSERT(num_tasks * 2 + 1 > num_t_min);
4423 if (num_tasks > num_t_min)
4424 __kmp_taskloop_recur(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
4425 grainsize, extras, last_chunk, tc, num_t_min,
4431 __kmp_taskloop_linear(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
4432 grainsize, extras, last_chunk, tc,
4438 KA_TRACE(40, ("__kmp_taskloop_task(exit): T#%d\n", gtid));
4460 void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task,
4461 kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
4462 kmp_uint64 ub_glob, kmp_uint64 num_tasks,
4463 kmp_uint64 grainsize, kmp_uint64 extras,
4464 kmp_int64 last_chunk, kmp_uint64 tc,
4465 kmp_uint64 num_t_min,
4470 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
4471 KMP_DEBUG_ASSERT(task != NULL);
4472 KMP_DEBUG_ASSERT(num_tasks > num_t_min);
4474 ("__kmp_taskloop_recur: T#%d, task %p: %lld tasks, grainsize"
4475 " %lld, extras %lld, last_chunk %lld, i=%lld,%lld(%d), dup %p\n",
4476 gtid, taskdata, num_tasks, grainsize, extras, last_chunk, *lb, *ub,
4478 p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
4479 kmp_uint64 lower = *lb;
4480 kmp_info_t *thread = __kmp_threads[gtid];
4482 kmp_task_t *next_task;
4483 size_t lower_offset =
4484 (char *)lb - (char *)task;
4485 size_t upper_offset =
4486 (char *)ub - (char *)task;
4488 KMP_DEBUG_ASSERT(tc == num_tasks * grainsize +
4489 (last_chunk < 0 ? last_chunk : extras));
4490 KMP_DEBUG_ASSERT(num_tasks > extras);
4491 KMP_DEBUG_ASSERT(num_tasks > 0);
4494 kmp_uint64 lb1, ub0, tc0, tc1, ext0, ext1;
4495 kmp_int64 last_chunk0 = 0, last_chunk1 = 0;
4496 kmp_uint64 gr_size0 = grainsize;
4497 kmp_uint64 n_tsk0 = num_tasks >> 1;
4498 kmp_uint64 n_tsk1 = num_tasks - n_tsk0;
4499 if (last_chunk < 0) {
4501 last_chunk1 = last_chunk;
4502 tc0 = grainsize * n_tsk0;
4504 } else if (n_tsk0 <= extras) {
4507 ext1 = extras - n_tsk0;
4508 tc0 = gr_size0 * n_tsk0;
4513 tc1 = grainsize * n_tsk1;
4516 ub0 = lower + st * (tc0 - 1);
4520 next_task = __kmp_task_dup_alloc(thread, task);
4522 *(kmp_uint64 *)((char *)next_task + lower_offset) = lb1;
4523 if (ptask_dup != NULL)
4524 ptask_dup(next_task, task, 0);
4529 kmp_taskdata_t *current_task = thread->th.th_current_task;
4530 thread->th.th_current_task = taskdata->td_parent;
4531 kmp_task_t *new_task =
4532 __kmpc_omp_task_alloc(loc, gtid, 1, 3 * sizeof(void *),
4533 sizeof(__taskloop_params_t), &__kmp_taskloop_task);
4535 thread->th.th_current_task = current_task;
4536 __taskloop_params_t *p = (__taskloop_params_t *)new_task->shareds;
4537 p->task = next_task;
4538 p->lb = (kmp_uint64 *)((char *)next_task + lower_offset);
4539 p->ub = (kmp_uint64 *)((char *)next_task + upper_offset);
4540 p->task_dup = task_dup;
4542 p->ub_glob = ub_glob;
4543 p->num_tasks = n_tsk1;
4544 p->grainsize = grainsize;
4546 p->last_chunk = last_chunk1;
4548 p->num_t_min = num_t_min;
4550 p->codeptr_ra = codeptr_ra;
4555 __kmp_omp_taskloop_task(NULL, gtid, new_task, codeptr_ra);
4557 __kmp_omp_task(gtid, new_task, true);
4561 if (n_tsk0 > num_t_min)
4562 __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0, gr_size0,
4563 ext0, last_chunk0, tc0, num_t_min,
4569 __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0,
4570 gr_size0, ext0, last_chunk0, tc0,
4576 KA_TRACE(40, ("__kmp_taskloop_recur(exit): T#%d\n", gtid));
4579 static void __kmp_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
4580 kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
4581 int nogroup, int sched, kmp_uint64 grainsize,
4582 int modifier, void *task_dup) {
4583 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
4584 KMP_DEBUG_ASSERT(task != NULL);
4586 #if OMPT_SUPPORT && OMPT_OPTIONAL
4587 OMPT_STORE_RETURN_ADDRESS(gtid);
4589 __kmpc_taskgroup(loc, gtid);
4594 kmp_taskloop_bounds_t task_bounds(task, lb, ub);
4597 kmp_uint64 lower = task_bounds.get_lb();
4598 kmp_uint64 upper = task_bounds.get_ub();
4599 kmp_uint64 ub_glob = upper;
4600 kmp_uint64 num_tasks = 0, extras = 0;
4601 kmp_int64 last_chunk = 0;
4603 kmp_uint64 num_tasks_min = __kmp_taskloop_min_tasks;
4604 kmp_info_t *thread = __kmp_threads[gtid];
4605 kmp_taskdata_t *current_task = thread->th.th_current_task;
4607 KA_TRACE(20, ("__kmp_taskloop: T#%d, task %p, lb %lld, ub %lld, st %lld, "
4608 "grain %llu(%d, %d), dup %p\n",
4609 gtid, taskdata, lower, upper, st, grainsize, sched, modifier,
4614 tc = upper - lower + 1;
4615 } else if (st < 0) {
4616 tc = (lower - upper) / (-st) + 1;
4618 tc = (upper - lower) / st + 1;
4621 KA_TRACE(20, ("__kmp_taskloop(exit): T#%d zero-trip loop\n", gtid));
4623 __kmp_task_start(gtid, task, current_task);
4625 __kmp_task_finish<false>(gtid, task, current_task);
4629 #if OMPT_SUPPORT && OMPT_OPTIONAL
4630 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
4631 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
4632 if (ompt_enabled.ompt_callback_work) {
4633 ompt_callbacks.ompt_callback(ompt_callback_work)(
4634 ompt_work_taskloop, ompt_scope_begin, &(team_info->parallel_data),
4635 &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0));
4639 if (num_tasks_min == 0)
4642 KMP_MIN(thread->th.th_team_nproc * 10, INITIAL_TASK_DEQUE_SIZE);
4648 grainsize = thread->th.th_team_nproc * 10;
4651 if (grainsize > tc) {
4656 num_tasks = grainsize;
4657 grainsize = tc / num_tasks;
4658 extras = tc % num_tasks;
4662 if (grainsize > tc) {
4668 num_tasks = (tc + grainsize - 1) / grainsize;
4669 last_chunk = tc - (num_tasks * grainsize);
4672 num_tasks = tc / grainsize;
4674 grainsize = tc / num_tasks;
4675 extras = tc % num_tasks;
4680 KMP_ASSERT2(0, "unknown scheduling of taskloop");
4683 KMP_DEBUG_ASSERT(tc == num_tasks * grainsize +
4684 (last_chunk < 0 ? last_chunk : extras));
4685 KMP_DEBUG_ASSERT(num_tasks > extras);
4686 KMP_DEBUG_ASSERT(num_tasks > 0);
4692 taskdata->td_flags.task_serial = 1;
4693 taskdata->td_flags.tiedness = TASK_TIED;
4695 __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
4696 grainsize, extras, last_chunk, tc,
4698 OMPT_GET_RETURN_ADDRESS(0),
4703 } else if (num_tasks > num_tasks_min && !taskdata->td_flags.native) {
4704 KA_TRACE(20, ("__kmp_taskloop: T#%d, go recursive: tc %llu, #tasks %llu"
4705 "(%lld), grain %llu, extras %llu, last_chunk %lld\n",
4706 gtid, tc, num_tasks, num_tasks_min, grainsize, extras,
4708 __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
4709 grainsize, extras, last_chunk, tc, num_tasks_min,
4711 OMPT_GET_RETURN_ADDRESS(0),
4715 KA_TRACE(20, ("__kmp_taskloop: T#%d, go linear: tc %llu, #tasks %llu"
4716 "(%lld), grain %llu, extras %llu, last_chunk %lld\n",
4717 gtid, tc, num_tasks, num_tasks_min, grainsize, extras,
4719 __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
4720 grainsize, extras, last_chunk, tc,
4722 OMPT_GET_RETURN_ADDRESS(0),
4727 #if OMPT_SUPPORT && OMPT_OPTIONAL
4728 if (ompt_enabled.ompt_callback_work) {
4729 ompt_callbacks.ompt_callback(ompt_callback_work)(
4730 ompt_work_taskloop, ompt_scope_end, &(team_info->parallel_data),
4731 &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0));
4736 #if OMPT_SUPPORT && OMPT_OPTIONAL
4737 OMPT_STORE_RETURN_ADDRESS(gtid);
4739 __kmpc_end_taskgroup(loc, gtid);
4741 KA_TRACE(20, ("__kmp_taskloop(exit): T#%d\n", gtid));
4761 kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup,
4762 int sched, kmp_uint64 grainsize, void *task_dup) {
4763 __kmp_assert_valid_gtid(gtid);
4764 KA_TRACE(20, ("__kmpc_taskloop(enter): T#%d\n", gtid));
4765 __kmp_taskloop(loc, gtid, task, if_val, lb, ub, st, nogroup, sched, grainsize,
4767 KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d\n", gtid));
4788 kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
4789 int nogroup, int sched, kmp_uint64 grainsize,
4790 int modifier, void *task_dup) {
4791 __kmp_assert_valid_gtid(gtid);
4792 KA_TRACE(20, ("__kmpc_taskloop_5(enter): T#%d\n", gtid));
4793 __kmp_taskloop(loc, gtid, task, if_val, lb, ub, st, nogroup, sched, grainsize,
4794 modifier, task_dup);
4795 KA_TRACE(20, ("__kmpc_taskloop_5(exit): T#%d\n", gtid));