/*
 * kmp_tasking.cpp -- OpenMP 3.0 tasking support.
 */
#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#include "kmp_wait_release.h"
#include "kmp_taskdeps.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif
#if ENABLE_LIBOMPTARGET
// Function pointer filled in by __kmp_init_target_task() with whatever
// KMP_DLSYM returns for "__tgt_target_nowait_query".
static void (*tgt_target_nowait_query)(void **);

// Looks up "__tgt_target_nowait_query" through KMP_DLSYM and stores the result
// in tgt_target_nowait_query.
void __kmp_init_target_task() {
  // The lookup result is stored through a void** view of the function pointer.
  void **query_slot = (void **)(&tgt_target_nowait_query);
  *query_slot = KMP_DLSYM("__tgt_target_nowait_query");
}
#endif
// Forward declarations of helpers that are referenced in this part of the
// file; their definitions are not visible here.
static void __kmp_enable_tasking(kmp_task_team_t *task_team,
kmp_info_t *this_thr);
static void __kmp_alloc_task_deque(kmp_info_t *thread,
kmp_thread_data_t *thread_data);
static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
kmp_task_team_t *task_team);
static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask);
#if OMPX_TASKGRAPH
// Declarations only present when OMPX_TASKGRAPH is enabled.
static kmp_tdg_info_t *__kmp_find_tdg(kmp_int32 tdg_id);
int __kmp_taskloop_task(int gtid, void *ptask);
#endif
#ifdef BUILD_TIED_TASK_STACK
// Debug helper: walks the suspended tied-task stack stored in thread_data from
// its top down to the first slot of the first block, emitting one KA_TRACE
// line per entry.
//
// gtid        global thread id, used only in the trace output
// thread_data per-thread data that owns the stack (td.td_susp_tied_tasks)
// threshold   trace level handed to KA_TRACE
// location    caller-supplied label printed in every trace line
static void __kmp_trace_task_stack(kmp_int32 gtid,
                                   kmp_thread_data_t *thread_data,
                                   int threshold, char *location) {
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_taskdata_t **stack_top = task_stack->ts_top;
  kmp_int32 entries = task_stack->ts_entries;
  kmp_taskdata_t *tied_task;

  // Pass the address of the first block: "%p" needs a pointer, not the block
  // structure itself.
  KA_TRACE(
      threshold,
      ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
       "first_block = %p, stack_top = %p \n",
       location, gtid, entries, &task_stack->ts_first_block, stack_top));

  KMP_DEBUG_ASSERT(stack_top != NULL);
  KMP_DEBUG_ASSERT(entries > 0);

  while (entries != 0) {
    KMP_DEBUG_ASSERT(stack_top != &task_stack->ts_first_block.sb_block[0]);
    // Crossing a block boundary: continue at the end of the previous block.
    // The parentheses are required; "==" binds tighter than "&", so the
    // unparenthesized form was always false.
    if ((entries & TASK_STACK_INDEX_MASK) == 0) {
      kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(stack_top);
      stack_block = stack_block->sb_prev;
      stack_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
    }

    // Step down to the next entry and report it.
    stack_top--;
    entries--;
    tied_task = *stack_top;

    KMP_DEBUG_ASSERT(tied_task != NULL);
    // TASK_TIED is a value of the tiedness flag, not of tasktype.
    KMP_DEBUG_ASSERT(tied_task->td_flags.tiedness == TASK_TIED);

    KA_TRACE(threshold,
             ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
              "stack_top=%p, tied_task=%p\n",
              location, gtid, entries, stack_top, tied_task));
  }
  // After visiting every entry we must be at the very bottom of the stack.
  KMP_DEBUG_ASSERT(stack_top == &task_stack->ts_first_block.sb_block[0]);

  KA_TRACE(threshold,
           ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
            location, gtid));
}
// Resets the suspended tied-task stack embedded in thread_data to the empty
// state: the top points at the start of the embedded first block, the first
// TASK_STACK_BLOCK_SIZE pointer slots are zeroed, and the first block is
// unlinked from any neighbours. gtid is not used.
static void __kmp_init_task_stack(kmp_int32 gtid,
                                  kmp_thread_data_t *thread_data) {
  kmp_task_stack_t *stack = &thread_data->td.td_susp_tied_tasks;
  kmp_stack_block_t *head_block = &stack->ts_first_block;

  // Clear the entry slots of the embedded block.
  memset((void *)head_block, '\0',
         TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));

  // The embedded block starts out with no neighbours.
  head_block->sb_next = NULL;
  head_block->sb_prev = NULL;

  // Empty stack: top at the beginning of the first block, no entries.
  stack->ts_top = (kmp_taskdata_t **)head_block;
  stack->ts_entries = TASK_STACK_EMPTY;
}
// Releases every block of the suspended tied-task stack in thread_data except
// the first block, which is embedded in the stack structure itself, and
// leaves the stack with no entries and a NULL top.
//
// gtid        global id of the thread whose allocator frees the blocks
// thread_data per-thread data that owns the stack; it must already be empty
static void __kmp_free_task_stack(kmp_int32 gtid,
                                  kmp_thread_data_t *thread_data) {
  // __kmp_thread_free needs the owning thread; the original body referenced an
  // undeclared `thread`, so derive it from gtid.
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_stack_block_t *stack_block = &task_stack->ts_first_block;

  KMP_DEBUG_ASSERT(task_stack->ts_entries == TASK_STACK_EMPTY);

  while (stack_block != NULL) {
    // Remember the successor before the links are cleared / the block freed.
    kmp_stack_block_t *next_block = stack_block->sb_next;

    stack_block->sb_next = NULL;
    stack_block->sb_prev = NULL;
    // The first block is part of task_stack and must not be freed.
    if (stack_block != &task_stack->ts_first_block) {
      __kmp_thread_free(thread, stack_block);
    }
    stack_block = next_block;
  }
  // Mark the stack as torn down.
  task_stack->ts_entries = 0;
  task_stack->ts_top = NULL;
}
// Pushes tied_task onto the suspended tied-task stack of the thread identified
// by gtid. Nothing is pushed when the task's team_serial or tasking_ser flag
// is set. When the push fills a block, the top moves to the next block, which
// is allocated on demand and linked behind the current one.
//
// gtid      global thread id; selects the thread's slot in tt_threads_data
// thread    the pushing thread (owner of the task team and allocator)
// tied_task task being pushed; expected to be tied
static void __kmp_push_task_stack(kmp_int32 gtid, kmp_info_t *thread,
                                  kmp_taskdata_t *tied_task) {
  kmp_thread_data_t *thread_data =
      &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;

  if (tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser) {
    return; // serialized: the stack is not maintained
  }

  // TASK_TIED is a value of the tiedness flag, not of tasktype.
  KMP_DEBUG_ASSERT(tied_task->td_flags.tiedness == TASK_TIED);
  KMP_DEBUG_ASSERT(task_stack->ts_top != NULL);

  KA_TRACE(20,
           ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
            gtid, thread, tied_task));

  // Store the entry, then advance the top.
  *(task_stack->ts_top) = tied_task;
  task_stack->ts_top++;
  task_stack->ts_entries++;

  // The current block just became full. The parentheses are required; "=="
  // binds tighter than "&", so the unparenthesized test was always false and
  // the stack ran past the end of its block.
  if ((task_stack->ts_entries & TASK_STACK_INDEX_MASK) == 0) {
    // ts_top is one past the last slot; step back to the block's start.
    kmp_stack_block_t *stack_block =
        (kmp_stack_block_t *)(task_stack->ts_top - TASK_STACK_BLOCK_SIZE);

    if (stack_block->sb_next != NULL) {
      // A following block already exists: reuse it.
      task_stack->ts_top = &stack_block->sb_next->sb_block[0];
    } else {
      // Allocate a new block and link it after the current one.
      kmp_stack_block_t *new_block = (kmp_stack_block_t *)__kmp_thread_calloc(
          thread, sizeof(kmp_stack_block_t));

      task_stack->ts_top = &new_block->sb_block[0];
      stack_block->sb_next = new_block;
      new_block->sb_prev = stack_block;
      new_block->sb_next = NULL;

      KA_TRACE(
          30,
          ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
           gtid, tied_task, new_block));
    }
  }
  KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid,
                tied_task));
}
// Pops the top entry of the suspended tied-task stack of the thread identified
// by gtid and checks (debug builds) that it is ending_task. Nothing is popped
// when the task's team_serial or tasking_ser flag is set, mirroring
// __kmp_push_task_stack.
//
// gtid        global thread id; selects the thread's slot in tt_threads_data
// thread      the popping thread
// ending_task task that is finishing and is expected on top of the stack
static void __kmp_pop_task_stack(kmp_int32 gtid, kmp_info_t *thread,
                                 kmp_taskdata_t *ending_task) {
  // Same access path as in __kmp_push_task_stack (through the `tt` member).
  kmp_thread_data_t *thread_data =
      &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_taskdata_t *tied_task;

  if (ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser) {
    return; // serialized: nothing was pushed
  }

  KMP_DEBUG_ASSERT(task_stack->ts_top != NULL);
  KMP_DEBUG_ASSERT(task_stack->ts_entries > 0);

  KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid,
                thread));

  // The top sits at the start of a block: move to the end of the previous
  // block. The parentheses are required; "==" binds tighter than "&", so the
  // unparenthesized test was always false.
  if ((task_stack->ts_entries & TASK_STACK_INDEX_MASK) == 0) {
    kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(task_stack->ts_top);

    stack_block = stack_block->sb_prev;
    task_stack->ts_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
  }

  // Step down to the top entry and read it.
  task_stack->ts_top--;
  task_stack->ts_entries--;

  tied_task = *(task_stack->ts_top);

  KMP_DEBUG_ASSERT(tied_task != NULL);
  // TASK_TIED is a value of the tiedness flag, not of tasktype.
  KMP_DEBUG_ASSERT(tied_task->td_flags.tiedness == TASK_TIED);
  KMP_DEBUG_ASSERT(tied_task == ending_task);

  KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid,
                tied_task));
  return;
}
#endif
// Decides whether thread gtid may run tasknew while taskcurr is its current
// task.
//
// Returns false when (a) is_constrained is nonzero, tasknew is tied, and
// tasknew is not a descendant of taskcurr->td_last_tied (checked only if that
// task is explicit or has td_taskwait_thread > 0), or (b) one of the locks
// listed on tasknew's dependence node cannot be acquired. Returns true
// otherwise; in that case all listed locks are held and mtx_num_locks has been
// negated.
static bool __kmp_task_is_allowed(int gtid, const kmp_int32 is_constrained,
const kmp_taskdata_t *tasknew,
const kmp_taskdata_t *taskcurr) {
if (is_constrained && (tasknew->td_flags.tiedness == TASK_TIED)) {
kmp_taskdata_t *current = taskcurr->td_last_tied;
KMP_DEBUG_ASSERT(current != NULL);
if (current->td_flags.tasktype == TASK_EXPLICIT ||
current->td_taskwait_thread > 0) {
kmp_int32 level = current->td_level;
kmp_taskdata_t *parent = tasknew->td_parent;
// Climb tasknew's ancestor chain until `current` is reached or an ancestor
// is no deeper than `current`.
while (parent != current && parent->td_level > level) {
parent = parent->td_parent;
KMP_DEBUG_ASSERT(parent != NULL);
}
// `current` is not among tasknew's ancestors: reject.
if (parent != current)
return false;
}
}
// Lock handling for the candidate's dependence node (skipped for taskgraph
// tasks when OMPX_TASKGRAPH is enabled).
kmp_depnode_t *node = tasknew->td_depnode;
#if OMPX_TASKGRAPH
if (!tasknew->is_taskgraph && UNLIKELY(node && (node->dn.mtx_num_locks > 0))) {
#else
if (UNLIKELY(node && (node->dn.mtx_num_locks > 0))) {
#endif
for (int i = 0; i < node->dn.mtx_num_locks; ++i) {
KMP_DEBUG_ASSERT(node->dn.mtx_locks[i] != NULL);
if (__kmp_test_lock(node->dn.mtx_locks[i], gtid))
continue;
// Lock i was not obtained: release locks 0..i-1 in reverse order and give up.
for (int j = i - 1; j >= 0; --j)
__kmp_release_lock(node->dn.mtx_locks[j], gtid);
return false;
}
// All locks obtained; the count is stored negated from here on.
node->dn.mtx_num_locks = -node->dn.mtx_num_locks;
}
return true;
}
// Doubles the capacity of the task deque in thread_data. The deque is expected
// to be full (debug-asserted). Entries are copied in order starting from the
// current head into a freshly allocated array, so afterwards the head is 0 and
// the tail equals the old capacity. The old array is released.
static void __kmp_realloc_task_deque(kmp_info_t *thread,
                                     kmp_thread_data_t *thread_data) {
  kmp_int32 old_capacity = TASK_DEQUE_SIZE(thread_data->td);
  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) == old_capacity);
  kmp_int32 grown_capacity = 2 * old_capacity;

  KE_TRACE(10, ("__kmp_realloc_task_deque: T#%d reallocating deque[from %d to "
                "%d] for thread_data %p\n",
                __kmp_gtid_from_thread(thread), old_capacity, grown_capacity,
                thread_data));

  kmp_taskdata_t **grown_deque = (kmp_taskdata_t **)__kmp_allocate(
      grown_capacity * sizeof(kmp_taskdata_t *));

  // Unroll the circular buffer: slot 0 of the new array receives the old head.
  int src = thread_data->td.td_deque_head;
  for (int dst = 0; dst < old_capacity; ++dst) {
    grown_deque[dst] = thread_data->td.td_deque[src];
    src = (src + 1) & TASK_DEQUE_MASK(thread_data->td);
  }

  __kmp_free(thread_data->td.td_deque);

  // Install the new storage with the entries occupying [0, old_capacity).
  thread_data->td.td_deque_head = 0;
  thread_data->td.td_deque_tail = old_capacity;
  thread_data->td.td_deque = grown_deque;
  thread_data->td.td_deque_size = grown_capacity;
}
// Allocates one priority-list node and prepares the deque embedded in it:
// the deque lock is initialized, td_deque_last_stolen is set to -1, and an
// array of INITIAL_TASK_DEQUE_SIZE task pointers is allocated. The caller
// fills in the node's priority and next fields.
static kmp_task_pri_t *__kmp_alloc_task_pri_list() {
  kmp_task_pri_t *node =
      (kmp_task_pri_t *)__kmp_allocate(sizeof(kmp_task_pri_t));
  kmp_thread_data_t *thread_data = &node->td;

  __kmp_init_bootstrap_lock(&thread_data->td.td_deque_lock);
  thread_data->td.td_deque_last_stolen = -1;

  KE_TRACE(20, ("__kmp_alloc_task_pri_list: T#%d allocating deque[%d] "
                "for thread_data %p\n",
                __kmp_get_gtid(), INITIAL_TASK_DEQUE_SIZE, thread_data));

  thread_data->td.td_deque = (kmp_taskdata_t **)__kmp_allocate(
      INITIAL_TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
  thread_data->td.td_deque_size = INITIAL_TASK_DEQUE_SIZE;

  return node;
}
// Returns the deque data for priority `pri` from the task team's priority
// list, creating and inserting a node when none exists yet. The list is kept
// in descending priority order. The list head must be non-NULL on entry.
static kmp_thread_data_t *
__kmp_get_priority_deque_data(kmp_task_team_t *task_team, kmp_int32 pri) {
  kmp_task_pri_t *head = task_team->tt.tt_task_pri_list;

  // Exact match at the head.
  if (head->priority == pri)
    return &head->td;

  // Every existing node has a lower priority: the new node becomes the head.
  if (head->priority < pri) {
    kmp_task_pri_t *fresh = __kmp_alloc_task_pri_list();
    kmp_thread_data_t *fresh_data = &fresh->td;
    fresh->priority = pri;
    fresh->next = head;
    task_team->tt.tt_task_pri_list = fresh;
    return fresh_data;
  }

  // head->priority > pri: advance while the successor is still higher than pri.
  kmp_task_pri_t *prev = head;
  kmp_task_pri_t *succ = prev->next;
  while (succ && succ->priority > pri) {
    prev = succ;
    succ = prev->next;
  }

  // The successor already serves this priority.
  if (succ != NULL && succ->priority == pri)
    return &succ->td;

  // Insert after prev; succ is NULL when appending at the tail. The node is
  // fully set up before it is linked into the list.
  kmp_task_pri_t *fresh = __kmp_alloc_task_pri_list();
  kmp_thread_data_t *fresh_data = &fresh->td;
  fresh->priority = pri;
  fresh->next = succ;
  prev->next = fresh;
  return fresh_data;
}
// Pushes taskdata onto the task team's deque for priority `pri`.
//
// The deque is located (and created if needed) in the team's priority list;
// list modifications are done while holding tt_task_pri_lock. The task is then
// appended under the deque's own lock.
//
// Returns TASK_NOT_PUSHED when the deque is full, task throttling is enabled
// and __kmp_task_is_allowed() accepts the task; otherwise the deque is grown
// if necessary and TASK_SUCCESSFULLY_PUSHED is returned.
static kmp_int32 __kmp_push_priority_task(kmp_int32 gtid, kmp_info_t *thread,
kmp_taskdata_t *taskdata,
kmp_task_team_t *task_team,
kmp_int32 pri) {
kmp_thread_data_t *thread_data = NULL;
KA_TRACE(20,
("__kmp_push_priority_task: T#%d trying to push task %p, pri %d.\n",
gtid, taskdata, pri));
// The list head is first read without holding the list lock.
kmp_task_pri_t *lst = task_team->tt.tt_task_pri_list;
if (UNLIKELY(lst == NULL)) {
__kmp_acquire_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
// Re-read the head under the lock before creating the first node.
if (task_team->tt.tt_task_pri_list == NULL) {
kmp_task_pri_t *list = __kmp_alloc_task_pri_list();
thread_data = &list->td;
list->priority = pri;
list->next = NULL;
task_team->tt.tt_task_pri_list = list;
} else {
thread_data = __kmp_get_priority_deque_data(task_team, pri);
}
__kmp_release_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
} else {
// Head already serves this priority: no list lock is taken.
if (lst->priority == pri) {
thread_data = &lst->td;
} else {
__kmp_acquire_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
thread_data = __kmp_get_priority_deque_data(task_team, pri);
__kmp_release_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
}
}
KMP_DEBUG_ASSERT(thread_data);
// From here on the selected deque is modified under its own lock.
__kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
if (TCR_4(thread_data->td.td_deque_ntasks) >=
TASK_DEQUE_SIZE(thread_data->td)) {
// Deque is full: either refuse the push or double the deque.
if (__kmp_enable_task_throttling &&
__kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
thread->th.th_current_task)) {
__kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
KA_TRACE(20, ("__kmp_push_priority_task: T#%d deque is full; returning "
"TASK_NOT_PUSHED for task %p\n",
gtid, taskdata));
return TASK_NOT_PUSHED;
} else {
__kmp_realloc_task_deque(thread, thread_data);
}
}
KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) <
TASK_DEQUE_SIZE(thread_data->td));
// Append at the tail and advance the tail with wrap-around.
thread_data->td.td_deque[thread_data->td.td_deque_tail] = taskdata;
thread_data->td.td_deque_tail =
(thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
TCW_4(thread_data->td.td_deque_ntasks,
TCR_4(thread_data->td.td_deque_ntasks) + 1);
KMP_FSYNC_RELEASING(thread->th.th_current_task);
KMP_FSYNC_RELEASING(taskdata);
KA_TRACE(20, ("__kmp_push_priority_task: T#%d returning "
"TASK_SUCCESSFULLY_PUSHED: task=%p ntasks=%d head=%u tail=%u\n",
gtid, taskdata, thread_data->td.td_deque_ntasks,
thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
__kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
// Count of queued priority tasks; incremented after the deque lock is dropped.
// NOTE(review): presumably tt_num_task_pri is an atomic type - confirm.
task_team->tt.tt_num_task_pri++;
return TASK_SUCCESSFULLY_PUSHED;
}
// Queues `task` for deferred execution on behalf of thread gtid.
//
// Returns TASK_SUCCESSFULLY_PUSHED when the task was handed to a hidden helper
// thread, pushed to a priority deque, or appended to the thread's own deque.
// Returns TASK_NOT_PUSHED when the task is marked task_serial, or when the
// deque is full, throttling is enabled and __kmp_task_is_allowed() accepts the
// task.
static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
kmp_info_t *thread = __kmp_threads[gtid];
kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
// A hidden-helper task encountered by a thread that is not a hidden helper is
// given to the shadow gtid's thread and the helper threads are signalled.
if (UNLIKELY(taskdata->td_flags.hidden_helper &&
!KMP_HIDDEN_HELPER_THREAD(gtid))) {
kmp_int32 shadow_gtid = KMP_GTID_TO_SHADOW_GTID(gtid);
__kmpc_give_task(task, __kmp_tid_from_gtid(shadow_gtid));
__kmp_hidden_helper_worker_thread_signal();
return TASK_SUCCESSFULLY_PUSHED;
}
kmp_task_team_t *task_team = thread->th.th_task_team;
kmp_int32 tid = __kmp_tid_from_gtid(gtid);
kmp_thread_data_t *thread_data;
KA_TRACE(20,
("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata));
// Untied tasks bump td_untied_count on every push; __kmp_task_finish
// decrements it and only completes the task when it drops to zero.
if (UNLIKELY(taskdata->td_flags.tiedness == TASK_UNTIED)) {
kmp_int32 counter = 1 + KMP_ATOMIC_INC(&taskdata->td_untied_count);
KMP_DEBUG_USE_VAR(counter);
KA_TRACE(
20,
("__kmp_push_task: T#%d untied_count (%d) incremented for task %p\n",
gtid, counter, taskdata));
}
// Serialized tasks are never queued.
if (UNLIKELY(taskdata->td_flags.task_serial)) {
KA_TRACE(20, ("__kmp_push_task: T#%d team serialized; returning "
"TASK_NOT_PUSHED for task %p\n",
gtid, taskdata));
return TASK_NOT_PUSHED;
}
KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
// Enable tasking on the task team on first use.
if (UNLIKELY(!KMP_TASKING_ENABLED(task_team))) {
__kmp_enable_tasking(task_team, thread);
}
KMP_DEBUG_ASSERT(TCR_4(task_team->tt.tt_found_tasks) == TRUE);
KMP_DEBUG_ASSERT(TCR_PTR(task_team->tt.tt_threads_data) != NULL);
// Tasks with a positive priority go to the priority deques, with the priority
// capped at __kmp_max_task_priority.
if (taskdata->td_flags.priority_specified && task->data2.priority > 0 &&
__kmp_max_task_priority > 0) {
int pri = KMP_MIN(task->data2.priority, __kmp_max_task_priority);
return __kmp_push_priority_task(gtid, thread, taskdata, task_team, pri);
}
// Everything else goes to the pushing thread's own deque, allocated lazily.
thread_data = &task_team->tt.tt_threads_data[tid];
if (UNLIKELY(thread_data->td.td_deque == NULL)) {
__kmp_alloc_task_deque(thread, thread_data);
}
int locked = 0;
// First fullness check, done without the deque lock.
if (TCR_4(thread_data->td.td_deque_ntasks) >=
TASK_DEQUE_SIZE(thread_data->td)) {
if (__kmp_enable_task_throttling &&
__kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
thread->th.th_current_task)) {
KA_TRACE(20, ("__kmp_push_task: T#%d deque is full; returning "
"TASK_NOT_PUSHED for task %p\n",
gtid, taskdata));
return TASK_NOT_PUSHED;
} else {
// Take the lock, re-check fullness, and grow the deque if still full.
__kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
locked = 1;
if (TCR_4(thread_data->td.td_deque_ntasks) >=
TASK_DEQUE_SIZE(thread_data->td)) {
__kmp_realloc_task_deque(thread, thread_data);
}
}
}
// Deque looked non-full above: take the lock now and repeat the fullness
// check under it.
if (!locked) {
__kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
if (TCR_4(thread_data->td.td_deque_ntasks) >=
TASK_DEQUE_SIZE(thread_data->td)) {
if (__kmp_enable_task_throttling &&
__kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
thread->th.th_current_task)) {
__kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
KA_TRACE(20, ("__kmp_push_task: T#%d deque is full on 2nd check; "
"returning TASK_NOT_PUSHED for task %p\n",
gtid, taskdata));
return TASK_NOT_PUSHED;
} else {
__kmp_realloc_task_deque(thread, thread_data);
}
}
}
// The deque lock is held on every path that reaches this point.
KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) <
TASK_DEQUE_SIZE(thread_data->td));
// Append at the tail and advance the tail with wrap-around.
thread_data->td.td_deque[thread_data->td.td_deque_tail] =
taskdata;
thread_data->td.td_deque_tail =
(thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
TCW_4(thread_data->td.td_deque_ntasks,
TCR_4(thread_data->td.td_deque_ntasks) + 1);
KMP_FSYNC_RELEASING(thread->th.th_current_task);
KMP_FSYNC_RELEASING(taskdata);
KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
"task=%p ntasks=%d head=%u tail=%u\n",
gtid, taskdata, thread_data->td.td_deque_ntasks,
thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
__kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
return TASK_SUCCESSFULLY_PUSHED;
}
// Makes the parent of this_thr's current task the thread's current task.
void __kmp_pop_current_task_from_thread(kmp_info_t *this_thr) {
  KF_TRACE(10, ("__kmp_pop_current_task_from_thread(enter): T#%d "
                "this_thread=%p, curtask=%p, "
                "curtask_parent=%p\n",
                0, this_thr, this_thr->th.th_current_task,
                this_thr->th.th_current_task->td_parent));

  kmp_taskdata_t *leaving_task = this_thr->th.th_current_task;
  this_thr->th.th_current_task = leaving_task->td_parent;

  KF_TRACE(10, ("__kmp_pop_current_task_from_thread(exit): T#%d "
                "this_thread=%p, curtask=%p, "
                "curtask_parent=%p\n",
                0, this_thr, this_thr->th.th_current_task,
                this_thr->th.th_current_task->td_parent));
}
// Installs the team's implicit task for slot `tid` as this_thr's current task.
//
// For tid 0 the thread's previous current task becomes the implicit task's
// parent, unless the implicit task is already current (then nothing changes).
// For any other tid the implicit task inherits the parent of slot 0's implicit
// task.
void __kmp_push_current_task_to_thread(kmp_info_t *this_thr, kmp_team_t *team,
                                       int tid) {
  KF_TRACE(10, ("__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p "
                "curtask=%p "
                "parent_task=%p\n",
                tid, this_thr, this_thr->th.th_current_task,
                team->t.t_implicit_task_taskdata[tid].td_parent));

  KMP_DEBUG_ASSERT(this_thr != NULL);

  kmp_taskdata_t *primary_implicit = &team->t.t_implicit_task_taskdata[0];

  if (tid != 0) {
    kmp_taskdata_t *own_implicit = &team->t.t_implicit_task_taskdata[tid];
    own_implicit->td_parent = primary_implicit->td_parent;
    this_thr->th.th_current_task = own_implicit;
  } else if (this_thr->th.th_current_task != primary_implicit) {
    primary_implicit->td_parent = this_thr->th.th_current_task;
    this_thr->th.th_current_task = primary_implicit;
  }

  KF_TRACE(10, ("__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p "
                "curtask=%p "
                "parent_task=%p\n",
                tid, this_thr, this_thr->th.th_current_task,
                team->t.t_implicit_task_taskdata[tid].td_parent));
}
// Bookkeeping for starting (or resuming) explicit task `task` on thread gtid:
// current_task is marked as not executing, `task` becomes the thread's current
// task and is flagged started and executing.
static void __kmp_task_start(kmp_int32 gtid, kmp_task_t *task,
kmp_taskdata_t *current_task) {
kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
kmp_info_t *thread = __kmp_threads[gtid];
KA_TRACE(10,
("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
gtid, taskdata, current_task));
KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
// Suspend the task that was running until now.
current_task->td_flags.executing = 0;
#ifdef BUILD_TIED_TASK_STACK
// Tied tasks are additionally recorded on the thread's tied-task stack.
if (taskdata->td_flags.tiedness == TASK_TIED) {
__kmp_push_task_stack(gtid, thread, taskdata);
}
#endif
thread->th.th_current_task = taskdata;
// Only an untied task may already be started/executing at this point.
KMP_DEBUG_ASSERT(taskdata->td_flags.started == 0 ||
taskdata->td_flags.tiedness == TASK_UNTIED);
KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0 ||
taskdata->td_flags.tiedness == TASK_UNTIED);
taskdata->td_flags.started = 1;
taskdata->td_flags.executing = 1;
KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n", gtid, taskdata));
return;
}
#if OMPT_SUPPORT
// Resets the OMPT bookkeeping of `task`: task data value 0, both frame
// pointers set to ompt_data_none with runtime/framepointer flags, and an empty
// dispatch chunk. `tid` is not used.
static inline void __ompt_task_init(kmp_taskdata_t *task, int tid) {
  ompt_task_info_t *info = &(task->ompt_task_info);
  const int runtime_frame_flags = ompt_frame_runtime | ompt_frame_framepointer;

  info->task_data.value = 0;

  info->frame.exit_frame = ompt_data_none;
  info->frame.enter_frame = ompt_data_none;
  info->frame.exit_frame_flags = runtime_frame_flags;
  info->frame.enter_frame_flags = runtime_frame_flags;

  info->dispatch_chunk.start = 0;
  info->dispatch_chunk.iterations = 0;
}
// Reports to OMPT that thread gtid switches from current_task to `task`.
// The status is ompt_task_yield when the thread's ompt_task_yielded flag is
// set (the flag is cleared), otherwise ompt_task_switch. The task_schedule
// callback is invoked only if registered; current_task is always recorded as
// the new task's scheduling parent.
static inline void __ompt_task_start(kmp_task_t *task,
                                     kmp_taskdata_t *current_task,
                                     kmp_int32 gtid) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);

  ompt_task_status_t status;
  if (__kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded) {
    // Consume the pending yield notification.
    __kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded = 0;
    status = ompt_task_yield;
  } else {
    status = ompt_task_switch;
  }

  if (ompt_enabled.ompt_callback_task_schedule) {
    ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
        &(current_task->ompt_task_info.task_data), status,
        &(taskdata->ompt_task_info.task_data));
  }

  taskdata->ompt_task_info.scheduling_parent = current_task;
}
// Reports to OMPT that `task` stops running with the given status and that
// resumed_task (may be NULL) runs next. Does nothing unless the task_schedule
// callback is registered. The status is replaced by ompt_task_cancel when
// cancellation is enabled and the task's taskgroup has a pending
// cancel_taskgroup request.
static inline void __ompt_task_finish(kmp_task_t *task,
                                      kmp_taskdata_t *resumed_task,
                                      ompt_task_status_t status) {
  if (!ompt_enabled.ompt_callback_task_schedule)
    return;

  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);

  bool group_cancelled =
      __kmp_omp_cancellation && taskdata->td_taskgroup &&
      taskdata->td_taskgroup->cancel_request == cancel_taskgroup;
  if (group_cancelled)
    status = ompt_task_cancel;

  ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
      &(taskdata->ompt_task_info.task_data), status,
      (resumed_task ? &(resumed_task->ompt_task_info.task_data) : NULL));
}
#endif
// Common implementation of __kmpc_omp_task_begin_if0: marks `task` as
// task_serial and starts it on thread gtid via __kmp_task_start.
//
// ompt           compile-time switch enabling the OMPT reporting below
// frame_address  frame recorded as the current task's enter frame and the new
//                task's exit frame (only used when ompt is true)
// return_address code address passed to the task_create callback
template <bool ompt>
static void __kmpc_omp_task_begin_if0_template(ident_t *loc_ref, kmp_int32 gtid,
kmp_task_t *task,
void *frame_address,
void *return_address) {
kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p "
"current_task=%p\n",
gtid, loc_ref, taskdata, current_task));
// Untied tasks bump td_untied_count; __kmp_task_finish decrements it again.
if (UNLIKELY(taskdata->td_flags.tiedness == TASK_UNTIED)) {
kmp_int32 counter = 1 + KMP_ATOMIC_INC(&taskdata->td_untied_count);
KMP_DEBUG_USE_VAR(counter);
KA_TRACE(20, ("__kmpc_omp_task_begin_if0: T#%d untied_count (%d) "
"incremented for task %p\n",
gtid, counter, taskdata));
}
// The task runs right here instead of being queued.
taskdata->td_flags.task_serial =
1;
__kmp_task_start(gtid, task, current_task);
#if OMPT_SUPPORT
if (ompt) {
// Record the frame only if the current task has no enter frame yet.
if (current_task->ompt_task_info.frame.enter_frame.ptr == NULL) {
current_task->ompt_task_info.frame.enter_frame.ptr =
taskdata->ompt_task_info.frame.exit_frame.ptr = frame_address;
current_task->ompt_task_info.frame.enter_frame_flags =
taskdata->ompt_task_info.frame.exit_frame_flags =
ompt_frame_application | ompt_frame_framepointer;
}
// Announce the task's creation, then the switch to it.
if (ompt_enabled.ompt_callback_task_create) {
ompt_task_info_t *parent_info = &(current_task->ompt_task_info);
ompt_callbacks.ompt_callback(ompt_callback_task_create)(
&(parent_info->task_data), &(parent_info->frame),
&(taskdata->ompt_task_info.task_data),
TASK_TYPE_DETAILS_FORMAT(taskdata), 0, return_address);
}
__ompt_task_start(task, current_task, gtid);
}
#endif
KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n", gtid,
loc_ref, taskdata));
}
#if OMPT_SUPPORT
// OMPT-enabled instantiation of the begin_if0 template; forwards all arguments
// unchanged. Marked OMPT_NOINLINE.
OMPT_NOINLINE
static void __kmpc_omp_task_begin_if0_ompt(ident_t *loc_ref, kmp_int32 gtid,
kmp_task_t *task,
void *frame_address,
void *return_address) {
__kmpc_omp_task_begin_if0_template<true>(loc_ref, gtid, task, frame_address,
return_address);
}
#endif
// Runtime entry point that starts `task` immediately on thread gtid (the
// template marks it task_serial instead of queueing it).
//
// When OMPT is enabled at runtime the call is routed through the OMPT variant
// together with a frame address and a return address obtained from the OMPT_*
// macros; otherwise the non-OMPT instantiation is called with NULL for both.
// NOTE(review): OMPT_GET_FRAME_ADDRESS(1) presumably yields the caller's
// frame - confirm against the macro definition.
#ifdef __s390x__
__attribute__((target("backchain")))
#endif
void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid,
kmp_task_t *task) {
#if OMPT_SUPPORT
if (UNLIKELY(ompt_enabled.enabled)) {
OMPT_STORE_RETURN_ADDRESS(gtid);
__kmpc_omp_task_begin_if0_ompt(loc_ref, gtid, task,
OMPT_GET_FRAME_ADDRESS(1),
OMPT_LOAD_RETURN_ADDRESS(gtid));
return;
}
#endif
__kmpc_omp_task_begin_if0_template<false>(loc_ref, gtid, task, NULL, NULL);
}
#ifdef TASK_UNUSED
// Starts `task` on thread gtid by calling __kmp_task_start with the thread's
// current task. Only compiled when TASK_UNUSED is defined.
void __kmpc_omp_task_begin(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task) {
kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
KA_TRACE(
10,
("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task));
__kmp_task_start(gtid, task, current_task);
KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n", gtid,
loc_ref, KMP_TASK_TO_TASKDATA(task)));
return;
}
#endif
// Releases a completed explicit task.
//
// The task's destructors pointer and priority are cleared and the task is
// flagged freed, then its memory is returned through __kmp_fast_free or
// __kmp_thread_free (depending on USE_FAST_MEMORY). With OMPX_TASKGRAPH,
// taskgraph tasks are not deallocated; their flags and counters are reset
// instead.
static void __kmp_free_task(kmp_int32 gtid, kmp_taskdata_t *taskdata,
kmp_info_t *thread) {
KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n", gtid,
taskdata));
// Sanity checks: the task is explicit, finished, not yet freed, and has no
// outstanding children.
KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0);
KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 1);
KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
KMP_DEBUG_ASSERT(taskdata->td_allocated_child_tasks == 0 ||
taskdata->td_flags.task_serial == 1);
KMP_DEBUG_ASSERT(taskdata->td_incomplete_child_tasks == 0);
kmp_task_t *task = KMP_TASKDATA_TO_TASK(taskdata);
// Clear fields before the memory is released.
task->data1.destructors = NULL;
task->data2.priority = 0;
taskdata->td_flags.freed = 1;
#if OMPX_TASKGRAPH
if (!taskdata->is_taskgraph) {
#endif
#if USE_FAST_MEMORY
__kmp_fast_free(thread, taskdata);
#else
__kmp_thread_free(thread, taskdata);
#endif
#if OMPX_TASKGRAPH
} else {
// Taskgraph task: keep the structure and reset its state.
taskdata->td_flags.complete = 0;
taskdata->td_flags.started = 0;
taskdata->td_flags.freed = 0;
taskdata->td_flags.executing = 0;
taskdata->td_flags.task_serial =
(taskdata->td_parent->td_flags.final ||
taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser);
KMP_ATOMIC_ST_RLX(&taskdata->td_untied_count, 0);
KMP_ATOMIC_ST_RLX(&taskdata->td_incomplete_child_tasks, 0);
KMP_ATOMIC_ST_RLX(&taskdata->td_allocated_child_tasks, 1);
}
#endif
KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n", gtid, taskdata));
}
// Drops one reference from taskdata's td_allocated_child_tasks counter and, if
// it reaches zero, frees the task and repeats the same step for its parent.
//
// The walk stops after the first free when the starting task is serialized
// (team_serial or tasking_ser) and not a proxy, and it always stops at an
// implicit task. On reaching an implicit task with a dephash, the dephash
// entries are freed if the task has no incomplete children and its `complete`
// flag could be atomically cleared.
static void __kmp_free_task_and_ancestors(kmp_int32 gtid,
kmp_taskdata_t *taskdata,
kmp_info_t *thread) {
// Computed once from the starting task; not re-evaluated for ancestors.
kmp_int32 team_serial =
(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) &&
!taskdata->td_flags.proxy;
KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
// KMP_ATOMIC_DEC is used as returning the value before the decrement, hence
// the "- 1" to obtain the new count.
kmp_int32 children = KMP_ATOMIC_DEC(&taskdata->td_allocated_child_tasks) - 1;
KMP_DEBUG_ASSERT(children >= 0);
while (children == 0) {
// Read the parent before the task is freed.
kmp_taskdata_t *parent_taskdata = taskdata->td_parent;
KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
"and freeing itself\n",
gtid, taskdata));
__kmp_free_task(gtid, taskdata, thread);
taskdata = parent_taskdata;
if (team_serial)
return;
// Implicit tasks are never freed here; at most their dephash is cleaned.
if (taskdata->td_flags.tasktype == TASK_IMPLICIT) {
if (taskdata->td_dephash) {
int children = KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks);
kmp_tasking_flags_t flags_old = taskdata->td_flags;
if (children == 0 && flags_old.complete == 1) {
kmp_tasking_flags_t flags_new = flags_old;
flags_new.complete = 0;
// Only the thread whose compare-and-store succeeds frees the entries.
if (KMP_COMPARE_AND_STORE_ACQ32(
RCAST(kmp_int32 *, &taskdata->td_flags),
*RCAST(kmp_int32 *, &flags_old),
*RCAST(kmp_int32 *, &flags_new))) {
KA_TRACE(100, ("__kmp_free_task_and_ancestors: T#%d cleans "
"dephash of implicit task %p\n",
gtid, taskdata));
__kmp_dephash_free_entries(thread, taskdata->td_dephash);
}
}
}
return;
}
// Drop the freed child's reference on the (explicit) parent.
children = KMP_ATOMIC_DEC(&taskdata->td_allocated_child_tasks) - 1;
KMP_DEBUG_ASSERT(children >= 0);
}
KA_TRACE(
20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
"not freeing it yet\n",
gtid, taskdata, children));
}
// Tells whether child-task accounting applies to taskdata. True when the task
// is not serialized (neither team_serial nor tasking_ser), or it is a proxy,
// detachable or hidden-helper task, or its parent currently has incomplete
// children. With OMPX_TASKGRAPH, a taskgraph task in a taskgroup with a
// positive count also qualifies.
static bool __kmp_track_children_task(kmp_taskdata_t *taskdata) {
  kmp_tasking_flags_t flags = taskdata->td_flags;

  bool track = !(flags.team_serial || flags.tasking_ser);
  if (!track) {
    track = flags.proxy == TASK_PROXY ||
            flags.detachable == TASK_DETACHABLE || flags.hidden_helper;
  }
  if (!track) {
    track =
        KMP_ATOMIC_LD_ACQ(&taskdata->td_parent->td_incomplete_child_tasks) > 0;
  }
#if OMPX_TASKGRAPH
  if (taskdata->td_taskgroup && taskdata->is_taskgraph) {
    if (!track)
      track = KMP_ATOMIC_LD_ACQ(&taskdata->td_taskgroup->count) > 0;
  }
#endif
  return track;
}
// Bookkeeping for the end of explicit task `task` on thread gtid.
//
// ompt          compile-time switch enabling the OMPT notifications
// gtid          global id of the thread finishing the task
// task          the task that stops executing
// resumed_task  task to resume afterwards; may be NULL for a task_serial task,
//               in which case the task's parent is resumed
//
// An untied task whose td_untied_count is still positive after the decrement
// is only suspended. A detachable task whose completion event is still in
// state KMP_EVENT_ALLOW_COMPLETION is turned into a proxy, and a task with a
// non-NULL target async handle is re-enqueued via __kmpc_give_task; in both
// cases the task is not completed here. Otherwise the task is marked complete,
// its dependences and parent/taskgroup counters are released, and the task
// (and possibly its ancestors) is freed.
template <bool ompt>
static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
                              kmp_taskdata_t *resumed_task) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_task_team_t *task_team =
      thread->th.th_task_team;
#if OMPX_TASKGRAPH
  // Cached up front because taskdata may be reset/freed before the last use.
  bool is_taskgraph;
#endif
#if KMP_DEBUG
  kmp_int32 children = 0;
#endif
  KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming "
                "task %p\n",
                gtid, taskdata, resumed_task));

  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);

#if OMPX_TASKGRAPH
  is_taskgraph = taskdata->is_taskgraph;
#endif

#ifdef BUILD_TIED_TASK_STACK
  // Tied tasks are removed from the thread's tied-task stack.
  if (taskdata->td_flags.tiedness == TASK_TIED) {
    __kmp_pop_task_stack(gtid, thread, taskdata);
  }
#endif

  if (UNLIKELY(taskdata->td_flags.tiedness == TASK_UNTIED)) {
    // "- 1" converts the pre-decrement value into the new count.
    kmp_int32 counter = KMP_ATOMIC_DEC(&taskdata->td_untied_count) - 1;
    KA_TRACE(
        20,
        ("__kmp_task_finish: T#%d untied_count (%d) decremented for task %p\n",
         gtid, counter, taskdata));
    if (counter > 0) {
      // The untied task is not done yet: just switch back to the resumed task.
      if (resumed_task == NULL) {
        KMP_DEBUG_ASSERT(taskdata->td_flags.task_serial);
        resumed_task = taskdata->td_parent;
      }
      thread->th.th_current_task = resumed_task;
      resumed_task->td_flags.executing = 1;
      KA_TRACE(10, ("__kmp_task_finish(exit): T#%d partially done task %p, "
                    "resuming task %p\n",
                    gtid, taskdata, resumed_task));
      return;
    }
  }

  // tasking_ser implies task_serial.
  KMP_DEBUG_ASSERT(
      (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
      taskdata->td_flags.task_serial);
  if (taskdata->td_flags.task_serial) {
    // A serialized task resumes its parent unless the caller named a task.
    if (resumed_task == NULL) {
      resumed_task = taskdata->td_parent;
    }
  } else {
    KMP_DEBUG_ASSERT(resumed_task !=
                     NULL);
  }

  /* If the task's destructor thunk flag has been set, we need to invoke the
     destructor thunk that has been generated by the compiler. The code is
     placed here, since at this point other tasks might have been released
     hence overlapping the destructor invocations with some other work in the
     released tasks. The OpenMP spec is not specific on when the destructors
     are invoked, so we should be free to choose. */
  if (UNLIKELY(taskdata->td_flags.destructors_thunk)) {
    kmp_routine_entry_t destr_thunk = task->data1.destructors;
    KMP_ASSERT(destr_thunk);
    destr_thunk(gtid, task);
  }

  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
  KMP_DEBUG_ASSERT(taskdata->td_flags.started == 1);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);

  bool completed = true;
  if (UNLIKELY(taskdata->td_flags.detachable == TASK_DETACHABLE)) {
    if (taskdata->td_allow_completion_event.type ==
        KMP_EVENT_ALLOW_COMPLETION) {
      // Check again under the event lock before detaching the task.
      __kmp_acquire_tas_lock(&taskdata->td_allow_completion_event.lock, gtid);
      if (taskdata->td_allow_completion_event.type ==
          KMP_EVENT_ALLOW_COMPLETION) {
        KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1);
        taskdata->td_flags.executing = 0;
#if OMPT_SUPPORT
        if (ompt)
          __ompt_task_finish(task, resumed_task, ompt_task_detach);
#endif
        // Turn the task into a proxy; it is not completed by this call.
        taskdata->td_flags.proxy = TASK_PROXY;
        completed = false;
      }
      __kmp_release_tas_lock(&taskdata->td_allow_completion_event.lock, gtid);
    }
  }

  // A task that carries a target async handle is handed back for re-execution.
  if (taskdata->td_target_data.async_handle != NULL) {
#if OMPT_SUPPORT
    if (ompt) {
      __ompt_task_finish(task, resumed_task, ompt_task_switch);
    }
#endif
    __kmpc_give_task(task, __kmp_tid_from_gtid(gtid));
    if (KMP_HIDDEN_HELPER_THREAD(gtid))
      __kmp_hidden_helper_worker_thread_signal();
    completed = false;
  }

  if (completed) {
    taskdata->td_flags.complete = 1;
#if OMPX_TASKGRAPH
    taskdata->td_flags.onced = 1;
#endif
#if OMPT_SUPPORT
    if (ompt)
      __ompt_task_finish(task, resumed_task, ompt_task_complete);
#endif
    if (__kmp_track_children_task(taskdata)) {
      __kmp_release_deps(gtid, taskdata);
      // The decrement always happens; its result is only kept in debug builds
      // ("-1 +" converts the pre-decrement value into the new count).
#if KMP_DEBUG
      children = -1 +
#endif
          KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks);
      KMP_DEBUG_ASSERT(children >= 0);
      // For taskgraph tasks the taskgroup count is released at the very end
      // of this function instead.
#if OMPX_TASKGRAPH
      if (taskdata->td_taskgroup && !taskdata->is_taskgraph)
#else
      if (taskdata->td_taskgroup)
#endif
        KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);
    } else if (task_team && (task_team->tt.tt_found_proxy_tasks ||
                             task_team->tt.tt_hidden_helper_task_encountered)) {
      // Dependences are still released when proxy or hidden-helper tasks have
      // been seen by the task team.
      __kmp_release_deps(gtid, taskdata);
    }
    // The executing flag is cleared only after the dependences were released.
    KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1);
    taskdata->td_flags.executing = 0;

    // One fewer hidden-helper task is outstanding.
    if (taskdata->td_flags.hidden_helper) {
      KMP_ASSERT(KMP_HIDDEN_HELPER_THREAD(gtid));
      KMP_ATOMIC_DEC(&__kmp_unexecuted_hidden_helper_tasks);
    }
  }

  KA_TRACE(
      20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
           gtid, taskdata, children));

  // Switch the thread's current task before the finished task is freed.
  thread->th.th_current_task = resumed_task;
  if (completed)
    __kmp_free_task_and_ancestors(gtid, taskdata, thread);

  resumed_task->td_flags.executing = 1;

#if OMPX_TASKGRAPH
  // Taskgraph tasks release their taskgroup count only now, after
  // __kmp_free_task_and_ancestors has run (which resets taskgraph tasks rather
  // than deallocating them).
  if (is_taskgraph && __kmp_track_children_task(taskdata) &&
      taskdata->td_taskgroup) {
    KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);
  }
#endif

  KA_TRACE(
      10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
           gtid, taskdata, resumed_task));
  return;
}
// Common implementation of __kmpc_omp_task_complete_if0: finishes `task` on
// thread gtid through __kmp_task_finish, passing NULL as the task to resume.
// With ompt == true, the enter frame of the task info returned by
// __ompt_get_task_info_internal(0, ...) is reset to ompt_data_none with
// runtime/framepointer flags afterwards.
template <bool ompt>
static void __kmpc_omp_task_complete_if0_template(ident_t *loc_ref,
                                                  kmp_int32 gtid,
                                                  kmp_task_t *task) {
  KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
                gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));
  KMP_DEBUG_ASSERT(gtid >= 0);

  __kmp_task_finish<ompt>(gtid, task, NULL);

  KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
                gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));

#if OMPT_SUPPORT
  if (ompt) {
    ompt_frame_t *current_frame;
    __ompt_get_task_info_internal(0, NULL, NULL, &current_frame, NULL, NULL);
    current_frame->enter_frame = ompt_data_none;
    current_frame->enter_frame_flags =
        ompt_frame_runtime | ompt_frame_framepointer;
  }
#endif
}
#if OMPT_SUPPORT
// OMPT-enabled instantiation of the complete_if0 template; forwards all
// arguments unchanged. Marked OMPT_NOINLINE.
OMPT_NOINLINE
void __kmpc_omp_task_complete_if0_ompt(ident_t *loc_ref, kmp_int32 gtid,
kmp_task_t *task) {
__kmpc_omp_task_complete_if0_template<true>(loc_ref, gtid, task);
}
#endif
// Runtime entry point that finishes `task` on thread gtid. Dispatches to the
// OMPT variant when OMPT is enabled at runtime, otherwise to the non-OMPT
// template instantiation.
void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid,
kmp_task_t *task) {
#if OMPT_SUPPORT
if (UNLIKELY(ompt_enabled.enabled)) {
__kmpc_omp_task_complete_if0_ompt(loc_ref, gtid, task);
return;
}
#endif
__kmpc_omp_task_complete_if0_template<false>(loc_ref, gtid, task);
}
#ifdef TASK_UNUSED
// Reports completion of `task`; only compiled when TASK_UNUSED is defined.
// Finishes the task without OMPT handling and with NULL as the task to resume.
void __kmpc_omp_task_complete(ident_t *loc_ref, kmp_int32 gtid,
                              kmp_task_t *task) {
  KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n", gtid,
                loc_ref, KMP_TASK_TO_TASKDATA(task)));

  __kmp_task_finish<false>(gtid, task, NULL);

  KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n", gtid,
                loc_ref, KMP_TASK_TO_TASKDATA(task)));
}
#endif // TASK_UNUSED
// (Re)initializes the implicit task slot `tid` of `team`.
//   loc_ref        source location stored in the task's td_ident
//   this_thr       thread that will own the task; only used when
//                  set_curr_task is non-zero
//   team           team whose t_implicit_task_taskdata[tid] is initialized
//   tid            index of the implicit task within the team
//   set_curr_task  non-zero: also reset the child counters, taskgroup and
//                  dephash, and push the task as the thread's current task;
//                  zero: only assert that both child counters are already 0
void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr,
                              kmp_team_t *team, int tid, int set_curr_task) {
  kmp_taskdata_t *itask = &team->t.t_implicit_task_taskdata[tid];

  KF_TRACE(
      10,
      ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
       tid, team, itask, set_curr_task ? "TRUE" : "FALSE"));

  // Identity and taskwait bookkeeping.
  itask->td_task_id = KMP_GEN_TASK_ID();
  itask->td_team = team;
  itask->td_ident = loc_ref;
  itask->td_taskwait_ident = NULL;
  itask->td_taskwait_counter = 0;
  itask->td_taskwait_thread = 0;

  // Kind of task: tied, implicit, not a proxy.
  itask->td_flags.tiedness = TASK_TIED;
  itask->td_flags.tasktype = TASK_IMPLICIT;
  itask->td_flags.proxy = TASK_FULL;

  // Serialization flags.
  itask->td_flags.task_serial = 1;
  itask->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);
  itask->td_flags.team_serial = team->t.t_serialized ? 1 : 0;

  // Life-cycle flags: the implicit task is marked as already running.
  itask->td_flags.started = 1;
  itask->td_flags.executing = 1;
  itask->td_flags.complete = 0;
  itask->td_flags.freed = 0;
#if OMPX_TASKGRAPH
  itask->td_flags.onced = 0;
#endif

  itask->td_depnode = NULL;
  itask->td_last_tied = itask;
  itask->td_allow_completion_event.type = KMP_EVENT_UNINITIALIZED;

  if (!set_curr_task) {
    // Re-initialization: the counters must already be drained.
    KMP_DEBUG_ASSERT(itask->td_incomplete_child_tasks == 0);
    KMP_DEBUG_ASSERT(itask->td_allocated_child_tasks == 0);
  } else {
    KMP_ATOMIC_ST_REL(&itask->td_incomplete_child_tasks, 0);
    KMP_ATOMIC_ST_REL(&itask->td_allocated_child_tasks, 0);
    itask->td_taskgroup = NULL;
    itask->td_dephash = NULL;
    __kmp_push_current_task_to_thread(this_thr, team, tid);
  }

#if OMPT_SUPPORT
  if (UNLIKELY(ompt_enabled.enabled))
    __ompt_task_init(itask, tid);
#endif

  KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n", tid,
                team, itask));
}
// Called for the thread's current (implicit) task: if the task owns a
// dependence hash, mark the task complete and, when it has no incomplete
// children, atomically clear the `complete` flag again and free the hash
// entries. The compare-and-store on the whole flag word ensures that only the
// caller that wins the swap frees the entries.
void __kmp_finish_implicit_task(kmp_info_t *thread) {
  kmp_taskdata_t *task = thread->th.th_current_task;
  if (!task->td_dephash)
    return; // nothing to clean up

  task->td_flags.complete = 1;
#if OMPX_TASKGRAPH
  task->td_flags.onced = 1;
#endif

  const int children = KMP_ATOMIC_LD_ACQ(&task->td_incomplete_child_tasks);
  kmp_tasking_flags_t flags_old = task->td_flags;
  if (children != 0 || flags_old.complete != 1)
    return;

  kmp_tasking_flags_t flags_new = flags_old;
  flags_new.complete = 0;
  if (!KMP_COMPARE_AND_STORE_ACQ32(RCAST(kmp_int32 *, &task->td_flags),
                                   *RCAST(kmp_int32 *, &flags_old),
                                   *RCAST(kmp_int32 *, &flags_new)))
    return; // flags changed underneath us; leave the hash alone

  KA_TRACE(100, ("__kmp_finish_implicit_task: T#%d cleans "
                 "dephash of implicit task %p\n",
                 thread->th.th_info.ds.ds_gtid, task));
  __kmp_dephash_free_entries(thread, task->td_dephash);
}
// Releases the dependence hash of the thread's current task, if there is a
// current task and it owns one, and clears the pointer afterwards.
void __kmp_free_implicit_task(kmp_info_t *thread) {
  kmp_taskdata_t *task = thread->th.th_current_task;
  if (task == NULL || task->td_dephash == NULL)
    return;

  __kmp_dephash_free(thread, task->td_dephash);
  task->td_dephash = NULL;
}
// Rounds `size` up to the next multiple of `val`. The bit masking assumes
// `val` is a power of two. If adding `val` would exceed KMP_SIZE_T_MAX, the
// value rounded *down* to a multiple of `val` is returned instead.
static size_t __kmp_round_up_to_val(size_t size, size_t val) {
  const size_t low_bits = val - 1;
  if ((size & low_bits) == 0)
    return size; // already a multiple of val

  const size_t rounded_down = size & ~low_bits;
  return (rounded_down <= KMP_SIZE_T_MAX - val) ? rounded_down + val
                                                : rounded_down;
}
// Allocates and initializes the taskdata/task pair for a new explicit task.
//
// A single memory block holds, in order, the kmp_taskdata_t, the task thunk of
// `sizeof_kmp_task_t` bytes and (after padding to pointer alignment) the
// shareds area of `sizeof_shareds` bytes.
//
//   loc_ref            source location, stored in td_ident
//   gtid               global thread id of the encountering thread
//   flags              in/out: requested task flags. May be modified here
//                      (hidden_helper cleared when hidden helpers are
//                      disabled, final inherited from the parent, proxy tasks
//                      forced to untied + merged_if0)
//   sizeof_kmp_task_t  size in bytes of the task thunk
//   sizeof_shareds     size in bytes of the shareds area (0: no shareds)
//   task_entry         routine stored in task->routine
//
// Returns the task thunk (KMP_TASKDATA_TO_TASK of the new taskdata).
kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
                             kmp_tasking_flags_t *flags,
                             size_t sizeof_kmp_task_t, size_t sizeof_shareds,
                             kmp_routine_entry_t task_entry) {
  kmp_task_t *task;
  kmp_taskdata_t *taskdata;
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_team_t *team = thread->th.th_team;
  kmp_taskdata_t *parent_task = thread->th.th_current_task;
  size_t shareds_offset;

  if (UNLIKELY(!TCR_4(__kmp_init_middle)))
    __kmp_middle_initialize();

  if (flags->hidden_helper) {
    if (__kmp_enable_hidden_helper) {
      if (!TCR_4(__kmp_init_hidden_helper))
        __kmp_hidden_helper_initialize();
    } else {
      // Hidden helper support is disabled: treat it as a regular task.
      flags->hidden_helper = FALSE;
    }
  }

  KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
                "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
                sizeof_shareds, task_entry));

  KMP_DEBUG_ASSERT(parent_task);
  // A child of a final task is final as well. (The original also had an empty
  // `if (flags->merged_if0) {}` here; it had no effect and was dropped.)
  if (parent_task->td_flags.final)
    flags->final = 1;

  if (flags->tiedness == TASK_UNTIED && !team->t.t_serialized) {
    // Remember in the task team that an untied task has been seen.
    KMP_CHECK_UPDATE(thread->th.th_task_team->tt.tt_untied_task_encountered, 1);
  }

  if (UNLIKELY(flags->proxy == TASK_PROXY ||
               flags->detachable == TASK_DETACHABLE || flags->hidden_helper)) {
    if (flags->proxy == TASK_PROXY) {
      flags->tiedness = TASK_UNTIED;
      flags->merged_if0 = 1;
    }
    /* Proxy, detachable and hidden helper tasks need tasking support
       enabled */
    if ((thread->th.th_task_team) == NULL) {
      /* No task team yet (asserted below to happen only for a serialized
         team): setup a task team and propagate it to the thread */
      KMP_DEBUG_ASSERT(team->t.t_serialized);
      KA_TRACE(30,
               ("T#%d creating task team in __kmp_task_alloc for proxy task\n",
                gtid));
      __kmp_task_team_setup(thread, team);
      thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
    }
    kmp_task_team_t *task_team = thread->th.th_task_team;

    if (!KMP_TASKING_ENABLED(task_team)) {
      KA_TRACE(
          30,
          ("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
      __kmp_enable_tasking(task_team, thread);
      kmp_int32 tid = thread->th.th_info.ds.ds_tid;
      kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];
      // Make sure this thread has a deque to push to.
      if (thread_data->td.td_deque == NULL) {
        __kmp_alloc_task_deque(thread, thread_data);
      }
    }

    // Record in the task team which special task kinds have been seen.
    if ((flags->proxy == TASK_PROXY || flags->detachable == TASK_DETACHABLE) &&
        task_team->tt.tt_found_proxy_tasks == FALSE)
      TCW_4(task_team->tt.tt_found_proxy_tasks, TRUE);
    if (flags->hidden_helper &&
        task_team->tt.tt_hidden_helper_task_encountered == FALSE)
      TCW_4(task_team->tt.tt_hidden_helper_task_encountered, TRUE);
  }

  // Offset of the shareds area: taskdata + task thunk, padded so that the
  // shareds start on a pointer-size boundary.
  shareds_offset = sizeof(kmp_taskdata_t) + sizeof_kmp_task_t;
  shareds_offset = __kmp_round_up_to_val(shareds_offset, sizeof(void *));

  KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n", gtid,
                shareds_offset));
  KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n", gtid,
                sizeof_shareds));

  // One allocation covers taskdata, task thunk and shareds.
#if USE_FAST_MEMORY
  taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, shareds_offset +
                                                               sizeof_shareds);
#else
  taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, shareds_offset +
                                                               sizeof_shareds);
#endif

  task = KMP_TASKDATA_TO_TASK(taskdata);

  // Check the alignment of taskdata and task.
#if KMP_ARCH_X86 || KMP_ARCH_PPC64 || KMP_ARCH_S390X || !KMP_HAVE_QUAD
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(double) - 1)) == 0);
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(double) - 1)) == 0);
#else
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(_Quad) - 1)) == 0);
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(_Quad) - 1)) == 0);
#endif

  if (sizeof_shareds > 0) {
    // The shareds live in the tail of the same block.
    task->shareds = &((char *)taskdata)[shareds_offset];
    KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) ==
                     0);
  } else {
    task->shareds = NULL;
  }
  task->routine = task_entry;
  task->part_id = 0;

  taskdata->td_task_id = KMP_GEN_TASK_ID();
  taskdata->td_team = thread->th.th_team;
  taskdata->td_alloc_thread = thread;
  taskdata->td_parent = parent_task;
  taskdata->td_level = parent_task->td_level + 1; // one level below the parent
  KMP_ATOMIC_ST_RLX(&taskdata->td_untied_count, 0);
  taskdata->td_ident = loc_ref;
  taskdata->td_taskwait_ident = NULL;
  taskdata->td_taskwait_counter = 0;
  taskdata->td_taskwait_thread = 0;
  KMP_DEBUG_ASSERT(taskdata->td_parent != NULL);
  // ICVs are inherited from the parent, except for proxy tasks.
  if (flags->proxy == TASK_FULL)
    copy_icvs(&taskdata->td_icvs, &taskdata->td_parent->td_icvs);

  taskdata->td_flags = *flags;
  taskdata->td_task_team = thread->th.th_task_team;
  taskdata->td_size_alloc = shareds_offset + sizeof_shareds;
  taskdata->td_flags.tasktype = TASK_EXPLICIT;
  // A hidden helper task belongs to the team/task team of the shadow thread.
  if (flags->hidden_helper) {
    kmp_info_t *shadow_thread = __kmp_threads[KMP_GTID_TO_SHADOW_GTID(gtid)];
    taskdata->td_team = shadow_thread->th.th_team;
    taskdata->td_task_team = shadow_thread->th.th_task_team;
  }

  taskdata->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);
  taskdata->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0;

  // The task is serialized if the parent is final, the team or tasking is
  // serialized, or it is a merged if(0) task.
  taskdata->td_flags.task_serial =
      (parent_task->td_flags.final || taskdata->td_flags.team_serial ||
       taskdata->td_flags.tasking_ser || flags->merged_if0);

  taskdata->td_flags.started = 0;
  taskdata->td_flags.executing = 0;
  taskdata->td_flags.complete = 0;
  taskdata->td_flags.freed = 0;
#if OMPX_TASKGRAPH
  taskdata->td_flags.onced = 0;
#endif
  KMP_ATOMIC_ST_RLX(&taskdata->td_incomplete_child_tasks, 0);
  // Starts at 1 rather than 0 (presumably counting the task itself).
  KMP_ATOMIC_ST_RLX(&taskdata->td_allocated_child_tasks, 1);
  taskdata->td_taskgroup = parent_task->td_taskgroup; // inherited from parent
  taskdata->td_dephash = NULL;
  taskdata->td_depnode = NULL;
  taskdata->td_target_data.async_handle = NULL;
  if (flags->tiedness == TASK_UNTIED)
    taskdata->td_last_tied = NULL; // filled in by __kmp_invoke_task
  else
    taskdata->td_last_tied = taskdata;
  taskdata->td_allow_completion_event.type = KMP_EVENT_UNINITIALIZED;
#if OMPT_SUPPORT
  if (UNLIKELY(ompt_enabled.enabled))
    __ompt_task_init(taskdata, gtid);
#endif

  if (__kmp_track_children_task(taskdata)) {
    KMP_ATOMIC_INC(&parent_task->td_incomplete_child_tasks);
    if (parent_task->td_taskgroup)
      KMP_ATOMIC_INC(&parent_task->td_taskgroup->count);
    // Allocated-children counts are only kept on explicit parents.
    if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT) {
      KMP_ATOMIC_INC(&taskdata->td_parent->td_allocated_child_tasks);
    }
    if (flags->hidden_helper) {
      taskdata->td_flags.task_serial = FALSE;
      KMP_ATOMIC_INC(&__kmp_unexecuted_hidden_helper_tasks);
    }
  }

#if OMPX_TASKGRAPH
  // While a task graph is being recorded, tag the task as part of it and give
  // it a graph-local id (taskloop helper tasks are excluded).
  kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_idx);
  if (tdg && __kmp_tdg_is_recording(tdg->tdg_status) &&
      (task_entry != (kmp_routine_entry_t)__kmp_taskloop_task)) {
    taskdata->is_taskgraph = 1;
    taskdata->tdg = __kmp_global_tdgs[__kmp_curr_tdg_idx];
    taskdata->td_task_id = KMP_ATOMIC_INC(&__kmp_tdg_task_id);
  }
#endif
  KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
                gtid, taskdata, taskdata->td_parent));

  return task;
}
// Compiler-facing allocation entry: validates `gtid`, reinterprets the 32-bit
// `flags` word as kmp_tasking_flags_t, clears its `native` bit and delegates
// to __kmp_task_alloc. Returns the task thunk produced by __kmp_task_alloc.
kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
                                  kmp_int32 flags, size_t sizeof_kmp_task_t,
                                  size_t sizeof_shareds,
                                  kmp_routine_entry_t task_entry) {
  __kmp_assert_valid_gtid(gtid);

  // View the by-value flags word through the bit-field struct.
  kmp_tasking_flags_t *input_flags =
      reinterpret_cast<kmp_tasking_flags_t *>(&flags);
  input_flags->native = FALSE;

  KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s %s) "
                "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                gtid, loc_ref, input_flags->tiedness ? "tied  " : "untied",
                input_flags->proxy ? "proxy" : "",
                input_flags->detachable ? "detachable" : "", sizeof_kmp_task_t,
                sizeof_shareds, task_entry));

  kmp_task_t *new_task = __kmp_task_alloc(
      loc_ref, gtid, input_flags, sizeof_kmp_task_t, sizeof_shareds, task_entry);

  KA_TRACE(20,
           ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, new_task));
  return new_task;
}
// Allocation entry for target tasks: forces the task to be untied, sets the
// `target` flag, additionally marks it as a hidden helper task when
// __kmp_enable_hidden_helper is set, then forwards to __kmpc_omp_task_alloc.
// `device_id` is accepted but not used in this function.
kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
                                         kmp_int32 flags,
                                         size_t sizeof_kmp_task_t,
                                         size_t sizeof_shareds,
                                         kmp_routine_entry_t task_entry,
                                         kmp_int64 device_id) {
  // Edit the by-value flags word in place through the bit-field view.
  kmp_tasking_flags_t *target_flags =
      reinterpret_cast<kmp_tasking_flags_t *>(&flags);
  target_flags->tiedness = TASK_UNTIED;
  target_flags->target = 1;
  if (__kmp_enable_hidden_helper) {
    target_flags->hidden_helper = TRUE;
  }

  return __kmpc_omp_task_alloc(loc_ref, gtid, flags, sizeof_kmp_task_t,
                               sizeof_shareds, task_entry);
}
/*!
@ingroup TASKING
@param loc_ref location of the original task directive
@param gtid Global Thread ID of encountering thread
@param new_task task thunk allocated by __kmpc_omp_task_alloc() for the ''new
task''
@param naffins Number of affinity items
@param affin_list List of affinity items
@return Returns non-zero if registering affinity information was not successful.
Returns 0 if registration was successful

This entry registers the affinity information attached to a task with the task
thunk structure kmp_taskdata_t.
*/
kmp_int32
__kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, kmp_int32 gtid,
                                  kmp_task_t *new_task, kmp_int32 naffins,
                                  kmp_task_affinity_info_t *affin_list) {
  // None of the arguments is inspected or stored; the call always reports
  // success (0).
  const kmp_int32 registration_ok = 0;
  return registration_ok;
}
// Runs `task` on thread `gtid`: starts it, executes its routine (unless the
// task is discarded because of a pending cancellation) and finishes it.
//   gtid          global thread id of the executing thread
//   task          task thunk to run
//   current_task  task that is current on this thread when the call is made;
//                 passed on to __kmp_task_start / __kmp_task_finish
// Proxy tasks get special treatment: they are neither started nor finished
// here, and a proxy task that is already complete only has its bottom-half
// finish executed.
#ifdef __s390x__
__attribute__((target("backchain")))
#endif
static void
__kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
kmp_taskdata_t *current_task) {
kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
// NOTE: `thread` is assigned only on the OMPT, cancellation and ITT paths
// below; each later use sits behind the same condition as its assignment.
kmp_info_t *thread;
// Set to 1 when the task must not be executed because of cancellation.
int discard = 0 ;
KA_TRACE(
30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
gtid, taskdata, current_task));
KMP_DEBUG_ASSERT(task);
// A completed proxy task only needs its bottom-half finish.
if (UNLIKELY(taskdata->td_flags.proxy == TASK_PROXY &&
taskdata->td_flags.complete == 1)) {
KA_TRACE(
30,
("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
gtid, taskdata));
__kmp_bottom_half_finish_proxy(gtid, task);
KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for "
"proxy task %p, resuming task %p\n",
gtid, taskdata, current_task));
return;
}
#if OMPT_SUPPORT
// Save the thread's OMPT info so it can be restored after the task, and
// record the exit frame of the task.
ompt_thread_info_t oldInfo;
if (UNLIKELY(ompt_enabled.enabled)) {
thread = __kmp_threads[gtid];
oldInfo = thread->th.ompt_thread_info;
thread->th.ompt_thread_info.wait_id = 0;
thread->th.ompt_thread_info.state = (thread->th.th_team_serialized)
? ompt_state_work_serial
: ompt_state_work_parallel;
taskdata->ompt_task_info.frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
}
#endif
// Proxy tasks are not started by this function.
if (taskdata->td_flags.proxy != TASK_PROXY) {
__kmp_task_start(gtid, task, current_task);
}
// With cancellation enabled, discard the task if its taskgroup or the
// enclosing parallel region has a pending cancel request.
if (UNLIKELY(__kmp_omp_cancellation)) {
thread = __kmp_threads[gtid];
kmp_team_t *this_team = thread->th.th_team;
kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
if ((taskgroup && taskgroup->cancel_request) ||
(this_team->t.t_cancel_request == cancel_parallel)) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
ompt_data_t *task_data;
if (UNLIKELY(ompt_enabled.ompt_callback_cancel)) {
__ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL);
ompt_callbacks.ompt_callback(ompt_callback_cancel)(
task_data,
((taskgroup && taskgroup->cancel_request) ? ompt_cancel_taskgroup
: ompt_cancel_parallel) |
ompt_cancel_discarded_task,
NULL);
}
#endif
KMP_COUNT_BLOCK(TASK_cancelled);
discard = 1 ;
}
}
if (!discard) {
// An untied task takes over the last tied task of the task it runs on top of.
if (taskdata->td_flags.tiedness == TASK_UNTIED) {
taskdata->td_last_tied = current_task->td_last_tied;
KMP_DEBUG_ASSERT(taskdata->td_last_tied);
}
#if KMP_STATS_ENABLED
// Pick the timer according to the thread state at the time of invocation;
// the timer is popped after the routine returns.
KMP_COUNT_BLOCK(TASK_executed);
switch (KMP_GET_THREAD_STATE()) {
case FORK_JOIN_BARRIER:
KMP_PUSH_PARTITIONED_TIMER(OMP_task_join_bar);
break;
case PLAIN_BARRIER:
KMP_PUSH_PARTITIONED_TIMER(OMP_task_plain_bar);
break;
case TASKYIELD:
KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskyield);
break;
case TASKWAIT:
KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskwait);
break;
case TASKGROUP:
KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskgroup);
break;
default:
KMP_PUSH_PARTITIONED_TIMER(OMP_task_immediate);
break;
}
#endif
#if OMPT_SUPPORT
if (UNLIKELY(ompt_enabled.enabled))
__ompt_task_start(task, current_task, gtid);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
// Report a pending taskloop chunk through the dispatch callback, then reset
// the chunk so it is reported only once.
if (UNLIKELY(ompt_enabled.ompt_callback_dispatch &&
taskdata->ompt_task_info.dispatch_chunk.iterations > 0)) {
ompt_data_t instance = ompt_data_none;
instance.ptr = &(taskdata->ompt_task_info.dispatch_chunk);
ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
&(team_info->parallel_data), &(taskdata->ompt_task_info.task_data),
ompt_dispatch_taskloop_chunk, instance);
taskdata->ompt_task_info.dispatch_chunk = {0, 0};
}
#endif
#if OMPD_SUPPORT
if (ompd_state & OMPD_ENABLE_BP)
ompd_bp_task_begin();
#endif
#if USE_ITT_BUILD && USE_ITT_NOTIFY
// In frames mode 3, the time spent in a non-serialized task run on top of an
// implicit task is added to th_bar_arrive_time after the routine returns
// (only if th_bar_arrive_time is non-zero).
kmp_uint64 cur_time;
kmp_int32 kmp_itt_count_task =
__kmp_forkjoin_frames_mode == 3 && !taskdata->td_flags.task_serial &&
current_task->td_flags.tasktype == TASK_IMPLICIT;
if (kmp_itt_count_task) {
thread = __kmp_threads[gtid];
if (thread->th.th_bar_arrive_time)
cur_time = __itt_get_timestamp();
else
kmp_itt_count_task = 0;
}
KMP_FSYNC_ACQUIRED(taskdata);
#endif
#if ENABLE_LIBOMPTARGET
// A task carrying an async handle is driven through the libomptarget query
// hook instead of its routine. Note the dangling `else`: it binds to the
// `if (task->routine != NULL)` that follows the #endif.
if (taskdata->td_target_data.async_handle != NULL) {
KMP_ASSERT(tgt_target_nowait_query);
tgt_target_nowait_query(&taskdata->td_target_data.async_handle);
} else
#endif
if (task->routine != NULL) {
#ifdef KMP_GOMP_COMPAT
// Native (GOMP-style) tasks take only the shareds pointer.
if (taskdata->td_flags.native) {
((void (*)(void *))(*(task->routine)))(task->shareds);
} else
#endif
{
(*(task->routine))(gtid, task);
}
}
KMP_POP_PARTITIONED_TIMER();
#if USE_ITT_BUILD && USE_ITT_NOTIFY
if (kmp_itt_count_task) {
thread->th.th_bar_arrive_time += (__itt_get_timestamp() - cur_time);
}
KMP_FSYNC_CANCEL(taskdata);
KMP_FSYNC_RELEASING(taskdata->td_parent);
#endif
}
#if OMPD_SUPPORT
if (ompd_state & OMPD_ENABLE_BP)
ompd_bp_task_end();
#endif
// Finish non-proxy tasks. With OMPT enabled, the saved thread info is restored
// first and the exit frame of tied tasks is cleared.
if (taskdata->td_flags.proxy != TASK_PROXY) {
#if OMPT_SUPPORT
if (UNLIKELY(ompt_enabled.enabled)) {
thread->th.ompt_thread_info = oldInfo;
if (taskdata->td_flags.tiedness == TASK_TIED) {
taskdata->ompt_task_info.frame.exit_frame = ompt_data_none;
}
__kmp_task_finish<true>(gtid, task, current_task);
} else
#endif
__kmp_task_finish<false>(gtid, task, current_task);
}
#if OMPT_SUPPORT
// Proxy target tasks only get the OMPT task-switch notification.
else if (UNLIKELY(ompt_enabled.enabled && taskdata->td_flags.target)) {
__ompt_task_finish(task, current_task, ompt_task_switch);
}
#endif
KA_TRACE(
30,
("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
gtid, taskdata, current_task));
return;
}
// Schedules (a part of) `new_task`: the task is pushed onto the calling
// thread's deque; when it cannot be pushed it is marked serial and executed
// immediately on top of the thread's current task.
//   loc_ref   source location, only used for tracing here
//   gtid      global thread id of the encountering thread
//   new_task  task thunk to schedule
// Always returns TASK_CURRENT_NOT_QUEUED.
kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid,
                                kmp_task_t *new_task) {
  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);

  KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n", gtid,
                loc_ref, new_taskdata));

#if OMPT_SUPPORT
  // Stays NULL unless the block below ran, so the cleanup at the end never
  // dereferences an uninitialized pointer (same pattern as __kmpc_omp_task).
  kmp_taskdata_t *parent = NULL;
  if (UNLIKELY(ompt_enabled.enabled)) {
    parent = new_taskdata->td_parent;
    if (ompt_enabled.ompt_callback_task_create) {
      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
          &(parent->ompt_task_info.task_data), &(parent->ompt_task_info.frame),
          &(new_taskdata->ompt_task_info.task_data),
          TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0,
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }
#endif

  /* Should we execute the new task or queue it? For now, let's just always try
     to queue it. If the queue fills up, then we'll execute it. */
  if (__kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) {
    // Could not defer the task: run it right away.
    kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
    new_taskdata->td_flags.task_serial = 1;
    __kmp_invoke_task(gtid, new_task, current_task);
  }

  KA_TRACE(
      10,
      ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
       "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n",
       gtid, loc_ref, new_taskdata));

#if OMPT_SUPPORT
  if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) {
    parent->ompt_task_info.frame.enter_frame = ompt_data_none;
  }
#endif
  return TASK_CURRENT_NOT_QUEUED;
}
// Schedules `new_task` for execution.
//   gtid                 global thread id of the encountering thread
//   new_task             task thunk to schedule
//   serialize_immediate  when the task has to be executed immediately, mark it
//                        as serial (td_flags.task_serial = 1) first
// Proxy tasks and tasks that cannot be pushed onto the deque are invoked right
// away. If the task was queued and the wait policy is passive with a finite
// blocktime, one sleeping teammate is woken up to pick up the work.
// Under OMPX_TASKGRAPH the task is additionally recorded in its task graph
// while the graph is being recorded.
// Always returns TASK_CURRENT_NOT_QUEUED.
kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
                         bool serialize_immediate) {
  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);

#if OMPX_TASKGRAPH
  if (new_taskdata->is_taskgraph &&
      __kmp_tdg_is_recording(new_taskdata->tdg->tdg_status)) {
    kmp_tdg_info_t *tdg = new_taskdata->tdg;
    // Grow the record map if the task id does not fit.
    if (new_taskdata->td_task_id >= new_taskdata->tdg->map_size) {
      __kmp_acquire_bootstrap_lock(&tdg->graph_lock);
      // Re-check under the lock: another thread may have grown the map.
      if (new_taskdata->td_task_id >= tdg->map_size) {
        kmp_uint old_size = tdg->map_size;
        kmp_uint new_size = old_size * 2;
        // A single doubling is not guaranteed to cover the id; keep doubling
        // until it does. (new_size == 0 is left alone to avoid spinning.)
        while (new_size != 0 &&
               (kmp_uint)new_taskdata->td_task_id >= new_size)
          new_size *= 2;
        kmp_node_info_t *old_record = tdg->record_map;
        kmp_node_info_t *new_record = (kmp_node_info_t *)__kmp_allocate(
            new_size * sizeof(kmp_node_info_t));

        KMP_MEMCPY(new_record, old_record, old_size * sizeof(kmp_node_info_t));
        tdg->record_map = new_record;

        __kmp_free(old_record);

        // Initialize the newly added slots.
        for (kmp_int i = old_size; i < new_size; i++) {
          kmp_int32 *successorsList = (kmp_int32 *)__kmp_allocate(
              __kmp_successors_size * sizeof(kmp_int32));
          new_record[i].task = nullptr;
          new_record[i].successors = successorsList;
          new_record[i].nsuccessors = 0;
          new_record[i].npredecessors = 0;
          new_record[i].successors_size = __kmp_successors_size;
          KMP_ATOMIC_ST_REL(&new_record[i].npredecessors_counter, 0);
        }
        // Publish the new size last, after the map itself is in place.
        tdg->map_size = new_size;
      }
      __kmp_release_bootstrap_lock(&tdg->graph_lock);
    }
    // Record the task the first time its id is seen.
    if (tdg->record_map[new_taskdata->td_task_id].task == nullptr) {
      tdg->record_map[new_taskdata->td_task_id].task = new_task;
      tdg->record_map[new_taskdata->td_task_id].parent_task =
          new_taskdata->td_parent;
      KMP_ATOMIC_INC(&tdg->num_tasks);
    }
  }
#endif

  /* Should we execute the new task or queue it? For now, let's just always try
     to queue it. If the queue fills up, then we'll execute it. */
  if (new_taskdata->td_flags.proxy == TASK_PROXY ||
      __kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) {
    // Proxy task or push failed: execute immediately.
    kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
    if (serialize_immediate)
      new_taskdata->td_flags.task_serial = 1;
    __kmp_invoke_task(gtid, new_task, current_task);
  } else if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME &&
             __kmp_wpolicy_passive) {
    // The task was queued: wake up at most one sleeping teammate.
    kmp_info_t *this_thr = __kmp_threads[gtid];
    kmp_team_t *team = this_thr->th.th_team;
    kmp_int32 nthreads = this_thr->th.th_team_nproc;
    for (int i = 0; i < nthreads; ++i) {
      kmp_info_t *thread = team->t.t_threads[i];
      if (thread == this_thr)
        continue;
      if (thread->th.th_sleep_loc != NULL) {
        __kmp_null_resume_wrapper(thread);
        break;
      }
    }
  }
  return TASK_CURRENT_NOT_QUEUED;
}
// Compiler-facing entry that schedules `new_task` through __kmp_omp_task with
// serialize_immediate == true and returns its result.
// With OMPT enabled: for a task that has not started yet, the parent's enter
// frame is recorded (if not already set) and the task-create callback is
// raised; for a task that already started, a task-switch finish is reported
// and its exit frame cleared. The parent's enter frame is reset on the way out
// if it was looked up here.
kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid,
                          kmp_task_t *new_task) {
  KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);

#if KMP_DEBUG || OMPT_SUPPORT
  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
#endif
  KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref,
                new_taskdata));
  __kmp_assert_valid_gtid(gtid);

#if OMPT_SUPPORT
  kmp_taskdata_t *parent = NULL; // set only on the "not yet started" path
  if (UNLIKELY(ompt_enabled.enabled)) {
    if (new_taskdata->td_flags.started) {
      // The task ran before and is being rescheduled.
      __ompt_task_finish(new_task,
                         new_taskdata->ompt_task_info.scheduling_parent,
                         ompt_task_switch);
      new_taskdata->ompt_task_info.frame.exit_frame = ompt_data_none;
    } else {
      OMPT_STORE_RETURN_ADDRESS(gtid);
      parent = new_taskdata->td_parent;
      if (!parent->ompt_task_info.frame.enter_frame.ptr) {
        parent->ompt_task_info.frame.enter_frame.ptr =
            OMPT_GET_FRAME_ADDRESS(0);
      }
      if (ompt_enabled.ompt_callback_task_create) {
        ompt_callbacks.ompt_callback(ompt_callback_task_create)(
            &(parent->ompt_task_info.task_data),
            &(parent->ompt_task_info.frame),
            &(new_taskdata->ompt_task_info.task_data),
            TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0,
            OMPT_LOAD_RETURN_ADDRESS(gtid));
      }
    }
  }
#endif

  const kmp_int32 res = __kmp_omp_task(gtid, new_task, true);

  KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning "
                "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
                gtid, loc_ref, new_taskdata));
#if OMPT_SUPPORT
  if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) {
    parent->ompt_task_info.frame.enter_frame = ompt_data_none;
  }
#endif
  return res;
}
// Schedules a task created for a taskloop through __kmp_omp_task
// (serialize_immediate == true) and returns its result.
//   loc_ref     source location, only used for tracing here
//   gtid        global thread id of the encountering thread
//   new_task    task thunk to schedule
//   codeptr_ra  return address reported to the OMPT task-create callback
// With OMPT enabled and the task not started yet, the parent's enter frame is
// recorded (if not already set) and the task-create callback is raised; the
// enter frame is cleared again before returning.
kmp_int32 __kmp_omp_taskloop_task(ident_t *loc_ref, kmp_int32 gtid,
                                  kmp_task_t *new_task, void *codeptr_ra) {
  kmp_int32 res;
  KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);

#if KMP_DEBUG || OMPT_SUPPORT
  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
#endif
  // The traces previously named __kmpc_omp_task, which made the two entry
  // points indistinguishable in debug output.
  KA_TRACE(10, ("__kmp_omp_taskloop_task(enter): T#%d loc=%p task=%p\n", gtid,
                loc_ref, new_taskdata));

#if OMPT_SUPPORT
  kmp_taskdata_t *parent = NULL;
  if (UNLIKELY(ompt_enabled.enabled && !new_taskdata->td_flags.started)) {
    parent = new_taskdata->td_parent;
    if (!parent->ompt_task_info.frame.enter_frame.ptr)
      parent->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_task_create) {
      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
          &(parent->ompt_task_info.task_data), &(parent->ompt_task_info.frame),
          &(new_taskdata->ompt_task_info.task_data),
          TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0, codeptr_ra);
    }
  }
#endif

  res = __kmp_omp_task(gtid, new_task, true);

  KA_TRACE(10, ("__kmp_omp_taskloop_task(exit): T#%d returning "
                "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
                gtid, loc_ref, new_taskdata));
#if OMPT_SUPPORT
  if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) {
    parent->ompt_task_info.frame.enter_frame = ompt_data_none;
  }
#endif
  return res;
}
// Shared implementation of taskwait: the calling thread executes tasks until
// the current task has no incomplete child tasks left.
//   loc_ref         source location, stored in td_taskwait_ident
//   gtid            global thread id of the calling thread (asserted >= 0)
//   frame_address   stored as the task's OMPT enter frame (ompt == true only)
//   return_address  code pointer handed to the OMPT sync-region callbacks
// Nothing is waited for when tasking runs in immediate-exec mode.
// Always returns TASK_CURRENT_NOT_QUEUED.
template <bool ompt>
static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid,
                                              void *frame_address,
                                              void *return_address) {
  kmp_taskdata_t *taskdata = nullptr;
  kmp_info_t *thread;
  int thread_finished = FALSE;
  KMP_SET_THREAD_STATE_BLOCK(TASKWAIT);

  KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref));
  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    thread = __kmp_threads[gtid];
    taskdata = thread->th.th_current_task;

#if OMPT_SUPPORT && OMPT_OPTIONAL
    ompt_data_t *my_task_data;
    ompt_data_t *my_parallel_data;

    if (ompt) {
      my_task_data = &(taskdata->ompt_task_info.task_data);
      my_parallel_data = OMPT_CUR_TEAM_DATA(thread);

      taskdata->ompt_task_info.frame.enter_frame.ptr = frame_address;

      // Region begin is reported before wait begin.
      if (ompt_enabled.ompt_callback_sync_region) {
        ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
            ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data,
            my_task_data, return_address);
      }
      if (ompt_enabled.ompt_callback_sync_region_wait) {
        ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
            ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data,
            my_task_data, return_address);
      }
    }
#endif

    // Mark the taskwait as active: bump the counter, remember where it was
    // encountered and by which thread (stored as gtid + 1).
    ++taskdata->td_taskwait_counter;
    taskdata->td_taskwait_ident = loc_ref;
    taskdata->td_taskwait_thread = gtid + 1;

#if USE_ITT_BUILD
    void *itt_sync_obj = NULL;
#if USE_ITT_NOTIFY
    KMP_ITT_TASKWAIT_STARTING(itt_sync_obj);
#endif
#endif

    // Waiting is needed unless the task is serialized/final -- but proxy
    // tasks or hidden helper tasks seen by the task team force a wait anyway.
    const bool must_wait =
        (!taskdata->td_flags.team_serial && !taskdata->td_flags.final) ||
        (thread->th.th_task_team != NULL &&
         thread->th.th_task_team->tt.tt_found_proxy_tasks) ||
        (__kmp_enable_hidden_helper && thread->th.th_task_team != NULL &&
         thread->th.th_task_team->tt.tt_hidden_helper_task_encountered);

    if (must_wait) {
      kmp_flag_32<false, false> flag(
          RCAST(std::atomic<kmp_uint32> *,
                &(taskdata->td_incomplete_child_tasks)),
          0U);
      // Keep executing tasks until every child has completed.
      while (KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks) != 0) {
        flag.execute_tasks(thread, gtid, FALSE,
                           &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
                           __kmp_task_stealing_constraint);
      }
    }
#if USE_ITT_BUILD
    KMP_ITT_TASKWAIT_FINISHED(itt_sync_obj);
    KMP_FSYNC_ACQUIRED(taskdata);
#endif

    // Taskwait finished: the thread field is negated.
    taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt) {
      // Mirror image of the begin notifications: wait end, then region end.
      if (ompt_enabled.ompt_callback_sync_region_wait) {
        ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
            ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data,
            my_task_data, return_address);
      }
      if (ompt_enabled.ompt_callback_sync_region) {
        ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
            ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data,
            my_task_data, return_address);
      }
      taskdata->ompt_task_info.frame.enter_frame = ompt_data_none;
    }
#endif
  }

  KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
                "returning TASK_CURRENT_NOT_QUEUED\n",
                gtid, taskdata));

  return TASK_CURRENT_NOT_QUEUED;
}
#if OMPT_SUPPORT && OMPT_OPTIONAL
// OMPT-enabled, OMPT_NOINLINE variant of taskwait: forwards to the <true>
// instantiation of the shared template with the caller-supplied frame and
// return addresses.
OMPT_NOINLINE
static kmp_int32 __kmpc_omp_taskwait_ompt(ident_t *loc_ref, kmp_int32 gtid,
                                          void *frame_address,
                                          void *return_address) {
  return __kmpc_omp_taskwait_template</*ompt=*/true>(
      loc_ref, gtid, frame_address, return_address);
}
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
// Compiler-facing taskwait entry. When OMPT is enabled, the caller's return
// address and this frame's address are captured and the OMPT variant is used;
// otherwise the non-OMPT instantiation runs with NULL addresses.
kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (UNLIKELY(ompt_enabled.enabled)) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    return __kmpc_omp_taskwait_ompt(loc_ref, gtid, OMPT_GET_FRAME_ADDRESS(0),
                                    OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
  return __kmpc_omp_taskwait_template</*ompt=*/false>(loc_ref, gtid, NULL,
                                                      NULL);
}
// Taskyield entry: gives the calling thread a chance to execute other tasks.
//   loc_ref   source location, stored in td_taskwait_ident
//   gtid      global thread id of the calling thread
//   end_part  only used for tracing here
// Tasks are executed only when tasking is not in immediate-exec mode, the
// runtime's parallel initialization is done, the current task is not
// team-serialized and the thread's task team exists with tasking enabled.
// Always returns TASK_CURRENT_NOT_QUEUED.
kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid, int end_part) {
  kmp_taskdata_t *taskdata = NULL;
  kmp_info_t *thread;
  int thread_finished = FALSE;

  KMP_COUNT_BLOCK(OMP_TASKYIELD);
  KMP_SET_THREAD_STATE_BLOCK(TASKYIELD);

  KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
                gtid, loc_ref, end_part));
  __kmp_assert_valid_gtid(gtid);

  if (__kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel) {
    thread = __kmp_threads[gtid];
    taskdata = thread->th.th_current_task;

    // The taskwait bookkeeping fields are reused to mark the yield as active.
    ++taskdata->td_taskwait_counter;
    taskdata->td_taskwait_ident = loc_ref;
    taskdata->td_taskwait_thread = gtid + 1;

#if USE_ITT_BUILD
    void *itt_sync_obj = NULL;
#if USE_ITT_NOTIFY
    KMP_ITT_TASKWAIT_STARTING(itt_sync_obj);
#endif
#endif

    // The task team is only looked at for tasks that are not team-serialized.
    kmp_task_team_t *task_team =
        taskdata->td_flags.team_serial ? NULL : thread->th.th_task_team;
    if (task_team != NULL && KMP_TASKING_ENABLED(task_team)) {
#if OMPT_SUPPORT
      if (UNLIKELY(ompt_enabled.enabled))
        thread->th.ompt_thread_info.ompt_task_yielded = 1;
#endif
      __kmp_execute_tasks_32(thread, gtid, (kmp_flag_32<> *)NULL, FALSE,
                             &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
                             __kmp_task_stealing_constraint);
#if OMPT_SUPPORT
      if (UNLIKELY(ompt_enabled.enabled))
        thread->th.ompt_thread_info.ompt_task_yielded = 0;
#endif
    }

#if USE_ITT_BUILD
    KMP_ITT_TASKWAIT_FINISHED(itt_sync_obj);
#endif

    // Yield finished: the thread field is negated.
    taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
  }

  KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
                "returning TASK_CURRENT_NOT_QUEUED\n",
                gtid, taskdata));

  return TASK_CURRENT_NOT_QUEUED;
}
/*!
@ingroup BASIC_TYPES
@{
*/

/*!
Flags for special info per task reduction item.
*/
typedef struct kmp_taskred_flags {
  /*! When set, __kmp_task_reduction_init only allocates one pointer slot per
      thread for the item instead of the private copies themselves. */
  unsigned lazy_priv : 1;
  unsigned reserved31 : 31; /**< remaining bits of the 32-bit flag word */
} kmp_taskred_flags_t;

/*!
Internal struct for reduction data item related info set up by compiler.
*/
typedef struct kmp_task_red_input {
  void *reduce_shar; /**< shared item to reduce into */
  size_t reduce_size; /**< size of the data item */
  void *reduce_init; /**< initializer routine, called with one argument;
                          may be NULL */
  void *reduce_fini; /**< finalizer routine */
  void *reduce_comb; /**< combiner routine; must not be NULL */
  kmp_taskred_flags_t flags; /**< additional flags from the compiler */
} kmp_task_red_input_t;

/*!
Internal struct for reduction data item related info saved by the library.
*/
typedef struct kmp_taskred_data {
  void *reduce_shar; /**< shared item to reduce into */
  size_t reduce_size; /**< size of one private item, rounded up to a multiple
                           of CACHE_LINE */
  kmp_taskred_flags_t flags; /**< additional flags from the compiler */
  void *reduce_priv; /**< per-thread private items (or per-thread pointer
                          slots when flags.lazy_priv is set) */
  void *reduce_pend; /**< one past the end of the private items */
  void *reduce_comb; /**< combiner routine */
  void *reduce_init; /**< initializer routine; may be NULL */
  void *reduce_fini; /**< finalizer routine */
  void *reduce_orig; /**< original item handed to two-argument initializers */
} kmp_taskred_data_t;

/*!
Internal struct for reduction data item related info set up by compiler.

New interface: added reduce_orig field to provide omp_orig for UDR initializer.
*/
typedef struct kmp_taskred_input {
  void *reduce_shar; /**< shared item to reduce into */
  void *reduce_orig; /**< original item; when NULL, reduce_shar is used */
  size_t reduce_size; /**< size of the data item */
  void *reduce_init; /**< initializer routine, called with two arguments;
                          may be NULL */
  void *reduce_fini; /**< finalizer routine */
  void *reduce_comb; /**< combiner routine; must not be NULL */
  kmp_taskred_flags_t flags; /**< additional flags from the compiler */
} kmp_taskred_input_t;
/*!
@}
*/
// Sets item.reduce_orig from the compiler-provided input record `src`.
template <typename T> void __kmp_assign_orig(kmp_taskred_data_t &item, T &src);

// Old interface: the input carries no original item, so none is recorded.
template <>
void __kmp_assign_orig<kmp_task_red_input_t>(kmp_taskred_data_t &item,
                                             kmp_task_red_input_t &src) {
  item.reduce_orig = NULL;
}

// New interface: use the supplied original item, falling back to the shared
// item when the input does not provide one.
template <>
void __kmp_assign_orig<kmp_taskred_input_t>(kmp_taskred_data_t &item,
                                            kmp_taskred_input_t &src) {
  item.reduce_orig =
      (src.reduce_orig != NULL) ? src.reduce_orig : src.reduce_shar;
}
// Invokes item.reduce_init on the private object located `offset` bytes into
// item.reduce_priv. The callers visible in this file only call it when
// reduce_init is non-NULL.
template <typename T> void __kmp_call_init(kmp_taskred_data_t &item, size_t j);

// Old interface: the initializer takes only the object to initialize.
template <>
void __kmp_call_init<kmp_task_red_input_t>(kmp_taskred_data_t &item,
                                           size_t offset) {
  typedef void (*init_fn_t)(void *);
  char *priv_obj = (char *)(item.reduce_priv) + offset;
  ((init_fn_t)item.reduce_init)(priv_obj);
}

// New interface: the initializer also receives the original item.
template <>
void __kmp_call_init<kmp_taskred_input_t>(kmp_taskred_data_t &item,
                                          size_t offset) {
  typedef void (*init_fn_t)(void *, void *);
  char *priv_obj = (char *)(item.reduce_priv) + offset;
  ((init_fn_t)item.reduce_init)(priv_obj, item.reduce_orig);
}
template <typename T>
void *__kmp_task_reduction_init(int gtid, int num, T *data) {
__kmp_assert_valid_gtid(gtid);
kmp_info_t *thread = __kmp_threads[gtid];
kmp_taskgroup_t *tg = thread->th.th_current_task->td_taskgroup;
kmp_uint32 nth = thread->th.th_team_nproc;
kmp_taskred_data_t *arr;
KMP_ASSERT(tg != NULL);
KMP_ASSERT(data != NULL);
KMP_ASSERT(num > 0);
if (nth == 1 && !__kmp_enable_hidden_helper) {
KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, tg %p, exiting nth=1\n",
gtid, tg));
return (void *)tg;
}
KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, taskgroup %p, #items %d\n",
gtid, tg, num));
arr = (kmp_taskred_data_t *)__kmp_thread_malloc(
thread, num * sizeof(kmp_taskred_data_t));
for (int i = 0; i < num; ++i) {
size_t size = data[i].reduce_size - 1;
size += CACHE_LINE - size % CACHE_LINE;
KMP_ASSERT(data[i].reduce_comb != NULL);
arr[i].reduce_shar = data[i].reduce_shar;
arr[i].reduce_size = size;
arr[i].flags = data[i].flags;
arr[i].reduce_comb = data[i].reduce_comb;
arr[i].reduce_init = data[i].reduce_init;
arr[i].reduce_fini = data[i].reduce_fini;
__kmp_assign_orig<T>(arr[i], data[i]);
if (!arr[i].flags.lazy_priv) {
arr[i].reduce_priv = __kmp_allocate(nth * size);
arr[i].reduce_pend = (char *)(arr[i].reduce_priv) + nth * size;
if (arr[i].reduce_init != NULL) {
for (size_t j = 0; j < nth; ++j) {
__kmp_call_init<T>(arr[i], j * size);
}
}
} else {
arr[i].reduce_priv = __kmp_allocate(nth * sizeof(void *));
}
}
tg->reduce_data = (void *)arr;
tg->reduce_num_data = num;
return (void *)tg;
}
/*!
@ingroup TASKING
@param gtid Global thread ID
@param num Number of data items to reduce
@param data Array of data for reduction
@return The taskgroup identifier

Initialize task reduction for the taskgroup.

Note: this entry supposes the optional compiler-generated initializer routine
has a single parameter - a pointer to the object to be initialized. That means
the reduction either does not use the omp_orig object, or the omp_orig is
accessible without the help of the runtime library.
*/
void *__kmpc_task_reduction_init(int gtid, int num, void *data) {
#if OMPX_TASKGRAPH
kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_idx);
if (tdg && __kmp_tdg_is_recording(tdg->tdg_status)) {
kmp_tdg_info_t *this_tdg = __kmp_global_tdgs[__kmp_curr_tdg_idx];
this_tdg->rec_taskred_data =
__kmp_allocate(sizeof(kmp_task_red_input_t) * num);
this_tdg->rec_num_taskred = num;
KMP_MEMCPY(this_tdg->rec_taskred_data, data,
sizeof(kmp_task_red_input_t) * num);
}
#endif
return __kmp_task_reduction_init(gtid, num, (kmp_task_red_input_t *)data);
}
/*!
@ingroup TASKING
@param gtid Global thread ID
@param num Number of data items to reduce
@param data Array of data for reduction
@return The taskgroup identifier
Initialize task reduction for the taskgroup.
Note: this entry assumes the optional compiler-generated initializer routine
has two parameters: a pointer to the object to be initialized and a pointer to
omp_orig.
*/
void *__kmpc_taskred_init(int gtid, int num, void *data) {
#if OMPX_TASKGRAPH
  // If the current TDG is being recorded, store a copy of the reduction input
  // array in the TDG record. `data` is an array of kmp_taskred_input_t here
  // (see the cast below), so the copy must be sized with that type; sizing it
  // with kmp_task_red_input_t would truncate the recorded data whenever the
  // two descriptor types differ in size.
  kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_idx);
  if (tdg && __kmp_tdg_is_recording(tdg->tdg_status)) {
    const size_t nbytes = sizeof(kmp_taskred_input_t) * num;
    kmp_tdg_info_t *this_tdg = __kmp_global_tdgs[__kmp_curr_tdg_idx];
    this_tdg->rec_taskred_data = __kmp_allocate(nbytes);
    this_tdg->rec_num_taskred = num;
    KMP_MEMCPY(this_tdg->rec_taskred_data, data, nbytes);
  }
#endif
  return __kmp_task_reduction_init(gtid, num, (kmp_taskred_input_t *)data);
}
// Give taskgroup `tg` its own thread-allocated copy of the `num` reduction
// descriptors found at `reduce_data`, then overwrite each copy's shared-item
// address with the one from this thread's `data` array.
template <typename T>
void __kmp_task_reduction_init_copy(kmp_info_t *thr, int num, T *data,
                                    kmp_taskgroup_t *tg, void *reduce_data) {
  KA_TRACE(20, ("__kmp_task_reduction_init_copy: Th %p, init taskgroup %p,"
                " from data %p\n",
                thr, tg, reduce_data));
  const size_t nbytes = num * sizeof(kmp_taskred_data_t);
  kmp_taskred_data_t *copy =
      (kmp_taskred_data_t *)__kmp_thread_malloc(thr, nbytes);
  KMP_MEMCPY(copy, reduce_data, nbytes);
  for (int k = 0; k < num; ++k) {
    // Only the shared address is taken from the caller's input.
    copy[k].reduce_shar = data[k].reduce_shar;
  }
  tg->reduce_num_data = num;
  tg->reduce_data = (void *)copy;
}
/*!
@ingroup TASKING
@param gtid Global thread ID
@param tskgrp The taskgroup ID (optional)
@param data Shared location of the item
@return The pointer to per-thread data
Get the thread-specific location of a data item.
*/
// Look up the calling thread's private copy of a task reduction item.
// `data` may be the shared address of the item or an address that belongs to
// some thread's private copy. The search starts at `tskgrp` (or at the current
// task's taskgroup when `tskgrp` is NULL) and continues through parent
// taskgroups. Asserts if the item is not found.
void *__kmpc_task_reduction_get_th_data(int gtid, void *tskgrp, void *data) {
__kmp_assert_valid_gtid(gtid);
kmp_info_t *thread = __kmp_threads[gtid];
kmp_int32 nth = thread->th.th_team_nproc;
// With a single thread the passed-in address is returned as is.
if (nth == 1)
return data;
kmp_taskgroup_t *tg = (kmp_taskgroup_t *)tskgrp;
if (tg == NULL)
tg = thread->th.th_current_task->td_taskgroup;
KMP_ASSERT(tg != NULL);
kmp_taskred_data_t *arr;
kmp_int32 num;
kmp_int32 tid = thread->th.th_info.ds.ds_tid;
#if OMPX_TASKGRAPH
// For a taskgraph task whose TDG is not being recorded, the search always
// starts from the current task's taskgroup, regardless of `tskgrp`.
// Note: arr and num assigned here are reassigned at the top of the loop below.
if ((thread->th.th_current_task->is_taskgraph) &&
(!__kmp_tdg_is_recording(
__kmp_global_tdgs[__kmp_curr_tdg_idx]->tdg_status))) {
tg = thread->th.th_current_task->td_taskgroup;
KMP_ASSERT(tg != NULL);
KMP_ASSERT(tg->reduce_data != NULL);
arr = (kmp_taskred_data_t *)(tg->reduce_data);
num = tg->reduce_num_data;
}
#endif
KMP_ASSERT(data != NULL);
// Walk from the starting taskgroup up through its ancestors.
while (tg != NULL) {
arr = (kmp_taskred_data_t *)(tg->reduce_data);
num = tg->reduce_num_data;
for (int i = 0; i < num; ++i) {
if (!arr[i].flags.lazy_priv) {
// Eager item: private copies are contiguous in [reduce_priv, reduce_pend),
// one slot of reduce_size bytes per thread.
if (data == arr[i].reduce_shar ||
(data >= arr[i].reduce_priv && data < arr[i].reduce_pend))
return (char *)(arr[i].reduce_priv) + tid * arr[i].reduce_size;
} else {
// Lazy item: reduce_priv is an array of per-thread pointers.
void **p_priv = (void **)(arr[i].reduce_priv);
if (data == arr[i].reduce_shar)
goto found;
// Also accept the address of any thread's already created private copy.
for (int j = 0; j < nth; ++j)
if (data == p_priv[j])
goto found;
continue;
found:
// Create and initialize this thread's copy on first use.
if (p_priv[tid] == NULL) {
p_priv[tid] = __kmp_allocate(arr[i].reduce_size);
if (arr[i].reduce_init != NULL) {
// The initializer takes (priv, orig) when an orig object is recorded,
// otherwise just (priv).
if (arr[i].reduce_orig != NULL) {
((void (*)(void *, void *))arr[i].reduce_init)(
p_priv[tid], arr[i].reduce_orig);
} else {
((void (*)(void *))arr[i].reduce_init)(p_priv[tid]);
}
}
}
return p_priv[tid];
}
}
// Not found at this level: a parent taskgroup is required to continue.
KMP_ASSERT(tg->parent);
tg = tg->parent;
}
KMP_ASSERT2(0, "Unknown task reduction item");
return NULL;
}
// Finalize the reductions of taskgroup `tg`: combine every per-thread private
// copy into the shared item, run the optional finalizer on each copy, release
// the private storage and the descriptor array, and reset the taskgroup's
// reduction fields.
static void __kmp_task_reduction_fini(kmp_info_t *th, kmp_taskgroup_t *tg) {
  typedef void (*fini_fn_t)(void *);
  typedef void (*comb_fn_t)(void *, void *);

  kmp_int32 nth = th->th.th_team_nproc;
  KMP_DEBUG_ASSERT(nth > 1 || __kmp_enable_hidden_helper);
  kmp_taskred_data_t *items = (kmp_taskred_data_t *)tg->reduce_data;
  kmp_int32 nitems = tg->reduce_num_data;

  for (int i = 0; i < nitems; ++i) {
    kmp_taskred_data_t &item = items[i];
    void *shared = item.reduce_shar;
    fini_fn_t finalize = (fini_fn_t)(item.reduce_fini);
    comb_fn_t combine = (comb_fn_t)(item.reduce_comb);

    if (item.flags.lazy_priv) {
      // Lazy item: only the copies that were actually created are combined,
      // and each one is freed individually.
      void **slots = (void **)(item.reduce_priv);
      for (int j = 0; j < nth; ++j) {
        if (slots[j] == NULL)
          continue;
        combine(shared, slots[j]);
        if (finalize)
          finalize(slots[j]);
        __kmp_free(slots[j]);
      }
    } else {
      // Eager item: one slot of reduce_size bytes per thread.
      void *base = item.reduce_priv;
      size_t stride = item.reduce_size;
      for (int j = 0; j < nth; ++j) {
        void *priv = (char *)base + j * stride;
        combine(shared, priv);
        if (finalize)
          finalize(priv);
      }
    }
    __kmp_free(item.reduce_priv);
  }
  __kmp_thread_free(th, items);
  tg->reduce_data = NULL;
  tg->reduce_num_data = 0;
}
// Release only the taskgroup's descriptor array (not the private reduction
// storage it points to) and reset the taskgroup's reduction fields.
static void __kmp_task_reduction_clean(kmp_info_t *th, kmp_taskgroup_t *tg) {
  void *descriptors = tg->reduce_data;
  __kmp_thread_free(th, descriptors);
  tg->reduce_num_data = 0;
  tg->reduce_data = NULL;
}
// Common implementation of the reduction-modifier initialization. Opens a
// taskgroup for the calling thread and attaches reduction data to it.
// The first thread that claims the team slot t_tg_reduce_data[is_ws] performs
// the real initialization and publishes a copy of the descriptors; all other
// threads wait for that copy and clone it into their own taskgroup.
// Returns the calling thread's new taskgroup.
template <typename T>
void *__kmp_task_reduction_modifier_init(ident_t *loc, int gtid, int is_ws,
                                         int num, T *data) {
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *thr = __kmp_threads[gtid];
  kmp_int32 nth = thr->th.th_team_nproc;
  __kmpc_taskgroup(loc, gtid); // form a new taskgroup first
  if (nth == 1) {
    KA_TRACE(10,
             ("__kmpc_reduction_modifier_init: T#%d, tg %p, exiting nth=1\n",
              gtid, thr->th.th_current_task->td_taskgroup));
    return (void *)thr->th.th_current_task->td_taskgroup;
  }

  kmp_team_t *team = thr->th.th_team;
  kmp_taskgroup_t *tg;
  void *reduce_data = KMP_ATOMIC_LD_RLX(&team->t.t_tg_reduce_data[is_ws]);
  if (reduce_data == NULL &&
      __kmp_atomic_compare_store(&team->t.t_tg_reduce_data[is_ws], reduce_data,
                                 (void *)1)) {
    // This thread claimed the slot (marked with the value 1 while busy).
    KMP_DEBUG_ASSERT(reduce_data == NULL);
    tg = (kmp_taskgroup_t *)__kmp_task_reduction_init<T>(gtid, num, data);
    // Publish a separate copy of the descriptors for the other threads.
    reduce_data = __kmp_thread_malloc(thr, num * sizeof(kmp_taskred_data_t));
    KMP_MEMCPY(reduce_data, tg->reduce_data, num * sizeof(kmp_taskred_data_t));
    KMP_DEBUG_ASSERT(KMP_ATOMIC_LD_RLX(&team->t.t_tg_fini_counter[0]) == 0);
    KMP_DEBUG_ASSERT(KMP_ATOMIC_LD_RLX(&team->t.t_tg_fini_counter[1]) == 0);
    KMP_ATOMIC_ST_REL(&team->t.t_tg_reduce_data[is_ws], reduce_data);
  } else {
    // Spin until the slot no longer holds the "busy" marker.
    for (;;) {
      reduce_data = KMP_ATOMIC_LD_ACQ(&team->t.t_tg_reduce_data[is_ws]);
      if (reduce_data != (void *)1)
        break;
      KMP_CPU_PAUSE();
    }
    KMP_DEBUG_ASSERT(reduce_data > (void *)1);
    tg = thr->th.th_current_task->td_taskgroup;
    __kmp_task_reduction_init_copy<T>(thr, num, data, tg, reduce_data);
  }
  return tg;
}
/*!
@ingroup TASKING
@param loc Source location info
@param gtid Global thread ID
@param is_ws Is 1 if the reduction is for worksharing, 0 otherwise
@param num Number of data items to reduce
@param data Array of data for reduction
@return The taskgroup identifier
Initialize task reduction for a parallel or worksharing construct.
Note: this entry assumes the optional compiler-generated initializer routine
has a single parameter - a pointer to the object to be initialized. That means
the reduction either does not use the omp_orig object, or omp_orig is
accessible without the help of the runtime library.
*/
void *__kmpc_task_reduction_modifier_init(ident_t *loc, int gtid, int is_ws,
                                          int num, void *data) {
  // Interpret the opaque input as descriptors with a one-argument initializer.
  kmp_task_red_input_t *input = (kmp_task_red_input_t *)data;
  return __kmp_task_reduction_modifier_init(loc, gtid, is_ws, num, input);
}
/*!
@ingroup TASKING
@param loc Source location info
@param gtid Global thread ID
@param is_ws Is 1 if the reduction is for worksharing, 0 otherwise
@param num Number of data items to reduce
@param data Array of data for reduction
@return The taskgroup identifier
Initialize task reduction for a parallel or worksharing construct.
Note: this entry assumes the optional compiler-generated initializer routine
has two parameters: a pointer to the object to be initialized and a pointer to
omp_orig.
*/
void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int is_ws, int num,
                                   void *data) {
  // Interpret the opaque input as descriptors with a two-argument initializer.
  kmp_taskred_input_t *input = (kmp_taskred_input_t *)data;
  return __kmp_task_reduction_modifier_init(loc, gtid, is_ws, num, input);
}
/*!
@ingroup TASKING
@param loc Source location info
@param gtid Global thread ID
@param is_ws Is 1 if the reduction is for worksharing, 0 otherwise
Finalize task reduction for a parallel or worksharing construct.
*/
void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid, int is_ws) {
  // Ending the taskgroup does all the work; is_ws is not consulted here.
  (void)is_ws;
  __kmpc_end_taskgroup(loc, gtid);
}
// Start a new taskgroup for the current task of thread `gtid`: allocate it,
// reset its counters and reduction fields, link it to the previously active
// taskgroup as parent, and make it the task's current taskgroup.
void __kmpc_taskgroup(ident_t *loc, int gtid) {
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *taskdata = thread->th.th_current_task;

  kmp_taskgroup_t *group =
      (kmp_taskgroup_t *)__kmp_thread_malloc(thread, sizeof(kmp_taskgroup_t));
  KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, group));

  KMP_ATOMIC_ST_RLX(&group->count, 0);
  KMP_ATOMIC_ST_RLX(&group->cancel_request, cancel_noreq);
  group->reduce_data = NULL;
  group->reduce_num_data = 0;
  group->gomp_data = NULL;
  // Push the new group on top of the task's taskgroup chain.
  group->parent = taskdata->td_taskgroup;
  taskdata->td_taskgroup = group;

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) {
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    kmp_team_t *team = thread->th.th_team;
    ompt_data_t my_task_data = taskdata->ompt_task_info.task_data;
    // FIXME: I think this is wrong for lwt!
    ompt_data_t my_parallel_data = team->t.ompt_team_info.parallel_data;

    ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
        ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data),
        &(my_task_data), codeptr);
  }
#endif
}
// End the current taskgroup of thread `gtid`: execute tasks until the
// taskgroup's task counter drops to zero, finalize or release any attached
// task reduction data, then pop the taskgroup (restoring its parent) and free
// it.
void __kmpc_end_taskgroup(ident_t *loc, int gtid) {
__kmp_assert_valid_gtid(gtid);
kmp_info_t *thread = __kmp_threads[gtid];
kmp_taskdata_t *taskdata = thread->th.th_current_task;
kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
int thread_finished = FALSE;
#if OMPT_SUPPORT && OMPT_OPTIONAL
kmp_team_t *team;
ompt_data_t my_task_data;
ompt_data_t my_parallel_data;
void *codeptr = nullptr;
// Capture the data handed to the OMPT callbacks issued further below.
if (UNLIKELY(ompt_enabled.enabled)) {
team = thread->th.th_team;
my_task_data = taskdata->ompt_task_info.task_data;
my_parallel_data = team->t.ompt_team_info.parallel_data;
codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
}
#endif
KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc));
KMP_DEBUG_ASSERT(taskgroup != NULL);
KMP_SET_THREAD_STATE_BLOCK(TASKGROUP);
if (__kmp_tasking_mode != tskm_immediate_exec) {
// Record the wait in the task data: counter, source location, and
// gtid + 1 as the waiting-thread marker.
taskdata->td_taskwait_counter += 1;
taskdata->td_taskwait_ident = loc;
taskdata->td_taskwait_thread = gtid + 1;
#if USE_ITT_BUILD
void *itt_sync_obj = NULL;
#if USE_ITT_NOTIFY
KMP_ITT_TASKWAIT_STARTING(itt_sync_obj);
#endif
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)) {
ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data),
&(my_task_data), codeptr);
}
#endif
// Wait only if the team is not serialized, or if the task team has seen
// proxy tasks or hidden helper tasks.
if (!taskdata->td_flags.team_serial ||
(thread->th.th_task_team != NULL &&
(thread->th.th_task_team->tt.tt_found_proxy_tasks ||
thread->th.th_task_team->tt.tt_hidden_helper_task_encountered))) {
kmp_flag_32<false, false> flag(
RCAST(std::atomic<kmp_uint32> *, &(taskgroup->count)), 0U);
// Keep executing tasks until the taskgroup's counter reaches zero.
while (KMP_ATOMIC_LD_ACQ(&taskgroup->count) != 0) {
flag.execute_tasks(thread, gtid, FALSE,
&thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
__kmp_task_stealing_constraint);
}
}
// Negate the marker set above now that the wait is over.
taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)) {
ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data),
&(my_task_data), codeptr);
}
#endif
#if USE_ITT_BUILD
KMP_ITT_TASKWAIT_FINISHED(itt_sync_obj);
KMP_FSYNC_ACQUIRED(taskdata);
#endif
}
KMP_DEBUG_ASSERT(taskgroup->count == 0);
// Handle task reduction data attached to this taskgroup (skipped when
// gomp_data is set).
if (taskgroup->reduce_data != NULL &&
!taskgroup->gomp_data) {
int cnt;
void *reduce_data;
kmp_team_t *t = thread->th.th_team;
kmp_taskred_data_t *arr = (kmp_taskred_data_t *)taskgroup->reduce_data;
// The first item's private storage pointer is used to tell whether this
// taskgroup's data matches the team-level data in slot [0] or slot [1].
void *priv0 = arr[0].reduce_priv;
if ((reduce_data = KMP_ATOMIC_LD_ACQ(&t->t.t_tg_reduce_data[0])) != NULL &&
((kmp_taskred_data_t *)reduce_data)[0].reduce_priv == priv0) {
// Matches slot [0]: count finishing threads.
// NOTE(review): presumably KMP_ATOMIC_INC returns the pre-increment value,
// so the branch below is taken by the last thread to arrive - confirm.
cnt = KMP_ATOMIC_INC(&t->t.t_tg_fini_counter[0]);
if (cnt == thread->th.th_team_nproc - 1) {
// Full finalization, then release the team-level copy and reset the slot
// and its counter.
__kmp_task_reduction_fini(thread, taskgroup);
__kmp_thread_free(thread, reduce_data);
KMP_ATOMIC_ST_REL(&t->t.t_tg_reduce_data[0], NULL);
KMP_ATOMIC_ST_REL(&t->t.t_tg_fini_counter[0], 0);
} else {
// Other threads only release their own descriptor array.
__kmp_task_reduction_clean(thread, taskgroup);
}
} else if ((reduce_data = KMP_ATOMIC_LD_ACQ(&t->t.t_tg_reduce_data[1])) !=
NULL &&
((kmp_taskred_data_t *)reduce_data)[0].reduce_priv == priv0) {
// Matches slot [1]: same protocol as for slot [0].
cnt = KMP_ATOMIC_INC(&t->t.t_tg_fini_counter[1]);
if (cnt == thread->th.th_team_nproc - 1) {
__kmp_task_reduction_fini(thread, taskgroup);
__kmp_thread_free(thread, reduce_data);
KMP_ATOMIC_ST_REL(&t->t.t_tg_reduce_data[1], NULL);
KMP_ATOMIC_ST_REL(&t->t.t_tg_fini_counter[1], 0);
} else {
__kmp_task_reduction_clean(thread, taskgroup);
}
} else {
// No team-level match: finalize the reduction in this thread.
__kmp_task_reduction_fini(thread, taskgroup);
}
}
// Pop the taskgroup and release it.
taskdata->td_taskgroup = taskgroup->parent;
__kmp_thread_free(thread, taskgroup);
KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n",
gtid, taskdata));
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) {
ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data),
&(my_task_data), codeptr);
}
#endif
}
// Try to take one task from the task team's priority task list.
// First reserves a task by decrementing tt_num_task_pri with a compare-store,
// then scans the list of priority deques for a non-empty one and removes an
// allowed task from it. Returns the task, or NULL when nothing could be taken
// (the reservation is given back in that case).
static kmp_task_t *__kmp_get_priority_task(kmp_int32 gtid,
kmp_task_team_t *task_team,
kmp_int32 is_constrained) {
kmp_task_t *task = NULL;
kmp_taskdata_t *taskdata;
kmp_taskdata_t *current;
kmp_thread_data_t *thread_data;
int ntasks = task_team->tt.tt_num_task_pri;
if (ntasks == 0) {
KA_TRACE(
20, ("__kmp_get_priority_task(exit #1): T#%d No tasks to get\n", gtid));
return NULL;
}
// Reserve one task: retry the compare-store with a fresh count until it
// succeeds or the count reaches zero.
do {
if (__kmp_atomic_compare_store(&task_team->tt.tt_num_task_pri, ntasks,
ntasks - 1))
break;
ntasks = task_team->tt.tt_num_task_pri;
} while (ntasks > 0);
if (ntasks == 0) {
KA_TRACE(20, ("__kmp_get_priority_task(exit #2): T#%d No tasks to get\n",
__kmp_get_gtid()));
return NULL;
}
// Find the first list entry whose deque is non-empty; the loop exits with
// that deque's lock still held.
int deque_ntasks;
kmp_task_pri_t *list = task_team->tt.tt_task_pri_list;
do {
KMP_ASSERT(list != NULL);
thread_data = &list->td;
__kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
deque_ntasks = thread_data->td.td_deque_ntasks;
if (deque_ntasks == 0) {
__kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
KA_TRACE(20, ("__kmp_get_priority_task: T#%d No tasks to get from %p\n",
__kmp_get_gtid(), thread_data));
list = list->next;
}
} while (deque_ntasks == 0);
KMP_DEBUG_ASSERT(deque_ntasks);
int target = thread_data->td.td_deque_head;
current = __kmp_threads[gtid]->th.th_current_task;
taskdata = thread_data->td.td_deque[target];
if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
// The head task is allowed: advance the head (with wrap-around).
thread_data->td.td_deque_head =
(target + 1) & TASK_DEQUE_MASK(thread_data->td);
} else {
// The head task is not allowed. Other entries are only examined when an
// untied task has been encountered in this task team.
if (!task_team->tt.tt_untied_task_encountered) {
__kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
KA_TRACE(20, ("__kmp_get_priority_task(exit #3): T#%d could not get task "
"from %p: task_team=%p ntasks=%d head=%u tail=%u\n",
gtid, thread_data, task_team, deque_ntasks, target,
thread_data->td.td_deque_tail));
task_team->tt.tt_num_task_pri++; // give the reservation back
return NULL;
}
// Search the remaining entries for the first allowed task.
int i;
taskdata = NULL;
for (i = 1; i < deque_ntasks; ++i) {
target = (target + 1) & TASK_DEQUE_MASK(thread_data->td);
taskdata = thread_data->td.td_deque[target];
if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
break;
} else {
taskdata = NULL;
}
}
if (taskdata == NULL) {
__kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
KA_TRACE(
10, ("__kmp_get_priority_task(exit #4): T#%d could not get task from "
"%p: task_team=%p ntasks=%d head=%u tail=%u\n",
gtid, thread_data, task_team, deque_ntasks,
thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
task_team->tt.tt_num_task_pri++; // give the reservation back
return NULL;
}
// Close the gap left by the removed task: shift the following entries one
// slot towards the head, then pull the tail back by one.
int prev = target;
for (i = i + 1; i < deque_ntasks; ++i) {
target = (target + 1) & TASK_DEQUE_MASK(thread_data->td);
thread_data->td.td_deque[prev] = thread_data->td.td_deque[target];
prev = target;
}
KMP_DEBUG_ASSERT(
thread_data->td.td_deque_tail ==
(kmp_uint32)((target + 1) & TASK_DEQUE_MASK(thread_data->td)));
thread_data->td.td_deque_tail = target;
}
thread_data->td.td_deque_ntasks = deque_ntasks - 1;
__kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
task = KMP_TASKDATA_TO_TASK(taskdata);
return task;
}
// Remove a task from the tail of the calling thread's own deque.
// Returns the task, or NULL when the deque is empty or when the tail task is
// rejected by __kmp_task_is_allowed.
static kmp_task_t *__kmp_remove_my_task(kmp_info_t *thread, kmp_int32 gtid,
                                        kmp_task_team_t *task_team,
                                        kmp_int32 is_constrained) {
  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  KMP_DEBUG_ASSERT(task_team->tt.tt_threads_data != NULL);

  kmp_thread_data_t *thread_data =
      &task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];

  KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
                gtid, thread_data->td.td_deque_ntasks,
                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));

  // Cheap check without the lock first.
  if (TCR_4(thread_data->td.td_deque_ntasks) == 0) {
    KA_TRACE(10,
             ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: "
              "ntasks=%d head=%u tail=%u\n",
              gtid, thread_data->td.td_deque_ntasks,
              thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
    return NULL;
  }

  __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);

  // Re-check now that the lock is held.
  if (TCR_4(thread_data->td.td_deque_ntasks) == 0) {
    __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
    KA_TRACE(10,
             ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: "
              "ntasks=%d head=%u tail=%u\n",
              gtid, thread_data->td.td_deque_ntasks,
              thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
    return NULL;
  }

  // Index of the last queued task (tail - 1 with wrap-around).
  kmp_uint32 last =
      (thread_data->td.td_deque_tail - 1) & TASK_DEQUE_MASK(thread_data->td);
  kmp_taskdata_t *taskdata = thread_data->td.td_deque[last];

  if (!__kmp_task_is_allowed(gtid, is_constrained, taskdata,
                             thread->th.th_current_task)) {
    // The tail task may not be run now; leave the deque unchanged.
    __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
    KA_TRACE(10,
             ("__kmp_remove_my_task(exit #3): T#%d TSC blocks tail task: "
              "ntasks=%d head=%u tail=%u\n",
              gtid, thread_data->td.td_deque_ntasks,
              thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
    return NULL;
  }

  thread_data->td.td_deque_tail = last;
  TCW_4(thread_data->td.td_deque_ntasks, thread_data->td.td_deque_ntasks - 1);

  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);

  KA_TRACE(10, ("__kmp_remove_my_task(exit #4): T#%d task %p removed: "
                "ntasks=%d head=%u tail=%u\n",
                gtid, taskdata, thread_data->td.td_deque_ntasks,
                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));

  return KMP_TASKDATA_TO_TASK(taskdata);
}
// Try to steal a task from the deque of thread `victim_tid` in `task_team`.
// Tasks are taken from the head of the victim's deque; if the head task is not
// allowed and an untied task has been encountered, the rest of the deque is
// searched. If *thread_finished is set, a successful steal increments
// *unfinished_threads and clears the flag. Returns the stolen task or NULL.
static kmp_task_t *__kmp_steal_task(kmp_int32 victim_tid, kmp_int32 gtid,
kmp_task_team_t *task_team,
std::atomic<kmp_int32> *unfinished_threads,
int *thread_finished,
kmp_int32 is_constrained) {
kmp_task_t *task;
kmp_taskdata_t *taskdata;
kmp_taskdata_t *current;
kmp_thread_data_t *victim_td, *threads_data;
kmp_int32 target;
kmp_info_t *victim_thr;
KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
threads_data = task_team->tt.tt_threads_data;
KMP_DEBUG_ASSERT(threads_data != NULL);
KMP_DEBUG_ASSERT(victim_tid >= 0);
KMP_DEBUG_ASSERT(victim_tid < task_team->tt.tt_nproc);
victim_td = &threads_data[victim_tid];
victim_thr = victim_td->td.td_thr;
// victim_thr is referenced only in the trace statements below.
(void)victim_thr;
KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: "
"task_team=%p ntasks=%d head=%u tail=%u\n",
gtid, __kmp_gtid_from_thread(victim_thr), task_team,
victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
victim_td->td.td_deque_tail));
// Cheap check without the lock first.
if (TCR_4(victim_td->td.td_deque_ntasks) == 0) {
KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: "
"task_team=%p ntasks=%d head=%u tail=%u\n",
gtid, __kmp_gtid_from_thread(victim_thr), task_team,
victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
victim_td->td.td_deque_tail));
return NULL;
}
__kmp_acquire_bootstrap_lock(&victim_td->td.td_deque_lock);
// Re-read the task count now that the lock is held.
int ntasks = TCR_4(victim_td->td.td_deque_ntasks);
if (ntasks == 0) {
__kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: "
"task_team=%p ntasks=%d head=%u tail=%u\n",
gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
return NULL;
}
KMP_DEBUG_ASSERT(victim_td->td.td_deque != NULL);
current = __kmp_threads[gtid]->th.th_current_task;
taskdata = victim_td->td.td_deque[victim_td->td.td_deque_head];
if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
// The head task is allowed: advance the head (with wrap-around).
victim_td->td.td_deque_head =
(victim_td->td.td_deque_head + 1) & TASK_DEQUE_MASK(victim_td->td);
} else {
// The head task is not allowed. Other entries are only examined when an
// untied task has been encountered in this task team.
if (!task_team->tt.tt_untied_task_encountered) {
__kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d could not steal from "
"T#%d: task_team=%p ntasks=%d head=%u tail=%u\n",
gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
return NULL;
}
// Search the remaining entries for the first allowed task.
int i;
target = victim_td->td.td_deque_head;
taskdata = NULL;
for (i = 1; i < ntasks; ++i) {
target = (target + 1) & TASK_DEQUE_MASK(victim_td->td);
taskdata = victim_td->td.td_deque[target];
if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
break;
} else {
taskdata = NULL;
}
}
if (taskdata == NULL) {
__kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
KA_TRACE(10, ("__kmp_steal_task(exit #4): T#%d could not steal from "
"T#%d: task_team=%p ntasks=%d head=%u tail=%u\n",
gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
return NULL;
}
// Close the gap left by the removed task: shift the following entries one
// slot towards the head, then pull the tail back by one.
int prev = target;
for (i = i + 1; i < ntasks; ++i) {
target = (target + 1) & TASK_DEQUE_MASK(victim_td->td);
victim_td->td.td_deque[prev] = victim_td->td.td_deque[target];
prev = target;
}
KMP_DEBUG_ASSERT(
victim_td->td.td_deque_tail ==
(kmp_uint32)((target + 1) & TASK_DEQUE_MASK(victim_td->td)));
victim_td->td.td_deque_tail = target;
}
// A thread that had marked itself finished becomes unfinished again once it
// has obtained a task; this happens before the victim's lock is released.
if (*thread_finished) {
#if KMP_DEBUG
kmp_int32 count =
#endif
KMP_ATOMIC_INC(unfinished_threads);
KA_TRACE(
20,
("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
gtid, count + 1, task_team));
*thread_finished = FALSE;
}
TCW_4(victim_td->td.td_deque_ntasks, ntasks - 1);
__kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
KMP_COUNT_BLOCK(TASK_stolen);
KA_TRACE(10,
("__kmp_steal_task(exit #5): T#%d stole task %p from T#%d: "
"task_team=%p ntasks=%d head=%u tail=%u\n",
gtid, taskdata, __kmp_gtid_from_thread(victim_thr), task_team,
ntasks, victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
task = KMP_TASKDATA_TO_TASK(taskdata);
return task;
}
// Task execution loop shared by all flag types.
//   thread          - calling thread (must be __kmp_threads[gtid])
//   gtid            - global thread id of the caller
//   flag            - spin flag to poll via done_check(); may be NULL
//   final_spin      - nonzero when the caller is in its final spin; in that
//                     mode the thread marks itself finished in
//                     tt_unfinished_threads once the current task has no
//                     incomplete children
//   thread_finished - in/out flag telling whether this thread has already
//                     decremented tt_unfinished_threads
//   itt_sync_obj    - ITT sync object (only with USE_ITT_BUILD)
//   is_constrained  - passed through to the task selection routines
// Tasks are taken, in order of preference, from the priority list, from the
// thread's own deque, and by stealing from another thread.
// Returns TRUE when flag is NULL or the spin condition is satisfied, FALSE
// when there is no task team / current task or no work can be found.
template <class C>
static inline int __kmp_execute_tasks_template(
    kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
    kmp_int32 is_constrained) {
  kmp_task_team_t *task_team = thread->th.th_task_team;
  kmp_thread_data_t *threads_data;
  kmp_task_t *task;
  kmp_info_t *other_thread;
  kmp_taskdata_t *current_task = thread->th.th_current_task;
  std::atomic<kmp_int32> *unfinished_threads;
  // victim_tid == -2 means "no victim chosen yet in this search".
  kmp_int32 nthreads, victim_tid = -2, use_own_tasks = 1, new_victim = 0,
                      tid = thread->th.th_info.ds.ds_tid;

  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  KMP_DEBUG_ASSERT(thread == __kmp_threads[gtid]);

  if (task_team == NULL || current_task == NULL)
    return FALSE;

  KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d "
                "*thread_finished=%d\n",
                gtid, final_spin, *thread_finished));

  thread->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
  threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);

  KMP_DEBUG_ASSERT(threads_data != NULL);

  nthreads = task_team->tt.tt_nproc;
  unfinished_threads = &(task_team->tt.tt_unfinished_threads);
  KMP_DEBUG_ASSERT(*unfinished_threads >= 0);

  while (1) { // Outer loop: keeps trying to find tasks for a single thread
    while (1) { // Inner loop: find a task and execute it
      task = NULL;
      if (task_team->tt.tt_num_task_pri) { // get priority task first
        task = __kmp_get_priority_task(gtid, task_team, is_constrained);
      }
      if (task == NULL && use_own_tasks) { // then check own deque
        task = __kmp_remove_my_task(thread, gtid, task_team, is_constrained);
      }
      if ((task == NULL) && (nthreads > 1)) { // finally try to steal
        int asleep = 1;
        use_own_tasks = 0;
        // Prefer the victim of the last successful steal, if any.
        if (victim_tid == -2) {
          victim_tid = threads_data[tid].td.td_deque_last_stolen;
          if (victim_tid != -1)
            other_thread = threads_data[victim_tid].td.td_thr;
        }
        if (victim_tid != -1) { // found last victim
          asleep = 0;
        } else if (!new_victim) { // no recent steals; pick a random victim
          do {
            // Pick a random thread other than ourselves.
            victim_tid = __kmp_get_random(thread) % (nthreads - 1);
            if (victim_tid >= tid) {
              ++victim_tid; // skip over own tid
            }
            other_thread = threads_data[victim_tid].td.td_thr;
            asleep = 0;
            // A sleeping candidate is woken up and another one is picked.
            if ((__kmp_tasking_mode == tskm_task_teams) &&
                (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
                (TCR_PTR(CCAST(void *, other_thread->th.th_sleep_loc)) !=
                 NULL)) {
              asleep = 1;
              __kmp_null_resume_wrapper(other_thread);
            }
          } while (asleep);
        }

        if (!asleep) {
          task =
              __kmp_steal_task(victim_tid, gtid, task_team, unfinished_threads,
                               thread_finished, is_constrained);
        }
        if (task != NULL) { // steal succeeded: remember the victim
          if (threads_data[tid].td.td_deque_last_stolen != victim_tid) {
            threads_data[tid].td.td_deque_last_stolen = victim_tid;
            new_victim = 1;
          }
        } else { // steal failed: forget the victim
          KMP_CHECK_UPDATE(threads_data[tid].td.td_deque_last_stolen, -1);
          victim_tid = -2;
        }
      }

      if (task == NULL)
        break; // leave the inner loop, nothing to run

#if USE_ITT_BUILD && USE_ITT_NOTIFY
      if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
        if (itt_sync_obj == NULL) {
          itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
        }
        __kmp_itt_task_starting(itt_sync_obj);
      }
#endif
      __kmp_invoke_task(gtid, task, current_task);
#if USE_ITT_BUILD
      if (itt_sync_obj != NULL)
        __kmp_itt_task_finished(itt_sync_obj);
#endif
      // Outside of the final spin, return as soon as the spin condition is
      // satisfied (or when there is no flag at all).
      if (flag == NULL || (!final_spin && flag->done_check())) {
        KA_TRACE(
            15,
            ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
             gtid));
        return TRUE;
      }
      if (thread->th.th_task_team == NULL) {
        break;
      }
      KMP_YIELD(__kmp_library == library_throughput);
      // If the executed task put work on our own deque while we were in
      // stealing mode, go back to consuming our own tasks first.
      if (!use_own_tasks && TCR_4(threads_data[tid].td.td_deque_ntasks) != 0) {
        KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned "
                      "other tasks, restart\n",
                      gtid));
        use_own_tasks = 1;
        new_victim = 0;
      }
    }

    // In the final spin, once the current task has no incomplete children,
    // mark this thread as finished (exactly once).
    if (final_spin &&
        KMP_ATOMIC_LD_ACQ(&current_task->td_incomplete_child_tasks) == 0) {
      if (!*thread_finished) {
#if KMP_DEBUG
        kmp_int32 count = -1 +
#endif
            KMP_ATOMIC_DEC(unfinished_threads);
        KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d dec "
                      "unfinished_threads to %d task_team=%p\n",
                      gtid, count, task_team));
        *thread_finished = TRUE;
      }

      if (flag != NULL && flag->done_check()) {
        KA_TRACE(
            15,
            ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
             gtid));
        return TRUE;
      }
    }

    // The thread's task team pointer was cleared in the meantime.
    if (thread->th.th_task_team == NULL) {
      KA_TRACE(15,
               ("__kmp_execute_tasks_template: T#%d no more tasks\n", gtid));
      return FALSE;
    }

    if (flag == NULL || (!final_spin && flag->done_check())) {
      KA_TRACE(15,
               ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
                gtid));
      return TRUE;
    }

    // With a single thread there is nobody to steal from: keep polling the
    // own deque while child tasks are still incomplete.
    if (nthreads == 1 &&
        KMP_ATOMIC_LD_ACQ(&current_task->td_incomplete_child_tasks))
      use_own_tasks = 1;
    else {
      KA_TRACE(15,
               ("__kmp_execute_tasks_template: T#%d can't find work\n", gtid));
      return FALSE;
    }
  }
}
// 32-bit flag entry point: forwards all arguments to the shared template.
template <bool C, bool S>
int __kmp_execute_tasks_32(
    kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32<C, S> *flag, int final_spin,
    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
    kmp_int32 is_constrained) {
  int status = __kmp_execute_tasks_template(
      thread, gtid, flag, final_spin,
      thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  return status;
}
// 64-bit flag entry point: forwards all arguments to the shared template.
template <bool C, bool S>
int __kmp_execute_tasks_64(
    kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64<C, S> *flag, int final_spin,
    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
    kmp_int32 is_constrained) {
  int status = __kmp_execute_tasks_template(
      thread, gtid, flag, final_spin,
      thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  return status;
}
// Atomic 64-bit flag entry point: forwards all arguments to the shared
// template.
template <bool C, bool S>
int __kmp_atomic_execute_tasks_64(
    kmp_info_t *thread, kmp_int32 gtid, kmp_atomic_flag_64<C, S> *flag,
    int final_spin, int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
    kmp_int32 is_constrained) {
  int status = __kmp_execute_tasks_template(
      thread, gtid, flag, final_spin,
      thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  return status;
}
// kmp_flag_oncore entry point: forwards all arguments to the shared template.
int __kmp_execute_tasks_oncore(
    kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
    kmp_int32 is_constrained) {
  int status = __kmp_execute_tasks_template(
      thread, gtid, flag, final_spin,
      thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  return status;
}
// Explicit instantiations of the flag-specific entry points defined above,
// for the <C, S> template argument combinations listed here.
template int
__kmp_execute_tasks_32<false, false>(kmp_info_t *, kmp_int32,
kmp_flag_32<false, false> *, int,
int *USE_ITT_BUILD_ARG(void *), kmp_int32);
template int __kmp_execute_tasks_64<false, true>(kmp_info_t *, kmp_int32,
kmp_flag_64<false, true> *,
int,
int *USE_ITT_BUILD_ARG(void *),
kmp_int32);
template int __kmp_execute_tasks_64<true, false>(kmp_info_t *, kmp_int32,
kmp_flag_64<true, false> *,
int,
int *USE_ITT_BUILD_ARG(void *),
kmp_int32);
template int __kmp_atomic_execute_tasks_64<false, true>(
kmp_info_t *, kmp_int32, kmp_atomic_flag_64<false, true> *, int,
int *USE_ITT_BUILD_ARG(void *), kmp_int32);
template int __kmp_atomic_execute_tasks_64<true, false>(
kmp_info_t *, kmp_int32, kmp_atomic_flag_64<true, false> *, int,
int *USE_ITT_BUILD_ARG(void *), kmp_int32);
// Set up the per-thread data array of `task_team` (via
// __kmp_realloc_task_threads_data) and, if this thread was the one that
// performed the setup, resume the other team threads that have a sleep
// location set so they can take part in tasking.
static void __kmp_enable_tasking(kmp_task_team_t *task_team,
                                 kmp_info_t *this_thr) {
  KA_TRACE(10, ("__kmp_enable_tasking(enter): T#%d\n",
                __kmp_gtid_from_thread(this_thr)));

  KMP_DEBUG_ASSERT(task_team != NULL);
  KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL);

  int nthreads = task_team->tt.tt_nproc;
  KMP_DEBUG_ASSERT(nthreads > 0);
  KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc);

  // Only the thread that actually initialized the array continues.
  int is_init_thread = __kmp_realloc_task_threads_data(this_thr, task_team);
  if (!is_init_thread) {
    // Some other thread already set up the array.
    KA_TRACE(
        20,
        ("__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
         __kmp_gtid_from_thread(this_thr)));
    return;
  }

  kmp_thread_data_t *threads_data =
      (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);
  KMP_DEBUG_ASSERT(threads_data != NULL);

  if (__kmp_tasking_mode == tskm_task_teams &&
      (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME)) {
    // Threads may have gone to sleep; release any that are waiting.
    for (int i = 0; i < nthreads; i++) {
      void *sleep_loc;
      kmp_info_t *thread = threads_data[i].td.td_thr;

      if (i == this_thr->th.th_info.ds.ds_tid) {
        continue; // never wake ourselves
      }

      sleep_loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc));
      if (sleep_loc != NULL) {
        KF_TRACE(50, ("__kmp_enable_tasking: T#%d waking up thread T#%d\n",
                      __kmp_gtid_from_thread(this_thr),
                      __kmp_gtid_from_thread(thread)));
        __kmp_null_resume_wrapper(thread);
      } else {
        KF_TRACE(50, ("__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
                      __kmp_gtid_from_thread(this_thr),
                      __kmp_gtid_from_thread(thread)));
      }
    }
  }

  KA_TRACE(10, ("__kmp_enable_tasking(exit): T#%d\n",
                __kmp_gtid_from_thread(this_thr)));
}
/*
 * Utility routines for "task teams". A task team (kmp_task_team_t) is kind of
 * like a shadow of the kmp_team_t data struct, with a different lifetime.
 * After a child thread checks into a barrier and calls __kmp_release() from
 * the particular variant of __kmp_<barrier_kind>_barrier_gather(), it can no
 * longer assume that the kmp_team_t structure is intact (at any moment, the
 * primary thread may exit the barrier code and free the team data structure,
 * and return the threads to the thread pool).
 *
 * This does not work with the tasking code, as the thread is still
 * expected to participate in the execution of any tasks that may have been
 * spawned by a member of the team, and the thread still needs access
 * to each thread in the team, so that it can steal work from it.
 *
 * Enter the existence of the kmp_task_team_t struct. It employs a reference
 * counting mechanism, and is allocated by the primary thread before calling
 * __kmp_<barrier_kind>_release, and then is released by the last thread to
 * exit __kmp_<barrier_kind>_release at the next barrier. I.e. the lifetimes
 * of the kmp_task_team_t structs for consecutive barriers can overlap
 * (and will, unless the primary thread is the last thread to exit the barrier
 * release phase, which is not typical). The existence of such a struct is
 * useful outside the context of tasking.
 *
 * We currently use the existence of the threads array as an indicator that
 * tasks were spawned since the last barrier. If the structure is to be
 * useful outside the context of tasking, then this will have to change, but
 * not setting the field minimizes the performance impact of tasking on
 * barriers, when no explicit tasks were spawned (pushed, actually).
 */
// Head of the singly linked free list (chained through tt.tt_next) of task
// teams that were released and can be handed out again.
static kmp_task_team_t *__kmp_free_task_teams =
NULL;
// Bootstrap lock held while the free list above is modified.
kmp_bootstrap_lock_t __kmp_task_team_lock =
KMP_BOOTSTRAP_LOCK_INITIALIZER(__kmp_task_team_lock);
// Create an empty task deque for one thread_data slot.
//
// thread       thread doing the allocation; only used for the trace output.
// thread_data  slot that receives the deque. It must not own a deque yet and
//              its head, tail and task count must be zero (debug-asserted).
static void __kmp_alloc_task_deque(kmp_info_t *thread,
                                   kmp_thread_data_t *thread_data) {
  auto &slot = thread_data->td;

  __kmp_init_bootstrap_lock(&slot.td_deque_lock);

  // The slot is expected to be pristine at this point.
  KMP_DEBUG_ASSERT(slot.td_deque == NULL);
  KMP_DEBUG_ASSERT(TCR_4(slot.td_deque_ntasks) == 0);
  KMP_DEBUG_ASSERT(slot.td_deque_head == 0);
  KMP_DEBUG_ASSERT(slot.td_deque_tail == 0);

  // -1 marks "no victim stolen from yet".
  slot.td_deque_last_stolen = -1;

  KE_TRACE(
      10,
      ("__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
       __kmp_gtid_from_thread(thread), INITIAL_TASK_DEQUE_SIZE, thread_data));

  // The deque starts out with INITIAL_TASK_DEQUE_SIZE task pointers.
  slot.td_deque = (kmp_taskdata_t **)__kmp_allocate(
      INITIAL_TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
  slot.td_deque_size = INITIAL_TASK_DEQUE_SIZE;
}
// Release the task deque owned by a thread_data slot, if it has one.
// The task count is reset and the deque pointer cleared while the slot's
// td_deque_lock is held; a slot without a deque is left untouched.
static void __kmp_free_task_deque(kmp_thread_data_t *thread_data) {
if (thread_data->td.td_deque != NULL) {
__kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
TCW_4(thread_data->td.td_deque_ntasks, 0);
__kmp_free(thread_data->td.td_deque);
// Clear the pointer so that a repeated call finds nothing to free.
thread_data->td.td_deque = NULL;
__kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
}
#ifdef BUILD_TIED_TASK_STACK
// NOTE(review): `gtid` is not declared in this function, so this branch
// would not compile as written if BUILD_TIED_TASK_STACK were defined --
// confirm the intended argument before enabling it.
if (thread_data->td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY) {
__kmp_free_task_stack(__kmp_thread_from_gtid(gtid), thread_data);
}
#endif
}
// (Re)initialize the per-thread task data array of a task team.
//
// The first caller that gets past the lock sizes tt_threads_data for tt_nproc
// threads (growing the array when tt_max_threads is too small), points every
// entry back at the matching kmp_info_t of the caller's team, and finally
// publishes tt_found_tasks = TRUE. Callers that observe the flag already set
// do nothing.
//
// thread     calling thread; thread->th.th_team supplies t_threads[].
// task_team  task team whose threads data is being set up.
// Returns TRUE only for the thread that performed the initialization,
// FALSE otherwise.
static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
                                           kmp_task_team_t *task_team) {
  kmp_thread_data_t **threads_data_p;
  kmp_int32 nthreads, maxthreads;
  int is_init_thread = FALSE;

  // Unlocked fast path: the array was already set up by another thread.
  if (TCR_4(task_team->tt.tt_found_tasks)) {
    return FALSE;
  }

  threads_data_p = &task_team->tt.tt_threads_data;
  nthreads = task_team->tt.tt_nproc;
  maxthreads = task_team->tt.tt_max_threads;

  // Serialize initialization; the flag is re-checked under the lock so that
  // exactly one thread does the work.
  __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);

  if (!TCR_4(task_team->tt.tt_found_tasks)) {
    kmp_team_t *team = thread->th.th_team;
    int i;

    is_init_thread = TRUE;
    if (maxthreads < nthreads) {
      if (*threads_data_p != NULL) {
        // Grow: allocate a larger array and carry over the existing entries.
        kmp_thread_data_t *old_data = *threads_data_p;
        kmp_thread_data_t *new_data = NULL;

        KE_TRACE(
            10,
            ("__kmp_realloc_task_threads_data: T#%d reallocating "
             "threads data for task_team %p, new_size = %d, old_size = %d\n",
             __kmp_gtid_from_thread(thread), task_team, nthreads, maxthreads));
        // NOTE(review): the entries beyond maxthreads are presumably
        // zero-filled by __kmp_allocate() -- confirm.
        new_data = (kmp_thread_data_t *)__kmp_allocate(
            nthreads * sizeof(kmp_thread_data_t));
        KMP_MEMCPY_S((void *)new_data, nthreads * sizeof(kmp_thread_data_t),
                     (void *)old_data, maxthreads * sizeof(kmp_thread_data_t));

#ifdef BUILD_TIED_TASK_STACK
        // Initialize the task stacks of the newly added entries. They live in
        // new_data: the old array only holds maxthreads elements, so indexing
        // it at [maxthreads, nthreads) would run past its end.
        for (i = maxthreads; i < nthreads; i++) {
          kmp_thread_data_t *thread_data = &new_data[i];
          __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data);
        }
#endif
        // Install the new array, then drop the old one.
        (*threads_data_p) = new_data;
        __kmp_free(old_data);
      } else {
        // First allocation for this task team.
        KE_TRACE(10, ("__kmp_realloc_task_threads_data: T#%d allocating "
                      "threads data for task_team %p, size = %d\n",
                      __kmp_gtid_from_thread(thread), task_team, nthreads));
        *threads_data_p = (kmp_thread_data_t *)__kmp_allocate(
            nthreads * sizeof(kmp_thread_data_t));
#ifdef BUILD_TIED_TASK_STACK
        for (i = 0; i < nthreads; i++) {
          kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
          __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data);
        }
#endif
      }
      task_team->tt.tt_max_threads = nthreads;
    } else {
      // The existing array is already large enough; it must exist.
      KMP_DEBUG_ASSERT(*threads_data_p != NULL);
    }

    // Point every entry at its thread and discard a stale "last stolen"
    // victim index that no longer fits the current thread count.
    for (i = 0; i < nthreads; i++) {
      kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
      thread_data->td.td_thr = team->t.t_threads[i];

      if (thread_data->td.td_deque_last_stolen >= nthreads) {
        thread_data->td.td_deque_last_stolen = -1;
      }
    }

    // Make the array contents visible before announcing them via the flag.
    KMP_MB();
    TCW_SYNC_4(task_team->tt.tt_found_tasks, TRUE);
  }

  __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
  return is_init_thread;
}
// Free the threads data array of a task team, including the deque of every
// entry, and reset the array pointer to NULL. All of this happens while the
// task team's tt_threads_lock is held. A task team without an array is left
// unchanged.
static void __kmp_free_task_threads_data(kmp_task_team_t *task_team) {
  __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);

  kmp_thread_data_t *slots = task_team->tt.tt_threads_data;
  if (slots != NULL) {
    // Every allocated entry (tt_max_threads of them) may own a deque.
    for (int idx = 0; idx < task_team->tt.tt_max_threads; ++idx) {
      __kmp_free_task_deque(&slots[idx]);
    }
    __kmp_free(slots);
    task_team->tt.tt_threads_data = NULL;
  }

  __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
}
// Free the priority-task list of a task team: for every list node its deque
// is released and then the node itself. The list head is reset to NULL
// afterwards. Runs with the task team's tt_task_pri_lock held.
static void __kmp_free_task_pri_list(kmp_task_team_t *task_team) {
  __kmp_acquire_bootstrap_lock(&task_team->tt.tt_task_pri_lock);

  if (task_team->tt.tt_task_pri_list != NULL) {
    kmp_task_pri_t *following = NULL;
    for (kmp_task_pri_t *node = task_team->tt.tt_task_pri_list; node != NULL;
         node = following) {
      // Remember the successor before the node is destroyed.
      following = node->next;
      __kmp_free_task_deque(&node->td);
      __kmp_free(node);
    }
    task_team->tt.tt_task_pri_list = NULL;
  }

  __kmp_release_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
}
// Bring a task team into its initial, active state for `team`.
// Nothing is touched when the task team is already active and was set up for
// the same number of threads; otherwise the "found" flags are cleared, the
// thread count and unfinished-thread counter are set to the team size, and
// the task team is marked active last.
static inline void __kmp_task_team_init(kmp_task_team_t *task_team,
                                        kmp_team_t *team) {
  const int team_size = team->t.t_nproc;

  // Already active with a matching size: keep the current state.
  if (task_team->tt.tt_active && team_size == task_team->tt.tt_nproc) {
    return;
  }

  TCW_4(task_team->tt.tt_found_tasks, FALSE);
  TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
  TCW_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE);
  TCW_4(task_team->tt.tt_nproc, team_size);
  KMP_ATOMIC_ST_REL(&task_team->tt.tt_unfinished_threads, team_size);
  TCW_4(task_team->tt.tt_active, TRUE);
}
// Obtain a task team for `team`, either recycled from the global free list or
// freshly allocated, and initialize it through __kmp_task_team_init().
//
// thread  calling thread; only used for trace output. The traces print -1
//         as the thread id when it is NULL.
// team    team the task team is set up for.
// Returns the initialized task team.
static kmp_task_team_t *__kmp_allocate_task_team(kmp_info_t *thread,
                                                 kmp_team_t *team) {
  kmp_task_team_t *task_team = NULL;

  KA_TRACE(20, ("__kmp_allocate_task_team: T#%d entering; team = %p\n",
                (thread ? __kmp_gtid_from_thread(thread) : -1), team));

  // Unlocked peek first; the list head is re-checked under the lock because
  // another thread may have emptied the list in between.
  if (TCR_PTR(__kmp_free_task_teams) != NULL) {
    __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
    if (__kmp_free_task_teams != NULL) {
      task_team = __kmp_free_task_teams;
      TCW_PTR(__kmp_free_task_teams, task_team->tt.tt_next);
      task_team->tt.tt_next = NULL;
    }
    __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
  }

  if (task_team == NULL) {
    // Guard the thread pointer the same way the KA_TRACE calls in this
    // function do, so that a NULL thread cannot be dereferenced here.
    KE_TRACE(10, ("__kmp_allocate_task_team: T#%d allocating "
                  "task team for team %p\n",
                  (thread ? __kmp_gtid_from_thread(thread) : -1), team));
    // NOTE(review): the remaining fields are presumably zero-filled by
    // __kmp_allocate(); __kmp_task_team_init() reads tt_active -- confirm.
    task_team = (kmp_task_team_t *)__kmp_allocate(sizeof(kmp_task_team_t));
    __kmp_init_bootstrap_lock(&task_team->tt.tt_threads_lock);
    __kmp_init_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
#if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
    // Ask the ITT tooling to suppress threading-error reports for the two
    // flag fields below.
    __itt_suppress_mark_range(
        __itt_suppress_range, __itt_suppress_threading_errors,
        &task_team->tt.tt_found_tasks, sizeof(task_team->tt.tt_found_tasks));
    __itt_suppress_mark_range(__itt_suppress_range,
                              __itt_suppress_threading_errors,
                              CCAST(kmp_uint32 *, &task_team->tt.tt_active),
                              sizeof(task_team->tt.tt_active));
#endif
  }

  __kmp_task_team_init(task_team, team);

  KA_TRACE(20, ("__kmp_allocate_task_team: T#%d exiting; task_team = %p "
                "unfinished_threads init'd to %d\n",
                (thread ? __kmp_gtid_from_thread(thread) : -1), task_team,
                KMP_ATOMIC_LD_RLX(&task_team->tt.tt_unfinished_threads)));
  return task_team;
}
// Return a task team to the global free list so it can be reused later.
//
// thread     calling thread; used only for the trace (-1 is printed if NULL).
// task_team  task team to recycle; its tt_next link must be NULL on entry
//            (debug-asserted).
// Only the descriptor is linked into the list here; nothing is freed.
void __kmp_free_task_team(kmp_info_t *thread, kmp_task_team_t *task_team) {
KA_TRACE(20, ("__kmp_free_task_team: T#%d task_team = %p\n",
thread ? __kmp_gtid_from_thread(thread) : -1, task_team));
// Push onto the head of __kmp_free_task_teams under the list lock.
__kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
KMP_DEBUG_ASSERT(task_team->tt.tt_next == NULL);
task_team->tt.tt_next = __kmp_free_task_teams;
TCW_PTR(__kmp_free_task_teams, task_team);
__kmp_release_bootstrap_lock(&__kmp_task_team_lock);
}
void __kmp_reap_task_teams(void) {
kmp_task_team_t *task_team;
if (TCR_PTR(__kmp_free_task_teams) != NULL) {
__kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
while ((task_team = __kmp_free_task_teams) != NULL) {
__kmp_free_task_teams = task_team->tt.tt_next;
task_team->tt.tt_next = NULL;
if (task_team->tt.tt_threads_data != NULL) {
__kmp_free_task_threads_data(task_team);
}
if (task_team->tt.tt_task_pri_list != NULL) {
__kmp_free_task_pri_list(task_team);
}
__kmp_free(task_team);
}
__kmp_release_bootstrap_lock(&__kmp_task_team_lock);
}
}
// Save the current task team of a single-thread team on a stack.
// The storage at team->t.t_task_team[0] is viewed as a list head
// (task_team + next). Its current contents are copied into a newly allocated
// node that becomes the head's successor, and both the head's task team and
// the thread's task team pointer are reset to NULL.
void __kmp_push_task_team_node(kmp_info_t *thread, kmp_team_t *team) {
  KMP_DEBUG_ASSERT(team->t.t_nproc == 1);

  kmp_task_team_list_t *head =
      (kmp_task_team_list_t *)(&team->t.t_task_team[0]);
  kmp_task_team_list_t *saved =
      (kmp_task_team_list_t *)__kmp_allocate(sizeof(kmp_task_team_list_t));

  // Move the head's state into the new node ...
  saved->task_team = head->task_team;
  saved->next = head->next;

  // ... and start over with an empty head that links to it.
  head->task_team = NULL;
  thread->th.th_task_team = NULL;
  head->next = saved;
}
// Undo __kmp_push_task_team_node() for a single-thread team.
// The task team currently held in the list head (if any) is handed to
// __kmp_free_task_team(). When a saved node exists, its contents are moved
// back into the head, the node is freed, and the thread's task team pointer
// is set to the restored task team.
void __kmp_pop_task_team_node(kmp_info_t *thread, kmp_team_t *team) {
  KMP_DEBUG_ASSERT(team->t.t_nproc == 1);

  kmp_task_team_list_t *head =
      (kmp_task_team_list_t *)(&team->t.t_task_team[0]);

  if (head->task_team != NULL) {
    __kmp_free_task_team(thread, head->task_team);
  }

  kmp_task_team_list_t *saved = head->next;
  if (saved == NULL) {
    // Nothing was pushed; the head keeps its current contents.
    return;
  }

  head->task_team = saved->task_team;
  head->next = saved->next;
  KMP_DEBUG_ASSERT(saved != head);
  __kmp_free(saved);
  thread->th.th_task_team = head->task_team;
}
// Block until no thread in the thread pool references a task team any more.
// The pool list (__kmp_thread_pool, linked through th_next_pool) is scanned
// repeatedly; the function returns after a full pass in which every thread's
// th_task_team was NULL. Between passes it yields or spins via
// KMP_YIELD_OVERSUB_ELSE_SPIN.
void __kmp_wait_to_unref_task_teams(void) {
kmp_info_t *thread;
kmp_uint32 spins;
kmp_uint64 time;
int done;
KMP_INIT_YIELD(spins);
KMP_INIT_BACKOFF(time);
for (;;) {
// Assume success; any thread still holding a task team resets this.
done = TRUE;
for (thread = CCAST(kmp_info_t *, __kmp_thread_pool); thread != NULL;
thread = thread->th.th_next_pool) {
#if KMP_OS_WINDOWS
DWORD exit_val;
#endif
if (TCR_PTR(thread->th.th_task_team) == NULL) {
KA_TRACE(10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
__kmp_gtid_from_thread(thread)));
continue;
}
#if KMP_OS_WINDOWS
// On Windows a thread that is no longer alive cannot clear its own
// pointer, so it is cleared on its behalf.
if (!__kmp_is_thread_alive(thread, &exit_val)) {
thread->th.th_task_team = NULL;
continue;
}
#endif
done = FALSE;
KA_TRACE(10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to "
"unreference task_team\n",
__kmp_gtid_from_thread(thread)));
// Unless the block time is KMP_MAX_BLOCKTIME, resume the thread if it has
// a sleep location set, so it can drop its reference.
if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
void *sleep_loc;
if ((sleep_loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc))) !=
NULL) {
// NOTE(review): both T#%d fields are fed the gtid of the thread being
// woken; the first one presumably was meant to be the caller -- confirm.
KA_TRACE(
10,
("__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
__kmp_gtid_from_thread(thread), __kmp_gtid_from_thread(thread)));
__kmp_null_resume_wrapper(thread);
}
}
}
if (done) {
break;
}
KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
}
}
// Make sure the task team slots of `team` are allocated and initialized.
//
// this_thr  calling thread (the traces refer to it as the primary thread).
// team      team whose t_task_team[] slots are prepared.
//
// Serial and root teams only use slot 0. Other teams use two slots selected
// by the thread's th_task_state parity: the current slot is allocated if it
// is missing, and the other slot is allocated or re-initialized.
void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team) {
KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
// Serial/root team: single-thread team, only t_task_team[0] is set up.
if (team == this_thr->th.th_serial_team ||
team == this_thr->th.th_root->r.r_root_team) {
KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
if (team->t.t_task_team[0] == NULL) {
team->t.t_task_team[0] = __kmp_allocate_task_team(this_thr, team);
KA_TRACE(
20, ("__kmp_task_team_setup: Primary T#%d created new task_team %p"
" for serial/root team %p\n",
__kmp_gtid_from_thread(this_thr), team->t.t_task_team[0], team));
} else
__kmp_task_team_init(team->t.t_task_team[0], team);
return;
}
// Slot for the current parity: only created when missing, an existing one
// is not touched here.
if (team->t.t_task_team[this_thr->th.th_task_state] == NULL) {
team->t.t_task_team[this_thr->th.th_task_state] =
__kmp_allocate_task_team(this_thr, team);
KA_TRACE(20, ("__kmp_task_team_setup: Primary T#%d created new task_team %p"
" for team %d at parity=%d\n",
__kmp_gtid_from_thread(this_thr),
team->t.t_task_team[this_thr->th.th_task_state], team->t.t_id,
this_thr->th.th_task_state));
}
// Slot for the opposite parity: created when missing, otherwise
// re-initialized for the current team size.
int other_team = 1 - this_thr->th.th_task_state;
KMP_DEBUG_ASSERT(other_team >= 0 && other_team < 2);
if (team->t.t_task_team[other_team] == NULL) {
team->t.t_task_team[other_team] = __kmp_allocate_task_team(this_thr, team);
KA_TRACE(20, ("__kmp_task_team_setup: Primary T#%d created second new "
"task_team %p for team %d at parity=%d\n",
__kmp_gtid_from_thread(this_thr),
team->t.t_task_team[other_team], team->t.t_id, other_team));
} else {
kmp_task_team_t *task_team = team->t.t_task_team[other_team];
__kmp_task_team_init(task_team, team);
KA_TRACE(20, ("__kmp_task_team_setup: Primary T#%d reset next task_team "
"%p for team %d at parity=%d\n",
__kmp_gtid_from_thread(this_thr),
team->t.t_task_team[other_team], team->t.t_id, other_team));
}
// The hidden helper main thread enables tasking on both slots right away
// and allocates a deque for every hidden helper thread that lacks one.
if (this_thr == __kmp_hidden_helper_main_thread) {
for (int i = 0; i < 2; ++i) {
kmp_task_team_t *task_team = team->t.t_task_team[i];
if (KMP_TASKING_ENABLED(task_team)) {
continue;
}
__kmp_enable_tasking(task_team, this_thr);
for (int j = 0; j < task_team->tt.tt_nproc; ++j) {
kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[j];
if (thread_data->td.td_deque == NULL) {
__kmp_alloc_task_deque(__kmp_hidden_helper_threads[j], thread_data);
}
}
}
}
}
// Switch the calling thread over to the team's other task team.
// The thread's th_task_state parity is flipped (0 <-> 1) and th_task_team is
// loaded from the team slot selected by the new parity. Must not be used for
// serial or root teams (debug-asserted).
void __kmp_task_team_sync(kmp_info_t *this_thr, kmp_team_t *team) {
  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  KMP_DEBUG_ASSERT(team != this_thr->th.th_serial_team);
  KMP_DEBUG_ASSERT(team != this_thr->th.th_root->r.r_root_team);

  // Flip the parity first, then pick up the matching task team.
  const kmp_uint8 new_parity = (kmp_uint8)(1 - this_thr->th.th_task_state);
  this_thr->th.th_task_state = new_parity;
  TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team[new_parity]);

  KA_TRACE(20,
           ("__kmp_task_team_sync: Thread T#%d task team switched to task_team "
            "%p from Team #%d (parity=%d)\n",
            __kmp_gtid_from_thread(this_thr), this_thr->th.th_task_team,
            team->t.t_id, this_thr->th.th_task_state));
}
// Optionally wait for the current task team to drain, then deactivate it.
//
// this_thr      calling thread (the traces refer to it as the primary thread).
// team          team whose task team for the caller's parity is handled.
// itt_sync_obj  only present in ITT builds; forwarded to the flag wait.
// wait          when non-zero, block on tt_unfinished_threads (check value 0)
//               before deactivating.
//
// Nothing happens when the task team is NULL or tasking is not enabled on it.
void __kmp_task_team_wait(
kmp_info_t *this_thr,
kmp_team_t *team USE_ITT_BUILD_ARG(void *itt_sync_obj), int wait) {
kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];
KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
KMP_DEBUG_ASSERT(task_team == this_thr->th.th_task_team);
if ((task_team != NULL) && KMP_TASKING_ENABLED(task_team)) {
if (wait) {
KA_TRACE(20, ("__kmp_task_team_wait: Primary T#%d waiting for all tasks "
"(for unfinished_threads to reach 0) on task_team = %p\n",
__kmp_gtid_from_thread(this_thr), task_team));
// Wait on a 32-bit flag layered over the unfinished-threads counter.
kmp_flag_32<false, false> flag(
RCAST(std::atomic<kmp_uint32> *,
&task_team->tt.tt_unfinished_threads),
0U);
flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
}
KA_TRACE(
20,
("__kmp_task_team_wait: Primary T#%d deactivating task_team %p: "
"setting active to false, setting local and team's pointer to NULL\n",
__kmp_gtid_from_thread(this_thr), task_team));
// Reset the per-region flags, mark the task team inactive, and only then
// (after the memory barrier) drop the caller's own pointer to it.
TCW_SYNC_4(task_team->tt.tt_found_proxy_tasks, FALSE);
TCW_SYNC_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE);
KMP_CHECK_UPDATE(task_team->tt.tt_untied_task_encountered, 0);
TCW_SYNC_4(task_team->tt.tt_active, FALSE);
KMP_MB();
TCW_PTR(this_thr->th.th_task_team, NULL);
}
}
// Execute tasks until the flag built over the current task team's
// tt_unfinished_threads counter (check value 0) reports completion.
// Only valid in tskm_extra_barrier tasking mode (debug-asserted).
//
// team    team whose task team (selected by the thread's parity) is used.
// thread  calling thread.
// gtid    global thread id of the calling thread.
void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread, int gtid) {
std::atomic<kmp_uint32> *spin = RCAST(
std::atomic<kmp_uint32> *,
&team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads);
int flag = FALSE;
KMP_DEBUG_ASSERT(__kmp_tasking_mode == tskm_extra_barrier);
#if USE_ITT_BUILD
KMP_FSYNC_SPIN_INIT(spin, NULL);
#endif
kmp_flag_32<false, false> spin_flag(spin, 0U);
// Keep calling execute_tasks() until it returns non-zero.
while (!spin_flag.execute_tasks(thread, gtid, TRUE,
&flag USE_ITT_BUILD_ARG(NULL), 0)) {
#if USE_ITT_BUILD
KMP_FSYNC_SPIN_PREPARE(RCAST(void *, spin));
#endif
// Library shutdown in progress: stop waiting (and abort the thread first
// if an abort was requested).
if (TCR_4(__kmp_global.g.g_done)) {
if (__kmp_global.g.g_abort)
__kmp_abort_thread();
break;
}
KMP_YIELD(TRUE);
}
#if USE_ITT_BUILD
KMP_FSYNC_SPIN_ACQUIRED(RCAST(void *, spin));
#endif
}
// Try to append `task` to the deque of thread slot `tid` of the task's team.
//
// thread  thread handed to __kmp_realloc_task_deque() when the deque grows.
// tid     index into the task team's tt_threads_data array.
// task    task to enqueue; its taskdata supplies the task team.
// pass    growth threshold: a full deque is only enlarged when
//         size / INITIAL_TASK_DEQUE_SIZE is still below `pass`.
// Returns true when the task was enqueued, false when the slot has no deque
// or the deque is full and may not grow in this pass.
static bool __kmp_give_task(kmp_info_t *thread, kmp_int32 tid, kmp_task_t *task,
kmp_int32 pass) {
kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
kmp_task_team_t *task_team = taskdata->td_task_team;
KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n",
taskdata, tid));
KMP_DEBUG_ASSERT(task_team != NULL);
bool result = false;
kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];
// No deque in this slot: let the caller try another thread.
if (thread_data->td.td_deque == NULL) {
KA_TRACE(30,
("__kmp_give_task: thread %d has no queue while giving task %p.\n",
tid, taskdata));
return result;
}
// First check is done without the lock; the fullness test is repeated once
// the lock is held.
if (TCR_4(thread_data->td.td_deque_ntasks) >=
TASK_DEQUE_SIZE(thread_data->td)) {
KA_TRACE(
30,
("__kmp_give_task: queue is full while giving task %p to thread %d.\n",
taskdata, tid));
// Deque already at or beyond the size allowed for this pass: give up
// before taking the lock.
if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass)
return result;
__kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
if (TCR_4(thread_data->td.td_deque_ntasks) >=
TASK_DEQUE_SIZE(thread_data->td)) {
__kmp_realloc_task_deque(thread, thread_data);
}
} else {
__kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
// The deque may have filled up between the unlocked check and the lock.
if (TCR_4(thread_data->td.td_deque_ntasks) >=
TASK_DEQUE_SIZE(thread_data->td)) {
KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to "
"thread %d.\n",
taskdata, tid));
if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass)
goto release_and_exit;
__kmp_realloc_task_deque(thread, thread_data);
}
}
// The lock is held here. Store the task at the tail and advance the tail
// with wrap-around through the deque mask.
thread_data->td.td_deque[thread_data->td.td_deque_tail] = taskdata;
thread_data->td.td_deque_tail =
(thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
TCW_4(thread_data->td.td_deque_ntasks,
TCR_4(thread_data->td.td_deque_ntasks) + 1);
result = true;
KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n",
taskdata, tid));
release_and_exit:
__kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
return result;
}
// Marker bit OR-ed into a proxy task's td_incomplete_child_tasks while the
// top half of its completion is in progress; the bottom half spins until the
// bit has been cleared again.
#define PROXY_TASK_FLAG 0x40000000
/* The completion of a proxy task is divided into two halves:
- the top half is the part that can be done from a thread outside the team
- the bottom half must be run from a thread within the team
In order to run the bottom half, the task gets queued back into one of the
threads of the team. Once the td_incomplete_child_tasks counter of the parent
is decremented, the threads can leave the barriers. So, the bottom half needs
to be queued before the counter is decremented. The top half is therefore
divided in two parts:
- things that can be run before queuing the bottom half
- things that must be run after queuing the bottom half
This creates a second race, as the bottom half can free the task before the
second top half is executed. To avoid this we use the
td_incomplete_child_tasks counter of the proxy task to synchronize the top and
bottom halves. */
// First part of the top half of proxy-task completion.
// Marks the task complete, removes it from its taskgroup count (if it has a
// taskgroup), and sets PROXY_TASK_FLAG in the task's own
// td_incomplete_child_tasks counter. The task must be an explicit proxy task
// that is neither complete nor freed yet (debug-asserted).
static void __kmp_first_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
  KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);

  taskdata->td_flags.complete = 1;
#if OMPX_TASKGRAPH
  taskdata->td_flags.onced = 1;
#endif

  auto *group = taskdata->td_taskgroup;
  if (group != NULL) {
    KMP_ATOMIC_DEC(&group->count);
  }

  // While this bit is set, the bottom half keeps spinning instead of
  // releasing the task.
  KMP_ATOMIC_OR(&taskdata->td_incomplete_child_tasks, PROXY_TASK_FLAG);
}
// Second part of the top half of proxy-task completion.
// Decrements the parent's td_incomplete_child_tasks counter and then clears
// PROXY_TASK_FLAG in the task's own counter.
static void __kmp_second_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
// In debug builds the preprocessor splices "children = -1 +" in front of the
// decrement expression below, so `children` holds its result minus one. In
// other builds only the bare decrement statement remains.
#if KMP_DEBUG
kmp_int32 children = 0;
children = -1 +
#endif
KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks);
// NOTE(review): `children` only exists under KMP_DEBUG; this presumably
// relies on KMP_DEBUG_ASSERT expanding to nothing otherwise -- confirm.
KMP_DEBUG_ASSERT(children >= 0);
// Clearing the flag lets a spinning bottom half proceed.
KMP_ATOMIC_AND(&taskdata->td_incomplete_child_tasks, ~PROXY_TASK_FLAG);
}
// Bottom half of proxy-task completion.
// Spins until PROXY_TASK_FLAG is no longer set in the task's
// td_incomplete_child_tasks counter, then releases the task's dependences and
// frees the task (and ancestors) on behalf of thread `gtid`.
//
// gtid   global thread id of the executing thread.
// ptask  proxy task; must already be marked complete (debug-asserted).
static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
  kmp_info_t *thread = __kmp_threads[gtid];

  KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 1);

  // Busy-wait for the top half to clear the flag.
  for (;;) {
    if (!((KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks) &
           PROXY_TASK_FLAG) > 0)) {
      break;
    }
  }

  __kmp_release_deps(gtid, taskdata);
  __kmp_free_task_and_ancestors(gtid, taskdata, thread);
}
/*!
@ingroup TASKING
@param gtid Global Thread ID of encountering thread
@param ptask Task whose execution is completed
Execute the completion of a proxy task from a thread that is part of the
team. Run the top and bottom halves directly.
*/
// Complete a proxy task on thread `gtid`: both parts of the top half and the
// bottom half are run back to back on the calling thread.
void __kmpc_proxy_task_completed(kmp_int32 gtid, kmp_task_t *ptask) {
KMP_DEBUG_ASSERT(ptask != NULL);
kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
KA_TRACE(
10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n",
gtid, taskdata));
__kmp_assert_valid_gtid(gtid);
KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
// Order matters: the second top half clears the flag that the bottom half
// spins on.
__kmp_first_top_half_finish_proxy(taskdata);
__kmp_second_top_half_finish_proxy(taskdata);
__kmp_bottom_half_finish_proxy(gtid, ptask);
KA_TRACE(10,
("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n",
gtid, taskdata));
}
// Hand `ptask` to some thread of the task's team.
// Thread slots are tried round-robin, starting at `start % nthreads`, until
// __kmp_give_task() accepts the task. Every time the scan wraps around to the
// starting slot, `pass` is doubled before the next attempt. Afterwards, if
// the block time is not KMP_MAX_BLOCKTIME and the wait policy is passive, the
// first team thread found with a sleep location set is resumed.
//
// ptask  task to enqueue (must not be NULL).
// start  slot index at which the scan begins (default 0).
void __kmpc_give_task(kmp_task_t *ptask, kmp_int32 start = 0) {
  KMP_DEBUG_ASSERT(ptask != NULL);
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);

  kmp_team_t *team = taskdata->td_team;
  const kmp_int32 nthreads = team->t.t_nproc;
  const kmp_int32 first_slot = start % nthreads;

  kmp_info_t *thread;
  kmp_int32 pass = 1;
  kmp_int32 slot = first_slot;
  for (;;) {
    thread = team->t.t_threads[slot];
    slot = (slot + 1) % nthreads;
    // A full round over all slots was completed.
    if (slot == first_slot) {
      pass <<= 1;
    }
    if (__kmp_give_task(thread, slot, ptask, pass)) {
      break;
    }
  }

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME && __kmp_wpolicy_passive) {
    // Resume one sleeping thread so that somebody picks the task up.
    for (kmp_int32 idx = 0; idx < nthreads; ++idx) {
      thread = team->t.t_threads[idx];
      if (thread->th.th_sleep_loc != NULL) {
        __kmp_null_resume_wrapper(thread);
        break;
      }
    }
  }
}
/*!
@ingroup TASKING
@param ptask Task whose execution is completed
Execute the completion of a proxy task from a thread that may not belong to
the team.
*/
// Complete a proxy task without running its bottom half on the caller: the
// first top half runs here, the task is queued to a team thread via
// __kmpc_give_task(), and the second top half runs afterwards.
void __kmpc_proxy_task_completed_ooo(kmp_task_t *ptask) {
KMP_DEBUG_ASSERT(ptask != NULL);
kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
KA_TRACE(
10,
("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n",
taskdata));
KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
__kmp_first_top_half_finish_proxy(taskdata);
// The task is queued before the second top half decrements the parent's
// incomplete-children counter.
__kmpc_give_task(ptask);
__kmp_second_top_half_finish_proxy(taskdata);
KA_TRACE(
10,
("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n",
taskdata));
}
// Return the allow-completion event embedded in the taskdata of `task`.
// On first use (event type still KMP_EVENT_UNINITIALIZED) the event is set up:
// its type becomes KMP_EVENT_ALLOW_COMPLETION, it records `task`, and its lock
// is initialized. `loc_ref` and `gtid` are not used by this function.
kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, int gtid,
                                                kmp_task_t *task) {
  kmp_event_t *event = &KMP_TASK_TO_TASKDATA(task)->td_allow_completion_event;

  if (event->type == KMP_EVENT_UNINITIALIZED) {
    event->type = KMP_EVENT_ALLOW_COMPLETION;
    event->ed.task = task;
    __kmp_init_tas_lock(&event->lock);
  }
  return event;
}
// Fulfill an allow-completion event.
// Events of any other type are ignored. Under the event lock the function
// checks whether the associated task is a proxy task, and resets the event
// type to KMP_EVENT_UNINITIALIZED. If the task is a proxy task, its
// completion is finished here: directly when the caller has a valid gtid and
// belongs to the task's team, otherwise through the out-of-order path.
void __kmp_fulfill_event(kmp_event_t *event) {
  if (event->type != KMP_EVENT_ALLOW_COMPLETION) {
    return;
  }

  kmp_task_t *ptask = event->ed.task;
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
  int gtid = __kmp_get_gtid();

  // The proxy flag is inspected with the event lock held.
  __kmp_acquire_tas_lock(&event->lock, gtid);
  const bool detached = (taskdata->td_flags.proxy == TASK_PROXY);
#if OMPT_SUPPORT
  // Not (yet) a proxy task: report the early fulfill while still locked.
  if (!detached && UNLIKELY(ompt_enabled.enabled))
    __ompt_task_finish(ptask, NULL, ompt_task_early_fulfill);
#endif
  event->type = KMP_EVENT_UNINITIALIZED;
  __kmp_release_tas_lock(&event->lock, gtid);

  if (!detached) {
    return;
  }

#if OMPT_SUPPORT
  if (UNLIKELY(ompt_enabled.enabled))
    __ompt_task_finish(ptask, NULL, ompt_task_late_fulfill);
#endif

  if (gtid >= 0) {
    kmp_info_t *thread = __kmp_get_thread();
    if (thread->th.th_team == taskdata->td_team) {
      // Caller is a member of the task's team: run all halves right here.
      __kmpc_proxy_task_completed(gtid, ptask);
      return;
    }
  }

  // Otherwise queue the bottom half to a thread of the team.
  __kmpc_proxy_task_completed_ooo(ptask);
}
// Allocate a new task as a copy of `task_src`.
// The whole allocation of the source (td_size_alloc bytes) is copied, then
// the fields that must differ are fixed up: task id, shareds pointer,
// allocating thread, parent/taskgroup links and td_last_tied for tied tasks.
// Unless the task is team-serial or tasking-serialized, the parent's child
// counters (and its taskgroup count, if any) are incremented.
//
// thread          thread that allocates the copy.
// task_src        source task; must be a full (non-proxy) explicit task
//                 (debug-asserted).
// taskloop_recur  (OMPX_TASKGRAPH builds only) when non-zero a regular task
//                 id is generated even for taskgraph tasks.
// Returns the newly allocated task.
kmp_task_t *__kmp_task_dup_alloc(kmp_info_t *thread, kmp_task_t *task_src
#if OMPX_TASKGRAPH
                                 , int taskloop_recur
#endif
) {
  kmp_task_t *task;
  kmp_taskdata_t *taskdata;
  kmp_taskdata_t *taskdata_src = KMP_TASK_TO_TASKDATA(task_src);
  kmp_taskdata_t *parent_task = taskdata_src->td_parent;
  size_t shareds_offset;
  size_t task_size;

  KA_TRACE(10, ("__kmp_task_dup_alloc(enter): Th %p, source task %p\n", thread,
                task_src));
  KMP_DEBUG_ASSERT(taskdata_src->td_flags.proxy == TASK_FULL);
  KMP_DEBUG_ASSERT(taskdata_src->td_flags.tasktype == TASK_EXPLICIT);
  task_size = taskdata_src->td_size_alloc;

  // task_size is a size_t; cast it so the argument matches the %ld conversion
  // on platforms where long and size_t differ in width.
  KA_TRACE(30, ("__kmp_task_dup_alloc: Th %p, malloc size %ld\n", thread,
                (long)task_size));
#if USE_FAST_MEMORY
  taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, task_size);
#else
  taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, task_size);
#endif
  KMP_MEMCPY(taskdata, taskdata_src, task_size);

  task = KMP_TASKDATA_TO_TASK(taskdata);

  // From here on only the fields that must not be shared with the source are
  // re-initialized.
#if OMPX_TASKGRAPH
  if (!taskdata->is_taskgraph || taskloop_recur)
    taskdata->td_task_id = KMP_GEN_TASK_ID();
  else if (taskdata->is_taskgraph &&
           __kmp_tdg_is_recording(taskdata_src->tdg->tdg_status))
    taskdata->td_task_id = KMP_ATOMIC_INC(&__kmp_tdg_task_id);
#else
  taskdata->td_task_id = KMP_GEN_TASK_ID();
#endif
  if (task->shareds != NULL) {
    // Re-point shareds into the new allocation at the same offset it had in
    // the source allocation.
    shareds_offset = (char *)task_src->shareds - (char *)taskdata_src;
    task->shareds = &((char *)taskdata)[shareds_offset];
    KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) ==
                     0);
  }
  taskdata->td_alloc_thread = thread;
  taskdata->td_parent = parent_task;
  taskdata->td_taskgroup = parent_task->td_taskgroup;
  if (taskdata->td_flags.tiedness == TASK_TIED)
    taskdata->td_last_tied = taskdata;

  if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) {
    KMP_ATOMIC_INC(&parent_task->td_incomplete_child_tasks);
    if (parent_task->td_taskgroup)
      KMP_ATOMIC_INC(&parent_task->td_taskgroup->count);
    // Allocated-children accounting is only done for explicit parents.
    if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT)
      KMP_ATOMIC_INC(&taskdata->td_parent->td_allocated_child_tasks);
  }

  KA_TRACE(20,
           ("__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n",
            thread, taskdata, taskdata->td_parent));
#if OMPT_SUPPORT
  if (UNLIKELY(ompt_enabled.enabled))
    __ompt_task_init(taskdata, thread->th.th_info.ds.ds_gtid);
#endif
  return task;
}
// Signature of the task duplication callback used by the taskloop code below;
// it is invoked as ptask_dup(new_task, pattern_task, lastpriv).
typedef void (*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32);
// Compile-time check: `long` must be either 4 or 8 bytes wide.
KMP_BUILD_ASSERT(sizeof(long) == 4 || sizeof(long) == 8);
// Accessor for the lower/upper loop bounds stored with a taskloop task.
// By default the bounds are 64-bit values located at fixed byte offsets from
// the kmp_task_t. With KMP_GOMP_COMPAT, tasks flagged `native` keep them as
// the first two elements of task->shareds instead, as 32- or 64-bit values
// depending on td_size_loop_bounds.
class kmp_taskloop_bounds_t {
  kmp_task_t *task;
  const kmp_taskdata_t *taskdata;
  size_t lower_offset;
  size_t upper_offset;

public:
  // Build the accessor from a task and pointers to its two bound fields; both
  // pointers must lie behind the start of the task (debug-asserted).
  kmp_taskloop_bounds_t(kmp_task_t *_task, kmp_uint64 *lb, kmp_uint64 *ub)
      : task(_task), taskdata(KMP_TASK_TO_TASKDATA(task)),
        lower_offset((char *)lb - (char *)task),
        upper_offset((char *)ub - (char *)task) {
    KMP_DEBUG_ASSERT((char *)lb > (char *)_task);
    KMP_DEBUG_ASSERT((char *)ub > (char *)_task);
  }

  // Build an accessor for another task that uses the same offsets as `bounds`.
  kmp_taskloop_bounds_t(kmp_task_t *_task, const kmp_taskloop_bounds_t &bounds)
      : task(_task), taskdata(KMP_TASK_TO_TASKDATA(_task)),
        lower_offset(bounds.lower_offset), upper_offset(bounds.upper_offset) {}

  size_t get_lower_offset() const { return lower_offset; }
  size_t get_upper_offset() const { return upper_offset; }

  // Read the lower bound (signed read, returned as unsigned 64-bit).
  kmp_uint64 get_lb() const {
#if defined(KMP_GOMP_COMPAT)
    if (taskdata->td_flags.native) {
      if (taskdata->td_size_loop_bounds == 4) {
        kmp_int32 *narrow = RCAST(kmp_int32 *, task->shareds);
        return (kmp_int64)*narrow;
      }
      kmp_int64 *wide = RCAST(kmp_int64 *, task->shareds);
      return (kmp_int64)*wide;
    }
#else
    (void)taskdata;
#endif
    return *(kmp_int64 *)((char *)task + lower_offset);
  }

  // Read the upper bound (signed read, returned as unsigned 64-bit).
  kmp_uint64 get_ub() const {
#if defined(KMP_GOMP_COMPAT)
    if (taskdata->td_flags.native) {
      if (taskdata->td_size_loop_bounds == 4) {
        kmp_int32 *narrow = RCAST(kmp_int32 *, task->shareds) + 1;
        return (kmp_int64)*narrow;
      }
      kmp_int64 *wide = RCAST(kmp_int64 *, task->shareds) + 1;
      return (kmp_int64)*wide;
    }
#endif
    return *(kmp_int64 *)((char *)task + upper_offset);
  }

  // Store the lower bound.
  void set_lb(kmp_uint64 lb) {
#if defined(KMP_GOMP_COMPAT)
    if (taskdata->td_flags.native) {
      if (taskdata->td_size_loop_bounds == 4) {
        kmp_uint32 *narrow = RCAST(kmp_uint32 *, task->shareds);
        *narrow = (kmp_uint32)lb;
      } else {
        kmp_uint64 *wide = RCAST(kmp_uint64 *, task->shareds);
        *wide = (kmp_uint64)lb;
      }
      return;
    }
#endif
    *(kmp_uint64 *)((char *)task + lower_offset) = lb;
  }

  // Store the upper bound.
  void set_ub(kmp_uint64 ub) {
#if defined(KMP_GOMP_COMPAT)
    if (taskdata->td_flags.native) {
      if (taskdata->td_size_loop_bounds == 4) {
        kmp_uint32 *narrow = RCAST(kmp_uint32 *, task->shareds) + 1;
        *narrow = (kmp_uint32)ub;
      } else {
        kmp_uint64 *wide = RCAST(kmp_uint64 *, task->shareds) + 1;
        *wide = (kmp_uint64)ub;
      }
      return;
    }
#endif
    *(kmp_uint64 *)((char *)task + upper_offset) = ub;
  }
};
// Split a taskloop iteration range into `num_tasks` tasks, one after another.
// For every chunk the pattern task is duplicated, the copy's bounds are set,
// the optional duplication callback is invoked, and the copy is scheduled.
// The pattern task itself is only passed through __kmp_task_start() and
// __kmp_task_finish<false>() at the end.
//
// loc         source location (not referenced in this function).
// gtid        global thread id of the calling thread.
// task        pattern task that is duplicated for every chunk.
// lb, ub      pointers to the pattern task's lower/upper bound fields.
// st          loop stride.
// ub_glob     upper bound compared against when deciding `lastpriv`.
// num_tasks   number of tasks to create.
// grainsize   base number of iterations per task.
// extras      number of leading tasks that get one additional iteration.
// last_chunk  only used in the debug consistency check on `tc` here.
// tc          total iteration count (debug consistency check only).
// codeptr_ra  (OMPT builds) forwarded to __kmp_omp_taskloop_task().
// task_dup    optional p_task_dup_t callback, may be NULL.
void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
kmp_uint64 ub_glob, kmp_uint64 num_tasks,
kmp_uint64 grainsize, kmp_uint64 extras,
kmp_int64 last_chunk, kmp_uint64 tc,
#if OMPT_SUPPORT
void *codeptr_ra,
#endif
void *task_dup) {
KMP_COUNT_BLOCK(OMP_TASKLOOP);
KMP_TIME_PARTITIONED_BLOCK(OMP_taskloop_scheduling);
p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
kmp_taskloop_bounds_t task_bounds(task, lb, ub);
kmp_uint64 lower = task_bounds.get_lb();
kmp_uint64 upper = task_bounds.get_ub();
kmp_uint64 i;
kmp_info_t *thread = __kmp_threads[gtid];
kmp_taskdata_t *current_task = thread->th.th_current_task;
kmp_task_t *next_task;
kmp_int32 lastpriv = 0;
KMP_DEBUG_ASSERT(tc == num_tasks * grainsize +
(last_chunk < 0 ? last_chunk : extras));
KMP_DEBUG_ASSERT(num_tasks > extras);
KMP_DEBUG_ASSERT(num_tasks > 0);
KA_TRACE(20, ("__kmp_taskloop_linear: T#%d: %lld tasks, grainsize %lld, "
"extras %lld, last_chunk %lld, i=%lld,%lld(%d)%lld, dup %p\n",
gtid, num_tasks, grainsize, extras, last_chunk, lower, upper,
ub_glob, st, task_dup));
for (i = 0; i < num_tasks; ++i) {
// chunk_minus_1 is the chunk length minus one: the first `extras` tasks
// cover grainsize + 1 iterations, the remaining ones grainsize.
kmp_uint64 chunk_minus_1;
if (extras == 0) {
chunk_minus_1 = grainsize - 1;
} else {
chunk_minus_1 = grainsize;
--extras;
}
upper = lower + st * chunk_minus_1;
// Never let a chunk extend beyond the pattern task's upper bound.
if (upper > *ub) {
upper = *ub;
}
// For the last task decide whether it also covers the final iteration with
// respect to ub_glob; the test depends on the sign of the stride.
if (i == num_tasks - 1) {
if (st == 1) {
KMP_DEBUG_ASSERT(upper == *ub);
if (upper == ub_glob)
lastpriv = 1;
} else if (st > 0) {
KMP_DEBUG_ASSERT((kmp_uint64)st > *ub - upper);
if ((kmp_uint64)st > ub_glob - upper)
lastpriv = 1;
} else {
KMP_DEBUG_ASSERT(upper + st < *ub);
if (upper - ub_glob < (kmp_uint64)(-st))
lastpriv = 1;
}
}
#if OMPX_TASKGRAPH
next_task = __kmp_task_dup_alloc(thread, task, 0);
#else
next_task = __kmp_task_dup_alloc(thread, task);
#endif
kmp_taskdata_t *next_taskdata = KMP_TASK_TO_TASKDATA(next_task);
kmp_taskloop_bounds_t next_task_bounds =
kmp_taskloop_bounds_t(next_task, task_bounds);
next_task_bounds.set_lb(lower);
// Tasks flagged `native` get an upper bound one step past `upper` in the
// direction of the stride.
if (next_taskdata->td_flags.native) {
next_task_bounds.set_ub(upper + (st > 0 ? 1 : -1));
} else {
next_task_bounds.set_ub(upper);
}
if (ptask_dup != NULL)
ptask_dup(next_task, task, lastpriv);
KA_TRACE(40,
("__kmp_taskloop_linear: T#%d; task #%llu: task %p: lower %lld, "
"upper %lld stride %lld, (offsets %p %p)\n",
gtid, i, next_task, lower, upper, st,
next_task_bounds.get_lower_offset(),
next_task_bounds.get_upper_offset()));
#if OMPT_SUPPORT
__kmp_omp_taskloop_task(NULL, gtid, next_task,
codeptr_ra);
#if OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_dispatch) {
OMPT_GET_DISPATCH_CHUNK(next_taskdata->ompt_task_info.dispatch_chunk,
lower, upper, st);
}
#endif
#else
__kmp_omp_task(gtid, next_task, true);
#endif
// The next chunk starts one stride after this chunk's last iteration.
lower = upper + st;
}
// NOTE(review): presumably these two calls only do the bookkeeping needed to
// retire the pattern task without running its body -- confirm.
__kmp_task_start(gtid, task, current_task);
__kmp_task_finish<false>(gtid, task, current_task);
}
// Argument bundle read by __kmp_taskloop_task() from a task's shareds area.
// Each field is forwarded as the parameter of the same name to
// __kmp_taskloop_recur() or __kmp_taskloop_linear().
typedef struct __taskloop_params {
kmp_task_t *task; // pattern task
kmp_uint64 *lb; // pointer to the lower bound
kmp_uint64 *ub; // pointer to the upper bound
void *task_dup; // task duplication callback (p_task_dup_t) or NULL
kmp_int64 st; // loop stride
kmp_uint64 ub_glob; // upper bound used for the lastprivate decision
kmp_uint64 num_tasks; // number of tasks to create
kmp_uint64 grainsize; // base number of iterations per task
kmp_uint64 extras; // number of tasks that get one extra iteration
kmp_int64 last_chunk; // see the tc consistency check in the taskloop code
kmp_uint64 tc; // total iteration count
kmp_uint64 num_t_min; // recursion is used while num_tasks > num_t_min
#if OMPT_SUPPORT
void *codeptr_ra; // forwarded to the OMPT-aware scheduling calls
#endif
} __taskloop_params_t;
// Forward declaration: __kmp_taskloop_task() below calls
// __kmp_taskloop_recur(), which is defined after it.
void __kmp_taskloop_recur(ident_t *, int, kmp_task_t *, kmp_uint64 *,
kmp_uint64 *, kmp_int64, kmp_uint64, kmp_uint64,
kmp_uint64, kmp_uint64, kmp_int64, kmp_uint64,
kmp_uint64,
#if OMPT_SUPPORT
void *,
#endif
void *);
int __kmp_taskloop_task(int gtid, void *ptask) {
__taskloop_params_t *p =
(__taskloop_params_t *)((kmp_task_t *)ptask)->shareds;
kmp_task_t *task = p->task;
kmp_uint64 *lb = p->lb;
kmp_uint64 *ub = p->ub;
void *task_dup = p->task_dup;
kmp_int64 st = p->st;
kmp_uint64 ub_glob = p->ub_glob;
kmp_uint64 num_tasks = p->num_tasks;
kmp_uint64 grainsize = p->grainsize;
kmp_uint64 extras = p->extras;
kmp_int64 last_chunk = p->last_chunk;
kmp_uint64 tc = p->tc;
kmp_uint64 num_t_min = p->num_t_min;
#if OMPT_SUPPORT
void *codeptr_ra = p->codeptr_ra;
#endif
#if KMP_DEBUG
kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
KMP_DEBUG_ASSERT(task != NULL);
KA_TRACE(20,
("__kmp_taskloop_task: T#%d, task %p: %lld tasks, grainsize"
" %lld, extras %lld, last_chunk %lld, i=%lld,%lld(%d), dup %p\n",
gtid, taskdata, num_tasks, grainsize, extras, last_chunk, *lb, *ub,
st, task_dup));
#endif
KMP_DEBUG_ASSERT(num_tasks * 2 + 1 > num_t_min);
if (num_tasks > num_t_min)
__kmp_taskloop_recur(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
grainsize, extras, last_chunk, tc, num_t_min,
#if OMPT_SUPPORT
codeptr_ra,
#endif
task_dup);
else
__kmp_taskloop_linear(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
grainsize, extras, last_chunk, tc,
#if OMPT_SUPPORT
codeptr_ra,
#endif
task_dup);
KA_TRACE(40, ("__kmp_taskloop_task(exit): T#%d\n", gtid));
return 0;
}
// Recursively halve the work of a taskloop.
//
// The iteration space described by (*lb, *ub, st, tc) is cut in two. The
// second half is handed to a freshly allocated auxiliary task that runs
// __kmp_taskloop_task; the first half is processed by the calling thread,
// either by recursing again (while the task count exceeds num_t_min) or by
// __kmp_taskloop_linear.
//
// loc         Source location information
// gtid        Global thread id
// task        Task whose bounds describe the range being split
// lb, ub      Pointers to the lower / upper bound stored inside `task`
// st          Loop stride
// ub_glob     Upper bound captured before any splitting
// num_tasks   Number of tasks to generate for this range
// grainsize   Iterations per task
// extras      Number of tasks that get one additional iteration
// last_chunk  Negative size correction for the last task, 0 otherwise
// tc          Trip count of this range
// num_t_min   Task-count threshold that keeps the recursion going
// codeptr_ra  Return address for OMPT (only when OMPT_SUPPORT)
// task_dup    Task duplication routine, may be NULL
void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task,
                          kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
                          kmp_uint64 ub_glob, kmp_uint64 num_tasks,
                          kmp_uint64 grainsize, kmp_uint64 extras,
                          kmp_int64 last_chunk, kmp_uint64 tc,
                          kmp_uint64 num_t_min,
#if OMPT_SUPPORT
                          void *codeptr_ra,
#endif
                          void *task_dup) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  KMP_DEBUG_ASSERT(task != NULL);
  KMP_DEBUG_ASSERT(num_tasks > num_t_min);
  // st is a 64-bit value, so it is printed with %lld (it was %d before, which
  // mismatches the argument type passed through the varargs call).
  KA_TRACE(20,
           ("__kmp_taskloop_recur: T#%d, task %p: %lld tasks, grainsize"
            " %lld, extras %lld, last_chunk %lld, i=%lld,%lld(%lld), dup %p\n",
            gtid, taskdata, num_tasks, grainsize, extras, last_chunk, *lb, *ub,
            st, task_dup));
  p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
  kmp_uint64 lower = *lb;
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_task_t *next_task;
  // Byte offsets of the bounds inside the task; the same offsets locate the
  // bounds inside the duplicated task below.
  size_t lower_offset = (char *)lb - (char *)task;
  size_t upper_offset = (char *)ub - (char *)task;

  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize +
                             (last_chunk < 0 ? last_chunk : extras));
  KMP_DEBUG_ASSERT(num_tasks > extras);
  KMP_DEBUG_ASSERT(num_tasks > 0);

  // Split the range: suffix 0 is the half kept by this thread, suffix 1 the
  // half handed to the auxiliary task.
  kmp_uint64 lb1, ub0, tc0, tc1, ext0, ext1;
  kmp_int64 last_chunk0 = 0, last_chunk1 = 0;
  kmp_uint64 gr_size0 = grainsize;
  kmp_uint64 n_tsk0 = num_tasks >> 1;
  kmp_uint64 n_tsk1 = num_tasks - n_tsk0;
  if (last_chunk < 0) {
    // No extras; the size correction of the last task travels with half 1.
    ext0 = ext1 = 0;
    last_chunk1 = last_chunk;
    tc0 = grainsize * n_tsk0;
    tc1 = tc - tc0;
  } else if (n_tsk0 <= extras) {
    // Every task of half 0 gets an extra iteration: fold it into the grain
    // size and pass the remaining extras on to half 1.
    gr_size0++;
    ext0 = 0;
    ext1 = extras - n_tsk0;
    tc0 = gr_size0 * n_tsk0;
    tc1 = tc - tc0;
  } else {
    // All extras fit into half 0; half 1 has uniform tasks.
    ext1 = 0;
    ext0 = extras;
    tc1 = grainsize * n_tsk1;
    tc0 = tc - tc1;
  }
  ub0 = lower + st * (tc0 - 1);
  lb1 = ub0 + st;

  // Duplicate the task for half 1 and give it the new lower bound.
#if OMPX_TASKGRAPH
  next_task = __kmp_task_dup_alloc(thread, task,
                                   /* taskloop_recur */ 1);
#else
  next_task = __kmp_task_dup_alloc(thread, task);
#endif
  *(kmp_uint64 *)((char *)next_task + lower_offset) = lb1;
  if (ptask_dup != NULL)
    ptask_dup(next_task, task, 0);
  // Shrink the original task to half 0.
  *ub = ub0;

  // Allocate the auxiliary task while th_current_task temporarily points at
  // the parent of `task`, then restore the real current task.
  kmp_taskdata_t *current_task = thread->th.th_current_task;
  thread->th.th_current_task = taskdata->td_parent;
  kmp_task_t *new_task =
      __kmpc_omp_task_alloc(loc, gtid, 1, 3 * sizeof(void *),
                            sizeof(__taskloop_params_t), &__kmp_taskloop_task);
  thread->th.th_current_task = current_task;

  // Pack the description of half 1 for __kmp_taskloop_task.
  __taskloop_params_t *p = (__taskloop_params_t *)new_task->shareds;
  p->task = next_task;
  p->lb = (kmp_uint64 *)((char *)next_task + lower_offset);
  p->ub = (kmp_uint64 *)((char *)next_task + upper_offset);
  p->task_dup = task_dup;
  p->st = st;
  p->ub_glob = ub_glob;
  p->num_tasks = n_tsk1;
  p->grainsize = grainsize;
  p->extras = ext1;
  p->last_chunk = last_chunk1;
  p->tc = tc1;
  p->num_t_min = num_t_min;
#if OMPT_SUPPORT
  p->codeptr_ra = codeptr_ra;
#endif

#if OMPX_TASKGRAPH
  kmp_taskdata_t *new_task_data = KMP_TASK_TO_TASKDATA(new_task);
  new_task_data->tdg = taskdata->tdg;
  new_task_data->is_taskgraph = 0;
#endif

  // Schedule the auxiliary task.
#if OMPT_SUPPORT
  __kmp_omp_taskloop_task(NULL, gtid, new_task, codeptr_ra);
#else
  __kmp_omp_task(gtid, new_task, true);
#endif

  // Process half 0 on this thread.
  if (n_tsk0 > num_t_min)
    __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0, gr_size0,
                         ext0, last_chunk0, tc0, num_t_min,
#if OMPT_SUPPORT
                         codeptr_ra,
#endif
                         task_dup);
  else
    __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0,
                          gr_size0, ext0, last_chunk0, tc0,
#if OMPT_SUPPORT
                          codeptr_ra,
#endif
                          task_dup);

  KA_TRACE(40, ("__kmp_taskloop_recur(exit): T#%d\n", gtid));
}
// Common implementation behind __kmpc_taskloop and __kmpc_taskloop_5.
//
// Computes the trip count, derives the number of tasks / grain size / extras
// from the requested schedule, and then generates the tasks either linearly
// or by recursive halving. Unless nogroup is set, the whole construct is
// wrapped in a taskgroup.
//
// loc        Source location information
// gtid       Global thread id
// task       Task structure carrying the loop bounds
// if_val     Value of the if clause; 0 forces serialized, tied tasks
// lb, ub     Pointers to the lower / upper loop bound inside `task`
// st         Loop stride
// nogroup    1 if the nogroup clause was specified, 0 otherwise
// sched      Schedule kind: 0 none, 1 grainsize, 2 num_tasks
// grainsize  Schedule value if specified
// modifier   1 if the schedule value carries a modifier, 0 otherwise
// task_dup   Task duplication routine, may be NULL
static void __kmp_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
                           kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
                           int nogroup, int sched, kmp_uint64 grainsize,
                           int modifier, void *task_dup) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  KMP_DEBUG_ASSERT(task != NULL);
  if (nogroup == 0) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
    OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
    __kmpc_taskgroup(loc, gtid);
  }

#if OMPX_TASKGRAPH
  // NOTE(review): presumably undoes the task id consumed by `task` itself,
  // which only serves as a template here; confirm against the allocator.
  KMP_ATOMIC_DEC(&__kmp_tdg_task_id);
#endif
  kmp_taskloop_bounds_t task_bounds(task, lb, ub);
  kmp_uint64 tc;
  kmp_uint64 lower = task_bounds.get_lb();
  kmp_uint64 upper = task_bounds.get_ub();
  kmp_uint64 ub_glob = upper; // remembered before the bounds get rewritten
  kmp_uint64 num_tasks = 0, extras = 0;
  kmp_int64 last_chunk = 0; // stays 0 unless the grainsize modifier is used
  kmp_uint64 num_tasks_min = __kmp_taskloop_min_tasks;
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *current_task = thread->th.th_current_task;

  KA_TRACE(20, ("__kmp_taskloop: T#%d, task %p, lb %lld, ub %lld, st %lld, "
                "grain %llu(%d, %d), dup %p\n",
                gtid, taskdata, lower, upper, st, grainsize, sched, modifier,
                task_dup));

  // Compute the trip count.
  if (st == 1) {
    tc = upper - lower + 1;
  } else if (st < 0) {
    tc = (lower - upper) / (-st) + 1;
  } else {
    tc = (upper - lower) / st + 1;
  }
  if (tc == 0) {
    KA_TRACE(20, ("__kmp_taskloop(exit): T#%d zero-trip loop\n", gtid));
    // Nothing to execute: start and immediately finish the task.
    __kmp_task_start(gtid, task, current_task);
    __kmp_task_finish<false>(gtid, task, current_task);
    // The taskgroup opened at the top must be closed on this path as well;
    // returning without doing so would leave it open.
    if (nogroup == 0) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
      OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
      __kmpc_end_taskgroup(loc, gtid);
    }
    return;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_taskloop, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  // No threshold configured: derive one from the team size.
  if (num_tasks_min == 0)
    num_tasks_min =
        KMP_MIN(thread->th.th_team_nproc * 10, INITIAL_TASK_DEQUE_SIZE);

  // Derive num_tasks, grainsize and extras such that
  //   tc == num_tasks * grainsize + (last_chunk < 0 ? last_chunk : extras)
  switch (sched) {
  case 0: // no schedule clause: default to 10 tasks per team thread
    grainsize = thread->th.th_team_nproc * 10;
    KMP_FALLTHROUGH();
  case 2: // grainsize currently holds the requested number of tasks
    if (grainsize > tc) {
      num_tasks = tc; // too many tasks requested: one iteration each
      grainsize = 1;
      extras = 0;
    } else {
      num_tasks = grainsize;
      grainsize = tc / num_tasks;
      extras = tc % num_tasks;
    }
    break;
  case 1: // grainsize provided
    if (grainsize > tc) {
      num_tasks = 1;
      grainsize = tc; // too large a grain: a single task takes everything
      extras = 0;
    } else {
      if (modifier) {
        // Keep the grain size as given; the last task is shortened by
        // last_chunk (which is <= 0 here).
        num_tasks = (tc + grainsize - 1) / grainsize;
        last_chunk = tc - (num_tasks * grainsize);
        extras = 0;
      } else {
        num_tasks = tc / grainsize;
        // Re-balance the grain size over the resulting number of tasks.
        grainsize = tc / num_tasks;
        extras = tc % num_tasks;
      }
    }
    break;
  default:
    KMP_ASSERT2(0, "unknown scheduling of taskloop");
  }

  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize +
                             (last_chunk < 0 ? last_chunk : extras));
  KMP_DEBUG_ASSERT(num_tasks > extras);
  KMP_DEBUG_ASSERT(num_tasks > 0);

  if (if_val == 0) {
    // if clause evaluated to false: mark the task serial and tied, then
    // generate the tasks linearly.
    taskdata->td_flags.task_serial = 1;
    taskdata->td_flags.tiedness = TASK_TIED;
    __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
                          grainsize, extras, last_chunk, tc,
#if OMPT_SUPPORT
                          OMPT_GET_RETURN_ADDRESS(0),
#endif
                          task_dup);
  } else if (num_tasks > num_tasks_min && !taskdata->td_flags.native) {
    // Many tasks (and the native flag is not set): split recursively.
    KA_TRACE(20, ("__kmp_taskloop: T#%d, go recursive: tc %llu, #tasks %llu"
                  "(%lld), grain %llu, extras %llu, last_chunk %lld\n",
                  gtid, tc, num_tasks, num_tasks_min, grainsize, extras,
                  last_chunk));
    __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
                         grainsize, extras, last_chunk, tc, num_tasks_min,
#if OMPT_SUPPORT
                         OMPT_GET_RETURN_ADDRESS(0),
#endif
                         task_dup);
  } else {
    KA_TRACE(20, ("__kmp_taskloop: T#%d, go linear: tc %llu, #tasks %llu"
                  "(%lld), grain %llu, extras %llu, last_chunk %lld\n",
                  gtid, tc, num_tasks, num_tasks_min, grainsize, extras,
                  last_chunk));
    __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
                          grainsize, extras, last_chunk, tc,
#if OMPT_SUPPORT
                          OMPT_GET_RETURN_ADDRESS(0),
#endif
                          task_dup);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_taskloop, ompt_scope_end, &(team_info->parallel_data),
        &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (nogroup == 0) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
    OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
    __kmpc_end_taskgroup(loc, gtid);
  }
  KA_TRACE(20, ("__kmp_taskloop(exit): T#%d\n", gtid));
}
/*!
@ingroup TASKING
@param loc       Source location information
@param gtid      Global thread ID
@param task      Task structure
@param if_val    Value of the if clause
@param lb        Pointer to loop lower bound in task structure
@param ub        Pointer to loop upper bound in task structure
@param st        Loop stride
@param nogroup   Flag, 1 if nogroup clause specified, 0 otherwise
@param sched     Schedule specified 0/1/2 for none/grainsize/num_tasks
@param grainsize Schedule value if specified
@param task_dup  Tasks duplication routine

Execute the taskloop construct.
*/
void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
                     kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup,
                     int sched, kmp_uint64 grainsize, void *task_dup) {
  // This entry point has no modifier argument; forward it as "absent".
  const int no_modifier = 0;

  __kmp_assert_valid_gtid(gtid);
  KA_TRACE(20, ("__kmpc_taskloop(enter): T#%d\n", gtid));
  __kmp_taskloop(loc, gtid, task, if_val, lb, ub, st, nogroup, sched, grainsize,
                 no_modifier, task_dup);
  KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d\n", gtid));
}
/*!
@ingroup TASKING
@param loc       Source location information
@param gtid      Global thread ID
@param task      Task structure
@param if_val    Value of the if clause
@param lb        Pointer to loop lower bound in task structure
@param ub        Pointer to loop upper bound in task structure
@param st        Loop stride
@param nogroup   Flag, 1 if nogroup clause specified, 0 otherwise
@param sched     Schedule specified 0/1/2 for none/grainsize/num_tasks
@param grainsize Schedule value if specified
@param modifier  Modifier 'strict' for sched, 1 if present, 0 otherwise
@param task_dup  Tasks duplication routine

Execute the taskloop construct.
*/
void __kmpc_taskloop_5(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
                       kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
                       int nogroup, int sched, kmp_uint64 grainsize,
                       int modifier, void *task_dup) {
  // Reject an invalid gtid before anything else touches per-thread state.
  __kmp_assert_valid_gtid(gtid);

  KA_TRACE(20, ("__kmpc_taskloop_5(enter): T#%d\n", gtid));

  // Same as __kmpc_taskloop, except that the caller supplies the modifier.
  __kmp_taskloop(loc, gtid, task, if_val, lb, ub, st, nogroup, sched, grainsize,
                 modifier, task_dup);

  KA_TRACE(20, ("__kmpc_taskloop_5(exit): T#%d\n", gtid));
}
/*!
@ingroup TASKING
@param gtid Global Thread ID of current thread
@return Returns a pointer to the thread's current task async handle. If no task
is present or gtid is invalid, returns NULL.

Acquires a pointer to the target async handle from the current task.
*/
void **__kmpc_omp_get_target_async_handle_ptr(kmp_int32 gtid) {
  void **handle_slot = NULL;

  // Only look up the thread when the gtid actually designates one.
  if (gtid != KMP_GTID_DNE) {
    kmp_taskdata_t *running_task =
        __kmp_thread_from_gtid(gtid)->th.th_current_task;
    if (running_task)
      handle_slot = &running_task->td_target_data.async_handle;
  }

  return handle_slot;
}
/*!
@ingroup TASKING
@param gtid Global Thread ID of current thread
@return Returns TRUE if the task currently being executed by the given thread
has a task team allocated to it. Otherwise, returns FALSE.

Checks if the current thread has a task team.
*/
bool __kmpc_omp_has_task_team(kmp_int32 gtid) {
  // A gtid of KMP_GTID_DNE designates no thread, hence no task team.
  if (gtid != KMP_GTID_DNE) {
    kmp_taskdata_t *running_task =
        __kmp_thread_from_gtid(gtid)->th.th_current_task;
    if (running_task)
      return running_task->td_task_team != NULL;
  }
  return FALSE;
}
#if OMPX_TASKGRAPH
// Look up the task dependency graph registered under tdg_id.
// Returns nullptr when TDGs are disabled (__kmp_max_tdgs == 0), when the slot
// is empty, or when the stored TDG has status KMP_TDG_NONE. The global table
// is allocated on first use.
static kmp_tdg_info_t *__kmp_find_tdg(kmp_int32 tdg_id) {
  if (__kmp_max_tdgs == 0)
    return nullptr;

  // Lazily create the table of TDG pointers.
  // NOTE(review): relies on __kmp_allocate returning zeroed memory so that
  // unused slots read as NULL - confirm.
  if (__kmp_global_tdgs == NULL)
    __kmp_global_tdgs = (kmp_tdg_info_t **)__kmp_allocate(
        sizeof(kmp_tdg_info_t *) * __kmp_max_tdgs);

  kmp_tdg_info_t *candidate = __kmp_global_tdgs[tdg_id];
  if (candidate && candidate->tdg_status != KMP_TDG_NONE)
    return candidate;
  return nullptr;
}
// Dump a task dependency graph in Graphviz dot format to the file
// "tdg_<tdg_id>.dot" in the current working directory: one bold node per
// recorded task, followed by one edge per recorded successor.
//
// tdg  The graph to print
void __kmp_print_tdg_dot(kmp_tdg_info_t *tdg) {
  kmp_int32 tdg_id = tdg->tdg_id;
#if KMP_DEBUG
  // The trace messages print a thread id, but this routine receives none;
  // look it up so that debug builds compile.
  kmp_int32 gtid = __kmp_get_gtid();
#endif
  KA_TRACE(10, ("__kmp_print_tdg_dot(enter): T#%d tdg_id=%d \n", gtid, tdg_id));

  // "tdg_" + up to 11 characters of a 32-bit int + ".dot" + NUL == 20 bytes;
  // snprintf keeps the write bounded even if the format ever changes.
  char file_name[20];
  snprintf(file_name, sizeof(file_name), "tdg_%d.dot", tdg_id);
  kmp_safe_raii_file_t tdg_file(file_name, "w");

  kmp_int32 num_tasks = KMP_ATOMIC_LD_RLX(&tdg->num_tasks);
  fprintf(tdg_file,
          "digraph TDG {\n"
          "   compound=true\n"
          "   subgraph cluster {\n"
          "      label=TDG_%d\n",
          tdg_id);
  // Nodes.
  for (kmp_int32 i = 0; i < num_tasks; i++) {
    fprintf(tdg_file, "      %d[style=bold]\n", i);
  }
  fprintf(tdg_file, "   }\n");
  // Edges.
  for (kmp_int32 i = 0; i < num_tasks; i++) {
    kmp_int32 nsuccessors = tdg->record_map[i].nsuccessors;
    kmp_int32 *successors = tdg->record_map[i].successors;
    for (kmp_int32 j = 0; j < nsuccessors; j++)
      fprintf(tdg_file, "   %d -> %d \n", i, successors[j]);
  }
  fprintf(tdg_file, "}");
  KA_TRACE(10, ("__kmp_print_tdg_dot(exit): T#%d tdg_id=%d \n", gtid, tdg_id));
}
// Replay a recorded task dependency graph.
// Every recorded task is re-parented to the calling thread's current task,
// its predecessor counter is reset and the parent's bookkeeping counters are
// bumped; afterwards the root tasks are scheduled.
//
// gtid  Global thread id of the replaying thread
// tdg   The graph to execute; must be in state KMP_TDG_READY
void __kmp_exec_tdg(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
  KMP_DEBUG_ASSERT(tdg->tdg_status == KMP_TDG_READY);
  KA_TRACE(10, ("__kmp_exec_tdg(enter): T#%d tdg_id=%d num_roots=%d\n", gtid,
                tdg->tdg_id, tdg->num_roots));

  kmp_node_info_t *nodes = tdg->record_map;
  kmp_int32 *root_ids = tdg->root_tasks;
  kmp_int32 root_count = tdg->num_roots;
  kmp_int32 task_count = KMP_ATOMIC_LD_RLX(&tdg->num_tasks);
  kmp_taskdata_t *parent_task = __kmp_threads[gtid]->th.th_current_task;

  // Re-initialize recorded task reductions, if any.
  if (tdg->rec_taskred_data) {
    __kmpc_taskred_init(gtid, tdg->rec_num_taskred, tdg->rec_taskred_data);
  }

  for (kmp_int32 idx = 0; idx < task_count; idx++) {
    kmp_node_info_t &node = nodes[idx];
    kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(node.task);

    // Attach the recorded task to the task that replays the graph.
    td->td_parent = parent_task;
    node.parent_task = parent_task;

    kmp_taskgroup_t *parent_taskgroup = node.parent_task->td_taskgroup;

    // Rearm the dependence counter for this run.
    KMP_ATOMIC_ST_RLX(&node.npredecessors_counter, node.npredecessors);
    KMP_ATOMIC_INC(&node.parent_task->td_incomplete_child_tasks);

    if (parent_taskgroup) {
      KMP_ATOMIC_INC(&parent_taskgroup->count);
      td->td_taskgroup = parent_taskgroup;
    } else if (td->td_taskgroup != nullptr) {
      // The parent has no taskgroup: drop whatever the task held before.
      td->td_taskgroup = nullptr;
    }

    if (node.parent_task->td_flags.tasktype == TASK_EXPLICIT)
      KMP_ATOMIC_INC(&node.parent_task->td_allocated_child_tasks);
  }

  // Kick off the graph by scheduling its roots.
  for (kmp_int32 r = 0; r < root_count; ++r) {
    __kmp_omp_task(gtid, nodes[root_ids[r]].task, true);
  }

  KA_TRACE(10, ("__kmp_exec_tdg(exit): T#%d tdg_id=%d num_roots=%d\n", gtid,
                tdg->tdg_id, tdg->num_roots));
}
// Allocate and register a fresh TDG in slot __kmp_curr_tdg_idx of the global
// table and put it into recording state. The record map starts out with
// INIT_MAPSIZE empty nodes, each owning a successor list of
// __kmp_successors_size entries.
//
// gtid    Global thread id (not used by this routine)
// flags   Taskgraph flags (not used by this routine)
// tdg_id  Id stored in the new TDG
static inline void __kmp_start_record(kmp_int32 gtid,
                                      kmp_taskgraph_flags_t *flags,
                                      kmp_int32 tdg_id) {
  kmp_tdg_info_t *tdg =
      (kmp_tdg_info_t *)__kmp_allocate(sizeof(kmp_tdg_info_t));
  __kmp_global_tdgs[__kmp_curr_tdg_idx] = tdg;

  // Header of the new graph.
  tdg->tdg_id = tdg_id;
  tdg->map_size = INIT_MAPSIZE;
  tdg->num_roots = -1; // set by __kmp_end_record
  tdg->root_tasks = nullptr;
  tdg->tdg_status = KMP_TDG_RECORDING;
  tdg->rec_num_taskred = 0;
  tdg->rec_taskred_data = nullptr;
  KMP_ATOMIC_ST_RLX(&tdg->num_tasks, 0);

  // Node storage.
  kmp_node_info_t *nodes =
      (kmp_node_info_t *)__kmp_allocate(INIT_MAPSIZE * sizeof(kmp_node_info_t));
  for (kmp_int32 idx = 0; idx < INIT_MAPSIZE; ++idx) {
    kmp_int32 *succ_list =
        (kmp_int32 *)__kmp_allocate(__kmp_successors_size * sizeof(kmp_int32));
    kmp_node_info_t &node = nodes[idx];
    node.task = nullptr;
    node.successors = succ_list;
    node.nsuccessors = 0;
    node.npredecessors = 0;
    node.successors_size = __kmp_successors_size;
    KMP_ATOMIC_ST_RLX(&node.npredecessors_counter, 0);
  }

  __kmp_global_tdgs[__kmp_curr_tdg_idx]->record_map = nodes;
}
// Begin a taskgraph region.
// If a TDG is already registered under tdg_id it is replayed; otherwise a new
// TDG is created and recording starts. A taskgroup is opened in both cases
// (closed again by __kmpc_end_record_task), except when TDGs are disabled.
//
// loc_ref      Source location information
// gtid         Global thread id
// input_flags  Taskgraph flags, reinterpreted as kmp_taskgraph_flags_t
// tdg_id       Id of the TDG to record or replay
// Returns 1 if the region body has to be executed (recording, or TDGs
// disabled via __kmp_max_tdgs == 0), 0 if a recorded TDG was replayed.
kmp_int32 __kmpc_start_record_task(ident_t *loc_ref, kmp_int32 gtid,
                                   kmp_int32 input_flags, kmp_int32 tdg_id) {
  kmp_int32 res;
  kmp_taskgraph_flags_t *flags = (kmp_taskgraph_flags_t *)&input_flags;

  KA_TRACE(10,
           ("__kmpc_start_record_task(enter): T#%d loc=%p flags=%d tdg_id=%d\n",
            gtid, loc_ref, input_flags, tdg_id));

  if (__kmp_max_tdgs == 0) {
    KA_TRACE(
        10,
        ("__kmpc_start_record_task(abandon): T#%d loc=%p flags=%d tdg_id = %d, "
         "__kmp_max_tdgs = 0\n",
         gtid, loc_ref, input_flags, tdg_id));
    return 1;
  }

  // Check the id before it is used as an index into the global TDG table by
  // __kmp_find_tdg / __kmp_start_record; asserting afterwards would be too
  // late to catch an out-of-range access.
  KMP_DEBUG_ASSERT(tdg_id >= 0 && tdg_id < __kmp_max_tdgs);

  __kmpc_taskgroup(loc_ref, gtid);
  if (kmp_tdg_info_t *tdg = __kmp_find_tdg(tdg_id)) {
    // Already recorded: replay.
    __kmp_exec_tdg(gtid, tdg);
    res = 0;
  } else {
    // First encounter: start recording into slot tdg_id.
    __kmp_curr_tdg_idx = tdg_id;
    __kmp_start_record(gtid, flags, tdg_id);
    __kmp_num_tdg++;
    res = 1;
  }
  KA_TRACE(10, ("__kmpc_start_record_task(exit): T#%d TDG %d starts to %s\n",
                gtid, tdg_id, res ? "record" : "execute"));
  return res;
}
// Finish recording a TDG: collect the root tasks (nodes without
// predecessors), mark the graph READY, release the current task's dependence
// hash, rearm all predecessor counters, reset the global TDG task id counter
// and optionally dump the graph in dot format.
//
// gtid  Global thread id of the recording thread
// tdg   The graph being finalized; expected to be in state KMP_TDG_RECORDING
void __kmp_end_record(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
  kmp_node_info_t *nodes = tdg->record_map;
  kmp_int32 task_count = KMP_ATOMIC_LD_RLX(&tdg->num_tasks);
  kmp_int32 *root_ids =
      (kmp_int32 *)__kmp_allocate(task_count * sizeof(kmp_int32));
  kmp_int32 map_size = tdg->map_size;
  kmp_int32 root_count = 0;
  kmp_info_t *thread = __kmp_threads[gtid];

  // A node is a root when nothing has to complete before it.
  for (kmp_int32 idx = 0; idx < task_count; ++idx) {
    if (nodes[idx].npredecessors == 0)
      root_ids[root_count++] = idx;
  }

  // Publish the finalized description.
  tdg->map_size = map_size;
  tdg->num_roots = root_count;
  tdg->root_tasks = root_ids;
  KMP_DEBUG_ASSERT(tdg->tdg_status == KMP_TDG_RECORDING);
  tdg->tdg_status = KMP_TDG_READY;

  // Drop the dependence hash of the current task, if it has one.
  kmp_taskdata_t *recording_task = thread->th.th_current_task;
  if (recording_task->td_dephash) {
    __kmp_dephash_free(thread, recording_task->td_dephash);
    recording_task->td_dephash = NULL;
  }

  // Rearm the per-node predecessor counters.
  for (kmp_int32 idx = 0; idx < task_count; ++idx) {
    KMP_ATOMIC_ST_RLX(&nodes[idx].npredecessors_counter,
                      nodes[idx].npredecessors);
  }
  KMP_ATOMIC_ST_RLX(&__kmp_tdg_task_id, 0);

  if (__kmp_tdg_dot)
    __kmp_print_tdg_dot(tdg);
}
// End a taskgraph region opened by __kmpc_start_record_task.
// Closes the taskgroup and, if the TDG registered under tdg_id is still
// recording, finalizes it via __kmp_end_record. Does nothing besides tracing
// when TDGs are disabled (__kmp_max_tdgs == 0).
//
// loc_ref      Source location information
// gtid         Global thread id
// input_flags  Taskgraph flags (only traced here)
// tdg_id       Id of the TDG whose region ends
void __kmpc_end_record_task(ident_t *loc_ref, kmp_int32 gtid,
                            kmp_int32 input_flags, kmp_int32 tdg_id) {
  kmp_tdg_info_t *tdg = __kmp_find_tdg(tdg_id);

  KA_TRACE(10, ("__kmpc_end_record_task(enter): T#%d loc=%p finishes recording"
                " tdg=%d with flags=%d\n",
                gtid, loc_ref, tdg_id, input_flags));
  if (__kmp_max_tdgs) {
    // Wait for all tasks of the region.
    __kmpc_end_taskgroup(loc_ref, gtid);
    // __kmp_find_tdg yields nullptr when nothing is registered under tdg_id;
    // do not dereference it in that case.
    if (tdg != nullptr && __kmp_tdg_is_recording(tdg->tdg_status))
      __kmp_end_record(gtid, tdg);
  }
  KA_TRACE(10, ("__kmpc_end_record_task(exit): T#%d loc=%p finished recording"
                " tdg=%d, its status is now READY\n",
                gtid, loc_ref, tdg_id));
}
#endif