* kmp_csupport.cpp -- kfront linkage support for OpenMP.
*/
#define __KMP_IMP
#include "omp.h"
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"
#include "kmp_utils.h"
#include "ompt-specific.h"
#define MAX_MESSAGE 512
* @ingroup STARTUP_SHUTDOWN
* @param loc in source location information
* @param flags in for future use (currently ignored)
*
* Initialize the runtime library. This call is optional; if it is not made then
* it will be implicitly called by attempts to use other library functions.
*/
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
char *env;
if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
__kmp_str_match_true(env)) {
__kmp_middle_initialize();
__kmp_assign_root_init_mask();
KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
} else if (__kmp_ignore_mppbeg() == FALSE) {
__kmp_internal_begin();
KC_TRACE(10, ("__kmpc_begin: called\n"));
}
}
* @ingroup STARTUP_SHUTDOWN
* @param loc source location information
*
* Shutdown the runtime library. This is also optional, and even if called will
* not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
* zero.
*/
void __kmpc_end(ident_t *loc) {
if (__kmp_ignore_mppend() == FALSE) {
KC_TRACE(10, ("__kmpc_end: called\n"));
KA_TRACE(30, ("__kmpc_end\n"));
__kmp_internal_end_thread(-1);
}
#if KMP_OS_WINDOWS && OMPT_SUPPORT
if (ompt_enabled.enabled)
__kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
}
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.
This function can be called in any context.
If the runtime has ony been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
that which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the primary thread is necessarily thread zero).
If multiple non-OpenMP threads all enter an OpenMP construct then this
will be a unique thread identifier among all the threads created by
the OpenMP runtime (but the value cannot be defined in terms of
OpenMP thread ids returned by omp_get_thread_num()).
*/
kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
kmp_int32 gtid = __kmp_entry_gtid();
KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
return gtid;
}
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads under control of the OpenMP<sup>*</sup> runtime
This function can be called in any context.
It returns the total number of threads under the control of the OpenMP runtime.
That is not a number that can be determined by any OpenMP standard calls, since
the library may be called from more than one non-OpenMP thread, and this
reflects the total over all such calls. Similarly the runtime maintains
underlying threads even when they are not active (since the cost of creating
and destroying OS threads is high), this call counts all such threads even if
they are not waiting for work.
*/
kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
KC_TRACE(10,
("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
return TCR_4(__kmp_all_nth);
}
@ingroup THREAD_STATES
@param loc Source location information.
@return The thread number of the calling thread in the innermost active parallel
construct.
*/
kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
return __kmp_tid_from_gtid(__kmp_entry_gtid());
}
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads in the innermost active parallel construct.
*/
kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
return __kmp_entry_thread()->th.th_team->t.t_nproc;
}
* @ingroup DEPRECATED
* @param loc location description
*
* This function need not be called. It always returns TRUE.
*/
kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
#ifndef KMP_DEBUG
return TRUE;
#else
const char *semi2;
const char *semi3;
int line_no;
if (__kmp_par_range == 0) {
return TRUE;
}
semi2 = loc->psource;
if (semi2 == NULL) {
return TRUE;
}
semi2 = strchr(semi2, ';');
if (semi2 == NULL) {
return TRUE;
}
semi2 = strchr(semi2 + 1, ';');
if (semi2 == NULL) {
return TRUE;
}
if (__kmp_par_range_filename[0]) {
const char *name = semi2 - 1;
while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
name--;
}
if ((*name == '/') || (*name == ';')) {
name++;
}
if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
return __kmp_par_range < 0;
}
}
semi3 = strchr(semi2 + 1, ';');
if (__kmp_par_range_routine[0]) {
if ((semi3 != NULL) && (semi3 > semi2) &&
(strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
return __kmp_par_range < 0;
}
}
if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
return __kmp_par_range > 0;
}
return __kmp_par_range < 0;
}
return TRUE;
#endif
}
@ingroup THREAD_STATES
@param loc Source location information.
@return 1 if this thread is executing inside an active parallel region, zero if
not.
*/
kmp_int32 __kmpc_in_parallel(ident_t *loc) {
return __kmp_entry_thread()->th.th_root->r.r_active;
}
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_threads number of threads requested for this parallel construct
Set the number of threads to be used by the next fork spawned by this thread.
This call is only required if the parallel construct has a `num_threads` clause.
*/
void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
kmp_int32 num_threads) {
KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
global_tid, num_threads));
__kmp_assert_valid_gtid(global_tid);
__kmp_push_num_threads(loc, global_tid, num_threads);
}
void __kmpc_push_num_threads_strict(ident_t *loc, kmp_int32 global_tid,
kmp_int32 num_threads, int severity,
const char *message) {
__kmp_push_num_threads(loc, global_tid, num_threads);
__kmp_set_strict_num_threads(loc, global_tid, severity, message);
}
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param list_length number of entries in the num_threads_list array
@param num_threads_list array of numbers of threads requested for this parallel
construct and subsequent nested parallel constructs
Set the number of threads to be used by the next fork spawned by this thread,
and some nested forks as well.
This call is only required if the parallel construct has a `num_threads` clause
that has a list of integers as the argument.
*/
void __kmpc_push_num_threads_list(ident_t *loc, kmp_int32 global_tid,
kmp_uint32 list_length,
kmp_int32 *num_threads_list) {
KA_TRACE(20, ("__kmpc_push_num_threads_list: enter T#%d num_threads_list=",
global_tid));
KA_TRACE(20, ("%d", num_threads_list[0]));
#ifdef KMP_DEBUG
for (kmp_uint32 i = 1; i < list_length; ++i)
KA_TRACE(20, (", %d", num_threads_list[i]));
#endif
KA_TRACE(20, ("/n"));
__kmp_assert_valid_gtid(global_tid);
__kmp_push_num_threads_list(loc, global_tid, list_length, num_threads_list);
}
void __kmpc_push_num_threads_list_strict(ident_t *loc, kmp_int32 global_tid,
kmp_uint32 list_length,
kmp_int32 *num_threads_list,
int severity, const char *message) {
__kmp_push_num_threads_list(loc, global_tid, list_length, num_threads_list);
__kmp_set_strict_num_threads(loc, global_tid, severity, message);
}
void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
}
void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
kmp_int32 proc_bind) {
KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
proc_bind));
__kmp_assert_valid_gtid(global_tid);
__kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
}
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined parallel
construct
@param ... pointers to shared variables that aren't global
Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
int gtid = __kmp_entry_gtid();
#if (KMP_STATS_ENABLED)
stats_state_e previous_state = KMP_GET_THREAD_STATE();
if (previous_state == stats_state_e::SERIAL_REGION) {
KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
} else {
KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
}
int inParallel = __kmpc_in_parallel(loc);
if (inParallel) {
KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
} else {
KMP_COUNT_BLOCK(OMP_PARALLEL);
}
#endif
{
va_list ap;
va_start(ap, microtask);
#if OMPT_SUPPORT
ompt_frame_t *ompt_frame;
if (ompt_enabled.enabled) {
kmp_info_t *master_th = __kmp_threads[gtid];
ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame;
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
}
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
#if INCLUDE_SSC_MARKS
SSC_MARK_FORKING();
#endif
__kmp_fork_call(loc, gtid, fork_context_intel, argc,
VOLATILE_CAST(microtask_t) microtask,
VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
kmp_va_addr_of(ap));
#if INCLUDE_SSC_MARKS
SSC_MARK_JOINING();
#endif
__kmp_join_call(loc, gtid
#if OMPT_SUPPORT
,
fork_context_intel
#endif
);
va_end(ap);
#if OMPT_SUPPORT
if (ompt_enabled.enabled) {
ompt_frame->enter_frame = ompt_data_none;
}
#endif
}
#if KMP_STATS_ENABLED
if (previous_state == stats_state_e::SERIAL_REGION) {
KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
KMP_SET_THREAD_STATE(previous_state);
} else {
KMP_POP_PARTITIONED_TIMER();
}
#endif
}
@ingroup PARALLEL
@param loc source location information
@param microtask pointer to callback routine consisting of outlined parallel
construct
@param cond condition for running in parallel
@param args struct of pointers to shared variables that aren't global
Perform a fork only if the condition is true.
*/
void __kmpc_fork_call_if(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
kmp_int32 cond, void *args) {
int gtid = __kmp_entry_gtid();
if (cond) {
if (args)
__kmpc_fork_call(loc, argc, microtask, args);
else
__kmpc_fork_call(loc, argc, microtask);
} else {
__kmpc_serialized_parallel(loc, gtid);
#if OMPT_SUPPORT
void *exit_frame_ptr;
#endif
if (args)
__kmp_invoke_microtask(VOLATILE_CAST(microtask_t) microtask, gtid,
0,
1, &args
#if OMPT_SUPPORT
,
&exit_frame_ptr
#endif
);
else
__kmp_invoke_microtask(VOLATILE_CAST(microtask_t) microtask, gtid,
0,
0,
nullptr
#if OMPT_SUPPORT
,
&exit_frame_ptr
#endif
);
__kmpc_end_serialized_parallel(loc, gtid);
}
}
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams number of teams requested for the teams construct
@param num_threads number of threads per team requested for the teams construct
Set the number of teams to be used by the teams construct.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
kmp_int32 num_teams, kmp_int32 num_threads) {
KA_TRACE(20,
("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
global_tid, num_teams, num_threads));
__kmp_assert_valid_gtid(global_tid);
__kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param thread_limit limit on number of threads which can be created within the
current task
Set the thread_limit for the current task
This call is there to support `thread_limit` clause on the `target` construct
*/
void __kmpc_set_thread_limit(ident_t *loc, kmp_int32 global_tid,
kmp_int32 thread_limit) {
__kmp_assert_valid_gtid(global_tid);
kmp_info_t *thread = __kmp_threads[global_tid];
if (thread_limit > 0)
thread->th.th_current_task->td_icvs.task_thread_limit = thread_limit;
}
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams_lb lower bound on number of teams requested for the teams
construct
@param num_teams_ub upper bound on number of teams requested for the teams
construct
@param num_threads number of threads per team requested for the teams construct
Set the number of teams to be used by the teams construct. The number of initial
teams cretaed will be greater than or equal to the lower bound and less than or
equal to the upper bound.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
kmp_int32 num_threads) {
KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
" num_teams_ub=%d num_threads=%d\n",
global_tid, num_teams_lb, num_teams_ub, num_threads));
__kmp_assert_valid_gtid(global_tid);
__kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
num_threads);
}
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined teams
construct
@param ... pointers to shared variables that aren't global
Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
...) {
int gtid = __kmp_entry_gtid();
kmp_info_t *this_thr = __kmp_threads[gtid];
va_list ap;
va_start(ap, microtask);
#if KMP_STATS_ENABLED
KMP_COUNT_BLOCK(OMP_TEAMS);
stats_state_e previous_state = KMP_GET_THREAD_STATE();
if (previous_state == stats_state_e::SERIAL_REGION) {
KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
} else {
KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
}
#endif
this_thr->th.th_teams_microtask = microtask;
this_thr->th.th_teams_level =
this_thr->th.th_team->t.t_level;
#if OMPT_SUPPORT
kmp_team_t *parent_team = this_thr->th.th_team;
int tid = __kmp_tid_from_gtid(gtid);
if (ompt_enabled.enabled) {
parent_team->t.t_implicit_task_taskdata[tid]
.ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
}
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
if (this_thr->th.th_teams_size.nteams == 0) {
__kmp_push_num_teams(loc, gtid, 0, 0);
}
KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
__kmp_fork_call(
loc, gtid, fork_context_intel, argc,
VOLATILE_CAST(microtask_t) __kmp_teams_master,
VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
__kmp_join_call(loc, gtid
#if OMPT_SUPPORT
,
fork_context_intel
#endif
);
KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
this_thr->th.th_cg_roots = tmp->up;
KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
" to node %p. cg_nthreads was %d\n",
this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
KMP_DEBUG_ASSERT(tmp->cg_nthreads);
int i = tmp->cg_nthreads--;
if (i == 1) {
__kmp_free(tmp);
}
KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
this_thr->th.th_current_task->td_icvs.thread_limit =
this_thr->th.th_cg_roots->cg_thread_limit;
this_thr->th.th_teams_microtask = NULL;
this_thr->th.th_teams_level = 0;
*(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
va_end(ap);
#if KMP_STATS_ENABLED
if (previous_state == stats_state_e::SERIAL_REGION) {
KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
KMP_SET_THREAD_STATE(previous_state);
} else {
KMP_POP_PARTITIONED_TIMER();
}
#endif
}
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
Enter a serialized parallel construct. This interface is used to handle a
conditional parallel region, like this,
@code
#pragma omp parallel if (condition)
@endcode
when the condition is false.
*/
void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
__kmp_assert_valid_gtid(global_tid);
#if OMPT_SUPPORT
OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
__kmp_serialized_parallel(loc, global_tid);
}
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
Leave a serialized parallel construct.
*/
void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
kmp_internal_control_t *top;
kmp_info_t *this_thr;
kmp_team_t *serial_team;
KC_TRACE(10,
("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
unacceptable overhead */
if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
return;
__kmp_assert_valid_gtid(global_tid);
if (!TCR_4(__kmp_init_parallel))
__kmp_parallel_initialize();
__kmp_resume_if_soft_paused();
this_thr = __kmp_threads[global_tid];
serial_team = this_thr->th.th_serial_team;
kmp_task_team_t *task_team = this_thr->th.th_task_team;
if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
task_team->tt.tt_hidden_helper_task_encountered))
__kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
KMP_MB();
KMP_DEBUG_ASSERT(serial_team);
KMP_ASSERT(serial_team->t.t_serialized);
KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
KMP_DEBUG_ASSERT(serial_team->t.t_threads);
KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
#if OMPT_SUPPORT
if (ompt_enabled.enabled &&
this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
if (ompt_enabled.ompt_callback_implicit_task) {
ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
}
ompt_data_t *parent_task_data;
__ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
if (ompt_enabled.ompt_callback_parallel_end) {
ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
&(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
ompt_parallel_invoker_program | ompt_parallel_team,
OMPT_LOAD_RETURN_ADDRESS(global_tid));
}
__ompt_lw_taskteam_unlink(this_thr);
this_thr->th.ompt_thread_info.state = ompt_state_overhead;
}
#endif
* values */
top = serial_team->t.t_control_stack_top;
if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
serial_team->t.t_control_stack_top = top->next;
__kmp_free(top);
}
KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
{
dispatch_private_info_t *disp_buffer =
serial_team->t.t_dispatch->th_disp_buffer;
serial_team->t.t_dispatch->th_disp_buffer =
serial_team->t.t_dispatch->th_disp_buffer->next;
__kmp_free(disp_buffer);
}
if (serial_team->t.t_serialized > 1) {
__kmp_pop_task_team_node(this_thr, serial_team);
}
this_thr->th.th_def_allocator = serial_team->t.t_def_allocator;
--serial_team->t.t_serialized;
if (serial_team->t.t_serialized == 0) {
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
__kmp_clear_x87_fpu_status_word();
__kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
__kmp_load_mxcsr(&serial_team->t.t_mxcsr);
}
#endif
__kmp_pop_current_task_from_thread(this_thr);
#if OMPD_SUPPORT
if (ompd_state & OMPD_ENABLE_BP)
ompd_bp_parallel_end();
#endif
this_thr->th.th_team = serial_team->t.t_parent;
this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc;
this_thr->th.th_team_master =
serial_team->t.t_parent->t.t_threads[0];
this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
this_thr->th.th_dispatch =
&this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
this_thr->th.th_current_task->td_flags.executing = 1;
if (__kmp_tasking_mode != tskm_immediate_exec) {
KMP_DEBUG_ASSERT(serial_team->t.t_primary_task_state == 0 ||
serial_team->t.t_primary_task_state == 1);
this_thr->th.th_task_state =
(kmp_uint8)serial_team->t.t_primary_task_state;
this_thr->th.th_task_team =
this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
KA_TRACE(20,
("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
"team %p\n",
global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
}
#if KMP_AFFINITY_SUPPORTED
if (this_thr->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
__kmp_reset_root_init_mask(global_tid);
}
#endif
} else {
if (__kmp_tasking_mode != tskm_immediate_exec) {
KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
"depth of serial team %p to %d\n",
global_tid, serial_team, serial_team->t.t_serialized));
}
}
serial_team->t.t_level--;
if (__kmp_env_consistency_check)
__kmp_pop_parallel(global_tid, NULL);
#if OMPT_SUPPORT
if (ompt_enabled.enabled)
this_thr->th.ompt_thread_info.state =
((this_thr->th.th_team_serialized) ? ompt_state_work_serial
: ompt_state_work_parallel);
#endif
}
@ingroup SYNCHRONIZATION
@param loc source location information.
Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
depending on the memory ordering convention obeyed by the compiler
even that may not be necessary).
*/
void __kmpc_flush(ident_t *loc) {
KC_TRACE(10, ("__kmpc_flush: called\n"));
KMP_MFENCE();
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_flush) {
ompt_callbacks.ompt_callback(ompt_callback_flush)(
__ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
}
#endif
}
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
Execute a barrier.
*/
void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
KMP_COUNT_BLOCK(OMP_BARRIER);
KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
__kmp_assert_valid_gtid(global_tid);
if (!TCR_4(__kmp_init_parallel))
__kmp_parallel_initialize();
__kmp_resume_if_soft_paused();
if (__kmp_env_consistency_check) {
if (loc == 0) {
KMP_WARNING(ConstructIdentInvalid);
}
__kmp_check_barrier(global_tid, ct_barrier, loc);
}
#if OMPT_SUPPORT
ompt_frame_t *ompt_frame;
if (ompt_enabled.enabled) {
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
}
OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
__kmp_threads[global_tid]->th.th_ident = loc;
__kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
ompt_frame->enter_frame = ompt_data_none;
}
#endif
}
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number .
@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
*/
kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
int status = 0;
KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
__kmp_assert_valid_gtid(global_tid);
if (!TCR_4(__kmp_init_parallel))
__kmp_parallel_initialize();
__kmp_resume_if_soft_paused();
if (KMP_MASTER_GTID(global_tid)) {
KMP_COUNT_BLOCK(OMP_MASTER);
KMP_PUSH_PARTITIONED_TIMER(OMP_master);
status = 1;
}
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (status) {
if (ompt_enabled.ompt_callback_masked) {
kmp_info_t *this_thr = __kmp_threads[global_tid];
kmp_team_t *team = this_thr->th.th_team;
int tid = __kmp_tid_from_gtid(global_tid);
ompt_callbacks.ompt_callback(ompt_callback_masked)(
ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
OMPT_GET_RETURN_ADDRESS(0));
}
}
#endif
if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
if (status)
__kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
else
__kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
#else
if (status)
__kmp_push_sync(global_tid, ct_master, loc, NULL);
else
__kmp_check_sync(global_tid, ct_master, loc, NULL);
#endif
}
return status;
}
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number .
Mark the end of a <tt>master</tt> region. This should only be called by the
thread that executes the <tt>master</tt> region.
*/
void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
__kmp_assert_valid_gtid(global_tid);
KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
KMP_POP_PARTITIONED_TIMER();
#if OMPT_SUPPORT && OMPT_OPTIONAL
kmp_info_t *this_thr = __kmp_threads[global_tid];
kmp_team_t *team = this_thr->th.th_team;
if (ompt_enabled.ompt_callback_masked) {
int tid = __kmp_tid_from_gtid(global_tid);
ompt_callbacks.ompt_callback(ompt_callback_masked)(
ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
OMPT_GET_RETURN_ADDRESS(0));
}
#endif
if (__kmp_env_consistency_check) {
if (KMP_MASTER_GTID(global_tid))
__kmp_pop_sync(global_tid, ct_master, loc);
}
}
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param filter result of evaluating filter clause on thread global_tid, or zero
if no filter clause present
@return 1 if this thread should execute the <tt>masked</tt> block, 0 otherwise.
*/
kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) {
int status = 0;
int tid;
KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid));
__kmp_assert_valid_gtid(global_tid);
if (!TCR_4(__kmp_init_parallel))
__kmp_parallel_initialize();
__kmp_resume_if_soft_paused();
tid = __kmp_tid_from_gtid(global_tid);
if (tid == filter) {
KMP_COUNT_BLOCK(OMP_MASKED);
KMP_PUSH_PARTITIONED_TIMER(OMP_masked);
status = 1;
}
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (status) {
if (ompt_enabled.ompt_callback_masked) {
kmp_info_t *this_thr = __kmp_threads[global_tid];
kmp_team_t *team = this_thr->th.th_team;
ompt_callbacks.ompt_callback(ompt_callback_masked)(
ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
OMPT_GET_RETURN_ADDRESS(0));
}
}
#endif
if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
if (status)
__kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);
else
__kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);
#else
if (status)
__kmp_push_sync(global_tid, ct_masked, loc, NULL);
else
__kmp_check_sync(global_tid, ct_masked, loc, NULL);
#endif
}
return status;
}
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number .
Mark the end of a <tt>masked</tt> region. This should only be called by the
thread that executes the <tt>masked</tt> region.
*/
void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) {
KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid));
__kmp_assert_valid_gtid(global_tid);
KMP_POP_PARTITIONED_TIMER();
#if OMPT_SUPPORT && OMPT_OPTIONAL
kmp_info_t *this_thr = __kmp_threads[global_tid];
kmp_team_t *team = this_thr->th.th_team;
if (ompt_enabled.ompt_callback_masked) {
int tid = __kmp_tid_from_gtid(global_tid);
ompt_callbacks.ompt_callback(ompt_callback_masked)(
ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
OMPT_GET_RETURN_ADDRESS(0));
}
#endif
if (__kmp_env_consistency_check) {
__kmp_pop_sync(global_tid, ct_masked, loc);
}
}
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.
Start execution of an <tt>ordered</tt> construct.
*/
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
int cid = 0;
kmp_info_t *th;
KMP_DEBUG_ASSERT(__kmp_init_serial);
KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
__kmp_assert_valid_gtid(gtid);
if (!TCR_4(__kmp_init_parallel))
__kmp_parallel_initialize();
__kmp_resume_if_soft_paused();
#if USE_ITT_BUILD
__kmp_itt_ordered_prep(gtid);
#endif
th = __kmp_threads[gtid];
#if OMPT_SUPPORT && OMPT_OPTIONAL
kmp_team_t *team;
ompt_wait_id_t lck;
void *codeptr_ra;
OMPT_STORE_RETURN_ADDRESS(gtid);
if (ompt_enabled.enabled) {
team = __kmp_team_from_gtid(gtid);
lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
th->th.ompt_thread_info.wait_id = lck;
th->th.ompt_thread_info.state = ompt_state_wait_ordered;
codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (ompt_enabled.ompt_callback_mutex_acquire) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
codeptr_ra);
}
}
#endif
if (th->th.th_dispatch->th_deo_fcn != 0)
(*th->th.th_dispatch->th_deo_fcn)(>id, &cid, loc);
else
__kmp_parallel_deo(>id, &cid, loc);
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
th->th.ompt_thread_info.state = ompt_state_work_parallel;
th->th.ompt_thread_info.wait_id = 0;
if (ompt_enabled.ompt_callback_mutex_acquired) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
}
}
#endif
#if USE_ITT_BUILD
__kmp_itt_ordered_start(gtid);
#endif
}
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.
End execution of an <tt>ordered</tt> construct.
*/
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
int cid = 0;
kmp_info_t *th;
KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
__kmp_assert_valid_gtid(gtid);
#if USE_ITT_BUILD
__kmp_itt_ordered_end(gtid);
#endif
th = __kmp_threads[gtid];
if (th->th.th_dispatch->th_dxo_fcn != 0)
(*th->th.th_dispatch->th_dxo_fcn)(>id, &cid, loc);
else
__kmp_parallel_dxo(>id, &cid, loc);
#if OMPT_SUPPORT && OMPT_OPTIONAL
OMPT_STORE_RETURN_ADDRESS(gtid);
if (ompt_enabled.ompt_callback_mutex_released) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
ompt_mutex_ordered,
(ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
->t.t_ordered.dt.t_value,
OMPT_LOAD_RETURN_ADDRESS(gtid));
}
#endif
}
#if KMP_USE_DYNAMIC_LOCK
static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
kmp_int32 gtid, kmp_indirect_locktag_t tag) {
void *idx;
kmp_indirect_lock_t **lck;
lck = (kmp_indirect_lock_t **)crit;
kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
KMP_SET_I_LOCK_LOCATION(ilk, loc);
KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
KA_TRACE(20,
("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
#if USE_ITT_BUILD
__kmp_itt_critical_creating(ilk->lock, loc);
#endif
int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
if (status == 0) {
#if USE_ITT_BUILD
__kmp_itt_critical_destroyed(ilk->lock);
#endif
}
KMP_DEBUG_ASSERT(*lck != NULL);
}
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
{ \
kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
!__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
kmp_uint32 spins; \
KMP_FSYNC_PREPARE(l); \
KMP_INIT_YIELD(spins); \
kmp_backoff_t backoff = __kmp_spin_backoff_params; \
do { \
if (TCR_4(__kmp_nth) > \
(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
KMP_YIELD(TRUE); \
} else { \
KMP_YIELD_SPIN(spins); \
} \
__kmp_spin_backoff(&backoff); \
} while ( \
KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
!__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
} \
KMP_FSYNC_ACQUIRED(l); \
}
#define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
{ \
kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
}
#define KMP_RELEASE_TAS_LOCK(lock, gtid) \
{ KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
#if KMP_USE_FUTEX
#include <sys/syscall.h>
#include <unistd.h>
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif
#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
{ \
kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
kmp_int32 gtid_code = (gtid + 1) << 1; \
KMP_MB(); \
KMP_FSYNC_PREPARE(ftx); \
kmp_int32 poll_val; \
while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
if (!cond) { \
if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
poll_val | \
KMP_LOCK_BUSY(1, futex))) { \
continue; \
} \
poll_val |= KMP_LOCK_BUSY(1, futex); \
} \
kmp_int32 rc; \
if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
NULL, NULL, 0)) != 0) { \
continue; \
} \
gtid_code |= 1; \
} \
KMP_FSYNC_ACQUIRED(ftx); \
}
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
{ \
kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \
KMP_FSYNC_ACQUIRED(ftx); \
rc = TRUE; \
} else { \
rc = FALSE; \
} \
}
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
{ \
kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
KMP_MB(); \
KMP_FSYNC_RELEASING(ftx); \
kmp_int32 poll_val = \
KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
if (KMP_LOCK_STRIP(poll_val) & 1) { \
syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
} \
KMP_MB(); \
KMP_YIELD_OVERSUB(); \
}
#endif
#else
static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
ident_t const *loc,
kmp_int32 gtid) {
kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
if (lck == NULL) {
void *idx;
lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
__kmp_init_user_lock_with_checks(lck);
__kmp_set_user_lock_location(lck, loc);
#if USE_ITT_BUILD
__kmp_itt_critical_creating(lck);
#endif
int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
if (status == 0) {
#if USE_ITT_BUILD
__kmp_itt_critical_destroyed(lck);
#endif
__kmp_destroy_user_lock_with_checks(lck);
__kmp_user_lock_free(&idx, gtid, lck);
lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
KMP_DEBUG_ASSERT(lck != NULL);
}
}
return lck;
}
#endif
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.
Enter code protected by a `critical` construct.
This function blocks until the executing thread can enter the critical section.
*/
void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
kmp_critical_name *crit) {
#if KMP_USE_DYNAMIC_LOCK
#if OMPT_SUPPORT && OMPT_OPTIONAL
OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
__kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
#else
KMP_COUNT_BLOCK(OMP_CRITICAL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
ompt_state_t prev_state = ompt_state_undefined;
ompt_thread_info_t ti;
#endif
kmp_user_lock_p lck;
KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
__kmp_assert_valid_gtid(global_tid);
KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
KMP_CHECK_USER_LOCK_INIT();
if ((__kmp_user_lock_kind == lk_tas) &&
(sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
lck = (kmp_user_lock_p)crit;
}
#if KMP_USE_FUTEX
else if ((__kmp_user_lock_kind == lk_futex) &&
(sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
lck = (kmp_user_lock_p)crit;
}
#endif
else {
lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
}
if (__kmp_env_consistency_check)
__kmp_push_sync(global_tid, ct_critical, loc, lck);
#if USE_ITT_BUILD
__kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
OMPT_STORE_RETURN_ADDRESS(gtid);
void *codeptr_ra = NULL;
if (ompt_enabled.enabled) {
ti = __kmp_threads[global_tid]->th.ompt_thread_info;
prev_state = ti.state;
ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
ti.state = ompt_state_wait_critical;
codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (ompt_enabled.ompt_callback_mutex_acquire) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
(ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
}
}
#endif
__kmp_acquire_user_lock_with_checks(lck, global_tid);
#if USE_ITT_BUILD
__kmp_itt_critical_acquired(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
ti.state = prev_state;
ti.wait_id = 0;
if (ompt_enabled.ompt_callback_mutex_acquired) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
}
}
#endif
KMP_POP_PARTITIONED_TIMER();
KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
#endif
}
#if KMP_USE_DYNAMIC_LOCK
static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
#if KMP_USE_TSX
#define KMP_TSX_LOCK(seq) lockseq_##seq
#else
#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
#endif
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define KMP_CPUINFO_RTM (__kmp_cpuinfo.flags.rtm)
#else
#define KMP_CPUINFO_RTM 0
#endif
if (hint & kmp_lock_hint_hle)
return KMP_TSX_LOCK(hle);
if (hint & kmp_lock_hint_rtm)
return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
if (hint & kmp_lock_hint_adaptive)
return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
return __kmp_user_lock_seq;
if ((hint & omp_lock_hint_speculative) &&
(hint & omp_lock_hint_nonspeculative))
return __kmp_user_lock_seq;
if (hint & omp_lock_hint_contended)
return lockseq_queuing;
if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
return lockseq_tas;
if (hint & omp_lock_hint_speculative)
return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;
return __kmp_user_lock_seq;
}
#if OMPT_SUPPORT && OMPT_OPTIONAL
#if KMP_USE_DYNAMIC_LOCK
static kmp_mutex_impl_t
__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
if (user_lock) {
switch (KMP_EXTRACT_D_TAG(user_lock)) {
case 0:
break;
#if KMP_USE_FUTEX
case locktag_futex:
return kmp_mutex_impl_queuing;
#endif
case locktag_tas:
return kmp_mutex_impl_spin;
#if KMP_USE_TSX
case locktag_hle:
case locktag_rtm_spin:
return kmp_mutex_impl_speculative;
#endif
default:
return kmp_mutex_impl_none;
}
ilock = KMP_LOOKUP_I_LOCK(user_lock);
}
KMP_ASSERT(ilock);
switch (ilock->type) {
#if KMP_USE_TSX
case locktag_adaptive:
case locktag_rtm_queuing:
return kmp_mutex_impl_speculative;
#endif
case locktag_nested_tas:
return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
case locktag_nested_futex:
#endif
case locktag_ticket:
case locktag_queuing:
case locktag_drdpa:
case locktag_nested_ticket:
case locktag_nested_queuing:
case locktag_nested_drdpa:
return kmp_mutex_impl_queuing;
default:
return kmp_mutex_impl_none;
}
}
#else
static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
switch (__kmp_user_lock_kind) {
case lk_tas:
return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
case lk_futex:
#endif
case lk_ticket:
case lk_queuing:
case lk_drdpa:
return kmp_mutex_impl_queuing;
#if KMP_USE_TSX
case lk_hle:
case lk_rtm_queuing:
case lk_rtm_spin:
case lk_adaptive:
return kmp_mutex_impl_speculative;
#endif
default:
return kmp_mutex_impl_none;
}
}
#endif
#endif
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.
@param hint the lock hint.
Enter code protected by a `critical` construct with a hint. The hint value is
used to suggest a lock implementation. This function blocks until the executing
thread can enter the critical section unless the hint suggests use of
speculative execution and the hardware supports it.
*/
void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
kmp_critical_name *crit, uint32_t hint) {
KMP_COUNT_BLOCK(OMP_CRITICAL);
kmp_user_lock_p lck;
#if OMPT_SUPPORT && OMPT_OPTIONAL
ompt_state_t prev_state = ompt_state_undefined;
ompt_thread_info_t ti;
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
#endif
KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
__kmp_assert_valid_gtid(global_tid);
kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);
if (*lk == 0) {
if (KMP_IS_D_LOCK(lockseq)) {
KMP_COMPARE_AND_STORE_ACQ32(
(volatile kmp_int32 *)&((kmp_base_tas_lock_t *)crit)->poll, 0,
KMP_GET_D_TAG(lockseq));
} else {
__kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));
}
}
if (KMP_EXTRACT_D_TAG(lk) != 0) {
lck = (kmp_user_lock_p)lk;
if (__kmp_env_consistency_check) {
__kmp_push_sync(global_tid, ct_critical, loc, lck,
__kmp_map_hint_to_lock(hint));
}
#if USE_ITT_BUILD
__kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
ti = __kmp_threads[global_tid]->th.ompt_thread_info;
prev_state = ti.state;
ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
ti.state = ompt_state_wait_critical;
if (ompt_enabled.ompt_callback_mutex_acquire) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
ompt_mutex_critical, (unsigned int)hint,
__ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
codeptr);
}
}
#endif
#if KMP_USE_INLINED_TAS
if (lockseq == lockseq_tas && !__kmp_env_consistency_check) {
KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
} else
#elif KMP_USE_INLINED_FUTEX
if (lockseq == lockseq_futex && !__kmp_env_consistency_check) {
KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
} else
#endif
{
KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
}
} else {
kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
lck = ilk->lock;
if (__kmp_env_consistency_check) {
__kmp_push_sync(global_tid, ct_critical, loc, lck,
__kmp_map_hint_to_lock(hint));
}
#if USE_ITT_BUILD
__kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
ti = __kmp_threads[global_tid]->th.ompt_thread_info;
prev_state = ti.state;
ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
ti.state = ompt_state_wait_critical;
if (ompt_enabled.ompt_callback_mutex_acquire) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
ompt_mutex_critical, (unsigned int)hint,
__ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
codeptr);
}
}
#endif
KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
}
KMP_POP_PARTITIONED_TIMER();
#if USE_ITT_BUILD
__kmp_itt_critical_acquired(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
ti.state = prev_state;
ti.wait_id = 0;
if (ompt_enabled.ompt_callback_mutex_acquired) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
}
}
#endif
KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
}
#endif
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number .
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.
Leave a critical section, releasing any lock that was held during its execution.
*/
void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
kmp_critical_name *crit) {
kmp_user_lock_p lck;
KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
#if KMP_USE_DYNAMIC_LOCK
int locktag = KMP_EXTRACT_D_TAG(crit);
if (locktag) {
lck = (kmp_user_lock_p)crit;
KMP_ASSERT(lck != NULL);
if (__kmp_env_consistency_check) {
__kmp_pop_sync(global_tid, ct_critical, loc);
}
#if USE_ITT_BUILD
__kmp_itt_critical_releasing(lck);
#endif
#if KMP_USE_INLINED_TAS
if (locktag == locktag_tas && !__kmp_env_consistency_check) {
KMP_RELEASE_TAS_LOCK(lck, global_tid);
} else
#elif KMP_USE_INLINED_FUTEX
if (locktag == locktag_futex && !__kmp_env_consistency_check) {
KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
} else
#endif
{
KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
}
} else {
kmp_indirect_lock_t *ilk =
(kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
KMP_ASSERT(ilk != NULL);
lck = ilk->lock;
if (__kmp_env_consistency_check) {
__kmp_pop_sync(global_tid, ct_critical, loc);
}
#if USE_ITT_BUILD
__kmp_itt_critical_releasing(lck);
#endif
KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
}
#else
if ((__kmp_user_lock_kind == lk_tas) &&
(sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
lck = (kmp_user_lock_p)crit;
}
#if KMP_USE_FUTEX
else if ((__kmp_user_lock_kind == lk_futex) &&
(sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
lck = (kmp_user_lock_p)crit;
}
#endif
else {
lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
}
KMP_ASSERT(lck != NULL);
if (__kmp_env_consistency_check)
__kmp_pop_sync(global_tid, ct_critical, loc);
#if USE_ITT_BUILD
__kmp_itt_critical_releasing(lck);
#endif
__kmp_release_user_lock_with_checks(lck, global_tid);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
* for all #if branches */
OMPT_STORE_RETURN_ADDRESS(global_tid);
if (ompt_enabled.ompt_callback_mutex_released) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
OMPT_LOAD_RETURN_ADDRESS(0));
}
#endif
KMP_POP_PARTITIONED_TIMER();
KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
}
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise
Start execution of a combined barrier and master. The barrier is executed inside
this function.
*/
kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
int status;
KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
__kmp_assert_valid_gtid(global_tid);
if (!TCR_4(__kmp_init_parallel))
__kmp_parallel_initialize();
__kmp_resume_if_soft_paused();
if (__kmp_env_consistency_check)
__kmp_check_barrier(global_tid, ct_barrier, loc);
#if OMPT_SUPPORT
ompt_frame_t *ompt_frame;
if (ompt_enabled.enabled) {
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
}
OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
__kmp_threads[global_tid]->th.th_ident = loc;
#endif
status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
ompt_frame->enter_frame = ompt_data_none;
}
#endif
return (status != 0) ? 0 : 1;
}
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
Complete the execution of a combined barrier and master. This function should
only be called at the completion of the <tt>master</tt> code. Other threads will
still be waiting at the barrier and this call releases them.
*/
void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
__kmp_assert_valid_gtid(global_tid);
__kmp_end_split_barrier(bs_plain_barrier, global_tid);
}
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise
Start execution of a combined barrier and master(nowait) construct.
The barrier is executed inside this function.
There is no equivalent "end" function, since the
*/
kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
kmp_int32 ret;
KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
__kmp_assert_valid_gtid(global_tid);
if (!TCR_4(__kmp_init_parallel))
__kmp_parallel_initialize();
__kmp_resume_if_soft_paused();
if (__kmp_env_consistency_check) {
if (loc == 0) {
KMP_WARNING(ConstructIdentInvalid);
}
__kmp_check_barrier(global_tid, ct_barrier, loc);
}
#if OMPT_SUPPORT
ompt_frame_t *ompt_frame;
if (ompt_enabled.enabled) {
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
}
OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
__kmp_threads[global_tid]->th.th_ident = loc;
#endif
__kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
ompt_frame->enter_frame = ompt_data_none;
}
#endif
ret = __kmpc_master(loc, global_tid);
if (__kmp_env_consistency_check) {
if (ret) {
__kmp_pop_sync(global_tid, ct_master, loc);
}
}
return (ret);
}
@ingroup WORK_SHARING
@param loc source location information
@param global_tid global thread number
@return One if this thread should execute the single construct, zero otherwise.
Test whether to execute a <tt>single</tt> construct.
There are no implicit barriers in the two "single" calls, rather the compiler
should introduce an explicit barrier if it is required.
*/
kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
__kmp_assert_valid_gtid(global_tid);
kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
if (rc) {
KMP_COUNT_BLOCK(OMP_SINGLE);
KMP_PUSH_PARTITIONED_TIMER(OMP_single);
}
#if OMPT_SUPPORT && OMPT_OPTIONAL
kmp_info_t *this_thr = __kmp_threads[global_tid];
kmp_team_t *team = this_thr->th.th_team;
int tid = __kmp_tid_from_gtid(global_tid);
if (ompt_enabled.enabled) {
if (rc) {
if (ompt_enabled.ompt_callback_work) {
ompt_callbacks.ompt_callback(ompt_callback_work)(
ompt_work_single_executor, ompt_scope_begin,
&(team->t.ompt_team_info.parallel_data),
&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1, OMPT_GET_RETURN_ADDRESS(0));
}
} else {
if (ompt_enabled.ompt_callback_work) {
ompt_callbacks.ompt_callback(ompt_callback_work)(
ompt_work_single_other, ompt_scope_begin,
&(team->t.ompt_team_info.parallel_data),
&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1, OMPT_GET_RETURN_ADDRESS(0));
ompt_callbacks.ompt_callback(ompt_callback_work)(
ompt_work_single_other, ompt_scope_end,
&(team->t.ompt_team_info.parallel_data),
&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1, OMPT_GET_RETURN_ADDRESS(0));
}
}
}
#endif
return rc;
}
@ingroup WORK_SHARING
@param loc source location information
@param global_tid global thread number
Mark the end of a <tt>single</tt> construct. This function should
only be called by the thread that executed the block of code protected
by the `single` construct.
*/
void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
__kmp_assert_valid_gtid(global_tid);
__kmp_exit_single(global_tid);
KMP_POP_PARTITIONED_TIMER();
#if OMPT_SUPPORT && OMPT_OPTIONAL
kmp_info_t *this_thr = __kmp_threads[global_tid];
kmp_team_t *team = this_thr->th.th_team;
int tid = __kmp_tid_from_gtid(global_tid);
if (ompt_enabled.ompt_callback_work) {
ompt_callbacks.ompt_callback(ompt_callback_work)(
ompt_work_single_executor, ompt_scope_end,
&(team->t.ompt_team_info.parallel_data),
&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
OMPT_GET_RETURN_ADDRESS(0));
}
#endif
}
@ingroup WORK_SHARING
@param loc Source location
@param global_tid Global thread id
Mark the end of a statically scheduled loop.
*/
void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
KMP_POP_PARTITIONED_TIMER();
KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_work) {
ompt_work_t ompt_work_type = ompt_work_loop_static;
ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
if (loc != NULL) {
if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
ompt_work_type = ompt_work_loop_static;
} else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
ompt_work_type = ompt_work_sections;
} else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
ompt_work_type = ompt_work_distribute;
} else {
}
KMP_DEBUG_ASSERT(ompt_work_type);
}
ompt_callbacks.ompt_callback(ompt_callback_work)(
ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
&(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
}
#endif
if (__kmp_env_consistency_check)
__kmp_pop_workshare(global_tid, ct_pdo, loc);
}
void ompc_set_num_threads(int arg) {
__kmp_set_num_threads(arg, __kmp_entry_gtid());
}
void ompc_set_dynamic(int flag) {
kmp_info_t *thread;
thread = __kmp_entry_thread();
__kmp_save_internal_controls(thread);
set__dynamic(thread, flag ? true : false);
}
void ompc_set_nested(int flag) {
kmp_info_t *thread;
thread = __kmp_entry_thread();
__kmp_save_internal_controls(thread);
set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
}
void ompc_set_max_active_levels(int max_active_levels) {
__kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
}
void ompc_set_schedule(omp_sched_t kind, int modifier) {
__kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
}
int ompc_get_ancestor_thread_num(int level) {
return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
}
int ompc_get_team_size(int level) {
return __kmp_get_team_size(__kmp_entry_gtid(), level);
}
void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) {
if (!__kmp_init_serial) {
__kmp_serial_initialize();
}
__kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
format, KMP_STRLEN(format) + 1);
}
size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) {
size_t format_size;
if (!__kmp_init_serial) {
__kmp_serial_initialize();
}
format_size = KMP_STRLEN(__kmp_affinity_format);
if (buffer && size) {
__kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
format_size + 1);
}
return format_size;
}
void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) {
int gtid;
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
__kmp_assign_root_init_mask();
gtid = __kmp_get_gtid();
#if KMP_AFFINITY_SUPPORTED
if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
__kmp_affinity.flags.reset) {
__kmp_reset_root_init_mask(gtid);
}
#endif
__kmp_aux_display_affinity(gtid, format);
}
size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,
char const *format) {
int gtid;
size_t num_required;
kmp_str_buf_t capture_buf;
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
__kmp_assign_root_init_mask();
gtid = __kmp_get_gtid();
#if KMP_AFFINITY_SUPPORTED
if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
__kmp_affinity.flags.reset) {
__kmp_reset_root_init_mask(gtid);
}
#endif
__kmp_str_buf_init(&capture_buf);
num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
if (buffer && buf_size) {
__kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
capture_buf.used + 1);
}
__kmp_str_buf_free(&capture_buf);
return num_required;
}
void kmpc_set_stacksize(int arg) {
__kmp_aux_set_stacksize(arg);
}
void kmpc_set_stacksize_s(size_t arg) {
__kmp_aux_set_stacksize(arg);
}
void kmpc_set_blocktime(int arg) {
int gtid, tid, bt = arg;
kmp_info_t *thread;
gtid = __kmp_entry_gtid();
tid = __kmp_tid_from_gtid(gtid);
thread = __kmp_thread_from_gtid(gtid);
__kmp_aux_convert_blocktime(&bt);
__kmp_aux_set_blocktime(bt, thread, tid);
}
void kmpc_set_library(int arg) {
__kmp_user_set_library((enum library_type)arg);
}
void kmpc_set_defaults(char const *str) {
__kmp_aux_set_defaults(str, KMP_STRLEN(str));
}
void kmpc_set_disp_num_buffers(int arg) {
if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF &&
arg <= KMP_MAX_DISP_NUM_BUFF) {
__kmp_dispatch_num_buffers = arg;
}
}
int kmpc_set_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
return -1;
#else
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
__kmp_assign_root_init_mask();
return __kmp_aux_set_affinity_mask_proc(proc, mask);
#endif
}
int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
return -1;
#else
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
__kmp_assign_root_init_mask();
return __kmp_aux_unset_affinity_mask_proc(proc, mask);
#endif
}
int kmpc_get_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
return -1;
#else
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
__kmp_assign_root_init_mask();
return __kmp_aux_get_affinity_mask_proc(proc, mask);
#endif
}
@ingroup THREADPRIVATE
@param loc source location information
@param gtid global thread number
@param cpy_size size of the cpy_data buffer
@param cpy_data pointer to data to be copied
@param cpy_func helper function to call for copying data
@param didit flag variable: 1=single thread; 0=not single thread
__kmpc_copyprivate implements the interface for the private data broadcast
needed for the copyprivate clause associated with a single region in an
OpenMP<sup>*</sup> program (both C and Fortran).
All threads participating in the parallel region call this routine.
One of the threads (called the single thread) should have the <tt>didit</tt>
variable set to 1 and all other threads should have that variable set to 0.
All threads pass a pointer to a data buffer (cpy_data) that they have built.
The OpenMP specification forbids the use of nowait on the single region when a
copyprivate clause is present. However, @ref __kmpc_copyprivate implements a
barrier internally to avoid race conditions, so the code generation for the
single region should avoid generating a barrier after the call to @ref
__kmpc_copyprivate.
The <tt>gtid</tt> parameter is the global thread id for the current thread.
The <tt>loc</tt> parameter is a pointer to source location information.
Internal implementation: The single thread will first copy its descriptor
address (cpy_data) to a team-private location, then the other threads will each
call the function pointed to by the parameter cpy_func, which carries out the
copy by copying the data using the cpy_data buffer.
The cpy_func routine used for the copy and the contents of the data area defined
by cpy_data and cpy_size may be built in any fashion that will allow the copy
to be done. For instance, the cpy_data buffer can hold the actual data to be
copied or it may hold a list of pointers to the data. The cpy_func routine must
interpret the cpy_data buffer appropriately.
The interface to cpy_func is as follows:
@code
void cpy_func( void *destination, void *source )
@endcode
where void *destination is the cpy_data pointer for the thread being copied to
and void *source is the cpy_data pointer for the thread being copied from.
*/
void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
void *cpy_data, void (*cpy_func)(void *, void *),
kmp_int32 didit) {
void **data_ptr;
KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
__kmp_assert_valid_gtid(gtid);
KMP_MB();
data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
if (__kmp_env_consistency_check) {
if (loc == 0) {
KMP_WARNING(ConstructIdentInvalid);
}
}
if (didit)
*data_ptr = cpy_data;
#if OMPT_SUPPORT
ompt_frame_t *ompt_frame;
if (ompt_enabled.enabled) {
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
}
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
#if USE_ITT_NOTIFY
__kmp_threads[gtid]->th.th_ident = loc;
#endif
__kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
if (!didit)
(*cpy_func)(cpy_data, *data_ptr);
{
#if OMPT_SUPPORT
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
#if USE_ITT_NOTIFY
__kmp_threads[gtid]->th.th_ident = loc;
#endif
__kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
ompt_frame->enter_frame = ompt_data_none;
}
#endif
}
}
@ingroup THREADPRIVATE
@param loc source location information
@param gtid global thread number
@param cpy_data pointer to the data to be saved/copied or 0
@return the saved pointer to the data
__kmpc_copyprivate_light is a lighter version of __kmpc_copyprivate:
__kmpc_copyprivate_light only saves the pointer it's given (if it's not 0, so
coming from single), and returns that pointer in all calls (for single thread
it's not needed). This version doesn't do any actual data copying. Data copying
has to be done somewhere else, e.g. inline in the generated code. Due to this,
this function doesn't have any barrier at the end of the function, like
__kmpc_copyprivate does, so generated code needs barrier after copying of all
data was done.
*/
void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid, void *cpy_data) {
void **data_ptr;
KC_TRACE(10, ("__kmpc_copyprivate_light: called T#%d\n", gtid));
KMP_MB();
data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
if (__kmp_env_consistency_check) {
if (loc == 0) {
KMP_WARNING(ConstructIdentInvalid);
}
}
if (cpy_data)
*data_ptr = cpy_data;
#if OMPT_SUPPORT
ompt_frame_t *ompt_frame;
if (ompt_enabled.enabled) {
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
OMPT_STORE_RETURN_ADDRESS(gtid);
}
#endif
#if USE_ITT_NOTIFY
__kmp_threads[gtid]->th.th_ident = loc;
#endif
__kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
return *data_ptr;
}
#define INIT_LOCK __kmp_init_user_lock_with_checks
#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
#define ACQUIRE_NESTED_LOCK_TIMED \
__kmp_acquire_nested_user_lock_with_checks_timed
#define RELEASE_LOCK __kmp_release_user_lock_with_checks
#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
#define TEST_LOCK __kmp_test_user_lock_with_checks
#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
#if KMP_USE_DYNAMIC_LOCK
static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
kmp_dyna_lockseq_t seq) {
if (KMP_IS_D_LOCK(seq)) {
KMP_INIT_D_LOCK(lock, seq);
#if USE_ITT_BUILD
__kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
#endif
} else {
KMP_INIT_I_LOCK(lock, seq);
#if USE_ITT_BUILD
kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
__kmp_itt_lock_creating(ilk->lock, loc);
#endif
}
}
static __forceinline void
__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
kmp_dyna_lockseq_t seq) {
#if KMP_USE_TSX
if (seq == lockseq_hle || seq == lockseq_rtm_queuing ||
seq == lockseq_rtm_spin || seq == lockseq_adaptive)
seq = __kmp_user_lock_seq;
#endif
switch (seq) {
case lockseq_tas:
seq = lockseq_nested_tas;
break;
#if KMP_USE_FUTEX
case lockseq_futex:
seq = lockseq_nested_futex;
break;
#endif
case lockseq_ticket:
seq = lockseq_nested_ticket;
break;
case lockseq_queuing:
seq = lockseq_nested_queuing;
break;
case lockseq_drdpa:
seq = lockseq_nested_drdpa;
break;
default:
seq = lockseq_nested_queuing;
}
KMP_INIT_I_LOCK(lock, seq);
#if USE_ITT_BUILD
kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
__kmp_itt_lock_creating(ilk->lock, loc);
#endif
}
void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
uintptr_t hint) {
KMP_DEBUG_ASSERT(__kmp_init_serial);
if (__kmp_env_consistency_check && user_lock == NULL) {
KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
}
__kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
#if OMPT_SUPPORT && OMPT_OPTIONAL
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.ompt_callback_lock_init) {
ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
ompt_mutex_lock, (omp_lock_hint_t)hint,
__ompt_get_mutex_impl_type(user_lock),
(ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
}
#endif
}
void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
void **user_lock, uintptr_t hint) {
KMP_DEBUG_ASSERT(__kmp_init_serial);
if (__kmp_env_consistency_check && user_lock == NULL) {
KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
}
__kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
#if OMPT_SUPPORT && OMPT_OPTIONAL
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.ompt_callback_lock_init) {
ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
__ompt_get_mutex_impl_type(user_lock),
(ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
}
#endif
}
#endif
void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
KMP_DEBUG_ASSERT(__kmp_init_serial);
if (__kmp_env_consistency_check && user_lock == NULL) {
KMP_FATAL(LockIsUninitialized, "omp_init_lock");
}
__kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
#if OMPT_SUPPORT && OMPT_OPTIONAL
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.ompt_callback_lock_init) {
ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
ompt_mutex_lock, omp_lock_hint_none,
__ompt_get_mutex_impl_type(user_lock),
(ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
}
#endif
#else
static char const *const func = "omp_init_lock";
kmp_user_lock_p lck;
KMP_DEBUG_ASSERT(__kmp_init_serial);
if (__kmp_env_consistency_check) {
if (user_lock == NULL) {
KMP_FATAL(LockIsUninitialized, func);
}
}
KMP_CHECK_USER_LOCK_INIT();
if ((__kmp_user_lock_kind == lk_tas) &&
(sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
lck = (kmp_user_lock_p)user_lock;
}
#if KMP_USE_FUTEX
else if ((__kmp_user_lock_kind == lk_futex) &&
(sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
lck = (kmp_user_lock_p)user_lock;
}
#endif
else {
lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
}
INIT_LOCK(lck);
__kmp_set_user_lock_location(lck, loc);
#if OMPT_SUPPORT && OMPT_OPTIONAL
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.ompt_callback_lock_init) {
ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
(ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
}
#endif
#if USE_ITT_BUILD
__kmp_itt_lock_creating(lck);
#endif
#endif
}
void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
KMP_DEBUG_ASSERT(__kmp_init_serial);
if (__kmp_env_consistency_check && user_lock == NULL) {
KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
}
__kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
#if OMPT_SUPPORT && OMPT_OPTIONAL
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.ompt_callback_lock_init) {
ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
ompt_mutex_nest_lock, omp_lock_hint_none,
__ompt_get_mutex_impl_type(user_lock),
(ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
}
#endif
#else
static char const *const func = "omp_init_nest_lock";
kmp_user_lock_p lck;
KMP_DEBUG_ASSERT(__kmp_init_serial);
if (__kmp_env_consistency_check) {
if (user_lock == NULL) {
KMP_FATAL(LockIsUninitialized, func);
}
}
KMP_CHECK_USER_LOCK_INIT();
if ((__kmp_user_lock_kind == lk_tas) &&
(sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
OMP_NEST_LOCK_T_SIZE)) {
lck = (kmp_user_lock_p)user_lock;
}
#if KMP_USE_FUTEX
else if ((__kmp_user_lock_kind == lk_futex) &&
(sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
OMP_NEST_LOCK_T_SIZE)) {
lck = (kmp_user_lock_p)user_lock;
}
#endif
else {
lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
}
INIT_NESTED_LOCK(lck);
__kmp_set_user_lock_location(lck, loc);
#if OMPT_SUPPORT && OMPT_OPTIONAL
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.ompt_callback_lock_init) {
ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
(ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
}
#endif
#if USE_ITT_BUILD
__kmp_itt_lock_creating(lck);
#endif
#endif
}
void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
#if USE_ITT_BUILD
kmp_user_lock_p lck;
if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
} else {
lck = (kmp_user_lock_p)user_lock;
}
__kmp_itt_lock_destroyed(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.ompt_callback_lock_destroy) {
ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
}
#endif
KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
#else
kmp_user_lock_p lck;
if ((__kmp_user_lock_kind == lk_tas) &&
(sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
lck = (kmp_user_lock_p)user_lock;
}
#if KMP_USE_FUTEX
else if ((__kmp_user_lock_kind == lk_futex) &&
(sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
lck = (kmp_user_lock_p)user_lock;
}
#endif
else {
lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
}
#if OMPT_SUPPORT && OMPT_OPTIONAL
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.ompt_callback_lock_destroy) {
ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
}
#endif
#if USE_ITT_BUILD
__kmp_itt_lock_destroyed(lck);
#endif
DESTROY_LOCK(lck);
if ((__kmp_user_lock_kind == lk_tas) &&
(sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
;
}
#if KMP_USE_FUTEX
else if ((__kmp_user_lock_kind == lk_futex) &&
(sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
;
}
#endif
else {
__kmp_user_lock_free(user_lock, gtid, lck);
}
#endif
}
void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
#if USE_ITT_BUILD
kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
__kmp_itt_lock_destroyed(ilk->lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.ompt_callback_lock_destroy) {
ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
}
#endif
KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
#else
kmp_user_lock_p lck;
if ((__kmp_user_lock_kind == lk_tas) &&
(sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
OMP_NEST_LOCK_T_SIZE)) {
lck = (kmp_user_lock_p)user_lock;
}
#if KMP_USE_FUTEX
else if ((__kmp_user_lock_kind == lk_futex) &&
(sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
OMP_NEST_LOCK_T_SIZE)) {
lck = (kmp_user_lock_p)user_lock;
}
#endif
else {
lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
}
#if OMPT_SUPPORT && OMPT_OPTIONAL
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.ompt_callback_lock_destroy) {
ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
}
#endif
#if USE_ITT_BUILD
__kmp_itt_lock_destroyed(lck);
#endif
DESTROY_NESTED_LOCK(lck);
if ((__kmp_user_lock_kind == lk_tas) &&
(sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
OMP_NEST_LOCK_T_SIZE)) {
;
}
#if KMP_USE_FUTEX
else if ((__kmp_user_lock_kind == lk_futex) &&
(sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
OMP_NEST_LOCK_T_SIZE)) {
;
}
#endif
else {
__kmp_user_lock_free(user_lock, gtid, lck);
}
#endif
}
void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
KMP_COUNT_BLOCK(OMP_set_lock);
#if KMP_USE_DYNAMIC_LOCK
int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
__kmp_itt_lock_acquiring(
(kmp_user_lock_p)
user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.ompt_callback_mutex_acquire) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
ompt_mutex_lock, omp_lock_hint_none,
__ompt_get_mutex_impl_type(user_lock),
(ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
}
#endif
#if KMP_USE_INLINED_TAS
if (tag == locktag_tas && !__kmp_env_consistency_check) {
KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
} else
#elif KMP_USE_INLINED_FUTEX
if (tag == locktag_futex && !__kmp_env_consistency_check) {
KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
} else
#endif
{
__kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
}
#if USE_ITT_BUILD
__kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_mutex_acquired) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
}
#endif
#else
kmp_user_lock_p lck;
if ((__kmp_user_lock_kind == lk_tas) &&
(sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
lck = (kmp_user_lock_p)user_lock;
}
#if KMP_USE_FUTEX
else if ((__kmp_user_lock_kind == lk_futex) &&
(sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
lck = (kmp_user_lock_p)user_lock;
}
#endif
else {
lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
}
#if USE_ITT_BUILD
__kmp_itt_lock_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.ompt_callback_mutex_acquire) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
(ompt_wait_id_t)(uintptr_t)lck, codeptr);
}
#endif
ACQUIRE_LOCK(lck, gtid);
#if USE_ITT_BUILD
__kmp_itt_lock_acquired(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_mutex_acquired) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
}
#endif
#endif
}
void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
#if USE_ITT_BUILD
__kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.enabled) {
if (ompt_enabled.ompt_callback_mutex_acquire) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
ompt_mutex_nest_lock, omp_lock_hint_none,
__ompt_get_mutex_impl_type(user_lock),
(ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
}
}
#endif
int acquire_status =
KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
(void)acquire_status;
#if USE_ITT_BUILD
__kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
if (ompt_enabled.ompt_callback_mutex_acquired) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
codeptr);
}
} else {
if (ompt_enabled.ompt_callback_nest_lock) {
ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
}
}
}
#endif
#else
int acquire_status;
kmp_user_lock_p lck;
if ((__kmp_user_lock_kind == lk_tas) &&
(sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
OMP_NEST_LOCK_T_SIZE)) {
lck = (kmp_user_lock_p)user_lock;
}
#if KMP_USE_FUTEX
else if ((__kmp_user_lock_kind == lk_futex) &&
(sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
OMP_NEST_LOCK_T_SIZE)) {
lck = (kmp_user_lock_p)user_lock;
}
#endif
else {
lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
}
#if USE_ITT_BUILD
__kmp_itt_lock_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.enabled) {
if (ompt_enabled.ompt_callback_mutex_acquire) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
ompt_mutex_nest_lock, omp_lock_hint_none,
__ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
codeptr);
}
}
#endif
ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
#if USE_ITT_BUILD
__kmp_itt_lock_acquired(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
if (ompt_enabled.ompt_callback_mutex_acquired) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
}
} else {
if (ompt_enabled.ompt_callback_nest_lock) {
ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
}
}
}
#endif
#endif
}
void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
__kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif
#if KMP_USE_INLINED_TAS
if (tag == locktag_tas && !__kmp_env_consistency_check) {
KMP_RELEASE_TAS_LOCK(user_lock, gtid);
} else
#elif KMP_USE_INLINED_FUTEX
if (tag == locktag_futex && !__kmp_env_consistency_check) {
KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
} else
#endif
{
__kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
}
#if OMPT_SUPPORT && OMPT_OPTIONAL
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.ompt_callback_mutex_released) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
}
#endif
#else
kmp_user_lock_p lck;
if ((__kmp_user_lock_kind == lk_tas) &&
(sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
#if KMP_OS_LINUX && \
(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
#if USE_ITT_BUILD
__kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif
TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
KMP_MB();
#if OMPT_SUPPORT && OMPT_OPTIONAL
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.ompt_callback_mutex_released) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
}
#endif
return;
#else
lck = (kmp_user_lock_p)user_lock;
#endif
}
#if KMP_USE_FUTEX
else if ((__kmp_user_lock_kind == lk_futex) &&
(sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
lck = (kmp_user_lock_p)user_lock;
}
#endif
else {
lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
}
#if USE_ITT_BUILD
__kmp_itt_lock_releasing(lck);
#endif
RELEASE_LOCK(lck, gtid);
#if OMPT_SUPPORT && OMPT_OPTIONAL
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.ompt_callback_mutex_released) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
}
#endif
#endif
}
void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
#if USE_ITT_BUILD
__kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif
int release_status =
KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
(void)release_status;
#if OMPT_SUPPORT && OMPT_OPTIONAL
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.enabled) {
if (release_status == KMP_LOCK_RELEASED) {
if (ompt_enabled.ompt_callback_mutex_released) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
codeptr);
}
} else if (ompt_enabled.ompt_callback_nest_lock) {
ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
}
}
#endif
#else
kmp_user_lock_p lck;
if ((__kmp_user_lock_kind == lk_tas) &&
(sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
OMP_NEST_LOCK_T_SIZE)) {
#if KMP_OS_LINUX && \
(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
#if USE_ITT_BUILD
__kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
int release_status = KMP_LOCK_STILL_HELD;
#endif
if (--(tl->lk.depth_locked) == 0) {
TCW_4(tl->lk.poll, 0);
#if OMPT_SUPPORT && OMPT_OPTIONAL
release_status = KMP_LOCK_RELEASED;
#endif
}
KMP_MB();
#if OMPT_SUPPORT && OMPT_OPTIONAL
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.enabled) {
if (release_status == KMP_LOCK_RELEASED) {
if (ompt_enabled.ompt_callback_mutex_released) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
}
} else if (ompt_enabled.ompt_callback_nest_lock) {
ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
}
}
#endif
return;
#else
lck = (kmp_user_lock_p)user_lock;
#endif
}
#if KMP_USE_FUTEX
else if ((__kmp_user_lock_kind == lk_futex) &&
(sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
OMP_NEST_LOCK_T_SIZE)) {
lck = (kmp_user_lock_p)user_lock;
}
#endif
else {
lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
}
#if USE_ITT_BUILD
__kmp_itt_lock_releasing(lck);
#endif
int release_status;
release_status = RELEASE_NESTED_LOCK(lck, gtid);
#if OMPT_SUPPORT && OMPT_OPTIONAL
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.enabled) {
if (release_status == KMP_LOCK_RELEASED) {
if (ompt_enabled.ompt_callback_mutex_released) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
}
} else if (ompt_enabled.ompt_callback_nest_lock) {
ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
}
}
#endif
#endif
}
int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
KMP_COUNT_BLOCK(OMP_test_lock);
#if KMP_USE_DYNAMIC_LOCK
int rc;
int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
__kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.ompt_callback_mutex_acquire) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
ompt_mutex_test_lock, omp_lock_hint_none,
__ompt_get_mutex_impl_type(user_lock),
(ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
}
#endif
#if KMP_USE_INLINED_TAS
if (tag == locktag_tas && !__kmp_env_consistency_check) {
KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
} else
#elif KMP_USE_INLINED_FUTEX
if (tag == locktag_futex && !__kmp_env_consistency_check) {
KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
} else
#endif
{
rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
}
if (rc) {
#if USE_ITT_BUILD
__kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_mutex_acquired) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
}
#endif
return FTN_TRUE;
} else {
#if USE_ITT_BUILD
__kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
#endif
return FTN_FALSE;
}
#else
kmp_user_lock_p lck;
int rc;
if ((__kmp_user_lock_kind == lk_tas) &&
(sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
lck = (kmp_user_lock_p)user_lock;
}
#if KMP_USE_FUTEX
else if ((__kmp_user_lock_kind == lk_futex) &&
(sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
lck = (kmp_user_lock_p)user_lock;
}
#endif
else {
lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
}
#if USE_ITT_BUILD
__kmp_itt_lock_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.ompt_callback_mutex_acquire) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
ompt_mutex_test_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
(ompt_wait_id_t)(uintptr_t)lck, codeptr);
}
#endif
rc = TEST_LOCK(lck, gtid);
#if USE_ITT_BUILD
if (rc) {
__kmp_itt_lock_acquired(lck);
} else {
__kmp_itt_lock_cancelled(lck);
}
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
}
#endif
return (rc ? FTN_TRUE : FTN_FALSE);
#endif
}
int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
int rc;
#if USE_ITT_BUILD
__kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.ompt_callback_mutex_acquire) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
ompt_mutex_test_nest_lock, omp_lock_hint_none,
__ompt_get_mutex_impl_type(user_lock),
(ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
}
#endif
rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
#if USE_ITT_BUILD
if (rc) {
__kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
} else {
__kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
}
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled && rc) {
if (rc == 1) {
if (ompt_enabled.ompt_callback_mutex_acquired) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
codeptr);
}
} else {
if (ompt_enabled.ompt_callback_nest_lock) {
ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
}
}
}
#endif
return rc;
#else
kmp_user_lock_p lck;
int rc;
if ((__kmp_user_lock_kind == lk_tas) &&
(sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
OMP_NEST_LOCK_T_SIZE)) {
lck = (kmp_user_lock_p)user_lock;
}
#if KMP_USE_FUTEX
else if ((__kmp_user_lock_kind == lk_futex) &&
(sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
OMP_NEST_LOCK_T_SIZE)) {
lck = (kmp_user_lock_p)user_lock;
}
#endif
else {
lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
}
#if USE_ITT_BUILD
__kmp_itt_lock_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.enabled) &&
ompt_enabled.ompt_callback_mutex_acquire) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
ompt_mutex_test_nest_lock, omp_lock_hint_none,
__ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
codeptr);
}
#endif
rc = TEST_NESTED_LOCK(lck, gtid);
#if USE_ITT_BUILD
if (rc) {
__kmp_itt_lock_acquired(lck);
} else {
__kmp_itt_lock_cancelled(lck);
}
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled && rc) {
if (rc == 1) {
if (ompt_enabled.ompt_callback_mutex_acquired) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
}
} else {
if (ompt_enabled.ompt_callback_nest_lock) {
ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
ompt_mutex_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
}
}
}
#endif
return rc;
#endif
}
#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
#define __KMP_GET_REDUCTION_METHOD(gtid) \
(__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
static __forceinline void
__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
kmp_critical_name *crit) {
kmp_user_lock_p lck;
#if KMP_USE_DYNAMIC_LOCK
kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
if (*lk == 0) {
if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
KMP_GET_D_TAG(__kmp_user_lock_seq));
} else {
__kmp_init_indirect_csptr(crit, loc, global_tid,
KMP_GET_I_TAG(__kmp_user_lock_seq));
}
}
if (KMP_EXTRACT_D_TAG(lk) != 0) {
lck = (kmp_user_lock_p)lk;
KMP_DEBUG_ASSERT(lck != NULL);
if (__kmp_env_consistency_check) {
__kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
}
KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
} else {
kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
lck = ilk->lock;
KMP_DEBUG_ASSERT(lck != NULL);
if (__kmp_env_consistency_check) {
__kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
}
KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
}
#else
if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
lck = (kmp_user_lock_p)crit;
} else {
lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
}
KMP_DEBUG_ASSERT(lck != NULL);
if (__kmp_env_consistency_check)
__kmp_push_sync(global_tid, ct_critical, loc, lck);
__kmp_acquire_user_lock_with_checks(lck, global_tid);
#endif
}
static __forceinline void
__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
kmp_critical_name *crit) {
kmp_user_lock_p lck;
#if KMP_USE_DYNAMIC_LOCK
if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
lck = (kmp_user_lock_p)crit;
if (__kmp_env_consistency_check)
__kmp_pop_sync(global_tid, ct_critical, loc);
KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
} else {
kmp_indirect_lock_t *ilk =
(kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
if (__kmp_env_consistency_check)
__kmp_pop_sync(global_tid, ct_critical, loc);
KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
}
#else
if (__kmp_base_user_lock_size > 32) {
lck = *((kmp_user_lock_p *)crit);
KMP_ASSERT(lck != NULL);
} else {
lck = (kmp_user_lock_p)crit;
}
if (__kmp_env_consistency_check)
__kmp_pop_sync(global_tid, ct_critical, loc);
__kmp_release_user_lock_with_checks(lck, global_tid);
#endif
}
static __forceinline int
__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
int *task_state) {
kmp_team_t *team;
if (th->th.th_teams_microtask) {
*team_p = team = th->th.th_team;
if (team->t.t_level == th->th.th_teams_level) {
KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid);
th->th.th_info.ds.ds_tid = team->t.t_master_tid;
th->th.th_team = team->t.t_parent;
th->th.th_team_nproc = th->th.th_team->t.t_nproc;
th->th.th_task_team = th->th.th_team->t.t_task_team[0];
*task_state = th->th.th_task_state;
th->th.th_task_state = 0;
return 1;
}
}
return 0;
}
static __forceinline void
__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
th->th.th_info.ds.ds_tid = 0;
th->th.th_team = team;
th->th.th_team_nproc = team->t.t_nproc;
th->th.th_task_team = team->t.t_task_team[task_state];
__kmp_type_convert(task_state, &(th->th.th_task_state));
}
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread number
@param num_vars number of items (variables) to be reduced
@param reduce_size size of data in bytes to be reduced
@param reduce_data pointer to data to be reduced
@param reduce_func callback function providing reduction operation on two
operands and returning result of reduction in lhs_data
@param lck pointer to the unique lock data structure
@result 1 for the primary thread, 0 for all other team threads, 2 for all team
threads if atomic reduction needed
The nowait version is used for a reduce clause with the nowait argument.
*/
kmp_int32
__kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
size_t reduce_size, void *reduce_data,
void (*reduce_func)(void *lhs_data, void *rhs_data),
kmp_critical_name *lck) {
KMP_COUNT_BLOCK(REDUCE_nowait);
int retval = 0;
PACKED_REDUCTION_METHOD_T packed_reduction_method;
kmp_info_t *th;
kmp_team_t *team;
int teams_swapped = 0, task_state;
KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
__kmp_assert_valid_gtid(global_tid);
if (!TCR_4(__kmp_init_parallel))
__kmp_parallel_initialize();
__kmp_resume_if_soft_paused();
#if KMP_USE_DYNAMIC_LOCK
if (__kmp_env_consistency_check)
__kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
if (__kmp_env_consistency_check)
__kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif
th = __kmp_thread_from_gtid(global_tid);
teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
packed_reduction_method = __kmp_determine_reduction_method(
loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
__KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
OMPT_REDUCTION_DECL(th, global_tid);
if (packed_reduction_method == critical_reduce_block) {
OMPT_REDUCTION_BEGIN;
__kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
retval = 1;
} else if (packed_reduction_method == empty_reduce_block) {
OMPT_REDUCTION_BEGIN;
retval = 1;
} else if (packed_reduction_method == atomic_reduce_block) {
retval = 2;
if (__kmp_env_consistency_check)
__kmp_pop_sync(global_tid, ct_reduce, loc);
} else if (TEST_REDUCTION_METHOD(packed_reduction_method,
tree_reduce_block)) {
#if OMPT_SUPPORT
ompt_frame_t *ompt_frame;
if (ompt_enabled.enabled) {
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
}
OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
__kmp_threads[global_tid]->th.th_ident = loc;
#endif
retval =
__kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
global_tid, FALSE, reduce_size, reduce_data, reduce_func);
retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
ompt_frame->enter_frame = ompt_data_none;
}
#endif
if (__kmp_env_consistency_check) {
if (retval == 0) {
__kmp_pop_sync(global_tid, ct_reduce, loc);
}
}
} else {
KMP_ASSERT(0);
}
if (teams_swapped) {
__kmp_restore_swapped_teams(th, team, task_state);
}
KA_TRACE(
10,
("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
global_tid, packed_reduction_method, retval));
return retval;
}
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread id.
@param lck pointer to the unique lock data structure
Finish the execution of a reduce nowait.
*/
void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
kmp_critical_name *lck) {
PACKED_REDUCTION_METHOD_T packed_reduction_method;
KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
__kmp_assert_valid_gtid(global_tid);
packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);
if (packed_reduction_method == critical_reduce_block) {
__kmp_end_critical_section_reduce_block(loc, global_tid, lck);
OMPT_REDUCTION_END;
} else if (packed_reduction_method == empty_reduce_block) {
OMPT_REDUCTION_END;
} else if (packed_reduction_method == atomic_reduce_block) {
} else if (TEST_REDUCTION_METHOD(packed_reduction_method,
tree_reduce_block)) {
} else {
KMP_ASSERT(0);
}
if (__kmp_env_consistency_check)
__kmp_pop_sync(global_tid, ct_reduce, loc);
KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
global_tid, packed_reduction_method));
return;
}
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread number
@param num_vars number of items (variables) to be reduced
@param reduce_size size of data in bytes to be reduced
@param reduce_data pointer to data to be reduced
@param reduce_func callback function providing reduction operation on two
operands and returning result of reduction in lhs_data
@param lck pointer to the unique lock data structure
@result 1 for the primary thread, 0 for all other team threads, 2 for all team
threads if atomic reduction needed
A blocking reduce that includes an implicit barrier.
*/
kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
size_t reduce_size, void *reduce_data,
void (*reduce_func)(void *lhs_data, void *rhs_data),
kmp_critical_name *lck) {
KMP_COUNT_BLOCK(REDUCE_wait);
int retval = 0;
PACKED_REDUCTION_METHOD_T packed_reduction_method;
kmp_info_t *th;
kmp_team_t *team;
int teams_swapped = 0, task_state;
KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
__kmp_assert_valid_gtid(global_tid);
if (!TCR_4(__kmp_init_parallel))
__kmp_parallel_initialize();
__kmp_resume_if_soft_paused();
#if KMP_USE_DYNAMIC_LOCK
if (__kmp_env_consistency_check)
__kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
if (__kmp_env_consistency_check)
__kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif
th = __kmp_thread_from_gtid(global_tid);
teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
packed_reduction_method = __kmp_determine_reduction_method(
loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
__KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
OMPT_REDUCTION_DECL(th, global_tid);
if (packed_reduction_method == critical_reduce_block) {
OMPT_REDUCTION_BEGIN;
__kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
retval = 1;
} else if (packed_reduction_method == empty_reduce_block) {
OMPT_REDUCTION_BEGIN;
retval = 1;
} else if (packed_reduction_method == atomic_reduce_block) {
retval = 2;
} else if (TEST_REDUCTION_METHOD(packed_reduction_method,
tree_reduce_block)) {
#if OMPT_SUPPORT
ompt_frame_t *ompt_frame;
if (ompt_enabled.enabled) {
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
}
OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
__kmp_threads[global_tid]->th.th_ident =
loc;
#endif
retval =
__kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
global_tid, TRUE, reduce_size, reduce_data, reduce_func);
retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
ompt_frame->enter_frame = ompt_data_none;
}
#endif
if (__kmp_env_consistency_check) {
if (retval == 0) {
__kmp_pop_sync(global_tid, ct_reduce, loc);
}
}
} else {
KMP_ASSERT(0);
}
if (teams_swapped) {
__kmp_restore_swapped_teams(th, team, task_state);
}
KA_TRACE(10,
("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
global_tid, packed_reduction_method, retval));
return retval;
}
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread id.
@param lck pointer to the unique lock data structure
Finish the execution of a blocking reduce.
The <tt>lck</tt> pointer must be the same as that used in the corresponding
start function.
*/
void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
kmp_critical_name *lck) {
PACKED_REDUCTION_METHOD_T packed_reduction_method;
kmp_info_t *th;
kmp_team_t *team;
int teams_swapped = 0, task_state;
KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
__kmp_assert_valid_gtid(global_tid);
th = __kmp_thread_from_gtid(global_tid);
teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
OMPT_REDUCTION_DECL(th, global_tid);
if (packed_reduction_method == critical_reduce_block) {
__kmp_end_critical_section_reduce_block(loc, global_tid, lck);
OMPT_REDUCTION_END;
#if OMPT_SUPPORT
ompt_frame_t *ompt_frame;
if (ompt_enabled.enabled) {
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
}
OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
__kmp_threads[global_tid]->th.th_ident = loc;
#endif
__kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
ompt_frame->enter_frame = ompt_data_none;
}
#endif
} else if (packed_reduction_method == empty_reduce_block) {
OMPT_REDUCTION_END;
#if OMPT_SUPPORT
ompt_frame_t *ompt_frame;
if (ompt_enabled.enabled) {
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
}
OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
__kmp_threads[global_tid]->th.th_ident = loc;
#endif
__kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
ompt_frame->enter_frame = ompt_data_none;
}
#endif
} else if (packed_reduction_method == atomic_reduce_block) {
#if OMPT_SUPPORT
ompt_frame_t *ompt_frame;
if (ompt_enabled.enabled) {
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
}
OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
__kmp_threads[global_tid]->th.th_ident = loc;
#endif
__kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
ompt_frame->enter_frame = ompt_data_none;
}
#endif
} else if (TEST_REDUCTION_METHOD(packed_reduction_method,
tree_reduce_block)) {
__kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
global_tid);
} else {
KMP_ASSERT(0);
}
if (teams_swapped) {
__kmp_restore_swapped_teams(th, team, task_state);
}
if (__kmp_env_consistency_check)
__kmp_pop_sync(global_tid, ct_reduce, loc);
KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
global_tid, packed_reduction_method));
return;
}
#undef __KMP_GET_REDUCTION_METHOD
#undef __KMP_SET_REDUCTION_METHOD
kmp_uint64 __kmpc_get_taskid() {
kmp_int32 gtid;
kmp_info_t *thread;
gtid = __kmp_get_gtid();
if (gtid < 0) {
return 0;
}
thread = __kmp_thread_from_gtid(gtid);
return thread->th.th_current_task->td_task_id;
}
kmp_uint64 __kmpc_get_parent_taskid() {
kmp_int32 gtid;
kmp_info_t *thread;
kmp_taskdata_t *parent_task;
gtid = __kmp_get_gtid();
if (gtid < 0) {
return 0;
}
thread = __kmp_thread_from_gtid(gtid);
parent_task = thread->th.th_current_task->td_parent;
return (parent_task == NULL ? 0 : parent_task->td_task_id);
}
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.
@param num_dims number of associated doacross loops.
@param dims info on loops bounds.
Initialize doacross loop information.
Expect compiler send us inclusive bounds,
e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2.
*/
void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
const struct kmp_dim *dims) {
__kmp_assert_valid_gtid(gtid);
int j, idx;
kmp_int64 last, trace_count;
kmp_info_t *th = __kmp_threads[gtid];
kmp_team_t *team = th->th.th_team;
kmp_uint32 *flags;
kmp_disp_t *pr_buf = th->th.th_dispatch;
dispatch_shared_info_t *sh_buf;
KA_TRACE(
20,
("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
gtid, num_dims, !team->t.t_serialized));
KMP_DEBUG_ASSERT(dims != NULL);
KMP_DEBUG_ASSERT(num_dims > 0);
if (team->t.t_serialized) {
KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
return;
}
KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
idx = pr_buf->th_doacross_buf_idx++;
sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
th, sizeof(kmp_int64) * (4 * num_dims + 1));
KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
pr_buf->th_doacross_info[0] =
(kmp_int64)num_dims;
pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
pr_buf->th_doacross_info[2] = dims[0].lo;
pr_buf->th_doacross_info[3] = dims[0].up;
pr_buf->th_doacross_info[4] = dims[0].st;
last = 5;
for (j = 1; j < num_dims; ++j) {
kmp_int64
range_length;
if (dims[j].st == 1) {
range_length = dims[j].up - dims[j].lo + 1;
} else {
if (dims[j].st > 0) {
KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
} else {
KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
range_length =
(kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
}
}
pr_buf->th_doacross_info[last++] = range_length;
pr_buf->th_doacross_info[last++] = dims[j].lo;
pr_buf->th_doacross_info[last++] = dims[j].up;
pr_buf->th_doacross_info[last++] = dims[j].st;
}
if (dims[0].st == 1) {
trace_count = dims[0].up - dims[0].lo + 1;
} else if (dims[0].st > 0) {
KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
} else {
KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
}
for (j = 1; j < num_dims; ++j) {
trace_count *= pr_buf->th_doacross_info[4 * j + 1];
}
KMP_DEBUG_ASSERT(trace_count > 0);
if (idx != sh_buf->doacross_buf_idx) {
__kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
__kmp_eq_4, NULL);
}
#if KMP_32_BIT_ARCH
flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
(volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
#else
flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
(volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
#endif
if (flags == NULL) {
size_t size =
(size_t)trace_count / 8 + 8;
flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
KMP_MB();
sh_buf->doacross_flags = flags;
} else if (flags == (kmp_uint32 *)1) {
#if KMP_32_BIT_ARCH
while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
#else
while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
#endif
KMP_YIELD(TRUE);
KMP_MB();
} else {
KMP_MB();
}
KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1);
pr_buf->th_doacross_flags =
sh_buf->doacross_flags;
KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
}
void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
__kmp_assert_valid_gtid(gtid);
kmp_int64 shft;
size_t num_dims, i;
kmp_uint32 flag;
kmp_int64 iter_number;
kmp_info_t *th = __kmp_threads[gtid];
kmp_team_t *team = th->th.th_team;
kmp_disp_t *pr_buf;
kmp_int64 lo, up, st;
KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
if (team->t.t_serialized) {
KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
return;
}
pr_buf = th->th.th_dispatch;
KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
num_dims = (size_t)pr_buf->th_doacross_info[0];
lo = pr_buf->th_doacross_info[2];
up = pr_buf->th_doacross_info[3];
st = pr_buf->th_doacross_info[4];
#if OMPT_SUPPORT && OMPT_OPTIONAL
SimpleVLA<ompt_dependence_t> deps(num_dims);
#endif
if (st == 1) {
if (vec[0] < lo || vec[0] > up) {
KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
"bounds [%lld,%lld]\n",
gtid, vec[0], lo, up));
return;
}
iter_number = vec[0] - lo;
} else if (st > 0) {
if (vec[0] < lo || vec[0] > up) {
KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
"bounds [%lld,%lld]\n",
gtid, vec[0], lo, up));
return;
}
iter_number = (kmp_uint64)(vec[0] - lo) / st;
} else {
if (vec[0] > lo || vec[0] < up) {
KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
"bounds [%lld,%lld]\n",
gtid, vec[0], lo, up));
return;
}
iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
}
#if OMPT_SUPPORT && OMPT_OPTIONAL
deps[0].variable.value = iter_number;
deps[0].dependence_type = ompt_dependence_type_sink;
#endif
for (i = 1; i < num_dims; ++i) {
kmp_int64 iter, ln;
size_t j = i * 4;
ln = pr_buf->th_doacross_info[j + 1];
lo = pr_buf->th_doacross_info[j + 2];
up = pr_buf->th_doacross_info[j + 3];
st = pr_buf->th_doacross_info[j + 4];
if (st == 1) {
if (vec[i] < lo || vec[i] > up) {
KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
"bounds [%lld,%lld]\n",
gtid, vec[i], lo, up));
return;
}
iter = vec[i] - lo;
} else if (st > 0) {
if (vec[i] < lo || vec[i] > up) {
KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
"bounds [%lld,%lld]\n",
gtid, vec[i], lo, up));
return;
}
iter = (kmp_uint64)(vec[i] - lo) / st;
} else {
if (vec[i] > lo || vec[i] < up) {
KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
"bounds [%lld,%lld]\n",
gtid, vec[i], lo, up));
return;
}
iter = (kmp_uint64)(lo - vec[i]) / (-st);
}
iter_number = iter + ln * iter_number;
#if OMPT_SUPPORT && OMPT_OPTIONAL
deps[i].variable.value = iter;
deps[i].dependence_type = ompt_dependence_type_sink;
#endif
}
shft = iter_number % 32;
iter_number >>= 5;
flag = 1 << shft;
while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
KMP_YIELD(TRUE);
}
KMP_MB();
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_dependences) {
ompt_callbacks.ompt_callback(ompt_callback_dependences)(
&(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
}
#endif
KA_TRACE(20,
("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
gtid, (iter_number << 5) + shft));
}
void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
__kmp_assert_valid_gtid(gtid);
kmp_int64 shft;
size_t num_dims, i;
kmp_uint32 flag;
kmp_int64 iter_number;
kmp_info_t *th = __kmp_threads[gtid];
kmp_team_t *team = th->th.th_team;
kmp_disp_t *pr_buf;
kmp_int64 lo, st;
KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
if (team->t.t_serialized) {
KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
return;
}
pr_buf = th->th.th_dispatch;
KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
num_dims = (size_t)pr_buf->th_doacross_info[0];
lo = pr_buf->th_doacross_info[2];
st = pr_buf->th_doacross_info[4];
#if OMPT_SUPPORT && OMPT_OPTIONAL
SimpleVLA<ompt_dependence_t> deps(num_dims);
#endif
if (st == 1) {
iter_number = vec[0] - lo;
} else if (st > 0) {
iter_number = (kmp_uint64)(vec[0] - lo) / st;
} else {
iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
}
#if OMPT_SUPPORT && OMPT_OPTIONAL
deps[0].variable.value = iter_number;
deps[0].dependence_type = ompt_dependence_type_source;
#endif
for (i = 1; i < num_dims; ++i) {
kmp_int64 iter, ln;
size_t j = i * 4;
ln = pr_buf->th_doacross_info[j + 1];
lo = pr_buf->th_doacross_info[j + 2];
st = pr_buf->th_doacross_info[j + 4];
if (st == 1) {
iter = vec[i] - lo;
} else if (st > 0) {
iter = (kmp_uint64)(vec[i] - lo) / st;
} else {
iter = (kmp_uint64)(lo - vec[i]) / (-st);
}
iter_number = iter + ln * iter_number;
#if OMPT_SUPPORT && OMPT_OPTIONAL
deps[i].variable.value = iter;
deps[i].dependence_type = ompt_dependence_type_source;
#endif
}
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_dependences) {
ompt_callbacks.ompt_callback(ompt_callback_dependences)(
&(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
}
#endif
shft = iter_number % 32;
iter_number >>= 5;
flag = 1 << shft;
KMP_MB();
if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
(iter_number << 5) + shft));
}
void __kmpc_doacross_fini(ident_t *loc, int gtid) {
__kmp_assert_valid_gtid(gtid);
kmp_int32 num_done;
kmp_info_t *th = __kmp_threads[gtid];
kmp_team_t *team = th->th.th_team;
kmp_disp_t *pr_buf = th->th.th_dispatch;
KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
if (team->t.t_serialized) {
KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
return;
}
num_done =
KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1;
if (num_done == th->th.th_team_nproc) {
int idx = pr_buf->th_doacross_buf_idx - 1;
dispatch_shared_info_t *sh_buf =
&team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
(kmp_int64)&sh_buf->doacross_num_done);
KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
__kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
sh_buf->doacross_flags = NULL;
sh_buf->doacross_num_done = 0;
sh_buf->doacross_buf_idx +=
__kmp_dispatch_num_buffers;
}
pr_buf->th_doacross_flags = NULL;
__kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
pr_buf->th_doacross_info = NULL;
KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
}
void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
return __kmp_alloc(__kmp_entry_gtid(), 0, size, allocator);
}
void *omp_aligned_alloc(size_t align, size_t size,
omp_allocator_handle_t allocator) {
return __kmp_alloc(__kmp_entry_gtid(), align, size, allocator);
}
void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
return __kmp_calloc(__kmp_entry_gtid(), 0, nmemb, size, allocator);
}
void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size,
omp_allocator_handle_t allocator) {
return __kmp_calloc(__kmp_entry_gtid(), align, nmemb, size, allocator);
}
void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
omp_allocator_handle_t free_allocator) {
return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator,
free_allocator);
}
void omp_free(void *ptr, omp_allocator_handle_t allocator) {
___kmpc_free(__kmp_entry_gtid(), ptr, allocator);
}
int __kmpc_get_target_offload(void) {
if (!__kmp_init_serial) {
__kmp_serial_initialize();
}
return __kmp_target_offload;
}
int __kmpc_pause_resource(kmp_pause_status_t level) {
if (!__kmp_init_serial) {
return 1;
}
return __kmp_pause_resource(level);
}
void __kmpc_error(ident_t *loc, int severity, const char *message) {
if (!__kmp_init_serial)
__kmp_serial_initialize();
KMP_ASSERT(severity == severity_warning || severity == severity_fatal);
#if OMPT_SUPPORT
if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) {
ompt_callbacks.ompt_callback(ompt_callback_error)(
(ompt_severity_t)severity, message, KMP_STRLEN(message),
OMPT_GET_RETURN_ADDRESS(0));
}
#endif
char *src_loc;
if (loc && loc->psource) {
kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
src_loc =
__kmp_str_format("%s:%d:%d", str_loc.file, str_loc.line, str_loc.col);
__kmp_str_loc_free(&str_loc);
} else {
src_loc = __kmp_str_format("unknown");
}
if (severity == severity_warning)
KMP_WARNING(UserDirectedWarning, src_loc, message);
else
KMP_FATAL(UserDirectedError, src_loc, message);
__kmp_str_free(&src_loc);
}
void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
int tid = __kmp_tid_from_gtid(gtid);
ompt_callbacks.ompt_callback(ompt_callback_work)(
ompt_work_scope, ompt_scope_begin,
&(team->t.ompt_team_info.parallel_data),
&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
OMPT_GET_RETURN_ADDRESS(0));
}
#endif
}
void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
int tid = __kmp_tid_from_gtid(gtid);
ompt_callbacks.ompt_callback(ompt_callback_work)(
ompt_work_scope, ompt_scope_end,
&(team->t.ompt_team_info.parallel_data),
&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
OMPT_GET_RETURN_ADDRESS(0));
}
#endif
}
#ifdef KMP_USE_VERSION_SYMBOLS
extern "C" {
#ifdef omp_set_affinity_format
#undef omp_set_affinity_format
#endif
#ifdef omp_get_affinity_format
#undef omp_get_affinity_format
#endif
#ifdef omp_display_affinity
#undef omp_display_affinity
#endif
#ifdef omp_capture_affinity
#undef omp_capture_affinity
#endif
KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50,
"OMP_5.0");
KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50,
"OMP_5.0");
KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50,
"OMP_5.0");
KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50,
"OMP_5.0");
}
#endif