#include "Debug.h"
#include "Interface.h"
#include "Mapping.h"
#include "State.h"
#include "Synchronization.h"
#include "Types.h"
#include "Utils.h"
using namespace ompx;
#pragma omp begin declare target device_type(nohost)
namespace {
/// Compute how many threads should take part in a parallel region.
///
/// \param NumThreadsClause Thread count requested at the call site. The value
///        -1 selects `icv::NThreads` as the request instead.
/// \returns The team maximum (`mapping::getMaxTeamThreads()`), lowered to the
///          request when the request is non-zero and smaller. In SPMD mode
///          that value is returned unchanged. Otherwise a value below the
///          warp size collapses to 1 and any other value has its low bits
///          cleared with the mask `warp size - 1`.
uint32_t determineNumberOfThreads(int32_t NumThreadsClause) {
  // The explicit request wins; -1 falls back to the ICV.
  const uint32_t Requested =
      NumThreadsClause != -1 ? NumThreadsClause : icv::NThreads;

  // A request of 0, or one that is not below the maximum, leaves the maximum
  // in place.
  const uint32_t Limit = mapping::getMaxTeamThreads();
  const uint32_t Count =
      (Requested == 0 || Requested >= Limit) ? Limit : Requested;

  // SPMD mode takes the clamped value as is.
  if (mapping::isSPMDMode())
    return Count;

  // Non-SPMD mode: fewer threads than one warp means a single thread.
  if (Count < mapping::getWarpSize())
    return 1;

  // Otherwise drop the bits below the warp size (a round-down to a multiple
  // of the warp size when the warp size is a power of two).
  const uint32_t LowBits = (uint32_t)mapping::getWarpSize() - 1;
  return Count & ~LowBits;
}
/// Dispatch to the outlined function \p fn, selecting the call shape from the
/// argument count \p nargs.
///
/// The `case` labels of the switch are supplied by the included file
/// "generated_microtask_cases.gen", which is not visible here. Presumably each
/// generated case calls \p fn with \p global_tid, \p bound_tid and the first
/// \p nargs entries of \p args -- confirm against the generator.
///
/// Any \p nargs value without a generated case prints a diagnostic and traps.
[[clang::always_inline]] void invokeMicrotask(int32_t global_tid,
int32_t bound_tid, void *fn,
void **args, int64_t nargs) {
switch (nargs) {
// The generated `case` labels are textually spliced in at this point.
#include "generated_microtask_cases.gen"
default:
// No generated case matched this argument count: report and abort.
PRINT("Too many arguments in kmp_invoke_microtask, aborting execution.\n");
__builtin_trap();
}
}
}
extern "C" {
/// Execute a parallel region for a kernel running in SPMD mode.
///
/// `__kmpc_parallel_51` forwards here when `mapping::isSPMDMode()` is true.
/// The function contains several `synchronize::threadsAligned` calls, so it
/// is presumably meant to be reached by every thread of the block together --
/// confirm against Synchronization.h.
///
/// \param ident       Source location descriptor, forwarded to the state
///                    helpers and assertions.
/// \param num_threads Requested thread count, resolved through
///                    `determineNumberOfThreads`.
/// \param fn          Outlined region body, run through `invokeMicrotask`.
/// \param args        Array of pointer arguments handed to \p fn.
/// \param nargs       Number of entries in \p args.
[[clang::always_inline]] void __kmpc_parallel_spmd(IdentTy *ident,
int32_t num_threads,
void *fn, void **args,
const int64_t nargs) {
uint32_t TId = mapping::getThreadIdInBlock();
uint32_t NumThreads = determineNumberOfThreads(num_threads);
// A team size of 0 encodes "every thread of the team participates"; see the
// `!PTeamSize` test guarding the microtask invocation below.
uint32_t PTeamSize =
NumThreads == mapping::getMaxTeamThreads() ? 0 : NumThreads;
// Synchronize before the team state is modified below.
synchronize::threadsAligned(atomic::seq_cst);
{
// Scoped updates of the team size and the two level ICVs. Only the thread
// with TId == 0 is passed as "active".
// NOTE(review): the declaration order (icv::Level last) looks deliberate;
// confirm against State.h before reordering. The trailing `true` is
// presumably a force-team-state flag -- confirm as well.
state::ValueRAII ParallelTeamSizeRAII(state::ParallelTeamSize, PTeamSize,
1u, TId == 0, ident,
true);
state::ValueRAII ActiveLevelRAII(icv::ActiveLevel, 1u, 0u, TId == 0, ident,
true);
state::ValueRAII LevelRAII(icv::Level, 1u, 0u, TId == 0, ident,
true);
// Synchronize after thread 0 installed the new values, then check that the
// expected values are in place.
synchronize::threadsAligned(atomic::acq_rel);
state::ParallelTeamSize.assert_eq(PTeamSize, ident,
true);
icv::ActiveLevel.assert_eq(1u, ident, true);
icv::Level.assert_eq(1u, ident, true);
// Synchronize once more before the region body runs.
synchronize::threadsAligned(atomic::relaxed);
// Run the region body on all threads (PTeamSize == 0) or only on the first
// PTeamSize threads.
if (!PTeamSize || TId < PTeamSize)
invokeMicrotask(TId, 0, fn, args, nargs);
// Synchronize at the end of the region body.
synchronize::threadsAligned(atomic::seq_cst);
}
// Synchronize after the scope above has been left, then check that the state
// is back at the values that were passed as the "old" values to the RAII
// objects (1u, 0u, 0u).
synchronize::threadsAligned(atomic::acq_rel);
state::ParallelTeamSize.assert_eq(1u, ident, true);
icv::ActiveLevel.assert_eq(0u, ident, true);
icv::Level.assert_eq(0u, ident, true);
// Final synchronization point following the checks.
synchronize::threadsAligned(atomic::relaxed);
return;
}
/// Entry point for executing a parallel region.
///
/// Three paths are handled, in this order:
///  1. Serialized: the caller runs \p fn itself when \p if_expr is zero, when
///     `state::HasThreadState` is set, or when nested parallelism may be used
///     and `icv::Level` is already non-zero.
///  2. SPMD mode: forwarded to `__kmpc_parallel_spmd`.
///  3. Otherwise: the thread count is determined. If it is at most 1 the
///     caller runs \p fn with `icv::Level` raised. Otherwise the arguments
///     are copied into a buffer obtained from
///     `__kmpc_begin_sharing_variables`, \p wrapper_fn is published through
///     `state::ParallelRegionFn`, and the caller passes two
///     `synchronize::threads` calls.
///
/// \param ident       Source location descriptor, forwarded to state helpers.
/// \param if_expr     Zero forces the serialized path.
/// \param num_threads Requested thread count, resolved through
///                    `determineNumberOfThreads`.
/// \param proc_bind   Not used in this function.
/// \param fn          Outlined region body, called through `invokeMicrotask`
///                    on the paths where the caller runs the region itself.
/// \param wrapper_fn  Value stored in `state::ParallelRegionFn` on path 3.
/// \param args        Array of pointer arguments for the region.
/// \param nargs       Number of entries in \p args.
///
/// The second parameter is unnamed and unused.
[[clang::always_inline]] void
__kmpc_parallel_51(IdentTy *ident, int32_t, int32_t if_expr,
int32_t num_threads, int proc_bind, void *fn,
void *wrapper_fn, void **args, int64_t nargs) {
uint32_t TId = mapping::getThreadIdInBlock();
// A non-zero level is only acceptable when nested parallelism may be used.
ASSERT((config::mayUseNestedParallelism() || icv::Level == 0),
"nested parallelism while disabled");
// Serialized execution: the calling thread runs the region body itself.
if (OMP_UNLIKELY(!if_expr || state::HasThreadState ||
(config::mayUseNestedParallelism() && icv::Level))) {
// NOTE(review): DERAII presumably sets up a scoped data environment so the
// increment of icv::Level below is undone on return -- confirm in State.h.
state::DateEnvironmentRAII DERAII(ident);
++icv::Level;
invokeMicrotask(TId, 0, fn, args, nargs);
return;
}
// The condition above returned whenever HasThreadState was set.
ASSERT(state::HasThreadState == false, nullptr);
if (mapping::isSPMDMode()) {
// SPMD handling lives in its own routine.
__kmpc_parallel_spmd(ident, num_threads, fn, args, nargs);
return;
}
uint32_t NumThreads = determineNumberOfThreads(num_threads);
uint32_t MaxTeamThreads = mapping::getMaxTeamThreads();
// A team size of 0 encodes "all threads of the team".
uint32_t PTeamSize = NumThreads == MaxTeamThreads ? 0 : NumThreads;
// With at most one thread the region is run by the caller; only icv::Level is
// raised for the duration of the call.
bool IsActiveParallelRegion = NumThreads > 1;
if (!IsActiveParallelRegion) {
state::ValueRAII LevelRAII(icv::Level, 1u, 0u, true, ident);
invokeMicrotask(TId, 0, fn, args, nargs);
return;
}
// Copy the argument pointers into the buffer provided by
// __kmpc_begin_sharing_variables.
void **GlobalArgs = nullptr;
if (nargs) {
__kmpc_begin_sharing_variables(&GlobalArgs, nargs);
// Counts 1..16 are copied through an unrolled fallthrough chain (entering at
// `case N` copies entries N-1 down to 0); every other count uses the loop in
// `default`.
switch (nargs) {
default:
for (int I = 0; I < nargs; I++)
GlobalArgs[I] = args[I];
break;
case 16:
GlobalArgs[15] = args[15];
[[fallthrough]];
case 15:
GlobalArgs[14] = args[14];
[[fallthrough]];
case 14:
GlobalArgs[13] = args[13];
[[fallthrough]];
case 13:
GlobalArgs[12] = args[12];
[[fallthrough]];
case 12:
GlobalArgs[11] = args[11];
[[fallthrough]];
case 11:
GlobalArgs[10] = args[10];
[[fallthrough]];
case 10:
GlobalArgs[9] = args[9];
[[fallthrough]];
case 9:
GlobalArgs[8] = args[8];
[[fallthrough]];
case 8:
GlobalArgs[7] = args[7];
[[fallthrough]];
case 7:
GlobalArgs[6] = args[6];
[[fallthrough]];
case 6:
GlobalArgs[5] = args[5];
[[fallthrough]];
case 5:
GlobalArgs[4] = args[4];
[[fallthrough]];
case 4:
GlobalArgs[3] = args[3];
[[fallthrough]];
case 3:
GlobalArgs[2] = args[2];
[[fallthrough]];
case 2:
GlobalArgs[1] = args[1];
[[fallthrough]];
case 1:
GlobalArgs[0] = args[0];
[[fallthrough]];
case 0:
break;
}
}
{
// Scoped updates of the team size, the region function and the level ICVs.
// NOTE(review): the declaration order (icv::Level last) looks deliberate;
// confirm against State.h before reordering. The trailing `true` is
// presumably a force-team-state flag -- confirm as well.
state::ValueRAII ParallelTeamSizeRAII(state::ParallelTeamSize, PTeamSize,
1u, true, ident,
true);
state::ValueRAII ParallelRegionFnRAII(state::ParallelRegionFn, wrapper_fn,
(void *)nullptr, true, ident,
true);
state::ValueRAII ActiveLevelRAII(icv::ActiveLevel, 1u, 0u, true, ident,
true);
state::ValueRAII LevelRAII(icv::Level, 1u, 0u, true, ident,
true);
// NOTE(review): presumably the first call releases the other threads to run
// the published region function and the second waits for them to finish --
// confirm against the worker loop.
synchronize::threads(atomic::seq_cst);
synchronize::threads(atomic::seq_cst);
}
// Matches the __kmpc_begin_sharing_variables call made when nargs != 0.
if (nargs)
__kmpc_end_sharing_variables();
}
/// Fetch the currently published parallel region function.
///
/// \param[out] WorkFn Receives the value of `state::ParallelRegionFn`.
/// \returns false when no region function is set. Otherwise returns whether
///          the calling thread's id within the block is below
///          `state::getEffectivePTeamSize()`.
[[clang::noinline]] bool __kmpc_kernel_parallel(ParallelRegionFnTy *WorkFn) {
  *WorkFn = state::ParallelRegionFn;

  if (*WorkFn) {
    // A region function is available: only threads below the effective team
    // size are reported as active.
    const uint32_t ThreadId = mapping::getThreadIdInBlock();
    return ThreadId < state::getEffectivePTeamSize();
  }

  // Nothing published.
  return false;
}
/// Reset the state of the calling thread via `state::resetStateForThread`.
///
/// Must not be used in SPMD mode; this is asserted both before and after the
/// reset.
[[clang::noinline]] void __kmpc_kernel_end_parallel() {
  ASSERT(!mapping::isSPMDMode(), nullptr);

  // Reset whatever state is associated with this thread's id in the block.
  state::resetStateForThread(mapping::getThreadIdInBlock());

  ASSERT(!mapping::isSPMDMode(), nullptr);
}
/// Return the result of `omp_get_level()`, converted to `uint16_t`. Both
/// parameters are unnamed and ignored.
uint16_t __kmpc_parallel_level(IdentTy *, uint32_t) { return omp_get_level(); }
/// Return the result of `omp_get_thread_num()`. The location argument is
/// unnamed and ignored.
int32_t __kmpc_global_thread_num(IdentTy *) { return omp_get_thread_num(); }
/// No-op in this file: the body is empty and all arguments are ignored.
void __kmpc_push_num_teams(IdentTy *loc, int32_t tid, int32_t num_teams,
int32_t thread_limit) {}
/// No-op in this file: the body is empty and all arguments are ignored.
void __kmpc_push_proc_bind(IdentTy *loc, uint32_t tid, int proc_bind) {}
}
#pragma omp end declare target