about | summary | refs | log | tree | commit | diff
path: root/clang/lib/CodeGen/CGOpenMPRuntime.cpp
diff options
context:
space:
mode:
author: Dimitry Andric <dim@FreeBSD.org> 2022-07-14 18:50:02 +0000
committer: Dimitry Andric <dim@FreeBSD.org> 2022-07-14 18:50:02 +0000
commit: 1f917f69ff07f09b6dbb670971f57f8efe718b84 (patch)
tree: 99293cbc1411737cd995dac10a99b2c40ef0944c /clang/lib/CodeGen/CGOpenMPRuntime.cpp
parent: 145449b1e420787bb99721a429341fa6be3adfb6 (diff)
download: src-1f917f69ff07f09b6dbb670971f57f8efe718b84.tar.gz
download: src-1f917f69ff07f09b6dbb670971f57f8efe718b84.zip
Vendor import of llvm-project main llvmorg-15-init-16436-g18a6ab5b8d1f. [vendor/llvm-project/llvmorg-15-init-16436-g18a6ab5b8d1f]
Diffstat (limited to 'clang/lib/CodeGen/CGOpenMPRuntime.cpp')
-rw-r--r-- clang/lib/CodeGen/CGOpenMPRuntime.cpp 140
1 files changed, 41 insertions, 99 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 305040b01c08..091eb9da5af4 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -6717,11 +6717,9 @@ llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
default:
break;
}
- } else if (DefaultNT == -1) {
- return nullptr;
}
- return Bld.getInt32(DefaultNT);
+ return llvm::ConstantInt::get(CGF.Int32Ty, DefaultNT);
}
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
@@ -10189,9 +10187,8 @@ llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
return UDMMap.lookup(D);
}
-void CGOpenMPRuntime::emitTargetNumIterationsCall(
+llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
CodeGenFunction &CGF, const OMPExecutableDirective &D,
- llvm::Value *DeviceID,
llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
const OMPLoopDirective &D)>
SizeEmitter) {
@@ -10201,20 +10198,12 @@ void CGOpenMPRuntime::emitTargetNumIterationsCall(
if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
TD = getNestedDistributeDirective(CGM.getContext(), D);
if (!TD)
- return;
+ return llvm::ConstantInt::get(CGF.Int64Ty, 0);
+
const auto *LD = cast<OMPLoopDirective>(TD);
- auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
- PrePostActionTy &) {
- if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
- llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
- llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
- CGF.EmitRuntimeCall(
- OMPBuilder.getOrCreateRuntimeFunction(
- CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
- Args);
- }
- };
- emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
+ if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
+ return NumIterations;
+ return llvm::ConstantInt::get(CGF.Int64Ty, 0);
}
void CGOpenMPRuntime::emitTargetCall(
@@ -10308,26 +10297,34 @@ void CGOpenMPRuntime::emitTargetCall(
// Source location for the ident struct
llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
- // Emit tripcount for the target loop-based directive.
- emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
+ // Get tripcount for the target loop-based directive.
+ llvm::Value *NumIterations =
+ emitTargetNumIterationsCall(CGF, D, SizeEmitter);
+
+ // Arguments for the target kernel.
+ SmallVector<llvm::Value *> KernelArgs{
+ CGF.Builder.getInt32(/* Version */ 1),
+ PointerNum,
+ InputInfo.BasePointersArray.getPointer(),
+ InputInfo.PointersArray.getPointer(),
+ InputInfo.SizesArray.getPointer(),
+ MapTypesArray,
+ MapNamesArray,
+ InputInfo.MappersArray.getPointer(),
+ NumIterations};
+
+ // Arguments passed to the 'nowait' variant.
+ SmallVector<llvm::Value *> NoWaitKernelArgs{
+ CGF.Builder.getInt32(0),
+ llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
+ CGF.Builder.getInt32(0),
+ llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
+ };
+
+ bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
- bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
// The target region is an outlined function launched by the runtime
- // via calls __tgt_target() or __tgt_target_teams().
- //
- // __tgt_target() launches a target region with one team and one thread,
- // executing a serial region. This master thread may in turn launch
- // more threads within its team upon encountering a parallel region,
- // however, no additional teams can be launched on the device.
- //
- // __tgt_target_teams() launches a target region with one or more teams,
- // each with one or more threads. This call is required for target
- // constructs such as:
- // 'target teams'
- // 'target' / 'teams'
- // 'target teams distribute parallel for'
- // 'target parallel'
- // and so on.
+ // via calls to __tgt_target_kernel().
//
// Note that on the host and CPU targets, the runtime implementation of
// these calls simply call the outlined function without forking threads.
@@ -10338,70 +10335,15 @@ void CGOpenMPRuntime::emitTargetCall(
// In contrast, on the NVPTX target, the implementation of
// __tgt_target_kernel() launches a GPU kernel with the requested number
// of teams and threads so no additional calls to the runtime are required.
- if (NumTeams) {
- // If we have NumTeams defined this means that we have an enclosed teams
- // region. Therefore we also expect to have NumThreads defined. These two
- // values should be defined in the presence of a teams directive,
- // regardless of having any clauses associated. If the user is using teams
- // but no clauses, these two values will be the default that should be
- // passed to the runtime library - a 32-bit integer with the value zero.
- assert(NumThreads && "Thread limit expression should be available along "
- "with number of teams.");
- SmallVector<llvm::Value *> OffloadingArgs = {
- RTLoc,
- DeviceID,
- OutlinedFnID,
- PointerNum,
- InputInfo.BasePointersArray.getPointer(),
- InputInfo.PointersArray.getPointer(),
- InputInfo.SizesArray.getPointer(),
- MapTypesArray,
- MapNamesArray,
- InputInfo.MappersArray.getPointer(),
- NumTeams,
- NumThreads};
- if (HasNowait) {
- // Add int32_t depNum = 0, void *depList = nullptr, int32_t
- // noAliasDepNum = 0, void *noAliasDepList = nullptr.
- OffloadingArgs.push_back(CGF.Builder.getInt32(0));
- OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
- OffloadingArgs.push_back(CGF.Builder.getInt32(0));
- OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
- }
- Return = CGF.EmitRuntimeCall(
- OMPBuilder.getOrCreateRuntimeFunction(
- CGM.getModule(), HasNowait
- ? OMPRTL___tgt_target_teams_nowait_mapper
- : OMPRTL___tgt_target_teams_mapper),
- OffloadingArgs);
- } else {
- SmallVector<llvm::Value *> OffloadingArgs = {
- RTLoc,
- DeviceID,
- OutlinedFnID,
- PointerNum,
- InputInfo.BasePointersArray.getPointer(),
- InputInfo.PointersArray.getPointer(),
- InputInfo.SizesArray.getPointer(),
- MapTypesArray,
- MapNamesArray,
- InputInfo.MappersArray.getPointer()};
- if (HasNowait) {
- // Add int32_t depNum = 0, void *depList = nullptr, int32_t
- // noAliasDepNum = 0, void *noAliasDepList = nullptr.
- OffloadingArgs.push_back(CGF.Builder.getInt32(0));
- OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
- OffloadingArgs.push_back(CGF.Builder.getInt32(0));
- OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
- }
- Return = CGF.EmitRuntimeCall(
- OMPBuilder.getOrCreateRuntimeFunction(
- CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
- : OMPRTL___tgt_target_mapper),
- OffloadingArgs);
- }
-
// Check the error code and execute the host version if required.
+ CGF.Builder.restoreIP(
+ HasNoWait ? OMPBuilder.emitTargetKernel(
+ CGF.Builder, Return, RTLoc, DeviceID, NumTeams,
+ NumThreads, OutlinedFnID, KernelArgs, NoWaitKernelArgs)
+ : OMPBuilder.emitTargetKernel(CGF.Builder, Return, RTLoc,
+ DeviceID, NumTeams, NumThreads,
+ OutlinedFnID, KernelArgs));
+
llvm::BasicBlock *OffloadFailedBlock =
CGF.createBasicBlock("omp_offload.failed");
llvm::BasicBlock *OffloadContBlock =