diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2022-07-14 18:50:02 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2022-07-14 18:50:02 +0000 |
commit | 1f917f69ff07f09b6dbb670971f57f8efe718b84 (patch) | |
tree | 99293cbc1411737cd995dac10a99b2c40ef0944c /clang/lib/CodeGen/CGOpenMPRuntime.cpp | |
parent | 145449b1e420787bb99721a429341fa6be3adfb6 (diff) | |
download | src-1f917f69ff07f09b6dbb670971f57f8efe718b84.tar.gz src-1f917f69ff07f09b6dbb670971f57f8efe718b84.zip |
Vendor import of llvm-project main llvmorg-15-init-16436-g18a6ab5b8d1f.
vendor/llvm-project/llvmorg-15-init-16436-g18a6ab5b8d1f
Diffstat (limited to 'clang/lib/CodeGen/CGOpenMPRuntime.cpp')
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntime.cpp | 140 |
1 file changed, 41 insertions, 99 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 305040b01c08..091eb9da5af4 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -6717,11 +6717,9 @@ llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective( default: break; } - } else if (DefaultNT == -1) { - return nullptr; } - return Bld.getInt32(DefaultNT); + return llvm::ConstantInt::get(CGF.Int32Ty, DefaultNT); } static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, @@ -10189,9 +10187,8 @@ llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc( return UDMMap.lookup(D); } -void CGOpenMPRuntime::emitTargetNumIterationsCall( +llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall( CodeGenFunction &CGF, const OMPExecutableDirective &D, - llvm::Value *DeviceID, llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter) { @@ -10201,20 +10198,12 @@ void CGOpenMPRuntime::emitTargetNumIterationsCall( if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) TD = getNestedDistributeDirective(CGM.getContext(), D); if (!TD) - return; + return llvm::ConstantInt::get(CGF.Int64Ty, 0); + const auto *LD = cast<OMPLoopDirective>(TD); - auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF, - PrePostActionTy &) { - if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) { - llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); - llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations}; - CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper), - Args); - } - }; - emitInlinedDirective(CGF, OMPD_unknown, CodeGen); + if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) + return NumIterations; + return llvm::ConstantInt::get(CGF.Int64Ty, 0); } void CGOpenMPRuntime::emitTargetCall( @@ -10308,26 +10297,34 @@ void CGOpenMPRuntime::emitTargetCall( // 
Source location for the ident struct llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); - // Emit tripcount for the target loop-based directive. - emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter); + // Get tripcount for the target loop-based directive. + llvm::Value *NumIterations = + emitTargetNumIterationsCall(CGF, D, SizeEmitter); + + // Arguments for the target kernel. + SmallVector<llvm::Value *> KernelArgs{ + CGF.Builder.getInt32(/* Version */ 1), + PointerNum, + InputInfo.BasePointersArray.getPointer(), + InputInfo.PointersArray.getPointer(), + InputInfo.SizesArray.getPointer(), + MapTypesArray, + MapNamesArray, + InputInfo.MappersArray.getPointer(), + NumIterations}; + + // Arguments passed to the 'nowait' variant. + SmallVector<llvm::Value *> NoWaitKernelArgs{ + CGF.Builder.getInt32(0), + llvm::ConstantPointerNull::get(CGM.VoidPtrTy), + CGF.Builder.getInt32(0), + llvm::ConstantPointerNull::get(CGM.VoidPtrTy), + }; + + bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>(); - bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); // The target region is an outlined function launched by the runtime - // via calls __tgt_target() or __tgt_target_teams(). - // - // __tgt_target() launches a target region with one team and one thread, - // executing a serial region. This master thread may in turn launch - // more threads within its team upon encountering a parallel region, - // however, no additional teams can be launched on the device. - // - // __tgt_target_teams() launches a target region with one or more teams, - // each with one or more threads. This call is required for target - // constructs such as: - // 'target teams' - // 'target' / 'teams' - // 'target teams distribute parallel for' - // 'target parallel' - // and so on. + // via calls to __tgt_target_kernel(). // // Note that on the host and CPU targets, the runtime implementation of // these calls simply call the outlined function without forking threads. 
@@ -10338,70 +10335,15 @@ void CGOpenMPRuntime::emitTargetCall( // In contrast, on the NVPTX target, the implementation of // __tgt_target_teams() launches a GPU kernel with the requested number // of teams and threads so no additional calls to the runtime are required. - if (NumTeams) { - // If we have NumTeams defined this means that we have an enclosed teams - // region. Therefore we also expect to have NumThreads defined. These two - // values should be defined in the presence of a teams directive, - // regardless of having any clauses associated. If the user is using teams - // but no clauses, these two values will be the default that should be - // passed to the runtime library - a 32-bit integer with the value zero. - assert(NumThreads && "Thread limit expression should be available along " - "with number of teams."); - SmallVector<llvm::Value *> OffloadingArgs = { - RTLoc, - DeviceID, - OutlinedFnID, - PointerNum, - InputInfo.BasePointersArray.getPointer(), - InputInfo.PointersArray.getPointer(), - InputInfo.SizesArray.getPointer(), - MapTypesArray, - MapNamesArray, - InputInfo.MappersArray.getPointer(), - NumTeams, - NumThreads}; - if (HasNowait) { - // Add int32_t depNum = 0, void *depList = nullptr, int32_t - // noAliasDepNum = 0, void *noAliasDepList = nullptr. - OffloadingArgs.push_back(CGF.Builder.getInt32(0)); - OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy)); - OffloadingArgs.push_back(CGF.Builder.getInt32(0)); - OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy)); - } - Return = CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), HasNowait - ? 
OMPRTL___tgt_target_teams_nowait_mapper - : OMPRTL___tgt_target_teams_mapper), - OffloadingArgs); - } else { - SmallVector<llvm::Value *> OffloadingArgs = { - RTLoc, - DeviceID, - OutlinedFnID, - PointerNum, - InputInfo.BasePointersArray.getPointer(), - InputInfo.PointersArray.getPointer(), - InputInfo.SizesArray.getPointer(), - MapTypesArray, - MapNamesArray, - InputInfo.MappersArray.getPointer()}; - if (HasNowait) { - // Add int32_t depNum = 0, void *depList = nullptr, int32_t - // noAliasDepNum = 0, void *noAliasDepList = nullptr. - OffloadingArgs.push_back(CGF.Builder.getInt32(0)); - OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy)); - OffloadingArgs.push_back(CGF.Builder.getInt32(0)); - OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy)); - } - Return = CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper - : OMPRTL___tgt_target_mapper), - OffloadingArgs); - } - // Check the error code and execute the host version if required. + CGF.Builder.restoreIP( + HasNoWait ? OMPBuilder.emitTargetKernel( + CGF.Builder, Return, RTLoc, DeviceID, NumTeams, + NumThreads, OutlinedFnID, KernelArgs, NoWaitKernelArgs) + : OMPBuilder.emitTargetKernel(CGF.Builder, Return, RTLoc, + DeviceID, NumTeams, NumThreads, + OutlinedFnID, KernelArgs)); + llvm::BasicBlock *OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed"); llvm::BasicBlock *OffloadContBlock = |