aboutsummaryrefslogtreecommitdiff
path: root/lib/CodeGen
diff options
context:
space:
mode:
Diffstat (limited to 'lib/CodeGen')
-rw-r--r--lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp64
-rw-r--r--lib/CodeGen/CGOpenMPRuntimeNVPTX.h32
-rw-r--r--lib/CodeGen/CGStmt.cpp4
-rw-r--r--lib/CodeGen/CGStmtOpenMP.cpp28
-rw-r--r--lib/CodeGen/CodeGenFunction.cpp27
-rw-r--r--lib/CodeGen/CodeGenFunction.h4
6 files changed, 89 insertions, 70 deletions
diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index 451f9e9221ad..fe0e2acdfdbf 100644
--- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -20,53 +20,64 @@
using namespace clang;
using namespace CodeGen;
-/// \brief Get the GPU warp size.
-llvm::Value *CGOpenMPRuntimeNVPTX::getNVPTXWarpSize(CodeGenFunction &CGF) {
+namespace {
+enum OpenMPRTLFunctionNVPTX {
+ /// \brief Call to void __kmpc_kernel_init(kmp_int32 omp_handle,
+ /// kmp_int32 thread_limit);
+ OMPRTL_NVPTX__kmpc_kernel_init,
+};
+
+// NVPTX Address space
+enum AddressSpace {
+ AddressSpaceShared = 3,
+};
+} // namespace
+
+/// Get the GPU warp size.
+static llvm::Value *getNVPTXWarpSize(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
return Bld.CreateCall(
llvm::Intrinsic::getDeclaration(
- &CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize),
+ &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize),
llvm::None, "nvptx_warp_size");
}
-/// \brief Get the id of the current thread on the GPU.
-llvm::Value *CGOpenMPRuntimeNVPTX::getNVPTXThreadID(CodeGenFunction &CGF) {
+/// Get the id of the current thread on the GPU.
+static llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
return Bld.CreateCall(
llvm::Intrinsic::getDeclaration(
- &CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x),
+ &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x),
llvm::None, "nvptx_tid");
}
-// \brief Get the maximum number of threads in a block of the GPU.
-llvm::Value *CGOpenMPRuntimeNVPTX::getNVPTXNumThreads(CodeGenFunction &CGF) {
+/// Get the maximum number of threads in a block of the GPU.
+static llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
return Bld.CreateCall(
llvm::Intrinsic::getDeclaration(
- &CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x),
+ &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x),
llvm::None, "nvptx_num_threads");
}
-/// \brief Get barrier to synchronize all threads in a block.
-void CGOpenMPRuntimeNVPTX::getNVPTXCTABarrier(CodeGenFunction &CGF) {
+/// Get barrier to synchronize all threads in a block.
+static void getNVPTXCTABarrier(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
Bld.CreateCall(llvm::Intrinsic::getDeclaration(
- &CGM.getModule(), llvm::Intrinsic::nvvm_barrier0));
+ &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_barrier0));
}
-// \brief Synchronize all GPU threads in a block.
-void CGOpenMPRuntimeNVPTX::syncCTAThreads(CodeGenFunction &CGF) {
- getNVPTXCTABarrier(CGF);
-}
+/// Synchronize all GPU threads in a block.
+static void syncCTAThreads(CodeGenFunction &CGF) { getNVPTXCTABarrier(CGF); }
-/// \brief Get the thread id of the OMP master thread.
+/// Get the thread id of the OMP master thread.
/// The master thread id is the first thread (lane) of the last warp in the
/// GPU block. Warp size is assumed to be some power of 2.
/// Thread id is 0 indexed.
/// E.g: If NumThreads is 33, master id is 32.
/// If NumThreads is 64, master id is 32.
/// If NumThreads is 1024, master id is 992.
-llvm::Value *CGOpenMPRuntimeNVPTX::getMasterThreadID(CodeGenFunction &CGF) {
+static llvm::Value *getMasterThreadID(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
llvm::Value *NumThreads = getNVPTXNumThreads(CGF);
@@ -77,19 +88,6 @@ llvm::Value *CGOpenMPRuntimeNVPTX::getMasterThreadID(CodeGenFunction &CGF) {
Bld.CreateNot(Mask), "master_tid");
}
-namespace {
-enum OpenMPRTLFunctionNVPTX {
- /// \brief Call to void __kmpc_kernel_init(kmp_int32 omp_handle,
- /// kmp_int32 thread_limit);
- OMPRTL_NVPTX__kmpc_kernel_init,
-};
-
-// NVPTX Address space
-enum ADDRESS_SPACE {
- ADDRESS_SPACE_SHARED = 3,
-};
-} // namespace
-
CGOpenMPRuntimeNVPTX::WorkerFunctionState::WorkerFunctionState(
CodeGenModule &CGM)
: WorkerFn(nullptr), CGFI(nullptr) {
@@ -119,14 +117,14 @@ void CGOpenMPRuntimeNVPTX::initializeEnvironment() {
CGM.getModule(), CGM.Int32Ty, /*isConstant=*/false,
llvm::GlobalValue::CommonLinkage,
llvm::Constant::getNullValue(CGM.Int32Ty), "__omp_num_threads", 0,
- llvm::GlobalVariable::NotThreadLocal, ADDRESS_SPACE_SHARED);
+ llvm::GlobalVariable::NotThreadLocal, AddressSpaceShared);
ActiveWorkers->setAlignment(DL.getPrefTypeAlignment(CGM.Int32Ty));
WorkID = new llvm::GlobalVariable(
CGM.getModule(), CGM.Int64Ty, /*isConstant=*/false,
llvm::GlobalValue::CommonLinkage,
llvm::Constant::getNullValue(CGM.Int64Ty), "__tgt_work_id", 0,
- llvm::GlobalVariable::NotThreadLocal, ADDRESS_SPACE_SHARED);
+ llvm::GlobalVariable::NotThreadLocal, AddressSpaceShared);
WorkID->setAlignment(DL.getPrefTypeAlignment(CGM.Int64Ty));
}
diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
index e18d28cdda9f..a33fb27579f6 100644
--- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
@@ -50,38 +50,6 @@ public:
private:
//
- // NVPTX calls.
- //
-
- /// \brief Get the GPU warp size.
- llvm::Value *getNVPTXWarpSize(CodeGenFunction &CGF);
-
- /// \brief Get the id of the current thread on the GPU.
- llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF);
-
- // \brief Get the maximum number of threads in a block of the GPU.
- llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF);
-
- /// \brief Get barrier to synchronize all threads in a block.
- void getNVPTXCTABarrier(CodeGenFunction &CGF);
-
- // \brief Synchronize all GPU threads in a block.
- void syncCTAThreads(CodeGenFunction &CGF);
-
- //
- // OMP calls.
- //
-
- /// \brief Get the thread id of the OMP master thread.
- /// The master thread id is the first thread (lane) of the last warp in the
- /// GPU block. Warp size is assumed to be some power of 2.
- /// Thread id is 0 indexed.
- /// E.g: If NumThreads is 33, master id is 32.
- /// If NumThreads is 64, master id is 32.
- /// If NumThreads is 1024, master id is 992.
- llvm::Value *getMasterThreadID(CodeGenFunction &CGF);
-
- //
// Private state and methods.
//
diff --git a/lib/CodeGen/CGStmt.cpp b/lib/CodeGen/CGStmt.cpp
index f2acb798b881..8d391f95d9f7 100644
--- a/lib/CodeGen/CGStmt.cpp
+++ b/lib/CodeGen/CGStmt.cpp
@@ -326,6 +326,10 @@ void CodeGenFunction::EmitStmt(const Stmt *S) {
EmitOMPTargetTeamsDistributeParallelForDirective(
cast<OMPTargetTeamsDistributeParallelForDirective>(*S));
break;
+ case Stmt::OMPTargetTeamsDistributeParallelForSimdDirectiveClass:
+ EmitOMPTargetTeamsDistributeParallelForSimdDirective(
+ cast<OMPTargetTeamsDistributeParallelForSimdDirective>(*S));
+ break;
}
}
diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp
index ba39e1fbd41f..386c4f0fe69c 100644
--- a/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/lib/CodeGen/CGStmtOpenMP.cpp
@@ -2032,6 +2032,16 @@ void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
});
}
+void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
+ const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
+ CGM.getOpenMPRuntime().emitInlinedDirective(
+ *this, OMPD_target_teams_distribute_parallel_for_simd,
+ [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+ CGF.EmitStmt(
+ cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+ });
+}
+
/// \brief Emit a helper variable and return corresponding lvalue.
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
const DeclRefExpr *Helper) {
@@ -2760,6 +2770,7 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
auto &RT = CGM.getOpenMPRuntime();
+ bool HasLastprivateClause = false;
// Check pre-condition.
{
OMPLoopScope PreInitScope(*this, S);
@@ -2793,6 +2804,16 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
OMPPrivateScope LoopScope(*this);
+ if (EmitOMPFirstprivateClause(S, LoopScope)) {
+ // Emit implicit barrier to synchronize threads and avoid data races on
+ // initialization of firstprivate variables and post-update of
+ // lastprivate variables.
+ CGM.getOpenMPRuntime().emitBarrierCall(
+ *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
+ /*ForceSimpleCall=*/true);
+ }
+ EmitOMPPrivateClause(S, LoopScope);
+ HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
EmitOMPPrivateLoopCounters(S, LoopScope);
(void)LoopScope.Privatize();
@@ -2849,6 +2870,13 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
LB.getAddress(), UB.getAddress(), ST.getAddress(),
IL.getAddress(), Chunk);
}
+
+ // Emit final copy of the lastprivate variables if IsLastIter != 0.
+ if (HasLastprivateClause)
+ EmitOMPLastprivateClauseFinal(
+ S, /*NoFinals=*/false,
+ Builder.CreateIsNotNull(
+ EmitLoadOfScalar(IL, S.getLocStart())));
}
// We're now done with the loop, so jump to the continuation block.
diff --git a/lib/CodeGen/CodeGenFunction.cpp b/lib/CodeGen/CodeGenFunction.cpp
index a954f487d1e4..7cab13de923b 100644
--- a/lib/CodeGen/CodeGenFunction.cpp
+++ b/lib/CodeGen/CodeGenFunction.cpp
@@ -1049,6 +1049,19 @@ QualType CodeGenFunction::BuildFunctionArgList(GlobalDecl GD,
return ResTy;
}
+static bool
+shouldUseUndefinedBehaviorReturnOptimization(const FunctionDecl *FD,
+ const ASTContext &Context) {
+ QualType T = FD->getReturnType();
+ // Avoid the optimization for functions that return a record type with a
+ // trivial destructor or another trivially copyable type.
+ if (const RecordType *RT = T.getCanonicalType()->getAs<RecordType>()) {
+ if (const auto *ClassDecl = dyn_cast<CXXRecordDecl>(RT->getDecl()))
+ return !ClassDecl->hasTrivialDestructor();
+ }
+ return !T.isTriviallyCopyableType(Context);
+}
+
void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
const CGFunctionInfo &FnInfo) {
const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
@@ -1127,17 +1140,23 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
// function call is used by the caller, the behavior is undefined.
if (getLangOpts().CPlusPlus && !FD->hasImplicitReturnZero() && !SawAsmBlock &&
!FD->getReturnType()->isVoidType() && Builder.GetInsertBlock()) {
+ bool ShouldEmitUnreachable =
+ CGM.getCodeGenOpts().StrictReturn ||
+ shouldUseUndefinedBehaviorReturnOptimization(FD, getContext());
if (SanOpts.has(SanitizerKind::Return)) {
SanitizerScope SanScope(this);
llvm::Value *IsFalse = Builder.getFalse();
EmitCheck(std::make_pair(IsFalse, SanitizerKind::Return),
SanitizerHandler::MissingReturn,
EmitCheckSourceLocation(FD->getLocation()), None);
- } else if (CGM.getCodeGenOpts().OptimizationLevel == 0) {
- EmitTrapCall(llvm::Intrinsic::trap);
+ } else if (ShouldEmitUnreachable) {
+ if (CGM.getCodeGenOpts().OptimizationLevel == 0)
+ EmitTrapCall(llvm::Intrinsic::trap);
+ }
+ if (SanOpts.has(SanitizerKind::Return) || ShouldEmitUnreachable) {
+ Builder.CreateUnreachable();
+ Builder.ClearInsertionPoint();
}
- Builder.CreateUnreachable();
- Builder.ClearInsertionPoint();
}
// Emit the standard function epilogue.
diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h
index 222d0e97968a..1347f54df9ac 100644
--- a/lib/CodeGen/CodeGenFunction.h
+++ b/lib/CodeGen/CodeGenFunction.h
@@ -2699,6 +2699,8 @@ public:
const OMPTargetTeamsDistributeDirective &S);
void EmitOMPTargetTeamsDistributeParallelForDirective(
const OMPTargetTeamsDistributeParallelForDirective &S);
+ void EmitOMPTargetTeamsDistributeParallelForSimdDirective(
+ const OMPTargetTeamsDistributeParallelForSimdDirective &S);
/// Emit outlined function for the target directive.
static std::pair<llvm::Function * /*OutlinedFn*/,
@@ -3569,7 +3571,7 @@ public:
// If we still have any arguments, emit them using the type of the argument.
for (auto *A : llvm::make_range(Arg, ArgRange.end()))
- ArgTypes.push_back(getVarArgType(A));
+ ArgTypes.push_back(CallArgTypeInfo ? getVarArgType(A) : A->getType());
EmitCallArgs(Args, ArgTypes, ArgRange, CalleeDecl, ParamsToSkip, Order);
}