From 0c75eea8f661a82866688fd1fc4465883c4dd7d5 Mon Sep 17 00:00:00 2001
From: Dimitry Andric
Date: Wed, 4 Jan 2017 22:11:23 +0000
Subject: Vendor import of clang trunk r291012:
 https://llvm.org/svn/llvm-project/cfe/trunk@291012

---
 lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 64 +++++++++++++++++-------------------
 lib/CodeGen/CGOpenMPRuntimeNVPTX.h   | 32 ------------------
 lib/CodeGen/CGStmt.cpp               |  4 +++
 lib/CodeGen/CGStmtOpenMP.cpp         | 28 ++++++++++++++++
 lib/CodeGen/CodeGenFunction.cpp      | 27 ++++++++++++---
 lib/CodeGen/CodeGenFunction.h        |  4 ++-
 6 files changed, 89 insertions(+), 70 deletions(-)

(limited to 'lib/CodeGen')

diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index 451f9e9221ad..fe0e2acdfdbf 100644
--- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -20,53 +20,64 @@
 using namespace clang;
 using namespace CodeGen;
 
-/// \brief Get the GPU warp size.
-llvm::Value *CGOpenMPRuntimeNVPTX::getNVPTXWarpSize(CodeGenFunction &CGF) {
+namespace {
+enum OpenMPRTLFunctionNVPTX {
+  /// \brief Call to void __kmpc_kernel_init(kmp_int32 omp_handle,
+  /// kmp_int32 thread_limit);
+  OMPRTL_NVPTX__kmpc_kernel_init,
+};
+
+// NVPTX Address space
+enum AddressSpace {
+  AddressSpaceShared = 3,
+};
+} // namespace
+
+/// Get the GPU warp size.
+static llvm::Value *getNVPTXWarpSize(CodeGenFunction &CGF) {
   CGBuilderTy &Bld = CGF.Builder;
   return Bld.CreateCall(
       llvm::Intrinsic::getDeclaration(
-          &CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize),
+          &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize),
       llvm::None, "nvptx_warp_size");
 }
 
-/// \brief Get the id of the current thread on the GPU.
-llvm::Value *CGOpenMPRuntimeNVPTX::getNVPTXThreadID(CodeGenFunction &CGF) {
+/// Get the id of the current thread on the GPU.
+static llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF) {
   CGBuilderTy &Bld = CGF.Builder;
   return Bld.CreateCall(
       llvm::Intrinsic::getDeclaration(
-          &CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x),
+          &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x),
       llvm::None, "nvptx_tid");
 }
 
-// \brief Get the maximum number of threads in a block of the GPU.
-llvm::Value *CGOpenMPRuntimeNVPTX::getNVPTXNumThreads(CodeGenFunction &CGF) {
+/// Get the maximum number of threads in a block of the GPU.
+static llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF) {
   CGBuilderTy &Bld = CGF.Builder;
   return Bld.CreateCall(
       llvm::Intrinsic::getDeclaration(
-          &CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x),
+          &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x),
       llvm::None, "nvptx_num_threads");
 }
 
-/// \brief Get barrier to synchronize all threads in a block.
-void CGOpenMPRuntimeNVPTX::getNVPTXCTABarrier(CodeGenFunction &CGF) {
+/// Get barrier to synchronize all threads in a block.
+static void getNVPTXCTABarrier(CodeGenFunction &CGF) {
   CGBuilderTy &Bld = CGF.Builder;
   Bld.CreateCall(llvm::Intrinsic::getDeclaration(
-      &CGM.getModule(), llvm::Intrinsic::nvvm_barrier0));
+      &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_barrier0));
 }
 
-// \brief Synchronize all GPU threads in a block.
-void CGOpenMPRuntimeNVPTX::syncCTAThreads(CodeGenFunction &CGF) {
-  getNVPTXCTABarrier(CGF);
-}
+/// Synchronize all GPU threads in a block.
+static void syncCTAThreads(CodeGenFunction &CGF) { getNVPTXCTABarrier(CGF); }
 
-/// \brief Get the thread id of the OMP master thread.
+/// Get the thread id of the OMP master thread.
 /// The master thread id is the first thread (lane) of the last warp in the
 /// GPU block. Warp size is assumed to be some power of 2.
 /// Thread id is 0 indexed.
 /// E.g: If NumThreads is 33, master id is 32.
 ///      If NumThreads is 64, master id is 32.
 ///      If NumThreads is 1024, master id is 992.
-llvm::Value *CGOpenMPRuntimeNVPTX::getMasterThreadID(CodeGenFunction &CGF) {
+static llvm::Value *getMasterThreadID(CodeGenFunction &CGF) {
   CGBuilderTy &Bld = CGF.Builder;
 
   llvm::Value *NumThreads = getNVPTXNumThreads(CGF);
@@ -77,19 +88,6 @@ llvm::Value *CGOpenMPRuntimeNVPTX::getMasterThreadID(CodeGenFunction &CGF) {
                        Bld.CreateNot(Mask), "master_tid");
 }
 
-namespace {
-enum OpenMPRTLFunctionNVPTX {
-  /// \brief Call to void __kmpc_kernel_init(kmp_int32 omp_handle,
-  /// kmp_int32 thread_limit);
-  OMPRTL_NVPTX__kmpc_kernel_init,
-};
-
-// NVPTX Address space
-enum ADDRESS_SPACE {
-  ADDRESS_SPACE_SHARED = 3,
-};
-} // namespace
-
 CGOpenMPRuntimeNVPTX::WorkerFunctionState::WorkerFunctionState(
     CodeGenModule &CGM)
     : WorkerFn(nullptr), CGFI(nullptr) {
@@ -119,14 +117,14 @@ void CGOpenMPRuntimeNVPTX::initializeEnvironment() {
       CGM.getModule(), CGM.Int32Ty, /*isConstant=*/false,
       llvm::GlobalValue::CommonLinkage,
       llvm::Constant::getNullValue(CGM.Int32Ty), "__omp_num_threads", 0,
-      llvm::GlobalVariable::NotThreadLocal, ADDRESS_SPACE_SHARED);
+      llvm::GlobalVariable::NotThreadLocal, AddressSpaceShared);
   ActiveWorkers->setAlignment(DL.getPrefTypeAlignment(CGM.Int32Ty));
 
   WorkID = new llvm::GlobalVariable(
       CGM.getModule(), CGM.Int64Ty, /*isConstant=*/false,
       llvm::GlobalValue::CommonLinkage,
       llvm::Constant::getNullValue(CGM.Int64Ty), "__tgt_work_id", 0,
-      llvm::GlobalVariable::NotThreadLocal, ADDRESS_SPACE_SHARED);
+      llvm::GlobalVariable::NotThreadLocal, AddressSpaceShared);
   WorkID->setAlignment(DL.getPrefTypeAlignment(CGM.Int64Ty));
 }
 
diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
index e18d28cdda9f..a33fb27579f6 100644
--- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
@@ -49,38 +49,6 @@ public:
   void emitEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST);
 
 private:
-  //
-  // NVPTX calls.
-  //
-
-  /// \brief Get the GPU warp size.
-  llvm::Value *getNVPTXWarpSize(CodeGenFunction &CGF);
-
-  /// \brief Get the id of the current thread on the GPU.
-  llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF);
-
-  // \brief Get the maximum number of threads in a block of the GPU.
-  llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF);
-
-  /// \brief Get barrier to synchronize all threads in a block.
-  void getNVPTXCTABarrier(CodeGenFunction &CGF);
-
-  // \brief Synchronize all GPU threads in a block.
-  void syncCTAThreads(CodeGenFunction &CGF);
-
-  //
-  // OMP calls.
-  //
-
-  /// \brief Get the thread id of the OMP master thread.
-  /// The master thread id is the first thread (lane) of the last warp in the
-  /// GPU block. Warp size is assumed to be some power of 2.
-  /// Thread id is 0 indexed.
-  /// E.g: If NumThreads is 33, master id is 32.
-  ///      If NumThreads is 64, master id is 32.
-  ///      If NumThreads is 1024, master id is 992.
-  llvm::Value *getMasterThreadID(CodeGenFunction &CGF);
-
   //
   // Private state and methods.
   //
diff --git a/lib/CodeGen/CGStmt.cpp b/lib/CodeGen/CGStmt.cpp
index f2acb798b881..8d391f95d9f7 100644
--- a/lib/CodeGen/CGStmt.cpp
+++ b/lib/CodeGen/CGStmt.cpp
@@ -326,6 +326,10 @@ void CodeGenFunction::EmitStmt(const Stmt *S) {
     EmitOMPTargetTeamsDistributeParallelForDirective(
         cast<OMPTargetTeamsDistributeParallelForDirective>(*S));
     break;
+  case Stmt::OMPTargetTeamsDistributeParallelForSimdDirectiveClass:
+    EmitOMPTargetTeamsDistributeParallelForSimdDirective(
+        cast<OMPTargetTeamsDistributeParallelForSimdDirective>(*S));
+    break;
   }
 }
 
diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp
index ba39e1fbd41f..386c4f0fe69c 100644
--- a/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/lib/CodeGen/CGStmtOpenMP.cpp
@@ -2032,6 +2032,16 @@ void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
       });
 }
 
+void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
+    const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
+  CGM.getOpenMPRuntime().emitInlinedDirective(
+      *this, OMPD_target_teams_distribute_parallel_for_simd,
+      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+        CGF.EmitStmt(
+            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+      });
+}
+
 /// \brief Emit a helper variable and return corresponding lvalue.
 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
                                const DeclRefExpr *Helper) {
@@ -2760,6 +2770,7 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
 
   auto &RT = CGM.getOpenMPRuntime();
 
+  bool HasLastprivateClause = false;
   // Check pre-condition.
   {
     OMPLoopScope PreInitScope(*this, S);
@@ -2793,6 +2804,16 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
 
     OMPPrivateScope LoopScope(*this);
+    if (EmitOMPFirstprivateClause(S, LoopScope)) {
+      // Emit implicit barrier to synchronize threads and avoid data races on
+      // initialization of firstprivate variables and post-update of
+      // lastprivate variables.
+      CGM.getOpenMPRuntime().emitBarrierCall(
+          *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
+          /*ForceSimpleCall=*/true);
+    }
+    EmitOMPPrivateClause(S, LoopScope);
+    HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
     EmitOMPPrivateLoopCounters(S, LoopScope);
     (void)LoopScope.Privatize();
 
@@ -2849,6 +2870,13 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
                              LB.getAddress(), UB.getAddress(), ST.getAddress(),
                              IL.getAddress(), Chunk);
       }
+
+      // Emit final copy of the lastprivate variables if IsLastIter != 0.
+      if (HasLastprivateClause)
+        EmitOMPLastprivateClauseFinal(
+            S, /*NoFinals=*/false,
+            Builder.CreateIsNotNull(
+                EmitLoadOfScalar(IL, S.getLocStart())));
     }
 
     // We're now done with the loop, so jump to the continuation block.
diff --git a/lib/CodeGen/CodeGenFunction.cpp b/lib/CodeGen/CodeGenFunction.cpp
index a954f487d1e4..7cab13de923b 100644
--- a/lib/CodeGen/CodeGenFunction.cpp
+++ b/lib/CodeGen/CodeGenFunction.cpp
@@ -1049,6 +1049,19 @@ QualType CodeGenFunction::BuildFunctionArgList(GlobalDecl GD,
   return ResTy;
 }
 
+static bool
+shouldUseUndefinedBehaviorReturnOptimization(const FunctionDecl *FD,
+                                             const ASTContext &Context) {
+  QualType T = FD->getReturnType();
+  // Avoid the optimization for functions that return a record type with a
+  // trivial destructor or another trivially copyable type.
+  if (const RecordType *RT = T.getCanonicalType()->getAs<RecordType>()) {
+    if (const auto *ClassDecl = dyn_cast<CXXRecordDecl>(RT->getDecl()))
+      return !ClassDecl->hasTrivialDestructor();
+  }
+  return !T.isTriviallyCopyableType(Context);
+}
+
 void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
                                    const CGFunctionInfo &FnInfo) {
   const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
@@ -1127,17 +1140,23 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
   // function call is used by the caller, the behavior is undefined.
   if (getLangOpts().CPlusPlus && !FD->hasImplicitReturnZero() && !SawAsmBlock &&
       !FD->getReturnType()->isVoidType() && Builder.GetInsertBlock()) {
+    bool ShouldEmitUnreachable =
+        CGM.getCodeGenOpts().StrictReturn ||
+        shouldUseUndefinedBehaviorReturnOptimization(FD, getContext());
     if (SanOpts.has(SanitizerKind::Return)) {
       SanitizerScope SanScope(this);
       llvm::Value *IsFalse = Builder.getFalse();
       EmitCheck(std::make_pair(IsFalse, SanitizerKind::Return),
                 SanitizerHandler::MissingReturn,
                 EmitCheckSourceLocation(FD->getLocation()), None);
-    } else if (CGM.getCodeGenOpts().OptimizationLevel == 0) {
-      EmitTrapCall(llvm::Intrinsic::trap);
+    } else if (ShouldEmitUnreachable) {
+      if (CGM.getCodeGenOpts().OptimizationLevel == 0)
+        EmitTrapCall(llvm::Intrinsic::trap);
+    }
+    if (SanOpts.has(SanitizerKind::Return) || ShouldEmitUnreachable) {
+      Builder.CreateUnreachable();
+      Builder.ClearInsertionPoint();
     }
-    Builder.CreateUnreachable();
-    Builder.ClearInsertionPoint();
   }
 
   // Emit the standard function epilogue.
diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h
index 222d0e97968a..1347f54df9ac 100644
--- a/lib/CodeGen/CodeGenFunction.h
+++ b/lib/CodeGen/CodeGenFunction.h
@@ -2699,6 +2699,8 @@ public:
       const OMPTargetTeamsDistributeDirective &S);
   void EmitOMPTargetTeamsDistributeParallelForDirective(
       const OMPTargetTeamsDistributeParallelForDirective &S);
+  void EmitOMPTargetTeamsDistributeParallelForSimdDirective(
+      const OMPTargetTeamsDistributeParallelForSimdDirective &S);
 
   /// Emit outlined function for the target directive.
   static std::pairgetType()); EmitCallArgs(Args, ArgTypes, ArgRange, CalleeDecl, ParamsToSkip, Order); }
-- 
cgit v1.2.3
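
Two brief illustrative sketches follow; neither is part of the imported patch. First, the master-thread-id arithmetic that the new static getMasterThreadID() helper emits boils down to (NumThreads - 1) & ~(WarpSize - 1), which matches the examples in its comment. A minimal standalone C++ sketch, assuming a warp size of 32; the function name masterThreadID is made up for the illustration:

#include <cassert>
#include <cstdint>

// First lane of the last warp; WarpSize is assumed to be a power of two.
static uint32_t masterThreadID(uint32_t NumThreads, uint32_t WarpSize = 32) {
  uint32_t Mask = WarpSize - 1;     // low bits select the lane within a warp
  return (NumThreads - 1) & ~Mask;  // clear the lane bits of the last thread id
}

int main() {
  // Examples from the comment in CGOpenMPRuntimeNVPTX.cpp.
  assert(masterThreadID(33) == 32);
  assert(masterThreadID(64) == 32);
  assert(masterThreadID(1024) == 992);
  return 0;
}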
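
Second, the CodeGenFunction.cpp hunk only changes what clang emits when control can flow off the end of a value-returning C++ function. A hedged sketch of the source pattern involved; the type and function names are invented for the example, and the exact behavior depends on -fstrict-return and on shouldUseUndefinedBehaviorReturnOptimization() above:

struct NonTrivial {
  ~NonTrivial() {}  // user-provided (non-trivial) destructor
  int Payload;
};

// Trivially copyable return type: without -fstrict-return, the fall-through is
// no longer turned into llvm.trap()/unreachable after this change.
int missingReturnTrivial(bool B) {
  if (B)
    return 1;
}  // falls off the end when B is false: undefined behavior, -Wreturn-type warns

// Non-trivially-copyable return type: the trap/unreachable path is kept even
// without -fstrict-return, per shouldUseUndefinedBehaviorReturnOptimization().
NonTrivial missingReturnNonTrivial(bool B) {
  if (B)
    return NonTrivial();
}  // falls off the end when B is false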