diff options
Diffstat (limited to 'lib/CodeGen')
-rw-r--r-- | lib/CodeGen/CGBuiltin.cpp | 154 | ||||
-rw-r--r-- | lib/CodeGen/CGCall.cpp | 8 | ||||
-rw-r--r-- | lib/CodeGen/CGDebugInfo.cpp | 24 | ||||
-rw-r--r-- | lib/CodeGen/CGExpr.cpp | 7 | ||||
-rw-r--r-- | lib/CodeGen/CGExprScalar.cpp | 38 | ||||
-rw-r--r-- | lib/CodeGen/CGOpenMPRuntime.cpp | 822 | ||||
-rw-r--r-- | lib/CodeGen/CGOpenMPRuntime.h | 162 | ||||
-rw-r--r-- | lib/CodeGen/CGStmtOpenMP.cpp | 529 | ||||
-rw-r--r-- | lib/CodeGen/CodeGenFunction.h | 7 | ||||
-rw-r--r-- | lib/CodeGen/CodeGenModule.cpp | 9 | ||||
-rw-r--r-- | lib/CodeGen/MacroPPCallbacks.cpp | 4 | ||||
-rw-r--r-- | lib/CodeGen/ObjectFilePCHContainerOperations.cpp | 3 | ||||
-rw-r--r-- | lib/CodeGen/TargetInfo.cpp | 21 |
13 files changed, 1323 insertions, 465 deletions
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index bc902507c46e..f3527b0f39d1 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -2795,6 +2795,33 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1); return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1})); } + + case Builtin::BI__builtin_ms_va_start: + case Builtin::BI__builtin_ms_va_end: + return RValue::get( + EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(), + BuiltinID == Builtin::BI__builtin_ms_va_start)); + + case Builtin::BI__builtin_ms_va_copy: { + // Lower this manually. We can't reliably determine whether or not any + // given va_copy() is for a Win64 va_list from the calling convention + // alone, because it's legal to do this from a System V ABI function. + // With opaque pointer types, we won't have enough information in LLVM + // IR to determine this from the argument types, either. Best to do it + // now, while we have enough information. + Address DestAddr = EmitMSVAListRef(E->getArg(0)); + Address SrcAddr = EmitMSVAListRef(E->getArg(1)); + + llvm::Type *BPP = Int8PtrPtrTy; + + DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"), + DestAddr.getAlignment()); + SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"), + SrcAddr.getAlignment()); + + Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val"); + return RValue::get(Builder.CreateStore(ArgPtr, DestAddr)); + } } // If this is an alias for a lib function (e.g. __builtin_sin), emit @@ -7223,31 +7250,6 @@ static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E) { - if (BuiltinID == X86::BI__builtin_ms_va_start || - BuiltinID == X86::BI__builtin_ms_va_end) - return EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(), - BuiltinID == X86::BI__builtin_ms_va_start); - if (BuiltinID == X86::BI__builtin_ms_va_copy) { - // Lower this manually. We can't reliably determine whether or not any - // given va_copy() is for a Win64 va_list from the calling convention - // alone, because it's legal to do this from a System V ABI function. - // With opaque pointer types, we won't have enough information in LLVM - // IR to determine this from the argument types, either. Best to do it - // now, while we have enough information. - Address DestAddr = EmitMSVAListRef(E->getArg(0)); - Address SrcAddr = EmitMSVAListRef(E->getArg(1)); - - llvm::Type *BPP = Int8PtrPtrTy; - - DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"), - DestAddr.getAlignment()); - SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"), - SrcAddr.getAlignment()); - - Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val"); - return Builder.CreateStore(ArgPtr, DestAddr); - } - SmallVector<Value*, 4> Ops; // Find out if any arguments are required to be integer constant expressions. @@ -8790,12 +8792,14 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {X, Undef}); } + case SystemZ::BI__builtin_s390_vfsqsb: case SystemZ::BI__builtin_s390_vfsqdb: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType); return Builder.CreateCall(F, X); } + case SystemZ::BI__builtin_s390_vfmasb: case SystemZ::BI__builtin_s390_vfmadb: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); @@ -8804,6 +8808,7 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); return Builder.CreateCall(F, {X, Y, Z}); } + case SystemZ::BI__builtin_s390_vfmssb: case SystemZ::BI__builtin_s390_vfmsdb: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); @@ -8813,12 +8818,35 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")}); } + case SystemZ::BI__builtin_s390_vfnmasb: + case SystemZ::BI__builtin_s390_vfnmadb: { + llvm::Type *ResultType = ConvertType(E->getType()); + Value *X = EmitScalarExpr(E->getArg(0)); + Value *Y = EmitScalarExpr(E->getArg(1)); + Value *Z = EmitScalarExpr(E->getArg(2)); + Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); + Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); + return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, Z}), "sub"); + } + case SystemZ::BI__builtin_s390_vfnmssb: + case SystemZ::BI__builtin_s390_vfnmsdb: { + llvm::Type *ResultType = ConvertType(E->getType()); + Value *X = EmitScalarExpr(E->getArg(0)); + Value *Y = EmitScalarExpr(E->getArg(1)); + Value *Z = EmitScalarExpr(E->getArg(2)); + Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); + Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); + Value *NegZ = Builder.CreateFSub(Zero, Z, "sub"); + return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, NegZ})); + } + case SystemZ::BI__builtin_s390_vflpsb: case SystemZ::BI__builtin_s390_vflpdb: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); return Builder.CreateCall(F, X); } + case SystemZ::BI__builtin_s390_vflnsb: case SystemZ::BI__builtin_s390_vflndb: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); @@ -8826,6 +8854,7 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub"); } + case SystemZ::BI__builtin_s390_vfisb: case SystemZ::BI__builtin_s390_vfidb: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); @@ -8835,8 +8864,8 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext()); assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?"); (void)IsConstM4; (void)IsConstM5; - // Check whether this instance of vfidb can be represented via a LLVM - // standard intrinsic. We only support some combinations of M4 and M5. + // Check whether this instance can be represented via a LLVM standard + // intrinsic. We only support some combinations of M4 and M5. Intrinsic::ID ID = Intrinsic::not_intrinsic; switch (M4.getZExtValue()) { default: break; @@ -8861,11 +8890,76 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(ID, ResultType); return Builder.CreateCall(F, X); } - Function *F = CGM.getIntrinsic(Intrinsic::s390_vfidb); + switch (BuiltinID) { + case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break; + case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break; + default: llvm_unreachable("Unknown BuiltinID"); + } + Function *F = CGM.getIntrinsic(ID); Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4); Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5); return Builder.CreateCall(F, {X, M4Value, M5Value}); } + case SystemZ::BI__builtin_s390_vfmaxsb: + case SystemZ::BI__builtin_s390_vfmaxdb: { + llvm::Type *ResultType = ConvertType(E->getType()); + Value *X = EmitScalarExpr(E->getArg(0)); + Value *Y = EmitScalarExpr(E->getArg(1)); + // Constant-fold the M4 mask argument. + llvm::APSInt M4; + bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext()); + assert(IsConstM4 && "Constant arg isn't actually constant?"); + (void)IsConstM4; + // Check whether this instance can be represented via a LLVM standard + // intrinsic. We only support some values of M4. + Intrinsic::ID ID = Intrinsic::not_intrinsic; + switch (M4.getZExtValue()) { + default: break; + case 4: ID = Intrinsic::maxnum; break; + } + if (ID != Intrinsic::not_intrinsic) { + Function *F = CGM.getIntrinsic(ID, ResultType); + return Builder.CreateCall(F, {X, Y}); + } + switch (BuiltinID) { + case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break; + case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break; + default: llvm_unreachable("Unknown BuiltinID"); + } + Function *F = CGM.getIntrinsic(ID); + Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4); + return Builder.CreateCall(F, {X, Y, M4Value}); + } + case SystemZ::BI__builtin_s390_vfminsb: + case SystemZ::BI__builtin_s390_vfmindb: { + llvm::Type *ResultType = ConvertType(E->getType()); + Value *X = EmitScalarExpr(E->getArg(0)); + Value *Y = EmitScalarExpr(E->getArg(1)); + // Constant-fold the M4 mask argument. + llvm::APSInt M4; + bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext()); + assert(IsConstM4 && "Constant arg isn't actually constant?"); + (void)IsConstM4; + // Check whether this instance can be represented via a LLVM standard + // intrinsic. We only support some values of M4. + Intrinsic::ID ID = Intrinsic::not_intrinsic; + switch (M4.getZExtValue()) { + default: break; + case 4: ID = Intrinsic::minnum; break; + } + if (ID != Intrinsic::not_intrinsic) { + Function *F = CGM.getIntrinsic(ID, ResultType); + return Builder.CreateCall(F, {X, Y}); + } + switch (BuiltinID) { + case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break; + case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break; + default: llvm_unreachable("Unknown BuiltinID"); + } + Function *F = CGM.getIntrinsic(ID); + Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4); + return Builder.CreateCall(F, {X, Y, M4Value}); + } // Vector intrisincs that output the post-instruction CC value. @@ -8932,10 +9026,14 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, INTRINSIC_WITH_CC(s390_vstrczhs); INTRINSIC_WITH_CC(s390_vstrczfs); + INTRINSIC_WITH_CC(s390_vfcesbs); INTRINSIC_WITH_CC(s390_vfcedbs); + INTRINSIC_WITH_CC(s390_vfchsbs); INTRINSIC_WITH_CC(s390_vfchdbs); + INTRINSIC_WITH_CC(s390_vfchesbs); INTRINSIC_WITH_CC(s390_vfchedbs); + INTRINSIC_WITH_CC(s390_vftcisb); INTRINSIC_WITH_CC(s390_vftcidb); #undef INTRINSIC_WITH_CC diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp index cee656a62fe7..316bf44cb1c3 100644 --- a/lib/CodeGen/CGCall.cpp +++ b/lib/CodeGen/CGCall.cpp @@ -50,7 +50,7 @@ unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) { case CC_X86FastCall: return llvm::CallingConv::X86_FastCall; case CC_X86RegCall: return llvm::CallingConv::X86_RegCall; case CC_X86ThisCall: return llvm::CallingConv::X86_ThisCall; - case CC_X86_64Win64: return llvm::CallingConv::X86_64_Win64; + case CC_Win64: return llvm::CallingConv::Win64; case CC_X86_64SysV: return llvm::CallingConv::X86_64_SysV; case CC_AAPCS: return llvm::CallingConv::ARM_AAPCS; case CC_AAPCS_VFP: return llvm::CallingConv::ARM_AAPCS_VFP; @@ -218,7 +218,7 @@ static CallingConv getCallingConventionForDecl(const Decl *D, bool IsWindows) { return CC_IntelOclBicc; if (D->hasAttr<MSABIAttr>()) - return IsWindows ? CC_C : CC_X86_64Win64; + return IsWindows ? CC_C : CC_Win64; if (D->hasAttr<SysVABIAttr>()) return IsWindows ? CC_X86_64SysV : CC_C; @@ -1877,8 +1877,8 @@ void CodeGenModule::ConstructAttributeList( // the function. const auto *TD = FD->getAttr<TargetAttr>(); TargetAttr::ParsedTargetAttr ParsedAttr = TD->parse(); - if (ParsedAttr.second != "") - TargetCPU = ParsedAttr.second; + if (ParsedAttr.Architecture != "") + TargetCPU = ParsedAttr.Architecture; if (TargetCPU != "") FuncAttrs.addAttribute("target-cpu", TargetCPU); if (!Features.empty()) { diff --git a/lib/CodeGen/CGDebugInfo.cpp b/lib/CodeGen/CGDebugInfo.cpp index b00d296fe34a..c9c450c32e3b 100644 --- a/lib/CodeGen/CGDebugInfo.cpp +++ b/lib/CodeGen/CGDebugInfo.cpp @@ -956,7 +956,7 @@ static unsigned getDwarfCC(CallingConv CC) { return llvm::dwarf::DW_CC_BORLAND_pascal; // FIXME: Create new DW_CC_ codes for these calling conventions. - case CC_X86_64Win64: + case CC_Win64: case CC_X86_64SysV: case CC_AAPCS: case CC_AAPCS_VFP: @@ -3970,10 +3970,10 @@ void CGDebugInfo::EmitUsingDirective(const UsingDirectiveDecl &UD) { const NamespaceDecl *NSDecl = UD.getNominatedNamespace(); if (!NSDecl->isAnonymousNamespace() || CGM.getCodeGenOpts().DebugExplicitImport) { + auto Loc = UD.getLocation(); DBuilder.createImportedModule( getCurrentContextDescriptor(cast<Decl>(UD.getDeclContext())), - getOrCreateNamespace(NSDecl), - getLineNumber(UD.getLocation())); + getOrCreateNamespace(NSDecl), getOrCreateFile(Loc), getLineNumber(Loc)); } } @@ -3996,10 +3996,12 @@ void CGDebugInfo::EmitUsingDecl(const UsingDecl &UD) { if (AT->getDeducedType().isNull()) return; if (llvm::DINode *Target = - getDeclarationOrDefinition(USD.getUnderlyingDecl())) + getDeclarationOrDefinition(USD.getUnderlyingDecl())) { + auto Loc = USD.getLocation(); DBuilder.createImportedDeclaration( getCurrentContextDescriptor(cast<Decl>(USD.getDeclContext())), Target, - getLineNumber(USD.getLocation())); + getOrCreateFile(Loc), getLineNumber(Loc)); + } } void CGDebugInfo::EmitImportDecl(const ImportDecl &ID) { @@ -4007,10 +4009,11 @@ void CGDebugInfo::EmitImportDecl(const ImportDecl &ID) { return; if (Module *M = ID.getImportedModule()) { auto Info = ExternalASTSource::ASTSourceDescriptor(*M); + auto Loc = ID.getLocation(); DBuilder.createImportedDeclaration( getCurrentContextDescriptor(cast<Decl>(ID.getDeclContext())), - getOrCreateModuleRef(Info, DebugTypeExtRefs), - getLineNumber(ID.getLocation())); + getOrCreateModuleRef(Info, DebugTypeExtRefs), getOrCreateFile(Loc), + getLineNumber(Loc)); } } @@ -4022,18 +4025,19 @@ CGDebugInfo::EmitNamespaceAlias(const NamespaceAliasDecl &NA) { if (VH) return cast<llvm::DIImportedEntity>(VH); llvm::DIImportedEntity *R; + auto Loc = NA.getLocation(); if (const auto *Underlying = dyn_cast<NamespaceAliasDecl>(NA.getAliasedNamespace())) // This could cache & dedup here rather than relying on metadata deduping. R = DBuilder.createImportedDeclaration( getCurrentContextDescriptor(cast<Decl>(NA.getDeclContext())), - EmitNamespaceAlias(*Underlying), getLineNumber(NA.getLocation()), - NA.getName()); + EmitNamespaceAlias(*Underlying), getOrCreateFile(Loc), + getLineNumber(Loc), NA.getName()); else R = DBuilder.createImportedDeclaration( getCurrentContextDescriptor(cast<Decl>(NA.getDeclContext())), getOrCreateNamespace(cast<NamespaceDecl>(NA.getAliasedNamespace())), - getLineNumber(NA.getLocation()), NA.getName()); + getOrCreateFile(Loc), getLineNumber(Loc), NA.getName()); VH.reset(R); return R; } diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp index 9f40ee5a00a3..9572bd3543bd 100644 --- a/lib/CodeGen/CGExpr.cpp +++ b/lib/CodeGen/CGExpr.cpp @@ -73,9 +73,12 @@ Address CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align, // cast alloca to the default address space when necessary. if (CastToDefaultAddrSpace && getASTAllocaAddressSpace() != LangAS::Default) { auto DestAddrSpace = getContext().getTargetAddressSpace(LangAS::Default); + auto CurIP = Builder.saveIP(); + Builder.SetInsertPoint(AllocaInsertPt); V = getTargetHooks().performAddrSpaceCast( *this, V, getASTAllocaAddressSpace(), LangAS::Default, Ty->getPointerTo(DestAddrSpace), /*non-null*/ true); + Builder.restoreIP(CurIP); } return Address(V, Align); @@ -3052,7 +3055,9 @@ static llvm::Value *emitArraySubscriptGEP(CodeGenFunction &CGF, SourceLocation loc, const llvm::Twine &name = "arrayidx") { if (inbounds) { - return CGF.EmitCheckedInBoundsGEP(ptr, indices, signedIndices, loc, name); + return CGF.EmitCheckedInBoundsGEP(ptr, indices, signedIndices, + CodeGenFunction::NotSubtraction, loc, + name); } else { return CGF.Builder.CreateGEP(ptr, indices, name); } diff --git a/lib/CodeGen/CGExprScalar.cpp b/lib/CodeGen/CGExprScalar.cpp index 43c86495f3d3..1170b014ec7f 100644 --- a/lib/CodeGen/CGExprScalar.cpp +++ b/lib/CodeGen/CGExprScalar.cpp @@ -1851,7 +1851,7 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, llvm::Value *input; int amount = (isInc ? 1 : -1); - bool signedIndex = !isInc; + bool isSubtraction = !isInc; if (const AtomicType *atomicTy = type->getAs<AtomicType>()) { type = atomicTy->getValueType(); @@ -1941,8 +1941,9 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, if (CGF.getLangOpts().isSignedOverflowDefined()) value = Builder.CreateGEP(value, numElts, "vla.inc"); else - value = CGF.EmitCheckedInBoundsGEP(value, numElts, signedIndex, - E->getExprLoc(), "vla.inc"); + value = CGF.EmitCheckedInBoundsGEP( + value, numElts, /*SignedIndices=*/false, isSubtraction, + E->getExprLoc(), "vla.inc"); // Arithmetic on function pointers (!) is just +-1. } else if (type->isFunctionType()) { @@ -1952,8 +1953,9 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, if (CGF.getLangOpts().isSignedOverflowDefined()) value = Builder.CreateGEP(value, amt, "incdec.funcptr"); else - value = CGF.EmitCheckedInBoundsGEP(value, amt, signedIndex, - E->getExprLoc(), "incdec.funcptr"); + value = CGF.EmitCheckedInBoundsGEP(value, amt, /*SignedIndices=*/false, + isSubtraction, E->getExprLoc(), + "incdec.funcptr"); value = Builder.CreateBitCast(value, input->getType()); // For everything else, we can just do a simple increment. @@ -1962,8 +1964,9 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, if (CGF.getLangOpts().isSignedOverflowDefined()) value = Builder.CreateGEP(value, amt, "incdec.ptr"); else - value = CGF.EmitCheckedInBoundsGEP(value, amt, signedIndex, - E->getExprLoc(), "incdec.ptr"); + value = CGF.EmitCheckedInBoundsGEP(value, amt, /*SignedIndices=*/false, + isSubtraction, E->getExprLoc(), + "incdec.ptr"); } // Vector increment/decrement. @@ -2044,7 +2047,8 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, if (CGF.getLangOpts().isSignedOverflowDefined()) value = Builder.CreateGEP(value, sizeValue, "incdec.objptr"); else - value = CGF.EmitCheckedInBoundsGEP(value, sizeValue, signedIndex, + value = CGF.EmitCheckedInBoundsGEP(value, sizeValue, + /*SignedIndices=*/false, isSubtraction, E->getExprLoc(), "incdec.objptr"); value = Builder.CreateBitCast(value, input->getType()); } @@ -2663,7 +2667,6 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF, } bool isSigned = indexOperand->getType()->isSignedIntegerOrEnumerationType(); - bool mayHaveNegativeGEPIndex = isSigned || isSubtraction; unsigned width = cast<llvm::IntegerType>(index->getType())->getBitWidth(); auto &DL = CGF.CGM.getDataLayout(); @@ -2715,7 +2718,7 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF, } else { index = CGF.Builder.CreateNSWMul(index, numElements, "vla.index"); pointer = - CGF.EmitCheckedInBoundsGEP(pointer, index, mayHaveNegativeGEPIndex, + CGF.EmitCheckedInBoundsGEP(pointer, index, isSigned, isSubtraction, op.E->getExprLoc(), "add.ptr"); } return pointer; @@ -2733,7 +2736,7 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF, if (CGF.getLangOpts().isSignedOverflowDefined()) return CGF.Builder.CreateGEP(pointer, index, "add.ptr"); - return CGF.EmitCheckedInBoundsGEP(pointer, index, mayHaveNegativeGEPIndex, + return CGF.EmitCheckedInBoundsGEP(pointer, index, isSigned, isSubtraction, op.E->getExprLoc(), "add.ptr"); } @@ -2807,7 +2810,7 @@ static Value* tryEmitFMulAdd(const BinOpInfo &op, Value *ScalarExprEmitter::EmitAdd(const BinOpInfo &op) { if (op.LHS->getType()->isPointerTy() || op.RHS->getType()->isPointerTy()) - return emitPointerArithmetic(CGF, op, /*subtraction*/ false); + return emitPointerArithmetic(CGF, op, CodeGenFunction::NotSubtraction); if (op.Ty->isSignedIntegerOrEnumerationType()) { switch (CGF.getLangOpts().getSignedOverflowBehavior()) { @@ -2878,7 +2881,7 @@ Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) { // If the RHS is not a pointer, then we have normal pointer // arithmetic. if (!op.RHS->getType()->isPointerTy()) - return emitPointerArithmetic(CGF, op, /*subtraction*/ true); + return emitPointerArithmetic(CGF, op, CodeGenFunction::IsSubtraction); // Otherwise, this is a pointer subtraction. @@ -3853,6 +3856,7 @@ LValue CodeGenFunction::EmitCompoundAssignmentLValue( Value *CodeGenFunction::EmitCheckedInBoundsGEP(Value *Ptr, ArrayRef<Value *> IdxList, bool SignedIndices, + bool IsSubtraction, SourceLocation Loc, const Twine &Name) { Value *GEPVal = Builder.CreateInBoundsGEP(Ptr, IdxList, Name); @@ -3958,15 +3962,19 @@ Value *CodeGenFunction::EmitCheckedInBoundsGEP(Value *Ptr, // pointer matches the sign of the total offset. llvm::Value *ValidGEP; auto *NoOffsetOverflow = Builder.CreateNot(OffsetOverflows); - auto *PosOrZeroValid = Builder.CreateICmpUGE(ComputedGEP, IntPtr); if (SignedIndices) { + auto *PosOrZeroValid = Builder.CreateICmpUGE(ComputedGEP, IntPtr); auto *PosOrZeroOffset = Builder.CreateICmpSGE(TotalOffset, Zero); llvm::Value *NegValid = Builder.CreateICmpULT(ComputedGEP, IntPtr); ValidGEP = Builder.CreateAnd( Builder.CreateSelect(PosOrZeroOffset, PosOrZeroValid, NegValid), NoOffsetOverflow); - } else { + } else if (!SignedIndices && !IsSubtraction) { + auto *PosOrZeroValid = Builder.CreateICmpUGE(ComputedGEP, IntPtr); ValidGEP = Builder.CreateAnd(PosOrZeroValid, NoOffsetOverflow); + } else { + auto *NegOrZeroValid = Builder.CreateICmpULE(ComputedGEP, IntPtr); + ValidGEP = Builder.CreateAnd(NegOrZeroValid, NoOffsetOverflow); } llvm::Constant *StaticArgs[] = {EmitCheckSourceLocation(Loc)}; diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp index a2ea0dec3e9d..d488bd4b30bf 100644 --- a/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/lib/CodeGen/CGOpenMPRuntime.cpp @@ -643,6 +643,12 @@ enum OpenMPRTLFunction { // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 // *vec); OMPRTL__kmpc_doacross_wait, + // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void + // *data); + OMPRTL__kmpc_task_reduction_init, + // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void + // *d); + OMPRTL__kmpc_task_reduction_get_th_data, // // Offloading related calls @@ -697,6 +703,414 @@ void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { } } +/// Check if the combiner is a call to UDR combiner and if it is so return the +/// UDR decl used for reduction. +static const OMPDeclareReductionDecl * +getReductionInit(const Expr *ReductionOp) { + if (auto *CE = dyn_cast<CallExpr>(ReductionOp)) + if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) + if (auto *DRE = + dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) + if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) + return DRD; + return nullptr; +} + +static void emitInitWithReductionInitializer(CodeGenFunction &CGF, + const OMPDeclareReductionDecl *DRD, + const Expr *InitOp, + Address Private, Address Original, + QualType Ty) { + if (DRD->getInitializer()) { + std::pair<llvm::Function *, llvm::Function *> Reduction = + CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); + auto *CE = cast<CallExpr>(InitOp); + auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); + const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); + const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); + auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); + auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); + CodeGenFunction::OMPPrivateScope PrivateScope(CGF); + PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), + [=]() -> Address { return Private; }); + PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), + [=]() -> Address { return Original; }); + (void)PrivateScope.Privatize(); + RValue Func = RValue::get(Reduction.second); + CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); + CGF.EmitIgnoredExpr(InitOp); + } else { + llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); + auto *GV = new llvm::GlobalVariable( + CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, + llvm::GlobalValue::PrivateLinkage, Init, ".init"); + LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); + RValue InitRVal; + switch (CGF.getEvaluationKind(Ty)) { + case TEK_Scalar: + InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation()); + break; + case TEK_Complex: + InitRVal = + RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation())); + break; + case TEK_Aggregate: + InitRVal = RValue::getAggregate(LV.getAddress()); + break; + } + OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue); + CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); + CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), + /*IsInitializer=*/false); + } +} + +/// \brief Emit initialization of arrays of complex types. +/// \param DestAddr Address of the array. +/// \param Type Type of array. +/// \param Init Initial expression of array. +/// \param SrcAddr Address of the original array. +static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, + QualType Type, const Expr *Init, + const OMPDeclareReductionDecl *DRD, + Address SrcAddr = Address::invalid()) { + // Perform element-by-element initialization. + QualType ElementTy; + + // Drill down to the base element type on both arrays. + auto ArrayTy = Type->getAsArrayTypeUnsafe(); + auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); + DestAddr = + CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); + if (DRD) + SrcAddr = + CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); + + llvm::Value *SrcBegin = nullptr; + if (DRD) + SrcBegin = SrcAddr.getPointer(); + auto DestBegin = DestAddr.getPointer(); + // Cast from pointer to array type to pointer to single element. + auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); + // The basic structure here is a while-do loop. + auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); + auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); + auto IsEmpty = + CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); + CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); + + // Enter the loop body, making that address the current address. + auto EntryBB = CGF.Builder.GetInsertBlock(); + CGF.EmitBlock(BodyBB); + + CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); + + llvm::PHINode *SrcElementPHI = nullptr; + Address SrcElementCurrent = Address::invalid(); + if (DRD) { + SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, + "omp.arraycpy.srcElementPast"); + SrcElementPHI->addIncoming(SrcBegin, EntryBB); + SrcElementCurrent = + Address(SrcElementPHI, + SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); + } + llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( + DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); + DestElementPHI->addIncoming(DestBegin, EntryBB); + Address DestElementCurrent = + Address(DestElementPHI, + DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); + + // Emit copy. + { + CodeGenFunction::RunCleanupsScope InitScope(CGF); + if (DRD && (DRD->getInitializer() || !Init)) { + emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, + SrcElementCurrent, ElementTy); + } else + CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), + /*IsInitializer=*/false); + } + + if (DRD) { + // Shift the address forward by one element. + auto SrcElementNext = CGF.Builder.CreateConstGEP1_32( + SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); + SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); + } + + // Shift the address forward by one element. + auto DestElementNext = CGF.Builder.CreateConstGEP1_32( + DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); + // Check whether we've reached the end. + auto Done = + CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); + CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); + DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); + + // Done. + CGF.EmitBlock(DoneBB, /*IsFinished=*/true); +} + +LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { + if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) + return CGF.EmitOMPArraySectionExpr(OASE); + if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(E)) + return CGF.EmitLValue(ASE); + auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), + CGF.CapturedStmtInfo && + CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr, + E->getType(), VK_LValue, E->getExprLoc()); + // Store the address of the original variable associated with the LHS + // implicit variable. + return CGF.EmitLValue(&DRE); +} + +LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, + const Expr *E) { + if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) + return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); + return LValue(); +} + +void ReductionCodeGen::emitAggregateInitialization( + CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, + const OMPDeclareReductionDecl *DRD) { + // Emit VarDecl with copy init for arrays. + // Get the address of the original variable captured in current + // captured region. + auto *PrivateVD = + cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); + EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), + DRD ? ClausesData[N].ReductionOp : PrivateVD->getInit(), + DRD, SharedLVal.getAddress()); +} + +ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, + ArrayRef<const Expr *> Privates, + ArrayRef<const Expr *> ReductionOps) { + ClausesData.reserve(Shareds.size()); + SharedAddresses.reserve(Shareds.size()); + Sizes.reserve(Shareds.size()); + BaseDecls.reserve(Shareds.size()); + auto IPriv = Privates.begin(); + auto IRed = ReductionOps.begin(); + for (const auto *Ref : Shareds) { + ClausesData.emplace_back(Ref, *IPriv, *IRed); + std::advance(IPriv, 1); + std::advance(IRed, 1); + } +} + +void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) { + assert(SharedAddresses.size() == N && + "Number of generated lvalues must be exactly N."); + SharedAddresses.emplace_back(emitSharedLValue(CGF, ClausesData[N].Ref), + emitSharedLValueUB(CGF, ClausesData[N].Ref)); +} + +void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { + auto *PrivateVD = + cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); + QualType PrivateType = PrivateVD->getType(); + bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); + if (!AsArraySection && !PrivateType->isVariablyModifiedType()) { + Sizes.emplace_back( + CGF.getTypeSize( + SharedAddresses[N].first.getType().getNonReferenceType()), + nullptr); + return; + } + llvm::Value *Size; + llvm::Value *SizeInChars; + llvm::Type *ElemType = + cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType()) + ->getElementType(); + auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); + if (AsArraySection) { + Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(), + SharedAddresses[N].first.getPointer()); + Size = CGF.Builder.CreateNUWAdd( + Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); + SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); + } else { + SizeInChars = CGF.getTypeSize( + SharedAddresses[N].first.getType().getNonReferenceType()); + Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); + } + Sizes.emplace_back(SizeInChars, Size); + CodeGenFunction::OpaqueValueMapping OpaqueMap( + CGF, + cast<OpaqueValueExpr>( + CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), + RValue::get(Size)); + CGF.EmitVariablyModifiedType(PrivateType); +} + +void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, + llvm::Value *Size) { + auto *PrivateVD = + cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); + QualType PrivateType = PrivateVD->getType(); + bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); + if (!AsArraySection && !PrivateType->isVariablyModifiedType()) { + assert(!Size && !Sizes[N].second && + "Size should be nullptr for non-variably modified redution " + "items."); + return; + } + CodeGenFunction::OpaqueValueMapping OpaqueMap( + CGF, + cast<OpaqueValueExpr>( + CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), + RValue::get(Size)); + CGF.EmitVariablyModifiedType(PrivateType); +} + +void ReductionCodeGen::emitInitialization( + CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, + llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) { + assert(SharedAddresses.size() > N && "No variable was generated"); + auto *PrivateVD = + cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); + auto *DRD = getReductionInit(ClausesData[N].ReductionOp); + QualType PrivateType = PrivateVD->getType(); + PrivateAddr = CGF.Builder.CreateElementBitCast( + PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); + QualType SharedType = SharedAddresses[N].first.getType(); + SharedLVal = CGF.MakeAddrLValue( + CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(), + CGF.ConvertTypeForMem(SharedType)), + SharedType, SharedAddresses[N].first.getBaseInfo()); + if (isa<OMPArraySectionExpr>(ClausesData[N].Ref) || + CGF.getContext().getAsArrayType(PrivateVD->getType())) { + emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); + } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { + emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, + PrivateAddr, SharedLVal.getAddress(), + SharedLVal.getType()); + } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && + !CGF.isTrivialInitializer(PrivateVD->getInit())) { + CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, + PrivateVD->getType().getQualifiers(), + /*IsInitializer=*/false); + } +} + +bool ReductionCodeGen::needCleanups(unsigned N) { + auto *PrivateVD = + cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); + QualType PrivateType = PrivateVD->getType(); + QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); + return DTorKind != QualType::DK_none; +} + +void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, + Address PrivateAddr) { + auto *PrivateVD = + cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); + QualType PrivateType = PrivateVD->getType(); + QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); + if (needCleanups(N)) { + PrivateAddr = CGF.Builder.CreateElementBitCast( + PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); + CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); + } +} + +static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, + LValue BaseLV) { + BaseTy = BaseTy.getNonReferenceType(); + while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && + !CGF.getContext().hasSameType(BaseTy, ElTy)) { + if (auto *PtrTy = BaseTy->getAs<PointerType>()) + BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy); + else { + BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(), + BaseTy->castAs<ReferenceType>()); + } + BaseTy = BaseTy->getPointeeType(); + } + return CGF.MakeAddrLValue( + CGF.Builder.CreateElementBitCast(BaseLV.getAddress(), + CGF.ConvertTypeForMem(ElTy)), + BaseLV.getType(), BaseLV.getBaseInfo()); +} + +static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, + llvm::Type *BaseLVType, CharUnits BaseLVAlignment, + llvm::Value *Addr) { + Address Tmp = Address::invalid(); + Address TopTmp = Address::invalid(); + Address MostTopTmp = Address::invalid(); + BaseTy = BaseTy.getNonReferenceType(); + while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && + !CGF.getContext().hasSameType(BaseTy, ElTy)) { + Tmp = CGF.CreateMemTemp(BaseTy); + if (TopTmp.isValid()) + CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); + else + MostTopTmp = Tmp; + TopTmp = Tmp; + BaseTy = BaseTy->getPointeeType(); + } + llvm::Type *Ty = BaseLVType; + if (Tmp.isValid()) + Ty = Tmp.getElementType(); + Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); + if (Tmp.isValid()) { + CGF.Builder.CreateStore(Addr, Tmp); + return MostTopTmp; + } + return Address(Addr, BaseLVAlignment); +} + +Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, + Address PrivateAddr) { + const DeclRefExpr *DE; + const VarDecl *OrigVD = nullptr; + if (auto *OASE = dyn_cast<OMPArraySectionExpr>(ClausesData[N].Ref)) { + auto *Base = OASE->getBase()->IgnoreParenImpCasts(); + while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) + Base = TempOASE->getBase()->IgnoreParenImpCasts(); + while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) + Base = TempASE->getBase()->IgnoreParenImpCasts(); + DE = cast<DeclRefExpr>(Base); + OrigVD = cast<VarDecl>(DE->getDecl()); + } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(ClausesData[N].Ref)) { + auto *Base = ASE->getBase()->IgnoreParenImpCasts(); + while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) + Base = TempASE->getBase()->IgnoreParenImpCasts(); + DE = cast<DeclRefExpr>(Base); + OrigVD = cast<VarDecl>(DE->getDecl()); + } + if (OrigVD) { + BaseDecls.emplace_back(OrigVD); + auto OriginalBaseLValue = CGF.EmitLValue(DE); + LValue BaseLValue = + loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), + OriginalBaseLValue); + llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( + BaseLValue.getPointer(), SharedAddresses[N].first.getPointer()); + llvm::Value *Ptr = + CGF.Builder.CreateGEP(PrivateAddr.getPointer(), Adjustment); + return castToBase(CGF, OrigVD->getType(), + SharedAddresses[N].first.getType(), + OriginalBaseLValue.getPointer()->getType(), + OriginalBaseLValue.getAlignment(), Ptr); + } + BaseDecls.emplace_back( + cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); + return PrivateAddr; +} + +bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { + auto *DRD = getReductionInit(ClausesData[N].ReductionOp); + return DRD && DRD->getInitializer(); +} + LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { return CGF.EmitLoadOfPointerLValue( CGF.GetAddrOfLocalVar(getThreadIDVariable()), @@ -1554,6 +1968,26 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait"); break; } + case OMPRTL__kmpc_task_reduction_init: { + // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void + // *data); + llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); + RTLFn = + CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init"); + break; + } + case OMPRTL__kmpc_task_reduction_get_th_data: { + // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void + // *d); + llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction( + FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data"); + break; + } case OMPRTL__tgt_target: { // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t @@ -1904,6 +2338,27 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( return nullptr; } +Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, + QualType VarType, + StringRef Name) { + llvm::Twine VarName(Name, ".artificial."); + llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); + llvm::Value *GAddr = getOrCreateInternalVariable(VarLVType, VarName); + llvm::Value *Args[] = { + emitUpdateLocation(CGF, SourceLocation()), + getThreadID(CGF, SourceLocation()), + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), + CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, + /*IsSigned=*/false), + getOrCreateInternalVariable(CGM.VoidPtrPtrTy, VarName + ".cache.")}; + return Address( + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + CGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), + VarLVType->getPointerTo(/*AddrSpace=*/0)), + CGM.getPointerAlign()); +} + /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen /// function. Here is the logic: /// if (Cond) { @@ -2699,6 +3154,8 @@ enum KmpTaskTFields { KmpTaskTStride, /// (Taskloops only) Is last iteration flag. KmpTaskTLastIter, + /// (Taskloops only) Reduction data. + KmpTaskTReductions, }; } // anonymous namespace @@ -3250,6 +3707,7 @@ createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, // kmp_uint64 ub; // kmp_int64 st; // kmp_int32 liter; + // void * reductions; // }; auto *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); UD->startDefinition(); @@ -3273,6 +3731,7 @@ createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, addFieldToRecordDecl(C, RD, KmpUInt64Ty); addFieldToRecordDecl(C, RD, KmpInt64Ty); addFieldToRecordDecl(C, RD, KmpInt32Ty); + addFieldToRecordDecl(C, RD, C.VoidPtrTy); } RD->completeDefinition(); return RD; @@ -3303,7 +3762,7 @@ createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, /// For taskloops: /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, -/// tt->shareds); +/// tt->reductions, tt->shareds); /// return 0; /// } /// \endcode @@ -3389,10 +3848,14 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); auto LILVal = CGF.EmitLValueForField(Base, *LIFI); auto *LIParam = CGF.EmitLoadOfLValue(LILVal, Loc).getScalarVal(); + auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); + auto RLVal = CGF.EmitLValueForField(Base, *RFI); + auto *RParam = CGF.EmitLoadOfLValue(RLVal, Loc).getScalarVal(); CallArgs.push_back(LBParam); CallArgs.push_back(UBParam); CallArgs.push_back(StParam); CallArgs.push_back(LIParam); + CallArgs.push_back(RParam); } CallArgs.push_back(SharedsParam); @@ -4155,6 +4618,16 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(), /*IsInitializer=*/true); + // Store reductions address. + LValue RedLVal = CGF.EmitLValueForField( + Result.TDBase, + *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); + if (Data.Reductions) + CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); + else { + CGF.EmitNullInitialization(RedLVal.getAddress(), + CGF.getContext().VoidPtrTy); + } enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; llvm::Value *TaskArgs[] = { UpLoc, @@ -4680,6 +5153,353 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); } +/// Generates unique name for artificial threadprivate variables. +/// Format is: <Prefix> "." <Loc_raw_encoding> "_" <N> +static std::string generateUniqueName(StringRef Prefix, SourceLocation Loc, + unsigned N) { + SmallString<256> Buffer; + llvm::raw_svector_ostream Out(Buffer); + Out << Prefix << "." << Loc.getRawEncoding() << "_" << N; + return Out.str(); +} + +/// Emits reduction initializer function: +/// \code +/// void @.red_init(void* %arg) { +/// %0 = bitcast void* %arg to <type>* +/// store <type> <init>, <type>* %0 +/// ret void +/// } +/// \endcode +static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, + SourceLocation Loc, + ReductionCodeGen &RCG, unsigned N) { + auto &C = CGM.getContext(); + FunctionArgList Args; + ImplicitParamDecl Param(C, C.VoidPtrTy, ImplicitParamDecl::Other); + Args.emplace_back(&Param); + auto &FnInfo = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo); + auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, + ".red_init.", &CGM.getModule()); + CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo); + CodeGenFunction CGF(CGM); + CGF.disableDebugInfo(); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args); + Address PrivateAddr = CGF.EmitLoadOfPointer( + CGF.GetAddrOfLocalVar(&Param), + C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); + llvm::Value *Size = nullptr; + // If the size of the reduction item is non-constant, load it from global + // threadprivate variable. + if (RCG.getSizes(N).second) { + Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( + CGF, CGM.getContext().getSizeType(), + generateUniqueName("reduction_size", Loc, N)); + Size = + CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, + CGM.getContext().getSizeType(), SourceLocation()); + } + RCG.emitAggregateType(CGF, N, Size); + LValue SharedLVal; + // If initializer uses initializer from declare reduction construct, emit a + // pointer to the address of the original reduction item (reuired by reduction + // initializer) + if (RCG.usesReductionInitializer(N)) { + Address SharedAddr = + CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( + CGF, CGM.getContext().VoidPtrTy, + generateUniqueName("reduction", Loc, N)); + SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy); + } else { + SharedLVal = CGF.MakeNaturalAlignAddrLValue( + llvm::ConstantPointerNull::get(CGM.VoidPtrTy), + CGM.getContext().VoidPtrTy); + } + // Emit the initializer: + // %0 = bitcast void* %arg to <type>* + // store <type> <init>, <type>* %0 + RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal, + [](CodeGenFunction &) { return false; }); + CGF.FinishFunction(); + return Fn; +} + +/// Emits reduction combiner function: +/// \code +/// void @.red_comb(void* %arg0, void* %arg1) { +/// %lhs = bitcast void* %arg0 to <type>* +/// %rhs = bitcast void* %arg1 to <type>* +/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs) +/// store <type> %2, <type>* %lhs +/// ret void +/// } +/// \endcode +static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM, + SourceLocation Loc, + ReductionCodeGen &RCG, unsigned N, + const Expr *ReductionOp, + const Expr *LHS, const Expr *RHS, + const Expr *PrivateRef) { + auto &C = CGM.getContext(); + auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl()); + auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl()); + FunctionArgList Args; + ImplicitParamDecl ParamInOut(C, C.VoidPtrTy, ImplicitParamDecl::Other); + ImplicitParamDecl ParamIn(C, C.VoidPtrTy, ImplicitParamDecl::Other); + Args.emplace_back(&ParamInOut); + Args.emplace_back(&ParamIn); + auto &FnInfo = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo); + auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, + ".red_comb.", &CGM.getModule()); + CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo); + CodeGenFunction CGF(CGM); + CGF.disableDebugInfo(); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args); + llvm::Value *Size = nullptr; + // If the size of the reduction item is non-constant, load it from global + // threadprivate variable. + if (RCG.getSizes(N).second) { + Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( + CGF, CGM.getContext().getSizeType(), + generateUniqueName("reduction_size", Loc, N)); + Size = + CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, + CGM.getContext().getSizeType(), SourceLocation()); + } + RCG.emitAggregateType(CGF, N, Size); + // Remap lhs and rhs variables to the addresses of the function arguments. + // %lhs = bitcast void* %arg0 to <type>* + // %rhs = bitcast void* %arg1 to <type>* + CodeGenFunction::OMPPrivateScope PrivateScope(CGF); + PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() -> Address { + // Pull out the pointer to the variable. + Address PtrAddr = CGF.EmitLoadOfPointer( + CGF.GetAddrOfLocalVar(&ParamInOut), + C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); + return CGF.Builder.CreateElementBitCast( + PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType())); + }); + PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() -> Address { + // Pull out the pointer to the variable. + Address PtrAddr = CGF.EmitLoadOfPointer( + CGF.GetAddrOfLocalVar(&ParamIn), + C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); + return CGF.Builder.CreateElementBitCast( + PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType())); + }); + PrivateScope.Privatize(); + // Emit the combiner body: + // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) + // store <type> %2, <type>* %lhs + CGM.getOpenMPRuntime().emitSingleReductionCombiner( + CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), + cast<DeclRefExpr>(RHS)); + CGF.FinishFunction(); + return Fn; +} + +/// Emits reduction finalizer function: +/// \code +/// void @.red_fini(void* %arg) { +/// %0 = bitcast void* %arg to <type>* +/// <destroy>(<type>* %0) +/// ret void +/// } +/// \endcode +static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, + SourceLocation Loc, + ReductionCodeGen &RCG, unsigned N) { + if (!RCG.needCleanups(N)) + return nullptr; + auto &C = CGM.getContext(); + FunctionArgList Args; + ImplicitParamDecl Param(C, C.VoidPtrTy, ImplicitParamDecl::Other); + Args.emplace_back(&Param); + auto &FnInfo = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo); + auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, + ".red_fini.", &CGM.getModule()); + CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo); + CodeGenFunction CGF(CGM); + CGF.disableDebugInfo(); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args); + Address PrivateAddr = CGF.EmitLoadOfPointer( + CGF.GetAddrOfLocalVar(&Param), + C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); + llvm::Value *Size = nullptr; + // If the size of the reduction item is non-constant, load it from global + // threadprivate variable. + if (RCG.getSizes(N).second) { + Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( + CGF, CGM.getContext().getSizeType(), + generateUniqueName("reduction_size", Loc, N)); + Size = + CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, + CGM.getContext().getSizeType(), SourceLocation()); + } + RCG.emitAggregateType(CGF, N, Size); + // Emit the finalizer body: + // <destroy>(<type>* %0) + RCG.emitCleanups(CGF, N, PrivateAddr); + CGF.FinishFunction(); + return Fn; +} + +llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( + CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, + ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { + if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) + return nullptr; + + // Build typedef struct: + // kmp_task_red_input { + // void *reduce_shar; // shared reduction item + // size_t reduce_size; // size of data item + // void *reduce_init; // data initialization routine + // void *reduce_fini; // data finalization routine + // void *reduce_comb; // data combiner routine + // kmp_task_red_flags_t flags; // flags for additional info from compiler + // } kmp_task_red_input_t; + ASTContext &C = CGM.getContext(); + auto *RD = C.buildImplicitRecord("kmp_task_red_input_t"); + RD->startDefinition(); + const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); + const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); + const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); + const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); + const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); + const FieldDecl *FlagsFD = addFieldToRecordDecl( + C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); + RD->completeDefinition(); + QualType RDType = C.getRecordType(RD); + unsigned Size = Data.ReductionVars.size(); + llvm::APInt ArraySize(/*numBits=*/64, Size); + QualType ArrayRDType = C.getConstantArrayType( + RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0); + // kmp_task_red_input_t .rd_input.[Size]; + Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); + ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies, + Data.ReductionOps); + for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { + // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; + llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), + llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; + llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( + TaskRedInput.getPointer(), Idxs, + /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, + ".rd_input.gep."); + LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); + // ElemLVal.reduce_shar = &Shareds[Cnt]; + LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); + RCG.emitSharedLValue(CGF, Cnt); + llvm::Value *CastedShared = + CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer()); + CGF.EmitStoreOfScalar(CastedShared, SharedLVal); + RCG.emitAggregateType(CGF, Cnt); + llvm::Value *SizeValInChars; + llvm::Value *SizeVal; + std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt); + // We use delayed creation/initialization for VLAs, array sections and + // custom reduction initializations. It is required because runtime does not + // provide the way to pass the sizes of VLAs/array sections to + // initializer/combiner/finalizer functions and does not pass the pointer to + // original reduction item to the initializer. Instead threadprivate global + // variables are used to store these values and use them in the functions. + bool DelayedCreation = !!SizeVal; + SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy, + /*isSigned=*/false); + LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD); + CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal); + // ElemLVal.reduce_init = init; + LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD); + llvm::Value *InitAddr = + CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt)); + CGF.EmitStoreOfScalar(InitAddr, InitLVal); + DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt); + // ElemLVal.reduce_fini = fini; + LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD); + llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt); + llvm::Value *FiniAddr = Fini + ? CGF.EmitCastToVoidPtr(Fini) + : llvm::ConstantPointerNull::get(CGM.VoidPtrTy); + CGF.EmitStoreOfScalar(FiniAddr, FiniLVal); + // ElemLVal.reduce_comb = comb; + LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD); + llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction( + CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt], + RHSExprs[Cnt], Data.ReductionCopies[Cnt])); + CGF.EmitStoreOfScalar(CombAddr, CombLVal); + // ElemLVal.flags = 0; + LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD); + if (DelayedCreation) { + CGF.EmitStoreOfScalar( + llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*IsSigned=*/true), + FlagsLVal); + } else + CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType()); + } + // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void + // *data); + llvm::Value *Args[] = { + CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, + /*isSigned=*/true), + llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(), + CGM.VoidPtrTy)}; + return CGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args); +} + +void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, + SourceLocation Loc, + ReductionCodeGen &RCG, + unsigned N) { + auto Sizes = RCG.getSizes(N); + // Emit threadprivate global variable if the type is non-constant + // (Sizes.second = nullptr). + if (Sizes.second) { + llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy, + /*isSigned=*/false); + Address SizeAddr = getAddrOfArtificialThreadPrivate( + CGF, CGM.getContext().getSizeType(), + generateUniqueName("reduction_size", Loc, N)); + CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false); + } + // Store address of the original reduction item if custom initializer is used. + if (RCG.usesReductionInitializer(N)) { + Address SharedAddr = getAddrOfArtificialThreadPrivate( + CGF, CGM.getContext().VoidPtrTy, + generateUniqueName("reduction", Loc, N)); + CGF.Builder.CreateStore( + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy), + SharedAddr, /*IsVolatile=*/false); + } +} + +Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, + SourceLocation Loc, + llvm::Value *ReductionsPtr, + LValue SharedLVal) { + // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void + // *d); + llvm::Value *Args[] = { + CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, + /*isSigned=*/true), + ReductionsPtr, + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(), + CGM.VoidPtrTy)}; + return Address( + CGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args), + SharedLVal.getAlignment()); +} + void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc) { if (!CGF.HaveInsertPoint()) diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h index 6f460f121791..5dcf999bea37 100644 --- a/lib/CodeGen/CGOpenMPRuntime.h +++ b/lib/CodeGen/CGOpenMPRuntime.h @@ -96,15 +96,106 @@ struct OMPTaskDataTy final { SmallVector<const Expr *, 4> FirstprivateInits; SmallVector<const Expr *, 4> LastprivateVars; SmallVector<const Expr *, 4> LastprivateCopies; + SmallVector<const Expr *, 4> ReductionVars; + SmallVector<const Expr *, 4> ReductionCopies; + SmallVector<const Expr *, 4> ReductionOps; SmallVector<std::pair<OpenMPDependClauseKind, const Expr *>, 4> Dependences; llvm::PointerIntPair<llvm::Value *, 1, bool> Final; llvm::PointerIntPair<llvm::Value *, 1, bool> Schedule; llvm::PointerIntPair<llvm::Value *, 1, bool> Priority; + llvm::Value *Reductions = nullptr; unsigned NumberOfParts = 0; bool Tied = true; bool Nogroup = false; }; +/// Class intended to support codegen of all kind of the reduction clauses. +class ReductionCodeGen { +private: + /// Data required for codegen of reduction clauses. + struct ReductionData { + /// Reference to the original shared item. + const Expr *Ref = nullptr; + /// Helper expression for generation of private copy. + const Expr *Private = nullptr; + /// Helper expression for generation reduction operation. + const Expr *ReductionOp = nullptr; + ReductionData(const Expr *Ref, const Expr *Private, const Expr *ReductionOp) + : Ref(Ref), Private(Private), ReductionOp(ReductionOp) {} + }; + /// List of reduction-based clauses. + SmallVector<ReductionData, 4> ClausesData; + + /// List of addresses of original shared variables/expressions. + SmallVector<std::pair<LValue, LValue>, 4> SharedAddresses; + /// Sizes of the reduction items in chars. + SmallVector<std::pair<llvm::Value *, llvm::Value *>, 4> Sizes; + /// Base declarations for the reduction items. + SmallVector<const VarDecl *, 4> BaseDecls; + + /// Emits lvalue for shared expresion. + LValue emitSharedLValue(CodeGenFunction &CGF, const Expr *E); + /// Emits upper bound for shared expression (if array section). + LValue emitSharedLValueUB(CodeGenFunction &CGF, const Expr *E); + /// Performs aggregate initialization. + /// \param N Number of reduction item in the common list. + /// \param PrivateAddr Address of the corresponding private item. + /// \param SharedLVal Address of the original shared variable. + /// \param DRD Declare reduction construct used for reduction item. + void emitAggregateInitialization(CodeGenFunction &CGF, unsigned N, + Address PrivateAddr, LValue SharedLVal, + const OMPDeclareReductionDecl *DRD); + +public: + ReductionCodeGen(ArrayRef<const Expr *> Shareds, + ArrayRef<const Expr *> Privates, + ArrayRef<const Expr *> ReductionOps); + /// Emits lvalue for a reduction item. + /// \param N Number of the reduction item. + void emitSharedLValue(CodeGenFunction &CGF, unsigned N); + /// Emits the code for the variable-modified type, if required. + /// \param N Number of the reduction item. + void emitAggregateType(CodeGenFunction &CGF, unsigned N); + /// Emits the code for the variable-modified type, if required. + /// \param N Number of the reduction item. + /// \param Size Size of the type in chars. + void emitAggregateType(CodeGenFunction &CGF, unsigned N, llvm::Value *Size); + /// Performs initialization of the private copy for the reduction item. + /// \param N Number of the reduction item. + /// \param PrivateAddr Address of the corresponding private item. + /// \param DefaultInit Default initialization sequence that should be + /// performed if no reduction specific initialization is found. + /// \param SharedLVal Address of the original shared variable. + void + emitInitialization(CodeGenFunction &CGF, unsigned N, Address PrivateAddr, + LValue SharedLVal, + llvm::function_ref<bool(CodeGenFunction &)> DefaultInit); + /// Returns true if the private copy requires cleanups. + bool needCleanups(unsigned N); + /// Emits cleanup code for the reduction item. + /// \param N Number of the reduction item. + /// \param PrivateAddr Address of the corresponding private item. + void emitCleanups(CodeGenFunction &CGF, unsigned N, Address PrivateAddr); + /// Adjusts \p PrivatedAddr for using instead of the original variable + /// address in normal operations. + /// \param N Number of the reduction item. + /// \param PrivateAddr Address of the corresponding private item. + Address adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, + Address PrivateAddr); + /// Returns LValue for the reduction item. + LValue getSharedLValue(unsigned N) const { return SharedAddresses[N].first; } + /// Returns the size of the reduction item (in chars and total number of + /// elements in the item), or nullptr, if the size is a constant. + std::pair<llvm::Value *, llvm::Value *> getSizes(unsigned N) const { + return Sizes[N]; + } + /// Returns the base declaration of the reduction item. + const VarDecl *getBaseDecl(unsigned N) const { return BaseDecls[N]; } + /// Returns true if the initialization of the reduction item uses initializer + /// from declare reduction construct. + bool usesReductionInitializer(unsigned N) const; +}; + class CGOpenMPRuntime { protected: CodeGenModule &CGM; @@ -121,7 +212,7 @@ protected: /// \param OutlinedFnID Outlined function ID value to be defined by this call. /// \param IsOffloadEntry True if the outlined function is an offload entry. /// \param CodeGen Lambda codegen specific to an accelerator device. - /// An oulined function may not be an entry if, e.g. the if clause always + /// An outlined function may not be an entry if, e.g. the if clause always /// evaluates to false. virtual void emitTargetOutlinedFunctionHelper(const OMPExecutableDirective &D, StringRef ParentName, @@ -699,7 +790,7 @@ public: /// \param Loc Clang source location. /// \param ScheduleKind Schedule kind, specified by the 'schedule' clause. /// \param IVSize Size of the iteration variable in bits. - /// \param IVSigned Sign of the interation variable. + /// \param IVSigned Sign of the iteration variable. /// \param Ordered true if loop is ordered, false otherwise. /// \param DispatchValues struct containing llvm values for lower bound, upper /// bound, and chunk expression. @@ -723,7 +814,7 @@ public: /// \param Loc Clang source location. /// \param ScheduleKind Schedule kind, specified by the 'schedule' clause. /// \param IVSize Size of the iteration variable in bits. - /// \param IVSigned Sign of the interation variable. + /// \param IVSigned Sign of the iteration variable. /// \param Ordered true if loop is ordered, false otherwise. /// \param IL Address of the output variable in which the flag of the /// last iteration is returned. @@ -732,7 +823,7 @@ public: /// \param UB Address of the output variable in which the upper iteration /// number is returned. /// \param ST Address of the output variable in which the stride value is - /// returned nesessary to generated the static_chunked scheduled loop. + /// returned necessary to generated the static_chunked scheduled loop. /// \param Chunk Value of the chunk for the static_chunked scheduled loop. /// For the default (nullptr) value, the chunk 1 will be used. /// @@ -747,7 +838,7 @@ public: /// \param Loc Clang source location. /// \param SchedKind Schedule kind, specified by the 'dist_schedule' clause. /// \param IVSize Size of the iteration variable in bits. - /// \param IVSigned Sign of the interation variable. + /// \param IVSigned Sign of the iteration variable. /// \param Ordered true if loop is ordered, false otherwise. /// \param IL Address of the output variable in which the flag of the /// last iteration is returned. @@ -756,7 +847,7 @@ public: /// \param UB Address of the output variable in which the upper iteration /// number is returned. /// \param ST Address of the output variable in which the stride value is - /// returned nesessary to generated the static_chunked scheduled loop. + /// returned necessary to generated the static_chunked scheduled loop. /// \param Chunk Value of the chunk for the static_chunked scheduled loop. /// For the default (nullptr) value, the chunk 1 will be used. /// @@ -773,7 +864,7 @@ public: /// \param CGF Reference to current CodeGenFunction. /// \param Loc Clang source location. /// \param IVSize Size of the iteration variable in bits. - /// \param IVSigned Sign of the interation variable. + /// \param IVSigned Sign of the iteration variable. /// virtual void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, @@ -792,7 +883,7 @@ public: /// kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, /// kmp_int[32|64] *p_stride); /// \param IVSize Size of the iteration variable in bits. - /// \param IVSigned Sign of the interation variable. + /// \param IVSigned Sign of the iteration variable. /// \param IL Address of the output variable in which the flag of the /// last iteration is returned. /// \param LB Address of the output variable in which the lower iteration @@ -844,6 +935,14 @@ public: SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF = nullptr); + /// Creates artificial threadprivate variable with name \p Name and type \p + /// VarType. + /// \param VarType Type of the artificial threadprivate variable. + /// \param Name Name of the artificial threadprivate variable. + virtual Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, + QualType VarType, + StringRef Name); + /// \brief Emit flush of the variables specified in 'omp flush' directive. /// \param Vars List of variables to flush. virtual void emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *> Vars, @@ -1002,6 +1101,51 @@ public: ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options); + /// Emit a code for initialization of task reduction clause. Next code + /// should be emitted for reduction: + /// \code + /// + /// _task_red_item_t red_data[n]; + /// ... + /// red_data[i].shar = &origs[i]; + /// red_data[i].size = sizeof(origs[i]); + /// red_data[i].f_init = (void*)RedInit<i>; + /// red_data[i].f_fini = (void*)RedDest<i>; + /// red_data[i].f_comb = (void*)RedOp<i>; + /// red_data[i].flags = <Flag_i>; + /// ... + /// void* tg1 = __kmpc_task_reduction_init(gtid, n, red_data); + /// \endcode + /// + /// \param LHSExprs List of LHS in \a Data.ReductionOps reduction operations. + /// \param RHSExprs List of RHS in \a Data.ReductionOps reduction operations. + /// \param Data Additional data for task generation like tiedness, final + /// state, list of privates, reductions etc. + virtual llvm::Value *emitTaskReductionInit(CodeGenFunction &CGF, + SourceLocation Loc, + ArrayRef<const Expr *> LHSExprs, + ArrayRef<const Expr *> RHSExprs, + const OMPTaskDataTy &Data); + + /// Required to resolve existing problems in the runtime. Emits threadprivate + /// variables to store the size of the VLAs/array sections for + /// initializer/combiner/finalizer functions + emits threadprivate variable to + /// store the pointer to the original reduction item for the custom + /// initializer defined by declare reduction construct. + /// \param RCG Allows to reuse an existing data for the reductions. + /// \param N Reduction item for which fixups must be emitted. + virtual void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, + ReductionCodeGen &RCG, unsigned N); + + /// Get the address of `void *` type of the privatue copy of the reduction + /// item specified by the \p SharedLVal. + /// \param ReductionsPtr Pointer to the reduction data returned by the + /// emitTaskReductionInit function. + /// \param SharedLVal Address of the original reduction item. + virtual Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, + llvm::Value *ReductionsPtr, + LValue SharedLVal); + /// \brief Emit code for 'taskwait' directive. virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc); @@ -1029,7 +1173,7 @@ public: /// \param OutlinedFnID Outlined function ID value to be defined by this call. /// \param IsOffloadEntry True if the outlined function is an offload entry. /// \param CodeGen Code generation sequence for the \a D directive. - /// An oulined function may not be an entry if, e.g. the if clause always + /// An outlined function may not be an entry if, e.g. the if clause always /// evaluates to false. virtual void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp index 71797e2e6fbe..6135cf31d176 100644 --- a/lib/CodeGen/CGStmtOpenMP.cpp +++ b/lib/CodeGen/CGStmtOpenMP.cpp @@ -549,156 +549,6 @@ void CodeGenFunction::EmitOMPAggregateAssign( EmitBlock(DoneBB, /*IsFinished=*/true); } -/// Check if the combiner is a call to UDR combiner and if it is so return the -/// UDR decl used for reduction. -static const OMPDeclareReductionDecl * -getReductionInit(const Expr *ReductionOp) { - if (auto *CE = dyn_cast<CallExpr>(ReductionOp)) - if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) - if (auto *DRE = - dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) - if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) - return DRD; - return nullptr; -} - -static void emitInitWithReductionInitializer(CodeGenFunction &CGF, - const OMPDeclareReductionDecl *DRD, - const Expr *InitOp, - Address Private, Address Original, - QualType Ty) { - if (DRD->getInitializer()) { - std::pair<llvm::Function *, llvm::Function *> Reduction = - CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); - auto *CE = cast<CallExpr>(InitOp); - auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); - const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); - const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); - auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); - auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); - CodeGenFunction::OMPPrivateScope PrivateScope(CGF); - PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), - [=]() -> Address { return Private; }); - PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), - [=]() -> Address { return Original; }); - (void)PrivateScope.Privatize(); - RValue Func = RValue::get(Reduction.second); - CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); - CGF.EmitIgnoredExpr(InitOp); - } else { - llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); - auto *GV = new llvm::GlobalVariable( - CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, - llvm::GlobalValue::PrivateLinkage, Init, ".init"); - LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); - RValue InitRVal; - switch (CGF.getEvaluationKind(Ty)) { - case TEK_Scalar: - InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation()); - break; - case TEK_Complex: - InitRVal = - RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation())); - break; - case TEK_Aggregate: - InitRVal = RValue::getAggregate(LV.getAddress()); - break; - } - OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue); - CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); - CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), - /*IsInitializer=*/false); - } -} - -/// \brief Emit initialization of arrays of complex types. -/// \param DestAddr Address of the array. -/// \param Type Type of array. -/// \param Init Initial expression of array. -/// \param SrcAddr Address of the original array. -static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, - QualType Type, const Expr *Init, - Address SrcAddr = Address::invalid()) { - auto *DRD = getReductionInit(Init); - // Perform element-by-element initialization. - QualType ElementTy; - - // Drill down to the base element type on both arrays. - auto ArrayTy = Type->getAsArrayTypeUnsafe(); - auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); - DestAddr = - CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); - if (DRD) - SrcAddr = - CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); - - llvm::Value *SrcBegin = nullptr; - if (DRD) - SrcBegin = SrcAddr.getPointer(); - auto DestBegin = DestAddr.getPointer(); - // Cast from pointer to array type to pointer to single element. - auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); - // The basic structure here is a while-do loop. - auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); - auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); - auto IsEmpty = - CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); - CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); - - // Enter the loop body, making that address the current address. - auto EntryBB = CGF.Builder.GetInsertBlock(); - CGF.EmitBlock(BodyBB); - - CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); - - llvm::PHINode *SrcElementPHI = nullptr; - Address SrcElementCurrent = Address::invalid(); - if (DRD) { - SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, - "omp.arraycpy.srcElementPast"); - SrcElementPHI->addIncoming(SrcBegin, EntryBB); - SrcElementCurrent = - Address(SrcElementPHI, - SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); - } - llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( - DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); - DestElementPHI->addIncoming(DestBegin, EntryBB); - Address DestElementCurrent = - Address(DestElementPHI, - DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); - - // Emit copy. - { - CodeGenFunction::RunCleanupsScope InitScope(CGF); - if (DRD && (DRD->getInitializer() || !Init)) { - emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, - SrcElementCurrent, ElementTy); - } else - CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), - /*IsInitializer=*/false); - } - - if (DRD) { - // Shift the address forward by one element. - auto SrcElementNext = CGF.Builder.CreateConstGEP1_32( - SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); - SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); - } - - // Shift the address forward by one element. - auto DestElementNext = CGF.Builder.CreateConstGEP1_32( - DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); - // Check whether we've reached the end. - auto Done = - CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); - CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); - DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); - - // Done. - CGF.EmitBlock(DoneBB, /*IsFinished=*/true); -} - void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr, Address SrcAddr, const VarDecl *DestVD, const VarDecl *SrcVD, const Expr *Copy) { @@ -1051,253 +901,106 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal( EmitBlock(DoneBB, /*IsFinished=*/true); } -static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, - LValue BaseLV, llvm::Value *Addr) { - Address Tmp = Address::invalid(); - Address TopTmp = Address::invalid(); - Address MostTopTmp = Address::invalid(); - BaseTy = BaseTy.getNonReferenceType(); - while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && - !CGF.getContext().hasSameType(BaseTy, ElTy)) { - Tmp = CGF.CreateMemTemp(BaseTy); - if (TopTmp.isValid()) - CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); - else - MostTopTmp = Tmp; - TopTmp = Tmp; - BaseTy = BaseTy->getPointeeType(); - } - llvm::Type *Ty = BaseLV.getPointer()->getType(); - if (Tmp.isValid()) - Ty = Tmp.getElementType(); - Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); - if (Tmp.isValid()) { - CGF.Builder.CreateStore(Addr, Tmp); - return MostTopTmp; - } - return Address(Addr, BaseLV.getAlignment()); -} - -static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, - LValue BaseLV) { - BaseTy = BaseTy.getNonReferenceType(); - while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && - !CGF.getContext().hasSameType(BaseTy, ElTy)) { - if (auto *PtrTy = BaseTy->getAs<PointerType>()) - BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy); - else { - BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(), - BaseTy->castAs<ReferenceType>()); - } - BaseTy = BaseTy->getPointeeType(); - } - return CGF.MakeAddrLValue( - Address( - CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - BaseLV.getPointer(), CGF.ConvertTypeForMem(ElTy)->getPointerTo()), - BaseLV.getAlignment()), - BaseLV.getType(), BaseLV.getBaseInfo()); -} - void CodeGenFunction::EmitOMPReductionClauseInit( const OMPExecutableDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) { if (!HaveInsertPoint()) return; + SmallVector<const Expr *, 4> Shareds; + SmallVector<const Expr *, 4> Privates; + SmallVector<const Expr *, 4> ReductionOps; + SmallVector<const Expr *, 4> LHSs; + SmallVector<const Expr *, 4> RHSs; for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { - auto ILHS = C->lhs_exprs().begin(); - auto IRHS = C->rhs_exprs().begin(); auto IPriv = C->privates().begin(); auto IRed = C->reduction_ops().begin(); - for (auto IRef : C->varlists()) { - auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); - auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); - auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl()); - auto *DRD = getReductionInit(*IRed); - if (auto *OASE = dyn_cast<OMPArraySectionExpr>(IRef)) { - auto *Base = OASE->getBase()->IgnoreParenImpCasts(); - while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) - Base = TempOASE->getBase()->IgnoreParenImpCasts(); - while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) - Base = TempASE->getBase()->IgnoreParenImpCasts(); - auto *DE = cast<DeclRefExpr>(Base); - auto *OrigVD = cast<VarDecl>(DE->getDecl()); - auto OASELValueLB = EmitOMPArraySectionExpr(OASE); - auto OASELValueUB = - EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); - auto OriginalBaseLValue = EmitLValue(DE); - LValue BaseLValue = - loadToBegin(*this, OrigVD->getType(), OASELValueLB.getType(), - OriginalBaseLValue); - // Store the address of the original variable associated with the LHS - // implicit variable. - PrivateScope.addPrivate(LHSVD, [OASELValueLB]() -> Address { - return OASELValueLB.getAddress(); - }); - // Emit reduction copy. - bool IsRegistered = PrivateScope.addPrivate( - OrigVD, [this, OrigVD, PrivateVD, BaseLValue, OASELValueLB, - OASELValueUB, OriginalBaseLValue, DRD, IRed]() -> Address { - // Emit VarDecl with copy init for arrays. - // Get the address of the original variable captured in current - // captured region. - auto *Size = Builder.CreatePtrDiff(OASELValueUB.getPointer(), - OASELValueLB.getPointer()); - Size = Builder.CreateNUWAdd( - Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); - CodeGenFunction::OpaqueValueMapping OpaqueMap( - *this, cast<OpaqueValueExpr>( - getContext() - .getAsVariableArrayType(PrivateVD->getType()) - ->getSizeExpr()), - RValue::get(Size)); - EmitVariablyModifiedType(PrivateVD->getType()); - auto Emission = EmitAutoVarAlloca(*PrivateVD); - auto Addr = Emission.getAllocatedAddress(); - auto *Init = PrivateVD->getInit(); - EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(), - DRD ? *IRed : Init, - OASELValueLB.getAddress()); - EmitAutoVarCleanups(Emission); - // Emit private VarDecl with reduction init. - auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(), - OASELValueLB.getPointer()); - auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset); - return castToBase(*this, OrigVD->getType(), - OASELValueLB.getType(), OriginalBaseLValue, - Ptr); - }); - assert(IsRegistered && "private var already registered as private"); - // Silence the warning about unused variable. - (void)IsRegistered; - PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { - return GetAddrOfLocalVar(PrivateVD); - }); - } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(IRef)) { - auto *Base = ASE->getBase()->IgnoreParenImpCasts(); - while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) - Base = TempASE->getBase()->IgnoreParenImpCasts(); - auto *DE = cast<DeclRefExpr>(Base); - auto *OrigVD = cast<VarDecl>(DE->getDecl()); - auto ASELValue = EmitLValue(ASE); - auto OriginalBaseLValue = EmitLValue(DE); - LValue BaseLValue = loadToBegin( - *this, OrigVD->getType(), ASELValue.getType(), OriginalBaseLValue); - // Store the address of the original variable associated with the LHS - // implicit variable. - PrivateScope.addPrivate( - LHSVD, [ASELValue]() -> Address { return ASELValue.getAddress(); }); - // Emit reduction copy. - bool IsRegistered = PrivateScope.addPrivate( - OrigVD, [this, OrigVD, PrivateVD, BaseLValue, ASELValue, - OriginalBaseLValue, DRD, IRed]() -> Address { - // Emit private VarDecl with reduction init. - AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD); - auto Addr = Emission.getAllocatedAddress(); - if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { - emitInitWithReductionInitializer(*this, DRD, *IRed, Addr, - ASELValue.getAddress(), - ASELValue.getType()); - } else - EmitAutoVarInit(Emission); - EmitAutoVarCleanups(Emission); - auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(), - ASELValue.getPointer()); - auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset); - return castToBase(*this, OrigVD->getType(), ASELValue.getType(), - OriginalBaseLValue, Ptr); - }); - assert(IsRegistered && "private var already registered as private"); - // Silence the warning about unused variable. - (void)IsRegistered; - PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address { - return Builder.CreateElementBitCast( - GetAddrOfLocalVar(PrivateVD), ConvertTypeForMem(RHSVD->getType()), - "rhs.begin"); - }); - } else { - auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl()); - QualType Type = PrivateVD->getType(); - if (getContext().getAsArrayType(Type)) { - // Store the address of the original variable associated with the LHS - // implicit variable. - DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), - CapturedStmtInfo->lookup(OrigVD) != nullptr, - IRef->getType(), VK_LValue, IRef->getExprLoc()); - Address OriginalAddr = EmitLValue(&DRE).getAddress(); - PrivateScope.addPrivate(LHSVD, [this, &OriginalAddr, - LHSVD]() -> Address { - OriginalAddr = Builder.CreateElementBitCast( - OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin"); - return OriginalAddr; - }); - bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { - if (Type->isVariablyModifiedType()) { - CodeGenFunction::OpaqueValueMapping OpaqueMap( - *this, cast<OpaqueValueExpr>( - getContext() - .getAsVariableArrayType(PrivateVD->getType()) - ->getSizeExpr()), - RValue::get( - getTypeSize(OrigVD->getType().getNonReferenceType()))); - EmitVariablyModifiedType(Type); - } - auto Emission = EmitAutoVarAlloca(*PrivateVD); - auto Addr = Emission.getAllocatedAddress(); - auto *Init = PrivateVD->getInit(); - EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(), - DRD ? *IRed : Init, OriginalAddr); - EmitAutoVarCleanups(Emission); - return Emission.getAllocatedAddress(); - }); - assert(IsRegistered && "private var already registered as private"); - // Silence the warning about unused variable. - (void)IsRegistered; - PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address { - return Builder.CreateElementBitCast( - GetAddrOfLocalVar(PrivateVD), - ConvertTypeForMem(RHSVD->getType()), "rhs.begin"); - }); - } else { - // Store the address of the original variable associated with the LHS - // implicit variable. - Address OriginalAddr = Address::invalid(); - PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef, - &OriginalAddr]() -> Address { - DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), - CapturedStmtInfo->lookup(OrigVD) != nullptr, - IRef->getType(), VK_LValue, IRef->getExprLoc()); - OriginalAddr = EmitLValue(&DRE).getAddress(); - return OriginalAddr; - }); - // Emit reduction copy. - bool IsRegistered = PrivateScope.addPrivate( - OrigVD, [this, PrivateVD, OriginalAddr, DRD, IRed]() -> Address { - // Emit private VarDecl with reduction init. - AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD); - auto Addr = Emission.getAllocatedAddress(); - if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { - emitInitWithReductionInitializer(*this, DRD, *IRed, Addr, - OriginalAddr, - PrivateVD->getType()); - } else - EmitAutoVarInit(Emission); - EmitAutoVarCleanups(Emission); - return Addr; - }); - assert(IsRegistered && "private var already registered as private"); - // Silence the warning about unused variable. - (void)IsRegistered; - PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { - return GetAddrOfLocalVar(PrivateVD); - }); - } + auto ILHS = C->lhs_exprs().begin(); + auto IRHS = C->rhs_exprs().begin(); + for (const auto *Ref : C->varlists()) { + Shareds.emplace_back(Ref); + Privates.emplace_back(*IPriv); + ReductionOps.emplace_back(*IRed); + LHSs.emplace_back(*ILHS); + RHSs.emplace_back(*IRHS); + std::advance(IPriv, 1); + std::advance(IRed, 1); + std::advance(ILHS, 1); + std::advance(IRHS, 1); + } + } + ReductionCodeGen RedCG(Shareds, Privates, ReductionOps); + unsigned Count = 0; + auto ILHS = LHSs.begin(); + auto IRHS = RHSs.begin(); + auto IPriv = Privates.begin(); + for (const auto *IRef : Shareds) { + auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl()); + // Emit private VarDecl with reduction init. + RedCG.emitSharedLValue(*this, Count); + RedCG.emitAggregateType(*this, Count); + auto Emission = EmitAutoVarAlloca(*PrivateVD); + RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(), + RedCG.getSharedLValue(Count), + [&Emission](CodeGenFunction &CGF) { + CGF.EmitAutoVarInit(Emission); + return true; + }); + EmitAutoVarCleanups(Emission); + Address BaseAddr = RedCG.adjustPrivateAddress( + *this, Count, Emission.getAllocatedAddress()); + bool IsRegistered = PrivateScope.addPrivate( + RedCG.getBaseDecl(Count), [BaseAddr]() -> Address { return BaseAddr; }); + assert(IsRegistered && "private var already registered as private"); + // Silence the warning about unused variable. + (void)IsRegistered; + + auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); + auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); + if (isa<OMPArraySectionExpr>(IRef)) { + // Store the address of the original variable associated with the LHS + // implicit variable. + PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address { + return RedCG.getSharedLValue(Count).getAddress(); + }); + PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { + return GetAddrOfLocalVar(PrivateVD); + }); + } else if (isa<ArraySubscriptExpr>(IRef)) { + // Store the address of the original variable associated with the LHS + // implicit variable. + PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address { + return RedCG.getSharedLValue(Count).getAddress(); + }); + PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address { + return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD), + ConvertTypeForMem(RHSVD->getType()), + "rhs.begin"); + }); + } else { + QualType Type = PrivateVD->getType(); + bool IsArray = getContext().getAsArrayType(Type) != nullptr; + Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(); + // Store the address of the original variable associated with the LHS + // implicit variable. + if (IsArray) { + OriginalAddr = Builder.CreateElementBitCast( + OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin"); } - ++ILHS; - ++IRHS; - ++IPriv; - ++IRed; + PrivateScope.addPrivate( + LHSVD, [OriginalAddr]() -> Address { return OriginalAddr; }); + PrivateScope.addPrivate( + RHSVD, [this, PrivateVD, RHSVD, IsArray]() -> Address { + return IsArray + ? Builder.CreateElementBitCast( + GetAddrOfLocalVar(PrivateVD), + ConvertTypeForMem(RHSVD->getType()), "rhs.begin") + : GetAddrOfLocalVar(PrivateVD); + }); } + ++ILHS; + ++IRHS; + ++IPriv; + ++Count; } } @@ -2994,11 +2697,32 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, ++ID; } } + SmallVector<const Expr *, 4> LHSs; + SmallVector<const Expr *, 4> RHSs; + for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { + auto IPriv = C->privates().begin(); + auto IRed = C->reduction_ops().begin(); + auto ILHS = C->lhs_exprs().begin(); + auto IRHS = C->rhs_exprs().begin(); + for (const auto *Ref : C->varlists()) { + Data.ReductionVars.emplace_back(Ref); + Data.ReductionCopies.emplace_back(*IPriv); + Data.ReductionOps.emplace_back(*IRed); + LHSs.emplace_back(*ILHS); + RHSs.emplace_back(*IRHS); + std::advance(IPriv, 1); + std::advance(IRed, 1); + std::advance(ILHS, 1); + std::advance(IRHS, 1); + } + } + Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit( + *this, S.getLocStart(), LHSs, RHSs, Data); // Build list of dependences. for (const auto *C : S.getClausesOfKind<OMPDependClause>()) for (auto *IRef : C->varlists()) Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); - auto &&CodeGen = [&Data, CS, &BodyGen, &LastprivateDstsOrigs]( + auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs]( CodeGenFunction &CGF, PrePostActionTy &Action) { // Set proper addresses for generated private copies. OMPPrivateScope Scope(CGF); @@ -3053,6 +2777,34 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); } } + if (Data.Reductions) { + OMPLexicalScope LexScope(CGF, S, /*AsInlined=*/true); + ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies, + Data.ReductionOps); + llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad( + CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9))); + for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) { + RedCG.emitSharedLValue(CGF, Cnt); + RedCG.emitAggregateType(CGF, Cnt); + Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( + CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt)); + Replacement = + Address(CGF.EmitScalarConversion( + Replacement.getPointer(), CGF.getContext().VoidPtrTy, + CGF.getContext().getPointerType( + Data.ReductionCopies[Cnt]->getType()), + SourceLocation()), + Replacement.getAlignment()); + Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement); + Scope.addPrivate(RedCG.getBaseDecl(Cnt), + [Replacement]() { return Replacement; }); + // FIXME: This must removed once the runtime library is fixed. + // Emit required threadprivate variables for + // initilizer/combiner/finalizer. + CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(), + RedCG, Cnt); + } + } (void)Scope.Privatize(); Action.Enter(CGF); @@ -3714,6 +3466,7 @@ static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, case OMPC_firstprivate: case OMPC_lastprivate: case OMPC_reduction: + case OMPC_task_reduction: case OMPC_safelen: case OMPC_simdlen: case OMPC_collapse: diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h index 5933e029be8d..753dd92f3071 100644 --- a/lib/CodeGen/CodeGenFunction.h +++ b/lib/CodeGen/CodeGenFunction.h @@ -3589,12 +3589,19 @@ public: /// nonnull, if \p LHS is marked _Nonnull. void EmitNullabilityCheck(LValue LHS, llvm::Value *RHS, SourceLocation Loc); + /// An enumeration which makes it easier to specify whether or not an + /// operation is a subtraction. + enum { NotSubtraction = false, IsSubtraction = true }; + /// Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to /// detect undefined behavior when the pointer overflow sanitizer is enabled. /// \p SignedIndices indicates whether any of the GEP indices are signed. + /// \p IsSubtraction indicates whether the expression used to form the GEP + /// is a subtraction. llvm::Value *EmitCheckedInBoundsGEP(llvm::Value *Ptr, ArrayRef<llvm::Value *> IdxList, bool SignedIndices, + bool IsSubtraction, SourceLocation Loc, const Twine &Name = ""); diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp index 4b15b8ac4c71..5561d4520cc8 100644 --- a/lib/CodeGen/CodeGenModule.cpp +++ b/lib/CodeGen/CodeGenModule.cpp @@ -4499,18 +4499,19 @@ void CodeGenModule::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap, // Make a copy of the features as passed on the command line into the // beginning of the additional features from the function to override. - ParsedAttr.first.insert(ParsedAttr.first.begin(), + ParsedAttr.Features.insert(ParsedAttr.Features.begin(), Target.getTargetOpts().FeaturesAsWritten.begin(), Target.getTargetOpts().FeaturesAsWritten.end()); - if (ParsedAttr.second != "") - TargetCPU = ParsedAttr.second; + if (ParsedAttr.Architecture != "") + TargetCPU = ParsedAttr.Architecture ; // Now populate the feature map, first with the TargetCPU which is either // the default or a new one from the target attribute string. Then we'll use // the passed in features (FeaturesAsWritten) along with the new ones from // the attribute. - Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU, ParsedAttr.first); + Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU, + ParsedAttr.Features); } else { Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU, Target.getTargetOpts().Features); diff --git a/lib/CodeGen/MacroPPCallbacks.cpp b/lib/CodeGen/MacroPPCallbacks.cpp index 6a31dfe53d64..a6f21d8ddcfb 100644 --- a/lib/CodeGen/MacroPPCallbacks.cpp +++ b/lib/CodeGen/MacroPPCallbacks.cpp @@ -26,8 +26,8 @@ void MacroPPCallbacks::writeMacroDefinition(const IdentifierInfo &II, if (MI.isFunctionLike()) { Name << '('; - if (!MI.arg_empty()) { - MacroInfo::arg_iterator AI = MI.arg_begin(), E = MI.arg_end(); + if (!MI.param_empty()) { + MacroInfo::param_iterator AI = MI.param_begin(), E = MI.param_end(); for (; AI + 1 != E; ++AI) { Name << (*AI)->getName(); Name << ','; diff --git a/lib/CodeGen/ObjectFilePCHContainerOperations.cpp b/lib/CodeGen/ObjectFilePCHContainerOperations.cpp index 37ecc05aa1ee..d0760b9cc2a6 100644 --- a/lib/CodeGen/ObjectFilePCHContainerOperations.cpp +++ b/lib/CodeGen/ObjectFilePCHContainerOperations.cpp @@ -152,6 +152,9 @@ public: CodeGenOpts.CodeModel = "default"; CodeGenOpts.ThreadModel = "single"; CodeGenOpts.DebugTypeExtRefs = true; + // When building a module MainFileName is the name of the modulemap file. + CodeGenOpts.MainFileName = + LangOpts.CurrentModule.empty() ? MainFileName : LangOpts.CurrentModule; CodeGenOpts.setDebugInfo(codegenoptions::FullDebugInfo); CodeGenOpts.setDebuggerTuning(CI.getCodeGenOpts().getDebuggerTuning()); } diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp index eeebd60a2d20..c17828974e92 100644 --- a/lib/CodeGen/TargetInfo.cpp +++ b/lib/CodeGen/TargetInfo.cpp @@ -4785,7 +4785,8 @@ class AArch64ABIInfo : public SwiftABIInfo { public: enum ABIKind { AAPCS = 0, - DarwinPCS + DarwinPCS, + Win64 }; private: @@ -4823,10 +4824,14 @@ private: Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override { - return isDarwinPCS() ? EmitDarwinVAArg(VAListAddr, Ty, CGF) - : EmitAAPCSVAArg(VAListAddr, Ty, CGF); + return Kind == Win64 ? EmitMSVAArg(CGF, VAListAddr, Ty) + : isDarwinPCS() ? EmitDarwinVAArg(VAListAddr, Ty, CGF) + : EmitAAPCSVAArg(VAListAddr, Ty, CGF); } + Address EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const override; + bool shouldPassIndirectlyForSwift(CharUnits totalSize, ArrayRef<llvm::Type*> scalars, bool asReturnValue) const override { @@ -5332,6 +5337,14 @@ Address AArch64ABIInfo::EmitDarwinVAArg(Address VAListAddr, QualType Ty, TyInfo, SlotSize, /*AllowHigherAlign*/ true); } +Address AArch64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const { + return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false, + CGF.getContext().getTypeInfoInChars(Ty), + CharUnits::fromQuantity(8), + /*allowHigherAlign*/ false); +} + //===----------------------------------------------------------------------===// // ARM ABI Implementation //===----------------------------------------------------------------------===// @@ -8494,6 +8507,8 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { AArch64ABIInfo::ABIKind Kind = AArch64ABIInfo::AAPCS; if (getTarget().getABI() == "darwinpcs") Kind = AArch64ABIInfo::DarwinPCS; + else if (Triple.isOSWindows()) + Kind = AArch64ABIInfo::Win64; return SetCGInfo(new AArch64TargetCodeGenInfo(Types, Kind)); } |