path: root/contrib/llvm-project/clang/lib/CodeGen/CGCall.cpp
Diffstat (limited to 'contrib/llvm-project/clang/lib/CodeGen/CGCall.cpp')
-rw-r--r--  contrib/llvm-project/clang/lib/CodeGen/CGCall.cpp | 222
1 file changed, 148 insertions(+), 74 deletions(-)
diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGCall.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGCall.cpp
index 34f7a421c933..4e26c35c6342 100644
--- a/contrib/llvm-project/clang/lib/CodeGen/CGCall.cpp
+++ b/contrib/llvm-project/clang/lib/CodeGen/CGCall.cpp
@@ -62,6 +62,8 @@ unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) {
// TODO: Add support for __vectorcall to LLVM.
case CC_X86VectorCall: return llvm::CallingConv::X86_VectorCall;
case CC_AArch64VectorCall: return llvm::CallingConv::AArch64_VectorCall;
+ case CC_AArch64SVEPCS: return llvm::CallingConv::AArch64_SVE_VectorCall;
+ case CC_AMDGPUKernelCall: return llvm::CallingConv::AMDGPU_KERNEL;
case CC_SpirFunction: return llvm::CallingConv::SPIR_FUNC;
case CC_OpenCLKernel: return CGM.getTargetCodeGenInfo().getOpenCLKernelCallingConv();
case CC_PreserveMost: return llvm::CallingConv::PreserveMost;
@@ -228,6 +230,12 @@ static CallingConv getCallingConventionForDecl(const ObjCMethodDecl *D,
if (D->hasAttr<AArch64VectorPcsAttr>())
return CC_AArch64VectorCall;
+ if (D->hasAttr<AArch64SVEPcsAttr>())
+ return CC_AArch64SVEPCS;
+
+ if (D->hasAttr<AMDGPUKernelCallAttr>())
+ return CC_AMDGPUKernelCall;
+
if (D->hasAttr<IntelOclBiccAttr>())
return CC_IntelOclBicc;
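Taken together, this hunk and the previous one wire the new AArch64 SVE PCS and AMDGPU kernel calling conventions from the declaration attributes through to llvm::CallingConv::AArch64_SVE_VectorCall and AMDGPU_KERNEL. As a rough, hedged illustration (not part of this diff, and assuming the GNU attribute spelling aarch64_sve_pcs), a declaration like the following is the kind of source CC_AArch64SVEPCS is meant to cover:

#include <arm_sve.h>

// Assumed attribute spelling; the function would be expected to carry the
// aarch64_sve_vector_pcs calling convention in the emitted IR.
__attribute__((aarch64_sve_pcs))
svfloat32_t scale(svfloat32_t v, float s) {
  return svmul_n_f32_x(svptrue_b32(), v, s);
}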
@@ -833,6 +841,7 @@ CGFunctionInfo *CGFunctionInfo::create(unsigned llvmCC,
FI->NumArgs = argTypes.size();
FI->HasExtParameterInfos = !paramInfos.empty();
FI->getArgsBuffer()[0].type = resultType;
+ FI->MaxVectorWidth = 0;
for (unsigned i = 0, e = argTypes.size(); i != e; ++i)
FI->getArgsBuffer()[i + 1].type = argTypes[i];
for (unsigned i = 0, e = paramInfos.size(); i != e; ++i)
@@ -942,8 +951,7 @@ getTypeExpansion(QualType Ty, const ASTContext &Context) {
if (const auto *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
assert(!CXXRD->isDynamicClass() &&
"cannot expand vtable pointers in dynamic classes");
- for (const CXXBaseSpecifier &BS : CXXRD->bases())
- Bases.push_back(&BS);
+ llvm::append_range(Bases, llvm::make_pointer_range(CXXRD->bases()));
}
for (const auto *FD : RD->fields()) {
@@ -1012,11 +1020,12 @@ static void forConstantArrayExpansion(CodeGenFunction &CGF,
CharUnits EltSize = CGF.getContext().getTypeSizeInChars(CAE->EltTy);
CharUnits EltAlign =
BaseAddr.getAlignment().alignmentOfArrayElement(EltSize);
+ llvm::Type *EltTy = CGF.ConvertTypeForMem(CAE->EltTy);
for (int i = 0, n = CAE->NumElts; i < n; i++) {
llvm::Value *EltAddr = CGF.Builder.CreateConstGEP2_32(
BaseAddr.getElementType(), BaseAddr.getPointer(), 0, i);
- Fn(Address(EltAddr, EltAlign));
+ Fn(Address(EltAddr, EltTy, EltAlign));
}
}
@@ -1276,8 +1285,8 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty,
}
// If coercing a fixed vector to a scalable vector for ABI compatibility, and
- // the types match, use the llvm.experimental.vector.insert intrinsic to
- // perform the conversion.
+ // the types match, use the llvm.vector.insert intrinsic to perform the
+ // conversion.
if (auto *ScalableDst = dyn_cast<llvm::ScalableVectorType>(Ty)) {
if (auto *FixedSrc = dyn_cast<llvm::FixedVectorType>(SrcTy)) {
// If we are casting a fixed i8 vector to a scalable 16 x i1 predicate
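The comment change tracks the rename of the llvm.experimental.vector.insert/extract intrinsics to llvm.vector.insert and llvm.vector.extract. For context, a hedged sketch of the kind of source that exercises this fixed/scalable coercion (assuming an SVE target and a flag along the lines of -msve-vector-bits=512):

#include <arm_sve.h>

// Fixed-length "VLST" type backed by a scalable SVE vector.
typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(512)));

fixed_int32_t add_one(fixed_int32_t v) {
  // At the ABI boundary the fixed <16 x i32> value would be widened into a
  // <vscale x 4 x i32> via llvm.vector.insert, and narrowed back with
  // llvm.vector.extract on the callee side (see the later hunk in
  // EmitFunctionProlog).
  return svadd_n_s32_x(svptrue_b32(), v, 1);
}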
@@ -1804,6 +1813,8 @@ void CodeGenModule::getDefaultFunctionAttributes(StringRef Name,
if (AttrOnCallSite) {
// Attributes that should go on the call site only.
+ // FIXME: Look for 'BuiltinAttr' on the function rather than re-checking
+ // the -fno-builtin-foo list.
if (!CodeGenOpts.SimplifyLibCalls || LangOpts.isNoBuiltinFunc(Name))
FuncAttrs.addAttribute(llvm::Attribute::NoBuiltin);
if (!CodeGenOpts.TrapFuncName.empty())
@@ -1838,7 +1849,7 @@ void CodeGenModule::getDefaultFunctionAttributes(StringRef Name,
CodeGenOpts.FP32DenormalMode.str());
}
- if (LangOpts.getFPExceptionMode() == LangOptions::FPE_Ignore)
+ if (LangOpts.getDefaultExceptionMode() == LangOptions::FPE_Ignore)
FuncAttrs.addAttribute("no-trapping-math", "true");
// TODO: Are these all needed?
@@ -1878,6 +1889,37 @@ void CodeGenModule::getDefaultFunctionAttributes(StringRef Name,
if (CodeGenOpts.SpeculativeLoadHardening)
FuncAttrs.addAttribute(llvm::Attribute::SpeculativeLoadHardening);
+
+ // Add zero-call-used-regs attribute.
+ switch (CodeGenOpts.getZeroCallUsedRegs()) {
+ case llvm::ZeroCallUsedRegs::ZeroCallUsedRegsKind::Skip:
+ FuncAttrs.removeAttribute("zero-call-used-regs");
+ break;
+ case llvm::ZeroCallUsedRegs::ZeroCallUsedRegsKind::UsedGPRArg:
+ FuncAttrs.addAttribute("zero-call-used-regs", "used-gpr-arg");
+ break;
+ case llvm::ZeroCallUsedRegs::ZeroCallUsedRegsKind::UsedGPR:
+ FuncAttrs.addAttribute("zero-call-used-regs", "used-gpr");
+ break;
+ case llvm::ZeroCallUsedRegs::ZeroCallUsedRegsKind::UsedArg:
+ FuncAttrs.addAttribute("zero-call-used-regs", "used-arg");
+ break;
+ case llvm::ZeroCallUsedRegs::ZeroCallUsedRegsKind::Used:
+ FuncAttrs.addAttribute("zero-call-used-regs", "used");
+ break;
+ case llvm::ZeroCallUsedRegs::ZeroCallUsedRegsKind::AllGPRArg:
+ FuncAttrs.addAttribute("zero-call-used-regs", "all-gpr-arg");
+ break;
+ case llvm::ZeroCallUsedRegs::ZeroCallUsedRegsKind::AllGPR:
+ FuncAttrs.addAttribute("zero-call-used-regs", "all-gpr");
+ break;
+ case llvm::ZeroCallUsedRegs::ZeroCallUsedRegsKind::AllArg:
+ FuncAttrs.addAttribute("zero-call-used-regs", "all-arg");
+ break;
+ case llvm::ZeroCallUsedRegs::ZeroCallUsedRegsKind::All:
+ FuncAttrs.addAttribute("zero-call-used-regs", "all");
+ break;
+ }
}
if (getLangOpts().assumeFunctionsAreConvergent()) {
@@ -2166,6 +2208,15 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
FuncAttrs.addAttribute(llvm::Attribute::SpeculativeLoadHardening);
if (TargetDecl->hasAttr<NoSplitStackAttr>())
FuncAttrs.removeAttribute("split-stack");
+ if (TargetDecl->hasAttr<ZeroCallUsedRegsAttr>()) {
+ // A function "__attribute__((...))" overrides the command-line flag.
+ auto Kind =
+ TargetDecl->getAttr<ZeroCallUsedRegsAttr>()->getZeroCallUsedRegs();
+ FuncAttrs.removeAttribute("zero-call-used-regs");
+ FuncAttrs.addAttribute(
+ "zero-call-used-regs",
+ ZeroCallUsedRegsAttr::ConvertZeroCallUsedRegsKindToStr(Kind));
+ }
// Add NonLazyBind attribute to function declarations when -fno-plt
// is used.
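The switch added a few hunks earlier translates the -fzero-call-used-regs= command-line choice into the "zero-call-used-regs" string function attribute, and the hunk just above lets a per-function attribute override that default. A hedged sketch of what this looks like at the source level (function names are placeholders):

// With, say, -fzero-call-used-regs=used-gpr most functions would get
// "zero-call-used-regs"="used-gpr"; the attribute below overrides the flag.
__attribute__((zero_call_used_regs("all")))
int handle_secret(int x) {
  return x * 2;   // expected: "zero-call-used-regs"="all"
}

__attribute__((zero_call_used_regs("skip")))
int hot_path(int x) {
  return x + 1;   // "skip" opts this function out of register zeroing
}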
@@ -2253,7 +2304,7 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
getLangOpts().Sanitize.has(SanitizerKind::Return);
// Determine if the return type could be partially undef
- if (!CodeGenOpts.DisableNoundefAttrs && HasStrictReturn) {
+ if (CodeGenOpts.EnableNoundefAttrs && HasStrictReturn) {
if (!RetTy->isVoidType() && RetAI.getKind() != ABIArgInfo::Indirect &&
DetermineNoUndef(RetTy, getTypes(), DL, RetAI))
RetAttrs.addAttribute(llvm::Attribute::NoUndef);
@@ -2387,7 +2438,7 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
}
// Decide whether the argument we're handling could be partially undef
- if (!CodeGenOpts.DisableNoundefAttrs &&
+ if (CodeGenOpts.EnableNoundefAttrs &&
DetermineNoUndef(ParamType, getTypes(), DL, AI)) {
Attrs.addAttribute(llvm::Attribute::NoUndef);
}
@@ -2485,6 +2536,20 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
}
}
+ // From OpenCL spec v3.0.10 section 6.3.5 Alignment of Types:
+ // > For arguments to a __kernel function declared to be a pointer to a
+ // > data type, the OpenCL compiler can assume that the pointee is always
+ // > appropriately aligned as required by the data type.
+ if (TargetDecl && TargetDecl->hasAttr<OpenCLKernelAttr>() &&
+ ParamType->isPointerType()) {
+ QualType PTy = ParamType->getPointeeType();
+ if (!PTy->isIncompleteType() && PTy->isConstantSizeType()) {
+ llvm::Align Alignment =
+ getNaturalPointeeTypeAlignment(ParamType).getAsAlign();
+ Attrs.addAlignmentAttr(Alignment);
+ }
+ }
+
switch (FI.getExtParameterInfo(ArgNo).getABI()) {
case ParameterABI::Ordinary:
break;
@@ -2632,7 +2697,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
Address ArgStruct = Address::invalid();
if (IRFunctionArgs.hasInallocaArg()) {
ArgStruct = Address(Fn->getArg(IRFunctionArgs.getInallocaArgNo()),
- FI.getArgStructAlignment());
+ FI.getArgStruct(), FI.getArgStructAlignment());
assert(ArgStruct.getType() == FI.getArgStruct()->getPointerTo());
}
@@ -2682,7 +2747,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
Address V =
Builder.CreateStructGEP(ArgStruct, FieldIndex, Arg->getName());
if (ArgI.getInAllocaIndirect())
- V = Address(Builder.CreateLoad(V),
+ V = Address(Builder.CreateLoad(V), ConvertTypeForMem(Ty),
getContext().getTypeAlignInChars(Ty));
ArgVals.push_back(ParamValue::forIndirect(V));
break;
@@ -2831,7 +2896,8 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
assert(pointeeTy->isPointerType());
Address temp =
CreateMemTemp(pointeeTy, getPointerAlign(), "swifterror.temp");
- Address arg = Address(V, getContext().getTypeAlignInChars(pointeeTy));
+ Address arg(V, ConvertTypeForMem(pointeeTy),
+ getContext().getTypeAlignInChars(pointeeTy));
llvm::Value *incomingErrorValue = Builder.CreateLoad(arg);
Builder.CreateStore(incomingErrorValue, temp);
V = temp.getPointer();
@@ -2864,8 +2930,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
// VLST arguments are coerced to VLATs at the function boundary for
// ABI consistency. If this is a VLST that was coerced to
// a VLAT at the function boundary and the types match up, use
- // llvm.experimental.vector.extract to convert back to the original
- // VLST.
+ // llvm.vector.extract to convert back to the original VLST.
if (auto *VecTyTo = dyn_cast<llvm::FixedVectorType>(ConvertType(Ty))) {
llvm::Value *Coerced = Fn->getArg(FirstIRArg);
if (auto *VecTyFrom =
@@ -3172,7 +3237,8 @@ static llvm::StoreInst *findDominatingStoreToReturnValue(CodeGenFunction &CGF) {
// ReturnValue to some other location.
auto GetStoreIfValid = [&CGF](llvm::User *U) -> llvm::StoreInst * {
auto *SI = dyn_cast<llvm::StoreInst>(U);
- if (!SI || SI->getPointerOperand() != CGF.ReturnValue.getPointer())
+ if (!SI || SI->getPointerOperand() != CGF.ReturnValue.getPointer() ||
+ SI->getValueOperand()->getType() != CGF.ReturnValue.getElementType())
return nullptr;
// These aren't actually possible for non-coerced returns, and we
// only care about non-coerced returns on this code path.
@@ -3186,28 +3252,19 @@ static llvm::StoreInst *findDominatingStoreToReturnValue(CodeGenFunction &CGF) {
if (!CGF.ReturnValue.getPointer()->hasOneUse()) {
llvm::BasicBlock *IP = CGF.Builder.GetInsertBlock();
if (IP->empty()) return nullptr;
- llvm::Instruction *I = &IP->back();
-
- // Skip lifetime markers
- for (llvm::BasicBlock::reverse_iterator II = IP->rbegin(),
- IE = IP->rend();
- II != IE; ++II) {
- if (llvm::IntrinsicInst *Intrinsic =
- dyn_cast<llvm::IntrinsicInst>(&*II)) {
- if (Intrinsic->getIntrinsicID() == llvm::Intrinsic::lifetime_end) {
- const llvm::Value *CastAddr = Intrinsic->getArgOperand(1);
- ++II;
- if (II == IE)
- break;
- if (isa<llvm::BitCastInst>(&*II) && (CastAddr == &*II))
- continue;
- }
- }
- I = &*II;
- break;
- }
- return GetStoreIfValid(I);
+ // Look at directly preceding instruction, skipping bitcasts and lifetime
+ // markers.
+ for (llvm::Instruction &I : make_range(IP->rbegin(), IP->rend())) {
+ if (isa<llvm::BitCastInst>(&I))
+ continue;
+ if (auto *II = dyn_cast<llvm::IntrinsicInst>(&I))
+ if (II->getIntrinsicID() == llvm::Intrinsic::lifetime_end)
+ continue;
+
+ return GetStoreIfValid(&I);
+ }
+ return nullptr;
}
llvm::StoreInst *store =
@@ -3408,7 +3465,7 @@ llvm::Value *CodeGenFunction::EmitCMSEClearRecord(llvm::Value *Src,
int CharsPerElt =
ATy->getArrayElementType()->getScalarSizeInBits() / CharWidth;
int MaskIndex = 0;
- llvm::Value *R = llvm::UndefValue::get(ATy);
+ llvm::Value *R = llvm::PoisonValue::get(ATy);
for (int I = 0, N = ATy->getArrayNumElements(); I != N; ++I) {
uint64_t Mask = buildMultiCharMask(Bits, MaskIndex, CharsPerElt, CharWidth,
DataLayout.isBigEndian());
@@ -3457,8 +3514,7 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI,
--EI;
llvm::Value *ArgStruct = &*EI;
llvm::Value *SRet = Builder.CreateStructGEP(
- EI->getType()->getPointerElementType(), ArgStruct,
- RetAI.getInAllocaFieldIndex());
+ FI.getArgStruct(), ArgStruct, RetAI.getInAllocaFieldIndex());
llvm::Type *Ty =
cast<llvm::GetElementPtrInst>(SRet)->getResultElementType();
RV = Builder.CreateAlignedLoad(Ty, SRet, getPointerAlign(), "sret");
@@ -3583,7 +3639,7 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI,
// Construct a return type that lacks padding elements.
llvm::Type *returnType = RetAI.getUnpaddedCoerceAndExpandType();
- RV = llvm::UndefValue::get(returnType);
+ RV = llvm::PoisonValue::get(returnType);
for (unsigned i = 0, e = results.size(); i != e; ++i) {
RV = Builder.CreateInsertValue(RV, results[i], i);
}
@@ -3690,14 +3746,14 @@ static AggValueSlot createPlaceholderSlot(CodeGenFunction &CGF,
// placeholders.
llvm::Type *IRTy = CGF.ConvertTypeForMem(Ty);
llvm::Type *IRPtrTy = IRTy->getPointerTo();
- llvm::Value *Placeholder = llvm::UndefValue::get(IRPtrTy->getPointerTo());
+ llvm::Value *Placeholder = llvm::PoisonValue::get(IRPtrTy->getPointerTo());
// FIXME: When we generate this IR in one pass, we shouldn't need
// this win32-specific alignment hack.
CharUnits Align = CharUnits::fromQuantity(4);
Placeholder = CGF.Builder.CreateAlignedLoad(IRPtrTy, Placeholder, Align);
- return AggValueSlot::forAddr(Address(Placeholder, Align),
+ return AggValueSlot::forAddr(Address(Placeholder, IRTy, Align),
Ty.getQualifiers(),
AggValueSlot::IsNotDestructed,
AggValueSlot::DoesNotNeedGCBarriers,
@@ -3880,7 +3936,9 @@ static void emitWritebackArg(CodeGenFunction &CGF, CallArgList &args,
// because of the crazy ObjC compatibility rules.
llvm::PointerType *destType =
- cast<llvm::PointerType>(CGF.ConvertType(CRE->getType()));
+ cast<llvm::PointerType>(CGF.ConvertType(CRE->getType()));
+ llvm::Type *destElemType =
+ CGF.ConvertTypeForMem(CRE->getType()->getPointeeType());
// If the address is a constant null, just pass the appropriate null.
if (isProvablyNull(srcAddr.getPointer())) {
@@ -3890,8 +3948,8 @@ static void emitWritebackArg(CodeGenFunction &CGF, CallArgList &args,
}
// Create the temporary.
- Address temp = CGF.CreateTempAlloca(destType->getPointerElementType(),
- CGF.getPointerAlign(), "icr.temp");
+ Address temp =
+ CGF.CreateTempAlloca(destElemType, CGF.getPointerAlign(), "icr.temp");
// Loading an l-value can introduce a cleanup if the l-value is __weak,
// and that cleanup will be conditional if we can't prove that the l-value
// isn't null, so we need to register a dominating point so that the cleanups
@@ -3901,8 +3959,8 @@ static void emitWritebackArg(CodeGenFunction &CGF, CallArgList &args,
// Zero-initialize it if we're not doing a copy-initialization.
bool shouldCopy = CRE->shouldCopy();
if (!shouldCopy) {
- llvm::Value *null = llvm::ConstantPointerNull::get(
- cast<llvm::PointerType>(destType->getPointerElementType()));
+ llvm::Value *null =
+ llvm::ConstantPointerNull::get(cast<llvm::PointerType>(destElemType));
CGF.Builder.CreateStore(null, temp);
}
@@ -3944,8 +4002,7 @@ static void emitWritebackArg(CodeGenFunction &CGF, CallArgList &args,
assert(srcRV.isScalar());
llvm::Value *src = srcRV.getScalarVal();
- src = CGF.Builder.CreateBitCast(src, destType->getPointerElementType(),
- "icr.cast");
+ src = CGF.Builder.CreateBitCast(src, destElemType, "icr.cast");
// Use an ordinary store, not a store-to-lvalue.
CGF.Builder.CreateStore(src, temp);
@@ -4341,7 +4398,7 @@ void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E,
type);
// This unreachable is a temporary marker which will be removed later.
llvm::Instruction *IsActive = Builder.CreateUnreachable();
- args.addArgCleanupDeactivation(EHStack.getInnermostEHScope(), IsActive);
+ args.addArgCleanupDeactivation(EHStack.stable_begin(), IsActive);
}
return;
}
@@ -4613,6 +4670,19 @@ public:
} // namespace
+static unsigned getMaxVectorWidth(const llvm::Type *Ty) {
+ if (auto *VT = dyn_cast<llvm::VectorType>(Ty))
+ return VT->getPrimitiveSizeInBits().getKnownMinSize();
+ if (auto *AT = dyn_cast<llvm::ArrayType>(Ty))
+ return getMaxVectorWidth(AT->getElementType());
+
+ unsigned MaxVectorWidth = 0;
+ if (auto *ST = dyn_cast<llvm::StructType>(Ty))
+ for (auto *I : ST->elements())
+ MaxVectorWidth = std::max(MaxVectorWidth, getMaxVectorWidth(I));
+ return MaxVectorWidth;
+}
+
RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
const CGCallee &Callee,
ReturnValueSlot ReturnValue,
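getMaxVectorWidth is a small recursive helper: unlike the previous inline checks, it also looks through array and struct types, so vectors nested inside aggregates still raise LargestVectorWidth (which ultimately feeds the "min-legal-vector-width" function attribute consumed mainly by the X86 backend). A hedged, target-dependent sketch of the kind of argument the recursion is meant to catch; whether the aggregate is actually lowered to an IR struct containing vectors depends on the target ABI:

typedef float float8 __attribute__((vector_size(32)));   // <8 x float>

struct Pair {
  float8 lo;
  float8 hi;
};

// If the IR argument type ends up containing the nested <8 x float> elements,
// the recursive scan now counts them as 256 bits wide.
float8 combine(struct Pair p);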
@@ -4687,7 +4757,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
AI->setAlignment(Align.getAsAlign());
AI->setUsedWithInAlloca(true);
assert(AI->isUsedWithInAlloca() && !AI->isStaticAlloca());
- ArgMemory = Address(AI, Align);
+ ArgMemory = Address(AI, ArgStruct, Align);
}
ClangToLLVMArgMapping IRFunctionArgs(CGM.getContext(), CallInfo);
@@ -4785,13 +4855,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// Store the RValue into the argument struct.
Address Addr =
Builder.CreateStructGEP(ArgMemory, ArgInfo.getInAllocaFieldIndex());
- unsigned AS = Addr.getType()->getPointerAddressSpace();
- llvm::Type *MemType = ConvertTypeForMem(I->Ty)->getPointerTo(AS);
// There are some cases where a trivial bitcast is not avoidable. The
// definition of a type later in a translation unit may change it's type
// from {}* to (%struct.foo*)*.
- if (Addr.getType() != MemType)
- Addr = Builder.CreateBitCast(Addr, MemType);
+ Addr = Builder.CreateElementBitCast(Addr, ConvertTypeForMem(I->Ty));
I->copyInto(*this, Addr);
}
break;
@@ -4914,8 +4981,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
assert(!swiftErrorTemp.isValid() && "multiple swifterror args");
QualType pointeeTy = I->Ty->getPointeeType();
- swiftErrorArg =
- Address(V, getContext().getTypeAlignInChars(pointeeTy));
+ swiftErrorArg = Address(V, ConvertTypeForMem(pointeeTy),
+ getContext().getTypeAlignInChars(pointeeTy));
swiftErrorTemp =
CreateMemTemp(pointeeTy, getPointerAlign(), "swifterror.temp");
@@ -5083,14 +5150,16 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
#ifndef NDEBUG
// Assert that these structs have equivalent element types.
llvm::StructType *FullTy = CallInfo.getArgStruct();
- llvm::StructType *DeclaredTy =
- cast<llvm::StructType>(LastParamTy->getPointerElementType());
- assert(DeclaredTy->getNumElements() == FullTy->getNumElements());
- for (llvm::StructType::element_iterator DI = DeclaredTy->element_begin(),
- DE = DeclaredTy->element_end(),
- FI = FullTy->element_begin();
- DI != DE; ++DI, ++FI)
- assert(*DI == *FI);
+ if (!LastParamTy->isOpaquePointerTy()) {
+ llvm::StructType *DeclaredTy = cast<llvm::StructType>(
+ LastParamTy->getNonOpaquePointerElementType());
+ assert(DeclaredTy->getNumElements() == FullTy->getNumElements());
+ for (auto DI = DeclaredTy->element_begin(),
+ DE = DeclaredTy->element_end(),
+ FI = FullTy->element_begin();
+ DI != DE; ++DI, ++FI)
+ assert(*DI == *FI);
+ }
#endif
Arg = Builder.CreateBitCast(Arg, LastParamTy);
}
@@ -5167,12 +5236,9 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
#endif
// Update the largest vector width if any arguments have vector types.
- for (unsigned i = 0; i < IRCallArgs.size(); ++i) {
- if (auto *VT = dyn_cast<llvm::VectorType>(IRCallArgs[i]->getType()))
- LargestVectorWidth =
- std::max((uint64_t)LargestVectorWidth,
- VT->getPrimitiveSizeInBits().getKnownMinSize());
- }
+ for (unsigned i = 0; i < IRCallArgs.size(); ++i)
+ LargestVectorWidth = std::max(LargestVectorWidth,
+ getMaxVectorWidth(IRCallArgs[i]->getType()));
// Compute the calling convention and attributes.
unsigned CallingConv;
@@ -5191,12 +5257,22 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
if (InNoMergeAttributedStmt)
Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::NoMerge);
+ // Add call-site noinline attribute if exists.
+ if (InNoInlineAttributedStmt)
+ Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::NoInline);
+
+ // Add call-site always_inline attribute if exists.
+ if (InAlwaysInlineAttributedStmt)
+ Attrs =
+ Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::AlwaysInline);
+
// Apply some call-site-specific attributes.
// TODO: work this into building the attribute set.
// Apply always_inline to all calls within flatten functions.
// FIXME: should this really take priority over __try, below?
if (CurCodeDecl && CurCodeDecl->hasAttr<FlattenAttr>() &&
+ !InNoInlineAttributedStmt &&
!(TargetDecl && TargetDecl->hasAttr<NoInlineAttr>())) {
Attrs =
Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::AlwaysInline);
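The new InNoInlineAttributedStmt and InAlwaysInlineAttributedStmt checks attach call-site NoInline or AlwaysInline attributes when the call appears under the corresponding statement attribute, and the FlattenAttr path now also yields to an explicit noinline statement. A hedged sketch of the statement attributes involved (function names are placeholders):

int expensive();
int cheap();

int caller() {
  int a, b;
  // Each attribute applies only to the calls in the marked statement.
  [[clang::noinline]] a = expensive();      // call-site noinline
  [[clang::always_inline]] b = cheap();     // call-site alwaysinline
  return a + b;
}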
@@ -5284,10 +5360,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
CI->setName("call");
// Update largest vector width from the return type.
- if (auto *VT = dyn_cast<llvm::VectorType>(CI->getType()))
- LargestVectorWidth =
- std::max((uint64_t)LargestVectorWidth,
- VT->getPrimitiveSizeInBits().getKnownMinSize());
+ LargestVectorWidth =
+ std::max(LargestVectorWidth, getMaxVectorWidth(CI->getType()));
// Insert instrumentation or attach profile metadata at indirect call sites.
// For more details, see the comment before the definition of