author     Dimitry Andric <dim@FreeBSD.org>    2024-09-22 09:37:02 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2024-09-22 09:37:02 +0000
commit     1de139fdd502e836552eba1049a42b45514d8f7b (patch)
tree       9418bf14cdd3b0ea4bf5dbe50ee2ec4550264ddc
parent     7432c96084d72c631f1e5ddf1cc2e24fd9c92482 (diff)

Vendor import of llvm-project branch release/19.x llvmorg-19.1.0-0-ga4bf6cd7cfb1, a.k.a. 19.1.0 release.
(ref: vendor/llvm-project/llvmorg-19.1.0-0-ga4bf6cd7cfb1)
-rw-r--r--  clang/lib/Basic/Targets/X86.cpp                      |   4
-rw-r--r--  clang/lib/CodeGen/CGCall.cpp                         | 146
-rw-r--r--  clang/lib/CodeGen/CGExprAgg.cpp                      |  23
-rw-r--r--  clang/lib/CodeGen/CGStmt.cpp                         |   2
-rw-r--r--  clang/lib/CodeGen/CodeGenFunction.h                  |   7
-rw-r--r--  clang/lib/Format/UnwrappedLineParser.cpp             |   6
-rw-r--r--  clang/lib/Sema/SemaExpr.cpp                          |  21
-rw-r--r--  clang/lib/Sema/SemaExprCXX.cpp                       |   3
-rw-r--r--  clang/lib/Sema/SemaLambda.cpp                        |   1
-rw-r--r--  clang/lib/Sema/SemaLookup.cpp                        |   2
-rw-r--r--  clang/lib/StaticAnalyzer/Core/ExprEngine.cpp         |   5
-rw-r--r--  compiler-rt/lib/builtins/cpu_model/x86.c             |  20
-rw-r--r--  compiler-rt/lib/builtins/divtc3.c                    |   2
-rw-r--r--  compiler-rt/lib/builtins/multc3.c                    |   2
-rw-r--r--  libcxx/include/chrono                                |   2
-rw-r--r--  lld/ELF/Arch/Hexagon.cpp                             |   8
-rw-r--r--  llvm/include/llvm/TargetParser/X86TargetParser.def   |   3
-rw-r--r--  llvm/include/llvm/TargetParser/X86TargetParser.h     |   1
-rw-r--r--  llvm/lib/CodeGen/ModuloSchedule.cpp                  |   3
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp   |   4
-rw-r--r--  llvm/lib/IR/BasicBlock.cpp                           |  12
-rw-r--r--  llvm/lib/Target/AArch64/AArch64FrameLowering.cpp     |  33
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrInfo.cpp         |   4
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp        |  30
-rw-r--r--  llvm/lib/Target/PowerPC/PPCISelLowering.cpp          |   9
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfo.cpp             |   2
-rw-r--r--  llvm/lib/Target/X86/X86.td                           |  15
-rw-r--r--  llvm/lib/Target/X86/X86PfmCounters.td                |   1
-rw-r--r--  llvm/lib/TargetParser/Host.cpp                       |  19
-rw-r--r--  llvm/lib/TargetParser/X86TargetParser.cpp            |   5
-rw-r--r--  llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp      |   8

31 files changed, 227 insertions(+), 176 deletions(-)
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index 18e6dbf03e00..072c97e6c8c6 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -723,6 +723,9 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
case CK_ZNVER4:
defineCPUMacros(Builder, "znver4");
break;
+ case CK_ZNVER5:
+ defineCPUMacros(Builder, "znver5");
+ break;
case CK_Geode:
defineCPUMacros(Builder, "geode");
break;
@@ -1613,6 +1616,7 @@ std::optional<unsigned> X86TargetInfo::getCPUCacheLineSize() const {
case CK_ZNVER2:
case CK_ZNVER3:
case CK_ZNVER4:
+ case CK_ZNVER5:
// Deprecated
case CK_x86_64:
case CK_x86_64_v2:
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 6e69e84a2344..d7ebffa8c5e4 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -1336,75 +1336,50 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty,
return CGF.Builder.CreateLoad(Tmp);
}
-// Function to store a first-class aggregate into memory. We prefer to
-// store the elements rather than the aggregate to be more friendly to
-// fast-isel.
-// FIXME: Do we need to recurse here?
-void CodeGenFunction::EmitAggregateStore(llvm::Value *Val, Address Dest,
- bool DestIsVolatile) {
- // Prefer scalar stores to first-class aggregate stores.
- if (llvm::StructType *STy = dyn_cast<llvm::StructType>(Val->getType())) {
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
- Address EltPtr = Builder.CreateStructGEP(Dest, i);
- llvm::Value *Elt = Builder.CreateExtractValue(Val, i);
- Builder.CreateStore(Elt, EltPtr, DestIsVolatile);
- }
- } else {
- Builder.CreateStore(Val, Dest, DestIsVolatile);
- }
-}
-
-/// CreateCoercedStore - Create a store to \arg DstPtr from \arg Src,
-/// where the source and destination may have different types. The
-/// destination is known to be aligned to \arg DstAlign bytes.
-///
-/// This safely handles the case when the src type is larger than the
-/// destination type; the upper bits of the src will be lost.
-static void CreateCoercedStore(llvm::Value *Src,
- Address Dst,
- bool DstIsVolatile,
- CodeGenFunction &CGF) {
- llvm::Type *SrcTy = Src->getType();
- llvm::Type *DstTy = Dst.getElementType();
- if (SrcTy == DstTy) {
- CGF.Builder.CreateStore(Src, Dst, DstIsVolatile);
- return;
- }
-
- llvm::TypeSize SrcSize = CGF.CGM.getDataLayout().getTypeAllocSize(SrcTy);
-
- if (llvm::StructType *DstSTy = dyn_cast<llvm::StructType>(DstTy)) {
- Dst = EnterStructPointerForCoercedAccess(Dst, DstSTy,
- SrcSize.getFixedValue(), CGF);
- DstTy = Dst.getElementType();
- }
-
- llvm::PointerType *SrcPtrTy = llvm::dyn_cast<llvm::PointerType>(SrcTy);
- llvm::PointerType *DstPtrTy = llvm::dyn_cast<llvm::PointerType>(DstTy);
- if (SrcPtrTy && DstPtrTy &&
- SrcPtrTy->getAddressSpace() != DstPtrTy->getAddressSpace()) {
- Src = CGF.Builder.CreateAddrSpaceCast(Src, DstTy);
- CGF.Builder.CreateStore(Src, Dst, DstIsVolatile);
+void CodeGenFunction::CreateCoercedStore(llvm::Value *Src, Address Dst,
+ llvm::TypeSize DstSize,
+ bool DstIsVolatile) {
+ if (!DstSize)
return;
- }
- // If the source and destination are integer or pointer types, just do an
- // extension or truncation to the desired type.
- if ((isa<llvm::IntegerType>(SrcTy) || isa<llvm::PointerType>(SrcTy)) &&
- (isa<llvm::IntegerType>(DstTy) || isa<llvm::PointerType>(DstTy))) {
- Src = CoerceIntOrPtrToIntOrPtr(Src, DstTy, CGF);
- CGF.Builder.CreateStore(Src, Dst, DstIsVolatile);
- return;
+ llvm::Type *SrcTy = Src->getType();
+ llvm::TypeSize SrcSize = CGM.getDataLayout().getTypeAllocSize(SrcTy);
+
+ // GEP into structs to try to make types match.
+ // FIXME: This isn't really that useful with opaque types, but it impacts a
+ // lot of regression tests.
+ if (SrcTy != Dst.getElementType()) {
+ if (llvm::StructType *DstSTy =
+ dyn_cast<llvm::StructType>(Dst.getElementType())) {
+ assert(!SrcSize.isScalable());
+ Dst = EnterStructPointerForCoercedAccess(Dst, DstSTy,
+ SrcSize.getFixedValue(), *this);
+ }
}
- llvm::TypeSize DstSize = CGF.CGM.getDataLayout().getTypeAllocSize(DstTy);
-
- // If store is legal, just bitcast the src pointer.
- if (isa<llvm::ScalableVectorType>(SrcTy) ||
- isa<llvm::ScalableVectorType>(DstTy) ||
- SrcSize.getFixedValue() <= DstSize.getFixedValue()) {
- Dst = Dst.withElementType(SrcTy);
- CGF.EmitAggregateStore(Src, Dst, DstIsVolatile);
+ if (SrcSize.isScalable() || SrcSize <= DstSize) {
+ if (SrcTy->isIntegerTy() && Dst.getElementType()->isPointerTy() &&
+ SrcSize == CGM.getDataLayout().getTypeAllocSize(Dst.getElementType())) {
+ // If the value is supposed to be a pointer, convert it before storing it.
+ Src = CoerceIntOrPtrToIntOrPtr(Src, Dst.getElementType(), *this);
+ Builder.CreateStore(Src, Dst, DstIsVolatile);
+ } else if (llvm::StructType *STy =
+ dyn_cast<llvm::StructType>(Src->getType())) {
+ // Prefer scalar stores to first-class aggregate stores.
+ Dst = Dst.withElementType(SrcTy);
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ Address EltPtr = Builder.CreateStructGEP(Dst, i);
+ llvm::Value *Elt = Builder.CreateExtractValue(Src, i);
+ Builder.CreateStore(Elt, EltPtr, DstIsVolatile);
+ }
+ } else {
+ Builder.CreateStore(Src, Dst.withElementType(SrcTy), DstIsVolatile);
+ }
+ } else if (SrcTy->isIntegerTy()) {
+ // If the source is a simple integer, coerce it directly.
+ llvm::Type *DstIntTy = Builder.getIntNTy(DstSize.getFixedValue() * 8);
+ Src = CoerceIntOrPtrToIntOrPtr(Src, DstIntTy, *this);
+ Builder.CreateStore(Src, Dst.withElementType(DstIntTy), DstIsVolatile);
} else {
// Otherwise do coercion through memory. This is stupid, but
// simple.
@@ -1416,12 +1391,12 @@ static void CreateCoercedStore(llvm::Value *Src,
// FIXME: Assert that we aren't truncating non-padding bits when have access
// to that information.
RawAddress Tmp =
- CreateTempAllocaForCoercion(CGF, SrcTy, Dst.getAlignment());
- CGF.Builder.CreateStore(Src, Tmp);
- CGF.Builder.CreateMemCpy(
- Dst.emitRawPointer(CGF), Dst.getAlignment().getAsAlign(),
- Tmp.getPointer(), Tmp.getAlignment().getAsAlign(),
- llvm::ConstantInt::get(CGF.IntPtrTy, DstSize.getFixedValue()));
+ CreateTempAllocaForCoercion(*this, SrcTy, Dst.getAlignment());
+ Builder.CreateStore(Src, Tmp);
+ Builder.CreateMemCpy(Dst.emitRawPointer(*this),
+ Dst.getAlignment().getAsAlign(), Tmp.getPointer(),
+ Tmp.getAlignment().getAsAlign(),
+ Builder.CreateTypeSize(IntPtrTy, DstSize));
}
}
@@ -3309,7 +3284,12 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
assert(NumIRArgs == 1);
auto AI = Fn->getArg(FirstIRArg);
AI->setName(Arg->getName() + ".coerce");
- CreateCoercedStore(AI, Ptr, /*DstIsVolatile=*/false, *this);
+ CreateCoercedStore(
+ AI, Ptr,
+ llvm::TypeSize::getFixed(
+ getContext().getTypeSizeInChars(Ty).getQuantity() -
+ ArgI.getDirectOffset()),
+ /*DstIsVolatile=*/false);
}
// Match to what EmitParmDecl is expecting for this type.
@@ -5939,17 +5919,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
llvm::Value *Imag = Builder.CreateExtractValue(CI, 1);
return RValue::getComplex(std::make_pair(Real, Imag));
}
- case TEK_Aggregate: {
- Address DestPtr = ReturnValue.getAddress();
- bool DestIsVolatile = ReturnValue.isVolatile();
-
- if (!DestPtr.isValid()) {
- DestPtr = CreateMemTemp(RetTy, "agg.tmp");
- DestIsVolatile = false;
- }
- EmitAggregateStore(CI, DestPtr, DestIsVolatile);
- return RValue::getAggregate(DestPtr);
- }
+ case TEK_Aggregate:
+ break;
case TEK_Scalar: {
// If the argument doesn't match, perform a bitcast to coerce it.
// This can happen due to trivial type mismatches.
@@ -5959,7 +5930,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
return RValue::get(V);
}
}
- llvm_unreachable("bad evaluation kind");
}
// If coercing a fixed vector from a scalable vector for ABI
@@ -5981,10 +5951,13 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
Address DestPtr = ReturnValue.getValue();
bool DestIsVolatile = ReturnValue.isVolatile();
+ uint64_t DestSize =
+ getContext().getTypeInfoDataSizeInChars(RetTy).Width.getQuantity();
if (!DestPtr.isValid()) {
DestPtr = CreateMemTemp(RetTy, "coerce");
DestIsVolatile = false;
+ DestSize = getContext().getTypeSizeInChars(RetTy).getQuantity();
}
// An empty record can overlap other data (if declared with
@@ -5993,7 +5966,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
if (!isEmptyRecord(getContext(), RetTy, true)) {
// If the value is offset in memory, apply the offset now.
Address StorePtr = emitAddressAtOffset(*this, DestPtr, RetAI);
- CreateCoercedStore(CI, StorePtr, DestIsVolatile, *this);
+ CreateCoercedStore(
+ CI, StorePtr,
+ llvm::TypeSize::getFixed(DestSize - RetAI.getDirectOffset()),
+ DestIsVolatile);
}
return convertTempToRValue(DestPtr, RetTy, SourceLocation());
diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp
index c3c10e73ff05..d9f44f4be617 100644
--- a/clang/lib/CodeGen/CGExprAgg.cpp
+++ b/clang/lib/CodeGen/CGExprAgg.cpp
@@ -131,15 +131,12 @@ public:
EnsureDest(E->getType());
if (llvm::Value *Result = ConstantEmitter(CGF).tryEmitConstantExpr(E)) {
- Address StoreDest = Dest.getAddress();
- // The emitted value is guaranteed to have the same size as the
- // destination but can have a different type. Just do a bitcast in this
- // case to avoid incorrect GEPs.
- if (Result->getType() != StoreDest.getType())
- StoreDest = StoreDest.withElementType(Result->getType());
-
- CGF.EmitAggregateStore(Result, StoreDest,
- E->getType().isVolatileQualified());
+ CGF.CreateCoercedStore(
+ Result, Dest.getAddress(),
+ llvm::TypeSize::getFixed(
+ Dest.getPreferredSize(CGF.getContext(), E->getType())
+ .getQuantity()),
+ E->getType().isVolatileQualified());
return;
}
return Visit(E->getSubExpr());
@@ -2050,6 +2047,10 @@ CodeGenFunction::getOverlapForFieldInit(const FieldDecl *FD) {
if (!FD->hasAttr<NoUniqueAddressAttr>() || !FD->getType()->isRecordType())
return AggValueSlot::DoesNotOverlap;
+ // Empty fields can overlap earlier fields.
+ if (FD->getType()->getAsCXXRecordDecl()->isEmpty())
+ return AggValueSlot::MayOverlap;
+
// If the field lies entirely within the enclosing class's nvsize, its tail
// padding cannot overlap any already-initialized object. (The only subobjects
// with greater addresses that might already be initialized are vbases.)
@@ -2072,6 +2073,10 @@ AggValueSlot::Overlap_t CodeGenFunction::getOverlapForBaseInit(
if (IsVirtual)
return AggValueSlot::MayOverlap;
+ // Empty bases can overlap earlier bases.
+ if (BaseRD->isEmpty())
+ return AggValueSlot::MayOverlap;
+
// If the base class is laid out entirely within the nvsize of the derived
// class, its tail padding cannot yet be initialized, so we can issue
// stores at the full width of the base class.
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index aa97f685ac7a..2f466602d2f6 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -745,7 +745,7 @@ void CodeGenFunction::EmitAttributedStmt(const AttributedStmt &S) {
} break;
case attr::CXXAssume: {
const Expr *Assumption = cast<CXXAssumeAttr>(A)->getAssumption();
- if (getLangOpts().CXXAssumptions &&
+ if (getLangOpts().CXXAssumptions && Builder.GetInsertBlock() &&
!Assumption->HasSideEffects(getContext())) {
llvm::Value *AssumptionVal = EvaluateExprAsBool(Assumption);
Builder.CreateAssumption(AssumptionVal);
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index ba7b565d9755..60e6841e1b3d 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4838,9 +4838,10 @@ public:
void EmitAggFinalDestCopy(QualType Type, AggValueSlot Dest, const LValue &Src,
ExprValueKind SrcKind);
- /// Build all the stores needed to initialize an aggregate at Dest with the
- /// value Val.
- void EmitAggregateStore(llvm::Value *Val, Address Dest, bool DestIsVolatile);
+ /// Create a store to \arg DstPtr from \arg Src, truncating the stored value
+ /// to at most \arg DstSize bytes.
+ void CreateCoercedStore(llvm::Value *Src, Address Dst, llvm::TypeSize DstSize,
+ bool DstIsVolatile);
/// EmitExtendGCLifetime - Given a pointer to an Objective-C object,
/// make sure it survives garbage collection until this point.
diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index 60e65aaa83e9..7813d86ff0ea 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -570,7 +570,8 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
Keywords.kw_as));
ProbablyBracedList =
- ProbablyBracedList || (IsCpp && NextTok->is(tok::l_paren));
+ ProbablyBracedList || (IsCpp && (PrevTok->Tok.isLiteral() ||
+ NextTok->is(tok::l_paren)));
// If there is a comma, semicolon or right paren after the closing
// brace, we assume this is a braced initializer list.
@@ -609,8 +610,9 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
ProbablyBracedList = NextTok->isNot(tok::l_square);
}
- // Cpp macro definition body containing nonempty braced list or block:
+ // Cpp macro definition body that is a nonempty braced list or block:
if (IsCpp && Line->InMacroBody && PrevTok != FormatTok &&
+ !FormatTok->Previous && NextTok->is(tok::eof) &&
// A statement can end with only `;` (simple statement), a block
// closing brace (compound statement), or `:` (label statement).
// If PrevTok is a block opening brace, Tok ends an empty block.
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index edb8b79a2220..f56ca398cda8 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -5430,11 +5430,24 @@ struct EnsureImmediateInvocationInDefaultArgs
// Rewrite to source location to refer to the context in which they are used.
ExprResult TransformSourceLocExpr(SourceLocExpr *E) {
- if (E->getParentContext() == SemaRef.CurContext)
+ DeclContext *DC = E->getParentContext();
+ if (DC == SemaRef.CurContext)
return E;
- return getDerived().RebuildSourceLocExpr(E->getIdentKind(), E->getType(),
- E->getBeginLoc(), E->getEndLoc(),
- SemaRef.CurContext);
+
+ // FIXME: During instantiation, because the rebuild of default arguments
+ // is not always done in the context of the template instantiator,
+ // we run the risk of producing a dependent source location
+ // that would never be rebuilt.
+ // This usually happens during overload resolution, or in contexts
+ // where the value of the source location does not matter.
+ // However, we should find a better way to deal with source location
+ // of function templates.
+ if (!SemaRef.CurrentInstantiationScope ||
+ !SemaRef.CurContext->isDependentContext() || DC->isDependentContext())
+ DC = SemaRef.CurContext;
+
+ return getDerived().RebuildSourceLocExpr(
+ E->getIdentKind(), E->getType(), E->getBeginLoc(), E->getEndLoc(), DC);
}
};
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index 14d1f395af90..de50786f4d6c 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -5140,7 +5140,8 @@ static bool HasNonDeletedDefaultedEqualityComparison(Sema &S,
// const ClassT& obj;
OpaqueValueExpr Operand(
- {}, Decl->getTypeForDecl()->getCanonicalTypeUnqualified().withConst(),
+ KeyLoc,
+ Decl->getTypeForDecl()->getCanonicalTypeUnqualified().withConst(),
ExprValueKind::VK_LValue);
UnresolvedSet<16> Functions;
// obj == obj;
diff --git a/clang/lib/Sema/SemaLambda.cpp b/clang/lib/Sema/SemaLambda.cpp
index 601077e9f333..809b94bb7412 100644
--- a/clang/lib/Sema/SemaLambda.cpp
+++ b/clang/lib/Sema/SemaLambda.cpp
@@ -1318,7 +1318,6 @@ void Sema::ActOnLambdaExpressionAfterIntroducer(LambdaIntroducer &Intro,
if (C->Init.isUsable()) {
addInitCapture(LSI, cast<VarDecl>(Var), C->Kind == LCK_ByRef);
- PushOnScopeChains(Var, CurScope, false);
} else {
TryCaptureKind Kind = C->Kind == LCK_ByRef ? TryCapture_ExplicitByRef
: TryCapture_ExplicitByVal;
diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp
index 7a6a64529f52..d3d4bf27ae72 100644
--- a/clang/lib/Sema/SemaLookup.cpp
+++ b/clang/lib/Sema/SemaLookup.cpp
@@ -570,7 +570,7 @@ void LookupResult::resolveKind() {
// For non-type declarations, check for a prior lookup result naming this
// canonical declaration.
- if (!D->isPlaceholderVar(getSema().getLangOpts()) && !ExistingI) {
+ if (!ExistingI) {
auto UniqueResult = Unique.insert(std::make_pair(D, I));
if (!UniqueResult.second) {
// We've seen this entity before.
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index 62a240ecbc60..c11468a08ae5 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -1928,6 +1928,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
case Stmt::CXXRewrittenBinaryOperatorClass:
case Stmt::RequiresExprClass:
case Expr::CXXParenListInitExprClass:
+ case Stmt::EmbedExprClass:
// Fall through.
// Cases we intentionally don't evaluate, since they don't need
@@ -2430,10 +2431,6 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
Bldr.addNodes(Dst);
break;
}
-
- case Stmt::EmbedExprClass:
- llvm::report_fatal_error("Support for EmbedExpr is not implemented.");
- break;
}
}
diff --git a/compiler-rt/lib/builtins/cpu_model/x86.c b/compiler-rt/lib/builtins/cpu_model/x86.c
index 867ed97e57bf..b1c4abd9d11d 100644
--- a/compiler-rt/lib/builtins/cpu_model/x86.c
+++ b/compiler-rt/lib/builtins/cpu_model/x86.c
@@ -59,6 +59,7 @@ enum ProcessorTypes {
INTEL_SIERRAFOREST,
INTEL_GRANDRIDGE,
INTEL_CLEARWATERFOREST,
+ AMDFAM1AH,
CPU_TYPE_MAX
};
@@ -97,6 +98,7 @@ enum ProcessorSubtypes {
INTEL_COREI7_ARROWLAKE,
INTEL_COREI7_ARROWLAKE_S,
INTEL_COREI7_PANTHERLAKE,
+ AMDFAM1AH_ZNVER5,
CPU_SUBTYPE_MAX
};
@@ -803,6 +805,24 @@ static const char *getAMDProcessorTypeAndSubtype(unsigned Family,
break; // "znver4"
}
break; // family 19h
+ case 26:
+ CPU = "znver5";
+ *Type = AMDFAM1AH;
+ if (Model <= 0x77) {
+ // Models 00h-0Fh (Breithorn).
+ // Models 10h-1Fh (Breithorn-Dense).
+ // Models 20h-2Fh (Strix 1).
+ // Models 30h-37h (Strix 2).
+ // Models 38h-3Fh (Strix 3).
+ // Models 40h-4Fh (Granite Ridge).
+ // Models 50h-5Fh (Weisshorn).
+ // Models 60h-6Fh (Krackan1).
+ // Models 70h-77h (Sarlak).
+ CPU = "znver5";
+ *Subtype = AMDFAM1AH_ZNVER5;
+ break; // "znver5"
+ }
+ break;
default:
break; // Unknown AMD CPU.
}
diff --git a/compiler-rt/lib/builtins/divtc3.c b/compiler-rt/lib/builtins/divtc3.c
index 099de5802daf..c393de815337 100644
--- a/compiler-rt/lib/builtins/divtc3.c
+++ b/compiler-rt/lib/builtins/divtc3.c
@@ -13,7 +13,7 @@
#define QUAD_PRECISION
#include "fp_lib.h"
-#if defined(CRT_HAS_F128)
+#if defined(CRT_HAS_128BIT) && defined(CRT_HAS_F128)
// Returns: the quotient of (a + ib) / (c + id)
diff --git a/compiler-rt/lib/builtins/multc3.c b/compiler-rt/lib/builtins/multc3.c
index 61a3f45e4727..a89832f0e883 100644
--- a/compiler-rt/lib/builtins/multc3.c
+++ b/compiler-rt/lib/builtins/multc3.c
@@ -15,7 +15,7 @@
#include "int_lib.h"
#include "int_math.h"
-#if defined(CRT_HAS_F128)
+#if defined(CRT_HAS_128BIT) && defined(CRT_HAS_F128)
// Returns: the product of a + ib and c + id
diff --git a/libcxx/include/chrono b/libcxx/include/chrono
index 990c415ec2e9..7bec5e5a26ef 100644
--- a/libcxx/include/chrono
+++ b/libcxx/include/chrono
@@ -1015,8 +1015,8 @@ constexpr chrono::year operator ""y(unsigned lo
# include <charconv>
# if !defined(_LIBCPP_HAS_NO_LOCALIZATION)
# include <locale>
+# include <ostream>
# endif
-# include <ostream>
#endif
#endif // _LIBCPP_CHRONO
diff --git a/lld/ELF/Arch/Hexagon.cpp b/lld/ELF/Arch/Hexagon.cpp
index 54821c299bde..abde3cd96491 100644
--- a/lld/ELF/Arch/Hexagon.cpp
+++ b/lld/ELF/Arch/Hexagon.cpp
@@ -60,17 +60,15 @@ Hexagon::Hexagon() {
}
uint32_t Hexagon::calcEFlags() const {
- assert(!ctx.objectFiles.empty());
-
// The architecture revision must always be equal to or greater than
// greatest revision in the list of inputs.
- uint32_t ret = 0;
+ std::optional<uint32_t> ret;
for (InputFile *f : ctx.objectFiles) {
uint32_t eflags = cast<ObjFile<ELF32LE>>(f)->getObj().getHeader().e_flags;
- if (eflags > ret)
+ if (!ret || eflags > *ret)
ret = eflags;
}
- return ret;
+ return ret.value_or(/* Default Arch Rev: */ 0x60);
}
static uint32_t applyMask(uint32_t mask, uint32_t data) {
diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.def b/llvm/include/llvm/TargetParser/X86TargetParser.def
index 92798cbe4b4c..008cf5381c12 100644
--- a/llvm/include/llvm/TargetParser/X86TargetParser.def
+++ b/llvm/include/llvm/TargetParser/X86TargetParser.def
@@ -49,11 +49,13 @@ X86_CPU_TYPE(ZHAOXIN_FAM7H, "zhaoxin_fam7h")
X86_CPU_TYPE(INTEL_SIERRAFOREST, "sierraforest")
X86_CPU_TYPE(INTEL_GRANDRIDGE, "grandridge")
X86_CPU_TYPE(INTEL_CLEARWATERFOREST, "clearwaterforest")
+X86_CPU_TYPE(AMDFAM1AH, "amdfam1ah")
// Alternate names supported by __builtin_cpu_is and target multiversioning.
X86_CPU_TYPE_ALIAS(INTEL_BONNELL, "atom")
X86_CPU_TYPE_ALIAS(AMDFAM10H, "amdfam10")
X86_CPU_TYPE_ALIAS(AMDFAM15H, "amdfam15")
+X86_CPU_TYPE_ALIAS(AMDFAM1AH, "amdfam1a")
X86_CPU_TYPE_ALIAS(INTEL_SILVERMONT, "slm")
#undef X86_CPU_TYPE_ALIAS
@@ -104,6 +106,7 @@ X86_CPU_SUBTYPE(INTEL_COREI7_GRANITERAPIDS_D,"graniterapids-d")
X86_CPU_SUBTYPE(INTEL_COREI7_ARROWLAKE, "arrowlake")
X86_CPU_SUBTYPE(INTEL_COREI7_ARROWLAKE_S, "arrowlake-s")
X86_CPU_SUBTYPE(INTEL_COREI7_PANTHERLAKE, "pantherlake")
+X86_CPU_SUBTYPE(AMDFAM1AH_ZNVER5, "znver5")
// Alternate names supported by __builtin_cpu_is and target multiversioning.
X86_CPU_SUBTYPE_ALIAS(INTEL_COREI7_ALDERLAKE, "raptorlake")
diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.h b/llvm/include/llvm/TargetParser/X86TargetParser.h
index 2083e585af4a..5468aaa81edb 100644
--- a/llvm/include/llvm/TargetParser/X86TargetParser.h
+++ b/llvm/include/llvm/TargetParser/X86TargetParser.h
@@ -147,6 +147,7 @@ enum CPUKind {
CK_x86_64_v3,
CK_x86_64_v4,
CK_Geode,
+ CK_ZNVER5,
};
/// Parse \p CPU string into a CPUKind. Will only accept 64-bit capable CPUs if
diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp
index 0f29ebe3ee79..b1a2bfaf7895 100644
--- a/llvm/lib/CodeGen/ModuloSchedule.cpp
+++ b/llvm/lib/CodeGen/ModuloSchedule.cpp
@@ -130,6 +130,7 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
// Generate the prolog instructions that set up the pipeline.
generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs);
MF.insert(BB->getIterator(), KernelBB);
+ LIS.insertMBBInMaps(KernelBB);
// Rearrange the instructions to generate the new, pipelined loop,
// and update register names as needed.
@@ -210,6 +211,7 @@ void ModuloScheduleExpander::generateProlog(unsigned LastStage,
NewBB->transferSuccessors(PredBB);
PredBB->addSuccessor(NewBB);
PredBB = NewBB;
+ LIS.insertMBBInMaps(NewBB);
// Generate instructions for each appropriate stage. Process instructions
// in original program order.
@@ -283,6 +285,7 @@ void ModuloScheduleExpander::generateEpilog(
PredBB->replaceSuccessor(LoopExitBB, NewBB);
NewBB->addSuccessor(LoopExitBB);
+ LIS.insertMBBInMaps(NewBB);
if (EpilogStart == LoopExitBB)
EpilogStart = NewBB;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index df3d207d85d3..b961d3bb1fec 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -1453,6 +1453,10 @@ void SelectionDAGISel::reportIPToStateForBlocks(MachineFunction *MF) {
if (BB->getFirstMayFaultInst()) {
// Report IP range only for blocks with Faulty inst
auto MBBb = MBB.getFirstNonPHI();
+
+ if (MBBb == MBB.end())
+ continue;
+
MachineInstr *MIb = &*MBBb;
if (MIb->isTerminator())
continue;
diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp
index 0a9498f051cb..46896d3cdf7d 100644
--- a/llvm/lib/IR/BasicBlock.cpp
+++ b/llvm/lib/IR/BasicBlock.cpp
@@ -975,8 +975,16 @@ void BasicBlock::spliceDebugInfoImpl(BasicBlock::iterator Dest, BasicBlock *Src,
if (ReadFromTail && Src->getMarker(Last)) {
DbgMarker *FromLast = Src->getMarker(Last);
if (LastIsEnd) {
- Dest->adoptDbgRecords(Src, Last, true);
- // adoptDbgRecords will release any trailers.
+ if (Dest == end()) {
+ // Absorb the trailing markers from Src.
+ assert(FromLast == Src->getTrailingDbgRecords());
+ createMarker(Dest)->absorbDebugValues(*FromLast, true);
+ FromLast->eraseFromParent();
+ Src->deleteTrailingDbgRecords();
+ } else {
+ // adoptDbgRecords will release any trailers.
+ Dest->adoptDbgRecords(Src, Last, true);
+ }
assert(!Src->getTrailingDbgRecords());
} else {
// FIXME: can we use adoptDbgRecords here to reduce allocations?
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index ba46ededc63a..87e057a468af 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -2931,16 +2931,6 @@ struct RegPairInfo {
} // end anonymous namespace
-unsigned findFreePredicateReg(BitVector &SavedRegs) {
- for (unsigned PReg = AArch64::P8; PReg <= AArch64::P15; ++PReg) {
- if (SavedRegs.test(PReg)) {
- unsigned PNReg = PReg - AArch64::P0 + AArch64::PN0;
- return PNReg;
- }
- }
- return AArch64::NoRegister;
-}
-
static void computeCalleeSaveRegisterPairs(
MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI,
const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs,
@@ -3645,7 +3635,6 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
unsigned ExtraCSSpill = 0;
bool HasUnpairedGPR64 = false;
- bool HasPairZReg = false;
// Figure out which callee-saved registers to save/restore.
for (unsigned i = 0; CSRegs[i]; ++i) {
const unsigned Reg = CSRegs[i];
@@ -3699,28 +3688,6 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
!RegInfo->isReservedReg(MF, PairedReg))
ExtraCSSpill = PairedReg;
}
- // Check if there is a pair of ZRegs, so it can select PReg for spill/fill
- HasPairZReg |= (AArch64::ZPRRegClass.contains(Reg, CSRegs[i ^ 1]) &&
- SavedRegs.test(CSRegs[i ^ 1]));
- }
-
- if (HasPairZReg && (Subtarget.hasSVE2p1() || Subtarget.hasSME2())) {
- AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- // Find a suitable predicate register for the multi-vector spill/fill
- // instructions.
- unsigned PnReg = findFreePredicateReg(SavedRegs);
- if (PnReg != AArch64::NoRegister)
- AFI->setPredicateRegForFillSpill(PnReg);
- // If no free callee-save has been found assign one.
- if (!AFI->getPredicateRegForFillSpill() &&
- MF.getFunction().getCallingConv() ==
- CallingConv::AArch64_SVE_VectorCall) {
- SavedRegs.set(AArch64::P8);
- AFI->setPredicateRegForFillSpill(AArch64::PN8);
- }
-
- assert(!RegInfo->isReservedReg(MF, AFI->getPredicateRegForFillSpill()) &&
- "Predicate cannot be a reserved register");
}
if (MF.getFunction().getCallingConv() == CallingConv::Win64 &&
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 377bcd5868fb..805684ef69a5 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -5144,10 +5144,6 @@ void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
if (PNRReg.isValid() && !PNRReg.isVirtual())
MI.addDef(PNRReg, RegState::Implicit);
MI.addMemOperand(MMO);
-
- if (PNRReg.isValid() && PNRReg.isVirtual())
- BuildMI(MBB, MBBI, DebugLoc(), get(TargetOpcode::COPY), PNRReg)
- .addReg(DestReg);
}
bool llvm::isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 39ae7c96cf77..a71c9453d968 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4349,6 +4349,7 @@ AMDGPUTargetLowering::performMulLoHiCombine(SDNode *N,
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
+ bool Signed = N->getOpcode() == ISD::SMUL_LOHI;
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -4363,20 +4364,25 @@ AMDGPUTargetLowering::performMulLoHiCombine(SDNode *N,
// Try to use two fast 24-bit multiplies (one for each half of the result)
// instead of one slow extending multiply.
- unsigned LoOpcode, HiOpcode;
- if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) {
- N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32);
- N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32);
- LoOpcode = AMDGPUISD::MUL_U24;
- HiOpcode = AMDGPUISD::MULHI_U24;
- } else if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) {
- N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32);
- N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32);
- LoOpcode = AMDGPUISD::MUL_I24;
- HiOpcode = AMDGPUISD::MULHI_I24;
+ unsigned LoOpcode = 0;
+ unsigned HiOpcode = 0;
+ if (Signed) {
+ if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) {
+ N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32);
+ N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32);
+ LoOpcode = AMDGPUISD::MUL_I24;
+ HiOpcode = AMDGPUISD::MULHI_I24;
+ }
} else {
- return SDValue();
+ if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) {
+ N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32);
+ N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32);
+ LoOpcode = AMDGPUISD::MUL_U24;
+ HiOpcode = AMDGPUISD::MULHI_U24;
+ }
}
+ if (!LoOpcode)
+ return SDValue();
SDValue Lo = DAG.getNode(LoOpcode, DL, MVT::i32, N0, N1);
SDValue Hi = DAG.getNode(HiOpcode, DL, MVT::i32, N0, N1);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 21cf4d9eeac1..758de9d732fa 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -9338,12 +9338,13 @@ SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
SDValue Op0 = Op->getOperand(0);
+ if (!Subtarget.isPPC64() || (Op0.getOpcode() != ISD::BUILD_PAIR) ||
+ (Op.getValueType() != MVT::f128))
+ return SDValue();
+
SDValue Lo = Op0.getOperand(0);
SDValue Hi = Op0.getOperand(1);
-
- if ((Op.getValueType() != MVT::f128) ||
- (Op0.getOpcode() != ISD::BUILD_PAIR) || (Lo.getValueType() != MVT::i64) ||
- (Hi.getValueType() != MVT::i64) || !Subtarget.isPPC64())
+ if ((Lo.getValueType() != MVT::i64) || (Hi.getValueType() != MVT::i64))
return SDValue();
if (!Subtarget.isLittleEndian())
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index ba3b4bd701d6..6c0cbeadebf4 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -2902,7 +2902,7 @@ RISCVInstrInfo::getOutliningTypeImpl(MachineBasicBlock::iterator &MBBI,
// if any possible.
if (MO.getTargetFlags() == RISCVII::MO_PCREL_LO &&
(MI.getMF()->getTarget().getFunctionSections() || F.hasComdat() ||
- F.hasSection()))
+ F.hasSection() || F.getSectionPrefix()))
return outliner::InstrType::Illegal;
}
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 9dafd5e628ca..e82e624f7099 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -1543,6 +1543,19 @@ def ProcessorFeatures {
FeatureVPOPCNTDQ];
list<SubtargetFeature> ZN4Features =
!listconcat(ZN3Features, ZN4AdditionalFeatures);
+
+
+ list<SubtargetFeature> ZN5Tuning = ZN4Tuning;
+ list<SubtargetFeature> ZN5AdditionalFeatures = [FeatureVNNI,
+ FeatureMOVDIRI,
+ FeatureMOVDIR64B,
+ FeatureVP2INTERSECT,
+ FeaturePREFETCHI,
+ FeatureAVXVNNI
+ ];
+ list<SubtargetFeature> ZN5Features =
+ !listconcat(ZN4Features, ZN5AdditionalFeatures);
+
}
//===----------------------------------------------------------------------===//
@@ -1892,6 +1905,8 @@ def : ProcModel<"znver3", Znver3Model, ProcessorFeatures.ZN3Features,
ProcessorFeatures.ZN3Tuning>;
def : ProcModel<"znver4", Znver4Model, ProcessorFeatures.ZN4Features,
ProcessorFeatures.ZN4Tuning>;
+def : ProcModel<"znver5", Znver4Model, ProcessorFeatures.ZN5Features,
+ ProcessorFeatures.ZN5Tuning>;
def : Proc<"geode", [FeatureX87, FeatureCX8, FeatureMMX, FeaturePRFCHW],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
diff --git a/llvm/lib/Target/X86/X86PfmCounters.td b/llvm/lib/Target/X86/X86PfmCounters.td
index 2b1dac411c99..c30e989cdc2a 100644
--- a/llvm/lib/Target/X86/X86PfmCounters.td
+++ b/llvm/lib/Target/X86/X86PfmCounters.td
@@ -350,3 +350,4 @@ def ZnVer4PfmCounters : ProcPfmCounters {
let ValidationCounters = DefaultAMDPfmValidationCounters;
}
def : PfmCountersBinding<"znver4", ZnVer4PfmCounters>;
+def : PfmCountersBinding<"znver5", ZnVer4PfmCounters>;
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index 7e637cba4cfb..865b6a44adbb 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -1213,6 +1213,25 @@ static const char *getAMDProcessorTypeAndSubtype(unsigned Family,
break; // "znver4"
}
break; // family 19h
+ case 26:
+ CPU = "znver5";
+ *Type = X86::AMDFAM1AH;
+ if (Model <= 0x77) {
+ // Models 00h-0Fh (Breithorn).
+ // Models 10h-1Fh (Breithorn-Dense).
+ // Models 20h-2Fh (Strix 1).
+ // Models 30h-37h (Strix 2).
+ // Models 38h-3Fh (Strix 3).
+ // Models 40h-4Fh (Granite Ridge).
+ // Models 50h-5Fh (Weisshorn).
+ // Models 60h-6Fh (Krackan1).
+ // Models 70h-77h (Sarlak).
+ CPU = "znver5";
+ *Subtype = X86::AMDFAM1AH_ZNVER5;
+ break; // "znver5"
+ }
+ break;
+
default:
break; // Unknown AMD CPU.
}
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index dcf9130052ac..a6f3b5ba5d33 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -238,6 +238,10 @@ static constexpr FeatureBitset FeaturesZNVER4 =
FeatureAVX512BITALG | FeatureAVX512VPOPCNTDQ | FeatureAVX512BF16 |
FeatureGFNI | FeatureSHSTK;
+static constexpr FeatureBitset FeaturesZNVER5 =
+ FeaturesZNVER4 | FeatureAVXVNNI | FeatureMOVDIRI | FeatureMOVDIR64B |
+ FeatureAVX512VP2INTERSECT | FeaturePREFETCHI | FeatureAVXVNNI;
+
// D151696 tranplanted Mangling and OnlyForCPUDispatchSpecific from
// X86TargetParser.def to here. They are assigned by following ways:
// 1. Copy the mangling from the original CPU_SPEICIFC MACROs. If no, assign
@@ -417,6 +421,7 @@ constexpr ProcInfo Processors[] = {
{ {"znver2"}, CK_ZNVER2, FEATURE_AVX2, FeaturesZNVER2, '\0', false },
{ {"znver3"}, CK_ZNVER3, FEATURE_AVX2, FeaturesZNVER3, '\0', false },
{ {"znver4"}, CK_ZNVER4, FEATURE_AVX512VBMI2, FeaturesZNVER4, '\0', false },
+ { {"znver5"}, CK_ZNVER5, FEATURE_AVX512VP2INTERSECT, FeaturesZNVER5, '\0', false },
// Generic 64-bit processor.
{ {"x86-64"}, CK_x86_64, FEATURE_SSE2 , FeaturesX86_64, '\0', false },
{ {"x86-64-v2"}, CK_x86_64_v2, FEATURE_SSE4_2 , FeaturesX86_64_V2, '\0', false },
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index cca9eeebaa53..ab2b96cdc42d 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -15211,7 +15211,8 @@ bool BoUpSLP::collectValuesToDemote(
if (any_of(E.Scalars, [&](Value *V) {
return !all_of(V->users(), [=](User *U) {
return getTreeEntry(U) ||
- (UserIgnoreList && UserIgnoreList->contains(U)) ||
+ (E.Idx == 0 && UserIgnoreList &&
+ UserIgnoreList->contains(U)) ||
(!isa<CmpInst>(U) && U->getType()->isSized() &&
!U->getType()->isScalableTy() &&
DL->getTypeSizeInBits(U->getType()) <= BitWidth);
@@ -15539,6 +15540,11 @@ void BoUpSLP::computeMinimumValueSizes() {
const TreeEntry *UserTE = E.UserTreeIndices.back().UserTE;
if (TE == UserTE || !TE)
return false;
+ if (!isa<CastInst, BinaryOperator, FreezeInst, PHINode,
+ SelectInst>(U) ||
+ !isa<CastInst, BinaryOperator, FreezeInst, PHINode,
+ SelectInst>(UserTE->getMainOp()))
+ return true;
unsigned UserTESz = DL->getTypeSizeInBits(
UserTE->Scalars.front()->getType());
auto It = MinBWs.find(TE);