author     Dimitry Andric <dim@FreeBSD.org>    2024-09-22 09:37:02 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2024-09-22 09:37:02 +0000
commit     1de139fdd502e836552eba1049a42b45514d8f7b (patch)
tree       9418bf14cdd3b0ea4bf5dbe50ee2ec4550264ddc
parent     7432c96084d72c631f1e5ddf1cc2e24fd9c92482 (diff)

Vendor import of llvm-project branch release/19.x llvmorg-19.1.0-0-ga4bf6cd7cfb1, a.k.a. 19.1.0 release.
(ref: vendor/llvm-project/llvmorg-19.1.0-0-ga4bf6cd7cfb1)
-rw-r--r--  clang/lib/Basic/Targets/X86.cpp                      |   4
-rw-r--r--  clang/lib/CodeGen/CGCall.cpp                         | 146
-rw-r--r--  clang/lib/CodeGen/CGExprAgg.cpp                      |  23
-rw-r--r--  clang/lib/CodeGen/CGStmt.cpp                         |   2
-rw-r--r--  clang/lib/CodeGen/CodeGenFunction.h                  |   7
-rw-r--r--  clang/lib/Format/UnwrappedLineParser.cpp             |   6
-rw-r--r--  clang/lib/Sema/SemaExpr.cpp                          |  21
-rw-r--r--  clang/lib/Sema/SemaExprCXX.cpp                       |   3
-rw-r--r--  clang/lib/Sema/SemaLambda.cpp                        |   1
-rw-r--r--  clang/lib/Sema/SemaLookup.cpp                        |   2
-rw-r--r--  clang/lib/StaticAnalyzer/Core/ExprEngine.cpp         |   5
-rw-r--r--  compiler-rt/lib/builtins/cpu_model/x86.c             |  20
-rw-r--r--  compiler-rt/lib/builtins/divtc3.c                    |   2
-rw-r--r--  compiler-rt/lib/builtins/multc3.c                    |   2
-rw-r--r--  libcxx/include/chrono                                |   2
-rw-r--r--  lld/ELF/Arch/Hexagon.cpp                             |   8
-rw-r--r--  llvm/include/llvm/TargetParser/X86TargetParser.def   |   3
-rw-r--r--  llvm/include/llvm/TargetParser/X86TargetParser.h     |   1
-rw-r--r--  llvm/lib/CodeGen/ModuloSchedule.cpp                  |   3
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp   |   4
-rw-r--r--  llvm/lib/IR/BasicBlock.cpp                           |  12
-rw-r--r--  llvm/lib/Target/AArch64/AArch64FrameLowering.cpp     |  33
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrInfo.cpp         |   4
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp        |  30
-rw-r--r--  llvm/lib/Target/PowerPC/PPCISelLowering.cpp          |   9
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfo.cpp             |   2
-rw-r--r--  llvm/lib/Target/X86/X86.td                           |  15
-rw-r--r--  llvm/lib/Target/X86/X86PfmCounters.td                |   1
-rw-r--r--  llvm/lib/TargetParser/Host.cpp                       |  19
-rw-r--r--  llvm/lib/TargetParser/X86TargetParser.cpp            |   5
-rw-r--r--  llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp      |   8

31 files changed, 227 insertions(+), 176 deletions(-)
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index 18e6dbf03e00..072c97e6c8c6 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -723,6 +723,9 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
case CK_ZNVER4:
defineCPUMacros(Builder, "znver4");
break;
+ case CK_ZNVER5:
+ defineCPUMacros(Builder, "znver5");
+ break;
case CK_Geode:
defineCPUMacros(Builder, "geode");
break;
@@ -1613,6 +1616,7 @@ std::optional<unsigned> X86TargetInfo::getCPUCacheLineSize() const {
case CK_ZNVER2:
case CK_ZNVER3:
case CK_ZNVER4:
+ case CK_ZNVER5:
// Deprecated
case CK_x86_64:
case CK_x86_64_v2:
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 6e69e84a2344..d7ebffa8c5e4 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -1336,75 +1336,50 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty,
return CGF.Builder.CreateLoad(Tmp);
}
-// Function to store a first-class aggregate into memory. We prefer to
-// store the elements rather than the aggregate to be more friendly to
-// fast-isel.
-// FIXME: Do we need to recurse here?
-void CodeGenFunction::EmitAggregateStore(llvm::Value *Val, Address Dest,
- bool DestIsVolatile) {
- // Prefer scalar stores to first-class aggregate stores.
- if (llvm::StructType *STy = dyn_cast<llvm::StructType>(Val->getType())) {
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
- Address EltPtr = Builder.CreateStructGEP(Dest, i);
- llvm::Value *Elt = Builder.CreateExtractValue(Val, i);
- Builder.CreateStore(Elt, EltPtr, DestIsVolatile);
- }
- } else {
- Builder.CreateStore(Val, Dest, DestIsVolatile);
- }
-}
-
-/// CreateCoercedStore - Create a store to \arg DstPtr from \arg Src,
-/// where the source and destination may have different types. The
-/// destination is known to be aligned to \arg DstAlign bytes.
-///
-/// This safely handles the case when the src type is larger than the
-/// destination type; the upper bits of the src will be lost.
-static void CreateCoercedStore(llvm::Value *Src,
- Address Dst,
- bool DstIsVolatile,
- CodeGenFunction &CGF) {
- llvm::Type *SrcTy = Src->getType();
- llvm::Type *DstTy = Dst.getElementType();
- if (SrcTy == DstTy) {
- CGF.Builder.CreateStore(Src, Dst, DstIsVolatile);
- return;
- }
-
- llvm::TypeSize SrcSize = CGF.CGM.getDataLayout().getTypeAllocSize(SrcTy);
-
- if (llvm::StructType *DstSTy = dyn_cast<llvm::StructType>(DstTy)) {
- Dst = EnterStructPointerForCoercedAccess(Dst, DstSTy,
- SrcSize.getFixedValue(), CGF);
- DstTy = Dst.getElementType();
- }
-
- llvm::PointerType *SrcPtrTy = llvm::dyn_cast<llvm::PointerType>(SrcTy);
- llvm::PointerType *DstPtrTy = llvm::dyn_cast<llvm::PointerType>(DstTy);
- if (SrcPtrTy && DstPtrTy &&
- SrcPtrTy->getAddressSpace() != DstPtrTy->getAddressSpace()) {
- Src = CGF.Builder.CreateAddrSpaceCast(Src, DstTy);
- CGF.Builder.CreateStore(Src, Dst, DstIsVolatile);
+void CodeGenFunction::CreateCoercedStore(llvm::Value *Src, Address Dst,
+ llvm::TypeSize DstSize,
+ bool DstIsVolatile) {
+ if (!DstSize)
return;
- }
- // If the source and destination are integer or pointer types, just do an
- // extension or truncation to the desired type.
- if ((isa<llvm::IntegerType>(SrcTy) || isa<llvm::PointerType>(SrcTy)) &&
- (isa<llvm::IntegerType>(DstTy) || isa<llvm::PointerType>(DstTy))) {
- Src = CoerceIntOrPtrToIntOrPtr(Src, DstTy, CGF);
- CGF.Builder.CreateStore(Src, Dst, DstIsVolatile);
- return;
+ llvm::Type *SrcTy = Src->getType();
+ llvm::TypeSize SrcSize = CGM.getDataLayout().getTypeAllocSize(SrcTy);
+
+ // GEP into structs to try to make types match.
+ // FIXME: This isn't really that useful with opaque types, but it impacts a
+ // lot of regression tests.
+ if (SrcTy != Dst.getElementType()) {
+ if (llvm::StructType *DstSTy =
+ dyn_cast<llvm::StructType>(Dst.getElementType())) {
+ assert(!SrcSize.isScalable());
+ Dst = EnterStructPointerForCoercedAccess(Dst, DstSTy,
+ SrcSize.getFixedValue(), *this);
+ }
}
- llvm::TypeSize DstSize = CGF.CGM.getDataLayout().getTypeAllocSize(DstTy);
-
- // If store is legal, just bitcast the src pointer.
- if (isa<llvm::ScalableVectorType>(SrcTy) ||
- isa<llvm::ScalableVectorType>(DstTy) ||
- SrcSize.getFixedValue() <= DstSize.getFixedValue()) {
- Dst = Dst.withElementType(SrcTy);
- CGF.EmitAggregateStore(Src, Dst, DstIsVolatile);
+ if (SrcSize.isScalable() || SrcSize <= DstSize) {
+ if (SrcTy->isIntegerTy() && Dst.getElementType()->isPointerTy() &&
+ SrcSize == CGM.getDataLayout().getTypeAllocSize(Dst.getElementType())) {
+ // If the value is supposed to be a pointer, convert it before storing it.
+ Src = CoerceIntOrPtrToIntOrPtr(Src, Dst.getElementType(), *this);
+ Builder.CreateStore(Src, Dst, DstIsVolatile);
+ } else if (llvm::StructType *STy =
+ dyn_cast<llvm::StructType>(Src->getType())) {
+ // Prefer scalar stores to first-class aggregate stores.
+ Dst = Dst.withElementType(SrcTy);
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ Address EltPtr = Builder.CreateStructGEP(Dst, i);
+ llvm::Value *Elt = Builder.CreateExtractValue(Src, i);
+ Builder.CreateStore(Elt, EltPtr, DstIsVolatile);
+ }
+ } else {
+ Builder.CreateStore(Src, Dst.withElementType(SrcTy), DstIsVolatile);
+ }
+ } else if (SrcTy->isIntegerTy()) {
+ // If the source is a simple integer, coerce it directly.
+ llvm::Type *DstIntTy = Builder.getIntNTy(DstSize.getFixedValue() * 8);
+ Src = CoerceIntOrPtrToIntOrPtr(Src, DstIntTy, *this);
+ Builder.CreateStore(Src, Dst.withElementType(DstIntTy), DstIsVolatile);
} else {
// Otherwise do coercion through memory. This is stupid, but
// simple.
@@ -1416,12 +1391,12 @@ static void CreateCoercedStore(llvm::Value *Src,
// FIXME: Assert that we aren't truncating non-padding bits when have access
// to that information.
RawAddress Tmp =
- CreateTempAllocaForCoercion(CGF, SrcTy, Dst.getAlignment());
- CGF.Builder.CreateStore(Src, Tmp);
- CGF.Builder.CreateMemCpy(
- Dst.emitRawPointer(CGF), Dst.getAlignment().getAsAlign(),
- Tmp.getPointer(), Tmp.getAlignment().getAsAlign(),
- llvm::ConstantInt::get(CGF.IntPtrTy, DstSize.getFixedValue()));
+ CreateTempAllocaForCoercion(*this, SrcTy, Dst.getAlignment());
+ Builder.CreateStore(Src, Tmp);
+ Builder.CreateMemCpy(Dst.emitRawPointer(*this),
+ Dst.getAlignment().getAsAlign(), Tmp.getPointer(),
+ Tmp.getAlignment().getAsAlign(),
+ Builder.CreateTypeSize(IntPtrTy, DstSize));
}
}
@@ -3309,7 +3284,12 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
assert(NumIRArgs == 1);
auto AI = Fn->getArg(FirstIRArg);
AI->setName(Arg->getName() + ".coerce");
- CreateCoercedStore(AI, Ptr, /*DstIsVolatile=*/false, *this);
+ CreateCoercedStore(
+ AI, Ptr,
+ llvm::TypeSize::getFixed(
+ getContext().getTypeSizeInChars(Ty).getQuantity() -
+ ArgI.getDirectOffset()),
+ /*DstIsVolatile=*/false);
}
// Match to what EmitParmDecl is expecting for this type.
@@ -5939,17 +5919,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
llvm::Value *Imag = Builder.CreateExtractValue(CI, 1);
return RValue::getComplex(std::make_pair(Real, Imag));
}
- case TEK_Aggregate: {
- Address DestPtr = ReturnValue.getAddress();
- bool DestIsVolatile = ReturnValue.isVolatile();
-
- if (!DestPtr.isValid()) {
- DestPtr = CreateMemTemp(RetTy, "agg.tmp");
- DestIsVolatile = false;
- }
- EmitAggregateStore(CI, DestPtr, DestIsVolatile);
- return RValue::getAggregate(DestPtr);
- }
+ case TEK_Aggregate:
+ break;
case TEK_Scalar: {
// If the argument doesn't match, perform a bitcast to coerce it.
// This can happen due to trivial type mismatches.
@@ -5959,7 +5930,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
return RValue::get(V);
}
}
- llvm_unreachable("bad evaluation kind");
}
// If coercing a fixed vector from a scalable vector for ABI
@@ -5981,10 +5951,13 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
Address DestPtr = ReturnValue.getValue();
bool DestIsVolatile = ReturnValue.isVolatile();
+ uint64_t DestSize =
+ getContext().getTypeInfoDataSizeInChars(RetTy).Width.getQuantity();
if (!DestPtr.isValid()) {
DestPtr = CreateMemTemp(RetTy, "coerce");
DestIsVolatile = false;
+ DestSize = getContext().getTypeSizeInChars(RetTy).getQuantity();
}
// An empty record can overlap other data (if declared with
@@ -5993,7 +5966,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
if (!isEmptyRecord(getContext(), RetTy, true)) {
// If the value is offset in memory, apply the offset now.
Address StorePtr = emitAddressAtOffset(*this, DestPtr, RetAI);
- CreateCoercedStore(CI, StorePtr, DestIsVolatile, *this);
+ CreateCoercedStore(
+ CI, StorePtr,
+ llvm::TypeSize::getFixed(DestSize - RetAI.getDirectOffset()),
+ DestIsVolatile);
}
return convertTempToRValue(DestPtr, RetTy, SourceLocation());
diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp
index c3c10e73ff05..d9f44f4be617 100644
--- a/clang/lib/CodeGen/CGExprAgg.cpp
+++ b/clang/lib/CodeGen/CGExprAgg.cpp
@@ -131,15 +131,12 @@ public:
EnsureDest(E->getType());
if (llvm::Value *Result = ConstantEmitter(CGF).tryEmitConstantExpr(E)) {
- Address StoreDest = Dest.getAddress();
- // The emitted value is guaranteed to have the same size as the
- // destination but can have a different type. Just do a bitcast in this
- // case to avoid incorrect GEPs.
- if (Result->getType() != StoreDest.getType())
- StoreDest = StoreDest.withElementType(Result->getType());
-
- CGF.EmitAggregateStore(Result, StoreDest,
- E->getType().isVolatileQualified());
+ CGF.CreateCoercedStore(
+ Result, Dest.getAddress(),
+ llvm::TypeSize::getFixed(
+ Dest.getPreferredSize(CGF.getContext(), E->getType())
+ .getQuantity()),
+ E->getType().isVolatileQualified());
return;
}
return Visit(E->getSubExpr());
@@ -2050,6 +2047,10 @@ CodeGenFunction::getOverlapForFieldInit(const FieldDecl *FD) {
if (!FD->hasAttr<NoUniqueAddressAttr>() || !FD->getType()->isRecordType())
return AggValueSlot::DoesNotOverlap;
+ // Empty fields can overlap earlier fields.
+ if (FD->getType()->getAsCXXRecordDecl()->isEmpty())
+ return AggValueSlot::MayOverlap;
+
// If the field lies entirely within the enclosing class's nvsize, its tail
// padding cannot overlap any already-initialized object. (The only subobjects
// with greater addresses that might already be initialized are vbases.)
@@ -2072,6 +2073,10 @@ AggValueSlot::Overlap_t CodeGenFunction::getOverlapForBaseInit(
if (IsVirtual)
return AggValueSlot::MayOverlap;
+ // Empty bases can overlap earlier bases.
+ if (BaseRD->isEmpty())
+ return AggValueSlot::MayOverlap;
+
// If the base class is laid out entirely within the nvsize of the derived
// class, its tail padding cannot yet be initialized, so we can issue
// stores at the full width of the base class.
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index aa97f685ac7a..2f466602d2f6 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -745,7 +745,7 @@ void CodeGenFunction::EmitAttributedStmt(const AttributedStmt &S) {
} break;
case attr::CXXAssume: {
const Expr *Assumption = cast<CXXAssumeAttr>(A)->getAssumption();
- if (getLangOpts().CXXAssumptions &&
+ if (getLangOpts().CXXAssumptions && Builder.GetInsertBlock() &&
!Assumption->HasSideEffects(getContext())) {
llvm::Value *AssumptionVal = EvaluateExprAsBool(Assumption);
Builder.CreateAssumption(AssumptionVal);
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index ba7b565d9755..60e6841e1b3d 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4838,9 +4838,10 @@ public:
void EmitAggFinalDestCopy(QualType Type, AggValueSlot Dest, const LValue &Src,
ExprValueKind SrcKind);
- /// Build all the stores needed to initialize an aggregate at Dest with the
- /// value Val.
- void EmitAggregateStore(llvm::Value *Val, Address Dest, bool DestIsVolatile);
+ /// Create a store to \arg DstPtr from \arg Src, truncating the stored value
+ /// to at most \arg DstSize bytes.
+ void CreateCoercedStore(llvm::Value *Src, Address Dst, llvm::TypeSize DstSize,
+ bool DstIsVolatile);
/// EmitExtendGCLifetime - Given a pointer to an Objective-C object,
/// make sure it survives garbage collection until this point.
diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index 60e65aaa83e9..7813d86ff0ea 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -570,7 +570,8 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
Keywords.kw_as));
ProbablyBracedList =
- ProbablyBracedList || (IsCpp && NextTok->is(tok::l_paren));
+ ProbablyBracedList || (IsCpp && (PrevTok->Tok.isLiteral() ||
+ NextTok->is(tok::l_paren)));
// If there is a comma, semicolon or right paren after the closing
// brace, we assume this is a braced initializer list.
@@ -609,8 +610,9 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
ProbablyBracedList = NextTok->isNot(tok::l_square);
}
- // Cpp macro definition body containing nonempty braced list or block:
+ // Cpp macro definition body that is a nonempty braced list or block:
if (IsCpp && Line->InMacroBody && PrevTok != FormatTok &&
+ !FormatTok->Previous && NextTok->is(tok::eof) &&
// A statement can end with only `;` (simple statement), a block
// closing brace (compound statement), or `:` (label statement).
// If PrevTok is a block opening brace, Tok ends an empty block.
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index edb8b79a2220..f56ca398cda8 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -5430,11 +5430,24 @@ struct EnsureImmediateInvocationInDefaultArgs
// Rewrite to source location to refer to the context in which they are used.
ExprResult TransformSourceLocExpr(SourceLocExpr *E) {
- if (E->getParentContext() == SemaRef.CurContext)
+ DeclContext *DC = E->getParentContext();
+ if (DC == SemaRef.CurContext)
return E;
- return getDerived().RebuildSourceLocExpr(E->getIdentKind(), E->getType(),
- E->getBeginLoc(), E->getEndLoc(),
- SemaRef.CurContext);
+
+ // FIXME: During instantiation, because the rebuild of default arguments
+ // is not always done in the context of the template instantiator,
+ // we run the risk of producing a dependent source location
+ // that would never be rebuilt.
+ // This usually happens during overload resolution, or in contexts
+ // where the value of the source location does not matter.
+ // However, we should find a better way to deal with source location
+ // of function templates.
+ if (!SemaRef.CurrentInstantiationScope ||
+ !SemaRef.CurContext->isDependentContext() || DC->isDependentContext())
+ DC = SemaRef.CurContext;
+
+ return getDerived().RebuildSourceLocExpr(
+ E->getIdentKind(), E->getType(), E->getBeginLoc(), E->getEndLoc(), DC);
}
};
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index 14d1f395af90..de50786f4d6c 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -5140,7 +5140,8 @@ static bool HasNonDeletedDefaultedEqualityComparison(Sema &S,
// const ClassT& obj;
OpaqueValueExpr Operand(
- {}, Decl->getTypeForDecl()->getCanonicalTypeUnqualified().withConst(),
+ KeyLoc,
+ Decl->getTypeForDecl()->getCanonicalTypeUnqualified().withConst(),
ExprValueKind::VK_LValue);
UnresolvedSet<16> Functions;
// obj == obj;
diff --git a/clang/lib/Sema/SemaLambda.cpp b/clang/lib/Sema/SemaLambda.cpp
index 601077e9f333..809b94bb7412 100644
--- a/clang/lib/Sema/SemaLambda.cpp
+++ b/clang/lib/Sema/SemaLambda.cpp
@@ -1318,7 +1318,6 @@ void Sema::ActOnLambdaExpressionAfterIntroducer(LambdaIntroducer &Intro,
if (C->Init.isUsable()) {
addInitCapture(LSI, cast<VarDecl>(Var), C->Kind == LCK_ByRef);
- PushOnScopeChains(Var, CurScope, false);
} else {
TryCaptureKind Kind = C->Kind == LCK_ByRef ? TryCapture_ExplicitByRef
: TryCapture_ExplicitByVal;
diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp
index 7a6a64529f52..d3d4bf27ae72 100644
--- a/clang/lib/Sema/SemaLookup.cpp
+++ b/clang/lib/Sema/SemaLookup.cpp
@@ -570,7 +570,7 @@ void LookupResult::resolveKind() {
// For non-type declarations, check for a prior lookup result naming this
// canonical declaration.
- if (!D->isPlaceholderVar(getSema().getLangOpts()) && !ExistingI) {
+ if (!ExistingI) {
auto UniqueResult = Unique.insert(std::make_pair(D, I));
if (!UniqueResult.second) {
// We've seen this entity before.
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index 62a240ecbc60..c11468a08ae5 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -1928,6 +1928,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
case Stmt::CXXRewrittenBinaryOperatorClass:
case Stmt::RequiresExprClass:
case Expr::CXXParenListInitExprClass:
+ case Stmt::EmbedExprClass:
// Fall through.
// Cases we intentionally don't evaluate, since they don't need
@@ -2430,10 +2431,6 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
Bldr.addNodes(Dst);
break;
}
-
- case Stmt::EmbedExprClass:
- llvm::report_fatal_error("Support for EmbedExpr is not implemented.");
- break;
}
}
diff --git a/compiler-rt/lib/builtins/cpu_model/x86.c b/compiler-rt/lib/builtins/cpu_model/x86.c
index 867ed97e57bf..b1c4abd9d11d 100644
--- a/compiler-rt/lib/builtins/cpu_model/x86.c
+++ b/compiler-rt/lib/builtins/cpu_model/x86.c
@@ -59,6 +59,7 @@ enum ProcessorTypes {
INTEL_SIERRAFOREST,
INTEL_GRANDRIDGE,
INTEL_CLEARWATERFOREST,
+ AMDFAM1AH,
CPU_TYPE_MAX
};
@@ -97,6 +98,7 @@ enum ProcessorSubtypes {
INTEL_COREI7_ARROWLAKE,
INTEL_COREI7_ARROWLAKE_S,
INTEL_COREI7_PANTHERLAKE,
+ AMDFAM1AH_ZNVER5,
CPU_SUBTYPE_MAX
};
@@ -803,6 +805,24 @@ static const char *getAMDProcessorTypeAndSubtype(unsigned Family,
break; // "znver4"
}
break; // family 19h
+ case 26:
+ CPU = "znver5";
+ *Type = AMDFAM1AH;
+ if (Model <= 0x77) {
+ // Models 00h-0Fh (Breithorn).
+ // Models 10h-1Fh (Breithorn-Dense).
+ // Models 20h-2Fh (Strix 1).
+ // Models 30h-37h (Strix 2).
+ // Models 38h-3Fh (Strix 3).
+ // Models 40h-4Fh (Granite Ridge).
+ // Models 50h-5Fh (Weisshorn).
+ // Models 60h-6Fh (Krackan1).
+ // Models 70h-77h (Sarlak).
+ CPU = "znver5";
+ *Subtype = AMDFAM1AH_ZNVER5;
+ break; // "znver5"
+ }
+ break;
default:
break; // Unknown AMD CPU.
}
diff --git a/compiler-rt/lib/builtins/divtc3.c b/compiler-rt/lib/builtins/divtc3.c
index 099de5802daf..c393de815337 100644
--- a/compiler-rt/lib/builtins/divtc3.c
+++ b/compiler-rt/lib/builtins/divtc3.c
@@ -13,7 +13,7 @@
#define QUAD_PRECISION
#include "fp_lib.h"
-#if defined(CRT_HAS_F128)
+#if defined(CRT_HAS_128BIT) && defined(CRT_HAS_F128)
// Returns: the quotient of (a + ib) / (c + id)
diff --git a/compiler-rt/lib/builtins/multc3.c b/compiler-rt/lib/builtins/multc3.c
index 61a3f45e4727..a89832f0e883 100644
--- a/compiler-rt/lib/builtins/multc3.c
+++ b/compiler-rt/lib/builtins/multc3.c
@@ -15,7 +15,7 @@
#include "int_lib.h"
#include "int_math.h"
-#if defined(CRT_HAS_F128)
+#if defined(CRT_HAS_128BIT) && defined(CRT_HAS_F128)
// Returns: the product of a + ib and c + id
diff --git a/libcxx/include/chrono b/libcxx/include/chrono
index 990c415ec2e9..7bec5e5a26ef 100644
--- a/libcxx/include/chrono
+++ b/libcxx/include/chrono
@@ -1015,8 +1015,8 @@ constexpr chrono::year operator ""y(unsigned lo
# include <charconv>
# if !defined(_LIBCPP_HAS_NO_LOCALIZATION)
# include <locale>
+# include <ostream>
# endif
-# include <ostream>
#endif
#endif // _LIBCPP_CHRONO
diff --git a/lld/ELF/Arch/Hexagon.cpp b/lld/ELF/Arch/Hexagon.cpp
index 54821c299bde..abde3cd96491 100644
--- a/lld/ELF/Arch/Hexagon.cpp
+++ b/lld/ELF/Arch/Hexagon.cpp
@@ -60,17 +60,15 @@ Hexagon::Hexagon() {
}
uint32_t Hexagon::calcEFlags() const {
- assert(!ctx.objectFiles.empty());
-
// The architecture revision must always be equal to or greater than
// greatest revision in the list of inputs.
- uint32_t ret = 0;
+ std::optional<uint32_t> ret;
for (InputFile *f : ctx.objectFiles) {
uint32_t eflags = cast<ObjFile<ELF32LE>>(f)->getObj().getHeader().e_flags;
- if (eflags > ret)
+ if (!ret || eflags > *ret)
ret = eflags;
}
- return ret;
+ return ret.value_or(/* Default Arch Rev: */ 0x60);
}
static uint32_t applyMask(uint32_t mask, uint32_t data) {
diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.def b/llvm/include/llvm/TargetParser/X86TargetParser.def
index 92798cbe4b4c..008cf5381c12 100644
--- a/llvm/include/llvm/TargetParser/X86TargetParser.def
+++ b/llvm/include/llvm/TargetParser/X86TargetParser.def
@@ -49,11 +49,13 @@ X86_CPU_TYPE(ZHAOXIN_FAM7H, "zhaoxin_fam7h")
X86_CPU_TYPE(INTEL_SIERRAFOREST, "sierraforest")
X86_CPU_TYPE(INTEL_GRANDRIDGE, "grandridge")
X86_CPU_TYPE(INTEL_CLEARWATERFOREST, "clearwaterforest")
+X86_CPU_TYPE(AMDFAM1AH, "amdfam1ah")
// Alternate names supported by __builtin_cpu_is and target multiversioning.
X86_CPU_TYPE_ALIAS(INTEL_BONNELL, "atom")
X86_CPU_TYPE_ALIAS(AMDFAM10H, "amdfam10")
X86_CPU_TYPE_ALIAS(AMDFAM15H, "amdfam15")
+X86_CPU_TYPE_ALIAS(AMDFAM1AH, "amdfam1a")
X86_CPU_TYPE_ALIAS(INTEL_SILVERMONT, "slm")
#undef X86_CPU_TYPE_ALIAS
@@ -104,6 +106,7 @@ X86_CPU_SUBTYPE(INTEL_COREI7_GRANITERAPIDS_D,"graniterapids-d")
X86_CPU_SUBTYPE(INTEL_COREI7_ARROWLAKE, "arrowlake")
X86_CPU_SUBTYPE(INTEL_COREI7_ARROWLAKE_S, "arrowlake-s")
X86_CPU_SUBTYPE(INTEL_COREI7_PANTHERLAKE, "pantherlake")
+X86_CPU_SUBTYPE(AMDFAM1AH_ZNVER5, "znver5")
// Alternate names supported by __builtin_cpu_is and target multiversioning.
X86_CPU_SUBTYPE_ALIAS(INTEL_COREI7_ALDERLAKE, "raptorlake")
diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.h b/llvm/include/llvm/TargetParser/X86TargetParser.h
index 2083e585af4a..5468aaa81edb 100644
--- a/llvm/include/llvm/TargetParser/X86TargetParser.h
+++ b/llvm/include/llvm/TargetParser/X86TargetParser.h
@@ -147,6 +147,7 @@ enum CPUKind {
CK_x86_64_v3,
CK_x86_64_v4,
CK_Geode,
+ CK_ZNVER5,
};
/// Parse \p CPU string into a CPUKind. Will only accept 64-bit capable CPUs if
diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp
index 0f29ebe3ee79..b1a2bfaf7895 100644
--- a/llvm/lib/CodeGen/ModuloSchedule.cpp
+++ b/llvm/lib/CodeGen/ModuloSchedule.cpp
@@ -130,6 +130,7 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
// Generate the prolog instructions that set up the pipeline.
generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs);
MF.insert(BB->getIterator(), KernelBB);
+ LIS.insertMBBInMaps(KernelBB);
// Rearrange the instructions to generate the new, pipelined loop,
// and update register names as needed.
@@ -210,6 +211,7 @@ void ModuloScheduleExpander::generateProlog(unsigned LastStage,
NewBB->transferSuccessors(PredBB);
PredBB->addSuccessor(NewBB);
PredBB = NewBB;
+ LIS.insertMBBInMaps(NewBB);
// Generate instructions for each appropriate stage. Process instructions
// in original program order.
@@ -283,6 +285,7 @@ void ModuloScheduleExpander::generateEpilog(
PredBB->replaceSuccessor(LoopExitBB, NewBB);
NewBB->addSuccessor(LoopExitBB);
+ LIS.insertMBBInMaps(NewBB);
if (EpilogStart == LoopExitBB)
EpilogStart = NewBB;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index df3d207d85d3..b961d3bb1fec 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -1453,6 +1453,10 @@ void SelectionDAGISel::reportIPToStateForBlocks(MachineFunction *MF) {
if (BB->getFirstMayFaultInst()) {
// Report IP range only for blocks with Faulty inst
auto MBBb = MBB.getFirstNonPHI();
+
+ if (MBBb == MBB.end())
+ continue;
+
MachineInstr *MIb = &*MBBb;
if (MIb->isTerminator())
continue;
diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp
index 0a9498f051cb..46896d3cdf7d 100644
--- a/llvm/lib/IR/BasicBlock.cpp
+++ b/llvm/lib/IR/BasicBlock.cpp
@@ -975,8 +975,16 @@ void BasicBlock::spliceDebugInfoImpl(BasicBlock::iterator Dest, BasicBlock *Src,
if (ReadFromTail && Src->getMarker(Last)) {
DbgMarker *FromLast = Src->getMarker(Last);
if (LastIsEnd) {
- Dest->adoptDbgRecords(Src, Last, true);
- // adoptDbgRecords will release any trailers.
+ if (Dest == end()) {
+ // Absorb the trailing markers from Src.
+ assert(FromLast == Src->getTrailingDbgRecords());
+ createMarker(Dest)->absorbDebugValues(*FromLast, true);
+ FromLast->eraseFromParent();
+ Src->deleteTrailingDbgRecords();
+ } else {
+ // adoptDbgRecords will release any trailers.
+ Dest->adoptDbgRecords(Src, Last, true);
+ }
assert(!Src->getTrailingDbgRecords());
} else {
// FIXME: can we use adoptDbgRecords here to reduce allocations?
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index ba46ededc63a..87e057a468af 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -2931,16 +2931,6 @@ struct RegPairInfo {
} // end anonymous namespace
-unsigned findFreePredicateReg(BitVector &SavedRegs) {
- for (unsigned PReg = AArch64::P8; PReg <= AArch64::P15; ++PReg) {
- if (SavedRegs.test(PReg)) {
- unsigned PNReg = PReg - AArch64::P0 + AArch64::PN0;
- return PNReg;
- }
- }
- return AArch64::NoRegister;
-}
-
static void computeCalleeSaveRegisterPairs(
MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI,
const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs,
@@ -3645,7 +3635,6 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
unsigned ExtraCSSpill = 0;
bool HasUnpairedGPR64 = false;
- bool HasPairZReg = false;
// Figure out which callee-saved registers to save/restore.
for (unsigned i = 0; CSRegs[i]; ++i) {
const unsigned Reg = CSRegs[i];
@@ -3699,28 +3688,6 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
!RegInfo->isReservedReg(MF, PairedReg))
ExtraCSSpill = PairedReg;
}
- // Check if there is a pair of ZRegs, so it can select PReg for spill/fill
- HasPairZReg |= (AArch64::ZPRRegClass.contains(Reg, CSRegs[i ^ 1]) &&
- SavedRegs.test(CSRegs[i ^ 1]));
- }
-
- if (HasPairZReg && (Subtarget.hasSVE2p1() || Subtarget.hasSME2())) {
- AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- // Find a suitable predicate register for the multi-vector spill/fill
- // instructions.
- unsigned PnReg = findFreePredicateReg(SavedRegs);
- if (PnReg != AArch64::NoRegister)
- AFI->setPredicateRegForFillSpill(PnReg);
- // If no free callee-save has been found assign one.
- if (!AFI->getPredicateRegForFillSpill() &&
- MF.getFunction().getCallingConv() ==
- CallingConv::AArch64_SVE_VectorCall) {
- SavedRegs.set(AArch64::P8);
- AFI->setPredicateRegForFillSpill(AArch64::PN8);
- }
-
- assert(!RegInfo->isReservedReg(MF, AFI->getPredicateRegForFillSpill()) &&
- "Predicate cannot be a reserved register");
}
if (MF.getFunction().getCallingConv() == CallingConv::Win64 &&
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 377bcd5868fb..805684ef69a5 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -5144,10 +5144,6 @@ void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
if (PNRReg.isValid() && !PNRReg.isVirtual())
MI.addDef(PNRReg, RegState::Implicit);
MI.addMemOperand(MMO);
-
- if (PNRReg.isValid() && PNRReg.isVirtual())
- BuildMI(MBB, MBBI, DebugLoc(), get(TargetOpcode::COPY), PNRReg)
- .addReg(DestReg);
}
bool llvm::isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 39ae7c96cf77..a71c9453d968 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4349,6 +4349,7 @@ AMDGPUTargetLowering::performMulLoHiCombine(SDNode *N,
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
+ bool Signed = N->getOpcode() == ISD::SMUL_LOHI;
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -4363,20 +4364,25 @@ AMDGPUTargetLowering::performMulLoHiCombine(SDNode *N,
// Try to use two fast 24-bit multiplies (one for each half of the result)
// instead of one slow extending multiply.
- unsigned LoOpcode, HiOpcode;
- if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) {
- N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32);
- N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32);
- LoOpcode = AMDGPUISD::MUL_U24;
- HiOpcode = AMDGPUISD::MULHI_U24;
- } else if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) {
- N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32);
- N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32);
- LoOpcode = AMDGPUISD::MUL_I24;
- HiOpcode = AMDGPUISD::MULHI_I24;
+ unsigned LoOpcode = 0;
+ unsigned HiOpcode = 0;
+ if (Signed) {
+ if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) {
+ N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32);
+ N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32);
+ LoOpcode = AMDGPUISD::MUL_I24;
+ HiOpcode = AMDGPUISD::MULHI_I24;
+ }
} else {
- return SDValue();
+ if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) {
+ N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32);
+ N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32);
+ LoOpcode = AMDGPUISD::MUL_U24;
+ HiOpcode = AMDGPUISD::MULHI_U24;
+ }
}
+ if (!LoOpcode)
+ return SDValue();
SDValue Lo = DAG.getNode(LoOpcode, DL, MVT::i32, N0, N1);
SDValue Hi = DAG.getNode(HiOpcode, DL, MVT::i32, N0, N1);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 21cf4d9eeac1..758de9d732fa 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -9338,12 +9338,13 @@ SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
SDValue Op0 = Op->getOperand(0);
+ if (!Subtarget.isPPC64() || (Op0.getOpcode() != ISD::BUILD_PAIR) ||
+ (Op.getValueType() != MVT::f128))
+ return SDValue();
+
SDValue Lo = Op0.getOperand(0);
SDValue Hi = Op0.getOperand(1);
-
- if ((Op.getValueType() != MVT::f128) ||
- (Op0.getOpcode() != ISD::BUILD_PAIR) || (Lo.getValueType() != MVT::i64) ||
- (Hi.getValueType() != MVT::i64) || !Subtarget.isPPC64())
+ if ((Lo.getValueType() != MVT::i64) || (Hi.getValueType() != MVT::i64))
return SDValue();
if (!Subtarget.isLittleEndian())
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index ba3b4bd701d6..6c0cbeadebf4 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -2902,7 +2902,7 @@ RISCVInstrInfo::getOutliningTypeImpl(MachineBasicBlock::iterator &MBBI,
// if any possible.
if (MO.getTargetFlags() == RISCVII::MO_PCREL_LO &&
(MI.getMF()->getTarget().getFunctionSections() || F.hasComdat() ||
- F.hasSection()))
+ F.hasSection() || F.getSectionPrefix()))
return outliner::InstrType::Illegal;
}
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 9dafd5e628ca..e82e624f7099 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -1543,6 +1543,19 @@ def ProcessorFeatures {
FeatureVPOPCNTDQ];
list<SubtargetFeature> ZN4Features =
!listconcat(ZN3Features, ZN4AdditionalFeatures);
+
+
+ list<SubtargetFeature> ZN5Tuning = ZN4Tuning;
+ list<SubtargetFeature> ZN5AdditionalFeatures = [FeatureVNNI,
+ FeatureMOVDIRI,
+ FeatureMOVDIR64B,
+ FeatureVP2INTERSECT,
+ FeaturePREFETCHI,
+ FeatureAVXVNNI
+ ];
+ list<SubtargetFeature> ZN5Features =
+ !listconcat(ZN4Features, ZN5AdditionalFeatures);
+
}
//===----------------------------------------------------------------------===//
@@ -1892,6 +1905,8 @@ def : ProcModel<"znver3", Znver3Model, ProcessorFeatures.ZN3Features,
ProcessorFeatures.ZN3Tuning>;
def : ProcModel<"znver4", Znver4Model, ProcessorFeatures.ZN4Features,
ProcessorFeatures.ZN4Tuning>;
+def : ProcModel<"znver5", Znver4Model, ProcessorFeatures.ZN5Features,
+ ProcessorFeatures.ZN5Tuning>;
def : Proc<"geode", [FeatureX87, FeatureCX8, FeatureMMX, FeaturePRFCHW],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
diff --git a/llvm/lib/Target/X86/X86PfmCounters.td b/llvm/lib/Target/X86/X86PfmCounters.td
index 2b1dac411c99..c30e989cdc2a 100644
--- a/llvm/lib/Target/X86/X86PfmCounters.td
+++ b/llvm/lib/Target/X86/X86PfmCounters.td
@@ -350,3 +350,4 @@ def ZnVer4PfmCounters : ProcPfmCounters {
let ValidationCounters = DefaultAMDPfmValidationCounters;
}
def : PfmCountersBinding<"znver4", ZnVer4PfmCounters>;
+def : PfmCountersBinding<"znver5", ZnVer4PfmCounters>;
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index 7e637cba4cfb..865b6a44adbb 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -1213,6 +1213,25 @@ static const char *getAMDProcessorTypeAndSubtype(unsigned Family,
break; // "znver4"
}
break; // family 19h
+ case 26:
+ CPU = "znver5";
+ *Type = X86::AMDFAM1AH;
+ if (Model <= 0x77) {
+ // Models 00h-0Fh (Breithorn).
+ // Models 10h-1Fh (Breithorn-Dense).
+ // Models 20h-2Fh (Strix 1).
+ // Models 30h-37h (Strix 2).
+ // Models 38h-3Fh (Strix 3).
+ // Models 40h-4Fh (Granite Ridge).
+ // Models 50h-5Fh (Weisshorn).
+ // Models 60h-6Fh (Krackan1).
+ // Models 70h-77h (Sarlak).
+ CPU = "znver5";
+ *Subtype = X86::AMDFAM1AH_ZNVER5;
+ break; // "znver5"
+ }
+ break;
+
default:
break; // Unknown AMD CPU.
}
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index dcf9130052ac..a6f3b5ba5d33 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -238,6 +238,10 @@ static constexpr FeatureBitset FeaturesZNVER4 =
FeatureAVX512BITALG | FeatureAVX512VPOPCNTDQ | FeatureAVX512BF16 |
FeatureGFNI | FeatureSHSTK;
+static constexpr FeatureBitset FeaturesZNVER5 =
+ FeaturesZNVER4 | FeatureAVXVNNI | FeatureMOVDIRI | FeatureMOVDIR64B |
+ FeatureAVX512VP2INTERSECT | FeaturePREFETCHI | FeatureAVXVNNI;
+
// D151696 tranplanted Mangling and OnlyForCPUDispatchSpecific from
// X86TargetParser.def to here. They are assigned by following ways:
// 1. Copy the mangling from the original CPU_SPEICIFC MACROs. If no, assign
@@ -417,6 +421,7 @@ constexpr ProcInfo Processors[] = {
{ {"znver2"}, CK_ZNVER2, FEATURE_AVX2, FeaturesZNVER2, '\0', false },
{ {"znver3"}, CK_ZNVER3, FEATURE_AVX2, FeaturesZNVER3, '\0', false },
{ {"znver4"}, CK_ZNVER4, FEATURE_AVX512VBMI2, FeaturesZNVER4, '\0', false },
+ { {"znver5"}, CK_ZNVER5, FEATURE_AVX512VP2INTERSECT, FeaturesZNVER5, '\0', false },
// Generic 64-bit processor.
{ {"x86-64"}, CK_x86_64, FEATURE_SSE2 , FeaturesX86_64, '\0', false },
{ {"x86-64-v2"}, CK_x86_64_v2, FEATURE_SSE4_2 , FeaturesX86_64_V2, '\0', false },
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index cca9eeebaa53..ab2b96cdc42d 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -15211,7 +15211,8 @@ bool BoUpSLP::collectValuesToDemote(
if (any_of(E.Scalars, [&](Value *V) {
return !all_of(V->users(), [=](User *U) {
return getTreeEntry(U) ||
- (UserIgnoreList && UserIgnoreList->contains(U)) ||
+ (E.Idx == 0 && UserIgnoreList &&
+ UserIgnoreList->contains(U)) ||
(!isa<CmpInst>(U) && U->getType()->isSized() &&
!U->getType()->isScalableTy() &&
DL->getTypeSizeInBits(U->getType()) <= BitWidth);
@@ -15539,6 +15540,11 @@ void BoUpSLP::computeMinimumValueSizes() {
const TreeEntry *UserTE = E.UserTreeIndices.back().UserTE;
if (TE == UserTE || !TE)
return false;
+ if (!isa<CastInst, BinaryOperator, FreezeInst, PHINode,
+ SelectInst>(U) ||
+ !isa<CastInst, BinaryOperator, FreezeInst, PHINode,
+ SelectInst>(UserTE->getMainOp()))
+ return true;
unsigned UserTESz = DL->getTypeSizeInBits(
UserTE->Scalars.front()->getType());
auto It = MinBWs.find(TE);