aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm/lib
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2017-12-29 00:56:15 +0000
committerDimitry Andric <dim@FreeBSD.org>2017-12-29 00:56:15 +0000
commitfe4fed2e4d17945c38474cf0746792d04bf84b7d (patch)
treef82cc30abef889351b2dbe8d8aa2874056dbebbd /contrib/llvm/lib
parentbbd32193a0463b1c7383443a45b774a2fe4d3430 (diff)
parent55e6d896ad333f07bb3b1ba487df214fc268a4ab (diff)
Merge llvm, clang, lld, lldb, compiler-rt and libc++ trunk r321545,
update build glue and version numbers, add new intrinsics headers, and update OptionalObsoleteFiles.inc.
Notes
Notes: svn path=/projects/clang600-import/; revision=327330
Diffstat (limited to 'contrib/llvm/lib')
-rw-r--r--contrib/llvm/lib/Analysis/DemandedBits.cpp4
-rw-r--r--contrib/llvm/lib/Analysis/InstructionSimplify.cpp40
-rw-r--r--contrib/llvm/lib/Analysis/Lint.cpp12
-rw-r--r--contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp20
-rw-r--r--contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp4
-rw-r--r--contrib/llvm/lib/Analysis/ScalarEvolution.cpp12
-rw-r--r--contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp15
-rw-r--r--contrib/llvm/lib/Analysis/TargetTransformInfo.cpp4
-rw-r--r--contrib/llvm/lib/Analysis/ValueTracking.cpp26
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp43
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp17
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp39
-rw-r--r--contrib/llvm/lib/IR/SafepointIRVerifier.cpp156
-rw-r--r--contrib/llvm/lib/LTO/LTOModule.cpp24
-rw-r--r--contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp2
-rw-r--r--contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp2
-rw-r--r--contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp10
-rw-r--r--contrib/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp2
-rw-r--r--contrib/llvm/lib/Support/ARMAttributeParser.cpp2
-rw-r--r--contrib/llvm/lib/Support/CommandLine.cpp46
-rw-r--r--contrib/llvm/lib/TableGen/Main.cpp2
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp44
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp10
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp6
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.cpp6
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp2
-rw-r--r--contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp5
-rw-r--r--contrib/llvm/lib/Target/X86/X86.td3
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelLowering.cpp164
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelLowering.h4
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrAVX512.td26
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFPStack.td6
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.td9
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrSSE.td23
-rw-r--r--contrib/llvm/lib/Target/X86/X86WinEHState.cpp6
-rw-r--r--contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp11
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp3
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp36
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp4
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp8
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp7
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/GVNSink.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp56
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp9
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp7
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp7
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp179
47 files changed, 568 insertions, 557 deletions
diff --git a/contrib/llvm/lib/Analysis/DemandedBits.cpp b/contrib/llvm/lib/Analysis/DemandedBits.cpp
index 7276f2524fed..de7d21f9f133 100644
--- a/contrib/llvm/lib/Analysis/DemandedBits.cpp
+++ b/contrib/llvm/lib/Analysis/DemandedBits.cpp
@@ -385,8 +385,8 @@ bool DemandedBits::isInstructionDead(Instruction *I) {
void DemandedBits::print(raw_ostream &OS) {
performAnalysis();
for (auto &KV : AliveBits) {
- OS << "DemandedBits: 0x" << utohexstr(KV.second.getLimitedValue()) << " for "
- << *KV.first << "\n";
+ OS << "DemandedBits: 0x" << Twine::utohexstr(KV.second.getLimitedValue())
+ << " for " << *KV.first << '\n';
}
}
diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
index 3ce1281743c3..93fb1143e505 100644
--- a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -3897,8 +3897,9 @@ static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const SimplifyQ
// If extracting a specified index from the vector, see if we can recursively
// find a previously computed scalar that was inserted into the vector.
if (auto *IdxC = dyn_cast<ConstantInt>(Idx))
- if (Value *Elt = findScalarElement(Vec, IdxC->getZExtValue()))
- return Elt;
+ if (IdxC->getValue().ule(Vec->getType()->getVectorNumElements()))
+ if (Value *Elt = findScalarElement(Vec, IdxC->getZExtValue()))
+ return Elt;
// An undef extract index can be arbitrarily chosen to be an out-of-range
// index value, which would result in the instruction being undef.
@@ -4494,6 +4495,22 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
return *ArgBegin;
return nullptr;
}
+ case Intrinsic::bswap: {
+ Value *IIOperand = *ArgBegin;
+ Value *X = nullptr;
+ // bswap(bswap(x)) -> x
+ if (match(IIOperand, m_BSwap(m_Value(X))))
+ return X;
+ return nullptr;
+ }
+ case Intrinsic::bitreverse: {
+ Value *IIOperand = *ArgBegin;
+ Value *X = nullptr;
+ // bitreverse(bitreverse(x)) -> x
+ if (match(IIOperand, m_BitReverse(m_Value(X))))
+ return X;
+ return nullptr;
+ }
default:
return nullptr;
}
@@ -4548,6 +4565,16 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
return SimplifyRelativeLoad(C0, C1, Q.DL);
return nullptr;
}
+ case Intrinsic::powi:
+ if (ConstantInt *Power = dyn_cast<ConstantInt>(RHS)) {
+ // powi(x, 0) -> 1.0
+ if (Power->isZero())
+ return ConstantFP::get(LHS->getType(), 1.0);
+ // powi(x, 1) -> x
+ if (Power->isOne())
+ return LHS;
+ }
+ return nullptr;
default:
return nullptr;
}
@@ -4616,6 +4643,12 @@ Value *llvm::SimplifyCall(ImmutableCallSite CS, Value *V,
return ::SimplifyCall(CS, V, Args.begin(), Args.end(), Q, RecursionLimit);
}
+Value *llvm::SimplifyCall(ImmutableCallSite ICS, const SimplifyQuery &Q) {
+ CallSite CS(const_cast<Instruction*>(ICS.getInstruction()));
+ return ::SimplifyCall(CS, CS.getCalledValue(), CS.arg_begin(), CS.arg_end(),
+ Q, RecursionLimit);
+}
+
/// See if we can compute a simplified version of this instruction.
/// If not, this returns null.
@@ -4750,8 +4783,7 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ,
break;
case Instruction::Call: {
CallSite CS(cast<CallInst>(I));
- Result = SimplifyCall(CS, CS.getCalledValue(), CS.arg_begin(), CS.arg_end(),
- Q);
+ Result = SimplifyCall(CS, Q);
break;
}
#define HANDLE_CAST_INST(num, opc, clas) case Instruction::opc:
diff --git a/contrib/llvm/lib/Analysis/Lint.cpp b/contrib/llvm/lib/Analysis/Lint.cpp
index 7b792ed0a2e2..0e3f498cb14c 100644
--- a/contrib/llvm/lib/Analysis/Lint.cpp
+++ b/contrib/llvm/lib/Analysis/Lint.cpp
@@ -265,13 +265,21 @@ void Lint::visitCallSite(CallSite CS) {
// Check that noalias arguments don't alias other arguments. This is
// not fully precise because we don't know the sizes of the dereferenced
// memory regions.
- if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy())
- for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI)
+ if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy()) {
+ AttributeList PAL = CS.getAttributes();
+ unsigned ArgNo = 0;
+ for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI) {
+ // Skip ByVal arguments since they will be memcpy'd to the callee's
+ // stack so we're not really passing the pointer anyway.
+ if (PAL.hasParamAttribute(ArgNo++, Attribute::ByVal))
+ continue;
if (AI != BI && (*BI)->getType()->isPointerTy()) {
AliasResult Result = AA->alias(*AI, *BI);
Assert(Result != MustAlias && Result != PartialAlias,
"Unusual: noalias argument aliases another argument", &I);
}
+ }
+ }
// Check that an sret argument points to valid memory.
if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) {
diff --git a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index bb7bf967994c..bf83f52ccf2e 100644
--- a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -920,14 +920,6 @@ void MemoryDependenceResults::getNonLocalPointerDependency(
Instruction *QueryInst, SmallVectorImpl<NonLocalDepResult> &Result) {
const MemoryLocation Loc = MemoryLocation::get(QueryInst);
bool isLoad = isa<LoadInst>(QueryInst);
- return getNonLocalPointerDependencyFrom(QueryInst, Loc, isLoad, Result);
-}
-
-void MemoryDependenceResults::getNonLocalPointerDependencyFrom(
- Instruction *QueryInst,
- const MemoryLocation &Loc,
- bool isLoad,
- SmallVectorImpl<NonLocalDepResult> &Result) {
BasicBlock *FromBB = QueryInst->getParent();
assert(FromBB);
@@ -1127,15 +1119,21 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
// If we already have a cache entry for this CacheKey, we may need to do some
// work to reconcile the cache entry and the current query.
if (!Pair.second) {
- if (CacheInfo->Size != Loc.Size) {
- // The query's Size differs from the cached one. Throw out the
- // cached data and proceed with the query at the new size.
+ if (CacheInfo->Size < Loc.Size) {
+ // The query's Size is greater than the cached one. Throw out the
+ // cached data and proceed with the query at the greater size.
CacheInfo->Pair = BBSkipFirstBlockPair();
CacheInfo->Size = Loc.Size;
for (auto &Entry : CacheInfo->NonLocalDeps)
if (Instruction *Inst = Entry.getResult().getInst())
RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
CacheInfo->NonLocalDeps.clear();
+ } else if (CacheInfo->Size > Loc.Size) {
+ // This query's Size is less than the cached one. Conservatively restart
+ // the query using the greater size.
+ return getNonLocalPointerDepFromBB(
+ QueryInst, Pointer, Loc.getWithNewSize(CacheInfo->Size), isLoad,
+ StartBB, Result, Visited, SkipFirstBlock);
}
// If the query's AATags are inconsistent with the cached one,
diff --git a/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
index 10badd89a4a8..efa5bd564ad0 100644
--- a/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -306,7 +306,9 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
NonRenamableLocal || HasInlineAsmMaybeReferencingInternal ||
// Inliner doesn't handle variadic functions.
// FIXME: refactor this to use the same code that inliner is using.
- F.isVarArg();
+ F.isVarArg() ||
+ // Don't try to import functions with noinline attribute.
+ F.getAttributes().hasFnAttribute(Attribute::NoInline);
GlobalValueSummary::GVFlags Flags(F.getLinkage(), NotEligibleForImport,
/* Live = */ false, F.isDSOLocal());
FunctionSummary::FFlags FunFlags{
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
index 2a8088dc4452..f34549ae52b4 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -1268,7 +1268,11 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
}
if (!hasTrunc)
return getAddExpr(Operands);
- UniqueSCEVs.FindNodeOrInsertPos(ID, IP); // Mutates IP, returns NULL.
+ // In spite we checked in the beginning that ID is not in the cache,
+ // it is possible that during recursion and different modification
+ // ID came to cache, so if we found it, just return it.
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP))
+ return S;
}
// trunc(x1*x2*...*xN) --> trunc(x1)*trunc(x2)*...*trunc(xN) if we can
@@ -1284,7 +1288,11 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
}
if (!hasTrunc)
return getMulExpr(Operands);
- UniqueSCEVs.FindNodeOrInsertPos(ID, IP); // Mutates IP, returns NULL.
+ // In spite we checked in the beginning that ID is not in the cache,
+ // it is possible that during recursion and different modification
+ // ID came to cache, so if we found it, just return it.
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP))
+ return S;
}
// If the input value is a chrec scev, truncate the chrec's operands.
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
index 86f714b930d0..3ceda677ba61 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -187,8 +187,21 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
// generated code.
if (isa<DbgInfoIntrinsic>(IP))
ScanLimit++;
+
+ // Conservatively, do not use any instruction which has any of wrap/exact
+ // flags installed.
+ // TODO: Instead of simply disable poison instructions we can be clever
+ // here and match SCEV to this instruction.
+ auto canGeneratePoison = [](Instruction *I) {
+ if (isa<OverflowingBinaryOperator>(I) &&
+ (I->hasNoSignedWrap() || I->hasNoUnsignedWrap()))
+ return true;
+ if (isa<PossiblyExactOperator>(I) && I->isExact())
+ return true;
+ return false;
+ };
if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS &&
- IP->getOperand(1) == RHS)
+ IP->getOperand(1) == RHS && !canGeneratePoison(&*IP))
return &*IP;
if (IP == BlockBegin) break;
}
diff --git a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
index c9e9c6d1a419..b744cae51ed7 100644
--- a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -314,10 +314,6 @@ int TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
return Cost;
}
-bool TargetTransformInfo::isOutOfOrder() const {
- return TTIImpl->isOutOfOrder();
-}
-
unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const {
return TTIImpl->getNumberOfRegisters(Vector);
}
diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp
index 2730daefa625..cd4cee631568 100644
--- a/contrib/llvm/lib/Analysis/ValueTracking.cpp
+++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp
@@ -4238,14 +4238,14 @@ static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
LHS = CmpLHS;
RHS = CmpRHS;
- // If the predicate is an "or-equal" (FP) predicate, then signed zeroes may
- // return inconsistent results between implementations.
- // (0.0 <= -0.0) ? 0.0 : -0.0 // Returns 0.0
- // minNum(0.0, -0.0) // May return -0.0 or 0.0 (IEEE 754-2008 5.3.1)
- // Therefore we behave conservatively and only proceed if at least one of the
- // operands is known to not be zero, or if we don't care about signed zeroes.
+ // Signed zero may return inconsistent results between implementations.
+ // (0.0 <= -0.0) ? 0.0 : -0.0 // Returns 0.0
+ // minNum(0.0, -0.0) // May return -0.0 or 0.0 (IEEE 754-2008 5.3.1)
+ // Therefore, we behave conservatively and only proceed if at least one of the
+ // operands is known to not be zero or if we don't care about signed zero.
switch (Pred) {
default: break;
+ // FIXME: Include OGT/OLT/UGT/ULT.
case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLE:
case CmpInst::FCMP_UGE: case CmpInst::FCMP_ULE:
if (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) &&
@@ -4493,14 +4493,24 @@ SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS,
// Deal with type mismatches.
if (CastOp && CmpLHS->getType() != TrueVal->getType()) {
- if (Value *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp))
+ if (Value *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp)) {
+ // If this is a potential fmin/fmax with a cast to integer, then ignore
+ // -0.0 because there is no corresponding integer value.
+ if (*CastOp == Instruction::FPToSI || *CastOp == Instruction::FPToUI)
+ FMF.setNoSignedZeros();
return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
cast<CastInst>(TrueVal)->getOperand(0), C,
LHS, RHS);
- if (Value *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp))
+ }
+ if (Value *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp)) {
+ // If this is a potential fmin/fmax with a cast to integer, then ignore
+ // -0.0 because there is no corresponding integer value.
+ if (*CastOp == Instruction::FPToSI || *CastOp == Instruction::FPToUI)
+ FMF.setNoSignedZeros();
return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
C, cast<CastInst>(FalseVal)->getOperand(0),
LHS, RHS);
+ }
}
return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, TrueVal, FalseVal,
LHS, RHS);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 17f907eb07e8..3218dce8f575 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1922,14 +1922,16 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
EVT VT = Sel.getValueType();
SDLoc DL(Sel);
SDValue NewCT = DAG.getNode(BinOpcode, DL, VT, CT, C1);
- assert((NewCT.isUndef() || isConstantOrConstantVector(NewCT) ||
- isConstantFPBuildVectorOrConstantFP(NewCT)) &&
- "Failed to constant fold a binop with constant operands");
+ if (!NewCT.isUndef() &&
+ !isConstantOrConstantVector(NewCT, true) &&
+ !isConstantFPBuildVectorOrConstantFP(NewCT))
+ return SDValue();
SDValue NewCF = DAG.getNode(BinOpcode, DL, VT, CF, C1);
- assert((NewCF.isUndef() || isConstantOrConstantVector(NewCF) ||
- isConstantFPBuildVectorOrConstantFP(NewCF)) &&
- "Failed to constant fold a binop with constant operands");
+ if (!NewCF.isUndef() &&
+ !isConstantOrConstantVector(NewCF, true) &&
+ !isConstantFPBuildVectorOrConstantFP(NewCF))
+ return SDValue();
return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
}
@@ -3577,7 +3579,8 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
// TODO: What is the 'or' equivalent of this fold?
// (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
- if (IsAnd && LL == RL && CC0 == CC1 && IsInteger && CC0 == ISD::SETNE &&
+ if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
+ IsInteger && CC0 == ISD::SETNE &&
((isNullConstant(LR) && isAllOnesConstant(RR)) ||
(isAllOnesConstant(LR) && isNullConstant(RR)))) {
SDValue One = DAG.getConstant(1, DL, OpVT);
@@ -3641,15 +3644,18 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
VT.getSizeInBits() <= 64) {
if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
- APInt ADDC = ADDI->getAPIntValue();
- if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
+ if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
// Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
// immediate for an add, but it is legal if its top c2 bits are set,
// transform the ADD so the immediate doesn't need to be materialized
// in a register.
- if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
+ APInt ADDC = ADDI->getAPIntValue();
+ APInt SRLC = SRLI->getAPIntValue();
+ if (ADDC.getMinSignedBits() <= 64 &&
+ SRLC.ult(VT.getSizeInBits()) &&
+ !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
- SRLI->getZExtValue());
+ SRLC.getZExtValue());
if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
ADDC |= Mask;
if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
@@ -3987,6 +3993,12 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// reassociate and
if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
return RAND;
+
+ // Try to convert a constant mask AND into a shuffle clear mask.
+ if (VT.isVector())
+ if (SDValue Shuffle = XformToShuffleWithZero(N))
+ return Shuffle;
+
// fold (and (or x, C), D) -> D if (C & D) == D
auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
@@ -16480,6 +16492,8 @@ SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
/// vector_shuffle V, Zero, <0, 4, 2, 4>
SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
+ assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
+
EVT VT = N->getValueType(0);
SDValue LHS = N->getOperand(0);
SDValue RHS = peekThroughBitcast(N->getOperand(1));
@@ -16490,9 +16504,6 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
if (LegalOperations)
return SDValue();
- if (N->getOpcode() != ISD::AND)
- return SDValue();
-
if (RHS.getOpcode() != ISD::BUILD_VECTOR)
return SDValue();
@@ -16581,10 +16592,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
return Fold;
- // Try to convert a constant mask AND into a shuffle clear mask.
- if (SDValue Shuffle = XformToShuffleWithZero(N))
- return Shuffle;
-
// Type legalization might introduce new shuffles in the DAG.
// Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
// -> (shuffle (VBinOp (A, B)), Undef, Mask).
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 74970ab5792c..7643790df350 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -49,6 +49,8 @@
using namespace llvm;
+#define DEBUG_TYPE "legalizevectorops"
+
namespace {
class VectorLegalizer {
@@ -226,7 +228,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
if (Op.getOpcode() == ISD::LOAD) {
LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
ISD::LoadExtType ExtType = LD->getExtensionType();
- if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD)
+ if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) {
+ DEBUG(dbgs() << "\nLegalizing extending vector load: "; Node->dump(&DAG));
switch (TLI.getLoadExtAction(LD->getExtensionType(), LD->getValueType(0),
LD->getMemoryVT())) {
default: llvm_unreachable("This action is not supported yet!");
@@ -252,11 +255,14 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
Changed = true;
return LegalizeOp(ExpandLoad(Op));
}
+ }
} else if (Op.getOpcode() == ISD::STORE) {
StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
EVT StVT = ST->getMemoryVT();
MVT ValVT = ST->getValue().getSimpleValueType();
- if (StVT.isVector() && ST->isTruncatingStore())
+ if (StVT.isVector() && ST->isTruncatingStore()) {
+ DEBUG(dbgs() << "\nLegalizing truncating vector store: ";
+ Node->dump(&DAG));
switch (TLI.getTruncStoreAction(ValVT, StVT)) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
@@ -270,6 +276,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
Changed = true;
return LegalizeOp(ExpandStore(Op));
}
+ }
} else if (Op.getOpcode() == ISD::MSCATTER || Op.getOpcode() == ISD::MSTORE)
HasVectorValue = true;
@@ -376,6 +383,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
break;
}
+ DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
+
switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Promote:
@@ -383,12 +392,16 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
Changed = true;
break;
case TargetLowering::Legal:
+ DEBUG(dbgs() << "Legal node: nothing to do\n");
break;
case TargetLowering::Custom: {
+ DEBUG(dbgs() << "Trying custom legalization\n");
if (SDValue Tmp1 = TLI.LowerOperation(Op, DAG)) {
+ DEBUG(dbgs() << "Successfully custom legalized node\n");
Result = Tmp1;
break;
}
+ DEBUG(dbgs() << "Could not custom legalize node\n");
LLVM_FALLTHROUGH;
}
case TargetLowering::Expand:
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index a04c770c51c4..4c8b63d2f239 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5943,7 +5943,9 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
CSEMap.InsertNode(N, IP);
InsertNode(N);
- return SDValue(N, 0);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
}
SDValue SelectionDAG::getLoad(EVT VT, const SDLoc &dl, SDValue Chain,
@@ -6043,7 +6045,9 @@ SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,
CSEMap.InsertNode(N, IP);
InsertNode(N);
- return SDValue(N, 0);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
}
SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,
@@ -6108,7 +6112,9 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,
CSEMap.InsertNode(N, IP);
InsertNode(N);
- return SDValue(N, 0);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
}
SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl,
@@ -6134,7 +6140,9 @@ SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl,
CSEMap.InsertNode(N, IP);
InsertNode(N);
- return SDValue(N, 0);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
}
SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,
@@ -6160,7 +6168,9 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,
CSEMap.InsertNode(N, IP);
InsertNode(N);
- return SDValue(N, 0);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
}
SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,
@@ -6189,7 +6199,9 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,
CSEMap.InsertNode(N, IP);
InsertNode(N);
- return SDValue(N, 0);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
}
SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
@@ -6224,7 +6236,9 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
CSEMap.InsertNode(N, IP);
InsertNode(N);
- return SDValue(N, 0);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
}
SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
@@ -6256,7 +6270,9 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
CSEMap.InsertNode(N, IP);
InsertNode(N);
- return SDValue(N, 0);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
}
SDValue SelectionDAG::getVAArg(EVT VT, const SDLoc &dl, SDValue Chain,
@@ -7112,6 +7128,8 @@ void SelectionDAG::transferDbgValues(SDValue From, SDValue To,
void SelectionDAG::salvageDebugInfo(SDNode &N) {
if (!N.getHasDebugValue())
return;
+
+ SmallVector<SDDbgValue *, 2> ClonedDVs;
for (auto DV : GetDbgValues(&N)) {
if (DV->isInvalidated())
continue;
@@ -7135,13 +7153,16 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {
SDDbgValue *Clone =
getDbgValue(DV->getVariable(), DIExpr, N0.getNode(), N0.getResNo(),
DV->isIndirect(), DV->getDebugLoc(), DV->getOrder());
+ ClonedDVs.push_back(Clone);
DV->setIsInvalidated();
- AddDbgValue(Clone, N0.getNode(), false);
DEBUG(dbgs() << "SALVAGE: Rewriting"; N0.getNode()->dumprFull(this);
dbgs() << " into " << *DIExpr << '\n');
}
}
}
+
+ for (SDDbgValue *Dbg : ClonedDVs)
+ AddDbgValue(Dbg, Dbg->getSDNode(), false);
}
namespace {
diff --git a/contrib/llvm/lib/IR/SafepointIRVerifier.cpp b/contrib/llvm/lib/IR/SafepointIRVerifier.cpp
index 68e0ce39a54e..04deb434cec2 100644
--- a/contrib/llvm/lib/IR/SafepointIRVerifier.cpp
+++ b/contrib/llvm/lib/IR/SafepointIRVerifier.cpp
@@ -237,6 +237,59 @@ class InstructionVerifier;
/// Builds BasicBlockState for each BB of the function.
/// It can traverse function for verification and provides all required
/// information.
+///
+/// GC pointer may be in one of three states: relocated, unrelocated and
+/// poisoned.
+/// Relocated pointer may be used without any restrictions.
+/// Unrelocated pointer cannot be dereferenced, passed as argument to any call
+/// or returned. Unrelocated pointer may be safely compared against another
+/// unrelocated pointer or against a pointer exclusively derived from null.
+/// Poisoned pointers are produced when we somehow derive pointer from relocated
+/// and unrelocated pointers (e.g. phi, select). This pointers may be safely
+/// used in a very limited number of situations. Currently the only way to use
+/// it is comparison against constant exclusively derived from null. All
+/// limitations arise due to their undefined state: this pointers should be
+/// treated as relocated and unrelocated simultaneously.
+/// Rules of deriving:
+/// R + U = P - that's where the poisoned pointers come from
+/// P + X = P
+/// U + U = U
+/// R + R = R
+/// X + C = X
+/// Where "+" - any operation that somehow derive pointer, U - unrelocated,
+/// R - relocated and P - poisoned, C - constant, X - U or R or P or C or
+/// nothing (in case when "+" is unary operation).
+/// Deriving of pointers by itself is always safe.
+/// NOTE: when we are making decision on the status of instruction's result:
+/// a) for phi we need to check status of each input *at the end of
+/// corresponding predecessor BB*.
+/// b) for other instructions we need to check status of each input *at the
+/// current point*.
+///
+/// FIXME: This works fairly well except one case
+/// bb1:
+/// p = *some GC-ptr def*
+/// p1 = gep p, offset
+/// / |
+/// / |
+/// bb2: |
+/// safepoint |
+/// \ |
+/// \ |
+/// bb3:
+/// p2 = phi [p, bb2] [p1, bb1]
+/// p3 = phi [p, bb2] [p, bb1]
+/// here p and p1 is unrelocated
+/// p2 and p3 is poisoned (though they shouldn't be)
+///
+/// This leads to some weird results:
+/// cmp eq p, p2 - illegal instruction (false-positive)
+/// cmp eq p1, p2 - illegal instruction (false-positive)
+/// cmp eq p, p3 - illegal instruction (false-positive)
+/// cmp eq p, p1 - ok
+/// To fix this we need to introduce conception of generations and be able to
+/// check if two values belong to one generation or not. This way p2 will be
+/// considered to be unrelocated and no false alarm will happen.
class GCPtrTracker {
const Function &F;
SpecificBumpPtrAllocator<BasicBlockState> BSAllocator;
@@ -244,6 +297,9 @@ class GCPtrTracker {
// This set contains defs of unrelocated pointers that are proved to be legal
// and don't need verification.
DenseSet<const Instruction *> ValidUnrelocatedDefs;
+ // This set contains poisoned defs. They can be safely ignored during
+ // verification too.
+ DenseSet<const Value *> PoisonedDefs;
public:
GCPtrTracker(const Function &F, const DominatorTree &DT);
@@ -251,6 +307,8 @@ public:
BasicBlockState *getBasicBlockState(const BasicBlock *BB);
const BasicBlockState *getBasicBlockState(const BasicBlock *BB) const;
+ bool isValuePoisoned(const Value *V) const { return PoisonedDefs.count(V); }
+
/// Traverse each BB of the function and call
/// InstructionVerifier::verifyInstruction for each possibly invalid
/// instruction.
@@ -349,7 +407,9 @@ const BasicBlockState *GCPtrTracker::getBasicBlockState(
}
bool GCPtrTracker::instructionMayBeSkipped(const Instruction *I) const {
- return ValidUnrelocatedDefs.count(I);
+ // Poisoned defs are skipped since they are always safe by itself by
+ // definition (for details see comment to this class).
+ return ValidUnrelocatedDefs.count(I) || PoisonedDefs.count(I);
}
void GCPtrTracker::verifyFunction(GCPtrTracker &&Tracker,
@@ -418,31 +478,78 @@ bool GCPtrTracker::removeValidUnrelocatedDefs(const BasicBlock *BB,
"Passed Contribution should be from the passed BasicBlockState!");
AvailableValueSet AvailableSet = BBS->AvailableIn;
bool ContributionChanged = false;
+ // For explanation why instructions are processed this way see
+ // "Rules of deriving" in the comment to this class.
for (const Instruction &I : *BB) {
- bool ProducesUnrelocatedPointer = false;
- if ((isa<GetElementPtrInst>(I) || isa<BitCastInst>(I)) &&
- containsGCPtrType(I.getType())) {
- // GEP/bitcast of unrelocated pointer is legal by itself but this
- // def shouldn't appear in any AvailableSet.
+ bool ValidUnrelocatedPointerDef = false;
+ bool PoisonedPointerDef = false;
+ // TODO: `select` instructions should be handled here too.
+ if (const PHINode *PN = dyn_cast<PHINode>(&I)) {
+ if (containsGCPtrType(PN->getType())) {
+ // If both is true, output is poisoned.
+ bool HasRelocatedInputs = false;
+ bool HasUnrelocatedInputs = false;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ const BasicBlock *InBB = PN->getIncomingBlock(i);
+ const Value *InValue = PN->getIncomingValue(i);
+
+ if (isNotExclusivelyConstantDerived(InValue)) {
+ if (isValuePoisoned(InValue)) {
+ // If any of inputs is poisoned, output is always poisoned too.
+ HasRelocatedInputs = true;
+ HasUnrelocatedInputs = true;
+ break;
+ }
+ if (BlockMap[InBB]->AvailableOut.count(InValue))
+ HasRelocatedInputs = true;
+ else
+ HasUnrelocatedInputs = true;
+ }
+ }
+ if (HasUnrelocatedInputs) {
+ if (HasRelocatedInputs)
+ PoisonedPointerDef = true;
+ else
+ ValidUnrelocatedPointerDef = true;
+ }
+ }
+ } else if ((isa<GetElementPtrInst>(I) || isa<BitCastInst>(I)) &&
+ containsGCPtrType(I.getType())) {
+ // GEP/bitcast of unrelocated pointer is legal by itself but this def
+ // shouldn't appear in any AvailableSet.
for (const Value *V : I.operands())
if (containsGCPtrType(V->getType()) &&
isNotExclusivelyConstantDerived(V) && !AvailableSet.count(V)) {
- ProducesUnrelocatedPointer = true;
+ if (isValuePoisoned(V))
+ PoisonedPointerDef = true;
+ else
+ ValidUnrelocatedPointerDef = true;
break;
}
}
- if (!ProducesUnrelocatedPointer) {
- bool Cleared = false;
- transferInstruction(I, Cleared, AvailableSet);
- (void)Cleared;
- } else {
- // Remove def of unrelocated pointer from Contribution of this BB
- // and trigger update of all its successors.
+ assert(!(ValidUnrelocatedPointerDef && PoisonedPointerDef) &&
+ "Value cannot be both unrelocated and poisoned!");
+ if (ValidUnrelocatedPointerDef) {
+ // Remove def of unrelocated pointer from Contribution of this BB and
+ // trigger update of all its successors.
Contribution.erase(&I);
+ PoisonedDefs.erase(&I);
ValidUnrelocatedDefs.insert(&I);
- DEBUG(dbgs() << "Removing " << I << " from Contribution of "
+ DEBUG(dbgs() << "Removing urelocated " << I << " from Contribution of "
<< BB->getName() << "\n");
ContributionChanged = true;
+ } else if (PoisonedPointerDef) {
+ // Mark pointer as poisoned, remove its def from Contribution and trigger
+ // update of all successors.
+ Contribution.erase(&I);
+ PoisonedDefs.insert(&I);
+ DEBUG(dbgs() << "Removing poisoned " << I << " from Contribution of "
+ << BB->getName() << "\n");
+ ContributionChanged = true;
+ } else {
+ bool Cleared = false;
+ transferInstruction(I, Cleared, AvailableSet);
+ (void)Cleared;
}
}
return ContributionChanged;
@@ -524,8 +631,8 @@ void InstructionVerifier::verifyInstruction(
// Returns true if LHS and RHS are unrelocated pointers and they are
// valid unrelocated uses.
- auto hasValidUnrelocatedUse = [&AvailableSet, baseTyLHS, baseTyRHS, &LHS,
- &RHS] () {
+ auto hasValidUnrelocatedUse = [&AvailableSet, Tracker, baseTyLHS, baseTyRHS,
+ &LHS, &RHS] () {
// A cmp instruction has valid unrelocated pointer operands only if
// both operands are unrelocated pointers.
// In the comparison between two pointers, if one is an unrelocated
@@ -545,12 +652,23 @@ void InstructionVerifier::verifyInstruction(
(baseTyLHS == BaseType::NonConstant &&
baseTyRHS == BaseType::ExclusivelySomeConstant))
return false;
+
+ // If one of pointers is poisoned and other is not exclusively derived
+ // from null it is an invalid expression: it produces poisoned result
+ // and unless we want to track all defs (not only gc pointers) the only
+ // option is to prohibit such instructions.
+ if ((Tracker->isValuePoisoned(LHS) && baseTyRHS != ExclusivelyNull) ||
+ (Tracker->isValuePoisoned(RHS) && baseTyLHS != ExclusivelyNull))
+ return false;
+
// All other cases are valid cases enumerated below:
- // 1. Comparison between an exlusively derived null pointer and a
+ // 1. Comparison between an exclusively derived null pointer and a
// constant base pointer.
- // 2. Comparison between an exlusively derived null pointer and a
+ // 2. Comparison between an exclusively derived null pointer and a
// non-constant unrelocated base pointer.
// 3. Comparison between 2 unrelocated pointers.
+ // 4. Comparison between a pointer exclusively derived from null and a
+ // non-constant poisoned pointer.
return true;
};
if (!hasValidUnrelocatedUse()) {
diff --git a/contrib/llvm/lib/LTO/LTOModule.cpp b/contrib/llvm/lib/LTO/LTOModule.cpp
index 51b4f225939f..626d2f5dc813 100644
--- a/contrib/llvm/lib/LTO/LTOModule.cpp
+++ b/contrib/llvm/lib/LTO/LTOModule.cpp
@@ -388,24 +388,20 @@ void LTOModule::addDefinedDataSymbol(StringRef Name, const GlobalValue *v) {
// from the ObjC data structures generated by the front end.
// special case if this data blob is an ObjC class definition
- std::string Section = v->getSection();
- if (Section.compare(0, 15, "__OBJC,__class,") == 0) {
- if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
- addObjCClass(gv);
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(v)) {
+ StringRef Section = GV->getSection();
+ if (Section.startswith("__OBJC,__class,")) {
+ addObjCClass(GV);
}
- }
- // special case if this data blob is an ObjC category definition
- else if (Section.compare(0, 18, "__OBJC,__category,") == 0) {
- if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
- addObjCCategory(gv);
+ // special case if this data blob is an ObjC category definition
+ else if (Section.startswith("__OBJC,__category,")) {
+ addObjCCategory(GV);
}
- }
- // special case if this data blob is the list of referenced classes
- else if (Section.compare(0, 18, "__OBJC,__cls_refs,") == 0) {
- if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
- addObjCClassRef(gv);
+ // special case if this data blob is the list of referenced classes
+ else if (Section.startswith("__OBJC,__cls_refs,")) {
+ addObjCClassRef(GV);
}
}
}
diff --git a/contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
index c8b3892375f6..abcd8905ad35 100644
--- a/contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -76,7 +76,7 @@ static void saveTempBitcode(const Module &TheModule, StringRef TempDir,
if (TempDir.empty())
return;
// User asked to save temps, let dump the bitcode file after import.
- std::string SaveTempPath = (TempDir + llvm::utostr(count) + Suffix).str();
+ std::string SaveTempPath = (TempDir + llvm::Twine(count) + Suffix).str();
std::error_code EC;
raw_fd_ostream OS(SaveTempPath, EC, sys::fs::F_None);
if (EC)
diff --git a/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
index 195ddc78d454..5bbf49290f17 100644
--- a/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
+++ b/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
@@ -1086,7 +1086,7 @@ bool DarwinAsmParser::parseVersionMin(StringRef Directive, SMLoc Loc,
return false;
}
-Triple::OSType getOSTypeFromPlatform(MachO::PlatformType Type) {
+static Triple::OSType getOSTypeFromPlatform(MachO::PlatformType Type) {
switch (Type) {
case MachO::PLATFORM_MACOS: return Triple::MacOSX;
case MachO::PLATFORM_IOS: return Triple::IOS;
diff --git a/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp
index 38720c23ff26..3e2150a451e0 100644
--- a/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -423,13 +423,17 @@ bool ELFAsmParser::parseGroup(StringRef &GroupName) {
if (L.isNot(AsmToken::Comma))
return TokError("expected group name");
Lex();
- if (getParser().parseIdentifier(GroupName))
- return true;
+ if (L.is(AsmToken::Integer)) {
+ GroupName = getTok().getString();
+ Lex();
+ } else if (getParser().parseIdentifier(GroupName)) {
+ return TokError("invalid group name");
+ }
if (L.is(AsmToken::Comma)) {
Lex();
StringRef Linkage;
if (getParser().parseIdentifier(Linkage))
- return true;
+ return TokError("invalid linkage");
if (Linkage != "comdat")
return TokError("Linkage must be 'comdat'");
}
diff --git a/contrib/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp b/contrib/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
index ccc8cc56eb0a..8dbd58632f0e 100644
--- a/contrib/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
+++ b/contrib/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
@@ -628,7 +628,7 @@ CoverageMapping::getInstantiationGroups(StringRef Filename) const {
}
std::vector<InstantiationGroup> Result;
- for (const auto &InstantiationSet : InstantiationSetCollector) {
+ for (auto &InstantiationSet : InstantiationSetCollector) {
InstantiationGroup IG{InstantiationSet.first.first,
InstantiationSet.first.second,
std::move(InstantiationSet.second)};
diff --git a/contrib/llvm/lib/Support/ARMAttributeParser.cpp b/contrib/llvm/lib/Support/ARMAttributeParser.cpp
index 3d800eb7a96c..e39bddc4e8f2 100644
--- a/contrib/llvm/lib/Support/ARMAttributeParser.cpp
+++ b/contrib/llvm/lib/Support/ARMAttributeParser.cpp
@@ -666,7 +666,7 @@ void ARMAttributeParser::ParseSubsection(const uint8_t *Data, uint32_t Length) {
ParseIndexList(Data, Offset, Indicies);
break;
default:
- errs() << "unrecognised tag: 0x" << utohexstr(Tag) << '\n';
+ errs() << "unrecognised tag: 0x" << Twine::utohexstr(Tag) << '\n';
return;
}
diff --git a/contrib/llvm/lib/Support/CommandLine.cpp b/contrib/llvm/lib/Support/CommandLine.cpp
index b547a0932709..4caf4a4fdce0 100644
--- a/contrib/llvm/lib/Support/CommandLine.cpp
+++ b/contrib/llvm/lib/Support/CommandLine.cpp
@@ -688,7 +688,9 @@ static bool EatsUnboundedNumberOfValues(const Option *O) {
O->getNumOccurrencesFlag() == cl::OneOrMore;
}
-static bool isWhitespace(char C) { return strchr(" \t\n\r\f\v", C); }
+static bool isWhitespace(char C) {
+ return C == ' ' || C == '\t' || C == '\r' || C == '\n';
+}
static bool isQuote(char C) { return C == '\"' || C == '\''; }
@@ -709,17 +711,19 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver,
break;
}
+ char C = Src[I];
+
// Backslash escapes the next character.
- if (I + 1 < E && Src[I] == '\\') {
+ if (I + 1 < E && C == '\\') {
++I; // Skip the escape.
Token.push_back(Src[I]);
continue;
}
// Consume a quoted string.
- if (isQuote(Src[I])) {
- char Quote = Src[I++];
- while (I != E && Src[I] != Quote) {
+ if (isQuote(C)) {
+ ++I;
+ while (I != E && Src[I] != C) {
// Backslash escapes the next character.
if (Src[I] == '\\' && I + 1 != E)
++I;
@@ -732,7 +736,7 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver,
}
// End the token if this is whitespace.
- if (isWhitespace(Src[I])) {
+ if (isWhitespace(C)) {
if (!Token.empty())
NewArgv.push_back(Saver.save(StringRef(Token)).data());
Token.clear();
@@ -740,7 +744,7 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver,
}
// This is a normal character. Append it.
- Token.push_back(Src[I]);
+ Token.push_back(C);
}
// Append the last token after hitting EOF with no whitespace.
@@ -798,25 +802,27 @@ void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver,
// end of the source string.
enum { INIT, UNQUOTED, QUOTED } State = INIT;
for (size_t I = 0, E = Src.size(); I != E; ++I) {
+ char C = Src[I];
+
// INIT state indicates that the current input index is at the start of
// the string or between tokens.
if (State == INIT) {
- if (isWhitespace(Src[I])) {
+ if (isWhitespace(C)) {
// Mark the end of lines in response files
- if (MarkEOLs && Src[I] == '\n')
+ if (MarkEOLs && C == '\n')
NewArgv.push_back(nullptr);
continue;
}
- if (Src[I] == '"') {
+ if (C == '"') {
State = QUOTED;
continue;
}
- if (Src[I] == '\\') {
+ if (C == '\\') {
I = parseBackslash(Src, I, Token);
State = UNQUOTED;
continue;
}
- Token.push_back(Src[I]);
+ Token.push_back(C);
State = UNQUOTED;
continue;
}
@@ -825,38 +831,38 @@ void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver,
// quotes.
if (State == UNQUOTED) {
// Whitespace means the end of the token.
- if (isWhitespace(Src[I])) {
+ if (isWhitespace(C)) {
NewArgv.push_back(Saver.save(StringRef(Token)).data());
Token.clear();
State = INIT;
// Mark the end of lines in response files
- if (MarkEOLs && Src[I] == '\n')
+ if (MarkEOLs && C == '\n')
NewArgv.push_back(nullptr);
continue;
}
- if (Src[I] == '"') {
+ if (C == '"') {
State = QUOTED;
continue;
}
- if (Src[I] == '\\') {
+ if (C == '\\') {
I = parseBackslash(Src, I, Token);
continue;
}
- Token.push_back(Src[I]);
+ Token.push_back(C);
continue;
}
// QUOTED state means that it's reading a token quoted by double quotes.
if (State == QUOTED) {
- if (Src[I] == '"') {
+ if (C == '"') {
State = UNQUOTED;
continue;
}
- if (Src[I] == '\\') {
+ if (C == '\\') {
I = parseBackslash(Src, I, Token);
continue;
}
- Token.push_back(Src[I]);
+ Token.push_back(C);
}
}
// Append the last token after hitting EOF with no whitespace.
diff --git a/contrib/llvm/lib/TableGen/Main.cpp b/contrib/llvm/lib/TableGen/Main.cpp
index fc9d0cc08885..be35f894cccd 100644
--- a/contrib/llvm/lib/TableGen/Main.cpp
+++ b/contrib/llvm/lib/TableGen/Main.cpp
@@ -110,7 +110,7 @@ int llvm::TableGenMain(char *argv0, TableGenMainFn *MainFn) {
return 1;
if (ErrorsPrinted > 0)
- return reportError(argv0, utostr(ErrorsPrinted) + " errors.\n");
+ return reportError(argv0, Twine(ErrorsPrinted) + " errors.\n");
// Declare success.
Out.keep();
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index abbba7d1d5a9..40836b00b9e6 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -3673,15 +3673,6 @@ static bool getFMAPatterns(MachineInstr &Root,
}
break;
case AArch64::FSUBv2f32:
- if (canCombineWithFMUL(MBB, Root.getOperand(1),
- AArch64::FMULv2i32_indexed)) {
- Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP1);
- Found = true;
- } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
- AArch64::FMULv2f32)) {
- Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP1);
- Found = true;
- }
if (canCombineWithFMUL(MBB, Root.getOperand(2),
AArch64::FMULv2i32_indexed)) {
Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2);
@@ -3691,17 +3682,17 @@ static bool getFMAPatterns(MachineInstr &Root,
Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2);
Found = true;
}
- break;
- case AArch64::FSUBv2f64:
if (canCombineWithFMUL(MBB, Root.getOperand(1),
- AArch64::FMULv2i64_indexed)) {
- Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP1);
+ AArch64::FMULv2i32_indexed)) {
+ Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP1);
Found = true;
} else if (canCombineWithFMUL(MBB, Root.getOperand(1),
- AArch64::FMULv2f64)) {
- Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP1);
+ AArch64::FMULv2f32)) {
+ Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP1);
Found = true;
}
+ break;
+ case AArch64::FSUBv2f64:
if (canCombineWithFMUL(MBB, Root.getOperand(2),
AArch64::FMULv2i64_indexed)) {
Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2);
@@ -3711,17 +3702,17 @@ static bool getFMAPatterns(MachineInstr &Root,
Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2);
Found = true;
}
- break;
- case AArch64::FSUBv4f32:
if (canCombineWithFMUL(MBB, Root.getOperand(1),
- AArch64::FMULv4i32_indexed)) {
- Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP1);
+ AArch64::FMULv2i64_indexed)) {
+ Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP1);
Found = true;
} else if (canCombineWithFMUL(MBB, Root.getOperand(1),
- AArch64::FMULv4f32)) {
- Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP1);
+ AArch64::FMULv2f64)) {
+ Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP1);
Found = true;
}
+ break;
+ case AArch64::FSUBv4f32:
if (canCombineWithFMUL(MBB, Root.getOperand(2),
AArch64::FMULv4i32_indexed)) {
Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2);
@@ -3731,6 +3722,15 @@ static bool getFMAPatterns(MachineInstr &Root,
Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2);
Found = true;
}
+ if (canCombineWithFMUL(MBB, Root.getOperand(1),
+ AArch64::FMULv4i32_indexed)) {
+ Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP1);
+ Found = true;
+ } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
+ AArch64::FMULv4f32)) {
+ Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP1);
+ Found = true;
+ }
break;
}
return Found;
@@ -5062,4 +5062,4 @@ MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
It = MBB.insert(It, LDRXpost);
return It;
-} \ No newline at end of file
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
index 8b6c571dee02..740861851185 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
@@ -27,6 +27,8 @@
using namespace llvm;
+namespace {
+
// --------------------------------------------------------------------
// Implementation of permutation networks.
@@ -147,6 +149,7 @@ private:
void build();
bool color();
};
+} // namespace
std::pair<bool,uint8_t> Coloring::getUniqueColor(const NodeSet &Nodes) {
uint8_t Color = None;
@@ -300,6 +303,7 @@ void Coloring::dump() const {
dbgs() << " }\n}\n";
}
+namespace {
// Base class of for reordering networks. They don't strictly need to be
// permutations, as outputs with repeated occurrences of an input element
// are allowed.
@@ -408,7 +412,7 @@ struct BenesNetwork : public PermNetwork {
private:
bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step);
};
-
+} // namespace
bool ForwardDeltaNetwork::route(ElemType *P, RowType *T, unsigned Size,
unsigned Step) {
@@ -602,6 +606,7 @@ bool BenesNetwork::route(ElemType *P, RowType *T, unsigned Size,
// Support for building selection results (output instructions that are
// parts of the final selection).
+namespace {
struct OpRef {
OpRef(SDValue V) : OpV(V) {}
bool isValue() const { return OpV.getNode() != nullptr; }
@@ -689,6 +694,7 @@ struct ResultStack {
void print(raw_ostream &OS, const SelectionDAG &G) const;
};
+} // namespace
void OpRef::print(raw_ostream &OS, const SelectionDAG &G) const {
if (isValue()) {
@@ -740,6 +746,7 @@ void ResultStack::print(raw_ostream &OS, const SelectionDAG &G) const {
}
}
+namespace {
struct ShuffleMask {
ShuffleMask(ArrayRef<int> M) : Mask(M) {
for (unsigned I = 0, E = Mask.size(); I != E; ++I) {
@@ -763,6 +770,7 @@ struct ShuffleMask {
return ShuffleMask(Mask.take_back(H));
}
};
+} // namespace
// --------------------------------------------------------------------
// The HvxSelector class.
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 2aa395642c40..753cfff4cdae 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -1797,11 +1797,7 @@ void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
llvm_unreachable("unsupported fp type");
APInt API = APF.bitcastToAPInt();
- std::string hexstr(utohexstr(API.getZExtValue()));
- O << lead;
- if (hexstr.length() < numHex)
- O << std::string(numHex - hexstr.length(), '0');
- O << utohexstr(API.getZExtValue());
+ O << lead << format_hex_no_prefix(API.getZExtValue(), numHex, /*Upper=*/true);
}
void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.cpp
index 86a28f7d0700..a754a6a36dab 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.cpp
@@ -11,6 +11,7 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/Support/Format.h"
using namespace llvm;
#define DEBUG_TYPE "nvptx-mcexpr"
@@ -47,10 +48,7 @@ void NVPTXFloatMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
}
APInt API = APF.bitcastToAPInt();
- std::string HexStr(utohexstr(API.getZExtValue()));
- if (HexStr.length() < NumHex)
- OS << std::string(NumHex - HexStr.length(), '0');
- OS << utohexstr(API.getZExtValue());
+ OS << format_hex_no_prefix(API.getZExtValue(), NumHex, /*Upper=*/true);
}
const NVPTXGenericMCSymbolRefExpr*
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
index d19463ccb51f..204d97cbdd44 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
@@ -190,7 +190,7 @@ void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (isVerbose()) {
OutStreamer->AddComment("fallthrough-return: $pop" +
- utostr(MFI->getWARegStackId(
+ Twine(MFI->getWARegStackId(
MFI->getWAReg(MI->getOperand(0).getReg()))));
OutStreamer->AddBlankLine();
}
diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 87c65347e334..f1ce430f3323 100644
--- a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -2377,10 +2377,11 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
Flags |= Prefix;
Name = Parser.getTok().getString();
Parser.Lex(); // eat the prefix
- // Hack: we could have something like
+ // Hack: we could have something like "rep # some comment" or
// "lock; cmpxchg16b $1" or "lock\0A\09incl" or "lock/incl"
while (Name.startswith(";") || Name.startswith("\n") ||
- Name.startswith("\t") || Name.startswith("/")) {
+ Name.startswith("#") || Name.startswith("\t") ||
+ Name.startswith("/")) {
Name = Parser.getTok().getString();
Parser.Lex(); // go to next prefix or instr
}
diff --git a/contrib/llvm/lib/Target/X86/X86.td b/contrib/llvm/lib/Target/X86/X86.td
index 7e7c35569093..ba998467b799 100644
--- a/contrib/llvm/lib/Target/X86/X86.td
+++ b/contrib/llvm/lib/Target/X86/X86.td
@@ -739,7 +739,8 @@ def ICLFeatures : ProcessorFeatures<CNLFeatures.Value, [
FeatureVNNI,
FeatureVPCLMULQDQ,
FeatureVPOPCNTDQ,
- FeatureGFNI
+ FeatureGFNI,
+ FeatureCLWB
]>;
class IcelakeProc<string Name> : ProcModel<Name, SkylakeServerModel,
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
index 5ac5d0348f8a..9edd799779c7 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1310,8 +1310,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);
-
- setOperationAction(ISD::MUL, MVT::v8i64, Legal);
}
if (Subtarget.hasCDI()) {
@@ -1388,8 +1386,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UINT_TO_FP, VT, Legal);
setOperationAction(ISD::FP_TO_SINT, VT, Legal);
setOperationAction(ISD::FP_TO_UINT, VT, Legal);
-
- setOperationAction(ISD::MUL, VT, Legal);
}
}
@@ -7108,8 +7104,8 @@ static SDValue ConvertI1VectorToInteger(SDValue Op, SelectionDAG &DAG) {
return DAG.getConstant(Immediate, dl, VT);
}
// Lower BUILD_VECTOR operation for v8i1 and v16i1 types.
-SDValue
-X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
+static SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
assert((VT.getVectorElementType() == MVT::i1) &&
@@ -7131,8 +7127,8 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
DAG.getBuildVector(MVT::v32i1, dl, Op.getNode()->ops().slice(32, 32));
// We have to manually lower both halves so getNode doesn't try to
// reassemble the build_vector.
- Lower = LowerBUILD_VECTORvXi1(Lower, DAG);
- Upper = LowerBUILD_VECTORvXi1(Upper, DAG);
+ Lower = LowerBUILD_VECTORvXi1(Lower, DAG, Subtarget);
+ Upper = LowerBUILD_VECTORvXi1(Upper, DAG, Subtarget);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lower, Upper);
}
SDValue Imm = ConvertI1VectorToInteger(Op, DAG);
@@ -7881,7 +7877,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// Generate vectors for predicate vectors.
if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512())
- return LowerBUILD_VECTORvXi1(Op, DAG);
+ return LowerBUILD_VECTORvXi1(Op, DAG, Subtarget);
if (SDValue VectorConstant = materializeVectorConstant(Op, DAG, Subtarget))
return VectorConstant;
@@ -15543,7 +15539,6 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (SrcVT.isVector()) {
if (SrcVT == MVT::v2i32 && VT == MVT::v2f64) {
return DAG.getNode(X86ISD::CVTSI2P, dl, VT,
@@ -15551,9 +15546,15 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
DAG.getUNDEF(SrcVT)));
}
if (SrcVT.getVectorElementType() == MVT::i1) {
- if (SrcVT == MVT::v2i1 && TLI.isTypeLegal(SrcVT))
- return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
- DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v2i64, Src));
+ if (SrcVT == MVT::v2i1) {
+ // For v2i1, we need to widen to v4i1 first.
+ assert(VT == MVT::v2f64 && "Unexpected type");
+ Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Src,
+ DAG.getUNDEF(MVT::v2i1));
+ return DAG.getNode(X86ISD::CVTSI2P, dl, Op.getValueType(),
+ DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Src));
+ }
+
MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements());
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::SIGN_EXTEND, dl, IntegerVT, Src));
@@ -15653,8 +15654,8 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
}
/// 64-bit unsigned integer to double expansion.
-SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
- SelectionDAG &DAG) const {
+static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
// This algorithm is not obvious. Here it is what we're trying to output:
/*
movq %rax, %xmm0
@@ -15674,7 +15675,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
// Build some magic constants.
static const uint32_t CV0[] = { 0x43300000, 0x45300000, 0, 0 };
Constant *C0 = ConstantDataVector::get(*Context, CV0);
- auto PtrVT = getPointerTy(DAG.getDataLayout());
+ auto PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
SDValue CPIdx0 = DAG.getConstantPool(C0, PtrVT, 16);
SmallVector<Constant*,2> CV1;
@@ -15721,8 +15722,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
}
/// 32-bit unsigned integer to float expansion.
-SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op,
- SelectionDAG &DAG) const {
+static SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
SDLoc dl(Op);
// FP constant to bias correct the final result.
SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), dl,
@@ -15755,16 +15756,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op,
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias);
// Handle final rounding.
- MVT DestVT = Op.getSimpleValueType();
-
- if (DestVT.bitsLT(MVT::f64))
- return DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub,
- DAG.getIntPtrConstant(0, dl));
- if (DestVT.bitsGT(MVT::f64))
- return DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub);
-
- // Handle final rounding.
- return Sub;
+ return DAG.getFPExtendOrRound(Sub, dl, Op.getSimpleValueType());
}
static SDValue lowerUINT_TO_FP_v2i32(SDValue Op, SelectionDAG &DAG,
@@ -15896,16 +15888,22 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(ISD::FADD, DL, VecFloatVT, LowBitcast, FHigh);
}
-SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op,
- SelectionDAG &DAG) const {
+static SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
SDValue N0 = Op.getOperand(0);
MVT SrcVT = N0.getSimpleValueType();
SDLoc dl(Op);
if (SrcVT.getVectorElementType() == MVT::i1) {
- if (SrcVT == MVT::v2i1)
- return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(),
- DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, N0));
+ if (SrcVT == MVT::v2i1) {
+ // For v2i1, we need to widen to v4i1 first.
+ assert(Op.getValueType() == MVT::v2f64 && "Unexpected type");
+ N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, N0,
+ DAG.getUNDEF(MVT::v2i1));
+ return DAG.getNode(X86ISD::CVTUI2P, dl, MVT::v2f64,
+ DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0));
+ }
+
MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements());
return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::ZERO_EXTEND, dl, IntegerVT, N0));
@@ -15930,7 +15928,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
auto PtrVT = getPointerTy(DAG.getDataLayout());
if (Op.getSimpleValueType().isVector())
- return lowerUINT_TO_FP_vec(Op, DAG);
+ return lowerUINT_TO_FP_vec(Op, DAG, Subtarget);
MVT SrcVT = N0.getSimpleValueType();
MVT DstVT = Op.getSimpleValueType();
@@ -15943,9 +15941,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
}
if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64)
- return LowerUINT_TO_FP_i64(Op, DAG);
+ return LowerUINT_TO_FP_i64(Op, DAG, Subtarget);
if (SrcVT == MVT::i32 && X86ScalarSSEf64)
- return LowerUINT_TO_FP_i32(Op, DAG);
+ return LowerUINT_TO_FP_i32(Op, DAG, Subtarget);
if (Subtarget.is64Bit() && SrcVT == MVT::i64 && DstVT == MVT::f32)
return SDValue();
@@ -16283,7 +16281,7 @@ static SDValue LowerZERO_EXTEND_Mask(SDValue Op,
// Truncate if we had to extend i16/i8 above.
if (VT != ExtVT) {
WideVT = MVT::getVectorVT(VT.getVectorElementType(), NumElts);
- SelectedVal = DAG.getNode(X86ISD::VTRUNC, DL, WideVT, SelectedVal);
+ SelectedVal = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SelectedVal);
}
// Extract back to 128/256-bit if we widened.
@@ -18428,7 +18426,7 @@ static SDValue LowerSIGN_EXTEND_Mask(SDValue Op,
// Truncate if we had to extend i16/i8 above.
if (VT != ExtVT) {
WideVT = MVT::getVectorVT(VTElt, NumElts);
- V = DAG.getNode(X86ISD::VTRUNC, dl, WideVT, V);
+ V = DAG.getNode(ISD::TRUNCATE, dl, WideVT, V);
}
// Extract back to 128/256-bit if we widened.
@@ -18681,6 +18679,14 @@ static SDValue LowerExtended1BitVectorLoad(SDValue Op,
// Replace chain users with the new chain.
assert(Load->getNumValues() == 2 && "Loads must carry a chain!");
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1));
+ if (Subtarget.hasVLX()) {
+ // Extract to v4i1/v2i1.
+ SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MemVT, Load,
+ DAG.getIntPtrConstant(0, dl));
+ // Finally, do a normal sign-extend to the desired register.
+ return DAG.getNode(ExtOpcode, dl, Op.getValueType(), Extract);
+ }
+
MVT ExtVT = MVT::getVectorVT(VT.getScalarType(), 8);
SDValue ExtVec = DAG.getNode(ExtOpcode, dl, ExtVT, Load);
@@ -18700,22 +18706,25 @@ static SDValue LowerExtended1BitVectorLoad(SDValue Op,
if (NumElts <= 8) {
// A subset, assume that we have only AVX-512F
- unsigned NumBitsToLoad = 8;
- MVT TypeToLoad = MVT::getIntegerVT(NumBitsToLoad);
- SDValue Load = DAG.getLoad(TypeToLoad, dl, Ld->getChain(),
+ SDValue Load = DAG.getLoad(MVT::i8, dl, Ld->getChain(),
Ld->getBasePtr(),
Ld->getMemOperand());
// Replace chain users with the new chain.
assert(Load->getNumValues() == 2 && "Loads must carry a chain!");
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1));
- MVT MaskVT = MVT::getVectorVT(MVT::i1, NumBitsToLoad);
- SDValue BitVec = DAG.getBitcast(MaskVT, Load);
+ SDValue BitVec = DAG.getBitcast(MVT::v8i1, Load);
if (NumElts == 8)
return DAG.getNode(ExtOpcode, dl, VT, BitVec);
- // we should take care to v4i1 and v2i1
+ if (Subtarget.hasVLX()) {
+ // Extract to v4i1/v2i1.
+ SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MemVT, BitVec,
+ DAG.getIntPtrConstant(0, dl));
+ // Finally, do a normal sign-extend to the desired register.
+ return DAG.getNode(ExtOpcode, dl, Op.getValueType(), Extract);
+ }
MVT ExtVT = MVT::getVectorVT(VT.getScalarType(), 8);
SDValue ExtVec = DAG.getNode(ExtOpcode, dl, ExtVT, BitVec);
@@ -18730,13 +18739,12 @@ static SDValue LowerExtended1BitVectorLoad(SDValue Op,
Ld->getBasePtr(),
Ld->getMemOperand());
- SDValue BasePtrHi =
- DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
- DAG.getConstant(2, dl, BasePtr.getValueType()));
+ SDValue BasePtrHi = DAG.getMemBasePlusOffset(BasePtr, 2, dl);
- SDValue LoadHi = DAG.getLoad(MVT::v16i1, dl, Ld->getChain(),
- BasePtrHi,
- Ld->getMemOperand());
+ SDValue LoadHi = DAG.getLoad(MVT::v16i1, dl, Ld->getChain(), BasePtrHi,
+ Ld->getPointerInfo().getWithOffset(2),
+ MinAlign(Ld->getAlignment(), 2U),
+ Ld->getMemOperand()->getFlags());
SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
LoadLo.getValue(1), LoadHi.getValue(1));
@@ -22086,7 +22094,14 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
// Lower v4i32 mul as 2x shuffle, 2x pmuludq, 2x shuffle.
if (VT == MVT::v4i32) {
assert(Subtarget.hasSSE2() && !Subtarget.hasSSE41() &&
- "Should not custom lower when pmuldq is available!");
+ "Should not custom lower when pmulld is available!");
+
+ // If the upper 17 bits of each element are zero then we can use PMADD.
+ APInt Mask17 = APInt::getHighBitsSet(32, 17);
+ if (DAG.MaskedValueIsZero(A, Mask17) && DAG.MaskedValueIsZero(B, Mask17))
+ return DAG.getNode(X86ISD::VPMADDWD, dl, VT,
+ DAG.getBitcast(MVT::v8i16, A),
+ DAG.getBitcast(MVT::v8i16, B));
// Extract the odd parts.
static const int UnpackMask[] = { 1, -1, 3, -1 };
@@ -22138,6 +22153,11 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
bool AHiIsZero = DAG.MaskedValueIsZero(A, UpperBitsMask);
bool BHiIsZero = DAG.MaskedValueIsZero(B, UpperBitsMask);
+ // If DQI is supported we can use MULLQ, but MULUDQ is still better if the
+ // the high bits are known to be zero.
+ if (Subtarget.hasDQI() && (!AHiIsZero || !BHiIsZero))
+ return Op;
+
// Bit cast to 32-bit vectors for MULUDQ.
SDValue Alo = DAG.getBitcast(MulVT, A);
SDValue Blo = DAG.getBitcast(MulVT, B);
@@ -31001,8 +31021,8 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), Vals[IdxVal]);
}
- // The replacement was made in place; don't return anything.
- return SDValue();
+ // The replacement was made in place; return N so it won't be revisited.
+ return SDValue(N, 0);
}
/// If a vector select has an operand that is -1 or 0, try to simplify the
@@ -32256,6 +32276,13 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG,
if ((NumElts % 2) != 0)
return SDValue();
+ // If the upper 17 bits of each element are zero then we can use PMADD.
+ APInt Mask17 = APInt::getHighBitsSet(32, 17);
+ if (VT == MVT::v4i32 && DAG.MaskedValueIsZero(N0, Mask17) &&
+ DAG.MaskedValueIsZero(N1, Mask17))
+ return DAG.getNode(X86ISD::VPMADDWD, DL, VT, DAG.getBitcast(MVT::v8i16, N0),
+ DAG.getBitcast(MVT::v8i16, N1));
+
unsigned RegSize = 128;
MVT OpsVT = MVT::getVectorVT(MVT::i16, RegSize / 16);
EVT ReducedVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, NumElts);
@@ -33047,10 +33074,8 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG,
// The right side has to be a 'trunc' or a constant vector.
bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE &&
N1.getOperand(0).getValueType() == VT;
- ConstantSDNode *RHSConstSplat = nullptr;
- if (auto *RHSBV = dyn_cast<BuildVectorSDNode>(N1))
- RHSConstSplat = RHSBV->getConstantSplatNode();
- if (!RHSTrunc && !RHSConstSplat)
+ if (!RHSTrunc &&
+ !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()))
return SDValue();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -33060,13 +33085,10 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG,
// Set N0 and N1 to hold the inputs to the new wide operation.
N0 = N0->getOperand(0);
- if (RHSConstSplat) {
- N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, VT.getVectorElementType(),
- SDValue(RHSConstSplat, 0));
- N1 = DAG.getSplatBuildVector(VT, DL, N1);
- } else if (RHSTrunc) {
+ if (RHSTrunc)
N1 = N1->getOperand(0);
- }
+ else
+ N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N1);
// Generate the wide operation.
SDValue Op = DAG.getNode(Narrow->getOpcode(), DL, VT, N0, N1);
@@ -34039,15 +34061,14 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,
Ptr = DAG.getMemBasePlusOffset(Ptr, 16, dl);
SDValue Load2 =
- DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
- std::min(16U, Alignment), Ld->getMemOperand()->getFlags());
+ DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr,
+ Ld->getPointerInfo().getWithOffset(16),
+ MinAlign(Alignment, 16U), Ld->getMemOperand()->getFlags());
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
Load1.getValue(1),
Load2.getValue(1));
- SDValue NewVec = DAG.getUNDEF(RegVT);
- NewVec = insert128BitVector(NewVec, Load1, 0, DAG, dl);
- NewVec = insert128BitVector(NewVec, Load2, NumElems / 2, DAG, dl);
+ SDValue NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, RegVT, Load1, Load2);
return DCI.CombineTo(N, NewVec, TF, true);
}
@@ -34453,8 +34474,9 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
DAG.getStore(St->getChain(), dl, Value0, Ptr0, St->getPointerInfo(),
Alignment, St->getMemOperand()->getFlags());
SDValue Ch1 =
- DAG.getStore(St->getChain(), dl, Value1, Ptr1, St->getPointerInfo(),
- std::min(16U, Alignment), St->getMemOperand()->getFlags());
+ DAG.getStore(St->getChain(), dl, Value1, Ptr1,
+ St->getPointerInfo().getWithOffset(16),
+ MinAlign(Alignment, 16U), St->getMemOperand()->getFlags());
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1);
}
@@ -34876,7 +34898,7 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
// X86 is rubbish at scalar and vector i64 multiplies (until AVX512DQ) - its
// better to truncate if we have the chance.
if (SrcVT.getScalarType() == MVT::i64 && TLI.isOperationLegal(Opcode, VT) &&
- !TLI.isOperationLegal(Opcode, SrcVT))
+ !Subtarget.hasDQI())
return TruncateArithmetic(Src.getOperand(0), Src.getOperand(1));
LLVM_FALLTHROUGH;
case ISD::ADD: {
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
index 8464081b1b08..7708f577ba70 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1167,7 +1167,6 @@ namespace llvm {
bool isReplace) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
@@ -1183,9 +1182,6 @@ namespace llvm {
SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrAVX512.td b/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
index 2a2286e42405..dcd84930741b 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -4420,12 +4420,12 @@ defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus,
defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus,
SSE_INTALU_ITINS_P, HasBWI, 0>;
defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
- SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
+ SSE_INTMUL_ITINS_P, HasAVX512, 1>, T8PD;
defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
- SSE_INTALU_ITINS_P, HasBWI, 1>;
+ SSE_INTMUL_ITINS_P, HasBWI, 1>;
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
- SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD;
-defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SSE_INTALU_ITINS_P,
+ SSE_INTMUL_ITINS_P, HasDQI, 1>, T8PD;
+defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SSE_INTMUL_ITINS_P,
HasBWI, 1>;
defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SSE_INTMUL_ITINS_P,
HasBWI, 1>;
@@ -4454,7 +4454,7 @@ multiclass avx512_binop_all<bits<8> opc, string OpcodeStr, OpndItins itins,
}
}
-defm VPMULDQ : avx512_binop_all<0x28, "vpmuldq", SSE_INTALU_ITINS_P,
+defm VPMULDQ : avx512_binop_all<0x28, "vpmuldq", SSE_INTMUL_ITINS_P,
avx512vl_i32_info, avx512vl_i64_info,
X86pmuldq, HasAVX512, 1>,T8PD;
defm VPMULUDQ : avx512_binop_all<0xF4, "vpmuludq", SSE_INTMUL_ITINS_P,
@@ -8704,17 +8704,6 @@ def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
IIC_SSE_MOV_S_RR>, EVEX, Sched<[WriteMove]>;
}
-// Use 512bit version to implement 128/256 bit in case NoVLX.
-multiclass avx512_convert_mask_to_vector_lowering<X86VectorVTInfo X86Info,
- X86VectorVTInfo _> {
-
- def : Pat<(X86Info.VT (X86vsext (X86Info.KVT X86Info.KRC:$src))),
- (X86Info.VT (EXTRACT_SUBREG
- (_.VT (!cast<Instruction>(NAME#"Zrr")
- (_.KVT (COPY_TO_REGCLASS X86Info.KRC:$src,_.KRC)))),
- X86Info.SubRegIdx))>;
-}
-
multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
string OpcodeStr, Predicate prd> {
let Predicates = [prd] in
@@ -8724,11 +8713,6 @@ let Predicates = [prd] in
defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
}
-let Predicates = [prd, NoVLX] in {
- defm Z256_Alt : avx512_convert_mask_to_vector_lowering<VTInfo.info256,VTInfo.info512>;
- defm Z128_Alt : avx512_convert_mask_to_vector_lowering<VTInfo.info128,VTInfo.info512>;
- }
-
}
defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFPStack.td b/contrib/llvm/lib/Target/X86/X86InstrFPStack.td
index 7e89a4111d86..619b399ef8d8 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrFPStack.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrFPStack.td
@@ -141,6 +141,7 @@ def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2), TwoArgFP,
// These instructions cannot address 80-bit memory.
multiclass FPBinary<SDNode OpNode, Format fp, string asmstring,
bit Forward = 1> {
+let mayLoad = 1, hasSideEffects = 1 in {
// ST(0) = ST(0) + [mem]
def _Fp32m : FpIf32<(outs RFP32:$dst),
(ins RFP32:$src1, f32mem:$src2), OneArgFPRW,
@@ -177,10 +178,8 @@ def _Fp80m64: FpI_<(outs RFP80:$dst),
(OpNode RFP80:$src1, (f80 (extloadf64 addr:$src2)))),
(set RFP80:$dst,
(OpNode (f80 (extloadf64 addr:$src2)), RFP80:$src1)))]>;
-let mayLoad = 1 in
def _F32m : FPI<0xD8, fp, (outs), (ins f32mem:$src),
!strconcat("f", asmstring, "{s}\t$src")>;
-let mayLoad = 1 in
def _F64m : FPI<0xDC, fp, (outs), (ins f64mem:$src),
!strconcat("f", asmstring, "{l}\t$src")>;
// ST(0) = ST(0) + [memint]
@@ -226,12 +225,11 @@ def _FpI32m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i32mem:$src2),
(OpNode RFP80:$src1, (X86fild addr:$src2, i32))),
(set RFP80:$dst,
(OpNode (X86fild addr:$src2, i32), RFP80:$src1)))]>;
-let mayLoad = 1 in
def _FI16m : FPI<0xDE, fp, (outs), (ins i16mem:$src),
!strconcat("fi", asmstring, "{s}\t$src")>;
-let mayLoad = 1 in
def _FI32m : FPI<0xDA, fp, (outs), (ins i32mem:$src),
!strconcat("fi", asmstring, "{l}\t$src")>;
+} // mayLoad = 1, hasSideEffects = 1
}
let Defs = [FPSW] in {
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.td b/contrib/llvm/lib/Target/X86/X86InstrInfo.td
index fdf3e73e4fcd..27c67500b26f 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.td
@@ -832,9 +832,11 @@ def NoVLX : Predicate<"!Subtarget->hasVLX()">;
def NoVLX_Or_NoBWI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasBWI()">;
def NoVLX_Or_NoDQI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasDQI()">;
def PKU : Predicate<"Subtarget->hasPKU()">;
-def HasVNNI : Predicate<"Subtarget->hasVNNI()">;
+def HasVNNI : Predicate<"Subtarget->hasVNNI()">,
+ AssemblerPredicate<"FeatureVNNI", "AVX-512 VNNI ISA">;
-def HasBITALG : Predicate<"Subtarget->hasBITALG()">;
+def HasBITALG : Predicate<"Subtarget->hasBITALG()">,
+ AssemblerPredicate<"FeatureBITALG", "AVX-512 BITALG ISA">;
def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
def HasAES : Predicate<"Subtarget->hasAES()">;
def HasVAES : Predicate<"Subtarget->hasVAES()">;
@@ -866,7 +868,8 @@ def HasBMI2 : Predicate<"Subtarget->hasBMI2()">;
def NoBMI2 : Predicate<"!Subtarget->hasBMI2()">;
def HasVBMI : Predicate<"Subtarget->hasVBMI()">,
AssemblerPredicate<"FeatureVBMI", "AVX-512 VBMI ISA">;
-def HasVBMI2 : Predicate<"Subtarget->hasVBMI2()">;
+def HasVBMI2 : Predicate<"Subtarget->hasVBMI2()">,
+ AssemblerPredicate<"FeatureVBMI2", "AVX-512 VBMI2 ISA">;
def HasIFMA : Predicate<"Subtarget->hasIFMA()">,
AssemblerPredicate<"FeatureIFMA", "AVX-512 IFMA ISA">;
def HasRTM : Predicate<"Subtarget->hasRTM()">;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
index b48fa1841979..cb84f9aecf79 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
@@ -3734,7 +3734,7 @@ multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>,
+ [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))], itins.rr>,
Sched<[itins.Sched]>;
def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2),
@@ -3742,8 +3742,8 @@ multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1),
- (bitconvert (memop_frag addr:$src2)))))]>,
- Sched<[itins.Sched.Folded, ReadAfterLd]>;
+ (bitconvert (memop_frag addr:$src2)))))],
+ itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
} // ExeDomain = SSEPackedInt
@@ -6313,7 +6313,7 @@ multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>,
+ [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>,
Sched<[itins.Sched]>;
def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2),
@@ -6321,8 +6321,8 @@ multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst,
- (OpVT (OpNode RC:$src1, (bitconvert (memop_frag addr:$src2)))))]>,
- Sched<[itins.Sched.Folded, ReadAfterLd]>;
+ (OpVT (OpNode RC:$src1, (bitconvert (memop_frag addr:$src2)))))],
+ itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
/// SS48I_binop_rm2 - Simple SSE41 binary operator with different src and dst
@@ -6338,7 +6338,7 @@ multiclass SS48I_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>,
+ [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))], itins.rr>,
Sched<[itins.Sched]>;
def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2),
@@ -6346,8 +6346,8 @@ multiclass SS48I_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1),
- (bitconvert (memop_frag addr:$src2)))))]>,
- Sched<[itins.Sched.Folded, ReadAfterLd]>;
+ (bitconvert (memop_frag addr:$src2)))))],
+ itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
let Predicates = [HasAVX, NoVLX] in {
@@ -6924,14 +6924,15 @@ multiclass SS42I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>, Sched<[itins.Sched]>;
+ [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>,
+ Sched<[itins.Sched]>;
def rm : SS428I<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst,
- (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>,
+ (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))], itins.rm>,
Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
diff --git a/contrib/llvm/lib/Target/X86/X86WinEHState.cpp b/contrib/llvm/lib/Target/X86/X86WinEHState.cpp
index 0472a85f50da..6d6dedc60736 100644
--- a/contrib/llvm/lib/Target/X86/X86WinEHState.cpp
+++ b/contrib/llvm/lib/Target/X86/X86WinEHState.cpp
@@ -149,6 +149,12 @@ void WinEHStatePass::getAnalysisUsage(AnalysisUsage &AU) const {
}
bool WinEHStatePass::runOnFunction(Function &F) {
+ // Don't insert state stores or exception handler thunks for
+ // available_externally functions. The handler needs to reference the LSDA,
+ // which will not be emitted in this case.
+ if (F.hasAvailableExternallyLinkage())
+ return false;
+
// Check the personality. Do nothing if this personality doesn't use funclets.
if (!F.hasPersonalityFn())
return false;
diff --git a/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp b/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 7086c2eb52c4..a69c009e1a54 100644
--- a/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -181,8 +181,9 @@ public:
StringRef Name, bool IsThinLTOPreLink,
std::function<AssumptionCache &(Function &)> GetAssumptionCache,
std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo)
- : GetAC(GetAssumptionCache), GetTTI(GetTargetTransformInfo),
- Filename(Name), IsThinLTOPreLink(IsThinLTOPreLink) {}
+ : GetAC(std::move(GetAssumptionCache)),
+ GetTTI(std::move(GetTargetTransformInfo)), Filename(Name),
+ IsThinLTOPreLink(IsThinLTOPreLink) {}
bool doInitialization(Module &M);
bool runOnModule(Module &M, ModuleAnalysisManager *AM);
@@ -1547,14 +1548,14 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM) {
// Populate the symbol map.
for (const auto &N_F : M.getValueSymbolTable()) {
- std::string OrigName = N_F.getKey();
+ StringRef OrigName = N_F.getKey();
Function *F = dyn_cast<Function>(N_F.getValue());
if (F == nullptr)
continue;
SymbolMap[OrigName] = F;
auto pos = OrigName.find('.');
- if (pos != std::string::npos) {
- std::string NewName = OrigName.substr(0, pos);
+ if (pos != StringRef::npos) {
+ StringRef NewName = OrigName.substr(0, pos);
auto r = SymbolMap.insert(std::make_pair(NewName, F));
// Failiing to insert means there is already an entry in SymbolMap,
// thus there are multiple functions that are mapped to the same
diff --git a/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index 945133074059..caffc03339c4 100644
--- a/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -90,8 +90,7 @@ void promoteTypeIds(Module &M, StringRef ModuleId) {
if (isa<MDNode>(MD) && cast<MDNode>(MD)->isDistinct()) {
Metadata *&GlobalMD = LocalToGlobal[MD];
if (!GlobalMD) {
- std::string NewName =
- (to_string(LocalToGlobal.size()) + ModuleId).str();
+ std::string NewName = (Twine(LocalToGlobal.size()) + ModuleId).str();
GlobalMD = MDString::get(M.getContext(), NewName);
}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index a088d447337f..40e52ee755e5 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1802,9 +1802,7 @@ Instruction *InstCombiner::visitVACopyInst(VACopyInst &I) {
/// instructions. For normal calls, it allows visitCallSite to do the heavy
/// lifting.
Instruction *InstCombiner::visitCallInst(CallInst &CI) {
- auto Args = CI.arg_operands();
- if (Value *V = SimplifyCall(&CI, CI.getCalledValue(), Args.begin(),
- Args.end(), SQ.getWithInstruction(&CI)))
+ if (Value *V = SimplifyCall(&CI, SQ.getWithInstruction(&CI)))
return replaceInstUsesWith(CI, V);
if (isFreeCall(&CI, &TLI))
@@ -1903,16 +1901,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/false))
return replaceInstUsesWith(CI, N);
return nullptr;
-
case Intrinsic::bswap: {
Value *IIOperand = II->getArgOperand(0);
Value *X = nullptr;
- // TODO should this be in InstSimplify?
- // bswap(bswap(x)) -> x
- if (match(IIOperand, m_BSwap(m_Value(X))))
- return replaceInstUsesWith(CI, X);
-
// bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
unsigned C = X->getType()->getPrimitiveSizeInBits() -
@@ -1923,18 +1915,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
break;
}
-
- case Intrinsic::bitreverse: {
- Value *IIOperand = II->getArgOperand(0);
- Value *X = nullptr;
-
- // TODO should this be in InstSimplify?
- // bitreverse(bitreverse(x)) -> x
- if (match(IIOperand, m_BitReverse(m_Value(X))))
- return replaceInstUsesWith(CI, X);
- break;
- }
-
case Intrinsic::masked_load:
if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II, Builder))
return replaceInstUsesWith(CI, SimplifiedMaskedOp);
@@ -1948,16 +1928,16 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::powi:
if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
- // powi(x, 0) -> 1.0
- if (Power->isZero())
- return replaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0));
- // powi(x, 1) -> x
- if (Power->isOne())
- return replaceInstUsesWith(CI, II->getArgOperand(0));
+ // 0 and 1 are handled in instsimplify
+
// powi(x, -1) -> 1/x
if (Power->isMinusOne())
return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
II->getArgOperand(0));
+ // powi(x, 2) -> x*x
+ if (Power->equalsInt(2))
+ return BinaryOperator::CreateFMul(II->getArgOperand(0),
+ II->getArgOperand(0));
}
break;
@@ -2396,7 +2376,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// The compare intrinsic uses the above assumptions and therefore
// doesn't require additional flags.
if ((match(Arg0, m_OneUse(m_FSub(m_Value(A), m_Value(B)))) &&
- match(Arg1, m_Zero()) &&
+ match(Arg1, m_Zero()) && isa<Instruction>(Arg0) &&
cast<Instruction>(Arg0)->getFastMathFlags().noInfs())) {
if (Arg0IsZero)
std::swap(A, B);
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 87666360c1a0..541dde6c47d2 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -1631,9 +1631,5 @@ Instruction *InstCombiner::visitFRem(BinaryOperator &I) {
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
- // Handle cases involving: rem X, (select Cond, Y, Z)
- if (simplifyDivRemOfSelectWithZeroOp(I))
- return &I;
-
return nullptr;
}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 65a96b965227..aeac8910af6b 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -181,11 +181,13 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
// If extracting a specified index from the vector, see if we can recursively
// find a previously computed scalar that was inserted into the vector.
if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) {
- unsigned IndexVal = IdxC->getZExtValue();
unsigned VectorWidth = EI.getVectorOperandType()->getNumElements();
- // InstSimplify handles cases where the index is invalid.
- assert(IndexVal < VectorWidth);
+ // InstSimplify should handle cases where the index is invalid.
+ if (!IdxC->getValue().ule(VectorWidth))
+ return nullptr;
+
+ unsigned IndexVal = IdxC->getZExtValue();
// This instruction only demands the single element from the input vector.
// If the input vector has a single use, simplify it based on this use
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 8328d4031941..8e39f24d819c 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -2702,9 +2702,10 @@ void FunctionStackPoisoner::copyArgsPassedByValToAllocas() {
unsigned Align = Arg.getParamAlignment();
if (Align == 0) Align = DL.getABITypeAlignment(Ty);
- const std::string &Name = Arg.hasName() ? Arg.getName().str() :
- "Arg" + llvm::to_string(Arg.getArgNo());
- AllocaInst *AI = IRB.CreateAlloca(Ty, nullptr, Twine(Name) + ".byval");
+ AllocaInst *AI = IRB.CreateAlloca(
+ Ty, nullptr,
+ (Arg.hasName() ? Arg.getName() : "Arg" + Twine(Arg.getArgNo())) +
+ ".byval");
AI->setAlignment(Align);
Arg.replaceAllUsesWith(AI);
diff --git a/contrib/llvm/lib/Transforms/Scalar/GVNSink.cpp b/contrib/llvm/lib/Transforms/Scalar/GVNSink.cpp
index 814a62cd7d65..bf92e43c4715 100644
--- a/contrib/llvm/lib/Transforms/Scalar/GVNSink.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/GVNSink.cpp
@@ -641,7 +641,7 @@ Optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking(
DenseMap<uint32_t, unsigned> VNums;
for (auto *I : Insts) {
uint32_t N = VN.lookupOrAdd(I);
- DEBUG(dbgs() << " VN=" << utohexstr(N) << " for" << *I << "\n");
+ DEBUG(dbgs() << " VN=" << Twine::utohexstr(N) << " for" << *I << "\n");
if (N == ~0U)
return None;
VNums[N]++;
diff --git a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 6af3fef963dc..9c870b42a747 100644
--- a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -476,33 +476,22 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
Alignment = DL.getABITypeAlignment(EltType);
}
- // Remember the debug location.
- DebugLoc Loc;
- if (!Range.TheStores.empty())
- Loc = Range.TheStores[0]->getDebugLoc();
+ AMemSet =
+ Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
DEBUG(dbgs() << "Replace stores:\n";
for (Instruction *SI : Range.TheStores)
- dbgs() << *SI << '\n');
+ dbgs() << *SI << '\n';
+ dbgs() << "With: " << *AMemSet << '\n');
+
+ if (!Range.TheStores.empty())
+ AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
// Zap all the stores.
for (Instruction *SI : Range.TheStores) {
MD->removeInstruction(SI);
SI->eraseFromParent();
}
-
- // Create the memset after removing the stores, so that if there any cached
- // non-local dependencies on the removed instructions in
- // MemoryDependenceAnalysis, the cache entries are updated to "dirty"
- // entries pointing below the memset, so subsequent queries include the
- // memset.
- AMemSet =
- Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
- if (!Range.TheStores.empty())
- AMemSet->setDebugLoc(Loc);
-
- DEBUG(dbgs() << "With: " << *AMemSet << '\n');
-
++NumMemSetInfer;
}
@@ -1042,22 +1031,9 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
//
// NOTE: This is conservative, it will stop on any read from the source loc,
// not just the defining memcpy.
- MemoryLocation SourceLoc = MemoryLocation::getForSource(MDep);
- MemDepResult SourceDep = MD->getPointerDependencyFrom(SourceLoc, false,
- M->getIterator(), M->getParent());
-
- if (SourceDep.isNonLocal()) {
- SmallVector<NonLocalDepResult, 2> NonLocalDepResults;
- MD->getNonLocalPointerDependencyFrom(M, SourceLoc, /*isLoad=*/false,
- NonLocalDepResults);
- if (NonLocalDepResults.size() == 1) {
- SourceDep = NonLocalDepResults[0].getResult();
- assert((!SourceDep.getInst() ||
- LookupDomTree().dominates(SourceDep.getInst(), M)) &&
- "when memdep returns exactly one result, it should dominate");
- }
- }
-
+ MemDepResult SourceDep =
+ MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false,
+ M->getIterator(), M->getParent());
if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
return false;
@@ -1259,18 +1235,6 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M) {
MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(
SrcLoc, true, M->getIterator(), M->getParent());
- if (SrcDepInfo.isNonLocal()) {
- SmallVector<NonLocalDepResult, 2> NonLocalDepResults;
- MD->getNonLocalPointerDependencyFrom(M, SrcLoc, /*isLoad=*/true,
- NonLocalDepResults);
- if (NonLocalDepResults.size() == 1) {
- SrcDepInfo = NonLocalDepResults[0].getResult();
- assert((!SrcDepInfo.getInst() ||
- LookupDomTree().dominates(SrcDepInfo.getInst(), M)) &&
- "when memdep returns exactly one result, it should dominate");
- }
- }
-
if (SrcDepInfo.isClobber()) {
if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
return processMemCpyMemCpyDependence(M, MDep);
diff --git a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 3b45cfa482e6..c44edbed8ed9 100644
--- a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -2796,17 +2796,12 @@ static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
StatepointLiveSetTy Updated;
findLiveSetAtInst(Inst, RevisedLivenessData, Updated);
-#ifndef NDEBUG
- DenseSet<Value *> Bases;
- for (auto KVPair : Info.PointerToBase)
- Bases.insert(KVPair.second);
-#endif
-
// We may have base pointers which are now live that weren't before. We need
// to update the PointerToBase structure to reflect this.
for (auto V : Updated)
if (Info.PointerToBase.insert({V, V}).second) {
- assert(Bases.count(V) && "Can't find base for unexpected live value!");
+ assert(isKnownBaseResult(V) &&
+ "Can't find base for unexpected live value!");
continue;
}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index efff06f79cb7..e00541d3c812 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -648,8 +648,13 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
SmallVector<BasicBlock*, 4> Preds(predecessors(LatchExit));
NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa",
DT, LI, PreserveLCSSA);
+ // NewExit gets its DebugLoc from LatchExit, which is not part of the
+ // original Loop.
+ // Fix this by setting Loop's DebugLoc to NewExit.
+ auto *NewExitTerminator = NewExit->getTerminator();
+ NewExitTerminator->setDebugLoc(Header->getTerminator()->getDebugLoc());
// Split NewExit to insert epilog remainder loop.
- EpilogPreHeader = SplitBlock(NewExit, NewExit->getTerminator(), DT, LI);
+ EpilogPreHeader = SplitBlock(NewExit, NewExitTerminator, DT, LI);
EpilogPreHeader->setName(Header->getName() + ".epil.preheader");
} else {
// If prolog remainder
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
index c3fa05a11a24..fe106e33bca1 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -880,9 +880,10 @@ bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop,
/// If we are able to find such sequence, we return the instructions
/// we found, namely %casted_phi and the instructions on its use-def chain up
/// to the phi (not including the phi).
-bool getCastsForInductionPHI(
- PredicatedScalarEvolution &PSE, const SCEVUnknown *PhiScev,
- const SCEVAddRecExpr *AR, SmallVectorImpl<Instruction *> &CastInsts) {
+static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE,
+ const SCEVUnknown *PhiScev,
+ const SCEVAddRecExpr *AR,
+ SmallVectorImpl<Instruction *> &CastInsts) {
assert(CastInsts.empty() && "CastInsts is expected to be empty.");
auto *PN = cast<PHINode>(PhiScev->getValue());
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index b3c80424c8b9..e7358dbcb624 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -127,16 +127,6 @@ static cl::opt<unsigned> MaxSpeculationDepth(
cl::desc("Limit maximum recursion depth when calculating costs of "
"speculatively executed instructions"));
-static cl::opt<unsigned> DependenceChainLatency(
- "dependence-chain-latency", cl::Hidden, cl::init(8),
- cl::desc("Limit the maximum latency of dependence chain containing cmp "
- "for if conversion"));
-
-static cl::opt<unsigned> SmallBBSize(
- "small-bb-size", cl::Hidden, cl::init(40),
- cl::desc("Check dependence chain latency only in basic block smaller than "
- "this number"));
-
STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
STATISTIC(NumLinearMaps,
"Number of switch instructions turned into linear mapping");
@@ -405,166 +395,6 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
return true;
}
-/// Estimate the code size of the specified BB.
-static unsigned CountBBCodeSize(BasicBlock *BB,
- const TargetTransformInfo &TTI) {
- unsigned Size = 0;
- for (auto II = BB->begin(); !isa<TerminatorInst>(II); ++II)
- Size += TTI.getInstructionCost(&(*II), TargetTransformInfo::TCK_CodeSize);
- return Size;
-}
-
-/// Find out the latency of the longest dependence chain in the BB if
-/// LongestChain is true, or the dependence chain containing the compare
-/// instruction feeding the block's conditional branch.
-static unsigned FindDependenceChainLatency(BasicBlock *BB,
- DenseMap<Instruction *, unsigned> &Instructions,
- const TargetTransformInfo &TTI,
- bool LongestChain) {
- unsigned MaxLatency = 0;
-
- BasicBlock::iterator II;
- for (II = BB->begin(); !isa<TerminatorInst>(II); ++II) {
- unsigned Latency = 0;
- for (unsigned O = 0, E = II->getNumOperands(); O != E; ++O) {
- Instruction *Op = dyn_cast<Instruction>(II->getOperand(O));
- if (Op && Instructions.count(Op)) {
- auto OpLatency = Instructions[Op];
- if (OpLatency > Latency)
- Latency = OpLatency;
- }
- }
- Latency += TTI.getInstructionCost(&(*II), TargetTransformInfo::TCK_Latency);
- Instructions[&(*II)] = Latency;
-
- if (Latency > MaxLatency)
- MaxLatency = Latency;
- }
-
- if (LongestChain)
- return MaxLatency;
-
- // The length of the dependence chain containing the compare instruction is
- // wanted, so the terminator must be a BranchInst.
- assert(isa<BranchInst>(II));
- BranchInst* Br = cast<BranchInst>(II);
- Instruction *Cmp = dyn_cast<Instruction>(Br->getCondition());
- if (Cmp && Instructions.count(Cmp))
- return Instructions[Cmp];
- else
- return 0;
-}
-
-/// Instructions in BB2 may depend on instructions in BB1, and instructions
-/// in BB1 may have users in BB2. If the last (in terms of latency) such kind
-/// of instruction in BB1 is I, then the instructions after I can be executed
-/// in parallel with instructions in BB2.
-/// This function returns the latency of I.
-static unsigned LatencyAdjustment(BasicBlock *BB1, BasicBlock *BB2,
- BasicBlock *IfBlock1, BasicBlock *IfBlock2,
- DenseMap<Instruction *, unsigned> &BB1Instructions) {
- unsigned LastLatency = 0;
- SmallVector<Instruction *, 16> Worklist;
- BasicBlock::iterator II;
- for (II = BB2->begin(); !isa<TerminatorInst>(II); ++II) {
- if (PHINode *PN = dyn_cast<PHINode>(II)) {
- // Look for users in BB2.
- bool InBBUser = false;
- for (User *U : PN->users()) {
- if (cast<Instruction>(U)->getParent() == BB2) {
- InBBUser = true;
- break;
- }
- }
- // No such user, we don't care about this instruction and its operands.
- if (!InBBUser)
- break;
- }
- Worklist.push_back(&(*II));
- }
-
- while (!Worklist.empty()) {
- Instruction *I = Worklist.pop_back_val();
- for (unsigned O = 0, E = I->getNumOperands(); O != E; ++O) {
- if (Instruction *Op = dyn_cast<Instruction>(I->getOperand(O))) {
- if (Op->getParent() == IfBlock1 || Op->getParent() == IfBlock2)
- Worklist.push_back(Op);
- else if (Op->getParent() == BB1 && BB1Instructions.count(Op)) {
- if (BB1Instructions[Op] > LastLatency)
- LastLatency = BB1Instructions[Op];
- }
- }
- }
- }
-
- return LastLatency;
-}
-
-/// If after if conversion, most of the instructions in this new BB construct a
-/// long and slow dependence chain, it may be slower than cmp/branch, even
-/// if the branch has a high miss rate, because the control dependence is
-/// transformed into data dependence, and control dependence can be speculated,
-/// and thus, the second part can execute in parallel with the first part on
-/// modern OOO processor.
-///
-/// To check this condition, this function finds the length of the dependence
-/// chain in BB1 (only the part that can be executed in parallel with code after
-/// branch in BB2) containing cmp, and if the length is longer than a threshold,
-/// don't perform if conversion.
-///
-/// BB1, BB2, IfBlock1 and IfBlock2 are candidate BBs for if conversion.
-/// SpeculationSize contains the code size of IfBlock1 and IfBlock2.
-static bool FindLongDependenceChain(BasicBlock *BB1, BasicBlock *BB2,
- BasicBlock *IfBlock1, BasicBlock *IfBlock2,
- unsigned SpeculationSize,
- const TargetTransformInfo &TTI) {
- // Accumulated latency of each instruction in their BBs.
- DenseMap<Instruction *, unsigned> BB1Instructions;
- DenseMap<Instruction *, unsigned> BB2Instructions;
-
- if (!TTI.isOutOfOrder())
- return false;
-
- unsigned NewBBSize = CountBBCodeSize(BB1, TTI) + CountBBCodeSize(BB2, TTI)
- + SpeculationSize;
-
- // We check small BB only since it is more difficult to find unrelated
- // instructions to fill functional units in a small BB.
- if (NewBBSize > SmallBBSize)
- return false;
-
- auto BB1Chain =
- FindDependenceChainLatency(BB1, BB1Instructions, TTI, false);
- auto BB2Chain =
- FindDependenceChainLatency(BB2, BB2Instructions, TTI, true);
-
- // If there are many unrelated instructions in the new BB, there will be
- // other instructions for the processor to issue regardless of the length
- // of this new dependence chain.
- // Modern processors can issue 3 or more instructions in each cycle. But in
- // real world applications, an IPC of 2 is already very good for non-loop
- // code with small basic blocks. Higher IPC is usually found in programs with
- // small kernel. So IPC of 2 is more reasonable for most applications.
- if ((BB1Chain + BB2Chain) * 2 <= NewBBSize)
- return false;
-
- // We only care about part of the dependence chain in BB1 that can be
- // executed in parallel with BB2, so adjust the latency.
- BB1Chain -=
- LatencyAdjustment(BB1, BB2, IfBlock1, IfBlock2, BB1Instructions);
-
- // Correctly predicted branch instruction can skip the dependence chain in
- // BB1, but misprediction has a penalty, so only when the dependence chain is
- // longer than DependenceChainLatency, then branch is better than select.
- // Besides misprediction penalty, the threshold value DependenceChainLatency
- // also depends on branch misprediction rate, taken branch latency and cmov
- // latency.
- if (BB1Chain >= DependenceChainLatency)
- return true;
-
- return false;
-}
-
/// Extract ConstantInt from value, looking through IntToPtr
/// and PointerNullValue. Return NULL if value is not a constant int.
static ConstantInt *GetConstantInt(Value *V, const DataLayout &DL) {
@@ -2214,11 +2044,6 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
if (!HaveRewritablePHIs && !(HoistCondStores && SpeculatedStoreValue))
return false;
- // Don't do if conversion for long dependence chain.
- if (FindLongDependenceChain(BB, EndBB, ThenBB, nullptr,
- CountBBCodeSize(ThenBB, TTI), TTI))
- return false;
-
// If we get here, we can hoist the instruction and if-convert.
DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
@@ -2526,10 +2351,6 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
}
}
- if (FindLongDependenceChain(DomBlock, BB, IfBlock1, IfBlock2,
- AggressiveInsts.size(), TTI))
- return false;
-
DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond << " T: "
<< IfTrue->getName() << " F: " << IfFalse->getName() << "\n");