author     Dimitry Andric <dim@FreeBSD.org>   2017-05-17 20:22:39 +0000
committer  Dimitry Andric <dim@FreeBSD.org>   2017-05-17 20:22:39 +0000
commit     7af96fb3afd6725a2824a0a5ca5dad34e5e0b056 (patch)
tree       6661ffbabf869009597684462f5a3df3beccc952 /lib
parent     6b3f41ed88e8e440e11a4fbf20b6600529f80049 (diff)

Vendor import of llvm trunk r303291
tag: vendor/llvm/llvm-trunk-r303291

Notes:
    svn path=/vendor/llvm/dist/; revision=318414
    svn path=/vendor/llvm/llvm-trunk-r303291/; revision=318415; tag=vendor/llvm/llvm-trunk-r303291
Diffstat (limited to 'lib')
-rw-r--r--  lib/Analysis/DependenceAnalysis.cpp                    33
-rw-r--r--  lib/Analysis/InlineCost.cpp                            42
-rw-r--r--  lib/Analysis/InstructionSimplify.cpp                   18
-rw-r--r--  lib/Analysis/ProfileSummaryInfo.cpp                     2
-rw-r--r--  lib/Analysis/ScalarEvolution.cpp                       43
-rw-r--r--  lib/CodeGen/AggressiveAntiDepBreaker.cpp                5
-rw-r--r--  lib/CodeGen/AsmPrinter/CodeViewDebug.cpp                2
-rw-r--r--  lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp    3
-rw-r--r--  lib/CodeGen/GlobalISel/IRTranslator.cpp                 5
-rw-r--r--  lib/CodeGen/MachineVerifier.cpp                         2
-rw-r--r--  lib/CodeGen/RegAllocGreedy.cpp                         11
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp               28
-rw-r--r--  lib/CodeGen/SpillPlacement.cpp                          4
-rw-r--r--  lib/CodeGen/StackColoring.cpp                           6
-rw-r--r--  lib/CodeGen/TargetLoweringBase.cpp                      2
-rw-r--r--  lib/CodeGen/TargetPassConfig.cpp                       24
-rw-r--r--  lib/CodeGen/TargetRegisterInfo.cpp                      3
-rw-r--r--  lib/DebugInfo/CodeView/CVTypeDumper.cpp                22
-rw-r--r--  lib/DebugInfo/CodeView/CVTypeVisitor.cpp               99
-rw-r--r--  lib/DebugInfo/CodeView/RandomAccessTypeVisitor.cpp     10
-rw-r--r--  lib/DebugInfo/CodeView/TypeDumpVisitor.cpp              3
-rw-r--r--  lib/DebugInfo/CodeView/TypeStreamMerger.cpp            17
-rw-r--r--  lib/DebugInfo/DWARF/DWARFContext.cpp                   61
-rw-r--r--  lib/DebugInfo/PDB/Native/PDBTypeServerHandler.cpp       3
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp        10
-rw-r--r--  lib/Support/CrashRecoveryContext.cpp                  130
-rw-r--r--  lib/Support/Unix/Path.inc                              30
-rw-r--r--  lib/Target/AArch64/AArch64FrameLowering.cpp             3
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.cpp            341
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.h                1
-rw-r--r--  lib/Target/AArch64/AArch64TargetTransformInfo.cpp      25
-rw-r--r--  lib/Target/AArch64/AArch64TargetTransformInfo.h         3
-rw-r--r--  lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp               30
-rw-r--r--  lib/Target/AMDGPU/AMDGPUSubtarget.h                     4
-rw-r--r--  lib/Target/AMDGPU/SIISelLowering.cpp                    3
-rw-r--r--  lib/Target/AMDGPU/SIInstrInfo.cpp                      14
-rw-r--r--  lib/Target/AMDGPU/SIRegisterInfo.cpp                   26
-rw-r--r--  lib/Target/AMDGPU/VOP3Instructions.td                  20
-rw-r--r--  lib/Target/ARM/ARMInstructionSelector.cpp              15
-rw-r--r--  lib/Target/ARM/Thumb1FrameLowering.cpp                  3
-rw-r--r--  lib/Target/Mips/MipsDelaySlotFiller.cpp                 2
-rw-r--r--  lib/Target/PowerPC/PPCFrameLowering.cpp                23
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp                 45
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.h                    1
-rw-r--r--  lib/Target/PowerPC/PPCInstr64Bit.td                     4
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.cpp                    13
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.td                     12
-rw-r--r--  lib/Target/SystemZ/SystemZTargetTransformInfo.cpp      34
-rw-r--r--  lib/Target/WebAssembly/WebAssemblyRegColoring.cpp       3
-rw-r--r--  lib/Target/WebAssembly/known_gcc_test_failures.txt      3
-rw-r--r--  lib/Target/X86/X86.td                                   3
-rw-r--r--  lib/Target/X86/X86FixupLEAs.cpp                       269
-rw-r--r--  lib/Target/X86/X86InstructionSelector.cpp              66
-rw-r--r--  lib/Target/X86/X86LegalizerInfo.cpp                     5
-rw-r--r--  lib/Target/X86/X86Subtarget.h                           6
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp                     2
-rw-r--r--  lib/Target/X86/X86TargetTransformInfo.cpp              18
-rw-r--r--  lib/Transforms/Coroutines/CoroFrame.cpp                28
-rw-r--r--  lib/Transforms/InstCombine/InstCombineInternal.h       21
-rw-r--r--  lib/Transforms/InstCombine/InstructionCombining.cpp    47
-rw-r--r--  lib/Transforms/Scalar/LICM.cpp                          2
-rw-r--r--  lib/Transforms/Scalar/LoopIdiomRecognize.cpp           16
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp            3
-rw-r--r--  lib/Transforms/Scalar/NewGVN.cpp                       68
-rw-r--r--  lib/Transforms/Scalar/Reassociate.cpp                   2
-rw-r--r--  lib/Transforms/Scalar/SimpleLoopUnswitch.cpp           37
66 files changed, 927 insertions, 912 deletions
diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp
index a4672efeedd6..e4d58bf1b4eb 100644
--- a/lib/Analysis/DependenceAnalysis.cpp
+++ b/lib/Analysis/DependenceAnalysis.cpp
@@ -2984,7 +2984,7 @@ bool DependenceInfo::propagate(const SCEV *&Src, const SCEV *&Dst,
SmallVectorImpl<Constraint> &Constraints,
bool &Consistent) {
bool Result = false;
- for (int LI = Loops.find_first(); LI >= 0; LI = Loops.find_next(LI)) {
+ for (unsigned LI : Loops.set_bits()) {
DEBUG(dbgs() << "\t Constraint[" << LI << "] is");
DEBUG(Constraints[LI].dump(dbgs()));
if (Constraints[LI].isDistance())
@@ -3266,7 +3266,7 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
// For debugging purposes, dump a small bit vector to dbgs().
static void dumpSmallBitVector(SmallBitVector &BV) {
dbgs() << "{";
- for (int VI = BV.find_first(); VI >= 0; VI = BV.find_next(VI)) {
+ for (unsigned VI : BV.set_bits()) {
dbgs() << VI;
if (BV.find_next(VI) >= 0)
dbgs() << ' ';
@@ -3506,7 +3506,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
NewConstraint.setAny(SE);
// test separable subscripts
- for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) {
+ for (unsigned SI : Separable.set_bits()) {
DEBUG(dbgs() << "testing subscript " << SI);
switch (Pair[SI].Classification) {
case Subscript::ZIV:
@@ -3545,14 +3545,14 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
SmallVector<Constraint, 4> Constraints(MaxLevels + 1);
for (unsigned II = 0; II <= MaxLevels; ++II)
Constraints[II].setAny(SE);
- for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) {
+ for (unsigned SI : Coupled.set_bits()) {
DEBUG(dbgs() << "testing subscript group " << SI << " { ");
SmallBitVector Group(Pair[SI].Group);
SmallBitVector Sivs(Pairs);
SmallBitVector Mivs(Pairs);
SmallBitVector ConstrainedLevels(MaxLevels + 1);
SmallVector<Subscript *, 4> PairsInGroup;
- for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) {
+ for (unsigned SJ : Group.set_bits()) {
DEBUG(dbgs() << SJ << " ");
if (Pair[SJ].Classification == Subscript::SIV)
Sivs.set(SJ);
@@ -3564,7 +3564,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
DEBUG(dbgs() << "}\n");
while (Sivs.any()) {
bool Changed = false;
- for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) {
+ for (unsigned SJ : Sivs.set_bits()) {
DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n");
// SJ is an SIV subscript that's part of the current coupled group
unsigned Level;
@@ -3588,7 +3588,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
DEBUG(dbgs() << " propagating\n");
DEBUG(dbgs() << "\tMivs = ");
DEBUG(dumpSmallBitVector(Mivs));
- for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) {
+ for (unsigned SJ : Mivs.set_bits()) {
// SJ is an MIV subscript that's part of the current coupled group
DEBUG(dbgs() << "\tSJ = " << SJ << "\n");
if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops,
@@ -3622,7 +3622,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
}
// test & propagate remaining RDIVs
- for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) {
+ for (unsigned SJ : Mivs.set_bits()) {
if (Pair[SJ].Classification == Subscript::RDIV) {
DEBUG(dbgs() << "RDIV test\n");
if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result))
@@ -3635,7 +3635,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
// test remaining MIVs
// This code is temporary.
// Better to somehow test all remaining subscripts simultaneously.
- for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) {
+ for (unsigned SJ : Mivs.set_bits()) {
if (Pair[SJ].Classification == Subscript::MIV) {
DEBUG(dbgs() << "MIV test\n");
if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result))
@@ -3647,9 +3647,8 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
// update Result.DV from constraint vector
DEBUG(dbgs() << " updating\n");
- for (int SJ = ConstrainedLevels.find_first(); SJ >= 0;
- SJ = ConstrainedLevels.find_next(SJ)) {
- if (SJ > (int)CommonLevels)
+ for (unsigned SJ : ConstrainedLevels.set_bits()) {
+ if (SJ > CommonLevels)
break;
updateDirection(Result.DV[SJ - 1], Constraints[SJ]);
if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE)
@@ -3859,7 +3858,7 @@ const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep,
NewConstraint.setAny(SE);
// test separable subscripts
- for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) {
+ for (unsigned SI : Separable.set_bits()) {
switch (Pair[SI].Classification) {
case Subscript::SIV: {
unsigned Level;
@@ -3886,12 +3885,12 @@ const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep,
SmallVector<Constraint, 4> Constraints(MaxLevels + 1);
for (unsigned II = 0; II <= MaxLevels; ++II)
Constraints[II].setAny(SE);
- for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) {
+ for (unsigned SI : Coupled.set_bits()) {
SmallBitVector Group(Pair[SI].Group);
SmallBitVector Sivs(Pairs);
SmallBitVector Mivs(Pairs);
SmallBitVector ConstrainedLevels(MaxLevels + 1);
- for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) {
+ for (unsigned SJ : Group.set_bits()) {
if (Pair[SJ].Classification == Subscript::SIV)
Sivs.set(SJ);
else
@@ -3899,7 +3898,7 @@ const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep,
}
while (Sivs.any()) {
bool Changed = false;
- for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) {
+ for (unsigned SJ : Sivs.set_bits()) {
// SJ is an SIV subscript that's part of the current coupled group
unsigned Level;
const SCEV *SplitIter = nullptr;
@@ -3914,7 +3913,7 @@ const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep,
}
if (Changed) {
// propagate, possibly creating new SIVs and ZIVs
- for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) {
+ for (unsigned SJ : Mivs.set_bits()) {
// SJ is an MIV subscript that's part of the current coupled group
if (propagate(Pair[SJ].Src, Pair[SJ].Dst,
Pair[SJ].Loops, Constraints, Result.Consistent)) {
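Every hunk above makes the same mechanical change: the explicit find_first()/find_next() cursor loop over a SmallBitVector becomes a range-for over the new set_bits() adapter. A minimal standalone sketch of the idiom (the bit values are illustrative):

    #include "llvm/ADT/SmallBitVector.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    void printSetBits() {
      SmallBitVector BV(16);
      BV.set(2);
      BV.set(5);
      BV.set(9);
      // Old idiom: for (int I = BV.find_first(); I >= 0; I = BV.find_next(I))
      // New idiom: set_bits() yields each set index in increasing order.
      for (unsigned I : BV.set_bits())
        outs() << I << ' '; // prints: 2 5 9
    }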
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 44c14cb17c22..4702569126c6 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -669,21 +669,33 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
Threshold = MaxIfValid(Threshold, Params.HintThreshold);
if (PSI) {
BlockFrequencyInfo *CallerBFI = GetBFI ? &((*GetBFI)(*Caller)) : nullptr;
- if (PSI->isHotCallSite(CS, CallerBFI)) {
- DEBUG(dbgs() << "Hot callsite.\n");
- Threshold = Params.HotCallSiteThreshold.getValue();
- } else if (PSI->isFunctionEntryHot(&Callee)) {
- DEBUG(dbgs() << "Hot callee.\n");
- // If callsite hotness can not be determined, we may still know
- // that the callee is hot and treat it as a weaker hint for threshold
- // increase.
- Threshold = MaxIfValid(Threshold, Params.HintThreshold);
- } else if (PSI->isColdCallSite(CS, CallerBFI)) {
- DEBUG(dbgs() << "Cold callsite.\n");
- Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
- } else if (PSI->isFunctionEntryCold(&Callee)) {
- DEBUG(dbgs() << "Cold callee.\n");
- Threshold = MinIfValid(Threshold, Params.ColdThreshold);
+ // FIXME: After switching to the new passmanager, simplify the logic below
+ // by checking only the callsite hotness/coldness. The check for CallerBFI
+ // exists only because we do not have BFI available with the old PM.
+ //
+ // Use callee's hotness information only if we have no way of determining
+ // callsite's hotness information. Callsite hotness can be determined if
+ // sample profile is used (which adds hotness metadata to calls) or if
+ // caller's BlockFrequencyInfo is available.
+ if (CallerBFI || PSI->hasSampleProfile()) {
+ if (PSI->isHotCallSite(CS, CallerBFI)) {
+ DEBUG(dbgs() << "Hot callsite.\n");
+ Threshold = Params.HotCallSiteThreshold.getValue();
+ } else if (PSI->isColdCallSite(CS, CallerBFI)) {
+ DEBUG(dbgs() << "Cold callsite.\n");
+ Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
+ }
+ } else {
+ if (PSI->isFunctionEntryHot(&Callee)) {
+ DEBUG(dbgs() << "Hot callee.\n");
+ // If callsite hotness can not be determined, we may still know
+ // that the callee is hot and treat it as a weaker hint for threshold
+ // increase.
+ Threshold = MaxIfValid(Threshold, Params.HintThreshold);
+ } else if (PSI->isFunctionEntryCold(&Callee)) {
+ DEBUG(dbgs() << "Cold callee.\n");
+ Threshold = MinIfValid(Threshold, Params.ColdThreshold);
+ }
}
}
}
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 5728887cc1e9..5652248a60ce 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -1752,6 +1752,24 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
(A == Op0 || B == Op0))
return Op0;
+ // A mask that only clears known zeros of a shifted value is a no-op.
+ Value *X;
+ const APInt *Mask;
+ const APInt *ShAmt;
+ if (match(Op1, m_APInt(Mask))) {
+ // If all bits in the inverted and shifted mask are clear:
+ // and (shl X, ShAmt), Mask --> shl X, ShAmt
+ if (match(Op0, m_Shl(m_Value(X), m_APInt(ShAmt))) &&
+ (~(*Mask)).lshr(*ShAmt).isNullValue())
+ return Op0;
+
+ // If all bits in the inverted and shifted mask are clear:
+ // and (lshr X, ShAmt), Mask --> lshr X, ShAmt
+ if (match(Op0, m_LShr(m_Value(X), m_APInt(ShAmt))) &&
+ (~(*Mask)).shl(*ShAmt).isNullValue())
+ return Op0;
+ }
+
// A & (-A) = A if A is a power of two or zero.
if (match(Op0, m_Neg(m_Specific(Op1))) ||
match(Op1, m_Neg(m_Specific(Op0)))) {
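A worked instance of the new no-op-mask fold, using the same APInt test as the code above (the 8-bit values are illustrative):

    #include "llvm/ADT/APInt.h"
    using namespace llvm;

    // and (shl X, 3), 0xF8 --> shl X, 3 for i8: the shift already zeroed
    // the low three bits, and 0xF8 clears nothing else.
    bool maskOnlyClearsKnownZeros() {
      APInt Mask(8, 0xF8);
      APInt ShAmt(8, 3);
      // Same test as the fold: the inverted mask, shifted right by the
      // shl amount, must have no bits set.
      return (~Mask).lshr(ShAmt).isNullValue(); // true
    }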
diff --git a/lib/Analysis/ProfileSummaryInfo.cpp b/lib/Analysis/ProfileSummaryInfo.cpp
index 502f4205b689..12b86daa602b 100644
--- a/lib/Analysis/ProfileSummaryInfo.cpp
+++ b/lib/Analysis/ProfileSummaryInfo.cpp
@@ -75,7 +75,7 @@ ProfileSummaryInfo::getProfileCount(const Instruction *Inst,
return None;
assert((isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) &&
"We can only get profile count for call/invoke instruction.");
- if (computeSummary() && Summary->getKind() == ProfileSummary::PSK_Sample) {
+ if (hasSampleProfile()) {
// In sample PGO mode, check if there is a profile metadata on the
// instruction. If it is present, determine hotness solely based on that,
// since the sampled entry count may not be accurate.
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 800354d2f5b4..a746ddfd7a63 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -629,19 +629,19 @@ static int CompareSCEVComplexity(
const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS);
const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS);
- // If there is a dominance relationship between the loops, sort by the
- // dominance. Otherwise, sort by depth. We require such order in getAddExpr.
+ // There is always a dominance between two recs that are used by one SCEV,
+ // so we can safely sort recs by loop header dominance. We require such
+ // order in getAddExpr.
const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop();
if (LLoop != RLoop) {
const BasicBlock *LHead = LLoop->getHeader(), *RHead = RLoop->getHeader();
assert(LHead != RHead && "Two loops share the same header?");
if (DT.dominates(LHead, RHead))
return 1;
- else if (DT.dominates(RHead, LHead))
- return -1;
- unsigned LDepth = LLoop->getLoopDepth(), RDepth = RLoop->getLoopDepth();
- if (LDepth != RDepth)
- return (int)LDepth - (int)RDepth;
+ else
+ assert(DT.dominates(RHead, LHead) &&
+ "No dominance between recurrences used by one SCEV?");
+ return -1;
}
// Addrec complexity grows with operand count.
@@ -2512,22 +2512,23 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
AddRec->op_end());
for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
- ++OtherIdx)
- if (const auto *OtherAddRec = dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]))
- if (OtherAddRec->getLoop() == AddRecLoop) {
- for (unsigned i = 0, e = OtherAddRec->getNumOperands();
- i != e; ++i) {
- if (i >= AddRecOps.size()) {
- AddRecOps.append(OtherAddRec->op_begin()+i,
- OtherAddRec->op_end());
- break;
- }
- SmallVector<const SCEV *, 2> TwoOps = {
- AddRecOps[i], OtherAddRec->getOperand(i)};
- AddRecOps[i] = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1);
+ ++OtherIdx) {
+ const auto *OtherAddRec = cast<SCEVAddRecExpr>(Ops[OtherIdx]);
+ if (OtherAddRec->getLoop() == AddRecLoop) {
+ for (unsigned i = 0, e = OtherAddRec->getNumOperands();
+ i != e; ++i) {
+ if (i >= AddRecOps.size()) {
+ AddRecOps.append(OtherAddRec->op_begin()+i,
+ OtherAddRec->op_end());
+ break;
}
- Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
+ SmallVector<const SCEV *, 2> TwoOps = {
+ AddRecOps[i], OtherAddRec->getOperand(i)};
+ AddRecOps[i] = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1);
}
+ Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
+ }
+ }
// Step size has changed, so we cannot guarantee no self-wraparound.
Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap);
return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
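For context, when the restructured loop above finds a second add recurrence on the same loop, it folds the two elementwise and drops the wrap flags to FlagAnyWrap, since the step changes. In SCEV notation:

    {A,+,B}<L> + {C,+,D}<L>  -->  {A+C,+,B+D}<L>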
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 3a57772cc7f5..43b245c66400 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -128,8 +128,7 @@ AggressiveAntiDepBreaker::AggressiveAntiDepBreaker(
}
DEBUG(dbgs() << "AntiDep Critical-Path Registers:");
- DEBUG(for (int r = CriticalPathSet.find_first(); r != -1;
- r = CriticalPathSet.find_next(r))
+ DEBUG(for (unsigned r : CriticalPathSet.set_bits())
dbgs() << " " << TRI->getName(r));
DEBUG(dbgs() << '\n');
}
@@ -571,7 +570,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
DEBUG({
dbgs() << " ::";
- for (int r = BV.find_first(); r != -1; r = BV.find_next(r))
+ for (unsigned r : BV.set_bits())
dbgs() << " " << TRI->getName(r);
dbgs() << "\n";
});
diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index 98163bffb60b..7d945690e9c3 100644
--- a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -501,7 +501,7 @@ void CodeViewDebug::emitTypeInformation() {
Error E = Reader.readArray(Types, Reader.getLength());
if (!E) {
TypeVisitorCallbacks C;
- E = CVTypeVisitor(C).visitTypeStream(Types);
+ E = codeview::visitTypeStream(Types, C);
}
if (E) {
logAllUnhandledErrors(std::move(E), errs(), "error: ");
diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
index 22fd7bb46056..20e1467b30c3 100644
--- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
+++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
@@ -209,8 +209,7 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF,
} else if (MO.isRegMask()) {
// If this is a register mask operand, clobber all debug values in
// non-CSRs.
- for (int I = ChangingRegs.find_first(); I != -1;
- I = ChangingRegs.find_next(I)) {
+ for (unsigned I : ChangingRegs.set_bits()) {
// Don't consider SP to be clobbered by register masks.
if (unsigned(I) != SP && TRI->isPhysicalRegister(I) &&
MO.clobbersPhysReg(I)) {
diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 811858f136eb..77dfb13ac1f2 100644
--- a/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1129,6 +1129,11 @@ void IRTranslator::finalizeFunction() {
ValToVReg.clear();
FrameIndices.clear();
MachinePreds.clear();
+ // MachineIRBuilder::DebugLoc can outlive the DILocation it holds. Clear it
+ // to avoid accessing freed memory (in runOnMachineFunction) and to avoid
+ // destroying it twice (in ~IRTranslator() and ~LLVMContext()).
+ EntryBuilder = MachineIRBuilder();
+ CurBuilder = MachineIRBuilder();
}
bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index ab433273b189..b53b002f55a6 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -760,7 +760,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
const MachineFrameInfo &MFI = MF->getFrameInfo();
BitVector PR = MFI.getPristineRegs(*MF);
- for (int I = PR.find_first(); I>0; I = PR.find_next(I)) {
+ for (unsigned I : PR.set_bits()) {
for (MCSubRegIterator SubRegs(I, TRI, /*IncludeSelf=*/true);
SubRegs.isValid(); ++SubRegs)
regsLive.insert(*SubRegs);
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index 06500289c971..47d726f6da7a 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -285,8 +285,7 @@ class RAGreedy : public MachineFunctionPass,
// Set B[i] = C for every live bundle where B[i] was NoCand.
unsigned getBundles(SmallVectorImpl<unsigned> &B, unsigned C) {
unsigned Count = 0;
- for (int i = LiveBundles.find_first(); i >= 0;
- i = LiveBundles.find_next(i))
+ for (unsigned i : LiveBundles.set_bits())
if (B[i] == NoCand) {
B[i] = C;
Count++;
@@ -1162,9 +1161,8 @@ bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
}
DEBUG({
- for (int i = Cand.LiveBundles.find_first(); i>=0;
- i = Cand.LiveBundles.find_next(i))
- dbgs() << " EB#" << i;
+ for (int i : Cand.LiveBundles.set_bits())
+ dbgs() << " EB#" << i;
dbgs() << ".\n";
});
return true;
@@ -1482,8 +1480,7 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
DEBUG({
dbgs() << ", total = "; MBFI->printBlockFreq(dbgs(), Cost)
<< " with bundles";
- for (int i = Cand.LiveBundles.find_first(); i>=0;
- i = Cand.LiveBundles.find_next(i))
+ for (int i : Cand.LiveBundles.set_bits())
dbgs() << " EB#" << i;
dbgs() << ".\n";
});
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index caf5cb497a71..0ccee175abfb 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13087,14 +13087,28 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
}
}
- // If this is a store followed by a store with the same value to the same
- // location, then the store is dead/noop.
if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
- if (ST1->getBasePtr() == Ptr && ST->getMemoryVT() == ST1->getMemoryVT() &&
- ST1->getValue() == Value && ST->isUnindexed() && !ST->isVolatile() &&
- ST1->isUnindexed() && !ST1->isVolatile()) {
- // The store is dead, remove it.
- return Chain;
+ if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
+ !ST1->isVolatile() && ST1->getBasePtr() == Ptr &&
+ ST->getMemoryVT() == ST1->getMemoryVT()) {
+ // If this is a store followed by a store with the same value to the same
+ // location, then the store is dead/noop.
+ if (ST1->getValue() == Value) {
+ // The store is dead, remove it.
+ return Chain;
+ }
+
+ // If the preceding store writes to the same location and no other node
+ // is chained to it, we can effectively drop that earlier store. Do not
+ // remove stores to undef as they may be used as data sinks.
+ if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
+ !ST1->getBasePtr().isUndef()) {
+ // ST1 is fully overwritten and can be elided. Combine with its chain
+ // value.
+ CombineTo(ST1, ST1->getChain());
+ return SDValue();
+ }
}
}
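A hypothetical C-level shape of the two folds handled above (names are illustrative, not from the patch):

    void case1(int *p, int a) {
      *p = a;
      *p = a; // same value, same location: the later store is dead and folds away
    }

    void case2(int *p, int a, int b) {
      *p = a; // fully overwritten below; when the earlier store has a single
              // use and the optimization level is above None, it is now
              // elided (stores to undef addresses are kept as data sinks)
      *p = b;
    }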
diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp
index f10c98ef4e50..43cbf4add0f8 100644
--- a/lib/CodeGen/SpillPlacement.cpp
+++ b/lib/CodeGen/SpillPlacement.cpp
@@ -310,7 +310,7 @@ void SpillPlacement::addLinks(ArrayRef<unsigned> Links) {
bool SpillPlacement::scanActiveBundles() {
RecentPositive.clear();
- for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n)) {
+ for (unsigned n : ActiveNodes->set_bits()) {
update(n);
// A node that must spill, or a node without any links is not going to
// change its value ever again, so exclude it from iterations.
@@ -365,7 +365,7 @@ SpillPlacement::finish() {
// Write preferences back to ActiveNodes.
bool Perfect = true;
- for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n))
+ for (unsigned n : ActiveNodes->set_bits())
if (!nodes[n].preferReg()) {
ActiveNodes->reset(n);
Perfect = false;
diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp
index f51d959a089a..86a16187fcb6 100644
--- a/lib/CodeGen/StackColoring.cpp
+++ b/lib/CodeGen/StackColoring.cpp
@@ -703,12 +703,10 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
// Create the interval of the blocks that we previously found to be 'alive'.
BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB];
- for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1;
- pos = MBBLiveness.LiveIn.find_next(pos)) {
+ for (unsigned pos : MBBLiveness.LiveIn.set_bits()) {
Starts[pos] = Indexes->getMBBStartIdx(&MBB);
}
- for (int pos = MBBLiveness.LiveOut.find_first(); pos != -1;
- pos = MBBLiveness.LiveOut.find_next(pos)) {
+ for (unsigned pos : MBBLiveness.LiveOut.set_bits()) {
Finishes[pos] = Indexes->getMBBEndIdx(&MBB);
}
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index 39aa946fa840..5f63fd4320bb 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -1312,7 +1312,7 @@ TargetLoweringBase::findRepresentativeClass(const TargetRegisterInfo *TRI,
// Find the first legal register class with the largest spill size.
const TargetRegisterClass *BestRC = RC;
- for (int i = SuperRegRC.find_first(); i >= 0; i = SuperRegRC.find_next(i)) {
+ for (unsigned i : SuperRegRC.set_bits()) {
const TargetRegisterClass *SuperRC = TRI->getRegClass(i);
// We want the largest possible spill size.
if (TRI->getSpillSize(*SuperRC) <= TRI->getSpillSize(*BestRC))
diff --git a/lib/CodeGen/TargetPassConfig.cpp b/lib/CodeGen/TargetPassConfig.cpp
index e6c5d8753b83..9724cb074584 100644
--- a/lib/CodeGen/TargetPassConfig.cpp
+++ b/lib/CodeGen/TargetPassConfig.cpp
@@ -564,6 +564,14 @@ void TargetPassConfig::addISelPrepare() {
addPass(createVerifierPass());
}
+/// -regalloc=... command line option.
+static FunctionPass *useDefaultRegisterAllocator() { return nullptr; }
+static cl::opt<RegisterRegAlloc::FunctionPassCtor, false,
+ RegisterPassParser<RegisterRegAlloc> >
+RegAlloc("regalloc",
+ cl::init(&useDefaultRegisterAllocator),
+ cl::desc("Register allocator to use"));
+
/// Add the complete set of target-independent postISel code generator passes.
///
/// This can be read as the standard order of major LLVM CodeGen stages. Stages
@@ -625,8 +633,12 @@ void TargetPassConfig::addMachinePasses() {
// including phi elimination and scheduling.
if (getOptimizeRegAlloc())
addOptimizedRegAlloc(createRegAllocPass(true));
- else
+ else {
+ if (RegAlloc != &useDefaultRegisterAllocator &&
+ RegAlloc != &createFastRegisterAllocator)
+ report_fatal_error("Must use fast (default) register allocator for unoptimized regalloc.");
addFastRegAlloc(createRegAllocPass(false));
+ }
// Run post-ra passes.
addPostRegAlloc();
@@ -759,19 +771,12 @@ MachinePassRegistry RegisterRegAlloc::Registry;
/// A dummy default pass factory indicates whether the register allocator is
/// overridden on the command line.
static llvm::once_flag InitializeDefaultRegisterAllocatorFlag;
-static FunctionPass *useDefaultRegisterAllocator() { return nullptr; }
+
static RegisterRegAlloc
defaultRegAlloc("default",
"pick register allocator based on -O option",
useDefaultRegisterAllocator);
-/// -regalloc=... command line option.
-static cl::opt<RegisterRegAlloc::FunctionPassCtor, false,
- RegisterPassParser<RegisterRegAlloc> >
-RegAlloc("regalloc",
- cl::init(&useDefaultRegisterAllocator),
- cl::desc("Register allocator to use"));
-
static void initializeDefaultRegisterAllocatorOnce() {
RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault();
@@ -781,7 +786,6 @@ static void initializeDefaultRegisterAllocatorOnce() {
}
}
-
/// Instantiate the default register allocator pass for this target for either
/// the optimized or unoptimized allocation path. This will be added to the pass
/// manager by addFastRegAlloc in the unoptimized case or addOptimizedRegAlloc
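With the -regalloc option now declared before addMachinePasses and the new check in place, explicitly requesting a non-fast allocator on the unoptimized path aborts. A hypothetical invocation (the error string is taken verbatim from the patch):

    $ llc -O0 -regalloc=greedy input.ll
    LLVM ERROR: Must use fast (default) register allocator for unoptimized regalloc.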
diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp
index f6e4c17d514c..41ec082a24cf 100644
--- a/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/lib/CodeGen/TargetRegisterInfo.cpp
@@ -50,8 +50,7 @@ bool TargetRegisterInfo::checkAllSuperRegsMarked(const BitVector &RegisterSet,
ArrayRef<MCPhysReg> Exceptions) const {
// Check that all super registers of reserved regs are reserved as well.
BitVector Checked(getNumRegs());
- for (int Reg = RegisterSet.find_first(); Reg>=0;
- Reg = RegisterSet.find_next(Reg)) {
+ for (unsigned Reg : RegisterSet.set_bits()) {
if (Checked[Reg])
continue;
for (MCSuperRegIterator SR(Reg, this); SR.isValid(); ++SR) {
diff --git a/lib/DebugInfo/CodeView/CVTypeDumper.cpp b/lib/DebugInfo/CodeView/CVTypeDumper.cpp
index bcc8218d9446..02e1682f76e7 100644
--- a/lib/DebugInfo/CodeView/CVTypeDumper.cpp
+++ b/lib/DebugInfo/CodeView/CVTypeDumper.cpp
@@ -11,7 +11,6 @@
#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
#include "llvm/DebugInfo/CodeView/TypeDatabase.h"
#include "llvm/DebugInfo/CodeView/TypeDatabaseVisitor.h"
-#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h"
#include "llvm/Support/BinaryByteStream.h"
@@ -21,38 +20,23 @@ using namespace llvm::codeview;
Error CVTypeDumper::dump(const CVType &Record, TypeVisitorCallbacks &Dumper) {
TypeDatabaseVisitor DBV(TypeDB);
- TypeDeserializer Deserializer;
TypeVisitorCallbackPipeline Pipeline;
- Pipeline.addCallbackToPipeline(Deserializer);
Pipeline.addCallbackToPipeline(DBV);
Pipeline.addCallbackToPipeline(Dumper);
- CVTypeVisitor Visitor(Pipeline);
- if (Handler)
- Visitor.addTypeServerHandler(*Handler);
-
CVType RecordCopy = Record;
- if (auto EC = Visitor.visitTypeRecord(RecordCopy))
- return EC;
- return Error::success();
+ return codeview::visitTypeRecord(RecordCopy, Pipeline, VDS_BytesPresent,
+ Handler);
}
Error CVTypeDumper::dump(const CVTypeArray &Types,
TypeVisitorCallbacks &Dumper) {
TypeDatabaseVisitor DBV(TypeDB);
- TypeDeserializer Deserializer;
TypeVisitorCallbackPipeline Pipeline;
- Pipeline.addCallbackToPipeline(Deserializer);
Pipeline.addCallbackToPipeline(DBV);
Pipeline.addCallbackToPipeline(Dumper);
- CVTypeVisitor Visitor(Pipeline);
- if (Handler)
- Visitor.addTypeServerHandler(*Handler);
-
- if (auto EC = Visitor.visitTypeStream(Types))
- return EC;
- return Error::success();
+ return codeview::visitTypeStream(Types, Pipeline, Handler);
}
Error CVTypeDumper::dump(ArrayRef<uint8_t> Data, TypeVisitorCallbacks &Dumper) {
diff --git a/lib/DebugInfo/CodeView/CVTypeVisitor.cpp b/lib/DebugInfo/CodeView/CVTypeVisitor.cpp
index b6ed0453d9c4..0f7f5f667790 100644
--- a/lib/DebugInfo/CodeView/CVTypeVisitor.cpp
+++ b/lib/DebugInfo/CodeView/CVTypeVisitor.cpp
@@ -59,13 +59,8 @@ static Expected<TypeServer2Record> deserializeTypeServerRecord(CVType &Record) {
};
TypeServer2Record R(TypeRecordKind::TypeServer2);
- TypeDeserializer Deserializer;
StealTypeServerVisitor Thief(R);
- TypeVisitorCallbackPipeline Pipeline;
- Pipeline.addCallbackToPipeline(Deserializer);
- Pipeline.addCallbackToPipeline(Thief);
- CVTypeVisitor Visitor(Pipeline);
- if (auto EC = Visitor.visitTypeRecord(Record))
+ if (auto EC = visitTypeRecord(Record, Thief))
return std::move(EC);
return R;
@@ -178,7 +173,7 @@ static Error visitMemberRecord(CVMemberRecord &Record,
return Error::success();
}
-Error CVTypeVisitor::visitMemberRecord(CVMemberRecord &Record) {
+Error CVTypeVisitor::visitMemberRecord(CVMemberRecord Record) {
return ::visitMemberRecord(Record, Callbacks);
}
@@ -224,3 +219,93 @@ Error CVTypeVisitor::visitFieldListMemberStream(ArrayRef<uint8_t> Data) {
BinaryStreamReader SR(S);
return visitFieldListMemberStream(SR);
}
+
+namespace {
+struct FieldListVisitHelper {
+ FieldListVisitHelper(TypeVisitorCallbacks &Callbacks, ArrayRef<uint8_t> Data,
+ VisitorDataSource Source)
+ : Stream(Data, llvm::support::little), Reader(Stream),
+ Deserializer(Reader),
+ Visitor((Source == VDS_BytesPresent) ? Pipeline : Callbacks) {
+ if (Source == VDS_BytesPresent) {
+ Pipeline.addCallbackToPipeline(Deserializer);
+ Pipeline.addCallbackToPipeline(Callbacks);
+ }
+ }
+
+ BinaryByteStream Stream;
+ BinaryStreamReader Reader;
+ FieldListDeserializer Deserializer;
+ TypeVisitorCallbackPipeline Pipeline;
+ CVTypeVisitor Visitor;
+};
+
+struct VisitHelper {
+ VisitHelper(TypeVisitorCallbacks &Callbacks, VisitorDataSource Source,
+ TypeServerHandler *TS)
+ : Visitor((Source == VDS_BytesPresent) ? Pipeline : Callbacks) {
+ if (TS)
+ Visitor.addTypeServerHandler(*TS);
+ if (Source == VDS_BytesPresent) {
+ Pipeline.addCallbackToPipeline(Deserializer);
+ Pipeline.addCallbackToPipeline(Callbacks);
+ }
+ }
+
+ TypeDeserializer Deserializer;
+ TypeVisitorCallbackPipeline Pipeline;
+ CVTypeVisitor Visitor;
+};
+}
+
+Error llvm::codeview::visitTypeRecord(CVType &Record, TypeIndex Index,
+ TypeVisitorCallbacks &Callbacks,
+ VisitorDataSource Source,
+ TypeServerHandler *TS) {
+ VisitHelper Helper(Callbacks, Source, TS);
+ return Helper.Visitor.visitTypeRecord(Record, Index);
+}
+
+Error llvm::codeview::visitTypeRecord(CVType &Record,
+ TypeVisitorCallbacks &Callbacks,
+ VisitorDataSource Source,
+ TypeServerHandler *TS) {
+ VisitHelper Helper(Callbacks, Source, TS);
+ return Helper.Visitor.visitTypeRecord(Record);
+}
+
+Error llvm::codeview::visitMemberRecordStream(ArrayRef<uint8_t> FieldList,
+ TypeVisitorCallbacks &Callbacks) {
+ CVTypeVisitor Visitor(Callbacks);
+ return Visitor.visitFieldListMemberStream(FieldList);
+}
+
+Error llvm::codeview::visitMemberRecord(CVMemberRecord Record,
+ TypeVisitorCallbacks &Callbacks,
+ VisitorDataSource Source) {
+ FieldListVisitHelper Helper(Callbacks, Record.Data, Source);
+ return Helper.Visitor.visitMemberRecord(Record);
+}
+
+Error llvm::codeview::visitMemberRecord(TypeLeafKind Kind,
+ ArrayRef<uint8_t> Record,
+ TypeVisitorCallbacks &Callbacks) {
+ CVMemberRecord R;
+ R.Data = Record;
+ R.Kind = Kind;
+ return visitMemberRecord(R, Callbacks, VDS_BytesPresent);
+}
+
+Error llvm::codeview::visitTypeStream(const CVTypeArray &Types,
+ TypeVisitorCallbacks &Callbacks,
+ TypeServerHandler *TS) {
+ VisitHelper Helper(Callbacks, VDS_BytesPresent, TS);
+ return Helper.Visitor.visitTypeStream(Types);
+}
+
+Error llvm::codeview::visitTypeStream(CVTypeRange Types,
+ TypeVisitorCallbacks &Callbacks,
+ TypeServerHandler *TS) {
+ VisitHelper Helper(Callbacks, VDS_BytesPresent, TS);
+ return Helper.Visitor.visitTypeStream(Types);
+}
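These new free functions let callers drop the manual TypeDeserializer/CVTypeVisitor pipeline setup removed throughout this import. A hedged sketch, assuming a callbacks class MyDumper derived from TypeVisitorCallbacks:

    #include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
    using namespace llvm;
    using namespace llvm::codeview;

    Error dumpAll(const CVTypeArray &Types, MyDumper &Dumper) {
      // Deserialization is implied by the default VDS_BytesPresent source;
      // pass a TypeServerHandler* as the optional third argument if needed.
      return visitTypeStream(Types, Dumper);
    }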
diff --git a/lib/DebugInfo/CodeView/RandomAccessTypeVisitor.cpp b/lib/DebugInfo/CodeView/RandomAccessTypeVisitor.cpp
index 4cb9acbe07d9..704d1131108a 100644
--- a/lib/DebugInfo/CodeView/RandomAccessTypeVisitor.cpp
+++ b/lib/DebugInfo/CodeView/RandomAccessTypeVisitor.cpp
@@ -9,6 +9,7 @@
#include "llvm/DebugInfo/CodeView/RandomAccessTypeVisitor.h"
+#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
#include "llvm/DebugInfo/CodeView/TypeDatabase.h"
#include "llvm/DebugInfo/CodeView/TypeServerHandler.h"
#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
@@ -20,9 +21,7 @@ RandomAccessTypeVisitor::RandomAccessTypeVisitor(
const CVTypeArray &Types, uint32_t NumRecords,
PartialOffsetArray PartialOffsets)
: Database(NumRecords), Types(Types), DatabaseVisitor(Database),
- InternalVisitor(Pipeline), PartialOffsets(PartialOffsets) {
- Pipeline.addCallbackToPipeline(Deserializer);
- Pipeline.addCallbackToPipeline(DatabaseVisitor);
+ PartialOffsets(PartialOffsets) {
KnownOffsets.resize(Database.capacity());
}
@@ -38,8 +37,7 @@ Error RandomAccessTypeVisitor::visitTypeIndex(TypeIndex TI,
assert(Database.contains(TI));
auto &Record = Database.getTypeRecord(TI);
- CVTypeVisitor V(Callbacks);
- return V.visitTypeRecord(Record, TI);
+ return codeview::visitTypeRecord(Record, TI, Callbacks);
}
Error RandomAccessTypeVisitor::visitRangeForType(TypeIndex TI) {
@@ -78,7 +76,7 @@ Error RandomAccessTypeVisitor::visitRange(TypeIndex Begin, uint32_t BeginOffset,
while (Begin != End) {
assert(!Database.contains(Begin));
- if (auto EC = InternalVisitor.visitTypeRecord(*RI, Begin))
+ if (auto EC = codeview::visitTypeRecord(*RI, Begin, DatabaseVisitor))
return EC;
KnownOffsets[Begin.toArrayIndex()] = BeginOffset;
diff --git a/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp b/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp
index 27a6e0987886..9485c9cfedff 100644
--- a/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp
+++ b/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp
@@ -216,8 +216,7 @@ Error TypeDumpVisitor::visitMemberEnd(CVMemberRecord &Record) {
Error TypeDumpVisitor::visitKnownRecord(CVType &CVR,
FieldListRecord &FieldList) {
- CVTypeVisitor Visitor(*this);
- if (auto EC = Visitor.visitFieldListMemberStream(FieldList.Data))
+ if (auto EC = codeview::visitMemberRecordStream(FieldList.Data, *this))
return EC;
return Error::success();
diff --git a/lib/DebugInfo/CodeView/TypeStreamMerger.cpp b/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
index aad20ae6dda1..51f24fa3f135 100644
--- a/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
+++ b/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
@@ -361,8 +361,7 @@ Error TypeStreamMerger::visitKnownRecord(CVType &, FieldListRecord &R) {
// Visit the members inside the field list.
HadUntranslatedMember = false;
FieldListBuilder.begin();
- CVTypeVisitor Visitor(*this);
- if (auto EC = Visitor.visitFieldListMemberStream(R.Data))
+ if (auto EC = codeview::visitMemberRecordStream(R.Data, *this))
return EC;
// Write the record if we translated all field list members.
@@ -440,18 +439,9 @@ Error TypeStreamMerger::visitUnknownType(CVType &Rec) {
Error TypeStreamMerger::mergeStream(const CVTypeArray &Types) {
assert(IndexMap.empty());
- TypeVisitorCallbackPipeline Pipeline;
LastError = Error::success();
- TypeDeserializer Deserializer;
- Pipeline.addCallbackToPipeline(Deserializer);
- Pipeline.addCallbackToPipeline(*this);
-
- CVTypeVisitor Visitor(Pipeline);
- if (Handler)
- Visitor.addTypeServerHandler(*Handler);
-
- if (auto EC = Visitor.visitTypeStream(Types))
+ if (auto EC = codeview::visitTypeStream(Types, *this, Handler))
return EC;
// If we found bad indices but no other errors, try doing another pass and see
@@ -466,7 +456,8 @@ Error TypeStreamMerger::mergeStream(const CVTypeArray &Types) {
IsSecondPass = true;
NumBadIndices = 0;
CurIndex = TypeIndex(TypeIndex::FirstNonSimpleIndex);
- if (auto EC = Visitor.visitTypeStream(Types))
+
+ if (auto EC = codeview::visitTypeStream(Types, *this, Handler))
return EC;
assert(NumBadIndices <= BadIndicesRemaining &&
diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp
index 59a060d143ff..61e75a2b56ab 100644
--- a/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -1086,49 +1086,32 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj,
continue;
}
+ if (Section.relocation_begin() == Section.relocation_end())
+ continue;
+
std::map<SymbolRef, uint64_t> AddrCache;
- if (Section.relocation_begin() != Section.relocation_end()) {
- uint64_t SectionSize = RelocatedSection->getSize();
- for (const RelocationRef &Reloc : Section.relocations()) {
- // FIXME: it's not clear how to correctly handle scattered
- // relocations.
- if (isRelocScattered(Obj, Reloc))
- continue;
+ for (const RelocationRef &Reloc : Section.relocations()) {
+ // FIXME: it's not clear how to correctly handle scattered
+ // relocations.
+ if (isRelocScattered(Obj, Reloc))
+ continue;
- Expected<uint64_t> SymAddrOrErr =
- getSymbolAddress(Obj, Reloc, L, AddrCache);
- if (!SymAddrOrErr) {
- errs() << toString(SymAddrOrErr.takeError()) << '\n';
- continue;
- }
+ Expected<uint64_t> SymAddrOrErr =
+ getSymbolAddress(Obj, Reloc, L, AddrCache);
+ if (!SymAddrOrErr) {
+ errs() << toString(SymAddrOrErr.takeError()) << '\n';
+ continue;
+ }
- object::RelocVisitor V(Obj);
- object::RelocToApply R(V.visit(Reloc.getType(), Reloc, *SymAddrOrErr));
- if (V.error()) {
- SmallString<32> Name;
- Reloc.getTypeName(Name);
- errs() << "error: failed to compute relocation: "
- << Name << "\n";
- continue;
- }
- uint64_t Address = Reloc.getOffset();
- if (Address + R.Width > SectionSize) {
- errs() << "error: " << R.Width << "-byte relocation starting "
- << Address << " bytes into section " << name << " which is "
- << SectionSize << " bytes long.\n";
- continue;
- }
- if (R.Width > 8) {
- errs() << "error: can't handle a relocation of more than 8 bytes at "
- "a time.\n";
- continue;
- }
- DEBUG(dbgs() << "Writing " << format("%p", R.Value)
- << " at " << format("%p", Address)
- << " with width " << format("%d", R.Width)
- << "\n");
- Map->insert({Address, {(uint8_t)R.Width, R.Value}});
+ object::RelocVisitor V(Obj);
+ object::RelocToApply R(V.visit(Reloc.getType(), Reloc, *SymAddrOrErr));
+ if (V.error()) {
+ SmallString<32> Name;
+ Reloc.getTypeName(Name);
+ errs() << "error: failed to compute relocation: " << Name << "\n";
+ continue;
}
+ Map->insert({Reloc.getOffset(), {R.Value}});
}
}
}
diff --git a/lib/DebugInfo/PDB/Native/PDBTypeServerHandler.cpp b/lib/DebugInfo/PDB/Native/PDBTypeServerHandler.cpp
index 629f3e80b0ed..cb783cf4fea7 100644
--- a/lib/DebugInfo/PDB/Native/PDBTypeServerHandler.cpp
+++ b/lib/DebugInfo/PDB/Native/PDBTypeServerHandler.cpp
@@ -55,9 +55,8 @@ PDBTypeServerHandler::handleInternal(PDBFile &File,
auto ExpectedTpi = File.getPDBTpiStream();
if (!ExpectedTpi)
return ExpectedTpi.takeError();
- CVTypeVisitor Visitor(Callbacks);
- if (auto EC = Visitor.visitTypeStream(ExpectedTpi->types(nullptr)))
+ if (auto EC = codeview::visitTypeStream(ExpectedTpi->typeArray(), Callbacks))
return std::move(EC);
return true;
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index e9a4b71c903d..ab86e5d6a0fd 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -705,7 +705,7 @@ RuntimeDyldImpl::emitSection(const ObjectFile &Obj,
unsigned Alignment = (unsigned)Alignment64 & 0xffffffffL;
unsigned PaddingSize = 0;
unsigned StubBufSize = 0;
- bool IsRequired = isRequiredForExecution(Section) || ProcessAllSections;
+ bool IsRequired = isRequiredForExecution(Section);
bool IsVirtual = Section.isVirtual();
bool IsZeroInit = isZeroInit(Section);
bool IsReadOnly = isReadOnlyData(Section);
@@ -745,8 +745,8 @@ RuntimeDyldImpl::emitSection(const ObjectFile &Obj,
Alignment = std::max(Alignment, getStubAlignment());
// Some sections, such as debug info, don't need to be loaded for execution.
- // Leave those where they are.
- if (IsRequired) {
+ // Process those only if explicitly requested.
+ if (IsRequired || ProcessAllSections) {
Allocate = DataSize + PaddingSize + StubBufSize;
if (!Allocate)
Allocate = 1;
@@ -790,6 +790,10 @@ RuntimeDyldImpl::emitSection(const ObjectFile &Obj,
Sections.push_back(
SectionEntry(Name, Addr, DataSize, Allocate, (uintptr_t)pData));
+ // Debug info sections are linked as if their load address was zero
+ if (!IsRequired)
+ Sections.back().setLoadAddress(0);
+
if (Checker)
Checker->registerSection(Obj.getFileName(), SectionID);
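Net behavior of the hunks above, as a hedged usage sketch (MemMgr, Resolver, and Obj are assumed to be created elsewhere): non-required sections such as debug info are allocated only when ProcessAllSections is set, and are then linked as if loaded at address zero.

    RuntimeDyld Dyld(MemMgr, Resolver);
    Dyld.setProcessAllSections(true); // also allocate non-required sections
                                      // (e.g. debug info), now linked as if
                                      // their load address were zero
    Dyld.loadObject(Obj);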
diff --git a/lib/Support/CrashRecoveryContext.cpp b/lib/Support/CrashRecoveryContext.cpp
index 98865f5e065e..bd38dd88201f 100644
--- a/lib/Support/CrashRecoveryContext.cpp
+++ b/lib/Support/CrashRecoveryContext.cpp
@@ -78,6 +78,9 @@ static bool gCrashRecoveryEnabled = false;
static ManagedStatic<sys::ThreadLocal<const CrashRecoveryContext>>
tlIsRecoveringFromCrash;
+static void installExceptionOrSignalHandlers();
+static void uninstallExceptionOrSignalHandlers();
+
CrashRecoveryContextCleanup::~CrashRecoveryContextCleanup() {}
CrashRecoveryContext::~CrashRecoveryContext() {
@@ -113,6 +116,23 @@ CrashRecoveryContext *CrashRecoveryContext::GetCurrent() {
return CRCI->CRC;
}
+void CrashRecoveryContext::Enable() {
+ sys::ScopedLock L(*gCrashRecoveryContextMutex);
+ // FIXME: Shouldn't this be a refcount or something?
+ if (gCrashRecoveryEnabled)
+ return;
+ gCrashRecoveryEnabled = true;
+ installExceptionOrSignalHandlers();
+}
+
+void CrashRecoveryContext::Disable() {
+ sys::ScopedLock L(*gCrashRecoveryContextMutex);
+ if (!gCrashRecoveryEnabled)
+ return;
+ gCrashRecoveryEnabled = false;
+ uninstallExceptionOrSignalHandlers();
+}
+
void CrashRecoveryContext::registerCleanup(CrashRecoveryContextCleanup *cleanup)
{
if (!cleanup)
@@ -140,30 +160,70 @@ CrashRecoveryContext::unregisterCleanup(CrashRecoveryContextCleanup *cleanup) {
delete cleanup;
}
-#ifdef LLVM_ON_WIN32
+#if defined(_MSC_VER)
+// If _MSC_VER is defined, we must have SEH. Use it if it's available. It's way
+// better than VEH. Vectored exception handling catches all exceptions happening
+// on the thread with installed exception handlers, so it can interfere with
+// internal exception handling of other libraries on that thread. SEH works
+// exactly as you would expect normal exception handling to work: it only
+// catches exceptions if they would bubble out from the stack frame with __try /
+// __except.
-#include "Windows/WindowsSupport.h"
+static void installExceptionOrSignalHandlers() {}
+static void uninstallExceptionOrSignalHandlers() {}
-// On Windows, we can make use of vectored exception handling to
-// catch most crashing situations. Note that this does mean
-// we will be alerted of exceptions *before* structured exception
-// handling has the opportunity to catch it. But that isn't likely
-// to cause problems because nowhere in the project is SEH being
-// used.
+bool CrashRecoveryContext::RunSafely(function_ref<void()> Fn) {
+ if (!gCrashRecoveryEnabled) {
+ Fn();
+ return true;
+ }
+
+ bool Result = true;
+ __try {
+ Fn();
+ } __except (1) { // Catch any exception.
+ Result = false;
+ }
+ return Result;
+}
+
+#else // !_MSC_VER
+
+#if defined(LLVM_ON_WIN32)
+// This is a non-MSVC compiler, probably mingw gcc or clang without
+// -fms-extensions. Use vectored exception handling (VEH).
+//
+// On Windows, we can make use of vectored exception handling to catch most
+// crashing situations. Note that this does mean we will be alerted of
+// exceptions *before* structured exception handling has the opportunity to
+// catch it. Unfortunately, this causes problems in practice with other code
+// running on threads with LLVM crash recovery contexts, so we would like to
+// eventually move away from VEH.
//
-// Vectored exception handling is built on top of SEH, and so it
-// works on a per-thread basis.
+// Vectored exception handling works on a per-thread basis, which is an
+// advantage over SetUnhandledExceptionFilter: SetUnhandledExceptionFilter
+// has no native support for chaining exception handlers, while VEH allows
+// more than one.
//
// The vectored exception handler functionality was added in Windows
// XP, so if support for older versions of Windows is required,
// it will have to be added.
-//
-// If we want to support as far back as Win2k, we could use the
-// SetUnhandledExceptionFilter API, but there's a risk of that
-// being entirely overwritten (it's not a chain).
+
+#include "Windows/WindowsSupport.h"
static LONG CALLBACK ExceptionHandler(PEXCEPTION_POINTERS ExceptionInfo)
{
+ // DBG_PRINTEXCEPTION_WIDE_C is not properly defined on all supported
+ // compilers and platforms, so we define it manually.
+ constexpr ULONG DbgPrintExceptionWideC = 0x4001000AL;
+ switch (ExceptionInfo->ExceptionRecord->ExceptionCode)
+ {
+ case DBG_PRINTEXCEPTION_C:
+ case DbgPrintExceptionWideC:
+ case 0x406D1388: // set debugger thread name
+ return EXCEPTION_CONTINUE_EXECUTION;
+ }
+
// Lookup the current thread local recovery object.
const CrashRecoveryContextImpl *CRCI = CurrentContext->get();
@@ -192,14 +252,7 @@ static LONG CALLBACK ExceptionHandler(PEXCEPTION_POINTERS ExceptionInfo)
// non-NULL, valid VEH handles, or NULL.
static sys::ThreadLocal<const void> sCurrentExceptionHandle;
-void CrashRecoveryContext::Enable() {
- sys::ScopedLock L(*gCrashRecoveryContextMutex);
-
- if (gCrashRecoveryEnabled)
- return;
-
- gCrashRecoveryEnabled = true;
-
+static void installExceptionOrSignalHandlers() {
// We can set up vectored exception handling now. We will install our
// handler as the front of the list, though there's no assurances that
// it will remain at the front (another call could install itself before
@@ -208,14 +261,7 @@ void CrashRecoveryContext::Enable() {
sCurrentExceptionHandle.set(handle);
}
-void CrashRecoveryContext::Disable() {
- sys::ScopedLock L(*gCrashRecoveryContextMutex);
-
- if (!gCrashRecoveryEnabled)
- return;
-
- gCrashRecoveryEnabled = false;
-
+static void uninstallExceptionOrSignalHandlers() {
PVOID currentHandle = const_cast<PVOID>(sCurrentExceptionHandle.get());
if (currentHandle) {
// Now we can remove the vectored exception handler from the chain
@@ -226,7 +272,7 @@ void CrashRecoveryContext::Disable() {
}
}
-#else
+#else // !LLVM_ON_WIN32
// Generic POSIX implementation.
//
@@ -278,14 +324,7 @@ static void CrashRecoverySignalHandler(int Signal) {
const_cast<CrashRecoveryContextImpl*>(CRCI)->HandleCrash();
}
-void CrashRecoveryContext::Enable() {
- sys::ScopedLock L(*gCrashRecoveryContextMutex);
-
- if (gCrashRecoveryEnabled)
- return;
-
- gCrashRecoveryEnabled = true;
-
+static void installExceptionOrSignalHandlers() {
// Setup the signal handler.
struct sigaction Handler;
Handler.sa_handler = CrashRecoverySignalHandler;
@@ -297,20 +336,13 @@ void CrashRecoveryContext::Enable() {
}
}
-void CrashRecoveryContext::Disable() {
- sys::ScopedLock L(*gCrashRecoveryContextMutex);
-
- if (!gCrashRecoveryEnabled)
- return;
-
- gCrashRecoveryEnabled = false;
-
+static void uninstallExceptionOrSignalHandlers() {
// Restore the previous signal handlers.
for (unsigned i = 0; i != NumSignals; ++i)
sigaction(Signals[i], &PrevActions[i], nullptr);
}
-#endif
+#endif // !LLVM_ON_WIN32
bool CrashRecoveryContext::RunSafely(function_ref<void()> Fn) {
// If crash recovery is disabled, do nothing.
@@ -328,6 +360,8 @@ bool CrashRecoveryContext::RunSafely(function_ref<void()> Fn) {
return true;
}
+#endif // !_MSC_VER
+
void CrashRecoveryContext::HandleCrash() {
CrashRecoveryContextImpl *CRCI = (CrashRecoveryContextImpl *) Impl;
assert(CRCI && "Crash recovery context never initialized!");
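The public API is unchanged by this refactor; only handler installation moves behind the two new static helpers, and MSVC builds gain an SEH-based RunSafely. A minimal usage sketch:

    #include "llvm/Support/CrashRecoveryContext.h"
    using namespace llvm;

    bool runGuarded(function_ref<void()> Work) {
      CrashRecoveryContext::Enable(); // installs SEH/VEH/signal handlers once
      CrashRecoveryContext CRC;
      return CRC.RunSafely(Work);     // false if Work crashed
    }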
diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc
index cdea09be41e0..fa28ba1b6ab6 100644
--- a/lib/Support/Unix/Path.inc
+++ b/lib/Support/Unix/Path.inc
@@ -103,16 +103,13 @@
#define STATVFS_F_FLAG(vfs) (vfs).f_flags
#endif
-#if defined(__FreeBSD__) || defined(__NetBSD__)
-#include <sys/sysctl.h>
-#endif
-
using namespace llvm;
namespace llvm {
namespace sys {
namespace fs {
-#if defined(__Bitrig__) || defined(__OpenBSD__) || defined(__minix) || \
+#if defined(__FreeBSD__) || defined (__NetBSD__) || defined(__Bitrig__) || \
+ defined(__OpenBSD__) || defined(__minix) || defined(__FreeBSD_kernel__) || \
defined(__linux__) || defined(__CYGWIN__) || defined(__DragonFly__) || \
defined(_AIX)
static int
@@ -167,7 +164,7 @@ getprogpath(char ret[PATH_MAX], const char *bin)
free(pv);
return nullptr;
}
-#endif // Bitrig || OpenBSD || minix || linux || CYGWIN || DragonFly || AIX
+#endif // FreeBSD || NetBSD || FreeBSD_kernel || Bitrig || OpenBSD || minix || linux || CYGWIN || DragonFly || AIX
/// GetMainExecutable - Return the path to the main executable, given the
/// value of argv[0] from program startup.
@@ -183,24 +180,9 @@ std::string getMainExecutable(const char *argv0, void *MainAddr) {
if (realpath(exe_path, link_path))
return link_path;
}
-#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__)
- int mib[4];
- mib[0] = CTL_KERN;
-#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
- mib[1] = KERN_PROC;
- mib[2] = KERN_PROC_PATHNAME;
- mib[3] = -1;
-#else
- mib[1] = KERN_PROC_ARGS;
- mib[2] = -1;
- mib[3] = KERN_PROC_PATHNAME;
-#endif
- char exe_path[PATH_MAX];
- size_t cb = sizeof(exe_path);
- if (sysctl(mib, 4, exe_path, &cb, NULL, 0) == 0)
- return exe_path;
-#elif defined(__Bitrig__) || defined(__OpenBSD__) || defined(__minix) || \
- defined(__DragonFly__) || defined(_AIX)
+#elif defined(__FreeBSD__) || defined (__NetBSD__) || defined(__Bitrig__) || \
+ defined(__OpenBSD__) || defined(__minix) || defined(__DragonFly__) || \
+ defined(__FreeBSD_kernel__) || defined(_AIX)
char exe_path[PATH_MAX];
if (getprogpath(exe_path, argv0) != NULL)
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
index dc916c034661..1aec602a2a36 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1158,8 +1158,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
}
DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:";
- for (int Reg = SavedRegs.find_first(); Reg != -1;
- Reg = SavedRegs.find_next(Reg))
+ for (unsigned Reg : SavedRegs.set_bits())
dbgs() << ' ' << PrintReg(Reg, RegInfo);
dbgs() << "\n";);
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 4f7c2e122390..1af36086ad90 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -553,7 +553,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::INTRINSIC_VOID);
setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
- setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 8;
MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 4;
@@ -659,6 +658,19 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
+ // Vector reductions
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
+ }
+ for (MVT VT : MVT::fp_valuetypes()) {
+ setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
+ }
+
setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal);
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
// Likewise, narrowing and extending vector loads/stores aren't handled
@@ -2606,6 +2618,14 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerMUL(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN:
return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMIN:
+ return LowerVECREDUCE(Op, DAG);
}
}
@@ -7128,6 +7148,47 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
return Cmp;
}
+static SDValue getReductionSDNode(unsigned Op, SDLoc DL, SDValue ScalarOp,
+ SelectionDAG &DAG) {
+ SDValue VecOp = ScalarOp.getOperand(0);
+ auto Rdx = DAG.getNode(Op, DL, VecOp.getSimpleValueType(), VecOp);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarOp.getValueType(), Rdx,
+ DAG.getConstant(0, DL, MVT::i64));
+}
+
+SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ switch (Op.getOpcode()) {
+ case ISD::VECREDUCE_ADD:
+ return getReductionSDNode(AArch64ISD::UADDV, dl, Op, DAG);
+ case ISD::VECREDUCE_SMAX:
+ return getReductionSDNode(AArch64ISD::SMAXV, dl, Op, DAG);
+ case ISD::VECREDUCE_SMIN:
+ return getReductionSDNode(AArch64ISD::SMINV, dl, Op, DAG);
+ case ISD::VECREDUCE_UMAX:
+ return getReductionSDNode(AArch64ISD::UMAXV, dl, Op, DAG);
+ case ISD::VECREDUCE_UMIN:
+ return getReductionSDNode(AArch64ISD::UMINV, dl, Op, DAG);
+ case ISD::VECREDUCE_FMAX: {
+ assert(Op->getFlags().hasNoNaNs() && "fmax vector reduction needs NoNaN flag");
+ return DAG.getNode(
+ ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
+ DAG.getConstant(Intrinsic::aarch64_neon_fmaxnmv, dl, MVT::i32),
+ Op.getOperand(0));
+ }
+ case ISD::VECREDUCE_FMIN: {
+ assert(Op->getFlags().hasNoNaNs() && "fmin vector reduction needs NoNaN flag");
+ return DAG.getNode(
+ ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
+ DAG.getConstant(Intrinsic::aarch64_neon_fminnmv, dl, MVT::i32),
+ Op.getOperand(0));
+ }
+ default:
+ llvm_unreachable("Unhandled reduction");
+ }
+}
+
/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
/// specified in the intrinsic calls.
@@ -9490,266 +9551,6 @@ static SDValue performSTORECombine(SDNode *N,
return SDValue();
}
-/// This function handles the log2-shuffle pattern produced by the
-/// LoopVectorizer for the across vector reduction. It consists of
-/// log2(NumVectorElements) steps and, in each step, 2^(s) elements
-/// are reduced, where s is an induction variable from 0 to
-/// log2(NumVectorElements).
-static SDValue tryMatchAcrossLaneShuffleForReduction(SDNode *N, SDValue OpV,
- unsigned Op,
- SelectionDAG &DAG) {
- EVT VTy = OpV->getOperand(0).getValueType();
- if (!VTy.isVector())
- return SDValue();
-
- int NumVecElts = VTy.getVectorNumElements();
- if (Op == ISD::FMAXNUM || Op == ISD::FMINNUM) {
- if (NumVecElts != 4)
- return SDValue();
- } else {
- if (NumVecElts != 4 && NumVecElts != 8 && NumVecElts != 16)
- return SDValue();
- }
-
- int NumExpectedSteps = APInt(8, NumVecElts).logBase2();
- SDValue PreOp = OpV;
- // Iterate over each step of the across vector reduction.
- for (int CurStep = 0; CurStep != NumExpectedSteps; ++CurStep) {
- SDValue CurOp = PreOp.getOperand(0);
- SDValue Shuffle = PreOp.getOperand(1);
- if (Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE) {
- // Try to swap the 1st and 2nd operand as add and min/max instructions
- // are commutative.
- CurOp = PreOp.getOperand(1);
- Shuffle = PreOp.getOperand(0);
- if (Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE)
- return SDValue();
- }
-
- // Check if the input vector is fed by the operator we want to handle,
- // except the last step; the very first input vector is not necessarily
- // the same operator we are handling.
- if (CurOp.getOpcode() != Op && (CurStep != (NumExpectedSteps - 1)))
- return SDValue();
-
- // Check if it forms one step of the across vector reduction.
- // E.g.,
- // %cur = add %1, %0
- // %shuffle = vector_shuffle %cur, <2, 3, u, u>
- // %pre = add %cur, %shuffle
- if (Shuffle.getOperand(0) != CurOp)
- return SDValue();
-
- int NumMaskElts = 1 << CurStep;
- ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Shuffle)->getMask();
- // Check mask values in each step.
- // We expect the shuffle mask in each step follows a specific pattern
- // denoted here by the <M, U> form, where M is a sequence of integers
- // starting from NumMaskElts, increasing by 1, and the number integers
- // in M should be NumMaskElts. U is a sequence of UNDEFs and the number
- // of undef in U should be NumVecElts - NumMaskElts.
- // E.g., for <8 x i16>, mask values in each step should be :
- // step 0 : <1,u,u,u,u,u,u,u>
- // step 1 : <2,3,u,u,u,u,u,u>
- // step 2 : <4,5,6,7,u,u,u,u>
- for (int i = 0; i < NumVecElts; ++i)
- if ((i < NumMaskElts && Mask[i] != (NumMaskElts + i)) ||
- (i >= NumMaskElts && !(Mask[i] < 0)))
- return SDValue();
-
- PreOp = CurOp;
- }
- unsigned Opcode;
- bool IsIntrinsic = false;
-
- switch (Op) {
- default:
- llvm_unreachable("Unexpected operator for across vector reduction");
- case ISD::ADD:
- Opcode = AArch64ISD::UADDV;
- break;
- case ISD::SMAX:
- Opcode = AArch64ISD::SMAXV;
- break;
- case ISD::UMAX:
- Opcode = AArch64ISD::UMAXV;
- break;
- case ISD::SMIN:
- Opcode = AArch64ISD::SMINV;
- break;
- case ISD::UMIN:
- Opcode = AArch64ISD::UMINV;
- break;
- case ISD::FMAXNUM:
- Opcode = Intrinsic::aarch64_neon_fmaxnmv;
- IsIntrinsic = true;
- break;
- case ISD::FMINNUM:
- Opcode = Intrinsic::aarch64_neon_fminnmv;
- IsIntrinsic = true;
- break;
- }
- SDLoc DL(N);
-
- return IsIntrinsic
- ? DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, N->getValueType(0),
- DAG.getConstant(Opcode, DL, MVT::i32), PreOp)
- : DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0),
- DAG.getNode(Opcode, DL, PreOp.getSimpleValueType(), PreOp),
- DAG.getConstant(0, DL, MVT::i64));
-}
-
-/// Target-specific DAG combine for the across vector min/max reductions.
-/// This function specifically handles the final clean-up step of the vector
-/// min/max reductions produced by the LoopVectorizer. It is the log2-shuffle
-/// pattern, which narrows down and finds the final min/max value from all
-/// elements of the vector.
-/// For example, for a <16 x i8> vector :
-/// svn0 = vector_shuffle %0, undef<8,9,10,11,12,13,14,15,u,u,u,u,u,u,u,u>
-/// %smax0 = smax %arr, svn0
-/// %svn1 = vector_shuffle %smax0, undef<4,5,6,7,u,u,u,u,u,u,u,u,u,u,u,u>
-/// %smax1 = smax %smax0, %svn1
-/// %svn2 = vector_shuffle %smax1, undef<2,3,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
-/// %smax2 = smax %smax1, svn2
-/// %svn3 = vector_shuffle %smax2, undef<1,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
-/// %sc = setcc %smax2, %svn3, gt
-/// %n0 = extract_vector_elt %sc, #0
-/// %n1 = extract_vector_elt %smax2, #0
-/// %n2 = extract_vector_elt $smax2, #1
-/// %result = select %n0, %n1, n2
-/// becomes :
-/// %1 = smaxv %0
-/// %result = extract_vector_elt %1, 0
-static SDValue
-performAcrossLaneMinMaxReductionCombine(SDNode *N, SelectionDAG &DAG,
- const AArch64Subtarget *Subtarget) {
- if (!Subtarget->hasNEON())
- return SDValue();
-
- SDValue N0 = N->getOperand(0);
- SDValue IfTrue = N->getOperand(1);
- SDValue IfFalse = N->getOperand(2);
-
- // Check if the SELECT merges up the final result of the min/max
- // from a vector.
- if (N0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
- IfTrue.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
- IfFalse.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
- return SDValue();
-
- // Expect N0 is fed by SETCC.
- SDValue SetCC = N0.getOperand(0);
- EVT SetCCVT = SetCC.getValueType();
- if (SetCC.getOpcode() != ISD::SETCC || !SetCCVT.isVector() ||
- SetCCVT.getVectorElementType() != MVT::i1)
- return SDValue();
-
- SDValue VectorOp = SetCC.getOperand(0);
- unsigned Op = VectorOp->getOpcode();
- // Check if the input vector is fed by the operator we want to handle.
- if (Op != ISD::SMAX && Op != ISD::UMAX && Op != ISD::SMIN &&
- Op != ISD::UMIN && Op != ISD::FMAXNUM && Op != ISD::FMINNUM)
- return SDValue();
-
- EVT VTy = VectorOp.getValueType();
- if (!VTy.isVector())
- return SDValue();
-
- if (VTy.getSizeInBits() < 64)
- return SDValue();
-
- EVT EltTy = VTy.getVectorElementType();
- if (Op == ISD::FMAXNUM || Op == ISD::FMINNUM) {
- if (EltTy != MVT::f32)
- return SDValue();
- } else {
- if (EltTy != MVT::i32 && EltTy != MVT::i16 && EltTy != MVT::i8)
- return SDValue();
- }
-
- // Check if extracting from the same vector.
- // For example,
- // %sc = setcc %vector, %svn1, gt
- // %n0 = extract_vector_elt %sc, #0
- // %n1 = extract_vector_elt %vector, #0
- // %n2 = extract_vector_elt $vector, #1
- if (!(VectorOp == IfTrue->getOperand(0) &&
- VectorOp == IfFalse->getOperand(0)))
- return SDValue();
-
- // Check if the condition code is matched with the operator type.
- ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
- if ((Op == ISD::SMAX && CC != ISD::SETGT && CC != ISD::SETGE) ||
- (Op == ISD::UMAX && CC != ISD::SETUGT && CC != ISD::SETUGE) ||
- (Op == ISD::SMIN && CC != ISD::SETLT && CC != ISD::SETLE) ||
- (Op == ISD::UMIN && CC != ISD::SETULT && CC != ISD::SETULE) ||
- (Op == ISD::FMAXNUM && CC != ISD::SETOGT && CC != ISD::SETOGE &&
- CC != ISD::SETUGT && CC != ISD::SETUGE && CC != ISD::SETGT &&
- CC != ISD::SETGE) ||
- (Op == ISD::FMINNUM && CC != ISD::SETOLT && CC != ISD::SETOLE &&
- CC != ISD::SETULT && CC != ISD::SETULE && CC != ISD::SETLT &&
- CC != ISD::SETLE))
- return SDValue();
-
- // Expect to check only lane 0 from the vector SETCC.
- if (!isNullConstant(N0.getOperand(1)))
- return SDValue();
-
- // Expect to extract the true value from lane 0.
- if (!isNullConstant(IfTrue.getOperand(1)))
- return SDValue();
-
- // Expect to extract the false value from lane 1.
- if (!isOneConstant(IfFalse.getOperand(1)))
- return SDValue();
-
- return tryMatchAcrossLaneShuffleForReduction(N, SetCC, Op, DAG);
-}
-
-/// Target-specific DAG combine for the across vector add reduction.
-/// This function specifically handles the final clean-up step of the vector
-/// add reduction produced by the LoopVectorizer. It is the log2-shuffle
-/// pattern, which adds all elements of a vector together.
-/// For example, for a <4 x i32> vector :
-/// %1 = vector_shuffle %0, <2,3,u,u>
-/// %2 = add %0, %1
-/// %3 = vector_shuffle %2, <1,u,u,u>
-/// %4 = add %2, %3
-/// %result = extract_vector_elt %4, 0
-/// becomes :
-/// %0 = uaddv %0
-/// %result = extract_vector_elt %0, 0
-static SDValue
-performAcrossLaneAddReductionCombine(SDNode *N, SelectionDAG &DAG,
- const AArch64Subtarget *Subtarget) {
- if (!Subtarget->hasNEON())
- return SDValue();
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
-
- // Check if the input vector is fed by the ADD.
- if (N0->getOpcode() != ISD::ADD)
- return SDValue();
-
- // The vector extract idx must constant zero because we only expect the final
- // result of the reduction is placed in lane 0.
- if (!isNullConstant(N1))
- return SDValue();
-
- EVT VTy = N0.getValueType();
- if (!VTy.isVector())
- return SDValue();
-
- EVT EltTy = VTy.getVectorElementType();
- if (EltTy != MVT::i32 && EltTy != MVT::i16 && EltTy != MVT::i8)
- return SDValue();
-
- if (VTy.getSizeInBits() < 64)
- return SDValue();
-
- return tryMatchAcrossLaneShuffleForReduction(N, N0, ISD::ADD, DAG);
-}
/// Target-specific DAG combine function for NEON load/store intrinsics
/// to merge base address updates.
@@ -10428,12 +10229,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performBitcastCombine(N, DCI, DAG);
case ISD::CONCAT_VECTORS:
return performConcatVectorsCombine(N, DCI, DAG);
- case ISD::SELECT: {
- SDValue RV = performSelectCombine(N, DCI);
- if (!RV.getNode())
- RV = performAcrossLaneMinMaxReductionCombine(N, DAG, Subtarget);
- return RV;
- }
+ case ISD::SELECT:
+ return performSelectCombine(N, DCI);
case ISD::VSELECT:
return performVSelectCombine(N, DCI.DAG);
case ISD::LOAD:
@@ -10455,8 +10252,6 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performNVCASTCombine(N);
case ISD::INSERT_VECTOR_ELT:
return performPostLD1Combine(N, DCI, true);
- case ISD::EXTRACT_VECTOR_ELT:
- return performAcrossLaneAddReductionCombine(N, DAG, Subtarget);
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN:
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
@@ -10676,6 +10471,14 @@ void AArch64TargetLowering::ReplaceNodeResults(
case ISD::BITCAST:
ReplaceBITCASTResults(N, Results, DAG);
return;
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ Results.push_back(LowerVECREDUCE(SDValue(N, 0), DAG));
+ return;
+
case AArch64ISD::SADDV:
ReplaceReductionResults(N, Results, DAG, ISD::ADD, AArch64ISD::SADDV);
return;
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index 89db566c219c..ecc2517fb288 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -568,6 +568,7 @@ private:
SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
std::vector<SDNode *> *Created) const override;
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 7c6f55c06bce..43569af04347 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -769,3 +769,28 @@ unsigned AArch64TTIImpl::getMinPrefetchStride() {
unsigned AArch64TTIImpl::getMaxPrefetchIterationsAhead() {
return ST->getMaxPrefetchIterationsAhead();
}
+
+bool AArch64TTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
+ TTI::ReductionFlags Flags) const {
+ assert(isa<VectorType>(Ty) && "Expected Ty to be a vector type");
+ unsigned ScalarBits = Ty->getScalarSizeInBits();
+ switch (Opcode) {
+ case Instruction::FAdd:
+ case Instruction::FMul:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Mul:
+ return false;
+ case Instruction::Add:
+ return ScalarBits * Ty->getVectorNumElements() >= 128;
+ case Instruction::ICmp:
+ return (ScalarBits < 64) &&
+ (ScalarBits * Ty->getVectorNumElements() >= 128);
+ case Instruction::FCmp:
+ return Flags.NoNaN;
+ default:
+ llvm_unreachable("Unhandled reduction opcode");
+ }
+ return false;
+}
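In other words, integer add reductions are only routed to the intrinsic form once the vector fills a 128-bit NEON register. A hypothetical standalone restatement of that rule (the helper name is mine, not an LLVM API):

    bool addReductionPrefersIntrinsic(unsigned ScalarBits, unsigned NumElts) {
      return ScalarBits * NumElts >= 128; // <16 x i8>, <8 x i16>, <4 x i32>, ...
    }

ICmp (integer min/max) reductions additionally require sub-64-bit elements, and FP min/max reductions are taken only when NoNaN is known, matching the assertions in LowerVECREDUCE above.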
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.h b/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 280d97f3c502..d0299149c38c 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -145,6 +145,9 @@ public:
bool shouldExpandReduction(const IntrinsicInst *II) const {
return false;
}
+
+ bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
+ TTI::ReductionFlags Flags) const;
/// @}
};
diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 7c99752b881f..c3ac796a0a44 100644
--- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1707,10 +1707,38 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
// FIXME: Look for on separate components
if (Src.getOpcode() == ISD::FNEG) {
- Mods |= (SISrcMods::NEG | SISrcMods::NEG_HI);
+ Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
Src = Src.getOperand(0);
}
+ if (Src.getOpcode() == ISD::BUILD_VECTOR) {
+ unsigned VecMods = Mods;
+
+ SDValue Lo = Src.getOperand(0);
+ SDValue Hi = Src.getOperand(1);
+
+ if (Lo.getOpcode() == ISD::FNEG) {
+ Lo = Lo.getOperand(0);
+ Mods ^= SISrcMods::NEG;
+ }
+
+ if (Hi.getOpcode() == ISD::FNEG) {
+ Hi = Hi.getOperand(0);
+ Mods ^= SISrcMods::NEG_HI;
+ }
+
+ if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
+ // Really a scalar input. Just select from the low half of the register to
+ // avoid packing.
+
+ Src = Lo;
+ SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
+ return true;
+ }
+
+ Mods = VecMods;
+ }
+
// Packed instructions do not have abs modifiers.
// FIXME: Handle abs/neg of individual components.
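Two details in this hunk are easy to miss: the modifier bits are now toggled with ^= rather than OR-ed in, so that a nested fneg cancels an outer one, and a BUILD_VECTOR whose two operands are the same non-inline-immediate value is treated as a scalar broadcast, selected from the low half to skip the packing step. A sketch of the first point (illustrative only):

    unsigned applyFNeg(unsigned Mods, unsigned NegBit) {
      return Mods ^ NegBit; // applyFNeg(applyFNeg(M, B), B) == M
    }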
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h
index bed7d326b3dd..e543cae07ada 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -289,6 +289,10 @@ public:
return getGeneration() >= GFX9;
}
+ bool hasMin3Max3_16() const {
+ return getGeneration() >= GFX9;
+ }
+
bool hasCARRY() const {
return (getGeneration() >= EVERGREEN);
}
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index 48a14e4dbea2..286be355bc14 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4491,7 +4491,8 @@ SDValue SITargetLowering::performMinMaxCombine(SDNode *N,
if (Opc != AMDGPUISD::FMIN_LEGACY && Opc != AMDGPUISD::FMAX_LEGACY &&
- VT != MVT::f64) {
+ VT != MVT::f64 &&
+ ((VT != MVT::f16 && VT != MVT::i16) || Subtarget->hasMin3Max3_16())) {
// max(max(a, b), c) -> max3(a, b, c)
// min(min(a, b), c) -> min3(a, b, c)
if (Op0.getOpcode() == Opc && Op0.hasOneUse()) {
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
index 065fd09eb356..38a16b525a75 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -765,7 +765,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
.addFrameIndex(FrameIndex) // addr
.addMemOperand(MMO)
.addReg(MFI->getScratchRSrcReg(), RegState::Implicit)
- .addReg(MFI->getScratchWaveOffsetReg(), RegState::Implicit);
+ .addReg(MFI->getFrameOffsetReg(), RegState::Implicit);
// Add the scratch resource registers as implicit uses because we may end up
// needing them, and need to ensure that the reserved registers are
// correctly handled.
@@ -796,7 +796,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
.addReg(SrcReg, getKillRegState(isKill)) // data
.addFrameIndex(FrameIndex) // addr
.addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
- .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
+ .addReg(MFI->getFrameOffsetReg()) // scratch_offset
.addImm(0) // offset
.addMemOperand(MMO);
}
@@ -869,7 +869,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
.addFrameIndex(FrameIndex) // addr
.addMemOperand(MMO)
.addReg(MFI->getScratchRSrcReg(), RegState::Implicit)
- .addReg(MFI->getScratchWaveOffsetReg(), RegState::Implicit);
+ .addReg(MFI->getFrameOffsetReg(), RegState::Implicit);
if (ST.hasScalarStores()) {
// m0 is used for offset to scalar stores if used to spill.
@@ -892,10 +892,10 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
unsigned Opcode = getVGPRSpillRestoreOpcode(SpillSize);
BuildMI(MBB, MI, DL, get(Opcode), DestReg)
- .addFrameIndex(FrameIndex) // vaddr
- .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
- .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
- .addImm(0) // offset
+ .addFrameIndex(FrameIndex) // vaddr
+ .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
+ .addReg(MFI->getFrameOffsetReg()) // scratch_offset
+ .addImm(0) // offset
.addMemOperand(MMO);
}
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 8820e294562b..06cfc95be96a 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -654,11 +654,11 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
if (Offset != 0) {
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
- .addReg(MFI->getScratchWaveOffsetReg())
+ .addReg(MFI->getFrameOffsetReg())
.addImm(Offset);
} else {
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
- .addReg(MFI->getScratchWaveOffsetReg());
+ .addReg(MFI->getFrameOffsetReg());
}
BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
@@ -715,11 +715,11 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
= MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
EltSize, MinAlign(Align, EltSize * i));
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
- .addReg(TmpReg, RegState::Kill) // src
- .addFrameIndex(Index) // vaddr
- .addReg(MFI->getScratchRSrcReg()) // srrsrc
- .addReg(MFI->getScratchWaveOffsetReg()) // soffset
- .addImm(i * 4) // offset
+ .addReg(TmpReg, RegState::Kill) // src
+ .addFrameIndex(Index) // vaddr
+ .addReg(MFI->getScratchRSrcReg()) // srrsrc
+ .addReg(MFI->getFrameOffsetReg()) // soffset
+ .addImm(i * 4) // offset
.addMemOperand(MMO);
}
}
@@ -806,11 +806,11 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
if (Offset != 0) {
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
- .addReg(MFI->getScratchWaveOffsetReg())
+ .addReg(MFI->getFrameOffsetReg())
.addImm(Offset);
} else {
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
- .addReg(MFI->getScratchWaveOffsetReg());
+ .addReg(MFI->getFrameOffsetReg());
}
auto MIB =
@@ -853,10 +853,10 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
MinAlign(Align, EltSize * i));
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
- .addFrameIndex(Index) // vaddr
- .addReg(MFI->getScratchRSrcReg()) // srsrc
- .addReg(MFI->getScratchWaveOffsetReg()) // soffset
- .addImm(i * 4) // offset
+ .addFrameIndex(Index) // vaddr
+ .addReg(MFI->getScratchRSrcReg()) // srsrc
+ .addReg(MFI->getFrameOffsetReg()) // soffset
+ .addImm(i * 4) // offset
.addMemOperand(MMO);
auto MIB =
diff --git a/lib/Target/AMDGPU/VOP3Instructions.td b/lib/Target/AMDGPU/VOP3Instructions.td
index ffa6c60d6b1f..c0b5069948fb 100644
--- a/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/lib/Target/AMDGPU/VOP3Instructions.td
@@ -300,10 +300,19 @@ def V_AND_OR_B32 : VOP3Inst <"v_and_or_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
def V_OR3_B32 : VOP3Inst <"v_or3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
def V_XAD_U32 : VOP3Inst <"v_xad_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+
def V_MED3_F16 : VOP3Inst <"v_med3_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUfmed3>;
def V_MED3_I16 : VOP3Inst <"v_med3_i16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUsmed3>;
def V_MED3_U16 : VOP3Inst <"v_med3_u16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUumed3>;
-}
+
+def V_MIN3_F16 : VOP3Inst <"v_min3_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUfmin3>;
+def V_MIN3_I16 : VOP3Inst <"v_min3_i16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUsmin3>;
+def V_MIN3_U16 : VOP3Inst <"v_min3_u16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUumin3>;
+
+def V_MAX3_F16 : VOP3Inst <"v_max3_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUfmax3>;
+def V_MAX3_I16 : VOP3Inst <"v_max3_i16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUsmax3>;
+def V_MAX3_U16 : VOP3Inst <"v_max3_u16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUumax3>;
+} // End SubtargetPredicate = isGFX9
//===----------------------------------------------------------------------===//
@@ -509,6 +518,15 @@ defm V_OR3_B32 : VOP3_Real_vi <0x202>;
defm V_PACK_B32_F16 : VOP3_Real_vi <0x2a0>;
defm V_XAD_U32 : VOP3_Real_vi <0x1f3>;
+
+defm V_MIN3_F16 : VOP3_Real_vi <0x1f4>;
+defm V_MIN3_I16 : VOP3_Real_vi <0x1f5>;
+defm V_MIN3_U16 : VOP3_Real_vi <0x1f6>;
+
+defm V_MAX3_F16 : VOP3_Real_vi <0x1f7>;
+defm V_MAX3_I16 : VOP3_Real_vi <0x1f8>;
+defm V_MAX3_U16 : VOP3_Real_vi <0x1f9>;
+
defm V_MED3_F16 : VOP3_Real_vi <0x1fa>;
defm V_MED3_I16 : VOP3_Real_vi <0x1fb>;
defm V_MED3_U16 : VOP3_Real_vi <0x1fc>;
diff --git a/lib/Target/ARM/ARMInstructionSelector.cpp b/lib/Target/ARM/ARMInstructionSelector.cpp
index 8c680cdf9b47..b1f059835ff5 100644
--- a/lib/Target/ARM/ARMInstructionSelector.cpp
+++ b/lib/Target/ARM/ARMInstructionSelector.cpp
@@ -345,25 +345,10 @@ bool ARMInstructionSelector::select(MachineInstr &I) const {
I.setDesc(TII.get(COPY));
return selectCopy(I, TII, MRI, TRI, RBI);
}
- case G_ADD:
case G_GEP:
I.setDesc(TII.get(ARM::ADDrr));
MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
break;
- case G_SUB:
- I.setDesc(TII.get(ARM::SUBrr));
- MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
- break;
- case G_MUL:
- if (TII.getSubtarget().hasV6Ops()) {
- I.setDesc(TII.get(ARM::MUL));
- } else {
- assert(TII.getSubtarget().useMulOps() && "Unsupported target");
- I.setDesc(TII.get(ARM::MULv5));
- MIB->getOperand(0).setIsEarlyClobber(true);
- }
- MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
- break;
case G_FRAME_INDEX:
// Add 0 to the given frame index and hope it will eventually be folded into
// the user(s).
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index d0fd366ab9ed..1a17d4e33e4f 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -571,8 +571,7 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
GPRsNoLRSP.reset(ARM::LR);
GPRsNoLRSP.reset(ARM::SP);
GPRsNoLRSP.reset(ARM::PC);
- for (int Register = GPRsNoLRSP.find_first(); Register != -1;
- Register = GPRsNoLRSP.find_next(Register)) {
+ for (unsigned Register : GPRsNoLRSP.set_bits()) {
if (!UsedRegs.contains(Register)) {
// Remember the first pop-friendly register and exit.
if (PopFriendly.test(Register)) {
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index ae58c26e145a..1597057ad63f 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -386,7 +386,7 @@ void RegDefsUses::setCallerSaved(const MachineInstr &MI) {
void RegDefsUses::setUnallocatableRegs(const MachineFunction &MF) {
BitVector AllocSet = TRI.getAllocatableSet(MF);
- for (int R = AllocSet.find_first(); R != -1; R = AllocSet.find_next(R))
+ for (unsigned R : AllocSet.set_bits())
for (MCRegAliasIterator AI(R, &TRI, false); AI.isValid(); ++AI)
AllocSet.set(*AI);
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index 40bfe3a449f7..57a1d373c88c 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -1765,31 +1765,36 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
// Check whether the frame pointer register is allocated. If so, make sure it
// is spilled to the correct offset.
if (needsFP(MF)) {
- HasGPSaveArea = true;
-
int FI = PFI->getFramePointerSaveIndex();
assert(FI && "No Frame Pointer Save Slot!");
-
MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
+ // FP is R31/X31, so no need to update MinGPR/MinG8R.
+ HasGPSaveArea = true;
}
if (PFI->usesPICBase()) {
- HasGPSaveArea = true;
-
int FI = PFI->getPICBasePointerSaveIndex();
assert(FI && "No PIC Base Pointer Save Slot!");
-
MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
+
+ MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
+ HasGPSaveArea = true;
}
const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
if (RegInfo->hasBasePointer(MF)) {
- HasGPSaveArea = true;
-
int FI = PFI->getBasePointerSaveIndex();
assert(FI && "No Base Pointer Save Slot!");
-
MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
+
+ unsigned BP = RegInfo->getBaseRegister(MF);
+ if (PPC::G8RCRegClass.contains(BP)) {
+ MinG8R = std::min<unsigned>(MinG8R, BP);
+ HasG8SaveArea = true;
+ } else if (PPC::GPRCRegClass.contains(BP)) {
+ MinGPR = std::min<unsigned>(MinGPR, BP);
+ HasGPSaveArea = true;
+ }
}
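Taken together with the FP comment above, the point of the new MinGPR/MinG8R updates is that the general-register save areas must now extend far enough to cover the PIC base (R30) and the base pointer's spill slots, which the old code sized only from the callee-saved list; the frame pointer needs no update because R31/X31 is already the highest-numbered GPR.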
// General register save area starts right below the Floating-point
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 17bdd595da10..144aea850833 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -410,6 +410,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// To handle counter-based loop conditions.
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
+
// Comparisons that require checking two conditions.
setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
@@ -8184,6 +8189,26 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return Flags;
}
+SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
+ SelectionDAG &DAG) const {
+ // SelectionDAGBuilder::visitTargetIntrinsic may insert an extra chain
+ // operand at the beginning of the argument list.
+ int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
+ SDLoc DL(Op);
+ switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
+ case Intrinsic::ppc_cfence: {
+ assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
+ return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
+ Op.getOperand(ArgStart + 1))),
+ 0);
+ }
+ default:
+ break;
+ }
+ return SDValue();
+}
+
SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -8649,6 +8674,9 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
// Frame & Return address.
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+
+ case ISD::INTRINSIC_VOID:
+ return LowerINTRINSIC_VOID(Op, DAG);
}
}
@@ -8753,12 +8781,19 @@ Instruction *PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
Instruction *PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
Instruction *Inst,
AtomicOrdering Ord) const {
- if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord))
+ if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
+ // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
+ // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
+ // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
+ if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
+ return Builder.CreateCall(
+ Intrinsic::getDeclaration(
+ Builder.GetInsertBlock()->getParent()->getParent(),
+ Intrinsic::ppc_cfence, {Inst->getType()}),
+ {Inst});
+ // FIXME: Can use isync for rmw operation.
return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
- // FIXME: this is too conservative, a dependent branch + isync is enough.
- // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
- // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
- // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
+ }
return nullptr;
}
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 4fc744257262..acb77943b118 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -905,6 +905,7 @@ namespace llvm {
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index a8433919f0f3..a3f894c81a01 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -983,6 +983,10 @@ def LDgotTprelL: Pseudo<(outs g8rc:$rD), (ins s16imm64:$disp, g8rc_nox0:$reg),
[(set i64:$rD,
(PPCldGotTprelL tglobaltlsaddr:$disp, i64:$reg))]>,
isPPC64;
+
+let isBarrier = 1, isPseudo = 1, Defs = [CR7], Itinerary = IIC_LdStSync in
+def CFENCE8 : Pseudo<(outs), (ins g8rc:$cr), "#CFENCE8", []>;
+
def : Pat<(PPCaddTls i64:$in, tglobaltlsaddr:$g),
(ADD8TLS $in, tglobaltlsaddr:$g)>;
def ADDIStlsgdHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 790a8902b3d2..3afcec1248d5 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1873,6 +1873,8 @@ PPCInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
}
bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
+ auto &MBB = *MI.getParent();
+ auto DL = MI.getDebugLoc();
switch (MI.getOpcode()) {
case TargetOpcode::LOAD_STACK_GUARD: {
assert(Subtarget.isTargetLinux() &&
@@ -1920,6 +1922,17 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MI.setDesc(get(Opcode));
return true;
}
+ case PPC::CFENCE8: {
+ auto Val = MI.getOperand(0).getReg();
+ BuildMI(MBB, MI, DL, get(PPC::CMPW), PPC::CR7).addReg(Val).addReg(Val);
+ BuildMI(MBB, MI, DL, get(PPC::CTRL_DEP))
+ .addImm(PPC::PRED_NE_MINUS)
+ .addReg(PPC::CR7)
+ .addImm(1);
+ MI.setDesc(get(PPC::ISYNC));
+ MI.RemoveOperand(0);
+ return true;
+ }
}
return false;
}
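Read together with the emitTrailingFence change above: an acquire load on PPC64 now ends in the ppc_cfence intrinsic rather than an lwsync, and the CFENCE8 pseudo expands here into a cmpw of the loaded value against itself on cr7, a never-taken bne- (the non-terminator CTRL_DEP flavor of BCC added in PPCInstrInfo.td below), and an isync; this is the dependent-branch-plus-isync mapping the old FIXME referred to. At the source level the affected pattern is simply an acquire load, e.g. (illustrative C++):

    #include <atomic>
    int acquire_load(const std::atomic<int> &X) {
      return X.load(std::memory_order_acquire); // ld; cmpw; bne-; isync on PPC64
    }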
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 1af5e7f28342..0766cfe4a987 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1223,9 +1223,15 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
// FIXME: should be able to write a pattern for PPCcondbranch, but can't use
// a two-value operand where a dag node expects two operands. :(
let isCodeGenOnly = 1 in {
- def BCC : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst),
- "b${cond:cc}${cond:pm} ${cond:reg}, $dst"
- /*[(PPCcondbranch crrc:$crS, imm:$opc, bb:$dst)]*/>;
+ class BCC_class : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst),
+ "b${cond:cc}${cond:pm} ${cond:reg}, $dst"
+ /*[(PPCcondbranch crrc:$crS, imm:$opc, bb:$dst)]*/>;
+ def BCC : BCC_class;
+
+ // Same as BCC, except it is not a terminator. Used to introduce a
+ // control-flow dependency without creating new blocks.
+ let isTerminator = 0 in def CTRL_DEP : BCC_class;
+
def BCCA : BForm<16, 1, 0, (outs), (ins pred:$cond, abscondbrtarget:$dst),
"b${cond:cc}a${cond:pm} ${cond:reg}, $dst">;
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index f56b238f91e6..6a3dc6799c43 100644
--- a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -325,6 +325,30 @@ int SystemZTTIImpl::getArithmeticInstrCost(
unsigned ScalarBits = Ty->getScalarSizeInBits();
+ // Div with a constant which is a power of 2 will be converted by
+ // DAGCombiner to use shifts. With vector shift-element instructions, a
+ // vector sdiv costs about as much as a scalar one.
+ const unsigned SDivCostEstimate = 4;
+ bool SDivPow2 = false;
+ bool UDivPow2 = false;
+ if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv) &&
+ Args.size() == 2) {
+ const ConstantInt *CI = nullptr;
+ if (const Constant *C = dyn_cast<Constant>(Args[1])) {
+ if (C->getType()->isVectorTy())
+ CI = dyn_cast_or_null<const ConstantInt>(C->getSplatValue());
+ else
+ CI = dyn_cast<const ConstantInt>(C);
+ }
+ if (CI != nullptr &&
+ (CI->getValue().isPowerOf2() || (-CI->getValue()).isPowerOf2())) {
+ if (Opcode == Instruction::SDiv)
+ SDivPow2 = true;
+ else
+ UDivPow2 = true;
+ }
+ }
+
if (Ty->isVectorTy()) {
assert (ST->hasVector() && "getArithmeticInstrCost() called with vector type.");
unsigned VF = Ty->getVectorNumElements();
@@ -333,10 +357,13 @@ int SystemZTTIImpl::getArithmeticInstrCost(
// These vector operations are custom handled, but are still supported
// with one instruction per vector, regardless of element size.
if (Opcode == Instruction::Shl || Opcode == Instruction::LShr ||
- Opcode == Instruction::AShr) {
+ Opcode == Instruction::AShr || UDivPow2) {
return NumVectors;
}
+ if (SDivPow2)
+ return (NumVectors * SDivCostEstimate);
+
// These FP operations are supported with a single vector instruction for
// double (base implementation assumes float generally costs 2). For
// FP128, the scalar cost is 1, and there is no overhead since the values
@@ -395,6 +422,11 @@ int SystemZTTIImpl::getArithmeticInstrCost(
// 2 * ipm sequences ; xor ; shift ; compare
return 7;
+ if (UDivPow2)
+ return 1;
+ if (SDivPow2)
+ return SDivCostEstimate;
+
// An extra extension for narrow types is needed.
if ((Opcode == Instruction::SDiv || Opcode == Instruction::SRem))
// sext of op(s) for narrow types
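As a concrete reading of the new costs: an sdiv of <4 x i32> by a splat of 8 is now priced at NumVectors * SDivCostEstimate = 4, and the matching udiv at 1 (one vector shift), instead of falling through to the scalarized default. A self-contained restatement of the power-of-two screen (my helper, mirroring the APInt test above):

    #include <cstdint>
    // Both C and -C are accepted for sdiv, since x / -(2^k) still lowers to shifts.
    bool isPow2DivisorMagnitude(int64_t C) {
      uint64_t M = C < 0 ? 0 - (uint64_t)C : (uint64_t)C;
      return M != 0 && (M & (M - 1)) == 0;
    }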
diff --git a/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp b/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
index 5fd4a8d1949e..ba39b6cdb568 100644
--- a/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
@@ -140,8 +140,7 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) {
// Check if it's possible to reuse any of the used colors.
if (!MRI->isLiveIn(Old))
- for (int C(UsedColors.find_first()); C != -1;
- C = UsedColors.find_next(C)) {
+ for (unsigned C : UsedColors.set_bits()) {
if (MRI->getRegClass(SortedIntervals[C]->reg) != RC)
continue;
for (LiveInterval *OtherLI : Assignments[C])
diff --git a/lib/Target/WebAssembly/known_gcc_test_failures.txt b/lib/Target/WebAssembly/known_gcc_test_failures.txt
index 8e8e5fd1eff1..54619589c341 100644
--- a/lib/Target/WebAssembly/known_gcc_test_failures.txt
+++ b/lib/Target/WebAssembly/known_gcc_test_failures.txt
@@ -33,9 +33,6 @@ built-in-setjmp.c
pr60003.c
# Error in the program / unsupported by Clang.
-scal-to-vec1.c
-scal-to-vec2.c
-scal-to-vec3.c
20000822-1.c
20010209-1.c
20010605-1.c
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 3a421fe77392..784c3a6557ff 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -235,8 +235,6 @@ def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
"LEA instruction needs inputs at AG stage">;
def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
"LEA instruction with certain arguments is slow">;
-def FeatureSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
- "LEA instruction with 3 ops or certain registers is slow">;
def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
"INC and DEC instructions are slower than ADD and SUB">;
def FeatureSoftFloat
@@ -482,7 +480,6 @@ def SNBFeatures : ProcessorFeatures<[], [
FeatureXSAVE,
FeatureXSAVEOPT,
FeatureLAHFSAHF,
- FeatureSlow3OpsLEA,
FeatureFastScalarFSQRT,
FeatureFastSHLDRotate
]>;
diff --git a/lib/Target/X86/X86FixupLEAs.cpp b/lib/Target/X86/X86FixupLEAs.cpp
index 9f649dad8bc0..2cd4c1a3e7b3 100644
--- a/lib/Target/X86/X86FixupLEAs.cpp
+++ b/lib/Target/X86/X86FixupLEAs.cpp
@@ -27,26 +27,20 @@
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
-namespace llvm {
-void initializeFixupLEAPassPass(PassRegistry &);
-}
-
-#define FIXUPLEA_DESC "X86 LEA Fixup"
-#define FIXUPLEA_NAME "x86-fixup-LEAs"
-
-#define DEBUG_TYPE FIXUPLEA_NAME
+#define DEBUG_TYPE "x86-fixup-LEAs"
STATISTIC(NumLEAs, "Number of LEA instructions created");
namespace {
class FixupLEAPass : public MachineFunctionPass {
enum RegUsageState { RU_NotUsed, RU_Write, RU_Read };
-
+ static char ID;
/// \brief Loop over all of the instructions in the basic block
/// replacing applicable instructions with LEA instructions,
/// where appropriate.
bool processBasicBlock(MachineFunction &MF, MachineFunction::iterator MFI);
+ StringRef getPassName() const override { return "X86 LEA Fixup"; }
/// \brief Given a machine register, look for the instruction
/// which writes it in the current basic block. If found,
@@ -68,22 +62,6 @@ class FixupLEAPass : public MachineFunctionPass {
void processInstructionForSLM(MachineBasicBlock::iterator &I,
MachineFunction::iterator MFI);
-
- /// \brief Given a LEA instruction which is unprofitable
- /// on SNB+ try to replace it with other instructions.
- /// According to Intel's Optimization Reference Manual:
- /// " For LEA instructions with three source operands and some specific
- /// situations, instruction latency has increased to 3 cycles, and must
- /// dispatch via port 1:
- /// - LEA that has all three source operands: base, index, and offset
- /// - LEA that uses base and index registers where the base is EBP, RBP,
- /// or R13
- /// - LEA that uses RIP relative addressing mode
- /// - LEA that uses 16-bit addressing mode "
- /// This function currently handles the first 2 cases only.
- MachineInstr *processInstrForSlow3OpLEA(MachineInstr &MI,
- MachineFunction::iterator MFI);
-
/// \brief Look for LEAs that add 1 to reg or subtract 1 from reg
/// and convert them to INC or DEC respectively.
bool fixupIncDec(MachineBasicBlock::iterator &I,
@@ -107,13 +85,7 @@ class FixupLEAPass : public MachineFunctionPass {
MachineBasicBlock::iterator &MBBI) const;
public:
- static char ID;
-
- StringRef getPassName() const override { return FIXUPLEA_DESC; }
-
- FixupLEAPass() : MachineFunctionPass(ID) {
- initializeFixupLEAPassPass(*PassRegistry::getPassRegistry());
- }
+ FixupLEAPass() : MachineFunctionPass(ID) {}
/// \brief Loop over all of the basic blocks,
/// replacing instructions by equivalent LEA instructions
@@ -132,11 +104,8 @@ private:
bool OptIncDec;
bool OptLEA;
};
-}
-
char FixupLEAPass::ID = 0;
-
-INITIALIZE_PASS(FixupLEAPass, FIXUPLEA_NAME, FIXUPLEA_DESC, false, false)
+}
MachineInstr *
FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI,
@@ -199,7 +168,7 @@ bool FixupLEAPass::runOnMachineFunction(MachineFunction &Func) {
MF = &Func;
const X86Subtarget &ST = Func.getSubtarget<X86Subtarget>();
OptIncDec = !ST.slowIncDec() || Func.getFunction()->optForMinSize();
- OptLEA = ST.LEAusesAG() || ST.slowLEA() || ST.slow3OpsLEA();
+ OptLEA = ST.LEAusesAG() || ST.slowLEA();
if (!OptLEA && !OptIncDec)
return false;
@@ -273,64 +242,9 @@ FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I,
return MachineBasicBlock::iterator();
}
-static inline bool isLEA(const int Opcode) {
- return Opcode == X86::LEA16r || Opcode == X86::LEA32r ||
- Opcode == X86::LEA64r || Opcode == X86::LEA64_32r;
-}
-
-static inline bool isInefficientLEAReg(unsigned int Reg) {
- return Reg == X86::EBP || Reg == X86::RBP || Reg == X86::R13;
-}
-
-static inline bool isRegOperand(const MachineOperand &Op) {
- return Op.isReg() && Op.getReg() != X86::NoRegister;
-}
-/// hasIneffecientLEARegs - LEA that uses base and index registers
-/// where the base is EBP, RBP, or R13
-static inline bool hasInefficientLEABaseReg(const MachineOperand &Base,
- const MachineOperand &Index) {
- return Base.isReg() && isInefficientLEAReg(Base.getReg()) &&
- isRegOperand(Index);
-}
-
-static inline bool hasLEAOffset(const MachineOperand &Offset) {
- return (Offset.isImm() && Offset.getImm() != 0) || Offset.isGlobal();
-}
-
-// LEA instruction that has all three operands: offset, base and index
-static inline bool isThreeOperandsLEA(const MachineOperand &Base,
- const MachineOperand &Index,
- const MachineOperand &Offset) {
- return isRegOperand(Base) && isRegOperand(Index) && hasLEAOffset(Offset);
-}
-
-static inline int getADDrrFromLEA(int LEAOpcode) {
- switch (LEAOpcode) {
- default:
- llvm_unreachable("Unexpected LEA instruction");
- case X86::LEA16r:
- return X86::ADD16rr;
- case X86::LEA32r:
- return X86::ADD32rr;
- case X86::LEA64_32r:
- case X86::LEA64r:
- return X86::ADD64rr;
- }
-}
-
-static inline int getADDriFromLEA(int LEAOpcode, const MachineOperand &Offset) {
- bool IsInt8 = Offset.isImm() && isInt<8>(Offset.getImm());
- switch (LEAOpcode) {
- default:
- llvm_unreachable("Unexpected LEA instruction");
- case X86::LEA16r:
- return IsInt8 ? X86::ADD16ri8 : X86::ADD16ri;
- case X86::LEA32r:
- case X86::LEA64_32r:
- return IsInt8 ? X86::ADD32ri8 : X86::ADD32ri;
- case X86::LEA64r:
- return IsInt8 ? X86::ADD64ri8 : X86::ADD64ri32;
- }
+static inline bool isLEA(const int opcode) {
+ return opcode == X86::LEA16r || opcode == X86::LEA32r ||
+ opcode == X86::LEA64r || opcode == X86::LEA64_32r;
}
/// isLEASimpleIncOrDec - Does this LEA have one these forms:
@@ -423,8 +337,8 @@ void FixupLEAPass::seekLEAFixup(MachineOperand &p,
void FixupLEAPass::processInstructionForSLM(MachineBasicBlock::iterator &I,
MachineFunction::iterator MFI) {
MachineInstr &MI = *I;
- const int Opcode = MI.getOpcode();
- if (!isLEA(Opcode))
+ const int opcode = MI.getOpcode();
+ if (!isLEA(opcode))
return;
if (MI.getOperand(5).getReg() != 0 || !MI.getOperand(4).isImm() ||
!TII->isSafeToClobberEFLAGS(*MFI, I))
@@ -436,142 +350,53 @@ void FixupLEAPass::processInstructionForSLM(MachineBasicBlock::iterator &I,
return;
if (MI.getOperand(2).getImm() > 1)
return;
+ int addrr_opcode, addri_opcode;
+ switch (opcode) {
+ default:
+ llvm_unreachable("Unexpected LEA instruction");
+ case X86::LEA16r:
+ addrr_opcode = X86::ADD16rr;
+ addri_opcode = X86::ADD16ri;
+ break;
+ case X86::LEA32r:
+ addrr_opcode = X86::ADD32rr;
+ addri_opcode = X86::ADD32ri;
+ break;
+ case X86::LEA64_32r:
+ case X86::LEA64r:
+ addrr_opcode = X86::ADD64rr;
+ addri_opcode = X86::ADD64ri32;
+ break;
+ }
DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump(););
DEBUG(dbgs() << "FixLEA: Replaced by: ";);
MachineInstr *NewMI = nullptr;
+ const MachineOperand &Dst = MI.getOperand(0);
// Make ADD instruction for two registers writing to LEA's destination
if (SrcR1 != 0 && SrcR2 != 0) {
- const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(Opcode));
- const MachineOperand &Src = MI.getOperand(SrcR1 == DstR ? 3 : 1);
- NewMI =
- BuildMI(*MFI, I, MI.getDebugLoc(), ADDrr, DstR).addReg(DstR).add(Src);
+ const MachineOperand &Src1 = MI.getOperand(SrcR1 == DstR ? 1 : 3);
+ const MachineOperand &Src2 = MI.getOperand(SrcR1 == DstR ? 3 : 1);
+ NewMI = BuildMI(*MF, MI.getDebugLoc(), TII->get(addrr_opcode))
+ .add(Dst)
+ .add(Src1)
+ .add(Src2);
+ MFI->insert(I, NewMI);
DEBUG(NewMI->dump(););
}
// Make ADD instruction for immediate
if (MI.getOperand(4).getImm() != 0) {
- const MCInstrDesc &ADDri =
- TII->get(getADDriFromLEA(Opcode, MI.getOperand(4)));
const MachineOperand &SrcR = MI.getOperand(SrcR1 == DstR ? 1 : 3);
- NewMI = BuildMI(*MFI, I, MI.getDebugLoc(), ADDri, DstR)
+ NewMI = BuildMI(*MF, MI.getDebugLoc(), TII->get(addri_opcode))
+ .add(Dst)
.add(SrcR)
.addImm(MI.getOperand(4).getImm());
+ MFI->insert(I, NewMI);
DEBUG(NewMI->dump(););
}
if (NewMI) {
MFI->erase(I);
- I = NewMI;
- }
-}
-
-MachineInstr *
-FixupLEAPass::processInstrForSlow3OpLEA(MachineInstr &MI,
- MachineFunction::iterator MFI) {
-
- const int LEAOpcode = MI.getOpcode();
- if (!isLEA(LEAOpcode))
- return nullptr;
-
- const MachineOperand &Dst = MI.getOperand(0);
- const MachineOperand &Base = MI.getOperand(1);
- const MachineOperand &Scale = MI.getOperand(2);
- const MachineOperand &Index = MI.getOperand(3);
- const MachineOperand &Offset = MI.getOperand(4);
- const MachineOperand &Segment = MI.getOperand(5);
-
- if (!(isThreeOperandsLEA(Base, Index, Offset) ||
- hasInefficientLEABaseReg(Base, Index)) ||
- !TII->isSafeToClobberEFLAGS(*MFI, MI) ||
- Segment.getReg() != X86::NoRegister)
- return nullptr;
-
- unsigned int DstR = Dst.getReg();
- unsigned int BaseR = Base.getReg();
- unsigned int IndexR = Index.getReg();
- unsigned SSDstR =
- (LEAOpcode == X86::LEA64_32r) ? getX86SubSuperRegister(DstR, 64) : DstR;
- bool IsScale1 = Scale.getImm() == 1;
- bool IsInefficientBase = isInefficientLEAReg(BaseR);
- bool IsInefficientIndex = isInefficientLEAReg(IndexR);
-
- // Skip these cases since it takes more than 2 instructions
- // to replace the LEA instruction.
- if (IsInefficientBase && SSDstR == BaseR && !IsScale1)
- return nullptr;
- if (LEAOpcode == X86::LEA64_32r && IsInefficientBase &&
- (IsInefficientIndex || !IsScale1))
- return nullptr;
-
- const DebugLoc DL = MI.getDebugLoc();
- const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(LEAOpcode));
- const MCInstrDesc &ADDri = TII->get(getADDriFromLEA(LEAOpcode, Offset));
-
- DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MI.dump(););
- DEBUG(dbgs() << "FixLEA: Replaced by: ";);
-
- // First try to replace LEA with one or two (for the 3-op LEA case)
- // add instructions:
- // 1.lea (%base,%index,1), %base => add %index,%base
- // 2.lea (%base,%index,1), %index => add %base,%index
- if (IsScale1 && (DstR == BaseR || DstR == IndexR)) {
- const MachineOperand &Src = DstR == BaseR ? Index : Base;
- MachineInstr *NewMI =
- BuildMI(*MFI, MI, DL, ADDrr, DstR).addReg(DstR).add(Src);
- DEBUG(NewMI->dump(););
- // Create ADD instruction for the Offset in case of 3-Ops LEA.
- if (hasLEAOffset(Offset)) {
- NewMI = BuildMI(*MFI, MI, DL, ADDri, DstR).addReg(DstR).add(Offset);
- DEBUG(NewMI->dump(););
- }
- return NewMI;
- }
- // If the base is inefficient try switching the index and base operands,
- // otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction:
- // lea offset(%base,%index,scale),%dst =>
- // lea (%base,%index,scale); add offset,%dst
- if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) {
- MachineInstr *NewMI = BuildMI(*MFI, MI, DL, TII->get(LEAOpcode))
- .add(Dst)
- .add(IsInefficientBase ? Index : Base)
- .add(Scale)
- .add(IsInefficientBase ? Base : Index)
- .addImm(0)
- .add(Segment);
- DEBUG(NewMI->dump(););
- // Create ADD instruction for the Offset in case of 3-Ops LEA.
- if (hasLEAOffset(Offset)) {
- NewMI = BuildMI(*MFI, MI, DL, ADDri, DstR).addReg(DstR).add(Offset);
- DEBUG(NewMI->dump(););
- }
- return NewMI;
- }
- // Handle the rest of the cases with inefficient base register:
- assert(SSDstR != BaseR && "SSDstR == BaseR should be handled already!");
- assert(IsInefficientBase && "efficient base should be handled already!");
-
- // lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst
- if (IsScale1 && !hasLEAOffset(Offset)) {
- TII->copyPhysReg(*MFI, MI, DL, DstR, BaseR, Base.isKill());
- DEBUG(MI.getPrevNode()->dump(););
-
- MachineInstr *NewMI =
- BuildMI(*MFI, MI, DL, ADDrr, DstR).addReg(DstR).add(Index);
- DEBUG(NewMI->dump(););
- return NewMI;
+ I = static_cast<MachineBasicBlock::iterator>(NewMI);
}
- // lea offset(%base,%index,scale), %dst =>
- // lea offset( ,%index,scale), %dst; add %base,%dst
- MachineInstr *NewMI = BuildMI(*MFI, MI, DL, TII->get(LEAOpcode))
- .add(Dst)
- .addReg(0)
- .add(Scale)
- .add(Index)
- .add(Offset)
- .add(Segment);
- DEBUG(NewMI->dump(););
-
- NewMI = BuildMI(*MFI, MI, DL, ADDrr, DstR).addReg(DstR).add(Base);
- DEBUG(NewMI->dump(););
- return NewMI;
}
bool FixupLEAPass::processBasicBlock(MachineFunction &MF,
@@ -585,16 +410,8 @@ bool FixupLEAPass::processBasicBlock(MachineFunction &MF,
if (OptLEA) {
if (MF.getSubtarget<X86Subtarget>().isSLM())
processInstructionForSLM(I, MFI);
-
- else {
- if (MF.getSubtarget<X86Subtarget>().slow3OpsLEA()) {
- if (auto *NewMI = processInstrForSlow3OpLEA(*I, MFI)) {
- MFI->erase(I);
- I = NewMI;
- }
- } else
- processInstruction(I, MFI);
- }
+ else
+ processInstruction(I, MFI);
}
}
return false;
diff --git a/lib/Target/X86/X86InstructionSelector.cpp b/lib/Target/X86/X86InstructionSelector.cpp
index de58d719acb4..5eb5ad52840a 100644
--- a/lib/Target/X86/X86InstructionSelector.cpp
+++ b/lib/Target/X86/X86InstructionSelector.cpp
@@ -19,6 +19,7 @@
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -72,6 +73,9 @@ private:
bool selectCmp(MachineInstr &I, MachineRegisterInfo &MRI,
MachineFunction &MF) const;
+ bool selectUadde(MachineInstr &I, MachineRegisterInfo &MRI,
+ MachineFunction &MF) const;
+
const X86TargetMachine &TM;
const X86Subtarget &STI;
const X86InstrInfo &TII;
@@ -243,6 +247,8 @@ bool X86InstructionSelector::select(MachineInstr &I) const {
return true;
if (selectCmp(I, MRI, MF))
return true;
+ if (selectUadde(I, MRI, MF))
+ return true;
return false;
}
@@ -564,6 +570,66 @@ bool X86InstructionSelector::selectCmp(MachineInstr &I,
return true;
}
+bool X86InstructionSelector::selectUadde(MachineInstr &I,
+ MachineRegisterInfo &MRI,
+ MachineFunction &MF) const {
+ if (I.getOpcode() != TargetOpcode::G_UADDE)
+ return false;
+
+ const unsigned DstReg = I.getOperand(0).getReg();
+ const unsigned CarryOutReg = I.getOperand(1).getReg();
+ const unsigned Op0Reg = I.getOperand(2).getReg();
+ const unsigned Op1Reg = I.getOperand(3).getReg();
+ unsigned CarryInReg = I.getOperand(4).getReg();
+
+ const LLT DstTy = MRI.getType(DstReg);
+
+ if (DstTy != LLT::scalar(32))
+ return false;
+
+ // Find the instruction that defines CarryIn.
+ MachineInstr *Def = MRI.getVRegDef(CarryInReg);
+ while (Def->getOpcode() == TargetOpcode::G_TRUNC) {
+ CarryInReg = Def->getOperand(1).getReg();
+ Def = MRI.getVRegDef(CarryInReg);
+ }
+
+ unsigned Opcode;
+ if (Def->getOpcode() == TargetOpcode::G_UADDE) {
+ // The carry was set by a previous ADD.
+
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), X86::EFLAGS)
+ .addReg(CarryInReg);
+
+ if (!RBI.constrainGenericRegister(CarryInReg, X86::GR32RegClass, MRI))
+ return false;
+
+ Opcode = X86::ADC32rr;
+ } else if (auto val = getConstantVRegVal(CarryInReg, MRI)) {
+ // The carry-in is a constant; only zero is supported.
+ if (*val != 0)
+ return false;
+
+ Opcode = X86::ADD32rr;
+ } else
+ return false;
+
+ MachineInstr &AddInst =
+ *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opcode), DstReg)
+ .addReg(Op0Reg)
+ .addReg(Op1Reg);
+
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), CarryOutReg)
+ .addReg(X86::EFLAGS);
+
+ if (!constrainSelectedInstRegOperands(AddInst, TII, TRI, RBI) ||
+ !RBI.constrainGenericRegister(CarryOutReg, X86::GR32RegClass, MRI))
+ return false;
+
+ I.eraseFromParent();
+ return true;
+}
+
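The shape being matched here is the two-part wide add that legalization typically produces on 32-bit x86: the first G_UADDE sees a constant-zero carry-in and becomes ADD32rr, and each later one copies its carry into EFLAGS and becomes ADC32rr. A self-contained C++ sketch of the arithmetic being reassembled (illustrative, not selector code):

    #include <cstdint>
    uint64_t add64_via_32(uint32_t alo, uint32_t ahi, uint32_t blo, uint32_t bhi) {
      uint32_t lo = alo + blo;         // ADD32rr: sets the carry flag
      uint32_t carry = lo < alo;       // carry out of the low half
      uint32_t hi = ahi + bhi + carry; // ADC32rr: consumes that carry
      return ((uint64_t)hi << 32) | lo;
    }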
InstructionSelector *
llvm::createX86InstructionSelector(const X86TargetMachine &TM,
X86Subtarget &Subtarget,
diff --git a/lib/Target/X86/X86LegalizerInfo.cpp b/lib/Target/X86/X86LegalizerInfo.cpp
index cf26238c0239..8ce240714f17 100644
--- a/lib/Target/X86/X86LegalizerInfo.cpp
+++ b/lib/Target/X86/X86LegalizerInfo.cpp
@@ -59,6 +59,11 @@ void X86LegalizerInfo::setLegalizerInfo32bit() {
for (auto Ty : {s8, s16, s32})
setAction({BinOp, Ty}, Legal);
+ for (unsigned Op : {G_UADDE}) {
+ setAction({Op, s32}, Legal);
+ setAction({Op, 1, s1}, Legal);
+ }
+
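Only the 32-bit form is marked legal here, with the carry (type index 1) at s1; this lines up with selectUadde above, which bails out unless the destination LLT is scalar(32).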
for (unsigned MemOp : {G_LOAD, G_STORE}) {
for (auto Ty : {s8, s16, s32, p0})
setAction({MemOp, Ty}, Legal);
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 02be95e2e556..de1514243aeb 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -253,11 +253,6 @@ protected:
/// True if the LEA instruction with certain arguments is slow
bool SlowLEA;
- /// True if the LEA instruction has all three source operands: base, index,
- /// and offset or if the LEA instruction uses base and index registers where
- /// the base is EBP, RBP,or R13
- bool Slow3OpsLEA;
-
/// True if INC and DEC instructions are slow when writing to flags
bool SlowIncDec;
@@ -495,7 +490,6 @@ public:
bool callRegIndirect() const { return CallRegIndirect; }
bool LEAusesAG() const { return LEAUsesAG; }
bool slowLEA() const { return SlowLEA; }
- bool slow3OpsLEA() const { return Slow3OpsLEA; }
bool slowIncDec() const { return SlowIncDec; }
bool hasCDI() const { return HasCDI; }
bool hasPFI() const { return HasPFI; }
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index c6a90725d89c..9a82e6e50463 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -61,7 +61,6 @@ static cl::opt<bool> EnableMachineCombinerPass("x86-machine-combiner",
namespace llvm {
void initializeWinEHStatePassPass(PassRegistry &);
-void initializeFixupLEAPassPass(PassRegistry &);
void initializeX86ExecutionDepsFixPass(PassRegistry &);
} // end namespace llvm
@@ -76,7 +75,6 @@ extern "C" void LLVMInitializeX86Target() {
initializeWinEHStatePassPass(PR);
initializeFixupBWInstPassPass(PR);
initializeEvexToVexInstPassPass(PR);
- initializeFixupLEAPassPass(PR);
initializeX86ExecutionDepsFixPass(PR);
}
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index 80e18161a94b..8566bd91c89e 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1392,6 +1392,16 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
// CTLZ: llvm\test\CodeGen\X86\vector-lzcnt-*.ll
// CTPOP: llvm\test\CodeGen\X86\vector-popcnt-*.ll
// CTTZ: llvm\test\CodeGen\X86\vector-tzcnt-*.ll
+ static const CostTblEntry AVX512BWCostTbl[] = {
+ { ISD::BITREVERSE, MVT::v8i64, 5 },
+ { ISD::BITREVERSE, MVT::v16i32, 5 },
+ { ISD::BITREVERSE, MVT::v32i16, 5 },
+ { ISD::BITREVERSE, MVT::v64i8, 5 },
+ };
+ static const CostTblEntry AVX512CostTbl[] = {
+ { ISD::BITREVERSE, MVT::v8i64, 36 },
+ { ISD::BITREVERSE, MVT::v16i32, 24 },
+ };
static const CostTblEntry XOPCostTbl[] = {
{ ISD::BITREVERSE, MVT::v4i64, 4 },
{ ISD::BITREVERSE, MVT::v8i32, 4 },
@@ -1550,6 +1560,14 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
MVT MTy = LT.second;
// Attempt to lookup cost.
+ if (ST->hasBWI())
+ if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
+
+ if (ST->hasAVX512())
+ if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
+
if (ST->hasXOP())
if (const auto *Entry = CostTableLookup(XOPCostTbl, ISD, MTy))
return LT.first * Entry->Cost;
diff --git a/lib/Transforms/Coroutines/CoroFrame.cpp b/lib/Transforms/Coroutines/CoroFrame.cpp
index 4480220f2cd4..417d57f7625b 100644
--- a/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -347,6 +347,27 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
return FrameTy;
}
+// We need to make room to insert a spill after the initial PHIs, but before
+// the catchswitch instruction. Placing the spill before the catchswitch would
+// violate the requirement that a catchswitch, like all other EH pads, must be
+// the first non-PHI instruction in its block.
+//
+// Split the catchswitch off into a separate block and insert in its place:
+//
+// cleanuppad <InsertPt> cleanupret.
+//
+// The cleanupret instruction will act as the insertion point for the spill.
+static Instruction *splitBeforeCatchSwitch(CatchSwitchInst *CatchSwitch) {
+ BasicBlock *CurrentBlock = CatchSwitch->getParent();
+ BasicBlock *NewBlock = CurrentBlock->splitBasicBlock(CatchSwitch);
+ CurrentBlock->getTerminator()->eraseFromParent();
+
+ auto *CleanupPad =
+ CleanupPadInst::Create(CatchSwitch->getParentPad(), {}, "", CurrentBlock);
+ auto *CleanupRet =
+ CleanupReturnInst::Create(CleanupPad, NewBlock, CurrentBlock);
+ return CleanupRet;
+}
+
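After the split, the original block ends in the new cleanupret branching to NewBlock, which now starts with the catchswitch; the spill emitted at the returned insertion point therefore lands before the cleanupret, and the EH-pad-must-be-first invariant stays intact.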
// Replace all alloca and SSA values that are accessed across suspend points
// with GetElementPointer from coroutine frame + loads and stores. Create an
// AllocaSpillBB that will become the new entry block for the resume parts of
@@ -437,8 +458,11 @@ static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) {
InsertPt = NewBB->getTerminator();
} else if (dyn_cast<PHINode>(CurrentValue)) {
// Skip the PHINodes and EH pad instructions.
- InsertPt =
- &*cast<Instruction>(E.def())->getParent()->getFirstInsertionPt();
+ BasicBlock *DefBlock = cast<Instruction>(E.def())->getParent();
+ if (auto *CSI = dyn_cast<CatchSwitchInst>(DefBlock->getTerminator()))
+ InsertPt = splitBeforeCatchSwitch(CSI);
+ else
+ InsertPt = &*DefBlock->getFirstInsertionPt();
} else {
// For all other values, the spill is placed immediately after
// the definition.
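
A hand-written before/after sketch of the splitBeforeCatchSwitch transform above (block and value names are invented, and the parent pad is assumed to be none):

// Before: the spill would have to land between the PHI and the catchswitch,
// which is illegal because an EH pad must be the first non-PHI in its block.
//
//   handler:
//     %val = phi i32 [ 0, %entry ], [ %x, %loop ]
//     %cs  = catchswitch within none [label %catch] unwind to caller
//
// After: the new cleanupret is a legal insertion point for the spill, and
// the catchswitch remains the first non-PHI of its (new) block.
//
//   handler:
//     %val = phi i32 [ 0, %entry ], [ %x, %loop ]
//     %pad = cleanuppad within none []
//     cleanupret from %pad unwind label %handler.split
//   handler.split:
//     %cs  = catchswitch within none [label %catch] unwind to caller
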
diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h
index 1424f61fe701..f88a2c6acc3f 100644
--- a/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -74,6 +74,27 @@ static inline unsigned getComplexity(Value *V) {
return isa<Constant>(V) ? (isa<UndefValue>(V) ? 0 : 1) : 2;
}
+/// Predicate canonicalization reduces the number of patterns that need to be
+/// matched by other transforms. For example, we may swap the operands of a
+/// conditional branch or select to create a compare with a canonical (inverted)
+/// predicate, which is then more likely to be matched with other values.
+static inline bool isCanonicalPredicate(CmpInst::Predicate Pred) {
+ switch (Pred) {
+ case CmpInst::ICMP_NE:
+ case CmpInst::ICMP_ULE:
+ case CmpInst::ICMP_SLE:
+ case CmpInst::ICMP_UGE:
+ case CmpInst::ICMP_SGE:
+ // TODO: There are 16 FCMP predicates. Should others be (not) canonical?
+ case CmpInst::FCMP_ONE:
+ case CmpInst::FCMP_OLE:
+ case CmpInst::FCMP_OGE:
+ return false;
+ default:
+ return true;
+ }
+}
+
/// \brief Add one to a Constant
static inline Constant *AddOne(Constant *C) {
return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
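
For reference, each predicate the helper reports as non-canonical inverts (via CmpInst::getInversePredicate) to a canonical one; the pairs below are written out by hand from the predicate definitions:

// ICMP_NE  -> ICMP_EQ     ICMP_ULE -> ICMP_UGT    ICMP_SLE -> ICMP_SGT
// ICMP_UGE -> ICMP_ULT    ICMP_SGE -> ICMP_SLT
// FCMP_ONE -> FCMP_UEQ    FCMP_OLE -> FCMP_UGT    FCMP_OGE -> FCMP_ULT
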
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 65b1148cb03b..7ed9fd566b37 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2210,37 +2210,17 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
return &BI;
}
- // Canonicalize fcmp_one -> fcmp_oeq
- FCmpInst::Predicate FPred; Value *Y;
- if (match(&BI, m_Br(m_OneUse(m_FCmp(FPred, m_Value(X), m_Value(Y))),
- TrueDest, FalseDest))) {
- // TODO: Why are we only transforming these 3 predicates?
- if (FPred == FCmpInst::FCMP_ONE || FPred == FCmpInst::FCMP_OLE ||
- FPred == FCmpInst::FCMP_OGE) {
- FCmpInst *Cond = cast<FCmpInst>(BI.getCondition());
- Cond->setPredicate(FCmpInst::getInversePredicate(FPred));
-
- // Swap Destinations and condition.
- BI.swapSuccessors();
- Worklist.Add(Cond);
- return &BI;
- }
- }
-
- // Canonicalize icmp_ne -> icmp_eq
- ICmpInst::Predicate IPred;
- if (match(&BI, m_Br(m_OneUse(m_ICmp(IPred, m_Value(X), m_Value(Y))),
- TrueDest, FalseDest))) {
- if (IPred == ICmpInst::ICMP_NE || IPred == ICmpInst::ICMP_ULE ||
- IPred == ICmpInst::ICMP_SLE || IPred == ICmpInst::ICMP_UGE ||
- IPred == ICmpInst::ICMP_SGE) {
- ICmpInst *Cond = cast<ICmpInst>(BI.getCondition());
- Cond->setPredicate(ICmpInst::getInversePredicate(IPred));
- // Swap Destinations and condition.
- BI.swapSuccessors();
- Worklist.Add(Cond);
- return &BI;
- }
+  // Canonicalize, for example, icmp_ne -> icmp_eq or fcmp_one -> fcmp_ueq.
+ CmpInst::Predicate Pred;
+ if (match(&BI, m_Br(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())), TrueDest,
+ FalseDest)) &&
+ !isCanonicalPredicate(Pred)) {
+ // Swap destinations and condition.
+ CmpInst *Cond = cast<CmpInst>(BI.getCondition());
+ Cond->setPredicate(CmpInst::getInversePredicate(Pred));
+ BI.swapSuccessors();
+ Worklist.Add(Cond);
+ return &BI;
}
return nullptr;
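
The net effect on the IR, as a hand-written example (value and label names invented): the compare is inverted and the successors are swapped, which preserves the branch's semantics:

//   %c = icmp ne i32 %a, %b
//   br i1 %c, label %true.dest, label %false.dest
// becomes
//   %c = icmp eq i32 %a, %b
//   br i1 %c, label %false.dest, label %true.dest
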
@@ -3053,7 +3033,10 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
}
}
- InstrsForInstCombineWorklist.push_back(Inst);
+  // Skip processing debug intrinsics in InstCombine. Processing these call
+  // instructions consumes a non-trivial amount of time and provides no value
+  // for the optimizer.
+ if (!isa<DbgInfoIntrinsic>(Inst))
+ InstrsForInstCombineWorklist.push_back(Inst);
}
// Recursively visit successors. If this is a branch or switch on a
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 340c81fed0fd..37b9c4b1094e 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -546,7 +546,7 @@ static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
// If there are escaping uses of the invariant.start instruction, the load
// may be non-invariant.
if (!II || II->getIntrinsicID() != Intrinsic::invariant_start ||
- II->hasNUsesOrMore(1))
+ !II->use_empty())
continue;
unsigned InvariantSizeInBits =
cast<ConstantInt>(II->getArgOperand(0))->getSExtValue() * 8;
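
The two spellings of this guard are interchangeable; a short hand-written note on the identity, with an invented IR example of the token being checked:

// For any llvm::Value V:  V->hasNUsesOrMore(1) == !V->use_empty(),
// so both forms skip an invariant.start whose token has any use at all:
//   %tok = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %p)
// The load is only treated as invariant when %tok itself is unused.
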
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 6693a26e8890..cb6223b070a6 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -1292,13 +1292,15 @@ bool LoopIdiomRecognize::recognizeAndInsertCTLZ() {
BasicBlock *PH = CurLoop->getLoopPreheader();
Value *InitX = PhiX->getIncomingValueForBlock(PH);
// If we check X != 0 before entering the loop we don't need a zero
- // check in CTLZ intrinsic.
- if (BasicBlock *PreCondBB = PH->getSinglePredecessor())
- if (BranchInst *PreCondBr =
- dyn_cast<BranchInst>(PreCondBB->getTerminator())) {
- if (matchCondition(PreCondBr, PH) == InitX)
- ZeroCheck = true;
- }
+  // check in the CTLZ intrinsic, but only if the Cnt Phi is not used outside
+  // the loop (if it is used, we count CTLZ(X >> 1)).
+ if (!IsCntPhiUsedOutsideLoop)
+ if (BasicBlock *PreCondBB = PH->getSinglePredecessor())
+ if (BranchInst *PreCondBr =
+ dyn_cast<BranchInst>(PreCondBB->getTerminator())) {
+ if (matchCondition(PreCondBr, PH) == InitX)
+ ZeroCheck = true;
+ }
// Check if CTLZ intrinsic is profitable. Assume it is always profitable
// if we delete the loop (the loop has only 6 instructions):
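
A hand-written illustration of what the zero check buys, on the assumption that ZeroCheck feeds the intrinsic's is_zero_undef operand:

//   %c = call i32 @llvm.ctlz.i32(i32 %x, i1 true)   ; only valid when the
//                                                   ; preheader guards x != 0
//   %c = call i32 @llvm.ctlz.i32(i32 %x, i1 false)  ; defined at x == 0
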
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index ccedb98d7fa1..bd1f21c69eba 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -3902,8 +3902,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
// Compute the difference between the two.
int64_t Imm = (uint64_t)JImm - M->first;
- for (int LUIdx = UsedByIndices.find_first(); LUIdx != -1;
- LUIdx = UsedByIndices.find_next(LUIdx))
+ for (unsigned LUIdx : UsedByIndices.set_bits())
// Make a memo of this use, offset, and register tuple.
if (UniqueItems.insert(std::make_pair(LUIdx, Imm)).second)
WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg));
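
set_bits() (from llvm/ADT/BitVector.h) is a range adapter over the indices of set bits; a minimal sketch of the rewrite pattern used here:

// Before: manual cursor walk.
//   for (int I = BV.find_first(); I != -1; I = BV.find_next(I))
//     visit(I);
// After: range-based iteration over the same indices.
//   for (unsigned I : BV.set_bits())
//     visit(I);
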
diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp
index 5e0a705782ea..0e7572f8d2e5 100644
--- a/lib/Transforms/Scalar/NewGVN.cpp
+++ b/lib/Transforms/Scalar/NewGVN.cpp
@@ -642,6 +642,7 @@ private:
void updateProcessedCount(Value *V);
void verifyMemoryCongruency() const;
void verifyIterationSettled(Function &F);
+ void verifyStoreExpressions() const;
bool singleReachablePHIPath(const MemoryAccess *, const MemoryAccess *) const;
BasicBlock *getBlockForValue(Value *V) const;
void deleteExpression(const Expression *E) const;
@@ -2003,7 +2004,6 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E,
// If it's not a memory use, set the MemoryAccess equivalence
auto *InstMA = dyn_cast_or_null<MemoryDef>(MSSA->getMemoryAccess(I));
- bool InstWasMemoryLeader = InstMA && OldClass->getMemoryLeader() == InstMA;
if (InstMA)
moveMemoryToNewCongruenceClass(I, InstMA, OldClass, NewClass);
ValueToClass[I] = NewClass;
@@ -2029,31 +2029,6 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E,
if (OldClass->getStoredValue())
OldClass->setStoredValue(nullptr);
}
- // If we destroy the old access leader and it's a store, we have to
- // effectively destroy the congruence class. When it comes to scalars,
- // anything with the same value is as good as any other. That means that
- // one leader is as good as another, and as long as you have some leader for
- // the value, you are good. When it comes to *memory states*, only one
- // particular thing really represents the definition of a given memory
- // state. Once it goes away, we need to re-evaluate which pieces of memory
- // are really still equivalent. The best way to do this is to re-value
- // number things. The only way to really make that happen is to destroy the
- // rest of the class. In order to effectively destroy the class, we reset
- // ExpressionToClass for each by using the ValueToExpression mapping. The
- // members later get marked as touched due to the leader change. We will
- // create new congruence classes, and the pieces that are still equivalent
- // will end back together in a new class. If this becomes too expensive, it
- // is possible to use a versioning scheme for the congruence classes to
- // avoid the expressions finding this old class. Note that the situation is
- // different for memory phis, because they are evaluated anew each time, and
- // they become equal not by hashing, but by seeing if all operands are the
- // same (or only one is reachable).
- if (OldClass->getStoreCount() > 0 && InstWasMemoryLeader) {
- DEBUG(dbgs() << "Kicking everything out of class " << OldClass->getID()
- << " because MemoryAccess leader changed");
- for (auto Member : *OldClass)
- ExpressionToClass.erase(ValueToExpression.lookup(Member));
- }
OldClass->setLeader(getNextValueLeader(OldClass));
OldClass->resetNextLeader();
markValueLeaderChangeTouched(OldClass);
@@ -2062,7 +2037,6 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E,
// Perform congruence finding on a given value numbering expression.
void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) {
- ValueToExpression[I] = E;
// This is guaranteed to return something, since it will at least find
// TOP.
@@ -2132,6 +2106,18 @@ void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) {
if (auto *CI = dyn_cast<CmpInst>(I))
markPredicateUsersTouched(CI);
}
+ // If we changed the class of the store, we want to ensure nothing finds the
+  // old store expression. In particular, loads do not compare against the
+  // stored value, so they will find old store expressions (and associated
+  // class mappings) if we leave them in the table.
+ if (ClassChanged && isa<StoreExpression>(E)) {
+ auto *OldE = ValueToExpression.lookup(I);
+ // It could just be that the old class died. We don't want to erase it if we
+ // just moved classes.
+ if (OldE && isa<StoreExpression>(OldE) && !OldE->equals(*E))
+ ExpressionToClass.erase(OldE);
+ }
+ ValueToExpression[I] = E;
}
// Process the fact that Edge (from, to) is reachable, including marking
@@ -2651,6 +2637,30 @@ void NewGVN::verifyIterationSettled(Function &F) {
#endif
}
+// Verify that for each store expression in the expression-to-class mapping,
+// only the latest one appears; no stale duplicates may remain.
+// Because loads do not use the stored value when testing equality against
+// stores, if we don't erase old store expressions from the table, a load can
+// find a no-longer-valid StoreExpression.
+void NewGVN::verifyStoreExpressions() const {
+#ifndef NDEBUG
+ DenseSet<std::pair<const Value *, const Value *>> StoreExpressionSet;
+ for (const auto &KV : ExpressionToClass) {
+ if (auto *SE = dyn_cast<StoreExpression>(KV.first)) {
+      // Make sure a version that will conflict with loads is not already there.
+ auto Res =
+ StoreExpressionSet.insert({SE->getOperand(0), SE->getMemoryLeader()});
+ assert(Res.second &&
+ "Stored expression conflict exists in expression table");
+ auto *ValueExpr = ValueToExpression.lookup(SE->getStoreInst());
+ assert(ValueExpr && ValueExpr->equals(*SE) &&
+ "StoreExpression in ExpressionToClass is not latest "
+ "StoreExpression for value");
+ }
+ }
+#endif
+}
+
// This is the main value numbering loop; it iterates over the initial touched
// instruction set, propagating value numbers and marking things touched, etc.,
// until the set of touched instructions is completely empty.
@@ -2668,8 +2678,7 @@ void NewGVN::iterateTouchedInstructions() {
// TODO: As we hit a new block, we should push and pop equalities into a
// table lookupOperandLeader can use, to catch things PredicateInfo
// might miss, like edge-only equivalences.
- for (int InstrNum = TouchedInstructions.find_first(); InstrNum != -1;
- InstrNum = TouchedInstructions.find_next(InstrNum)) {
+ for (unsigned InstrNum : TouchedInstructions.set_bits()) {
// This instruction was found to be dead. We don't bother looking
// at it again.
@@ -2776,6 +2785,7 @@ bool NewGVN::runGVN() {
iterateTouchedInstructions();
verifyMemoryCongruency();
verifyIterationSettled(F);
+ verifyStoreExpressions();
Changed |= eliminateInstructions(F);
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index ef29d4141600..53320bff0883 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -1922,7 +1922,7 @@ Instruction *ReassociatePass::canonicalizeNegConstExpr(Instruction *I) {
// User must be a binary operator with one or more uses.
Instruction *User = I->user_back();
- if (!isa<BinaryOperator>(User) || !User->hasNUsesOrMore(1))
+ if (!isa<BinaryOperator>(User) || User->use_empty())
return nullptr;
unsigned UserOpcode = User->getOpcode();
diff --git a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 4f608c97147d..b32a61a7e8f8 100644
--- a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -1,4 +1,4 @@
-//===-- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow --------===//
+//===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,25 +7,41 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Sequence.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GenericDomTree.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <utility>
#define DEBUG_TYPE "simple-loop-unswitch"
@@ -174,7 +190,7 @@ static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB,
// When the loop exit is directly unswitched we just need to update the
// incoming basic block. We loop to handle weird cases with repeated
// incoming blocks, but expect to typically only have one operand here.
- for (auto i : llvm::seq<int>(0, PN->getNumOperands())) {
+ for (auto i : seq<int>(0, PN->getNumOperands())) {
assert(PN->getIncomingBlock(i) == &OldExitingBB &&
"Found incoming block different from unique predecessor!");
PN->setIncomingBlock(i, &OldPH);
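
llvm::seq (llvm/ADT/Sequence.h) produces a half-open iota range, so the loop above visits operand indices 0 through getNumOperands() - 1; a tiny hand-written example:

//   for (auto i : seq<int>(0, 3))
//     visit(i); // i takes the values 0, 1, 2
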
@@ -688,9 +704,11 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
}
namespace {
+
class SimpleLoopUnswitchLegacyPass : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
+
explicit SimpleLoopUnswitchLegacyPass() : LoopPass(ID) {
initializeSimpleLoopUnswitchLegacyPassPass(
*PassRegistry::getPassRegistry());
@@ -703,7 +721,8 @@ public:
getLoopAnalysisUsage(AU);
}
};
-} // namespace
+
+} // end anonymous namespace
bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
if (skipLoop(L))