Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms/Scalar')
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/ADCE.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/BDCE.cpp | 14
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp | 7
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp | 35
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp | 93
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp | 36
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp | 1063
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/EarlyCSE.cpp | 20
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/Float2Int.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp | 234
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNHoist.cpp | 14
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/GuardWidening.cpp | 86
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 211
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp | 190
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp | 64
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp | 369
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp | 126
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDeletion.cpp | 55
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDistribute.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFlatten.cpp | 270
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 414
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp | 21
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInterchange.cpp | 18
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp | 12
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPassManager.cpp | 42
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPredication.cpp | 160
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopRotation.cpp | 15
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp | 16
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSink.cpp | 9
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | 162
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 340
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp | 22
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp | 14
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp | 7
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp | 26
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp | 594
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/MergeICmps.cpp | 204
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp | 9
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/NaryReassociate.cpp | 114
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/NewGVN.cpp | 11
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/Reassociate.cpp | 19
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp | 48
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/SCCP.cpp | 17
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp | 201
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp | 8
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalarizer.cpp | 18
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp | 13
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp | 81
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp | 19
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp | 14
56 files changed, 3161 insertions, 2409 deletions
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ADCE.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ADCE.cpp
index 6f3fdb88eda5..b693acceb3f6 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ADCE.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ADCE.cpp
@@ -538,7 +538,7 @@ bool AggressiveDeadCodeElimination::removeDeadInstructions() {
// that have no side effects and do not influence the control flow or return
// value of the function, and may therefore be deleted safely.
// NOTE: We reuse the Worklist vector here for memory efficiency.
- for (Instruction &I : instructions(F)) {
+ for (Instruction &I : llvm::reverse(instructions(F))) {
// Check if the instruction is alive.
if (isLive(&I))
continue;
@@ -554,9 +554,11 @@ bool AggressiveDeadCodeElimination::removeDeadInstructions() {
// Prepare to delete.
Worklist.push_back(&I);
salvageDebugInfo(I);
- I.dropAllReferences();
}
+ for (Instruction *&I : Worklist)
+ I->dropAllReferences();
+
for (Instruction *&I : Worklist) {
++NumRemoved;
I->eraseFromParent();
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/BDCE.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/BDCE.cpp
index c06125788f37..6c2467db79f7 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/BDCE.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/BDCE.cpp
@@ -53,7 +53,7 @@ static void clearAssumptionsOfUsers(Instruction *I, DemandedBits &DB) {
// in the def-use chain needs to be changed.
auto *J = dyn_cast<Instruction>(JU);
if (J && J->getType()->isIntOrIntVectorTy() &&
- !DB.getDemandedBits(J).isAllOnesValue()) {
+ !DB.getDemandedBits(J).isAllOnes()) {
Visited.insert(J);
WorkList.push_back(J);
}
@@ -84,7 +84,7 @@ static void clearAssumptionsOfUsers(Instruction *I, DemandedBits &DB) {
// that in the def-use chain needs to be changed.
auto *K = dyn_cast<Instruction>(KU);
if (K && Visited.insert(K).second && K->getType()->isIntOrIntVectorTy() &&
- !DB.getDemandedBits(K).isAllOnesValue())
+ !DB.getDemandedBits(K).isAllOnes())
WorkList.push_back(K);
}
}
@@ -103,12 +103,9 @@ static bool bitTrackingDCE(Function &F, DemandedBits &DB) {
// Remove instructions that are dead, either because they were not reached
// during analysis or have no demanded bits.
if (DB.isInstructionDead(&I) ||
- (I.getType()->isIntOrIntVectorTy() &&
- DB.getDemandedBits(&I).isNullValue() &&
+ (I.getType()->isIntOrIntVectorTy() && DB.getDemandedBits(&I).isZero() &&
wouldInstructionBeTriviallyDead(&I))) {
- salvageDebugInfo(I);
Worklist.push_back(&I);
- I.dropAllReferences();
Changed = true;
continue;
}
@@ -155,6 +152,11 @@ static bool bitTrackingDCE(Function &F, DemandedBits &DB) {
}
}
+ for (Instruction *&I : llvm::reverse(Worklist)) {
+ salvageDebugInfo(*I);
+ I->dropAllReferences();
+ }
+
for (Instruction *&I : Worklist) {
++NumRemoved;
I->eraseFromParent();
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
index 2eb94b721d96..95de59fa8262 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
@@ -467,7 +467,7 @@ static PredsWithCondsTy shouldSplitOnPredicatedArgument(CallBase &CB,
BasicBlock *StopAt = CSDTNode ? CSDTNode->getIDom()->getBlock() : nullptr;
SmallVector<std::pair<BasicBlock *, ConditionsTy>, 2> PredsCS;
- for (auto *Pred : make_range(Preds.rbegin(), Preds.rend())) {
+ for (auto *Pred : llvm::reverse(Preds)) {
ConditionsTy Conditions;
// Record condition on edge BB(CS) <- Pred
recordCondition(CB, Pred, CB.getParent(), Conditions);
@@ -505,8 +505,7 @@ static bool doCallSiteSplitting(Function &F, TargetLibraryInfo &TLI,
DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Lazy);
bool Changed = false;
- for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE;) {
- BasicBlock &BB = *BI++;
+ for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
auto II = BB.getFirstNonPHIOrDbg()->getIterator();
auto IE = BB.getTerminator()->getIterator();
// Iterate until we reach the terminator instruction. tryToSplitCallSite
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
index 535f50d4f904..27f54f8026e1 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -762,7 +762,7 @@ void ConstantHoistingPass::emitBaseConstants(Instruction *Base,
PointerType *Int8PtrTy = Type::getInt8PtrTy(*Ctx,
cast<PointerType>(Ty)->getAddressSpace());
Base = new BitCastInst(Base, Int8PtrTy, "base_bitcast", InsertionPt);
- Mat = GetElementPtrInst::Create(Int8PtrTy->getElementType(), Base,
+ Mat = GetElementPtrInst::Create(Type::getInt8Ty(*Ctx), Base,
Offset, "mat_gep", InsertionPt);
Mat = new BitCastInst(Mat, Ty, "mat_bitcast", InsertionPt);
} else
@@ -819,10 +819,9 @@ void ConstantHoistingPass::emitBaseConstants(Instruction *Base,
// Aside from constant GEPs, only constant cast expressions are collected.
assert(ConstExpr->isCast() && "ConstExpr should be a cast");
- Instruction *ConstExprInst = ConstExpr->getAsInstruction();
+ Instruction *ConstExprInst = ConstExpr->getAsInstruction(
+ findMatInsertPt(ConstUser.Inst, ConstUser.OpndIdx));
ConstExprInst->setOperand(0, Mat);
- ConstExprInst->insertBefore(findMatInsertPt(ConstUser.Inst,
- ConstUser.OpndIdx));
// Use the same debug location as the instruction we are about to update.
ConstExprInst->setDebugLoc(ConstUser.Inst->getDebugLoc());
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index efd1c025d0cd..7f2d5d7d9987 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstraintSystem.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
@@ -268,6 +269,31 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT) {
continue;
WorkList.emplace_back(DT.getNode(&BB));
+ // True as long as long as the current instruction is guaranteed to execute.
+ bool GuaranteedToExecute = true;
+ // Scan BB for assume calls.
+ // TODO: also use this scan to queue conditions to simplify, so we can
+ // interleave facts from assumes and conditions to simplify in a single
+ // basic block. And to skip another traversal of each basic block when
+ // simplifying.
+ for (Instruction &I : BB) {
+ Value *Cond;
+ // For now, just handle assumes with a single compare as condition.
+ if (match(&I, m_Intrinsic<Intrinsic::assume>(m_Value(Cond))) &&
+ isa<CmpInst>(Cond)) {
+ if (GuaranteedToExecute) {
+ // The assume is guaranteed to execute when BB is entered, hence Cond
+ // holds on entry to BB.
+ WorkList.emplace_back(DT.getNode(&BB), cast<CmpInst>(Cond), false);
+ } else {
+ // Otherwise the condition only holds in the successors.
+ for (BasicBlock *Succ : successors(&BB))
+ WorkList.emplace_back(DT.getNode(Succ), cast<CmpInst>(Cond), false);
+ }
+ }
+ GuaranteedToExecute &= isGuaranteedToTransferExecutionToSuccessor(&I);
+ }
+
auto *Br = dyn_cast<BranchInst>(BB.getTerminator());
if (!Br || !Br->isConditional())
continue;
@@ -395,8 +421,13 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT) {
for (auto &E : reverse(DFSInStack))
dbgs() << " C " << *E.Condition << " " << E.IsNot << "\n";
});
- Cmp->replaceAllUsesWith(
- ConstantInt::getTrue(F.getParent()->getContext()));
+ Cmp->replaceUsesWithIf(
+ ConstantInt::getTrue(F.getParent()->getContext()), [](Use &U) {
+ // Conditions in an assume trivially simplify to true. Skip uses
+ // in assume calls to not destroy the available information.
+ auto *II = dyn_cast<IntrinsicInst>(U.getUser());
+ return !II || II->getIntrinsicID() != Intrinsic::assume;
+ });
NumCondsRemoved++;
Changed = true;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 36cbd42a5fdd..ca9567dc7ac8 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -67,6 +67,7 @@ STATISTIC(NumUDivURemsNarrowed,
STATISTIC(NumAShrs, "Number of ashr converted to lshr");
STATISTIC(NumSRems, "Number of srem converted to urem");
STATISTIC(NumSExt, "Number of sext converted to zext");
+STATISTIC(NumSICmps, "Number of signed icmp preds simplified to unsigned");
STATISTIC(NumAnd, "Number of ands removed");
STATISTIC(NumNW, "Number of no-wrap deductions");
STATISTIC(NumNSW, "Number of no-signed-wrap deductions");
@@ -295,11 +296,34 @@ static bool processMemAccess(Instruction *I, LazyValueInfo *LVI) {
return true;
}
+static bool processICmp(ICmpInst *Cmp, LazyValueInfo *LVI) {
+ // Only for signed relational comparisons of scalar integers.
+ if (Cmp->getType()->isVectorTy() ||
+ !Cmp->getOperand(0)->getType()->isIntegerTy())
+ return false;
+
+ if (!Cmp->isSigned())
+ return false;
+
+ ICmpInst::Predicate UnsignedPred =
+ ConstantRange::getEquivalentPredWithFlippedSignedness(
+ Cmp->getPredicate(), LVI->getConstantRange(Cmp->getOperand(0), Cmp),
+ LVI->getConstantRange(Cmp->getOperand(1), Cmp));
+
+ if (UnsignedPred == ICmpInst::Predicate::BAD_ICMP_PREDICATE)
+ return false;
+
+ ++NumSICmps;
+ Cmp->setPredicate(UnsignedPred);
+
+ return true;
+}
+
/// See if LazyValueInfo's ability to exploit edge conditions or range
/// information is sufficient to prove this comparison. Even for local
/// conditions, this can sometimes prove conditions instcombine can't by
/// exploiting range information.
-static bool processCmp(CmpInst *Cmp, LazyValueInfo *LVI) {
+static bool constantFoldCmp(CmpInst *Cmp, LazyValueInfo *LVI) {
Value *Op0 = Cmp->getOperand(0);
auto *C = dyn_cast<Constant>(Cmp->getOperand(1));
if (!C)
@@ -318,6 +342,17 @@ static bool processCmp(CmpInst *Cmp, LazyValueInfo *LVI) {
return true;
}
+static bool processCmp(CmpInst *Cmp, LazyValueInfo *LVI) {
+ if (constantFoldCmp(Cmp, LVI))
+ return true;
+
+ if (auto *ICmp = dyn_cast<ICmpInst>(Cmp))
+ if (processICmp(ICmp, LVI))
+ return true;
+
+ return false;
+}
+
/// Simplify a switch instruction by removing cases which can never fire. If the
/// uselessness of a case could be determined locally then constant propagation
/// would already have figured it out. Instead, walk the predecessors and
@@ -341,7 +376,13 @@ static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI,
// ConstantFoldTerminator() as the underlying SwitchInst can be changed.
SwitchInstProfUpdateWrapper SI(*I);
- for (auto CI = SI->case_begin(), CE = SI->case_end(); CI != CE;) {
+ APInt Low =
+ APInt::getSignedMaxValue(Cond->getType()->getScalarSizeInBits());
+ APInt High =
+ APInt::getSignedMinValue(Cond->getType()->getScalarSizeInBits());
+
+ SwitchInst::CaseIt CI = SI->case_begin();
+ for (auto CE = SI->case_end(); CI != CE;) {
ConstantInt *Case = CI->getCaseValue();
LazyValueInfo::Tristate State =
LVI->getPredicateAt(CmpInst::ICMP_EQ, Cond, Case, I,
@@ -374,9 +415,28 @@ static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI,
break;
}
+ // Get Lower/Upper bound from switch cases.
+ Low = APIntOps::smin(Case->getValue(), Low);
+ High = APIntOps::smax(Case->getValue(), High);
+
// Increment the case iterator since we didn't delete it.
++CI;
}
+
+ // Try to simplify default case as unreachable
+ if (CI == SI->case_end() && SI->getNumCases() != 0 &&
+ !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg())) {
+ const ConstantRange SIRange =
+ LVI->getConstantRange(SI->getCondition(), SI);
+
+ // If the numbered switch cases cover the entire range of the condition,
+ // then the default case is not reachable.
+ if (SIRange.getSignedMin() == Low && SIRange.getSignedMax() == High &&
+ SI->getNumCases() == High - Low + 1) {
+ createUnreachableSwitchDefault(SI, &DTU);
+ Changed = true;
+ }
+ }
}
if (Changed)
@@ -690,7 +750,7 @@ static bool narrowSDivOrSRem(BinaryOperator *Instr, LazyValueInfo *LVI) {
// sdiv/srem is UB if divisor is -1 and divident is INT_MIN, so unless we can
// prove that such a combination is impossible, we need to bump the bitwidth.
- if (CRs[1]->contains(APInt::getAllOnesValue(OrigWidth)) &&
+ if (CRs[1]->contains(APInt::getAllOnes(OrigWidth)) &&
CRs[0]->contains(
APInt::getSignedMinValue(MinSignedBits).sextOrSelf(OrigWidth)))
++MinSignedBits;
@@ -1023,49 +1083,48 @@ static bool runImpl(Function &F, LazyValueInfo *LVI, DominatorTree *DT,
// blocks.
for (BasicBlock *BB : depth_first(&F.getEntryBlock())) {
bool BBChanged = false;
- for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
- Instruction *II = &*BI++;
- switch (II->getOpcode()) {
+ for (Instruction &II : llvm::make_early_inc_range(*BB)) {
+ switch (II.getOpcode()) {
case Instruction::Select:
- BBChanged |= processSelect(cast<SelectInst>(II), LVI);
+ BBChanged |= processSelect(cast<SelectInst>(&II), LVI);
break;
case Instruction::PHI:
- BBChanged |= processPHI(cast<PHINode>(II), LVI, DT, SQ);
+ BBChanged |= processPHI(cast<PHINode>(&II), LVI, DT, SQ);
break;
case Instruction::ICmp:
case Instruction::FCmp:
- BBChanged |= processCmp(cast<CmpInst>(II), LVI);
+ BBChanged |= processCmp(cast<CmpInst>(&II), LVI);
break;
case Instruction::Load:
case Instruction::Store:
- BBChanged |= processMemAccess(II, LVI);
+ BBChanged |= processMemAccess(&II, LVI);
break;
case Instruction::Call:
case Instruction::Invoke:
- BBChanged |= processCallSite(cast<CallBase>(*II), LVI);
+ BBChanged |= processCallSite(cast<CallBase>(II), LVI);
break;
case Instruction::SRem:
case Instruction::SDiv:
- BBChanged |= processSDivOrSRem(cast<BinaryOperator>(II), LVI);
+ BBChanged |= processSDivOrSRem(cast<BinaryOperator>(&II), LVI);
break;
case Instruction::UDiv:
case Instruction::URem:
- BBChanged |= processUDivOrURem(cast<BinaryOperator>(II), LVI);
+ BBChanged |= processUDivOrURem(cast<BinaryOperator>(&II), LVI);
break;
case Instruction::AShr:
- BBChanged |= processAShr(cast<BinaryOperator>(II), LVI);
+ BBChanged |= processAShr(cast<BinaryOperator>(&II), LVI);
break;
case Instruction::SExt:
- BBChanged |= processSExt(cast<SExtInst>(II), LVI);
+ BBChanged |= processSExt(cast<SExtInst>(&II), LVI);
break;
case Instruction::Add:
case Instruction::Sub:
case Instruction::Mul:
case Instruction::Shl:
- BBChanged |= processBinOp(cast<BinaryOperator>(II), LVI);
+ BBChanged |= processBinOp(cast<BinaryOperator>(&II), LVI);
break;
case Instruction::And:
- BBChanged |= processAnd(cast<BinaryOperator>(II), LVI);
+ BBChanged |= processAnd(cast<BinaryOperator>(&II), LVI);
break;
}
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
index 90679bcac4b7..8c4523206070 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
@@ -1,9 +1,8 @@
//===- DFAJumpThreading.cpp - Threads a switch statement inside a loop ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -84,8 +83,6 @@
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <deque>
-#include <unordered_map>
-#include <unordered_set>
using namespace llvm;
@@ -147,8 +144,7 @@ private:
Stack.push_back(SIToUnfold);
while (!Stack.empty()) {
- SelectInstToUnfold SIToUnfold = Stack.back();
- Stack.pop_back();
+ SelectInstToUnfold SIToUnfold = Stack.pop_back_val();
std::vector<SelectInstToUnfold> NewSIsToUnfold;
std::vector<BasicBlock *> NewBBs;
@@ -174,6 +170,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
}
@@ -350,7 +347,7 @@ struct ClonedBlock {
typedef std::deque<BasicBlock *> PathType;
typedef std::vector<PathType> PathsType;
-typedef std::set<const BasicBlock *> VisitedBlocks;
+typedef SmallPtrSet<const BasicBlock *, 8> VisitedBlocks;
typedef std::vector<ClonedBlock> CloneList;
// This data structure keeps track of all blocks that have been cloned. If two
@@ -493,7 +490,7 @@ private:
}
bool isPredictableValue(Value *InpVal, SmallSet<Value *, 16> &SeenValues) {
- if (SeenValues.find(InpVal) != SeenValues.end())
+ if (SeenValues.contains(InpVal))
return true;
if (isa<ConstantInt>(InpVal))
@@ -508,7 +505,7 @@ private:
void addInstToQueue(Value *Val, std::deque<Instruction *> &Q,
SmallSet<Value *, 16> &SeenValues) {
- if (SeenValues.find(Val) != SeenValues.end())
+ if (SeenValues.contains(Val))
return;
if (Instruction *I = dyn_cast<Instruction>(Val))
Q.push_back(I);
@@ -533,7 +530,7 @@ private:
return false;
if (isa<PHINode>(SIUse) &&
- SIBB->getSingleSuccessor() != dyn_cast<Instruction>(SIUse)->getParent())
+ SIBB->getSingleSuccessor() != cast<Instruction>(SIUse)->getParent())
return false;
// If select will not be sunk during unfolding, and it is in the same basic
@@ -621,13 +618,9 @@ private:
// Some blocks have multiple edges to the same successor, and this set
// is used to prevent a duplicate path from being generated
SmallSet<BasicBlock *, 4> Successors;
-
- for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI) {
- BasicBlock *Succ = *SI;
-
- if (Successors.find(Succ) != Successors.end())
+ for (BasicBlock *Succ : successors(BB)) {
+ if (!Successors.insert(Succ).second)
continue;
- Successors.insert(Succ);
// Found a cycle through the SwitchBlock
if (Succ == SwitchBlock) {
@@ -636,7 +629,7 @@ private:
}
// We have encountered a cycle, do not get caught in it
- if (Visited.find(Succ) != Visited.end())
+ if (Visited.contains(Succ))
continue;
PathsType SuccPaths = paths(Succ, Visited, PathDepth + 1);
@@ -668,15 +661,14 @@ private:
SmallSet<Value *, 16> SeenValues;
while (!Stack.empty()) {
- PHINode *CurPhi = Stack.back();
- Stack.pop_back();
+ PHINode *CurPhi = Stack.pop_back_val();
Res[CurPhi->getParent()] = CurPhi;
SeenValues.insert(CurPhi);
for (Value *Incoming : CurPhi->incoming_values()) {
if (Incoming == FirstDef || isa<ConstantInt>(Incoming) ||
- SeenValues.find(Incoming) != SeenValues.end()) {
+ SeenValues.contains(Incoming)) {
continue;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index d22b3f409585..a8ec8bb97970 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -13,10 +13,10 @@
// in between both MemoryDefs. A bit more concretely:
//
// For all MemoryDefs StartDef:
-// 1. Get the next dominating clobbering MemoryDef (EarlierAccess) by walking
+// 1. Get the next dominating clobbering MemoryDef (MaybeDeadAccess) by walking
// upwards.
-// 2. Check that there are no reads between EarlierAccess and the StartDef by
-// checking all uses starting at EarlierAccess and walking until we see
+// 2. Check that there are no reads between MaybeDeadAccess and the StartDef by
+// checking all uses starting at MaybeDeadAccess and walking until we see
// StartDef.
// 3. For each found CurrentDef, check that:
// 1. There are no barrier instructions between CurrentDef and StartDef (like
@@ -56,6 +56,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
@@ -78,6 +79,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
@@ -122,7 +124,7 @@ EnablePartialStoreMerging("enable-dse-partial-store-merging",
static cl::opt<unsigned>
MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden,
cl::desc("The number of memory instructions to scan for "
- "dead store elimination (default = 100)"));
+ "dead store elimination (default = 150)"));
static cl::opt<unsigned> MemorySSAUpwardsStepLimit(
"dse-memoryssa-walklimit", cl::init(90), cl::Hidden,
cl::desc("The maximum number of steps while walking upwards to find "
@@ -203,39 +205,6 @@ static bool hasAnalyzableMemoryWrite(Instruction *I,
return false;
}
-/// Return a Location stored to by the specified instruction. If isRemovable
-/// returns true, this function and getLocForRead completely describe the memory
-/// operations for this instruction.
-static MemoryLocation getLocForWrite(Instruction *Inst,
- const TargetLibraryInfo &TLI) {
- if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
- return MemoryLocation::get(SI);
-
- // memcpy/memmove/memset.
- if (auto *MI = dyn_cast<AnyMemIntrinsic>(Inst))
- return MemoryLocation::getForDest(MI);
-
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
- switch (II->getIntrinsicID()) {
- default:
- return MemoryLocation(); // Unhandled intrinsic.
- case Intrinsic::init_trampoline:
- return MemoryLocation::getAfter(II->getArgOperand(0));
- case Intrinsic::masked_store:
- return MemoryLocation::getForArgument(II, 1, TLI);
- case Intrinsic::lifetime_end: {
- uint64_t Len = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
- return MemoryLocation(II->getArgOperand(1), Len);
- }
- }
- }
- if (auto *CB = dyn_cast<CallBase>(Inst))
- // All the supported TLI functions so far happen to have dest as their
- // first argument.
- return MemoryLocation::getAfter(CB->getArgOperand(0));
- return MemoryLocation();
-}
-
/// If the value of this instruction and the memory it writes to is unused, may
/// we delete this instruction?
static bool isRemovable(Instruction *I) {
@@ -333,147 +302,146 @@ enum OverwriteResult {
} // end anonymous namespace
/// Check if two instruction are masked stores that completely
-/// overwrite one another. More specifically, \p Later has to
-/// overwrite \p Earlier.
-static OverwriteResult isMaskedStoreOverwrite(const Instruction *Later,
- const Instruction *Earlier,
+/// overwrite one another. More specifically, \p KillingI has to
+/// overwrite \p DeadI.
+static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
+ const Instruction *DeadI,
BatchAAResults &AA) {
- const auto *IIL = dyn_cast<IntrinsicInst>(Later);
- const auto *IIE = dyn_cast<IntrinsicInst>(Earlier);
- if (IIL == nullptr || IIE == nullptr)
+ const auto *KillingII = dyn_cast<IntrinsicInst>(KillingI);
+ const auto *DeadII = dyn_cast<IntrinsicInst>(DeadI);
+ if (KillingII == nullptr || DeadII == nullptr)
return OW_Unknown;
- if (IIL->getIntrinsicID() != Intrinsic::masked_store ||
- IIE->getIntrinsicID() != Intrinsic::masked_store)
+ if (KillingII->getIntrinsicID() != Intrinsic::masked_store ||
+ DeadII->getIntrinsicID() != Intrinsic::masked_store)
return OW_Unknown;
// Pointers.
- Value *LP = IIL->getArgOperand(1)->stripPointerCasts();
- Value *EP = IIE->getArgOperand(1)->stripPointerCasts();
- if (LP != EP && !AA.isMustAlias(LP, EP))
+ Value *KillingPtr = KillingII->getArgOperand(1)->stripPointerCasts();
+ Value *DeadPtr = DeadII->getArgOperand(1)->stripPointerCasts();
+ if (KillingPtr != DeadPtr && !AA.isMustAlias(KillingPtr, DeadPtr))
return OW_Unknown;
// Masks.
- // TODO: check that Later's mask is a superset of the Earlier's mask.
- if (IIL->getArgOperand(3) != IIE->getArgOperand(3))
+ // TODO: check that KillingII's mask is a superset of the DeadII's mask.
+ if (KillingII->getArgOperand(3) != DeadII->getArgOperand(3))
return OW_Unknown;
return OW_Complete;
}
-/// Return 'OW_Complete' if a store to the 'Later' location completely
-/// overwrites a store to the 'Earlier' location, 'OW_End' if the end of the
-/// 'Earlier' location is completely overwritten by 'Later', 'OW_Begin' if the
-/// beginning of the 'Earlier' location is overwritten by 'Later'.
-/// 'OW_PartialEarlierWithFullLater' means that an earlier (big) store was
-/// overwritten by a latter (smaller) store which doesn't write outside the big
+/// Return 'OW_Complete' if a store to the 'KillingLoc' location completely
+/// overwrites a store to the 'DeadLoc' location, 'OW_End' if the end of the
+/// 'DeadLoc' location is completely overwritten by 'KillingLoc', 'OW_Begin'
+/// if the beginning of the 'DeadLoc' location is overwritten by 'KillingLoc'.
+/// 'OW_PartialEarlierWithFullLater' means that a dead (big) store was
+/// overwritten by a killing (smaller) store which doesn't write outside the big
/// store's memory locations. Returns 'OW_Unknown' if nothing can be determined.
-/// NOTE: This function must only be called if both \p Later and \p Earlier
-/// write to the same underlying object with valid \p EarlierOff and \p
-/// LaterOff.
-static OverwriteResult isPartialOverwrite(const MemoryLocation &Later,
- const MemoryLocation &Earlier,
- int64_t EarlierOff, int64_t LaterOff,
- Instruction *DepWrite,
+/// NOTE: This function must only be called if both \p KillingLoc and \p
+/// DeadLoc belong to the same underlying object with valid \p KillingOff and
+/// \p DeadOff.
+static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc,
+ const MemoryLocation &DeadLoc,
+ int64_t KillingOff, int64_t DeadOff,
+ Instruction *DeadI,
InstOverlapIntervalsTy &IOL) {
- const uint64_t LaterSize = Later.Size.getValue();
- const uint64_t EarlierSize = Earlier.Size.getValue();
+ const uint64_t KillingSize = KillingLoc.Size.getValue();
+ const uint64_t DeadSize = DeadLoc.Size.getValue();
// We may now overlap, although the overlap is not complete. There might also
// be other incomplete overlaps, and together, they might cover the complete
- // earlier write.
+ // dead store.
// Note: The correctness of this logic depends on the fact that this function
// is not even called providing DepWrite when there are any intervening reads.
if (EnablePartialOverwriteTracking &&
- LaterOff < int64_t(EarlierOff + EarlierSize) &&
- int64_t(LaterOff + LaterSize) >= EarlierOff) {
+ KillingOff < int64_t(DeadOff + DeadSize) &&
+ int64_t(KillingOff + KillingSize) >= DeadOff) {
// Insert our part of the overlap into the map.
- auto &IM = IOL[DepWrite];
- LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: Earlier [" << EarlierOff
- << ", " << int64_t(EarlierOff + EarlierSize)
- << ") Later [" << LaterOff << ", "
- << int64_t(LaterOff + LaterSize) << ")\n");
+ auto &IM = IOL[DeadI];
+ LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: DeadLoc [" << DeadOff << ", "
+ << int64_t(DeadOff + DeadSize) << ") KillingLoc ["
+ << KillingOff << ", " << int64_t(KillingOff + KillingSize)
+ << ")\n");
// Make sure that we only insert non-overlapping intervals and combine
// adjacent intervals. The intervals are stored in the map with the ending
// offset as the key (in the half-open sense) and the starting offset as
// the value.
- int64_t LaterIntStart = LaterOff, LaterIntEnd = LaterOff + LaterSize;
+ int64_t KillingIntStart = KillingOff;
+ int64_t KillingIntEnd = KillingOff + KillingSize;
- // Find any intervals ending at, or after, LaterIntStart which start
- // before LaterIntEnd.
- auto ILI = IM.lower_bound(LaterIntStart);
- if (ILI != IM.end() && ILI->second <= LaterIntEnd) {
+ // Find any intervals ending at, or after, KillingIntStart which start
+ // before KillingIntEnd.
+ auto ILI = IM.lower_bound(KillingIntStart);
+ if (ILI != IM.end() && ILI->second <= KillingIntEnd) {
// This existing interval is overlapped with the current store somewhere
- // in [LaterIntStart, LaterIntEnd]. Merge them by erasing the existing
+ // in [KillingIntStart, KillingIntEnd]. Merge them by erasing the existing
// intervals and adjusting our start and end.
- LaterIntStart = std::min(LaterIntStart, ILI->second);
- LaterIntEnd = std::max(LaterIntEnd, ILI->first);
+ KillingIntStart = std::min(KillingIntStart, ILI->second);
+ KillingIntEnd = std::max(KillingIntEnd, ILI->first);
ILI = IM.erase(ILI);
// Continue erasing and adjusting our end in case other previous
// intervals are also overlapped with the current store.
//
- // |--- ealier 1 ---| |--- ealier 2 ---|
- // |------- later---------|
+ // |--- dead 1 ---| |--- dead 2 ---|
+ // |------- killing---------|
//
- while (ILI != IM.end() && ILI->second <= LaterIntEnd) {
- assert(ILI->second > LaterIntStart && "Unexpected interval");
- LaterIntEnd = std::max(LaterIntEnd, ILI->first);
+ while (ILI != IM.end() && ILI->second <= KillingIntEnd) {
+ assert(ILI->second > KillingIntStart && "Unexpected interval");
+ KillingIntEnd = std::max(KillingIntEnd, ILI->first);
ILI = IM.erase(ILI);
}
}
- IM[LaterIntEnd] = LaterIntStart;
+ IM[KillingIntEnd] = KillingIntStart;
ILI = IM.begin();
- if (ILI->second <= EarlierOff &&
- ILI->first >= int64_t(EarlierOff + EarlierSize)) {
- LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: Earlier ["
- << EarlierOff << ", "
- << int64_t(EarlierOff + EarlierSize)
- << ") Composite Later [" << ILI->second << ", "
+ if (ILI->second <= DeadOff && ILI->first >= int64_t(DeadOff + DeadSize)) {
+ LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: DeadLoc ["
+ << DeadOff << ", " << int64_t(DeadOff + DeadSize)
+ << ") Composite KillingLoc [" << ILI->second << ", "
<< ILI->first << ")\n");
++NumCompletePartials;
return OW_Complete;
}
}
- // Check for an earlier store which writes to all the memory locations that
- // the later store writes to.
- if (EnablePartialStoreMerging && LaterOff >= EarlierOff &&
- int64_t(EarlierOff + EarlierSize) > LaterOff &&
- uint64_t(LaterOff - EarlierOff) + LaterSize <= EarlierSize) {
- LLVM_DEBUG(dbgs() << "DSE: Partial overwrite an earlier load ["
- << EarlierOff << ", "
- << int64_t(EarlierOff + EarlierSize)
- << ") by a later store [" << LaterOff << ", "
- << int64_t(LaterOff + LaterSize) << ")\n");
+ // Check for a dead store which writes to all the memory locations that
+ // the killing store writes to.
+ if (EnablePartialStoreMerging && KillingOff >= DeadOff &&
+ int64_t(DeadOff + DeadSize) > KillingOff &&
+ uint64_t(KillingOff - DeadOff) + KillingSize <= DeadSize) {
+ LLVM_DEBUG(dbgs() << "DSE: Partial overwrite a dead load [" << DeadOff
+ << ", " << int64_t(DeadOff + DeadSize)
+ << ") by a killing store [" << KillingOff << ", "
+ << int64_t(KillingOff + KillingSize) << ")\n");
// TODO: Maybe come up with a better name?
return OW_PartialEarlierWithFullLater;
}
- // Another interesting case is if the later store overwrites the end of the
- // earlier store.
+ // Another interesting case is if the killing store overwrites the end of the
+ // dead store.
//
- // |--earlier--|
- // |-- later --|
+ // |--dead--|
+ // |-- killing --|
//
- // In this case we may want to trim the size of earlier to avoid generating
- // writes to addresses which will definitely be overwritten later
+ // In this case we may want to trim the size of dead store to avoid
+ // generating stores to addresses which will definitely be overwritten killing
+ // store.
if (!EnablePartialOverwriteTracking &&
- (LaterOff > EarlierOff && LaterOff < int64_t(EarlierOff + EarlierSize) &&
- int64_t(LaterOff + LaterSize) >= int64_t(EarlierOff + EarlierSize)))
+ (KillingOff > DeadOff && KillingOff < int64_t(DeadOff + DeadSize) &&
+ int64_t(KillingOff + KillingSize) >= int64_t(DeadOff + DeadSize)))
return OW_End;
- // Finally, we also need to check if the later store overwrites the beginning
- // of the earlier store.
+ // Finally, we also need to check if the killing store overwrites the
+ // beginning of the dead store.
//
- // |--earlier--|
- // |-- later --|
+ // |--dead--|
+ // |-- killing --|
//
// In this case we may want to move the destination address and trim the size
- // of earlier to avoid generating writes to addresses which will definitely
- // be overwritten later.
+ // of dead store to avoid generating stores to addresses which will definitely
+ // be overwritten killing store.
if (!EnablePartialOverwriteTracking &&
- (LaterOff <= EarlierOff && int64_t(LaterOff + LaterSize) > EarlierOff)) {
- assert(int64_t(LaterOff + LaterSize) < int64_t(EarlierOff + EarlierSize) &&
+ (KillingOff <= DeadOff && int64_t(KillingOff + KillingSize) > DeadOff)) {
+ assert(int64_t(KillingOff + KillingSize) < int64_t(DeadOff + DeadSize) &&
"Expect to be handled as OW_Complete");
return OW_Begin;
}
@@ -505,7 +473,12 @@ memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI,
BasicBlock::iterator SecondBBI(SecondI);
BasicBlock *FirstBB = FirstI->getParent();
BasicBlock *SecondBB = SecondI->getParent();
- MemoryLocation MemLoc = MemoryLocation::get(SecondI);
+ MemoryLocation MemLoc;
+ if (auto *MemSet = dyn_cast<MemSetInst>(SecondI))
+ MemLoc = MemoryLocation::getForDest(MemSet);
+ else
+ MemLoc = MemoryLocation::get(SecondI);
+
auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr);
// Start checking the SecondBB.
@@ -568,11 +541,11 @@ memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI,
return true;
}
-static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierStart,
- uint64_t &EarlierSize, int64_t LaterStart,
- uint64_t LaterSize, bool IsOverwriteEnd) {
- auto *EarlierIntrinsic = cast<AnyMemIntrinsic>(EarlierWrite);
- Align PrefAlign = EarlierIntrinsic->getDestAlign().valueOrOne();
+static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
+ uint64_t &DeadSize, int64_t KillingStart,
+ uint64_t KillingSize, bool IsOverwriteEnd) {
+ auto *DeadIntrinsic = cast<AnyMemIntrinsic>(DeadI);
+ Align PrefAlign = DeadIntrinsic->getDestAlign().valueOrOne();
// We assume that memet/memcpy operates in chunks of the "largest" native
// type size and aligned on the same value. That means optimal start and size
@@ -593,19 +566,19 @@ static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierStart,
// Compute start and size of the region to remove. Make sure 'PrefAlign' is
// maintained on the remaining store.
if (IsOverwriteEnd) {
- // Calculate required adjustment for 'LaterStart'in order to keep remaining
- // store size aligned on 'PerfAlign'.
+ // Calculate required adjustment for 'KillingStart' in order to keep
+ // remaining store size aligned on 'PerfAlign'.
uint64_t Off =
- offsetToAlignment(uint64_t(LaterStart - EarlierStart), PrefAlign);
- ToRemoveStart = LaterStart + Off;
- if (EarlierSize <= uint64_t(ToRemoveStart - EarlierStart))
+ offsetToAlignment(uint64_t(KillingStart - DeadStart), PrefAlign);
+ ToRemoveStart = KillingStart + Off;
+ if (DeadSize <= uint64_t(ToRemoveStart - DeadStart))
return false;
- ToRemoveSize = EarlierSize - uint64_t(ToRemoveStart - EarlierStart);
+ ToRemoveSize = DeadSize - uint64_t(ToRemoveStart - DeadStart);
} else {
- ToRemoveStart = EarlierStart;
- assert(LaterSize >= uint64_t(EarlierStart - LaterStart) &&
+ ToRemoveStart = DeadStart;
+ assert(KillingSize >= uint64_t(DeadStart - KillingStart) &&
"Not overlapping accesses?");
- ToRemoveSize = LaterSize - uint64_t(EarlierStart - LaterStart);
+ ToRemoveSize = KillingSize - uint64_t(DeadStart - KillingStart);
// Calculate required adjustment for 'ToRemoveSize'in order to keep
// start of the remaining store aligned on 'PerfAlign'.
uint64_t Off = offsetToAlignment(ToRemoveSize, PrefAlign);
@@ -619,10 +592,10 @@ static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierStart,
}
assert(ToRemoveSize > 0 && "Shouldn't reach here if nothing to remove");
- assert(EarlierSize > ToRemoveSize && "Can't remove more than original size");
+ assert(DeadSize > ToRemoveSize && "Can't remove more than original size");
- uint64_t NewSize = EarlierSize - ToRemoveSize;
- if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(EarlierWrite)) {
+ uint64_t NewSize = DeadSize - ToRemoveSize;
+ if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(DeadI)) {
// When shortening an atomic memory intrinsic, the newly shortened
// length must remain an integer multiple of the element size.
const uint32_t ElementSize = AMI->getElementSizeInBytes();
@@ -631,65 +604,62 @@ static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierStart,
}
LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
- << (IsOverwriteEnd ? "END" : "BEGIN") << ": "
- << *EarlierWrite << "\n KILLER [" << ToRemoveStart << ", "
+ << (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *DeadI
+ << "\n KILLER [" << ToRemoveStart << ", "
<< int64_t(ToRemoveStart + ToRemoveSize) << ")\n");
- Value *EarlierWriteLength = EarlierIntrinsic->getLength();
- Value *TrimmedLength =
- ConstantInt::get(EarlierWriteLength->getType(), NewSize);
- EarlierIntrinsic->setLength(TrimmedLength);
- EarlierIntrinsic->setDestAlignment(PrefAlign);
+ Value *DeadWriteLength = DeadIntrinsic->getLength();
+ Value *TrimmedLength = ConstantInt::get(DeadWriteLength->getType(), NewSize);
+ DeadIntrinsic->setLength(TrimmedLength);
+ DeadIntrinsic->setDestAlignment(PrefAlign);
if (!IsOverwriteEnd) {
- Value *OrigDest = EarlierIntrinsic->getRawDest();
+ Value *OrigDest = DeadIntrinsic->getRawDest();
Type *Int8PtrTy =
- Type::getInt8PtrTy(EarlierIntrinsic->getContext(),
+ Type::getInt8PtrTy(DeadIntrinsic->getContext(),
OrigDest->getType()->getPointerAddressSpace());
Value *Dest = OrigDest;
if (OrigDest->getType() != Int8PtrTy)
- Dest = CastInst::CreatePointerCast(OrigDest, Int8PtrTy, "", EarlierWrite);
+ Dest = CastInst::CreatePointerCast(OrigDest, Int8PtrTy, "", DeadI);
Value *Indices[1] = {
- ConstantInt::get(EarlierWriteLength->getType(), ToRemoveSize)};
+ ConstantInt::get(DeadWriteLength->getType(), ToRemoveSize)};
Instruction *NewDestGEP = GetElementPtrInst::CreateInBounds(
- Type::getInt8Ty(EarlierIntrinsic->getContext()),
- Dest, Indices, "", EarlierWrite);
- NewDestGEP->setDebugLoc(EarlierIntrinsic->getDebugLoc());
+ Type::getInt8Ty(DeadIntrinsic->getContext()), Dest, Indices, "", DeadI);
+ NewDestGEP->setDebugLoc(DeadIntrinsic->getDebugLoc());
if (NewDestGEP->getType() != OrigDest->getType())
NewDestGEP = CastInst::CreatePointerCast(NewDestGEP, OrigDest->getType(),
- "", EarlierWrite);
- EarlierIntrinsic->setDest(NewDestGEP);
+ "", DeadI);
+ DeadIntrinsic->setDest(NewDestGEP);
}
- // Finally update start and size of earlier access.
+ // Finally update start and size of dead access.
if (!IsOverwriteEnd)
- EarlierStart += ToRemoveSize;
- EarlierSize = NewSize;
+ DeadStart += ToRemoveSize;
+ DeadSize = NewSize;
return true;
}
-static bool tryToShortenEnd(Instruction *EarlierWrite,
- OverlapIntervalsTy &IntervalMap,
- int64_t &EarlierStart, uint64_t &EarlierSize) {
- if (IntervalMap.empty() || !isShortenableAtTheEnd(EarlierWrite))
+static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap,
+ int64_t &DeadStart, uint64_t &DeadSize) {
+ if (IntervalMap.empty() || !isShortenableAtTheEnd(DeadI))
return false;
OverlapIntervalsTy::iterator OII = --IntervalMap.end();
- int64_t LaterStart = OII->second;
- uint64_t LaterSize = OII->first - LaterStart;
+ int64_t KillingStart = OII->second;
+ uint64_t KillingSize = OII->first - KillingStart;
- assert(OII->first - LaterStart >= 0 && "Size expected to be positive");
+ assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
- if (LaterStart > EarlierStart &&
- // Note: "LaterStart - EarlierStart" is known to be positive due to
+ if (KillingStart > DeadStart &&
+ // Note: "KillingStart - KillingStart" is known to be positive due to
// preceding check.
- (uint64_t)(LaterStart - EarlierStart) < EarlierSize &&
- // Note: "EarlierSize - (uint64_t)(LaterStart - EarlierStart)" is known to
+ (uint64_t)(KillingStart - DeadStart) < DeadSize &&
+ // Note: "DeadSize - (uint64_t)(KillingStart - DeadStart)" is known to
// be non negative due to preceding checks.
- LaterSize >= EarlierSize - (uint64_t)(LaterStart - EarlierStart)) {
- if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart,
- LaterSize, true)) {
+ KillingSize >= DeadSize - (uint64_t)(KillingStart - DeadStart)) {
+ if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
+ true)) {
IntervalMap.erase(OII);
return true;
}
@@ -697,28 +667,28 @@ static bool tryToShortenEnd(Instruction *EarlierWrite,
return false;
}
-static bool tryToShortenBegin(Instruction *EarlierWrite,
+static bool tryToShortenBegin(Instruction *DeadI,
OverlapIntervalsTy &IntervalMap,
- int64_t &EarlierStart, uint64_t &EarlierSize) {
- if (IntervalMap.empty() || !isShortenableAtTheBeginning(EarlierWrite))
+ int64_t &DeadStart, uint64_t &DeadSize) {
+ if (IntervalMap.empty() || !isShortenableAtTheBeginning(DeadI))
return false;
OverlapIntervalsTy::iterator OII = IntervalMap.begin();
- int64_t LaterStart = OII->second;
- uint64_t LaterSize = OII->first - LaterStart;
+ int64_t KillingStart = OII->second;
+ uint64_t KillingSize = OII->first - KillingStart;
- assert(OII->first - LaterStart >= 0 && "Size expected to be positive");
+ assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
- if (LaterStart <= EarlierStart &&
- // Note: "EarlierStart - LaterStart" is known to be non negative due to
+ if (KillingStart <= DeadStart &&
+ // Note: "DeadStart - KillingStart" is known to be non negative due to
// preceding check.
- LaterSize > (uint64_t)(EarlierStart - LaterStart)) {
- // Note: "LaterSize - (uint64_t)(EarlierStart - LaterStart)" is known to be
- // positive due to preceding checks.
- assert(LaterSize - (uint64_t)(EarlierStart - LaterStart) < EarlierSize &&
+ KillingSize > (uint64_t)(DeadStart - KillingStart)) {
+ // Note: "KillingSize - (uint64_t)(DeadStart - DeadStart)" is known to
+ // be positive due to preceding checks.
+ assert(KillingSize - (uint64_t)(DeadStart - KillingStart) < DeadSize &&
"Should have been handled as OW_Complete");
- if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart,
- LaterSize, false)) {
+ if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
+ false)) {
IntervalMap.erase(OII);
return true;
}
@@ -726,71 +696,48 @@ static bool tryToShortenBegin(Instruction *EarlierWrite,
return false;
}
-static bool removePartiallyOverlappedStores(const DataLayout &DL,
- InstOverlapIntervalsTy &IOL,
- const TargetLibraryInfo &TLI) {
- bool Changed = false;
- for (auto OI : IOL) {
- Instruction *EarlierWrite = OI.first;
- MemoryLocation Loc = getLocForWrite(EarlierWrite, TLI);
- assert(isRemovable(EarlierWrite) && "Expect only removable instruction");
-
- const Value *Ptr = Loc.Ptr->stripPointerCasts();
- int64_t EarlierStart = 0;
- uint64_t EarlierSize = Loc.Size.getValue();
- GetPointerBaseWithConstantOffset(Ptr, EarlierStart, DL);
- OverlapIntervalsTy &IntervalMap = OI.second;
- Changed |=
- tryToShortenEnd(EarlierWrite, IntervalMap, EarlierStart, EarlierSize);
- if (IntervalMap.empty())
- continue;
- Changed |=
- tryToShortenBegin(EarlierWrite, IntervalMap, EarlierStart, EarlierSize);
- }
- return Changed;
-}
-
-static Constant *tryToMergePartialOverlappingStores(
- StoreInst *Earlier, StoreInst *Later, int64_t InstWriteOffset,
- int64_t DepWriteOffset, const DataLayout &DL, BatchAAResults &AA,
- DominatorTree *DT) {
-
- if (Earlier && isa<ConstantInt>(Earlier->getValueOperand()) &&
- DL.typeSizeEqualsStoreSize(Earlier->getValueOperand()->getType()) &&
- Later && isa<ConstantInt>(Later->getValueOperand()) &&
- DL.typeSizeEqualsStoreSize(Later->getValueOperand()->getType()) &&
- memoryIsNotModifiedBetween(Earlier, Later, AA, DL, DT)) {
+static Constant *
+tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI,
+ int64_t KillingOffset, int64_t DeadOffset,
+ const DataLayout &DL, BatchAAResults &AA,
+ DominatorTree *DT) {
+
+ if (DeadI && isa<ConstantInt>(DeadI->getValueOperand()) &&
+ DL.typeSizeEqualsStoreSize(DeadI->getValueOperand()->getType()) &&
+ KillingI && isa<ConstantInt>(KillingI->getValueOperand()) &&
+ DL.typeSizeEqualsStoreSize(KillingI->getValueOperand()->getType()) &&
+ memoryIsNotModifiedBetween(DeadI, KillingI, AA, DL, DT)) {
// If the store we find is:
// a) partially overwritten by the store to 'Loc'
- // b) the later store is fully contained in the earlier one and
+ // b) the killing store is fully contained in the dead one and
// c) they both have a constant value
// d) none of the two stores need padding
- // Merge the two stores, replacing the earlier store's value with a
+ // Merge the two stores, replacing the dead store's value with a
// merge of both values.
// TODO: Deal with other constant types (vectors, etc), and probably
// some mem intrinsics (if needed)
- APInt EarlierValue =
- cast<ConstantInt>(Earlier->getValueOperand())->getValue();
- APInt LaterValue = cast<ConstantInt>(Later->getValueOperand())->getValue();
- unsigned LaterBits = LaterValue.getBitWidth();
- assert(EarlierValue.getBitWidth() > LaterValue.getBitWidth());
- LaterValue = LaterValue.zext(EarlierValue.getBitWidth());
+ APInt DeadValue = cast<ConstantInt>(DeadI->getValueOperand())->getValue();
+ APInt KillingValue =
+ cast<ConstantInt>(KillingI->getValueOperand())->getValue();
+ unsigned KillingBits = KillingValue.getBitWidth();
+ assert(DeadValue.getBitWidth() > KillingValue.getBitWidth());
+ KillingValue = KillingValue.zext(DeadValue.getBitWidth());
// Offset of the smaller store inside the larger store
- unsigned BitOffsetDiff = (InstWriteOffset - DepWriteOffset) * 8;
- unsigned LShiftAmount = DL.isBigEndian() ? EarlierValue.getBitWidth() -
- BitOffsetDiff - LaterBits
- : BitOffsetDiff;
- APInt Mask = APInt::getBitsSet(EarlierValue.getBitWidth(), LShiftAmount,
- LShiftAmount + LaterBits);
+ unsigned BitOffsetDiff = (KillingOffset - DeadOffset) * 8;
+ unsigned LShiftAmount =
+ DL.isBigEndian() ? DeadValue.getBitWidth() - BitOffsetDiff - KillingBits
+ : BitOffsetDiff;
+ APInt Mask = APInt::getBitsSet(DeadValue.getBitWidth(), LShiftAmount,
+ LShiftAmount + KillingBits);
// Clear the bits we'll be replacing, then OR with the smaller
// store, shifted appropriately.
- APInt Merged = (EarlierValue & ~Mask) | (LaterValue << LShiftAmount);
- LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Earlier: " << *Earlier
- << "\n Later: " << *Later
+ APInt Merged = (DeadValue & ~Mask) | (KillingValue << LShiftAmount);
+ LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Dead: " << *DeadI
+ << "\n Killing: " << *KillingI
<< "\n Merged Value: " << Merged << '\n');
- return ConstantInt::get(Earlier->getValueOperand()->getType(), Merged);
+ return ConstantInt::get(DeadI->getValueOperand()->getType(), Merged);
}
return nullptr;
}
@@ -819,14 +766,17 @@ bool isNoopIntrinsic(Instruction *I) {
}
// Check if we can ignore \p D for DSE.
-bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
+bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller,
+ const TargetLibraryInfo &TLI) {
Instruction *DI = D->getMemoryInst();
// Calls that only access inaccessible memory cannot read or write any memory
// locations we consider for elimination.
if (auto *CB = dyn_cast<CallBase>(DI))
- if (CB->onlyAccessesInaccessibleMemory())
+ if (CB->onlyAccessesInaccessibleMemory()) {
+ if (isAllocLikeFn(DI, &TLI))
+ return false;
return true;
-
+ }
// We can eliminate stores to locations not visible to the caller across
// throwing instructions.
if (DI->mayThrow() && !DefVisibleToCaller)
@@ -841,7 +791,7 @@ bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
return true;
// Skip intrinsics that do not really read or modify memory.
- if (isNoopIntrinsic(D->getMemoryInst()))
+ if (isNoopIntrinsic(DI))
return true;
return false;
@@ -850,6 +800,7 @@ bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
struct DSEState {
Function &F;
AliasAnalysis &AA;
+ EarliestEscapeInfo EI;
/// The single BatchAA instance that is used to cache AA queries. It will
/// not be invalidated over the whole run. This is safe, because:
@@ -892,30 +843,29 @@ struct DSEState {
/// basic block.
DenseMap<BasicBlock *, InstOverlapIntervalsTy> IOLs;
+ // Class contains self-reference, make sure it's not copied/moved.
+ DSEState(const DSEState &) = delete;
+ DSEState &operator=(const DSEState &) = delete;
+
DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
const LoopInfo &LI)
- : F(F), AA(AA), BatchAA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI),
- DL(F.getParent()->getDataLayout()), LI(LI) {}
-
- static DSEState get(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
- DominatorTree &DT, PostDominatorTree &PDT,
- const TargetLibraryInfo &TLI, const LoopInfo &LI) {
- DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
+ : F(F), AA(AA), EI(DT, LI), BatchAA(AA, &EI), MSSA(MSSA), DT(DT),
+ PDT(PDT), TLI(TLI), DL(F.getParent()->getDataLayout()), LI(LI) {
// Collect blocks with throwing instructions not modeled in MemorySSA and
// alloc-like objects.
unsigned PO = 0;
for (BasicBlock *BB : post_order(&F)) {
- State.PostOrderNumbers[BB] = PO++;
+ PostOrderNumbers[BB] = PO++;
for (Instruction &I : *BB) {
MemoryAccess *MA = MSSA.getMemoryAccess(&I);
if (I.mayThrow() && !MA)
- State.ThrowingBlocks.insert(I.getParent());
+ ThrowingBlocks.insert(I.getParent());
auto *MD = dyn_cast_or_null<MemoryDef>(MA);
- if (MD && State.MemDefs.size() < MemorySSADefsPerBlockLimit &&
- (State.getLocForWriteEx(&I) || State.isMemTerminatorInst(&I)))
- State.MemDefs.push_back(MD);
+ if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit &&
+ (getLocForWriteEx(&I) || isMemTerminatorInst(&I)))
+ MemDefs.push_back(MD);
}
}
@@ -925,131 +875,134 @@ struct DSEState {
if (AI.hasPassPointeeByValueCopyAttr()) {
// For byval, the caller doesn't know the address of the allocation.
if (AI.hasByValAttr())
- State.InvisibleToCallerBeforeRet.insert({&AI, true});
- State.InvisibleToCallerAfterRet.insert({&AI, true});
+ InvisibleToCallerBeforeRet.insert({&AI, true});
+ InvisibleToCallerAfterRet.insert({&AI, true});
}
// Collect whether there is any irreducible control flow in the function.
- State.ContainsIrreducibleLoops = mayContainIrreducibleControl(F, &LI);
-
- return State;
+ ContainsIrreducibleLoops = mayContainIrreducibleControl(F, &LI);
}
- /// Return 'OW_Complete' if a store to the 'Later' location (by \p LaterI
- /// instruction) completely overwrites a store to the 'Earlier' location.
- /// (by \p EarlierI instruction).
- /// Return OW_MaybePartial if \p Later does not completely overwrite
- /// \p Earlier, but they both write to the same underlying object. In that
- /// case, use isPartialOverwrite to check if \p Later partially overwrites
- /// \p Earlier. Returns 'OW_Unknown' if nothing can be determined.
- OverwriteResult
- isOverwrite(const Instruction *LaterI, const Instruction *EarlierI,
- const MemoryLocation &Later, const MemoryLocation &Earlier,
- int64_t &EarlierOff, int64_t &LaterOff) {
+ /// Return 'OW_Complete' if a store to the 'KillingLoc' location (by \p
+ /// KillingI instruction) completely overwrites a store to the 'DeadLoc'
+ /// location (by \p DeadI instruction).
+ /// Return OW_MaybePartial if \p KillingI does not completely overwrite
+ /// \p DeadI, but they both write to the same underlying object. In that
+ /// case, use isPartialOverwrite to check if \p KillingI partially overwrites
+ /// \p DeadI. Returns 'OW_Unknown' if nothing can be determined.
+ OverwriteResult isOverwrite(const Instruction *KillingI,
+ const Instruction *DeadI,
+ const MemoryLocation &KillingLoc,
+ const MemoryLocation &DeadLoc,
+ int64_t &KillingOff, int64_t &DeadOff) {
// AliasAnalysis does not always account for loops. Limit overwrite checks
- // to dependencies for which we can guarantee they are independant of any
+ // to dependencies for which we can guarantee they are independent of any
// loops they are in.
- if (!isGuaranteedLoopIndependent(EarlierI, LaterI, Earlier))
+ if (!isGuaranteedLoopIndependent(DeadI, KillingI, DeadLoc))
return OW_Unknown;
// FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
// get imprecise values here, though (except for unknown sizes).
- if (!Later.Size.isPrecise() || !Earlier.Size.isPrecise()) {
+ if (!KillingLoc.Size.isPrecise() || !DeadLoc.Size.isPrecise()) {
      // In case no constant size is known, try to use the IR values for the number
// of bytes written and check if they match.
- const auto *LaterMemI = dyn_cast<MemIntrinsic>(LaterI);
- const auto *EarlierMemI = dyn_cast<MemIntrinsic>(EarlierI);
- if (LaterMemI && EarlierMemI) {
- const Value *LaterV = LaterMemI->getLength();
- const Value *EarlierV = EarlierMemI->getLength();
- if (LaterV == EarlierV && BatchAA.isMustAlias(Earlier, Later))
+ const auto *KillingMemI = dyn_cast<MemIntrinsic>(KillingI);
+ const auto *DeadMemI = dyn_cast<MemIntrinsic>(DeadI);
+ if (KillingMemI && DeadMemI) {
+ const Value *KillingV = KillingMemI->getLength();
+ const Value *DeadV = DeadMemI->getLength();
+ if (KillingV == DeadV && BatchAA.isMustAlias(DeadLoc, KillingLoc))
return OW_Complete;
}
// Masked stores have imprecise locations, but we can reason about them
// to some extent.
- return isMaskedStoreOverwrite(LaterI, EarlierI, BatchAA);
+ return isMaskedStoreOverwrite(KillingI, DeadI, BatchAA);
}
- const uint64_t LaterSize = Later.Size.getValue();
- const uint64_t EarlierSize = Earlier.Size.getValue();
+ const uint64_t KillingSize = KillingLoc.Size.getValue();
+ const uint64_t DeadSize = DeadLoc.Size.getValue();
// Query the alias information
- AliasResult AAR = BatchAA.alias(Later, Earlier);
+ AliasResult AAR = BatchAA.alias(KillingLoc, DeadLoc);
// If the start pointers are the same, we just have to compare sizes to see if
- // the later store was larger than the earlier store.
+ // the killing store was larger than the dead store.
if (AAR == AliasResult::MustAlias) {
- // Make sure that the Later size is >= the Earlier size.
- if (LaterSize >= EarlierSize)
+      // Make sure that KillingSize is >= DeadSize.
+ if (KillingSize >= DeadSize)
return OW_Complete;
}
// If we hit a partial alias we may have a full overwrite
if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
int32_t Off = AAR.getOffset();
- if (Off >= 0 && (uint64_t)Off + EarlierSize <= LaterSize)
+ if (Off >= 0 && (uint64_t)Off + DeadSize <= KillingSize)
return OW_Complete;
}
- // Check to see if the later store is to the entire object (either a global,
- // an alloca, or a byval/inalloca argument). If so, then it clearly
+ // Check to see if the killing store is to the entire object (either a
+ // global, an alloca, or a byval/inalloca argument). If so, then it clearly
// overwrites any other store to the same object.
- const Value *P1 = Earlier.Ptr->stripPointerCasts();
- const Value *P2 = Later.Ptr->stripPointerCasts();
- const Value *UO1 = getUnderlyingObject(P1), *UO2 = getUnderlyingObject(P2);
+ const Value *DeadPtr = DeadLoc.Ptr->stripPointerCasts();
+ const Value *KillingPtr = KillingLoc.Ptr->stripPointerCasts();
+ const Value *DeadUndObj = getUnderlyingObject(DeadPtr);
+ const Value *KillingUndObj = getUnderlyingObject(KillingPtr);
// If we can't resolve the same pointers to the same object, then we can't
// analyze them at all.
- if (UO1 != UO2)
+ if (DeadUndObj != KillingUndObj)
return OW_Unknown;
- // If the "Later" store is to a recognizable object, get its size.
- uint64_t ObjectSize = getPointerSize(UO2, DL, TLI, &F);
- if (ObjectSize != MemoryLocation::UnknownSize)
- if (ObjectSize == LaterSize && ObjectSize >= EarlierSize)
+ // If the KillingI store is to a recognizable object, get its size.
+ uint64_t KillingUndObjSize = getPointerSize(KillingUndObj, DL, TLI, &F);
+ if (KillingUndObjSize != MemoryLocation::UnknownSize)
+ if (KillingUndObjSize == KillingSize && KillingUndObjSize >= DeadSize)
return OW_Complete;
// Okay, we have stores to two completely different pointers. Try to
// decompose the pointer into a "base + constant_offset" form. If the base
// pointers are equal, then we can reason about the two stores.
- EarlierOff = 0;
- LaterOff = 0;
- const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, DL);
- const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, DL);
-
- // If the base pointers still differ, we have two completely different stores.
- if (BP1 != BP2)
+ DeadOff = 0;
+ KillingOff = 0;
+ const Value *DeadBasePtr =
+ GetPointerBaseWithConstantOffset(DeadPtr, DeadOff, DL);
+ const Value *KillingBasePtr =
+ GetPointerBaseWithConstantOffset(KillingPtr, KillingOff, DL);
+
+ // If the base pointers still differ, we have two completely different
+ // stores.
+ if (DeadBasePtr != KillingBasePtr)
return OW_Unknown;
- // The later access completely overlaps the earlier store if and only if
- // both start and end of the earlier one is "inside" the later one:
- // |<->|--earlier--|<->|
- // |-------later-------|
+ // The killing access completely overlaps the dead store if and only if
+ // both start and end of the dead one is "inside" the killing one:
+ // |<->|--dead--|<->|
+ // |-----killing------|
// Accesses may overlap if and only if start of one of them is "inside"
// another one:
- // |<->|--earlier--|<----->|
- // |-------later-------|
+ // |<->|--dead--|<-------->|
+ // |-------killing--------|
// OR
- // |----- earlier -----|
- // |<->|---later---|<----->|
+ // |-------dead-------|
+ // |<->|---killing---|<----->|
//
// We have to be careful here as *Off is signed while *.Size is unsigned.
- // Check if the earlier access starts "not before" the later one.
- if (EarlierOff >= LaterOff) {
- // If the earlier access ends "not after" the later access then the earlier
- // one is completely overwritten by the later one.
- if (uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize)
+ // Check if the dead access starts "not before" the killing one.
+ if (DeadOff >= KillingOff) {
+ // If the dead access ends "not after" the killing access then the
+ // dead one is completely overwritten by the killing one.
+ if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize)
return OW_Complete;
- // If start of the earlier access is "before" end of the later access then
- // accesses overlap.
- else if ((uint64_t)(EarlierOff - LaterOff) < LaterSize)
+ // If start of the dead access is "before" end of the killing access
+ // then accesses overlap.
+ else if ((uint64_t)(DeadOff - KillingOff) < KillingSize)
return OW_MaybePartial;
}
- // If start of the later access is "before" end of the earlier access then
+ // If start of the killing access is "before" end of the dead access then
// accesses overlap.
- else if ((uint64_t)(LaterOff - EarlierOff) < EarlierSize) {
+ else if ((uint64_t)(KillingOff - DeadOff) < DeadSize) {
return OW_MaybePartial;
}
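// Editor's note: illustrative sketch only, not part of the patch. It restates
// the interval arithmetic above with plain integers; all names below are made
// up for the example. Offsets are relative to the shared base pointer.
#include <cstdint>

enum OverlapKind { Complete, MaybePartial, NoneKnown };

OverlapKind classifyOverlap(int64_t KillingOff, uint64_t KillingSize,
                            int64_t DeadOff, uint64_t DeadSize) {
  if (DeadOff >= KillingOff) {
    // Dead store starts inside the killing store.
    if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize)
      return Complete;      // dead interval fully contained in killing interval
    if (uint64_t(DeadOff - KillingOff) < KillingSize)
      return MaybePartial;  // intervals overlap, but the dead store sticks out
  } else if (uint64_t(KillingOff - DeadOff) < DeadSize) {
    return MaybePartial;    // killing store starts inside the dead store
  }
  return NoneKnown;
}
// e.g. classifyOverlap(/*KillingOff=*/0, 16, /*DeadOff=*/4, 8) == Complete.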
@@ -1106,8 +1059,13 @@ struct DSEState {
LibFunc LF;
if (TLI.getLibFunc(*CB, LF) && TLI.has(LF)) {
switch (LF) {
- case LibFunc_strcpy:
case LibFunc_strncpy:
+ if (const auto *Len = dyn_cast<ConstantInt>(CB->getArgOperand(2)))
+ return MemoryLocation(CB->getArgOperand(0),
+ LocationSize::precise(Len->getZExtValue()),
+ CB->getAAMetadata());
+ LLVM_FALLTHROUGH;
+ case LibFunc_strcpy:
case LibFunc_strcat:
case LibFunc_strncat:
return {MemoryLocation::getAfter(CB->getArgOperand(0))};
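// Editor's note: illustrative sketch, not part of the patch; the function name
// is made up. It shows the source pattern the LibFunc_strncpy case above can
// now model precisely: a constant length yields an exact 8-byte write
// location, so the earlier memset of the same bytes becomes a dead store.
#include <cstring>

void initBuffer(char (&Buf)[8], const char *Src) {
  std::memset(Buf, 0, sizeof(Buf));    // dead: fully overwritten below
  std::strncpy(Buf, Src, sizeof(Buf)); // precise MemoryLocation(Buf, 8)
}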
@@ -1145,8 +1103,8 @@ struct DSEState {
int64_t InstWriteOffset, DepWriteOffset;
if (auto CC = getLocForWriteEx(UseInst))
- return isOverwrite(UseInst, DefInst, *CC, DefLoc, DepWriteOffset,
- InstWriteOffset) == OW_Complete;
+ return isOverwrite(UseInst, DefInst, *CC, DefLoc, InstWriteOffset,
+ DepWriteOffset) == OW_Complete;
return false;
}
@@ -1248,9 +1206,10 @@ struct DSEState {
const Value *LocUO = getUnderlyingObject(Loc.Ptr);
return BatchAA.isMustAlias(TermLoc.Ptr, LocUO);
}
- int64_t InstWriteOffset, DepWriteOffset;
- return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, DepWriteOffset,
- InstWriteOffset) == OW_Complete;
+ int64_t InstWriteOffset = 0;
+ int64_t DepWriteOffset = 0;
+ return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, InstWriteOffset,
+ DepWriteOffset) == OW_Complete;
}
// Returns true if \p Use may read from \p DefLoc.
@@ -1270,10 +1229,6 @@ struct DSEState {
if (CB->onlyAccessesInaccessibleMemory())
return false;
- // NOTE: For calls, the number of stores removed could be slightly improved
- // by using AA.callCapturesBefore(UseInst, DefLoc, &DT), but that showed to
- // be expensive compared to the benefits in practice. For now, avoid more
- // expensive analysis to limit compile-time.
return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc));
}
@@ -1329,15 +1284,15 @@ struct DSEState {
return IsGuaranteedLoopInvariantBase(Ptr);
}
- // Find a MemoryDef writing to \p DefLoc and dominating \p StartAccess, with
- // no read access between them or on any other path to a function exit block
- // if \p DefLoc is not accessible after the function returns. If there is no
- // such MemoryDef, return None. The returned value may not (completely)
- // overwrite \p DefLoc. Currently we bail out when we encounter an aliasing
- // MemoryUse (read).
+ // Find a MemoryDef writing to \p KillingLoc and dominating \p StartAccess,
+ // with no read access between them or on any other path to a function exit
+ // block if \p KillingLoc is not accessible after the function returns. If
+ // there is no such MemoryDef, return None. The returned value may not
+ // (completely) overwrite \p KillingLoc. Currently we bail out when we
+ // encounter an aliasing MemoryUse (read).
Optional<MemoryAccess *>
getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
- const MemoryLocation &DefLoc, const Value *DefUO,
+ const MemoryLocation &KillingLoc, const Value *KillingUndObj,
unsigned &ScanLimit, unsigned &WalkerStepLimit,
bool IsMemTerm, unsigned &PartialLimit) {
if (ScanLimit == 0 || WalkerStepLimit == 0) {
@@ -1389,19 +1344,20 @@ struct DSEState {
MemoryDef *CurrentDef = cast<MemoryDef>(Current);
Instruction *CurrentI = CurrentDef->getMemoryInst();
- if (canSkipDef(CurrentDef, !isInvisibleToCallerBeforeRet(DefUO)))
+ if (canSkipDef(CurrentDef, !isInvisibleToCallerBeforeRet(KillingUndObj),
+ TLI))
continue;
// Before we try to remove anything, check for any extra throwing
// instructions that block us from DSEing
- if (mayThrowBetween(KillingI, CurrentI, DefUO)) {
+ if (mayThrowBetween(KillingI, CurrentI, KillingUndObj)) {
LLVM_DEBUG(dbgs() << " ... skip, may throw!\n");
return None;
}
// Check for anything that looks like it will be a barrier to further
// removal
- if (isDSEBarrier(DefUO, CurrentI)) {
+ if (isDSEBarrier(KillingUndObj, CurrentI)) {
LLVM_DEBUG(dbgs() << " ... skip, barrier\n");
return None;
}
@@ -1410,14 +1366,14 @@ struct DSEState {
// clobber, bail out, as the path is not profitable. We skip this check
// for intrinsic calls, because the code knows how to handle memcpy
// intrinsics.
- if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(DefLoc, CurrentI))
+ if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(KillingLoc, CurrentI))
return None;
// Quick check if there are direct uses that are read-clobbers.
- if (any_of(Current->uses(), [this, &DefLoc, StartAccess](Use &U) {
+ if (any_of(Current->uses(), [this, &KillingLoc, StartAccess](Use &U) {
if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U.getUser()))
return !MSSA.dominates(StartAccess, UseOrDef) &&
- isReadClobber(DefLoc, UseOrDef->getMemoryInst());
+ isReadClobber(KillingLoc, UseOrDef->getMemoryInst());
return false;
})) {
LLVM_DEBUG(dbgs() << " ... found a read clobber\n");
@@ -1450,9 +1406,10 @@ struct DSEState {
if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI))
continue;
} else {
- int64_t InstWriteOffset, DepWriteOffset;
- auto OR = isOverwrite(KillingI, CurrentI, DefLoc, *CurrentLoc,
- DepWriteOffset, InstWriteOffset);
+ int64_t KillingOffset = 0;
+ int64_t DeadOffset = 0;
+ auto OR = isOverwrite(KillingI, CurrentI, KillingLoc, *CurrentLoc,
+ KillingOffset, DeadOffset);
// If Current does not write to the same object as KillingDef, check
// the next candidate.
if (OR == OW_Unknown)
@@ -1473,30 +1430,25 @@ struct DSEState {
};
// Accesses to objects accessible after the function returns can only be
- // eliminated if the access is killed along all paths to the exit. Collect
+ // eliminated if the access is dead along all paths to the exit. Collect
// the blocks with killing (=completely overwriting MemoryDefs) and check if
- // they cover all paths from EarlierAccess to any function exit.
+ // they cover all paths from MaybeDeadAccess to any function exit.
SmallPtrSet<Instruction *, 16> KillingDefs;
KillingDefs.insert(KillingDef->getMemoryInst());
- MemoryAccess *EarlierAccess = Current;
- Instruction *EarlierMemInst =
- cast<MemoryDef>(EarlierAccess)->getMemoryInst();
- LLVM_DEBUG(dbgs() << " Checking for reads of " << *EarlierAccess << " ("
- << *EarlierMemInst << ")\n");
+ MemoryAccess *MaybeDeadAccess = Current;
+ MemoryLocation MaybeDeadLoc = *CurrentLoc;
+ Instruction *MaybeDeadI = cast<MemoryDef>(MaybeDeadAccess)->getMemoryInst();
+ LLVM_DEBUG(dbgs() << " Checking for reads of " << *MaybeDeadAccess << " ("
+ << *MaybeDeadI << ")\n");
SmallSetVector<MemoryAccess *, 32> WorkList;
auto PushMemUses = [&WorkList](MemoryAccess *Acc) {
for (Use &U : Acc->uses())
WorkList.insert(cast<MemoryAccess>(U.getUser()));
};
- PushMemUses(EarlierAccess);
-
- // Optimistically collect all accesses for reads. If we do not find any
- // read clobbers, add them to the cache.
- SmallPtrSet<MemoryAccess *, 16> KnownNoReads;
- if (!EarlierMemInst->mayReadFromMemory())
- KnownNoReads.insert(EarlierAccess);
- // Check if EarlierDef may be read.
+ PushMemUses(MaybeDeadAccess);
+
+ // Check if DeadDef may be read.
for (unsigned I = 0; I < WorkList.size(); I++) {
MemoryAccess *UseAccess = WorkList[I];
@@ -1508,7 +1460,6 @@ struct DSEState {
}
--ScanLimit;
NumDomMemDefChecks++;
- KnownNoReads.insert(UseAccess);
if (isa<MemoryPhi>(UseAccess)) {
if (any_of(KillingDefs, [this, UseAccess](Instruction *KI) {
@@ -1535,7 +1486,7 @@ struct DSEState {
      // A memory terminator kills all preceding MemoryDefs and all succeeding
      // MemoryAccesses. We do not have to check its users.
- if (isMemTerminator(*CurrentLoc, EarlierMemInst, UseInst)) {
+ if (isMemTerminator(MaybeDeadLoc, MaybeDeadI, UseInst)) {
LLVM_DEBUG(
dbgs()
<< " ... skipping, memterminator invalidates following accesses\n");
@@ -1548,14 +1499,14 @@ struct DSEState {
continue;
}
- if (UseInst->mayThrow() && !isInvisibleToCallerBeforeRet(DefUO)) {
+ if (UseInst->mayThrow() && !isInvisibleToCallerBeforeRet(KillingUndObj)) {
LLVM_DEBUG(dbgs() << " ... found throwing instruction\n");
return None;
}
// Uses which may read the original MemoryDef mean we cannot eliminate the
// original MD. Stop walk.
- if (isReadClobber(*CurrentLoc, UseInst)) {
+ if (isReadClobber(MaybeDeadLoc, UseInst)) {
LLVM_DEBUG(dbgs() << " ... found read clobber\n");
return None;
}
@@ -1563,16 +1514,16 @@ struct DSEState {
// If this worklist walks back to the original memory access (and the
      // pointer is not guaranteed loop invariant) then we cannot assume that a
// store kills itself.
- if (EarlierAccess == UseAccess &&
- !isGuaranteedLoopInvariant(CurrentLoc->Ptr)) {
+ if (MaybeDeadAccess == UseAccess &&
+ !isGuaranteedLoopInvariant(MaybeDeadLoc.Ptr)) {
LLVM_DEBUG(dbgs() << " ... found not loop invariant self access\n");
return None;
}
- // Otherwise, for the KillingDef and EarlierAccess we only have to check
+ // Otherwise, for the KillingDef and MaybeDeadAccess we only have to check
// if it reads the memory location.
// TODO: It would probably be better to check for self-reads before
// calling the function.
- if (KillingDef == UseAccess || EarlierAccess == UseAccess) {
+ if (KillingDef == UseAccess || MaybeDeadAccess == UseAccess) {
LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n");
continue;
}
@@ -1581,18 +1532,18 @@ struct DSEState {
// the original location. Otherwise we have to check uses of *all*
// MemoryDefs we discover, including non-aliasing ones. Otherwise we might
// miss cases like the following
- // 1 = Def(LoE) ; <----- EarlierDef stores [0,1]
+ // 1 = Def(LoE) ; <----- DeadDef stores [0,1]
// 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3]
// Use(2) ; MayAlias 2 *and* 1, loads [0, 3].
// (The Use points to the *first* Def it may alias)
// 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias,
// stores [0,1]
if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) {
- if (isCompleteOverwrite(*CurrentLoc, EarlierMemInst, UseInst)) {
+ if (isCompleteOverwrite(MaybeDeadLoc, MaybeDeadI, UseInst)) {
BasicBlock *MaybeKillingBlock = UseInst->getParent();
if (PostOrderNumbers.find(MaybeKillingBlock)->second <
- PostOrderNumbers.find(EarlierAccess->getBlock())->second) {
- if (!isInvisibleToCallerAfterRet(DefUO)) {
+ PostOrderNumbers.find(MaybeDeadAccess->getBlock())->second) {
+ if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
LLVM_DEBUG(dbgs()
<< " ... found killing def " << *UseInst << "\n");
KillingDefs.insert(UseInst);
@@ -1608,9 +1559,9 @@ struct DSEState {
}
// For accesses to locations visible after the function returns, make sure
- // that the location is killed (=overwritten) along all paths from
- // EarlierAccess to the exit.
- if (!isInvisibleToCallerAfterRet(DefUO)) {
+ // that the location is dead (=overwritten) along all paths from
+ // MaybeDeadAccess to the exit.
+ if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
SmallPtrSet<BasicBlock *, 16> KillingBlocks;
for (Instruction *KD : KillingDefs)
KillingBlocks.insert(KD->getParent());
@@ -1619,25 +1570,24 @@ struct DSEState {
// Find the common post-dominator of all killing blocks.
BasicBlock *CommonPred = *KillingBlocks.begin();
- for (auto I = std::next(KillingBlocks.begin()), E = KillingBlocks.end();
- I != E; I++) {
+ for (BasicBlock *BB : llvm::drop_begin(KillingBlocks)) {
if (!CommonPred)
break;
- CommonPred = PDT.findNearestCommonDominator(CommonPred, *I);
+ CommonPred = PDT.findNearestCommonDominator(CommonPred, BB);
}
// If CommonPred is in the set of killing blocks, just check if it
- // post-dominates EarlierAccess.
+ // post-dominates MaybeDeadAccess.
if (KillingBlocks.count(CommonPred)) {
- if (PDT.dominates(CommonPred, EarlierAccess->getBlock()))
- return {EarlierAccess};
+ if (PDT.dominates(CommonPred, MaybeDeadAccess->getBlock()))
+ return {MaybeDeadAccess};
return None;
}
- // If the common post-dominator does not post-dominate EarlierAccess,
- // there is a path from EarlierAccess to an exit not going through a
+ // If the common post-dominator does not post-dominate MaybeDeadAccess,
+ // there is a path from MaybeDeadAccess to an exit not going through a
// killing block.
- if (PDT.dominates(CommonPred, EarlierAccess->getBlock())) {
+ if (PDT.dominates(CommonPred, MaybeDeadAccess->getBlock())) {
SetVector<BasicBlock *> WorkList;
// If CommonPred is null, there are multiple exits from the function.
@@ -1650,16 +1600,16 @@ struct DSEState {
NumCFGTries++;
// Check if all paths starting from an exit node go through one of the
- // killing blocks before reaching EarlierAccess.
+ // killing blocks before reaching MaybeDeadAccess.
for (unsigned I = 0; I < WorkList.size(); I++) {
NumCFGChecks++;
BasicBlock *Current = WorkList[I];
if (KillingBlocks.count(Current))
continue;
- if (Current == EarlierAccess->getBlock())
+ if (Current == MaybeDeadAccess->getBlock())
return None;
- // EarlierAccess is reachable from the entry, so we don't have to
+ // MaybeDeadAccess is reachable from the entry, so we don't have to
// explore unreachable blocks further.
if (!DT.isReachableFromEntry(Current))
continue;
@@ -1671,14 +1621,14 @@ struct DSEState {
return None;
}
NumCFGSuccess++;
- return {EarlierAccess};
+ return {MaybeDeadAccess};
}
return None;
}
- // No aliasing MemoryUses of EarlierAccess found, EarlierAccess is
+ // No aliasing MemoryUses of MaybeDeadAccess found, MaybeDeadAccess is
// potentially dead.
- return {EarlierAccess};
+ return {MaybeDeadAccess};
}
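// Editor's note: illustrative sketch, not part of the patch. 'G' is a
// hypothetical external global, i.e. still visible after the function returns,
// so the first store is only removable because every path to the exit passes
// through a block that overwrites it -- the post-dominator check above.
extern int G;

void setG(bool Cond) {
  G = 1; // dead: both successors overwrite it before any function exit
  if (Cond)
    G = 2;
  else
    G = 3;
}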
// Delete dead memory defs
@@ -1701,6 +1651,7 @@ struct DSEState {
if (MemoryDef *MD = dyn_cast<MemoryDef>(MA)) {
SkipStores.insert(MD);
}
+
Updater.removeMemoryAccess(MA);
}
@@ -1715,47 +1666,49 @@ struct DSEState {
NowDeadInsts.push_back(OpI);
}
+ EI.removeInstruction(DeadInst);
DeadInst->eraseFromParent();
}
}
- // Check for any extra throws between SI and NI that block DSE. This only
- // checks extra maythrows (those that aren't MemoryDef's). MemoryDef that may
- // throw are handled during the walk from one def to the next.
- bool mayThrowBetween(Instruction *SI, Instruction *NI,
- const Value *SILocUnd) {
- // First see if we can ignore it by using the fact that SI is an
+ // Check for any extra throws between \p KillingI and \p DeadI that block
+  // DSE. This only checks extra maythrows (those that aren't MemoryDefs).
+  // MemoryDefs that may throw are handled during the walk from one def to the
+ // next.
+ bool mayThrowBetween(Instruction *KillingI, Instruction *DeadI,
+ const Value *KillingUndObj) {
+    // First see if we can ignore it by using the fact that KillingI writes to an
// alloca/alloca like object that is not visible to the caller during
// execution of the function.
- if (SILocUnd && isInvisibleToCallerBeforeRet(SILocUnd))
+ if (KillingUndObj && isInvisibleToCallerBeforeRet(KillingUndObj))
return false;
- if (SI->getParent() == NI->getParent())
- return ThrowingBlocks.count(SI->getParent());
+ if (KillingI->getParent() == DeadI->getParent())
+ return ThrowingBlocks.count(KillingI->getParent());
return !ThrowingBlocks.empty();
}
- // Check if \p NI acts as a DSE barrier for \p SI. The following instructions
- // act as barriers:
- // * A memory instruction that may throw and \p SI accesses a non-stack
+ // Check if \p DeadI acts as a DSE barrier for \p KillingI. The following
+ // instructions act as barriers:
+ // * A memory instruction that may throw and \p KillingI accesses a non-stack
// object.
  // * Atomic stores stronger than monotonic.
- bool isDSEBarrier(const Value *SILocUnd, Instruction *NI) {
- // If NI may throw it acts as a barrier, unless we are to an alloca/alloca
- // like object that does not escape.
- if (NI->mayThrow() && !isInvisibleToCallerBeforeRet(SILocUnd))
+ bool isDSEBarrier(const Value *KillingUndObj, Instruction *DeadI) {
+    // If DeadI may throw, it acts as a barrier unless we are writing to an
+    // alloca/alloca like object that does not escape.
+ if (DeadI->mayThrow() && !isInvisibleToCallerBeforeRet(KillingUndObj))
return true;
- // If NI is an atomic load/store stronger than monotonic, do not try to
+ // If DeadI is an atomic load/store stronger than monotonic, do not try to
// eliminate/reorder it.
- if (NI->isAtomic()) {
- if (auto *LI = dyn_cast<LoadInst>(NI))
+ if (DeadI->isAtomic()) {
+ if (auto *LI = dyn_cast<LoadInst>(DeadI))
return isStrongerThanMonotonic(LI->getOrdering());
- if (auto *SI = dyn_cast<StoreInst>(NI))
+ if (auto *SI = dyn_cast<StoreInst>(DeadI))
return isStrongerThanMonotonic(SI->getOrdering());
- if (auto *ARMW = dyn_cast<AtomicRMWInst>(NI))
+ if (auto *ARMW = dyn_cast<AtomicRMWInst>(DeadI))
return isStrongerThanMonotonic(ARMW->getOrdering());
- if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(NI))
+ if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(DeadI))
return isStrongerThanMonotonic(CmpXchg->getSuccessOrdering()) ||
isStrongerThanMonotonic(CmpXchg->getFailureOrdering());
llvm_unreachable("other instructions should be skipped in MemorySSA");
@@ -1776,7 +1729,6 @@ struct DSEState {
continue;
Instruction *DefI = Def->getMemoryInst();
- SmallVector<const Value *, 4> Pointers;
auto DefLoc = getLocForWriteEx(DefI);
if (!DefLoc)
continue;
@@ -1787,7 +1739,7 @@ struct DSEState {
// uncommon. If it turns out to be important, we can use
// getUnderlyingObjects here instead.
const Value *UO = getUnderlyingObject(DefLoc->Ptr);
- if (!UO || !isInvisibleToCallerAfterRet(UO))
+ if (!isInvisibleToCallerAfterRet(UO))
continue;
if (isWriteAtEndOfFunction(Def)) {
@@ -1804,8 +1756,7 @@ struct DSEState {
/// \returns true if \p Def is a no-op store, either because it
/// directly stores back a loaded value or stores zero to a calloced object.
- bool storeIsNoop(MemoryDef *Def, const MemoryLocation &DefLoc,
- const Value *DefUO) {
+ bool storeIsNoop(MemoryDef *Def, const Value *DefUO) {
StoreInst *Store = dyn_cast<StoreInst>(Def->getMemoryInst());
MemSetInst *MemSet = dyn_cast<MemSetInst>(Def->getMemoryInst());
Constant *StoredConstant = nullptr;
@@ -1816,13 +1767,78 @@ struct DSEState {
if (StoredConstant && StoredConstant->isNullValue()) {
auto *DefUOInst = dyn_cast<Instruction>(DefUO);
- if (DefUOInst && isCallocLikeFn(DefUOInst, &TLI)) {
- auto *UnderlyingDef = cast<MemoryDef>(MSSA.getMemoryAccess(DefUOInst));
- // If UnderlyingDef is the clobbering access of Def, no instructions
- // between them can modify the memory location.
- auto *ClobberDef =
- MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def);
- return UnderlyingDef == ClobberDef;
+ if (DefUOInst) {
+ if (isCallocLikeFn(DefUOInst, &TLI)) {
+ auto *UnderlyingDef =
+ cast<MemoryDef>(MSSA.getMemoryAccess(DefUOInst));
+ // If UnderlyingDef is the clobbering access of Def, no instructions
+ // between them can modify the memory location.
+ auto *ClobberDef =
+ MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def);
+ return UnderlyingDef == ClobberDef;
+ }
+
+ if (MemSet) {
+ if (F.hasFnAttribute(Attribute::SanitizeMemory) ||
+ F.hasFnAttribute(Attribute::SanitizeAddress) ||
+ F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
+ F.getName() == "calloc")
+ return false;
+ auto *Malloc = const_cast<CallInst *>(dyn_cast<CallInst>(DefUOInst));
+ if (!Malloc)
+ return false;
+ auto *InnerCallee = Malloc->getCalledFunction();
+ if (!InnerCallee)
+ return false;
+ LibFunc Func;
+ if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) ||
+ Func != LibFunc_malloc)
+ return false;
+
+ auto shouldCreateCalloc = [](CallInst *Malloc, CallInst *Memset) {
+            // Check for a br(icmp(ptr, null), truebb, falsebb) pattern at the
+            // end of the malloc block.
+ auto *MallocBB = Malloc->getParent(),
+ *MemsetBB = Memset->getParent();
+ if (MallocBB == MemsetBB)
+ return true;
+ auto *Ptr = Memset->getArgOperand(0);
+ auto *TI = MallocBB->getTerminator();
+ ICmpInst::Predicate Pred;
+ BasicBlock *TrueBB, *FalseBB;
+ if (!match(TI, m_Br(m_ICmp(Pred, m_Specific(Ptr), m_Zero()), TrueBB,
+ FalseBB)))
+ return false;
+ if (Pred != ICmpInst::ICMP_EQ || MemsetBB != FalseBB)
+ return false;
+ return true;
+ };
+
+ if (Malloc->getOperand(0) == MemSet->getLength()) {
+ if (shouldCreateCalloc(Malloc, MemSet) &&
+ DT.dominates(Malloc, MemSet) &&
+ memoryIsNotModifiedBetween(Malloc, MemSet, BatchAA, DL, &DT)) {
+ IRBuilder<> IRB(Malloc);
+ const auto &DL = Malloc->getModule()->getDataLayout();
+ if (auto *Calloc =
+ emitCalloc(ConstantInt::get(IRB.getIntPtrTy(DL), 1),
+ Malloc->getArgOperand(0), IRB, TLI)) {
+ MemorySSAUpdater Updater(&MSSA);
+ auto *LastDef = cast<MemoryDef>(
+ Updater.getMemorySSA()->getMemoryAccess(Malloc));
+ auto *NewAccess = Updater.createMemoryAccessAfter(
+ cast<Instruction>(Calloc), LastDef, LastDef);
+ auto *NewAccessMD = cast<MemoryDef>(NewAccess);
+ Updater.insertDef(NewAccessMD, /*RenameUses=*/true);
+ Updater.removeMemoryAccess(Malloc);
+ Malloc->replaceAllUsesWith(Calloc);
+ Malloc->eraseFromParent();
+ return true;
+ }
+ return false;
+ }
+ }
+ }
}
}
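// Editor's note: illustrative sketches, not part of the patch; the function
// names are made up and null checks are omitted to keep them minimal. The
// first shows the existing calloc case handled above (a zero store into
// freshly calloc'ed memory is a no-op); the second shows the new pattern
// where a memset of the whole malloc'ed block, using the same length value,
// may be folded into a single calloc call when the conditions above hold.
#include <cstdlib>
#include <cstring>

int *zeroCounter() {
  int *P = static_cast<int *>(std::calloc(1, sizeof(int)));
  if (P)
    *P = 0; // no-op store: calloc already zeroed the allocation
  return P;
}

unsigned char *zeroBuffer(size_t Bytes) {
  unsigned char *P = static_cast<unsigned char *>(std::malloc(Bytes));
  std::memset(P, 0, Bytes); // same length value, same block as the malloc
  return P;                 // candidate to become: calloc(1, Bytes)
}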
@@ -1875,6 +1891,76 @@ struct DSEState {
return false;
}
+
+ bool removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL) {
+ bool Changed = false;
+ for (auto OI : IOL) {
+ Instruction *DeadI = OI.first;
+ MemoryLocation Loc = *getLocForWriteEx(DeadI);
+ assert(isRemovable(DeadI) && "Expect only removable instruction");
+
+ const Value *Ptr = Loc.Ptr->stripPointerCasts();
+ int64_t DeadStart = 0;
+ uint64_t DeadSize = Loc.Size.getValue();
+ GetPointerBaseWithConstantOffset(Ptr, DeadStart, DL);
+ OverlapIntervalsTy &IntervalMap = OI.second;
+ Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize);
+ if (IntervalMap.empty())
+ continue;
+ Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
+ }
+ return Changed;
+ }
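// Editor's note: illustrative sketch, not part of the patch. The overlap
// intervals collected for the first memset cover its last 16 bytes, so
// tryToShortenEnd above can shrink it to memset(P, 0, 16) instead of having
// to delete it outright.
#include <cstring>

void splitInit(char *P) {
  std::memset(P, 0, 32);      // tail [16, 32) is overwritten below
  std::memset(P + 16, 1, 16); // killing store for the second half
}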
+
+ /// Eliminates writes to locations where the value that is being written
+ /// is already stored at the same location.
+ bool eliminateRedundantStoresOfExistingValues() {
+ bool MadeChange = false;
+ LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs that write the "
+ "already existing value\n");
+ for (auto *Def : MemDefs) {
+ if (SkipStores.contains(Def) || MSSA.isLiveOnEntryDef(Def) ||
+ !isRemovable(Def->getMemoryInst()))
+ continue;
+ auto *UpperDef = dyn_cast<MemoryDef>(Def->getDefiningAccess());
+ if (!UpperDef || MSSA.isLiveOnEntryDef(UpperDef))
+ continue;
+
+ Instruction *DefInst = Def->getMemoryInst();
+ Instruction *UpperInst = UpperDef->getMemoryInst();
+ auto IsRedundantStore = [this, DefInst,
+ UpperInst](MemoryLocation UpperLoc) {
+ if (DefInst->isIdenticalTo(UpperInst))
+ return true;
+ if (auto *MemSetI = dyn_cast<MemSetInst>(UpperInst)) {
+ if (auto *SI = dyn_cast<StoreInst>(DefInst)) {
+ auto MaybeDefLoc = getLocForWriteEx(DefInst);
+ if (!MaybeDefLoc)
+ return false;
+ int64_t InstWriteOffset = 0;
+ int64_t DepWriteOffset = 0;
+ auto OR = isOverwrite(UpperInst, DefInst, UpperLoc, *MaybeDefLoc,
+ InstWriteOffset, DepWriteOffset);
+ Value *StoredByte = isBytewiseValue(SI->getValueOperand(), DL);
+ return StoredByte && StoredByte == MemSetI->getOperand(1) &&
+ OR == OW_Complete;
+ }
+ }
+ return false;
+ };
+
+ auto MaybeUpperLoc = getLocForWriteEx(UpperInst);
+ if (!MaybeUpperLoc || !IsRedundantStore(*MaybeUpperLoc) ||
+ isReadClobber(*MaybeUpperLoc, DefInst))
+ continue;
+ LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *DefInst
+ << '\n');
+ deleteDeadInstruction(DefInst);
+ NumRedundantStores++;
+ MadeChange = true;
+ }
+ return MadeChange;
+ }
};
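// Editor's note: illustrative sketch, not part of the patch; the function name
// is made up. The byte store below writes the same value the dominating memset
// already established, which is the kind of MemoryDef
// eliminateRedundantStoresOfExistingValues above aims to remove.
#include <cstring>

void resetFlags(unsigned char *Flags) {
  std::memset(Flags, 0, 16); // UpperDef: establishes 16 zero bytes
  Flags[3] = 0;              // redundant: same byte value, fully inside the memset
}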
static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
@@ -1883,68 +1969,64 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
const LoopInfo &LI) {
bool MadeChange = false;
- DSEState State = DSEState::get(F, AA, MSSA, DT, PDT, TLI, LI);
+ DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
// For each store:
for (unsigned I = 0; I < State.MemDefs.size(); I++) {
MemoryDef *KillingDef = State.MemDefs[I];
if (State.SkipStores.count(KillingDef))
continue;
- Instruction *SI = KillingDef->getMemoryInst();
+ Instruction *KillingI = KillingDef->getMemoryInst();
- Optional<MemoryLocation> MaybeSILoc;
- if (State.isMemTerminatorInst(SI))
- MaybeSILoc = State.getLocForTerminator(SI).map(
+ Optional<MemoryLocation> MaybeKillingLoc;
+ if (State.isMemTerminatorInst(KillingI))
+ MaybeKillingLoc = State.getLocForTerminator(KillingI).map(
[](const std::pair<MemoryLocation, bool> &P) { return P.first; });
else
- MaybeSILoc = State.getLocForWriteEx(SI);
+ MaybeKillingLoc = State.getLocForWriteEx(KillingI);
- if (!MaybeSILoc) {
+ if (!MaybeKillingLoc) {
LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
- << *SI << "\n");
+ << *KillingI << "\n");
continue;
}
- MemoryLocation SILoc = *MaybeSILoc;
- assert(SILoc.Ptr && "SILoc should not be null");
- const Value *SILocUnd = getUnderlyingObject(SILoc.Ptr);
-
- MemoryAccess *Current = KillingDef;
+ MemoryLocation KillingLoc = *MaybeKillingLoc;
+ assert(KillingLoc.Ptr && "KillingLoc should not be null");
+ const Value *KillingUndObj = getUnderlyingObject(KillingLoc.Ptr);
LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
- << *Current << " (" << *SI << ")\n");
+ << *KillingDef << " (" << *KillingI << ")\n");
unsigned ScanLimit = MemorySSAScanLimit;
unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit;
unsigned PartialLimit = MemorySSAPartialStoreLimit;
// Worklist of MemoryAccesses that may be killed by KillingDef.
SetVector<MemoryAccess *> ToCheck;
-
- if (SILocUnd)
- ToCheck.insert(KillingDef->getDefiningAccess());
+ ToCheck.insert(KillingDef->getDefiningAccess());
bool Shortend = false;
- bool IsMemTerm = State.isMemTerminatorInst(SI);
+ bool IsMemTerm = State.isMemTerminatorInst(KillingI);
// Check if MemoryAccesses in the worklist are killed by KillingDef.
for (unsigned I = 0; I < ToCheck.size(); I++) {
- Current = ToCheck[I];
+ MemoryAccess *Current = ToCheck[I];
if (State.SkipStores.count(Current))
continue;
- Optional<MemoryAccess *> Next = State.getDomMemoryDef(
- KillingDef, Current, SILoc, SILocUnd, ScanLimit, WalkerStepLimit,
- IsMemTerm, PartialLimit);
+ Optional<MemoryAccess *> MaybeDeadAccess = State.getDomMemoryDef(
+ KillingDef, Current, KillingLoc, KillingUndObj, ScanLimit,
+ WalkerStepLimit, IsMemTerm, PartialLimit);
- if (!Next) {
+ if (!MaybeDeadAccess) {
LLVM_DEBUG(dbgs() << " finished walk\n");
continue;
}
- MemoryAccess *EarlierAccess = *Next;
- LLVM_DEBUG(dbgs() << " Checking if we can kill " << *EarlierAccess);
- if (isa<MemoryPhi>(EarlierAccess)) {
+ MemoryAccess *DeadAccess = *MaybeDeadAccess;
+ LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DeadAccess);
+ if (isa<MemoryPhi>(DeadAccess)) {
LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n");
- for (Value *V : cast<MemoryPhi>(EarlierAccess)->incoming_values()) {
+ for (Value *V : cast<MemoryPhi>(DeadAccess)->incoming_values()) {
MemoryAccess *IncomingAccess = cast<MemoryAccess>(V);
BasicBlock *IncomingBlock = IncomingAccess->getBlock();
- BasicBlock *PhiBlock = EarlierAccess->getBlock();
+ BasicBlock *PhiBlock = DeadAccess->getBlock();
// We only consider incoming MemoryAccesses that come before the
// MemoryPhi. Otherwise we could discover candidates that do not
@@ -1955,72 +2037,73 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
}
continue;
}
- auto *NextDef = cast<MemoryDef>(EarlierAccess);
- Instruction *NI = NextDef->getMemoryInst();
- LLVM_DEBUG(dbgs() << " (" << *NI << ")\n");
- ToCheck.insert(NextDef->getDefiningAccess());
+ auto *DeadDefAccess = cast<MemoryDef>(DeadAccess);
+ Instruction *DeadI = DeadDefAccess->getMemoryInst();
+ LLVM_DEBUG(dbgs() << " (" << *DeadI << ")\n");
+ ToCheck.insert(DeadDefAccess->getDefiningAccess());
NumGetDomMemoryDefPassed++;
if (!DebugCounter::shouldExecute(MemorySSACounter))
continue;
- MemoryLocation NILoc = *State.getLocForWriteEx(NI);
+ MemoryLocation DeadLoc = *State.getLocForWriteEx(DeadI);
if (IsMemTerm) {
- const Value *NIUnd = getUnderlyingObject(NILoc.Ptr);
- if (SILocUnd != NIUnd)
+ const Value *DeadUndObj = getUnderlyingObject(DeadLoc.Ptr);
+ if (KillingUndObj != DeadUndObj)
continue;
- LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *NI
- << "\n KILLER: " << *SI << '\n');
- State.deleteDeadInstruction(NI);
+ LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI
+ << "\n KILLER: " << *KillingI << '\n');
+ State.deleteDeadInstruction(DeadI);
++NumFastStores;
MadeChange = true;
} else {
- // Check if NI overwrites SI.
- int64_t InstWriteOffset, DepWriteOffset;
- OverwriteResult OR = State.isOverwrite(SI, NI, SILoc, NILoc,
- DepWriteOffset, InstWriteOffset);
+ // Check if DeadI overwrites KillingI.
+ int64_t KillingOffset = 0;
+ int64_t DeadOffset = 0;
+ OverwriteResult OR = State.isOverwrite(
+ KillingI, DeadI, KillingLoc, DeadLoc, KillingOffset, DeadOffset);
if (OR == OW_MaybePartial) {
auto Iter = State.IOLs.insert(
std::make_pair<BasicBlock *, InstOverlapIntervalsTy>(
- NI->getParent(), InstOverlapIntervalsTy()));
+ DeadI->getParent(), InstOverlapIntervalsTy()));
auto &IOL = Iter.first->second;
- OR = isPartialOverwrite(SILoc, NILoc, DepWriteOffset, InstWriteOffset,
- NI, IOL);
+ OR = isPartialOverwrite(KillingLoc, DeadLoc, KillingOffset,
+ DeadOffset, DeadI, IOL);
}
if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) {
- auto *Earlier = dyn_cast<StoreInst>(NI);
- auto *Later = dyn_cast<StoreInst>(SI);
+ auto *DeadSI = dyn_cast<StoreInst>(DeadI);
+ auto *KillingSI = dyn_cast<StoreInst>(KillingI);
// We are re-using tryToMergePartialOverlappingStores, which requires
- // Earlier to domiante Later.
+        // DeadSI to dominate KillingSI.
        // TODO: implement tryToMergePartialOverlappingStores using MemorySSA.
- if (Earlier && Later && DT.dominates(Earlier, Later)) {
+ if (DeadSI && KillingSI && DT.dominates(DeadSI, KillingSI)) {
if (Constant *Merged = tryToMergePartialOverlappingStores(
- Earlier, Later, InstWriteOffset, DepWriteOffset, State.DL,
+ KillingSI, DeadSI, KillingOffset, DeadOffset, State.DL,
State.BatchAA, &DT)) {
// Update stored value of earlier store to merged constant.
- Earlier->setOperand(0, Merged);
+ DeadSI->setOperand(0, Merged);
++NumModifiedStores;
MadeChange = true;
Shortend = true;
- // Remove later store and remove any outstanding overlap intervals
- // for the updated store.
- State.deleteDeadInstruction(Later);
- auto I = State.IOLs.find(Earlier->getParent());
+ // Remove killing store and remove any outstanding overlap
+ // intervals for the updated store.
+ State.deleteDeadInstruction(KillingSI);
+ auto I = State.IOLs.find(DeadSI->getParent());
if (I != State.IOLs.end())
- I->second.erase(Earlier);
+ I->second.erase(DeadSI);
break;
}
}
}
if (OR == OW_Complete) {
- LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *NI
- << "\n KILLER: " << *SI << '\n');
- State.deleteDeadInstruction(NI);
+ LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI
+ << "\n KILLER: " << *KillingI << '\n');
+ State.deleteDeadInstruction(DeadI);
++NumFastStores;
MadeChange = true;
}
@@ -2028,10 +2111,11 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
}
// Check if the store is a no-op.
- if (!Shortend && isRemovable(SI) &&
- State.storeIsNoop(KillingDef, SILoc, SILocUnd)) {
- LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *SI << '\n');
- State.deleteDeadInstruction(SI);
+ if (!Shortend && isRemovable(KillingI) &&
+ State.storeIsNoop(KillingDef, KillingUndObj)) {
+ LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *KillingI
+ << '\n');
+ State.deleteDeadInstruction(KillingI);
NumRedundantStores++;
MadeChange = true;
continue;
@@ -2040,8 +2124,9 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
if (EnablePartialOverwriteTracking)
for (auto &KV : State.IOLs)
- MadeChange |= removePartiallyOverlappedStores(State.DL, KV.second, TLI);
+ MadeChange |= State.removePartiallyOverlappedStores(KV.second);
+ MadeChange |= State.eliminateRedundantStoresOfExistingValues();
MadeChange |= State.eliminateDeadWritesAtEndOfFunction();
return MadeChange;
}
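// Editor's note: illustrative sketch, not part of the patch. The driver above
// walks MemoryDefs upwards from each killing store; in this function the first
// store is found and deleted because nothing can read it, and no may-throw
// instruction sits between it and the overwrite.
void update(int *P) {
  *P = 1; // dead store
  *P = 2; // killing store
}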
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 978c6a77b8dc..90f71f7729a7 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -293,7 +293,7 @@ static unsigned getHashValueImpl(SimpleValue Val) {
// TODO: Extend this to handle intrinsics with >2 operands where the 1st
// 2 operands are commutative.
auto *II = dyn_cast<IntrinsicInst>(Inst);
- if (II && II->isCommutative() && II->getNumArgOperands() == 2) {
+ if (II && II->isCommutative() && II->arg_size() == 2) {
Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
if (LHS > RHS)
std::swap(LHS, RHS);
@@ -363,7 +363,7 @@ static bool isEqualImpl(SimpleValue LHS, SimpleValue RHS) {
auto *LII = dyn_cast<IntrinsicInst>(LHSI);
auto *RII = dyn_cast<IntrinsicInst>(RHSI);
if (LII && RII && LII->getIntrinsicID() == RII->getIntrinsicID() &&
- LII->isCommutative() && LII->getNumArgOperands() == 2) {
+ LII->isCommutative() && LII->arg_size() == 2) {
return LII->getArgOperand(0) == RII->getArgOperand(1) &&
LII->getArgOperand(1) == RII->getArgOperand(0);
}
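// Editor's note: illustrative only, not part of the patch. With the
// arg_size() == 2 commutativity handling above, EarlyCSE hashes and compares
// two-operand commutative intrinsic calls with swapped operands as equal, so
// in IR like the following the second call can be CSE'd to the first
// (llvm.umin is one such commutative intrinsic):
//   %a = call i32 @llvm.umin.i32(i32 %x, i32 %y)
//   %b = call i32 @llvm.umin.i32(i32 %y, i32 %x)   ; same value as %a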
@@ -1265,6 +1265,12 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
continue;
}
+ // Skip pseudoprobe intrinsics, for the same reason as assume intrinsics.
+ if (match(&Inst, m_Intrinsic<Intrinsic::pseudoprobe>())) {
+ LLVM_DEBUG(dbgs() << "EarlyCSE skipping pseudoprobe: " << Inst << '\n');
+ continue;
+ }
+
// We can skip all invariant.start intrinsics since they only read memory,
// and we can forward values across it. For invariant starts without
// invariant ends, we can use the fact that the invariantness never ends to
@@ -1642,6 +1648,16 @@ PreservedAnalyses EarlyCSEPass::run(Function &F,
return PA;
}
+void EarlyCSEPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<EarlyCSEPass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+ OS << "<";
+ if (UseMemorySSA)
+ OS << "memssa";
+ OS << ">";
+}
+
namespace {
/// A simple and fast domtree-based CSE pass.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Float2Int.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Float2Int.cpp
index 8a5d4f568774..a98bb8358aef 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Float2Int.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Float2Int.cpp
@@ -256,7 +256,7 @@ void Float2IntPass::walkForwards() {
Op = [](ArrayRef<ConstantRange> Ops) {
assert(Ops.size() == 1 && "FNeg is a unary operator!");
unsigned Size = Ops[0].getBitWidth();
- auto Zero = ConstantRange(APInt::getNullValue(Size));
+ auto Zero = ConstantRange(APInt::getZero(Size));
return Zero.sub(Ops[0]);
};
break;
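// Editor's note: worked example for the FNeg range rule above, not part of the
// patch. If the operand is known to lie in the ConstantRange [1, 10), i.e. the
// integers 1..9, then Zero.sub(Ops[0]) yields [-9, 0), i.e. the values -9..-1.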
@@ -372,7 +372,7 @@ bool Float2IntPass::validateAndTransform() {
// If it does, transformation would be illegal.
//
// Don't count the roots, as they terminate the graphs.
- if (Roots.count(I) == 0) {
+ if (!Roots.contains(I)) {
// Set the type of the conversion while we're here.
if (!ConvertedToTy)
ConvertedToTy = I->getType();
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp
index 16368aec7c3f..00506fb86006 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -126,7 +126,7 @@ static cl::opt<uint32_t> MaxBBSpeculations(
"into) when deducing if a value is fully available or not in GVN "
"(default = 600)"));
-struct llvm::GVN::Expression {
+struct llvm::GVNPass::Expression {
uint32_t opcode;
bool commutative = false;
Type *type = nullptr;
@@ -155,17 +155,18 @@ struct llvm::GVN::Expression {
namespace llvm {
-template <> struct DenseMapInfo<GVN::Expression> {
- static inline GVN::Expression getEmptyKey() { return ~0U; }
- static inline GVN::Expression getTombstoneKey() { return ~1U; }
+template <> struct DenseMapInfo<GVNPass::Expression> {
+ static inline GVNPass::Expression getEmptyKey() { return ~0U; }
+ static inline GVNPass::Expression getTombstoneKey() { return ~1U; }
- static unsigned getHashValue(const GVN::Expression &e) {
+ static unsigned getHashValue(const GVNPass::Expression &e) {
using llvm::hash_value;
return static_cast<unsigned>(hash_value(e));
}
- static bool isEqual(const GVN::Expression &LHS, const GVN::Expression &RHS) {
+ static bool isEqual(const GVNPass::Expression &LHS,
+ const GVNPass::Expression &RHS) {
return LHS == RHS;
}
};
@@ -246,7 +247,7 @@ struct llvm::gvn::AvailableValue {
/// Emit code at the specified insertion point to adjust the value defined
/// here to the specified type. This handles various coercion cases.
Value *MaterializeAdjustedValue(LoadInst *Load, Instruction *InsertPt,
- GVN &gvn) const;
+ GVNPass &gvn) const;
};
/// Represents an AvailableValue which can be rematerialized at the end of
@@ -276,7 +277,7 @@ struct llvm::gvn::AvailableValueInBlock {
/// Emit code at the end of this block to adjust the value defined here to
/// the specified type. This handles various coercion cases.
- Value *MaterializeAdjustedValue(LoadInst *Load, GVN &gvn) const {
+ Value *MaterializeAdjustedValue(LoadInst *Load, GVNPass &gvn) const {
return AV.MaterializeAdjustedValue(Load, BB->getTerminator(), gvn);
}
};
@@ -285,7 +286,7 @@ struct llvm::gvn::AvailableValueInBlock {
// ValueTable Internal Functions
//===----------------------------------------------------------------------===//
-GVN::Expression GVN::ValueTable::createExpr(Instruction *I) {
+GVNPass::Expression GVNPass::ValueTable::createExpr(Instruction *I) {
Expression e;
e.type = I->getType();
e.opcode = I->getOpcode();
@@ -330,9 +331,8 @@ GVN::Expression GVN::ValueTable::createExpr(Instruction *I) {
return e;
}
-GVN::Expression GVN::ValueTable::createCmpExpr(unsigned Opcode,
- CmpInst::Predicate Predicate,
- Value *LHS, Value *RHS) {
+GVNPass::Expression GVNPass::ValueTable::createCmpExpr(
+ unsigned Opcode, CmpInst::Predicate Predicate, Value *LHS, Value *RHS) {
assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
"Not a comparison!");
Expression e;
@@ -350,7 +350,8 @@ GVN::Expression GVN::ValueTable::createCmpExpr(unsigned Opcode,
return e;
}
-GVN::Expression GVN::ValueTable::createExtractvalueExpr(ExtractValueInst *EI) {
+GVNPass::Expression
+GVNPass::ValueTable::createExtractvalueExpr(ExtractValueInst *EI) {
assert(EI && "Not an ExtractValueInst?");
Expression e;
e.type = EI->getType();
@@ -382,20 +383,21 @@ GVN::Expression GVN::ValueTable::createExtractvalueExpr(ExtractValueInst *EI) {
// ValueTable External Functions
//===----------------------------------------------------------------------===//
-GVN::ValueTable::ValueTable() = default;
-GVN::ValueTable::ValueTable(const ValueTable &) = default;
-GVN::ValueTable::ValueTable(ValueTable &&) = default;
-GVN::ValueTable::~ValueTable() = default;
-GVN::ValueTable &GVN::ValueTable::operator=(const GVN::ValueTable &Arg) = default;
+GVNPass::ValueTable::ValueTable() = default;
+GVNPass::ValueTable::ValueTable(const ValueTable &) = default;
+GVNPass::ValueTable::ValueTable(ValueTable &&) = default;
+GVNPass::ValueTable::~ValueTable() = default;
+GVNPass::ValueTable &
+GVNPass::ValueTable::operator=(const GVNPass::ValueTable &Arg) = default;
/// add - Insert a value into the table with a specified value number.
-void GVN::ValueTable::add(Value *V, uint32_t num) {
+void GVNPass::ValueTable::add(Value *V, uint32_t num) {
valueNumbering.insert(std::make_pair(V, num));
if (PHINode *PN = dyn_cast<PHINode>(V))
NumberingPhi[num] = PN;
}
-uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) {
+uint32_t GVNPass::ValueTable::lookupOrAddCall(CallInst *C) {
if (AA->doesNotAccessMemory(C)) {
Expression exp = createExpr(C);
uint32_t e = assignExpNewValueNum(exp).first;
@@ -421,13 +423,12 @@ uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) {
// a normal load or store instruction.
CallInst *local_cdep = dyn_cast<CallInst>(local_dep.getInst());
- if (!local_cdep ||
- local_cdep->getNumArgOperands() != C->getNumArgOperands()) {
+ if (!local_cdep || local_cdep->arg_size() != C->arg_size()) {
valueNumbering[C] = nextValueNumber;
return nextValueNumber++;
}
- for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
+ for (unsigned i = 0, e = C->arg_size(); i < e; ++i) {
uint32_t c_vn = lookupOrAdd(C->getArgOperand(i));
uint32_t cd_vn = lookupOrAdd(local_cdep->getArgOperand(i));
if (c_vn != cd_vn) {
@@ -477,11 +478,11 @@ uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) {
return nextValueNumber++;
}
- if (cdep->getNumArgOperands() != C->getNumArgOperands()) {
+ if (cdep->arg_size() != C->arg_size()) {
valueNumbering[C] = nextValueNumber;
return nextValueNumber++;
}
- for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
+ for (unsigned i = 0, e = C->arg_size(); i < e; ++i) {
uint32_t c_vn = lookupOrAdd(C->getArgOperand(i));
uint32_t cd_vn = lookupOrAdd(cdep->getArgOperand(i));
if (c_vn != cd_vn) {
@@ -500,11 +501,13 @@ uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) {
}
/// Returns true if a value number exists for the specified value.
-bool GVN::ValueTable::exists(Value *V) const { return valueNumbering.count(V) != 0; }
+bool GVNPass::ValueTable::exists(Value *V) const {
+ return valueNumbering.count(V) != 0;
+}
/// lookup_or_add - Returns the value number for the specified value, assigning
/// it a new number if it did not have one before.
-uint32_t GVN::ValueTable::lookupOrAdd(Value *V) {
+uint32_t GVNPass::ValueTable::lookupOrAdd(Value *V) {
DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
if (VI != valueNumbering.end())
return VI->second;
@@ -581,7 +584,7 @@ uint32_t GVN::ValueTable::lookupOrAdd(Value *V) {
/// Returns the value number of the specified value. Fails if
/// the value has not yet been numbered.
-uint32_t GVN::ValueTable::lookup(Value *V, bool Verify) const {
+uint32_t GVNPass::ValueTable::lookup(Value *V, bool Verify) const {
DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
if (Verify) {
assert(VI != valueNumbering.end() && "Value not numbered?");
@@ -594,15 +597,15 @@ uint32_t GVN::ValueTable::lookup(Value *V, bool Verify) const {
/// assigning it a new number if it did not have one before. Useful when
/// we deduced the result of a comparison, but don't immediately have an
/// instruction realizing that comparison to hand.
-uint32_t GVN::ValueTable::lookupOrAddCmp(unsigned Opcode,
- CmpInst::Predicate Predicate,
- Value *LHS, Value *RHS) {
+uint32_t GVNPass::ValueTable::lookupOrAddCmp(unsigned Opcode,
+ CmpInst::Predicate Predicate,
+ Value *LHS, Value *RHS) {
Expression exp = createCmpExpr(Opcode, Predicate, LHS, RHS);
return assignExpNewValueNum(exp).first;
}
/// Remove all entries from the ValueTable.
-void GVN::ValueTable::clear() {
+void GVNPass::ValueTable::clear() {
valueNumbering.clear();
expressionNumbering.clear();
NumberingPhi.clear();
@@ -614,7 +617,7 @@ void GVN::ValueTable::clear() {
}
/// Remove a value from the value numbering.
-void GVN::ValueTable::erase(Value *V) {
+void GVNPass::ValueTable::erase(Value *V) {
uint32_t Num = valueNumbering.lookup(V);
valueNumbering.erase(V);
// If V is PHINode, V <--> value number is an one-to-one mapping.
@@ -624,7 +627,7 @@ void GVN::ValueTable::erase(Value *V) {
/// verifyRemoved - Verify that the value is removed from all internal data
/// structures.
-void GVN::ValueTable::verifyRemoved(const Value *V) const {
+void GVNPass::ValueTable::verifyRemoved(const Value *V) const {
for (DenseMap<Value*, uint32_t>::const_iterator
I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
assert(I->first != V && "Inst still occurs in value numbering map!");
@@ -635,28 +638,28 @@ void GVN::ValueTable::verifyRemoved(const Value *V) const {
// GVN Pass
//===----------------------------------------------------------------------===//
-bool GVN::isPREEnabled() const {
+bool GVNPass::isPREEnabled() const {
return Options.AllowPRE.getValueOr(GVNEnablePRE);
}
-bool GVN::isLoadPREEnabled() const {
+bool GVNPass::isLoadPREEnabled() const {
return Options.AllowLoadPRE.getValueOr(GVNEnableLoadPRE);
}
-bool GVN::isLoadInLoopPREEnabled() const {
+bool GVNPass::isLoadInLoopPREEnabled() const {
return Options.AllowLoadInLoopPRE.getValueOr(GVNEnableLoadInLoopPRE);
}
-bool GVN::isLoadPRESplitBackedgeEnabled() const {
+bool GVNPass::isLoadPRESplitBackedgeEnabled() const {
return Options.AllowLoadPRESplitBackedge.getValueOr(
GVNEnableSplitBackedgeInLoadPRE);
}
-bool GVN::isMemDepEnabled() const {
+bool GVNPass::isMemDepEnabled() const {
return Options.AllowMemDep.getValueOr(GVNEnableMemDep);
}
-PreservedAnalyses GVN::run(Function &F, FunctionAnalysisManager &AM) {
+PreservedAnalyses GVNPass::run(Function &F, FunctionAnalysisManager &AM) {
// FIXME: The order of evaluation of these 'getResult' calls is very
// significant! Re-ordering these variables will cause GVN when run alone to
// be less effective! We should fix memdep and basic-aa to not exhibit this
@@ -684,8 +687,26 @@ PreservedAnalyses GVN::run(Function &F, FunctionAnalysisManager &AM) {
return PA;
}
+void GVNPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<GVNPass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+
+ OS << "<";
+ if (Options.AllowPRE != None)
+ OS << (Options.AllowPRE.getValue() ? "" : "no-") << "pre;";
+ if (Options.AllowLoadPRE != None)
+ OS << (Options.AllowLoadPRE.getValue() ? "" : "no-") << "load-pre;";
+ if (Options.AllowLoadPRESplitBackedge != None)
+ OS << (Options.AllowLoadPRESplitBackedge.getValue() ? "" : "no-")
+ << "split-backedge-load-pre;";
+ if (Options.AllowMemDep != None)
+ OS << (Options.AllowMemDep.getValue() ? "" : "no-") << "memdep";
+ OS << ">";
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD void GVN::dump(DenseMap<uint32_t, Value*>& d) const {
+LLVM_DUMP_METHOD void GVNPass::dump(DenseMap<uint32_t, Value *> &d) const {
errs() << "{\n";
for (auto &I : d) {
errs() << I.first << "\n";
@@ -835,7 +856,7 @@ static bool IsValueFullyAvailableInBlock(
static Value *
ConstructSSAForLoadSet(LoadInst *Load,
SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
- GVN &gvn) {
+ GVNPass &gvn) {
// Check for the fully redundant, dominating load case. In this case, we can
// just use the dominating value directly.
if (ValuesPerBlock.size() == 1 &&
@@ -878,7 +899,7 @@ ConstructSSAForLoadSet(LoadInst *Load,
Value *AvailableValue::MaterializeAdjustedValue(LoadInst *Load,
Instruction *InsertPt,
- GVN &gvn) const {
+ GVNPass &gvn) const {
Value *Res;
Type *LoadTy = Load->getType();
const DataLayout &DL = Load->getModule()->getDataLayout();
@@ -1002,8 +1023,8 @@ static void reportMayClobberedLoad(LoadInst *Load, MemDepResult DepInfo,
ORE->emit(R);
}
-bool GVN::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo,
- Value *Address, AvailableValue &Res) {
+bool GVNPass::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo,
+ Value *Address, AvailableValue &Res) {
assert((DepInfo.isDef() || DepInfo.isClobber()) &&
"expected a local dependence");
assert(Load->isUnordered() && "rules below are incorrect for ordered access");
@@ -1137,9 +1158,9 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo,
return false;
}
-void GVN::AnalyzeLoadAvailability(LoadInst *Load, LoadDepVect &Deps,
- AvailValInBlkVect &ValuesPerBlock,
- UnavailBlkVect &UnavailableBlocks) {
+void GVNPass::AnalyzeLoadAvailability(LoadInst *Load, LoadDepVect &Deps,
+ AvailValInBlkVect &ValuesPerBlock,
+ UnavailBlkVect &UnavailableBlocks) {
// Filter out useless results (non-locals, etc). Keep track of the blocks
// where we have a value available in repl, also keep track of whether we see
// dependencies that produce an unknown value for the load (such as a call
@@ -1182,7 +1203,7 @@ void GVN::AnalyzeLoadAvailability(LoadInst *Load, LoadDepVect &Deps,
"post condition violation");
}
-void GVN::eliminatePartiallyRedundantLoad(
+void GVNPass::eliminatePartiallyRedundantLoad(
LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
MapVector<BasicBlock *, Value *> &AvailableLoads) {
for (const auto &AvailableLoad : AvailableLoads) {
@@ -1212,8 +1233,7 @@ void GVN::eliminatePartiallyRedundantLoad(
}
// Transfer the old load's AA tags to the new load.
- AAMDNodes Tags;
- Load->getAAMetadata(Tags);
+ AAMDNodes Tags = Load->getAAMetadata();
if (Tags)
NewLoad->setAAMetadata(Tags);
@@ -1257,8 +1277,8 @@ void GVN::eliminatePartiallyRedundantLoad(
});
}
-bool GVN::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
- UnavailBlkVect &UnavailableBlocks) {
+bool GVNPass::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
+ UnavailBlkVect &UnavailableBlocks) {
// Okay, we have *some* definitions of the value. This means that the value
  // is available in some of our (transitive) predecessors. Let's think about
// doing PRE of this load. This will involve inserting a new load into the
@@ -1498,8 +1518,9 @@ bool GVN::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
return true;
}
-bool GVN::performLoopLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
- UnavailBlkVect &UnavailableBlocks) {
+bool GVNPass::performLoopLoadPRE(LoadInst *Load,
+ AvailValInBlkVect &ValuesPerBlock,
+ UnavailBlkVect &UnavailableBlocks) {
if (!LI)
return false;
@@ -1590,7 +1611,7 @@ static void reportLoadElim(LoadInst *Load, Value *AvailableValue,
/// Attempt to eliminate a load whose dependencies are
/// non-local by performing PHI construction.
-bool GVN::processNonLocalLoad(LoadInst *Load) {
+bool GVNPass::processNonLocalLoad(LoadInst *Load) {
// non-local speculations are not allowed under asan.
if (Load->getParent()->getParent()->hasFnAttribute(
Attribute::SanitizeAddress) ||
@@ -1622,10 +1643,8 @@ bool GVN::processNonLocalLoad(LoadInst *Load) {
// If this load follows a GEP, see if we can PRE the indices before analyzing.
if (GetElementPtrInst *GEP =
dyn_cast<GetElementPtrInst>(Load->getOperand(0))) {
- for (GetElementPtrInst::op_iterator OI = GEP->idx_begin(),
- OE = GEP->idx_end();
- OI != OE; ++OI)
- if (Instruction *I = dyn_cast<Instruction>(OI->get()))
+ for (Use &U : GEP->indices())
+ if (Instruction *I = dyn_cast<Instruction>(U.get()))
Changed |= performScalarPRE(I);
}
@@ -1673,8 +1692,11 @@ bool GVN::processNonLocalLoad(LoadInst *Load) {
if (!isLoadInLoopPREEnabled() && LI && LI->getLoopFor(Load->getParent()))
return Changed;
- return Changed || PerformLoadPRE(Load, ValuesPerBlock, UnavailableBlocks) ||
- performLoopLoadPRE(Load, ValuesPerBlock, UnavailableBlocks);
+ if (performLoopLoadPRE(Load, ValuesPerBlock, UnavailableBlocks) ||
+ PerformLoadPRE(Load, ValuesPerBlock, UnavailableBlocks))
+ return true;
+
+ return Changed;
}
static bool impliesEquivalanceIfTrue(CmpInst* Cmp) {
@@ -1738,7 +1760,7 @@ static bool hasUsersIn(Value *V, BasicBlock *BB) {
return false;
}
-bool GVN::processAssumeIntrinsic(AssumeInst *IntrinsicI) {
+bool GVNPass::processAssumeIntrinsic(AssumeInst *IntrinsicI) {
Value *V = IntrinsicI->getArgOperand(0);
if (ConstantInt *Cond = dyn_cast<ConstantInt>(V)) {
@@ -1882,7 +1904,7 @@ static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) {
/// Attempt to eliminate a load, first by eliminating it
/// locally, and then attempting non-local elimination if that fails.
-bool GVN::processLoad(LoadInst *L) {
+bool GVNPass::processLoad(LoadInst *L) {
if (!MD)
return false;
@@ -1936,7 +1958,7 @@ bool GVN::processLoad(LoadInst *L) {
/// Return a pair whose first field is the value number of \p Exp and whose
/// second field indicates whether that value number is newly created.
std::pair<uint32_t, bool>
-GVN::ValueTable::assignExpNewValueNum(Expression &Exp) {
+GVNPass::ValueTable::assignExpNewValueNum(Expression &Exp) {
uint32_t &e = expressionNumbering[Exp];
bool CreateNewValNum = !e;
if (CreateNewValNum) {
@@ -1951,8 +1973,8 @@ GVN::ValueTable::assignExpNewValueNum(Expression &Exp) {
/// Return whether all the values related with the same \p num are
/// defined in \p BB.
-bool GVN::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB,
- GVN &Gvn) {
+bool GVNPass::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB,
+ GVNPass &Gvn) {
LeaderTableEntry *Vals = &Gvn.LeaderTable[Num];
while (Vals && Vals->BB == BB)
Vals = Vals->Next;
@@ -1960,9 +1982,9 @@ bool GVN::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB,
}
/// Wrap phiTranslateImpl to provide caching functionality.
-uint32_t GVN::ValueTable::phiTranslate(const BasicBlock *Pred,
- const BasicBlock *PhiBlock, uint32_t Num,
- GVN &Gvn) {
+uint32_t GVNPass::ValueTable::phiTranslate(const BasicBlock *Pred,
+ const BasicBlock *PhiBlock,
+ uint32_t Num, GVNPass &Gvn) {
auto FindRes = PhiTranslateTable.find({Num, Pred});
if (FindRes != PhiTranslateTable.end())
return FindRes->second;
@@ -1973,9 +1995,10 @@ uint32_t GVN::ValueTable::phiTranslate(const BasicBlock *Pred,
// Return true if the value numbers \p Num and \p NewNum represent the same
// value. Return false if the result is unknown.
-bool GVN::ValueTable::areCallValsEqual(uint32_t Num, uint32_t NewNum,
- const BasicBlock *Pred,
- const BasicBlock *PhiBlock, GVN &Gvn) {
+bool GVNPass::ValueTable::areCallValsEqual(uint32_t Num, uint32_t NewNum,
+ const BasicBlock *Pred,
+ const BasicBlock *PhiBlock,
+ GVNPass &Gvn) {
CallInst *Call = nullptr;
LeaderTableEntry *Vals = &Gvn.LeaderTable[Num];
while (Vals) {
@@ -2008,9 +2031,9 @@ bool GVN::ValueTable::areCallValsEqual(uint32_t Num, uint32_t NewNum,
/// Translate value number \p Num using phis, so that it has the values of
/// the phis in BB.
-uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock *Pred,
- const BasicBlock *PhiBlock,
- uint32_t Num, GVN &Gvn) {
+uint32_t GVNPass::ValueTable::phiTranslateImpl(const BasicBlock *Pred,
+ const BasicBlock *PhiBlock,
+ uint32_t Num, GVNPass &Gvn) {
if (PHINode *PN = NumberingPhi[Num]) {
for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
if (PN->getParent() == PhiBlock && PN->getIncomingBlock(i) == Pred)
@@ -2063,8 +2086,8 @@ uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock *Pred,
/// Erase stale entry from phiTranslate cache so phiTranslate can be computed
/// again.
-void GVN::ValueTable::eraseTranslateCacheEntry(uint32_t Num,
- const BasicBlock &CurrBlock) {
+void GVNPass::ValueTable::eraseTranslateCacheEntry(
+ uint32_t Num, const BasicBlock &CurrBlock) {
for (const BasicBlock *Pred : predecessors(&CurrBlock))
PhiTranslateTable.erase({Num, Pred});
}
@@ -2074,7 +2097,7 @@ void GVN::ValueTable::eraseTranslateCacheEntry(uint32_t Num,
// and then scan the list to find one whose block dominates the block in
// question. This is fast because dominator tree queries consist of only
// a few comparisons of DFS numbers.
-Value *GVN::findLeader(const BasicBlock *BB, uint32_t num) {
+Value *GVNPass::findLeader(const BasicBlock *BB, uint32_t num) {
LeaderTableEntry Vals = LeaderTable[num];
if (!Vals.Val) return nullptr;
@@ -2113,7 +2136,7 @@ static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E,
return Pred != nullptr;
}
-void GVN::assignBlockRPONumber(Function &F) {
+void GVNPass::assignBlockRPONumber(Function &F) {
BlockRPONumber.clear();
uint32_t NextBlockNumber = 1;
ReversePostOrderTraversal<Function *> RPOT(&F);
@@ -2122,7 +2145,7 @@ void GVN::assignBlockRPONumber(Function &F) {
InvalidBlockRPONumbers = false;
}
-bool GVN::replaceOperandsForInBlockEquality(Instruction *Instr) const {
+bool GVNPass::replaceOperandsForInBlockEquality(Instruction *Instr) const {
bool Changed = false;
for (unsigned OpNum = 0; OpNum < Instr->getNumOperands(); ++OpNum) {
Value *Operand = Instr->getOperand(OpNum);
@@ -2142,8 +2165,9 @@ bool GVN::replaceOperandsForInBlockEquality(Instruction *Instr) const {
/// 'RHS' everywhere in the scope. Returns whether a change was made.
/// If DominatesByEdge is false, then it means that we will propagate the RHS
/// value starting from the end of Root.Start.
-bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root,
- bool DominatesByEdge) {
+bool GVNPass::propagateEquality(Value *LHS, Value *RHS,
+ const BasicBlockEdge &Root,
+ bool DominatesByEdge) {
SmallVector<std::pair<Value*, Value*>, 4> Worklist;
Worklist.push_back(std::make_pair(LHS, RHS));
bool Changed = false;
@@ -2291,7 +2315,7 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root,
/// When calculating availability, handle an instruction
/// by inserting it into the appropriate sets
-bool GVN::processInstruction(Instruction *I) {
+bool GVNPass::processInstruction(Instruction *I) {
// Ignore dbg info intrinsics.
if (isa<DbgInfoIntrinsic>(I))
return false;
@@ -2432,10 +2456,10 @@ bool GVN::processInstruction(Instruction *I) {
}
/// runOnFunction - This is the main transformation entry point for a function.
-bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
- const TargetLibraryInfo &RunTLI, AAResults &RunAA,
- MemoryDependenceResults *RunMD, LoopInfo *LI,
- OptimizationRemarkEmitter *RunORE, MemorySSA *MSSA) {
+bool GVNPass::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
+ const TargetLibraryInfo &RunTLI, AAResults &RunAA,
+ MemoryDependenceResults *RunMD, LoopInfo *LI,
+ OptimizationRemarkEmitter *RunORE, MemorySSA *MSSA) {
AC = &RunAC;
DT = &RunDT;
VN.setDomTree(DT);
@@ -2457,10 +2481,8 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
// Merge unconditional branches, allowing PRE to catch more
// optimization opportunities.
- for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
- BasicBlock *BB = &*FI++;
-
- bool removedBlock = MergeBlockIntoPredecessor(BB, &DTU, LI, MSSAU, MD);
+ for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
+ bool removedBlock = MergeBlockIntoPredecessor(&BB, &DTU, LI, MSSAU, MD);
if (removedBlock)
++NumGVNBlocks;
@@ -2502,7 +2524,7 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
return Changed;
}
-bool GVN::processBlock(BasicBlock *BB) {
+bool GVNPass::processBlock(BasicBlock *BB) {
// FIXME: Kill off InstrsToErase by doing erasing eagerly in a helper function
// (and incrementing BI before processing an instruction).
assert(InstrsToErase.empty() &&
@@ -2563,8 +2585,8 @@ bool GVN::processBlock(BasicBlock *BB) {
}
// Instantiate an expression in a predecessor that lacked it.
-bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
- BasicBlock *Curr, unsigned int ValNo) {
+bool GVNPass::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
+ BasicBlock *Curr, unsigned int ValNo) {
// Because we are going top-down through the block, all value numbers
// will be available in the predecessor by the time we need them. Any
// that weren't originally present will have been instantiated earlier
@@ -2612,7 +2634,7 @@ bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
return true;
}
-bool GVN::performScalarPRE(Instruction *CurInst) {
+bool GVNPass::performScalarPRE(Instruction *CurInst) {
if (isa<AllocaInst>(CurInst) || CurInst->isTerminator() ||
isa<PHINode>(CurInst) || CurInst->getType()->isVoidTy() ||
CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
@@ -2797,7 +2819,7 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
/// Perform a purely local form of PRE that looks for diamond
/// control flow patterns and attempts to perform simple PRE at the join point.
-bool GVN::performPRE(Function &F) {
+bool GVNPass::performPRE(Function &F) {
bool Changed = false;
for (BasicBlock *CurrentBlock : depth_first(&F.getEntryBlock())) {
// Nothing to PRE in the entry block.
@@ -2824,7 +2846,7 @@ bool GVN::performPRE(Function &F) {
/// Split the critical edge connecting the given two blocks, and return
/// the block inserted to the critical edge.
-BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
+BasicBlock *GVNPass::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
// GVN does not require loop-simplify, do not try to preserve it if it is not
// possible.
BasicBlock *BB = SplitCriticalEdge(
@@ -2840,7 +2862,7 @@ BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
/// Split critical edges found during the previous
/// iteration that may enable further optimization.
-bool GVN::splitCriticalEdges() {
+bool GVNPass::splitCriticalEdges() {
if (toSplit.empty())
return false;
@@ -2860,7 +2882,7 @@ bool GVN::splitCriticalEdges() {
}
/// Executes one iteration of GVN
-bool GVN::iterateOnFunction(Function &F) {
+bool GVNPass::iterateOnFunction(Function &F) {
cleanupGlobalSets();
// Top-down walk of the dominator tree
@@ -2876,7 +2898,7 @@ bool GVN::iterateOnFunction(Function &F) {
return Changed;
}
-void GVN::cleanupGlobalSets() {
+void GVNPass::cleanupGlobalSets() {
VN.clear();
LeaderTable.clear();
BlockRPONumber.clear();
@@ -2887,7 +2909,7 @@ void GVN::cleanupGlobalSets() {
/// Verify that the specified instruction does not occur in our
/// internal data structures.
-void GVN::verifyRemoved(const Instruction *Inst) const {
+void GVNPass::verifyRemoved(const Instruction *Inst) const {
VN.verifyRemoved(Inst);
// Walk through the value number scope to make sure the instruction isn't
@@ -2907,7 +2929,7 @@ void GVN::verifyRemoved(const Instruction *Inst) const {
/// function is to add all these blocks to "DeadBlocks". For the dead blocks'
/// live successors, update their phi nodes by replacing the operands
/// corresponding to dead blocks with UndefVal.
-void GVN::addDeadBlock(BasicBlock *BB) {
+void GVNPass::addDeadBlock(BasicBlock *BB) {
SmallVector<BasicBlock *, 4> NewDead;
SmallSetVector<BasicBlock *, 4> DF;
@@ -2995,7 +3017,7 @@ void GVN::addDeadBlock(BasicBlock *BB) {
// dead blocks with "UndefVal" in the hope these PHIs will be optimized away.
//
// Return true iff *NEW* dead code is found.
-bool GVN::processFoldableCondBr(BranchInst *BI) {
+bool GVNPass::processFoldableCondBr(BranchInst *BI) {
if (!BI || BI->isUnconditional())
return false;
@@ -3023,7 +3045,7 @@ bool GVN::processFoldableCondBr(BranchInst *BI) {
// associated val-num. As it normally has far more live instructions than dead
// instructions, it makes more sense just to "fabricate" a val-number for the
// dead code than to check whether the instruction involved is dead or not.
-void GVN::assignValNumForDeadCode() {
+void GVNPass::assignValNumForDeadCode() {
for (BasicBlock *BB : DeadBlocks) {
for (Instruction &Inst : *BB) {
unsigned ValNum = VN.lookupOrAdd(&Inst);
@@ -3078,7 +3100,7 @@ public:
}
private:
- GVN Impl;
+ GVNPass Impl;
};
char GVNLegacyPass::ID = 0;
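
[Editor's note] The runImpl hunk above swaps a hand-rolled increment-before-use loop for llvm::make_early_inc_range, the usual idiom when the loop body may erase the element currently being visited (here, MergeBlockIntoPredecessor can delete the block). A minimal standalone sketch of the same idiom, using std::list instead of LLVM's ADT (the container and the "merge succeeded" predicate are illustrative only):

```cpp
#include <iostream>
#include <list>

int main() {
  std::list<int> Blocks{1, 2, 3, 4, 5};
  // Advance the iterator *before* running the body, so erasing the element we
  // are visiting never invalidates the iterator we keep.
  for (auto It = Blocks.begin(), End = Blocks.end(); It != End;) {
    auto Cur = It++;       // early increment, mirrors make_early_inc_range
    if (*Cur % 2 == 0)     // stand-in for "MergeBlockIntoPredecessor succeeded"
      Blocks.erase(Cur);   // safe: 'It' already points past 'Cur'
  }
  for (int B : Blocks)
    std::cout << B << ' '; // prints: 1 3 5
  std::cout << '\n';
}
```
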
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNHoist.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNHoist.cpp
index 790d71992da4..fdc3afd9348a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNHoist.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNHoist.cpp
@@ -169,7 +169,7 @@ class InsnInfo {
public:
// Inserts I and its value number in VNtoScalars.
- void insert(Instruction *I, GVN::ValueTable &VN) {
+ void insert(Instruction *I, GVNPass::ValueTable &VN) {
// Scalar instruction.
unsigned V = VN.lookupOrAdd(I);
VNtoScalars[{V, InvalidVN}].push_back(I);
@@ -184,7 +184,7 @@ class LoadInfo {
public:
// Insert Load and the value number of its memory address in VNtoLoads.
- void insert(LoadInst *Load, GVN::ValueTable &VN) {
+ void insert(LoadInst *Load, GVNPass::ValueTable &VN) {
if (Load->isSimple()) {
unsigned V = VN.lookupOrAdd(Load->getPointerOperand());
VNtoLoads[{V, InvalidVN}].push_back(Load);
@@ -201,7 +201,7 @@ class StoreInfo {
public:
// Insert the Store and a hash number of the store address and the stored
// value in VNtoStores.
- void insert(StoreInst *Store, GVN::ValueTable &VN) {
+ void insert(StoreInst *Store, GVNPass::ValueTable &VN) {
if (!Store->isSimple())
return;
// Hash the store address and the stored value.
@@ -221,7 +221,7 @@ class CallInfo {
public:
// Insert Call and its value numbering in one of the VNtoCalls* containers.
- void insert(CallInst *Call, GVN::ValueTable &VN) {
+ void insert(CallInst *Call, GVNPass::ValueTable &VN) {
// A call that doesNotAccessMemory is handled as a Scalar,
// onlyReadsMemory will be handled as a Load instruction,
// all other calls will be handled as stores.
@@ -274,7 +274,7 @@ public:
unsigned int rank(const Value *V) const;
private:
- GVN::ValueTable VN;
+ GVNPass::ValueTable VN;
DominatorTree *DT;
PostDominatorTree *PDT;
AliasAnalysis *AA;
@@ -377,12 +377,12 @@ private:
if (!Root)
return;
// Depth first walk on PDom tree to fill the CHIargs at each PDF.
- RenameStackType RenameStack;
for (auto Node : depth_first(Root)) {
BasicBlock *BB = Node->getBlock();
if (!BB)
continue;
+ RenameStackType RenameStack;
// Collect all values in BB and push to stack.
fillRenameStack(BB, ValueBBs, RenameStack);
@@ -827,6 +827,8 @@ void GVNHoist::fillRenameStack(BasicBlock *BB, InValuesType &ValueBBs,
auto it1 = ValueBBs.find(BB);
if (it1 != ValueBBs.end()) {
// Iterate in reverse order to keep lower ranked values on the top.
+ LLVM_DEBUG(dbgs() << "\nVisiting: " << BB->getName()
+ << " for pushing instructions on stack";);
for (std::pair<VNType, Instruction *> &VI : reverse(it1->second)) {
// Get the value of instruction I
LLVM_DEBUG(dbgs() << "\nPushing on stack: " << *VI.second);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GuardWidening.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GuardWidening.cpp
index 61eb4ce0ed46..82b81003ef21 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GuardWidening.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GuardWidening.cpp
@@ -46,6 +46,7 @@
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/ConstantRange.h"
@@ -105,8 +106,10 @@ static void setCondition(Instruction *I, Value *NewCond) {
}
// Eliminates the guard instruction properly.
-static void eliminateGuard(Instruction *GuardInst) {
+static void eliminateGuard(Instruction *GuardInst, MemorySSAUpdater *MSSAU) {
GuardInst->eraseFromParent();
+ if (MSSAU)
+ MSSAU->removeMemoryAccess(GuardInst);
++GuardsEliminated;
}
@@ -114,6 +117,7 @@ class GuardWideningImpl {
DominatorTree &DT;
PostDominatorTree *PDT;
LoopInfo &LI;
+ MemorySSAUpdater *MSSAU;
/// Together, these describe the region of interest. This might be all of
/// the blocks within a function, or only a given loop's blocks and preheader.
@@ -269,12 +273,12 @@ class GuardWideningImpl {
}
public:
-
explicit GuardWideningImpl(DominatorTree &DT, PostDominatorTree *PDT,
- LoopInfo &LI, DomTreeNode *Root,
+ LoopInfo &LI, MemorySSAUpdater *MSSAU,
+ DomTreeNode *Root,
std::function<bool(BasicBlock*)> BlockFilter)
- : DT(DT), PDT(PDT), LI(LI), Root(Root), BlockFilter(BlockFilter)
- {}
+ : DT(DT), PDT(PDT), LI(LI), MSSAU(MSSAU), Root(Root),
+ BlockFilter(BlockFilter) {}
/// The entry point for this pass.
bool run();
@@ -313,7 +317,7 @@ bool GuardWideningImpl::run() {
if (!WidenedGuards.count(I)) {
assert(isa<ConstantInt>(getCondition(I)) && "Should be!");
if (isSupportedGuardInstruction(I))
- eliminateGuard(I);
+ eliminateGuard(I, MSSAU);
else {
assert(isa<BranchInst>(I) &&
"Eliminated something other than guard or branch?");
@@ -514,27 +518,20 @@ bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1,
ConstantRange CR1 =
ConstantRange::makeExactICmpRegion(Pred1, RHS1->getValue());
- // SubsetIntersect is a subset of the actual mathematical intersection of
- // CR0 and CR1, while SupersetIntersect is a superset of the actual
- // mathematical intersection. If these two ConstantRanges are equal, then
- // we know we were able to represent the actual mathematical intersection
- // of CR0 and CR1, and can use the same to generate an icmp instruction.
- //
// Given what we're doing here and the semantics of guards, it would
- // actually be correct to just use SubsetIntersect, but that may be too
+ // be correct to use a subset intersection, but that may be too
// aggressive in cases we care about.
- auto SubsetIntersect = CR0.inverse().unionWith(CR1.inverse()).inverse();
- auto SupersetIntersect = CR0.intersectWith(CR1);
-
- APInt NewRHSAP;
- CmpInst::Predicate Pred;
- if (SubsetIntersect == SupersetIntersect &&
- SubsetIntersect.getEquivalentICmp(Pred, NewRHSAP)) {
- if (InsertPt) {
- ConstantInt *NewRHS = ConstantInt::get(Cond0->getContext(), NewRHSAP);
- Result = new ICmpInst(InsertPt, Pred, LHS, NewRHS, "wide.chk");
+ if (Optional<ConstantRange> Intersect = CR0.exactIntersectWith(CR1)) {
+ APInt NewRHSAP;
+ CmpInst::Predicate Pred;
+ if (Intersect->getEquivalentICmp(Pred, NewRHSAP)) {
+ if (InsertPt) {
+ ConstantInt *NewRHS =
+ ConstantInt::get(Cond0->getContext(), NewRHSAP);
+ Result = new ICmpInst(InsertPt, Pred, LHS, NewRHS, "wide.chk");
+ }
+ return true;
}
- return true;
}
}
}
@@ -766,12 +763,18 @@ PreservedAnalyses GuardWideningPass::run(Function &F,
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &LI = AM.getResult<LoopAnalysis>(F);
auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
- if (!GuardWideningImpl(DT, &PDT, LI, DT.getRootNode(),
- [](BasicBlock*) { return true; } ).run())
+ auto *MSSAA = AM.getCachedResult<MemorySSAAnalysis>(F);
+ std::unique_ptr<MemorySSAUpdater> MSSAU;
+ if (MSSAA)
+ MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAA->getMSSA());
+ if (!GuardWideningImpl(DT, &PDT, LI, MSSAU ? MSSAU.get() : nullptr,
+ DT.getRootNode(), [](BasicBlock *) { return true; })
+ .run())
return PreservedAnalyses::all();
PreservedAnalyses PA;
PA.preserveSet<CFGAnalyses>();
+ PA.preserve<MemorySSAAnalysis>();
return PA;
}
@@ -784,11 +787,17 @@ PreservedAnalyses GuardWideningPass::run(Loop &L, LoopAnalysisManager &AM,
auto BlockFilter = [&](BasicBlock *BB) {
return BB == RootBB || L.contains(BB);
};
- if (!GuardWideningImpl(AR.DT, nullptr, AR.LI, AR.DT.getNode(RootBB),
- BlockFilter).run())
+ std::unique_ptr<MemorySSAUpdater> MSSAU;
+ if (AR.MSSA)
+ MSSAU = std::make_unique<MemorySSAUpdater>(AR.MSSA);
+ if (!GuardWideningImpl(AR.DT, nullptr, AR.LI, MSSAU ? MSSAU.get() : nullptr,
+ AR.DT.getNode(RootBB), BlockFilter).run())
return PreservedAnalyses::all();
- return getLoopPassPreservedAnalyses();
+ auto PA = getLoopPassPreservedAnalyses();
+ if (AR.MSSA)
+ PA.preserve<MemorySSAAnalysis>();
+ return PA;
}
namespace {
@@ -805,8 +814,14 @@ struct GuardWideningLegacyPass : public FunctionPass {
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
- return GuardWideningImpl(DT, &PDT, LI, DT.getRootNode(),
- [](BasicBlock*) { return true; } ).run();
+ auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>();
+ std::unique_ptr<MemorySSAUpdater> MSSAU;
+ if (MSSAWP)
+ MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAWP->getMSSA());
+ return GuardWideningImpl(DT, &PDT, LI, MSSAU ? MSSAU.get() : nullptr,
+ DT.getRootNode(),
+ [](BasicBlock *) { return true; })
+ .run();
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -814,6 +829,7 @@ struct GuardWideningLegacyPass : public FunctionPass {
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<PostDominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
}
};
@@ -833,13 +849,18 @@ struct LoopGuardWideningLegacyPass : public LoopPass {
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
auto *PDTWP = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>();
auto *PDT = PDTWP ? &PDTWP->getPostDomTree() : nullptr;
+ auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>();
+ std::unique_ptr<MemorySSAUpdater> MSSAU;
+ if (MSSAWP)
+ MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAWP->getMSSA());
+
BasicBlock *RootBB = L->getLoopPredecessor();
if (!RootBB)
RootBB = L->getHeader();
auto BlockFilter = [&](BasicBlock *BB) {
return BB == RootBB || L->contains(BB);
};
- return GuardWideningImpl(DT, PDT, LI,
+ return GuardWideningImpl(DT, PDT, LI, MSSAU ? MSSAU.get() : nullptr,
DT.getNode(RootBB), BlockFilter).run();
}
@@ -847,6 +868,7 @@ struct LoopGuardWideningLegacyPass : public LoopPass {
AU.setPreservesCFG();
getLoopAnalysisUsage(AU);
AU.addPreserved<PostDominatorTreeWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
}
};
}
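
[Editor's note] The widenCondCommon hunk above replaces the SubsetIntersect/SupersetIntersect equality trick with ConstantRange::exactIntersectWith, which returns a value only when the intersection is exactly representable as a single range. A minimal sketch of that contract, restricted to non-wrapping unsigned half-open intervals (ConstantRange also handles wrapped ranges, where the true intersection can be two disjoint pieces and the exact query then fails); the widened check shown is the usual subtract-and-compare form that getEquivalentICmp produces:

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <optional>

struct Range { uint32_t Lo, Hi; }; // half-open [Lo, Hi), Lo <= Hi

// Return the intersection only if it is a single non-empty interval.
std::optional<Range> exactIntersect(Range A, Range B) {
  Range R{std::max(A.Lo, B.Lo), std::min(A.Hi, B.Hi)};
  if (R.Lo >= R.Hi)
    return std::nullopt; // empty: the combined guard can never pass
  return R;
}

// "X in [Lo, Hi)" as one compare: (X - Lo) u< (Hi - Lo).
bool widenedCheck(uint32_t X, Range R) { return X - R.Lo < R.Hi - R.Lo; }

int main() {
  Range CR0{0, 10};             // from: icmp ult X, 10
  Range CR1{3, UINT32_MAX};     // roughly: icmp uge X, 3
  if (auto R = exactIntersect(CR0, CR1))
    std::cout << widenedCheck(7, *R) << widenedCheck(2, *R) << '\n'; // 10
}
```
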
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 9ee2a2d0bf08..ae2fe2767074 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -89,6 +89,7 @@
#include <utility>
using namespace llvm;
+using namespace PatternMatch;
#define DEBUG_TYPE "indvars"
@@ -155,6 +156,10 @@ class IndVarSimplify {
bool rewriteNonIntegerIVs(Loop *L);
bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
+  /// Try to improve our exit conditions by converting a signed condition to
+  /// unsigned, or by rotating computation out of the loop.
+ /// (See inline comment about why this is duplicated from simplifyAndExtend)
+ bool canonicalizeExitCondition(Loop *L);
/// Try to eliminate loop exits based on analyzeable exit counts
bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
/// Try to form loop invariant tests for loop exits by changing how many
@@ -494,6 +499,7 @@ bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
MadeAnyChanges = true;
PN.setIncomingValue(IncomingValIdx,
ExitVal->getIncomingValue(PreheaderIdx));
+ SE->forgetValue(&PN);
}
}
}
@@ -541,18 +547,18 @@ static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
return;
}
- if (!WI.WidestNativeType) {
+ if (!WI.WidestNativeType ||
+ Width > SE->getTypeSizeInBits(WI.WidestNativeType)) {
WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
WI.IsSigned = IsSigned;
return;
}
- // We extend the IV to satisfy the sign of its first user, arbitrarily.
- if (WI.IsSigned != IsSigned)
- return;
-
- if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
- WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
+  // We extend the IV to satisfy the sign of its user(s), or 'signed' if there
+  // are users with both sign- and zero-extensions, so as not to introduce
+  // nondeterministic behaviour based on the unspecified order of a PHI node's
+  // users-iterator.
+ WI.IsSigned |= IsSigned;
}
//===----------------------------------------------------------------------===//
@@ -1274,9 +1280,9 @@ bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
// Skip debug info intrinsics.
do {
--I;
- } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
+ } while (I->isDebugOrPseudoInst() && I != Preheader->begin());
- if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
+ if (I->isDebugOrPseudoInst() && I == Preheader->begin())
Done = true;
} else {
Done = true;
@@ -1309,6 +1315,18 @@ static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
replaceExitCond(BI, NewCond, DeadInsts);
}
+static void replaceLoopPHINodesWithPreheaderValues(
+ Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
+ assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
+ auto *LoopPreheader = L->getLoopPreheader();
+ auto *LoopHeader = L->getHeader();
+ for (auto &PN : LoopHeader->phis()) {
+ auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
+ PN.replaceAllUsesWith(PreheaderIncoming);
+ DeadInsts.emplace_back(&PN);
+ }
+}
+
static void replaceWithInvariantCond(
const Loop *L, BasicBlock *ExitingBB, ICmpInst::Predicate InvariantPred,
const SCEV *InvariantLHS, const SCEV *InvariantRHS, SCEVExpander &Rewriter,
@@ -1333,7 +1351,6 @@ static bool optimizeLoopExitWithUnknownExitCount(
SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
ICmpInst::Predicate Pred;
Value *LHS, *RHS;
- using namespace PatternMatch;
BasicBlock *TrueSucc, *FalseSucc;
if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc))))
@@ -1394,6 +1411,140 @@ static bool optimizeLoopExitWithUnknownExitCount(
return true;
}
+bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
+  // Note: This duplicates a particular part of SimplifyIndVars' reasoning.
+  // We need to duplicate it because, given icmp zext(small-iv), C, IVUsers
+  // never reaches the icmp since the zext doesn't fold to an AddRec unless
+  // it already has flags. The alternative would be to extend the set of
+  // "interesting" IV users to include the icmp, but doing so regresses
+  // results in practice: it queries SCEVs before the trip counts that rely
+  // on them, which leaves SCEV caching sub-optimal answers. That concern
+  // about caching sub-optimal results is why we only query SCEVs of the
+  // loop-invariant RHS here.
+ SmallVector<BasicBlock*, 16> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ bool Changed = false;
+ for (auto *ExitingBB : ExitingBlocks) {
+ auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
+ if (!BI)
+ continue;
+ assert(BI->isConditional() && "exit branch must be conditional");
+
+ auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!ICmp || !ICmp->hasOneUse())
+ continue;
+
+ auto *LHS = ICmp->getOperand(0);
+ auto *RHS = ICmp->getOperand(1);
+    // For the range reasoning, avoid computing SCEVs in the loop to avoid
+    // poisoning the cache with sub-optimal results. For the must-execute
+    // case, this is a necessary precondition for correctness.
+ if (!L->isLoopInvariant(RHS)) {
+ if (!L->isLoopInvariant(LHS))
+ continue;
+ // Same logic applies for the inverse case
+ std::swap(LHS, RHS);
+ }
+
+ // Match (icmp signed-cond zext, RHS)
+ Value *LHSOp = nullptr;
+ if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
+ continue;
+
+ const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
+ const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
+ const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
+ auto FullCR = ConstantRange::getFull(InnerBitWidth);
+ FullCR = FullCR.zeroExtend(OuterBitWidth);
+ auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
+ if (FullCR.contains(RHSCR)) {
+ // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
+ // replace the signed condition with the unsigned version.
+ ICmp->setPredicate(ICmp->getUnsignedPredicate());
+ Changed = true;
+ // Note: No SCEV invalidation needed. We've changed the predicate, but
+ // have not changed exit counts, or the values produced by the compare.
+ continue;
+ }
+ }
+
+ // Now that we've canonicalized the condition to match the extend,
+ // see if we can rotate the extend out of the loop.
+ for (auto *ExitingBB : ExitingBlocks) {
+ auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
+ if (!BI)
+ continue;
+ assert(BI->isConditional() && "exit branch must be conditional");
+
+ auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
+ continue;
+
+ bool Swapped = false;
+ auto *LHS = ICmp->getOperand(0);
+ auto *RHS = ICmp->getOperand(1);
+ if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
+ // Nothing to rotate
+ continue;
+ if (L->isLoopInvariant(LHS)) {
+ // Same logic applies for the inverse case until we actually pick
+ // which operand of the compare to update.
+ Swapped = true;
+ std::swap(LHS, RHS);
+ }
+ assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));
+
+ // Match (icmp unsigned-cond zext, RHS)
+ // TODO: Extend to handle corresponding sext/signed-cmp case
+ // TODO: Extend to other invertible functions
+ Value *LHSOp = nullptr;
+ if (!match(LHS, m_ZExt(m_Value(LHSOp))))
+ continue;
+
+ // In general, we only rotate if we can do so without increasing the number
+ // of instructions. The exception is when we have an zext(add-rec). The
+ // reason for allowing this exception is that we know we need to get rid
+ // of the zext for SCEV to be able to compute a trip count for said loops;
+ // we consider the new trip count valuable enough to increase instruction
+ // count by one.
+ if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
+ continue;
+
+    // Given an icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS
+ // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
+ // when zext is loop varying and RHS is loop invariant. This converts
+ // loop varying work to loop-invariant work.
+ auto doRotateTransform = [&]() {
+ assert(ICmp->isUnsigned() && "must have proven unsigned already");
+ auto *NewRHS =
+ CastInst::Create(Instruction::Trunc, RHS, LHSOp->getType(), "",
+ L->getLoopPreheader()->getTerminator());
+ ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
+ ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
+ if (LHS->use_empty())
+ DeadInsts.push_back(LHS);
+ };
+
+ const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
+ const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
+ const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
+ auto FullCR = ConstantRange::getFull(InnerBitWidth);
+ FullCR = FullCR.zeroExtend(OuterBitWidth);
+ auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
+ if (FullCR.contains(RHSCR)) {
+ doRotateTransform();
+ Changed = true;
+      // Note: we are leaving SCEV unfortunately imprecise here, as rotation
+      // tends to reveal information about trip counts that was not
+      // previously visible.
+ continue;
+ }
+ }
+
+ return Changed;
+}
+
bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
SmallVector<BasicBlock*, 16> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
@@ -1499,20 +1650,18 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
// If we know we'd exit on the first iteration, rewrite the exit to
// reflect this. This does not imply the loop must exit through this
// exit; there may be an earlier one taken on the first iteration.
- // TODO: Given we know the backedge can't be taken, we should go ahead
- // and break it. Or at least, kill all the header phis and simplify.
+ // We know that the backedge can't be taken, so we replace all
+ // the header PHIs with values coming from the preheader.
if (ExitCount->isZero()) {
foldExit(L, ExitingBB, true, DeadInsts);
+ replaceLoopPHINodesWithPreheaderValues(L, DeadInsts);
Changed = true;
continue;
}
- // If we end up with a pointer exit count, bail. Note that we can end up
- // with a pointer exit count for one exiting block, and not for another in
- // the same loop.
- if (!ExitCount->getType()->isIntegerTy() ||
- !MaxExitCount->getType()->isIntegerTy())
- continue;
+ assert(ExitCount->getType()->isIntegerTy() &&
+ MaxExitCount->getType()->isIntegerTy() &&
+ "Exit counts must be integers");
Type *WiderType =
SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
@@ -1569,14 +1718,11 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
// through *explicit* control flow. We have to eliminate the possibility of
// implicit exits (see below) before we know it's truly exact.
const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
- if (isa<SCEVCouldNotCompute>(ExactBTC) ||
- !SE->isLoopInvariant(ExactBTC, L) ||
- !isSafeToExpand(ExactBTC, *SE))
+ if (isa<SCEVCouldNotCompute>(ExactBTC) || !isSafeToExpand(ExactBTC, *SE))
return false;
- // If we end up with a pointer exit count, bail. It may be unsized.
- if (!ExactBTC->getType()->isIntegerTy())
- return false;
+ assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
+ assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");
auto BadExit = [&](BasicBlock *ExitingBB) {
// If our exiting block exits multiple loops, we can only rewrite the
@@ -1603,15 +1749,12 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
return true;
const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
- if (isa<SCEVCouldNotCompute>(ExitCount) ||
- !SE->isLoopInvariant(ExitCount, L) ||
- !isSafeToExpand(ExitCount, *SE))
- return true;
-
- // If we end up with a pointer exit count, bail. It may be unsized.
- if (!ExitCount->getType()->isIntegerTy())
+ if (isa<SCEVCouldNotCompute>(ExitCount) || !isSafeToExpand(ExitCount, *SE))
return true;
+ assert(SE->isLoopInvariant(ExitCount, L) &&
+ "Exit count must be loop invariant");
+ assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
return false;
};
@@ -1781,7 +1924,11 @@ bool IndVarSimplify::run(Loop *L) {
}
// Eliminate redundant IV cycles.
- NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
+ NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);
+
+ // Try to convert exit conditions to unsigned and rotate computation
+ // out of the loop. Note: Handles invalidation internally if needed.
+ Changed |= canonicalizeExitCondition(L);
// Try to eliminate loop exits based on analyzeable exit counts
if (optimizeLoopExits(L, Rewriter)) {
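
[Editor's note] A source-level illustration of the new canonicalizeExitCondition transform above. The functions below are not taken from the patch; they assume the compiler can prove (for example from a dominating guard) that `n` lies in [0, 65535], which is the "RHS range fits in the zero-extended inner type" condition the pass checks before flipping the predicate and rotating the extend:

```cpp
// Before: the exit compare is a *signed* compare of a loop-varying
// zero-extension, roughly: icmp slt (zext i16 %i to i32), %n
void before(short *a, int n) {
  for (unsigned short i = 0; (int)i < n; ++i)
    a[i] = 0;
}

// After: the predicate is made unsigned and the extension is rotated out of
// the loop by truncating the loop-invariant bound once in the preheader,
// roughly: icmp ult i16 %i, %bound. SCEV can now compute the trip count
// directly on the narrow induction variable.
void after(short *a, int n) {
  unsigned short bound = (unsigned short)n; // trunc done once, outside the loop
  for (unsigned short i = 0; i < bound; ++i)
    a[i] = 0;
}
```
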
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index f7d631f5e785..883d4afff3bd 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -96,10 +96,13 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
@@ -115,6 +118,7 @@
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -146,6 +150,14 @@ static const unsigned UninitializedAddressSpace =
namespace {
using ValueToAddrSpaceMapTy = DenseMap<const Value *, unsigned>;
+// Different from ValueToAddrSpaceMapTy, where a new addrspace is inferred on
+// the *def* of a value, PredicatedAddrSpaceMapTy is a map where a new
+// addrspace is inferred on the *use* of a pointer. This map is introduced to
+// infer an addrspace from an address-space predicate built from an assume
+// intrinsic. In that scenario, only specific uses (under a valid assumption
+// context) can be inferred with a new addrspace.
+using PredicatedAddrSpaceMapTy =
+ DenseMap<std::pair<const Value *, const Value *>, unsigned>;
using PostorderStackTy = llvm::SmallVector<PointerIntPair<Value *, 1, bool>, 4>;
class InferAddressSpaces : public FunctionPass {
@@ -160,6 +172,8 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetTransformInfoWrapperPass>();
}
@@ -167,6 +181,8 @@ public:
};
class InferAddressSpacesImpl {
+ AssumptionCache &AC;
+ DominatorTree *DT = nullptr;
const TargetTransformInfo *TTI = nullptr;
const DataLayout *DL = nullptr;
@@ -174,21 +190,24 @@ class InferAddressSpacesImpl {
/// possible.
unsigned FlatAddrSpace = 0;
- // Returns the new address space of V if updated; otherwise, returns None.
- Optional<unsigned>
- updateAddressSpace(const Value &V,
- const ValueToAddrSpaceMapTy &InferredAddrSpace) const;
+  // Try to update the address space of V. Returns true if V is updated and
+  // false otherwise.
+ bool updateAddressSpace(const Value &V,
+ ValueToAddrSpaceMapTy &InferredAddrSpace,
+ PredicatedAddrSpaceMapTy &PredicatedAS) const;
// Tries to infer the specific address space of each address expression in
// Postorder.
void inferAddressSpaces(ArrayRef<WeakTrackingVH> Postorder,
- ValueToAddrSpaceMapTy *InferredAddrSpace) const;
+ ValueToAddrSpaceMapTy &InferredAddrSpace,
+ PredicatedAddrSpaceMapTy &PredicatedAS) const;
bool isSafeToCastConstAddrSpace(Constant *C, unsigned NewAS) const;
Value *cloneInstructionWithNewAddressSpace(
Instruction *I, unsigned NewAddrSpace,
const ValueToValueMapTy &ValueWithNewAddrSpace,
+ const PredicatedAddrSpaceMapTy &PredicatedAS,
SmallVectorImpl<const Use *> *UndefUsesToFix) const;
// Changes the flat address expressions in function F to point to specific
@@ -196,7 +215,8 @@ class InferAddressSpacesImpl {
// all flat expressions in the use-def graph of function F.
bool rewriteWithNewAddressSpaces(
const TargetTransformInfo &TTI, ArrayRef<WeakTrackingVH> Postorder,
- const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const;
+ const ValueToAddrSpaceMapTy &InferredAddrSpace,
+ const PredicatedAddrSpaceMapTy &PredicatedAS, Function *F) const;
void appendsFlatAddressExpressionToPostorderStack(
Value *V, PostorderStackTy &PostorderStack,
@@ -211,14 +231,18 @@ class InferAddressSpacesImpl {
std::vector<WeakTrackingVH> collectFlatAddressExpressions(Function &F) const;
Value *cloneValueWithNewAddressSpace(
- Value *V, unsigned NewAddrSpace,
- const ValueToValueMapTy &ValueWithNewAddrSpace,
- SmallVectorImpl<const Use *> *UndefUsesToFix) const;
+ Value *V, unsigned NewAddrSpace,
+ const ValueToValueMapTy &ValueWithNewAddrSpace,
+ const PredicatedAddrSpaceMapTy &PredicatedAS,
+ SmallVectorImpl<const Use *> *UndefUsesToFix) const;
unsigned joinAddressSpaces(unsigned AS1, unsigned AS2) const;
+ unsigned getPredicatedAddrSpace(const Value &V, Value *Opnd) const;
+
public:
- InferAddressSpacesImpl(const TargetTransformInfo *TTI, unsigned FlatAddrSpace)
- : TTI(TTI), FlatAddrSpace(FlatAddrSpace) {}
+ InferAddressSpacesImpl(AssumptionCache &AC, DominatorTree *DT,
+ const TargetTransformInfo *TTI, unsigned FlatAddrSpace)
+ : AC(AC), DT(DT), TTI(TTI), FlatAddrSpace(FlatAddrSpace) {}
bool run(Function &F);
};
@@ -232,8 +256,12 @@ void initializeInferAddressSpacesPass(PassRegistry &);
} // end namespace llvm
-INITIALIZE_PASS(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces",
- false, false)
+INITIALIZE_PASS_BEGIN(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces",
+ false, false)
// Check whether that's no-op pointer bicast using a pair of
// `ptrtoint`/`inttoptr` due to the missing no-op pointer bitcast over
@@ -505,6 +533,7 @@ InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const {
static Value *operandWithNewAddressSpaceOrCreateUndef(
const Use &OperandUse, unsigned NewAddrSpace,
const ValueToValueMapTy &ValueWithNewAddrSpace,
+ const PredicatedAddrSpaceMapTy &PredicatedAS,
SmallVectorImpl<const Use *> *UndefUsesToFix) {
Value *Operand = OperandUse.get();
@@ -517,6 +546,18 @@ static Value *operandWithNewAddressSpaceOrCreateUndef(
if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Operand))
return NewOperand;
+ Instruction *Inst = cast<Instruction>(OperandUse.getUser());
+ auto I = PredicatedAS.find(std::make_pair(Inst, Operand));
+ if (I != PredicatedAS.end()) {
+ // Insert an addrspacecast on that operand before the user.
+ unsigned NewAS = I->second;
+ Type *NewPtrTy = PointerType::getWithSamePointeeType(
+ cast<PointerType>(Operand->getType()), NewAS);
+ auto *NewI = new AddrSpaceCastInst(Operand, NewPtrTy);
+ NewI->insertBefore(Inst);
+ return NewI;
+ }
+
UndefUsesToFix->push_back(&OperandUse);
return UndefValue::get(NewPtrTy);
}
@@ -536,6 +577,7 @@ static Value *operandWithNewAddressSpaceOrCreateUndef(
Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
Instruction *I, unsigned NewAddrSpace,
const ValueToValueMapTy &ValueWithNewAddrSpace,
+ const PredicatedAddrSpaceMapTy &PredicatedAS,
SmallVectorImpl<const Use *> *UndefUsesToFix) const {
Type *NewPtrType = PointerType::getWithSamePointeeType(
cast<PointerType>(I->getType()), NewAddrSpace);
@@ -557,7 +599,7 @@ Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
assert(II->getIntrinsicID() == Intrinsic::ptrmask);
Value *NewPtr = operandWithNewAddressSpaceOrCreateUndef(
II->getArgOperandUse(0), NewAddrSpace, ValueWithNewAddrSpace,
- UndefUsesToFix);
+ PredicatedAS, UndefUsesToFix);
Value *Rewrite =
TTI->rewriteIntrinsicWithAddressSpace(II, II->getArgOperand(0), NewPtr);
if (Rewrite) {
@@ -586,7 +628,8 @@ Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
NewPointerOperands.push_back(nullptr);
else
NewPointerOperands.push_back(operandWithNewAddressSpaceOrCreateUndef(
- OperandUse, NewAddrSpace, ValueWithNewAddrSpace, UndefUsesToFix));
+ OperandUse, NewAddrSpace, ValueWithNewAddrSpace, PredicatedAS,
+ UndefUsesToFix));
}
switch (I->getOpcode()) {
@@ -708,9 +751,8 @@ static Value *cloneConstantExprWithNewAddressSpace(
if (CE->getOpcode() == Instruction::GetElementPtr) {
// Needs to specify the source type while constructing a getelementptr
// constant expression.
- return CE->getWithOperands(
- NewOperands, TargetType, /*OnlyIfReduced=*/false,
- NewOperands[0]->getType()->getPointerElementType());
+ return CE->getWithOperands(NewOperands, TargetType, /*OnlyIfReduced=*/false,
+ cast<GEPOperator>(CE)->getSourceElementType());
}
return CE->getWithOperands(NewOperands, TargetType);
@@ -724,6 +766,7 @@ static Value *cloneConstantExprWithNewAddressSpace(
Value *InferAddressSpacesImpl::cloneValueWithNewAddressSpace(
Value *V, unsigned NewAddrSpace,
const ValueToValueMapTy &ValueWithNewAddrSpace,
+ const PredicatedAddrSpaceMapTy &PredicatedAS,
SmallVectorImpl<const Use *> *UndefUsesToFix) const {
// All values in Postorder are flat address expressions.
assert(V->getType()->getPointerAddressSpace() == FlatAddrSpace &&
@@ -731,7 +774,7 @@ Value *InferAddressSpacesImpl::cloneValueWithNewAddressSpace(
if (Instruction *I = dyn_cast<Instruction>(V)) {
Value *NewV = cloneInstructionWithNewAddressSpace(
- I, NewAddrSpace, ValueWithNewAddrSpace, UndefUsesToFix);
+ I, NewAddrSpace, ValueWithNewAddrSpace, PredicatedAS, UndefUsesToFix);
if (Instruction *NewI = dyn_cast_or_null<Instruction>(NewV)) {
if (NewI->getParent() == nullptr) {
NewI->insertBefore(I);
@@ -779,46 +822,43 @@ bool InferAddressSpacesImpl::run(Function &F) {
// Runs a data-flow analysis to refine the address spaces of every expression
// in Postorder.
ValueToAddrSpaceMapTy InferredAddrSpace;
- inferAddressSpaces(Postorder, &InferredAddrSpace);
+ PredicatedAddrSpaceMapTy PredicatedAS;
+ inferAddressSpaces(Postorder, InferredAddrSpace, PredicatedAS);
// Changes the address spaces of the flat address expressions that are inferred
// to point to a specific address space.
- return rewriteWithNewAddressSpaces(*TTI, Postorder, InferredAddrSpace, &F);
+ return rewriteWithNewAddressSpaces(*TTI, Postorder, InferredAddrSpace,
+ PredicatedAS, &F);
}
// Constants need to be tracked through RAUW to handle cases with nested
// constant expressions, so wrap values in WeakTrackingVH.
void InferAddressSpacesImpl::inferAddressSpaces(
ArrayRef<WeakTrackingVH> Postorder,
- ValueToAddrSpaceMapTy *InferredAddrSpace) const {
+ ValueToAddrSpaceMapTy &InferredAddrSpace,
+ PredicatedAddrSpaceMapTy &PredicatedAS) const {
SetVector<Value *> Worklist(Postorder.begin(), Postorder.end());
// Initially, all expressions are in the uninitialized address space.
for (Value *V : Postorder)
- (*InferredAddrSpace)[V] = UninitializedAddressSpace;
+ InferredAddrSpace[V] = UninitializedAddressSpace;
while (!Worklist.empty()) {
Value *V = Worklist.pop_back_val();
- // Tries to update the address space of the stack top according to the
+ // Try to update the address space of the stack top according to the
// address spaces of its operands.
- LLVM_DEBUG(dbgs() << "Updating the address space of\n " << *V << '\n');
- Optional<unsigned> NewAS = updateAddressSpace(*V, *InferredAddrSpace);
- if (!NewAS.hasValue())
+ if (!updateAddressSpace(*V, InferredAddrSpace, PredicatedAS))
continue;
- // If any updates are made, grabs its users to the worklist because
- // their address spaces can also be possibly updated.
- LLVM_DEBUG(dbgs() << " to " << NewAS.getValue() << '\n');
- (*InferredAddrSpace)[V] = NewAS.getValue();
for (Value *User : V->users()) {
// Skip if User is already in the worklist.
if (Worklist.count(User))
continue;
- auto Pos = InferredAddrSpace->find(User);
+ auto Pos = InferredAddrSpace.find(User);
// Our algorithm only updates the address spaces of flat address
// expressions, which are those in InferredAddrSpace.
- if (Pos == InferredAddrSpace->end())
+ if (Pos == InferredAddrSpace.end())
continue;
// Function updateAddressSpace moves the address space down a lattice
@@ -832,10 +872,37 @@ void InferAddressSpacesImpl::inferAddressSpaces(
}
}
-Optional<unsigned> InferAddressSpacesImpl::updateAddressSpace(
- const Value &V, const ValueToAddrSpaceMapTy &InferredAddrSpace) const {
+unsigned InferAddressSpacesImpl::getPredicatedAddrSpace(const Value &V,
+ Value *Opnd) const {
+ const Instruction *I = dyn_cast<Instruction>(&V);
+ if (!I)
+ return UninitializedAddressSpace;
+
+ Opnd = Opnd->stripInBoundsOffsets();
+ for (auto &AssumeVH : AC.assumptionsFor(Opnd)) {
+ if (!AssumeVH)
+ continue;
+ CallInst *CI = cast<CallInst>(AssumeVH);
+ if (!isValidAssumeForContext(CI, I, DT))
+ continue;
+
+ const Value *Ptr;
+ unsigned AS;
+ std::tie(Ptr, AS) = TTI->getPredicatedAddrSpace(CI->getArgOperand(0));
+ if (Ptr)
+ return AS;
+ }
+
+ return UninitializedAddressSpace;
+}
+
+bool InferAddressSpacesImpl::updateAddressSpace(
+ const Value &V, ValueToAddrSpaceMapTy &InferredAddrSpace,
+ PredicatedAddrSpaceMapTy &PredicatedAS) const {
assert(InferredAddrSpace.count(&V));
+ LLVM_DEBUG(dbgs() << "Updating the address space of\n " << V << '\n');
+
// The new inferred address space equals the join of the address spaces
// of all its pointer operands.
unsigned NewAS = UninitializedAddressSpace;
@@ -861,7 +928,7 @@ Optional<unsigned> InferAddressSpacesImpl::updateAddressSpace(
// address space is known.
if ((C1 && Src0AS == UninitializedAddressSpace) ||
(C0 && Src1AS == UninitializedAddressSpace))
- return None;
+ return false;
if (C0 && isSafeToCastConstAddrSpace(C0, Src1AS))
NewAS = Src1AS;
@@ -878,10 +945,23 @@ Optional<unsigned> InferAddressSpacesImpl::updateAddressSpace(
// Otherwise, infer the address space from its pointer operands.
for (Value *PtrOperand : getPointerOperands(V, *DL, TTI)) {
auto I = InferredAddrSpace.find(PtrOperand);
- unsigned OperandAS =
- I != InferredAddrSpace.end()
- ? I->second
- : PtrOperand->getType()->getPointerAddressSpace();
+ unsigned OperandAS;
+ if (I == InferredAddrSpace.end()) {
+ OperandAS = PtrOperand->getType()->getPointerAddressSpace();
+ if (OperandAS == FlatAddrSpace) {
+ // Check AC for assumption dominating V.
+ unsigned AS = getPredicatedAddrSpace(V, PtrOperand);
+ if (AS != UninitializedAddressSpace) {
+ LLVM_DEBUG(dbgs()
+ << " deduce operand AS from the predicate addrspace "
+ << AS << '\n');
+ OperandAS = AS;
+ // Record this use with the predicated AS.
+ PredicatedAS[std::make_pair(&V, PtrOperand)] = OperandAS;
+ }
+ }
+ } else
+ OperandAS = I->second;
// join(flat, *) = flat. So we can break if NewAS is already flat.
NewAS = joinAddressSpaces(NewAS, OperandAS);
@@ -894,8 +974,13 @@ Optional<unsigned> InferAddressSpacesImpl::updateAddressSpace(
unsigned OldAS = InferredAddrSpace.lookup(&V);
assert(OldAS != FlatAddrSpace);
if (OldAS == NewAS)
- return None;
- return NewAS;
+ return false;
+
+  // If any updates are made, add V's users to the worklist because their
+  // address spaces may also need to be updated.
+ LLVM_DEBUG(dbgs() << " to " << NewAS << '\n');
+ InferredAddrSpace[&V] = NewAS;
+ return true;
}
/// \p returns true if \p U is the pointer operand of a memory instruction with
@@ -1026,7 +1111,8 @@ static Value::use_iterator skipToNextUser(Value::use_iterator I,
bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
const TargetTransformInfo &TTI, ArrayRef<WeakTrackingVH> Postorder,
- const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const {
+ const ValueToAddrSpaceMapTy &InferredAddrSpace,
+ const PredicatedAddrSpaceMapTy &PredicatedAS, Function *F) const {
// For each address expression to be modified, creates a clone of it with its
// pointer operands converted to the new address space. Since the pointer
// operands are converted, the clone is naturally in the new address space by
@@ -1042,8 +1128,9 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
continue;
if (V->getType()->getPointerAddressSpace() != NewAddrSpace) {
- Value *New = cloneValueWithNewAddressSpace(
- V, NewAddrSpace, ValueWithNewAddrSpace, &UndefUsesToFix);
+ Value *New =
+ cloneValueWithNewAddressSpace(V, NewAddrSpace, ValueWithNewAddrSpace,
+ PredicatedAS, &UndefUsesToFix);
if (New)
ValueWithNewAddrSpace[V] = New;
}
@@ -1155,8 +1242,9 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
if (AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(CurUser)) {
unsigned NewAS = NewV->getType()->getPointerAddressSpace();
if (ASC->getDestAddressSpace() == NewAS) {
- if (ASC->getType()->getPointerElementType() !=
- NewV->getType()->getPointerElementType()) {
+ if (!cast<PointerType>(ASC->getType())
+ ->hasSameElementTypeAs(
+ cast<PointerType>(NewV->getType()))) {
NewV = CastInst::Create(Instruction::BitCast, NewV,
ASC->getType(), "", ASC);
}
@@ -1199,7 +1287,10 @@ bool InferAddressSpaces::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
return InferAddressSpacesImpl(
+ getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F), DT,
&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F),
FlatAddrSpace)
.run(F);
@@ -1217,11 +1308,14 @@ InferAddressSpacesPass::InferAddressSpacesPass(unsigned AddressSpace)
PreservedAnalyses InferAddressSpacesPass::run(Function &F,
FunctionAnalysisManager &AM) {
bool Changed =
- InferAddressSpacesImpl(&AM.getResult<TargetIRAnalysis>(F), FlatAddrSpace)
+ InferAddressSpacesImpl(AM.getResult<AssumptionAnalysis>(F),
+ AM.getCachedResult<DominatorTreeAnalysis>(F),
+ &AM.getResult<TargetIRAnalysis>(F), FlatAddrSpace)
.run(F);
if (Changed) {
PreservedAnalyses PA;
PA.preserveSet<CFGAnalyses>();
+ PA.preserve<DominatorTreeAnalysis>();
return PA;
}
return PreservedAnalyses::all();
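
[Editor's note] A hypothetical source-level sketch of what the assume-based inference added above enables. `is_global_ptr()` is a made-up stand-in for a target address-space predicate that `TTI::getPredicatedAddrSpace` would recognise (for example NVPTX's isspacep-style intrinsics); the snippet compiles with clang, but the real pass works on the IR form of such target intrinsics:

```cpp
// Marked const so clang keeps the call inside __builtin_assume.
extern "C" bool is_global_ptr(const void *p) __attribute__((const));

void scale(float *p, float f) {
  __builtin_assume(is_global_ptr(p)); // lowered to llvm.assume(...)
  p[0] *= f; // this use is dominated by the assume, so the pass may insert an
             // addrspacecast and rewrite the access to the specific (e.g.
             // global) address space, even though the definition of 'p'
             // remains a flat pointer.
}
```
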
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 9dc3b0351346..fe9a7211967c 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -331,7 +331,7 @@ bool JumpThreading::runOnFunction(Function &F) {
BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
}
- bool Changed = Impl.runImpl(F, TLI, LVI, AA, &DTU, F.hasProfileData(),
+ bool Changed = Impl.runImpl(F, TLI, TTI, LVI, AA, &DTU, F.hasProfileData(),
std::move(BFI), std::move(BPI));
if (PrintLVIAfterJumpThreading) {
dbgs() << "LVI for function '" << F.getName() << "':\n";
@@ -360,7 +360,7 @@ PreservedAnalyses JumpThreadingPass::run(Function &F,
BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
}
- bool Changed = runImpl(F, &TLI, &LVI, &AA, &DTU, F.hasProfileData(),
+ bool Changed = runImpl(F, &TLI, &TTI, &LVI, &AA, &DTU, F.hasProfileData(),
std::move(BFI), std::move(BPI));
if (PrintLVIAfterJumpThreading) {
@@ -377,12 +377,14 @@ PreservedAnalyses JumpThreadingPass::run(Function &F,
}
bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
- LazyValueInfo *LVI_, AliasAnalysis *AA_,
- DomTreeUpdater *DTU_, bool HasProfileData_,
+ TargetTransformInfo *TTI_, LazyValueInfo *LVI_,
+ AliasAnalysis *AA_, DomTreeUpdater *DTU_,
+ bool HasProfileData_,
std::unique_ptr<BlockFrequencyInfo> BFI_,
std::unique_ptr<BranchProbabilityInfo> BPI_) {
LLVM_DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
TLI = TLI_;
+ TTI = TTI_;
LVI = LVI_;
AA = AA_;
DTU = DTU_;
@@ -514,7 +516,8 @@ static void replaceFoldableUses(Instruction *Cond, Value *ToVal) {
/// Return the cost of duplicating a piece of this block from first non-phi
/// and before StopAt instruction to thread across it. Stop scanning the block
/// when exceeding the threshold. If duplication is impossible, returns ~0U.
-static unsigned getJumpThreadDuplicationCost(BasicBlock *BB,
+static unsigned getJumpThreadDuplicationCost(const TargetTransformInfo *TTI,
+ BasicBlock *BB,
Instruction *StopAt,
unsigned Threshold) {
assert(StopAt->getParent() == BB && "Not an instruction from proper BB?");
@@ -550,26 +553,21 @@ static unsigned getJumpThreadDuplicationCost(BasicBlock *BB,
if (Size > Threshold)
return Size;
- // Debugger intrinsics don't incur code size.
- if (isa<DbgInfoIntrinsic>(I)) continue;
-
- // Pseudo-probes don't incur code size.
- if (isa<PseudoProbeInst>(I))
- continue;
-
- // If this is a pointer->pointer bitcast, it is free.
- if (isa<BitCastInst>(I) && I->getType()->isPointerTy())
- continue;
-
- // Freeze instruction is free, too.
- if (isa<FreezeInst>(I))
- continue;
-
// Bail out if this instruction gives back a token type, it is not possible
// to duplicate it if it is used outside this BB.
if (I->getType()->isTokenTy() && I->isUsedOutsideOfBlock(BB))
return ~0U;
+ // Blocks with NoDuplicate are modelled as having infinite cost, so they
+ // are never duplicated.
+ if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (CI->cannotDuplicate() || CI->isConvergent())
+ return ~0U;
+
+ if (TTI->getUserCost(&*I, TargetTransformInfo::TCK_SizeAndLatency)
+ == TargetTransformInfo::TCC_Free)
+ continue;
+
// All other instructions count for at least one unit.
++Size;
@@ -578,11 +576,7 @@ static unsigned getJumpThreadDuplicationCost(BasicBlock *BB,
// as having cost of 2 total, and if they are a vector intrinsic, we model
// them as having cost 1.
if (const CallInst *CI = dyn_cast<CallInst>(I)) {
- if (CI->cannotDuplicate() || CI->isConvergent())
- // Blocks with NoDuplicate are modelled as having infinite cost, so they
- // are never duplicated.
- return ~0U;
- else if (!isa<IntrinsicInst>(CI))
+ if (!isa<IntrinsicInst>(CI))
Size += 3;
else if (!CI->getType()->isVectorTy())
Size += 1;
@@ -1363,8 +1357,7 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
// If all of the loads and stores that feed the value have the same AA tags,
// then we can propagate them onto any newly inserted loads.
- AAMDNodes AATags;
- LoadI->getAAMetadata(AATags);
+ AAMDNodes AATags = LoadI->getAAMetadata();
SmallPtrSet<BasicBlock*, 8> PredsScanned;
@@ -2235,10 +2228,10 @@ bool JumpThreadingPass::maybethreadThroughTwoBasicBlocks(BasicBlock *BB,
}
// Compute the cost of duplicating BB and PredBB.
- unsigned BBCost =
- getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
+ unsigned BBCost = getJumpThreadDuplicationCost(
+ TTI, BB, BB->getTerminator(), BBDupThreshold);
unsigned PredBBCost = getJumpThreadDuplicationCost(
- PredBB, PredBB->getTerminator(), BBDupThreshold);
+ TTI, PredBB, PredBB->getTerminator(), BBDupThreshold);
// Give up if costs are too high. We need to check BBCost and PredBBCost
// individually before checking their sum because getJumpThreadDuplicationCost
@@ -2346,8 +2339,8 @@ bool JumpThreadingPass::tryThreadEdge(
return false;
}
- unsigned JumpThreadCost =
- getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
+ unsigned JumpThreadCost = getJumpThreadDuplicationCost(
+ TTI, BB, BB->getTerminator(), BBDupThreshold);
if (JumpThreadCost > BBDupThreshold) {
LLVM_DEBUG(dbgs() << " Not threading BB '" << BB->getName()
<< "' - Cost is too high: " << JumpThreadCost << "\n");
@@ -2615,8 +2608,8 @@ bool JumpThreadingPass::duplicateCondBranchOnPHIIntoPred(
return false;
}
- unsigned DuplicationCost =
- getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
+ unsigned DuplicationCost = getJumpThreadDuplicationCost(
+ TTI, BB, BB->getTerminator(), BBDupThreshold);
if (DuplicationCost > BBDupThreshold) {
LLVM_DEBUG(dbgs() << " Not duplicating BB '" << BB->getName()
<< "' - Cost is too high: " << DuplicationCost << "\n");
@@ -3032,7 +3025,8 @@ bool JumpThreadingPass::threadGuard(BasicBlock *BB, IntrinsicInst *Guard,
ValueToValueMapTy UnguardedMapping, GuardedMapping;
Instruction *AfterGuard = Guard->getNextNode();
- unsigned Cost = getJumpThreadDuplicationCost(BB, AfterGuard, BBDupThreshold);
+ unsigned Cost =
+ getJumpThreadDuplicationCost(TTI, BB, AfterGuard, BBDupThreshold);
if (Cost > BBDupThreshold)
return false;
// Duplicate all instructions before the guard and the guard itself to the
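
The JumpThreading hunks above replace the hand-written list of "free" instructions (debug intrinsics, pseudo-probes, pointer-to-pointer bitcasts, freeze) with a single TargetTransformInfo query. A minimal sketch of that pattern, assuming a TTI reference and ignoring the real function's PHI/StopAt handling, might look like this; it is not the actual getJumpThreadDuplicationCost():

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"

// Sketch only: one TTI query stands in for the former ad-hoc special cases.
static unsigned estimateDuplicationCost(const llvm::TargetTransformInfo &TTI,
                                        llvm::BasicBlock &BB,
                                        unsigned Threshold) {
  unsigned Size = 0;
  for (llvm::Instruction &I : BB) {
    if (Size > Threshold)
      return Size;
    // noduplicate/convergent calls make duplication illegal, not just costly.
    if (auto *CI = llvm::dyn_cast<llvm::CallInst>(&I))
      if (CI->cannotDuplicate() || CI->isConvergent())
        return ~0U;
    // Anything the target reports as free for size/latency is skipped.
    if (TTI.getUserCost(&I, llvm::TargetTransformInfo::TCK_SizeAndLatency) ==
        llvm::TargetTransformInfo::TCC_Free)
      continue;
    ++Size;
  }
  return Size;
}
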
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp
index 30058df3ded5..bf714d167670 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -117,13 +117,6 @@ static cl::opt<uint32_t> MaxNumUsesTraversed(
cl::desc("Max num uses visited for identifying load "
"invariance in loop using invariant start (default = 8)"));
-// Default value of zero implies we use the regular alias set tracker mechanism
-// instead of the cross product using AA to identify aliasing of the memory
-// location we are interested in.
-static cl::opt<int>
-LICMN2Theshold("licm-n2-threshold", cl::Hidden, cl::init(0),
- cl::desc("How many instruction to cross product using AA"));
-
// Experimental option to allow imprecision in LICM in pathological cases, in
// exchange for faster compile. This is to be removed if MemorySSA starts to
// address the same issue. This flag applies only when LICM uses MemorySSA
@@ -151,7 +144,8 @@ cl::opt<unsigned> llvm::SetLicmMssaNoAccForPromotionCap(
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
const LoopSafetyInfo *SafetyInfo,
- TargetTransformInfo *TTI, bool &FreeInLoop);
+ TargetTransformInfo *TTI, bool &FreeInLoop,
+ bool LoopNestMode);
static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
MemorySSAUpdater *MSSAU, ScalarEvolution *SE,
@@ -180,7 +174,7 @@ static Instruction *cloneInstructionInExitBlock(
const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU);
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
- AliasSetTracker *AST, MemorySSAUpdater *MSSAU);
+ MemorySSAUpdater *MSSAU);
static void moveInstructionBefore(Instruction &I, Instruction &Dest,
ICFLoopSafetyInfo &SafetyInfo,
@@ -206,9 +200,6 @@ struct LoopInvariantCodeMotion {
private:
unsigned LicmMssaOptCap;
unsigned LicmMssaNoAccForPromotionCap;
-
- std::unique_ptr<AliasSetTracker>
- collectAliasInfoForLoop(Loop *L, LoopInfo *LI, AAResults *AA);
};
struct LegacyLICMPass : public LoopPass {
@@ -228,9 +219,7 @@ struct LegacyLICMPass : public LoopPass {
<< L->getHeader()->getNameOrAsOperand() << "\n");
auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
- MemorySSA *MSSA = EnableMSSALoopDependency
- ? (&getAnalysis<MemorySSAWrapperPass>().getMSSA())
- : nullptr;
+ MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
bool hasProfileData = L->getHeader()->getParent()->hasProfileData();
BlockFrequencyInfo *BFI =
hasProfileData ? &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI()
@@ -257,10 +246,8 @@ struct LegacyLICMPass : public LoopPass {
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
- if (EnableMSSALoopDependency) {
- AU.addRequired<MemorySSAWrapperPass>();
- AU.addPreserved<MemorySSAWrapperPass>();
- }
+ AU.addRequired<MemorySSAWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
getLoopAnalysisUsage(AU);
LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
@@ -275,6 +262,9 @@ private:
PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR, LPMUpdater &) {
+ if (!AR.MSSA)
+ report_fatal_error("LICM requires MemorySSA (loop-mssa)");
+
// For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
// pass. Function analyses need to be preserved across loop transformations
// but ORE cannot be preserved (see comment before the pass definition).
@@ -289,8 +279,7 @@ PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
PA.preserve<DominatorTreeAnalysis>();
PA.preserve<LoopAnalysis>();
- if (AR.MSSA)
- PA.preserve<MemorySSAAnalysis>();
+ PA.preserve<MemorySSAAnalysis>();
return PA;
}
@@ -298,6 +287,9 @@ PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
PreservedAnalyses LNICMPass::run(LoopNest &LN, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &) {
+ if (!AR.MSSA)
+ report_fatal_error("LNICM requires MemorySSA (loop-mssa)");
+
// For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
// pass. Function analyses need to be preserved across loop transformations
// but ORE cannot be preserved (see comment before the pass definition).
@@ -316,8 +308,7 @@ PreservedAnalyses LNICMPass::run(LoopNest &LN, LoopAnalysisManager &AM,
PA.preserve<DominatorTreeAnalysis>();
PA.preserve<LoopAnalysis>();
- if (AR.MSSA)
- PA.preserve<MemorySSAAnalysis>();
+ PA.preserve<MemorySSAAnalysis>();
return PA;
}
@@ -386,10 +377,6 @@ bool LoopInvariantCodeMotion::runOnLoop(
return false;
}
- std::unique_ptr<AliasSetTracker> CurAST;
- std::unique_ptr<MemorySSAUpdater> MSSAU;
- std::unique_ptr<SinkAndHoistLICMFlags> Flags;
-
// Don't sink stores from loops with coroutine suspend instructions.
// LICM would sink instructions into the default destination of
// the coroutine switch. The default destination of the switch is to
@@ -406,17 +393,9 @@ bool LoopInvariantCodeMotion::runOnLoop(
});
});
- if (!MSSA) {
- LLVM_DEBUG(dbgs() << "LICM: Using Alias Set Tracker.\n");
- CurAST = collectAliasInfoForLoop(L, LI, AA);
- Flags = std::make_unique<SinkAndHoistLICMFlags>(
- LicmMssaOptCap, LicmMssaNoAccForPromotionCap, /*IsSink=*/true);
- } else {
- LLVM_DEBUG(dbgs() << "LICM: Using MemorySSA.\n");
- MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
- Flags = std::make_unique<SinkAndHoistLICMFlags>(
- LicmMssaOptCap, LicmMssaNoAccForPromotionCap, /*IsSink=*/true, L, MSSA);
- }
+ MemorySSAUpdater MSSAU(MSSA);
+ SinkAndHoistLICMFlags Flags(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+ /*IsSink=*/true, L, MSSA);
// Get the preheader block to move instructions into...
BasicBlock *Preheader = L->getLoopPreheader();
@@ -435,14 +414,16 @@ bool LoopInvariantCodeMotion::runOnLoop(
// us to sink instructions in one pass, without iteration. After sinking
// instructions, we perform another pass to hoist them out of the loop.
if (L->hasDedicatedExits())
- Changed |=
- sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, TTI, L,
- CurAST.get(), MSSAU.get(), &SafetyInfo, *Flags.get(), ORE);
- Flags->setIsSink(false);
+ Changed |= LoopNestMode
+ ? sinkRegionForLoopNest(DT->getNode(L->getHeader()), AA, LI,
+ DT, BFI, TLI, TTI, L, &MSSAU,
+ &SafetyInfo, Flags, ORE)
+ : sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI,
+ TLI, TTI, L, &MSSAU, &SafetyInfo, Flags, ORE);
+ Flags.setIsSink(false);
if (Preheader)
Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, L,
- CurAST.get(), MSSAU.get(), SE, &SafetyInfo,
- *Flags.get(), ORE, LoopNestMode);
+ &MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode);
// Now that all loop invariants have been removed from the loop, promote any
// memory references to scalars that we can.
@@ -452,7 +433,7 @@ bool LoopInvariantCodeMotion::runOnLoop(
// preheader for SSA updater, so also avoid sinking when no preheader
// is available.
if (!DisablePromotion && Preheader && L->hasDedicatedExits() &&
- !Flags->tooManyMemoryAccesses() && !HasCoroSuspendInst) {
+ !Flags.tooManyMemoryAccesses() && !HasCoroSuspendInst) {
// Figure out the loop exits and their insertion points
SmallVector<BasicBlock *, 8> ExitBlocks;
L->getUniqueExitBlocks(ExitBlocks);
@@ -466,55 +447,29 @@ bool LoopInvariantCodeMotion::runOnLoop(
SmallVector<Instruction *, 8> InsertPts;
SmallVector<MemoryAccess *, 8> MSSAInsertPts;
InsertPts.reserve(ExitBlocks.size());
- if (MSSAU)
- MSSAInsertPts.reserve(ExitBlocks.size());
+ MSSAInsertPts.reserve(ExitBlocks.size());
for (BasicBlock *ExitBlock : ExitBlocks) {
InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
- if (MSSAU)
- MSSAInsertPts.push_back(nullptr);
+ MSSAInsertPts.push_back(nullptr);
}
PredIteratorCache PIC;
+ // Promoting one set of accesses may make the pointers for another set
+ // loop invariant, so run this in a loop (with the MaybePromotable set
+ // decreasing in size over time).
bool Promoted = false;
- if (CurAST.get()) {
- // Loop over all of the alias sets in the tracker object.
- for (AliasSet &AS : *CurAST) {
- // We can promote this alias set if it has a store, if it is a "Must"
- // alias set, if the pointer is loop invariant, and if we are not
- // eliminating any volatile loads or stores.
- if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
- !L->isLoopInvariant(AS.begin()->getValue()))
- continue;
-
- assert(
- !AS.empty() &&
- "Must alias set should have at least one pointer element in it!");
-
- SmallSetVector<Value *, 8> PointerMustAliases;
- for (const auto &ASI : AS)
- PointerMustAliases.insert(ASI.getValue());
-
- Promoted |= promoteLoopAccessesToScalars(
- PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
- DT, TLI, L, CurAST.get(), MSSAU.get(), &SafetyInfo, ORE);
+ bool LocalPromoted;
+ do {
+ LocalPromoted = false;
+ for (const SmallSetVector<Value *, 8> &PointerMustAliases :
+ collectPromotionCandidates(MSSA, AA, L)) {
+ LocalPromoted |= promoteLoopAccessesToScalars(
+ PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC,
+ LI, DT, TLI, L, &MSSAU, &SafetyInfo, ORE);
}
- } else {
- // Promoting one set of accesses may make the pointers for another set
- // loop invariant, so run this in a loop (with the MaybePromotable set
- // decreasing in size over time).
- bool LocalPromoted;
- do {
- LocalPromoted = false;
- for (const SmallSetVector<Value *, 8> &PointerMustAliases :
- collectPromotionCandidates(MSSA, AA, L)) {
- LocalPromoted |= promoteLoopAccessesToScalars(
- PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC,
- LI, DT, TLI, L, /*AST*/nullptr, MSSAU.get(), &SafetyInfo, ORE);
- }
- Promoted |= LocalPromoted;
- } while (LocalPromoted);
- }
+ Promoted |= LocalPromoted;
+ } while (LocalPromoted);
// Once we have promoted values across the loop body we have to
// recursively reform LCSSA as any nested loop may now have values defined
@@ -536,8 +491,8 @@ bool LoopInvariantCodeMotion::runOnLoop(
assert((L->isOutermost() || L->getParentLoop()->isLCSSAForm(*DT)) &&
"Parent loop not left in LCSSA form after LICM!");
- if (MSSAU.get() && VerifyMemorySSA)
- MSSAU->getMemorySSA()->verifyMemorySSA();
+ if (VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
if (Changed && SE)
SE->forgetLoopDispositions(L);
@@ -552,17 +507,15 @@ bool LoopInvariantCodeMotion::runOnLoop(
bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
DominatorTree *DT, BlockFrequencyInfo *BFI,
TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
- Loop *CurLoop, AliasSetTracker *CurAST,
- MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo,
+ Loop *CurLoop, MemorySSAUpdater *MSSAU,
+ ICFLoopSafetyInfo *SafetyInfo,
SinkAndHoistLICMFlags &Flags,
- OptimizationRemarkEmitter *ORE) {
+ OptimizationRemarkEmitter *ORE, Loop *OutermostLoop) {
// Verify inputs.
assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
- CurLoop != nullptr && SafetyInfo != nullptr &&
+ CurLoop != nullptr && MSSAU != nullptr && SafetyInfo != nullptr &&
"Unexpected input to sinkRegion.");
- assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
- "Either AliasSetTracker or MemorySSA should be initialized.");
// We want to visit children before parents. We will enque all the parents
// before their children in the worklist and process the worklist in reverse
@@ -587,7 +540,7 @@ bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
salvageKnowledge(&I);
salvageDebugInfo(I);
++II;
- eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
+ eraseInstruction(I, *SafetyInfo, MSSAU);
Changed = true;
continue;
}
@@ -598,26 +551,46 @@ bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
// operands of the instruction are loop invariant.
//
bool FreeInLoop = false;
+ bool LoopNestMode = OutermostLoop != nullptr;
if (!I.mayHaveSideEffects() &&
- isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) &&
- canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags,
- ORE)) {
+ isNotUsedOrFreeInLoop(I, LoopNestMode ? OutermostLoop : CurLoop,
+ SafetyInfo, TTI, FreeInLoop, LoopNestMode) &&
+ canSinkOrHoistInst(I, AA, DT, CurLoop, /*CurAST*/nullptr, MSSAU, true,
+ &Flags, ORE)) {
if (sink(I, LI, DT, BFI, CurLoop, SafetyInfo, MSSAU, ORE)) {
if (!FreeInLoop) {
++II;
salvageDebugInfo(I);
- eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
+ eraseInstruction(I, *SafetyInfo, MSSAU);
}
Changed = true;
}
}
}
}
- if (MSSAU && VerifyMemorySSA)
+ if (VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
return Changed;
}
+bool llvm::sinkRegionForLoopNest(
+ DomTreeNode *N, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
+ BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
+ Loop *CurLoop, MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo,
+ SinkAndHoistLICMFlags &Flags, OptimizationRemarkEmitter *ORE) {
+
+ bool Changed = false;
+ SmallPriorityWorklist<Loop *, 4> Worklist;
+ Worklist.insert(CurLoop);
+ appendLoopsToWorklist(*CurLoop, Worklist);
+ while (!Worklist.empty()) {
+ Loop *L = Worklist.pop_back_val();
+ Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI,
+ TTI, L, MSSAU, SafetyInfo, Flags, ORE, CurLoop);
+ }
+ return Changed;
+}
+
namespace {
// This is a helper class for hoistRegion to make it able to hoist control flow
// in order to be able to hoist phis. The way this works is that we initially
@@ -820,9 +793,8 @@ public:
if (HoistTarget == InitialPreheader) {
// Phis in the loop header now need to use the new preheader.
InitialPreheader->replaceSuccessorsPhiUsesWith(HoistCommonSucc);
- if (MSSAU)
- MSSAU->wireOldPredecessorsToNewImmediatePredecessor(
- HoistTarget->getSingleSuccessor(), HoistCommonSucc, {HoistTarget});
+ MSSAU->wireOldPredecessorsToNewImmediatePredecessor(
+ HoistTarget->getSingleSuccessor(), HoistCommonSucc, {HoistTarget});
// The new preheader dominates the loop header.
DomTreeNode *PreheaderNode = DT->getNode(HoistCommonSucc);
DomTreeNode *HeaderNode = DT->getNode(CurLoop->getHeader());
@@ -884,16 +856,14 @@ static bool worthSinkOrHoistInst(Instruction &I, BasicBlock *DstBlock,
bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
DominatorTree *DT, BlockFrequencyInfo *BFI,
TargetLibraryInfo *TLI, Loop *CurLoop,
- AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
- ScalarEvolution *SE, ICFLoopSafetyInfo *SafetyInfo,
+ MemorySSAUpdater *MSSAU, ScalarEvolution *SE,
+ ICFLoopSafetyInfo *SafetyInfo,
SinkAndHoistLICMFlags &Flags,
OptimizationRemarkEmitter *ORE, bool LoopNestMode) {
// Verify inputs.
assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
- CurLoop != nullptr && SafetyInfo != nullptr &&
+ CurLoop != nullptr && MSSAU != nullptr && SafetyInfo != nullptr &&
"Unexpected input to hoistRegion.");
- assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
- "Either AliasSetTracker or MemorySSA should be initialized.");
ControlFlowHoister CFH(LI, DT, CurLoop, MSSAU);
@@ -913,8 +883,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
if (!LoopNestMode && inSubLoop(BB, CurLoop, LI))
continue;
- for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
- Instruction &I = *II++;
+ for (Instruction &I : llvm::make_early_inc_range(*BB)) {
// Try constant folding this instruction. If all the operands are
// constants, it is technically hoistable, but it would be better to
// just fold it.
@@ -922,12 +891,10 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
&I, I.getModule()->getDataLayout(), TLI)) {
LLVM_DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C
<< '\n');
- if (CurAST)
- CurAST->copyValue(&I, C);
// FIXME MSSA: Such replacements may make accesses unoptimized (D51960).
I.replaceAllUsesWith(C);
if (isInstructionTriviallyDead(&I, TLI))
- eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
+ eraseInstruction(I, *SafetyInfo, MSSAU);
Changed = true;
continue;
}
@@ -940,8 +907,8 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
// and we have accurately duplicated the control flow from the loop header
// to that block.
if (CurLoop->hasLoopInvariantOperands(&I) &&
- canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags,
- ORE) &&
+ canSinkOrHoistInst(I, AA, DT, CurLoop, /*CurAST*/ nullptr, MSSAU,
+ true, &Flags, ORE) &&
worthSinkOrHoistInst(I, CurLoop->getLoopPreheader(), ORE, BFI) &&
isSafeToExecuteUnconditionally(
I, DT, TLI, CurLoop, SafetyInfo, ORE,
@@ -970,7 +937,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
SafetyInfo->insertInstructionTo(Product, I.getParent());
Product->insertAfter(&I);
I.replaceAllUsesWith(Product);
- eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
+ eraseInstruction(I, *SafetyInfo, MSSAU);
hoist(*ReciprocalDivisor, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB),
SafetyInfo, MSSAU, SE, ORE);
@@ -1049,7 +1016,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
}
}
}
- if (MSSAU && VerifyMemorySSA)
+ if (VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
// Now that we've finished hoisting make sure that LI and DT are still
@@ -1101,6 +1068,10 @@ static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
return false;
Addr = BC->getOperand(0);
}
+ // If we've ended up at a global/constant, bail. We shouldn't be looking at
+ // uselists for non-local Values in a loop pass.
+ if (isa<Constant>(Addr))
+ return false;
unsigned UsesVisited = 0;
// Traverse all uses of the load operand value, to see if invariant.start is
@@ -1273,7 +1244,7 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
// writes to this memory in the loop, we can hoist or sink.
if (AAResults::onlyAccessesArgPointees(Behavior)) {
// TODO: expand to writeable arguments
- for (Value *Op : CI->arg_operands())
+ for (Value *Op : CI->args())
if (Op->getType()->isPointerTy()) {
bool Invalidated;
if (CurAST)
@@ -1443,7 +1414,8 @@ static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop,
/// (e.g., a GEP can be folded into a load as an addressing mode in the loop).
static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
const LoopSafetyInfo *SafetyInfo,
- TargetTransformInfo *TTI, bool &FreeInLoop) {
+ TargetTransformInfo *TTI, bool &FreeInLoop,
+ bool LoopNestMode) {
const auto &BlockColors = SafetyInfo->getBlockColors();
bool IsFree = isFreeInLoop(I, CurLoop, TTI);
for (const User *U : I.users()) {
@@ -1460,6 +1432,15 @@ static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
if (!BlockColors.empty() &&
BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1)
return false;
+
+ if (LoopNestMode) {
+ while (isa<PHINode>(UI) && UI->hasOneUser() &&
+ UI->getNumOperands() == 1) {
+ if (!CurLoop->contains(UI))
+ break;
+ UI = cast<Instruction>(UI->user_back());
+ }
+ }
}
if (CurLoop->contains(UI)) {
@@ -1546,9 +1527,7 @@ static Instruction *cloneInstructionInExitBlock(
}
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
- AliasSetTracker *AST, MemorySSAUpdater *MSSAU) {
- if (AST)
- AST->deleteValue(&I);
+ MemorySSAUpdater *MSSAU) {
if (MSSAU)
MSSAU->removeMemoryAccess(&I);
SafetyInfo.removeInstruction(&I);
@@ -1599,8 +1578,7 @@ static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) {
// predecessor fairly simple.
if (!SafetyInfo->getBlockColors().empty() && BB->getFirstNonPHI()->isEHPad())
return false;
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
- BasicBlock *BBPred = *PI;
+ for (BasicBlock *BBPred : predecessors(BB)) {
if (isa<IndirectBrInst>(BBPred->getTerminator()) ||
isa<CallBrInst>(BBPred->getTerminator()))
return false;
@@ -1786,7 +1764,7 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
Instruction *New = sinkThroughTriviallyReplaceablePHI(
PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU);
PN->replaceAllUsesWith(New);
- eraseInstruction(*PN, *SafetyInfo, nullptr, nullptr);
+ eraseInstruction(*PN, *SafetyInfo, nullptr);
Changed = true;
}
return Changed;
@@ -1875,11 +1853,10 @@ class LoopPromoter : public LoadAndStorePromoter {
SmallVectorImpl<Instruction *> &LoopInsertPts;
SmallVectorImpl<MemoryAccess *> &MSSAInsertPts;
PredIteratorCache &PredCache;
- AliasSetTracker *AST;
MemorySSAUpdater *MSSAU;
LoopInfo &LI;
DebugLoc DL;
- int Alignment;
+ Align Alignment;
bool UnorderedAtomic;
AAMDNodes AATags;
ICFLoopSafetyInfo &SafetyInfo;
@@ -1907,13 +1884,13 @@ public:
SmallVectorImpl<BasicBlock *> &LEB,
SmallVectorImpl<Instruction *> &LIP,
SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC,
- AliasSetTracker *ast, MemorySSAUpdater *MSSAU, LoopInfo &li,
- DebugLoc dl, int alignment, bool UnorderedAtomic,
- const AAMDNodes &AATags, ICFLoopSafetyInfo &SafetyInfo)
+ MemorySSAUpdater *MSSAU, LoopInfo &li, DebugLoc dl,
+ Align Alignment, bool UnorderedAtomic, const AAMDNodes &AATags,
+ ICFLoopSafetyInfo &SafetyInfo)
: LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
LoopExitBlocks(LEB), LoopInsertPts(LIP), MSSAInsertPts(MSSAIP),
- PredCache(PIC), AST(ast), MSSAU(MSSAU), LI(li), DL(std::move(dl)),
- Alignment(alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags),
+ PredCache(PIC), MSSAU(MSSAU), LI(li), DL(std::move(dl)),
+ Alignment(Alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags),
SafetyInfo(SafetyInfo) {}
bool isInstInList(Instruction *I,
@@ -1940,39 +1917,29 @@ public:
StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
if (UnorderedAtomic)
NewSI->setOrdering(AtomicOrdering::Unordered);
- NewSI->setAlignment(Align(Alignment));
+ NewSI->setAlignment(Alignment);
NewSI->setDebugLoc(DL);
if (AATags)
NewSI->setAAMetadata(AATags);
- if (MSSAU) {
- MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i];
- MemoryAccess *NewMemAcc;
- if (!MSSAInsertPoint) {
- NewMemAcc = MSSAU->createMemoryAccessInBB(
- NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning);
- } else {
- NewMemAcc =
- MSSAU->createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint);
- }
- MSSAInsertPts[i] = NewMemAcc;
- MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
- // FIXME: true for safety, false may still be correct.
+ MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i];
+ MemoryAccess *NewMemAcc;
+ if (!MSSAInsertPoint) {
+ NewMemAcc = MSSAU->createMemoryAccessInBB(
+ NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning);
+ } else {
+ NewMemAcc =
+ MSSAU->createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint);
}
+ MSSAInsertPts[i] = NewMemAcc;
+ MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
+ // FIXME: true for safety, false may still be correct.
}
}
- void replaceLoadWithValue(LoadInst *LI, Value *V) const override {
- // Update alias analysis.
- if (AST)
- AST->copyValue(LI, V);
- }
void instructionDeleted(Instruction *I) const override {
SafetyInfo.removeInstruction(I);
- if (AST)
- AST->deleteValue(I);
- if (MSSAU)
- MSSAU->removeMemoryAccess(I);
+ MSSAU->removeMemoryAccess(I);
}
};
@@ -2023,8 +1990,8 @@ bool llvm::promoteLoopAccessesToScalars(
SmallVectorImpl<Instruction *> &InsertPts,
SmallVectorImpl<MemoryAccess *> &MSSAInsertPts, PredIteratorCache &PIC,
LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
- Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
- ICFLoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) {
+ Loop *CurLoop, MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo,
+ OptimizationRemarkEmitter *ORE) {
// Verify inputs.
assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
SafetyInfo != nullptr &&
@@ -2189,9 +2156,9 @@ bool llvm::promoteLoopAccessesToScalars(
// Merge the AA tags.
if (LoopUses.empty()) {
// On the first load/store, just take its AA tags.
- UI->getAAMetadata(AATags);
+ AATags = UI->getAAMetadata();
} else if (AATags) {
- UI->getAAMetadata(AATags, /* Merge = */ true);
+ AATags = AATags.merge(UI->getAAMetadata());
}
LoopUses.push_back(UI);
@@ -2256,9 +2223,8 @@ bool llvm::promoteLoopAccessesToScalars(
SmallVector<PHINode *, 16> NewPHIs;
SSAUpdater SSA(&NewPHIs);
LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
- InsertPts, MSSAInsertPts, PIC, CurAST, MSSAU, *LI, DL,
- Alignment.value(), SawUnorderedAtomic, AATags,
- *SafetyInfo);
+ InsertPts, MSSAInsertPts, PIC, MSSAU, *LI, DL,
+ Alignment, SawUnorderedAtomic, AATags, *SafetyInfo);
// Set up the preheader to have a definition of the value. It is the live-out
// value from the preheader that uses in the loop will use.
@@ -2273,24 +2239,22 @@ bool llvm::promoteLoopAccessesToScalars(
PreheaderLoad->setAAMetadata(AATags);
SSA.AddAvailableValue(Preheader, PreheaderLoad);
- if (MSSAU) {
- MemoryAccess *PreheaderLoadMemoryAccess = MSSAU->createMemoryAccessInBB(
- PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End);
- MemoryUse *NewMemUse = cast<MemoryUse>(PreheaderLoadMemoryAccess);
- MSSAU->insertUse(NewMemUse, /*RenameUses=*/true);
- }
+ MemoryAccess *PreheaderLoadMemoryAccess = MSSAU->createMemoryAccessInBB(
+ PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End);
+ MemoryUse *NewMemUse = cast<MemoryUse>(PreheaderLoadMemoryAccess);
+ MSSAU->insertUse(NewMemUse, /*RenameUses=*/true);
- if (MSSAU && VerifyMemorySSA)
+ if (VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
// Rewrite all the loads in the loop and remember all the definitions from
// stores in the loop.
Promoter.run(LoopUses);
- if (MSSAU && VerifyMemorySSA)
+ if (VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
// If the SSAUpdater didn't use the load in the preheader, just zap it now.
if (PreheaderLoad->use_empty())
- eraseInstruction(*PreheaderLoad, *SafetyInfo, CurAST, MSSAU);
+ eraseInstruction(*PreheaderLoad, *SafetyInfo, MSSAU);
return true;
}
@@ -2356,71 +2320,10 @@ collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) {
return Result;
}
-/// Returns an owning pointer to an alias set which incorporates aliasing info
-/// from L and all subloops of L.
-std::unique_ptr<AliasSetTracker>
-LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI,
- AAResults *AA) {
- auto CurAST = std::make_unique<AliasSetTracker>(*AA);
-
- // Add everything from all the sub loops.
- for (Loop *InnerL : L->getSubLoops())
- for (BasicBlock *BB : InnerL->blocks())
- CurAST->add(*BB);
-
- // And merge in this loop (without anything from inner loops).
- for (BasicBlock *BB : L->blocks())
- if (LI->getLoopFor(BB) == L)
- CurAST->add(*BB);
-
- return CurAST;
-}
-
static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
AliasSetTracker *CurAST, Loop *CurLoop,
AAResults *AA) {
- // First check to see if any of the basic blocks in CurLoop invalidate *V.
- bool isInvalidatedAccordingToAST = CurAST->getAliasSetFor(MemLoc).isMod();
-
- if (!isInvalidatedAccordingToAST || !LICMN2Theshold)
- return isInvalidatedAccordingToAST;
-
- // Check with a diagnostic analysis if we can refine the information above.
- // This is to identify the limitations of using the AST.
- // The alias set mechanism used by LICM has a major weakness in that it
- // combines all things which may alias into a single set *before* asking
- // modref questions. As a result, a single readonly call within a loop will
- // collapse all loads and stores into a single alias set and report
- // invalidation if the loop contains any store. For example, readonly calls
- // with deopt states have this form and create a general alias set with all
- // loads and stores. In order to get any LICM in loops containing possible
- // deopt states we need a more precise invalidation of checking the mod ref
- // info of each instruction within the loop and LI. This has a complexity of
- // O(N^2), so currently, it is used only as a diagnostic tool since the
- // default value of LICMN2Threshold is zero.
-
- // Don't look at nested loops.
- if (CurLoop->begin() != CurLoop->end())
- return true;
-
- int N = 0;
- for (BasicBlock *BB : CurLoop->getBlocks())
- for (Instruction &I : *BB) {
- if (N >= LICMN2Theshold) {
- LLVM_DEBUG(dbgs() << "Alasing N2 threshold exhausted for "
- << *(MemLoc.Ptr) << "\n");
- return true;
- }
- N++;
- auto Res = AA->getModRefInfo(&I, MemLoc);
- if (isModSet(Res)) {
- LLVM_DEBUG(dbgs() << "Aliasing failed on " << I << " for "
- << *(MemLoc.Ptr) << "\n");
- return true;
- }
- }
- LLVM_DEBUG(dbgs() << "Aliasing okay for " << *(MemLoc.Ptr) << "\n");
- return false;
+ return CurAST->getAliasSetFor(MemLoc).isMod();
}
bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
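
With the AliasSetTracker path removed, LICM's scalar promotion above runs only the MemorySSA-driven fixed-point loop. Abstracted away from the file-local helpers (CollectCandidates stands in for collectPromotionCandidates, PromoteSet for promoteLoopAccessesToScalars), the shape of that loop is roughly the following sketch, not a drop-in replacement:

// Sketch of the fixed-point promotion driver used in the hunks above.
template <typename CollectFn, typename PromoteFn>
bool promoteUntilFixpoint(CollectFn CollectCandidates, PromoteFn PromoteSet) {
  bool Promoted = false;
  bool LocalPromoted;
  do {
    LocalPromoted = false;
    // Promoting one must-alias set can make another set's pointer loop
    // invariant, so candidates are recollected every round until nothing
    // changes.
    for (const auto &PointerMustAliases : CollectCandidates())
      LocalPromoted |= PromoteSet(PointerMustAliases);
    Promoted |= LocalPromoted;
  } while (LocalPromoted);
  return Promoted;
}
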
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp
index 993b154dc9a8..d438d56e38ca 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/LoopBoundSplit.h"
+#include "llvm/ADT/Sequence.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -39,10 +40,12 @@ struct ConditionInfo {
ICmpInst::Predicate Pred;
/// AddRec llvm value
Value *AddRecValue;
+ /// Non PHI AddRec llvm value
+ Value *NonPHIAddRecValue;
/// Bound llvm value
Value *BoundValue;
/// AddRec SCEV
- const SCEV *AddRecSCEV;
+ const SCEVAddRecExpr *AddRecSCEV;
/// Bound SCEV
const SCEV *BoundSCEV;
@@ -54,19 +57,31 @@ struct ConditionInfo {
} // namespace
static void analyzeICmp(ScalarEvolution &SE, ICmpInst *ICmp,
- ConditionInfo &Cond) {
+ ConditionInfo &Cond, const Loop &L) {
Cond.ICmp = ICmp;
if (match(ICmp, m_ICmp(Cond.Pred, m_Value(Cond.AddRecValue),
m_Value(Cond.BoundValue)))) {
- Cond.AddRecSCEV = SE.getSCEV(Cond.AddRecValue);
- Cond.BoundSCEV = SE.getSCEV(Cond.BoundValue);
+ const SCEV *AddRecSCEV = SE.getSCEV(Cond.AddRecValue);
+ const SCEV *BoundSCEV = SE.getSCEV(Cond.BoundValue);
+ const SCEVAddRecExpr *LHSAddRecSCEV = dyn_cast<SCEVAddRecExpr>(AddRecSCEV);
+ const SCEVAddRecExpr *RHSAddRecSCEV = dyn_cast<SCEVAddRecExpr>(BoundSCEV);
// Locate AddRec in LHSSCEV and Bound in RHSSCEV.
- if (isa<SCEVAddRecExpr>(Cond.BoundSCEV) &&
- !isa<SCEVAddRecExpr>(Cond.AddRecSCEV)) {
+ if (!LHSAddRecSCEV && RHSAddRecSCEV) {
std::swap(Cond.AddRecValue, Cond.BoundValue);
- std::swap(Cond.AddRecSCEV, Cond.BoundSCEV);
+ std::swap(AddRecSCEV, BoundSCEV);
Cond.Pred = ICmpInst::getSwappedPredicate(Cond.Pred);
}
+
+ Cond.AddRecSCEV = dyn_cast<SCEVAddRecExpr>(AddRecSCEV);
+ Cond.BoundSCEV = BoundSCEV;
+ Cond.NonPHIAddRecValue = Cond.AddRecValue;
+
+ // If Cond.AddRecValue is a PHI node, update Cond.NonPHIAddRecValue with the
+ // value coming from the backedge.
+ if (Cond.AddRecSCEV && isa<PHINode>(Cond.AddRecValue)) {
+ PHINode *PN = cast<PHINode>(Cond.AddRecValue);
+ Cond.NonPHIAddRecValue = PN->getIncomingValueForBlock(L.getLoopLatch());
+ }
}
}
@@ -118,21 +133,20 @@ static bool calculateUpperBound(const Loop &L, ScalarEvolution &SE,
static bool hasProcessableCondition(const Loop &L, ScalarEvolution &SE,
ICmpInst *ICmp, ConditionInfo &Cond,
bool IsExitCond) {
- analyzeICmp(SE, ICmp, Cond);
+ analyzeICmp(SE, ICmp, Cond, L);
// The BoundSCEV should be evaluated at loop entry.
if (!SE.isAvailableAtLoopEntry(Cond.BoundSCEV, &L))
return false;
- const SCEVAddRecExpr *AddRecSCEV = dyn_cast<SCEVAddRecExpr>(Cond.AddRecSCEV);
// Allowed AddRec as induction variable.
- if (!AddRecSCEV)
+ if (!Cond.AddRecSCEV)
return false;
- if (!AddRecSCEV->isAffine())
+ if (!Cond.AddRecSCEV->isAffine())
return false;
- const SCEV *StepRecSCEV = AddRecSCEV->getStepRecurrence(SE);
+ const SCEV *StepRecSCEV = Cond.AddRecSCEV->getStepRecurrence(SE);
// Allowed constant step.
if (!isa<SCEVConstant>(StepRecSCEV))
return false;
@@ -264,6 +278,14 @@ static BranchInst *findSplitCandidate(const Loop &L, ScalarEvolution &SE,
SplitCandidateCond.BoundSCEV->getType())
continue;
+ // After the transformation, we assume the split condition of the pre-loop is
+ // always true. In order to guarantee that, check that the start value of the
+ // split condition's AddRec satisfies the split condition.
+ if (!SE.isLoopEntryGuardedByCond(&L, SplitCandidateCond.Pred,
+ SplitCandidateCond.AddRecSCEV->getStart(),
+ SplitCandidateCond.BoundSCEV))
+ continue;
+
SplitCandidateCond.BI = BI;
return BI;
}
@@ -341,13 +363,45 @@ static bool splitLoopBound(Loop &L, DominatorTree &DT, LoopInfo &LI,
".split", &LI, &DT, PostLoopBlocks);
remapInstructionsInBlocks(PostLoopBlocks, VMap);
- // Add conditional branch to check we can skip post-loop in its preheader.
BasicBlock *PostLoopPreHeader = PostLoop->getLoopPreheader();
- IRBuilder<> Builder(PostLoopPreHeader);
+ IRBuilder<> Builder(&PostLoopPreHeader->front());
+
+ // Update phi nodes in header of post-loop.
+ bool isExitingLatch = (L.getExitingBlock() == L.getLoopLatch());
+ Value *ExitingCondLCSSAPhi = nullptr;
+ for (PHINode &PN : L.getHeader()->phis()) {
+ // Create LCSSA phi node in preheader of post-loop.
+ PHINode *LCSSAPhi =
+ Builder.CreatePHI(PN.getType(), 1, PN.getName() + ".lcssa");
+ LCSSAPhi->setDebugLoc(PN.getDebugLoc());
+ // If the exiting block is the loop latch, the phi does not see the update
+ // from the last iteration; in that case, use the value from the backedge.
+ LCSSAPhi->addIncoming(
+ isExitingLatch ? PN.getIncomingValueForBlock(L.getLoopLatch()) : &PN,
+ L.getExitingBlock());
+
+ // Update the start value of phi node in post-loop with the LCSSA phi node.
+ PHINode *PostLoopPN = cast<PHINode>(VMap[&PN]);
+ PostLoopPN->setIncomingValueForBlock(PostLoopPreHeader, LCSSAPhi);
+
+ // Find the PHI that carries the exiting condition of the pre-loop. Its SCEV
+ // should be a SCEVAddRecExpr, and its incoming value from the backedge
+ // should match ExitingCond's.
+ if (!SE.isSCEVable(PN.getType()))
+ continue;
+
+ const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&PN));
+ if (PhiSCEV && ExitingCond.NonPHIAddRecValue ==
+ PN.getIncomingValueForBlock(L.getLoopLatch()))
+ ExitingCondLCSSAPhi = LCSSAPhi;
+ }
+
+ // Add conditional branch to check we can skip post-loop in its preheader.
Instruction *OrigBI = PostLoopPreHeader->getTerminator();
ICmpInst::Predicate Pred = ICmpInst::ICMP_NE;
Value *Cond =
- Builder.CreateICmp(Pred, ExitingCond.AddRecValue, ExitingCond.BoundValue);
+ Builder.CreateICmp(Pred, ExitingCondLCSSAPhi, ExitingCond.BoundValue);
Builder.CreateCondBr(Cond, PostLoop->getHeader(), PostLoop->getExitBlock());
OrigBI->eraseFromParent();
@@ -368,21 +422,6 @@ static bool splitLoopBound(Loop &L, DominatorTree &DT, LoopInfo &LI,
// Replace exiting bound value of pre-loop NewBound.
ExitingCond.ICmp->setOperand(1, NewBoundValue);
- // Replace IV's start value of post-loop by NewBound.
- for (PHINode &PN : L.getHeader()->phis()) {
- // Find PHI with exiting condition from pre-loop.
- if (SE.isSCEVable(PN.getType()) && isa<SCEVAddRecExpr>(SE.getSCEV(&PN))) {
- for (Value *Op : PN.incoming_values()) {
- if (Op == ExitingCond.AddRecValue) {
- // Find cloned PHI for post-loop.
- PHINode *PostLoopPN = cast<PHINode>(VMap[&PN]);
- PostLoopPN->setIncomingValueForBlock(PostLoopPreHeader,
- NewBoundValue);
- }
- }
- }
- }
-
// Replace SplitCandidateCond.BI's condition of pre-loop by True.
LLVMContext &Context = PreHeader->getContext();
SplitCandidateCond.BI->setCondition(ConstantInt::getTrue(Context));
@@ -398,6 +437,30 @@ static bool splitLoopBound(Loop &L, DominatorTree &DT, LoopInfo &LI,
else
ExitingCond.BI->setSuccessor(1, PostLoopPreHeader);
+ // Update phi node in exit block of post-loop.
+ Builder.SetInsertPoint(&PostLoopPreHeader->front());
+ for (PHINode &PN : PostLoop->getExitBlock()->phis()) {
+ for (auto i : seq<int>(0, PN.getNumOperands())) {
+ // Check incoming block is pre-loop's exiting block.
+ if (PN.getIncomingBlock(i) == L.getExitingBlock()) {
+ Value *IncomingValue = PN.getIncomingValue(i);
+
+ // Create LCSSA phi node for incoming value.
+ PHINode *LCSSAPhi =
+ Builder.CreatePHI(PN.getType(), 1, PN.getName() + ".lcssa");
+ LCSSAPhi->setDebugLoc(PN.getDebugLoc());
+ LCSSAPhi->addIncoming(IncomingValue, PN.getIncomingBlock(i));
+
+ // Replace pre-loop's exiting block by post-loop's preheader.
+ PN.setIncomingBlock(i, PostLoopPreHeader);
+ // Replace incoming value by LCSSAPhi.
+ PN.setIncomingValue(i, LCSSAPhi);
+ // Add a new incoming value with post-loop's exiting block.
+ PN.addIncoming(VMap[IncomingValue], PostLoop->getExitingBlock());
+ }
+ }
+ }
+
// Update dominator tree.
DT.changeImmediateDominator(PostLoopPreHeader, L.getExitingBlock());
DT.changeImmediateDominator(PostLoop->getExitBlock(), PostLoopPreHeader);
@@ -406,10 +469,7 @@ static bool splitLoopBound(Loop &L, DominatorTree &DT, LoopInfo &LI,
SE.forgetLoop(&L);
// Canonicalize loops.
- // TODO: Try to update LCSSA information according to above change.
- formLCSSA(L, DT, &LI, &SE);
simplifyLoop(&L, &DT, &LI, &SE, nullptr, nullptr, true);
- formLCSSA(*PostLoop, DT, &LI, &SE);
simplifyLoop(PostLoop, &DT, &LI, &SE, nullptr, nullptr, true);
// Add new post-loop to loop pass manager.
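
The LoopBoundSplit hunks above now build LCSSA-style phis by hand in the post-loop preheader and reroute the exit block's phis through them, which is also why the explicit formLCSSA calls are dropped. A reduced sketch of that rerouting, assuming ExitingBB is a predecessor of Preheader and using the newly included llvm/ADT/Sequence.h for the index loop (names here are illustrative, not the pass's):

#include "llvm/ADT/Sequence.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"

// Sketch only: give PN a single-entry LCSSA phi in Preheader and make PN's
// slot for ExitingBB flow through it instead.
static void rerouteThroughLCSSAPhi(llvm::BasicBlock *Preheader,
                                   llvm::BasicBlock *ExitingBB,
                                   llvm::PHINode &PN) {
  llvm::IRBuilder<> Builder(&Preheader->front());
  for (auto i : llvm::seq<int>(0, PN.getNumOperands())) {
    if (PN.getIncomingBlock(i) != ExitingBB)
      continue;
    // Single-entry phi that makes the value available in the preheader.
    llvm::PHINode *LCSSAPhi =
        Builder.CreatePHI(PN.getType(), 1, PN.getName() + ".lcssa");
    LCSSAPhi->addIncoming(PN.getIncomingValue(i), ExitingBB);
    // The original phi now receives the value via the preheader.
    PN.setIncomingBlock(i, Preheader);
    PN.setIncomingValue(i, LCSSAPhi);
  }
}
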
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
index a5d7835bd094..77d76609c926 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
@@ -29,6 +29,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
@@ -127,6 +128,8 @@ public:
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addPreserved<ScalarEvolutionWrapperPass>();
@@ -143,6 +146,7 @@ INITIALIZE_PASS_BEGIN(LoopDataPrefetchLegacyPass, "loop-data-prefetch",
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(LoopDataPrefetchLegacyPass, "loop-data-prefetch",
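
The LoopDataPrefetch change above is purely a pass-dependency fix: the legacy pass now requires and preserves LoopSimplify, which is what the new llvm/Transforms/Utils.h include provides via LoopSimplifyID. A stripped-down legacy pass declaring the same dependency, purely for illustration, could look like:

#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils.h"

namespace {
// Stub pass; only the getAnalysisUsage() wiring matters here.
struct RequiresLoopSimplify : public llvm::FunctionPass {
  static char ID;
  RequiresLoopSimplify() : llvm::FunctionPass(ID) {}
  void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
    AU.addRequiredID(llvm::LoopSimplifyID);  // run loop-simplify beforehand
    AU.addPreservedID(llvm::LoopSimplifyID); // and promise not to undo it
  }
  bool runOnFunction(llvm::Function &) override { return false; }
};
char RequiresLoopSimplify::ID = 0;
} // namespace
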
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
index f7e8442fae81..5814e2f043d5 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -36,6 +36,8 @@ using namespace llvm;
#define DEBUG_TYPE "loop-delete"
STATISTIC(NumDeleted, "Number of loops deleted");
+STATISTIC(NumBackedgesBroken,
+ "Number of loops for which we managed to break the backedge");
static cl::opt<bool> EnableSymbolicExecution(
"loop-deletion-enable-symbolic-execution", cl::Hidden, cl::init(true),
@@ -191,6 +193,20 @@ getValueOnFirstIteration(Value *V, DenseMap<Value *, Value *> &FirstIterValue,
Value *RHS =
getValueOnFirstIteration(BO->getOperand(1), FirstIterValue, SQ);
FirstIterV = SimplifyBinOp(BO->getOpcode(), LHS, RHS, SQ);
+ } else if (auto *Cmp = dyn_cast<ICmpInst>(V)) {
+ Value *LHS =
+ getValueOnFirstIteration(Cmp->getOperand(0), FirstIterValue, SQ);
+ Value *RHS =
+ getValueOnFirstIteration(Cmp->getOperand(1), FirstIterValue, SQ);
+ FirstIterV = SimplifyICmpInst(Cmp->getPredicate(), LHS, RHS, SQ);
+ } else if (auto *Select = dyn_cast<SelectInst>(V)) {
+ Value *Cond =
+ getValueOnFirstIteration(Select->getCondition(), FirstIterValue, SQ);
+ if (auto *C = dyn_cast<ConstantInt>(Cond)) {
+ auto *Selected = C->isAllOnesValue() ? Select->getTrueValue()
+ : Select->getFalseValue();
+ FirstIterV = getValueOnFirstIteration(Selected, FirstIterValue, SQ);
+ }
}
if (!FirstIterV)
FirstIterV = V;
@@ -314,22 +330,20 @@ static bool canProveExitOnFirstIteration(Loop *L, DominatorTree &DT,
}
using namespace PatternMatch;
- ICmpInst::Predicate Pred;
- Value *LHS, *RHS;
+ Value *Cond;
BasicBlock *IfTrue, *IfFalse;
auto *Term = BB->getTerminator();
- if (match(Term, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
+ if (match(Term, m_Br(m_Value(Cond),
m_BasicBlock(IfTrue), m_BasicBlock(IfFalse)))) {
- if (!LHS->getType()->isIntegerTy()) {
+ auto *ICmp = dyn_cast<ICmpInst>(Cond);
+ if (!ICmp || !ICmp->getType()->isIntegerTy()) {
MarkAllSuccessorsLive(BB);
continue;
}
// Can we prove constant true or false for this condition?
- LHS = getValueOnFirstIteration(LHS, FirstIterValue, SQ);
- RHS = getValueOnFirstIteration(RHS, FirstIterValue, SQ);
- auto *KnownCondition = SimplifyICmpInst(Pred, LHS, RHS, SQ);
- if (!KnownCondition) {
+ auto *KnownCondition = getValueOnFirstIteration(ICmp, FirstIterValue, SQ);
+ if (KnownCondition == ICmp) {
// Failed to simplify.
MarkAllSuccessorsLive(BB);
continue;
@@ -393,14 +407,25 @@ breakBackedgeIfNotTaken(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
if (!L->getLoopLatch())
return LoopDeletionResult::Unmodified;
- auto *BTC = SE.getBackedgeTakenCount(L);
- if (!isa<SCEVCouldNotCompute>(BTC) && SE.isKnownNonZero(BTC))
- return LoopDeletionResult::Unmodified;
- if (!BTC->isZero() && !canProveExitOnFirstIteration(L, DT, LI))
- return LoopDeletionResult::Unmodified;
+ auto *BTC = SE.getSymbolicMaxBackedgeTakenCount(L);
+ if (BTC->isZero()) {
+ // SCEV knows this backedge isn't taken!
+ breakLoopBackedge(L, DT, SE, LI, MSSA);
+ ++NumBackedgesBroken;
+ return LoopDeletionResult::Deleted;
+ }
- breakLoopBackedge(L, DT, SE, LI, MSSA);
- return LoopDeletionResult::Deleted;
+ // If SCEV leaves open the possibility of a zero trip count, see if
+ // symbolically evaluating the first iteration lets us prove the backedge
+ // unreachable.
+ if (isa<SCEVCouldNotCompute>(BTC) || !SE.isKnownNonZero(BTC))
+ if (canProveExitOnFirstIteration(L, DT, LI)) {
+ breakLoopBackedge(L, DT, SE, LI, MSSA);
+ ++NumBackedgesBroken;
+ return LoopDeletionResult::Deleted;
+ }
+
+ return LoopDeletionResult::Unmodified;
}
/// Remove a loop if it is dead.
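
The LoopDeletion hunks above teach getValueOnFirstIteration() about icmp (via SimplifyICmpInst) and select. The select rule in isolation, written as a self-contained helper over a precomputed first-iteration condition value rather than the pass's internal cache, is roughly:

#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"

// Sketch only: collapse a select when its condition is known on iteration one.
static llvm::Value *firstIterValueOfSelect(llvm::SelectInst *Select,
                                           llvm::Value *CondOnFirstIter) {
  // A constant true/false condition picks one arm; otherwise we learn nothing
  // and keep the select itself, mirroring the "FirstIterV = V" fallback.
  if (auto *C = llvm::dyn_cast<llvm::ConstantInt>(CondOnFirstIter))
    return C->isAllOnesValue() ? Select->getTrueValue()
                               : Select->getFalseValue();
  return Select;
}
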
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDistribute.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
index bac3dc0f3fb9..0f4c767c1e4c 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
@@ -1057,8 +1057,8 @@ PreservedAnalyses LoopDistributePass::run(Function &F,
auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
std::function<const LoopAccessInfo &(Loop &)> GetLAA =
[&](Loop &L) -> const LoopAccessInfo & {
- LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE,
- TLI, TTI, nullptr, nullptr};
+ LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE,
+ TLI, TTI, nullptr, nullptr, nullptr};
return LAM.getResult<LoopAccessAnalysis>(L, AR);
};
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFlatten.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
index f54289f85ef5..965d1575518e 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
@@ -27,6 +27,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/LoopFlatten.h"
+
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
@@ -49,11 +51,13 @@
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
-#define DEBUG_TYPE "loop-flatten"
-
using namespace llvm;
using namespace llvm::PatternMatch;
+#define DEBUG_TYPE "loop-flatten"
+
+STATISTIC(NumFlattened, "Number of loops flattened");
+
static cl::opt<unsigned> RepeatedInstructionThreshold(
"loop-flatten-cost-threshold", cl::Hidden, cl::init(2),
cl::desc("Limit on the cost of instructions that can be repeated due to "
@@ -90,9 +94,33 @@ struct FlattenInfo {
// Whether this holds the flatten info before or after widening.
bool Widened = false;
+ // Holds the old/narrow induction phis, i.e. the Phis before IV widening has
+ // been applied. This bookkeeping is used so we can skip some checks on these
+ // phi nodes.
+ PHINode *NarrowInnerInductionPHI = nullptr;
+ PHINode *NarrowOuterInductionPHI = nullptr;
+
FlattenInfo(Loop *OL, Loop *IL) : OuterLoop(OL), InnerLoop(IL) {};
+
+ bool isNarrowInductionPhi(PHINode *Phi) {
+ // This can't be the narrow phi if we haven't widened the IV first.
+ if (!Widened)
+ return false;
+ return NarrowInnerInductionPHI == Phi || NarrowOuterInductionPHI == Phi;
+ }
};
+static bool
+setLoopComponents(Value *&TC, Value *&TripCount, BinaryOperator *&Increment,
+ SmallPtrSetImpl<Instruction *> &IterationInstructions) {
+ TripCount = TC;
+ IterationInstructions.insert(Increment);
+ LLVM_DEBUG(dbgs() << "Found Increment: "; Increment->dump());
+ LLVM_DEBUG(dbgs() << "Found trip count: "; TripCount->dump());
+ LLVM_DEBUG(dbgs() << "Successfully found all loop components\n");
+ return true;
+}
+
// Finds the induction variable, increment and trip count for a simple loop that
// we can flatten.
static bool findLoopComponents(
@@ -164,36 +192,68 @@ static bool findLoopComponents(
return false;
}
// The trip count is the RHS of the compare. If this doesn't match the trip
- // count computed by SCEV then this is either because the trip count variable
- // has been widened (then leave the trip count as it is), or because it is a
- // constant and another transformation has changed the compare, e.g.
- // icmp ult %inc, tripcount -> icmp ult %j, tripcount-1, then we don't flatten
- // the loop (yet).
- TripCount = Compare->getOperand(1);
+ // count computed by SCEV then this is because the trip count variable
+ // has been widened so the types don't match, or because it is a constant and
+ // another transformation has changed the compare (e.g. icmp ult %inc,
+ // tripcount -> icmp ult %j, tripcount-1), or both.
+ Value *RHS = Compare->getOperand(1);
+ const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
+ LLVM_DEBUG(dbgs() << "Backedge-taken count is not predictable\n");
+ return false;
+ }
+ // The use of the Extend=false flag on getTripCountFromExitCount was added
+ // during a refactoring to preserve existing behavior. However, there's
+ // nothing obvious in the surrounding code that handles the overflow case.
+ // FIXME: audit code to establish whether there's a latent bug here.
const SCEV *SCEVTripCount =
- SE->getTripCountFromExitCount(SE->getBackedgeTakenCount(L));
- if (SE->getSCEV(TripCount) != SCEVTripCount) {
- if (!IsWidened) {
- LLVM_DEBUG(dbgs() << "Could not find valid trip count\n");
- return false;
- }
- auto TripCountInst = dyn_cast<Instruction>(TripCount);
- if (!TripCountInst) {
- LLVM_DEBUG(dbgs() << "Could not find valid extended trip count\n");
- return false;
+ SE->getTripCountFromExitCount(BackedgeTakenCount, false);
+ const SCEV *SCEVRHS = SE->getSCEV(RHS);
+ if (SCEVRHS == SCEVTripCount)
+ return setLoopComponents(RHS, TripCount, Increment, IterationInstructions);
+ ConstantInt *ConstantRHS = dyn_cast<ConstantInt>(RHS);
+ if (ConstantRHS) {
+ const SCEV *BackedgeTCExt = nullptr;
+ if (IsWidened) {
+ const SCEV *SCEVTripCountExt;
+ // Find the extended backedge taken count and extended trip count using
+ // SCEV. One of these should now match the RHS of the compare.
+ BackedgeTCExt = SE->getZeroExtendExpr(BackedgeTakenCount, RHS->getType());
+ SCEVTripCountExt = SE->getTripCountFromExitCount(BackedgeTCExt, false);
+ if (SCEVRHS != BackedgeTCExt && SCEVRHS != SCEVTripCountExt) {
+ LLVM_DEBUG(dbgs() << "Could not find valid trip count\n");
+ return false;
+ }
}
- if ((!isa<ZExtInst>(TripCountInst) && !isa<SExtInst>(TripCountInst)) ||
- SE->getSCEV(TripCountInst->getOperand(0)) != SCEVTripCount) {
- LLVM_DEBUG(dbgs() << "Could not find valid extended trip count\n");
- return false;
+ // If the RHS of the compare is equal to the backedge taken count we need
+ // to add one to get the trip count.
+ if (SCEVRHS == BackedgeTCExt || SCEVRHS == BackedgeTakenCount) {
+ ConstantInt *One = ConstantInt::get(ConstantRHS->getType(), 1);
+ Value *NewRHS = ConstantInt::get(
+ ConstantRHS->getContext(), ConstantRHS->getValue() + One->getValue());
+ return setLoopComponents(NewRHS, TripCount, Increment,
+ IterationInstructions);
}
+ return setLoopComponents(RHS, TripCount, Increment, IterationInstructions);
}
- IterationInstructions.insert(Increment);
- LLVM_DEBUG(dbgs() << "Found increment: "; Increment->dump());
- LLVM_DEBUG(dbgs() << "Found trip count: "; TripCount->dump());
-
- LLVM_DEBUG(dbgs() << "Successfully found all loop components\n");
- return true;
+ // If the RHS isn't a constant, check whether the reason it doesn't match
+ // the SCEV trip count is that the RHS is a ZExt or SExt instruction
+ // (and take the trip count to be the RHS).
+ if (!IsWidened) {
+ LLVM_DEBUG(dbgs() << "Could not find valid trip count\n");
+ return false;
+ }
+ auto *TripCountInst = dyn_cast<Instruction>(RHS);
+ if (!TripCountInst) {
+ LLVM_DEBUG(dbgs() << "Could not find valid trip count\n");
+ return false;
+ }
+ if ((!isa<ZExtInst>(TripCountInst) && !isa<SExtInst>(TripCountInst)) ||
+ SE->getSCEV(TripCountInst->getOperand(0)) != SCEVTripCount) {
+ LLVM_DEBUG(dbgs() << "Could not find valid extended trip count\n");
+ return false;
+ }
+ return setLoopComponents(RHS, TripCount, Increment, IterationInstructions);
}
static bool checkPHIs(FlattenInfo &FI, const TargetTransformInfo *TTI) {
@@ -221,6 +281,8 @@ static bool checkPHIs(FlattenInfo &FI, const TargetTransformInfo *TTI) {
// them specially when doing the transformation.
if (&InnerPHI == FI.InnerInductionPHI)
continue;
+ if (FI.isNarrowInductionPhi(&InnerPHI))
+ continue;
// Each inner loop PHI node must have two incoming values/blocks - one
// from the pre-header, and one from the latch.
@@ -266,6 +328,8 @@ static bool checkPHIs(FlattenInfo &FI, const TargetTransformInfo *TTI) {
}
for (PHINode &OuterPHI : FI.OuterLoop->getHeader()->phis()) {
+ if (FI.isNarrowInductionPhi(&OuterPHI))
+ continue;
if (!SafeOuterPHIs.count(&OuterPHI)) {
LLVM_DEBUG(dbgs() << "found unsafe PHI in outer loop: "; OuterPHI.dump());
return false;
@@ -356,18 +420,25 @@ static bool checkIVUsers(FlattenInfo &FI) {
if (U == FI.InnerIncrement)
continue;
- // After widening the IVs, a trunc instruction might have been introduced, so
- // look through truncs.
+ // After widening the IVs, a trunc instruction might have been introduced,
+ // so look through truncs.
if (isa<TruncInst>(U)) {
if (!U->hasOneUse())
return false;
U = *U->user_begin();
}
+ // If the use is in the compare (which is also the condition of the inner
+ // branch), then the compare has been altered by another transformation, e.g.
+ // icmp ult %inc, tripcount -> icmp ult %j, tripcount-1, where tripcount is
+ // a constant. Ignore this use as the compare gets removed later anyway.
+ if (U == FI.InnerBranch->getCondition())
+ continue;
+
LLVM_DEBUG(dbgs() << "Found use of inner induction variable: "; U->dump());
- Value *MatchedMul;
- Value *MatchedItCount;
+ Value *MatchedMul = nullptr;
+ Value *MatchedItCount = nullptr;
bool IsAdd = match(U, m_c_Add(m_Specific(FI.InnerInductionPHI),
m_Value(MatchedMul))) &&
match(MatchedMul, m_c_Mul(m_Specific(FI.OuterInductionPHI),
@@ -375,11 +446,23 @@ static bool checkIVUsers(FlattenInfo &FI) {
// Matches the same pattern as above, except it also looks for truncs
// on the phi, which can be the result of widening the induction variables.
- bool IsAddTrunc = match(U, m_c_Add(m_Trunc(m_Specific(FI.InnerInductionPHI)),
- m_Value(MatchedMul))) &&
- match(MatchedMul,
- m_c_Mul(m_Trunc(m_Specific(FI.OuterInductionPHI)),
- m_Value(MatchedItCount)));
+ bool IsAddTrunc =
+ match(U, m_c_Add(m_Trunc(m_Specific(FI.InnerInductionPHI)),
+ m_Value(MatchedMul))) &&
+ match(MatchedMul, m_c_Mul(m_Trunc(m_Specific(FI.OuterInductionPHI)),
+ m_Value(MatchedItCount)));
+
+ if (!MatchedItCount)
+ return false;
+ // Look through extends if the IV has been widened.
+ if (FI.Widened &&
+ (isa<SExtInst>(MatchedItCount) || isa<ZExtInst>(MatchedItCount))) {
+ assert(MatchedItCount->getType() == FI.InnerInductionPHI->getType() &&
+ "Unexpected type mismatch in types after widening");
+ MatchedItCount = isa<SExtInst>(MatchedItCount)
+ ? dyn_cast<SExtInst>(MatchedItCount)->getOperand(0)
+ : dyn_cast<ZExtInst>(MatchedItCount)->getOperand(0);
+ }
if ((IsAdd || IsAddTrunc) && MatchedItCount == InnerTripCount) {
LLVM_DEBUG(dbgs() << "Use is optimisable\n");
@@ -451,17 +534,27 @@ static OverflowResult checkOverflow(FlattenInfo &FI, DominatorTree *DT,
for (Value *V : FI.LinearIVUses) {
for (Value *U : V->users()) {
if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
- // The IV is used as the operand of a GEP, and the IV is at least as
- // wide as the address space of the GEP. In this case, the GEP would
- // wrap around the address space before the IV increment wraps, which
- // would be UB.
- if (GEP->isInBounds() &&
- V->getType()->getIntegerBitWidth() >=
- DL.getPointerTypeSizeInBits(GEP->getType())) {
- LLVM_DEBUG(
- dbgs() << "use of linear IV would be UB if overflow occurred: ";
- GEP->dump());
- return OverflowResult::NeverOverflows;
+ for (Value *GEPUser : U->users()) {
+ Instruction *GEPUserInst = dyn_cast<Instruction>(GEPUser);
+ if (!isa<LoadInst>(GEPUserInst) &&
+ !(isa<StoreInst>(GEPUserInst) &&
+ GEP == GEPUserInst->getOperand(1)))
+ continue;
+ if (!isGuaranteedToExecuteForEveryIteration(GEPUserInst,
+ FI.InnerLoop))
+ continue;
+ // The IV is used as the operand of a GEP which dominates the loop
+ // latch, and the IV is at least as wide as the address space of the
+ // GEP. In this case, the GEP would wrap around the address space
+ // before the IV increment wraps, which would be UB.
+ if (GEP->isInBounds() &&
+ V->getType()->getIntegerBitWidth() >=
+ DL.getPointerTypeSizeInBits(GEP->getType())) {
+ LLVM_DEBUG(
+ dbgs() << "use of linear IV would be UB if overflow occurred: ";
+ GEP->dump());
+ return OverflowResult::NeverOverflows;
+ }
}
}
}
@@ -518,7 +611,7 @@ static bool CanFlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
static bool DoFlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
ScalarEvolution *SE, AssumptionCache *AC,
- const TargetTransformInfo *TTI) {
+ const TargetTransformInfo *TTI, LPMUpdater *U) {
Function *F = FI.OuterLoop->getHeader()->getParent();
LLVM_DEBUG(dbgs() << "Checks all passed, doing the transformation\n");
{
@@ -574,7 +667,13 @@ static bool DoFlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
  // deleted, and any information that we have about the outer loop invalidated.
SE->forgetLoop(FI.OuterLoop);
SE->forgetLoop(FI.InnerLoop);
+ if (U)
+ U->markLoopAsDeleted(*FI.InnerLoop, FI.InnerLoop->getName());
LI->erase(FI.InnerLoop);
+
+ // Increment statistic value.
+ NumFlattened++;
+
return true;
}
@@ -605,14 +704,11 @@ static bool CanWidenIV(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
}
SCEVExpander Rewriter(*SE, DL, "loopflatten");
- SmallVector<WideIVInfo, 2> WideIVs;
SmallVector<WeakTrackingVH, 4> DeadInsts;
- WideIVs.push_back( {FI.InnerInductionPHI, MaxLegalType, false });
- WideIVs.push_back( {FI.OuterInductionPHI, MaxLegalType, false });
unsigned ElimExt = 0;
unsigned Widened = 0;
- for (const auto &WideIV : WideIVs) {
+ auto CreateWideIV = [&] (WideIVInfo WideIV, bool &Deleted) -> bool {
PHINode *WidePhi = createWideIV(WideIV, LI, SE, Rewriter, DT, DeadInsts,
ElimExt, Widened, true /* HasGuards */,
true /* UsePostIncrementRanges */);
@@ -620,17 +716,35 @@ static bool CanWidenIV(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
return false;
LLVM_DEBUG(dbgs() << "Created wide phi: "; WidePhi->dump());
LLVM_DEBUG(dbgs() << "Deleting old phi: "; WideIV.NarrowIV->dump());
- RecursivelyDeleteDeadPHINode(WideIV.NarrowIV);
- }
- // After widening, rediscover all the loop components.
+ Deleted = RecursivelyDeleteDeadPHINode(WideIV.NarrowIV);
+ return true;
+ };
+
+ bool Deleted;
+ if (!CreateWideIV({FI.InnerInductionPHI, MaxLegalType, false }, Deleted))
+ return false;
+  // Add the narrow phi to the list, so that it will be adjusted later when
+  // the transformation is performed.
+ if (!Deleted)
+ FI.InnerPHIsToTransform.insert(FI.InnerInductionPHI);
+
+ if (!CreateWideIV({FI.OuterInductionPHI, MaxLegalType, false }, Deleted))
+ return false;
+
assert(Widened && "Widened IV expected");
FI.Widened = true;
+
+ // Save the old/narrow induction phis, which we need to ignore in CheckPHIs.
+ FI.NarrowInnerInductionPHI = FI.InnerInductionPHI;
+ FI.NarrowOuterInductionPHI = FI.OuterInductionPHI;
+
+ // After widening, rediscover all the loop components.
return CanFlattenLoopPair(FI, DT, LI, SE, AC, TTI);
}
static bool FlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
ScalarEvolution *SE, AssumptionCache *AC,
- const TargetTransformInfo *TTI) {
+ const TargetTransformInfo *TTI, LPMUpdater *U) {
LLVM_DEBUG(
dbgs() << "Loop flattening running on outer loop "
<< FI.OuterLoop->getHeader()->getName() << " and inner loop "
@@ -641,12 +755,30 @@ static bool FlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
return false;
// Check if we can widen the induction variables to avoid overflow checks.
- if (CanWidenIV(FI, DT, LI, SE, AC, TTI))
- return DoFlattenLoopPair(FI, DT, LI, SE, AC, TTI);
-
- // Check if the new iteration variable might overflow. In this case, we
- // need to version the loop, and select the original version at runtime if
- // the iteration space is too large.
+ bool CanFlatten = CanWidenIV(FI, DT, LI, SE, AC, TTI);
+
+ // It can happen that after widening of the IV, flattening may not be
+ // possible/happening, e.g. when it is deemed unprofitable. So bail here if
+ // that is the case.
+ // TODO: IV widening without performing the actual flattening transformation
+ // is not ideal. While this codegen change should not matter much, it is an
+ // unnecessary change which is better to avoid. It's unlikely this happens
+  // often, because if it's unprofitable after widening, it should be
+  // unprofitable before widening as checked in the first round of checks. But
+ // 'RepeatedInstructionThreshold' is set to only 2, which can probably be
+ // relaxed. Because this is making a code change (the IV widening, but not
+ // the flattening), we return true here.
+ if (FI.Widened && !CanFlatten)
+ return true;
+
+ // If we have widened and can perform the transformation, do that here.
+ if (CanFlatten)
+ return DoFlattenLoopPair(FI, DT, LI, SE, AC, TTI, U);
+
+ // Otherwise, if we haven't widened the IV, check if the new iteration
+ // variable might overflow. In this case, we need to version the loop, and
+ // select the original version at runtime if the iteration space is too
+ // large.
// TODO: We currently don't version the loop.
OverflowResult OR = checkOverflow(FI, DT, AC);
if (OR == OverflowResult::AlwaysOverflowsHigh ||
@@ -659,18 +791,18 @@ static bool FlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
}
LLVM_DEBUG(dbgs() << "Multiply cannot overflow, modifying loop in-place\n");
- return DoFlattenLoopPair(FI, DT, LI, SE, AC, TTI);
+ return DoFlattenLoopPair(FI, DT, LI, SE, AC, TTI, U);
}
bool Flatten(LoopNest &LN, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE,
- AssumptionCache *AC, TargetTransformInfo *TTI) {
+ AssumptionCache *AC, TargetTransformInfo *TTI, LPMUpdater *U) {
bool Changed = false;
for (Loop *InnerLoop : LN.getLoops()) {
auto *OuterLoop = InnerLoop->getParentLoop();
if (!OuterLoop)
continue;
FlattenInfo FI(OuterLoop, InnerLoop);
- Changed |= FlattenLoopPair(FI, DT, LI, SE, AC, TTI);
+ Changed |= FlattenLoopPair(FI, DT, LI, SE, AC, TTI, U);
}
return Changed;
}
@@ -685,12 +817,12 @@ PreservedAnalyses LoopFlattenPass::run(LoopNest &LN, LoopAnalysisManager &LAM,
// in simplified form, and also needs LCSSA. Running
// this pass will simplify all loops that contain inner loops,
// regardless of whether anything ends up being flattened.
- Changed |= Flatten(LN, &AR.DT, &AR.LI, &AR.SE, &AR.AC, &AR.TTI);
+ Changed |= Flatten(LN, &AR.DT, &AR.LI, &AR.SE, &AR.AC, &AR.TTI, &U);
if (!Changed)
return PreservedAnalyses::all();
- return PreservedAnalyses::none();
+ return getLoopPassPreservedAnalyses();
}
namespace {
@@ -735,7 +867,7 @@ bool LoopFlattenLegacyPass::runOnFunction(Function &F) {
bool Changed = false;
for (Loop *L : *LI) {
auto LN = LoopNest::getLoopNest(*L, *SE);
- Changed |= Flatten(*LN, DT, LI, SE, AC, TTI);
+ Changed |= Flatten(*LN, DT, LI, SE, AC, TTI, nullptr);
}
return Changed;
}
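For orientation, a hand-written before/after sketch of the nest shape LoopFlatten targets (an assumed example, not taken from the patch or its tests): the inner IV is only consumed as i * M + j, so the two loops collapse into a single loop over N * M.

void before(int *A, int N, int M) {
  for (int i = 0; i < N; ++i)
    for (int j = 0; j < M; ++j)
      A[i * M + j] = 0;
}
void after(int *A, int N, int M) {
  for (int k = 0; k < N * M; ++k) // flattened induction variable
    A[k] = 0;
}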
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index a153f393448c..42da86a9ecf5 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -217,15 +217,15 @@ private:
bool processLoopMemCpy(MemCpyInst *MCI, const SCEV *BECount);
bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
- bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
+ bool processLoopStridedStore(Value *DestPtr, const SCEV *StoreSizeSCEV,
MaybeAlign StoreAlignment, Value *StoredVal,
Instruction *TheStore,
SmallPtrSetImpl<Instruction *> &Stores,
const SCEVAddRecExpr *Ev, const SCEV *BECount,
- bool NegStride, bool IsLoopMemset = false);
+ bool IsNegStride, bool IsLoopMemset = false);
bool processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount);
bool processLoopStoreOfLoopLoad(Value *DestPtr, Value *SourcePtr,
- unsigned StoreSize, MaybeAlign StoreAlign,
+ const SCEV *StoreSize, MaybeAlign StoreAlign,
MaybeAlign LoadAlign, Instruction *TheStore,
Instruction *TheLoad,
const SCEVAddRecExpr *StoreEv,
@@ -625,8 +625,8 @@ bool LoopIdiomRecognize::runOnLoopBlock(
// We can only promote stores in this block if they are unconditionally
// executed in the loop. For a block to be unconditionally executed, it has
// to dominate all the exit blocks of the loop. Verify this now.
- for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
- if (!DT->dominates(BB, ExitBlocks[i]))
+ for (BasicBlock *ExitBlock : ExitBlocks)
+ if (!DT->dominates(BB, ExitBlock))
return false;
bool MadeChange = false;
@@ -750,16 +750,13 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
bool Changed = false;
// For stores that start but don't end a link in the chain:
- for (SetVector<StoreInst *>::iterator it = Heads.begin(), e = Heads.end();
- it != e; ++it) {
- if (Tails.count(*it))
+ for (StoreInst *I : Heads) {
+ if (Tails.count(I))
continue;
// We found a store instr that starts a chain. Now follow the chain and try
// to transform it.
SmallPtrSet<Instruction *, 8> AdjacentStores;
- StoreInst *I = *it;
-
StoreInst *HeadStore = I;
unsigned StoreSize = 0;
@@ -784,12 +781,14 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
if (StoreSize != Stride && StoreSize != -Stride)
continue;
- bool NegStride = StoreSize == -Stride;
+ bool IsNegStride = StoreSize == -Stride;
- if (processLoopStridedStore(StorePtr, StoreSize,
+ Type *IntIdxTy = DL->getIndexType(StorePtr->getType());
+ const SCEV *StoreSizeSCEV = SE->getConstant(IntIdxTy, StoreSize);
+ if (processLoopStridedStore(StorePtr, StoreSizeSCEV,
MaybeAlign(HeadStore->getAlignment()),
StoredVal, HeadStore, AdjacentStores, StoreEv,
- BECount, NegStride)) {
+ BECount, IsNegStride)) {
TransformedStores.insert(AdjacentStores.begin(), AdjacentStores.end());
Changed = true;
}
@@ -857,15 +856,15 @@ bool LoopIdiomRecognize::processLoopMemCpy(MemCpyInst *MCI,
// Check if the stride matches the size of the memcpy. If so, then we know
// that every byte is touched in the loop.
- const SCEVConstant *StoreStride =
+ const SCEVConstant *ConstStoreStride =
dyn_cast<SCEVConstant>(StoreEv->getOperand(1));
- const SCEVConstant *LoadStride =
+ const SCEVConstant *ConstLoadStride =
dyn_cast<SCEVConstant>(LoadEv->getOperand(1));
- if (!StoreStride || !LoadStride)
+ if (!ConstStoreStride || !ConstLoadStride)
return false;
- APInt StoreStrideValue = StoreStride->getAPInt();
- APInt LoadStrideValue = LoadStride->getAPInt();
+ APInt StoreStrideValue = ConstStoreStride->getAPInt();
+ APInt LoadStrideValue = ConstLoadStride->getAPInt();
// Huge stride value - give up
if (StoreStrideValue.getBitWidth() > 64 || LoadStrideValue.getBitWidth() > 64)
return false;
@@ -875,7 +874,7 @@ bool LoopIdiomRecognize::processLoopMemCpy(MemCpyInst *MCI,
return OptimizationRemarkMissed(DEBUG_TYPE, "SizeStrideUnequal", MCI)
<< ore::NV("Inst", "memcpy") << " in "
<< ore::NV("Function", MCI->getFunction())
- << " function will not be hoised: "
+ << " function will not be hoisted: "
<< ore::NV("Reason", "memcpy size is not equal to stride");
});
return false;
@@ -887,16 +886,17 @@ bool LoopIdiomRecognize::processLoopMemCpy(MemCpyInst *MCI,
if (StoreStrideInt != LoadStrideInt)
return false;
- return processLoopStoreOfLoopLoad(Dest, Source, (unsigned)SizeInBytes,
- MCI->getDestAlign(), MCI->getSourceAlign(),
- MCI, MCI, StoreEv, LoadEv, BECount);
+ return processLoopStoreOfLoopLoad(
+ Dest, Source, SE->getConstant(Dest->getType(), SizeInBytes),
+ MCI->getDestAlign(), MCI->getSourceAlign(), MCI, MCI, StoreEv, LoadEv,
+ BECount);
}
/// processLoopMemSet - See if this memset can be promoted to a large memset.
bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
const SCEV *BECount) {
- // We can only handle non-volatile memsets with a constant size.
- if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength()))
+ // We can only handle non-volatile memsets.
+ if (MSI->isVolatile())
return false;
// If we're not allowed to hack on memset, we fail.
@@ -909,23 +909,72 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
// loop, which indicates a strided store. If we have something else, it's a
// random store we can't handle.
const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Pointer));
- if (!Ev || Ev->getLoop() != CurLoop || !Ev->isAffine())
+ if (!Ev || Ev->getLoop() != CurLoop)
return false;
-
- // Reject memsets that are so large that they overflow an unsigned.
- uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
- if ((SizeInBytes >> 32) != 0)
+ if (!Ev->isAffine()) {
+ LLVM_DEBUG(dbgs() << " Pointer is not affine, abort\n");
return false;
+ }
- // Check to see if the stride matches the size of the memset. If so, then we
- // know that every byte is touched in the loop.
- const SCEVConstant *ConstStride = dyn_cast<SCEVConstant>(Ev->getOperand(1));
- if (!ConstStride)
+ const SCEV *PointerStrideSCEV = Ev->getOperand(1);
+ const SCEV *MemsetSizeSCEV = SE->getSCEV(MSI->getLength());
+ if (!PointerStrideSCEV || !MemsetSizeSCEV)
return false;
- APInt Stride = ConstStride->getAPInt();
- if (SizeInBytes != Stride && SizeInBytes != -Stride)
- return false;
+ bool IsNegStride = false;
+ const bool IsConstantSize = isa<ConstantInt>(MSI->getLength());
+
+ if (IsConstantSize) {
+ // Memset size is constant.
+ // Check if the pointer stride matches the memset size. If so, then
+ // we know that every byte is touched in the loop.
+ LLVM_DEBUG(dbgs() << " memset size is constant\n");
+ uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
+ const SCEVConstant *ConstStride = dyn_cast<SCEVConstant>(Ev->getOperand(1));
+ if (!ConstStride)
+ return false;
+
+ APInt Stride = ConstStride->getAPInt();
+ if (SizeInBytes != Stride && SizeInBytes != -Stride)
+ return false;
+
+ IsNegStride = SizeInBytes == -Stride;
+ } else {
+ // Memset size is non-constant.
+ // Check if the pointer stride matches the memset size.
+ // To be conservative, the pass would not promote pointers that aren't in
+ // address space zero. Also, the pass only handles memset length and stride
+ // that are invariant for the top level loop.
+ LLVM_DEBUG(dbgs() << " memset size is non-constant\n");
+ if (Pointer->getType()->getPointerAddressSpace() != 0) {
+ LLVM_DEBUG(dbgs() << " pointer is not in address space zero, "
+ << "abort\n");
+ return false;
+ }
+ if (!SE->isLoopInvariant(MemsetSizeSCEV, CurLoop)) {
+ LLVM_DEBUG(dbgs() << " memset size is not a loop-invariant, "
+ << "abort\n");
+ return false;
+ }
+
+ // Compare positive direction PointerStrideSCEV with MemsetSizeSCEV
+ IsNegStride = PointerStrideSCEV->isNonConstantNegative();
+ const SCEV *PositiveStrideSCEV =
+ IsNegStride ? SE->getNegativeSCEV(PointerStrideSCEV)
+ : PointerStrideSCEV;
+ LLVM_DEBUG(dbgs() << " MemsetSizeSCEV: " << *MemsetSizeSCEV << "\n"
+ << " PositiveStrideSCEV: " << *PositiveStrideSCEV
+ << "\n");
+
+ if (PositiveStrideSCEV != MemsetSizeSCEV) {
+ // TODO: folding can be done to the SCEVs
+      // The folding is to fold expressions that are covered by the loop guard
+      // at loop entry. After the folding, compare again and proceed with the
+      // optimization if equal.
+ LLVM_DEBUG(dbgs() << " SCEV don't match, abort\n");
+ return false;
+ }
+ }
// Verify that the memset value is loop invariant. If not, we can't promote
// the memset.
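A sketch of the new non-constant-size case (made-up names, assuming the length and stride match as required above): the memset length m equals the pointer stride and both are loop invariant, so every byte of the n * m range is written exactly once.

#include <cstring>
void zero_rows(char *p, long n, long m) {
  for (long i = 0; i < n; ++i)
    std::memset(p + i * m, 0, m); // non-constant size, stride == size
  // conceptually becomes: std::memset(p, 0, n * m);
}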
@@ -935,10 +984,10 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
SmallPtrSet<Instruction *, 1> MSIs;
MSIs.insert(MSI);
- bool NegStride = SizeInBytes == -Stride;
- return processLoopStridedStore(
- Pointer, (unsigned)SizeInBytes, MaybeAlign(MSI->getDestAlignment()),
- SplatValue, MSI, MSIs, Ev, BECount, NegStride, /*IsLoopMemset=*/true);
+ return processLoopStridedStore(Pointer, SE->getSCEV(MSI->getLength()),
+ MaybeAlign(MSI->getDestAlignment()),
+ SplatValue, MSI, MSIs, Ev, BECount,
+ IsNegStride, /*IsLoopMemset=*/true);
}
/// mayLoopAccessLocation - Return true if the specified loop might access the
@@ -946,9 +995,9 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
/// argument specifies what the verboten forms of access are (read or write).
static bool
mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
- const SCEV *BECount, unsigned StoreSize,
+ const SCEV *BECount, const SCEV *StoreSizeSCEV,
AliasAnalysis &AA,
- SmallPtrSetImpl<Instruction *> &IgnoredStores) {
+ SmallPtrSetImpl<Instruction *> &IgnoredInsts) {
// Get the location that may be stored across the loop. Since the access is
// strided positively through memory, we say that the modified location starts
// at the pointer and has infinite size.
@@ -956,9 +1005,11 @@ mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
// If the loop iterates a fixed number of times, we can refine the access size
// to be exactly the size of the memset, which is (BECount+1)*StoreSize
- if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
+ const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount);
+ const SCEVConstant *ConstSize = dyn_cast<SCEVConstant>(StoreSizeSCEV);
+ if (BECst && ConstSize)
AccessSize = LocationSize::precise((BECst->getValue()->getZExtValue() + 1) *
- StoreSize);
+ ConstSize->getValue()->getZExtValue());
// TODO: For this to be really effective, we have to dive into the pointer
// operand in the store. Store to &A[i] of 100 will always return may alias
@@ -966,14 +1017,12 @@ mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
// which will then no-alias a store to &A[100].
MemoryLocation StoreLoc(Ptr, AccessSize);
- for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
- ++BI)
- for (Instruction &I : **BI)
- if (IgnoredStores.count(&I) == 0 &&
+ for (BasicBlock *B : L->blocks())
+ for (Instruction &I : *B)
+ if (!IgnoredInsts.contains(&I) &&
isModOrRefSet(
intersectModRef(AA.getModRefInfo(&I, StoreLoc), Access)))
return true;
-
return false;
}
@@ -981,57 +1030,67 @@ mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
// we're trying to memset. Therefore, we need to recompute the base pointer,
// which is just Start - BECount*Size.
static const SCEV *getStartForNegStride(const SCEV *Start, const SCEV *BECount,
- Type *IntPtr, unsigned StoreSize,
+ Type *IntPtr, const SCEV *StoreSizeSCEV,
ScalarEvolution *SE) {
const SCEV *Index = SE->getTruncateOrZeroExtend(BECount, IntPtr);
- if (StoreSize != 1)
- Index = SE->getMulExpr(Index, SE->getConstant(IntPtr, StoreSize),
+ if (!StoreSizeSCEV->isOne()) {
+ // index = back edge count * store size
+ Index = SE->getMulExpr(Index,
+ SE->getTruncateOrZeroExtend(StoreSizeSCEV, IntPtr),
SCEV::FlagNUW);
+ }
+ // base pointer = start - index * store size
return SE->getMinusSCEV(Start, Index);
}
-/// Compute the number of bytes as a SCEV from the backedge taken count.
-///
-/// This also maps the SCEV into the provided type and tries to handle the
-/// computation in a way that will fold cleanly.
-static const SCEV *getNumBytes(const SCEV *BECount, Type *IntPtr,
- unsigned StoreSize, Loop *CurLoop,
- const DataLayout *DL, ScalarEvolution *SE) {
- const SCEV *NumBytesS;
- // The # stored bytes is (BECount+1)*Size. Expand the trip count out to
+/// Compute trip count from the backedge taken count.
+static const SCEV *getTripCount(const SCEV *BECount, Type *IntPtr,
+ Loop *CurLoop, const DataLayout *DL,
+ ScalarEvolution *SE) {
+ const SCEV *TripCountS = nullptr;
+  // The trip count is (BECount+1). Expand it out to
// pointer size if it isn't already.
//
// If we're going to need to zero extend the BE count, check if we can add
// one to it prior to zero extending without overflow. Provided this is safe,
// it allows better simplification of the +1.
- if (DL->getTypeSizeInBits(BECount->getType()).getFixedSize() <
- DL->getTypeSizeInBits(IntPtr).getFixedSize() &&
+ if (DL->getTypeSizeInBits(BECount->getType()) <
+ DL->getTypeSizeInBits(IntPtr) &&
SE->isLoopEntryGuardedByCond(
CurLoop, ICmpInst::ICMP_NE, BECount,
SE->getNegativeSCEV(SE->getOne(BECount->getType())))) {
- NumBytesS = SE->getZeroExtendExpr(
+ TripCountS = SE->getZeroExtendExpr(
SE->getAddExpr(BECount, SE->getOne(BECount->getType()), SCEV::FlagNUW),
IntPtr);
} else {
- NumBytesS = SE->getAddExpr(SE->getTruncateOrZeroExtend(BECount, IntPtr),
- SE->getOne(IntPtr), SCEV::FlagNUW);
+ TripCountS = SE->getAddExpr(SE->getTruncateOrZeroExtend(BECount, IntPtr),
+ SE->getOne(IntPtr), SCEV::FlagNUW);
}
- // And scale it based on the store size.
- if (StoreSize != 1) {
- NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
- SCEV::FlagNUW);
- }
- return NumBytesS;
+ return TripCountS;
+}
+
+/// Compute the number of bytes as a SCEV from the backedge taken count.
+///
+/// This also maps the SCEV into the provided type and tries to handle the
+/// computation in a way that will fold cleanly.
+static const SCEV *getNumBytes(const SCEV *BECount, Type *IntPtr,
+ const SCEV *StoreSizeSCEV, Loop *CurLoop,
+ const DataLayout *DL, ScalarEvolution *SE) {
+ const SCEV *TripCountSCEV = getTripCount(BECount, IntPtr, CurLoop, DL, SE);
+
+ return SE->getMulExpr(TripCountSCEV,
+ SE->getTruncateOrZeroExtend(StoreSizeSCEV, IntPtr),
+ SCEV::FlagNUW);
}
/// processLoopStridedStore - We see a strided store of some value. If we can
/// transform this into a memset or memset_pattern in the loop preheader, do so.
bool LoopIdiomRecognize::processLoopStridedStore(
- Value *DestPtr, unsigned StoreSize, MaybeAlign StoreAlignment,
+ Value *DestPtr, const SCEV *StoreSizeSCEV, MaybeAlign StoreAlignment,
Value *StoredVal, Instruction *TheStore,
SmallPtrSetImpl<Instruction *> &Stores, const SCEVAddRecExpr *Ev,
- const SCEV *BECount, bool NegStride, bool IsLoopMemset) {
+ const SCEV *BECount, bool IsNegStride, bool IsLoopMemset) {
Value *SplatValue = isBytewiseValue(StoredVal, *DL);
Constant *PatternValue = nullptr;
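A worked instance of the size helpers above, with made-up numbers (plain C++, not the LLVM API):

constexpr long BECount = 9;                      // back-edge taken count: 10 iterations
constexpr long StoreSize = 4;                    // bytes stored per iteration
constexpr long TripCount = BECount + 1;          // getTripCount -> 10
constexpr long NumBytes = TripCount * StoreSize; // getNumBytes  -> 40
// For a negative stride, getStartForNegStride rebases the pointer to
// Start - BECount * StoreSize (Start - 36 here), the lowest address written.
static_assert(NumBytes == 40, "memset/memcpy covers 40 bytes");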
@@ -1056,8 +1115,8 @@ bool LoopIdiomRecognize::processLoopStridedStore(
bool Changed = false;
const SCEV *Start = Ev->getStart();
// Handle negative strided loops.
- if (NegStride)
- Start = getStartForNegStride(Start, BECount, IntIdxTy, StoreSize, SE);
+ if (IsNegStride)
+ Start = getStartForNegStride(Start, BECount, IntIdxTy, StoreSizeSCEV, SE);
// TODO: ideally we should still be able to generate memset if SCEV expander
// is taught to generate the dependencies at the latest point.
@@ -1082,7 +1141,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
Changed = true;
if (mayLoopAccessLocation(BasePtr, ModRefInfo::ModRef, CurLoop, BECount,
- StoreSize, *AA, Stores))
+ StoreSizeSCEV, *AA, Stores))
return Changed;
if (avoidLIRForMultiBlockLoop(/*IsMemset=*/true, IsLoopMemset))
@@ -1091,7 +1150,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
// Okay, everything looks good, insert the memset.
const SCEV *NumBytesS =
- getNumBytes(BECount, IntIdxTy, StoreSize, CurLoop, DL, SE);
+ getNumBytes(BECount, IntIdxTy, StoreSizeSCEV, CurLoop, DL, SE);
// TODO: ideally we should still be able to generate memset if SCEV expander
// is taught to generate the dependencies at the latest point.
@@ -1138,13 +1197,20 @@ bool LoopIdiomRecognize::processLoopStridedStore(
<< "\n");
ORE.emit([&]() {
- return OptimizationRemark(DEBUG_TYPE, "ProcessLoopStridedStore",
- NewCall->getDebugLoc(), Preheader)
- << "Transformed loop-strided store in "
- << ore::NV("Function", TheStore->getFunction())
- << " function into a call to "
- << ore::NV("NewFunction", NewCall->getCalledFunction())
- << "() intrinsic";
+ OptimizationRemark R(DEBUG_TYPE, "ProcessLoopStridedStore",
+ NewCall->getDebugLoc(), Preheader);
+ R << "Transformed loop-strided store in "
+ << ore::NV("Function", TheStore->getFunction())
+ << " function into a call to "
+ << ore::NV("NewFunction", NewCall->getCalledFunction())
+ << "() intrinsic";
+ if (!Stores.empty())
+ R << ore::setExtraArgs();
+ for (auto *I : Stores) {
+ R << ore::NV("FromBlock", I->getParent()->getName())
+ << ore::NV("ToBlock", Preheader->getName());
+ }
+ return R;
});
// Okay, the memset has been formed. Zap the original store and anything that
@@ -1181,16 +1247,63 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
// random load we can't handle.
Value *LoadPtr = LI->getPointerOperand();
const SCEVAddRecExpr *LoadEv = cast<SCEVAddRecExpr>(SE->getSCEV(LoadPtr));
- return processLoopStoreOfLoopLoad(StorePtr, LoadPtr, StoreSize,
+
+ const SCEV *StoreSizeSCEV = SE->getConstant(StorePtr->getType(), StoreSize);
+ return processLoopStoreOfLoopLoad(StorePtr, LoadPtr, StoreSizeSCEV,
SI->getAlign(), LI->getAlign(), SI, LI,
StoreEv, LoadEv, BECount);
}
+class MemmoveVerifier {
+public:
+ explicit MemmoveVerifier(const Value &LoadBasePtr, const Value &StoreBasePtr,
+ const DataLayout &DL)
+ : DL(DL), LoadOff(0), StoreOff(0),
+ BP1(llvm::GetPointerBaseWithConstantOffset(
+ LoadBasePtr.stripPointerCasts(), LoadOff, DL)),
+ BP2(llvm::GetPointerBaseWithConstantOffset(
+ StoreBasePtr.stripPointerCasts(), StoreOff, DL)),
+ IsSameObject(BP1 == BP2) {}
+
+ bool loadAndStoreMayFormMemmove(unsigned StoreSize, bool IsNegStride,
+ const Instruction &TheLoad,
+ bool IsMemCpy) const {
+ if (IsMemCpy) {
+ // Ensure that LoadBasePtr is after StoreBasePtr or before StoreBasePtr
+ // for negative stride.
+ if ((!IsNegStride && LoadOff <= StoreOff) ||
+ (IsNegStride && LoadOff >= StoreOff))
+ return false;
+ } else {
+ // Ensure that LoadBasePtr is after StoreBasePtr or before StoreBasePtr
+ // for negative stride. LoadBasePtr shouldn't overlap with StoreBasePtr.
+ int64_t LoadSize =
+ DL.getTypeSizeInBits(TheLoad.getType()).getFixedSize() / 8;
+ if (BP1 != BP2 || LoadSize != int64_t(StoreSize))
+ return false;
+ if ((!IsNegStride && LoadOff < StoreOff + int64_t(StoreSize)) ||
+ (IsNegStride && LoadOff + LoadSize > StoreOff))
+ return false;
+ }
+ return true;
+ }
+
+private:
+ const DataLayout &DL;
+ int64_t LoadOff;
+ int64_t StoreOff;
+ const Value *BP1;
+ const Value *BP2;
+
+public:
+ const bool IsSameObject;
+};
+
bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
- Value *DestPtr, Value *SourcePtr, unsigned StoreSize, MaybeAlign StoreAlign,
- MaybeAlign LoadAlign, Instruction *TheStore, Instruction *TheLoad,
- const SCEVAddRecExpr *StoreEv, const SCEVAddRecExpr *LoadEv,
- const SCEV *BECount) {
+ Value *DestPtr, Value *SourcePtr, const SCEV *StoreSizeSCEV,
+ MaybeAlign StoreAlign, MaybeAlign LoadAlign, Instruction *TheStore,
+ Instruction *TheLoad, const SCEVAddRecExpr *StoreEv,
+ const SCEVAddRecExpr *LoadEv, const SCEV *BECount) {
// FIXME: until llvm.memcpy.inline supports dynamic sizes, we need to
// conservatively bail here, since otherwise we may have to transform
@@ -1213,11 +1326,18 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
Type *IntIdxTy = Builder.getIntNTy(DL->getIndexSizeInBits(StrAS));
APInt Stride = getStoreStride(StoreEv);
- bool NegStride = StoreSize == -Stride;
+ const SCEVConstant *ConstStoreSize = dyn_cast<SCEVConstant>(StoreSizeSCEV);
+
+ // TODO: Deal with non-constant size; Currently expect constant store size
+ assert(ConstStoreSize && "store size is expected to be a constant");
+
+ int64_t StoreSize = ConstStoreSize->getValue()->getZExtValue();
+ bool IsNegStride = StoreSize == -Stride;
// Handle negative strided loops.
- if (NegStride)
- StrStart = getStartForNegStride(StrStart, BECount, IntIdxTy, StoreSize, SE);
+ if (IsNegStride)
+ StrStart =
+ getStartForNegStride(StrStart, BECount, IntIdxTy, StoreSizeSCEV, SE);
// Okay, we have a strided store "p[i]" of a loaded value. We can turn
// this into a memcpy in the loop preheader now if we want. However, this
@@ -1237,24 +1357,24 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
// the return value will read this comment, and leave them alone.
Changed = true;
- SmallPtrSet<Instruction *, 2> Stores;
- Stores.insert(TheStore);
+ SmallPtrSet<Instruction *, 2> IgnoredInsts;
+ IgnoredInsts.insert(TheStore);
bool IsMemCpy = isa<MemCpyInst>(TheStore);
const StringRef InstRemark = IsMemCpy ? "memcpy" : "load and store";
- bool UseMemMove =
+ bool LoopAccessStore =
mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
- StoreSize, *AA, Stores);
- if (UseMemMove) {
+ StoreSizeSCEV, *AA, IgnoredInsts);
+ if (LoopAccessStore) {
// For memmove case it's not enough to guarantee that loop doesn't access
// TheStore and TheLoad. Additionally we need to make sure that TheStore is
// the only user of TheLoad.
if (!TheLoad->hasOneUse())
return Changed;
- Stores.insert(TheLoad);
+ IgnoredInsts.insert(TheLoad);
if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop,
- BECount, StoreSize, *AA, Stores)) {
+ BECount, StoreSizeSCEV, *AA, IgnoredInsts)) {
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessStore",
TheStore)
@@ -1265,15 +1385,16 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
});
return Changed;
}
- Stores.erase(TheLoad);
+ IgnoredInsts.erase(TheLoad);
}
const SCEV *LdStart = LoadEv->getStart();
unsigned LdAS = SourcePtr->getType()->getPointerAddressSpace();
// Handle negative strided loops.
- if (NegStride)
- LdStart = getStartForNegStride(LdStart, BECount, IntIdxTy, StoreSize, SE);
+ if (IsNegStride)
+ LdStart =
+ getStartForNegStride(LdStart, BECount, IntIdxTy, StoreSizeSCEV, SE);
// For a memcpy, we have to make sure that the input array is not being
// mutated by the loop.
@@ -1283,42 +1404,40 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
// If the store is a memcpy instruction, we must check if it will write to
// the load memory locations. So remove it from the ignored stores.
if (IsMemCpy)
- Stores.erase(TheStore);
+ IgnoredInsts.erase(TheStore);
+ MemmoveVerifier Verifier(*LoadBasePtr, *StoreBasePtr, *DL);
if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
- StoreSize, *AA, Stores)) {
- ORE.emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessLoad", TheLoad)
- << ore::NV("Inst", InstRemark) << " in "
- << ore::NV("Function", TheStore->getFunction())
- << " function will not be hoisted: "
- << ore::NV("Reason", "The loop may access load location");
- });
- return Changed;
- }
- if (UseMemMove) {
- // Ensure that LoadBasePtr is after StoreBasePtr or before StoreBasePtr for
- // negative stride. LoadBasePtr shouldn't overlap with StoreBasePtr.
- int64_t LoadOff = 0, StoreOff = 0;
- const Value *BP1 = llvm::GetPointerBaseWithConstantOffset(
- LoadBasePtr->stripPointerCasts(), LoadOff, *DL);
- const Value *BP2 = llvm::GetPointerBaseWithConstantOffset(
- StoreBasePtr->stripPointerCasts(), StoreOff, *DL);
- int64_t LoadSize =
- DL->getTypeSizeInBits(TheLoad->getType()).getFixedSize() / 8;
- if (BP1 != BP2 || LoadSize != int64_t(StoreSize))
+ StoreSizeSCEV, *AA, IgnoredInsts)) {
+ if (!IsMemCpy) {
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessLoad",
+ TheLoad)
+ << ore::NV("Inst", InstRemark) << " in "
+ << ore::NV("Function", TheStore->getFunction())
+ << " function will not be hoisted: "
+ << ore::NV("Reason", "The loop may access load location");
+ });
return Changed;
- if ((!NegStride && LoadOff < StoreOff + int64_t(StoreSize)) ||
- (NegStride && LoadOff + LoadSize > StoreOff))
+ }
+    // At this point the loop may access the load location only through a
+    // memcpy on the same underlying object. If that's not the case, bail out.
+ if (!Verifier.IsSameObject)
return Changed;
}
+ bool UseMemMove = IsMemCpy ? Verifier.IsSameObject : LoopAccessStore;
+ if (UseMemMove)
+ if (!Verifier.loadAndStoreMayFormMemmove(StoreSize, IsNegStride, *TheLoad,
+ IsMemCpy))
+ return Changed;
+
if (avoidLIRForMultiBlockLoop())
return Changed;
// Okay, everything is safe, we can transform this!
const SCEV *NumBytesS =
- getNumBytes(BECount, IntIdxTy, StoreSize, CurLoop, DL, SE);
+ getNumBytes(BECount, IntIdxTy, StoreSizeSCEV, CurLoop, DL, SE);
Value *NumBytes =
Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());
@@ -1380,11 +1499,14 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
<< ore::NV("NewFunction", NewCall->getCalledFunction())
<< "() intrinsic from " << ore::NV("Inst", InstRemark)
<< " instruction in " << ore::NV("Function", TheStore->getFunction())
- << " function";
+ << " function"
+ << ore::setExtraArgs()
+ << ore::NV("FromBlock", TheStore->getParent()->getName())
+ << ore::NV("ToBlock", Preheader->getName());
});
- // Okay, the memcpy has been formed. Zap the original store and anything that
- // feeds into it.
+ // Okay, a new call to memcpy/memmove has been formed. Zap the original store
+ // and anything that feeds into it.
if (MSSAU)
MSSAU->removeMemoryAccess(TheStore, true);
deleteDeadInstruction(TheStore);
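A sketch of the overlap case that now becomes a memmove (my own example with made-up names): source and destination share one underlying object and the load side trails the store side in the stride direction, which is exactly what the MemmoveVerifier accepts.

#include <cstring>
// Assumes A holds at least (n + 1) * m bytes.
void shift_left(char *A, long n, long m) {
  for (long i = 0; i < n; ++i)
    std::memcpy(A + i * m, A + (i + 1) * m, m); // same object, load after store
  // conceptually becomes: std::memmove(A, A + m, n * m);
}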
@@ -1549,24 +1671,22 @@ static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB,
// step 4: Find the instruction which count the population: cnt2 = cnt1 + 1
{
CountInst = nullptr;
- for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI()->getIterator(),
- IterE = LoopEntry->end();
- Iter != IterE; Iter++) {
- Instruction *Inst = &*Iter;
- if (Inst->getOpcode() != Instruction::Add)
+ for (Instruction &Inst : llvm::make_range(
+ LoopEntry->getFirstNonPHI()->getIterator(), LoopEntry->end())) {
+ if (Inst.getOpcode() != Instruction::Add)
continue;
- ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
+ ConstantInt *Inc = dyn_cast<ConstantInt>(Inst.getOperand(1));
if (!Inc || !Inc->isOne())
continue;
- PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry);
+ PHINode *Phi = getRecurrenceVar(Inst.getOperand(0), &Inst, LoopEntry);
if (!Phi)
continue;
// Check if the result of the instruction is live of the loop.
bool LiveOutLoop = false;
- for (User *U : Inst->users()) {
+ for (User *U : Inst.users()) {
if ((cast<Instruction>(U))->getParent() != LoopEntry) {
LiveOutLoop = true;
break;
@@ -1574,7 +1694,7 @@ static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB,
}
if (LiveOutLoop) {
- CountInst = Inst;
+ CountInst = &Inst;
CountPhi = Phi;
break;
}
@@ -1675,22 +1795,20 @@ static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL,
// plus "cnt0". Currently it is not optimized.
// This step could be used to detect POPCNT instruction:
// cnt.next = cnt + (x.next & 1)
- for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI()->getIterator(),
- IterE = LoopEntry->end();
- Iter != IterE; Iter++) {
- Instruction *Inst = &*Iter;
- if (Inst->getOpcode() != Instruction::Add)
+ for (Instruction &Inst : llvm::make_range(
+ LoopEntry->getFirstNonPHI()->getIterator(), LoopEntry->end())) {
+ if (Inst.getOpcode() != Instruction::Add)
continue;
- ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
+ ConstantInt *Inc = dyn_cast<ConstantInt>(Inst.getOperand(1));
if (!Inc || (!Inc->isOne() && !Inc->isMinusOne()))
continue;
- PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry);
+ PHINode *Phi = getRecurrenceVar(Inst.getOperand(0), &Inst, LoopEntry);
if (!Phi)
continue;
- CntInst = Inst;
+ CntInst = &Inst;
CntPhi = Phi;
break;
}
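The classic population-count idiom that detectPopcountIdiom walks over, as a stand-alone sketch (not taken from the patch):

int popcount_idiom(unsigned x) {
  int cnt = 0;  // the recurrence PHI starts at 0
  while (x) {
    x &= x - 1; // clear the lowest set bit
    ++cnt;      // cnt.next = cnt + 1, the Add matched in step 4 above
  }
  return cnt;   // the counter is live out of the loop
}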
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
index 3153a8721193..b9e63a4bc06f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -105,9 +105,7 @@ static bool simplifyLoopInst(Loop &L, DominatorTree &DT, LoopInfo &LI,
if (!V || !LI.replacementPreservesLCSSAForm(&I, V))
continue;
- for (Value::use_iterator UI = I.use_begin(), UE = I.use_end();
- UI != UE;) {
- Use &U = *UI++;
+ for (Use &U : llvm::make_early_inc_range(I.uses())) {
auto *UserI = cast<Instruction>(U.getUser());
U.set(V);
@@ -195,15 +193,10 @@ public:
const TargetLibraryInfo &TLI =
getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
*L->getHeader()->getParent());
- MemorySSA *MSSA = nullptr;
- Optional<MemorySSAUpdater> MSSAU;
- if (EnableMSSALoopDependency) {
- MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
- MSSAU = MemorySSAUpdater(MSSA);
- }
+ MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
+ MemorySSAUpdater MSSAU(MSSA);
- return simplifyLoopInst(*L, DT, LI, AC, TLI,
- MSSAU.hasValue() ? MSSAU.getPointer() : nullptr);
+ return simplifyLoopInst(*L, DT, LI, AC, TLI, &MSSAU);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -211,10 +204,8 @@ public:
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.setPreservesCFG();
- if (EnableMSSALoopDependency) {
- AU.addRequired<MemorySSAWrapperPass>();
- AU.addPreserved<MemorySSAWrapperPass>();
- }
+ AU.addRequired<MemorySSAWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
getLoopAnalysisUsage(AU);
}
};
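The switch to llvm::make_early_inc_range matters because U.set(V) unlinks the use currently being visited. A minimal stand-alone illustration of the same advance-before-mutate pattern (plain C++, not LLVM code):

#include <list>
void drop_even(std::list<int> &L) {
  for (auto It = L.begin(), E = L.end(); It != E;) {
    auto Cur = It++;  // advance first, like make_early_inc_range
    if (*Cur % 2 == 0)
      L.erase(Cur);   // safe: erasing Cur cannot invalidate It
  }
}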
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 34545f35b3c3..9f605b4ac4ad 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -1710,16 +1710,12 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
auto &OuterInnerReductions = LIL.getOuterInnerReductions();
// Now update the reduction PHIs in the inner and outer loop headers.
SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs;
- for (PHINode &PHI : InnerLoopHeader->phis()) {
- if (OuterInnerReductions.find(&PHI) == OuterInnerReductions.end())
- continue;
- InnerLoopPHIs.push_back(cast<PHINode>(&PHI));
- }
- for (PHINode &PHI : OuterLoopHeader->phis()) {
- if (OuterInnerReductions.find(&PHI) == OuterInnerReductions.end())
- continue;
- OuterLoopPHIs.push_back(cast<PHINode>(&PHI));
- }
+ for (PHINode &PHI : InnerLoopHeader->phis())
+ if (OuterInnerReductions.contains(&PHI))
+ InnerLoopPHIs.push_back(cast<PHINode>(&PHI));
+ for (PHINode &PHI : OuterLoopHeader->phis())
+ if (OuterInnerReductions.contains(&PHI))
+ OuterLoopPHIs.push_back(cast<PHINode>(&PHI));
// Now move the remaining reduction PHIs from outer to inner loop header and
// vice versa. The PHI nodes must be part of a reduction across the inner and
@@ -1767,6 +1763,7 @@ bool LoopInterchangeTransform::adjustLoopLinks() {
return Changed;
}
+namespace {
/// Main LoopInterchange Pass.
struct LoopInterchangeLegacyPass : public LoopPass {
static char ID;
@@ -1795,6 +1792,7 @@ struct LoopInterchangeLegacyPass : public LoopPass {
return LoopInterchange(SE, LI, DI, DT, ORE).run(L);
}
};
+} // namespace
char LoopInterchangeLegacyPass::ID = 0;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
index aaf586173e44..21d59936616b 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -34,7 +34,6 @@
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -109,8 +108,8 @@ struct StoreToLoadForwardingCandidate {
// Currently we only support accesses with unit stride. FIXME: we should be
  // able to handle non-unit stride as well, as long as the stride is equal to
// the dependence distance.
- if (getPtrStride(PSE, LoadPtr, L) != 1 ||
- getPtrStride(PSE, StorePtr, L) != 1)
+ if (getPtrStride(PSE, LoadType, LoadPtr, L) != 1 ||
+ getPtrStride(PSE, LoadType, StorePtr, L) != 1)
return false;
auto &DL = Load->getParent()->getModule()->getDataLayout();
@@ -718,15 +717,12 @@ PreservedAnalyses LoopLoadEliminationPass::run(Function &F,
auto *PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
auto *BFI = (PSI && PSI->hasProfileSummary()) ?
&AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
- MemorySSA *MSSA = EnableMSSALoopDependency
- ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA()
- : nullptr;
auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
bool Changed = eliminateLoadsAcrossLoops(
F, LI, DT, BFI, PSI, &SE, &AC, [&](Loop &L) -> const LoopAccessInfo & {
- LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE,
- TLI, TTI, nullptr, MSSA};
+ LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE,
+ TLI, TTI, nullptr, nullptr, nullptr};
return LAM.getResult<LoopAccessAnalysis>(L, AR);
});
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPassManager.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
index f4fce4871331..3df4cfe8e4c1 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
@@ -10,6 +10,7 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
@@ -44,6 +45,18 @@ PassManager<Loop, LoopAnalysisManager, LoopStandardAnalysisResults &,
return PA;
}
+void PassManager<Loop, LoopAnalysisManager, LoopStandardAnalysisResults &,
+ LPMUpdater &>::printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)>
+ MapClassName2PassName) {
+ for (unsigned Idx = 0, Size = LoopPasses.size(); Idx != Size; ++Idx) {
+ auto *P = LoopPasses[Idx].get();
+ P->printPipeline(OS, MapClassName2PassName);
+ if (Idx + 1 < Size)
+ OS << ",";
+ }
+}
+
// Run both loop passes and loop-nest passes on top-level loop \p L.
PreservedAnalyses
LoopPassManager::runWithLoopNestPasses(Loop &L, LoopAnalysisManager &AM,
@@ -112,12 +125,6 @@ LoopPassManager::runWithLoopNestPasses(Loop &L, LoopAnalysisManager &AM,
    // notify the updater, otherwise U.ParentL might get outdated and trigger
// assertion failures in addSiblingLoops and addChildLoops.
U.setParentLoop(L.getParentLoop());
-
- // FIXME: Historically, the pass managers all called the LLVM context's
- // yield function here. We don't have a generic way to acquire the
- // context and it isn't yet clear what the right pattern is for yielding
- // in the new pass manager so it is currently omitted.
- // ...getContext().yield();
}
return PA;
}
@@ -161,17 +168,17 @@ LoopPassManager::runWithoutLoopNestPasses(Loop &L, LoopAnalysisManager &AM,
    // notify the updater, otherwise U.ParentL might get outdated and trigger
// assertion failures in addSiblingLoops and addChildLoops.
U.setParentLoop(L.getParentLoop());
-
- // FIXME: Historically, the pass managers all called the LLVM context's
- // yield function here. We don't have a generic way to acquire the
- // context and it isn't yet clear what the right pattern is for yielding
- // in the new pass manager so it is currently omitted.
- // ...getContext().yield();
}
return PA;
}
} // namespace llvm
+void FunctionToLoopPassAdaptor::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ OS << (UseMemorySSA ? "loop-mssa(" : "loop(");
+ Pass->printPipeline(OS, MapClassName2PassName);
+ OS << ")";
+}
PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F,
FunctionAnalysisManager &AM) {
// Before we even compute any loop analyses, first run a miniature function
@@ -201,6 +208,10 @@ PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F,
BlockFrequencyInfo *BFI = UseBlockFrequencyInfo && F.hasProfileData()
? (&AM.getResult<BlockFrequencyAnalysis>(F))
: nullptr;
+ BranchProbabilityInfo *BPI =
+ UseBranchProbabilityInfo && F.hasProfileData()
+ ? (&AM.getResult<BranchProbabilityAnalysis>(F))
+ : nullptr;
LoopStandardAnalysisResults LAR = {AM.getResult<AAManager>(F),
AM.getResult<AssumptionAnalysis>(F),
AM.getResult<DominatorTreeAnalysis>(F),
@@ -209,6 +220,7 @@ PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F,
AM.getResult<TargetLibraryAnalysis>(F),
AM.getResult<TargetIRAnalysis>(F),
BFI,
+ BPI,
MSSA};
// Setup the loop analysis manager from its proxy. It is important that
@@ -285,6 +297,10 @@ PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F,
else
PI.runAfterPass<Loop>(*Pass, *L, PassPA);
+ if (LAR.MSSA && !PassPA.getChecker<MemorySSAAnalysis>().preserved())
+ report_fatal_error("Loop pass manager using MemorySSA contains a pass "
+ "that does not preserve MemorySSA");
+
#ifndef NDEBUG
// LoopAnalysisResults should always be valid.
// Note that we don't LAR.SE.verify() because that can change observed SE
@@ -325,6 +341,8 @@ PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F,
PA.preserve<ScalarEvolutionAnalysis>();
if (UseBlockFrequencyInfo && F.hasProfileData())
PA.preserve<BlockFrequencyAnalysis>();
+ if (UseBranchProbabilityInfo && F.hasProfileData())
+ PA.preserve<BranchProbabilityAnalysis>();
if (UseMemorySSA)
PA.preserve<MemorySSAAnalysis>();
return PA;
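A toy model of the pipeline printing added in this file (my own sketch, not the LLVM API): nested loop-pass names are joined with commas and wrapped in loop-mssa(...) or loop(...) depending on whether MemorySSA is used.

#include <iostream>
#include <string>
#include <vector>
std::string printLoopPipeline(const std::vector<std::string> &Passes,
                              bool UseMemorySSA) {
  std::string Out = UseMemorySSA ? "loop-mssa(" : "loop(";
  for (size_t I = 0; I < Passes.size(); ++I) {
    Out += Passes[I];
    if (I + 1 < Passes.size())
      Out += ",";
  }
  return Out + ")";
}
int main() {
  std::cout << printLoopPipeline({"licm", "loop-rotate"}, true) << "\n";
  // prints: loop-mssa(licm,loop-rotate)
}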
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPredication.cpp
index 4f97641e2027..aa7e79a589f2 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPredication.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPredication.cpp
@@ -183,6 +183,8 @@
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/Function.h"
@@ -254,7 +256,7 @@ class LoopPredication {
DominatorTree *DT;
ScalarEvolution *SE;
LoopInfo *LI;
- BranchProbabilityInfo *BPI;
+ MemorySSAUpdater *MSSAU;
Loop *L;
const DataLayout *DL;
@@ -302,16 +304,15 @@ class LoopPredication {
// If the loop always exits through another block in the loop, we should not
// predicate based on the latch check. For example, the latch check can be a
// very coarse grained check and there can be more fine grained exit checks
- // within the loop. We identify such unprofitable loops through BPI.
+ // within the loop.
bool isLoopProfitableToPredicate();
bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
public:
- LoopPredication(AliasAnalysis *AA, DominatorTree *DT,
- ScalarEvolution *SE, LoopInfo *LI,
- BranchProbabilityInfo *BPI)
- : AA(AA), DT(DT), SE(SE), LI(LI), BPI(BPI) {};
+ LoopPredication(AliasAnalysis *AA, DominatorTree *DT, ScalarEvolution *SE,
+ LoopInfo *LI, MemorySSAUpdater *MSSAU)
+ : AA(AA), DT(DT), SE(SE), LI(LI), MSSAU(MSSAU){};
bool runOnLoop(Loop *L);
};
@@ -325,6 +326,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<BranchProbabilityInfoWrapperPass>();
getLoopAnalysisUsage(AU);
+ AU.addPreserved<MemorySSAWrapperPass>();
}
bool runOnLoop(Loop *L, LPPassManager &LPM) override {
@@ -333,10 +335,12 @@ public:
auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- BranchProbabilityInfo &BPI =
- getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
+ auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>();
+ std::unique_ptr<MemorySSAUpdater> MSSAU;
+ if (MSSAWP)
+ MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAWP->getMSSA());
auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- LoopPredication LP(AA, DT, SE, LI, &BPI);
+ LoopPredication LP(AA, DT, SE, LI, MSSAU ? MSSAU.get() : nullptr);
return LP.runOnLoop(L);
}
};
@@ -358,16 +362,18 @@ Pass *llvm::createLoopPredicationPass() {
PreservedAnalyses LoopPredicationPass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &U) {
- Function *F = L.getHeader()->getParent();
- // For the new PM, we also can't use BranchProbabilityInfo as an analysis
- // pass. Function analyses need to be preserved across loop transformations
- // but BPI is not preserved, hence a newly built one is needed.
- BranchProbabilityInfo BPI(*F, AR.LI, &AR.TLI, &AR.DT, nullptr);
- LoopPredication LP(&AR.AA, &AR.DT, &AR.SE, &AR.LI, &BPI);
+ std::unique_ptr<MemorySSAUpdater> MSSAU;
+ if (AR.MSSA)
+ MSSAU = std::make_unique<MemorySSAUpdater>(AR.MSSA);
+ LoopPredication LP(&AR.AA, &AR.DT, &AR.SE, &AR.LI,
+ MSSAU ? MSSAU.get() : nullptr);
if (!LP.runOnLoop(&L))
return PreservedAnalyses::all();
- return getLoopPassPreservedAnalyses();
+ auto PA = getLoopPassPreservedAnalyses();
+ if (AR.MSSA)
+ PA.preserve<MemorySSAAnalysis>();
+ return PA;
}
Optional<LoopICmp>
@@ -809,7 +815,7 @@ bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard,
Value *AllChecks = Builder.CreateAnd(Checks);
auto *OldCond = Guard->getOperand(0);
Guard->setOperand(0, AllChecks);
- RecursivelyDeleteTriviallyDeadInstructions(OldCond);
+ RecursivelyDeleteTriviallyDeadInstructions(OldCond, nullptr /* TLI */, MSSAU);
LLVM_DEBUG(dbgs() << "Widened checks = " << NumWidened << "\n");
return true;
@@ -835,7 +841,7 @@ bool LoopPredication::widenWidenableBranchGuardConditions(
Value *AllChecks = Builder.CreateAnd(Checks);
auto *OldCond = BI->getCondition();
BI->setCondition(AllChecks);
- RecursivelyDeleteTriviallyDeadInstructions(OldCond);
+ RecursivelyDeleteTriviallyDeadInstructions(OldCond, nullptr /* TLI */, MSSAU);
assert(isGuardAsWidenableBranch(BI) &&
"Stopped being a guard after transform?");
@@ -912,7 +918,7 @@ Optional<LoopICmp> LoopPredication::parseLoopLatchICmp() {
bool LoopPredication::isLoopProfitableToPredicate() {
- if (SkipProfitabilityChecks || !BPI)
+ if (SkipProfitabilityChecks)
return true;
SmallVector<std::pair<BasicBlock *, BasicBlock *>, 8> ExitEdges;
@@ -934,8 +940,61 @@ bool LoopPredication::isLoopProfitableToPredicate() {
"expected to be an exiting block with 2 succs!");
unsigned LatchBrExitIdx =
LatchTerm->getSuccessor(0) == L->getHeader() ? 1 : 0;
+ // We compute branch probabilities without BPI. We do not rely on BPI since
+ // Loop predication is usually run in an LPM and BPI is only preserved
+ // lossily within loop pass managers, while BPI has an inherent notion of
+ // being complete for an entire function.
+
+ // If the latch exits into a deoptimize or an unreachable block, do not
+ // predicate on that latch check.
+ auto *LatchExitBlock = LatchTerm->getSuccessor(LatchBrExitIdx);
+ if (isa<UnreachableInst>(LatchTerm) ||
+ LatchExitBlock->getTerminatingDeoptimizeCall())
+ return false;
+
+ auto IsValidProfileData = [](MDNode *ProfileData, const Instruction *Term) {
+ if (!ProfileData || !ProfileData->getOperand(0))
+ return false;
+ if (MDString *MDS = dyn_cast<MDString>(ProfileData->getOperand(0)))
+ if (!MDS->getString().equals("branch_weights"))
+ return false;
+ if (ProfileData->getNumOperands() != 1 + Term->getNumSuccessors())
+ return false;
+ return true;
+ };
+ MDNode *LatchProfileData = LatchTerm->getMetadata(LLVMContext::MD_prof);
+ // Latch terminator has no valid profile data, so nothing to check
+ // profitability on.
+ if (!IsValidProfileData(LatchProfileData, LatchTerm))
+ return true;
+
+ auto ComputeBranchProbability =
+ [&](const BasicBlock *ExitingBlock,
+ const BasicBlock *ExitBlock) -> BranchProbability {
+ auto *Term = ExitingBlock->getTerminator();
+ MDNode *ProfileData = Term->getMetadata(LLVMContext::MD_prof);
+ unsigned NumSucc = Term->getNumSuccessors();
+ if (IsValidProfileData(ProfileData, Term)) {
+ uint64_t Numerator = 0, Denominator = 0, ProfVal = 0;
+ for (unsigned i = 0; i < NumSucc; i++) {
+ ConstantInt *CI =
+ mdconst::extract<ConstantInt>(ProfileData->getOperand(i + 1));
+ ProfVal = CI->getValue().getZExtValue();
+ if (Term->getSuccessor(i) == ExitBlock)
+ Numerator += ProfVal;
+ Denominator += ProfVal;
+ }
+ return BranchProbability::getBranchProbability(Numerator, Denominator);
+ } else {
+ assert(LatchBlock != ExitingBlock &&
+ "Latch term should always have profile data!");
+ // No profile data, so we choose the weight as 1/num_of_succ(Src)
+ return BranchProbability::getBranchProbability(1, NumSucc);
+ }
+ };
+
BranchProbability LatchExitProbability =
- BPI->getEdgeProbability(LatchBlock, LatchBrExitIdx);
+ ComputeBranchProbability(LatchBlock, LatchExitBlock);
// Protect against degenerate inputs provided by the user. Providing a value
// less than one, can invert the definition of profitable loop predication.
@@ -948,18 +1007,18 @@ bool LoopPredication::isLoopProfitableToPredicate() {
LLVM_DEBUG(dbgs() << "The value is set to 1.0\n");
ScaleFactor = 1.0;
}
- const auto LatchProbabilityThreshold =
- LatchExitProbability * ScaleFactor;
+ const auto LatchProbabilityThreshold = LatchExitProbability * ScaleFactor;
for (const auto &ExitEdge : ExitEdges) {
BranchProbability ExitingBlockProbability =
- BPI->getEdgeProbability(ExitEdge.first, ExitEdge.second);
+ ComputeBranchProbability(ExitEdge.first, ExitEdge.second);
// Some exiting edge has higher probability than the latch exiting edge.
// No longer profitable to predicate.
if (ExitingBlockProbability > LatchProbabilityThreshold)
return false;
}
- // Using BPI, we have concluded that the most probable way to exit from the
+
+ // We have concluded that the most probable way to exit from the
// loop is through the latch (or there's no profile information and all
// exits are equally likely).
return true;
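A stand-alone sketch of the weight-based probability computed above (plain C++ with made-up weights, not the LLVM API): with branch_weights {999, 1} and the exit as successor 1, the latch exit probability comes out as 1/1000.

#include <cstdint>
#include <cstdio>
int main() {
  uint64_t Weights[] = {999, 1}; // from !"branch_weights" metadata
  unsigned ExitIdx = 1;          // the successor that leaves the loop
  uint64_t Num = 0, Den = 0;
  for (unsigned i = 0; i < 2; ++i) {
    if (i == ExitIdx)
      Num += Weights[i];
    Den += Weights[i];
  }
  std::printf("latch exit probability = %llu/%llu\n",
              (unsigned long long)Num, (unsigned long long)Den);
}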
@@ -1071,28 +1130,26 @@ bool LoopPredication::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
// widen so that we gain ability to analyze it's exit count and perform this
// transform. TODO: It'd be nice to know for sure the exit became
// analyzeable after dropping widenability.
- {
- bool Invalidate = false;
+ bool ChangedLoop = false;
- for (auto *ExitingBB : ExitingBlocks) {
- if (LI->getLoopFor(ExitingBB) != L)
- continue;
+ for (auto *ExitingBB : ExitingBlocks) {
+ if (LI->getLoopFor(ExitingBB) != L)
+ continue;
- auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
- if (!BI)
- continue;
+ auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
+ if (!BI)
+ continue;
- Use *Cond, *WC;
- BasicBlock *IfTrueBB, *IfFalseBB;
- if (parseWidenableBranch(BI, Cond, WC, IfTrueBB, IfFalseBB) &&
- L->contains(IfTrueBB)) {
- WC->set(ConstantInt::getTrue(IfTrueBB->getContext()));
- Invalidate = true;
- }
+ Use *Cond, *WC;
+ BasicBlock *IfTrueBB, *IfFalseBB;
+ if (parseWidenableBranch(BI, Cond, WC, IfTrueBB, IfFalseBB) &&
+ L->contains(IfTrueBB)) {
+ WC->set(ConstantInt::getTrue(IfTrueBB->getContext()));
+ ChangedLoop = true;
}
- if (Invalidate)
- SE->forgetLoop(L);
}
+ if (ChangedLoop)
+ SE->forgetLoop(L);
// The use of umin(all analyzeable exits) instead of latch is subtle, but
// important for profitability. We may have a loop which hasn't been fully
@@ -1104,18 +1161,24 @@ bool LoopPredication::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
if (isa<SCEVCouldNotCompute>(MinEC) || MinEC->getType()->isPointerTy() ||
!SE->isLoopInvariant(MinEC, L) ||
!isSafeToExpandAt(MinEC, WidenableBR, *SE))
- return false;
+ return ChangedLoop;
// Subtlety: We need to avoid inserting additional uses of the WC. We know
// that it can only have one transitive use at the moment, and thus moving
// that use to just before the branch and inserting code before it and then
// modifying the operand is legal.
auto *IP = cast<Instruction>(WidenableBR->getCondition());
+ // Here we unconditionally modify the IR, so after this point we should return
+ // only `true`!
IP->moveBefore(WidenableBR);
+ if (MSSAU)
+ if (auto *MUD = MSSAU->getMemorySSA()->getMemoryAccess(IP))
+ MSSAU->moveToPlace(MUD, WidenableBR->getParent(),
+ MemorySSA::BeforeTerminator);
Rewriter.setInsertPoint(IP);
IRBuilder<> B(IP);
- bool Changed = false;
+ bool InvalidateLoop = false;
Value *MinECV = nullptr; // lazily generated if needed
for (BasicBlock *ExitingBB : ExitingBlocks) {
// If our exiting block exits multiple loops, we can only rewrite the
@@ -1172,16 +1235,18 @@ bool LoopPredication::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
Value *OldCond = BI->getCondition();
BI->setCondition(ConstantInt::get(OldCond->getType(), !ExitIfTrue));
- Changed = true;
+ InvalidateLoop = true;
}
- if (Changed)
+ if (InvalidateLoop)
    // We just mutated a bunch of loop exits changing their exit counts
// widely. We need to force recomputation of the exit counts given these
// changes. Note that all of the inserted exits are never taken, and
// should be removed next time the CFG is modified.
SE->forgetLoop(L);
- return Changed;
+
+ // Always return `true` since we have moved the WidenableBR's condition.
+ return true;
}
bool LoopPredication::runOnLoop(Loop *Loop) {
@@ -1242,5 +1307,8 @@ bool LoopPredication::runOnLoop(Loop *Loop) {
for (auto *Guard : GuardsAsWidenableBranches)
Changed |= widenWidenableBranchGuardConditions(Guard, Expander);
Changed |= predicateLoopExits(L, Expander);
+
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
return Changed;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopRotation.cpp
index 6d5b19443c76..5ba137b1c85f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopRotation.cpp
@@ -99,8 +99,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetTransformInfoWrapperPass>();
- if (EnableMSSALoopDependency)
- AU.addPreserved<MemorySSAWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
getLoopAnalysisUsage(AU);
// Lazy BFI and BPI are marked as preserved here so LoopRotate
@@ -121,13 +120,11 @@ public:
auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
const SimplifyQuery SQ = getBestSimplifyQuery(*this, F);
Optional<MemorySSAUpdater> MSSAU;
- if (EnableMSSALoopDependency) {
- // Not requiring MemorySSA and getting it only if available will split
- // the loop pass pipeline when LoopRotate is being run first.
- auto *MSSAA = getAnalysisIfAvailable<MemorySSAWrapperPass>();
- if (MSSAA)
- MSSAU = MemorySSAUpdater(&MSSAA->getMSSA());
- }
+ // Not requiring MemorySSA and getting it only if available will split
+ // the loop pass pipeline when LoopRotate is being run first.
+ auto *MSSAA = getAnalysisIfAvailable<MemorySSAWrapperPass>();
+ if (MSSAA)
+ MSSAU = MemorySSAUpdater(&MSSAA->getMSSA());
// Vectorization requires loop-rotation. Use default threshold for loops the
// user explicitly marked for vectorization, even when header duplication is
// disabled.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
index cc6d11220807..a87843d658a9 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
@@ -733,13 +733,12 @@ public:
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ auto *MSSAA = getAnalysisIfAvailable<MemorySSAWrapperPass>();
Optional<MemorySSAUpdater> MSSAU;
- if (EnableMSSALoopDependency) {
- MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
- MSSAU = MemorySSAUpdater(MSSA);
- if (VerifyMemorySSA)
- MSSA->verifyMemorySSA();
- }
+ if (MSSAA)
+ MSSAU = MemorySSAUpdater(&MSSAA->getMSSA());
+ if (MSSAA && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
bool DeleteCurrentLoop = false;
bool Changed = simplifyLoopCFG(
*L, DT, LI, SE, MSSAU.hasValue() ? MSSAU.getPointer() : nullptr,
@@ -750,10 +749,7 @@ public:
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- if (EnableMSSALoopDependency) {
- AU.addRequired<MemorySSAWrapperPass>();
- AU.addPreserved<MemorySSAWrapperPass>();
- }
+ AU.addPreserved<MemorySSAWrapperPass>();
AU.addPreserved<DependenceAnalysisWrapperPass>();
getLoopAnalysisUsage(AU);
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSink.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSink.cpp
index a01287f587d7..c9c9e60d0921 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSink.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSink.cpp
@@ -323,15 +323,14 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
  // Traverse preheader's instructions in reverse order because if A depends
  // on B (A appears after B), A needs to be sunk before B can be
  // sunk.
- for (auto II = Preheader->rbegin(), E = Preheader->rend(); II != E;) {
- Instruction *I = &*II++;
+ for (Instruction &I : llvm::make_early_inc_range(llvm::reverse(*Preheader))) {
// No need to check for instruction's operands are loop invariant.
- assert(L.hasLoopInvariantOperands(I) &&
+ assert(L.hasLoopInvariantOperands(&I) &&
"Insts in a loop's preheader should have loop invariant operands!");
- if (!canSinkOrHoistInst(*I, &AA, &DT, &L, CurAST, MSSAU.get(), false,
+ if (!canSinkOrHoistInst(I, &AA, &DT, &L, CurAST, MSSAU.get(), false,
LICMFlags.get()))
continue;
- if (sinkInstruction(L, *I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI,
+ if (sinkInstruction(L, I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI,
MSSAU.get()))
Changed = true;
}
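The rewritten loop above switches from a hand-rolled reverse iterator that is bumped before the body runs to the make_early_inc_range(reverse(...)) idiom. A standalone sketch of the same idiom follows; eraseMatchingBottomUp and its predicate are made-up names, not part of the patch.

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instruction.h"

    // Visit a block bottom-up. The early-inc adaptor advances past the current
    // instruction before the body runs, so erasing or moving it does not
    // invalidate the traversal.
    static void eraseMatchingBottomUp(
        llvm::BasicBlock &BB,
        llvm::function_ref<bool(llvm::Instruction &)> Pred) {
      for (llvm::Instruction &I :
           llvm::make_early_inc_range(llvm::reverse(BB))) {
        if (Pred(I))
          I.eraseFromParent();
      }
    }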
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 404852f1dd4d..a9a2266e1196 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -136,6 +136,12 @@ using namespace llvm;
/// worst cases before LSR burns too much compile time and stack space.
static const unsigned MaxIVUsers = 200;
+/// Limit the size of the expression that SCEV-based salvaging will attempt
+/// to translate into a DIExpression.
+/// Choose a maximum size such that debuginfo is not excessively increased and
+/// the salvaging is not too expensive for the compiler.
+static const unsigned MaxSCEVSalvageExpressionSize = 64;
+
// Temporary flag to cleanup congruent phis after LSR phi expansion.
// It's currently disabled until we can determine whether it's truly useful or
// not. The flag should be removed after the v3.0 release.
@@ -689,7 +695,7 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
const APInt &RA = RC->getAPInt();
// Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do
// some folding.
- if (RA.isAllOnesValue()) {
+ if (RA.isAllOnes()) {
if (LHS->getType()->isPointerTy())
return nullptr;
return SE.getMulExpr(LHS, RC);
@@ -2816,9 +2822,7 @@ static const SCEV *getExprBase(const SCEV *S) {
// there's nothing more complex.
// FIXME: not sure if we want to recognize negation.
const SCEVAddExpr *Add = cast<SCEVAddExpr>(S);
- for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(Add->op_end()),
- E(Add->op_begin()); I != E; ++I) {
- const SCEV *SubExpr = *I;
+ for (const SCEV *SubExpr : reverse(Add->operands())) {
if (SubExpr->getSCEVType() == scAddExpr)
return getExprBase(SubExpr);
@@ -3150,7 +3154,7 @@ void LSRInstance::CollectChains() {
void LSRInstance::FinalizeChain(IVChain &Chain) {
assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
LLVM_DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");
-
+
for (const IVInc &Inc : Chain) {
LLVM_DEBUG(dbgs() << " Inc: " << *Inc.UserInst << "\n");
auto UseI = find(Inc.UserInst->operands(), Inc.IVOperand);
@@ -3385,7 +3389,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
void
LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
// Mark uses whose expressions cannot be expanded.
- if (!isSafeToExpand(S, SE))
+ if (!isSafeToExpand(S, SE, /*CanonicalMode*/ false))
LU.RigidFormula = true;
Formula F;
@@ -3934,6 +3938,9 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
// Check each interesting stride.
for (int64_t Factor : Factors) {
+ // Check that Factor can be represented by IntTy
+ if (!ConstantInt::isValueValidForType(IntTy, Factor))
+ continue;
// Check that the multiplication doesn't overflow.
if (Base.BaseOffset == std::numeric_limits<int64_t>::min() && Factor == -1)
continue;
@@ -4082,6 +4089,14 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
if (DstTy->isPointerTy())
return;
+ // It is invalid to extend a pointer type so exit early if ScaledReg or
+ // any of the BaseRegs are pointers.
+ if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
+ return;
+ if (any_of(Base.BaseRegs,
+ [](const SCEV *S) { return S->getType()->isPointerTy(); }))
+ return;
+
for (Type *SrcTy : Types) {
if (SrcTy != DstTy && TTI.isTruncateFree(SrcTy, DstTy)) {
Formula F = Base;
@@ -5689,23 +5704,6 @@ LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
}
}
-#ifndef NDEBUG
- // All dominating loops must have preheaders, or SCEVExpander may not be able
- // to materialize an AddRecExpr whose Start is an outer AddRecExpr.
- //
- // IVUsers analysis should only create users that are dominated by simple loop
- // headers. Since this loop should dominate all of its users, its user list
- // should be empty if this loop itself is not within a simple loop nest.
- for (DomTreeNode *Rung = DT.getNode(L->getLoopPreheader());
- Rung; Rung = Rung->getIDom()) {
- BasicBlock *BB = Rung->getBlock();
- const Loop *DomLoop = LI.getLoopFor(BB);
- if (DomLoop && DomLoop->getHeader() == BB) {
- assert(DomLoop->getLoopPreheader() && "LSR needs a simplified loop nest");
- }
- }
-#endif // DEBUG
-
LLVM_DEBUG(dbgs() << "\nLSR on loop ";
L->getHeader()->printAsOperand(dbgs(), /*PrintType=*/false);
dbgs() << ":\n");
@@ -5870,6 +5868,7 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<MemorySSAWrapperPass>();
}
+namespace {
struct SCEVDbgValueBuilder {
SCEVDbgValueBuilder() = default;
SCEVDbgValueBuilder(const SCEVDbgValueBuilder &Base) {
@@ -6117,14 +6116,15 @@ struct DVIRecoveryRec {
Metadata *LocationOp;
const llvm::SCEV *SCEV;
};
+} // namespace
-static bool RewriteDVIUsingIterCount(DVIRecoveryRec CachedDVI,
+static void RewriteDVIUsingIterCount(DVIRecoveryRec CachedDVI,
const SCEVDbgValueBuilder &IterationCount,
ScalarEvolution &SE) {
// LSR may add locations to previously single location-op DVIs which
// are currently not supported.
if (CachedDVI.DVI->getNumVariableLocationOps() != 1)
- return false;
+ return;
  // SCEVs for SSA values are most frequently of the form
// {start,+,stride}, but sometimes they are ({start,+,stride} + %a + ..).
@@ -6132,48 +6132,70 @@ static bool RewriteDVIUsingIterCount(DVIRecoveryRec CachedDVI,
// SCEVs have not been observed to result in debuginfo-lossy optimisations,
  // so it's not expected this point will be reached.
if (!isa<SCEVAddRecExpr>(CachedDVI.SCEV))
- return false;
+ return;
LLVM_DEBUG(dbgs() << "scev-salvage: Value to salvage SCEV: "
<< *CachedDVI.SCEV << '\n');
const auto *Rec = cast<SCEVAddRecExpr>(CachedDVI.SCEV);
if (!Rec->isAffine())
- return false;
+ return;
+
+ if (CachedDVI.SCEV->getExpressionSize() > MaxSCEVSalvageExpressionSize)
+ return;
// Initialise a new builder with the iteration count expression. In
// combination with the value's SCEV this enables recovery.
SCEVDbgValueBuilder RecoverValue(IterationCount);
if (!RecoverValue.SCEVToValueExpr(*Rec, SE))
- return false;
+ return;
LLVM_DEBUG(dbgs() << "scev-salvage: Updating: " << *CachedDVI.DVI << '\n');
RecoverValue.applyExprToDbgValue(*CachedDVI.DVI, CachedDVI.Expr);
LLVM_DEBUG(dbgs() << "scev-salvage: to: " << *CachedDVI.DVI << '\n');
- return true;
}
-static bool
+static void RewriteDVIUsingOffset(DVIRecoveryRec &DVIRec, llvm::PHINode &IV,
+ int64_t Offset) {
+ assert(!DVIRec.DVI->hasArgList() && "Expected single location-op dbg.value.");
+ DbgValueInst *DVI = DVIRec.DVI;
+ SmallVector<uint64_t, 8> Ops;
+ DIExpression::appendOffset(Ops, Offset);
+ DIExpression *Expr = DIExpression::prependOpcodes(DVIRec.Expr, Ops, true);
+ LLVM_DEBUG(dbgs() << "scev-salvage: Updating: " << *DVIRec.DVI << '\n');
+ DVI->setExpression(Expr);
+ llvm::Value *ValIV = dyn_cast<llvm::Value>(&IV);
+ DVI->replaceVariableLocationOp(
+ 0u, llvm::MetadataAsValue::get(DVI->getContext(),
+ llvm::ValueAsMetadata::get(ValIV)));
+ LLVM_DEBUG(dbgs() << "scev-salvage: updated with offset to IV: "
+ << *DVIRec.DVI << '\n');
+}
+
+static void
DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE,
llvm::PHINode *LSRInductionVar,
SmallVector<DVIRecoveryRec, 2> &DVIToUpdate) {
if (DVIToUpdate.empty())
- return false;
+ return;
const llvm::SCEV *SCEVInductionVar = SE.getSCEV(LSRInductionVar);
assert(SCEVInductionVar &&
"Anticipated a SCEV for the post-LSR induction variable");
- bool Changed = false;
if (const SCEVAddRecExpr *IVAddRec =
dyn_cast<SCEVAddRecExpr>(SCEVInductionVar)) {
if (!IVAddRec->isAffine())
- return false;
+ return;
+ if (IVAddRec->getExpressionSize() > MaxSCEVSalvageExpressionSize)
+ return;
+
+ // The iteration count is required to recover location values.
SCEVDbgValueBuilder IterCountExpr;
IterCountExpr.pushValue(LSRInductionVar);
if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE))
- return false;
+ return;
LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV: " << *SCEVInductionVar
<< '\n');
@@ -6196,14 +6218,26 @@ DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE,
DVIRec.DVI->setExpression(DVIRec.Expr);
}
- Changed |= RewriteDVIUsingIterCount(DVIRec, IterCountExpr, SE);
+ LLVM_DEBUG(dbgs() << "scev-salvage: value to recover SCEV: "
+ << *DVIRec.SCEV << '\n');
+
+ // Create a simple expression if the IV and value to salvage SCEVs
+ // start values differ by only a constant value.
+ if (Optional<APInt> Offset =
+ SE.computeConstantDifference(DVIRec.SCEV, SCEVInductionVar)) {
+ if (Offset.getValue().getMinSignedBits() <= 64)
+ RewriteDVIUsingOffset(DVIRec, *LSRInductionVar,
+ Offset.getValue().getSExtValue());
+ } else {
+ RewriteDVIUsingIterCount(DVIRec, IterCountExpr, SE);
+ }
}
}
- return Changed;
}
/// Identify and cache salvageable DVI locations and expressions along with the
-/// corresponding SCEV(s). Also ensure that the DVI is not deleted before
+/// corresponding SCEV(s). Also ensure that the DVI is not deleted between
+/// caching and salvaging.
static void
DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE,
SmallVector<DVIRecoveryRec, 2> &SalvageableDVISCEVs,
@@ -6214,6 +6248,9 @@ DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE,
if (!DVI)
continue;
+ if (DVI->isUndef())
+ continue;
+
if (DVI->hasArgList())
continue;
@@ -6221,6 +6258,16 @@ DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE,
!SE.isSCEVable(DVI->getVariableLocationOp(0)->getType()))
continue;
+      // SCEVUnknown wraps an llvm::Value; it does not have a start and stride.
+ // Therefore no translation to DIExpression is performed.
+ const SCEV *S = SE.getSCEV(DVI->getVariableLocationOp(0));
+ if (isa<SCEVUnknown>(S))
+ continue;
+
+ // Avoid wasting resources generating an expression containing undef.
+ if (SE.containsUndefs(S))
+ continue;
+
SalvageableDVISCEVs.push_back(
{DVI, DVI->getExpression(), DVI->getRawLocation(),
SE.getSCEV(DVI->getVariableLocationOp(0))});
@@ -6234,33 +6281,32 @@ DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE,
/// surviving subsequent transforms.
static llvm::PHINode *GetInductionVariable(const Loop &L, ScalarEvolution &SE,
const LSRInstance &LSR) {
- // For now, just pick the first IV generated and inserted. Ideally pick an IV
- // that is unlikely to be optimised away by subsequent transforms.
+
+ auto IsSuitableIV = [&](PHINode *P) {
+ if (!SE.isSCEVable(P->getType()))
+ return false;
+ if (const SCEVAddRecExpr *Rec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(P)))
+ return Rec->isAffine() && !SE.containsUndefs(SE.getSCEV(P));
+ return false;
+ };
+
+ // For now, just pick the first IV that was generated and inserted by
+ // ScalarEvolution. Ideally pick an IV that is unlikely to be optimised away
+ // by subsequent transforms.
for (const WeakVH &IV : LSR.getScalarEvolutionIVs()) {
if (!IV)
continue;
- assert(isa<PHINode>(&*IV) && "Expected PhI node.");
- if (SE.isSCEVable((*IV).getType())) {
- PHINode *Phi = dyn_cast<PHINode>(&*IV);
- LLVM_DEBUG(dbgs() << "scev-salvage: IV : " << *IV
- << "with SCEV: " << *SE.getSCEV(Phi) << "\n");
- return Phi;
- }
- }
+ // There should only be PHI node IVs.
+ PHINode *P = cast<PHINode>(&*IV);
- for (PHINode &Phi : L.getHeader()->phis()) {
- if (!SE.isSCEVable(Phi.getType()))
- continue;
-
- const llvm::SCEV *PhiSCEV = SE.getSCEV(&Phi);
- if (const llvm::SCEVAddRecExpr *Rec = dyn_cast<SCEVAddRecExpr>(PhiSCEV))
- if (!Rec->isAffine())
- continue;
+ if (IsSuitableIV(P))
+ return P;
+ }
- LLVM_DEBUG(dbgs() << "scev-salvage: Selected IV from loop header: " << Phi
- << " with SCEV: " << *PhiSCEV << "\n");
- return &Phi;
+ for (PHINode &P : L.getHeader()->phis()) {
+ if (IsSuitableIV(&P))
+ return &P;
}
return nullptr;
}
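When the SCEV of the value being salvaged and the SCEV of the surviving induction variable differ only by a constant, the patch now emits a plain offset expression instead of rebuilding the value from the iteration count. A condensed sketch of that offset rewrite is shown below; the function name pointDbgValueAtOffset is illustrative, while the API calls mirror the ones added in the hunks above.

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/DebugInfoMetadata.h"
    #include "llvm/IR/IntrinsicInst.h"
    #include "llvm/IR/Metadata.h"

    // Re-point a single-location dbg.value at NewBase and fold the constant
    // Offset into its DIExpression.
    static void pointDbgValueAtOffset(llvm::DbgValueInst &DVI,
                                      llvm::Value &NewBase, int64_t Offset) {
      llvm::SmallVector<uint64_t, 8> Ops;
      llvm::DIExpression::appendOffset(Ops, Offset);
      llvm::DIExpression *Expr = llvm::DIExpression::prependOpcodes(
          DVI.getExpression(), Ops, /*StackValue=*/true);
      DVI.setExpression(Expr);
      DVI.replaceVariableLocationOp(
          0u, llvm::MetadataAsValue::get(DVI.getContext(),
                                         llvm::ValueAsMetadata::get(&NewBase)));
    }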
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
index 71eb393fcdd7..1ecbb86724e1 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
@@ -286,8 +286,8 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
AssumptionCache &AC, DependenceInfo &DI,
OptimizationRemarkEmitter &ORE, int OptLevel) {
TargetTransformInfo::UnrollingPreferences UP =
- gatherUnrollingPreferences(L, SE, TTI, nullptr, nullptr, OptLevel, None,
- None, None, None, None, None);
+ gatherUnrollingPreferences(L, SE, TTI, nullptr, nullptr, ORE, OptLevel,
+ None, None, None, None, None, None);
TargetTransformInfo::PeelingPreferences PP =
gatherPeelingPreferences(L, SE, TTI, None, None);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 49501f324a49..67702520511b 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -184,7 +184,8 @@ static const unsigned NoThreshold = std::numeric_limits<unsigned>::max();
/// flags, TTI overrides and user specified parameters.
TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,
- BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel,
+ BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
+ OptimizationRemarkEmitter &ORE, int OptLevel,
Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
Optional<bool> UserUpperBound, Optional<unsigned> UserFullUnrollMaxCount) {
@@ -214,7 +215,7 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
UP.MaxIterationsCountToAnalyze = UnrollMaxIterationsCountToAnalyze;
// Override with any target specific settings
- TTI.getUnrollingPreferences(L, SE, UP);
+ TTI.getUnrollingPreferences(L, SE, UP, &ORE);
// Apply size attributes
bool OptForSize = L->getHeader()->getParent()->hasOptSize() ||
@@ -318,6 +319,16 @@ struct EstimatedUnrollCost {
unsigned RolledDynamicCost;
};
+struct PragmaInfo {
+ PragmaInfo(bool UUC, bool PFU, unsigned PC, bool PEU)
+ : UserUnrollCount(UUC), PragmaFullUnroll(PFU), PragmaCount(PC),
+ PragmaEnableUnroll(PEU) {}
+ const bool UserUnrollCount;
+ const bool PragmaFullUnroll;
+ const unsigned PragmaCount;
+ const bool PragmaEnableUnroll;
+};
+
} // end anonymous namespace
/// Figure out if the loop is worth full unrolling.
@@ -746,13 +757,132 @@ public:
// Returns loop size estimation for unrolled loop, given the unrolling
// configuration specified by UP.
- uint64_t getUnrolledLoopSize(TargetTransformInfo::UnrollingPreferences &UP) {
+ uint64_t
+ getUnrolledLoopSize(const TargetTransformInfo::UnrollingPreferences &UP,
+ const unsigned CountOverwrite = 0) const {
assert(LoopSize >= UP.BEInsns &&
"LoopSize should not be less than BEInsns!");
- return (uint64_t)(LoopSize - UP.BEInsns) * UP.Count + UP.BEInsns;
+ if (CountOverwrite)
+ return static_cast<uint64_t>(LoopSize - UP.BEInsns) * CountOverwrite +
+ UP.BEInsns;
+ else
+ return static_cast<uint64_t>(LoopSize - UP.BEInsns) * UP.Count +
+ UP.BEInsns;
}
};
+static Optional<unsigned>
+shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo,
+ const unsigned TripMultiple, const unsigned TripCount,
+ const UnrollCostEstimator UCE,
+ const TargetTransformInfo::UnrollingPreferences &UP) {
+
+ // Using unroll pragma
+ // 1st priority is unroll count set by "unroll-count" option.
+
+ if (PInfo.UserUnrollCount) {
+ if (UP.AllowRemainder &&
+ UCE.getUnrolledLoopSize(UP, (unsigned)UnrollCount) < UP.Threshold)
+ return (unsigned)UnrollCount;
+ }
+
+ // 2nd priority is unroll count set by pragma.
+ if (PInfo.PragmaCount > 0) {
+ if ((UP.AllowRemainder || (TripMultiple % PInfo.PragmaCount == 0)) &&
+ UCE.getUnrolledLoopSize(UP, PInfo.PragmaCount) < PragmaUnrollThreshold)
+ return PInfo.PragmaCount;
+ }
+
+ if (PInfo.PragmaFullUnroll && TripCount != 0) {
+ if (UCE.getUnrolledLoopSize(UP, TripCount) < PragmaUnrollThreshold)
+ return TripCount;
+ }
+  // If we did not return by this point, continue with the remaining priorities.
+ return None;
+}
+
+static Optional<unsigned> shouldFullUnroll(
+ Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT,
+ ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
+ const unsigned FullUnrollTripCount, const UnrollCostEstimator UCE,
+ const TargetTransformInfo::UnrollingPreferences &UP) {
+
+ if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) {
+ // When computing the unrolled size, note that BEInsns are not replicated
+ // like the rest of the loop body.
+ if (UCE.getUnrolledLoopSize(UP) < UP.Threshold) {
+ return FullUnrollTripCount;
+
+ } else {
+ // The loop isn't that small, but we still can fully unroll it if that
+ // helps to remove a significant number of instructions.
+ // To check that, run additional analysis on the loop.
+ if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
+ L, FullUnrollTripCount, DT, SE, EphValues, TTI,
+ UP.Threshold * UP.MaxPercentThresholdBoost / 100,
+ UP.MaxIterationsCountToAnalyze)) {
+ unsigned Boost =
+ getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost);
+ if (Cost->UnrolledCost < UP.Threshold * Boost / 100) {
+ return FullUnrollTripCount;
+ }
+ }
+ }
+ }
+ return None;
+}
+
+static Optional<unsigned>
+shouldPartialUnroll(const unsigned LoopSize, const unsigned TripCount,
+ const UnrollCostEstimator UCE,
+ const TargetTransformInfo::UnrollingPreferences &UP) {
+
+ unsigned count = UP.Count;
+ if (TripCount) {
+ if (!UP.Partial) {
+ LLVM_DEBUG(dbgs() << " will not try to unroll partially because "
+ << "-unroll-allow-partial not given\n");
+ count = 0;
+ return count;
+ }
+ if (count == 0)
+ count = TripCount;
+ if (UP.PartialThreshold != NoThreshold) {
+      // Reduce the unroll count so that it evenly divides TripCount for
+      // partial unrolling.
+ if (UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold)
+ count = (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
+ (LoopSize - UP.BEInsns);
+ if (count > UP.MaxCount)
+ count = UP.MaxCount;
+ while (count != 0 && TripCount % count != 0)
+ count--;
+ if (UP.AllowRemainder && count <= 1) {
+        // If there is no Count that evenly divides TripCount, set Count to the
+        // largest power-of-two factor that satisfies the threshold limit. As
+        // we'll create a fixup loop, only do this type of unrolling if a
+        // remainder loop is allowed.
+ count = UP.DefaultUnrollRuntimeCount;
+ while (count != 0 &&
+ UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold)
+ count >>= 1;
+ }
+ if (count < 2) {
+ count = 0;
+ }
+ } else {
+ count = TripCount;
+ }
+ if (count > UP.MaxCount)
+ count = UP.MaxCount;
+
+ LLVM_DEBUG(dbgs() << " partially unrolling with count: " << count << "\n");
+
+ return count;
+ }
+
+  // If we did not return by this point, continue with the remaining priorities.
+ return None;
+}
// Returns true if unroll count was set explicitly.
// Calculates unroll count and writes it to UP.Count.
// Unless IgnoreUser is true, will also use metadata and command-line options
@@ -770,7 +900,18 @@ bool llvm::computeUnrollCount(
TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound) {
UnrollCostEstimator UCE(*L, LoopSize);
+ Optional<unsigned> UnrollFactor;
+
+ const bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
+ const bool PragmaFullUnroll = hasUnrollFullPragma(L);
+ const unsigned PragmaCount = unrollCountPragmaValue(L);
+ const bool PragmaEnableUnroll = hasUnrollEnablePragma(L);
+ const bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
+ PragmaEnableUnroll || UserUnrollCount;
+
+ PragmaInfo PInfo(UserUnrollCount, PragmaFullUnroll, PragmaCount,
+ PragmaEnableUnroll);
// Use an explicit peel count that has been specified for testing. In this
// case it's not permitted to also specify an explicit unroll count.
if (PP.PeelCount) {
@@ -782,47 +923,29 @@ bool llvm::computeUnrollCount(
UP.Runtime = false;
return true;
}
-
// Check for explicit Count.
// 1st priority is unroll count set by "unroll-count" option.
- bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
- if (UserUnrollCount) {
- UP.Count = UnrollCount;
- UP.AllowExpensiveTripCount = true;
- UP.Force = true;
- if (UP.AllowRemainder && UCE.getUnrolledLoopSize(UP) < UP.Threshold)
- return true;
- }
-
// 2nd priority is unroll count set by pragma.
- unsigned PragmaCount = unrollCountPragmaValue(L);
- if (PragmaCount > 0) {
- UP.Count = PragmaCount;
- UP.Runtime = true;
- UP.AllowExpensiveTripCount = true;
- UP.Force = true;
- if ((UP.AllowRemainder || (TripMultiple % PragmaCount == 0)) &&
- UCE.getUnrolledLoopSize(UP) < PragmaUnrollThreshold)
- return true;
- }
- bool PragmaFullUnroll = hasUnrollFullPragma(L);
- if (PragmaFullUnroll && TripCount != 0) {
- UP.Count = TripCount;
- if (UCE.getUnrolledLoopSize(UP) < PragmaUnrollThreshold)
- return false;
- }
+ UnrollFactor = shouldPragmaUnroll(L, PInfo, TripMultiple, TripCount, UCE, UP);
+
+ if (UnrollFactor) {
+ UP.Count = *UnrollFactor;
- bool PragmaEnableUnroll = hasUnrollEnablePragma(L);
- bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
- PragmaEnableUnroll || UserUnrollCount;
-
- if (ExplicitUnroll && TripCount != 0) {
- // If the loop has an unrolling pragma, we want to be more aggressive with
- // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold
- // value which is larger than the default limits.
- UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
- UP.PartialThreshold =
- std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
+ if (UserUnrollCount || (PragmaCount > 0)) {
+ UP.AllowExpensiveTripCount = true;
+ UP.Force = true;
+ }
+ UP.Runtime |= (PragmaCount > 0);
+ return ExplicitUnroll;
+ } else {
+ if (ExplicitUnroll && TripCount != 0) {
+ // If the loop has an unrolling pragma, we want to be more aggressive with
+ // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold
+ // value which is larger than the default limits.
+ UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
+ UP.PartialThreshold =
+ std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
+ }
}
// 3rd priority is full unroll count.
@@ -852,71 +975,55 @@ bool llvm::computeUnrollCount(
unsigned FullUnrollTripCount =
ExactTripCount ? ExactTripCount : FullUnrollMaxTripCount;
UP.Count = FullUnrollTripCount;
- if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) {
- // When computing the unrolled size, note that BEInsns are not replicated
- // like the rest of the loop body.
- if (UCE.getUnrolledLoopSize(UP) < UP.Threshold) {
- UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount);
- return ExplicitUnroll;
- } else {
- // The loop isn't that small, but we still can fully unroll it if that
- // helps to remove a significant number of instructions.
- // To check that, run additional analysis on the loop.
- if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
- L, FullUnrollTripCount, DT, SE, EphValues, TTI,
- UP.Threshold * UP.MaxPercentThresholdBoost / 100,
- UP.MaxIterationsCountToAnalyze)) {
- unsigned Boost =
- getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost);
- if (Cost->UnrolledCost < UP.Threshold * Boost / 100) {
- UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount);
- return ExplicitUnroll;
- }
- }
- }
+
+ UnrollFactor =
+ shouldFullUnroll(L, TTI, DT, SE, EphValues, FullUnrollTripCount, UCE, UP);
+
+  // If shouldFullUnroll can do the unrolling, some side parameters need to
+  // be set.
+ if (UnrollFactor) {
+ UP.Count = *UnrollFactor;
+ UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount);
+ TripCount = FullUnrollTripCount;
+ TripMultiple = UP.UpperBound ? 1 : TripMultiple;
+ return ExplicitUnroll;
+ } else {
+ UP.Count = FullUnrollTripCount;
}
// 4th priority is loop peeling.
- computePeelCount(L, LoopSize, PP, TripCount, SE, UP.Threshold);
+ computePeelCount(L, LoopSize, PP, TripCount, DT, SE, UP.Threshold);
if (PP.PeelCount) {
UP.Runtime = false;
UP.Count = 1;
return ExplicitUnroll;
}
+  // Before starting partial unrolling, set UP.Partial to true if the user
+  // explicitly asked for unrolling.
+ if (TripCount)
+ UP.Partial |= ExplicitUnroll;
+
// 5th priority is partial unrolling.
// Try partial unroll only when TripCount could be statically calculated.
- if (TripCount) {
- UP.Partial |= ExplicitUnroll;
- if (!UP.Partial) {
- LLVM_DEBUG(dbgs() << " will not try to unroll partially because "
- << "-unroll-allow-partial not given\n");
- UP.Count = 0;
- return false;
- }
- if (UP.Count == 0)
- UP.Count = TripCount;
+ UnrollFactor = shouldPartialUnroll(LoopSize, TripCount, UCE, UP);
+
+ if (UnrollFactor) {
+ UP.Count = *UnrollFactor;
+
+ if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
+ UP.Count != TripCount)
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE,
+ "FullUnrollAsDirectedTooLarge",
+ L->getStartLoc(), L->getHeader())
+ << "Unable to fully unroll loop as directed by unroll pragma "
+ "because "
+ "unrolled size is too large.";
+ });
+
if (UP.PartialThreshold != NoThreshold) {
- // Reduce unroll count to be modulo of TripCount for partial unrolling.
- if (UCE.getUnrolledLoopSize(UP) > UP.PartialThreshold)
- UP.Count =
- (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
- (LoopSize - UP.BEInsns);
- if (UP.Count > UP.MaxCount)
- UP.Count = UP.MaxCount;
- while (UP.Count != 0 && TripCount % UP.Count != 0)
- UP.Count--;
- if (UP.AllowRemainder && UP.Count <= 1) {
- // If there is no Count that is modulo of TripCount, set Count to
- // largest power-of-two factor that satisfies the threshold limit.
- // As we'll create fixup loop, do the type of unrolling only if
- // remainder loop is allowed.
- UP.Count = UP.DefaultUnrollRuntimeCount;
- while (UP.Count != 0 &&
- UCE.getUnrolledLoopSize(UP) > UP.PartialThreshold)
- UP.Count >>= 1;
- }
- if (UP.Count < 2) {
+ if (UP.Count == 0) {
if (PragmaEnableUnroll)
ORE->emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE,
@@ -926,25 +1033,8 @@ bool llvm::computeUnrollCount(
"pragma "
"because unrolled size is too large.";
});
- UP.Count = 0;
}
- } else {
- UP.Count = TripCount;
}
- if (UP.Count > UP.MaxCount)
- UP.Count = UP.MaxCount;
- if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
- UP.Count != TripCount)
- ORE->emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE,
- "FullUnrollAsDirectedTooLarge",
- L->getStartLoc(), L->getHeader())
- << "Unable to fully unroll loop as directed by unroll pragma "
- "because "
- "unrolled size is too large.";
- });
- LLVM_DEBUG(dbgs() << " partially unrolling with count: " << UP.Count
- << "\n");
return ExplicitUnroll;
}
assert(TripCount == 0 &&
@@ -981,8 +1071,6 @@ bool llvm::computeUnrollCount(
UP.AllowExpensiveTripCount = true;
}
}
-
- // Reduce count based on the type of unrolling and the threshold values.
UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
if (!UP.Runtime) {
LLVM_DEBUG(
@@ -1017,7 +1105,7 @@ bool llvm::computeUnrollCount(
using namespace ore;
- if (PragmaCount > 0 && !UP.AllowRemainder)
+ if (unrollCountPragmaValue(L) > 0 && !UP.AllowRemainder)
ORE->emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE,
"DifferentUnrollCountFromDirected",
@@ -1079,7 +1167,7 @@ static LoopUnrollResult tryToUnrollLoop(
bool NotDuplicatable;
bool Convergent;
TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
- L, SE, TTI, BFI, PSI, OptLevel, ProvidedThreshold, ProvidedCount,
+ L, SE, TTI, BFI, PSI, ORE, OptLevel, ProvidedThreshold, ProvidedCount,
ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
ProvidedFullUnrollMaxCount);
TargetTransformInfo::PeelingPreferences PP = gatherPeelingPreferences(
@@ -1529,3 +1617,25 @@ PreservedAnalyses LoopUnrollPass::run(Function &F,
return getLoopPassPreservedAnalyses();
}
+
+void LoopUnrollPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<LoopUnrollPass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+ OS << "<";
+ if (UnrollOpts.AllowPartial != None)
+ OS << (UnrollOpts.AllowPartial.getValue() ? "" : "no-") << "partial;";
+ if (UnrollOpts.AllowPeeling != None)
+ OS << (UnrollOpts.AllowPeeling.getValue() ? "" : "no-") << "peeling;";
+ if (UnrollOpts.AllowRuntime != None)
+ OS << (UnrollOpts.AllowRuntime.getValue() ? "" : "no-") << "runtime;";
+ if (UnrollOpts.AllowUpperBound != None)
+ OS << (UnrollOpts.AllowUpperBound.getValue() ? "" : "no-") << "upperbound;";
+ if (UnrollOpts.AllowProfileBasedPeeling != None)
+ OS << (UnrollOpts.AllowProfileBasedPeeling.getValue() ? "" : "no-")
+ << "profile-peeling;";
+ if (UnrollOpts.FullUnrollMaxCount != None)
+ OS << "full-unroll-max=" << UnrollOpts.FullUnrollMaxCount << ";";
+ OS << "O" << UnrollOpts.OptLevel;
+ OS << ">";
+}
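The new printPipeline override lets the configured unroll options be printed back as part of a textual pass pipeline (the facility used by opt's -print-pipeline-passes). A rough usage sketch follows; it assumes the matching declaration this import adds to LoopUnrollPass.h, and dumpUnrollPipeline plus the sample output string are illustrative only.

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/raw_ostream.h"
    #include "llvm/Transforms/Scalar/LoopUnrollPass.h"

    // Print the textual form of a configured unroll pass; with these options
    // the output should look roughly like "LoopUnrollPass<partial;O2>".
    void dumpUnrollPipeline() {
      llvm::LoopUnrollPass P(
          llvm::LoopUnrollOptions(/*OptLevel=*/2).setPartial(true));
      P.printPipeline(llvm::errs(),
                      [](llvm::StringRef Name) { return Name; });
    }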
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
index 9a854ff80246..76bb5497c2c2 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -232,10 +232,8 @@ namespace {
AU.addPreserved<LazyBranchProbabilityInfoPass>();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetTransformInfoWrapperPass>();
- if (EnableMSSALoopDependency) {
- AU.addRequired<MemorySSAWrapperPass>();
- AU.addPreserved<MemorySSAWrapperPass>();
- }
+ AU.addRequired<MemorySSAWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
if (HasBranchDivergence)
AU.addRequired<LegacyDivergenceAnalysis>();
getLoopAnalysisUsage(AU);
@@ -539,11 +537,8 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPMRef) {
LPM = &LPMRef;
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- if (EnableMSSALoopDependency) {
- MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
- MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
- assert(DT && "Cannot update MemorySSA without a valid DomTree.");
- }
+ MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
+ MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
CurrentLoop = L;
Function *F = CurrentLoop->getHeader()->getParent();
@@ -551,19 +546,19 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPMRef) {
if (SanitizeMemory)
SafetyInfo.computeLoopSafetyInfo(L);
- if (MSSA && VerifyMemorySSA)
+ if (VerifyMemorySSA)
MSSA->verifyMemorySSA();
bool Changed = false;
do {
assert(CurrentLoop->isLCSSAForm(*DT));
- if (MSSA && VerifyMemorySSA)
+ if (VerifyMemorySSA)
MSSA->verifyMemorySSA();
RedoLoop = false;
Changed |= processCurrentLoop();
} while (RedoLoop);
- if (MSSA && VerifyMemorySSA)
+ if (VerifyMemorySSA)
MSSA->verifyMemorySSA();
return Changed;
@@ -1312,8 +1307,7 @@ void LoopUnswitch::splitExitEdges(
for (unsigned I = 0, E = ExitBlocks.size(); I != E; ++I) {
BasicBlock *ExitBlock = ExitBlocks[I];
- SmallVector<BasicBlock *, 4> Preds(pred_begin(ExitBlock),
- pred_end(ExitBlock));
+ SmallVector<BasicBlock *, 4> Preds(predecessors(ExitBlock));
// Although SplitBlockPredecessors doesn't preserve loop-simplify in
// general, if we call it on all predecessors of all exits then it does.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp
index bd3001988369..186065db327e 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp
@@ -55,11 +55,17 @@ static bool replaceConditionalBranchesOnConstant(Instruction *II,
Value *NewValue,
DomTreeUpdater *DTU) {
bool HasDeadBlocks = false;
- SmallSetVector<Instruction *, 8> Worklist;
+ SmallSetVector<Instruction *, 8> UnsimplifiedUsers;
replaceAndRecursivelySimplify(II, NewValue, nullptr, nullptr, nullptr,
- &Worklist);
- for (auto I : Worklist) {
- BranchInst *BI = dyn_cast<BranchInst>(I);
+ &UnsimplifiedUsers);
+ // UnsimplifiedUsers can contain PHI nodes that may be removed when
+ // replacing the branch instructions, so use a value handle worklist
+ // to handle those possibly removed instructions.
+ SmallVector<WeakVH, 8> Worklist(UnsimplifiedUsers.begin(),
+ UnsimplifiedUsers.end());
+
+ for (auto &VH : Worklist) {
+ BranchInst *BI = dyn_cast_or_null<BranchInst>(VH);
if (!BI)
continue;
if (BI->isUnconditional())
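Switching the worklist to WeakVH means that entries deleted while branches are being rewritten simply become null instead of dangling. A small sketch of the same pattern outside this pass is shown below; visitSurvivingCondBranches and its callback are illustrative names.

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/STLExtras.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/ValueHandle.h"

    // Snapshot candidate users into weak handles before a transform that may
    // erase some of them; erased instructions show up as null handles.
    static void visitSurvivingCondBranches(
        llvm::ArrayRef<llvm::Instruction *> Users,
        llvm::function_ref<void(llvm::BranchInst &)> Visit) {
      llvm::SmallVector<llvm::WeakVH, 8> Worklist(Users.begin(), Users.end());
      // ... a transform that may delete some of the users would run here ...
      for (llvm::WeakVH &VH : Worklist) {
        auto *BI = llvm::dyn_cast_or_null<llvm::BranchInst>(VH);
        if (BI && BI->isConditional())
          Visit(*BI);
      }
    }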
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
index ead8082f3036..1c186e9a0488 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
@@ -357,11 +357,10 @@ static bool lowerExpectIntrinsic(Function &F) {
// Remove llvm.expect intrinsics. Iterate backwards in order
// to process select instructions before the intrinsic gets
// removed.
- for (auto BI = BB.rbegin(), BE = BB.rend(); BI != BE;) {
- Instruction *Inst = &*BI++;
- CallInst *CI = dyn_cast<CallInst>(Inst);
+ for (Instruction &Inst : llvm::make_early_inc_range(llvm::reverse(BB))) {
+ CallInst *CI = dyn_cast<CallInst>(&Inst);
if (!CI) {
- if (SelectInst *SI = dyn_cast<SelectInst>(Inst)) {
+ if (SelectInst *SI = dyn_cast<SelectInst>(&Inst)) {
if (handleBrSelExpect(*SI))
ExpectIntrinsicsHandled++;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index 42c183a6408e..4e4097e13271 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -900,8 +900,7 @@ public:
// UndefedInsts and then check that we in fact remove them.
SmallSet<Instruction *, 16> UndefedInsts;
for (auto *Inst : reverse(ToRemove)) {
- for (auto I = Inst->use_begin(), E = Inst->use_end(); I != E;) {
- Use &U = *I++;
+ for (Use &U : llvm::make_early_inc_range(Inst->uses())) {
if (auto *Undefed = dyn_cast<Instruction>(U.getUser()))
UndefedInsts.insert(Undefed);
U.set(UndefValue::get(Inst->getType()));
@@ -981,8 +980,9 @@ public:
Value *EltPtr = createElementPtr(Ptr, EltTy, Builder);
MatrixTy Result;
for (unsigned I = 0, E = Shape.getNumVectors(); I < E; ++I) {
- Value *GEP = computeVectorAddr(EltPtr, Builder.getInt64(I), Stride,
- Shape.getStride(), EltTy, Builder);
+ Value *GEP = computeVectorAddr(
+ EltPtr, Builder.getIntN(Stride->getType()->getScalarSizeInBits(), I),
+ Stride, Shape.getStride(), EltTy, Builder);
Value *Vector = Builder.CreateAlignedLoad(
VecTy, GEP, getAlignForIndex(I, Stride, EltTy, MAlign),
IsVolatile, "col.load");
@@ -1071,9 +1071,11 @@ public:
auto VType = cast<VectorType>(Ty);
Value *EltPtr = createElementPtr(Ptr, VType->getElementType(), Builder);
for (auto Vec : enumerate(StoreVal.vectors())) {
- Value *GEP = computeVectorAddr(EltPtr, Builder.getInt64(Vec.index()),
- Stride, StoreVal.getStride(),
- VType->getElementType(), Builder);
+ Value *GEP = computeVectorAddr(
+ EltPtr,
+ Builder.getIntN(Stride->getType()->getScalarSizeInBits(),
+ Vec.index()),
+ Stride, StoreVal.getStride(), VType->getElementType(), Builder);
Builder.CreateAlignedStore(Vec.value(), GEP,
getAlignForIndex(Vec.index(), Stride,
VType->getElementType(),
@@ -2261,6 +2263,16 @@ PreservedAnalyses LowerMatrixIntrinsicsPass::run(Function &F,
return PreservedAnalyses::all();
}
+void LowerMatrixIntrinsicsPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<LowerMatrixIntrinsicsPass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+ OS << "<";
+ if (Minimal)
+ OS << "minimal";
+ OS << ">";
+}
+
namespace {
class LowerMatrixIntrinsicsLegacyPass : public FunctionPass {
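The column-load and column-store changes above build the vector index with getIntN at the stride's bit width, so the index and a possibly non-i64 stride stay in the same integer type when they are multiplied inside computeVectorAddr. A tiny sketch of that pattern, not tied to this file, with an illustrative helper name:

    #include "llvm/IR/IRBuilder.h"

    // Produce a constant index whose width matches an existing stride value,
    // so later index arithmetic does not mix integer types.
    static llvm::Value *indexLikeStride(llvm::IRBuilder<> &B,
                                        llvm::Value *Stride, uint64_t Idx) {
      unsigned Bits = Stride->getType()->getScalarSizeInBits();
      return B.getIntN(Bits, Idx);
    }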
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 9afbe0e9a2a5..67335a45fb58 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -22,7 +22,6 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/Loads.h"
-#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
@@ -67,9 +66,10 @@ using namespace llvm;
#define DEBUG_TYPE "memcpyopt"
-static cl::opt<bool>
- EnableMemorySSA("enable-memcpyopt-memoryssa", cl::init(true), cl::Hidden,
- cl::desc("Use MemorySSA-backed MemCpyOpt."));
+static cl::opt<bool> EnableMemCpyOptWithoutLibcalls(
+ "enable-memcpyopt-without-libcalls", cl::init(false), cl::Hidden,
+ cl::ZeroOrMore,
+ cl::desc("Enable memcpyopt even when libcalls are disabled"));
STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
STATISTIC(NumMemSetInfer, "Number of memsets inferred");
@@ -282,13 +282,9 @@ private:
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
- if (!EnableMemorySSA)
- AU.addRequired<MemoryDependenceWrapperPass>();
- AU.addPreserved<MemoryDependenceWrapperPass>();
AU.addRequired<AAResultsWrapperPass>();
AU.addPreserved<AAResultsWrapperPass>();
- if (EnableMemorySSA)
- AU.addRequired<MemorySSAWrapperPass>();
+ AU.addRequired<MemorySSAWrapperPass>();
AU.addPreserved<MemorySSAWrapperPass>();
}
};
@@ -304,7 +300,6 @@ INITIALIZE_PASS_BEGIN(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization",
false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
@@ -329,10 +324,7 @@ static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start,
}
void MemCpyOptPass::eraseInstruction(Instruction *I) {
- if (MSSAU)
- MSSAU->removeMemoryAccess(I);
- if (MD)
- MD->removeInstruction(I);
+ MSSAU->removeMemoryAccess(I);
I->eraseFromParent();
}
@@ -394,14 +386,12 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
// memsets.
MemoryDef *LastMemDef = nullptr;
for (++BI; !BI->isTerminator(); ++BI) {
- if (MSSAU) {
- auto *CurrentAcc = cast_or_null<MemoryUseOrDef>(
- MSSAU->getMemorySSA()->getMemoryAccess(&*BI));
- if (CurrentAcc) {
- MemInsertPoint = CurrentAcc;
- if (auto *CurrentDef = dyn_cast<MemoryDef>(CurrentAcc))
- LastMemDef = CurrentDef;
- }
+ auto *CurrentAcc = cast_or_null<MemoryUseOrDef>(
+ MSSAU->getMemorySSA()->getMemoryAccess(&*BI));
+ if (CurrentAcc) {
+ MemInsertPoint = CurrentAcc;
+ if (auto *CurrentDef = dyn_cast<MemoryDef>(CurrentAcc))
+ LastMemDef = CurrentDef;
}
// Calls that only access inaccessible memory do not block merging
@@ -503,19 +493,17 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
if (!Range.TheStores.empty())
AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
- if (MSSAU) {
- assert(LastMemDef && MemInsertPoint &&
- "Both LastMemDef and MemInsertPoint need to be set");
- auto *NewDef =
- cast<MemoryDef>(MemInsertPoint->getMemoryInst() == &*BI
- ? MSSAU->createMemoryAccessBefore(
- AMemSet, LastMemDef, MemInsertPoint)
- : MSSAU->createMemoryAccessAfter(
- AMemSet, LastMemDef, MemInsertPoint));
- MSSAU->insertDef(NewDef, /*RenameUses=*/true);
- LastMemDef = NewDef;
- MemInsertPoint = NewDef;
- }
+ assert(LastMemDef && MemInsertPoint &&
+ "Both LastMemDef and MemInsertPoint need to be set");
+ auto *NewDef =
+ cast<MemoryDef>(MemInsertPoint->getMemoryInst() == &*BI
+ ? MSSAU->createMemoryAccessBefore(
+ AMemSet, LastMemDef, MemInsertPoint)
+ : MSSAU->createMemoryAccessAfter(
+ AMemSet, LastMemDef, MemInsertPoint));
+ MSSAU->insertDef(NewDef, /*RenameUses=*/true);
+ LastMemDef = NewDef;
+ MemInsertPoint = NewDef;
// Zap all the stores.
for (Instruction *SI : Range.TheStores)
@@ -624,17 +612,15 @@ bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
// TODO: Simplify this once P will be determined by MSSA, in which case the
// discrepancy can no longer occur.
MemoryUseOrDef *MemInsertPoint = nullptr;
- if (MSSAU) {
- if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(P)) {
- MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
- } else {
- const Instruction *ConstP = P;
- for (const Instruction &I : make_range(++ConstP->getReverseIterator(),
- ++LI->getReverseIterator())) {
- if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
- MemInsertPoint = MA;
- break;
- }
+ if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(P)) {
+ MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
+ } else {
+ const Instruction *ConstP = P;
+ for (const Instruction &I : make_range(++ConstP->getReverseIterator(),
+ ++LI->getReverseIterator())) {
+ if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
+ MemInsertPoint = MA;
+ break;
}
}
}
@@ -643,12 +629,10 @@ bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
for (auto *I : llvm::reverse(ToLift)) {
LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n");
I->moveBefore(P);
- if (MSSAU) {
- assert(MemInsertPoint && "Must have found insert point");
- if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(I)) {
- MSSAU->moveAfter(MA, MemInsertPoint);
- MemInsertPoint = MA;
- }
+ assert(MemInsertPoint && "Must have found insert point");
+ if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(I)) {
+ MSSAU->moveAfter(MA, MemInsertPoint);
+ MemInsertPoint = MA;
}
}
@@ -682,7 +666,13 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
LI->getParent() == SI->getParent()) {
auto *T = LI->getType();
- if (T->isAggregateType()) {
+ // Don't introduce calls to memcpy/memmove intrinsics out of thin air if
+ // the corresponding libcalls are not available.
+ // TODO: We should really distinguish between libcall availability and
+ // our ability to introduce intrinsics.
+ if (T->isAggregateType() &&
+ (EnableMemCpyOptWithoutLibcalls ||
+ (TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove)))) {
MemoryLocation LoadLoc = MemoryLocation::get(LI);
// We use alias analysis to check if an instruction may store to
@@ -712,9 +702,10 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (P) {
// If we load from memory that may alias the memory we store to,
// memmove must be used to preserve semantic. If not, memcpy can
- // be used.
+ // be used. Also, if we load from constant memory, memcpy can be used
+ // as the constant memory won't be modified.
bool UseMemMove = false;
- if (!AA->isNoAlias(MemoryLocation::get(SI), LoadLoc))
+ if (isModSet(AA->getModRefInfo(SI, LoadLoc)))
UseMemMove = true;
uint64_t Size = DL.getTypeStoreSize(T);
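Replacing the pure isNoAlias check with a mod/ref query is what allows this hunk (and the later memcpy-forwarding one) to keep using memcpy when the source is constant memory: pointers that alias memory that is never written still report no Mod. A minimal sketch of that query, with illustrative names:

    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/Analysis/MemoryLocation.h"
    #include "llvm/IR/Instructions.h"

    // memmove is only needed if the store may actually modify the loaded-from
    // location; constant memory never reports Mod even when it aliases.
    static bool needsMemmove(llvm::AAResults &AA, llvm::StoreInst *SI,
                             const llvm::MemoryLocation &LoadLoc) {
      return llvm::isModSet(AA.getModRefInfo(SI, LoadLoc));
    }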
@@ -733,13 +724,10 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => "
<< *M << "\n");
- if (MSSAU) {
- auto *LastDef =
- cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
- auto *NewAccess =
- MSSAU->createMemoryAccessAfter(M, LastDef, LastDef);
- MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
- }
+ auto *LastDef =
+ cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
+ auto *NewAccess = MSSAU->createMemoryAccessAfter(M, LastDef, LastDef);
+ MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
eraseInstruction(SI);
eraseInstruction(LI);
@@ -755,38 +743,21 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// happen to be using a load-store pair to implement it, rather than
// a memcpy.
CallInst *C = nullptr;
- if (EnableMemorySSA) {
- if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
- MSSA->getWalker()->getClobberingMemoryAccess(LI))) {
- // The load most post-dom the call. Limit to the same block for now.
- // TODO: Support non-local call-slot optimization?
- if (LoadClobber->getBlock() == SI->getParent())
- C = dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
- }
- } else {
- MemDepResult ldep = MD->getDependency(LI);
- if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst()))
- C = dyn_cast<CallInst>(ldep.getInst());
+ if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
+ MSSA->getWalker()->getClobberingMemoryAccess(LI))) {
+      // The load must post-dominate the call. Limit to the same block for now.
+ // TODO: Support non-local call-slot optimization?
+ if (LoadClobber->getBlock() == SI->getParent())
+ C = dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
}
if (C) {
// Check that nothing touches the dest of the "copy" between
// the call and the store.
MemoryLocation StoreLoc = MemoryLocation::get(SI);
- if (EnableMemorySSA) {
- if (accessedBetween(*AA, StoreLoc, MSSA->getMemoryAccess(C),
- MSSA->getMemoryAccess(SI)))
- C = nullptr;
- } else {
- for (BasicBlock::iterator I = --SI->getIterator(),
- E = C->getIterator();
- I != E; --I) {
- if (isModOrRefSet(AA->getModRefInfo(&*I, StoreLoc))) {
- C = nullptr;
- break;
- }
- }
- }
+ if (accessedBetween(*AA, StoreLoc, MSSA->getMemoryAccess(C),
+ MSSA->getMemoryAccess(SI)))
+ C = nullptr;
}
if (C) {
@@ -805,6 +776,13 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
}
}
+ // The following code creates memset intrinsics out of thin air. Don't do
+ // this if the corresponding libfunc is not available.
+ // TODO: We should really distinguish between libcall availability and
+ // our ability to introduce intrinsics.
+ if (!(TLI->has(LibFunc_memset) || EnableMemCpyOptWithoutLibcalls))
+ return false;
+
// There are two cases that are interesting for this code to handle: memcpy
// and memset. Right now we only handle memset.
@@ -831,13 +809,12 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
- if (MSSAU) {
- assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI)));
- auto *LastDef =
- cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
- auto *NewAccess = MSSAU->createMemoryAccessAfter(M, LastDef, LastDef);
- MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
- }
+ // The newly inserted memset is immediately overwritten by the original
+ // store, so we do not need to rename uses.
+ auto *StoreDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
+ auto *NewAccess = MSSAU->createMemoryAccessBefore(
+ M, StoreDef->getDefiningAccess(), StoreDef);
+ MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/false);
eraseInstruction(SI);
NumMemSetInfer++;
@@ -1033,11 +1010,6 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
}
- // Drop any cached information about the call, because we may have changed
- // its dependence information by changing its parameter.
- if (MD)
- MD->removeInstruction(C);
-
// Update AA metadata
// FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
// handled here, but combineMetadata doesn't support them yet
@@ -1086,28 +1058,19 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
//
// TODO: If the code between M and MDep is transparent to the destination "c",
// then we could still perform the xform by moving M up to the first memcpy.
- if (EnableMemorySSA) {
- // TODO: It would be sufficient to check the MDep source up to the memcpy
- // size of M, rather than MDep.
- if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep),
- MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M)))
- return false;
- } else {
- // NOTE: This is conservative, it will stop on any read from the source loc,
- // not just the defining memcpy.
- MemDepResult SourceDep =
- MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false,
- M->getIterator(), M->getParent());
- if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
- return false;
- }
+ // TODO: It would be sufficient to check the MDep source up to the memcpy
+ // size of M, rather than MDep.
+ if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep),
+ MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M)))
+ return false;
// If the dest of the second might alias the source of the first, then the
- // source and dest might overlap. We still want to eliminate the intermediate
- // value, but we have to generate a memmove instead of memcpy.
+ // source and dest might overlap. In addition, if the source of the first
+ // points to constant memory, they won't overlap by definition. Otherwise, we
+ // still want to eliminate the intermediate value, but we have to generate a
+ // memmove instead of memcpy.
bool UseMemMove = false;
- if (!AA->isNoAlias(MemoryLocation::getForDest(M),
- MemoryLocation::getForSource(MDep)))
+ if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(MDep))))
UseMemMove = true;
// If all checks passed, then we can transform M.
@@ -1134,12 +1097,10 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
MDep->getRawSource(), MDep->getSourceAlign(),
M->getLength(), M->isVolatile());
- if (MSSAU) {
- assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)));
- auto *LastDef = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
- auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
- MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
- }
+ assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)));
+ auto *LastDef = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
+ auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
+ MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
// Remove the instruction we're replacing.
eraseInstruction(M);
@@ -1169,30 +1130,16 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
// Check that src and dst of the memcpy aren't the same. While memcpy
// operands cannot partially overlap, exact equality is allowed.
- if (!AA->isNoAlias(MemoryLocation(MemCpy->getSource(),
- LocationSize::precise(1)),
- MemoryLocation(MemCpy->getDest(),
- LocationSize::precise(1))))
+ if (isModSet(AA->getModRefInfo(MemCpy, MemoryLocation::getForSource(MemCpy))))
return false;
- if (EnableMemorySSA) {
- // We know that dst up to src_size is not written. We now need to make sure
- // that dst up to dst_size is not accessed. (If we did not move the memset,
- // checking for reads would be sufficient.)
- if (accessedBetween(*AA, MemoryLocation::getForDest(MemSet),
- MSSA->getMemoryAccess(MemSet),
- MSSA->getMemoryAccess(MemCpy))) {
- return false;
- }
- } else {
- // We have already checked that dst up to src_size is not accessed. We
- // need to make sure that there are no accesses up to dst_size either.
- MemDepResult DstDepInfo = MD->getPointerDependencyFrom(
- MemoryLocation::getForDest(MemSet), false, MemCpy->getIterator(),
- MemCpy->getParent());
- if (DstDepInfo.getInst() != MemSet)
- return false;
- }
+ // We know that dst up to src_size is not written. We now need to make sure
+ // that dst up to dst_size is not accessed. (If we did not move the memset,
+ // checking for reads would be sufficient.)
+ if (accessedBetween(*AA, MemoryLocation::getForDest(MemSet),
+ MSSA->getMemoryAccess(MemSet),
+ MSSA->getMemoryAccess(MemCpy)))
+ return false;
// Use the same i8* dest as the memcpy, killing the memset dest if different.
Value *Dest = MemCpy->getRawDest();
@@ -1242,18 +1189,16 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
SrcSize),
MemSet->getOperand(1), MemsetLen, MaybeAlign(Align));
- if (MSSAU) {
- assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) &&
- "MemCpy must be a MemoryDef");
- // The new memset is inserted after the memcpy, but it is known that its
- // defining access is the memset about to be removed which immediately
- // precedes the memcpy.
- auto *LastDef =
- cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
- auto *NewAccess = MSSAU->createMemoryAccessBefore(
- NewMemSet, LastDef->getDefiningAccess(), LastDef);
- MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
- }
+ assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) &&
+ "MemCpy must be a MemoryDef");
+ // The new memset is inserted after the memcpy, but it is known that its
+ // defining access is the memset about to be removed which immediately
+ // precedes the memcpy.
+ auto *LastDef =
+ cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
+ auto *NewAccess = MSSAU->createMemoryAccessBefore(
+ NewMemSet, LastDef->getDefiningAccess(), LastDef);
+ MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
eraseInstruction(MemSet);
return true;
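For context, processMemSetMemCpyDependence turns a full-size memset followed by a memcpy over a prefix into the memcpy plus a memset of only the uncovered tail. A minimal C-level sketch of that size arithmetic, with hypothetical buffers and sizes:

#include <cassert>
#include <cstddef>
#include <cstring>

// Sketch of the memset+memcpy shrink performed above, expressed in plain C++.
int main() {
  const char src[4] = {'a', 'b', 'c', 'd'};
  char dst1[8], dst2[8];
  const std::size_t dst_size = 8, src_size = 4;

  // Original: memset the whole destination, then overwrite a prefix.
  std::memset(dst1, 0x7f, dst_size);
  std::memcpy(dst1, src, src_size);

  // Rewritten: copy first, then memset only the tail the copy did not cover
  // (dst_size - src_size bytes starting at dst + src_size).
  std::memcpy(dst2, src, src_size);
  std::memset(dst2 + src_size, 0x7f, dst_size - src_size);

  assert(std::memcmp(dst1, dst2, dst_size) == 0);
  return 0;
}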
@@ -1261,23 +1206,8 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
/// Determine whether the instruction has undefined content for the given Size,
/// either because it was freshly alloca'd or started its lifetime.
-static bool hasUndefContents(Instruction *I, Value *Size) {
- if (isa<AllocaInst>(I))
- return true;
-
- if (ConstantInt *CSize = dyn_cast<ConstantInt>(Size)) {
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
- if (II->getIntrinsicID() == Intrinsic::lifetime_start)
- if (ConstantInt *LTSize = dyn_cast<ConstantInt>(II->getArgOperand(0)))
- if (LTSize->getZExtValue() >= CSize->getZExtValue())
- return true;
- }
-
- return false;
-}
-
-static bool hasUndefContentsMSSA(MemorySSA *MSSA, AliasAnalysis *AA, Value *V,
- MemoryDef *Def, Value *Size) {
+static bool hasUndefContents(MemorySSA *MSSA, AliasAnalysis *AA, Value *V,
+ MemoryDef *Def, Value *Size) {
if (MSSA->isLiveOnEntryDef(Def))
return isa<AllocaInst>(getUnderlyingObject(V));
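The merged hasUndefContents answers: does the copy source hold undefined bytes? A small standalone model of that decision (plain structs, not LLVM's MemorySSA types; all field names here are made up):

#include <cstdint>
#include <optional>

// A standalone model of the hasUndefContents decision: the copied-from memory
// is undef if its defining access is the live-on-entry def of a local alloca,
// or a lifetime.start whose constant size covers the whole copy.
struct DefSite {
  bool LiveOnEntry;                     // no defining write was found
  bool UnderlyingIsAlloca;              // the pointer is based on an alloca
  bool IsLifetimeStart;                 // defining access is lifetime.start
  std::optional<uint64_t> LifetimeSize; // its size argument, if constant
};

static bool hasUndefContentsModel(const DefSite &D,
                                  std::optional<uint64_t> CopySize) {
  if (D.LiveOnEntry)
    return D.UnderlyingIsAlloca;
  if (D.IsLifetimeStart && D.LifetimeSize && CopySize)
    return *D.LifetimeSize >= *CopySize;
  return false;
}

int main() {
  DefSite FreshAlloca{true, true, false, std::nullopt};
  DefSite Lifetime{false, false, true, uint64_t(16)};
  bool OK = hasUndefContentsModel(FreshAlloca, uint64_t(8)) &&
            hasUndefContentsModel(Lifetime, uint64_t(16)) &&
            !hasUndefContentsModel(Lifetime, uint64_t(32));
  return OK ? 0 : 1;
}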
@@ -1351,19 +1281,12 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
// easily represent this location, we use the full 0..CopySize range.
MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
bool CanReduceSize = false;
- if (EnableMemorySSA) {
- MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
- MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
- MemSetAccess->getDefiningAccess(), MemCpyLoc);
- if (auto *MD = dyn_cast<MemoryDef>(Clobber))
- if (hasUndefContentsMSSA(MSSA, AA, MemCpy->getSource(), MD, CopySize))
- CanReduceSize = true;
- } else {
- MemDepResult DepInfo = MD->getPointerDependencyFrom(
- MemCpyLoc, true, MemSet->getIterator(), MemSet->getParent());
- if (DepInfo.isDef() && hasUndefContents(DepInfo.getInst(), CopySize))
+ MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
+ MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
+ MemSetAccess->getDefiningAccess(), MemCpyLoc);
+ if (auto *MD = dyn_cast<MemoryDef>(Clobber))
+ if (hasUndefContents(MSSA, AA, MemCpy->getSource(), MD, CopySize))
CanReduceSize = true;
- }
if (!CanReduceSize)
return false;
@@ -1375,12 +1298,10 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
Instruction *NewM =
Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
CopySize, MaybeAlign(MemCpy->getDestAlignment()));
- if (MSSAU) {
- auto *LastDef =
- cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
- auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
- MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
- }
+ auto *LastDef =
+ cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
+ auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
+ MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
return true;
}
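performMemCpyToMemSetOptzn rewrites a memcpy whose source was just memset into a memset of the destination. A C-level sketch of why the rewrite is sound (byte value and sizes are arbitrary):

#include <cassert>
#include <cstring>

// Copying out of a region that was just memset to a byte value is the same
// as memset'ing the destination with that value.
int main() {
  char src[16], dst1[8], dst2[8];

  // Original: memset the source, then memcpy part of it to the destination.
  std::memset(src, 0x2a, sizeof src);
  std::memcpy(dst1, src, sizeof dst1);

  // Rewritten: memset the destination directly with the same byte value.
  std::memset(dst2, 0x2a, sizeof dst2);

  assert(std::memcmp(dst1, dst2, sizeof dst1) == 0);
  return 0;
}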
@@ -1410,151 +1331,90 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
Instruction *NewM =
Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(),
MaybeAlign(M->getDestAlignment()), false);
- if (MSSAU) {
- auto *LastDef =
- cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
- auto *NewAccess =
- MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
- MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
- }
+ auto *LastDef =
+ cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
+ auto *NewAccess =
+ MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
+ MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
eraseInstruction(M);
++NumCpyToSet;
return true;
}
- if (EnableMemorySSA) {
- MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
- MemoryAccess *AnyClobber = MSSA->getWalker()->getClobberingMemoryAccess(MA);
- MemoryLocation DestLoc = MemoryLocation::getForDest(M);
- const MemoryAccess *DestClobber =
- MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc);
-
- // Try to turn a partially redundant memset + memcpy into
- // memcpy + smaller memset. We don't need the memcpy size for this.
- // The memcpy most post-dom the memset, so limit this to the same basic
- // block. A non-local generalization is likely not worthwhile.
- if (auto *MD = dyn_cast<MemoryDef>(DestClobber))
- if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
- if (DestClobber->getBlock() == M->getParent())
- if (processMemSetMemCpyDependence(M, MDep))
- return true;
-
- MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
- AnyClobber, MemoryLocation::getForSource(M));
-
- // There are four possible optimizations we can do for memcpy:
- // a) memcpy-memcpy xform which exposes redundance for DSE.
- // b) call-memcpy xform for return slot optimization.
- // c) memcpy from freshly alloca'd space or space that has just started
- // its lifetime copies undefined data, and we can therefore eliminate
- // the memcpy in favor of the data that was already at the destination.
- // d) memcpy from a just-memset'd source can be turned into memset.
- if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
- if (Instruction *MI = MD->getMemoryInst()) {
- if (ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
- if (auto *C = dyn_cast<CallInst>(MI)) {
- // The memcpy must post-dom the call. Limit to the same block for
- // now. Additionally, we need to ensure that there are no accesses
- // to dest between the call and the memcpy. Accesses to src will be
- // checked by performCallSlotOptzn().
- // TODO: Support non-local call-slot optimization?
- if (C->getParent() == M->getParent() &&
- !accessedBetween(*AA, DestLoc, MD, MA)) {
- // FIXME: Can we pass in either of dest/src alignment here instead
- // of conservatively taking the minimum?
- Align Alignment = std::min(M->getDestAlign().valueOrOne(),
- M->getSourceAlign().valueOrOne());
- if (performCallSlotOptzn(
- M, M, M->getDest(), M->getSource(),
- TypeSize::getFixed(CopySize->getZExtValue()), Alignment,
- C)) {
- LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
- << " call: " << *C << "\n"
- << " memcpy: " << *M << "\n");
- eraseInstruction(M);
- ++NumMemCpyInstr;
- return true;
- }
- }
- }
- }
- if (auto *MDep = dyn_cast<MemCpyInst>(MI))
- return processMemCpyMemCpyDependence(M, MDep);
- if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
- if (performMemCpyToMemSetOptzn(M, MDep)) {
- LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
- eraseInstruction(M);
- ++NumCpyToSet;
- return true;
- }
- }
- }
-
- if (hasUndefContentsMSSA(MSSA, AA, M->getSource(), MD, M->getLength())) {
- LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
- eraseInstruction(M);
- ++NumMemCpyInstr;
- return true;
- }
- }
- } else {
- MemDepResult DepInfo = MD->getDependency(M);
-
- // Try to turn a partially redundant memset + memcpy into
- // memcpy + smaller memset. We don't need the memcpy size for this.
- if (DepInfo.isClobber())
- if (MemSetInst *MDep = dyn_cast<MemSetInst>(DepInfo.getInst()))
+ MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
+ MemoryAccess *AnyClobber = MSSA->getWalker()->getClobberingMemoryAccess(MA);
+ MemoryLocation DestLoc = MemoryLocation::getForDest(M);
+ const MemoryAccess *DestClobber =
+ MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc);
+
+ // Try to turn a partially redundant memset + memcpy into
+ // memcpy + smaller memset. We don't need the memcpy size for this.
+  // The memcpy must post-dominate the memset, so limit this to the same basic
+ // block. A non-local generalization is likely not worthwhile.
+ if (auto *MD = dyn_cast<MemoryDef>(DestClobber))
+ if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
+ if (DestClobber->getBlock() == M->getParent())
if (processMemSetMemCpyDependence(M, MDep))
return true;
- // There are four possible optimizations we can do for memcpy:
- // a) memcpy-memcpy xform which exposes redundance for DSE.
- // b) call-memcpy xform for return slot optimization.
- // c) memcpy from freshly alloca'd space or space that has just started
- // its lifetime copies undefined data, and we can therefore eliminate
- // the memcpy in favor of the data that was already at the destination.
- // d) memcpy from a just-memset'd source can be turned into memset.
- if (ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
- if (DepInfo.isClobber()) {
- if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
- // FIXME: Can we pass in either of dest/src alignment here instead
- // of conservatively taking the minimum?
- Align Alignment = std::min(M->getDestAlign().valueOrOne(),
- M->getSourceAlign().valueOrOne());
- if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
- TypeSize::getFixed(CopySize->getZExtValue()),
- Alignment, C)) {
- eraseInstruction(M);
- ++NumMemCpyInstr;
- return true;
+ MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
+ AnyClobber, MemoryLocation::getForSource(M));
+
+ // There are four possible optimizations we can do for memcpy:
+ // a) memcpy-memcpy xform which exposes redundance for DSE.
+ // b) call-memcpy xform for return slot optimization.
+ // c) memcpy from freshly alloca'd space or space that has just started
+ // its lifetime copies undefined data, and we can therefore eliminate
+ // the memcpy in favor of the data that was already at the destination.
+ // d) memcpy from a just-memset'd source can be turned into memset.
+ if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
+ if (Instruction *MI = MD->getMemoryInst()) {
+ if (ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
+ if (auto *C = dyn_cast<CallInst>(MI)) {
+ // The memcpy must post-dom the call. Limit to the same block for
+ // now. Additionally, we need to ensure that there are no accesses
+ // to dest between the call and the memcpy. Accesses to src will be
+ // checked by performCallSlotOptzn().
+ // TODO: Support non-local call-slot optimization?
+ if (C->getParent() == M->getParent() &&
+ !accessedBetween(*AA, DestLoc, MD, MA)) {
+ // FIXME: Can we pass in either of dest/src alignment here instead
+ // of conservatively taking the minimum?
+ Align Alignment = std::min(M->getDestAlign().valueOrOne(),
+ M->getSourceAlign().valueOrOne());
+ if (performCallSlotOptzn(
+ M, M, M->getDest(), M->getSource(),
+ TypeSize::getFixed(CopySize->getZExtValue()), Alignment,
+ C)) {
+ LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
+ << " call: " << *C << "\n"
+ << " memcpy: " << *M << "\n");
+ eraseInstruction(M);
+ ++NumMemCpyInstr;
+ return true;
+ }
}
}
}
- }
-
- MemoryLocation SrcLoc = MemoryLocation::getForSource(M);
- MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(
- SrcLoc, true, M->getIterator(), M->getParent());
-
- if (SrcDepInfo.isClobber()) {
- if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
+ if (auto *MDep = dyn_cast<MemCpyInst>(MI))
return processMemCpyMemCpyDependence(M, MDep);
- } else if (SrcDepInfo.isDef()) {
- if (hasUndefContents(SrcDepInfo.getInst(), M->getLength())) {
- eraseInstruction(M);
- ++NumMemCpyInstr;
- return true;
- }
- }
-
- if (SrcDepInfo.isClobber())
- if (MemSetInst *MDep = dyn_cast<MemSetInst>(SrcDepInfo.getInst()))
+ if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
if (performMemCpyToMemSetOptzn(M, MDep)) {
+ LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
eraseInstruction(M);
++NumCpyToSet;
return true;
}
+ }
+ }
+
+ if (hasUndefContents(MSSA, AA, M->getSource(), MD, M->getLength())) {
+ LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
+ eraseInstruction(M);
+ ++NumMemCpyInstr;
+ return true;
+ }
}
return false;
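Of the four cases listed above, the call-slot optimization (b) is the least obvious; the idea, sketched at the C level with a hypothetical callee `fill`, is to let the call produce its result directly in the memcpy destination once the temporary has no other uses:

#include <cassert>
#include <cstring>

// Call-slot sketch: a call fills a temporary, and the only use of that
// temporary is a memcpy into dst, so the call can write dst directly.
static void fill(char *out) { std::memset(out, 0x11, 8); }

int main() {
  char tmp[8], dst1[8], dst2[8];

  // Original: call into a temporary, then copy the temporary to dst.
  fill(tmp);
  std::memcpy(dst1, tmp, sizeof dst1);

  // Rewritten: let the call write the destination slot directly.
  fill(dst2);

  assert(std::memcmp(dst1, dst2, sizeof dst1) == 0);
  return 0;
}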
@@ -1563,12 +1423,8 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
/// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
/// not to alias.
bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
- if (!TLI->has(LibFunc_memmove))
- return false;
-
- // See if the pointers alias.
- if (!AA->isNoAlias(MemoryLocation::getForDest(M),
- MemoryLocation::getForSource(M)))
+ // See if the source could be modified by this memmove potentially.
+ if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(M))))
return false;
LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
@@ -1584,11 +1440,6 @@ bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
// For MemorySSA nothing really changes (except that memcpy may imply stricter
// aliasing guarantees).
- // MemDep may have over conservative information about this instruction, just
- // conservatively flush it from the cache.
- if (MD)
- MD->removeInstruction(M);
-
++NumMoveToCpy;
return true;
}
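The check above replaces the old explicit isNoAlias query: if the memmove cannot modify its own source, the two ranges do not overlap and a memcpy is equivalent. A trivial C-level sketch:

#include <cassert>
#include <cstring>

// When source and destination do not overlap, memmove and memcpy agree.
int main() {
  char src[8] = "abcdefg", dst1[8], dst2[8];

  std::memmove(dst1, src, sizeof src); // original
  std::memcpy(dst2, src, sizeof src);  // rewritten: valid, no overlap

  assert(std::memcmp(dst1, dst2, sizeof src) == 0);
  return 0;
}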
@@ -1601,22 +1452,14 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
Type *ByValTy = CB.getParamByValType(ArgNo);
TypeSize ByValSize = DL.getTypeAllocSize(ByValTy);
MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize));
+ MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
+ if (!CallAccess)
+ return false;
MemCpyInst *MDep = nullptr;
- if (EnableMemorySSA) {
- MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
- if (!CallAccess)
- return false;
- MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
- CallAccess->getDefiningAccess(), Loc);
- if (auto *MD = dyn_cast<MemoryDef>(Clobber))
- MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
- } else {
- MemDepResult DepInfo = MD->getPointerDependencyFrom(
- Loc, true, CB.getIterator(), CB.getParent());
- if (!DepInfo.isClobber())
- return false;
- MDep = dyn_cast<MemCpyInst>(DepInfo.getInst());
- }
+ MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
+ CallAccess->getDefiningAccess(), Loc);
+ if (auto *MD = dyn_cast<MemoryDef>(Clobber))
+ MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
// If the byval argument isn't fed by a memcpy, ignore it. If it is fed by
// a memcpy, see if we can byval from the source of the memcpy instead of the
@@ -1655,19 +1498,9 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
// *b = 42;
// foo(*a)
// It would be invalid to transform the second memcpy into foo(*b).
- if (EnableMemorySSA) {
- if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep),
- MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(&CB)))
- return false;
- } else {
- // NOTE: This is conservative, it will stop on any read from the source loc,
- // not just the defining memcpy.
- MemDepResult SourceDep = MD->getPointerDependencyFrom(
- MemoryLocation::getForSource(MDep), false,
- CB.getIterator(), MDep->getParent());
- if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
- return false;
- }
+ if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep),
+ MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(&CB)))
+ return false;
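The writtenBetween guard matters because forwarding the byval argument to the memcpy source is only valid if that source is untouched in between. A C-level sketch of the hazard, with a hypothetical callee `use` standing in for the byval call:

#include <cassert>
#include <cstring>

// If the source is written after the memcpy, the original (copied) value and
// the would-be forwarded source diverge, so the transform must bail out.
static int use(const int *p) { return *p; }

int main() {
  int b = 1, a;
  std::memcpy(&a, &b, sizeof a);
  b = 42;               // intervening write to the memcpy source
  assert(use(&a) == 1); // original call reads the copied value
  assert(use(&b) == 42); // a "forwarded" call would observe the later write
  return 0;
}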
Value *TmpCast = MDep->getSource();
if (MDep->getSource()->getType() != ByValArg->getType()) {
@@ -1734,47 +1567,33 @@ bool MemCpyOptPass::iterateOnFunction(Function &F) {
}
PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
- auto *MD = !EnableMemorySSA ? &AM.getResult<MemoryDependenceAnalysis>(F)
- : AM.getCachedResult<MemoryDependenceAnalysis>(F);
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
auto *AA = &AM.getResult<AAManager>(F);
auto *AC = &AM.getResult<AssumptionAnalysis>(F);
auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
- auto *MSSA = EnableMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F)
- : AM.getCachedResult<MemorySSAAnalysis>(F);
+ auto *MSSA = &AM.getResult<MemorySSAAnalysis>(F);
- bool MadeChange =
- runImpl(F, MD, &TLI, AA, AC, DT, MSSA ? &MSSA->getMSSA() : nullptr);
+ bool MadeChange = runImpl(F, &TLI, AA, AC, DT, &MSSA->getMSSA());
if (!MadeChange)
return PreservedAnalyses::all();
PreservedAnalyses PA;
PA.preserveSet<CFGAnalyses>();
- if (MD)
- PA.preserve<MemoryDependenceAnalysis>();
- if (MSSA)
- PA.preserve<MemorySSAAnalysis>();
+ PA.preserve<MemorySSAAnalysis>();
return PA;
}
-bool MemCpyOptPass::runImpl(Function &F, MemoryDependenceResults *MD_,
- TargetLibraryInfo *TLI_, AliasAnalysis *AA_,
- AssumptionCache *AC_, DominatorTree *DT_,
- MemorySSA *MSSA_) {
+bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
+ AliasAnalysis *AA_, AssumptionCache *AC_,
+ DominatorTree *DT_, MemorySSA *MSSA_) {
bool MadeChange = false;
- MD = MD_;
TLI = TLI_;
AA = AA_;
AC = AC_;
DT = DT_;
MSSA = MSSA_;
MemorySSAUpdater MSSAU_(MSSA_);
- MSSAU = MSSA_ ? &MSSAU_ : nullptr;
- // If we don't have at least memset and memcpy, there is little point of doing
- // anything here. These are required by a freestanding implementation, so if
- // even they are disabled, there is no point in trying hard.
- if (!TLI->has(LibFunc_memset) || !TLI->has(LibFunc_memcpy))
- return false;
+ MSSAU = &MSSAU_;
while (true) {
if (!iterateOnFunction(F))
@@ -1782,10 +1601,9 @@ bool MemCpyOptPass::runImpl(Function &F, MemoryDependenceResults *MD_,
MadeChange = true;
}
- if (MSSA_ && VerifyMemorySSA)
+ if (VerifyMemorySSA)
MSSA_->verifyMemorySSA();
- MD = nullptr;
return MadeChange;
}
@@ -1794,17 +1612,11 @@ bool MemCpyOptLegacyPass::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
- auto *MDWP = !EnableMemorySSA
- ? &getAnalysis<MemoryDependenceWrapperPass>()
- : getAnalysisIfAvailable<MemoryDependenceWrapperPass>();
auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto *MSSAWP = EnableMemorySSA
- ? &getAnalysis<MemorySSAWrapperPass>()
- : getAnalysisIfAvailable<MemorySSAWrapperPass>();
+ auto *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
- return Impl.runImpl(F, MDWP ? & MDWP->getMemDep() : nullptr, TLI, AA, AC, DT,
- MSSAWP ? &MSSAWP->getMSSA() : nullptr);
+ return Impl.runImpl(F, TLI, AA, AC, DT, MSSA);
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergeICmps.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergeICmps.cpp
index f13f24ad2027..aac0deea5be3 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergeICmps.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergeICmps.cpp
@@ -66,15 +66,6 @@ namespace {
#define DEBUG_TYPE "mergeicmps"
-// Returns true if the instruction is a simple load or a simple store
-static bool isSimpleLoadOrStore(const Instruction *I) {
- if (const LoadInst *LI = dyn_cast<LoadInst>(I))
- return LI->isSimple();
- if (const StoreInst *SI = dyn_cast<StoreInst>(I))
- return SI->isSimple();
- return false;
-}
-
// A BCE atom "Binary Compare Expression Atom" represents an integer load
// that is a constant offset from a base value, e.g. `a` or `o.c` in the example
// at the top.
@@ -154,6 +145,10 @@ BCEAtom visitICmpLoadOperand(Value *const Val, BaseIdentifier &BaseId) {
return {};
}
Value *const Addr = LoadI->getOperand(0);
+ if (Addr->getType()->getPointerAddressSpace() != 0) {
+ LLVM_DEBUG(dbgs() << "from non-zero AddressSpace\n");
+ return {};
+ }
auto *const GEP = dyn_cast<GetElementPtrInst>(Addr);
if (!GEP)
return {};
@@ -234,6 +229,8 @@ class BCECmpBlock {
InstructionSet BlockInsts;
// The block requires splitting.
bool RequireSplit = false;
+ // Original order of this block in the chain.
+ unsigned OrigOrder = 0;
private:
BCECmp Cmp;
@@ -244,14 +241,13 @@ bool BCECmpBlock::canSinkBCECmpInst(const Instruction *Inst,
// If this instruction may clobber the loads and is in middle of the BCE cmp
// block instructions, then bail for now.
if (Inst->mayWriteToMemory()) {
- // Bail if this is not a simple load or store
- if (!isSimpleLoadOrStore(Inst))
- return false;
- // Disallow stores that might alias the BCE operands
- MemoryLocation LLoc = MemoryLocation::get(Cmp.Lhs.LoadI);
- MemoryLocation RLoc = MemoryLocation::get(Cmp.Rhs.LoadI);
- if (isModSet(AA.getModRefInfo(Inst, LLoc)) ||
- isModSet(AA.getModRefInfo(Inst, RLoc)))
+ auto MayClobber = [&](LoadInst *LI) {
+ // If a potentially clobbering instruction comes before the load,
+ // we can still safely sink the load.
+ return !Inst->comesBefore(LI) &&
+ isModSet(AA.getModRefInfo(Inst, MemoryLocation::get(LI)));
+ };
+ if (MayClobber(Cmp.Lhs.LoadI) || MayClobber(Cmp.Rhs.LoadI))
return false;
}
// Make sure this instruction does not use any of the BCE cmp block
@@ -386,39 +382,83 @@ static inline void enqueueBlock(std::vector<BCECmpBlock> &Comparisons,
<< Comparison.Rhs().BaseId << " + "
<< Comparison.Rhs().Offset << "\n");
LLVM_DEBUG(dbgs() << "\n");
+ Comparison.OrigOrder = Comparisons.size();
Comparisons.push_back(std::move(Comparison));
}
// A chain of comparisons.
class BCECmpChain {
- public:
- BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
- AliasAnalysis &AA);
-
- int size() const { return Comparisons_.size(); }
+public:
+ using ContiguousBlocks = std::vector<BCECmpBlock>;
-#ifdef MERGEICMPS_DOT_ON
- void dump() const;
-#endif // MERGEICMPS_DOT_ON
+ BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
+ AliasAnalysis &AA);
bool simplify(const TargetLibraryInfo &TLI, AliasAnalysis &AA,
DomTreeUpdater &DTU);
-private:
- static bool IsContiguous(const BCECmpBlock &First,
- const BCECmpBlock &Second) {
- return First.Lhs().BaseId == Second.Lhs().BaseId &&
- First.Rhs().BaseId == Second.Rhs().BaseId &&
- First.Lhs().Offset + First.SizeBits() / 8 == Second.Lhs().Offset &&
- First.Rhs().Offset + First.SizeBits() / 8 == Second.Rhs().Offset;
+ bool atLeastOneMerged() const {
+ return any_of(MergedBlocks_,
+ [](const auto &Blocks) { return Blocks.size() > 1; });
}
+private:
PHINode &Phi_;
- std::vector<BCECmpBlock> Comparisons_;
+ // The list of all blocks in the chain, grouped by contiguity.
+ std::vector<ContiguousBlocks> MergedBlocks_;
// The original entry block (before sorting);
BasicBlock *EntryBlock_;
};
+static bool areContiguous(const BCECmpBlock &First, const BCECmpBlock &Second) {
+ return First.Lhs().BaseId == Second.Lhs().BaseId &&
+ First.Rhs().BaseId == Second.Rhs().BaseId &&
+ First.Lhs().Offset + First.SizeBits() / 8 == Second.Lhs().Offset &&
+ First.Rhs().Offset + First.SizeBits() / 8 == Second.Rhs().Offset;
+}
+
+static unsigned getMinOrigOrder(const BCECmpChain::ContiguousBlocks &Blocks) {
+ unsigned MinOrigOrder = std::numeric_limits<unsigned>::max();
+ for (const BCECmpBlock &Block : Blocks)
+ MinOrigOrder = std::min(MinOrigOrder, Block.OrigOrder);
+ return MinOrigOrder;
+}
+
+/// Given a chain of comparison blocks, groups the blocks into contiguous
+/// ranges that can be merged together into a single comparison.
+static std::vector<BCECmpChain::ContiguousBlocks>
+mergeBlocks(std::vector<BCECmpBlock> &&Blocks) {
+ std::vector<BCECmpChain::ContiguousBlocks> MergedBlocks;
+
+ // Sort to detect continuous offsets.
+ llvm::sort(Blocks,
+ [](const BCECmpBlock &LhsBlock, const BCECmpBlock &RhsBlock) {
+ return std::tie(LhsBlock.Lhs(), LhsBlock.Rhs()) <
+ std::tie(RhsBlock.Lhs(), RhsBlock.Rhs());
+ });
+
+ BCECmpChain::ContiguousBlocks *LastMergedBlock = nullptr;
+ for (BCECmpBlock &Block : Blocks) {
+ if (!LastMergedBlock || !areContiguous(LastMergedBlock->back(), Block)) {
+ MergedBlocks.emplace_back();
+ LastMergedBlock = &MergedBlocks.back();
+ } else {
+ LLVM_DEBUG(dbgs() << "Merging block " << Block.BB->getName() << " into "
+ << LastMergedBlock->back().BB->getName() << "\n");
+ }
+ LastMergedBlock->push_back(std::move(Block));
+ }
+
+ // While we allow reordering for merging, do not reorder unmerged comparisons.
+ // Doing so may introduce branch on poison.
+ llvm::sort(MergedBlocks, [](const BCECmpChain::ContiguousBlocks &LhsBlocks,
+ const BCECmpChain::ContiguousBlocks &RhsBlocks) {
+ return getMinOrigOrder(LhsBlocks) < getMinOrigOrder(RhsBlocks);
+ });
+
+ return MergedBlocks;
+}
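mergeBlocks is the heart of the new chain handling: sort by offset, group contiguous runs, then order the groups by their minimum original position so unmerged comparisons are not reordered. A standalone model of the grouping step (plain structs, not the pass's BCECmpBlock type; the offsets are invented):

#include <algorithm>
#include <cassert>
#include <vector>

// Group comparisons whose (offset, size) ranges line up back-to-back.
struct Cmp {
  unsigned Offset;    // byte offset from the common base
  unsigned SizeBytes; // width of the compared value
  unsigned OrigOrder; // position in the original chain
};

static bool contiguous(const Cmp &A, const Cmp &B) {
  return A.Offset + A.SizeBytes == B.Offset;
}

int main() {
  // Comparisons at offsets 8, 0 and 4, each 4 bytes wide: after sorting,
  // 0, 4, 8 form a single contiguous run and collapse into one group.
  std::vector<Cmp> Cmps = {{8, 4, 0}, {0, 4, 1}, {4, 4, 2}};
  std::sort(Cmps.begin(), Cmps.end(),
            [](const Cmp &L, const Cmp &R) { return L.Offset < R.Offset; });

  std::vector<std::vector<Cmp>> Groups;
  for (const Cmp &C : Cmps) {
    if (Groups.empty() || !contiguous(Groups.back().back(), C))
      Groups.emplace_back();
    Groups.back().push_back(C);
  }
  assert(Groups.size() == 1 && Groups.front().size() == 3);
  return 0;
}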
+
BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
AliasAnalysis &AA)
: Phi_(Phi) {
@@ -498,47 +538,9 @@ BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
return;
}
EntryBlock_ = Comparisons[0].BB;
- Comparisons_ = std::move(Comparisons);
-#ifdef MERGEICMPS_DOT_ON
- errs() << "BEFORE REORDERING:\n\n";
- dump();
-#endif // MERGEICMPS_DOT_ON
- // Reorder blocks by LHS. We can do that without changing the
- // semantics because we are only accessing dereferencable memory.
- llvm::sort(Comparisons_,
- [](const BCECmpBlock &LhsBlock, const BCECmpBlock &RhsBlock) {
- return std::tie(LhsBlock.Lhs(), LhsBlock.Rhs()) <
- std::tie(RhsBlock.Lhs(), RhsBlock.Rhs());
- });
-#ifdef MERGEICMPS_DOT_ON
- errs() << "AFTER REORDERING:\n\n";
- dump();
-#endif // MERGEICMPS_DOT_ON
+ MergedBlocks_ = mergeBlocks(std::move(Comparisons));
}
-#ifdef MERGEICMPS_DOT_ON
-void BCECmpChain::dump() const {
- errs() << "digraph dag {\n";
- errs() << " graph [bgcolor=transparent];\n";
- errs() << " node [color=black,style=filled,fillcolor=lightyellow];\n";
- errs() << " edge [color=black];\n";
- for (size_t I = 0; I < Comparisons_.size(); ++I) {
- const auto &Comparison = Comparisons_[I];
- errs() << " \"" << I << "\" [label=\"%"
- << Comparison.Lhs().Base()->getName() << " + "
- << Comparison.Lhs().Offset << " == %"
- << Comparison.Rhs().Base()->getName() << " + "
- << Comparison.Rhs().Offset << " (" << (Comparison.SizeBits() / 8)
- << " bytes)\"];\n";
- const Value *const Val = Phi_.getIncomingValueForBlock(Comparison.BB);
- if (I > 0) errs() << " \"" << (I - 1) << "\" -> \"" << I << "\";\n";
- errs() << " \"" << I << "\" -> \"Phi\" [label=\"" << *Val << "\"];\n";
- }
- errs() << " \"Phi\" [label=\"Phi\"];\n";
- errs() << "}\n\n";
-}
-#endif // MERGEICMPS_DOT_ON
-
namespace {
// A class to compute the name of a set of merged basic blocks.
@@ -661,47 +663,18 @@ static BasicBlock *mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
bool BCECmpChain::simplify(const TargetLibraryInfo &TLI, AliasAnalysis &AA,
DomTreeUpdater &DTU) {
- assert(Comparisons_.size() >= 2 && "simplifying trivial BCECmpChain");
- // First pass to check if there is at least one merge. If not, we don't do
- // anything and we keep analysis passes intact.
- const auto AtLeastOneMerged = [this]() {
- for (size_t I = 1; I < Comparisons_.size(); ++I) {
- if (IsContiguous(Comparisons_[I - 1], Comparisons_[I]))
- return true;
- }
- return false;
- };
- if (!AtLeastOneMerged())
- return false;
-
+ assert(atLeastOneMerged() && "simplifying trivial BCECmpChain");
LLVM_DEBUG(dbgs() << "Simplifying comparison chain starting at block "
<< EntryBlock_->getName() << "\n");
// Effectively merge blocks. We go in the reverse direction from the phi block
// so that the next block is always available to branch to.
- const auto mergeRange = [this, &TLI, &AA, &DTU](int I, int Num,
- BasicBlock *InsertBefore,
- BasicBlock *Next) {
- return mergeComparisons(makeArrayRef(Comparisons_).slice(I, Num),
- InsertBefore, Next, Phi_, TLI, AA, DTU);
- };
- int NumMerged = 1;
+ BasicBlock *InsertBefore = EntryBlock_;
BasicBlock *NextCmpBlock = Phi_.getParent();
- for (int I = static_cast<int>(Comparisons_.size()) - 2; I >= 0; --I) {
- if (IsContiguous(Comparisons_[I], Comparisons_[I + 1])) {
- LLVM_DEBUG(dbgs() << "Merging block " << Comparisons_[I].BB->getName()
- << " into " << Comparisons_[I + 1].BB->getName()
- << "\n");
- ++NumMerged;
- } else {
- NextCmpBlock = mergeRange(I + 1, NumMerged, NextCmpBlock, NextCmpBlock);
- NumMerged = 1;
- }
+ for (const auto &Blocks : reverse(MergedBlocks_)) {
+ InsertBefore = NextCmpBlock = mergeComparisons(
+ Blocks, InsertBefore, NextCmpBlock, Phi_, TLI, AA, DTU);
}
- // Insert the entry block for the new chain before the old entry block.
- // If the old entry block was the function entry, this ensures that the new
- // entry can become the function entry.
- NextCmpBlock = mergeRange(0, NumMerged, EntryBlock_, NextCmpBlock);
// Replace the original cmp chain with the new cmp chain by pointing all
// predecessors of EntryBlock_ to NextCmpBlock instead. This makes all cmp
@@ -729,13 +702,16 @@ bool BCECmpChain::simplify(const TargetLibraryInfo &TLI, AliasAnalysis &AA,
// Delete merged blocks. This also removes incoming values in phi.
SmallVector<BasicBlock *, 16> DeadBlocks;
- for (auto &Cmp : Comparisons_) {
- LLVM_DEBUG(dbgs() << "Deleting merged block " << Cmp.BB->getName() << "\n");
- DeadBlocks.push_back(Cmp.BB);
+ for (const auto &Blocks : MergedBlocks_) {
+ for (const BCECmpBlock &Block : Blocks) {
+ LLVM_DEBUG(dbgs() << "Deleting merged block " << Block.BB->getName()
+ << "\n");
+ DeadBlocks.push_back(Block.BB);
+ }
}
DeleteDeadBlocks(DeadBlocks, &DTU);
- Comparisons_.clear();
+ MergedBlocks_.clear();
return true;
}
@@ -835,8 +811,8 @@ bool processPhi(PHINode &Phi, const TargetLibraryInfo &TLI, AliasAnalysis &AA,
if (Blocks.empty()) return false;
BCECmpChain CmpChain(Blocks, Phi, AA);
- if (CmpChain.size() < 2) {
- LLVM_DEBUG(dbgs() << "skip: only one compare block\n");
+ if (!CmpChain.atLeastOneMerged()) {
+ LLVM_DEBUG(dbgs() << "skip: nothing merged\n");
return false;
}
@@ -862,9 +838,9 @@ static bool runImpl(Function &F, const TargetLibraryInfo &TLI,
bool MadeChange = false;
- for (auto BBIt = ++F.begin(); BBIt != F.end(); ++BBIt) {
+ for (BasicBlock &BB : llvm::drop_begin(F)) {
// A Phi operation is always first in a basic block.
- if (auto *const Phi = dyn_cast<PHINode>(&*BBIt->begin()))
+ if (auto *const Phi = dyn_cast<PHINode>(&*BB.begin()))
MadeChange |= processPhi(*Phi, TLI, AA, DTU);
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
index 033fc168a67f..734532a6670c 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -420,3 +420,12 @@ MergedLoadStoreMotionPass::run(Function &F, FunctionAnalysisManager &AM) {
PA.preserveSet<CFGAnalyses>();
return PA;
}
+
+void MergedLoadStoreMotionPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<MergedLoadStoreMotionPass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+ OS << "<";
+ OS << (Options.SplitFooterBB ? "" : "no-") << "split-footer-bb";
+ OS << ">";
+}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/NaryReassociate.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
index ded5caf53b5a..6dca30d9876e 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
@@ -282,8 +282,12 @@ NaryReassociatePass::matchAndReassociateMinOrMax(Instruction *I,
m_Value(LHS), m_Value(RHS));
if (match(I, MinMaxMatcher)) {
OrigSCEV = SE->getSCEV(I);
- return dyn_cast_or_null<Instruction>(
- tryReassociateMinOrMax(I, MinMaxMatcher, LHS, RHS));
+ if (auto *NewMinMax = dyn_cast_or_null<Instruction>(
+ tryReassociateMinOrMax(I, MinMaxMatcher, LHS, RHS)))
+ return NewMinMax;
+ if (auto *NewMinMax = dyn_cast_or_null<Instruction>(
+ tryReassociateMinOrMax(I, MinMaxMatcher, RHS, LHS)))
+ return NewMinMax;
}
return nullptr;
}
@@ -596,58 +600,60 @@ Value *NaryReassociatePass::tryReassociateMinOrMax(Instruction *I,
Value *LHS, Value *RHS) {
Value *A = nullptr, *B = nullptr;
MaxMinT m_MaxMin(m_Value(A), m_Value(B));
- for (unsigned int i = 0; i < 2; ++i) {
- if (!LHS->hasNUsesOrMore(3) && match(LHS, m_MaxMin)) {
- const SCEV *AExpr = SE->getSCEV(A), *BExpr = SE->getSCEV(B);
- const SCEV *RHSExpr = SE->getSCEV(RHS);
- for (unsigned int j = 0; j < 2; ++j) {
- if (j == 0) {
- if (BExpr == RHSExpr)
- continue;
- // Transform 'I = (A op B) op RHS' to 'I = (A op RHS) op B' on the
- // first iteration.
- std::swap(BExpr, RHSExpr);
- } else {
- if (AExpr == RHSExpr)
- continue;
- // Transform 'I = (A op RHS) op B' 'I = (B op RHS) op A' on the second
- // iteration.
- std::swap(AExpr, RHSExpr);
- }
-
- // The optimization is profitable only if LHS can be removed in the end.
- // In other words LHS should be used (directly or indirectly) by I only.
- if (llvm::any_of(LHS->users(), [&](auto *U) {
- return U != I && !(U->hasOneUser() && *U->users().begin() == I);
- }))
- continue;
-
- SCEVExpander Expander(*SE, *DL, "nary-reassociate");
- SmallVector<const SCEV *, 2> Ops1{ BExpr, AExpr };
- const SCEVTypes SCEVType = convertToSCEVype(m_MaxMin);
- const SCEV *R1Expr = SE->getMinMaxExpr(SCEVType, Ops1);
-
- Instruction *R1MinMax = findClosestMatchingDominator(R1Expr, I);
-
- if (!R1MinMax)
- continue;
-
- LLVM_DEBUG(dbgs() << "NARY: Found common sub-expr: " << *R1MinMax
- << "\n");
-
- R1Expr = SE->getUnknown(R1MinMax);
- SmallVector<const SCEV *, 2> Ops2{ RHSExpr, R1Expr };
- const SCEV *R2Expr = SE->getMinMaxExpr(SCEVType, Ops2);
-
- Value *NewMinMax = Expander.expandCodeFor(R2Expr, I->getType(), I);
- NewMinMax->setName(Twine(I->getName()).concat(".nary"));
-
- LLVM_DEBUG(dbgs() << "NARY: Deleting: " << *I << "\n"
- << "NARY: Inserting: " << *NewMinMax << "\n");
- return NewMinMax;
- }
- }
- std::swap(LHS, RHS);
+
+ if (LHS->hasNUsesOrMore(3) ||
+ // The optimization is profitable only if LHS can be removed in the end.
+ // In other words LHS should be used (directly or indirectly) by I only.
+ llvm::any_of(LHS->users(),
+ [&](auto *U) {
+ return U != I &&
+ !(U->hasOneUser() && *U->users().begin() == I);
+ }) ||
+ !match(LHS, m_MaxMin))
+ return nullptr;
+
+ auto tryCombination = [&](Value *A, const SCEV *AExpr, Value *B,
+ const SCEV *BExpr, Value *C,
+ const SCEV *CExpr) -> Value * {
+ SmallVector<const SCEV *, 2> Ops1{BExpr, AExpr};
+ const SCEVTypes SCEVType = convertToSCEVype(m_MaxMin);
+ const SCEV *R1Expr = SE->getMinMaxExpr(SCEVType, Ops1);
+
+ Instruction *R1MinMax = findClosestMatchingDominator(R1Expr, I);
+
+ if (!R1MinMax)
+ return nullptr;
+
+ LLVM_DEBUG(dbgs() << "NARY: Found common sub-expr: " << *R1MinMax << "\n");
+
+ SmallVector<const SCEV *, 2> Ops2{SE->getUnknown(C),
+ SE->getUnknown(R1MinMax)};
+ const SCEV *R2Expr = SE->getMinMaxExpr(SCEVType, Ops2);
+
+ SCEVExpander Expander(*SE, *DL, "nary-reassociate");
+ Value *NewMinMax = Expander.expandCodeFor(R2Expr, I->getType(), I);
+ NewMinMax->setName(Twine(I->getName()).concat(".nary"));
+
+ LLVM_DEBUG(dbgs() << "NARY: Deleting: " << *I << "\n"
+ << "NARY: Inserting: " << *NewMinMax << "\n");
+ return NewMinMax;
+ };
+
+ const SCEV *AExpr = SE->getSCEV(A);
+ const SCEV *BExpr = SE->getSCEV(B);
+ const SCEV *RHSExpr = SE->getSCEV(RHS);
+
+ if (BExpr != RHSExpr) {
+ // Try (A op RHS) op B
+ if (auto *NewMinMax = tryCombination(A, AExpr, RHS, RHSExpr, B, BExpr))
+ return NewMinMax;
+ }
+
+ if (AExpr != RHSExpr) {
+ // Try (RHS op B) op A
+ if (auto *NewMinMax = tryCombination(RHS, RHSExpr, B, BExpr, A, AExpr))
+ return NewMinMax;
}
+
return nullptr;
}
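The rewrite relies only on min/max being associative and commutative: (A op B) op RHS can be regrouped as (A op RHS) op B or (RHS op B) op A to reuse an existing partial result. A quick exhaustive check of that identity for max over a small range:

#include <algorithm>
#include <cassert>

// max is associative and commutative, so regrouping preserves the result.
int main() {
  for (int a = -2; a <= 2; ++a)
    for (int b = -2; b <= 2; ++b)
      for (int c = -2; c <= 2; ++c)
        assert(std::max(std::max(a, b), c) == std::max(std::max(a, c), b));
  return 0;
}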
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/NewGVN.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/NewGVN.cpp
index a137d13c6ea0..91215cd19e2b 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -1194,9 +1194,10 @@ NewGVN::ExprResult NewGVN::createExpression(Instruction *I) const {
SimplifyCastInst(CI->getOpcode(), E->getOperand(0), CI->getType(), SQ);
if (auto Simplified = checkExprResults(E, I, V))
return Simplified;
- } else if (isa<GetElementPtrInst>(I)) {
- Value *V = SimplifyGEPInst(
- E->getType(), ArrayRef<Value *>(E->op_begin(), E->op_end()), SQ);
+ } else if (auto *GEPI = dyn_cast<GetElementPtrInst>(I)) {
+ Value *V = SimplifyGEPInst(GEPI->getSourceElementType(),
+ ArrayRef<Value *>(E->op_begin(), E->op_end()),
+ GEPI->isInBounds(), SQ);
if (auto Simplified = checkExprResults(E, I, V))
return Simplified;
} else if (AllConstant) {
@@ -1818,7 +1819,7 @@ NewGVN::ExprResult NewGVN::performSymbolicCmpEvaluation(Instruction *I) const {
// See if we know something about the comparison itself, like it is the target
// of an assume.
auto *CmpPI = PredInfo->getPredicateInfoFor(I);
- if (dyn_cast_or_null<PredicateAssume>(CmpPI))
+ if (isa_and_nonnull<PredicateAssume>(CmpPI))
return ExprResult::some(
createConstantExpression(ConstantInt::getTrue(CI->getType())));
@@ -3606,7 +3607,7 @@ void NewGVN::convertClassToDFSOrdered(
// Skip uses in unreachable blocks, as we're going
// to delete them.
- if (ReachableBlocks.count(IBlock) == 0)
+ if (!ReachableBlocks.contains(IBlock))
continue;
DomTreeNode *DomNode = DT->getNode(IBlock);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
index 7872c553b412..44027ccd92ca 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
@@ -82,7 +82,7 @@ static bool optimizeSQRT(CallInst *Call, Function *CalledFunc,
// Add attribute "readnone" so that backend can use a native sqrt instruction
// for this call.
- Call->addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone);
+ Call->addFnAttr(Attribute::ReadNone);
// Insert a FP compare instruction and use it as the CurrBB branch condition.
Builder.SetInsertPoint(CurrBBTerm);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reassociate.cpp
index 888edc4d69a8..b0fb8daaba8f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -140,7 +140,7 @@ XorOpnd::XorOpnd(Value *V) {
// view the operand as "V | 0"
SymbolicPart = V;
- ConstPart = APInt::getNullValue(V->getType()->getScalarSizeInBits());
+ ConstPart = APInt::getZero(V->getType()->getScalarSizeInBits());
isOr = true;
}
@@ -1279,10 +1279,10 @@ static Value *OptimizeAndOrXor(unsigned Opcode,
/// be returned.
static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd,
const APInt &ConstOpnd) {
- if (ConstOpnd.isNullValue())
+ if (ConstOpnd.isZero())
return nullptr;
- if (ConstOpnd.isAllOnesValue())
+ if (ConstOpnd.isAllOnes())
return Opnd;
Instruction *I = BinaryOperator::CreateAnd(
@@ -1304,7 +1304,7 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
// = ((x | c1) ^ c1) ^ (c1 ^ c2)
// = (x & ~c1) ^ (c1 ^ c2)
// It is useful only when c1 == c2.
- if (!Opnd1->isOrExpr() || Opnd1->getConstPart().isNullValue())
+ if (!Opnd1->isOrExpr() || Opnd1->getConstPart().isZero())
return false;
if (!Opnd1->getValue()->hasOneUse())
@@ -1361,7 +1361,7 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
APInt C3((~C1) ^ C2);
// Do not increase code size!
- if (!C3.isNullValue() && !C3.isAllOnesValue()) {
+ if (!C3.isZero() && !C3.isAllOnes()) {
int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2;
if (NewInstNum > DeadInstNum)
return false;
@@ -1377,7 +1377,7 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
APInt C3 = C1 ^ C2;
// Do not increase code size
- if (!C3.isNullValue() && !C3.isAllOnesValue()) {
+ if (!C3.isZero() && !C3.isAllOnes()) {
int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2;
if (NewInstNum > DeadInstNum)
return false;
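The c1 == c2 case above reduces to the identity (x | c) ^ c == x & ~c; an exhaustive 8-bit check:

#include <cassert>
#include <cstdint>

// Bits of c are set by the OR and cleared again by the XOR, leaving only the
// bits of x that are outside c -- exactly x & ~c.
int main() {
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned c = 0; c < 256; ++c) {
      uint8_t lhs = static_cast<uint8_t>((x | c) ^ c);
      uint8_t rhs = static_cast<uint8_t>(x & ~c);
      assert(lhs == rhs);
    }
  return 0;
}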
@@ -1468,8 +1468,7 @@ Value *ReassociatePass::OptimizeXor(Instruction *I,
Value *CV;
// Step 3.1: Try simplifying "CurrOpnd ^ ConstOpnd"
- if (!ConstOpnd.isNullValue() &&
- CombineXorOpnd(I, CurrOpnd, ConstOpnd, CV)) {
+ if (!ConstOpnd.isZero() && CombineXorOpnd(I, CurrOpnd, ConstOpnd, CV)) {
Changed = true;
if (CV)
*CurrOpnd = XorOpnd(CV);
@@ -1510,7 +1509,7 @@ Value *ReassociatePass::OptimizeXor(Instruction *I,
ValueEntry VE(getRank(O.getValue()), O.getValue());
Ops.push_back(VE);
}
- if (!ConstOpnd.isNullValue()) {
+ if (!ConstOpnd.isZero()) {
Value *C = ConstantInt::get(Ty, ConstOpnd);
ValueEntry VE(getRank(C), C);
Ops.push_back(VE);
@@ -1519,7 +1518,7 @@ Value *ReassociatePass::OptimizeXor(Instruction *I,
if (Sz == 1)
return Ops.back().Op;
if (Sz == 0) {
- assert(ConstOpnd.isNullValue());
+ assert(ConstOpnd.isZero());
return ConstantInt::get(Ty, ConstOpnd);
}
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index bc0fecc972fc..2d3490b2d29e 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -755,7 +755,7 @@ public:
}
bool operator==(const BDVState &Other) const {
- return OriginalValue == OriginalValue && BaseValue == Other.BaseValue &&
+ return OriginalValue == Other.OriginalValue && BaseValue == Other.BaseValue &&
Status == Other.Status;
}
@@ -910,7 +910,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
#ifndef NDEBUG
VerifyStates();
LLVM_DEBUG(dbgs() << "States after initialization:\n");
- for (auto Pair : States) {
+ for (const auto &Pair : States) {
LLVM_DEBUG(dbgs() << " " << Pair.second << " for " << *Pair.first << "\n");
}
#endif
@@ -1002,7 +1002,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
#ifndef NDEBUG
VerifyStates();
LLVM_DEBUG(dbgs() << "States after meet iteration:\n");
- for (auto Pair : States) {
+ for (const auto &Pair : States) {
LLVM_DEBUG(dbgs() << " " << Pair.second << " for " << *Pair.first << "\n");
}
#endif
@@ -1163,7 +1163,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
// llvm::Value of the correct type (and still remain pure).
// This will remove the need to add bitcasts.
assert(Base->stripPointerCasts() == OldBase->stripPointerCasts() &&
- "Sanity -- findBaseOrBDV should be pure!");
+ "findBaseOrBDV should be pure!");
#endif
}
Value *Base = BlockToValue[InBB];
@@ -1377,11 +1377,11 @@ static AttributeList legalizeCallAttributes(LLVMContext &Ctx,
return AL;
// Remove the readonly, readnone, and statepoint function attributes.
- AttrBuilder FnAttrs = AL.getFnAttributes();
+ AttrBuilder FnAttrs = AL.getFnAttrs();
for (auto Attr : FnAttrsToStrip)
FnAttrs.removeAttribute(Attr);
- for (Attribute A : AL.getFnAttributes()) {
+ for (Attribute A : AL.getFnAttrs()) {
if (isStatepointDirectiveAttr(A))
FnAttrs.remove(A);
}
@@ -1533,9 +1533,8 @@ static StringRef getDeoptLowering(CallBase *Call) {
// FIXME: Calls have a *really* confusing interface around attributes
// with values.
const AttributeList &CSAS = Call->getAttributes();
- if (CSAS.hasAttribute(AttributeList::FunctionIndex, DeoptLowering))
- return CSAS.getAttribute(AttributeList::FunctionIndex, DeoptLowering)
- .getValueAsString();
+ if (CSAS.hasFnAttr(DeoptLowering))
+ return CSAS.getFnAttr(DeoptLowering).getValueAsString();
Function *F = Call->getCalledFunction();
assert(F && F->hasFnAttribute(DeoptLowering));
return F->getFnAttribute(DeoptLowering).getValueAsString();
@@ -1801,7 +1800,7 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
CallInst *GCResult = Builder.CreateGCResult(Token, Call->getType(), Name);
GCResult->setAttributes(
AttributeList::get(GCResult->getContext(), AttributeList::ReturnIndex,
- Call->getAttributes().getRetAttributes()));
+ Call->getAttributes().getRetAttrs()));
// We cannot RAUW or delete CS.getInstruction() because it could be in the
// live set of some other safepoint, in which case that safepoint's
@@ -1855,7 +1854,7 @@ makeStatepointExplicit(DominatorTree &DT, CallBase *Call,
// It receives iterator to the statepoint gc relocates and emits a store to the
// assigned location (via allocaMap) for the each one of them. It adds the
// visited values into the visitedLiveValues set, which we will later use
-// for sanity checking.
+// for validation checking.
static void
insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs,
DenseMap<Value *, AllocaInst *> &AllocaMap,
@@ -2454,7 +2453,7 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
SmallVectorImpl<CallBase *> &ToUpdate,
DefiningValueMapTy &DVCache) {
#ifndef NDEBUG
- // sanity check the input
+ // Validate the input
std::set<CallBase *> Uniqued;
Uniqued.insert(ToUpdate.begin(), ToUpdate.end());
assert(Uniqued.size() == ToUpdate.size() && "no duplicates please!");
@@ -2620,9 +2619,9 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
// we just grab that.
llvm::append_range(Live, Info.StatepointToken->gc_args());
#ifndef NDEBUG
- // Do some basic sanity checks on our liveness results before performing
- // relocation. Relocation can and will turn mistakes in liveness results
- // into non-sensical code which is must harder to debug.
+ // Do some basic validation checking on our liveness results before
+ // performing relocation. Relocation can and will turn mistakes in liveness
+    // results into nonsensical code which is much harder to debug.
// TODO: It would be nice to test consistency as well
assert(DT.isReachableFromEntry(Info.StatepointToken->getParent()) &&
"statepoint must be reachable or liveness is meaningless");
@@ -2641,7 +2640,7 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
unique_unsorted(Live);
#ifndef NDEBUG
- // sanity check
+ // Validation check
for (auto *Ptr : Live)
assert(isHandledGCPointerType(Ptr->getType()) &&
"must be a gc pointer type");
@@ -2656,18 +2655,19 @@ template <typename AttrHolder>
static void RemoveNonValidAttrAtIndex(LLVMContext &Ctx, AttrHolder &AH,
unsigned Index) {
AttrBuilder R;
- if (AH.getDereferenceableBytes(Index))
+ AttributeSet AS = AH.getAttributes().getAttributes(Index);
+ if (AS.getDereferenceableBytes())
R.addAttribute(Attribute::get(Ctx, Attribute::Dereferenceable,
- AH.getDereferenceableBytes(Index)));
- if (AH.getDereferenceableOrNullBytes(Index))
+ AS.getDereferenceableBytes()));
+ if (AS.getDereferenceableOrNullBytes())
R.addAttribute(Attribute::get(Ctx, Attribute::DereferenceableOrNull,
- AH.getDereferenceableOrNullBytes(Index)));
+ AS.getDereferenceableOrNullBytes()));
for (auto Attr : ParamAttrsToStrip)
- if (AH.getAttributes().hasAttribute(Index, Attr))
+ if (AS.hasAttribute(Attr))
R.addAttribute(Attr);
if (!R.empty())
- AH.setAttributes(AH.getAttributes().removeAttributes(Ctx, Index, R));
+ AH.setAttributes(AH.getAttributes().removeAttributesAtIndex(Ctx, Index, R));
}
static void stripNonValidAttributesFromPrototype(Function &F) {
@@ -3016,7 +3016,7 @@ static SetVector<Value *> computeKillSet(BasicBlock *BB) {
#ifndef NDEBUG
/// Check that the items in 'Live' dominate 'TI'. This is used as a basic
-/// sanity check for the liveness computation.
+/// validation check for the liveness computation.
static void checkBasicSSA(DominatorTree &DT, SetVector<Value *> &Live,
Instruction *TI, bool TermOkay = false) {
for (Value *V : Live) {
@@ -3103,7 +3103,7 @@ static void computeLiveInValues(DominatorTree &DT, Function &F,
} // while (!Worklist.empty())
#ifndef NDEBUG
- // Sanity check our output against SSA properties. This helps catch any
+ // Verify our output against SSA properties. This helps catch any
// missing kills during the above iteration.
for (BasicBlock &BB : F)
checkBasicSSA(DT, Data, BB);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SCCP.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SCCP.cpp
index b09f896d0157..28e00c873361 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SCCP.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -490,17 +490,17 @@ bool llvm::runIPSCCP(
AttrBuilder AttributesToRemove;
AttributesToRemove.addAttribute(Attribute::ArgMemOnly);
AttributesToRemove.addAttribute(Attribute::InaccessibleMemOrArgMemOnly);
- F.removeAttributes(AttributeList::FunctionIndex, AttributesToRemove);
+ F.removeFnAttrs(AttributesToRemove);
for (User *U : F.users()) {
auto *CB = dyn_cast<CallBase>(U);
if (!CB || CB->getCalledFunction() != &F)
continue;
- CB->removeAttributes(AttributeList::FunctionIndex,
- AttributesToRemove);
+ CB->removeFnAttrs(AttributesToRemove);
}
}
+ MadeChanges |= ReplacedPointerArg;
}
SmallPtrSet<Value *, 32> InsertedValues;
@@ -540,14 +540,13 @@ bool llvm::runIPSCCP(
DTU.deleteBB(DeadBB);
for (BasicBlock &BB : F) {
- for (BasicBlock::iterator BI = BB.begin(), E = BB.end(); BI != E;) {
- Instruction *Inst = &*BI++;
- if (Solver.getPredicateInfoFor(Inst)) {
- if (auto *II = dyn_cast<IntrinsicInst>(Inst)) {
+ for (Instruction &Inst : llvm::make_early_inc_range(BB)) {
+ if (Solver.getPredicateInfoFor(&Inst)) {
+ if (auto *II = dyn_cast<IntrinsicInst>(&Inst)) {
if (II->getIntrinsicID() == Intrinsic::ssa_copy) {
Value *Op = II->getOperand(0);
- Inst->replaceAllUsesWith(Op);
- Inst->eraseFromParent();
+ Inst.replaceAllUsesWith(Op);
+ Inst.eraseFromParent();
}
}
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp
index fe160d5415bd..31c8999c3724 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -122,7 +122,7 @@ namespace {
class IRBuilderPrefixedInserter final : public IRBuilderDefaultInserter {
std::string Prefix;
- const Twine getNameWithPrefix(const Twine &Name) const {
+ Twine getNameWithPrefix(const Twine &Name) const {
return Name.isTriviallyEmpty() ? Name : Prefix + Name;
}
@@ -1275,8 +1275,7 @@ static void speculatePHINodeLoads(PHINode &PN) {
// Get the AA tags and alignment to use from one of the loads. It does not
// matter which one we get and if any differ.
- AAMDNodes AATags;
- SomeLoad->getAAMetadata(AATags);
+ AAMDNodes AATags = SomeLoad->getAAMetadata();
Align Alignment = SomeLoad->getAlign();
// Rewrite all loads of the PN to use the new PHI.
@@ -1330,14 +1329,21 @@ static void speculatePHINodeLoads(PHINode &PN) {
/// %V = select i1 %cond, i32 %V1, i32 %V2
///
/// We can do this to a select if its only uses are loads and if the operand
-/// to the select can be loaded unconditionally.
+/// to the select can be loaded unconditionally. If an intervening bitcast with a
+/// single use (the load) is found, the promotion is still allowed.
static bool isSafeSelectToSpeculate(SelectInst &SI) {
Value *TValue = SI.getTrueValue();
Value *FValue = SI.getFalseValue();
const DataLayout &DL = SI.getModule()->getDataLayout();
for (User *U : SI.users()) {
- LoadInst *LI = dyn_cast<LoadInst>(U);
+ LoadInst *LI;
+ BitCastInst *BC = dyn_cast<BitCastInst>(U);
+ if (BC && BC->hasOneUse())
+ LI = dyn_cast<LoadInst>(*BC->user_begin());
+ else
+ LI = dyn_cast<LoadInst>(U);
+
if (!LI || !LI->isSimple())
return false;
@@ -1363,13 +1369,27 @@ static void speculateSelectInstLoads(SelectInst &SI) {
Value *FV = SI.getFalseValue();
// Replace the loads of the select with a select of two loads.
while (!SI.use_empty()) {
- LoadInst *LI = cast<LoadInst>(SI.user_back());
+ LoadInst *LI;
+ BitCastInst *BC = dyn_cast<BitCastInst>(SI.user_back());
+ if (BC) {
+ assert(BC->hasOneUse() && "Bitcast should have a single use.");
+ LI = cast<LoadInst>(BC->user_back());
+ } else {
+ LI = cast<LoadInst>(SI.user_back());
+ }
+
assert(LI->isSimple() && "We only speculate simple loads");
IRB.SetInsertPoint(LI);
- LoadInst *TL = IRB.CreateLoad(LI->getType(), TV,
+ Value *NewTV =
+ BC ? IRB.CreateBitCast(TV, BC->getType(), TV->getName() + ".sroa.cast")
+ : TV;
+ Value *NewFV =
+ BC ? IRB.CreateBitCast(FV, BC->getType(), FV->getName() + ".sroa.cast")
+ : FV;
+ LoadInst *TL = IRB.CreateLoad(LI->getType(), NewTV,
LI->getName() + ".sroa.speculate.load.true");
- LoadInst *FL = IRB.CreateLoad(LI->getType(), FV,
+ LoadInst *FL = IRB.CreateLoad(LI->getType(), NewFV,
LI->getName() + ".sroa.speculate.load.false");
NumLoadsSpeculated += 2;
@@ -1377,8 +1397,7 @@ static void speculateSelectInstLoads(SelectInst &SI) {
TL->setAlignment(LI->getAlign());
FL->setAlignment(LI->getAlign());
- AAMDNodes Tags;
- LI->getAAMetadata(Tags);
+ AAMDNodes Tags = LI->getAAMetadata();
if (Tags) {
TL->setAAMetadata(Tags);
FL->setAAMetadata(Tags);
@@ -1390,6 +1409,8 @@ static void speculateSelectInstLoads(SelectInst &SI) {
LLVM_DEBUG(dbgs() << " speculated to: " << *V << "\n");
LI->replaceAllUsesWith(V);
LI->eraseFromParent();
+ if (BC)
+ BC->eraseFromParent();
}
SI.eraseFromParent();
}
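The speculation above turns a load of a select into a select of two loads (with bitcasts rewritten to match). A C-level sketch of the basic equivalence, ignoring the bitcast handling:

#include <cassert>

// Loading through "cond ? &a : &b" equals selecting between the two loads,
// provided both pointers are safe to load from unconditionally.
int main() {
  int a = 1, b = 2;
  for (int i = 0; i < 2; ++i) {
    bool cond = (i == 1);
    int *sel = cond ? &a : &b;
    int original = *sel;           // load of the selected pointer
    int speculated = cond ? a : b; // select of the two speculated loads
    assert(original == speculated);
  }
  return 0;
}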
@@ -1462,76 +1483,6 @@ static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &DL,
return buildGEP(IRB, BasePtr, Indices, NamePrefix);
}
-/// Recursively compute indices for a natural GEP.
-///
-/// This is the recursive step for getNaturalGEPWithOffset that walks down the
-/// element types adding appropriate indices for the GEP.
-static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL,
- Value *Ptr, Type *Ty, APInt &Offset,
- Type *TargetTy,
- SmallVectorImpl<Value *> &Indices,
- const Twine &NamePrefix) {
- if (Offset == 0)
- return getNaturalGEPWithType(IRB, DL, Ptr, Ty, TargetTy, Indices,
- NamePrefix);
-
- // We can't recurse through pointer types.
- if (Ty->isPointerTy())
- return nullptr;
-
- // We try to analyze GEPs over vectors here, but note that these GEPs are
- // extremely poorly defined currently. The long-term goal is to remove GEPing
- // over a vector from the IR completely.
- if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) {
- unsigned ElementSizeInBits =
- DL.getTypeSizeInBits(VecTy->getScalarType()).getFixedSize();
- if (ElementSizeInBits % 8 != 0) {
- // GEPs over non-multiple of 8 size vector elements are invalid.
- return nullptr;
- }
- APInt ElementSize(Offset.getBitWidth(), ElementSizeInBits / 8);
- APInt NumSkippedElements = Offset.sdiv(ElementSize);
- if (NumSkippedElements.ugt(cast<FixedVectorType>(VecTy)->getNumElements()))
- return nullptr;
- Offset -= NumSkippedElements * ElementSize;
- Indices.push_back(IRB.getInt(NumSkippedElements));
- return getNaturalGEPRecursively(IRB, DL, Ptr, VecTy->getElementType(),
- Offset, TargetTy, Indices, NamePrefix);
- }
-
- if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
- Type *ElementTy = ArrTy->getElementType();
- APInt ElementSize(Offset.getBitWidth(),
- DL.getTypeAllocSize(ElementTy).getFixedSize());
- APInt NumSkippedElements = Offset.sdiv(ElementSize);
- if (NumSkippedElements.ugt(ArrTy->getNumElements()))
- return nullptr;
-
- Offset -= NumSkippedElements * ElementSize;
- Indices.push_back(IRB.getInt(NumSkippedElements));
- return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy,
- Indices, NamePrefix);
- }
-
- StructType *STy = dyn_cast<StructType>(Ty);
- if (!STy)
- return nullptr;
-
- const StructLayout *SL = DL.getStructLayout(STy);
- uint64_t StructOffset = Offset.getZExtValue();
- if (StructOffset >= SL->getSizeInBytes())
- return nullptr;
- unsigned Index = SL->getElementContainingOffset(StructOffset);
- Offset -= APInt(Offset.getBitWidth(), SL->getElementOffset(Index));
- Type *ElementTy = STy->getElementType(Index);
- if (Offset.uge(DL.getTypeAllocSize(ElementTy).getFixedSize()))
- return nullptr; // The offset points into alignment padding.
-
- Indices.push_back(IRB.getInt32(Index));
- return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy,
- Indices, NamePrefix);
-}
-
/// Get a natural GEP from a base pointer to a particular offset and
/// resulting in a particular type.
///
@@ -1556,18 +1507,15 @@ static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL,
Type *ElementTy = Ty->getElementType();
if (!ElementTy->isSized())
return nullptr; // We can't GEP through an unsized element.
- if (isa<ScalableVectorType>(ElementTy))
+
+ SmallVector<APInt> IntIndices = DL.getGEPIndicesForOffset(ElementTy, Offset);
+ if (Offset != 0)
return nullptr;
- APInt ElementSize(Offset.getBitWidth(),
- DL.getTypeAllocSize(ElementTy).getFixedSize());
- if (ElementSize == 0)
- return nullptr; // Zero-length arrays can't help us build a natural GEP.
- APInt NumSkippedElements = Offset.sdiv(ElementSize);
-
- Offset -= NumSkippedElements * ElementSize;
- Indices.push_back(IRB.getInt(NumSkippedElements));
- return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy,
- Indices, NamePrefix);
+
+ for (const APInt &Index : IntIndices)
+ Indices.push_back(IRB.getInt(Index));
+ return getNaturalGEPWithType(IRB, DL, Ptr, ElementTy, TargetTy, Indices,
+ NamePrefix);
}
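getGEPIndicesForOffset does the offset-to-indices translation that the deleted recursive helper used to do by hand. A worked example of what that translation means, assuming the usual 4-byte int layout with no padding (an assumption of this sketch, not something the language standard guarantees):

#include <cassert>

// For S, byte offset 8 decomposes into "field b, element 1": 4 bytes skip
// field a, and the remaining 4 bytes select the second element of b.
struct S {
  int a;    // bytes 0..3
  int b[3]; // bytes 4..15
};

int main() {
  S s{};
  const char *base = reinterpret_cast<const char *>(&s);
  const char *at8 = reinterpret_cast<const char *>(&s.b[1]);
  assert(at8 - base == 8);
  return 0;
}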
/// Compute an adjusted pointer from Ptr by Offset bytes where the
@@ -1588,6 +1536,15 @@ static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL,
static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
APInt Offset, Type *PointerTy,
const Twine &NamePrefix) {
+ // Create i8 GEP for opaque pointers.
+ if (Ptr->getType()->isOpaquePointerTy()) {
+ if (Offset != 0)
+ Ptr = IRB.CreateInBoundsGEP(IRB.getInt8Ty(), Ptr, IRB.getInt(Offset),
+ NamePrefix + "sroa_idx");
+ return IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr, PointerTy,
+ NamePrefix + "sroa_cast");
+ }
+
// Even though we don't look through PHI nodes, we could be called on an
// instruction in an unreachable block, which may be on a cycle.
SmallPtrSet<Value *, 4> Visited;
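
For the opaque-pointer path added above, a byte-granularity i8 GEP plus a pointer cast replaces the type-walking logic entirely. A rough non-LLVM analogy of "offset the base by N bytes, then cast to the pointer type the caller wants" (a conceptual sketch only, not IR or SROA's implementation):

  #include <cstddef>
  #include <cstdint>

  // The moral equivalent of `getelementptr i8, ptr %base, i64 %off`
  // followed by a pointer cast to the expected type.
  template <typename T>
  T *adjustedPtr(void *Base, std::ptrdiff_t ByteOffset) {
    return reinterpret_cast<T *>(static_cast<std::uint8_t *>(Base) + ByteOffset);
  }

  int main() {
    std::uint32_t Buf[4] = {0, 0, 7, 0};
    // Reach the third u32 (byte offset 8) straight from the untyped base.
    return *adjustedPtr<std::uint32_t>(Buf, 8) == 7 ? 0 : 1;
  }
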
@@ -1851,13 +1808,13 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
if (!II->isLifetimeStartOrEnd() && !II->isDroppable())
return false;
- } else if (U->get()->getType()->getPointerElementType()->isStructTy()) {
- // Disable vector promotion when there are loads or stores of an FCA.
- return false;
} else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
if (LI->isVolatile())
return false;
Type *LTy = LI->getType();
+ // Disable vector promotion when there are loads or stores of an FCA.
+ if (LTy->isStructTy())
+ return false;
if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
assert(LTy->isIntegerTy());
LTy = SplitIntTy;
@@ -1868,6 +1825,9 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
if (SI->isVolatile())
return false;
Type *STy = SI->getValueOperand()->getType();
+ // Disable vector promotion when there are loads or stores of an FCA.
+ if (STy->isStructTy())
+ return false;
if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
assert(STy->isIntegerTy());
STy = SplitIntTy;
@@ -2282,7 +2242,7 @@ class llvm::sroa::AllocaSliceRewriter
const DataLayout &DL;
AllocaSlices &AS;
- SROA &Pass;
+ SROAPass &Pass;
AllocaInst &OldAI, &NewAI;
const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;
Type *NewAllocaTy;
@@ -2330,7 +2290,7 @@ class llvm::sroa::AllocaSliceRewriter
IRBuilderTy IRB;
public:
- AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &AS, SROA &Pass,
+ AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &AS, SROAPass &Pass,
AllocaInst &OldAI, AllocaInst &NewAI,
uint64_t NewAllocaBeginOffset,
uint64_t NewAllocaEndOffset, bool IsIntegerPromotable,
@@ -2510,8 +2470,7 @@ private:
Value *OldOp = LI.getOperand(0);
assert(OldOp == OldPtr);
- AAMDNodes AATags;
- LI.getAAMetadata(AATags);
+ AAMDNodes AATags = LI.getAAMetadata();
unsigned AS = LI.getPointerAddressSpace();
@@ -2675,9 +2634,7 @@ private:
Value *OldOp = SI.getOperand(1);
assert(OldOp == OldPtr);
- AAMDNodes AATags;
- SI.getAAMetadata(AATags);
-
+ AAMDNodes AATags = SI.getAAMetadata();
Value *V = SI.getValueOperand();
// Strip all inbounds GEPs and pointer casts to try to dig out any root
@@ -2743,7 +2700,9 @@ private:
deleteIfTriviallyDead(OldOp);
LLVM_DEBUG(dbgs() << " to: " << *NewSI << "\n");
- return NewSI->getPointerOperand() == &NewAI && !SI.isVolatile();
+ return NewSI->getPointerOperand() == &NewAI &&
+ NewSI->getValueOperand()->getType() == NewAllocaTy &&
+ !SI.isVolatile();
}
/// Compute an integer value from splatting an i8 across the given
@@ -2784,8 +2743,7 @@ private:
LLVM_DEBUG(dbgs() << " original: " << II << "\n");
assert(II.getRawDest() == OldPtr);
- AAMDNodes AATags;
- II.getAAMetadata(AATags);
+ AAMDNodes AATags = II.getAAMetadata();
// If the memset has a variable size, it cannot be split, just adjust the
// pointer to the new alloca.
@@ -2913,8 +2871,7 @@ private:
LLVM_DEBUG(dbgs() << " original: " << II << "\n");
- AAMDNodes AATags;
- II.getAAMetadata(AATags);
+ AAMDNodes AATags = II.getAAMetadata();
bool IsDest = &II.getRawDestUse() == OldUse;
assert((IsDest && II.getRawDest() == OldPtr) ||
@@ -3421,9 +3378,7 @@ private:
// We have an aggregate being loaded, split it apart.
LLVM_DEBUG(dbgs() << " original: " << LI << "\n");
- AAMDNodes AATags;
- LI.getAAMetadata(AATags);
- LoadOpSplitter Splitter(&LI, *U, LI.getType(), AATags,
+ LoadOpSplitter Splitter(&LI, *U, LI.getType(), LI.getAAMetadata(),
getAdjustedAlignment(&LI, 0), DL);
Value *V = UndefValue::get(LI.getType());
Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca");
@@ -3474,9 +3429,7 @@ private:
// We have an aggregate being stored, split it apart.
LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
- AAMDNodes AATags;
- SI.getAAMetadata(AATags);
- StoreOpSplitter Splitter(&SI, *U, V->getType(), AATags,
+ StoreOpSplitter Splitter(&SI, *U, V->getType(), SI.getAAMetadata(),
getAdjustedAlignment(&SI, 0), DL);
Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca");
Visited.erase(&SI);
@@ -3802,7 +3755,7 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset,
/// there all along.
///
/// \returns true if any changes are made.
-bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
+bool SROAPass::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
LLVM_DEBUG(dbgs() << "Pre-splitting loads and stores\n");
// Track the loads and stores which are candidates for pre-splitting here, in
@@ -4282,8 +4235,8 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
/// appropriate new offsets. It also evaluates how successful the rewrite was
/// at enabling promotion and if it was successful queues the alloca to be
/// promoted.
-AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
- Partition &P) {
+AllocaInst *SROAPass::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
+ Partition &P) {
// Try to compute a friendly type for this partition of the alloca. This
// won't always succeed, in which case we fall back to a legal integer type
// or an i8 array of an appropriate size.
@@ -4434,7 +4387,7 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
/// Walks the slices of an alloca and form partitions based on them,
/// rewriting each of their uses.
-bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
+bool SROAPass::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
if (AS.begin() == AS.end())
return false;
@@ -4605,7 +4558,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
}
/// Clobber a use with undef, deleting the used value if it becomes dead.
-void SROA::clobberUse(Use &U) {
+void SROAPass::clobberUse(Use &U) {
Value *OldV = U;
// Replace the use with an undef value.
U = UndefValue::get(OldV->getType());
@@ -4624,7 +4577,7 @@ void SROA::clobberUse(Use &U) {
/// This analyzes the alloca to ensure we can reason about it, builds
/// the slices of the alloca, and then hands it off to be split and
/// rewritten as needed.
-bool SROA::runOnAlloca(AllocaInst &AI) {
+bool SROAPass::runOnAlloca(AllocaInst &AI) {
LLVM_DEBUG(dbgs() << "SROA alloca: " << AI << "\n");
++NumAllocasAnalyzed;
@@ -4698,7 +4651,7 @@ bool SROA::runOnAlloca(AllocaInst &AI) {
///
/// We also record the alloca instructions deleted here so that they aren't
/// subsequently handed to mem2reg to promote.
-bool SROA::deleteDeadInstructions(
+bool SROAPass::deleteDeadInstructions(
SmallPtrSetImpl<AllocaInst *> &DeletedAllocas) {
bool Changed = false;
while (!DeadInsts.empty()) {
@@ -4737,7 +4690,7 @@ bool SROA::deleteDeadInstructions(
/// This attempts to promote whatever allocas have been identified as viable in
/// the PromotableAllocas list. If that list is empty, there is nothing to do.
/// This function returns whether any promotion occurred.
-bool SROA::promoteAllocas(Function &F) {
+bool SROAPass::promoteAllocas(Function &F) {
if (PromotableAllocas.empty())
return false;
@@ -4749,8 +4702,8 @@ bool SROA::promoteAllocas(Function &F) {
return true;
}
-PreservedAnalyses SROA::runImpl(Function &F, DominatorTree &RunDT,
- AssumptionCache &RunAC) {
+PreservedAnalyses SROAPass::runImpl(Function &F, DominatorTree &RunDT,
+ AssumptionCache &RunAC) {
LLVM_DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
C = &F.getContext();
DT = &RunDT;
@@ -4804,7 +4757,7 @@ PreservedAnalyses SROA::runImpl(Function &F, DominatorTree &RunDT,
return PA;
}
-PreservedAnalyses SROA::run(Function &F, FunctionAnalysisManager &AM) {
+PreservedAnalyses SROAPass::run(Function &F, FunctionAnalysisManager &AM) {
return runImpl(F, AM.getResult<DominatorTreeAnalysis>(F),
AM.getResult<AssumptionAnalysis>(F));
}
@@ -4815,7 +4768,7 @@ PreservedAnalyses SROA::run(Function &F, FunctionAnalysisManager &AM) {
/// SROA pass.
class llvm::sroa::SROALegacyPass : public FunctionPass {
/// The SROA implementation.
- SROA Impl;
+ SROAPass Impl;
public:
static char ID;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
index ca288a533f46..1284bae820a4 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
@@ -873,13 +873,11 @@ static bool runImpl(Function &F, const TargetTransformInfo &TTI,
auto &DL = F.getParent()->getDataLayout();
while (MadeChange) {
MadeChange = false;
- for (Function::iterator I = F.begin(); I != F.end();) {
- BasicBlock *BB = &*I++;
+ for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
bool ModifiedDTOnIteration = false;
- MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration, TTI, DL,
+ MadeChange |= optimizeBlock(BB, ModifiedDTOnIteration, TTI, DL,
DTU.hasValue() ? DTU.getPointer() : nullptr);
-
// Restart BB iteration if the dominator tree of the Function was changed
if (ModifiedDTOnIteration)
break;
@@ -933,7 +931,7 @@ static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
if (II) {
// The scalarization code below does not work for scalable vectors.
if (isa<ScalableVectorType>(II->getType()) ||
- any_of(II->arg_operands(),
+ any_of(II->args(),
[](Value *V) { return isa<ScalableVectorType>(V->getType()); }))
return false;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 8ef6b69673be..6b7419abe1d1 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -66,6 +66,15 @@ static cl::opt<bool>
namespace {
+BasicBlock::iterator skipPastPhiNodesAndDbg(BasicBlock::iterator Itr) {
+ BasicBlock *BB = Itr->getParent();
+ if (isa<PHINode>(Itr))
+ Itr = BB->getFirstInsertionPt();
+ if (Itr != BB->end())
+ Itr = skipDebugIntrinsics(Itr);
+ return Itr;
+}
+
// Used to store the scattered form of a vector.
using ValueVector = SmallVector<Value *, 8>;
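
The new skipPastPhiNodesAndDbg helper above encodes an insertion-point rule: scattered values must never be placed among PHI nodes, and debug intrinsics are stepped over so real code stays contiguous. A standalone sketch of that rule over a toy instruction list (the Kind enum and names are illustrative, not LLVM's):

  #include <cassert>
  #include <vector>

  enum class Kind { Phi, Debug, Normal };
  using Block = std::vector<Kind>;

  // Skip any leading PHIs from the given position, then any debug
  // pseudo-instructions, mirroring getFirstInsertionPt + skipDebugIntrinsics.
  Block::const_iterator skipPastPhisAndDbg(const Block &BB,
                                           Block::const_iterator It) {
    while (It != BB.end() && *It == Kind::Phi)
      ++It;
    while (It != BB.end() && *It == Kind::Debug)
      ++It;
    return It;
  }

  int main() {
    Block BB = {Kind::Phi, Kind::Phi, Kind::Debug, Kind::Normal};
    auto It = skipPastPhisAndDbg(BB, BB.begin());
    assert(It != BB.end() && *It == Kind::Normal);
  }
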
@@ -371,10 +380,11 @@ Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V) {
return Scatterer(Point->getParent(), Point->getIterator(),
UndefValue::get(V->getType()));
// Put the scattered form of an instruction directly after the
- // instruction.
+ // instruction, skipping over PHI nodes and debug intrinsics.
BasicBlock *BB = VOp->getParent();
- return Scatterer(BB, std::next(BasicBlock::iterator(VOp)),
- V, &Scattered[V]);
+ return Scatterer(
+ BB, skipPastPhiNodesAndDbg(std::next(BasicBlock::iterator(VOp))), V,
+ &Scattered[V]);
}
// In the fallback case, just put the scattered before Point and
// keep the result local to Point.
@@ -530,7 +540,7 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) {
return false;
unsigned NumElems = cast<FixedVectorType>(VT)->getNumElements();
- unsigned NumArgs = CI.getNumArgOperands();
+ unsigned NumArgs = CI.arg_size();
ValueVector ScalarOperands(NumArgs);
SmallVector<Scatterer, 8> Scattered(NumArgs);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index f216956406b6..ffa2f9adb978 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -1164,8 +1164,11 @@ bool SeparateConstOffsetFromGEP::run(Function &F) {
DL = &F.getParent()->getDataLayout();
bool Changed = false;
for (BasicBlock &B : F) {
- for (BasicBlock::iterator I = B.begin(), IE = B.end(); I != IE;)
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I++))
+ if (!DT->isReachableFromEntry(&B))
+ continue;
+
+ for (Instruction &I : llvm::make_early_inc_range(B))
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I))
Changed |= splitGEP(GEP);
// No need to split GEP ConstantExprs because all its indices are constant
// already.
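
The loops rewritten above (and the similar ones in ScalarizeMaskedMemIntrin and reuniteExts below) lean on llvm::make_early_inc_range, which advances the iterator before yielding the current element so the body may erase it. A self-contained illustration of the same idiom with std::list, where erase only invalidates iterators to the erased node:

  #include <cassert>
  #include <list>

  int main() {
    std::list<int> Insts = {1, 2, 3, 4};
    for (auto It = Insts.begin(); It != Insts.end();) {
      auto Cur = It++;      // advance first...
      if (*Cur % 2 == 0)
        Insts.erase(Cur);   // ...so erasing the current node is safe
    }
    assert(Insts.size() == 2 && Insts.front() == 1 && Insts.back() == 3);
  }
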
@@ -1258,10 +1261,8 @@ bool SeparateConstOffsetFromGEP::reuniteExts(Function &F) {
DominatingSubs.clear();
for (const auto Node : depth_first(DT)) {
BasicBlock *BB = Node->getBlock();
- for (auto I = BB->begin(); I != BB->end(); ) {
- Instruction *Cur = &*I++;
- Changed |= reuniteExts(Cur);
- }
+ for (Instruction &I : llvm::make_early_inc_range(*BB))
+ Changed |= reuniteExts(&I);
}
return Changed;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index b1c105258027..a27da047bfd3 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -28,6 +28,7 @@
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
@@ -49,7 +50,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GenericDomTree.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -81,6 +81,7 @@ static cl::opt<bool> EnableNonTrivialUnswitch(
static cl::opt<int>
UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden,
+ cl::ZeroOrMore,
cl::desc("The cost threshold for unswitching a loop."));
static cl::opt<bool> EnableUnswitchCostMultiplier(
@@ -108,6 +109,10 @@ static cl::opt<unsigned>
cl::desc("Max number of memory uses to explore during "
"partial unswitching analysis"),
cl::init(100), cl::Hidden);
+static cl::opt<bool> FreezeLoopUnswitchCond(
+ "freeze-loop-unswitch-cond", cl::init(false), cl::Hidden,
+ cl::desc("If enabled, the freeze instruction will be added to condition "
+ "of loop unswitch to prevent miscompilation."));
/// Collect all of the loop invariant input values transitively used by the
/// homogeneous instruction graph from a given root.
@@ -195,15 +200,15 @@ static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB,
/// Copy a set of loop invariant values \p ToDuplicate and insert them at the
/// end of \p BB and conditionally branch on the copied condition. We only
/// branch on a single value.
-static void buildPartialUnswitchConditionalBranch(BasicBlock &BB,
- ArrayRef<Value *> Invariants,
- bool Direction,
- BasicBlock &UnswitchedSucc,
- BasicBlock &NormalSucc) {
+static void buildPartialUnswitchConditionalBranch(
+ BasicBlock &BB, ArrayRef<Value *> Invariants, bool Direction,
+ BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze) {
IRBuilder<> IRB(&BB);
Value *Cond = Direction ? IRB.CreateOr(Invariants) :
IRB.CreateAnd(Invariants);
+ if (InsertFreeze)
+ Cond = IRB.CreateFreeze(Cond, Cond->getName() + ".fr");
IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
Direction ? &NormalSucc : &UnswitchedSucc);
}
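
The InsertFreeze flag threaded through above exists because duplicating a branch on a possibly-undef (or poison) loop-invariant condition lets the two copies resolve the value differently, which can miscompile; freeze pins one arbitrary but fixed value first. A conceptual model of that semantics outside LLVM (MaybeBool stands in for an i1 that may be undef; this is a sketch of the idea, not of the pass):

  #include <cstdlib>
  #include <optional>

  // A value that may be undef.
  using MaybeBool = std::optional<bool>;

  // freeze: defined inputs pass through; an undef input is pinned to one
  // arbitrary but fixed bit, so every later use of the frozen value agrees.
  bool freeze(MaybeBool V) {
    return V.has_value() ? *V : (std::rand() & 1);
  }

  int main() {
    MaybeBool Undef;             // models an undef branch condition
    bool Frozen = freeze(Undef);
    // Branching twice on Frozen is always consistent; branching twice on the
    // raw undef value (as duplicated unswitched code would) need not be.
    return Frozen == Frozen ? 0 : 1;
  }
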
@@ -564,7 +569,7 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
"Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"
" condition!");
buildPartialUnswitchConditionalBranch(*OldPH, Invariants, ExitDirection,
- *UnswitchedBB, *NewPH);
+ *UnswitchedBB, *NewPH, false);
}
// Update the dominator tree with the added edge.
@@ -2123,6 +2128,13 @@ static void unswitchNontrivialInvariants(
SE->forgetTopmostLoop(&L);
}
+ bool InsertFreeze = false;
+ if (FreezeLoopUnswitchCond) {
+ ICFLoopSafetyInfo SafetyInfo;
+ SafetyInfo.computeLoopSafetyInfo(&L);
+ InsertFreeze = !SafetyInfo.isGuaranteedToExecute(TI, &DT, &L);
+ }
+
// If the edge from this terminator to a successor dominates that successor,
// store a map from each block in its dominator subtree to it. This lets us
// tell when cloning for a particular successor if a block is dominated by
@@ -2197,6 +2209,11 @@ static void unswitchNontrivialInvariants(
BasicBlock *ClonedPH = ClonedPHs.begin()->second;
BI->setSuccessor(ClonedSucc, ClonedPH);
BI->setSuccessor(1 - ClonedSucc, LoopPH);
+ if (InsertFreeze) {
+ auto Cond = BI->getCondition();
+ if (!isGuaranteedNotToBeUndefOrPoison(Cond, &AC, BI, &DT))
+ BI->setCondition(new FreezeInst(Cond, Cond->getName() + ".fr", BI));
+ }
DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
} else {
assert(SI && "Must either be a branch or switch!");
@@ -2211,6 +2228,11 @@ static void unswitchNontrivialInvariants(
else
Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second);
+ if (InsertFreeze) {
+ auto Cond = SI->getCondition();
+ if (!isGuaranteedNotToBeUndefOrPoison(Cond, &AC, SI, &DT))
+ SI->setCondition(new FreezeInst(Cond, Cond->getName() + ".fr", SI));
+ }
// We need to use the set to populate domtree updates as even when there
// are multiple cases pointing at the same successor we only want to
// remove and insert one edge in the domtree.
@@ -2291,7 +2313,7 @@ static void unswitchNontrivialInvariants(
*SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU);
else
buildPartialUnswitchConditionalBranch(*SplitBB, Invariants, Direction,
- *ClonedPH, *LoopPH);
+ *ClonedPH, *LoopPH, InsertFreeze);
DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
if (MSSAU) {
@@ -2370,7 +2392,9 @@ static void unswitchNontrivialInvariants(
ConstantInt *ContinueReplacement =
Direction ? ConstantInt::getFalse(BI->getContext())
: ConstantInt::getTrue(BI->getContext());
- for (Value *Invariant : Invariants)
+ for (Value *Invariant : Invariants) {
+ assert(!isa<Constant>(Invariant) &&
+ "Should not be replacing constant values!");
// Use make_early_inc_range here as set invalidates the iterator.
for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
Instruction *UserI = dyn_cast<Instruction>(U.getUser());
@@ -2385,6 +2409,7 @@ static void unswitchNontrivialInvariants(
DT.dominates(ClonedPH, UserI->getParent()))
U.set(UnswitchedReplacement);
}
+ }
}
// We can change which blocks are exit blocks of all the cloned sibling
@@ -2727,6 +2752,9 @@ static bool unswitchBestCondition(
Cond = CondNext;
BI->setCondition(Cond);
+ if (isa<Constant>(Cond))
+ continue;
+
if (L.isLoopInvariant(BI->getCondition())) {
UnswitchCandidates.push_back({BI, {BI->getCondition()}});
continue;
@@ -3121,6 +3149,17 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
return PA;
}
+void SimpleLoopUnswitchPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<SimpleLoopUnswitchPass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+
+ OS << "<";
+ OS << (NonTrivial ? "" : "no-") << "nontrivial;";
+ OS << (Trivial ? "" : "no-") << "trivial";
+ OS << ">";
+}
+
namespace {
class SimpleLoopUnswitchLegacyPass : public LoopPass {
@@ -3140,10 +3179,8 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetTransformInfoWrapperPass>();
- if (EnableMSSALoopDependency) {
- AU.addRequired<MemorySSAWrapperPass>();
- AU.addPreserved<MemorySSAWrapperPass>();
- }
+ AU.addRequired<MemorySSAWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
getLoopAnalysisUsage(AU);
}
};
@@ -3164,12 +3201,8 @@ bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- MemorySSA *MSSA = nullptr;
- Optional<MemorySSAUpdater> MSSAU;
- if (EnableMSSALoopDependency) {
- MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
- MSSAU = MemorySSAUpdater(MSSA);
- }
+ MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
+ MemorySSAUpdater MSSAU(MSSA);
auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
auto *SE = SEWP ? &SEWP->getSE() : nullptr;
@@ -3197,15 +3230,13 @@ bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
LPM.markLoopAsDeleted(L);
};
- if (MSSA && VerifyMemorySSA)
+ if (VerifyMemorySSA)
MSSA->verifyMemorySSA();
- bool Changed =
- unswitchLoop(*L, DT, LI, AC, AA, TTI, true, NonTrivial, UnswitchCB, SE,
- MSSAU.hasValue() ? MSSAU.getPointer() : nullptr,
- DestroyLoopCB);
+ bool Changed = unswitchLoop(*L, DT, LI, AC, AA, TTI, true, NonTrivial,
+ UnswitchCB, SE, &MSSAU, DestroyLoopCB);
- if (MSSA && VerifyMemorySSA)
+ if (VerifyMemorySSA)
MSSA->verifyMemorySSA();
// Historically this pass has had issues with the dominator tree so verify it
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 09d59b0e884a..86d3620c312e 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -224,7 +224,11 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
SmallVector<WeakVH, 16> LoopHeaders(UniqueLoopHeaders.begin(),
UniqueLoopHeaders.end());
+ unsigned IterCnt = 0;
+ (void)IterCnt;
while (LocalChange) {
+ assert(IterCnt++ < 1000 &&
+ "Sanity: iterative simplification didn't converge!");
LocalChange = false;
// Loop over all of the basic blocks and remove them if they are unneeded.
@@ -319,6 +323,21 @@ SimplifyCFGPass::SimplifyCFGPass(const SimplifyCFGOptions &Opts)
applyCommandLineOverridesToOptions(Options);
}
+void SimplifyCFGPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<SimplifyCFGPass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+ OS << "<";
+ OS << "bonus-inst-threshold=" << Options.BonusInstThreshold << ";";
+ OS << (Options.ForwardSwitchCondToPhi ? "" : "no-") << "forward-switch-cond;";
+ OS << (Options.ConvertSwitchToLookupTable ? "" : "no-")
+ << "switch-to-lookup;";
+ OS << (Options.NeedCanonicalLoop ? "" : "no-") << "keep-loops;";
+ OS << (Options.HoistCommonInsts ? "" : "no-") << "hoist-common-insts;";
+ OS << (Options.SinkCommonInsts ? "" : "no-") << "sink-common-insts";
+ OS << ">";
+}
+
PreservedAnalyses SimplifyCFGPass::run(Function &F,
FunctionAnalysisManager &AM) {
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
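
The printPipeline hook added above reconstructs a -passes=-style option string from the pass's SimplifyCFGOptions. A rough model of the text it emits; the "simplifycfg" prefix and the option values are assumptions for illustration, only the format of the <...> body comes from the code in this hunk:

  #include <iostream>
  #include <string>

  int main() {
    // Example option values, not necessarily the pass's defaults.
    unsigned BonusInstThreshold = 1;
    bool ForwardSwitchCond = false, SwitchToLookup = false, KeepLoops = true,
         HoistCommon = false, SinkCommon = false;

    std::string S = "simplifycfg<";
    S += "bonus-inst-threshold=" + std::to_string(BonusInstThreshold) + ";";
    S += std::string(ForwardSwitchCond ? "" : "no-") + "forward-switch-cond;";
    S += std::string(SwitchToLookup ? "" : "no-") + "switch-to-lookup;";
    S += std::string(KeepLoops ? "" : "no-") + "keep-loops;";
    S += std::string(HoistCommon ? "" : "no-") + "hoist-common-insts;";
    S += std::string(SinkCommon ? "" : "no-") + "sink-common-insts";
    S += ">";
    std::cout << S << "\n"; // e.g. simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;...>
  }
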
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
index dfa30418ea01..06169a7834f6 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
@@ -268,7 +268,7 @@ bool SpeculativeExecutionPass::considerHoistingFromTo(
if (const auto *DVI = dyn_cast<DbgVariableIntrinsic>(U)) {
return all_of(DVI->location_ops(), [&NotHoisted](Value *V) {
if (const auto *I = dyn_cast_or_null<Instruction>(V)) {
- if (NotHoisted.count(I) == 0)
+ if (!NotHoisted.contains(I))
return true;
}
return false;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index 20b8b982e14b..b47378808216 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -607,7 +607,7 @@ Value *StraightLineStrengthReduce::emitBump(const Candidate &Basis,
if (IndexOffset == 1)
return C.Stride;
// Common case 2: if (i' - i) is -1, Bump = -S.
- if (IndexOffset.isAllOnesValue())
+ if (IndexOffset.isAllOnes())
return Builder.CreateNeg(C.Stride);
// Otherwise, Bump = (i' - i) * sext/trunc(S). Note that (i' - i) and S may
@@ -620,7 +620,7 @@ Value *StraightLineStrengthReduce::emitBump(const Candidate &Basis,
ConstantInt *Exponent = ConstantInt::get(DeltaType, IndexOffset.logBase2());
return Builder.CreateShl(ExtendedStride, Exponent);
}
- if ((-IndexOffset).isPowerOf2()) {
+ if (IndexOffset.isNegatedPowerOf2()) {
// If (i - i') is a power of 2, Bump = -sext/trunc(S) << log(i' - i).
ConstantInt *Exponent =
ConstantInt::get(DeltaType, (-IndexOffset).logBase2());
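
The two hunks above swap predicate spellings: isAllOnesValue() becomes isAllOnes(), and the manual (-IndexOffset).isPowerOf2() becomes isNegatedPowerOf2(). A plain-integer sketch of what those predicates test; APInt itself is arbitrary-width, so these 64-bit stand-ins are only illustrative:

  #include <cassert>
  #include <cstdint>

  // All bits set at the given width, i.e. the value -1.
  bool isAllOnes64(uint64_t Bits, unsigned Width) {
    return Width == 64 ? ~Bits == 0 : Bits == ((uint64_t{1} << Width) - 1);
  }

  // Negative, and its two's-complement negation has exactly one bit set.
  bool isNegatedPowerOf2_64(int64_t V) {
    uint64_t Neg = ~static_cast<uint64_t>(V) + 1;
    return V < 0 && (Neg & (Neg - 1)) == 0;
  }

  int main() {
    assert(isAllOnes64(static_cast<uint64_t>(-1), 64)); // IndexOffset == -1: Bump = -S
    assert(isNegatedPowerOf2_64(-8));                   // Bump = -sext/trunc(S) << 3
    assert(!isNegatedPowerOf2_64(-6));
    assert(!isNegatedPowerOf2_64(8));
  }
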
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 846a9321f53e..3bcf92e28a21 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -262,7 +262,7 @@ static bool markTails(Function &F, OptimizationRemarkEmitter *ORE) {
// Note that this runs whether we know an alloca has escaped or not. If
// it has, then we can't trust Tracker.AllocaUsers to be accurate.
bool SafeToTail = true;
- for (auto &Arg : CI->arg_operands()) {
+ for (auto &Arg : CI->args()) {
if (isa<Constant>(Arg.getUser()))
continue;
if (Argument *A = dyn_cast<Argument>(Arg.getUser()))
@@ -584,8 +584,8 @@ void TailRecursionEliminator::insertAccumulator(Instruction *AccRecInstr) {
// call instruction into the newly created temporary variable.
void TailRecursionEliminator::copyByValueOperandIntoLocalTemp(CallInst *CI,
int OpndIdx) {
- PointerType *ArgTy = cast<PointerType>(CI->getArgOperand(OpndIdx)->getType());
- Type *AggTy = ArgTy->getElementType();
+ Type *AggTy = CI->getParamByValType(OpndIdx);
+ assert(AggTy);
const DataLayout &DL = F.getParent()->getDataLayout();
// Get alignment of byVal operand.
@@ -611,8 +611,8 @@ void TailRecursionEliminator::copyByValueOperandIntoLocalTemp(CallInst *CI,
// into the corresponding function argument location.
void TailRecursionEliminator::copyLocalTempOfByValueOperandIntoArguments(
CallInst *CI, int OpndIdx) {
- PointerType *ArgTy = cast<PointerType>(CI->getArgOperand(OpndIdx)->getType());
- Type *AggTy = ArgTy->getElementType();
+ Type *AggTy = CI->getParamByValType(OpndIdx);
+ assert(AggTy);
const DataLayout &DL = F.getParent()->getDataLayout();
// Get alignment of byVal operand.
@@ -667,7 +667,7 @@ bool TailRecursionEliminator::eliminateCall(CallInst *CI) {
createTailRecurseLoopHeader(CI);
// Copy values of ByVal operands into local temporary variables.
- for (unsigned I = 0, E = CI->getNumArgOperands(); I != E; ++I) {
+ for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
if (CI->isByValArgument(I))
copyByValueOperandIntoLocalTemp(CI, I);
}
@@ -675,7 +675,7 @@ bool TailRecursionEliminator::eliminateCall(CallInst *CI) {
// Ok, now that we know we have a pseudo-entry block WITH all of the
// required PHI nodes, add entries into the PHI node for the actual
// parameters passed into the tail-recursive call.
- for (unsigned I = 0, E = CI->getNumArgOperands(); I != E; ++I) {
+ for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
if (CI->isByValArgument(I)) {
copyLocalTempOfByValueOperandIntoArguments(CI, I);
ArgumentPHIs[I]->addIncoming(F.getArg(I), BB);