aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp')
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp182
1 files changed, 117 insertions, 65 deletions
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp
index 30d959704745..1316919e65da 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp
@@ -10,6 +10,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/SelectOptimize.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
@@ -39,7 +40,6 @@
#include <memory>
#include <queue>
#include <stack>
-#include <string>
using namespace llvm;
@@ -97,36 +97,22 @@ static cl::opt<bool>
namespace {
-class SelectOptimize : public FunctionPass {
+class SelectOptimizeImpl {
const TargetMachine *TM = nullptr;
const TargetSubtargetInfo *TSI = nullptr;
const TargetLowering *TLI = nullptr;
const TargetTransformInfo *TTI = nullptr;
const LoopInfo *LI = nullptr;
- DominatorTree *DT = nullptr;
- std::unique_ptr<BlockFrequencyInfo> BFI;
- std::unique_ptr<BranchProbabilityInfo> BPI;
+ BlockFrequencyInfo *BFI;
ProfileSummaryInfo *PSI = nullptr;
OptimizationRemarkEmitter *ORE = nullptr;
TargetSchedModel TSchedModel;
public:
- static char ID;
-
- SelectOptimize() : FunctionPass(ID) {
- initializeSelectOptimizePass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<ProfileSummaryInfoWrapperPass>();
- AU.addRequired<TargetPassConfig>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
- }
+ SelectOptimizeImpl() = default;
+ SelectOptimizeImpl(const TargetMachine *TM) : TM(TM){};
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+ bool runOnFunction(Function &F, Pass &P);
private:
// Select groups consist of consecutive select instructions with the same
@@ -212,29 +198,94 @@ private:
// Returns true if the target architecture supports lowering a given select.
bool isSelectKindSupported(SelectInst *SI);
};
+
+class SelectOptimize : public FunctionPass {
+ SelectOptimizeImpl Impl;
+
+public:
+ static char ID;
+
+ SelectOptimize() : FunctionPass(ID) {
+ initializeSelectOptimizePass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ return Impl.runOnFunction(F, *this);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ AU.addRequired<TargetPassConfig>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addRequired<BlockFrequencyInfoWrapperPass>();
+ AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
+ }
+};
+
} // namespace
+PreservedAnalyses SelectOptimizePass::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ SelectOptimizeImpl Impl(TM);
+ return Impl.run(F, FAM);
+}
+
char SelectOptimize::ID = 0;
INITIALIZE_PASS_BEGIN(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
false)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_END(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
false)
FunctionPass *llvm::createSelectOptimizePass() { return new SelectOptimize(); }
-bool SelectOptimize::runOnFunction(Function &F) {
- TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
+PreservedAnalyses SelectOptimizeImpl::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ TSI = TM->getSubtargetImpl(F);
+ TLI = TSI->getTargetLowering();
+
+ // If none of the select types are supported then skip this pass.
+ // This is an optimization pass. Legality issues will be handled by
+ // instruction selection.
+ if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) &&
+ !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) &&
+ !TLI->isSelectSupported(TargetLowering::VectorMaskSelect))
+ return PreservedAnalyses::all();
+
+ TTI = &FAM.getResult<TargetIRAnalysis>(F);
+ if (!TTI->enableSelectOptimize())
+ return PreservedAnalyses::all();
+
+ PSI = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F)
+ .getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
+ assert(PSI && "This pass requires module analysis pass `profile-summary`!");
+ BFI = &FAM.getResult<BlockFrequencyAnalysis>(F);
+
+ // When optimizing for size, selects are preferable over branches.
+ if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI))
+ return PreservedAnalyses::all();
+
+ LI = &FAM.getResult<LoopAnalysis>(F);
+ ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+ TSchedModel.init(TSI);
+
+ bool Changed = optimizeSelects(F);
+ return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
+
+bool SelectOptimizeImpl::runOnFunction(Function &F, Pass &P) {
+ TM = &P.getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
TSI = TM->getSubtargetImpl(F);
TLI = TSI->getTargetLowering();
- // If none of the select types is supported then skip this pass.
+ // If none of the select types are supported then skip this pass.
// This is an optimization pass. Legality issues will be handled by
// instruction selection.
if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) &&
@@ -242,27 +293,25 @@ bool SelectOptimize::runOnFunction(Function &F) {
!TLI->isSelectSupported(TargetLowering::VectorMaskSelect))
return false;
- TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ TTI = &P.getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
if (!TTI->enableSelectOptimize())
return false;
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- BPI.reset(new BranchProbabilityInfo(F, *LI));
- BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
- PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
- ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
+ LI = &P.getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ BFI = &P.getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
+ PSI = &P.getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ ORE = &P.getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
TSchedModel.init(TSI);
// When optimizing for size, selects are preferable over branches.
- if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI.get()))
+ if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI))
return false;
return optimizeSelects(F);
}
-bool SelectOptimize::optimizeSelects(Function &F) {
+bool SelectOptimizeImpl::optimizeSelects(Function &F) {
// Determine for which select groups it is profitable converting to branches.
SelectGroups ProfSIGroups;
// Base heuristics apply only to non-loops and outer loops.
@@ -278,8 +327,8 @@ bool SelectOptimize::optimizeSelects(Function &F) {
return !ProfSIGroups.empty();
}
-void SelectOptimize::optimizeSelectsBase(Function &F,
- SelectGroups &ProfSIGroups) {
+void SelectOptimizeImpl::optimizeSelectsBase(Function &F,
+ SelectGroups &ProfSIGroups) {
// Collect all the select groups.
SelectGroups SIGroups;
for (BasicBlock &BB : F) {
@@ -294,8 +343,8 @@ void SelectOptimize::optimizeSelectsBase(Function &F,
findProfitableSIGroupsBase(SIGroups, ProfSIGroups);
}
-void SelectOptimize::optimizeSelectsInnerLoops(Function &F,
- SelectGroups &ProfSIGroups) {
+void SelectOptimizeImpl::optimizeSelectsInnerLoops(Function &F,
+ SelectGroups &ProfSIGroups) {
SmallVector<Loop *, 4> Loops(LI->begin(), LI->end());
// Need to check size on each iteration as we accumulate child loops.
for (unsigned long i = 0; i < Loops.size(); ++i)
@@ -332,7 +381,7 @@ getTrueOrFalseValue(SelectInst *SI, bool isTrue,
return V;
}
-void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
+void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
for (SelectGroup &ASI : ProfSIGroups) {
// The code transformation here is a modified version of the sinking
// transformation in CodeGenPrepare::optimizeSelectInst with a more
@@ -425,7 +474,7 @@ void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
BasicBlock *StartBlock = SI->getParent();
BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI));
BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
- BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock).getFrequency());
+ BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock));
// Delete the unconditional branch that was just created by the split.
StartBlock->getTerminator()->eraseFromParent();
@@ -439,7 +488,7 @@ void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
DIt++;
}
for (auto *DI : DebugPseudoINS) {
- DI->moveBefore(&*EndBlock->getFirstInsertionPt());
+ DI->moveBeforePreserving(&*EndBlock->getFirstInsertionPt());
}
// These are the new basic blocks for the conditional branch.
@@ -505,7 +554,8 @@ void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
for (auto It = ASI.rbegin(); It != ASI.rend(); ++It) {
SelectInst *SI = *It;
// The select itself is replaced with a PHI Node.
- PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front());
+ PHINode *PN = PHINode::Create(SI->getType(), 2, "");
+ PN->insertBefore(EndBlock->begin());
PN->takeName(SI);
PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
@@ -531,8 +581,8 @@ static bool isSpecialSelect(SelectInst *SI) {
return false;
}
-void SelectOptimize::collectSelectGroups(BasicBlock &BB,
- SelectGroups &SIGroups) {
+void SelectOptimizeImpl::collectSelectGroups(BasicBlock &BB,
+ SelectGroups &SIGroups) {
BasicBlock::iterator BBIt = BB.begin();
while (BBIt != BB.end()) {
Instruction *I = &*BBIt++;
@@ -565,8 +615,8 @@ void SelectOptimize::collectSelectGroups(BasicBlock &BB,
}
}
-void SelectOptimize::findProfitableSIGroupsBase(SelectGroups &SIGroups,
- SelectGroups &ProfSIGroups) {
+void SelectOptimizeImpl::findProfitableSIGroupsBase(
+ SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
for (SelectGroup &ASI : SIGroups) {
++NumSelectOptAnalyzed;
if (isConvertToBranchProfitableBase(ASI))
@@ -580,14 +630,14 @@ static void EmitAndPrintRemark(OptimizationRemarkEmitter *ORE,
ORE->emit(Rem);
}
-void SelectOptimize::findProfitableSIGroupsInnerLoops(
+void SelectOptimizeImpl::findProfitableSIGroupsInnerLoops(
const Loop *L, SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
NumSelectOptAnalyzed += SIGroups.size();
// For each select group in an inner-most loop,
// a branch is more preferable than a select/conditional-move if:
// i) conversion to branches for all the select groups of the loop satisfies
// loop-level heuristics including reducing the loop's critical path by
- // some threshold (see SelectOptimize::checkLoopHeuristics); and
+ // some threshold (see SelectOptimizeImpl::checkLoopHeuristics); and
// ii) the total cost of the select group is cheaper with a branch compared
// to its predicated version. The cost is in terms of latency and the cost
// of a select group is the cost of its most expensive select instruction
@@ -627,7 +677,7 @@ void SelectOptimize::findProfitableSIGroupsInnerLoops(
}
}
-bool SelectOptimize::isConvertToBranchProfitableBase(
+bool SelectOptimizeImpl::isConvertToBranchProfitableBase(
const SmallVector<SelectInst *, 2> &ASI) {
SelectInst *SI = ASI.front();
LLVM_DEBUG(dbgs() << "Analyzing select group containing " << *SI << "\n");
@@ -635,7 +685,7 @@ bool SelectOptimize::isConvertToBranchProfitableBase(
OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", SI);
// Skip cold basic blocks. Better to optimize for size for cold blocks.
- if (PSI->isColdBlock(SI->getParent(), BFI.get())) {
+ if (PSI->isColdBlock(SI->getParent(), BFI)) {
++NumSelectColdBB;
ORmiss << "Not converted to branch because of cold basic block. ";
EmitAndPrintRemark(ORE, ORmiss);
@@ -678,7 +728,7 @@ static InstructionCost divideNearest(InstructionCost Numerator,
return (Numerator + (Denominator / 2)) / Denominator;
}
-bool SelectOptimize::hasExpensiveColdOperand(
+bool SelectOptimizeImpl::hasExpensiveColdOperand(
const SmallVector<SelectInst *, 2> &ASI) {
bool ColdOperand = false;
uint64_t TrueWeight, FalseWeight, TotalWeight;
@@ -752,9 +802,10 @@ static bool isSafeToSinkLoad(Instruction *LoadI, Instruction *SI) {
// (sufficiently-accurate in practice), we populate this set with the
// instructions of the backwards dependence slice that only have one-use and
// form an one-use chain that leads to the source instruction.
-void SelectOptimize::getExclBackwardsSlice(Instruction *I,
- std::stack<Instruction *> &Slice,
- Instruction *SI, bool ForSinking) {
+void SelectOptimizeImpl::getExclBackwardsSlice(Instruction *I,
+ std::stack<Instruction *> &Slice,
+ Instruction *SI,
+ bool ForSinking) {
SmallPtrSet<Instruction *, 2> Visited;
std::queue<Instruction *> Worklist;
Worklist.push(I);
@@ -798,7 +849,7 @@ void SelectOptimize::getExclBackwardsSlice(Instruction *I,
}
}
-bool SelectOptimize::isSelectHighlyPredictable(const SelectInst *SI) {
+bool SelectOptimizeImpl::isSelectHighlyPredictable(const SelectInst *SI) {
uint64_t TrueWeight, FalseWeight;
if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
uint64_t Max = std::max(TrueWeight, FalseWeight);
@@ -812,8 +863,8 @@ bool SelectOptimize::isSelectHighlyPredictable(const SelectInst *SI) {
return false;
}
-bool SelectOptimize::checkLoopHeuristics(const Loop *L,
- const CostInfo LoopCost[2]) {
+bool SelectOptimizeImpl::checkLoopHeuristics(const Loop *L,
+ const CostInfo LoopCost[2]) {
// Loop-level checks to determine if a non-predicated version (with branches)
// of the loop is more profitable than its predicated version.
@@ -881,7 +932,7 @@ bool SelectOptimize::checkLoopHeuristics(const Loop *L,
// and non-predicated version of the given loop.
// Returns false if unable to compute these costs due to invalid cost of loop
// instruction(s).
-bool SelectOptimize::computeLoopCosts(
+bool SelectOptimizeImpl::computeLoopCosts(
const Loop *L, const SelectGroups &SIGroups,
DenseMap<const Instruction *, CostInfo> &InstCostMap, CostInfo *LoopCost) {
LLVM_DEBUG(dbgs() << "Calculating Latency / IPredCost / INonPredCost of loop "
@@ -969,7 +1020,7 @@ bool SelectOptimize::computeLoopCosts(
}
SmallPtrSet<const Instruction *, 2>
-SelectOptimize::getSIset(const SelectGroups &SIGroups) {
+SelectOptimizeImpl::getSIset(const SelectGroups &SIGroups) {
SmallPtrSet<const Instruction *, 2> SIset;
for (const SelectGroup &ASI : SIGroups)
for (const SelectInst *SI : ASI)
@@ -977,7 +1028,8 @@ SelectOptimize::getSIset(const SelectGroups &SIGroups) {
return SIset;
}
-std::optional<uint64_t> SelectOptimize::computeInstCost(const Instruction *I) {
+std::optional<uint64_t>
+SelectOptimizeImpl::computeInstCost(const Instruction *I) {
InstructionCost ICost =
TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency);
if (auto OC = ICost.getValue())
@@ -986,8 +1038,8 @@ std::optional<uint64_t> SelectOptimize::computeInstCost(const Instruction *I) {
}
ScaledNumber<uint64_t>
-SelectOptimize::getMispredictionCost(const SelectInst *SI,
- const Scaled64 CondCost) {
+SelectOptimizeImpl::getMispredictionCost(const SelectInst *SI,
+ const Scaled64 CondCost) {
uint64_t MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty;
// Account for the default misprediction rate when using a branch
@@ -1012,8 +1064,8 @@ SelectOptimize::getMispredictionCost(const SelectInst *SI,
// Returns the cost of a branch when the prediction is correct.
// TrueCost * TrueProbability + FalseCost * FalseProbability.
ScaledNumber<uint64_t>
-SelectOptimize::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
- const SelectInst *SI) {
+SelectOptimizeImpl::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
+ const SelectInst *SI) {
Scaled64 PredPathCost;
uint64_t TrueWeight, FalseWeight;
if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
@@ -1033,7 +1085,7 @@ SelectOptimize::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
return PredPathCost;
}
-bool SelectOptimize::isSelectKindSupported(SelectInst *SI) {
+bool SelectOptimizeImpl::isSelectKindSupported(SelectInst *SI) {
bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
if (VectorCond)
return false;