Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp')
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp | 182
1 file changed, 117 insertions, 65 deletions
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp
index 30d959704745..1316919e65da 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp
@@ -10,6 +10,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/SelectOptimize.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/BlockFrequencyInfo.h"
@@ -39,7 +40,6 @@
 #include <memory>
 #include <queue>
 #include <stack>
-#include <string>
 
 using namespace llvm;
 
@@ -97,36 +97,22 @@ static cl::opt<bool>
 
 namespace {
 
-class SelectOptimize : public FunctionPass {
+class SelectOptimizeImpl {
   const TargetMachine *TM = nullptr;
   const TargetSubtargetInfo *TSI = nullptr;
   const TargetLowering *TLI = nullptr;
   const TargetTransformInfo *TTI = nullptr;
   const LoopInfo *LI = nullptr;
-  DominatorTree *DT = nullptr;
-  std::unique_ptr<BlockFrequencyInfo> BFI;
-  std::unique_ptr<BranchProbabilityInfo> BPI;
+  BlockFrequencyInfo *BFI;
   ProfileSummaryInfo *PSI = nullptr;
   OptimizationRemarkEmitter *ORE = nullptr;
   TargetSchedModel TSchedModel;
 
 public:
-  static char ID;
-
-  SelectOptimize() : FunctionPass(ID) {
-    initializeSelectOptimizePass(*PassRegistry::getPassRegistry());
-  }
-
-  bool runOnFunction(Function &F) override;
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.addRequired<ProfileSummaryInfoWrapperPass>();
-    AU.addRequired<TargetPassConfig>();
-    AU.addRequired<TargetTransformInfoWrapperPass>();
-    AU.addRequired<DominatorTreeWrapperPass>();
-    AU.addRequired<LoopInfoWrapperPass>();
-    AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
-  }
+  SelectOptimizeImpl() = default;
+  SelectOptimizeImpl(const TargetMachine *TM) : TM(TM){};
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+  bool runOnFunction(Function &F, Pass &P);
 
 private:
   // Select groups consist of consecutive select instructions with the same
@@ -212,29 +198,94 @@ private:
   // Returns true if the target architecture supports lowering a given select.
   bool isSelectKindSupported(SelectInst *SI);
 };
+
+class SelectOptimize : public FunctionPass {
+  SelectOptimizeImpl Impl;
+
+public:
+  static char ID;
+
+  SelectOptimize() : FunctionPass(ID) {
+    initializeSelectOptimizePass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnFunction(Function &F) override {
+    return Impl.runOnFunction(F, *this);
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<ProfileSummaryInfoWrapperPass>();
+    AU.addRequired<TargetPassConfig>();
+    AU.addRequired<TargetTransformInfoWrapperPass>();
+    AU.addRequired<LoopInfoWrapperPass>();
+    AU.addRequired<BlockFrequencyInfoWrapperPass>();
+    AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
+  }
+};
+
 } // namespace
 
+PreservedAnalyses SelectOptimizePass::run(Function &F,
+                                          FunctionAnalysisManager &FAM) {
+  SelectOptimizeImpl Impl(TM);
+  return Impl.run(F, FAM);
+}
+
 char SelectOptimize::ID = 0;
 
 INITIALIZE_PASS_BEGIN(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
                       false)
 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
 INITIALIZE_PASS_END(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
                     false)
 
 FunctionPass *llvm::createSelectOptimizePass() { return new SelectOptimize(); }
 
-bool SelectOptimize::runOnFunction(Function &F) {
-  TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
+PreservedAnalyses SelectOptimizeImpl::run(Function &F,
+                                          FunctionAnalysisManager &FAM) {
+  TSI = TM->getSubtargetImpl(F);
+  TLI = TSI->getTargetLowering();
+
+  // If none of the select types are supported then skip this pass.
+  // This is an optimization pass. Legality issues will be handled by
+  // instruction selection.
+  if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) &&
+      !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) &&
+      !TLI->isSelectSupported(TargetLowering::VectorMaskSelect))
+    return PreservedAnalyses::all();
+
+  TTI = &FAM.getResult<TargetIRAnalysis>(F);
+  if (!TTI->enableSelectOptimize())
+    return PreservedAnalyses::all();
+
+  PSI = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F)
+            .getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
+  assert(PSI && "This pass requires module analysis pass `profile-summary`!");
+  BFI = &FAM.getResult<BlockFrequencyAnalysis>(F);
+
+  // When optimizing for size, selects are preferable over branches.
+  if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI))
+    return PreservedAnalyses::all();
+
+  LI = &FAM.getResult<LoopAnalysis>(F);
+  ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+  TSchedModel.init(TSI);
+
+  bool Changed = optimizeSelects(F);
+  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
+
+bool SelectOptimizeImpl::runOnFunction(Function &F, Pass &P) {
+  TM = &P.getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
   TSI = TM->getSubtargetImpl(F);
   TLI = TSI->getTargetLowering();
 
-  // If none of the select types is supported then skip this pass.
+  // If none of the select types are supported then skip this pass.
   // This is an optimization pass. Legality issues will be handled by
   // instruction selection.
   if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) &&
@@ -242,27 +293,25 @@ bool SelectOptimize::runOnFunction(Function &F) {
       !TLI->isSelectSupported(TargetLowering::VectorMaskSelect))
     return false;
 
-  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+  TTI = &P.getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
   if (!TTI->enableSelectOptimize())
     return false;
 
-  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
-  BPI.reset(new BranchProbabilityInfo(F, *LI));
-  BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
-  PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
-  ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
+  LI = &P.getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+  BFI = &P.getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
+  PSI = &P.getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+  ORE = &P.getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
   TSchedModel.init(TSI);
 
   // When optimizing for size, selects are preferable over branches.
-  if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI.get()))
+  if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI))
     return false;
 
   return optimizeSelects(F);
 }
 
-bool SelectOptimize::optimizeSelects(Function &F) {
+bool SelectOptimizeImpl::optimizeSelects(Function &F) {
   // Determine for which select groups it is profitable converting to branches.
   SelectGroups ProfSIGroups;
   // Base heuristics apply only to non-loops and outer loops.
@@ -278,8 +327,8 @@ bool SelectOptimize::optimizeSelects(Function &F) {
   return !ProfSIGroups.empty();
 }
 
-void SelectOptimize::optimizeSelectsBase(Function &F,
-                                         SelectGroups &ProfSIGroups) {
+void SelectOptimizeImpl::optimizeSelectsBase(Function &F,
+                                             SelectGroups &ProfSIGroups) {
   // Collect all the select groups.
   SelectGroups SIGroups;
   for (BasicBlock &BB : F) {
@@ -294,8 +343,8 @@ void SelectOptimize::optimizeSelectsBase(Function &F,
   findProfitableSIGroupsBase(SIGroups, ProfSIGroups);
 }
 
-void SelectOptimize::optimizeSelectsInnerLoops(Function &F,
-                                               SelectGroups &ProfSIGroups) {
+void SelectOptimizeImpl::optimizeSelectsInnerLoops(Function &F,
+                                                   SelectGroups &ProfSIGroups) {
   SmallVector<Loop *, 4> Loops(LI->begin(), LI->end());
   // Need to check size on each iteration as we accumulate child loops.
   for (unsigned long i = 0; i < Loops.size(); ++i)
@@ -332,7 +381,7 @@ getTrueOrFalseValue(SelectInst *SI, bool isTrue,
   return V;
 }
 
-void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
+void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
   for (SelectGroup &ASI : ProfSIGroups) {
     // The code transformation here is a modified version of the sinking
     // transformation in CodeGenPrepare::optimizeSelectInst with a more
@@ -425,7 +474,7 @@ void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
     BasicBlock *StartBlock = SI->getParent();
     BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI));
     BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
-    BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock).getFrequency());
+    BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock));
 
     // Delete the unconditional branch that was just created by the split.
     StartBlock->getTerminator()->eraseFromParent();
@@ -439,7 +488,7 @@ void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
       DIt++;
     }
     for (auto *DI : DebugPseudoINS) {
-      DI->moveBefore(&*EndBlock->getFirstInsertionPt());
+      DI->moveBeforePreserving(&*EndBlock->getFirstInsertionPt());
     }
 
     // These are the new basic blocks for the conditional branch.
@@ -505,7 +554,8 @@ void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
     for (auto It = ASI.rbegin(); It != ASI.rend(); ++It) {
       SelectInst *SI = *It;
       // The select itself is replaced with a PHI Node.
-      PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front());
+      PHINode *PN = PHINode::Create(SI->getType(), 2, "");
+      PN->insertBefore(EndBlock->begin());
       PN->takeName(SI);
       PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
       PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
@@ -531,8 +581,8 @@ static bool isSpecialSelect(SelectInst *SI) {
   return false;
 }
 
-void SelectOptimize::collectSelectGroups(BasicBlock &BB,
-                                         SelectGroups &SIGroups) {
+void SelectOptimizeImpl::collectSelectGroups(BasicBlock &BB,
+                                             SelectGroups &SIGroups) {
   BasicBlock::iterator BBIt = BB.begin();
   while (BBIt != BB.end()) {
     Instruction *I = &*BBIt++;
@@ -565,8 +615,8 @@ void SelectOptimize::collectSelectGroups(BasicBlock &BB,
   }
 }
 
-void SelectOptimize::findProfitableSIGroupsBase(SelectGroups &SIGroups,
-                                                SelectGroups &ProfSIGroups) {
+void SelectOptimizeImpl::findProfitableSIGroupsBase(
+    SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
   for (SelectGroup &ASI : SIGroups) {
     ++NumSelectOptAnalyzed;
     if (isConvertToBranchProfitableBase(ASI))
@@ -580,14 +630,14 @@ static void EmitAndPrintRemark(OptimizationRemarkEmitter *ORE,
   ORE->emit(Rem);
 }
 
-void SelectOptimize::findProfitableSIGroupsInnerLoops(
+void SelectOptimizeImpl::findProfitableSIGroupsInnerLoops(
     const Loop *L, SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
   NumSelectOptAnalyzed += SIGroups.size();
   // For each select group in an inner-most loop,
   // a branch is more preferable than a select/conditional-move if:
   // i) conversion to branches for all the select groups of the loop satisfies
   //    loop-level heuristics including reducing the loop's critical path by
-  //    some threshold (see SelectOptimize::checkLoopHeuristics); and
+  //    some threshold (see SelectOptimizeImpl::checkLoopHeuristics); and
   // ii) the total cost of the select group is cheaper with a branch compared
   //     to its predicated version. The cost is in terms of latency and the cost
   //     of a select group is the cost of its most expensive select instruction
@@ -627,7 +677,7 @@ void SelectOptimize::findProfitableSIGroupsInnerLoops(
   }
 }
 
-bool SelectOptimize::isConvertToBranchProfitableBase(
+bool SelectOptimizeImpl::isConvertToBranchProfitableBase(
    const SmallVector<SelectInst *, 2> &ASI) {
   SelectInst *SI = ASI.front();
   LLVM_DEBUG(dbgs() << "Analyzing select group containing " << *SI << "\n");
@@ -635,7 +685,7 @@ bool SelectOptimize::isConvertToBranchProfitableBase(
   OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", SI);
 
   // Skip cold basic blocks. Better to optimize for size for cold blocks.
-  if (PSI->isColdBlock(SI->getParent(), BFI.get())) {
+  if (PSI->isColdBlock(SI->getParent(), BFI)) {
     ++NumSelectColdBB;
     ORmiss << "Not converted to branch because of cold basic block. ";
     EmitAndPrintRemark(ORE, ORmiss);
@@ -678,7 +728,7 @@ static InstructionCost divideNearest(InstructionCost Numerator,
   return (Numerator + (Denominator / 2)) / Denominator;
 }
 
-bool SelectOptimize::hasExpensiveColdOperand(
+bool SelectOptimizeImpl::hasExpensiveColdOperand(
     const SmallVector<SelectInst *, 2> &ASI) {
   bool ColdOperand = false;
   uint64_t TrueWeight, FalseWeight, TotalWeight;
@@ -752,9 +802,10 @@ static bool isSafeToSinkLoad(Instruction *LoadI, Instruction *SI) {
 // (sufficiently-accurate in practice), we populate this set with the
 // instructions of the backwards dependence slice that only have one-use and
 // form an one-use chain that leads to the source instruction.
-void SelectOptimize::getExclBackwardsSlice(Instruction *I,
-                                           std::stack<Instruction *> &Slice,
-                                           Instruction *SI, bool ForSinking) {
+void SelectOptimizeImpl::getExclBackwardsSlice(Instruction *I,
+                                               std::stack<Instruction *> &Slice,
+                                               Instruction *SI,
+                                               bool ForSinking) {
   SmallPtrSet<Instruction *, 2> Visited;
   std::queue<Instruction *> Worklist;
   Worklist.push(I);
@@ -798,7 +849,7 @@ void SelectOptimize::getExclBackwardsSlice(Instruction *I,
   }
 }
 
-bool SelectOptimize::isSelectHighlyPredictable(const SelectInst *SI) {
+bool SelectOptimizeImpl::isSelectHighlyPredictable(const SelectInst *SI) {
  uint64_t TrueWeight, FalseWeight;
   if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
     uint64_t Max = std::max(TrueWeight, FalseWeight);
@@ -812,8 +863,8 @@ bool SelectOptimize::isSelectHighlyPredictable(const SelectInst *SI) {
   return false;
 }
 
-bool SelectOptimize::checkLoopHeuristics(const Loop *L,
-                                         const CostInfo LoopCost[2]) {
+bool SelectOptimizeImpl::checkLoopHeuristics(const Loop *L,
+                                             const CostInfo LoopCost[2]) {
   // Loop-level checks to determine if a non-predicated version (with branches)
   // of the loop is more profitable than its predicated version.
 
@@ -881,7 +932,7 @@ bool SelectOptimize::checkLoopHeuristics(const Loop *L,
 // and non-predicated version of the given loop.
 // Returns false if unable to compute these costs due to invalid cost of loop
 // instruction(s).
-bool SelectOptimize::computeLoopCosts(
+bool SelectOptimizeImpl::computeLoopCosts(
     const Loop *L, const SelectGroups &SIGroups,
     DenseMap<const Instruction *, CostInfo> &InstCostMap, CostInfo *LoopCost) {
   LLVM_DEBUG(dbgs() << "Calculating Latency / IPredCost / INonPredCost of loop "
@@ -969,7 +1020,7 @@ bool SelectOptimize::computeLoopCosts(
 }
 
 SmallPtrSet<const Instruction *, 2>
-SelectOptimize::getSIset(const SelectGroups &SIGroups) {
+SelectOptimizeImpl::getSIset(const SelectGroups &SIGroups) {
   SmallPtrSet<const Instruction *, 2> SIset;
   for (const SelectGroup &ASI : SIGroups)
     for (const SelectInst *SI : ASI)
@@ -977,7 +1028,8 @@ SelectOptimize::getSIset(const SelectGroups &SIGroups) {
   return SIset;
 }
 
-std::optional<uint64_t> SelectOptimize::computeInstCost(const Instruction *I) {
+std::optional<uint64_t>
+SelectOptimizeImpl::computeInstCost(const Instruction *I) {
   InstructionCost ICost =
       TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency);
   if (auto OC = ICost.getValue())
@@ -986,8 +1038,8 @@ std::optional<uint64_t> SelectOptimize::computeInstCost(const Instruction *I) {
 }
 
 ScaledNumber<uint64_t>
-SelectOptimize::getMispredictionCost(const SelectInst *SI,
-                                     const Scaled64 CondCost) {
+SelectOptimizeImpl::getMispredictionCost(const SelectInst *SI,
+                                         const Scaled64 CondCost) {
   uint64_t MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty;
 
   // Account for the default misprediction rate when using a branch
@@ -1012,8 +1064,8 @@ SelectOptimize::getMispredictionCost(const SelectInst *SI,
 // Returns the cost of a branch when the prediction is correct.
 // TrueCost * TrueProbability + FalseCost * FalseProbability.
 ScaledNumber<uint64_t>
-SelectOptimize::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
-                                     const SelectInst *SI) {
+SelectOptimizeImpl::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
+                                         const SelectInst *SI) {
   Scaled64 PredPathCost;
   uint64_t TrueWeight, FalseWeight;
   if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
@@ -1033,7 +1085,7 @@ SelectOptimize::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
   return PredPathCost;
 }
 
-bool SelectOptimize::isSelectKindSupported(SelectInst *SI) {
+bool SelectOptimizeImpl::isSelectKindSupported(SelectInst *SI) {
   bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
   if (VectorCond)
     return false;
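
Editor's note: the shape of this change is the usual recipe for porting a legacy LLVM pass to the new pass manager. The transformation logic moves into a pass-manager-agnostic Impl class with two entry points (run for the new PM, runOnFunction for the legacy PM), and each pass manager gets a thin wrapper that only acquires analyses and delegates. A minimal sketch of that recipe follows, using hypothetical names (MyOptImpl, MyOptLegacy, MyOptPass); it mirrors the structure of the diff above, not the actual SelectOptimize sources.

#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"

using namespace llvm;

// All transformation logic lives in a pass-manager-agnostic Impl class;
// only the way analyses are acquired differs between the two entry points.
class MyOptImpl {
public:
  // New-PM entry point: analyses would come from the FunctionAnalysisManager,
  // e.g. FAM.getResult<LoopAnalysis>(F), as in SelectOptimizeImpl::run above.
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM) {
    return optimize(F) ? PreservedAnalyses::none() : PreservedAnalyses::all();
  }
  // Legacy entry point: analyses would come from the wrapper pass P,
  // e.g. P.getAnalysis<LoopInfoWrapperPass>().getLoopInfo().
  bool runOnFunction(Function &F, Pass &P) { return optimize(F); }

private:
  bool optimize(Function &F) { return false; } // shared logic would go here
};

// Thin legacy wrapper: declares analysis dependencies (omitted here) and
// delegates to the Impl, passing itself so the Impl can call getAnalysis.
class MyOptLegacy : public FunctionPass {
  MyOptImpl Impl;

public:
  static char ID;
  MyOptLegacy() : FunctionPass(ID) {}
  bool runOnFunction(Function &F) override {
    return Impl.runOnFunction(F, *this);
  }
};
char MyOptLegacy::ID = 0;

// Thin new-PM wrapper: constructs an Impl per run and forwards to it.
struct MyOptPass : PassInfoMixin<MyOptPass> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM) {
    return MyOptImpl().run(F, FAM);
  }
};

Keeping the Impl free of pass-manager-specific state is what lets both wrappers stay this small, and it is why the diff can delete the legacy-only fields (the BFI/BPI unique_ptrs) in favor of analysis-owned pointers.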
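
One detail of the new-PM path is worth calling out: a function pass cannot compute a module-level analysis on demand, so SelectOptimizeImpl::run reaches ProfileSummaryInfo through the ModuleAnalysisManagerFunctionProxy and only reads a cached result, hence the assert that `profile-summary` has already been run by the enclosing module pipeline. A sketch of that lookup in isolation, with a hypothetical helper name (getCachedPSI):

#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"

using namespace llvm;

// Hypothetical helper: read the module-level ProfileSummaryInfo from inside
// a function pass. getCachedResult returns nullptr if the module pipeline
// has not already computed profile-summary; it never computes it here.
static ProfileSummaryInfo *getCachedPSI(Function &F,
                                        FunctionAnalysisManager &FAM) {
  auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
  return MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
}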
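
The diff also swaps two insertion APIs: moveBefore becomes moveBeforePreserving, and the PHI is now created detached and inserted at a BasicBlock::iterator instead of at a raw &EndBlock->front() pointer. This appears to track LLVM's move toward iterator-based insertion so that positions relative to debug records at a block head are preserved (an inference from the API, not stated in the diff). A sketch of the idiom, with hypothetical names (makeHeadPHI, Ty, BB, NumPreds):

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Create a PHI detached, then insert it at the block's begin() iterator,
// mirroring the PHINode::Create / insertBefore pair in the diff above.
static PHINode *makeHeadPHI(Type *Ty, BasicBlock *BB, unsigned NumPreds) {
  PHINode *PN = PHINode::Create(Ty, NumPreds, "");
  PN->insertBefore(BB->begin());
  return PN;
}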