Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp')
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp | 182
1 file changed, 117 insertions, 65 deletions
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp
index 30d959704745..1316919e65da 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp
@@ -10,6 +10,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/SelectOptimize.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/BlockFrequencyInfo.h"
@@ -39,7 +40,6 @@
 #include <memory>
 #include <queue>
 #include <stack>
-#include <string>
 
 using namespace llvm;
 
@@ -97,36 +97,22 @@ static cl::opt<bool>
 
 namespace {
 
-class SelectOptimize : public FunctionPass {
+class SelectOptimizeImpl {
   const TargetMachine *TM = nullptr;
   const TargetSubtargetInfo *TSI = nullptr;
   const TargetLowering *TLI = nullptr;
   const TargetTransformInfo *TTI = nullptr;
   const LoopInfo *LI = nullptr;
-  DominatorTree *DT = nullptr;
-  std::unique_ptr<BlockFrequencyInfo> BFI;
-  std::unique_ptr<BranchProbabilityInfo> BPI;
+  BlockFrequencyInfo *BFI;
   ProfileSummaryInfo *PSI = nullptr;
   OptimizationRemarkEmitter *ORE = nullptr;
   TargetSchedModel TSchedModel;
 
 public:
-  static char ID;
-
-  SelectOptimize() : FunctionPass(ID) {
-    initializeSelectOptimizePass(*PassRegistry::getPassRegistry());
-  }
-
-  bool runOnFunction(Function &F) override;
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.addRequired<ProfileSummaryInfoWrapperPass>();
-    AU.addRequired<TargetPassConfig>();
-    AU.addRequired<TargetTransformInfoWrapperPass>();
-    AU.addRequired<DominatorTreeWrapperPass>();
-    AU.addRequired<LoopInfoWrapperPass>();
-    AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
-  }
+  SelectOptimizeImpl() = default;
+  SelectOptimizeImpl(const TargetMachine *TM) : TM(TM){};
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+  bool runOnFunction(Function &F, Pass &P);
 
 private:
   // Select groups consist of consecutive select instructions with the same
@@ -212,29 +198,94 @@ private:
   // Returns true if the target architecture supports lowering a given select.
   bool isSelectKindSupported(SelectInst *SI);
 };
+
+class SelectOptimize : public FunctionPass {
+  SelectOptimizeImpl Impl;
+
+public:
+  static char ID;
+
+  SelectOptimize() : FunctionPass(ID) {
+    initializeSelectOptimizePass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnFunction(Function &F) override {
+    return Impl.runOnFunction(F, *this);
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<ProfileSummaryInfoWrapperPass>();
+    AU.addRequired<TargetPassConfig>();
+    AU.addRequired<TargetTransformInfoWrapperPass>();
+    AU.addRequired<LoopInfoWrapperPass>();
+    AU.addRequired<BlockFrequencyInfoWrapperPass>();
+    AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
+  }
+};
+
 } // namespace
 
+PreservedAnalyses SelectOptimizePass::run(Function &F,
+                                          FunctionAnalysisManager &FAM) {
+  SelectOptimizeImpl Impl(TM);
+  return Impl.run(F, FAM);
+}
+
 char SelectOptimize::ID = 0;
 
 INITIALIZE_PASS_BEGIN(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
                       false)
 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
 INITIALIZE_PASS_END(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
                     false)
 
 FunctionPass *llvm::createSelectOptimizePass() { return new SelectOptimize(); }
 
-bool SelectOptimize::runOnFunction(Function &F) {
-  TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
+PreservedAnalyses SelectOptimizeImpl::run(Function &F,
+                                          FunctionAnalysisManager &FAM) {
+  TSI = TM->getSubtargetImpl(F);
+  TLI = TSI->getTargetLowering();
+
+  // If none of the select types are supported then skip this pass.
+  // This is an optimization pass. Legality issues will be handled by
+  // instruction selection.
+  if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) &&
+      !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) &&
+      !TLI->isSelectSupported(TargetLowering::VectorMaskSelect))
+    return PreservedAnalyses::all();
+
+  TTI = &FAM.getResult<TargetIRAnalysis>(F);
+  if (!TTI->enableSelectOptimize())
+    return PreservedAnalyses::all();
+
+  PSI = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F)
+            .getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
+  assert(PSI && "This pass requires module analysis pass `profile-summary`!");
+  BFI = &FAM.getResult<BlockFrequencyAnalysis>(F);
+
+  // When optimizing for size, selects are preferable over branches.
+  if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI))
+    return PreservedAnalyses::all();
+
+  LI = &FAM.getResult<LoopAnalysis>(F);
+  ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+  TSchedModel.init(TSI);
+
+  bool Changed = optimizeSelects(F);
+  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
+
+bool SelectOptimizeImpl::runOnFunction(Function &F, Pass &P) {
+  TM = &P.getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
   TSI = TM->getSubtargetImpl(F);
   TLI = TSI->getTargetLowering();
 
-  // If none of the select types is supported then skip this pass.
+  // If none of the select types are supported then skip this pass.
   // This is an optimization pass. Legality issues will be handled by
   // instruction selection.
   if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) &&
@@ -242,27 +293,25 @@ bool SelectOptimize::runOnFunction(Function &F) {
       !TLI->isSelectSupported(TargetLowering::VectorMaskSelect))
     return false;
 
-  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+  TTI = &P.getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
   if (!TTI->enableSelectOptimize())
     return false;
 
-  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
-  BPI.reset(new BranchProbabilityInfo(F, *LI));
-  BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
-  PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
-  ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
+  LI = &P.getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+  BFI = &P.getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
+  PSI = &P.getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+  ORE = &P.getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
   TSchedModel.init(TSI);
 
   // When optimizing for size, selects are preferable over branches.
-  if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI.get()))
+  if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI))
     return false;
 
   return optimizeSelects(F);
 }
 
-bool SelectOptimize::optimizeSelects(Function &F) {
+bool SelectOptimizeImpl::optimizeSelects(Function &F) {
   // Determine for which select groups it is profitable converting to branches.
   SelectGroups ProfSIGroups;
   // Base heuristics apply only to non-loops and outer loops.
@@ -278,8 +327,8 @@ bool SelectOptimize::optimizeSelects(Function &F) {
   return !ProfSIGroups.empty();
 }
 
-void SelectOptimize::optimizeSelectsBase(Function &F,
-                                         SelectGroups &ProfSIGroups) {
+void SelectOptimizeImpl::optimizeSelectsBase(Function &F,
+                                             SelectGroups &ProfSIGroups) {
   // Collect all the select groups.
   SelectGroups SIGroups;
   for (BasicBlock &BB : F) {
@@ -294,8 +343,8 @@ void SelectOptimize::optimizeSelectsBase(Function &F,
   findProfitableSIGroupsBase(SIGroups, ProfSIGroups);
 }
 
-void SelectOptimize::optimizeSelectsInnerLoops(Function &F,
-                                               SelectGroups &ProfSIGroups) {
+void SelectOptimizeImpl::optimizeSelectsInnerLoops(Function &F,
+                                                   SelectGroups &ProfSIGroups) {
   SmallVector<Loop *, 4> Loops(LI->begin(), LI->end());
   // Need to check size on each iteration as we accumulate child loops.
   for (unsigned long i = 0; i < Loops.size(); ++i)
@@ -332,7 +381,7 @@ getTrueOrFalseValue(SelectInst *SI, bool isTrue,
   return V;
 }
 
-void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
+void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
   for (SelectGroup &ASI : ProfSIGroups) {
     // The code transformation here is a modified version of the sinking
     // transformation in CodeGenPrepare::optimizeSelectInst with a more
@@ -425,7 +474,7 @@ void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
     BasicBlock *StartBlock = SI->getParent();
     BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI));
     BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
-    BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock).getFrequency());
+    BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock));
 
     // Delete the unconditional branch that was just created by the split.
     StartBlock->getTerminator()->eraseFromParent();
@@ -439,7 +488,7 @@ void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
       DIt++;
     }
     for (auto *DI : DebugPseudoINS) {
-      DI->moveBefore(&*EndBlock->getFirstInsertionPt());
+      DI->moveBeforePreserving(&*EndBlock->getFirstInsertionPt());
     }
 
     // These are the new basic blocks for the conditional branch.
@@ -505,7 +554,8 @@ void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
     for (auto It = ASI.rbegin(); It != ASI.rend(); ++It) {
       SelectInst *SI = *It;
       // The select itself is replaced with a PHI Node.
-      PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front());
+      PHINode *PN = PHINode::Create(SI->getType(), 2, "");
+      PN->insertBefore(EndBlock->begin());
       PN->takeName(SI);
       PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
       PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
@@ -531,8 +581,8 @@ static bool isSpecialSelect(SelectInst *SI) {
   return false;
 }
 
-void SelectOptimize::collectSelectGroups(BasicBlock &BB,
-                                         SelectGroups &SIGroups) {
+void SelectOptimizeImpl::collectSelectGroups(BasicBlock &BB,
+                                             SelectGroups &SIGroups) {
   BasicBlock::iterator BBIt = BB.begin();
   while (BBIt != BB.end()) {
     Instruction *I = &*BBIt++;
@@ -565,8 +615,8 @@ void SelectOptimize::collectSelectGroups(BasicBlock &BB,
   }
 }
 
-void SelectOptimize::findProfitableSIGroupsBase(SelectGroups &SIGroups,
-                                                SelectGroups &ProfSIGroups) {
+void SelectOptimizeImpl::findProfitableSIGroupsBase(
+    SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
   for (SelectGroup &ASI : SIGroups) {
     ++NumSelectOptAnalyzed;
     if (isConvertToBranchProfitableBase(ASI))
@@ -580,14 +630,14 @@ static void EmitAndPrintRemark(OptimizationRemarkEmitter *ORE,
   ORE->emit(Rem);
 }
 
-void SelectOptimize::findProfitableSIGroupsInnerLoops(
+void SelectOptimizeImpl::findProfitableSIGroupsInnerLoops(
     const Loop *L, SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
   NumSelectOptAnalyzed += SIGroups.size();
   // For each select group in an inner-most loop,
   // a branch is more preferable than a select/conditional-move if:
   // i) conversion to branches for all the select groups of the loop satisfies
   //    loop-level heuristics including reducing the loop's critical path by
-  //    some threshold (see SelectOptimize::checkLoopHeuristics); and
+  //    some threshold (see SelectOptimizeImpl::checkLoopHeuristics); and
   // ii) the total cost of the select group is cheaper with a branch compared
   //     to its predicated version. The cost is in terms of latency and the cost
   //     of a select group is the cost of its most expensive select instruction
@@ -627,7 +677,7 @@ void SelectOptimize::findProfitableSIGroupsInnerLoops(
   }
 }
 
-bool SelectOptimize::isConvertToBranchProfitableBase(
+bool SelectOptimizeImpl::isConvertToBranchProfitableBase(
    const SmallVector<SelectInst *, 2> &ASI) {
   SelectInst *SI = ASI.front();
   LLVM_DEBUG(dbgs() << "Analyzing select group containing " << *SI << "\n");
@@ -635,7 +685,7 @@ bool SelectOptimize::isConvertToBranchProfitableBase(
   OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", SI);
 
   // Skip cold basic blocks. Better to optimize for size for cold blocks.
-  if (PSI->isColdBlock(SI->getParent(), BFI.get())) {
+  if (PSI->isColdBlock(SI->getParent(), BFI)) {
     ++NumSelectColdBB;
     ORmiss << "Not converted to branch because of cold basic block. ";
     EmitAndPrintRemark(ORE, ORmiss);
@@ -678,7 +728,7 @@ static InstructionCost divideNearest(InstructionCost Numerator,
   return (Numerator + (Denominator / 2)) / Denominator;
 }
 
-bool SelectOptimize::hasExpensiveColdOperand(
+bool SelectOptimizeImpl::hasExpensiveColdOperand(
     const SmallVector<SelectInst *, 2> &ASI) {
   bool ColdOperand = false;
   uint64_t TrueWeight, FalseWeight, TotalWeight;
@@ -752,9 +802,10 @@ static bool isSafeToSinkLoad(Instruction *LoadI, Instruction *SI) {
 // (sufficiently-accurate in practice), we populate this set with the
 // instructions of the backwards dependence slice that only have one-use and
 // form an one-use chain that leads to the source instruction.
-void SelectOptimize::getExclBackwardsSlice(Instruction *I,
-                                           std::stack<Instruction *> &Slice,
-                                           Instruction *SI, bool ForSinking) {
+void SelectOptimizeImpl::getExclBackwardsSlice(Instruction *I,
+                                               std::stack<Instruction *> &Slice,
+                                               Instruction *SI,
+                                               bool ForSinking) {
   SmallPtrSet<Instruction *, 2> Visited;
   std::queue<Instruction *> Worklist;
   Worklist.push(I);
@@ -798,7 +849,7 @@ void SelectOptimize::getExclBackwardsSlice(Instruction *I,
   }
 }
 
-bool SelectOptimize::isSelectHighlyPredictable(const SelectInst *SI) {
+bool SelectOptimizeImpl::isSelectHighlyPredictable(const SelectInst *SI) {
  uint64_t TrueWeight, FalseWeight;
   if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
     uint64_t Max = std::max(TrueWeight, FalseWeight);
@@ -812,8 +863,8 @@ bool SelectOptimize::isSelectHighlyPredictable(const SelectInst *SI) {
   return false;
 }
 
-bool SelectOptimize::checkLoopHeuristics(const Loop *L,
-                                         const CostInfo LoopCost[2]) {
+bool SelectOptimizeImpl::checkLoopHeuristics(const Loop *L,
+                                             const CostInfo LoopCost[2]) {
   // Loop-level checks to determine if a non-predicated version (with branches)
   // of the loop is more profitable than its predicated version.
 
@@ -881,7 +932,7 @@ bool SelectOptimize::checkLoopHeuristics(const Loop *L,
 // and non-predicated version of the given loop.
 // Returns false if unable to compute these costs due to invalid cost of loop
 // instruction(s).
-bool SelectOptimize::computeLoopCosts(
+bool SelectOptimizeImpl::computeLoopCosts(
     const Loop *L, const SelectGroups &SIGroups,
     DenseMap<const Instruction *, CostInfo> &InstCostMap, CostInfo *LoopCost) {
   LLVM_DEBUG(dbgs() << "Calculating Latency / IPredCost / INonPredCost of loop "
@@ -969,7 +1020,7 @@ bool SelectOptimize::computeLoopCosts(
 }
 
 SmallPtrSet<const Instruction *, 2>
-SelectOptimize::getSIset(const SelectGroups &SIGroups) {
+SelectOptimizeImpl::getSIset(const SelectGroups &SIGroups) {
   SmallPtrSet<const Instruction *, 2> SIset;
   for (const SelectGroup &ASI : SIGroups)
     for (const SelectInst *SI : ASI)
@@ -977,7 +1028,8 @@ SelectOptimize::getSIset(const SelectGroups &SIGroups) {
   return SIset;
 }
 
-std::optional<uint64_t> SelectOptimize::computeInstCost(const Instruction *I) {
+std::optional<uint64_t>
+SelectOptimizeImpl::computeInstCost(const Instruction *I) {
   InstructionCost ICost =
       TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency);
   if (auto OC = ICost.getValue())
@@ -986,8 +1038,8 @@ std::optional<uint64_t> SelectOptimize::computeInstCost(const Instruction *I) {
 }
 
 ScaledNumber<uint64_t>
-SelectOptimize::getMispredictionCost(const SelectInst *SI,
-                                     const Scaled64 CondCost) {
+SelectOptimizeImpl::getMispredictionCost(const SelectInst *SI,
+                                         const Scaled64 CondCost) {
   uint64_t MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty;
 
   // Account for the default misprediction rate when using a branch
@@ -1012,8 +1064,8 @@ SelectOptimize::getMispredictionCost(const SelectInst *SI,
 // Returns the cost of a branch when the prediction is correct.
 // TrueCost * TrueProbability + FalseCost * FalseProbability.
 ScaledNumber<uint64_t>
-SelectOptimize::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
-                                     const SelectInst *SI) {
+SelectOptimizeImpl::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
+                                         const SelectInst *SI) {
   Scaled64 PredPathCost;
   uint64_t TrueWeight, FalseWeight;
   if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
@@ -1033,7 +1085,7 @@ SelectOptimize::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
   return PredPathCost;
 }
 
-bool SelectOptimize::isSelectKindSupported(SelectInst *SI) {
+bool SelectOptimizeImpl::isSelectKindSupported(SelectInst *SI) {
   bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
   if (VectorCond)
     return false;
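
Editor's note: the shape of this change is the usual recipe for porting a legacy LLVM pass to the new pass manager. The transformation logic moves into a pass-manager-agnostic Impl class with two entry points (run for the new PM, runOnFunction for the legacy PM), and each pass manager gets a thin wrapper that only acquires analyses and delegates. A minimal sketch of that recipe follows, using hypothetical names (MyOptImpl, MyOptLegacy, MyOptPass); it mirrors the structure of the diff above, not the actual SelectOptimize sources.

#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"

using namespace llvm;

// All transformation logic lives in a pass-manager-agnostic Impl class;
// only the way analyses are acquired differs between the two entry points.
class MyOptImpl {
public:
  // New-PM entry point: analyses would come from the FunctionAnalysisManager,
  // e.g. FAM.getResult<LoopAnalysis>(F), as in SelectOptimizeImpl::run above.
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM) {
    return optimize(F) ? PreservedAnalyses::none() : PreservedAnalyses::all();
  }
  // Legacy entry point: analyses would come from the wrapper pass P,
  // e.g. P.getAnalysis<LoopInfoWrapperPass>().getLoopInfo().
  bool runOnFunction(Function &F, Pass &P) { return optimize(F); }

private:
  bool optimize(Function &F) { return false; } // shared logic would go here
};

// Thin legacy wrapper: declares analysis dependencies (omitted here) and
// delegates to the Impl, passing itself so the Impl can call getAnalysis.
class MyOptLegacy : public FunctionPass {
  MyOptImpl Impl;

public:
  static char ID;
  MyOptLegacy() : FunctionPass(ID) {}
  bool runOnFunction(Function &F) override {
    return Impl.runOnFunction(F, *this);
  }
};
char MyOptLegacy::ID = 0;

// Thin new-PM wrapper: constructs an Impl per run and forwards to it.
struct MyOptPass : PassInfoMixin<MyOptPass> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM) {
    return MyOptImpl().run(F, FAM);
  }
};

Keeping the Impl free of pass-manager-specific state is what lets both wrappers stay this small, and it is why the diff can delete the legacy-only fields (the BFI/BPI unique_ptrs) in favor of analysis-owned pointers.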
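
One detail of the new-PM path is worth calling out: a function pass cannot compute a module-level analysis on demand, so SelectOptimizeImpl::run reaches ProfileSummaryInfo through the ModuleAnalysisManagerFunctionProxy and only reads a cached result, hence the assert that `profile-summary` has already been run by the enclosing module pipeline. A sketch of that lookup in isolation, with a hypothetical helper name (getCachedPSI):

#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"

using namespace llvm;

// Hypothetical helper: read the module-level ProfileSummaryInfo from inside
// a function pass. getCachedResult returns nullptr if the module pipeline
// has not already computed profile-summary; it never computes it here.
static ProfileSummaryInfo *getCachedPSI(Function &F,
                                        FunctionAnalysisManager &FAM) {
  auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
  return MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
}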
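
The diff also swaps two insertion APIs: moveBefore becomes moveBeforePreserving, and the PHI is now created detached and inserted at a BasicBlock::iterator instead of at a raw &EndBlock->front() pointer. This appears to track LLVM's move toward iterator-based insertion so that positions relative to debug records at a block head are preserved (an inference from the API, not stated in the diff). A sketch of the idiom, with hypothetical names (makeHeadPHI, Ty, BB, NumPreds):

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Create a PHI detached, then insert it at the block's begin() iterator,
// mirroring the PHINode::Create / insertBefore pair in the diff above.
static PHINode *makeHeadPHI(Type *Ty, BasicBlock *BB, unsigned NumPreds) {
  PHINode *PN = PHINode::Create(Ty, NumPreds, "");
  PN->insertBefore(BB->begin());
  return PN;
}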