diff options
Diffstat (limited to 'llvm/lib/CodeGen/ShrinkWrap.cpp')
-rw-r--r-- | llvm/lib/CodeGen/ShrinkWrap.cpp | 536 |
1 files changed, 457 insertions, 79 deletions
diff --git a/llvm/lib/CodeGen/ShrinkWrap.cpp b/llvm/lib/CodeGen/ShrinkWrap.cpp index 2411b1ad5203..4b1d3637a746 100644 --- a/llvm/lib/CodeGen/ShrinkWrap.cpp +++ b/llvm/lib/CodeGen/ShrinkWrap.cpp @@ -53,6 +53,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" @@ -97,6 +98,9 @@ STATISTIC(NumCandidatesDropped, static cl::opt<cl::boolOrDefault> EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden, cl::desc("enable the shrink-wrapping pass")); +static cl::opt<bool> EnablePostShrinkWrapOpt( + "enable-shrink-wrap-region-split", cl::init(true), cl::Hidden, + cl::desc("enable splitting of the restore block if possible")); namespace { @@ -110,44 +114,44 @@ namespace { class ShrinkWrap : public MachineFunctionPass { /// Hold callee-saved information. RegisterClassInfo RCI; - MachineDominatorTree *MDT; - MachinePostDominatorTree *MPDT; + MachineDominatorTree *MDT = nullptr; + MachinePostDominatorTree *MPDT = nullptr; /// Current safe point found for the prologue. /// The prologue will be inserted before the first instruction /// in this basic block. - MachineBasicBlock *Save; + MachineBasicBlock *Save = nullptr; /// Current safe point found for the epilogue. /// The epilogue will be inserted before the first terminator instruction /// in this basic block. - MachineBasicBlock *Restore; + MachineBasicBlock *Restore = nullptr; /// Hold the information of the basic block frequency. /// Use to check the profitability of the new points. - MachineBlockFrequencyInfo *MBFI; + MachineBlockFrequencyInfo *MBFI = nullptr; /// Hold the loop information. Used to determine if Save and Restore /// are in the same loop. - MachineLoopInfo *MLI; + MachineLoopInfo *MLI = nullptr; // Emit remarks. MachineOptimizationRemarkEmitter *ORE = nullptr; /// Frequency of the Entry block. - uint64_t EntryFreq; + uint64_t EntryFreq = 0; /// Current opcode for frame setup. - unsigned FrameSetupOpcode; + unsigned FrameSetupOpcode = ~0u; /// Current opcode for frame destroy. - unsigned FrameDestroyOpcode; + unsigned FrameDestroyOpcode = ~0u; /// Stack pointer register, used by llvm.{savestack,restorestack} Register SP; /// Entry block. - const MachineBasicBlock *Entry; + const MachineBasicBlock *Entry = nullptr; using SetOfRegs = SmallSetVector<unsigned, 16>; @@ -155,12 +159,18 @@ class ShrinkWrap : public MachineFunctionPass { mutable SetOfRegs CurrentCSRs; /// Current MachineFunction. - MachineFunction *MachineFunc; + MachineFunction *MachineFunc = nullptr; + + /// Is `true` for block numbers where we can guarantee no stack access + /// or computation of stack-relative addresses on any CFG path including + /// the block itself. + BitVector StackAddressUsedBlockInfo; /// Check if \p MI uses or defines a callee-saved register or /// a frame index. If this is the case, this means \p MI must happen /// after Save and before Restore. - bool useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS) const; + bool useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS, + bool StackAddressUsed) const; const SetOfRegs &getCurrentCSRs(RegScavenger *RS) const { if (CurrentCSRs.empty()) { @@ -184,6 +194,32 @@ class ShrinkWrap : public MachineFunctionPass { /// this call. void updateSaveRestorePoints(MachineBasicBlock &MBB, RegScavenger *RS); + // Try to find safe point based on dominance and block frequency without + // any change in IR. + bool performShrinkWrapping( + const ReversePostOrderTraversal<MachineBasicBlock *> &RPOT, + RegScavenger *RS); + + /// This function tries to split the restore point if doing so can shrink the + /// save point further. \return True if restore point is split. + bool postShrinkWrapping(bool HasCandidate, MachineFunction &MF, + RegScavenger *RS); + + /// This function analyzes if the restore point can split to create a new + /// restore point. This function collects + /// 1. Any preds of current restore that are reachable by callee save/FI + /// blocks + /// - indicated by DirtyPreds + /// 2. Any preds of current restore that are not DirtyPreds - indicated by + /// CleanPreds + /// Both sets should be non-empty for considering restore point split. + bool checkIfRestoreSplittable( + const MachineBasicBlock *CurRestore, + const DenseSet<const MachineBasicBlock *> &ReachableByDirty, + SmallVectorImpl<MachineBasicBlock *> &DirtyPreds, + SmallVectorImpl<MachineBasicBlock *> &CleanPreds, + const TargetInstrInfo *TII, RegScavenger *RS); + /// Initialize the pass for \p MF. void init(MachineFunction &MF) { RCI.runOnMachineFunction(MF); @@ -257,15 +293,32 @@ INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass) INITIALIZE_PASS_END(ShrinkWrap, DEBUG_TYPE, "Shrink Wrap Pass", false, false) -bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI, - RegScavenger *RS) const { - // This prevents premature stack popping when occurs a indirect stack - // access. It is overly aggressive for the moment. - // TODO: - Obvious non-stack loads and store, such as global values, - // are known to not access the stack. - // - Further, data dependency and alias analysis can validate - // that load and stores never derive from the stack pointer. - if (MI.mayLoadOrStore()) +bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS, + bool StackAddressUsed) const { + /// Check if \p Op is known to access an address not on the function's stack . + /// At the moment, accesses where the underlying object is a global, function + /// argument, or jump table are considered non-stack accesses. Note that the + /// caller's stack may get accessed when passing an argument via the stack, + /// but not the stack of the current function. + /// + auto IsKnownNonStackPtr = [](MachineMemOperand *Op) { + if (Op->getValue()) { + const Value *UO = getUnderlyingObject(Op->getValue()); + if (!UO) + return false; + if (auto *Arg = dyn_cast<Argument>(UO)) + return !Arg->hasPassPointeeByValueCopyAttr(); + return isa<GlobalValue>(UO); + } + if (const PseudoSourceValue *PSV = Op->getPseudoValue()) + return PSV->isJumpTable(); + return false; + }; + // Load/store operations may access the stack indirectly when we previously + // computed an address to a stack location. + if (StackAddressUsed && MI.mayLoadOrStore() && + (MI.isCall() || MI.hasUnmodeledSideEffects() || MI.memoperands_empty() || + !all_of(MI.memoperands(), IsKnownNonStackPtr))) return true; if (MI.getOpcode() == FrameSetupOpcode || @@ -320,18 +373,314 @@ bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI, /// Helper function to find the immediate (post) dominator. template <typename ListOfBBs, typename DominanceAnalysis> static MachineBasicBlock *FindIDom(MachineBasicBlock &Block, ListOfBBs BBs, - DominanceAnalysis &Dom) { + DominanceAnalysis &Dom, bool Strict = true) { MachineBasicBlock *IDom = &Block; for (MachineBasicBlock *BB : BBs) { IDom = Dom.findNearestCommonDominator(IDom, BB); if (!IDom) break; } - if (IDom == &Block) + if (Strict && IDom == &Block) return nullptr; return IDom; } +static bool isAnalyzableBB(const TargetInstrInfo &TII, + MachineBasicBlock &Entry) { + // Check if the block is analyzable. + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; + SmallVector<MachineOperand, 4> Cond; + return !TII.analyzeBranch(Entry, TBB, FBB, Cond); +} + +/// Determines if any predecessor of MBB is on the path from block that has use +/// or def of CSRs/FI to MBB. +/// ReachableByDirty: All blocks reachable from block that has use or def of +/// CSR/FI. +static bool +hasDirtyPred(const DenseSet<const MachineBasicBlock *> &ReachableByDirty, + const MachineBasicBlock &MBB) { + for (const MachineBasicBlock *PredBB : MBB.predecessors()) + if (ReachableByDirty.count(PredBB)) + return true; + return false; +} + +/// Derives the list of all the basic blocks reachable from MBB. +static void markAllReachable(DenseSet<const MachineBasicBlock *> &Visited, + const MachineBasicBlock &MBB) { + SmallVector<MachineBasicBlock *, 4> Worklist(MBB.succ_begin(), + MBB.succ_end()); + Visited.insert(&MBB); + while (!Worklist.empty()) { + MachineBasicBlock *SuccMBB = Worklist.pop_back_val(); + if (!Visited.insert(SuccMBB).second) + continue; + Worklist.append(SuccMBB->succ_begin(), SuccMBB->succ_end()); + } +} + +/// Collect blocks reachable by use or def of CSRs/FI. +static void collectBlocksReachableByDirty( + const DenseSet<const MachineBasicBlock *> &DirtyBBs, + DenseSet<const MachineBasicBlock *> &ReachableByDirty) { + for (const MachineBasicBlock *MBB : DirtyBBs) { + if (ReachableByDirty.count(MBB)) + continue; + // Mark all offsprings as reachable. + markAllReachable(ReachableByDirty, *MBB); + } +} + +/// \return true if there is a clean path from SavePoint to the original +/// Restore. +static bool +isSaveReachableThroughClean(const MachineBasicBlock *SavePoint, + ArrayRef<MachineBasicBlock *> CleanPreds) { + DenseSet<const MachineBasicBlock *> Visited; + SmallVector<MachineBasicBlock *, 4> Worklist(CleanPreds.begin(), + CleanPreds.end()); + while (!Worklist.empty()) { + MachineBasicBlock *CleanBB = Worklist.pop_back_val(); + if (CleanBB == SavePoint) + return true; + if (!Visited.insert(CleanBB).second || !CleanBB->pred_size()) + continue; + Worklist.append(CleanBB->pred_begin(), CleanBB->pred_end()); + } + return false; +} + +/// This function updates the branches post restore point split. +/// +/// Restore point has been split. +/// Old restore point: MBB +/// New restore point: NMBB +/// Any basic block(say BBToUpdate) which had a fallthrough to MBB +/// previously should +/// 1. Fallthrough to NMBB iff NMBB is inserted immediately above MBB in the +/// block layout OR +/// 2. Branch unconditionally to NMBB iff NMBB is inserted at any other place. +static void updateTerminator(MachineBasicBlock *BBToUpdate, + MachineBasicBlock *NMBB, + const TargetInstrInfo *TII) { + DebugLoc DL = BBToUpdate->findBranchDebugLoc(); + // if NMBB isn't the new layout successor for BBToUpdate, insert unconditional + // branch to it + if (!BBToUpdate->isLayoutSuccessor(NMBB)) + TII->insertUnconditionalBranch(*BBToUpdate, NMBB, DL); +} + +/// This function splits the restore point and returns new restore point/BB. +/// +/// DirtyPreds: Predessors of \p MBB that are ReachableByDirty +/// +/// Decision has been made to split the restore point. +/// old restore point: \p MBB +/// new restore point: \p NMBB +/// This function makes the necessary block layout changes so that +/// 1. \p NMBB points to \p MBB unconditionally +/// 2. All dirtyPreds that previously pointed to \p MBB point to \p NMBB +static MachineBasicBlock * +tryToSplitRestore(MachineBasicBlock *MBB, + ArrayRef<MachineBasicBlock *> DirtyPreds, + const TargetInstrInfo *TII) { + MachineFunction *MF = MBB->getParent(); + + // get the list of DirtyPreds who have a fallthrough to MBB + // before the block layout change. This is just to ensure that if the NMBB is + // inserted after MBB, then we create unconditional branch from + // DirtyPred/CleanPred to NMBB + SmallPtrSet<MachineBasicBlock *, 8> MBBFallthrough; + for (MachineBasicBlock *BB : DirtyPreds) + if (BB->getFallThrough(false) == MBB) + MBBFallthrough.insert(BB); + + MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock(); + // Insert this block at the end of the function. Inserting in between may + // interfere with control flow optimizer decisions. + MF->insert(MF->end(), NMBB); + + for (const MachineBasicBlock::RegisterMaskPair &LI : MBB->liveins()) + NMBB->addLiveIn(LI.PhysReg); + + TII->insertUnconditionalBranch(*NMBB, MBB, DebugLoc()); + + // After splitting, all predecessors of the restore point should be dirty + // blocks. + for (MachineBasicBlock *SuccBB : DirtyPreds) + SuccBB->ReplaceUsesOfBlockWith(MBB, NMBB); + + NMBB->addSuccessor(MBB); + + for (MachineBasicBlock *BBToUpdate : MBBFallthrough) + updateTerminator(BBToUpdate, NMBB, TII); + + return NMBB; +} + +/// This function undoes the restore point split done earlier. +/// +/// DirtyPreds: All predecessors of \p NMBB that are ReachableByDirty. +/// +/// Restore point was split and the change needs to be unrolled. Make necessary +/// changes to reset restore point from \p NMBB to \p MBB. +static void rollbackRestoreSplit(MachineFunction &MF, MachineBasicBlock *NMBB, + MachineBasicBlock *MBB, + ArrayRef<MachineBasicBlock *> DirtyPreds, + const TargetInstrInfo *TII) { + // For a BB, if NMBB is fallthrough in the current layout, then in the new + // layout a. BB should fallthrough to MBB OR b. BB should undconditionally + // branch to MBB + SmallPtrSet<MachineBasicBlock *, 8> NMBBFallthrough; + for (MachineBasicBlock *BB : DirtyPreds) + if (BB->getFallThrough(false) == NMBB) + NMBBFallthrough.insert(BB); + + NMBB->removeSuccessor(MBB); + for (MachineBasicBlock *SuccBB : DirtyPreds) + SuccBB->ReplaceUsesOfBlockWith(NMBB, MBB); + + NMBB->erase(NMBB->begin(), NMBB->end()); + NMBB->eraseFromParent(); + + for (MachineBasicBlock *BBToUpdate : NMBBFallthrough) + updateTerminator(BBToUpdate, MBB, TII); +} + +// A block is deemed fit for restore point split iff there exist +// 1. DirtyPreds - preds of CurRestore reachable from use or def of CSR/FI +// 2. CleanPreds - preds of CurRestore that arent DirtyPreds +bool ShrinkWrap::checkIfRestoreSplittable( + const MachineBasicBlock *CurRestore, + const DenseSet<const MachineBasicBlock *> &ReachableByDirty, + SmallVectorImpl<MachineBasicBlock *> &DirtyPreds, + SmallVectorImpl<MachineBasicBlock *> &CleanPreds, + const TargetInstrInfo *TII, RegScavenger *RS) { + for (const MachineInstr &MI : *CurRestore) + if (useOrDefCSROrFI(MI, RS, /*StackAddressUsed=*/true)) + return false; + + for (MachineBasicBlock *PredBB : CurRestore->predecessors()) { + if (!isAnalyzableBB(*TII, *PredBB)) + return false; + + if (ReachableByDirty.count(PredBB)) + DirtyPreds.push_back(PredBB); + else + CleanPreds.push_back(PredBB); + } + + return !(CleanPreds.empty() || DirtyPreds.empty()); +} + +bool ShrinkWrap::postShrinkWrapping(bool HasCandidate, MachineFunction &MF, + RegScavenger *RS) { + if (!EnablePostShrinkWrapOpt) + return false; + + MachineBasicBlock *InitSave = nullptr; + MachineBasicBlock *InitRestore = nullptr; + + if (HasCandidate) { + InitSave = Save; + InitRestore = Restore; + } else { + InitRestore = nullptr; + InitSave = &MF.front(); + for (MachineBasicBlock &MBB : MF) { + if (MBB.isEHFuncletEntry()) + return false; + if (MBB.isReturnBlock()) { + // Do not support multiple restore points. + if (InitRestore) + return false; + InitRestore = &MBB; + } + } + } + + if (!InitSave || !InitRestore || InitRestore == InitSave || + !MDT->dominates(InitSave, InitRestore) || + !MPDT->dominates(InitRestore, InitSave)) + return false; + + // Bail out of the optimization if any of the basic block is target of + // INLINEASM_BR instruction + for (MachineBasicBlock &MBB : MF) + if (MBB.isInlineAsmBrIndirectTarget()) + return false; + + DenseSet<const MachineBasicBlock *> DirtyBBs; + for (MachineBasicBlock &MBB : MF) { + if (MBB.isEHPad()) { + DirtyBBs.insert(&MBB); + continue; + } + for (const MachineInstr &MI : MBB) + if (useOrDefCSROrFI(MI, RS, /*StackAddressUsed=*/true)) { + DirtyBBs.insert(&MBB); + break; + } + } + + // Find blocks reachable from the use or def of CSRs/FI. + DenseSet<const MachineBasicBlock *> ReachableByDirty; + collectBlocksReachableByDirty(DirtyBBs, ReachableByDirty); + + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + SmallVector<MachineBasicBlock *, 2> DirtyPreds; + SmallVector<MachineBasicBlock *, 2> CleanPreds; + if (!checkIfRestoreSplittable(InitRestore, ReachableByDirty, DirtyPreds, + CleanPreds, TII, RS)) + return false; + + // Trying to reach out to the new save point which dominates all dirty blocks. + MachineBasicBlock *NewSave = + FindIDom<>(**DirtyPreds.begin(), DirtyPreds, *MDT, false); + + while (NewSave && (hasDirtyPred(ReachableByDirty, *NewSave) || + EntryFreq < MBFI->getBlockFreq(NewSave).getFrequency() || + /*Entry freq has been observed more than a loop block in + some cases*/ + MLI->getLoopFor(NewSave))) + NewSave = FindIDom<>(**NewSave->pred_begin(), NewSave->predecessors(), *MDT, + false); + + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + if (!NewSave || NewSave == InitSave || + isSaveReachableThroughClean(NewSave, CleanPreds) || + !TFI->canUseAsPrologue(*NewSave)) + return false; + + // Now we know that splitting a restore point can isolate the restore point + // from clean blocks and doing so can shrink the save point. + MachineBasicBlock *NewRestore = + tryToSplitRestore(InitRestore, DirtyPreds, TII); + + // Make sure if the new restore point is valid as an epilogue, depending on + // targets. + if (!TFI->canUseAsEpilogue(*NewRestore)) { + rollbackRestoreSplit(MF, NewRestore, InitRestore, DirtyPreds, TII); + return false; + } + + Save = NewSave; + Restore = NewRestore; + + MDT->runOnMachineFunction(MF); + MPDT->runOnMachineFunction(MF); + + assert((MDT->dominates(Save, Restore) && MPDT->dominates(Restore, Save)) && + "Incorrect save or restore point due to dominance relations"); + assert((!MLI->getLoopFor(Save) && !MLI->getLoopFor(Restore)) && + "Unexpected save or restore point in a loop"); + assert((EntryFreq >= MBFI->getBlockFreq(Save).getFrequency() && + EntryFreq >= MBFI->getBlockFreq(Restore).getFrequency()) && + "Incorrect save or restore point based on block frequency"); + return true; +} + void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB, RegScavenger *RS) { // Get rid of the easy cases first. @@ -356,7 +705,7 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB, // terminator. if (Restore == &MBB) { for (const MachineInstr &Terminator : MBB.terminators()) { - if (!useOrDefCSROrFI(Terminator, RS)) + if (!useOrDefCSROrFI(Terminator, RS, /*StackAddressUsed=*/true)) continue; // One of the terminator needs to happen before the restore point. if (MBB.succ_empty()) { @@ -463,47 +812,24 @@ static bool giveUpWithRemarks(MachineOptimizationRemarkEmitter *ORE, return false; } -bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(MF.getFunction()) || MF.empty() || !isShrinkWrapEnabled(MF)) - return false; - - LLVM_DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n'); - - init(MF); - - ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin()); - if (containsIrreducibleCFG<MachineBasicBlock *>(RPOT, *MLI)) { - // If MF is irreducible, a block may be in a loop without - // MachineLoopInfo reporting it. I.e., we may use the - // post-dominance property in loops, which lead to incorrect - // results. Moreover, we may miss that the prologue and - // epilogue are not in the same loop, leading to unbalanced - // construction/deconstruction of the stack frame. - return giveUpWithRemarks(ORE, "UnsupportedIrreducibleCFG", - "Irreducible CFGs are not supported yet.", - MF.getFunction().getSubprogram(), &MF.front()); - } - - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - std::unique_ptr<RegScavenger> RS( - TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr); - - for (MachineBasicBlock &MBB : MF) { - LLVM_DEBUG(dbgs() << "Look into: " << MBB.getNumber() << ' ' - << MBB.getName() << '\n'); +bool ShrinkWrap::performShrinkWrapping( + const ReversePostOrderTraversal<MachineBasicBlock *> &RPOT, + RegScavenger *RS) { + for (MachineBasicBlock *MBB : RPOT) { + LLVM_DEBUG(dbgs() << "Look into: " << printMBBReference(*MBB) << '\n'); - if (MBB.isEHFuncletEntry()) + if (MBB->isEHFuncletEntry()) return giveUpWithRemarks(ORE, "UnsupportedEHFunclets", "EH Funclets are not supported yet.", - MBB.front().getDebugLoc(), &MBB); + MBB->front().getDebugLoc(), MBB); - if (MBB.isEHPad() || MBB.isInlineAsmBrIndirectTarget()) { + if (MBB->isEHPad() || MBB->isInlineAsmBrIndirectTarget()) { // Push the prologue and epilogue outside of the region that may throw (or // jump out via inlineasm_br), by making sure that all the landing pads // are at least at the boundary of the save and restore points. The // problem is that a basic block can jump out from the middle in these // cases, which we do not handle. - updateSaveRestorePoints(MBB, RS.get()); + updateSaveRestorePoints(*MBB, RS); if (!ArePointsInteresting()) { LLVM_DEBUG(dbgs() << "EHPad/inlineasm_br prevents shrink-wrapping\n"); return false; @@ -511,22 +837,37 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { continue; } - for (const MachineInstr &MI : MBB) { - if (!useOrDefCSROrFI(MI, RS.get())) - continue; - // Save (resp. restore) point must dominate (resp. post dominate) - // MI. Look for the proper basic block for those. - updateSaveRestorePoints(MBB, RS.get()); - // If we are at a point where we cannot improve the placement of - // save/restore instructions, just give up. - if (!ArePointsInteresting()) { - LLVM_DEBUG(dbgs() << "No Shrink wrap candidate found\n"); - return false; + bool StackAddressUsed = false; + // Check if we found any stack accesses in the predecessors. We are not + // doing a full dataflow analysis here to keep things simple but just + // rely on a reverse portorder traversal (RPOT) to guarantee predecessors + // are already processed except for loops (and accept the conservative + // result for loops). + for (const MachineBasicBlock *Pred : MBB->predecessors()) { + if (StackAddressUsedBlockInfo.test(Pred->getNumber())) { + StackAddressUsed = true; + break; } - // No need to look for other instructions, this basic block - // will already be part of the handled region. - break; } + + for (const MachineInstr &MI : *MBB) { + if (useOrDefCSROrFI(MI, RS, StackAddressUsed)) { + // Save (resp. restore) point must dominate (resp. post dominate) + // MI. Look for the proper basic block for those. + updateSaveRestorePoints(*MBB, RS); + // If we are at a point where we cannot improve the placement of + // save/restore instructions, just give up. + if (!ArePointsInteresting()) { + LLVM_DEBUG(dbgs() << "No Shrink wrap candidate found\n"); + return false; + } + // No need to look for other instructions, this basic block + // will already be part of the handled region. + StackAddressUsed = true; + break; + } + } + StackAddressUsedBlockInfo[MBB->getNumber()] = StackAddressUsed; } if (!ArePointsInteresting()) { // If the points are not interesting at this point, then they must be null @@ -540,13 +881,13 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { LLVM_DEBUG(dbgs() << "\n ** Results **\nFrequency of the Entry: " << EntryFreq << '\n'); - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const TargetFrameLowering *TFI = + MachineFunc->getSubtarget().getFrameLowering(); do { LLVM_DEBUG(dbgs() << "Shrink wrap candidates (#, Name, Freq):\nSave: " - << Save->getNumber() << ' ' << Save->getName() << ' ' + << printMBBReference(*Save) << ' ' << MBFI->getBlockFreq(Save).getFrequency() - << "\nRestore: " << Restore->getNumber() << ' ' - << Restore->getName() << ' ' + << "\nRestore: " << printMBBReference(*Restore) << ' ' << MBFI->getBlockFreq(Restore).getFrequency() << '\n'); bool IsSaveCheap, TargetCanUseSaveAsPrologue = false; @@ -570,24 +911,61 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { break; NewBB = Restore; } - updateSaveRestorePoints(*NewBB, RS.get()); + updateSaveRestorePoints(*NewBB, RS); } while (Save && Restore); if (!ArePointsInteresting()) { ++NumCandidatesDropped; return false; } + return true; +} + +bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(MF.getFunction()) || MF.empty() || !isShrinkWrapEnabled(MF)) + return false; + + LLVM_DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n'); + + init(MF); + + ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin()); + if (containsIrreducibleCFG<MachineBasicBlock *>(RPOT, *MLI)) { + // If MF is irreducible, a block may be in a loop without + // MachineLoopInfo reporting it. I.e., we may use the + // post-dominance property in loops, which lead to incorrect + // results. Moreover, we may miss that the prologue and + // epilogue are not in the same loop, leading to unbalanced + // construction/deconstruction of the stack frame. + return giveUpWithRemarks(ORE, "UnsupportedIrreducibleCFG", + "Irreducible CFGs are not supported yet.", + MF.getFunction().getSubprogram(), &MF.front()); + } + + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + std::unique_ptr<RegScavenger> RS( + TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr); + + bool Changed = false; + + StackAddressUsedBlockInfo.resize(MF.getNumBlockIDs(), true); + bool HasCandidate = performShrinkWrapping(RPOT, RS.get()); + StackAddressUsedBlockInfo.clear(); + Changed = postShrinkWrapping(HasCandidate, MF, RS.get()); + if (!HasCandidate && !Changed) + return false; + if (!ArePointsInteresting()) + return Changed; LLVM_DEBUG(dbgs() << "Final shrink wrap candidates:\nSave: " - << Save->getNumber() << ' ' << Save->getName() - << "\nRestore: " << Restore->getNumber() << ' ' - << Restore->getName() << '\n'); + << printMBBReference(*Save) << ' ' + << "\nRestore: " << printMBBReference(*Restore) << '\n'); MachineFrameInfo &MFI = MF.getFrameInfo(); MFI.setSavePoint(Save); MFI.setRestorePoint(Restore); ++NumCandidates; - return false; + return Changed; } bool ShrinkWrap::isShrinkWrapEnabled(const MachineFunction &MF) { |