diff options
Diffstat (limited to 'contrib/llvm/lib/Transforms/Utils')
35 files changed, 19568 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp new file mode 100644 index 000000000000..12de9eed4b85 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -0,0 +1,771 @@ +//===-- BasicBlockUtils.cpp - BasicBlock Utilities -------------------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This family of functions perform manipulations on basic blocks, and +// instructions contained within basic blocks. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ValueHandle.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" +#include <algorithm> +using namespace llvm; + +/// DeleteDeadBlock - Delete the specified block, which must have no +/// predecessors. +void llvm::DeleteDeadBlock(BasicBlock *BB) { + assert((pred_begin(BB) == pred_end(BB) || + // Can delete self loop. + BB->getSinglePredecessor() == BB) && "Block is not dead!"); + TerminatorInst *BBTerm = BB->getTerminator(); + + // Loop through all of our successors and make sure they know that one + // of their predecessors is going away. + for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) + BBTerm->getSuccessor(i)->removePredecessor(BB); + + // Zap all the instructions in the block. + while (!BB->empty()) { + Instruction &I = BB->back(); + // If this instruction is used, replace uses with an arbitrary value. + // Because control flow can't get here, we don't care what we replace the + // value with. Note that since this block is unreachable, and all values + // contained within it must dominate their uses, that all uses will + // eventually be removed (they are themselves dead). + if (!I.use_empty()) + I.replaceAllUsesWith(UndefValue::get(I.getType())); + BB->getInstList().pop_back(); + } + + // Zap the block! + BB->eraseFromParent(); +} + +/// FoldSingleEntryPHINodes - We know that BB has one predecessor. If there are +/// any single-entry PHI nodes in it, fold them away. This handles the case +/// when all entries to the PHI nodes in a block are guaranteed equal, such as +/// when the block has exactly one predecessor. +void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, Pass *P) { + if (!isa<PHINode>(BB->begin())) return; + + AliasAnalysis *AA = 0; + MemoryDependenceAnalysis *MemDep = 0; + if (P) { + AA = P->getAnalysisIfAvailable<AliasAnalysis>(); + MemDep = P->getAnalysisIfAvailable<MemoryDependenceAnalysis>(); + } + + while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) { + if (PN->getIncomingValue(0) != PN) + PN->replaceAllUsesWith(PN->getIncomingValue(0)); + else + PN->replaceAllUsesWith(UndefValue::get(PN->getType())); + + if (MemDep) + MemDep->removeInstruction(PN); // Memdep updates AA itself. + else if (AA && isa<PointerType>(PN->getType())) + AA->deleteValue(PN); + + PN->eraseFromParent(); + } +} + + +/// DeleteDeadPHIs - Examine each PHI in the given block and delete it if it +/// is dead. Also recursively delete any operands that become dead as +/// a result. This includes tracing the def-use list from the PHI to see if +/// it is ultimately unused or if it reaches an unused cycle. +bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) { + // Recursively deleting a PHI may cause multiple PHIs to be deleted + // or RAUW'd undef, so use an array of WeakVH for the PHIs to delete. + SmallVector<WeakVH, 8> PHIs; + for (BasicBlock::iterator I = BB->begin(); + PHINode *PN = dyn_cast<PHINode>(I); ++I) + PHIs.push_back(PN); + + bool Changed = false; + for (unsigned i = 0, e = PHIs.size(); i != e; ++i) + if (PHINode *PN = dyn_cast_or_null<PHINode>(PHIs[i].operator Value*())) + Changed |= RecursivelyDeleteDeadPHINode(PN, TLI); + + return Changed; +} + +/// MergeBlockIntoPredecessor - Attempts to merge a block into its predecessor, +/// if possible. The return value indicates success or failure. +bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) { + // Don't merge away blocks who have their address taken. + if (BB->hasAddressTaken()) return false; + + // Can't merge if there are multiple predecessors, or no predecessors. + BasicBlock *PredBB = BB->getUniquePredecessor(); + if (!PredBB) return false; + + // Don't break self-loops. + if (PredBB == BB) return false; + // Don't break invokes. + if (isa<InvokeInst>(PredBB->getTerminator())) return false; + + succ_iterator SI(succ_begin(PredBB)), SE(succ_end(PredBB)); + BasicBlock *OnlySucc = BB; + for (; SI != SE; ++SI) + if (*SI != OnlySucc) { + OnlySucc = 0; // There are multiple distinct successors! + break; + } + + // Can't merge if there are multiple successors. + if (!OnlySucc) return false; + + // Can't merge if there is PHI loop. + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) { + if (PHINode *PN = dyn_cast<PHINode>(BI)) { + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingValue(i) == PN) + return false; + } else + break; + } + + // Begin by getting rid of unneeded PHIs. + if (isa<PHINode>(BB->front())) + FoldSingleEntryPHINodes(BB, P); + + // Delete the unconditional branch from the predecessor... + PredBB->getInstList().pop_back(); + + // Make all PHI nodes that referred to BB now refer to Pred as their + // source... + BB->replaceAllUsesWith(PredBB); + + // Move all definitions in the successor to the predecessor... + PredBB->getInstList().splice(PredBB->end(), BB->getInstList()); + + // Inherit predecessors name if it exists. + if (!PredBB->hasName()) + PredBB->takeName(BB); + + // Finally, erase the old block and update dominator info. + if (P) { + if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) { + if (DomTreeNode *DTN = DT->getNode(BB)) { + DomTreeNode *PredDTN = DT->getNode(PredBB); + SmallVector<DomTreeNode*, 8> Children(DTN->begin(), DTN->end()); + for (SmallVectorImpl<DomTreeNode *>::iterator DI = Children.begin(), + DE = Children.end(); DI != DE; ++DI) + DT->changeImmediateDominator(*DI, PredDTN); + + DT->eraseNode(BB); + } + + if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>()) + LI->removeBlock(BB); + + if (MemoryDependenceAnalysis *MD = + P->getAnalysisIfAvailable<MemoryDependenceAnalysis>()) + MD->invalidateCachedPredecessors(); + } + } + + BB->eraseFromParent(); + return true; +} + +/// ReplaceInstWithValue - Replace all uses of an instruction (specified by BI) +/// with a value, then remove and delete the original instruction. +/// +void llvm::ReplaceInstWithValue(BasicBlock::InstListType &BIL, + BasicBlock::iterator &BI, Value *V) { + Instruction &I = *BI; + // Replaces all of the uses of the instruction with uses of the value + I.replaceAllUsesWith(V); + + // Make sure to propagate a name if there is one already. + if (I.hasName() && !V->hasName()) + V->takeName(&I); + + // Delete the unnecessary instruction now... + BI = BIL.erase(BI); +} + + +/// ReplaceInstWithInst - Replace the instruction specified by BI with the +/// instruction specified by I. The original instruction is deleted and BI is +/// updated to point to the new instruction. +/// +void llvm::ReplaceInstWithInst(BasicBlock::InstListType &BIL, + BasicBlock::iterator &BI, Instruction *I) { + assert(I->getParent() == 0 && + "ReplaceInstWithInst: Instruction already inserted into basic block!"); + + // Insert the new instruction into the basic block... + BasicBlock::iterator New = BIL.insert(BI, I); + + // Replace all uses of the old instruction, and delete it. + ReplaceInstWithValue(BIL, BI, I); + + // Move BI back to point to the newly inserted instruction + BI = New; +} + +/// ReplaceInstWithInst - Replace the instruction specified by From with the +/// instruction specified by To. +/// +void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) { + BasicBlock::iterator BI(From); + ReplaceInstWithInst(From->getParent()->getInstList(), BI, To); +} + +/// SplitEdge - Split the edge connecting specified block. Pass P must +/// not be NULL. +BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) { + unsigned SuccNum = GetSuccessorNumber(BB, Succ); + + // If this is a critical edge, let SplitCriticalEdge do it. + TerminatorInst *LatchTerm = BB->getTerminator(); + if (SplitCriticalEdge(LatchTerm, SuccNum, P)) + return LatchTerm->getSuccessor(SuccNum); + + // If the edge isn't critical, then BB has a single successor or Succ has a + // single pred. Split the block. + if (BasicBlock *SP = Succ->getSinglePredecessor()) { + // If the successor only has a single pred, split the top of the successor + // block. + assert(SP == BB && "CFG broken"); + SP = NULL; + return SplitBlock(Succ, Succ->begin(), P); + } + + // Otherwise, if BB has a single successor, split it at the bottom of the + // block. + assert(BB->getTerminator()->getNumSuccessors() == 1 && + "Should have a single succ!"); + return SplitBlock(BB, BB->getTerminator(), P); +} + +/// SplitBlock - Split the specified block at the specified instruction - every +/// thing before SplitPt stays in Old and everything starting with SplitPt moves +/// to a new block. The two blocks are joined by an unconditional branch and +/// the loop info is updated. +/// +BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) { + BasicBlock::iterator SplitIt = SplitPt; + while (isa<PHINode>(SplitIt) || isa<LandingPadInst>(SplitIt)) + ++SplitIt; + BasicBlock *New = Old->splitBasicBlock(SplitIt, Old->getName()+".split"); + + // The new block lives in whichever loop the old one did. This preserves + // LCSSA as well, because we force the split point to be after any PHI nodes. + if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>()) + if (Loop *L = LI->getLoopFor(Old)) + L->addBasicBlockToLoop(New, LI->getBase()); + + if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) { + // Old dominates New. New node dominates all other nodes dominated by Old. + if (DomTreeNode *OldNode = DT->getNode(Old)) { + std::vector<DomTreeNode *> Children; + for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end(); + I != E; ++I) + Children.push_back(*I); + + DomTreeNode *NewNode = DT->addNewBlock(New,Old); + for (std::vector<DomTreeNode *>::iterator I = Children.begin(), + E = Children.end(); I != E; ++I) + DT->changeImmediateDominator(*I, NewNode); + } + } + + return New; +} + +/// UpdateAnalysisInformation - Update DominatorTree, LoopInfo, and LCCSA +/// analysis information. +static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB, + ArrayRef<BasicBlock *> Preds, + Pass *P, bool &HasLoopExit) { + if (!P) return; + + LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>(); + Loop *L = LI ? LI->getLoopFor(OldBB) : 0; + + // If we need to preserve loop analyses, collect some information about how + // this split will affect loops. + bool IsLoopEntry = !!L; + bool SplitMakesNewLoopHeader = false; + if (LI) { + bool PreserveLCSSA = P->mustPreserveAnalysisID(LCSSAID); + for (ArrayRef<BasicBlock*>::iterator + i = Preds.begin(), e = Preds.end(); i != e; ++i) { + BasicBlock *Pred = *i; + + // If we need to preserve LCSSA, determine if any of the preds is a loop + // exit. + if (PreserveLCSSA) + if (Loop *PL = LI->getLoopFor(Pred)) + if (!PL->contains(OldBB)) + HasLoopExit = true; + + // If we need to preserve LoopInfo, note whether any of the preds crosses + // an interesting loop boundary. + if (!L) continue; + if (L->contains(Pred)) + IsLoopEntry = false; + else + SplitMakesNewLoopHeader = true; + } + } + + // Update dominator tree if available. + DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>(); + if (DT) + DT->splitBlock(NewBB); + + if (!L) return; + + if (IsLoopEntry) { + // Add the new block to the nearest enclosing loop (and not an adjacent + // loop). To find this, examine each of the predecessors and determine which + // loops enclose them, and select the most-nested loop which contains the + // loop containing the block being split. + Loop *InnermostPredLoop = 0; + for (ArrayRef<BasicBlock*>::iterator + i = Preds.begin(), e = Preds.end(); i != e; ++i) { + BasicBlock *Pred = *i; + if (Loop *PredLoop = LI->getLoopFor(Pred)) { + // Seek a loop which actually contains the block being split (to avoid + // adjacent loops). + while (PredLoop && !PredLoop->contains(OldBB)) + PredLoop = PredLoop->getParentLoop(); + + // Select the most-nested of these loops which contains the block. + if (PredLoop && PredLoop->contains(OldBB) && + (!InnermostPredLoop || + InnermostPredLoop->getLoopDepth() < PredLoop->getLoopDepth())) + InnermostPredLoop = PredLoop; + } + } + + if (InnermostPredLoop) + InnermostPredLoop->addBasicBlockToLoop(NewBB, LI->getBase()); + } else { + L->addBasicBlockToLoop(NewBB, LI->getBase()); + if (SplitMakesNewLoopHeader) + L->moveToHeader(NewBB); + } +} + +/// UpdatePHINodes - Update the PHI nodes in OrigBB to include the values coming +/// from NewBB. This also updates AliasAnalysis, if available. +static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB, + ArrayRef<BasicBlock*> Preds, BranchInst *BI, + Pass *P, bool HasLoopExit) { + // Otherwise, create a new PHI node in NewBB for each PHI node in OrigBB. + AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0; + for (BasicBlock::iterator I = OrigBB->begin(); isa<PHINode>(I); ) { + PHINode *PN = cast<PHINode>(I++); + + // Check to see if all of the values coming in are the same. If so, we + // don't need to create a new PHI node, unless it's needed for LCSSA. + Value *InVal = 0; + if (!HasLoopExit) { + InVal = PN->getIncomingValueForBlock(Preds[0]); + for (unsigned i = 1, e = Preds.size(); i != e; ++i) + if (InVal != PN->getIncomingValueForBlock(Preds[i])) { + InVal = 0; + break; + } + } + + if (InVal) { + // If all incoming values for the new PHI would be the same, just don't + // make a new PHI. Instead, just remove the incoming values from the old + // PHI. + for (unsigned i = 0, e = Preds.size(); i != e; ++i) { + // Explicitly check the BB index here to handle duplicates in Preds. + int Idx = PN->getBasicBlockIndex(Preds[i]); + if (Idx >= 0) + PN->removeIncomingValue(Idx, false); + } + } else { + // If the values coming into the block are not the same, we need a PHI. + // Create the new PHI node, insert it into NewBB at the end of the block + PHINode *NewPHI = + PHINode::Create(PN->getType(), Preds.size(), PN->getName() + ".ph", BI); + if (AA) AA->copyValue(PN, NewPHI); + + // Move all of the PHI values for 'Preds' to the new PHI. + for (unsigned i = 0, e = Preds.size(); i != e; ++i) { + Value *V = PN->removeIncomingValue(Preds[i], false); + NewPHI->addIncoming(V, Preds[i]); + } + + InVal = NewPHI; + } + + // Add an incoming value to the PHI node in the loop for the preheader + // edge. + PN->addIncoming(InVal, NewBB); + } +} + +/// SplitBlockPredecessors - This method transforms BB by introducing a new +/// basic block into the function, and moving some of the predecessors of BB to +/// be predecessors of the new block. The new predecessors are indicated by the +/// Preds array, which has NumPreds elements in it. The new block is given a +/// suffix of 'Suffix'. +/// +/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree, +/// LoopInfo, and LCCSA but no other analyses. In particular, it does not +/// preserve LoopSimplify (because it's complicated to handle the case where one +/// of the edges being split is an exit of a loop with other exits). +/// +BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, + ArrayRef<BasicBlock*> Preds, + const char *Suffix, Pass *P) { + // Create new basic block, insert right before the original block. + BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix, + BB->getParent(), BB); + + // The new block unconditionally branches to the old block. + BranchInst *BI = BranchInst::Create(BB, NewBB); + + // Move the edges from Preds to point to NewBB instead of BB. + for (unsigned i = 0, e = Preds.size(); i != e; ++i) { + // This is slightly more strict than necessary; the minimum requirement + // is that there be no more than one indirectbr branching to BB. And + // all BlockAddress uses would need to be updated. + assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) && + "Cannot split an edge from an IndirectBrInst"); + Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB); + } + + // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI + // node becomes an incoming value for BB's phi node. However, if the Preds + // list is empty, we need to insert dummy entries into the PHI nodes in BB to + // account for the newly created predecessor. + if (Preds.size() == 0) { + // Insert dummy values as the incoming value. + for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I) + cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB); + return NewBB; + } + + // Update DominatorTree, LoopInfo, and LCCSA analysis information. + bool HasLoopExit = false; + UpdateAnalysisInformation(BB, NewBB, Preds, P, HasLoopExit); + + // Update the PHI nodes in BB with the values coming from NewBB. + UpdatePHINodes(BB, NewBB, Preds, BI, P, HasLoopExit); + return NewBB; +} + +/// SplitLandingPadPredecessors - This method transforms the landing pad, +/// OrigBB, by introducing two new basic blocks into the function. One of those +/// new basic blocks gets the predecessors listed in Preds. The other basic +/// block gets the remaining predecessors of OrigBB. The landingpad instruction +/// OrigBB is clone into both of the new basic blocks. The new blocks are given +/// the suffixes 'Suffix1' and 'Suffix2', and are returned in the NewBBs vector. +/// +/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree, +/// DominanceFrontier, LoopInfo, and LCCSA but no other analyses. In particular, +/// it does not preserve LoopSimplify (because it's complicated to handle the +/// case where one of the edges being split is an exit of a loop with other +/// exits). +/// +void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, + ArrayRef<BasicBlock*> Preds, + const char *Suffix1, const char *Suffix2, + Pass *P, + SmallVectorImpl<BasicBlock*> &NewBBs) { + assert(OrigBB->isLandingPad() && "Trying to split a non-landing pad!"); + + // Create a new basic block for OrigBB's predecessors listed in Preds. Insert + // it right before the original block. + BasicBlock *NewBB1 = BasicBlock::Create(OrigBB->getContext(), + OrigBB->getName() + Suffix1, + OrigBB->getParent(), OrigBB); + NewBBs.push_back(NewBB1); + + // The new block unconditionally branches to the old block. + BranchInst *BI1 = BranchInst::Create(OrigBB, NewBB1); + + // Move the edges from Preds to point to NewBB1 instead of OrigBB. + for (unsigned i = 0, e = Preds.size(); i != e; ++i) { + // This is slightly more strict than necessary; the minimum requirement + // is that there be no more than one indirectbr branching to BB. And + // all BlockAddress uses would need to be updated. + assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) && + "Cannot split an edge from an IndirectBrInst"); + Preds[i]->getTerminator()->replaceUsesOfWith(OrigBB, NewBB1); + } + + // Update DominatorTree, LoopInfo, and LCCSA analysis information. + bool HasLoopExit = false; + UpdateAnalysisInformation(OrigBB, NewBB1, Preds, P, HasLoopExit); + + // Update the PHI nodes in OrigBB with the values coming from NewBB1. + UpdatePHINodes(OrigBB, NewBB1, Preds, BI1, P, HasLoopExit); + + // Move the remaining edges from OrigBB to point to NewBB2. + SmallVector<BasicBlock*, 8> NewBB2Preds; + for (pred_iterator i = pred_begin(OrigBB), e = pred_end(OrigBB); + i != e; ) { + BasicBlock *Pred = *i++; + if (Pred == NewBB1) continue; + assert(!isa<IndirectBrInst>(Pred->getTerminator()) && + "Cannot split an edge from an IndirectBrInst"); + NewBB2Preds.push_back(Pred); + e = pred_end(OrigBB); + } + + BasicBlock *NewBB2 = 0; + if (!NewBB2Preds.empty()) { + // Create another basic block for the rest of OrigBB's predecessors. + NewBB2 = BasicBlock::Create(OrigBB->getContext(), + OrigBB->getName() + Suffix2, + OrigBB->getParent(), OrigBB); + NewBBs.push_back(NewBB2); + + // The new block unconditionally branches to the old block. + BranchInst *BI2 = BranchInst::Create(OrigBB, NewBB2); + + // Move the remaining edges from OrigBB to point to NewBB2. + for (SmallVectorImpl<BasicBlock*>::iterator + i = NewBB2Preds.begin(), e = NewBB2Preds.end(); i != e; ++i) + (*i)->getTerminator()->replaceUsesOfWith(OrigBB, NewBB2); + + // Update DominatorTree, LoopInfo, and LCCSA analysis information. + HasLoopExit = false; + UpdateAnalysisInformation(OrigBB, NewBB2, NewBB2Preds, P, HasLoopExit); + + // Update the PHI nodes in OrigBB with the values coming from NewBB2. + UpdatePHINodes(OrigBB, NewBB2, NewBB2Preds, BI2, P, HasLoopExit); + } + + LandingPadInst *LPad = OrigBB->getLandingPadInst(); + Instruction *Clone1 = LPad->clone(); + Clone1->setName(Twine("lpad") + Suffix1); + NewBB1->getInstList().insert(NewBB1->getFirstInsertionPt(), Clone1); + + if (NewBB2) { + Instruction *Clone2 = LPad->clone(); + Clone2->setName(Twine("lpad") + Suffix2); + NewBB2->getInstList().insert(NewBB2->getFirstInsertionPt(), Clone2); + + // Create a PHI node for the two cloned landingpad instructions. + PHINode *PN = PHINode::Create(LPad->getType(), 2, "lpad.phi", LPad); + PN->addIncoming(Clone1, NewBB1); + PN->addIncoming(Clone2, NewBB2); + LPad->replaceAllUsesWith(PN); + LPad->eraseFromParent(); + } else { + // There is no second clone. Just replace the landing pad with the first + // clone. + LPad->replaceAllUsesWith(Clone1); + LPad->eraseFromParent(); + } +} + +/// FoldReturnIntoUncondBranch - This method duplicates the specified return +/// instruction into a predecessor which ends in an unconditional branch. If +/// the return instruction returns a value defined by a PHI, propagate the +/// right value into the return. It returns the new return instruction in the +/// predecessor. +ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, + BasicBlock *Pred) { + Instruction *UncondBranch = Pred->getTerminator(); + // Clone the return and add it to the end of the predecessor. + Instruction *NewRet = RI->clone(); + Pred->getInstList().push_back(NewRet); + + // If the return instruction returns a value, and if the value was a + // PHI node in "BB", propagate the right value into the return. + for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end(); + i != e; ++i) { + Value *V = *i; + Instruction *NewBC = 0; + if (BitCastInst *BCI = dyn_cast<BitCastInst>(V)) { + // Return value might be bitcasted. Clone and insert it before the + // return instruction. + V = BCI->getOperand(0); + NewBC = BCI->clone(); + Pred->getInstList().insert(NewRet, NewBC); + *i = NewBC; + } + if (PHINode *PN = dyn_cast<PHINode>(V)) { + if (PN->getParent() == BB) { + if (NewBC) + NewBC->setOperand(0, PN->getIncomingValueForBlock(Pred)); + else + *i = PN->getIncomingValueForBlock(Pred); + } + } + } + + // Update any PHI nodes in the returning block to realize that we no + // longer branch to them. + BB->removePredecessor(Pred); + UncondBranch->eraseFromParent(); + return cast<ReturnInst>(NewRet); +} + +/// SplitBlockAndInsertIfThen - Split the containing block at the +/// specified instruction - everything before and including Cmp stays +/// in the old basic block, and everything after Cmp is moved to a +/// new block. The two blocks are connected by a conditional branch +/// (with value of Cmp being the condition). +/// Before: +/// Head +/// Cmp +/// Tail +/// After: +/// Head +/// Cmp +/// if (Cmp) +/// ThenBlock +/// Tail +/// +/// If Unreachable is true, then ThenBlock ends with +/// UnreachableInst, otherwise it branches to Tail. +/// Returns the NewBasicBlock's terminator. + +TerminatorInst *llvm::SplitBlockAndInsertIfThen(Instruction *Cmp, + bool Unreachable, MDNode *BranchWeights) { + Instruction *SplitBefore = Cmp->getNextNode(); + BasicBlock *Head = SplitBefore->getParent(); + BasicBlock *Tail = Head->splitBasicBlock(SplitBefore); + TerminatorInst *HeadOldTerm = Head->getTerminator(); + LLVMContext &C = Head->getContext(); + BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail); + TerminatorInst *CheckTerm; + if (Unreachable) + CheckTerm = new UnreachableInst(C, ThenBlock); + else + CheckTerm = BranchInst::Create(Tail, ThenBlock); + BranchInst *HeadNewTerm = + BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cmp); + HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights); + ReplaceInstWithInst(HeadOldTerm, HeadNewTerm); + return CheckTerm; +} + +/// GetIfCondition - Given a basic block (BB) with two predecessors, +/// check to see if the merge at this block is due +/// to an "if condition". If so, return the boolean condition that determines +/// which entry into BB will be taken. Also, return by references the block +/// that will be entered from if the condition is true, and the block that will +/// be entered if the condition is false. +/// +/// This does no checking to see if the true/false blocks have large or unsavory +/// instructions in them. +Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, + BasicBlock *&IfFalse) { + PHINode *SomePHI = dyn_cast<PHINode>(BB->begin()); + BasicBlock *Pred1 = NULL; + BasicBlock *Pred2 = NULL; + + if (SomePHI) { + if (SomePHI->getNumIncomingValues() != 2) + return NULL; + Pred1 = SomePHI->getIncomingBlock(0); + Pred2 = SomePHI->getIncomingBlock(1); + } else { + pred_iterator PI = pred_begin(BB), PE = pred_end(BB); + if (PI == PE) // No predecessor + return NULL; + Pred1 = *PI++; + if (PI == PE) // Only one predecessor + return NULL; + Pred2 = *PI++; + if (PI != PE) // More than two predecessors + return NULL; + } + + // We can only handle branches. Other control flow will be lowered to + // branches if possible anyway. + BranchInst *Pred1Br = dyn_cast<BranchInst>(Pred1->getTerminator()); + BranchInst *Pred2Br = dyn_cast<BranchInst>(Pred2->getTerminator()); + if (Pred1Br == 0 || Pred2Br == 0) + return 0; + + // Eliminate code duplication by ensuring that Pred1Br is conditional if + // either are. + if (Pred2Br->isConditional()) { + // If both branches are conditional, we don't have an "if statement". In + // reality, we could transform this case, but since the condition will be + // required anyway, we stand no chance of eliminating it, so the xform is + // probably not profitable. + if (Pred1Br->isConditional()) + return 0; + + std::swap(Pred1, Pred2); + std::swap(Pred1Br, Pred2Br); + } + + if (Pred1Br->isConditional()) { + // The only thing we have to watch out for here is to make sure that Pred2 + // doesn't have incoming edges from other blocks. If it does, the condition + // doesn't dominate BB. + if (Pred2->getSinglePredecessor() == 0) + return 0; + + // If we found a conditional branch predecessor, make sure that it branches + // to BB and Pred2Br. If it doesn't, this isn't an "if statement". + if (Pred1Br->getSuccessor(0) == BB && + Pred1Br->getSuccessor(1) == Pred2) { + IfTrue = Pred1; + IfFalse = Pred2; + } else if (Pred1Br->getSuccessor(0) == Pred2 && + Pred1Br->getSuccessor(1) == BB) { + IfTrue = Pred2; + IfFalse = Pred1; + } else { + // We know that one arm of the conditional goes to BB, so the other must + // go somewhere unrelated, and this must not be an "if statement". + return 0; + } + + return Pred1Br->getCondition(); + } + + // Ok, if we got here, both predecessors end with an unconditional branch to + // BB. Don't panic! If both blocks only have a single (identical) + // predecessor, and THAT is a conditional branch, then we're all ok! + BasicBlock *CommonPred = Pred1->getSinglePredecessor(); + if (CommonPred == 0 || CommonPred != Pred2->getSinglePredecessor()) + return 0; + + // Otherwise, if this is a conditional branch, then we can use it! + BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator()); + if (BI == 0) return 0; + + assert(BI->isConditional() && "Two successors but not conditional?"); + if (BI->getSuccessor(0) == Pred1) { + IfTrue = Pred1; + IfFalse = Pred2; + } else { + IfTrue = Pred2; + IfFalse = Pred1; + } + return BI->getCondition(); +} diff --git a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp new file mode 100644 index 000000000000..0e7f7f784401 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -0,0 +1,370 @@ +//===- BreakCriticalEdges.cpp - Critical Edge Elimination Pass ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// BreakCriticalEdges pass - Break all of the critical edges in the CFG by +// inserting a dummy basic block. This pass may be "required" by passes that +// cannot deal with critical edges. For this usage, the structure type is +// forward declared. This pass obviously invalidates the CFG, but can update +// dominator trees. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "break-crit-edges" +#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +using namespace llvm; + +STATISTIC(NumBroken, "Number of blocks inserted"); + +namespace { + struct BreakCriticalEdges : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + BreakCriticalEdges() : FunctionPass(ID) { + initializeBreakCriticalEdgesPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved<DominatorTree>(); + AU.addPreserved<LoopInfo>(); + + // No loop canonicalization guarantees are broken by this pass. + AU.addPreservedID(LoopSimplifyID); + } + }; +} + +char BreakCriticalEdges::ID = 0; +INITIALIZE_PASS(BreakCriticalEdges, "break-crit-edges", + "Break critical edges in CFG", false, false) + +// Publicly exposed interface to pass... +char &llvm::BreakCriticalEdgesID = BreakCriticalEdges::ID; +FunctionPass *llvm::createBreakCriticalEdgesPass() { + return new BreakCriticalEdges(); +} + +// runOnFunction - Loop over all of the edges in the CFG, breaking critical +// edges as they are found. +// +bool BreakCriticalEdges::runOnFunction(Function &F) { + bool Changed = false; + for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { + TerminatorInst *TI = I->getTerminator(); + if (TI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(TI)) + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + if (SplitCriticalEdge(TI, i, this)) { + ++NumBroken; + Changed = true; + } + } + + return Changed; +} + +//===----------------------------------------------------------------------===// +// Implementation of the external critical edge manipulation functions +//===----------------------------------------------------------------------===// + +/// createPHIsForSplitLoopExit - When a loop exit edge is split, LCSSA form +/// may require new PHIs in the new exit block. This function inserts the +/// new PHIs, as needed. Preds is a list of preds inside the loop, SplitBB +/// is the new loop exit block, and DestBB is the old loop exit, now the +/// successor of SplitBB. +static void createPHIsForSplitLoopExit(ArrayRef<BasicBlock *> Preds, + BasicBlock *SplitBB, + BasicBlock *DestBB) { + // SplitBB shouldn't have anything non-trivial in it yet. + assert((SplitBB->getFirstNonPHI() == SplitBB->getTerminator() || + SplitBB->isLandingPad()) && "SplitBB has non-PHI nodes!"); + + // For each PHI in the destination block. + for (BasicBlock::iterator I = DestBB->begin(); + PHINode *PN = dyn_cast<PHINode>(I); ++I) { + unsigned Idx = PN->getBasicBlockIndex(SplitBB); + Value *V = PN->getIncomingValue(Idx); + + // If the input is a PHI which already satisfies LCSSA, don't create + // a new one. + if (const PHINode *VP = dyn_cast<PHINode>(V)) + if (VP->getParent() == SplitBB) + continue; + + // Otherwise a new PHI is needed. Create one and populate it. + PHINode *NewPN = + PHINode::Create(PN->getType(), Preds.size(), "split", + SplitBB->isLandingPad() ? + SplitBB->begin() : SplitBB->getTerminator()); + for (unsigned i = 0, e = Preds.size(); i != e; ++i) + NewPN->addIncoming(V, Preds[i]); + + // Update the original PHI. + PN->setIncomingValue(Idx, NewPN); + } +} + +/// SplitCriticalEdge - If this edge is a critical edge, insert a new node to +/// split the critical edge. This will update DominatorTree information if it +/// is available, thus calling this pass will not invalidate either of them. +/// This returns the new block if the edge was split, null otherwise. +/// +/// If MergeIdenticalEdges is true (not the default), *all* edges from TI to the +/// specified successor will be merged into the same critical edge block. +/// This is most commonly interesting with switch instructions, which may +/// have many edges to any one destination. This ensures that all edges to that +/// dest go to one block instead of each going to a different block, but isn't +/// the standard definition of a "critical edge". +/// +/// It is invalid to call this function on a critical edge that starts at an +/// IndirectBrInst. Splitting these edges will almost always create an invalid +/// program because the address of the new block won't be the one that is jumped +/// to. +/// +BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, + Pass *P, bool MergeIdenticalEdges, + bool DontDeleteUselessPhis, + bool SplitLandingPads) { + if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return 0; + + assert(!isa<IndirectBrInst>(TI) && + "Cannot split critical edge from IndirectBrInst"); + + BasicBlock *TIBB = TI->getParent(); + BasicBlock *DestBB = TI->getSuccessor(SuccNum); + + // Splitting the critical edge to a landing pad block is non-trivial. Don't do + // it in this generic function. + if (DestBB->isLandingPad()) return 0; + + // Create a new basic block, linking it into the CFG. + BasicBlock *NewBB = BasicBlock::Create(TI->getContext(), + TIBB->getName() + "." + DestBB->getName() + "_crit_edge"); + // Create our unconditional branch. + BranchInst *NewBI = BranchInst::Create(DestBB, NewBB); + NewBI->setDebugLoc(TI->getDebugLoc()); + + // Branch to the new block, breaking the edge. + TI->setSuccessor(SuccNum, NewBB); + + // Insert the block into the function... right after the block TI lives in. + Function &F = *TIBB->getParent(); + Function::iterator FBBI = TIBB; + F.getBasicBlockList().insert(++FBBI, NewBB); + + // If there are any PHI nodes in DestBB, we need to update them so that they + // merge incoming values from NewBB instead of from TIBB. + { + unsigned BBIdx = 0; + for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) { + // We no longer enter through TIBB, now we come in through NewBB. + // Revector exactly one entry in the PHI node that used to come from + // TIBB to come from NewBB. + PHINode *PN = cast<PHINode>(I); + + // Reuse the previous value of BBIdx if it lines up. In cases where we + // have multiple phi nodes with *lots* of predecessors, this is a speed + // win because we don't have to scan the PHI looking for TIBB. This + // happens because the BB list of PHI nodes are usually in the same + // order. + if (PN->getIncomingBlock(BBIdx) != TIBB) + BBIdx = PN->getBasicBlockIndex(TIBB); + PN->setIncomingBlock(BBIdx, NewBB); + } + } + + // If there are any other edges from TIBB to DestBB, update those to go + // through the split block, making those edges non-critical as well (and + // reducing the number of phi entries in the DestBB if relevant). + if (MergeIdenticalEdges) { + for (unsigned i = SuccNum+1, e = TI->getNumSuccessors(); i != e; ++i) { + if (TI->getSuccessor(i) != DestBB) continue; + + // Remove an entry for TIBB from DestBB phi nodes. + DestBB->removePredecessor(TIBB, DontDeleteUselessPhis); + + // We found another edge to DestBB, go to NewBB instead. + TI->setSuccessor(i, NewBB); + } + } + + + + // If we don't have a pass object, we can't update anything... + if (P == 0) return NewBB; + + DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>(); + LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>(); + + // If we have nothing to update, just return. + if (DT == 0 && LI == 0) + return NewBB; + + // Now update analysis information. Since the only predecessor of NewBB is + // the TIBB, TIBB clearly dominates NewBB. TIBB usually doesn't dominate + // anything, as there are other successors of DestBB. However, if all other + // predecessors of DestBB are already dominated by DestBB (e.g. DestBB is a + // loop header) then NewBB dominates DestBB. + SmallVector<BasicBlock*, 8> OtherPreds; + + // If there is a PHI in the block, loop over predecessors with it, which is + // faster than iterating pred_begin/end. + if (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) { + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingBlock(i) != NewBB) + OtherPreds.push_back(PN->getIncomingBlock(i)); + } else { + for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB); + I != E; ++I) { + BasicBlock *P = *I; + if (P != NewBB) + OtherPreds.push_back(P); + } + } + + bool NewBBDominatesDestBB = true; + + // Should we update DominatorTree information? + if (DT) { + DomTreeNode *TINode = DT->getNode(TIBB); + + // The new block is not the immediate dominator for any other nodes, but + // TINode is the immediate dominator for the new node. + // + if (TINode) { // Don't break unreachable code! + DomTreeNode *NewBBNode = DT->addNewBlock(NewBB, TIBB); + DomTreeNode *DestBBNode = 0; + + // If NewBBDominatesDestBB hasn't been computed yet, do so with DT. + if (!OtherPreds.empty()) { + DestBBNode = DT->getNode(DestBB); + while (!OtherPreds.empty() && NewBBDominatesDestBB) { + if (DomTreeNode *OPNode = DT->getNode(OtherPreds.back())) + NewBBDominatesDestBB = DT->dominates(DestBBNode, OPNode); + OtherPreds.pop_back(); + } + OtherPreds.clear(); + } + + // If NewBBDominatesDestBB, then NewBB dominates DestBB, otherwise it + // doesn't dominate anything. + if (NewBBDominatesDestBB) { + if (!DestBBNode) DestBBNode = DT->getNode(DestBB); + DT->changeImmediateDominator(DestBBNode, NewBBNode); + } + } + } + + // Update LoopInfo if it is around. + if (LI) { + if (Loop *TIL = LI->getLoopFor(TIBB)) { + // If one or the other blocks were not in a loop, the new block is not + // either, and thus LI doesn't need to be updated. + if (Loop *DestLoop = LI->getLoopFor(DestBB)) { + if (TIL == DestLoop) { + // Both in the same loop, the NewBB joins loop. + DestLoop->addBasicBlockToLoop(NewBB, LI->getBase()); + } else if (TIL->contains(DestLoop)) { + // Edge from an outer loop to an inner loop. Add to the outer loop. + TIL->addBasicBlockToLoop(NewBB, LI->getBase()); + } else if (DestLoop->contains(TIL)) { + // Edge from an inner loop to an outer loop. Add to the outer loop. + DestLoop->addBasicBlockToLoop(NewBB, LI->getBase()); + } else { + // Edge from two loops with no containment relation. Because these + // are natural loops, we know that the destination block must be the + // header of its loop (adding a branch into a loop elsewhere would + // create an irreducible loop). + assert(DestLoop->getHeader() == DestBB && + "Should not create irreducible loops!"); + if (Loop *P = DestLoop->getParentLoop()) + P->addBasicBlockToLoop(NewBB, LI->getBase()); + } + } + // If TIBB is in a loop and DestBB is outside of that loop, split the + // other exit blocks of the loop that also have predecessors outside + // the loop, to maintain a LoopSimplify guarantee. + if (!TIL->contains(DestBB) && + P->mustPreserveAnalysisID(LoopSimplifyID)) { + assert(!TIL->contains(NewBB) && + "Split point for loop exit is contained in loop!"); + + // Update LCSSA form in the newly created exit block. + if (P->mustPreserveAnalysisID(LCSSAID)) + createPHIsForSplitLoopExit(TIBB, NewBB, DestBB); + + // For each unique exit block... + // FIXME: This code is functionally equivalent to the corresponding + // loop in LoopSimplify. + SmallVector<BasicBlock *, 4> ExitBlocks; + TIL->getExitBlocks(ExitBlocks); + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { + // Collect all the preds that are inside the loop, and note + // whether there are any preds outside the loop. + SmallVector<BasicBlock *, 4> Preds; + bool HasPredOutsideOfLoop = false; + BasicBlock *Exit = ExitBlocks[i]; + for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); + I != E; ++I) { + BasicBlock *P = *I; + if (TIL->contains(P)) { + if (isa<IndirectBrInst>(P->getTerminator())) { + Preds.clear(); + break; + } + Preds.push_back(P); + } else { + HasPredOutsideOfLoop = true; + } + } + // If there are any preds not in the loop, we'll need to split + // the edges. The Preds.empty() check is needed because a block + // may appear multiple times in the list. We can't use + // getUniqueExitBlocks above because that depends on LoopSimplify + // form, which we're in the process of restoring! + if (!Preds.empty() && HasPredOutsideOfLoop) { + if (!Exit->isLandingPad()) { + BasicBlock *NewExitBB = + SplitBlockPredecessors(Exit, Preds, "split", P); + if (P->mustPreserveAnalysisID(LCSSAID)) + createPHIsForSplitLoopExit(Preds, NewExitBB, Exit); + } else if (SplitLandingPads) { + SmallVector<BasicBlock*, 8> NewBBs; + SplitLandingPadPredecessors(Exit, Preds, + ".split1", ".split2", + P, NewBBs); + if (P->mustPreserveAnalysisID(LCSSAID)) + createPHIsForSplitLoopExit(Preds, NewBBs[0], Exit); + } + } + } + } + // LCSSA form was updated above for the case where LoopSimplify is + // available, which means that all predecessors of loop exit blocks + // are within the loop. Without LoopSimplify form, it would be + // necessary to insert a new phi. + assert((!P->mustPreserveAnalysisID(LCSSAID) || + P->mustPreserveAnalysisID(LoopSimplifyID)) && + "SplitCriticalEdge doesn't know how to update LCCSA form " + "without LoopSimplify!"); + } + } + + return NewBB; +} diff --git a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp new file mode 100644 index 000000000000..6d13217df55d --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -0,0 +1,597 @@ +//===- BuildLibCalls.cpp - Utility builder for libcalls -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements some functions that will create standard C libcalls. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/BuildLibCalls.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/Target/TargetLibraryInfo.h" + +using namespace llvm; + +/// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*. +Value *llvm::CastToCStr(Value *V, IRBuilder<> &B) { + return B.CreateBitCast(V, B.getInt8PtrTy(), "cstr"); +} + +/// EmitStrLen - Emit a call to the strlen function to the builder, for the +/// specified pointer. This always returns an integer value of size intptr_t. +Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::strlen)) + return 0; + + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeSet AS[2]; + AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture); + Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind }; + AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, + ArrayRef<Attribute::AttrKind>(AVs, 2)); + + LLVMContext &Context = B.GetInsertBlock()->getContext(); + Constant *StrLen = M->getOrInsertFunction("strlen", + AttributeSet::get(M->getContext(), + AS), + TD->getIntPtrType(Context), + B.getInt8PtrTy(), + NULL); + CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen"); + if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + +/// EmitStrNLen - Emit a call to the strnlen function to the builder, for the +/// specified pointer. Ptr is required to be some pointer type, MaxLen must +/// be of size_t type, and the return value has 'intptr_t' type. +Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B, + const DataLayout *TD, const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::strnlen)) + return 0; + + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeSet AS[2]; + AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture); + Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind }; + AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, + ArrayRef<Attribute::AttrKind>(AVs, 2)); + + LLVMContext &Context = B.GetInsertBlock()->getContext(); + Constant *StrNLen = M->getOrInsertFunction("strnlen", + AttributeSet::get(M->getContext(), + AS), + TD->getIntPtrType(Context), + B.getInt8PtrTy(), + TD->getIntPtrType(Context), + NULL); + CallInst *CI = B.CreateCall2(StrNLen, CastToCStr(Ptr, B), MaxLen, "strnlen"); + if (const Function *F = dyn_cast<Function>(StrNLen->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + +/// EmitStrChr - Emit a call to the strchr function to the builder, for the +/// specified pointer and character. Ptr is required to be some pointer type, +/// and the return value has 'i8*' type. +Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B, + const DataLayout *TD, const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::strchr)) + return 0; + + Module *M = B.GetInsertBlock()->getParent()->getParent(); + Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind }; + AttributeSet AS = + AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, + ArrayRef<Attribute::AttrKind>(AVs, 2)); + + Type *I8Ptr = B.getInt8PtrTy(); + Type *I32Ty = B.getInt32Ty(); + Constant *StrChr = M->getOrInsertFunction("strchr", + AttributeSet::get(M->getContext(), + AS), + I8Ptr, I8Ptr, I32Ty, NULL); + CallInst *CI = B.CreateCall2(StrChr, CastToCStr(Ptr, B), + ConstantInt::get(I32Ty, C), "strchr"); + if (const Function *F = dyn_cast<Function>(StrChr->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + return CI; +} + +/// EmitStrNCmp - Emit a call to the strncmp function to the builder. +Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, + IRBuilder<> &B, const DataLayout *TD, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::strncmp)) + return 0; + + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeSet AS[3]; + AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture); + AS[1] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture); + Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind }; + AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, + ArrayRef<Attribute::AttrKind>(AVs, 2)); + + LLVMContext &Context = B.GetInsertBlock()->getContext(); + Value *StrNCmp = M->getOrInsertFunction("strncmp", + AttributeSet::get(M->getContext(), + AS), + B.getInt32Ty(), + B.getInt8PtrTy(), + B.getInt8PtrTy(), + TD->getIntPtrType(Context), NULL); + CallInst *CI = B.CreateCall3(StrNCmp, CastToCStr(Ptr1, B), + CastToCStr(Ptr2, B), Len, "strncmp"); + + if (const Function *F = dyn_cast<Function>(StrNCmp->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + +/// EmitStrCpy - Emit a call to the strcpy function to the builder, for the +/// specified pointer arguments. +Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, + const DataLayout *TD, const TargetLibraryInfo *TLI, + StringRef Name) { + if (!TLI->has(LibFunc::strcpy)) + return 0; + + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeSet AS[2]; + AS[0] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture); + AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); + Type *I8Ptr = B.getInt8PtrTy(); + Value *StrCpy = M->getOrInsertFunction(Name, + AttributeSet::get(M->getContext(), AS), + I8Ptr, I8Ptr, I8Ptr, NULL); + CallInst *CI = B.CreateCall2(StrCpy, CastToCStr(Dst, B), CastToCStr(Src, B), + Name); + if (const Function *F = dyn_cast<Function>(StrCpy->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + return CI; +} + +/// EmitStrNCpy - Emit a call to the strncpy function to the builder, for the +/// specified pointer arguments. +Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, + IRBuilder<> &B, const DataLayout *TD, + const TargetLibraryInfo *TLI, StringRef Name) { + if (!TLI->has(LibFunc::strncpy)) + return 0; + + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeSet AS[2]; + AS[0] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture); + AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); + Type *I8Ptr = B.getInt8PtrTy(); + Value *StrNCpy = M->getOrInsertFunction(Name, + AttributeSet::get(M->getContext(), + AS), + I8Ptr, I8Ptr, I8Ptr, + Len->getType(), NULL); + CallInst *CI = B.CreateCall3(StrNCpy, CastToCStr(Dst, B), CastToCStr(Src, B), + Len, "strncpy"); + if (const Function *F = dyn_cast<Function>(StrNCpy->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + return CI; +} + +/// EmitMemCpyChk - Emit a call to the __memcpy_chk function to the builder. +/// This expects that the Len and ObjSize have type 'intptr_t' and Dst/Src +/// are pointers. +Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, + IRBuilder<> &B, const DataLayout *TD, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::memcpy_chk)) + return 0; + + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeSet AS; + AS = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); + LLVMContext &Context = B.GetInsertBlock()->getContext(); + Value *MemCpy = M->getOrInsertFunction("__memcpy_chk", + AttributeSet::get(M->getContext(), AS), + B.getInt8PtrTy(), + B.getInt8PtrTy(), + B.getInt8PtrTy(), + TD->getIntPtrType(Context), + TD->getIntPtrType(Context), NULL); + Dst = CastToCStr(Dst, B); + Src = CastToCStr(Src, B); + CallInst *CI = B.CreateCall4(MemCpy, Dst, Src, Len, ObjSize); + if (const Function *F = dyn_cast<Function>(MemCpy->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + return CI; +} + +/// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is +/// a pointer, Val is an i32 value, and Len is an 'intptr_t' value. +Value *llvm::EmitMemChr(Value *Ptr, Value *Val, + Value *Len, IRBuilder<> &B, const DataLayout *TD, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::memchr)) + return 0; + + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeSet AS; + Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind }; + AS = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, + ArrayRef<Attribute::AttrKind>(AVs, 2)); + LLVMContext &Context = B.GetInsertBlock()->getContext(); + Value *MemChr = M->getOrInsertFunction("memchr", + AttributeSet::get(M->getContext(), AS), + B.getInt8PtrTy(), + B.getInt8PtrTy(), + B.getInt32Ty(), + TD->getIntPtrType(Context), + NULL); + CallInst *CI = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr"); + + if (const Function *F = dyn_cast<Function>(MemChr->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + +/// EmitMemCmp - Emit a call to the memcmp function. +Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, + Value *Len, IRBuilder<> &B, const DataLayout *TD, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::memcmp)) + return 0; + + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeSet AS[3]; + AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture); + AS[1] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture); + Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind }; + AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, + ArrayRef<Attribute::AttrKind>(AVs, 2)); + + LLVMContext &Context = B.GetInsertBlock()->getContext(); + Value *MemCmp = M->getOrInsertFunction("memcmp", + AttributeSet::get(M->getContext(), AS), + B.getInt32Ty(), + B.getInt8PtrTy(), + B.getInt8PtrTy(), + TD->getIntPtrType(Context), NULL); + CallInst *CI = B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B), + Len, "memcmp"); + + if (const Function *F = dyn_cast<Function>(MemCmp->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + +/// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name' (e.g. +/// 'floor'). This function is known to take a single of type matching 'Op' and +/// returns one value with the same type. If 'Op' is a long double, 'l' is +/// added as the suffix of name, if 'Op' is a float, we add a 'f' suffix. +Value *llvm::EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B, + const AttributeSet &Attrs) { + SmallString<20> NameBuffer; + if (!Op->getType()->isDoubleTy()) { + // If we need to add a suffix, copy into NameBuffer. + NameBuffer += Name; + if (Op->getType()->isFloatTy()) + NameBuffer += 'f'; // floorf + else + NameBuffer += 'l'; // floorl + Name = NameBuffer; + } + + Module *M = B.GetInsertBlock()->getParent()->getParent(); + Value *Callee = M->getOrInsertFunction(Name, Op->getType(), + Op->getType(), NULL); + CallInst *CI = B.CreateCall(Callee, Op, Name); + CI->setAttributes(Attrs); + if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + +/// EmitPutChar - Emit a call to the putchar function. This assumes that Char +/// is an integer. +Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const DataLayout *TD, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::putchar)) + return 0; + + Module *M = B.GetInsertBlock()->getParent()->getParent(); + Value *PutChar = M->getOrInsertFunction("putchar", B.getInt32Ty(), + B.getInt32Ty(), NULL); + CallInst *CI = B.CreateCall(PutChar, + B.CreateIntCast(Char, + B.getInt32Ty(), + /*isSigned*/true, + "chari"), + "putchar"); + + if (const Function *F = dyn_cast<Function>(PutChar->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + return CI; +} + +/// EmitPutS - Emit a call to the puts function. This assumes that Str is +/// some pointer. +Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::puts)) + return 0; + + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeSet AS[2]; + AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture); + AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); + + Value *PutS = M->getOrInsertFunction("puts", + AttributeSet::get(M->getContext(), AS), + B.getInt32Ty(), + B.getInt8PtrTy(), + NULL); + CallInst *CI = B.CreateCall(PutS, CastToCStr(Str, B), "puts"); + if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + return CI; +} + +/// EmitFPutC - Emit a call to the fputc function. This assumes that Char is +/// an integer and File is a pointer to FILE. +Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, + const DataLayout *TD, const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::fputc)) + return 0; + + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeSet AS[2]; + AS[0] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture); + AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); + Constant *F; + if (File->getType()->isPointerTy()) + F = M->getOrInsertFunction("fputc", + AttributeSet::get(M->getContext(), AS), + B.getInt32Ty(), + B.getInt32Ty(), File->getType(), + NULL); + else + F = M->getOrInsertFunction("fputc", + B.getInt32Ty(), + B.getInt32Ty(), + File->getType(), NULL); + Char = B.CreateIntCast(Char, B.getInt32Ty(), /*isSigned*/true, + "chari"); + CallInst *CI = B.CreateCall2(F, Char, File, "fputc"); + + if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) + CI->setCallingConv(Fn->getCallingConv()); + return CI; +} + +/// EmitFPutS - Emit a call to the puts function. Str is required to be a +/// pointer and File is a pointer to FILE. +Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, + const DataLayout *TD, const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::fputs)) + return 0; + + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeSet AS[3]; + AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture); + AS[1] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture); + AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); + StringRef FPutsName = TLI->getName(LibFunc::fputs); + Constant *F; + if (File->getType()->isPointerTy()) + F = M->getOrInsertFunction(FPutsName, + AttributeSet::get(M->getContext(), AS), + B.getInt32Ty(), + B.getInt8PtrTy(), + File->getType(), NULL); + else + F = M->getOrInsertFunction(FPutsName, B.getInt32Ty(), + B.getInt8PtrTy(), + File->getType(), NULL); + CallInst *CI = B.CreateCall2(F, CastToCStr(Str, B), File, "fputs"); + + if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) + CI->setCallingConv(Fn->getCallingConv()); + return CI; +} + +/// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is +/// a pointer, Size is an 'intptr_t', and File is a pointer to FILE. +Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, + IRBuilder<> &B, const DataLayout *TD, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::fwrite)) + return 0; + + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeSet AS[3]; + AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture); + AS[1] = AttributeSet::get(M->getContext(), 4, Attribute::NoCapture); + AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); + LLVMContext &Context = B.GetInsertBlock()->getContext(); + StringRef FWriteName = TLI->getName(LibFunc::fwrite); + Constant *F; + if (File->getType()->isPointerTy()) + F = M->getOrInsertFunction(FWriteName, + AttributeSet::get(M->getContext(), AS), + TD->getIntPtrType(Context), + B.getInt8PtrTy(), + TD->getIntPtrType(Context), + TD->getIntPtrType(Context), + File->getType(), NULL); + else + F = M->getOrInsertFunction(FWriteName, TD->getIntPtrType(Context), + B.getInt8PtrTy(), + TD->getIntPtrType(Context), + TD->getIntPtrType(Context), + File->getType(), NULL); + CallInst *CI = B.CreateCall4(F, CastToCStr(Ptr, B), Size, + ConstantInt::get(TD->getIntPtrType(Context), 1), File); + + if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) + CI->setCallingConv(Fn->getCallingConv()); + return CI; +} + +SimplifyFortifiedLibCalls::~SimplifyFortifiedLibCalls() { } + +bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const DataLayout *TD, + const TargetLibraryInfo *TLI) { + // We really need DataLayout for later. + if (!TD) return false; + + this->CI = CI; + Function *Callee = CI->getCalledFunction(); + StringRef Name = Callee->getName(); + FunctionType *FT = Callee->getFunctionType(); + LLVMContext &Context = CI->getParent()->getContext(); + IRBuilder<> B(CI); + + if (Name == "__memcpy_chk") { + // Check if this has the right signature. + if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + FT->getParamType(2) != TD->getIntPtrType(Context) || + FT->getParamType(3) != TD->getIntPtrType(Context)) + return false; + + if (isFoldable(3, 2, false)) { + B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1); + replaceCall(CI->getArgOperand(0)); + return true; + } + return false; + } + + // Should be similar to memcpy. + if (Name == "__mempcpy_chk") { + return false; + } + + if (Name == "__memmove_chk") { + // Check if this has the right signature. + if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + FT->getParamType(2) != TD->getIntPtrType(Context) || + FT->getParamType(3) != TD->getIntPtrType(Context)) + return false; + + if (isFoldable(3, 2, false)) { + B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1); + replaceCall(CI->getArgOperand(0)); + return true; + } + return false; + } + + if (Name == "__memset_chk") { + // Check if this has the right signature. + if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isIntegerTy() || + FT->getParamType(2) != TD->getIntPtrType(Context) || + FT->getParamType(3) != TD->getIntPtrType(Context)) + return false; + + if (isFoldable(3, 2, false)) { + Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), + false); + B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1); + replaceCall(CI->getArgOperand(0)); + return true; + } + return false; + } + + if (Name == "__strcpy_chk" || Name == "__stpcpy_chk") { + // Check if this has the right signature. + if (FT->getNumParams() != 3 || + FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != Type::getInt8PtrTy(Context) || + FT->getParamType(2) != TD->getIntPtrType(Context)) + return 0; + + + // If a) we don't have any length information, or b) we know this will + // fit then just lower to a plain st[rp]cpy. Otherwise we'll keep our + // st[rp]cpy_chk call which may fail at runtime if the size is too long. + // TODO: It might be nice to get a maximum length out of the possible + // string lengths for varying. + if (isFoldable(2, 1, true)) { + Value *Ret = EmitStrCpy(CI->getArgOperand(0), CI->getArgOperand(1), B, TD, + TLI, Name.substr(2, 6)); + if (!Ret) + return false; + replaceCall(Ret); + return true; + } + return false; + } + + if (Name == "__strncpy_chk" || Name == "__stpncpy_chk") { + // Check if this has the right signature. + if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != Type::getInt8PtrTy(Context) || + !FT->getParamType(2)->isIntegerTy() || + FT->getParamType(3) != TD->getIntPtrType(Context)) + return false; + + if (isFoldable(3, 2, false)) { + Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), B, TD, TLI, + Name.substr(2, 7)); + if (!Ret) + return false; + replaceCall(Ret); + return true; + } + return false; + } + + if (Name == "__strcat_chk") { + return false; + } + + if (Name == "__strncat_chk") { + return false; + } + + return false; +} diff --git a/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp b/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp new file mode 100644 index 000000000000..1f517d038d19 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -0,0 +1,262 @@ +//===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains an optimization for div and rem on architectures that +// execute short instructions significantly faster than longer instructions. +// For example, on Intel Atom 32-bit divides are slow enough that during +// runtime it is profitable to check the value of the operands, and if they are +// positive and less than 256 use an unsigned 8-bit divide. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "bypass-slow-division" +#include "llvm/Transforms/Utils/BypassSlowDivision.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" + +using namespace llvm; + +namespace { + struct DivOpInfo { + bool SignedOp; + Value *Dividend; + Value *Divisor; + + DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor) + : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {} + }; + + struct DivPhiNodes { + PHINode *Quotient; + PHINode *Remainder; + + DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder) + : Quotient(InQuotient), Remainder(InRemainder) {} + }; +} + +namespace llvm { + template<> + struct DenseMapInfo<DivOpInfo> { + static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) { + return Val1.SignedOp == Val2.SignedOp && + Val1.Dividend == Val2.Dividend && + Val1.Divisor == Val2.Divisor; + } + + static DivOpInfo getEmptyKey() { + return DivOpInfo(false, 0, 0); + } + + static DivOpInfo getTombstoneKey() { + return DivOpInfo(true, 0, 0); + } + + static unsigned getHashValue(const DivOpInfo &Val) { + return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^ + reinterpret_cast<uintptr_t>(Val.Divisor)) ^ + (unsigned)Val.SignedOp; + } + }; + + typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy; +} + +// insertFastDiv - Substitutes the div/rem instruction with code that checks the +// value of the operands and uses a shorter-faster div/rem instruction when +// possible and the longer-slower div/rem instruction otherwise. +static bool insertFastDiv(Function &F, + Function::iterator &I, + BasicBlock::iterator &J, + IntegerType *BypassType, + bool UseDivOp, + bool UseSignedOp, + DivCacheTy &PerBBDivCache) { + // Get instruction operands + Instruction *Instr = J; + Value *Dividend = Instr->getOperand(0); + Value *Divisor = Instr->getOperand(1); + + if (isa<ConstantInt>(Divisor) || + (isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) { + // Operations with immediate values should have + // been solved and replaced during compile time. + return false; + } + + // Basic Block is split before divide + BasicBlock *MainBB = I; + BasicBlock *SuccessorBB = I->splitBasicBlock(J); + ++I; //advance iterator I to successorBB + + // Add new basic block for slow divide operation + BasicBlock *SlowBB = BasicBlock::Create(F.getContext(), "", + MainBB->getParent(), SuccessorBB); + SlowBB->moveBefore(SuccessorBB); + IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin()); + Value *SlowQuotientV; + Value *SlowRemainderV; + if (UseSignedOp) { + SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor); + SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor); + } else { + SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor); + SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor); + } + SlowBuilder.CreateBr(SuccessorBB); + + // Add new basic block for fast divide operation + BasicBlock *FastBB = BasicBlock::Create(F.getContext(), "", + MainBB->getParent(), SuccessorBB); + FastBB->moveBefore(SlowBB); + IRBuilder<> FastBuilder(FastBB, FastBB->begin()); + Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor, + BypassType); + Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend, + BypassType); + + // udiv/urem because optimization only handles positive numbers + Value *ShortQuotientV = FastBuilder.CreateExactUDiv(ShortDividendV, + ShortDivisorV); + Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV, + ShortDivisorV); + Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt, + ShortQuotientV, + Dividend->getType()); + Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt, + ShortRemainderV, + Dividend->getType()); + FastBuilder.CreateBr(SuccessorBB); + + // Phi nodes for result of div and rem + IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin()); + PHINode *QuoPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2); + QuoPhi->addIncoming(SlowQuotientV, SlowBB); + QuoPhi->addIncoming(FastQuotientV, FastBB); + PHINode *RemPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2); + RemPhi->addIncoming(SlowRemainderV, SlowBB); + RemPhi->addIncoming(FastRemainderV, FastBB); + + // Replace Instr with appropriate phi node + if (UseDivOp) + Instr->replaceAllUsesWith(QuoPhi); + else + Instr->replaceAllUsesWith(RemPhi); + Instr->eraseFromParent(); + + // Combine operands into a single value with OR for value testing below + MainBB->getInstList().back().eraseFromParent(); + IRBuilder<> MainBuilder(MainBB, MainBB->end()); + Value *OrV = MainBuilder.CreateOr(Dividend, Divisor); + + // BitMask is inverted to check if the operands are + // larger than the bypass type + uint64_t BitMask = ~BypassType->getBitMask(); + Value *AndV = MainBuilder.CreateAnd(OrV, BitMask); + + // Compare operand values and branch + Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0); + Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV); + MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB); + + // point iterator J at first instruction of successorBB + J = I->begin(); + + // Cache phi nodes to be used later in place of other instances + // of div or rem with the same sign, dividend, and divisor + DivOpInfo Key(UseSignedOp, Dividend, Divisor); + DivPhiNodes Value(QuoPhi, RemPhi); + PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value)); + return true; +} + +// reuseOrInsertFastDiv - Reuses previously computed dividend or remainder if +// operands and operation are identical. Otherwise call insertFastDiv to perform +// the optimization and cache the resulting dividend and remainder. +static bool reuseOrInsertFastDiv(Function &F, + Function::iterator &I, + BasicBlock::iterator &J, + IntegerType *BypassType, + bool UseDivOp, + bool UseSignedOp, + DivCacheTy &PerBBDivCache) { + // Get instruction operands + Instruction *Instr = J; + DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1)); + DivCacheTy::iterator CacheI = PerBBDivCache.find(Key); + + if (CacheI == PerBBDivCache.end()) { + // If previous instance does not exist, insert fast div + return insertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp, + PerBBDivCache); + } + + // Replace operation value with previously generated phi node + DivPhiNodes &Value = CacheI->second; + if (UseDivOp) { + // Replace all uses of div instruction with quotient phi node + J->replaceAllUsesWith(Value.Quotient); + } else { + // Replace all uses of rem instruction with remainder phi node + J->replaceAllUsesWith(Value.Remainder); + } + + // Advance to next operation + ++J; + + // Remove redundant operation + Instr->eraseFromParent(); + return true; +} + +// bypassSlowDivision - This optimization identifies DIV instructions that can +// be profitably bypassed and carried out with a shorter, faster divide. +bool llvm::bypassSlowDivision(Function &F, + Function::iterator &I, + const DenseMap<unsigned int, unsigned int> &BypassWidths) { + DivCacheTy DivCache; + + bool MadeChange = false; + for (BasicBlock::iterator J = I->begin(); J != I->end(); J++) { + + // Get instruction details + unsigned Opcode = J->getOpcode(); + bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv; + bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem; + bool UseSignedOp = Opcode == Instruction::SDiv || + Opcode == Instruction::SRem; + + // Only optimize div or rem ops + if (!UseDivOp && !UseRemOp) + continue; + + // Skip division on vector types, only optimize integer instructions + if (!J->getType()->isIntegerTy()) + continue; + + // Get bitwidth of div/rem instruction + IntegerType *T = cast<IntegerType>(J->getType()); + unsigned int bitwidth = T->getBitWidth(); + + // Continue if bitwidth is not bypassed + DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth); + if (BI == BypassWidths.end()) + continue; + + // Get type for div/rem instruction with bypass bitwidth + IntegerType *BT = IntegerType::get(J->getContext(), BI->second); + + MadeChange |= reuseOrInsertFastDiv(F, I, J, BT, UseDivOp, + UseSignedOp, DivCache); + } + + return MadeChange; +} diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp new file mode 100644 index 000000000000..d105f5e24a2b --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -0,0 +1,574 @@ +//===- CloneFunction.cpp - Clone a function into another function ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the CloneFunctionInto interface, which is used as the +// low-level function cloner. This is used by the CloneFunction and function +// inliner to do the dirty work of copying the body of a function around. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/Support/CFG.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/ValueMapper.h" +#include <map> +using namespace llvm; + +// CloneBasicBlock - See comments in Cloning.h +BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, + ValueToValueMapTy &VMap, + const Twine &NameSuffix, Function *F, + ClonedCodeInfo *CodeInfo) { + BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F); + if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix); + + bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false; + + // Loop over all instructions, and copy them over. + for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); + II != IE; ++II) { + Instruction *NewInst = II->clone(); + if (II->hasName()) + NewInst->setName(II->getName()+NameSuffix); + NewBB->getInstList().push_back(NewInst); + VMap[II] = NewInst; // Add instruction map to value. + + hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II)); + if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) { + if (isa<ConstantInt>(AI->getArraySize())) + hasStaticAllocas = true; + else + hasDynamicAllocas = true; + } + } + + if (CodeInfo) { + CodeInfo->ContainsCalls |= hasCalls; + CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas; + CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas && + BB != &BB->getParent()->getEntryBlock(); + } + return NewBB; +} + +// Clone OldFunc into NewFunc, transforming the old arguments into references to +// VMap values. +// +void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, + ValueToValueMapTy &VMap, + bool ModuleLevelChanges, + SmallVectorImpl<ReturnInst*> &Returns, + const char *NameSuffix, ClonedCodeInfo *CodeInfo, + ValueMapTypeRemapper *TypeMapper, + ValueMaterializer *Materializer) { + assert(NameSuffix && "NameSuffix cannot be null!"); + +#ifndef NDEBUG + for (Function::const_arg_iterator I = OldFunc->arg_begin(), + E = OldFunc->arg_end(); I != E; ++I) + assert(VMap.count(I) && "No mapping from source argument specified!"); +#endif + + AttributeSet OldAttrs = OldFunc->getAttributes(); + // Clone any argument attributes that are present in the VMap. + for (Function::const_arg_iterator I = OldFunc->arg_begin(), + E = OldFunc->arg_end(); + I != E; ++I) + if (Argument *Anew = dyn_cast<Argument>(VMap[I])) { + AttributeSet attrs = + OldAttrs.getParamAttributes(I->getArgNo() + 1); + if (attrs.getNumSlots() > 0) + Anew->addAttr(attrs); + } + + NewFunc->setAttributes(NewFunc->getAttributes() + .addAttributes(NewFunc->getContext(), + AttributeSet::ReturnIndex, + OldAttrs.getRetAttributes())); + NewFunc->setAttributes(NewFunc->getAttributes() + .addAttributes(NewFunc->getContext(), + AttributeSet::FunctionIndex, + OldAttrs.getFnAttributes())); + + // Loop over all of the basic blocks in the function, cloning them as + // appropriate. Note that we save BE this way in order to handle cloning of + // recursive functions into themselves. + // + for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end(); + BI != BE; ++BI) { + const BasicBlock &BB = *BI; + + // Create a new basic block and copy instructions into it! + BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo); + + // Add basic block mapping. + VMap[&BB] = CBB; + + // It is only legal to clone a function if a block address within that + // function is never referenced outside of the function. Given that, we + // want to map block addresses from the old function to block addresses in + // the clone. (This is different from the generic ValueMapper + // implementation, which generates an invalid blockaddress when + // cloning a function.) + if (BB.hasAddressTaken()) { + Constant *OldBBAddr = BlockAddress::get(const_cast<Function*>(OldFunc), + const_cast<BasicBlock*>(&BB)); + VMap[OldBBAddr] = BlockAddress::get(NewFunc, CBB); + } + + // Note return instructions for the caller. + if (ReturnInst *RI = dyn_cast<ReturnInst>(CBB->getTerminator())) + Returns.push_back(RI); + } + + // Loop over all of the instructions in the function, fixing up operand + // references as we go. This uses VMap to do all the hard work. + for (Function::iterator BB = cast<BasicBlock>(VMap[OldFunc->begin()]), + BE = NewFunc->end(); BB != BE; ++BB) + // Loop over all instructions, fixing each one as we find it... + for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II) + RemapInstruction(II, VMap, + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, + TypeMapper, Materializer); +} + +/// CloneFunction - Return a copy of the specified function, but without +/// embedding the function into another module. Also, any references specified +/// in the VMap are changed to refer to their mapped value instead of the +/// original one. If any of the arguments to the function are in the VMap, +/// the arguments are deleted from the resultant function. The VMap is +/// updated to include mappings from all of the instructions and basicblocks in +/// the function from their old to new values. +/// +Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap, + bool ModuleLevelChanges, + ClonedCodeInfo *CodeInfo) { + std::vector<Type*> ArgTypes; + + // The user might be deleting arguments to the function by specifying them in + // the VMap. If so, we need to not add the arguments to the arg ty vector + // + for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; ++I) + if (VMap.count(I) == 0) // Haven't mapped the argument to anything yet? + ArgTypes.push_back(I->getType()); + + // Create a new function type... + FunctionType *FTy = FunctionType::get(F->getFunctionType()->getReturnType(), + ArgTypes, F->getFunctionType()->isVarArg()); + + // Create the new function... + Function *NewF = Function::Create(FTy, F->getLinkage(), F->getName()); + + // Loop over the arguments, copying the names of the mapped arguments over... + Function::arg_iterator DestI = NewF->arg_begin(); + for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; ++I) + if (VMap.count(I) == 0) { // Is this argument preserved? + DestI->setName(I->getName()); // Copy the name over... + VMap[I] = DestI++; // Add mapping to VMap + } + + SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned. + CloneFunctionInto(NewF, F, VMap, ModuleLevelChanges, Returns, "", CodeInfo); + return NewF; +} + + + +namespace { + /// PruningFunctionCloner - This class is a private class used to implement + /// the CloneAndPruneFunctionInto method. + struct PruningFunctionCloner { + Function *NewFunc; + const Function *OldFunc; + ValueToValueMapTy &VMap; + bool ModuleLevelChanges; + const char *NameSuffix; + ClonedCodeInfo *CodeInfo; + const DataLayout *TD; + public: + PruningFunctionCloner(Function *newFunc, const Function *oldFunc, + ValueToValueMapTy &valueMap, + bool moduleLevelChanges, + const char *nameSuffix, + ClonedCodeInfo *codeInfo, + const DataLayout *td) + : NewFunc(newFunc), OldFunc(oldFunc), + VMap(valueMap), ModuleLevelChanges(moduleLevelChanges), + NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) { + } + + /// CloneBlock - The specified block is found to be reachable, clone it and + /// anything that it can reach. + void CloneBlock(const BasicBlock *BB, + std::vector<const BasicBlock*> &ToClone); + }; +} + +/// CloneBlock - The specified block is found to be reachable, clone it and +/// anything that it can reach. +void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, + std::vector<const BasicBlock*> &ToClone){ + WeakVH &BBEntry = VMap[BB]; + + // Have we already cloned this block? + if (BBEntry) return; + + // Nope, clone it now. + BasicBlock *NewBB; + BBEntry = NewBB = BasicBlock::Create(BB->getContext()); + if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix); + + // It is only legal to clone a function if a block address within that + // function is never referenced outside of the function. Given that, we + // want to map block addresses from the old function to block addresses in + // the clone. (This is different from the generic ValueMapper + // implementation, which generates an invalid blockaddress when + // cloning a function.) + // + // Note that we don't need to fix the mapping for unreachable blocks; + // the default mapping there is safe. + if (BB->hasAddressTaken()) { + Constant *OldBBAddr = BlockAddress::get(const_cast<Function*>(OldFunc), + const_cast<BasicBlock*>(BB)); + VMap[OldBBAddr] = BlockAddress::get(NewFunc, NewBB); + } + + + bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false; + + // Loop over all instructions, and copy them over, DCE'ing as we go. This + // loop doesn't include the terminator. + for (BasicBlock::const_iterator II = BB->begin(), IE = --BB->end(); + II != IE; ++II) { + Instruction *NewInst = II->clone(); + + // Eagerly remap operands to the newly cloned instruction, except for PHI + // nodes for which we defer processing until we update the CFG. + if (!isa<PHINode>(NewInst)) { + RemapInstruction(NewInst, VMap, + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges); + + // If we can simplify this instruction to some other value, simply add + // a mapping to that value rather than inserting a new instruction into + // the basic block. + if (Value *V = SimplifyInstruction(NewInst, TD)) { + // On the off-chance that this simplifies to an instruction in the old + // function, map it back into the new function. + if (Value *MappedV = VMap.lookup(V)) + V = MappedV; + + VMap[II] = V; + delete NewInst; + continue; + } + } + + if (II->hasName()) + NewInst->setName(II->getName()+NameSuffix); + VMap[II] = NewInst; // Add instruction map to value. + NewBB->getInstList().push_back(NewInst); + hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II)); + if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) { + if (isa<ConstantInt>(AI->getArraySize())) + hasStaticAllocas = true; + else + hasDynamicAllocas = true; + } + } + + // Finally, clone over the terminator. + const TerminatorInst *OldTI = BB->getTerminator(); + bool TerminatorDone = false; + if (const BranchInst *BI = dyn_cast<BranchInst>(OldTI)) { + if (BI->isConditional()) { + // If the condition was a known constant in the callee... + ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition()); + // Or is a known constant in the caller... + if (Cond == 0) { + Value *V = VMap[BI->getCondition()]; + Cond = dyn_cast_or_null<ConstantInt>(V); + } + + // Constant fold to uncond branch! + if (Cond) { + BasicBlock *Dest = BI->getSuccessor(!Cond->getZExtValue()); + VMap[OldTI] = BranchInst::Create(Dest, NewBB); + ToClone.push_back(Dest); + TerminatorDone = true; + } + } + } else if (const SwitchInst *SI = dyn_cast<SwitchInst>(OldTI)) { + // If switching on a value known constant in the caller. + ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition()); + if (Cond == 0) { // Or known constant after constant prop in the callee... + Value *V = VMap[SI->getCondition()]; + Cond = dyn_cast_or_null<ConstantInt>(V); + } + if (Cond) { // Constant fold to uncond branch! + SwitchInst::ConstCaseIt Case = SI->findCaseValue(Cond); + BasicBlock *Dest = const_cast<BasicBlock*>(Case.getCaseSuccessor()); + VMap[OldTI] = BranchInst::Create(Dest, NewBB); + ToClone.push_back(Dest); + TerminatorDone = true; + } + } + + if (!TerminatorDone) { + Instruction *NewInst = OldTI->clone(); + if (OldTI->hasName()) + NewInst->setName(OldTI->getName()+NameSuffix); + NewBB->getInstList().push_back(NewInst); + VMap[OldTI] = NewInst; // Add instruction map to value. + + // Recursively clone any reachable successor blocks. + const TerminatorInst *TI = BB->getTerminator(); + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + ToClone.push_back(TI->getSuccessor(i)); + } + + if (CodeInfo) { + CodeInfo->ContainsCalls |= hasCalls; + CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas; + CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas && + BB != &BB->getParent()->front(); + } +} + +/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto, +/// except that it does some simple constant prop and DCE on the fly. The +/// effect of this is to copy significantly less code in cases where (for +/// example) a function call with constant arguments is inlined, and those +/// constant arguments cause a significant amount of code in the callee to be +/// dead. Since this doesn't produce an exact copy of the input, it can't be +/// used for things like CloneFunction or CloneModule. +void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, + ValueToValueMapTy &VMap, + bool ModuleLevelChanges, + SmallVectorImpl<ReturnInst*> &Returns, + const char *NameSuffix, + ClonedCodeInfo *CodeInfo, + const DataLayout *TD, + Instruction *TheCall) { + assert(NameSuffix && "NameSuffix cannot be null!"); + +#ifndef NDEBUG + for (Function::const_arg_iterator II = OldFunc->arg_begin(), + E = OldFunc->arg_end(); II != E; ++II) + assert(VMap.count(II) && "No mapping from source argument specified!"); +#endif + + PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges, + NameSuffix, CodeInfo, TD); + + // Clone the entry block, and anything recursively reachable from it. + std::vector<const BasicBlock*> CloneWorklist; + CloneWorklist.push_back(&OldFunc->getEntryBlock()); + while (!CloneWorklist.empty()) { + const BasicBlock *BB = CloneWorklist.back(); + CloneWorklist.pop_back(); + PFC.CloneBlock(BB, CloneWorklist); + } + + // Loop over all of the basic blocks in the old function. If the block was + // reachable, we have cloned it and the old block is now in the value map: + // insert it into the new function in the right order. If not, ignore it. + // + // Defer PHI resolution until rest of function is resolved. + SmallVector<const PHINode*, 16> PHIToResolve; + for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end(); + BI != BE; ++BI) { + Value *V = VMap[BI]; + BasicBlock *NewBB = cast_or_null<BasicBlock>(V); + if (NewBB == 0) continue; // Dead block. + + // Add the new block to the new function. + NewFunc->getBasicBlockList().push_back(NewBB); + + // Handle PHI nodes specially, as we have to remove references to dead + // blocks. + for (BasicBlock::const_iterator I = BI->begin(), E = BI->end(); I != E; ++I) + if (const PHINode *PN = dyn_cast<PHINode>(I)) + PHIToResolve.push_back(PN); + else + break; + + // Finally, remap the terminator instructions, as those can't be remapped + // until all BBs are mapped. + RemapInstruction(NewBB->getTerminator(), VMap, + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges); + } + + // Defer PHI resolution until rest of function is resolved, PHI resolution + // requires the CFG to be up-to-date. + for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) { + const PHINode *OPN = PHIToResolve[phino]; + unsigned NumPreds = OPN->getNumIncomingValues(); + const BasicBlock *OldBB = OPN->getParent(); + BasicBlock *NewBB = cast<BasicBlock>(VMap[OldBB]); + + // Map operands for blocks that are live and remove operands for blocks + // that are dead. + for (; phino != PHIToResolve.size() && + PHIToResolve[phino]->getParent() == OldBB; ++phino) { + OPN = PHIToResolve[phino]; + PHINode *PN = cast<PHINode>(VMap[OPN]); + for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) { + Value *V = VMap[PN->getIncomingBlock(pred)]; + if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(V)) { + Value *InVal = MapValue(PN->getIncomingValue(pred), + VMap, + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges); + assert(InVal && "Unknown input value?"); + PN->setIncomingValue(pred, InVal); + PN->setIncomingBlock(pred, MappedBlock); + } else { + PN->removeIncomingValue(pred, false); + --pred, --e; // Revisit the next entry. + } + } + } + + // The loop above has removed PHI entries for those blocks that are dead + // and has updated others. However, if a block is live (i.e. copied over) + // but its terminator has been changed to not go to this block, then our + // phi nodes will have invalid entries. Update the PHI nodes in this + // case. + PHINode *PN = cast<PHINode>(NewBB->begin()); + NumPreds = std::distance(pred_begin(NewBB), pred_end(NewBB)); + if (NumPreds != PN->getNumIncomingValues()) { + assert(NumPreds < PN->getNumIncomingValues()); + // Count how many times each predecessor comes to this block. + std::map<BasicBlock*, unsigned> PredCount; + for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB); + PI != E; ++PI) + --PredCount[*PI]; + + // Figure out how many entries to remove from each PHI. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + ++PredCount[PN->getIncomingBlock(i)]; + + // At this point, the excess predecessor entries are positive in the + // map. Loop over all of the PHIs and remove excess predecessor + // entries. + BasicBlock::iterator I = NewBB->begin(); + for (; (PN = dyn_cast<PHINode>(I)); ++I) { + for (std::map<BasicBlock*, unsigned>::iterator PCI =PredCount.begin(), + E = PredCount.end(); PCI != E; ++PCI) { + BasicBlock *Pred = PCI->first; + for (unsigned NumToRemove = PCI->second; NumToRemove; --NumToRemove) + PN->removeIncomingValue(Pred, false); + } + } + } + + // If the loops above have made these phi nodes have 0 or 1 operand, + // replace them with undef or the input value. We must do this for + // correctness, because 0-operand phis are not valid. + PN = cast<PHINode>(NewBB->begin()); + if (PN->getNumIncomingValues() == 0) { + BasicBlock::iterator I = NewBB->begin(); + BasicBlock::const_iterator OldI = OldBB->begin(); + while ((PN = dyn_cast<PHINode>(I++))) { + Value *NV = UndefValue::get(PN->getType()); + PN->replaceAllUsesWith(NV); + assert(VMap[OldI] == PN && "VMap mismatch"); + VMap[OldI] = NV; + PN->eraseFromParent(); + ++OldI; + } + } + } + + // Make a second pass over the PHINodes now that all of them have been + // remapped into the new function, simplifying the PHINode and performing any + // recursive simplifications exposed. This will transparently update the + // WeakVH in the VMap. Notably, we rely on that so that if we coalesce + // two PHINodes, the iteration over the old PHIs remains valid, and the + // mapping will just map us to the new node (which may not even be a PHI + // node). + for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx) + if (PHINode *PN = dyn_cast<PHINode>(VMap[PHIToResolve[Idx]])) + recursivelySimplifyInstruction(PN, TD); + + // Now that the inlined function body has been fully constructed, go through + // and zap unconditional fall-through branches. This happen all the time when + // specializing code: code specialization turns conditional branches into + // uncond branches, and this code folds them. + Function::iterator Begin = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]); + Function::iterator I = Begin; + while (I != NewFunc->end()) { + // Check if this block has become dead during inlining or other + // simplifications. Note that the first block will appear dead, as it has + // not yet been wired up properly. + if (I != Begin && (pred_begin(I) == pred_end(I) || + I->getSinglePredecessor() == I)) { + BasicBlock *DeadBB = I++; + DeleteDeadBlock(DeadBB); + continue; + } + + // We need to simplify conditional branches and switches with a constant + // operand. We try to prune these out when cloning, but if the + // simplification required looking through PHI nodes, those are only + // available after forming the full basic block. That may leave some here, + // and we still want to prune the dead code as early as possible. + ConstantFoldTerminator(I); + + BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator()); + if (!BI || BI->isConditional()) { ++I; continue; } + + BasicBlock *Dest = BI->getSuccessor(0); + if (!Dest->getSinglePredecessor()) { + ++I; continue; + } + + // We shouldn't be able to get single-entry PHI nodes here, as instsimplify + // above should have zapped all of them.. + assert(!isa<PHINode>(Dest->begin())); + + // We know all single-entry PHI nodes in the inlined function have been + // removed, so we just need to splice the blocks. + BI->eraseFromParent(); + + // Make all PHI nodes that referred to Dest now refer to I as their source. + Dest->replaceAllUsesWith(I); + + // Move all the instructions in the succ to the pred. + I->getInstList().splice(I->end(), Dest->getInstList()); + + // Remove the dest block. + Dest->eraseFromParent(); + + // Do not increment I, iteratively merge all things this block branches to. + } + + // Make a final pass over the basic blocks from theh old function to gather + // any return instructions which survived folding. We have to do this here + // because we can iteratively remove and merge returns above. + for (Function::iterator I = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]), + E = NewFunc->end(); + I != E; ++I) + if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator())) + Returns.push_back(RI); +} diff --git a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp new file mode 100644 index 000000000000..64df089e1b81 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp @@ -0,0 +1,122 @@ +//===- CloneModule.cpp - Clone an entire module ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the CloneModule interface which makes a copy of an +// entire module. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Module.h" +#include "llvm/Transforms/Utils/ValueMapper.h" +using namespace llvm; + +/// CloneModule - Return an exact copy of the specified module. This is not as +/// easy as it might seem because we have to worry about making copies of global +/// variables and functions, and making their (initializers and references, +/// respectively) refer to the right globals. +/// +Module *llvm::CloneModule(const Module *M) { + // Create the value map that maps things from the old module over to the new + // module. + ValueToValueMapTy VMap; + return CloneModule(M, VMap); +} + +Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { + // First off, we need to create the new module. + Module *New = new Module(M->getModuleIdentifier(), M->getContext()); + New->setDataLayout(M->getDataLayout()); + New->setTargetTriple(M->getTargetTriple()); + New->setModuleInlineAsm(M->getModuleInlineAsm()); + + // Loop over all of the global variables, making corresponding globals in the + // new module. Here we add them to the VMap and to the new Module. We + // don't worry about attributes or initializers, they will come later. + // + for (Module::const_global_iterator I = M->global_begin(), E = M->global_end(); + I != E; ++I) { + GlobalVariable *GV = new GlobalVariable(*New, + I->getType()->getElementType(), + I->isConstant(), I->getLinkage(), + (Constant*) 0, I->getName(), + (GlobalVariable*) 0, + I->getThreadLocalMode(), + I->getType()->getAddressSpace()); + GV->copyAttributesFrom(I); + VMap[I] = GV; + } + + // Loop over the functions in the module, making external functions as before + for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) { + Function *NF = + Function::Create(cast<FunctionType>(I->getType()->getElementType()), + I->getLinkage(), I->getName(), New); + NF->copyAttributesFrom(I); + VMap[I] = NF; + } + + // Loop over the aliases in the module + for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); + I != E; ++I) { + GlobalAlias *GA = new GlobalAlias(I->getType(), I->getLinkage(), + I->getName(), NULL, New); + GA->copyAttributesFrom(I); + VMap[I] = GA; + } + + // Now that all of the things that global variable initializer can refer to + // have been created, loop through and copy the global variable referrers + // over... We also set the attributes on the global now. + // + for (Module::const_global_iterator I = M->global_begin(), E = M->global_end(); + I != E; ++I) { + GlobalVariable *GV = cast<GlobalVariable>(VMap[I]); + if (I->hasInitializer()) + GV->setInitializer(MapValue(I->getInitializer(), VMap)); + } + + // Similarly, copy over function bodies now... + // + for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) { + Function *F = cast<Function>(VMap[I]); + if (!I->isDeclaration()) { + Function::arg_iterator DestI = F->arg_begin(); + for (Function::const_arg_iterator J = I->arg_begin(); J != I->arg_end(); + ++J) { + DestI->setName(J->getName()); + VMap[J] = DestI++; + } + + SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned. + CloneFunctionInto(F, I, VMap, /*ModuleLevelChanges=*/true, Returns); + } + } + + // And aliases + for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); + I != E; ++I) { + GlobalAlias *GA = cast<GlobalAlias>(VMap[I]); + if (const Constant *C = I->getAliasee()) + GA->setAliasee(MapValue(C, VMap)); + } + + // And named metadata.... + for (Module::const_named_metadata_iterator I = M->named_metadata_begin(), + E = M->named_metadata_end(); I != E; ++I) { + const NamedMDNode &NMD = *I; + NamedMDNode *NewNMD = New->getOrInsertNamedMetadata(NMD.getName()); + for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i) + NewNMD->addOperand(MapValue(NMD.getOperand(i), VMap)); + } + + return New; +} diff --git a/contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp b/contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp new file mode 100644 index 000000000000..8fa412a18b99 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp @@ -0,0 +1,96 @@ +//===- CmpInstAnalysis.cpp - Utils to help fold compares ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file holds routines to help analyse compare instructions +// and fold them into constants or other compare instructions +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/CmpInstAnalysis.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" + +using namespace llvm; + +/// getICmpCode - Encode a icmp predicate into a three bit mask. These bits +/// are carefully arranged to allow folding of expressions such as: +/// +/// (A < B) | (A > B) --> (A != B) +/// +/// Note that this is only valid if the first and second predicates have the +/// same sign. Is illegal to do: (A u< B) | (A s> B) +/// +/// Three bits are used to represent the condition, as follows: +/// 0 A > B +/// 1 A == B +/// 2 A < B +/// +/// <=> Value Definition +/// 000 0 Always false +/// 001 1 A > B +/// 010 2 A == B +/// 011 3 A >= B +/// 100 4 A < B +/// 101 5 A != B +/// 110 6 A <= B +/// 111 7 Always true +/// +unsigned llvm::getICmpCode(const ICmpInst *ICI, bool InvertPred) { + ICmpInst::Predicate Pred = InvertPred ? ICI->getInversePredicate() + : ICI->getPredicate(); + switch (Pred) { + // False -> 0 + case ICmpInst::ICMP_UGT: return 1; // 001 + case ICmpInst::ICMP_SGT: return 1; // 001 + case ICmpInst::ICMP_EQ: return 2; // 010 + case ICmpInst::ICMP_UGE: return 3; // 011 + case ICmpInst::ICMP_SGE: return 3; // 011 + case ICmpInst::ICMP_ULT: return 4; // 100 + case ICmpInst::ICMP_SLT: return 4; // 100 + case ICmpInst::ICMP_NE: return 5; // 101 + case ICmpInst::ICMP_ULE: return 6; // 110 + case ICmpInst::ICMP_SLE: return 6; // 110 + // True -> 7 + default: + llvm_unreachable("Invalid ICmp predicate!"); + } +} + +/// getICmpValue - This is the complement of getICmpCode, which turns an +/// opcode and two operands into either a constant true or false, or the +/// predicate for a new ICmp instruction. The sign is passed in to determine +/// which kind of predicate to use in the new icmp instruction. +/// Non-NULL return value will be a true or false constant. +/// NULL return means a new ICmp is needed. The predicate for which is +/// output in NewICmpPred. +Value *llvm::getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS, + CmpInst::Predicate &NewICmpPred) { + switch (Code) { + default: llvm_unreachable("Illegal ICmp code!"); + case 0: // False. + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); + case 1: NewICmpPred = Sign ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break; + case 2: NewICmpPred = ICmpInst::ICMP_EQ; break; + case 3: NewICmpPred = Sign ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break; + case 4: NewICmpPred = Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break; + case 5: NewICmpPred = ICmpInst::ICMP_NE; break; + case 6: NewICmpPred = Sign ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break; + case 7: // True. + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1); + } + return NULL; +} + +/// PredicatesFoldable - Return true if both predicates match sign or if at +/// least one of them is an equality comparison (which is signless). +bool llvm::PredicatesFoldable(ICmpInst::Predicate p1, ICmpInst::Predicate p2) { + return (CmpInst::isSigned(p1) == CmpInst::isSigned(p2)) || + (CmpInst::isSigned(p1) && ICmpInst::isEquality(p2)) || + (CmpInst::isSigned(p2) && ICmpInst::isEquality(p1)); +} diff --git a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp new file mode 100644 index 000000000000..6f0086443693 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -0,0 +1,779 @@ +//===- CodeExtractor.cpp - Pull code region into a new function -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the interface to tear out a code region, such as an +// individual loop or a parallel section, into a new function, replacing it with +// a call to the new function. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/CodeExtractor.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/RegionInfo.h" +#include "llvm/Analysis/RegionIterator.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include <algorithm> +#include <set> +using namespace llvm; + +// Provide a command-line option to aggregate function arguments into a struct +// for functions produced by the code extractor. This is useful when converting +// extracted functions to pthread-based code, as only one argument (void*) can +// be passed in to pthread_create(). +static cl::opt<bool> +AggregateArgsOpt("aggregate-extracted-args", cl::Hidden, + cl::desc("Aggregate arguments to code-extracted functions")); + +/// \brief Test whether a block is valid for extraction. +static bool isBlockValidForExtraction(const BasicBlock &BB) { + // Landing pads must be in the function where they were inserted for cleanup. + if (BB.isLandingPad()) + return false; + + // Don't hoist code containing allocas, invokes, or vastarts. + for (BasicBlock::const_iterator I = BB.begin(), E = BB.end(); I != E; ++I) { + if (isa<AllocaInst>(I) || isa<InvokeInst>(I)) + return false; + if (const CallInst *CI = dyn_cast<CallInst>(I)) + if (const Function *F = CI->getCalledFunction()) + if (F->getIntrinsicID() == Intrinsic::vastart) + return false; + } + + return true; +} + +/// \brief Build a set of blocks to extract if the input blocks are viable. +template <typename IteratorT> +static SetVector<BasicBlock *> buildExtractionBlockSet(IteratorT BBBegin, + IteratorT BBEnd) { + SetVector<BasicBlock *> Result; + + assert(BBBegin != BBEnd); + + // Loop over the blocks, adding them to our set-vector, and aborting with an + // empty set if we encounter invalid blocks. + for (IteratorT I = BBBegin, E = BBEnd; I != E; ++I) { + if (!Result.insert(*I)) + llvm_unreachable("Repeated basic blocks in extraction input"); + + if (!isBlockValidForExtraction(**I)) { + Result.clear(); + return Result; + } + } + +#ifndef NDEBUG + for (SetVector<BasicBlock *>::iterator I = llvm::next(Result.begin()), + E = Result.end(); + I != E; ++I) + for (pred_iterator PI = pred_begin(*I), PE = pred_end(*I); + PI != PE; ++PI) + assert(Result.count(*PI) && + "No blocks in this region may have entries from outside the region" + " except for the first block!"); +#endif + + return Result; +} + +/// \brief Helper to call buildExtractionBlockSet with an ArrayRef. +static SetVector<BasicBlock *> +buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs) { + return buildExtractionBlockSet(BBs.begin(), BBs.end()); +} + +/// \brief Helper to call buildExtractionBlockSet with a RegionNode. +static SetVector<BasicBlock *> +buildExtractionBlockSet(const RegionNode &RN) { + if (!RN.isSubRegion()) + // Just a single BasicBlock. + return buildExtractionBlockSet(RN.getNodeAs<BasicBlock>()); + + const Region &R = *RN.getNodeAs<Region>(); + + return buildExtractionBlockSet(R.block_begin(), R.block_end()); +} + +CodeExtractor::CodeExtractor(BasicBlock *BB, bool AggregateArgs) + : DT(0), AggregateArgs(AggregateArgs||AggregateArgsOpt), + Blocks(buildExtractionBlockSet(BB)), NumExitBlocks(~0U) {} + +CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT, + bool AggregateArgs) + : DT(DT), AggregateArgs(AggregateArgs||AggregateArgsOpt), + Blocks(buildExtractionBlockSet(BBs)), NumExitBlocks(~0U) {} + +CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs) + : DT(&DT), AggregateArgs(AggregateArgs||AggregateArgsOpt), + Blocks(buildExtractionBlockSet(L.getBlocks())), NumExitBlocks(~0U) {} + +CodeExtractor::CodeExtractor(DominatorTree &DT, const RegionNode &RN, + bool AggregateArgs) + : DT(&DT), AggregateArgs(AggregateArgs||AggregateArgsOpt), + Blocks(buildExtractionBlockSet(RN)), NumExitBlocks(~0U) {} + +/// definedInRegion - Return true if the specified value is defined in the +/// extracted region. +static bool definedInRegion(const SetVector<BasicBlock *> &Blocks, Value *V) { + if (Instruction *I = dyn_cast<Instruction>(V)) + if (Blocks.count(I->getParent())) + return true; + return false; +} + +/// definedInCaller - Return true if the specified value is defined in the +/// function being code extracted, but not in the region being extracted. +/// These values must be passed in as live-ins to the function. +static bool definedInCaller(const SetVector<BasicBlock *> &Blocks, Value *V) { + if (isa<Argument>(V)) return true; + if (Instruction *I = dyn_cast<Instruction>(V)) + if (!Blocks.count(I->getParent())) + return true; + return false; +} + +void CodeExtractor::findInputsOutputs(ValueSet &Inputs, + ValueSet &Outputs) const { + for (SetVector<BasicBlock *>::const_iterator I = Blocks.begin(), + E = Blocks.end(); + I != E; ++I) { + BasicBlock *BB = *I; + + // If a used value is defined outside the region, it's an input. If an + // instruction is used outside the region, it's an output. + for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); + II != IE; ++II) { + for (User::op_iterator OI = II->op_begin(), OE = II->op_end(); + OI != OE; ++OI) + if (definedInCaller(Blocks, *OI)) + Inputs.insert(*OI); + + for (Value::use_iterator UI = II->use_begin(), UE = II->use_end(); + UI != UE; ++UI) + if (!definedInRegion(Blocks, *UI)) { + Outputs.insert(II); + break; + } + } + } +} + +/// severSplitPHINodes - If a PHI node has multiple inputs from outside of the +/// region, we need to split the entry block of the region so that the PHI node +/// is easier to deal with. +void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { + unsigned NumPredsFromRegion = 0; + unsigned NumPredsOutsideRegion = 0; + + if (Header != &Header->getParent()->getEntryBlock()) { + PHINode *PN = dyn_cast<PHINode>(Header->begin()); + if (!PN) return; // No PHI nodes. + + // If the header node contains any PHI nodes, check to see if there is more + // than one entry from outside the region. If so, we need to sever the + // header block into two. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (Blocks.count(PN->getIncomingBlock(i))) + ++NumPredsFromRegion; + else + ++NumPredsOutsideRegion; + + // If there is one (or fewer) predecessor from outside the region, we don't + // need to do anything special. + if (NumPredsOutsideRegion <= 1) return; + } + + // Otherwise, we need to split the header block into two pieces: one + // containing PHI nodes merging values from outside of the region, and a + // second that contains all of the code for the block and merges back any + // incoming values from inside of the region. + BasicBlock::iterator AfterPHIs = Header->getFirstNonPHI(); + BasicBlock *NewBB = Header->splitBasicBlock(AfterPHIs, + Header->getName()+".ce"); + + // We only want to code extract the second block now, and it becomes the new + // header of the region. + BasicBlock *OldPred = Header; + Blocks.remove(OldPred); + Blocks.insert(NewBB); + Header = NewBB; + + // Okay, update dominator sets. The blocks that dominate the new one are the + // blocks that dominate TIBB plus the new block itself. + if (DT) + DT->splitBlock(NewBB); + + // Okay, now we need to adjust the PHI nodes and any branches from within the + // region to go to the new header block instead of the old header block. + if (NumPredsFromRegion) { + PHINode *PN = cast<PHINode>(OldPred->begin()); + // Loop over all of the predecessors of OldPred that are in the region, + // changing them to branch to NewBB instead. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (Blocks.count(PN->getIncomingBlock(i))) { + TerminatorInst *TI = PN->getIncomingBlock(i)->getTerminator(); + TI->replaceUsesOfWith(OldPred, NewBB); + } + + // Okay, everything within the region is now branching to the right block, we + // just have to update the PHI nodes now, inserting PHI nodes into NewBB. + for (AfterPHIs = OldPred->begin(); isa<PHINode>(AfterPHIs); ++AfterPHIs) { + PHINode *PN = cast<PHINode>(AfterPHIs); + // Create a new PHI node in the new region, which has an incoming value + // from OldPred of PN. + PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion, + PN->getName()+".ce", NewBB->begin()); + NewPN->addIncoming(PN, OldPred); + + // Loop over all of the incoming value in PN, moving them to NewPN if they + // are from the extracted region. + for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) { + if (Blocks.count(PN->getIncomingBlock(i))) { + NewPN->addIncoming(PN->getIncomingValue(i), PN->getIncomingBlock(i)); + PN->removeIncomingValue(i); + --i; + } + } + } + } +} + +void CodeExtractor::splitReturnBlocks() { + for (SetVector<BasicBlock *>::iterator I = Blocks.begin(), E = Blocks.end(); + I != E; ++I) + if (ReturnInst *RI = dyn_cast<ReturnInst>((*I)->getTerminator())) { + BasicBlock *New = (*I)->splitBasicBlock(RI, (*I)->getName()+".ret"); + if (DT) { + // Old dominates New. New node dominates all other nodes dominated + // by Old. + DomTreeNode *OldNode = DT->getNode(*I); + SmallVector<DomTreeNode*, 8> Children; + for (DomTreeNode::iterator DI = OldNode->begin(), DE = OldNode->end(); + DI != DE; ++DI) + Children.push_back(*DI); + + DomTreeNode *NewNode = DT->addNewBlock(New, *I); + + for (SmallVectorImpl<DomTreeNode *>::iterator I = Children.begin(), + E = Children.end(); I != E; ++I) + DT->changeImmediateDominator(*I, NewNode); + } + } +} + +/// constructFunction - make a function based on inputs and outputs, as follows: +/// f(in0, ..., inN, out0, ..., outN) +/// +Function *CodeExtractor::constructFunction(const ValueSet &inputs, + const ValueSet &outputs, + BasicBlock *header, + BasicBlock *newRootNode, + BasicBlock *newHeader, + Function *oldFunction, + Module *M) { + DEBUG(dbgs() << "inputs: " << inputs.size() << "\n"); + DEBUG(dbgs() << "outputs: " << outputs.size() << "\n"); + + // This function returns unsigned, outputs will go back by reference. + switch (NumExitBlocks) { + case 0: + case 1: RetTy = Type::getVoidTy(header->getContext()); break; + case 2: RetTy = Type::getInt1Ty(header->getContext()); break; + default: RetTy = Type::getInt16Ty(header->getContext()); break; + } + + std::vector<Type*> paramTy; + + // Add the types of the input values to the function's argument list + for (ValueSet::const_iterator i = inputs.begin(), e = inputs.end(); + i != e; ++i) { + const Value *value = *i; + DEBUG(dbgs() << "value used in func: " << *value << "\n"); + paramTy.push_back(value->getType()); + } + + // Add the types of the output values to the function's argument list. + for (ValueSet::const_iterator I = outputs.begin(), E = outputs.end(); + I != E; ++I) { + DEBUG(dbgs() << "instr used in func: " << **I << "\n"); + if (AggregateArgs) + paramTy.push_back((*I)->getType()); + else + paramTy.push_back(PointerType::getUnqual((*I)->getType())); + } + + DEBUG(dbgs() << "Function type: " << *RetTy << " f("); + for (std::vector<Type*>::iterator i = paramTy.begin(), + e = paramTy.end(); i != e; ++i) + DEBUG(dbgs() << **i << ", "); + DEBUG(dbgs() << ")\n"); + + if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { + PointerType *StructPtr = + PointerType::getUnqual(StructType::get(M->getContext(), paramTy)); + paramTy.clear(); + paramTy.push_back(StructPtr); + } + FunctionType *funcType = + FunctionType::get(RetTy, paramTy, false); + + // Create the new function + Function *newFunction = Function::Create(funcType, + GlobalValue::InternalLinkage, + oldFunction->getName() + "_" + + header->getName(), M); + // If the old function is no-throw, so is the new one. + if (oldFunction->doesNotThrow()) + newFunction->setDoesNotThrow(); + + newFunction->getBasicBlockList().push_back(newRootNode); + + // Create an iterator to name all of the arguments we inserted. + Function::arg_iterator AI = newFunction->arg_begin(); + + // Rewrite all users of the inputs in the extracted region to use the + // arguments (or appropriate addressing into struct) instead. + for (unsigned i = 0, e = inputs.size(); i != e; ++i) { + Value *RewriteVal; + if (AggregateArgs) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext())); + Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i); + TerminatorInst *TI = newFunction->begin()->getTerminator(); + GetElementPtrInst *GEP = + GetElementPtrInst::Create(AI, Idx, "gep_" + inputs[i]->getName(), TI); + RewriteVal = new LoadInst(GEP, "loadgep_" + inputs[i]->getName(), TI); + } else + RewriteVal = AI++; + + std::vector<User*> Users(inputs[i]->use_begin(), inputs[i]->use_end()); + for (std::vector<User*>::iterator use = Users.begin(), useE = Users.end(); + use != useE; ++use) + if (Instruction* inst = dyn_cast<Instruction>(*use)) + if (Blocks.count(inst->getParent())) + inst->replaceUsesOfWith(inputs[i], RewriteVal); + } + + // Set names for input and output arguments. + if (!AggregateArgs) { + AI = newFunction->arg_begin(); + for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++AI) + AI->setName(inputs[i]->getName()); + for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++AI) + AI->setName(outputs[i]->getName()+".out"); + } + + // Rewrite branches to basic blocks outside of the loop to new dummy blocks + // within the new function. This must be done before we lose track of which + // blocks were originally in the code region. + std::vector<User*> Users(header->use_begin(), header->use_end()); + for (unsigned i = 0, e = Users.size(); i != e; ++i) + // The BasicBlock which contains the branch is not in the region + // modify the branch target to a new block + if (TerminatorInst *TI = dyn_cast<TerminatorInst>(Users[i])) + if (!Blocks.count(TI->getParent()) && + TI->getParent()->getParent() == oldFunction) + TI->replaceUsesOfWith(header, newHeader); + + return newFunction; +} + +/// FindPhiPredForUseInBlock - Given a value and a basic block, find a PHI +/// that uses the value within the basic block, and return the predecessor +/// block associated with that use, or return 0 if none is found. +static BasicBlock* FindPhiPredForUseInBlock(Value* Used, BasicBlock* BB) { + for (Value::use_iterator UI = Used->use_begin(), + UE = Used->use_end(); UI != UE; ++UI) { + PHINode *P = dyn_cast<PHINode>(*UI); + if (P && P->getParent() == BB) + return P->getIncomingBlock(UI); + } + + return 0; +} + +/// emitCallAndSwitchStatement - This method sets up the caller side by adding +/// the call instruction, splitting any PHI nodes in the header block as +/// necessary. +void CodeExtractor:: +emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, + ValueSet &inputs, ValueSet &outputs) { + // Emit a call to the new function, passing in: *pointer to struct (if + // aggregating parameters), or plan inputs and allocated memory for outputs + std::vector<Value*> params, StructValues, ReloadOutputs, Reloads; + + LLVMContext &Context = newFunction->getContext(); + + // Add inputs as params, or to be filled into the struct + for (ValueSet::iterator i = inputs.begin(), e = inputs.end(); i != e; ++i) + if (AggregateArgs) + StructValues.push_back(*i); + else + params.push_back(*i); + + // Create allocas for the outputs + for (ValueSet::iterator i = outputs.begin(), e = outputs.end(); i != e; ++i) { + if (AggregateArgs) { + StructValues.push_back(*i); + } else { + AllocaInst *alloca = + new AllocaInst((*i)->getType(), 0, (*i)->getName()+".loc", + codeReplacer->getParent()->begin()->begin()); + ReloadOutputs.push_back(alloca); + params.push_back(alloca); + } + } + + AllocaInst *Struct = 0; + if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { + std::vector<Type*> ArgTypes; + for (ValueSet::iterator v = StructValues.begin(), + ve = StructValues.end(); v != ve; ++v) + ArgTypes.push_back((*v)->getType()); + + // Allocate a struct at the beginning of this function + Type *StructArgTy = StructType::get(newFunction->getContext(), ArgTypes); + Struct = + new AllocaInst(StructArgTy, 0, "structArg", + codeReplacer->getParent()->begin()->begin()); + params.push_back(Struct); + + for (unsigned i = 0, e = inputs.size(); i != e; ++i) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); + GetElementPtrInst *GEP = + GetElementPtrInst::Create(Struct, Idx, + "gep_" + StructValues[i]->getName()); + codeReplacer->getInstList().push_back(GEP); + StoreInst *SI = new StoreInst(StructValues[i], GEP); + codeReplacer->getInstList().push_back(SI); + } + } + + // Emit the call to the function + CallInst *call = CallInst::Create(newFunction, params, + NumExitBlocks > 1 ? "targetBlock" : ""); + codeReplacer->getInstList().push_back(call); + + Function::arg_iterator OutputArgBegin = newFunction->arg_begin(); + unsigned FirstOut = inputs.size(); + if (!AggregateArgs) + std::advance(OutputArgBegin, inputs.size()); + + // Reload the outputs passed in by reference + for (unsigned i = 0, e = outputs.size(); i != e; ++i) { + Value *Output = 0; + if (AggregateArgs) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); + GetElementPtrInst *GEP + = GetElementPtrInst::Create(Struct, Idx, + "gep_reload_" + outputs[i]->getName()); + codeReplacer->getInstList().push_back(GEP); + Output = GEP; + } else { + Output = ReloadOutputs[i]; + } + LoadInst *load = new LoadInst(Output, outputs[i]->getName()+".reload"); + Reloads.push_back(load); + codeReplacer->getInstList().push_back(load); + std::vector<User*> Users(outputs[i]->use_begin(), outputs[i]->use_end()); + for (unsigned u = 0, e = Users.size(); u != e; ++u) { + Instruction *inst = cast<Instruction>(Users[u]); + if (!Blocks.count(inst->getParent())) + inst->replaceUsesOfWith(outputs[i], load); + } + } + + // Now we can emit a switch statement using the call as a value. + SwitchInst *TheSwitch = + SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)), + codeReplacer, 0, codeReplacer); + + // Since there may be multiple exits from the original region, make the new + // function return an unsigned, switch on that number. This loop iterates + // over all of the blocks in the extracted region, updating any terminator + // instructions in the to-be-extracted region that branch to blocks that are + // not in the region to be extracted. + std::map<BasicBlock*, BasicBlock*> ExitBlockMap; + + unsigned switchVal = 0; + for (SetVector<BasicBlock*>::const_iterator i = Blocks.begin(), + e = Blocks.end(); i != e; ++i) { + TerminatorInst *TI = (*i)->getTerminator(); + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + if (!Blocks.count(TI->getSuccessor(i))) { + BasicBlock *OldTarget = TI->getSuccessor(i); + // add a new basic block which returns the appropriate value + BasicBlock *&NewTarget = ExitBlockMap[OldTarget]; + if (!NewTarget) { + // If we don't already have an exit stub for this non-extracted + // destination, create one now! + NewTarget = BasicBlock::Create(Context, + OldTarget->getName() + ".exitStub", + newFunction); + unsigned SuccNum = switchVal++; + + Value *brVal = 0; + switch (NumExitBlocks) { + case 0: + case 1: break; // No value needed. + case 2: // Conditional branch, return a bool + brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum); + break; + default: + brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum); + break; + } + + ReturnInst *NTRet = ReturnInst::Create(Context, brVal, NewTarget); + + // Update the switch instruction. + TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context), + SuccNum), + OldTarget); + + // Restore values just before we exit + Function::arg_iterator OAI = OutputArgBegin; + for (unsigned out = 0, e = outputs.size(); out != e; ++out) { + // For an invoke, the normal destination is the only one that is + // dominated by the result of the invocation + BasicBlock *DefBlock = cast<Instruction>(outputs[out])->getParent(); + + bool DominatesDef = true; + + if (InvokeInst *Invoke = dyn_cast<InvokeInst>(outputs[out])) { + DefBlock = Invoke->getNormalDest(); + + // Make sure we are looking at the original successor block, not + // at a newly inserted exit block, which won't be in the dominator + // info. + for (std::map<BasicBlock*, BasicBlock*>::iterator I = + ExitBlockMap.begin(), E = ExitBlockMap.end(); I != E; ++I) + if (DefBlock == I->second) { + DefBlock = I->first; + break; + } + + // In the extract block case, if the block we are extracting ends + // with an invoke instruction, make sure that we don't emit a + // store of the invoke value for the unwind block. + if (!DT && DefBlock != OldTarget) + DominatesDef = false; + } + + if (DT) { + DominatesDef = DT->dominates(DefBlock, OldTarget); + + // If the output value is used by a phi in the target block, + // then we need to test for dominance of the phi's predecessor + // instead. Unfortunately, this a little complicated since we + // have already rewritten uses of the value to uses of the reload. + BasicBlock* pred = FindPhiPredForUseInBlock(Reloads[out], + OldTarget); + if (pred && DT && DT->dominates(DefBlock, pred)) + DominatesDef = true; + } + + if (DominatesDef) { + if (AggregateArgs) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), + FirstOut+out); + GetElementPtrInst *GEP = + GetElementPtrInst::Create(OAI, Idx, + "gep_" + outputs[out]->getName(), + NTRet); + new StoreInst(outputs[out], GEP, NTRet); + } else { + new StoreInst(outputs[out], OAI, NTRet); + } + } + // Advance output iterator even if we don't emit a store + if (!AggregateArgs) ++OAI; + } + } + + // rewrite the original branch instruction with this new target + TI->setSuccessor(i, NewTarget); + } + } + + // Now that we've done the deed, simplify the switch instruction. + Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType(); + switch (NumExitBlocks) { + case 0: + // There are no successors (the block containing the switch itself), which + // means that previously this was the last part of the function, and hence + // this should be rewritten as a `ret' + + // Check if the function should return a value + if (OldFnRetTy->isVoidTy()) { + ReturnInst::Create(Context, 0, TheSwitch); // Return void + } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) { + // return what we have + ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch); + } else { + // Otherwise we must have code extracted an unwind or something, just + // return whatever we want. + ReturnInst::Create(Context, + Constant::getNullValue(OldFnRetTy), TheSwitch); + } + + TheSwitch->eraseFromParent(); + break; + case 1: + // Only a single destination, change the switch into an unconditional + // branch. + BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch); + TheSwitch->eraseFromParent(); + break; + case 2: + BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2), + call, TheSwitch); + TheSwitch->eraseFromParent(); + break; + default: + // Otherwise, make the default destination of the switch instruction be one + // of the other successors. + TheSwitch->setCondition(call); + TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks)); + // Remove redundant case + TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1)); + break; + } +} + +void CodeExtractor::moveCodeToFunction(Function *newFunction) { + Function *oldFunc = (*Blocks.begin())->getParent(); + Function::BasicBlockListType &oldBlocks = oldFunc->getBasicBlockList(); + Function::BasicBlockListType &newBlocks = newFunction->getBasicBlockList(); + + for (SetVector<BasicBlock*>::const_iterator i = Blocks.begin(), + e = Blocks.end(); i != e; ++i) { + // Delete the basic block from the old function, and the list of blocks + oldBlocks.remove(*i); + + // Insert this basic block into the new function + newBlocks.push_back(*i); + } +} + +Function *CodeExtractor::extractCodeRegion() { + if (!isEligible()) + return 0; + + ValueSet inputs, outputs; + + // Assumption: this is a single-entry code region, and the header is the first + // block in the region. + BasicBlock *header = *Blocks.begin(); + + // If we have to split PHI nodes or the entry block, do so now. + severSplitPHINodes(header); + + // If we have any return instructions in the region, split those blocks so + // that the return is not in the region. + splitReturnBlocks(); + + Function *oldFunction = header->getParent(); + + // This takes place of the original loop + BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(), + "codeRepl", oldFunction, + header); + + // The new function needs a root node because other nodes can branch to the + // head of the region, but the entry node of a function cannot have preds. + BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(), + "newFuncRoot"); + newFuncRoot->getInstList().push_back(BranchInst::Create(header)); + + // Find inputs to, outputs from the code region. + findInputsOutputs(inputs, outputs); + + SmallPtrSet<BasicBlock *, 1> ExitBlocks; + for (SetVector<BasicBlock *>::iterator I = Blocks.begin(), E = Blocks.end(); + I != E; ++I) + for (succ_iterator SI = succ_begin(*I), SE = succ_end(*I); SI != SE; ++SI) + if (!Blocks.count(*SI)) + ExitBlocks.insert(*SI); + NumExitBlocks = ExitBlocks.size(); + + // Construct new function based on inputs/outputs & add allocas for all defs. + Function *newFunction = constructFunction(inputs, outputs, header, + newFuncRoot, + codeReplacer, oldFunction, + oldFunction->getParent()); + + emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs); + + moveCodeToFunction(newFunction); + + // Loop over all of the PHI nodes in the header block, and change any + // references to the old incoming edge to be the new incoming edge. + for (BasicBlock::iterator I = header->begin(); isa<PHINode>(I); ++I) { + PHINode *PN = cast<PHINode>(I); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (!Blocks.count(PN->getIncomingBlock(i))) + PN->setIncomingBlock(i, newFuncRoot); + } + + // Look at all successors of the codeReplacer block. If any of these blocks + // had PHI nodes in them, we need to update the "from" block to be the code + // replacer, not the original block in the extracted region. + std::vector<BasicBlock*> Succs(succ_begin(codeReplacer), + succ_end(codeReplacer)); + for (unsigned i = 0, e = Succs.size(); i != e; ++i) + for (BasicBlock::iterator I = Succs[i]->begin(); isa<PHINode>(I); ++I) { + PHINode *PN = cast<PHINode>(I); + std::set<BasicBlock*> ProcessedPreds; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (Blocks.count(PN->getIncomingBlock(i))) { + if (ProcessedPreds.insert(PN->getIncomingBlock(i)).second) + PN->setIncomingBlock(i, codeReplacer); + else { + // There were multiple entries in the PHI for this block, now there + // is only one, so remove the duplicated entries. + PN->removeIncomingValue(i, false); + --i; --e; + } + } + } + + //cerr << "NEW FUNCTION: " << *newFunction; + // verifyFunction(*newFunction); + + // cerr << "OLD FUNCTION: " << *oldFunction; + // verifyFunction(*oldFunction); + + DEBUG(if (verifyFunction(*newFunction)) + report_fatal_error("verifyFunction failed!")); + return newFunction; +} diff --git a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp new file mode 100644 index 000000000000..0723b3534c2b --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp @@ -0,0 +1,149 @@ +//===- DemoteRegToStack.cpp - Move a virtual register to the stack --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Type.h" +using namespace llvm; + +/// DemoteRegToStack - This function takes a virtual register computed by an +/// Instruction and replaces it with a slot in the stack frame, allocated via +/// alloca. This allows the CFG to be changed around without fear of +/// invalidating the SSA information for the value. It returns the pointer to +/// the alloca inserted to create a stack slot for I. +AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, + Instruction *AllocaPoint) { + if (I.use_empty()) { + I.eraseFromParent(); + return 0; + } + + // Create a stack slot to hold the value. + AllocaInst *Slot; + if (AllocaPoint) { + Slot = new AllocaInst(I.getType(), 0, + I.getName()+".reg2mem", AllocaPoint); + } else { + Function *F = I.getParent()->getParent(); + Slot = new AllocaInst(I.getType(), 0, I.getName()+".reg2mem", + F->getEntryBlock().begin()); + } + + // Change all of the users of the instruction to read from the stack slot. + while (!I.use_empty()) { + Instruction *U = cast<Instruction>(I.use_back()); + if (PHINode *PN = dyn_cast<PHINode>(U)) { + // If this is a PHI node, we can't insert a load of the value before the + // use. Instead insert the load in the predecessor block corresponding + // to the incoming value. + // + // Note that if there are multiple edges from a basic block to this PHI + // node that we cannot have multiple loads. The problem is that the + // resulting PHI node will have multiple values (from each load) coming in + // from the same block, which is illegal SSA form. For this reason, we + // keep track of and reuse loads we insert. + DenseMap<BasicBlock*, Value*> Loads; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingValue(i) == &I) { + Value *&V = Loads[PN->getIncomingBlock(i)]; + if (V == 0) { + // Insert the load into the predecessor block + V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads, + PN->getIncomingBlock(i)->getTerminator()); + } + PN->setIncomingValue(i, V); + } + + } else { + // If this is a normal instruction, just insert a load. + Value *V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads, U); + U->replaceUsesOfWith(&I, V); + } + } + + + // Insert stores of the computed value into the stack slot. We have to be + // careful if I is an invoke instruction, because we can't insert the store + // AFTER the terminator instruction. + BasicBlock::iterator InsertPt; + if (!isa<TerminatorInst>(I)) { + InsertPt = &I; + ++InsertPt; + } else { + InvokeInst &II = cast<InvokeInst>(I); + if (II.getNormalDest()->getSinglePredecessor()) + InsertPt = II.getNormalDest()->getFirstInsertionPt(); + else { + // We cannot demote invoke instructions to the stack if their normal edge + // is critical. Therefore, split the critical edge and insert the store + // in the newly created basic block. + unsigned SuccNum = GetSuccessorNumber(I.getParent(), II.getNormalDest()); + TerminatorInst *TI = &cast<TerminatorInst>(I); + assert (isCriticalEdge(TI, SuccNum) && + "Expected a critical edge!"); + BasicBlock *BB = SplitCriticalEdge(TI, SuccNum); + assert (BB && "Unable to split critical edge."); + InsertPt = BB->getFirstInsertionPt(); + } + } + + for (; isa<PHINode>(InsertPt) || isa<LandingPadInst>(InsertPt); ++InsertPt) + /* empty */; // Don't insert before PHI nodes or landingpad instrs. + + new StoreInst(&I, Slot, InsertPt); + return Slot; +} + +/// DemotePHIToStack - This function takes a virtual register computed by a PHI +/// node and replaces it with a slot in the stack frame allocated via alloca. +/// The PHI node is deleted. It returns the pointer to the alloca inserted. +AllocaInst *llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) { + if (P->use_empty()) { + P->eraseFromParent(); + return 0; + } + + // Create a stack slot to hold the value. + AllocaInst *Slot; + if (AllocaPoint) { + Slot = new AllocaInst(P->getType(), 0, + P->getName()+".reg2mem", AllocaPoint); + } else { + Function *F = P->getParent()->getParent(); + Slot = new AllocaInst(P->getType(), 0, P->getName()+".reg2mem", + F->getEntryBlock().begin()); + } + + // Iterate over each operand inserting a store in each predecessor. + for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) { + if (InvokeInst *II = dyn_cast<InvokeInst>(P->getIncomingValue(i))) { + assert(II->getParent() != P->getIncomingBlock(i) && + "Invoke edge not supported yet"); (void)II; + } + new StoreInst(P->getIncomingValue(i), Slot, + P->getIncomingBlock(i)->getTerminator()); + } + + // Insert a load in place of the PHI and replace all uses. + BasicBlock::iterator InsertPt = P; + + for (; isa<PHINode>(InsertPt) || isa<LandingPadInst>(InsertPt); ++InsertPt) + /* empty */; // Don't insert before PHI nodes or landingpad instrs. + + Value *V = new LoadInst(Slot, P->getName()+".reload", InsertPt); + P->replaceAllUsesWith(V); + + // Delete PHI. + P->eraseFromParent(); + return Slot; +} diff --git a/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp b/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp new file mode 100644 index 000000000000..1da226bfcbe1 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp @@ -0,0 +1,486 @@ +//===- FlatternCFG.cpp - Code to perform CFG flattening ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Reduce conditional branches in CFG. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "flattencfg" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +using namespace llvm; + +namespace { +class FlattenCFGOpt { + AliasAnalysis *AA; + /// \brief Use parallel-and or parallel-or to generate conditions for + /// conditional branches. + bool FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = 0); + /// \brief If \param BB is the merge block of an if-region, attempt to merge + /// the if-region with an adjacent if-region upstream if two if-regions + /// contain identical instructions. + bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = 0); + /// \brief Compare a pair of blocks: \p Block1 and \p Block2, which + /// are from two if-regions whose entry blocks are \p Head1 and \p + /// Head2. \returns true if \p Block1 and \p Block2 contain identical + /// instructions, and have no memory reference alias with \p Head2. + /// This is used as a legality check for merging if-regions. + bool CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2, + BasicBlock *Block1, BasicBlock *Block2); + +public: + FlattenCFGOpt(AliasAnalysis *AA) : AA(AA) {} + bool run(BasicBlock *BB); +}; +} + +/// If \param [in] BB has more than one predecessor that is a conditional +/// branch, attempt to use parallel and/or for the branch condition. \returns +/// true on success. +/// +/// Before: +/// ...... +/// %cmp10 = fcmp une float %tmp1, %tmp2 +/// br i1 %cmp1, label %if.then, label %lor.rhs +/// +/// lor.rhs: +/// ...... +/// %cmp11 = fcmp une float %tmp3, %tmp4 +/// br i1 %cmp11, label %if.then, label %ifend +/// +/// if.end: // the merge block +/// ...... +/// +/// if.then: // has two predecessors, both of them contains conditional branch. +/// ...... +/// br label %if.end; +/// +/// After: +/// ...... +/// %cmp10 = fcmp une float %tmp1, %tmp2 +/// ...... +/// %cmp11 = fcmp une float %tmp3, %tmp4 +/// %cmp12 = or i1 %cmp10, %cmp11 // parallel-or mode. +/// br i1 %cmp12, label %if.then, label %ifend +/// +/// if.end: +/// ...... +/// +/// if.then: +/// ...... +/// br label %if.end; +/// +/// Current implementation handles two cases. +/// Case 1: \param BB is on the else-path. +/// +/// BB1 +/// / | +/// BB2 | +/// / \ | +/// BB3 \ | where, BB1, BB2 contain conditional branches. +/// \ | / BB3 contains unconditional branch. +/// \ | / BB4 corresponds to \param BB which is also the merge. +/// BB => BB4 +/// +/// +/// Corresponding source code: +/// +/// if (a == b && c == d) +/// statement; // BB3 +/// +/// Case 2: \param BB BB is on the then-path. +/// +/// BB1 +/// / | +/// | BB2 +/// \ / | where BB1, BB2 contain conditional branches. +/// BB => BB3 | BB3 contains unconditiona branch and corresponds +/// \ / to \param BB. BB4 is the merge. +/// BB4 +/// +/// Corresponding source code: +/// +/// if (a == b || c == d) +/// statement; // BB3 +/// +/// In both cases, \param BB is the common successor of conditional branches. +/// In Case 1, \param BB (BB4) has an unconditional branch (BB3) as +/// its predecessor. In Case 2, \param BB (BB3) only has conditional branches +/// as its predecessors. +/// +bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, + Pass *P) { + PHINode *PHI = dyn_cast<PHINode>(BB->begin()); + if (PHI) + return false; // For simplicity, avoid cases containing PHI nodes. + + BasicBlock *LastCondBlock = NULL; + BasicBlock *FirstCondBlock = NULL; + BasicBlock *UnCondBlock = NULL; + int Idx = -1; + + // Check predecessors of \param BB. + SmallPtrSet<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB)); + for (SmallPtrSetIterator<BasicBlock *> PI = Preds.begin(), PE = Preds.end(); + PI != PE; ++PI) { + BasicBlock *Pred = *PI; + BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator()); + + // All predecessors should terminate with a branch. + if (!PBI) + return false; + + BasicBlock *PP = Pred->getSinglePredecessor(); + + if (PBI->isUnconditional()) { + // Case 1: Pred (BB3) is an unconditional block, it should + // have a single predecessor (BB2) that is also a predecessor + // of \param BB (BB4) and should not have address-taken. + // There should exist only one such unconditional + // branch among the predecessors. + if (UnCondBlock || !PP || (Preds.count(PP) == 0) || + Pred->hasAddressTaken()) + return false; + + UnCondBlock = Pred; + continue; + } + + // Only conditional branches are allowed beyond this point. + assert(PBI->isConditional()); + + // Condition's unique use should be the branch instruction. + Value *PC = PBI->getCondition(); + if (!PC || !PC->hasOneUse()) + return false; + + if (PP && Preds.count(PP)) { + // These are internal condition blocks to be merged from, e.g., + // BB2 in both cases. + // Should not be address-taken. + if (Pred->hasAddressTaken()) + return false; + + // Instructions in the internal condition blocks should be safe + // to hoist up. + for (BasicBlock::iterator BI = Pred->begin(), BE = PBI; BI != BE;) { + Instruction *CI = BI++; + if (isa<PHINode>(CI) || !isSafeToSpeculativelyExecute(CI)) + return false; + } + } else { + // This is the condition block to be merged into, e.g. BB1 in + // both cases. + if (FirstCondBlock) + return false; + FirstCondBlock = Pred; + } + + // Find whether BB is uniformly on the true (or false) path + // for all of its predecessors. + BasicBlock *PS1 = PBI->getSuccessor(0); + BasicBlock *PS2 = PBI->getSuccessor(1); + BasicBlock *PS = (PS1 == BB) ? PS2 : PS1; + int CIdx = (PS1 == BB) ? 0 : 1; + + if (Idx == -1) + Idx = CIdx; + else if (CIdx != Idx) + return false; + + // PS is the successor which is not BB. Check successors to identify + // the last conditional branch. + if (Preds.count(PS) == 0) { + // Case 2. + LastCondBlock = Pred; + } else { + // Case 1 + BranchInst *BPS = dyn_cast<BranchInst>(PS->getTerminator()); + if (BPS && BPS->isUnconditional()) { + // Case 1: PS(BB3) should be an unconditional branch. + LastCondBlock = Pred; + } + } + } + + if (!FirstCondBlock || !LastCondBlock || (FirstCondBlock == LastCondBlock)) + return false; + + TerminatorInst *TBB = LastCondBlock->getTerminator(); + BasicBlock *PS1 = TBB->getSuccessor(0); + BasicBlock *PS2 = TBB->getSuccessor(1); + BranchInst *PBI1 = dyn_cast<BranchInst>(PS1->getTerminator()); + BranchInst *PBI2 = dyn_cast<BranchInst>(PS2->getTerminator()); + + // If PS1 does not jump into PS2, but PS2 jumps into PS1, + // attempt branch inversion. + if (!PBI1 || !PBI1->isUnconditional() || + (PS1->getTerminator()->getSuccessor(0) != PS2)) { + // Check whether PS2 jumps into PS1. + if (!PBI2 || !PBI2->isUnconditional() || + (PS2->getTerminator()->getSuccessor(0) != PS1)) + return false; + + // Do branch inversion. + BasicBlock *CurrBlock = LastCondBlock; + bool EverChanged = false; + while (1) { + BranchInst *BI = dyn_cast<BranchInst>(CurrBlock->getTerminator()); + CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition()); + CmpInst::Predicate Predicate = CI->getPredicate(); + // Cannonicalize icmp_ne -> icmp_eq, fcmp_one -> fcmp_oeq + if ((Predicate == CmpInst::ICMP_NE) || (Predicate == CmpInst::FCMP_ONE)) { + CI->setPredicate(ICmpInst::getInversePredicate(Predicate)); + BI->swapSuccessors(); + EverChanged = true; + } + if (CurrBlock == FirstCondBlock) + break; + CurrBlock = CurrBlock->getSinglePredecessor(); + } + return EverChanged; + } + + // PS1 must have a conditional branch. + if (!PBI1 || !PBI1->isUnconditional()) + return false; + + // PS2 should not contain PHI node. + PHI = dyn_cast<PHINode>(PS2->begin()); + if (PHI) + return false; + + // Do the transformation. + BasicBlock *CB; + BranchInst *PBI = dyn_cast<BranchInst>(FirstCondBlock->getTerminator()); + bool Iteration = true; + IRBuilder<>::InsertPointGuard Guard(Builder); + Value *PC = PBI->getCondition(); + + do { + CB = PBI->getSuccessor(1 - Idx); + // Delete the conditional branch. + FirstCondBlock->getInstList().pop_back(); + FirstCondBlock->getInstList() + .splice(FirstCondBlock->end(), CB->getInstList()); + PBI = cast<BranchInst>(FirstCondBlock->getTerminator()); + Value *CC = PBI->getCondition(); + // Merge conditions. + Builder.SetInsertPoint(PBI); + Value *NC; + if (Idx == 0) + // Case 2, use parallel or. + NC = Builder.CreateOr(PC, CC); + else + // Case 1, use parallel and. + NC = Builder.CreateAnd(PC, CC); + + PBI->replaceUsesOfWith(CC, NC); + PC = NC; + if (CB == LastCondBlock) + Iteration = false; + // Remove internal conditional branches. + CB->dropAllReferences(); + // make CB unreachable and let downstream to delete the block. + new UnreachableInst(CB->getContext(), CB); + } while (Iteration); + + DEBUG(dbgs() << "Use parallel and/or in:\n" << *FirstCondBlock); + return true; +} + +/// Compare blocks from two if-regions, where \param Head1 is the entry of the +/// 1st if-region. \param Head2 is the entry of the 2nd if-region. \param +/// Block1 is a block in the 1st if-region to compare. \param Block2 is a block +// in the 2nd if-region to compare. \returns true if \param Block1 and \param +/// Block2 have identical instructions and do not have memory reference alias +/// with \param Head2. +/// +bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2, + BasicBlock *Block1, + BasicBlock *Block2) { + TerminatorInst *PTI2 = Head2->getTerminator(); + Instruction *PBI2 = Head2->begin(); + + bool eq1 = (Block1 == Head1); + bool eq2 = (Block2 == Head2); + if (eq1 || eq2) { + // An empty then-path or else-path. + return (eq1 == eq2); + } + + // Check whether instructions in Block1 and Block2 are identical + // and do not alias with instructions in Head2. + BasicBlock::iterator iter1 = Block1->begin(); + BasicBlock::iterator end1 = Block1->getTerminator(); + BasicBlock::iterator iter2 = Block2->begin(); + BasicBlock::iterator end2 = Block2->getTerminator(); + + while (1) { + if (iter1 == end1) { + if (iter2 != end2) + return false; + break; + } + + if (!iter1->isIdenticalTo(iter2)) + return false; + + // Illegal to remove instructions with side effects except + // non-volatile stores. + if (iter1->mayHaveSideEffects()) { + Instruction *CurI = &*iter1; + StoreInst *SI = dyn_cast<StoreInst>(CurI); + if (!SI || SI->isVolatile()) + return false; + } + + // For simplicity and speed, data dependency check can be + // avoided if read from memory doesn't exist. + if (iter1->mayReadFromMemory()) + return false; + + if (iter1->mayWriteToMemory()) { + for (BasicBlock::iterator BI = PBI2, BE = PTI2; BI != BE; ++BI) { + if (BI->mayReadFromMemory() || BI->mayWriteToMemory()) { + // Check alias with Head2. + if (!AA || AA->alias(iter1, BI)) + return false; + } + } + } + ++iter1; + ++iter2; + } + + return true; +} + +/// Check whether \param BB is the merge block of a if-region. If yes, check +/// whether there exists an adjacent if-region upstream, the two if-regions +/// contain identical instructions and can be legally merged. \returns true if +/// the two if-regions are merged. +/// +/// From: +/// if (a) +/// statement; +/// if (b) +/// statement; +/// +/// To: +/// if (a || b) +/// statement; +/// +bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder, + Pass *P) { + BasicBlock *IfTrue2, *IfFalse2; + Value *IfCond2 = GetIfCondition(BB, IfTrue2, IfFalse2); + Instruction *CInst2 = dyn_cast_or_null<Instruction>(IfCond2); + if (!CInst2) + return false; + + BasicBlock *SecondEntryBlock = CInst2->getParent(); + if (SecondEntryBlock->hasAddressTaken()) + return false; + + BasicBlock *IfTrue1, *IfFalse1; + Value *IfCond1 = GetIfCondition(SecondEntryBlock, IfTrue1, IfFalse1); + Instruction *CInst1 = dyn_cast_or_null<Instruction>(IfCond1); + if (!CInst1) + return false; + + BasicBlock *FirstEntryBlock = CInst1->getParent(); + + // Either then-path or else-path should be empty. + if ((IfTrue1 != FirstEntryBlock) && (IfFalse1 != FirstEntryBlock)) + return false; + if ((IfTrue2 != SecondEntryBlock) && (IfFalse2 != SecondEntryBlock)) + return false; + + TerminatorInst *PTI2 = SecondEntryBlock->getTerminator(); + Instruction *PBI2 = SecondEntryBlock->begin(); + + if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfTrue1, + IfTrue2)) + return false; + + if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfFalse1, + IfFalse2)) + return false; + + // Check whether \param SecondEntryBlock has side-effect and is safe to + // speculate. + for (BasicBlock::iterator BI = PBI2, BE = PTI2; BI != BE; ++BI) { + Instruction *CI = BI; + if (isa<PHINode>(CI) || CI->mayHaveSideEffects() || + !isSafeToSpeculativelyExecute(CI)) + return false; + } + + // Merge \param SecondEntryBlock into \param FirstEntryBlock. + FirstEntryBlock->getInstList().pop_back(); + FirstEntryBlock->getInstList() + .splice(FirstEntryBlock->end(), SecondEntryBlock->getInstList()); + BranchInst *PBI = dyn_cast<BranchInst>(FirstEntryBlock->getTerminator()); + Value *CC = PBI->getCondition(); + BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); + BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + Builder.SetInsertPoint(PBI); + Value *NC = Builder.CreateOr(CInst1, CC); + PBI->replaceUsesOfWith(CC, NC); + Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt); + + // Remove IfTrue1 + if (IfTrue1 != FirstEntryBlock) { + IfTrue1->dropAllReferences(); + IfTrue1->eraseFromParent(); + } + + // Remove IfFalse1 + if (IfFalse1 != FirstEntryBlock) { + IfFalse1->dropAllReferences(); + IfFalse1->eraseFromParent(); + } + + // Remove \param SecondEntryBlock + SecondEntryBlock->dropAllReferences(); + SecondEntryBlock->eraseFromParent(); + DEBUG(dbgs() << "If conditions merged into:\n" << *FirstEntryBlock); + return true; +} + +bool FlattenCFGOpt::run(BasicBlock *BB) { + bool Changed = false; + assert(BB && BB->getParent() && "Block not embedded in function!"); + assert(BB->getTerminator() && "Degenerate basic block encountered!"); + + IRBuilder<> Builder(BB); + + if (FlattenParallelAndOr(BB, Builder)) + return true; + + if (MergeIfRegion(BB, Builder)) + return true; + + return Changed; +} + +/// FlattenCFG - This function is used to flatten a CFG. For +/// example, it uses parallel-and and parallel-or mode to collapse +// if-conditions and merge if-regions with identical statements. +/// +bool llvm::FlattenCFG(BasicBlock *BB, AliasAnalysis *AA) { + return FlattenCFGOpt(AA).run(BB); +} diff --git a/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp new file mode 100644 index 000000000000..5f0a563ceec0 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp @@ -0,0 +1,183 @@ +//===-- GlobalStatus.cpp - Compute status info for globals -----------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Transforms/Utils/GlobalStatus.h" + +using namespace llvm; + +/// Return the stronger of the two ordering. If the two orderings are acquire +/// and release, then return AcquireRelease. +/// +static AtomicOrdering strongerOrdering(AtomicOrdering X, AtomicOrdering Y) { + if (X == Acquire && Y == Release) + return AcquireRelease; + if (Y == Acquire && X == Release) + return AcquireRelease; + return (AtomicOrdering)std::max(X, Y); +} + +/// It is safe to destroy a constant iff it is only used by constants itself. +/// Note that constants cannot be cyclic, so this test is pretty easy to +/// implement recursively. +/// +bool llvm::isSafeToDestroyConstant(const Constant *C) { + if (isa<GlobalValue>(C)) + return false; + + for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E; + ++UI) + if (const Constant *CU = dyn_cast<Constant>(*UI)) { + if (!isSafeToDestroyConstant(CU)) + return false; + } else + return false; + return true; +} + +static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, + SmallPtrSet<const PHINode *, 16> &PhiUsers) { + for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; + ++UI) { + const User *U = *UI; + if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { + GS.HasNonInstructionUser = true; + + // If the result of the constantexpr isn't pointer type, then we won't + // know to expect it in various places. Just reject early. + if (!isa<PointerType>(CE->getType())) + return true; + + if (analyzeGlobalAux(CE, GS, PhiUsers)) + return true; + } else if (const Instruction *I = dyn_cast<Instruction>(U)) { + if (!GS.HasMultipleAccessingFunctions) { + const Function *F = I->getParent()->getParent(); + if (GS.AccessingFunction == 0) + GS.AccessingFunction = F; + else if (GS.AccessingFunction != F) + GS.HasMultipleAccessingFunctions = true; + } + if (const LoadInst *LI = dyn_cast<LoadInst>(I)) { + GS.IsLoaded = true; + // Don't hack on volatile loads. + if (LI->isVolatile()) + return true; + GS.Ordering = strongerOrdering(GS.Ordering, LI->getOrdering()); + } else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) { + // Don't allow a store OF the address, only stores TO the address. + if (SI->getOperand(0) == V) + return true; + + // Don't hack on volatile stores. + if (SI->isVolatile()) + return true; + + GS.Ordering = strongerOrdering(GS.Ordering, SI->getOrdering()); + + // If this is a direct store to the global (i.e., the global is a scalar + // value, not an aggregate), keep more specific information about + // stores. + if (GS.StoredType != GlobalStatus::Stored) { + if (const GlobalVariable *GV = + dyn_cast<GlobalVariable>(SI->getOperand(1))) { + Value *StoredVal = SI->getOperand(0); + + if (Constant *C = dyn_cast<Constant>(StoredVal)) { + if (C->isThreadDependent()) { + // The stored value changes between threads; don't track it. + return true; + } + } + + if (StoredVal == GV->getInitializer()) { + if (GS.StoredType < GlobalStatus::InitializerStored) + GS.StoredType = GlobalStatus::InitializerStored; + } else if (isa<LoadInst>(StoredVal) && + cast<LoadInst>(StoredVal)->getOperand(0) == GV) { + if (GS.StoredType < GlobalStatus::InitializerStored) + GS.StoredType = GlobalStatus::InitializerStored; + } else if (GS.StoredType < GlobalStatus::StoredOnce) { + GS.StoredType = GlobalStatus::StoredOnce; + GS.StoredOnceValue = StoredVal; + } else if (GS.StoredType == GlobalStatus::StoredOnce && + GS.StoredOnceValue == StoredVal) { + // noop. + } else { + GS.StoredType = GlobalStatus::Stored; + } + } else { + GS.StoredType = GlobalStatus::Stored; + } + } + } else if (isa<BitCastInst>(I)) { + if (analyzeGlobalAux(I, GS, PhiUsers)) + return true; + } else if (isa<GetElementPtrInst>(I)) { + if (analyzeGlobalAux(I, GS, PhiUsers)) + return true; + } else if (isa<SelectInst>(I)) { + if (analyzeGlobalAux(I, GS, PhiUsers)) + return true; + } else if (const PHINode *PN = dyn_cast<PHINode>(I)) { + // PHI nodes we can check just like select or GEP instructions, but we + // have to be careful about infinite recursion. + if (PhiUsers.insert(PN)) // Not already visited. + if (analyzeGlobalAux(I, GS, PhiUsers)) + return true; + } else if (isa<CmpInst>(I)) { + GS.IsCompared = true; + } else if (const MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) { + if (MTI->isVolatile()) + return true; + if (MTI->getArgOperand(0) == V) + GS.StoredType = GlobalStatus::Stored; + if (MTI->getArgOperand(1) == V) + GS.IsLoaded = true; + } else if (const MemSetInst *MSI = dyn_cast<MemSetInst>(I)) { + assert(MSI->getArgOperand(0) == V && "Memset only takes one pointer!"); + if (MSI->isVolatile()) + return true; + GS.StoredType = GlobalStatus::Stored; + } else if (ImmutableCallSite C = I) { + if (!C.isCallee(UI)) + return true; + GS.IsLoaded = true; + } else { + return true; // Any other non-load instruction might take address! + } + } else if (const Constant *C = dyn_cast<Constant>(U)) { + GS.HasNonInstructionUser = true; + // We might have a dead and dangling constant hanging off of here. + if (!isSafeToDestroyConstant(C)) + return true; + } else { + GS.HasNonInstructionUser = true; + // Otherwise must be some other user. + return true; + } + } + + return false; +} + +bool GlobalStatus::analyzeGlobal(const Value *V, GlobalStatus &GS) { + SmallPtrSet<const PHINode *, 16> PhiUsers; + return analyzeGlobalAux(V, GS, PhiUsers); +} + +GlobalStatus::GlobalStatus() + : IsCompared(false), IsLoaded(false), StoredType(NotStored), + StoredOnceValue(0), AccessingFunction(0), + HasMultipleAccessingFunctions(false), HasNonInstructionUser(false), + Ordering(NotAtomic) {} diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp new file mode 100644 index 000000000000..d021bcef4027 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -0,0 +1,932 @@ +//===- InlineFunction.cpp - Code to perform function inlining -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements inlining of a function into a call site, resolving +// parameters and the return value as appropriate. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Transforms/Utils/Local.h" +using namespace llvm; + +bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI, + bool InsertLifetime) { + return InlineFunction(CallSite(CI), IFI, InsertLifetime); +} +bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI, + bool InsertLifetime) { + return InlineFunction(CallSite(II), IFI, InsertLifetime); +} + +namespace { + /// A class for recording information about inlining through an invoke. + class InvokeInliningInfo { + BasicBlock *OuterResumeDest; ///< Destination of the invoke's unwind. + BasicBlock *InnerResumeDest; ///< Destination for the callee's resume. + LandingPadInst *CallerLPad; ///< LandingPadInst associated with the invoke. + PHINode *InnerEHValuesPHI; ///< PHI for EH values from landingpad insts. + SmallVector<Value*, 8> UnwindDestPHIValues; + + public: + InvokeInliningInfo(InvokeInst *II) + : OuterResumeDest(II->getUnwindDest()), InnerResumeDest(0), + CallerLPad(0), InnerEHValuesPHI(0) { + // If there are PHI nodes in the unwind destination block, we need to keep + // track of which values came into them from the invoke before removing + // the edge from this block. + llvm::BasicBlock *InvokeBB = II->getParent(); + BasicBlock::iterator I = OuterResumeDest->begin(); + for (; isa<PHINode>(I); ++I) { + // Save the value to use for this edge. + PHINode *PHI = cast<PHINode>(I); + UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB)); + } + + CallerLPad = cast<LandingPadInst>(I); + } + + /// getOuterResumeDest - The outer unwind destination is the target of + /// unwind edges introduced for calls within the inlined function. + BasicBlock *getOuterResumeDest() const { + return OuterResumeDest; + } + + BasicBlock *getInnerResumeDest(); + + LandingPadInst *getLandingPadInst() const { return CallerLPad; } + + /// forwardResume - Forward the 'resume' instruction to the caller's landing + /// pad block. When the landing pad block has only one predecessor, this is + /// a simple branch. When there is more than one predecessor, we need to + /// split the landing pad block after the landingpad instruction and jump + /// to there. + void forwardResume(ResumeInst *RI, + SmallPtrSet<LandingPadInst*, 16> &InlinedLPads); + + /// addIncomingPHIValuesFor - Add incoming-PHI values to the unwind + /// destination block for the given basic block, using the values for the + /// original invoke's source block. + void addIncomingPHIValuesFor(BasicBlock *BB) const { + addIncomingPHIValuesForInto(BB, OuterResumeDest); + } + + void addIncomingPHIValuesForInto(BasicBlock *src, BasicBlock *dest) const { + BasicBlock::iterator I = dest->begin(); + for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) { + PHINode *phi = cast<PHINode>(I); + phi->addIncoming(UnwindDestPHIValues[i], src); + } + } + }; +} + +/// getInnerResumeDest - Get or create a target for the branch from ResumeInsts. +BasicBlock *InvokeInliningInfo::getInnerResumeDest() { + if (InnerResumeDest) return InnerResumeDest; + + // Split the landing pad. + BasicBlock::iterator SplitPoint = CallerLPad; ++SplitPoint; + InnerResumeDest = + OuterResumeDest->splitBasicBlock(SplitPoint, + OuterResumeDest->getName() + ".body"); + + // The number of incoming edges we expect to the inner landing pad. + const unsigned PHICapacity = 2; + + // Create corresponding new PHIs for all the PHIs in the outer landing pad. + BasicBlock::iterator InsertPoint = InnerResumeDest->begin(); + BasicBlock::iterator I = OuterResumeDest->begin(); + for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) { + PHINode *OuterPHI = cast<PHINode>(I); + PHINode *InnerPHI = PHINode::Create(OuterPHI->getType(), PHICapacity, + OuterPHI->getName() + ".lpad-body", + InsertPoint); + OuterPHI->replaceAllUsesWith(InnerPHI); + InnerPHI->addIncoming(OuterPHI, OuterResumeDest); + } + + // Create a PHI for the exception values. + InnerEHValuesPHI = PHINode::Create(CallerLPad->getType(), PHICapacity, + "eh.lpad-body", InsertPoint); + CallerLPad->replaceAllUsesWith(InnerEHValuesPHI); + InnerEHValuesPHI->addIncoming(CallerLPad, OuterResumeDest); + + // All done. + return InnerResumeDest; +} + +/// forwardResume - Forward the 'resume' instruction to the caller's landing pad +/// block. When the landing pad block has only one predecessor, this is a simple +/// branch. When there is more than one predecessor, we need to split the +/// landing pad block after the landingpad instruction and jump to there. +void InvokeInliningInfo::forwardResume(ResumeInst *RI, + SmallPtrSet<LandingPadInst*, 16> &InlinedLPads) { + BasicBlock *Dest = getInnerResumeDest(); + LandingPadInst *OuterLPad = getLandingPadInst(); + BasicBlock *Src = RI->getParent(); + + BranchInst::Create(Dest, Src); + + // Update the PHIs in the destination. They were inserted in an order which + // makes this work. + addIncomingPHIValuesForInto(Src, Dest); + + InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src); + RI->eraseFromParent(); + + // Append the clauses from the outer landing pad instruction into the inlined + // landing pad instructions. + for (SmallPtrSet<LandingPadInst*, 16>::iterator I = InlinedLPads.begin(), + E = InlinedLPads.end(); I != E; ++I) { + LandingPadInst *InlinedLPad = *I; + for (unsigned OuterIdx = 0, OuterNum = OuterLPad->getNumClauses(); + OuterIdx != OuterNum; ++OuterIdx) + InlinedLPad->addClause(OuterLPad->getClause(OuterIdx)); + } +} + +/// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into +/// an invoke, we have to turn all of the calls that can throw into +/// invokes. This function analyze BB to see if there are any calls, and if so, +/// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI +/// nodes in that block with the values specified in InvokeDestPHIValues. +/// +/// Returns true to indicate that the next block should be skipped. +static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, + InvokeInliningInfo &Invoke) { + LandingPadInst *LPI = Invoke.getLandingPadInst(); + + for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) { + Instruction *I = BBI++; + + if (LandingPadInst *L = dyn_cast<LandingPadInst>(I)) { + unsigned NumClauses = LPI->getNumClauses(); + L->reserveClauses(NumClauses); + for (unsigned i = 0; i != NumClauses; ++i) + L->addClause(LPI->getClause(i)); + } + + // We only need to check for function calls: inlined invoke + // instructions require no special handling. + CallInst *CI = dyn_cast<CallInst>(I); + + // If this call cannot unwind, don't convert it to an invoke. + // Inline asm calls cannot throw. + if (!CI || CI->doesNotThrow() || isa<InlineAsm>(CI->getCalledValue())) + continue; + + // Convert this function call into an invoke instruction. First, split the + // basic block. + BasicBlock *Split = BB->splitBasicBlock(CI, CI->getName()+".noexc"); + + // Delete the unconditional branch inserted by splitBasicBlock + BB->getInstList().pop_back(); + + // Create the new invoke instruction. + ImmutableCallSite CS(CI); + SmallVector<Value*, 8> InvokeArgs(CS.arg_begin(), CS.arg_end()); + InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), Split, + Invoke.getOuterResumeDest(), + InvokeArgs, CI->getName(), BB); + II->setCallingConv(CI->getCallingConv()); + II->setAttributes(CI->getAttributes()); + + // Make sure that anything using the call now uses the invoke! This also + // updates the CallGraph if present, because it uses a WeakVH. + CI->replaceAllUsesWith(II); + + // Delete the original call + Split->getInstList().pop_front(); + + // Update any PHI nodes in the exceptional block to indicate that there is + // now a new entry in them. + Invoke.addIncomingPHIValuesFor(BB); + return false; + } + + return false; +} + +/// HandleInlinedInvoke - If we inlined an invoke site, we need to convert calls +/// in the body of the inlined function into invokes. +/// +/// II is the invoke instruction being inlined. FirstNewBlock is the first +/// block of the inlined code (the last block is the end of the function), +/// and InlineCodeInfo is information about the code that got inlined. +static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, + ClonedCodeInfo &InlinedCodeInfo) { + BasicBlock *InvokeDest = II->getUnwindDest(); + + Function *Caller = FirstNewBlock->getParent(); + + // The inlined code is currently at the end of the function, scan from the + // start of the inlined code to its end, checking for stuff we need to + // rewrite. + InvokeInliningInfo Invoke(II); + + // Get all of the inlined landing pad instructions. + SmallPtrSet<LandingPadInst*, 16> InlinedLPads; + for (Function::iterator I = FirstNewBlock, E = Caller->end(); I != E; ++I) + if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) + InlinedLPads.insert(II->getLandingPadInst()); + + for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){ + if (InlinedCodeInfo.ContainsCalls) + if (HandleCallsInBlockInlinedThroughInvoke(BB, Invoke)) { + // Honor a request to skip the next block. + ++BB; + continue; + } + + // Forward any resumes that are remaining here. + if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) + Invoke.forwardResume(RI, InlinedLPads); + } + + // Now that everything is happy, we have one final detail. The PHI nodes in + // the exception destination block still have entries due to the original + // invoke instruction. Eliminate these entries (which might even delete the + // PHI node) now. + InvokeDest->removePredecessor(II->getParent()); +} + +/// UpdateCallGraphAfterInlining - Once we have cloned code over from a callee +/// into the caller, update the specified callgraph to reflect the changes we +/// made. Note that it's possible that not all code was copied over, so only +/// some edges of the callgraph may remain. +static void UpdateCallGraphAfterInlining(CallSite CS, + Function::iterator FirstNewBlock, + ValueToValueMapTy &VMap, + InlineFunctionInfo &IFI) { + CallGraph &CG = *IFI.CG; + const Function *Caller = CS.getInstruction()->getParent()->getParent(); + const Function *Callee = CS.getCalledFunction(); + CallGraphNode *CalleeNode = CG[Callee]; + CallGraphNode *CallerNode = CG[Caller]; + + // Since we inlined some uninlined call sites in the callee into the caller, + // add edges from the caller to all of the callees of the callee. + CallGraphNode::iterator I = CalleeNode->begin(), E = CalleeNode->end(); + + // Consider the case where CalleeNode == CallerNode. + CallGraphNode::CalledFunctionsVector CallCache; + if (CalleeNode == CallerNode) { + CallCache.assign(I, E); + I = CallCache.begin(); + E = CallCache.end(); + } + + for (; I != E; ++I) { + const Value *OrigCall = I->first; + + ValueToValueMapTy::iterator VMI = VMap.find(OrigCall); + // Only copy the edge if the call was inlined! + if (VMI == VMap.end() || VMI->second == 0) + continue; + + // If the call was inlined, but then constant folded, there is no edge to + // add. Check for this case. + Instruction *NewCall = dyn_cast<Instruction>(VMI->second); + if (NewCall == 0) continue; + + // Remember that this call site got inlined for the client of + // InlineFunction. + IFI.InlinedCalls.push_back(NewCall); + + // It's possible that inlining the callsite will cause it to go from an + // indirect to a direct call by resolving a function pointer. If this + // happens, set the callee of the new call site to a more precise + // destination. This can also happen if the call graph node of the caller + // was just unnecessarily imprecise. + if (I->second->getFunction() == 0) + if (Function *F = CallSite(NewCall).getCalledFunction()) { + // Indirect call site resolved to direct call. + CallerNode->addCalledFunction(CallSite(NewCall), CG[F]); + + continue; + } + + CallerNode->addCalledFunction(CallSite(NewCall), I->second); + } + + // Update the call graph by deleting the edge from Callee to Caller. We must + // do this after the loop above in case Caller and Callee are the same. + CallerNode->removeCallEdgeFor(CS); +} + +/// HandleByValArgument - When inlining a call site that has a byval argument, +/// we have to make the implicit memcpy explicit by adding it. +static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, + const Function *CalledFunc, + InlineFunctionInfo &IFI, + unsigned ByValAlignment) { + Type *AggTy = cast<PointerType>(Arg->getType())->getElementType(); + + // If the called function is readonly, then it could not mutate the caller's + // copy of the byval'd memory. In this case, it is safe to elide the copy and + // temporary. + if (CalledFunc->onlyReadsMemory()) { + // If the byval argument has a specified alignment that is greater than the + // passed in pointer, then we either have to round up the input pointer or + // give up on this transformation. + if (ByValAlignment <= 1) // 0 = unspecified, 1 = no particular alignment. + return Arg; + + // If the pointer is already known to be sufficiently aligned, or if we can + // round it up to a larger alignment, then we don't need a temporary. + if (getOrEnforceKnownAlignment(Arg, ByValAlignment, + IFI.TD) >= ByValAlignment) + return Arg; + + // Otherwise, we have to make a memcpy to get a safe alignment. This is bad + // for code quality, but rarely happens and is required for correctness. + } + + LLVMContext &Context = Arg->getContext(); + + Type *VoidPtrTy = Type::getInt8PtrTy(Context); + + // Create the alloca. If we have DataLayout, use nice alignment. + unsigned Align = 1; + if (IFI.TD) + Align = IFI.TD->getPrefTypeAlignment(AggTy); + + // If the byval had an alignment specified, we *must* use at least that + // alignment, as it is required by the byval argument (and uses of the + // pointer inside the callee). + Align = std::max(Align, ByValAlignment); + + Function *Caller = TheCall->getParent()->getParent(); + + Value *NewAlloca = new AllocaInst(AggTy, 0, Align, Arg->getName(), + &*Caller->begin()->begin()); + // Emit a memcpy. + Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)}; + Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(), + Intrinsic::memcpy, + Tys); + Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall); + Value *SrcCast = new BitCastInst(Arg, VoidPtrTy, "tmp", TheCall); + + Value *Size; + if (IFI.TD == 0) + Size = ConstantExpr::getSizeOf(AggTy); + else + Size = ConstantInt::get(Type::getInt64Ty(Context), + IFI.TD->getTypeStoreSize(AggTy)); + + // Always generate a memcpy of alignment 1 here because we don't know + // the alignment of the src pointer. Other optimizations can infer + // better alignment. + Value *CallArgs[] = { + DestCast, SrcCast, Size, + ConstantInt::get(Type::getInt32Ty(Context), 1), + ConstantInt::getFalse(Context) // isVolatile + }; + IRBuilder<>(TheCall).CreateCall(MemCpyFn, CallArgs); + + // Uses of the argument in the function should use our new alloca + // instead. + return NewAlloca; +} + +// isUsedByLifetimeMarker - Check whether this Value is used by a lifetime +// intrinsic. +static bool isUsedByLifetimeMarker(Value *V) { + for (Value::use_iterator UI = V->use_begin(), UE = V->use_end(); UI != UE; + ++UI) { + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(*UI)) { + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + return true; + } + } + } + return false; +} + +// hasLifetimeMarkers - Check whether the given alloca already has +// lifetime.start or lifetime.end intrinsics. +static bool hasLifetimeMarkers(AllocaInst *AI) { + Type *Int8PtrTy = Type::getInt8PtrTy(AI->getType()->getContext()); + if (AI->getType() == Int8PtrTy) + return isUsedByLifetimeMarker(AI); + + // Do a scan to find all the casts to i8*. + for (Value::use_iterator I = AI->use_begin(), E = AI->use_end(); I != E; + ++I) { + if (I->getType() != Int8PtrTy) continue; + if (I->stripPointerCasts() != AI) continue; + if (isUsedByLifetimeMarker(*I)) + return true; + } + return false; +} + +/// updateInlinedAtInfo - Helper function used by fixupLineNumbers to +/// recursively update InlinedAtEntry of a DebugLoc. +static DebugLoc updateInlinedAtInfo(const DebugLoc &DL, + const DebugLoc &InlinedAtDL, + LLVMContext &Ctx) { + if (MDNode *IA = DL.getInlinedAt(Ctx)) { + DebugLoc NewInlinedAtDL + = updateInlinedAtInfo(DebugLoc::getFromDILocation(IA), InlinedAtDL, Ctx); + return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx), + NewInlinedAtDL.getAsMDNode(Ctx)); + } + + return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx), + InlinedAtDL.getAsMDNode(Ctx)); +} + +/// fixupLineNumbers - Update inlined instructions' line numbers to +/// to encode location where these instructions are inlined. +static void fixupLineNumbers(Function *Fn, Function::iterator FI, + Instruction *TheCall) { + DebugLoc TheCallDL = TheCall->getDebugLoc(); + if (TheCallDL.isUnknown()) + return; + + for (; FI != Fn->end(); ++FI) { + for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); + BI != BE; ++BI) { + DebugLoc DL = BI->getDebugLoc(); + if (!DL.isUnknown()) { + BI->setDebugLoc(updateInlinedAtInfo(DL, TheCallDL, BI->getContext())); + if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(BI)) { + LLVMContext &Ctx = BI->getContext(); + MDNode *InlinedAt = BI->getDebugLoc().getInlinedAt(Ctx); + DVI->setOperand(2, createInlinedVariable(DVI->getVariable(), + InlinedAt, Ctx)); + } + } + } + } +} + +/// InlineFunction - This function inlines the called function into the basic +/// block of the caller. This returns false if it is not possible to inline +/// this call. The program is still in a well defined state if this occurs +/// though. +/// +/// Note that this only does one level of inlining. For example, if the +/// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now +/// exists in the instruction stream. Similarly this will inline a recursive +/// function by one level. +bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, + bool InsertLifetime) { + Instruction *TheCall = CS.getInstruction(); + assert(TheCall->getParent() && TheCall->getParent()->getParent() && + "Instruction not in function!"); + + // If IFI has any state in it, zap it before we fill it in. + IFI.reset(); + + const Function *CalledFunc = CS.getCalledFunction(); + if (CalledFunc == 0 || // Can't inline external function or indirect + CalledFunc->isDeclaration() || // call, or call to a vararg function! + CalledFunc->getFunctionType()->isVarArg()) return false; + + // If the call to the callee is not a tail call, we must clear the 'tail' + // flags on any calls that we inline. + bool MustClearTailCallFlags = + !(isa<CallInst>(TheCall) && cast<CallInst>(TheCall)->isTailCall()); + + // If the call to the callee cannot throw, set the 'nounwind' flag on any + // calls that we inline. + bool MarkNoUnwind = CS.doesNotThrow(); + + BasicBlock *OrigBB = TheCall->getParent(); + Function *Caller = OrigBB->getParent(); + + // GC poses two hazards to inlining, which only occur when the callee has GC: + // 1. If the caller has no GC, then the callee's GC must be propagated to the + // caller. + // 2. If the caller has a differing GC, it is invalid to inline. + if (CalledFunc->hasGC()) { + if (!Caller->hasGC()) + Caller->setGC(CalledFunc->getGC()); + else if (CalledFunc->getGC() != Caller->getGC()) + return false; + } + + // Get the personality function from the callee if it contains a landing pad. + Value *CalleePersonality = 0; + for (Function::const_iterator I = CalledFunc->begin(), E = CalledFunc->end(); + I != E; ++I) + if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) { + const BasicBlock *BB = II->getUnwindDest(); + const LandingPadInst *LP = BB->getLandingPadInst(); + CalleePersonality = LP->getPersonalityFn(); + break; + } + + // Find the personality function used by the landing pads of the caller. If it + // exists, then check to see that it matches the personality function used in + // the callee. + if (CalleePersonality) { + for (Function::const_iterator I = Caller->begin(), E = Caller->end(); + I != E; ++I) + if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) { + const BasicBlock *BB = II->getUnwindDest(); + const LandingPadInst *LP = BB->getLandingPadInst(); + + // If the personality functions match, then we can perform the + // inlining. Otherwise, we can't inline. + // TODO: This isn't 100% true. Some personality functions are proper + // supersets of others and can be used in place of the other. + if (LP->getPersonalityFn() != CalleePersonality) + return false; + + break; + } + } + + // Get an iterator to the last basic block in the function, which will have + // the new function inlined after it. + Function::iterator LastBlock = &Caller->back(); + + // Make sure to capture all of the return instructions from the cloned + // function. + SmallVector<ReturnInst*, 8> Returns; + ClonedCodeInfo InlinedFunctionInfo; + Function::iterator FirstNewBlock; + + { // Scope to destroy VMap after cloning. + ValueToValueMapTy VMap; + + assert(CalledFunc->arg_size() == CS.arg_size() && + "No varargs calls can be inlined!"); + + // Calculate the vector of arguments to pass into the function cloner, which + // matches up the formal to the actual argument values. + CallSite::arg_iterator AI = CS.arg_begin(); + unsigned ArgNo = 0; + for (Function::const_arg_iterator I = CalledFunc->arg_begin(), + E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) { + Value *ActualArg = *AI; + + // When byval arguments actually inlined, we need to make the copy implied + // by them explicit. However, we don't do this if the callee is readonly + // or readnone, because the copy would be unneeded: the callee doesn't + // modify the struct. + if (CS.isByValArgument(ArgNo)) { + ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI, + CalledFunc->getParamAlignment(ArgNo+1)); + + // Calls that we inline may use the new alloca, so we need to clear + // their 'tail' flags if HandleByValArgument introduced a new alloca and + // the callee has calls. + MustClearTailCallFlags |= ActualArg != *AI; + } + + VMap[I] = ActualArg; + } + + // We want the inliner to prune the code as it copies. We would LOVE to + // have no dead or constant instructions leftover after inlining occurs + // (which can happen, e.g., because an argument was constant), but we'll be + // happy with whatever the cloner can do. + CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, + /*ModuleLevelChanges=*/false, Returns, ".i", + &InlinedFunctionInfo, IFI.TD, TheCall); + + // Remember the first block that is newly cloned over. + FirstNewBlock = LastBlock; ++FirstNewBlock; + + // Update the callgraph if requested. + if (IFI.CG) + UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI); + + // Update inlined instructions' line number information. + fixupLineNumbers(Caller, FirstNewBlock, TheCall); + } + + // If there are any alloca instructions in the block that used to be the entry + // block for the callee, move them to the entry block of the caller. First + // calculate which instruction they should be inserted before. We insert the + // instructions at the end of the current alloca list. + { + BasicBlock::iterator InsertPoint = Caller->begin()->begin(); + for (BasicBlock::iterator I = FirstNewBlock->begin(), + E = FirstNewBlock->end(); I != E; ) { + AllocaInst *AI = dyn_cast<AllocaInst>(I++); + if (AI == 0) continue; + + // If the alloca is now dead, remove it. This often occurs due to code + // specialization. + if (AI->use_empty()) { + AI->eraseFromParent(); + continue; + } + + if (!isa<Constant>(AI->getArraySize())) + continue; + + // Keep track of the static allocas that we inline into the caller. + IFI.StaticAllocas.push_back(AI); + + // Scan for the block of allocas that we can move over, and move them + // all at once. + while (isa<AllocaInst>(I) && + isa<Constant>(cast<AllocaInst>(I)->getArraySize())) { + IFI.StaticAllocas.push_back(cast<AllocaInst>(I)); + ++I; + } + + // Transfer all of the allocas over in a block. Using splice means + // that the instructions aren't removed from the symbol table, then + // reinserted. + Caller->getEntryBlock().getInstList().splice(InsertPoint, + FirstNewBlock->getInstList(), + AI, I); + } + } + + // Leave lifetime markers for the static alloca's, scoping them to the + // function we just inlined. + if (InsertLifetime && !IFI.StaticAllocas.empty()) { + IRBuilder<> builder(FirstNewBlock->begin()); + for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) { + AllocaInst *AI = IFI.StaticAllocas[ai]; + + // If the alloca is already scoped to something smaller than the whole + // function then there's no need to add redundant, less accurate markers. + if (hasLifetimeMarkers(AI)) + continue; + + // Try to determine the size of the allocation. + ConstantInt *AllocaSize = 0; + if (ConstantInt *AIArraySize = + dyn_cast<ConstantInt>(AI->getArraySize())) { + if (IFI.TD) { + Type *AllocaType = AI->getAllocatedType(); + uint64_t AllocaTypeSize = IFI.TD->getTypeAllocSize(AllocaType); + uint64_t AllocaArraySize = AIArraySize->getLimitedValue(); + assert(AllocaArraySize > 0 && "array size of AllocaInst is zero"); + // Check that array size doesn't saturate uint64_t and doesn't + // overflow when it's multiplied by type size. + if (AllocaArraySize != ~0ULL && + UINT64_MAX / AllocaArraySize >= AllocaTypeSize) { + AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()), + AllocaArraySize * AllocaTypeSize); + } + } + } + + builder.CreateLifetimeStart(AI, AllocaSize); + for (unsigned ri = 0, re = Returns.size(); ri != re; ++ri) { + IRBuilder<> builder(Returns[ri]); + builder.CreateLifetimeEnd(AI, AllocaSize); + } + } + } + + // If the inlined code contained dynamic alloca instructions, wrap the inlined + // code with llvm.stacksave/llvm.stackrestore intrinsics. + if (InlinedFunctionInfo.ContainsDynamicAllocas) { + Module *M = Caller->getParent(); + // Get the two intrinsics we care about. + Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave); + Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore); + + // Insert the llvm.stacksave. + CallInst *SavedPtr = IRBuilder<>(FirstNewBlock, FirstNewBlock->begin()) + .CreateCall(StackSave, "savedstack"); + + // Insert a call to llvm.stackrestore before any return instructions in the + // inlined function. + for (unsigned i = 0, e = Returns.size(); i != e; ++i) { + IRBuilder<>(Returns[i]).CreateCall(StackRestore, SavedPtr); + } + } + + // If we are inlining tail call instruction through a call site that isn't + // marked 'tail', we must remove the tail marker for any calls in the inlined + // code. Also, calls inlined through a 'nounwind' call site should be marked + // 'nounwind'. + if (InlinedFunctionInfo.ContainsCalls && + (MustClearTailCallFlags || MarkNoUnwind)) { + for (Function::iterator BB = FirstNewBlock, E = Caller->end(); + BB != E; ++BB) + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (CallInst *CI = dyn_cast<CallInst>(I)) { + if (MustClearTailCallFlags) + CI->setTailCall(false); + if (MarkNoUnwind) + CI->setDoesNotThrow(); + } + } + + // If we are inlining for an invoke instruction, we must make sure to rewrite + // any call instructions into invoke instructions. + if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) + HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo); + + // If we cloned in _exactly one_ basic block, and if that block ends in a + // return instruction, we splice the body of the inlined callee directly into + // the calling basic block. + if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) { + // Move all of the instructions right before the call. + OrigBB->getInstList().splice(TheCall, FirstNewBlock->getInstList(), + FirstNewBlock->begin(), FirstNewBlock->end()); + // Remove the cloned basic block. + Caller->getBasicBlockList().pop_back(); + + // If the call site was an invoke instruction, add a branch to the normal + // destination. + if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { + BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall); + NewBr->setDebugLoc(Returns[0]->getDebugLoc()); + } + + // If the return instruction returned a value, replace uses of the call with + // uses of the returned value. + if (!TheCall->use_empty()) { + ReturnInst *R = Returns[0]; + if (TheCall == R->getReturnValue()) + TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); + else + TheCall->replaceAllUsesWith(R->getReturnValue()); + } + // Since we are now done with the Call/Invoke, we can delete it. + TheCall->eraseFromParent(); + + // Since we are now done with the return instruction, delete it also. + Returns[0]->eraseFromParent(); + + // We are now done with the inlining. + return true; + } + + // Otherwise, we have the normal case, of more than one block to inline or + // multiple return sites. + + // We want to clone the entire callee function into the hole between the + // "starter" and "ender" blocks. How we accomplish this depends on whether + // this is an invoke instruction or a call instruction. + BasicBlock *AfterCallBB; + BranchInst *CreatedBranchToNormalDest = NULL; + if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { + + // Add an unconditional branch to make this look like the CallInst case... + CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), TheCall); + + // Split the basic block. This guarantees that no PHI nodes will have to be + // updated due to new incoming edges, and make the invoke case more + // symmetric to the call case. + AfterCallBB = OrigBB->splitBasicBlock(CreatedBranchToNormalDest, + CalledFunc->getName()+".exit"); + + } else { // It's a call + // If this is a call instruction, we need to split the basic block that + // the call lives in. + // + AfterCallBB = OrigBB->splitBasicBlock(TheCall, + CalledFunc->getName()+".exit"); + } + + // Change the branch that used to go to AfterCallBB to branch to the first + // basic block of the inlined function. + // + TerminatorInst *Br = OrigBB->getTerminator(); + assert(Br && Br->getOpcode() == Instruction::Br && + "splitBasicBlock broken!"); + Br->setOperand(0, FirstNewBlock); + + + // Now that the function is correct, make it a little bit nicer. In + // particular, move the basic blocks inserted from the end of the function + // into the space made by splitting the source basic block. + Caller->getBasicBlockList().splice(AfterCallBB, Caller->getBasicBlockList(), + FirstNewBlock, Caller->end()); + + // Handle all of the return instructions that we just cloned in, and eliminate + // any users of the original call/invoke instruction. + Type *RTy = CalledFunc->getReturnType(); + + PHINode *PHI = 0; + if (Returns.size() > 1) { + // The PHI node should go at the front of the new basic block to merge all + // possible incoming values. + if (!TheCall->use_empty()) { + PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(), + AfterCallBB->begin()); + // Anything that used the result of the function call should now use the + // PHI node as their operand. + TheCall->replaceAllUsesWith(PHI); + } + + // Loop over all of the return instructions adding entries to the PHI node + // as appropriate. + if (PHI) { + for (unsigned i = 0, e = Returns.size(); i != e; ++i) { + ReturnInst *RI = Returns[i]; + assert(RI->getReturnValue()->getType() == PHI->getType() && + "Ret value not consistent in function!"); + PHI->addIncoming(RI->getReturnValue(), RI->getParent()); + } + } + + + // Add a branch to the merge points and remove return instructions. + DebugLoc Loc; + for (unsigned i = 0, e = Returns.size(); i != e; ++i) { + ReturnInst *RI = Returns[i]; + BranchInst* BI = BranchInst::Create(AfterCallBB, RI); + Loc = RI->getDebugLoc(); + BI->setDebugLoc(Loc); + RI->eraseFromParent(); + } + // We need to set the debug location to *somewhere* inside the + // inlined function. The line number may be nonsensical, but the + // instruction will at least be associated with the right + // function. + if (CreatedBranchToNormalDest) + CreatedBranchToNormalDest->setDebugLoc(Loc); + } else if (!Returns.empty()) { + // Otherwise, if there is exactly one return value, just replace anything + // using the return value of the call with the computed value. + if (!TheCall->use_empty()) { + if (TheCall == Returns[0]->getReturnValue()) + TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); + else + TheCall->replaceAllUsesWith(Returns[0]->getReturnValue()); + } + + // Update PHI nodes that use the ReturnBB to use the AfterCallBB. + BasicBlock *ReturnBB = Returns[0]->getParent(); + ReturnBB->replaceAllUsesWith(AfterCallBB); + + // Splice the code from the return block into the block that it will return + // to, which contains the code that was after the call. + AfterCallBB->getInstList().splice(AfterCallBB->begin(), + ReturnBB->getInstList()); + + if (CreatedBranchToNormalDest) + CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc()); + + // Delete the return instruction now and empty ReturnBB now. + Returns[0]->eraseFromParent(); + ReturnBB->eraseFromParent(); + } else if (!TheCall->use_empty()) { + // No returns, but something is using the return value of the call. Just + // nuke the result. + TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); + } + + // Since we are now done with the Call/Invoke, we can delete it. + TheCall->eraseFromParent(); + + // We should always be able to fold the entry block of the function into the + // single predecessor of the block... + assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!"); + BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0); + + // Splice the code entry block into calling block, right before the + // unconditional branch. + CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes + OrigBB->getInstList().splice(Br, CalleeEntry->getInstList()); + + // Remove the unconditional branch. + OrigBB->getInstList().erase(Br); + + // Now we can remove the CalleeEntry block, which is now empty. + Caller->getBasicBlockList().erase(CalleeEntry); + + // If we inserted a phi node, check to see if it has a single value (e.g. all + // the entries are the same or undef). If so, remove the PHI so it doesn't + // block other optimizations. + if (PHI) { + if (Value *V = SimplifyInstruction(PHI, IFI.TD)) { + PHI->replaceAllUsesWith(V); + PHI->eraseFromParent(); + } + } + + return true; +} diff --git a/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp b/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp new file mode 100644 index 000000000000..a020bc7398f5 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp @@ -0,0 +1,64 @@ +//===- InstructionNamer.cpp - Give anonymous instructions names -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is a little utility pass that gives instructions names, this is mostly +// useful when diffing the effect of an optimization because deleting an +// unnamed instruction can change all other instruction numbering, making the +// diff very noisy. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Type.h" +#include "llvm/Pass.h" +using namespace llvm; + +namespace { + struct InstNamer : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + InstNamer() : FunctionPass(ID) { + initializeInstNamerPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &Info) const { + Info.setPreservesAll(); + } + + bool runOnFunction(Function &F) { + for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); + AI != AE; ++AI) + if (!AI->hasName() && !AI->getType()->isVoidTy()) + AI->setName("arg"); + + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + if (!BB->hasName()) + BB->setName("bb"); + + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (!I->hasName() && !I->getType()->isVoidTy()) + I->setName("tmp"); + } + return true; + } + }; + + char InstNamer::ID = 0; +} + +INITIALIZE_PASS(InstNamer, "instnamer", + "Assign names to anonymous instructions", false, false) +char &llvm::InstructionNamerID = InstNamer::ID; +//===----------------------------------------------------------------------===// +// +// InstructionNamer - Give any unnamed non-void instructions "tmp" names. +// +FunctionPass *llvm::createInstructionNamerPass() { + return new InstNamer(); +} diff --git a/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp b/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp new file mode 100644 index 000000000000..3cb8ded8506a --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp @@ -0,0 +1,524 @@ +//===-- IntegerDivision.cpp - Expand integer division ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains an implementation of 32bit scalar integer division for +// targets that don't have native support. It's largely derived from +// compiler-rt's implementation of __udivsi3, but hand-tuned to reduce the +// amount of control flow +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "integer-division" +#include "llvm/Transforms/Utils/IntegerDivision.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" + +using namespace llvm; + +/// Generate code to compute the remainder of two signed integers. Returns the +/// remainder, which will have the sign of the dividend. Builder's insert point +/// should be pointing where the caller wants code generated, e.g. at the srem +/// instruction. This will generate a urem in the process, and Builder's insert +/// point will be pointing at the uren (if present, i.e. not folded), ready to +/// be expanded if the user wishes +static Value *generateSignedRemainderCode(Value *Dividend, Value *Divisor, + IRBuilder<> &Builder) { + ConstantInt *ThirtyOne = Builder.getInt32(31); + + // ; %dividend_sgn = ashr i32 %dividend, 31 + // ; %divisor_sgn = ashr i32 %divisor, 31 + // ; %dvd_xor = xor i32 %dividend, %dividend_sgn + // ; %dvs_xor = xor i32 %divisor, %divisor_sgn + // ; %u_dividend = sub i32 %dvd_xor, %dividend_sgn + // ; %u_divisor = sub i32 %dvs_xor, %divisor_sgn + // ; %urem = urem i32 %dividend, %divisor + // ; %xored = xor i32 %urem, %dividend_sgn + // ; %srem = sub i32 %xored, %dividend_sgn + Value *DividendSign = Builder.CreateAShr(Dividend, ThirtyOne); + Value *DivisorSign = Builder.CreateAShr(Divisor, ThirtyOne); + Value *DvdXor = Builder.CreateXor(Dividend, DividendSign); + Value *DvsXor = Builder.CreateXor(Divisor, DivisorSign); + Value *UDividend = Builder.CreateSub(DvdXor, DividendSign); + Value *UDivisor = Builder.CreateSub(DvsXor, DivisorSign); + Value *URem = Builder.CreateURem(UDividend, UDivisor); + Value *Xored = Builder.CreateXor(URem, DividendSign); + Value *SRem = Builder.CreateSub(Xored, DividendSign); + + if (Instruction *URemInst = dyn_cast<Instruction>(URem)) + Builder.SetInsertPoint(URemInst); + + return SRem; +} + + +/// Generate code to compute the remainder of two unsigned integers. Returns the +/// remainder. Builder's insert point should be pointing where the caller wants +/// code generated, e.g. at the urem instruction. This will generate a udiv in +/// the process, and Builder's insert point will be pointing at the udiv (if +/// present, i.e. not folded), ready to be expanded if the user wishes +static Value *generatedUnsignedRemainderCode(Value *Dividend, Value *Divisor, + IRBuilder<> &Builder) { + // Remainder = Dividend - Quotient*Divisor + + // ; %quotient = udiv i32 %dividend, %divisor + // ; %product = mul i32 %divisor, %quotient + // ; %remainder = sub i32 %dividend, %product + Value *Quotient = Builder.CreateUDiv(Dividend, Divisor); + Value *Product = Builder.CreateMul(Divisor, Quotient); + Value *Remainder = Builder.CreateSub(Dividend, Product); + + if (Instruction *UDiv = dyn_cast<Instruction>(Quotient)) + Builder.SetInsertPoint(UDiv); + + return Remainder; +} + +/// Generate code to divide two signed integers. Returns the quotient, rounded +/// towards 0. Builder's insert point should be pointing where the caller wants +/// code generated, e.g. at the sdiv instruction. This will generate a udiv in +/// the process, and Builder's insert point will be pointing at the udiv (if +/// present, i.e. not folded), ready to be expanded if the user wishes. +static Value *generateSignedDivisionCode(Value *Dividend, Value *Divisor, + IRBuilder<> &Builder) { + // Implementation taken from compiler-rt's __divsi3 + + ConstantInt *ThirtyOne = Builder.getInt32(31); + + // ; %tmp = ashr i32 %dividend, 31 + // ; %tmp1 = ashr i32 %divisor, 31 + // ; %tmp2 = xor i32 %tmp, %dividend + // ; %u_dvnd = sub nsw i32 %tmp2, %tmp + // ; %tmp3 = xor i32 %tmp1, %divisor + // ; %u_dvsr = sub nsw i32 %tmp3, %tmp1 + // ; %q_sgn = xor i32 %tmp1, %tmp + // ; %q_mag = udiv i32 %u_dvnd, %u_dvsr + // ; %tmp4 = xor i32 %q_mag, %q_sgn + // ; %q = sub i32 %tmp4, %q_sgn + Value *Tmp = Builder.CreateAShr(Dividend, ThirtyOne); + Value *Tmp1 = Builder.CreateAShr(Divisor, ThirtyOne); + Value *Tmp2 = Builder.CreateXor(Tmp, Dividend); + Value *U_Dvnd = Builder.CreateSub(Tmp2, Tmp); + Value *Tmp3 = Builder.CreateXor(Tmp1, Divisor); + Value *U_Dvsr = Builder.CreateSub(Tmp3, Tmp1); + Value *Q_Sgn = Builder.CreateXor(Tmp1, Tmp); + Value *Q_Mag = Builder.CreateUDiv(U_Dvnd, U_Dvsr); + Value *Tmp4 = Builder.CreateXor(Q_Mag, Q_Sgn); + Value *Q = Builder.CreateSub(Tmp4, Q_Sgn); + + if (Instruction *UDiv = dyn_cast<Instruction>(Q_Mag)) + Builder.SetInsertPoint(UDiv); + + return Q; +} + +/// Generates code to divide two unsigned scalar 32-bit integers. Returns the +/// quotient, rounded towards 0. Builder's insert point should be pointing where +/// the caller wants code generated, e.g. at the udiv instruction. +static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, + IRBuilder<> &Builder) { + // The basic algorithm can be found in the compiler-rt project's + // implementation of __udivsi3.c. Here, we do a lower-level IR based approach + // that's been hand-tuned to lessen the amount of control flow involved. + + // Some helper values + IntegerType *I32Ty = Builder.getInt32Ty(); + + ConstantInt *Zero = Builder.getInt32(0); + ConstantInt *One = Builder.getInt32(1); + ConstantInt *ThirtyOne = Builder.getInt32(31); + ConstantInt *NegOne = ConstantInt::getSigned(I32Ty, -1); + ConstantInt *True = Builder.getTrue(); + + BasicBlock *IBB = Builder.GetInsertBlock(); + Function *F = IBB->getParent(); + Function *CTLZi32 = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, + I32Ty); + + // Our CFG is going to look like: + // +---------------------+ + // | special-cases | + // | ... | + // +---------------------+ + // | | + // | +----------+ + // | | bb1 | + // | | ... | + // | +----------+ + // | | | + // | | +------------+ + // | | | preheader | + // | | | ... | + // | | +------------+ + // | | | + // | | | +---+ + // | | | | | + // | | +------------+ | + // | | | do-while | | + // | | | ... | | + // | | +------------+ | + // | | | | | + // | +-----------+ +---+ + // | | loop-exit | + // | | ... | + // | +-----------+ + // | | + // +-------+ + // | ... | + // | end | + // +-------+ + BasicBlock *SpecialCases = Builder.GetInsertBlock(); + SpecialCases->setName(Twine(SpecialCases->getName(), "_udiv-special-cases")); + BasicBlock *End = SpecialCases->splitBasicBlock(Builder.GetInsertPoint(), + "udiv-end"); + BasicBlock *LoopExit = BasicBlock::Create(Builder.getContext(), + "udiv-loop-exit", F, End); + BasicBlock *DoWhile = BasicBlock::Create(Builder.getContext(), + "udiv-do-while", F, End); + BasicBlock *Preheader = BasicBlock::Create(Builder.getContext(), + "udiv-preheader", F, End); + BasicBlock *BB1 = BasicBlock::Create(Builder.getContext(), + "udiv-bb1", F, End); + + // We'll be overwriting the terminator to insert our extra blocks + SpecialCases->getTerminator()->eraseFromParent(); + + // First off, check for special cases: dividend or divisor is zero, divisor + // is greater than dividend, and divisor is 1. + // ; special-cases: + // ; %ret0_1 = icmp eq i32 %divisor, 0 + // ; %ret0_2 = icmp eq i32 %dividend, 0 + // ; %ret0_3 = or i1 %ret0_1, %ret0_2 + // ; %tmp0 = tail call i32 @llvm.ctlz.i32(i32 %divisor, i1 true) + // ; %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %dividend, i1 true) + // ; %sr = sub nsw i32 %tmp0, %tmp1 + // ; %ret0_4 = icmp ugt i32 %sr, 31 + // ; %ret0 = or i1 %ret0_3, %ret0_4 + // ; %retDividend = icmp eq i32 %sr, 31 + // ; %retVal = select i1 %ret0, i32 0, i32 %dividend + // ; %earlyRet = or i1 %ret0, %retDividend + // ; br i1 %earlyRet, label %end, label %bb1 + Builder.SetInsertPoint(SpecialCases); + Value *Ret0_1 = Builder.CreateICmpEQ(Divisor, Zero); + Value *Ret0_2 = Builder.CreateICmpEQ(Dividend, Zero); + Value *Ret0_3 = Builder.CreateOr(Ret0_1, Ret0_2); + Value *Tmp0 = Builder.CreateCall2(CTLZi32, Divisor, True); + Value *Tmp1 = Builder.CreateCall2(CTLZi32, Dividend, True); + Value *SR = Builder.CreateSub(Tmp0, Tmp1); + Value *Ret0_4 = Builder.CreateICmpUGT(SR, ThirtyOne); + Value *Ret0 = Builder.CreateOr(Ret0_3, Ret0_4); + Value *RetDividend = Builder.CreateICmpEQ(SR, ThirtyOne); + Value *RetVal = Builder.CreateSelect(Ret0, Zero, Dividend); + Value *EarlyRet = Builder.CreateOr(Ret0, RetDividend); + Builder.CreateCondBr(EarlyRet, End, BB1); + + // ; bb1: ; preds = %special-cases + // ; %sr_1 = add i32 %sr, 1 + // ; %tmp2 = sub i32 31, %sr + // ; %q = shl i32 %dividend, %tmp2 + // ; %skipLoop = icmp eq i32 %sr_1, 0 + // ; br i1 %skipLoop, label %loop-exit, label %preheader + Builder.SetInsertPoint(BB1); + Value *SR_1 = Builder.CreateAdd(SR, One); + Value *Tmp2 = Builder.CreateSub(ThirtyOne, SR); + Value *Q = Builder.CreateShl(Dividend, Tmp2); + Value *SkipLoop = Builder.CreateICmpEQ(SR_1, Zero); + Builder.CreateCondBr(SkipLoop, LoopExit, Preheader); + + // ; preheader: ; preds = %bb1 + // ; %tmp3 = lshr i32 %dividend, %sr_1 + // ; %tmp4 = add i32 %divisor, -1 + // ; br label %do-while + Builder.SetInsertPoint(Preheader); + Value *Tmp3 = Builder.CreateLShr(Dividend, SR_1); + Value *Tmp4 = Builder.CreateAdd(Divisor, NegOne); + Builder.CreateBr(DoWhile); + + // ; do-while: ; preds = %do-while, %preheader + // ; %carry_1 = phi i32 [ 0, %preheader ], [ %carry, %do-while ] + // ; %sr_3 = phi i32 [ %sr_1, %preheader ], [ %sr_2, %do-while ] + // ; %r_1 = phi i32 [ %tmp3, %preheader ], [ %r, %do-while ] + // ; %q_2 = phi i32 [ %q, %preheader ], [ %q_1, %do-while ] + // ; %tmp5 = shl i32 %r_1, 1 + // ; %tmp6 = lshr i32 %q_2, 31 + // ; %tmp7 = or i32 %tmp5, %tmp6 + // ; %tmp8 = shl i32 %q_2, 1 + // ; %q_1 = or i32 %carry_1, %tmp8 + // ; %tmp9 = sub i32 %tmp4, %tmp7 + // ; %tmp10 = ashr i32 %tmp9, 31 + // ; %carry = and i32 %tmp10, 1 + // ; %tmp11 = and i32 %tmp10, %divisor + // ; %r = sub i32 %tmp7, %tmp11 + // ; %sr_2 = add i32 %sr_3, -1 + // ; %tmp12 = icmp eq i32 %sr_2, 0 + // ; br i1 %tmp12, label %loop-exit, label %do-while + Builder.SetInsertPoint(DoWhile); + PHINode *Carry_1 = Builder.CreatePHI(I32Ty, 2); + PHINode *SR_3 = Builder.CreatePHI(I32Ty, 2); + PHINode *R_1 = Builder.CreatePHI(I32Ty, 2); + PHINode *Q_2 = Builder.CreatePHI(I32Ty, 2); + Value *Tmp5 = Builder.CreateShl(R_1, One); + Value *Tmp6 = Builder.CreateLShr(Q_2, ThirtyOne); + Value *Tmp7 = Builder.CreateOr(Tmp5, Tmp6); + Value *Tmp8 = Builder.CreateShl(Q_2, One); + Value *Q_1 = Builder.CreateOr(Carry_1, Tmp8); + Value *Tmp9 = Builder.CreateSub(Tmp4, Tmp7); + Value *Tmp10 = Builder.CreateAShr(Tmp9, 31); + Value *Carry = Builder.CreateAnd(Tmp10, One); + Value *Tmp11 = Builder.CreateAnd(Tmp10, Divisor); + Value *R = Builder.CreateSub(Tmp7, Tmp11); + Value *SR_2 = Builder.CreateAdd(SR_3, NegOne); + Value *Tmp12 = Builder.CreateICmpEQ(SR_2, Zero); + Builder.CreateCondBr(Tmp12, LoopExit, DoWhile); + + // ; loop-exit: ; preds = %do-while, %bb1 + // ; %carry_2 = phi i32 [ 0, %bb1 ], [ %carry, %do-while ] + // ; %q_3 = phi i32 [ %q, %bb1 ], [ %q_1, %do-while ] + // ; %tmp13 = shl i32 %q_3, 1 + // ; %q_4 = or i32 %carry_2, %tmp13 + // ; br label %end + Builder.SetInsertPoint(LoopExit); + PHINode *Carry_2 = Builder.CreatePHI(I32Ty, 2); + PHINode *Q_3 = Builder.CreatePHI(I32Ty, 2); + Value *Tmp13 = Builder.CreateShl(Q_3, One); + Value *Q_4 = Builder.CreateOr(Carry_2, Tmp13); + Builder.CreateBr(End); + + // ; end: ; preds = %loop-exit, %special-cases + // ; %q_5 = phi i32 [ %q_4, %loop-exit ], [ %retVal, %special-cases ] + // ; ret i32 %q_5 + Builder.SetInsertPoint(End, End->begin()); + PHINode *Q_5 = Builder.CreatePHI(I32Ty, 2); + + // Populate the Phis, since all values have now been created. Our Phis were: + // ; %carry_1 = phi i32 [ 0, %preheader ], [ %carry, %do-while ] + Carry_1->addIncoming(Zero, Preheader); + Carry_1->addIncoming(Carry, DoWhile); + // ; %sr_3 = phi i32 [ %sr_1, %preheader ], [ %sr_2, %do-while ] + SR_3->addIncoming(SR_1, Preheader); + SR_3->addIncoming(SR_2, DoWhile); + // ; %r_1 = phi i32 [ %tmp3, %preheader ], [ %r, %do-while ] + R_1->addIncoming(Tmp3, Preheader); + R_1->addIncoming(R, DoWhile); + // ; %q_2 = phi i32 [ %q, %preheader ], [ %q_1, %do-while ] + Q_2->addIncoming(Q, Preheader); + Q_2->addIncoming(Q_1, DoWhile); + // ; %carry_2 = phi i32 [ 0, %bb1 ], [ %carry, %do-while ] + Carry_2->addIncoming(Zero, BB1); + Carry_2->addIncoming(Carry, DoWhile); + // ; %q_3 = phi i32 [ %q, %bb1 ], [ %q_1, %do-while ] + Q_3->addIncoming(Q, BB1); + Q_3->addIncoming(Q_1, DoWhile); + // ; %q_5 = phi i32 [ %q_4, %loop-exit ], [ %retVal, %special-cases ] + Q_5->addIncoming(Q_4, LoopExit); + Q_5->addIncoming(RetVal, SpecialCases); + + return Q_5; +} + +/// Generate code to calculate the remainder of two integers, replacing Rem with +/// the generated code. This currently generates code using the udiv expansion, +/// but future work includes generating more specialized code, e.g. when more +/// information about the operands are known. Currently only implements 32bit +/// scalar division (due to udiv's limitation), but future work is removing this +/// limitation. +/// +/// @brief Replace Rem with generated code. +bool llvm::expandRemainder(BinaryOperator *Rem) { + assert((Rem->getOpcode() == Instruction::SRem || + Rem->getOpcode() == Instruction::URem) && + "Trying to expand remainder from a non-remainder function"); + + IRBuilder<> Builder(Rem); + + // First prepare the sign if it's a signed remainder + if (Rem->getOpcode() == Instruction::SRem) { + Value *Remainder = generateSignedRemainderCode(Rem->getOperand(0), + Rem->getOperand(1), Builder); + + Rem->replaceAllUsesWith(Remainder); + Rem->dropAllReferences(); + Rem->eraseFromParent(); + + // If we didn't actually generate a udiv instruction, we're done + BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint()); + if (!BO || BO->getOpcode() != Instruction::URem) + return true; + + Rem = BO; + } + + Value *Remainder = generatedUnsignedRemainderCode(Rem->getOperand(0), + Rem->getOperand(1), + Builder); + + Rem->replaceAllUsesWith(Remainder); + Rem->dropAllReferences(); + Rem->eraseFromParent(); + + // Expand the udiv + if (BinaryOperator *UDiv = dyn_cast<BinaryOperator>(Builder.GetInsertPoint())) { + assert(UDiv->getOpcode() == Instruction::UDiv && "Non-udiv in expansion?"); + expandDivision(UDiv); + } + + return true; +} + + +/// Generate code to divide two integers, replacing Div with the generated +/// code. This currently generates code similarly to compiler-rt's +/// implementations, but future work includes generating more specialized code +/// when more information about the operands are known. Currently only +/// implements 32bit scalar division, but future work is removing this +/// limitation. +/// +/// @brief Replace Div with generated code. +bool llvm::expandDivision(BinaryOperator *Div) { + assert((Div->getOpcode() == Instruction::SDiv || + Div->getOpcode() == Instruction::UDiv) && + "Trying to expand division from a non-division function"); + + IRBuilder<> Builder(Div); + + if (Div->getType()->isVectorTy()) + llvm_unreachable("Div over vectors not supported"); + + // First prepare the sign if it's a signed division + if (Div->getOpcode() == Instruction::SDiv) { + // Lower the code to unsigned division, and reset Div to point to the udiv. + Value *Quotient = generateSignedDivisionCode(Div->getOperand(0), + Div->getOperand(1), Builder); + Div->replaceAllUsesWith(Quotient); + Div->dropAllReferences(); + Div->eraseFromParent(); + + // If we didn't actually generate a udiv instruction, we're done + BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint()); + if (!BO || BO->getOpcode() != Instruction::UDiv) + return true; + + Div = BO; + } + + // Insert the unsigned division code + Value *Quotient = generateUnsignedDivisionCode(Div->getOperand(0), + Div->getOperand(1), + Builder); + Div->replaceAllUsesWith(Quotient); + Div->dropAllReferences(); + Div->eraseFromParent(); + + return true; +} + +/// Generate code to compute the remainder of two integers of bitwidth up to +/// 32 bits. Uses the above routines and extends the inputs/truncates the +/// outputs to operate in 32 bits; that is, these routines are good for targets +/// that have no or very little suppport for smaller than 32 bit integer +/// arithmetic. +/// +/// @brief Replace Rem with emulation code. +bool llvm::expandRemainderUpTo32Bits(BinaryOperator *Rem) { + assert((Rem->getOpcode() == Instruction::SRem || + Rem->getOpcode() == Instruction::URem) && + "Trying to expand remainder from a non-remainder function"); + + Type *RemTy = Rem->getType(); + if (RemTy->isVectorTy()) + llvm_unreachable("Div over vectors not supported"); + + unsigned RemTyBitWidth = RemTy->getIntegerBitWidth(); + + if (RemTyBitWidth > 32) + llvm_unreachable("Div of bitwidth greater than 32 not supported"); + + if (RemTyBitWidth == 32) + return expandRemainder(Rem); + + // If bitwidth smaller than 32 extend inputs, truncate output and proceed + // with 32 bit division. + IRBuilder<> Builder(Rem); + + Value *ExtDividend; + Value *ExtDivisor; + Value *ExtRem; + Value *Trunc; + Type *Int32Ty = Builder.getInt32Ty(); + + if (Rem->getOpcode() == Instruction::SRem) { + ExtDividend = Builder.CreateSExt(Rem->getOperand(0), Int32Ty); + ExtDivisor = Builder.CreateSExt(Rem->getOperand(1), Int32Ty); + ExtRem = Builder.CreateSRem(ExtDividend, ExtDivisor); + } else { + ExtDividend = Builder.CreateZExt(Rem->getOperand(0), Int32Ty); + ExtDivisor = Builder.CreateZExt(Rem->getOperand(1), Int32Ty); + ExtRem = Builder.CreateURem(ExtDividend, ExtDivisor); + } + Trunc = Builder.CreateTrunc(ExtRem, RemTy); + + Rem->replaceAllUsesWith(Trunc); + Rem->dropAllReferences(); + Rem->eraseFromParent(); + + return expandRemainder(cast<BinaryOperator>(ExtRem)); +} + + +/// Generate code to divide two integers of bitwidth up to 32 bits. Uses the +/// above routines and extends the inputs/truncates the outputs to operate +/// in 32 bits; that is, these routines are good for targets that have no +/// or very little support for smaller than 32 bit integer arithmetic. +/// +/// @brief Replace Div with emulation code. +bool llvm::expandDivisionUpTo32Bits(BinaryOperator *Div) { + assert((Div->getOpcode() == Instruction::SDiv || + Div->getOpcode() == Instruction::UDiv) && + "Trying to expand division from a non-division function"); + + Type *DivTy = Div->getType(); + if (DivTy->isVectorTy()) + llvm_unreachable("Div over vectors not supported"); + + unsigned DivTyBitWidth = DivTy->getIntegerBitWidth(); + + if (DivTyBitWidth > 32) + llvm_unreachable("Div of bitwidth greater than 32 not supported"); + + if (DivTyBitWidth == 32) + return expandDivision(Div); + + // If bitwidth smaller than 32 extend inputs, truncate output and proceed + // with 32 bit division. + IRBuilder<> Builder(Div); + + Value *ExtDividend; + Value *ExtDivisor; + Value *ExtDiv; + Value *Trunc; + Type *Int32Ty = Builder.getInt32Ty(); + + if (Div->getOpcode() == Instruction::SDiv) { + ExtDividend = Builder.CreateSExt(Div->getOperand(0), Int32Ty); + ExtDivisor = Builder.CreateSExt(Div->getOperand(1), Int32Ty); + ExtDiv = Builder.CreateSDiv(ExtDividend, ExtDivisor); + } else { + ExtDividend = Builder.CreateZExt(Div->getOperand(0), Int32Ty); + ExtDivisor = Builder.CreateZExt(Div->getOperand(1), Int32Ty); + ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor); + } + Trunc = Builder.CreateTrunc(ExtDiv, DivTy); + + Div->replaceAllUsesWith(Trunc); + Div->dropAllReferences(); + Div->eraseFromParent(); + + return expandDivision(cast<BinaryOperator>(ExtDiv)); +} diff --git a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp new file mode 100644 index 000000000000..f15e8d59276b --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp @@ -0,0 +1,281 @@ +//===-- LCSSA.cpp - Convert loops into loop-closed SSA form ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass transforms loops by placing phi nodes at the end of the loops for +// all values that are live across the loop boundary. For example, it turns +// the left into the right code: +// +// for (...) for (...) +// if (c) if (c) +// X1 = ... X1 = ... +// else else +// X2 = ... X2 = ... +// X3 = phi(X1, X2) X3 = phi(X1, X2) +// ... = X3 + 4 X4 = phi(X3) +// ... = X4 + 4 +// +// This is still valid LLVM; the extra phi nodes are purely redundant, and will +// be trivially eliminated by InstCombine. The major benefit of this +// transformation is that it makes many other loop optimizations, such as +// LoopUnswitching, simpler. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "lcssa" +#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Pass.h" +#include "llvm/Support/PredIteratorCache.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" +using namespace llvm; + +STATISTIC(NumLCSSA, "Number of live out of a loop variables"); + +namespace { + struct LCSSA : public LoopPass { + static char ID; // Pass identification, replacement for typeid + LCSSA() : LoopPass(ID) { + initializeLCSSAPass(*PassRegistry::getPassRegistry()); + } + + // Cached analysis information for the current function. + DominatorTree *DT; + LoopInfo *LI; + ScalarEvolution *SE; + PredIteratorCache PredCache; + Loop *L; + + virtual bool runOnLoop(Loop *L, LPPassManager &LPM); + + /// This transformation requires natural loop information & requires that + /// loop preheaders be inserted into the CFG. It maintains both of these, + /// as well as the CFG. It also requires dominator information. + /// + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + + AU.addRequired<DominatorTree>(); + AU.addRequired<LoopInfo>(); + AU.addPreservedID(LoopSimplifyID); + AU.addPreserved<ScalarEvolution>(); + } + private: + bool ProcessInstruction(Instruction *Inst, + const SmallVectorImpl<BasicBlock*> &ExitBlocks); + + /// verifyAnalysis() - Verify loop nest. + virtual void verifyAnalysis() const { + // Check the special guarantees that LCSSA makes. + assert(L->isLCSSAForm(*DT) && "LCSSA form not preserved!"); + } + }; +} + +char LCSSA::ID = 0; +INITIALIZE_PASS_BEGIN(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_END(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false) + +Pass *llvm::createLCSSAPass() { return new LCSSA(); } +char &llvm::LCSSAID = LCSSA::ID; + + +/// BlockDominatesAnExit - Return true if the specified block dominates at least +/// one of the blocks in the specified list. +static bool BlockDominatesAnExit(BasicBlock *BB, + const SmallVectorImpl<BasicBlock*> &ExitBlocks, + DominatorTree *DT) { + DomTreeNode *DomNode = DT->getNode(BB); + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) + if (DT->dominates(DomNode, DT->getNode(ExitBlocks[i]))) + return true; + + return false; +} + + +/// runOnFunction - Process all loops in the function, inner-most out. +bool LCSSA::runOnLoop(Loop *TheLoop, LPPassManager &LPM) { + L = TheLoop; + + DT = &getAnalysis<DominatorTree>(); + LI = &getAnalysis<LoopInfo>(); + SE = getAnalysisIfAvailable<ScalarEvolution>(); + + // Get the set of exiting blocks. + SmallVector<BasicBlock*, 8> ExitBlocks; + L->getExitBlocks(ExitBlocks); + + if (ExitBlocks.empty()) + return false; + + // Look at all the instructions in the loop, checking to see if they have uses + // outside the loop. If so, rewrite those uses. + bool MadeChange = false; + + for (Loop::block_iterator BBI = L->block_begin(), E = L->block_end(); + BBI != E; ++BBI) { + BasicBlock *BB = *BBI; + + // For large loops, avoid use-scanning by using dominance information: In + // particular, if a block does not dominate any of the loop exits, then none + // of the values defined in the block could be used outside the loop. + if (!BlockDominatesAnExit(BB, ExitBlocks, DT)) + continue; + + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); + I != E; ++I) { + // Reject two common cases fast: instructions with no uses (like stores) + // and instructions with one use that is in the same block as this. + if (I->use_empty() || + (I->hasOneUse() && I->use_back()->getParent() == BB && + !isa<PHINode>(I->use_back()))) + continue; + + MadeChange |= ProcessInstruction(I, ExitBlocks); + } + } + + // If we modified the code, remove any caches about the loop from SCEV to + // avoid dangling entries. + // FIXME: This is a big hammer, can we clear the cache more selectively? + if (SE && MadeChange) + SE->forgetLoop(L); + + assert(L->isLCSSAForm(*DT)); + PredCache.clear(); + + return MadeChange; +} + +/// isExitBlock - Return true if the specified block is in the list. +static bool isExitBlock(BasicBlock *BB, + const SmallVectorImpl<BasicBlock*> &ExitBlocks) { + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) + if (ExitBlocks[i] == BB) + return true; + return false; +} + +/// ProcessInstruction - Given an instruction in the loop, check to see if it +/// has any uses that are outside the current loop. If so, insert LCSSA PHI +/// nodes and rewrite the uses. +bool LCSSA::ProcessInstruction(Instruction *Inst, + const SmallVectorImpl<BasicBlock*> &ExitBlocks) { + SmallVector<Use*, 16> UsesToRewrite; + + BasicBlock *InstBB = Inst->getParent(); + + for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end(); + UI != E; ++UI) { + User *U = *UI; + BasicBlock *UserBB = cast<Instruction>(U)->getParent(); + if (PHINode *PN = dyn_cast<PHINode>(U)) + UserBB = PN->getIncomingBlock(UI); + + if (InstBB != UserBB && !L->contains(UserBB)) + UsesToRewrite.push_back(&UI.getUse()); + } + + // If there are no uses outside the loop, exit with no change. + if (UsesToRewrite.empty()) return false; + + ++NumLCSSA; // We are applying the transformation + + // Invoke instructions are special in that their result value is not available + // along their unwind edge. The code below tests to see whether DomBB dominates + // the value, so adjust DomBB to the normal destination block, which is + // effectively where the value is first usable. + BasicBlock *DomBB = Inst->getParent(); + if (InvokeInst *Inv = dyn_cast<InvokeInst>(Inst)) + DomBB = Inv->getNormalDest(); + + DomTreeNode *DomNode = DT->getNode(DomBB); + + SmallVector<PHINode*, 16> AddedPHIs; + + SSAUpdater SSAUpdate; + SSAUpdate.Initialize(Inst->getType(), Inst->getName()); + + // Insert the LCSSA phi's into all of the exit blocks dominated by the + // value, and add them to the Phi's map. + for (SmallVectorImpl<BasicBlock*>::const_iterator BBI = ExitBlocks.begin(), + BBE = ExitBlocks.end(); BBI != BBE; ++BBI) { + BasicBlock *ExitBB = *BBI; + if (!DT->dominates(DomNode, DT->getNode(ExitBB))) continue; + + // If we already inserted something for this BB, don't reprocess it. + if (SSAUpdate.HasValueForBlock(ExitBB)) continue; + + PHINode *PN = PHINode::Create(Inst->getType(), + PredCache.GetNumPreds(ExitBB), + Inst->getName()+".lcssa", + ExitBB->begin()); + + // Add inputs from inside the loop for this PHI. + for (BasicBlock **PI = PredCache.GetPreds(ExitBB); *PI; ++PI) { + PN->addIncoming(Inst, *PI); + + // If the exit block has a predecessor not within the loop, arrange for + // the incoming value use corresponding to that predecessor to be + // rewritten in terms of a different LCSSA PHI. + if (!L->contains(*PI)) + UsesToRewrite.push_back( + &PN->getOperandUse( + PN->getOperandNumForIncomingValue(PN->getNumIncomingValues()-1))); + } + + AddedPHIs.push_back(PN); + + // Remember that this phi makes the value alive in this block. + SSAUpdate.AddAvailableValue(ExitBB, PN); + } + + // Rewrite all uses outside the loop in terms of the new PHIs we just + // inserted. + for (unsigned i = 0, e = UsesToRewrite.size(); i != e; ++i) { + // If this use is in an exit block, rewrite to use the newly inserted PHI. + // This is required for correctness because SSAUpdate doesn't handle uses in + // the same block. It assumes the PHI we inserted is at the end of the + // block. + Instruction *User = cast<Instruction>(UsesToRewrite[i]->getUser()); + BasicBlock *UserBB = User->getParent(); + if (PHINode *PN = dyn_cast<PHINode>(User)) + UserBB = PN->getIncomingBlock(*UsesToRewrite[i]); + + if (isa<PHINode>(UserBB->begin()) && + isExitBlock(UserBB, ExitBlocks)) { + // Tell the VHs that the uses changed. This updates SCEV's caches. + if (UsesToRewrite[i]->get()->hasValueHandle()) + ValueHandleBase::ValueIsRAUWd(*UsesToRewrite[i], UserBB->begin()); + UsesToRewrite[i]->set(UserBB->begin()); + continue; + } + + // Otherwise, do full PHI insertion. + SSAUpdate.RewriteUse(*UsesToRewrite[i]); + } + + // Remove PHI nodes that did not have any uses rewritten. + for (unsigned i = 0, e = AddedPHIs.size(); i != e; ++i) { + if (AddedPHIs[i]->use_empty()) + AddedPHIs[i]->eraseFromParent(); + } + + return true; +} + diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp new file mode 100644 index 000000000000..2768041fb2b9 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp @@ -0,0 +1,1273 @@ +//===-- Local.cpp - Functions to perform local transformations ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This family of functions perform various local transformations to the +// program. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/DIBuilder.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Operator.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/ValueHandle.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +STATISTIC(NumRemoved, "Number of unreachable basic blocks removed"); + +//===----------------------------------------------------------------------===// +// Local constant propagation. +// + +/// ConstantFoldTerminator - If a terminator instruction is predicated on a +/// constant value, convert it into an unconditional branch to the constant +/// destination. This is a nontrivial operation because the successors of this +/// basic block must have their PHI nodes updated. +/// Also calls RecursivelyDeleteTriviallyDeadInstructions() on any branch/switch +/// conditions and indirectbr addresses this might make dead if +/// DeleteDeadConditions is true. +bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, + const TargetLibraryInfo *TLI) { + TerminatorInst *T = BB->getTerminator(); + IRBuilder<> Builder(T); + + // Branch - See if we are conditional jumping on constant + if (BranchInst *BI = dyn_cast<BranchInst>(T)) { + if (BI->isUnconditional()) return false; // Can't optimize uncond branch + BasicBlock *Dest1 = BI->getSuccessor(0); + BasicBlock *Dest2 = BI->getSuccessor(1); + + if (ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition())) { + // Are we branching on constant? + // YES. Change to unconditional branch... + BasicBlock *Destination = Cond->getZExtValue() ? Dest1 : Dest2; + BasicBlock *OldDest = Cond->getZExtValue() ? Dest2 : Dest1; + + //cerr << "Function: " << T->getParent()->getParent() + // << "\nRemoving branch from " << T->getParent() + // << "\n\nTo: " << OldDest << endl; + + // Let the basic block know that we are letting go of it. Based on this, + // it will adjust it's PHI nodes. + OldDest->removePredecessor(BB); + + // Replace the conditional branch with an unconditional one. + Builder.CreateBr(Destination); + BI->eraseFromParent(); + return true; + } + + if (Dest2 == Dest1) { // Conditional branch to same location? + // This branch matches something like this: + // br bool %cond, label %Dest, label %Dest + // and changes it into: br label %Dest + + // Let the basic block know that we are letting go of one copy of it. + assert(BI->getParent() && "Terminator not inserted in block!"); + Dest1->removePredecessor(BI->getParent()); + + // Replace the conditional branch with an unconditional one. + Builder.CreateBr(Dest1); + Value *Cond = BI->getCondition(); + BI->eraseFromParent(); + if (DeleteDeadConditions) + RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI); + return true; + } + return false; + } + + if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) { + // If we are switching on a constant, we can convert the switch into a + // single branch instruction! + ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition()); + BasicBlock *TheOnlyDest = SI->getDefaultDest(); + BasicBlock *DefaultDest = TheOnlyDest; + + // Figure out which case it goes to. + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); + i != e; ++i) { + // Found case matching a constant operand? + if (i.getCaseValue() == CI) { + TheOnlyDest = i.getCaseSuccessor(); + break; + } + + // Check to see if this branch is going to the same place as the default + // dest. If so, eliminate it as an explicit compare. + if (i.getCaseSuccessor() == DefaultDest) { + MDNode* MD = SI->getMetadata(LLVMContext::MD_prof); + // MD should have 2 + NumCases operands. + if (MD && MD->getNumOperands() == 2 + SI->getNumCases()) { + // Collect branch weights into a vector. + SmallVector<uint32_t, 8> Weights; + for (unsigned MD_i = 1, MD_e = MD->getNumOperands(); MD_i < MD_e; + ++MD_i) { + ConstantInt* CI = dyn_cast<ConstantInt>(MD->getOperand(MD_i)); + assert(CI); + Weights.push_back(CI->getValue().getZExtValue()); + } + // Merge weight of this case to the default weight. + unsigned idx = i.getCaseIndex(); + Weights[0] += Weights[idx+1]; + // Remove weight for this case. + std::swap(Weights[idx+1], Weights.back()); + Weights.pop_back(); + SI->setMetadata(LLVMContext::MD_prof, + MDBuilder(BB->getContext()). + createBranchWeights(Weights)); + } + // Remove this entry. + DefaultDest->removePredecessor(SI->getParent()); + SI->removeCase(i); + --i; --e; + continue; + } + + // Otherwise, check to see if the switch only branches to one destination. + // We do this by reseting "TheOnlyDest" to null when we find two non-equal + // destinations. + if (i.getCaseSuccessor() != TheOnlyDest) TheOnlyDest = 0; + } + + if (CI && !TheOnlyDest) { + // Branching on a constant, but not any of the cases, go to the default + // successor. + TheOnlyDest = SI->getDefaultDest(); + } + + // If we found a single destination that we can fold the switch into, do so + // now. + if (TheOnlyDest) { + // Insert the new branch. + Builder.CreateBr(TheOnlyDest); + BasicBlock *BB = SI->getParent(); + + // Remove entries from PHI nodes which we no longer branch to... + for (unsigned i = 0, e = SI->getNumSuccessors(); i != e; ++i) { + // Found case matching a constant operand? + BasicBlock *Succ = SI->getSuccessor(i); + if (Succ == TheOnlyDest) + TheOnlyDest = 0; // Don't modify the first branch to TheOnlyDest + else + Succ->removePredecessor(BB); + } + + // Delete the old switch. + Value *Cond = SI->getCondition(); + SI->eraseFromParent(); + if (DeleteDeadConditions) + RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI); + return true; + } + + if (SI->getNumCases() == 1) { + // Otherwise, we can fold this switch into a conditional branch + // instruction if it has only one non-default destination. + SwitchInst::CaseIt FirstCase = SI->case_begin(); + Value *Cond = Builder.CreateICmpEQ(SI->getCondition(), + FirstCase.getCaseValue(), "cond"); + + // Insert the new branch. + BranchInst *NewBr = Builder.CreateCondBr(Cond, + FirstCase.getCaseSuccessor(), + SI->getDefaultDest()); + MDNode* MD = SI->getMetadata(LLVMContext::MD_prof); + if (MD && MD->getNumOperands() == 3) { + ConstantInt *SICase = dyn_cast<ConstantInt>(MD->getOperand(2)); + ConstantInt *SIDef = dyn_cast<ConstantInt>(MD->getOperand(1)); + assert(SICase && SIDef); + // The TrueWeight should be the weight for the single case of SI. + NewBr->setMetadata(LLVMContext::MD_prof, + MDBuilder(BB->getContext()). + createBranchWeights(SICase->getValue().getZExtValue(), + SIDef->getValue().getZExtValue())); + } + + // Delete the old switch. + SI->eraseFromParent(); + return true; + } + return false; + } + + if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(T)) { + // indirectbr blockaddress(@F, @BB) -> br label @BB + if (BlockAddress *BA = + dyn_cast<BlockAddress>(IBI->getAddress()->stripPointerCasts())) { + BasicBlock *TheOnlyDest = BA->getBasicBlock(); + // Insert the new branch. + Builder.CreateBr(TheOnlyDest); + + for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { + if (IBI->getDestination(i) == TheOnlyDest) + TheOnlyDest = 0; + else + IBI->getDestination(i)->removePredecessor(IBI->getParent()); + } + Value *Address = IBI->getAddress(); + IBI->eraseFromParent(); + if (DeleteDeadConditions) + RecursivelyDeleteTriviallyDeadInstructions(Address, TLI); + + // If we didn't find our destination in the IBI successor list, then we + // have undefined behavior. Replace the unconditional branch with an + // 'unreachable' instruction. + if (TheOnlyDest) { + BB->getTerminator()->eraseFromParent(); + new UnreachableInst(BB->getContext(), BB); + } + + return true; + } + } + + return false; +} + + +//===----------------------------------------------------------------------===// +// Local dead code elimination. +// + +/// isInstructionTriviallyDead - Return true if the result produced by the +/// instruction is not used, and the instruction has no side effects. +/// +bool llvm::isInstructionTriviallyDead(Instruction *I, + const TargetLibraryInfo *TLI) { + if (!I->use_empty() || isa<TerminatorInst>(I)) return false; + + // We don't want the landingpad instruction removed by anything this general. + if (isa<LandingPadInst>(I)) + return false; + + // We don't want debug info removed by anything this general, unless + // debug info is empty. + if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(I)) { + if (DDI->getAddress()) + return false; + return true; + } + if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(I)) { + if (DVI->getValue()) + return false; + return true; + } + + if (!I->mayHaveSideEffects()) return true; + + // Special case intrinsics that "may have side effects" but can be deleted + // when dead. + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + // Safe to delete llvm.stacksave if dead. + if (II->getIntrinsicID() == Intrinsic::stacksave) + return true; + + // Lifetime intrinsics are dead when their right-hand is undef. + if (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end) + return isa<UndefValue>(II->getArgOperand(1)); + } + + if (isAllocLikeFn(I, TLI)) return true; + + if (CallInst *CI = isFreeCall(I, TLI)) + if (Constant *C = dyn_cast<Constant>(CI->getArgOperand(0))) + return C->isNullValue() || isa<UndefValue>(C); + + return false; +} + +/// RecursivelyDeleteTriviallyDeadInstructions - If the specified value is a +/// trivially dead instruction, delete it. If that makes any of its operands +/// trivially dead, delete them too, recursively. Return true if any +/// instructions were deleted. +bool +llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V, + const TargetLibraryInfo *TLI) { + Instruction *I = dyn_cast<Instruction>(V); + if (!I || !I->use_empty() || !isInstructionTriviallyDead(I, TLI)) + return false; + + SmallVector<Instruction*, 16> DeadInsts; + DeadInsts.push_back(I); + + do { + I = DeadInsts.pop_back_val(); + + // Null out all of the instruction's operands to see if any operand becomes + // dead as we go. + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + Value *OpV = I->getOperand(i); + I->setOperand(i, 0); + + if (!OpV->use_empty()) continue; + + // If the operand is an instruction that became dead as we nulled out the + // operand, and if it is 'trivially' dead, delete it in a future loop + // iteration. + if (Instruction *OpI = dyn_cast<Instruction>(OpV)) + if (isInstructionTriviallyDead(OpI, TLI)) + DeadInsts.push_back(OpI); + } + + I->eraseFromParent(); + } while (!DeadInsts.empty()); + + return true; +} + +/// areAllUsesEqual - Check whether the uses of a value are all the same. +/// This is similar to Instruction::hasOneUse() except this will also return +/// true when there are no uses or multiple uses that all refer to the same +/// value. +static bool areAllUsesEqual(Instruction *I) { + Value::use_iterator UI = I->use_begin(); + Value::use_iterator UE = I->use_end(); + if (UI == UE) + return true; + + User *TheUse = *UI; + for (++UI; UI != UE; ++UI) { + if (*UI != TheUse) + return false; + } + return true; +} + +/// RecursivelyDeleteDeadPHINode - If the specified value is an effectively +/// dead PHI node, due to being a def-use chain of single-use nodes that +/// either forms a cycle or is terminated by a trivially dead instruction, +/// delete it. If that makes any of its operands trivially dead, delete them +/// too, recursively. Return true if a change was made. +bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN, + const TargetLibraryInfo *TLI) { + SmallPtrSet<Instruction*, 4> Visited; + for (Instruction *I = PN; areAllUsesEqual(I) && !I->mayHaveSideEffects(); + I = cast<Instruction>(*I->use_begin())) { + if (I->use_empty()) + return RecursivelyDeleteTriviallyDeadInstructions(I, TLI); + + // If we find an instruction more than once, we're on a cycle that + // won't prove fruitful. + if (!Visited.insert(I)) { + // Break the cycle and delete the instruction and its operands. + I->replaceAllUsesWith(UndefValue::get(I->getType())); + (void)RecursivelyDeleteTriviallyDeadInstructions(I, TLI); + return true; + } + } + return false; +} + +/// SimplifyInstructionsInBlock - Scan the specified basic block and try to +/// simplify any instructions in it and recursively delete dead instructions. +/// +/// This returns true if it changed the code, note that it can delete +/// instructions in other blocks as well in this block. +bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const DataLayout *TD, + const TargetLibraryInfo *TLI) { + bool MadeChange = false; + +#ifndef NDEBUG + // In debug builds, ensure that the terminator of the block is never replaced + // or deleted by these simplifications. The idea of simplification is that it + // cannot introduce new instructions, and there is no way to replace the + // terminator of a block without introducing a new instruction. + AssertingVH<Instruction> TerminatorVH(--BB->end()); +#endif + + for (BasicBlock::iterator BI = BB->begin(), E = --BB->end(); BI != E; ) { + assert(!BI->isTerminator()); + Instruction *Inst = BI++; + + WeakVH BIHandle(BI); + if (recursivelySimplifyInstruction(Inst, TD, TLI)) { + MadeChange = true; + if (BIHandle != BI) + BI = BB->begin(); + continue; + } + + MadeChange |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI); + if (BIHandle != BI) + BI = BB->begin(); + } + return MadeChange; +} + +//===----------------------------------------------------------------------===// +// Control Flow Graph Restructuring. +// + + +/// RemovePredecessorAndSimplify - Like BasicBlock::removePredecessor, this +/// method is called when we're about to delete Pred as a predecessor of BB. If +/// BB contains any PHI nodes, this drops the entries in the PHI nodes for Pred. +/// +/// Unlike the removePredecessor method, this attempts to simplify uses of PHI +/// nodes that collapse into identity values. For example, if we have: +/// x = phi(1, 0, 0, 0) +/// y = and x, z +/// +/// .. and delete the predecessor corresponding to the '1', this will attempt to +/// recursively fold the and to 0. +void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred, + DataLayout *TD) { + // This only adjusts blocks with PHI nodes. + if (!isa<PHINode>(BB->begin())) + return; + + // Remove the entries for Pred from the PHI nodes in BB, but do not simplify + // them down. This will leave us with single entry phi nodes and other phis + // that can be removed. + BB->removePredecessor(Pred, true); + + WeakVH PhiIt = &BB->front(); + while (PHINode *PN = dyn_cast<PHINode>(PhiIt)) { + PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt)); + Value *OldPhiIt = PhiIt; + + if (!recursivelySimplifyInstruction(PN, TD)) + continue; + + // If recursive simplification ended up deleting the next PHI node we would + // iterate to, then our iterator is invalid, restart scanning from the top + // of the block. + if (PhiIt != OldPhiIt) PhiIt = &BB->front(); + } +} + + +/// MergeBasicBlockIntoOnlyPred - DestBB is a block with one predecessor and its +/// predecessor is known to have one successor (DestBB!). Eliminate the edge +/// between them, moving the instructions in the predecessor into DestBB and +/// deleting the predecessor block. +/// +void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) { + // If BB has single-entry PHI nodes, fold them. + while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) { + Value *NewVal = PN->getIncomingValue(0); + // Replace self referencing PHI with undef, it must be dead. + if (NewVal == PN) NewVal = UndefValue::get(PN->getType()); + PN->replaceAllUsesWith(NewVal); + PN->eraseFromParent(); + } + + BasicBlock *PredBB = DestBB->getSinglePredecessor(); + assert(PredBB && "Block doesn't have a single predecessor!"); + + // Zap anything that took the address of DestBB. Not doing this will give the + // address an invalid value. + if (DestBB->hasAddressTaken()) { + BlockAddress *BA = BlockAddress::get(DestBB); + Constant *Replacement = + ConstantInt::get(llvm::Type::getInt32Ty(BA->getContext()), 1); + BA->replaceAllUsesWith(ConstantExpr::getIntToPtr(Replacement, + BA->getType())); + BA->destroyConstant(); + } + + // Anything that branched to PredBB now branches to DestBB. + PredBB->replaceAllUsesWith(DestBB); + + // Splice all the instructions from PredBB to DestBB. + PredBB->getTerminator()->eraseFromParent(); + DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList()); + + if (P) { + DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>(); + if (DT) { + BasicBlock *PredBBIDom = DT->getNode(PredBB)->getIDom()->getBlock(); + DT->changeImmediateDominator(DestBB, PredBBIDom); + DT->eraseNode(PredBB); + } + } + // Nuke BB. + PredBB->eraseFromParent(); +} + +/// CanMergeValues - Return true if we can choose one of these values to use +/// in place of the other. Note that we will always choose the non-undef +/// value to keep. +static bool CanMergeValues(Value *First, Value *Second) { + return First == Second || isa<UndefValue>(First) || isa<UndefValue>(Second); +} + +/// CanPropagatePredecessorsForPHIs - Return true if we can fold BB, an +/// almost-empty BB ending in an unconditional branch to Succ, into Succ. +/// +/// Assumption: Succ is the single successor for BB. +/// +static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { + assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!"); + + DEBUG(dbgs() << "Looking to fold " << BB->getName() << " into " + << Succ->getName() << "\n"); + // Shortcut, if there is only a single predecessor it must be BB and merging + // is always safe + if (Succ->getSinglePredecessor()) return true; + + // Make a list of the predecessors of BB + SmallPtrSet<BasicBlock*, 16> BBPreds(pred_begin(BB), pred_end(BB)); + + // Look at all the phi nodes in Succ, to see if they present a conflict when + // merging these blocks + for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) { + PHINode *PN = cast<PHINode>(I); + + // If the incoming value from BB is again a PHINode in + // BB which has the same incoming value for *PI as PN does, we can + // merge the phi nodes and then the blocks can still be merged + PHINode *BBPN = dyn_cast<PHINode>(PN->getIncomingValueForBlock(BB)); + if (BBPN && BBPN->getParent() == BB) { + for (unsigned PI = 0, PE = PN->getNumIncomingValues(); PI != PE; ++PI) { + BasicBlock *IBB = PN->getIncomingBlock(PI); + if (BBPreds.count(IBB) && + !CanMergeValues(BBPN->getIncomingValueForBlock(IBB), + PN->getIncomingValue(PI))) { + DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in " + << Succ->getName() << " is conflicting with " + << BBPN->getName() << " with regard to common predecessor " + << IBB->getName() << "\n"); + return false; + } + } + } else { + Value* Val = PN->getIncomingValueForBlock(BB); + for (unsigned PI = 0, PE = PN->getNumIncomingValues(); PI != PE; ++PI) { + // See if the incoming value for the common predecessor is equal to the + // one for BB, in which case this phi node will not prevent the merging + // of the block. + BasicBlock *IBB = PN->getIncomingBlock(PI); + if (BBPreds.count(IBB) && + !CanMergeValues(Val, PN->getIncomingValue(PI))) { + DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in " + << Succ->getName() << " is conflicting with regard to common " + << "predecessor " << IBB->getName() << "\n"); + return false; + } + } + } + } + + return true; +} + +typedef SmallVector<BasicBlock *, 16> PredBlockVector; +typedef DenseMap<BasicBlock *, Value *> IncomingValueMap; + +/// \brief Determines the value to use as the phi node input for a block. +/// +/// Select between \p OldVal any value that we know flows from \p BB +/// to a particular phi on the basis of which one (if either) is not +/// undef. Update IncomingValues based on the selected value. +/// +/// \param OldVal The value we are considering selecting. +/// \param BB The block that the value flows in from. +/// \param IncomingValues A map from block-to-value for other phi inputs +/// that we have examined. +/// +/// \returns the selected value. +static Value *selectIncomingValueForBlock(Value *OldVal, BasicBlock *BB, + IncomingValueMap &IncomingValues) { + if (!isa<UndefValue>(OldVal)) { + assert((!IncomingValues.count(BB) || + IncomingValues.find(BB)->second == OldVal) && + "Expected OldVal to match incoming value from BB!"); + + IncomingValues.insert(std::make_pair(BB, OldVal)); + return OldVal; + } + + IncomingValueMap::const_iterator It = IncomingValues.find(BB); + if (It != IncomingValues.end()) return It->second; + + return OldVal; +} + +/// \brief Create a map from block to value for the operands of a +/// given phi. +/// +/// Create a map from block to value for each non-undef value flowing +/// into \p PN. +/// +/// \param PN The phi we are collecting the map for. +/// \param IncomingValues [out] The map from block to value for this phi. +static void gatherIncomingValuesToPhi(PHINode *PN, + IncomingValueMap &IncomingValues) { + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + BasicBlock *BB = PN->getIncomingBlock(i); + Value *V = PN->getIncomingValue(i); + + if (!isa<UndefValue>(V)) + IncomingValues.insert(std::make_pair(BB, V)); + } +} + +/// \brief Replace the incoming undef values to a phi with the values +/// from a block-to-value map. +/// +/// \param PN The phi we are replacing the undefs in. +/// \param IncomingValues A map from block to value. +static void replaceUndefValuesInPhi(PHINode *PN, + const IncomingValueMap &IncomingValues) { + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *V = PN->getIncomingValue(i); + + if (!isa<UndefValue>(V)) continue; + + BasicBlock *BB = PN->getIncomingBlock(i); + IncomingValueMap::const_iterator It = IncomingValues.find(BB); + if (It == IncomingValues.end()) continue; + + PN->setIncomingValue(i, It->second); + } +} + +/// \brief Replace a value flowing from a block to a phi with +/// potentially multiple instances of that value flowing from the +/// block's predecessors to the phi. +/// +/// \param BB The block with the value flowing into the phi. +/// \param BBPreds The predecessors of BB. +/// \param PN The phi that we are updating. +static void redirectValuesFromPredecessorsToPhi(BasicBlock *BB, + const PredBlockVector &BBPreds, + PHINode *PN) { + Value *OldVal = PN->removeIncomingValue(BB, false); + assert(OldVal && "No entry in PHI for Pred BB!"); + + IncomingValueMap IncomingValues; + + // We are merging two blocks - BB, and the block containing PN - and + // as a result we need to redirect edges from the predecessors of BB + // to go to the block containing PN, and update PN + // accordingly. Since we allow merging blocks in the case where the + // predecessor and successor blocks both share some predecessors, + // and where some of those common predecessors might have undef + // values flowing into PN, we want to rewrite those values to be + // consistent with the non-undef values. + + gatherIncomingValuesToPhi(PN, IncomingValues); + + // If this incoming value is one of the PHI nodes in BB, the new entries + // in the PHI node are the entries from the old PHI. + if (isa<PHINode>(OldVal) && cast<PHINode>(OldVal)->getParent() == BB) { + PHINode *OldValPN = cast<PHINode>(OldVal); + for (unsigned i = 0, e = OldValPN->getNumIncomingValues(); i != e; ++i) { + // Note that, since we are merging phi nodes and BB and Succ might + // have common predecessors, we could end up with a phi node with + // identical incoming branches. This will be cleaned up later (and + // will trigger asserts if we try to clean it up now, without also + // simplifying the corresponding conditional branch). + BasicBlock *PredBB = OldValPN->getIncomingBlock(i); + Value *PredVal = OldValPN->getIncomingValue(i); + Value *Selected = selectIncomingValueForBlock(PredVal, PredBB, + IncomingValues); + + // And add a new incoming value for this predecessor for the + // newly retargeted branch. + PN->addIncoming(Selected, PredBB); + } + } else { + for (unsigned i = 0, e = BBPreds.size(); i != e; ++i) { + // Update existing incoming values in PN for this + // predecessor of BB. + BasicBlock *PredBB = BBPreds[i]; + Value *Selected = selectIncomingValueForBlock(OldVal, PredBB, + IncomingValues); + + // And add a new incoming value for this predecessor for the + // newly retargeted branch. + PN->addIncoming(Selected, PredBB); + } + } + + replaceUndefValuesInPhi(PN, IncomingValues); +} + +/// TryToSimplifyUncondBranchFromEmptyBlock - BB is known to contain an +/// unconditional branch, and contains no instructions other than PHI nodes, +/// potential side-effect free intrinsics and the branch. If possible, +/// eliminate BB by rewriting all the predecessors to branch to the successor +/// block and return true. If we can't transform, return false. +bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) { + assert(BB != &BB->getParent()->getEntryBlock() && + "TryToSimplifyUncondBranchFromEmptyBlock called on entry block!"); + + // We can't eliminate infinite loops. + BasicBlock *Succ = cast<BranchInst>(BB->getTerminator())->getSuccessor(0); + if (BB == Succ) return false; + + // Check to see if merging these blocks would cause conflicts for any of the + // phi nodes in BB or Succ. If not, we can safely merge. + if (!CanPropagatePredecessorsForPHIs(BB, Succ)) return false; + + // Check for cases where Succ has multiple predecessors and a PHI node in BB + // has uses which will not disappear when the PHI nodes are merged. It is + // possible to handle such cases, but difficult: it requires checking whether + // BB dominates Succ, which is non-trivial to calculate in the case where + // Succ has multiple predecessors. Also, it requires checking whether + // constructing the necessary self-referential PHI node doesn't introduce any + // conflicts; this isn't too difficult, but the previous code for doing this + // was incorrect. + // + // Note that if this check finds a live use, BB dominates Succ, so BB is + // something like a loop pre-header (or rarely, a part of an irreducible CFG); + // folding the branch isn't profitable in that case anyway. + if (!Succ->getSinglePredecessor()) { + BasicBlock::iterator BBI = BB->begin(); + while (isa<PHINode>(*BBI)) { + for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end(); + UI != E; ++UI) { + if (PHINode* PN = dyn_cast<PHINode>(*UI)) { + if (PN->getIncomingBlock(UI) != BB) + return false; + } else { + return false; + } + } + ++BBI; + } + } + + DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB); + + if (isa<PHINode>(Succ->begin())) { + // If there is more than one pred of succ, and there are PHI nodes in + // the successor, then we need to add incoming edges for the PHI nodes + // + const PredBlockVector BBPreds(pred_begin(BB), pred_end(BB)); + + // Loop over all of the PHI nodes in the successor of BB. + for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) { + PHINode *PN = cast<PHINode>(I); + + redirectValuesFromPredecessorsToPhi(BB, BBPreds, PN); + } + } + + if (Succ->getSinglePredecessor()) { + // BB is the only predecessor of Succ, so Succ will end up with exactly + // the same predecessors BB had. + + // Copy over any phi, debug or lifetime instruction. + BB->getTerminator()->eraseFromParent(); + Succ->getInstList().splice(Succ->getFirstNonPHI(), BB->getInstList()); + } else { + while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) { + // We explicitly check for such uses in CanPropagatePredecessorsForPHIs. + assert(PN->use_empty() && "There shouldn't be any uses here!"); + PN->eraseFromParent(); + } + } + + // Everything that jumped to BB now goes to Succ. + BB->replaceAllUsesWith(Succ); + if (!Succ->hasName()) Succ->takeName(BB); + BB->eraseFromParent(); // Delete the old basic block. + return true; +} + +/// EliminateDuplicatePHINodes - Check for and eliminate duplicate PHI +/// nodes in this block. This doesn't try to be clever about PHI nodes +/// which differ only in the order of the incoming values, but instcombine +/// orders them so it usually won't matter. +/// +bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { + bool Changed = false; + + // This implementation doesn't currently consider undef operands + // specially. Theoretically, two phis which are identical except for + // one having an undef where the other doesn't could be collapsed. + + // Map from PHI hash values to PHI nodes. If multiple PHIs have + // the same hash value, the element is the first PHI in the + // linked list in CollisionMap. + DenseMap<uintptr_t, PHINode *> HashMap; + + // Maintain linked lists of PHI nodes with common hash values. + DenseMap<PHINode *, PHINode *> CollisionMap; + + // Examine each PHI. + for (BasicBlock::iterator I = BB->begin(); + PHINode *PN = dyn_cast<PHINode>(I++); ) { + // Compute a hash value on the operands. Instcombine will likely have sorted + // them, which helps expose duplicates, but we have to check all the + // operands to be safe in case instcombine hasn't run. + uintptr_t Hash = 0; + // This hash algorithm is quite weak as hash functions go, but it seems + // to do a good enough job for this particular purpose, and is very quick. + for (User::op_iterator I = PN->op_begin(), E = PN->op_end(); I != E; ++I) { + Hash ^= reinterpret_cast<uintptr_t>(static_cast<Value *>(*I)); + Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7)); + } + for (PHINode::block_iterator I = PN->block_begin(), E = PN->block_end(); + I != E; ++I) { + Hash ^= reinterpret_cast<uintptr_t>(static_cast<BasicBlock *>(*I)); + Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7)); + } + // Avoid colliding with the DenseMap sentinels ~0 and ~0-1. + Hash >>= 1; + // If we've never seen this hash value before, it's a unique PHI. + std::pair<DenseMap<uintptr_t, PHINode *>::iterator, bool> Pair = + HashMap.insert(std::make_pair(Hash, PN)); + if (Pair.second) continue; + // Otherwise it's either a duplicate or a hash collision. + for (PHINode *OtherPN = Pair.first->second; ; ) { + if (OtherPN->isIdenticalTo(PN)) { + // A duplicate. Replace this PHI with its duplicate. + PN->replaceAllUsesWith(OtherPN); + PN->eraseFromParent(); + Changed = true; + break; + } + // A non-duplicate hash collision. + DenseMap<PHINode *, PHINode *>::iterator I = CollisionMap.find(OtherPN); + if (I == CollisionMap.end()) { + // Set this PHI to be the head of the linked list of colliding PHIs. + PHINode *Old = Pair.first->second; + Pair.first->second = PN; + CollisionMap[PN] = Old; + break; + } + // Proceed to the next PHI in the list. + OtherPN = I->second; + } + } + + return Changed; +} + +/// enforceKnownAlignment - If the specified pointer points to an object that +/// we control, modify the object's alignment to PrefAlign. This isn't +/// often possible though. If alignment is important, a more reliable approach +/// is to simply align all global variables and allocation instructions to +/// their preferred alignment from the beginning. +/// +static unsigned enforceKnownAlignment(Value *V, unsigned Align, + unsigned PrefAlign, const DataLayout *TD) { + V = V->stripPointerCasts(); + + if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) { + // If the preferred alignment is greater than the natural stack alignment + // then don't round up. This avoids dynamic stack realignment. + if (TD && TD->exceedsNaturalStackAlignment(PrefAlign)) + return Align; + // If there is a requested alignment and if this is an alloca, round up. + if (AI->getAlignment() >= PrefAlign) + return AI->getAlignment(); + AI->setAlignment(PrefAlign); + return PrefAlign; + } + + if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) { + // If there is a large requested alignment and we can, bump up the alignment + // of the global. + if (GV->isDeclaration()) return Align; + // If the memory we set aside for the global may not be the memory used by + // the final program then it is impossible for us to reliably enforce the + // preferred alignment. + if (GV->isWeakForLinker()) return Align; + + if (GV->getAlignment() >= PrefAlign) + return GV->getAlignment(); + // We can only increase the alignment of the global if it has no alignment + // specified or if it is not assigned a section. If it is assigned a + // section, the global could be densely packed with other objects in the + // section, increasing the alignment could cause padding issues. + if (!GV->hasSection() || GV->getAlignment() == 0) + GV->setAlignment(PrefAlign); + return GV->getAlignment(); + } + + return Align; +} + +/// getOrEnforceKnownAlignment - If the specified pointer has an alignment that +/// we can determine, return it, otherwise return 0. If PrefAlign is specified, +/// and it is more than the alignment of the ultimate object, see if we can +/// increase the alignment of the ultimate object, making this check succeed. +unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, + const DataLayout *DL) { + assert(V->getType()->isPointerTy() && + "getOrEnforceKnownAlignment expects a pointer!"); + unsigned BitWidth = DL ? DL->getPointerTypeSizeInBits(V->getType()) : 64; + + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + ComputeMaskedBits(V, KnownZero, KnownOne, DL); + unsigned TrailZ = KnownZero.countTrailingOnes(); + + // Avoid trouble with ridiculously large TrailZ values, such as + // those computed from a null pointer. + TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1)); + + unsigned Align = 1u << std::min(BitWidth - 1, TrailZ); + + // LLVM doesn't support alignments larger than this currently. + Align = std::min(Align, +Value::MaximumAlignment); + + if (PrefAlign > Align) + Align = enforceKnownAlignment(V, Align, PrefAlign, DL); + + // We don't need to make any adjustment. + return Align; +} + +///===---------------------------------------------------------------------===// +/// Dbg Intrinsic utilities +/// + +/// See if there is a dbg.value intrinsic for DIVar before I. +static bool LdStHasDebugValue(DIVariable &DIVar, Instruction *I) { + // Since we can't guarantee that the original dbg.declare instrinsic + // is removed by LowerDbgDeclare(), we need to make sure that we are + // not inserting the same dbg.value intrinsic over and over. + llvm::BasicBlock::InstListType::iterator PrevI(I); + if (PrevI != I->getParent()->getInstList().begin()) { + --PrevI; + if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(PrevI)) + if (DVI->getValue() == I->getOperand(0) && + DVI->getOffset() == 0 && + DVI->getVariable() == DIVar) + return true; + } + return false; +} + +/// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value +/// that has an associated llvm.dbg.decl intrinsic. +bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, + StoreInst *SI, DIBuilder &Builder) { + DIVariable DIVar(DDI->getVariable()); + assert((!DIVar || DIVar.isVariable()) && + "Variable in DbgDeclareInst should be either null or a DIVariable."); + if (!DIVar) + return false; + + if (LdStHasDebugValue(DIVar, SI)) + return true; + + Instruction *DbgVal = NULL; + // If an argument is zero extended then use argument directly. The ZExt + // may be zapped by an optimization pass in future. + Argument *ExtendedArg = NULL; + if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0))) + ExtendedArg = dyn_cast<Argument>(ZExt->getOperand(0)); + if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0))) + ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0)); + if (ExtendedArg) + DbgVal = Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, SI); + else + DbgVal = Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, SI); + + // Propagate any debug metadata from the store onto the dbg.value. + DebugLoc SIDL = SI->getDebugLoc(); + if (!SIDL.isUnknown()) + DbgVal->setDebugLoc(SIDL); + // Otherwise propagate debug metadata from dbg.declare. + else + DbgVal->setDebugLoc(DDI->getDebugLoc()); + return true; +} + +/// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value +/// that has an associated llvm.dbg.decl intrinsic. +bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, + LoadInst *LI, DIBuilder &Builder) { + DIVariable DIVar(DDI->getVariable()); + assert((!DIVar || DIVar.isVariable()) && + "Variable in DbgDeclareInst should be either null or a DIVariable."); + if (!DIVar) + return false; + + if (LdStHasDebugValue(DIVar, LI)) + return true; + + Instruction *DbgVal = + Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0, + DIVar, LI); + + // Propagate any debug metadata from the store onto the dbg.value. + DebugLoc LIDL = LI->getDebugLoc(); + if (!LIDL.isUnknown()) + DbgVal->setDebugLoc(LIDL); + // Otherwise propagate debug metadata from dbg.declare. + else + DbgVal->setDebugLoc(DDI->getDebugLoc()); + return true; +} + +/// LowerDbgDeclare - Lowers llvm.dbg.declare intrinsics into appropriate set +/// of llvm.dbg.value intrinsics. +bool llvm::LowerDbgDeclare(Function &F) { + DIBuilder DIB(*F.getParent()); + SmallVector<DbgDeclareInst *, 4> Dbgs; + for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) + for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) { + if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) + Dbgs.push_back(DDI); + } + if (Dbgs.empty()) + return false; + + for (SmallVectorImpl<DbgDeclareInst *>::iterator I = Dbgs.begin(), + E = Dbgs.end(); I != E; ++I) { + DbgDeclareInst *DDI = *I; + AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress()); + // If this is an alloca for a scalar variable, insert a dbg.value + // at each load and store to the alloca and erase the dbg.declare. + if (AI && !AI->isArrayAllocation()) { + + // We only remove the dbg.declare intrinsic if all uses are + // converted to dbg.value intrinsics. + bool RemoveDDI = true; + for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); + UI != E; ++UI) + if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) + ConvertDebugDeclareToDebugValue(DDI, SI, DIB); + else if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) + ConvertDebugDeclareToDebugValue(DDI, LI, DIB); + else + RemoveDDI = false; + if (RemoveDDI) + DDI->eraseFromParent(); + } + } + return true; +} + +/// FindAllocaDbgDeclare - Finds the llvm.dbg.declare intrinsic describing the +/// alloca 'V', if any. +DbgDeclareInst *llvm::FindAllocaDbgDeclare(Value *V) { + if (MDNode *DebugNode = MDNode::getIfExists(V->getContext(), V)) + for (Value::use_iterator UI = DebugNode->use_begin(), + E = DebugNode->use_end(); UI != E; ++UI) + if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI)) + return DDI; + + return 0; +} + +bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress, + DIBuilder &Builder) { + DbgDeclareInst *DDI = FindAllocaDbgDeclare(AI); + if (!DDI) + return false; + DIVariable DIVar(DDI->getVariable()); + assert((!DIVar || DIVar.isVariable()) && + "Variable in DbgDeclareInst should be either null or a DIVariable."); + if (!DIVar) + return false; + + // Create a copy of the original DIDescriptor for user variable, appending + // "deref" operation to a list of address elements, as new llvm.dbg.declare + // will take a value storing address of the memory for variable, not + // alloca itself. + Type *Int64Ty = Type::getInt64Ty(AI->getContext()); + SmallVector<Value*, 4> NewDIVarAddress; + if (DIVar.hasComplexAddress()) { + for (unsigned i = 0, n = DIVar.getNumAddrElements(); i < n; ++i) { + NewDIVarAddress.push_back( + ConstantInt::get(Int64Ty, DIVar.getAddrElement(i))); + } + } + NewDIVarAddress.push_back(ConstantInt::get(Int64Ty, DIBuilder::OpDeref)); + DIVariable NewDIVar = Builder.createComplexVariable( + DIVar.getTag(), DIVar.getContext(), DIVar.getName(), + DIVar.getFile(), DIVar.getLineNumber(), DIVar.getType(), + NewDIVarAddress, DIVar.getArgNumber()); + + // Insert llvm.dbg.declare in the same basic block as the original alloca, + // and remove old llvm.dbg.declare. + BasicBlock *BB = AI->getParent(); + Builder.insertDeclare(NewAllocaAddress, NewDIVar, BB); + DDI->eraseFromParent(); + return true; +} + +/// changeToUnreachable - Insert an unreachable instruction before the specified +/// instruction, making it and the rest of the code in the block dead. +static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) { + BasicBlock *BB = I->getParent(); + // Loop over all of the successors, removing BB's entry from any PHI + // nodes. + for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) + (*SI)->removePredecessor(BB); + + // Insert a call to llvm.trap right before this. This turns the undefined + // behavior into a hard fail instead of falling through into random code. + if (UseLLVMTrap) { + Function *TrapFn = + Intrinsic::getDeclaration(BB->getParent()->getParent(), Intrinsic::trap); + CallInst *CallTrap = CallInst::Create(TrapFn, "", I); + CallTrap->setDebugLoc(I->getDebugLoc()); + } + new UnreachableInst(I->getContext(), I); + + // All instructions after this are dead. + BasicBlock::iterator BBI = I, BBE = BB->end(); + while (BBI != BBE) { + if (!BBI->use_empty()) + BBI->replaceAllUsesWith(UndefValue::get(BBI->getType())); + BB->getInstList().erase(BBI++); + } +} + +/// changeToCall - Convert the specified invoke into a normal call. +static void changeToCall(InvokeInst *II) { + SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3); + CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II); + NewCall->takeName(II); + NewCall->setCallingConv(II->getCallingConv()); + NewCall->setAttributes(II->getAttributes()); + NewCall->setDebugLoc(II->getDebugLoc()); + II->replaceAllUsesWith(NewCall); + + // Follow the call by a branch to the normal destination. + BranchInst::Create(II->getNormalDest(), II); + + // Update PHI nodes in the unwind destination + II->getUnwindDest()->removePredecessor(II->getParent()); + II->eraseFromParent(); +} + +static bool markAliveBlocks(BasicBlock *BB, + SmallPtrSet<BasicBlock*, 128> &Reachable) { + + SmallVector<BasicBlock*, 128> Worklist; + Worklist.push_back(BB); + Reachable.insert(BB); + bool Changed = false; + do { + BB = Worklist.pop_back_val(); + + // Do a quick scan of the basic block, turning any obviously unreachable + // instructions into LLVM unreachable insts. The instruction combining pass + // canonicalizes unreachable insts into stores to null or undef. + for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;++BBI){ + if (CallInst *CI = dyn_cast<CallInst>(BBI)) { + if (CI->doesNotReturn()) { + // If we found a call to a no-return function, insert an unreachable + // instruction after it. Make sure there isn't *already* one there + // though. + ++BBI; + if (!isa<UnreachableInst>(BBI)) { + // Don't insert a call to llvm.trap right before the unreachable. + changeToUnreachable(BBI, false); + Changed = true; + } + break; + } + } + + // Store to undef and store to null are undefined and used to signal that + // they should be changed to unreachable by passes that can't modify the + // CFG. + if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) { + // Don't touch volatile stores. + if (SI->isVolatile()) continue; + + Value *Ptr = SI->getOperand(1); + + if (isa<UndefValue>(Ptr) || + (isa<ConstantPointerNull>(Ptr) && + SI->getPointerAddressSpace() == 0)) { + changeToUnreachable(SI, true); + Changed = true; + break; + } + } + } + + // Turn invokes that call 'nounwind' functions into ordinary calls. + if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) { + Value *Callee = II->getCalledValue(); + if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) { + changeToUnreachable(II, true); + Changed = true; + } else if (II->doesNotThrow()) { + if (II->use_empty() && II->onlyReadsMemory()) { + // jump to the normal destination branch. + BranchInst::Create(II->getNormalDest(), II); + II->getUnwindDest()->removePredecessor(II->getParent()); + II->eraseFromParent(); + } else + changeToCall(II); + Changed = true; + } + } + + Changed |= ConstantFoldTerminator(BB, true); + for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) + if (Reachable.insert(*SI)) + Worklist.push_back(*SI); + } while (!Worklist.empty()); + return Changed; +} + +/// removeUnreachableBlocksFromFn - Remove blocks that are not reachable, even +/// if they are in a dead cycle. Return true if a change was made, false +/// otherwise. +bool llvm::removeUnreachableBlocks(Function &F) { + SmallPtrSet<BasicBlock*, 128> Reachable; + bool Changed = markAliveBlocks(F.begin(), Reachable); + + // If there are unreachable blocks in the CFG... + if (Reachable.size() == F.size()) + return Changed; + + assert(Reachable.size() < F.size()); + NumRemoved += F.size()-Reachable.size(); + + // Loop over all of the basic blocks that are not reachable, dropping all of + // their internal references... + for (Function::iterator BB = ++F.begin(), E = F.end(); BB != E; ++BB) { + if (Reachable.count(BB)) + continue; + + for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) + if (Reachable.count(*SI)) + (*SI)->removePredecessor(BB); + BB->dropAllReferences(); + } + + for (Function::iterator I = ++F.begin(); I != F.end();) + if (!Reachable.count(I)) + I = F.getBasicBlockList().erase(I); + else + ++I; + + return true; +} diff --git a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp new file mode 100644 index 000000000000..6d5f16ca333b --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp @@ -0,0 +1,801 @@ +//===- LoopSimplify.cpp - Loop Canonicalization Pass ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass performs several transformations to transform natural loops into a +// simpler form, which makes subsequent analyses and transformations simpler and +// more effective. +// +// Loop pre-header insertion guarantees that there is a single, non-critical +// entry edge from outside of the loop to the loop header. This simplifies a +// number of analyses and transformations, such as LICM. +// +// Loop exit-block insertion guarantees that all exit blocks from the loop +// (blocks which are outside of the loop that have predecessors inside of the +// loop) only have predecessors from inside of the loop (and are thus dominated +// by the loop header). This simplifies transformations such as store-sinking +// that are built into LICM. +// +// This pass also guarantees that loops will have exactly one backedge. +// +// Indirectbr instructions introduce several complications. If the loop +// contains or is entered by an indirectbr instruction, it may not be possible +// to transform the loop and make these guarantees. Client code should check +// that these conditions are true before relying on them. +// +// Note that the simplifycfg pass will clean up blocks which are split out but +// end up being unnecessary, so usage of this pass should not pessimize +// generated code. +// +// This pass obviously modifies the CFG, but updates loop information and +// dominator information. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "loop-simplify" +#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/DependenceAnalysis.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/LoopUtils.h" +using namespace llvm; + +STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted"); +STATISTIC(NumNested , "Number of nested loops split out"); + +namespace { + struct LoopSimplify : public LoopPass { + static char ID; // Pass identification, replacement for typeid + LoopSimplify() : LoopPass(ID) { + initializeLoopSimplifyPass(*PassRegistry::getPassRegistry()); + } + + // AA - If we have an alias analysis object to update, this is it, otherwise + // this is null. + AliasAnalysis *AA; + LoopInfo *LI; + DominatorTree *DT; + ScalarEvolution *SE; + Loop *L; + virtual bool runOnLoop(Loop *L, LPPassManager &LPM); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + // We need loop information to identify the loops... + AU.addRequired<DominatorTree>(); + AU.addPreserved<DominatorTree>(); + + AU.addRequired<LoopInfo>(); + AU.addPreserved<LoopInfo>(); + + AU.addPreserved<AliasAnalysis>(); + AU.addPreserved<ScalarEvolution>(); + AU.addPreserved<DependenceAnalysis>(); + AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added. + } + + /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees. + void verifyAnalysis() const; + + private: + bool ProcessLoop(Loop *L, LPPassManager &LPM); + BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit); + Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM, + BasicBlock *Preheader); + BasicBlock *InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader); + }; +} + +static void PlaceSplitBlockCarefully(BasicBlock *NewBB, + SmallVectorImpl<BasicBlock*> &SplitPreds, + Loop *L); + +char LoopSimplify::ID = 0; +INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify", + "Canonicalize natural loops", true, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_END(LoopSimplify, "loop-simplify", + "Canonicalize natural loops", true, false) + +// Publicly exposed interface to pass... +char &llvm::LoopSimplifyID = LoopSimplify::ID; +Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); } + +/// runOnLoop - Run down all loops in the CFG (recursively, but we could do +/// it in any convenient order) inserting preheaders... +/// +bool LoopSimplify::runOnLoop(Loop *l, LPPassManager &LPM) { + L = l; + bool Changed = false; + LI = &getAnalysis<LoopInfo>(); + AA = getAnalysisIfAvailable<AliasAnalysis>(); + DT = &getAnalysis<DominatorTree>(); + SE = getAnalysisIfAvailable<ScalarEvolution>(); + + Changed |= ProcessLoop(L, LPM); + + return Changed; +} + +/// ProcessLoop - Walk the loop structure in depth first order, ensuring that +/// all loops have preheaders. +/// +bool LoopSimplify::ProcessLoop(Loop *L, LPPassManager &LPM) { + bool Changed = false; +ReprocessLoop: + + // Check to see that no blocks (other than the header) in this loop have + // predecessors that are not in the loop. This is not valid for natural + // loops, but can occur if the blocks are unreachable. Since they are + // unreachable we can just shamelessly delete those CFG edges! + for (Loop::block_iterator BB = L->block_begin(), E = L->block_end(); + BB != E; ++BB) { + if (*BB == L->getHeader()) continue; + + SmallPtrSet<BasicBlock*, 4> BadPreds; + for (pred_iterator PI = pred_begin(*BB), + PE = pred_end(*BB); PI != PE; ++PI) { + BasicBlock *P = *PI; + if (!L->contains(P)) + BadPreds.insert(P); + } + + // Delete each unique out-of-loop (and thus dead) predecessor. + for (SmallPtrSet<BasicBlock*, 4>::iterator I = BadPreds.begin(), + E = BadPreds.end(); I != E; ++I) { + + DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor " + << (*I)->getName() << "\n"); + + // Inform each successor of each dead pred. + for (succ_iterator SI = succ_begin(*I), SE = succ_end(*I); SI != SE; ++SI) + (*SI)->removePredecessor(*I); + // Zap the dead pred's terminator and replace it with unreachable. + TerminatorInst *TI = (*I)->getTerminator(); + TI->replaceAllUsesWith(UndefValue::get(TI->getType())); + (*I)->getTerminator()->eraseFromParent(); + new UnreachableInst((*I)->getContext(), *I); + Changed = true; + } + } + + // If there are exiting blocks with branches on undef, resolve the undef in + // the direction which will exit the loop. This will help simplify loop + // trip count computations. + SmallVector<BasicBlock*, 8> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(), + E = ExitingBlocks.end(); I != E; ++I) + if (BranchInst *BI = dyn_cast<BranchInst>((*I)->getTerminator())) + if (BI->isConditional()) { + if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) { + + DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in " + << (*I)->getName() << "\n"); + + BI->setCondition(ConstantInt::get(Cond->getType(), + !L->contains(BI->getSuccessor(0)))); + + // This may make the loop analyzable, force SCEV recomputation. + if (SE) + SE->forgetLoop(L); + + Changed = true; + } + } + + // Does the loop already have a preheader? If so, don't insert one. + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) { + Preheader = InsertPreheaderForLoop(L, this); + if (Preheader) { + ++NumInserted; + Changed = true; + } + } + + // Next, check to make sure that all exit nodes of the loop only have + // predecessors that are inside of the loop. This check guarantees that the + // loop preheader/header will dominate the exit blocks. If the exit block has + // predecessors from outside of the loop, split the edge now. + SmallVector<BasicBlock*, 8> ExitBlocks; + L->getExitBlocks(ExitBlocks); + + SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(), + ExitBlocks.end()); + for (SmallSetVector<BasicBlock *, 8>::iterator I = ExitBlockSet.begin(), + E = ExitBlockSet.end(); I != E; ++I) { + BasicBlock *ExitBlock = *I; + for (pred_iterator PI = pred_begin(ExitBlock), PE = pred_end(ExitBlock); + PI != PE; ++PI) + // Must be exactly this loop: no subloops, parent loops, or non-loop preds + // allowed. + if (!L->contains(*PI)) { + if (RewriteLoopExitBlock(L, ExitBlock)) { + ++NumInserted; + Changed = true; + } + break; + } + } + + // If the header has more than two predecessors at this point (from the + // preheader and from multiple backedges), we must adjust the loop. + BasicBlock *LoopLatch = L->getLoopLatch(); + if (!LoopLatch) { + // If this is really a nested loop, rip it out into a child loop. Don't do + // this for loops with a giant number of backedges, just factor them into a + // common backedge instead. + if (L->getNumBackEdges() < 8) { + if (SeparateNestedLoop(L, LPM, Preheader)) { + ++NumNested; + // This is a big restructuring change, reprocess the whole loop. + Changed = true; + // GCC doesn't tail recursion eliminate this. + goto ReprocessLoop; + } + } + + // If we either couldn't, or didn't want to, identify nesting of the loops, + // insert a new block that all backedges target, then make it jump to the + // loop header. + LoopLatch = InsertUniqueBackedgeBlock(L, Preheader); + if (LoopLatch) { + ++NumInserted; + Changed = true; + } + } + + // Scan over the PHI nodes in the loop header. Since they now have only two + // incoming values (the loop is canonicalized), we may have simplified the PHI + // down to 'X = phi [X, Y]', which should be replaced with 'Y'. + PHINode *PN; + for (BasicBlock::iterator I = L->getHeader()->begin(); + (PN = dyn_cast<PHINode>(I++)); ) + if (Value *V = SimplifyInstruction(PN, 0, 0, DT)) { + if (AA) AA->deleteValue(PN); + if (SE) SE->forgetValue(PN); + PN->replaceAllUsesWith(V); + PN->eraseFromParent(); + } + + // If this loop has multiple exits and the exits all go to the same + // block, attempt to merge the exits. This helps several passes, such + // as LoopRotation, which do not support loops with multiple exits. + // SimplifyCFG also does this (and this code uses the same utility + // function), however this code is loop-aware, where SimplifyCFG is + // not. That gives it the advantage of being able to hoist + // loop-invariant instructions out of the way to open up more + // opportunities, and the disadvantage of having the responsibility + // to preserve dominator information. + bool UniqueExit = true; + if (!ExitBlocks.empty()) + for (unsigned i = 1, e = ExitBlocks.size(); i != e; ++i) + if (ExitBlocks[i] != ExitBlocks[0]) { + UniqueExit = false; + break; + } + if (UniqueExit) { + for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { + BasicBlock *ExitingBlock = ExitingBlocks[i]; + if (!ExitingBlock->getSinglePredecessor()) continue; + BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); + if (!BI || !BI->isConditional()) continue; + CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition()); + if (!CI || CI->getParent() != ExitingBlock) continue; + + // Attempt to hoist out all instructions except for the + // comparison and the branch. + bool AllInvariant = true; + for (BasicBlock::iterator I = ExitingBlock->begin(); &*I != BI; ) { + Instruction *Inst = I++; + // Skip debug info intrinsics. + if (isa<DbgInfoIntrinsic>(Inst)) + continue; + if (Inst == CI) + continue; + if (!L->makeLoopInvariant(Inst, Changed, + Preheader ? Preheader->getTerminator() : 0)) { + AllInvariant = false; + break; + } + } + if (!AllInvariant) continue; + + // The block has now been cleared of all instructions except for + // a comparison and a conditional branch. SimplifyCFG may be able + // to fold it now. + if (!FoldBranchToCommonDest(BI)) continue; + + // Success. The block is now dead, so remove it from the loop, + // update the dominator tree and delete it. + DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block " + << ExitingBlock->getName() << "\n"); + + // If any reachable control flow within this loop has changed, notify + // ScalarEvolution. Currently assume the parent loop doesn't change + // (spliting edges doesn't count). If blocks, CFG edges, or other values + // in the parent loop change, then we need call to forgetLoop() for the + // parent instead. + if (SE) + SE->forgetLoop(L); + + assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock)); + Changed = true; + LI->removeBlock(ExitingBlock); + + DomTreeNode *Node = DT->getNode(ExitingBlock); + const std::vector<DomTreeNodeBase<BasicBlock> *> &Children = + Node->getChildren(); + while (!Children.empty()) { + DomTreeNode *Child = Children.front(); + DT->changeImmediateDominator(Child, Node->getIDom()); + } + DT->eraseNode(ExitingBlock); + + BI->getSuccessor(0)->removePredecessor(ExitingBlock); + BI->getSuccessor(1)->removePredecessor(ExitingBlock); + ExitingBlock->eraseFromParent(); + } + } + + return Changed; +} + +/// InsertPreheaderForLoop - Once we discover that a loop doesn't have a +/// preheader, this method is called to insert one. This method has two phases: +/// preheader insertion and analysis updating. +/// +BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) { + BasicBlock *Header = L->getHeader(); + + // Compute the set of predecessors of the loop that are not in the loop. + SmallVector<BasicBlock*, 8> OutsideBlocks; + for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header); + PI != PE; ++PI) { + BasicBlock *P = *PI; + if (!L->contains(P)) { // Coming in from outside the loop? + // If the loop is branched to from an indirect branch, we won't + // be able to fully transform the loop, because it prohibits + // edge splitting. + if (isa<IndirectBrInst>(P->getTerminator())) return 0; + + // Keep track of it. + OutsideBlocks.push_back(P); + } + } + + // Split out the loop pre-header. + BasicBlock *PreheaderBB; + if (!Header->isLandingPad()) { + PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader", + PP); + } else { + SmallVector<BasicBlock*, 2> NewBBs; + SplitLandingPadPredecessors(Header, OutsideBlocks, ".preheader", + ".split-lp", PP, NewBBs); + PreheaderBB = NewBBs[0]; + } + + PreheaderBB->getTerminator()->setDebugLoc( + Header->getFirstNonPHI()->getDebugLoc()); + DEBUG(dbgs() << "LoopSimplify: Creating pre-header " + << PreheaderBB->getName() << "\n"); + + // Make sure that NewBB is put someplace intelligent, which doesn't mess up + // code layout too horribly. + PlaceSplitBlockCarefully(PreheaderBB, OutsideBlocks, L); + + return PreheaderBB; +} + +/// RewriteLoopExitBlock - Ensure that the loop preheader dominates all exit +/// blocks. This method is used to split exit blocks that have predecessors +/// outside of the loop. +BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) { + SmallVector<BasicBlock*, 8> LoopBlocks; + for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) { + BasicBlock *P = *I; + if (L->contains(P)) { + // Don't do this if the loop is exited via an indirect branch. + if (isa<IndirectBrInst>(P->getTerminator())) return 0; + + LoopBlocks.push_back(P); + } + } + + assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?"); + BasicBlock *NewExitBB = 0; + + if (Exit->isLandingPad()) { + SmallVector<BasicBlock*, 2> NewBBs; + SplitLandingPadPredecessors(Exit, ArrayRef<BasicBlock*>(&LoopBlocks[0], + LoopBlocks.size()), + ".loopexit", ".nonloopexit", + this, NewBBs); + NewExitBB = NewBBs[0]; + } else { + NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", this); + } + + DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block " + << NewExitBB->getName() << "\n"); + return NewExitBB; +} + +/// AddBlockAndPredsToSet - Add the specified block, and all of its +/// predecessors, to the specified set, if it's not already in there. Stop +/// predecessor traversal when we reach StopBlock. +static void AddBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock, + std::set<BasicBlock*> &Blocks) { + std::vector<BasicBlock *> WorkList; + WorkList.push_back(InputBB); + do { + BasicBlock *BB = WorkList.back(); WorkList.pop_back(); + if (Blocks.insert(BB).second && BB != StopBlock) + // If BB is not already processed and it is not a stop block then + // insert its predecessor in the work list + for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) { + BasicBlock *WBB = *I; + WorkList.push_back(WBB); + } + } while(!WorkList.empty()); +} + +/// FindPHIToPartitionLoops - The first part of loop-nestification is to find a +/// PHI node that tells us how to partition the loops. +static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT, + AliasAnalysis *AA, LoopInfo *LI) { + for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) { + PHINode *PN = cast<PHINode>(I); + ++I; + if (Value *V = SimplifyInstruction(PN, 0, 0, DT)) { + // This is a degenerate PHI already, don't modify it! + PN->replaceAllUsesWith(V); + if (AA) AA->deleteValue(PN); + PN->eraseFromParent(); + continue; + } + + // Scan this PHI node looking for a use of the PHI node by itself. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingValue(i) == PN && + L->contains(PN->getIncomingBlock(i))) + // We found something tasty to remove. + return PN; + } + return 0; +} + +// PlaceSplitBlockCarefully - If the block isn't already, move the new block to +// right after some 'outside block' block. This prevents the preheader from +// being placed inside the loop body, e.g. when the loop hasn't been rotated. +void PlaceSplitBlockCarefully(BasicBlock *NewBB, + SmallVectorImpl<BasicBlock*> &SplitPreds, + Loop *L) { + // Check to see if NewBB is already well placed. + Function::iterator BBI = NewBB; --BBI; + for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) { + if (&*BBI == SplitPreds[i]) + return; + } + + // If it isn't already after an outside block, move it after one. This is + // always good as it makes the uncond branch from the outside block into a + // fall-through. + + // Figure out *which* outside block to put this after. Prefer an outside + // block that neighbors a BB actually in the loop. + BasicBlock *FoundBB = 0; + for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) { + Function::iterator BBI = SplitPreds[i]; + if (++BBI != NewBB->getParent()->end() && + L->contains(BBI)) { + FoundBB = SplitPreds[i]; + break; + } + } + + // If our heuristic for a *good* bb to place this after doesn't find + // anything, just pick something. It's likely better than leaving it within + // the loop. + if (!FoundBB) + FoundBB = SplitPreds[0]; + NewBB->moveAfter(FoundBB); +} + + +/// SeparateNestedLoop - If this loop has multiple backedges, try to pull one of +/// them out into a nested loop. This is important for code that looks like +/// this: +/// +/// Loop: +/// ... +/// br cond, Loop, Next +/// ... +/// br cond2, Loop, Out +/// +/// To identify this common case, we look at the PHI nodes in the header of the +/// loop. PHI nodes with unchanging values on one backedge correspond to values +/// that change in the "outer" loop, but not in the "inner" loop. +/// +/// If we are able to separate out a loop, return the new outer loop that was +/// created. +/// +Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM, + BasicBlock *Preheader) { + // Don't try to separate loops without a preheader. + if (!Preheader) + return 0; + + // The header is not a landing pad; preheader insertion should ensure this. + assert(!L->getHeader()->isLandingPad() && + "Can't insert backedge to landing pad"); + + PHINode *PN = FindPHIToPartitionLoops(L, DT, AA, LI); + if (PN == 0) return 0; // No known way to partition. + + // Pull out all predecessors that have varying values in the loop. This + // handles the case when a PHI node has multiple instances of itself as + // arguments. + SmallVector<BasicBlock*, 8> OuterLoopPreds; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + if (PN->getIncomingValue(i) != PN || + !L->contains(PN->getIncomingBlock(i))) { + // We can't split indirectbr edges. + if (isa<IndirectBrInst>(PN->getIncomingBlock(i)->getTerminator())) + return 0; + OuterLoopPreds.push_back(PN->getIncomingBlock(i)); + } + } + DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n"); + + // If ScalarEvolution is around and knows anything about values in + // this loop, tell it to forget them, because we're about to + // substantially change it. + if (SE) + SE->forgetLoop(L); + + BasicBlock *Header = L->getHeader(); + BasicBlock *NewBB = + SplitBlockPredecessors(Header, OuterLoopPreds, ".outer", this); + + // Make sure that NewBB is put someplace intelligent, which doesn't mess up + // code layout too horribly. + PlaceSplitBlockCarefully(NewBB, OuterLoopPreds, L); + + // Create the new outer loop. + Loop *NewOuter = new Loop(); + + // Change the parent loop to use the outer loop as its child now. + if (Loop *Parent = L->getParentLoop()) + Parent->replaceChildLoopWith(L, NewOuter); + else + LI->changeTopLevelLoop(L, NewOuter); + + // L is now a subloop of our outer loop. + NewOuter->addChildLoop(L); + + // Add the new loop to the pass manager queue. + LPM.insertLoopIntoQueue(NewOuter); + + for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); + I != E; ++I) + NewOuter->addBlockEntry(*I); + + // Now reset the header in L, which had been moved by + // SplitBlockPredecessors for the outer loop. + L->moveToHeader(Header); + + // Determine which blocks should stay in L and which should be moved out to + // the Outer loop now. + std::set<BasicBlock*> BlocksInL; + for (pred_iterator PI=pred_begin(Header), E = pred_end(Header); PI!=E; ++PI) { + BasicBlock *P = *PI; + if (DT->dominates(Header, P)) + AddBlockAndPredsToSet(P, Header, BlocksInL); + } + + // Scan all of the loop children of L, moving them to OuterLoop if they are + // not part of the inner loop. + const std::vector<Loop*> &SubLoops = L->getSubLoops(); + for (size_t I = 0; I != SubLoops.size(); ) + if (BlocksInL.count(SubLoops[I]->getHeader())) + ++I; // Loop remains in L + else + NewOuter->addChildLoop(L->removeChildLoop(SubLoops.begin() + I)); + + // Now that we know which blocks are in L and which need to be moved to + // OuterLoop, move any blocks that need it. + for (unsigned i = 0; i != L->getBlocks().size(); ++i) { + BasicBlock *BB = L->getBlocks()[i]; + if (!BlocksInL.count(BB)) { + // Move this block to the parent, updating the exit blocks sets + L->removeBlockFromLoop(BB); + if ((*LI)[BB] == L) + LI->changeLoopFor(BB, NewOuter); + --i; + } + } + + return NewOuter; +} + + + +/// InsertUniqueBackedgeBlock - This method is called when the specified loop +/// has more than one backedge in it. If this occurs, revector all of these +/// backedges to target a new basic block and have that block branch to the loop +/// header. This ensures that loops have exactly one backedge. +/// +BasicBlock * +LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) { + assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!"); + + // Get information about the loop + BasicBlock *Header = L->getHeader(); + Function *F = Header->getParent(); + + // Unique backedge insertion currently depends on having a preheader. + if (!Preheader) + return 0; + + // The header is not a landing pad; preheader insertion should ensure this. + assert(!Header->isLandingPad() && "Can't insert backedge to landing pad"); + + // Figure out which basic blocks contain back-edges to the loop header. + std::vector<BasicBlock*> BackedgeBlocks; + for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){ + BasicBlock *P = *I; + + // Indirectbr edges cannot be split, so we must fail if we find one. + if (isa<IndirectBrInst>(P->getTerminator())) + return 0; + + if (P != Preheader) BackedgeBlocks.push_back(P); + } + + // Create and insert the new backedge block... + BasicBlock *BEBlock = BasicBlock::Create(Header->getContext(), + Header->getName()+".backedge", F); + BranchInst *BETerminator = BranchInst::Create(Header, BEBlock); + + DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block " + << BEBlock->getName() << "\n"); + + // Move the new backedge block to right after the last backedge block. + Function::iterator InsertPos = BackedgeBlocks.back(); ++InsertPos; + F->getBasicBlockList().splice(InsertPos, F->getBasicBlockList(), BEBlock); + + // Now that the block has been inserted into the function, create PHI nodes in + // the backedge block which correspond to any PHI nodes in the header block. + for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { + PHINode *PN = cast<PHINode>(I); + PHINode *NewPN = PHINode::Create(PN->getType(), BackedgeBlocks.size(), + PN->getName()+".be", BETerminator); + if (AA) AA->copyValue(PN, NewPN); + + // Loop over the PHI node, moving all entries except the one for the + // preheader over to the new PHI node. + unsigned PreheaderIdx = ~0U; + bool HasUniqueIncomingValue = true; + Value *UniqueValue = 0; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + BasicBlock *IBB = PN->getIncomingBlock(i); + Value *IV = PN->getIncomingValue(i); + if (IBB == Preheader) { + PreheaderIdx = i; + } else { + NewPN->addIncoming(IV, IBB); + if (HasUniqueIncomingValue) { + if (UniqueValue == 0) + UniqueValue = IV; + else if (UniqueValue != IV) + HasUniqueIncomingValue = false; + } + } + } + + // Delete all of the incoming values from the old PN except the preheader's + assert(PreheaderIdx != ~0U && "PHI has no preheader entry??"); + if (PreheaderIdx != 0) { + PN->setIncomingValue(0, PN->getIncomingValue(PreheaderIdx)); + PN->setIncomingBlock(0, PN->getIncomingBlock(PreheaderIdx)); + } + // Nuke all entries except the zero'th. + for (unsigned i = 0, e = PN->getNumIncomingValues()-1; i != e; ++i) + PN->removeIncomingValue(e-i, false); + + // Finally, add the newly constructed PHI node as the entry for the BEBlock. + PN->addIncoming(NewPN, BEBlock); + + // As an optimization, if all incoming values in the new PhiNode (which is a + // subset of the incoming values of the old PHI node) have the same value, + // eliminate the PHI Node. + if (HasUniqueIncomingValue) { + NewPN->replaceAllUsesWith(UniqueValue); + if (AA) AA->deleteValue(NewPN); + BEBlock->getInstList().erase(NewPN); + } + } + + // Now that all of the PHI nodes have been inserted and adjusted, modify the + // backedge blocks to just to the BEBlock instead of the header. + for (unsigned i = 0, e = BackedgeBlocks.size(); i != e; ++i) { + TerminatorInst *TI = BackedgeBlocks[i]->getTerminator(); + for (unsigned Op = 0, e = TI->getNumSuccessors(); Op != e; ++Op) + if (TI->getSuccessor(Op) == Header) + TI->setSuccessor(Op, BEBlock); + } + + //===--- Update all analyses which we must preserve now -----------------===// + + // Update Loop Information - we know that this block is now in the current + // loop and all parent loops. + L->addBasicBlockToLoop(BEBlock, LI->getBase()); + + // Update dominator information + DT->splitBlock(BEBlock); + + return BEBlock; +} + +void LoopSimplify::verifyAnalysis() const { + // It used to be possible to just assert L->isLoopSimplifyForm(), however + // with the introduction of indirectbr, there are now cases where it's + // not possible to transform a loop as necessary. We can at least check + // that there is an indirectbr near any time there's trouble. + + // Indirectbr can interfere with preheader and unique backedge insertion. + if (!L->getLoopPreheader() || !L->getLoopLatch()) { + bool HasIndBrPred = false; + for (pred_iterator PI = pred_begin(L->getHeader()), + PE = pred_end(L->getHeader()); PI != PE; ++PI) + if (isa<IndirectBrInst>((*PI)->getTerminator())) { + HasIndBrPred = true; + break; + } + assert(HasIndBrPred && + "LoopSimplify has no excuse for missing loop header info!"); + (void)HasIndBrPred; + } + + // Indirectbr can interfere with exit block canonicalization. + if (!L->hasDedicatedExits()) { + bool HasIndBrExiting = false; + SmallVector<BasicBlock*, 8> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { + if (isa<IndirectBrInst>((ExitingBlocks[i])->getTerminator())) { + HasIndBrExiting = true; + break; + } + } + + assert(HasIndBrExiting && + "LoopSimplify has no excuse for missing exit block info!"); + (void)HasIndBrExiting; + } +} diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp new file mode 100644 index 000000000000..162807d03c62 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -0,0 +1,457 @@ +//===-- UnrollLoop.cpp - Loop unrolling utilities -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements some loop unrolling utilities. It does not define any +// actual pass or policy, but provides a single function to perform loop +// unrolling. +// +// The process of unrolling can produce extraneous basic blocks linked with +// unconditional branches. This will be corrected in the future. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "loop-unroll" +#include "llvm/Transforms/Utils/UnrollLoop.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LoopIterator.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/SimplifyIndVar.h" +using namespace llvm; + +// TODO: Should these be here or in LoopUnroll? +STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled"); +STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)"); + +/// RemapInstruction - Convert the instruction operands from referencing the +/// current values into those specified by VMap. +static inline void RemapInstruction(Instruction *I, + ValueToValueMapTy &VMap) { + for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) { + Value *Op = I->getOperand(op); + ValueToValueMapTy::iterator It = VMap.find(Op); + if (It != VMap.end()) + I->setOperand(op, It->second); + } + + if (PHINode *PN = dyn_cast<PHINode>(I)) { + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + ValueToValueMapTy::iterator It = VMap.find(PN->getIncomingBlock(i)); + if (It != VMap.end()) + PN->setIncomingBlock(i, cast<BasicBlock>(It->second)); + } + } +} + +/// FoldBlockIntoPredecessor - Folds a basic block into its predecessor if it +/// only has one predecessor, and that predecessor only has one successor. +/// The LoopInfo Analysis that is passed will be kept consistent. +/// Returns the new combined block. +static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, + LPPassManager *LPM) { + // Merge basic blocks into their predecessor if there is only one distinct + // pred, and if there is only one distinct successor of the predecessor, and + // if there are no PHI nodes. + BasicBlock *OnlyPred = BB->getSinglePredecessor(); + if (!OnlyPred) return 0; + + if (OnlyPred->getTerminator()->getNumSuccessors() != 1) + return 0; + + DEBUG(dbgs() << "Merging: " << *BB << "into: " << *OnlyPred); + + // Resolve any PHI nodes at the start of the block. They are all + // guaranteed to have exactly one entry if they exist, unless there are + // multiple duplicate (but guaranteed to be equal) entries for the + // incoming edges. This occurs when there are multiple edges from + // OnlyPred to OnlySucc. + FoldSingleEntryPHINodes(BB); + + // Delete the unconditional branch from the predecessor... + OnlyPred->getInstList().pop_back(); + + // Make all PHI nodes that referred to BB now refer to Pred as their + // source... + BB->replaceAllUsesWith(OnlyPred); + + // Move all definitions in the successor to the predecessor... + OnlyPred->getInstList().splice(OnlyPred->end(), BB->getInstList()); + + // OldName will be valid until erased. + StringRef OldName = BB->getName(); + + // Erase basic block from the function... + + // ScalarEvolution holds references to loop exit blocks. + if (LPM) { + if (ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>()) { + if (Loop *L = LI->getLoopFor(BB)) + SE->forgetLoop(L); + } + } + LI->removeBlock(BB); + + // Inherit predecessor's name if it exists... + if (!OldName.empty() && !OnlyPred->hasName()) + OnlyPred->setName(OldName); + + BB->eraseFromParent(); + + return OnlyPred; +} + +/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true +/// if unrolling was successful, or false if the loop was unmodified. Unrolling +/// can only fail when the loop's latch block is not terminated by a conditional +/// branch instruction. However, if the trip count (and multiple) are not known, +/// loop unrolling will mostly produce more code that is no faster. +/// +/// TripCount is generally defined as the number of times the loop header +/// executes. UnrollLoop relaxes the definition to permit early exits: here +/// TripCount is the iteration on which control exits LatchBlock if no early +/// exits were taken. Note that UnrollLoop assumes that the loop counter test +/// terminates LatchBlock in order to remove unnecesssary instances of the +/// test. In other words, control may exit the loop prior to TripCount +/// iterations via an early branch, but control may not exit the loop from the +/// LatchBlock's terminator prior to TripCount iterations. +/// +/// Similarly, TripMultiple divides the number of times that the LatchBlock may +/// execute without exiting the loop. +/// +/// The LoopInfo Analysis that is passed will be kept consistent. +/// +/// If a LoopPassManager is passed in, and the loop is fully removed, it will be +/// removed from the LoopPassManager as well. LPM can also be NULL. +/// +/// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are +/// available it must also preserve those analyses. +bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, + bool AllowRuntime, unsigned TripMultiple, + LoopInfo *LI, LPPassManager *LPM) { + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) { + DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n"); + return false; + } + + BasicBlock *LatchBlock = L->getLoopLatch(); + if (!LatchBlock) { + DEBUG(dbgs() << " Can't unroll; loop exit-block-insertion failed.\n"); + return false; + } + + // Loops with indirectbr cannot be cloned. + if (!L->isSafeToClone()) { + DEBUG(dbgs() << " Can't unroll; Loop body cannot be cloned.\n"); + return false; + } + + BasicBlock *Header = L->getHeader(); + BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator()); + + if (!BI || BI->isUnconditional()) { + // The loop-rotate pass can be helpful to avoid this in many cases. + DEBUG(dbgs() << + " Can't unroll; loop not terminated by a conditional branch.\n"); + return false; + } + + if (Header->hasAddressTaken()) { + // The loop-rotate pass can be helpful to avoid this in many cases. + DEBUG(dbgs() << + " Won't unroll loop: address of header block is taken.\n"); + return false; + } + + if (TripCount != 0) + DEBUG(dbgs() << " Trip Count = " << TripCount << "\n"); + if (TripMultiple != 1) + DEBUG(dbgs() << " Trip Multiple = " << TripMultiple << "\n"); + + // Effectively "DCE" unrolled iterations that are beyond the tripcount + // and will never be executed. + if (TripCount != 0 && Count > TripCount) + Count = TripCount; + + // Don't enter the unroll code if there is nothing to do. This way we don't + // need to support "partial unrolling by 1". + if (TripCount == 0 && Count < 2) + return false; + + assert(Count > 0); + assert(TripMultiple > 0); + assert(TripCount == 0 || TripCount % TripMultiple == 0); + + // Are we eliminating the loop control altogether? + bool CompletelyUnroll = Count == TripCount; + + // We assume a run-time trip count if the compiler cannot + // figure out the loop trip count and the unroll-runtime + // flag is specified. + bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime); + + if (RuntimeTripCount && !UnrollRuntimeLoopProlog(L, Count, LI, LPM)) + return false; + + // Notify ScalarEvolution that the loop will be substantially changed, + // if not outright eliminated. + if (LPM) { + ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>(); + if (SE) + SE->forgetLoop(L); + } + + // If we know the trip count, we know the multiple... + unsigned BreakoutTrip = 0; + if (TripCount != 0) { + BreakoutTrip = TripCount % Count; + TripMultiple = 0; + } else { + // Figure out what multiple to use. + BreakoutTrip = TripMultiple = + (unsigned)GreatestCommonDivisor64(Count, TripMultiple); + } + + if (CompletelyUnroll) { + DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName() + << " with trip count " << TripCount << "!\n"); + } else { + DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() + << " by " << Count); + if (TripMultiple == 0 || BreakoutTrip != TripMultiple) { + DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip); + } else if (TripMultiple != 1) { + DEBUG(dbgs() << " with " << TripMultiple << " trips per branch"); + } else if (RuntimeTripCount) { + DEBUG(dbgs() << " with run-time trip count"); + } + DEBUG(dbgs() << "!\n"); + } + + bool ContinueOnTrue = L->contains(BI->getSuccessor(0)); + BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue); + + // For the first iteration of the loop, we should use the precloned values for + // PHI nodes. Insert associations now. + ValueToValueMapTy LastValueMap; + std::vector<PHINode*> OrigPHINode; + for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { + OrigPHINode.push_back(cast<PHINode>(I)); + } + + std::vector<BasicBlock*> Headers; + std::vector<BasicBlock*> Latches; + Headers.push_back(Header); + Latches.push_back(LatchBlock); + + // The current on-the-fly SSA update requires blocks to be processed in + // reverse postorder so that LastValueMap contains the correct value at each + // exit. + LoopBlocksDFS DFS(L); + DFS.perform(LI); + + // Stash the DFS iterators before adding blocks to the loop. + LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO(); + LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO(); + + for (unsigned It = 1; It != Count; ++It) { + std::vector<BasicBlock*> NewBlocks; + + for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { + ValueToValueMapTy VMap; + BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It)); + Header->getParent()->getBasicBlockList().push_back(New); + + // Loop over all of the PHI nodes in the block, changing them to use the + // incoming values from the previous block. + if (*BB == Header) + for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) { + PHINode *NewPHI = cast<PHINode>(VMap[OrigPHINode[i]]); + Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock); + if (Instruction *InValI = dyn_cast<Instruction>(InVal)) + if (It > 1 && L->contains(InValI)) + InVal = LastValueMap[InValI]; + VMap[OrigPHINode[i]] = InVal; + New->getInstList().erase(NewPHI); + } + + // Update our running map of newest clones + LastValueMap[*BB] = New; + for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end(); + VI != VE; ++VI) + LastValueMap[VI->first] = VI->second; + + L->addBasicBlockToLoop(New, LI->getBase()); + + // Add phi entries for newly created values to all exit blocks. + for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB); + SI != SE; ++SI) { + if (L->contains(*SI)) + continue; + for (BasicBlock::iterator BBI = (*SI)->begin(); + PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) { + Value *Incoming = phi->getIncomingValueForBlock(*BB); + ValueToValueMapTy::iterator It = LastValueMap.find(Incoming); + if (It != LastValueMap.end()) + Incoming = It->second; + phi->addIncoming(Incoming, New); + } + } + // Keep track of new headers and latches as we create them, so that + // we can insert the proper branches later. + if (*BB == Header) + Headers.push_back(New); + if (*BB == LatchBlock) + Latches.push_back(New); + + NewBlocks.push_back(New); + } + + // Remap all instructions in the most recent iteration + for (unsigned i = 0; i < NewBlocks.size(); ++i) + for (BasicBlock::iterator I = NewBlocks[i]->begin(), + E = NewBlocks[i]->end(); I != E; ++I) + ::RemapInstruction(I, LastValueMap); + } + + // Loop over the PHI nodes in the original block, setting incoming values. + for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) { + PHINode *PN = OrigPHINode[i]; + if (CompletelyUnroll) { + PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader)); + Header->getInstList().erase(PN); + } + else if (Count > 1) { + Value *InVal = PN->removeIncomingValue(LatchBlock, false); + // If this value was defined in the loop, take the value defined by the + // last iteration of the loop. + if (Instruction *InValI = dyn_cast<Instruction>(InVal)) { + if (L->contains(InValI)) + InVal = LastValueMap[InVal]; + } + assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch"); + PN->addIncoming(InVal, Latches.back()); + } + } + + // Now that all the basic blocks for the unrolled iterations are in place, + // set up the branches to connect them. + for (unsigned i = 0, e = Latches.size(); i != e; ++i) { + // The original branch was replicated in each unrolled iteration. + BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator()); + + // The branch destination. + unsigned j = (i + 1) % e; + BasicBlock *Dest = Headers[j]; + bool NeedConditional = true; + + if (RuntimeTripCount && j != 0) { + NeedConditional = false; + } + + // For a complete unroll, make the last iteration end with a branch + // to the exit block. + if (CompletelyUnroll && j == 0) { + Dest = LoopExit; + NeedConditional = false; + } + + // If we know the trip count or a multiple of it, we can safely use an + // unconditional branch for some iterations. + if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) { + NeedConditional = false; + } + + if (NeedConditional) { + // Update the conditional branch's successor for the following + // iteration. + Term->setSuccessor(!ContinueOnTrue, Dest); + } else { + // Remove phi operands at this loop exit + if (Dest != LoopExit) { + BasicBlock *BB = Latches[i]; + for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); + SI != SE; ++SI) { + if (*SI == Headers[i]) + continue; + for (BasicBlock::iterator BBI = (*SI)->begin(); + PHINode *Phi = dyn_cast<PHINode>(BBI); ++BBI) { + Phi->removeIncomingValue(BB, false); + } + } + } + // Replace the conditional branch with an unconditional one. + BranchInst::Create(Dest, Term); + Term->eraseFromParent(); + } + } + + // Merge adjacent basic blocks, if possible. + for (unsigned i = 0, e = Latches.size(); i != e; ++i) { + BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator()); + if (Term->isUnconditional()) { + BasicBlock *Dest = Term->getSuccessor(0); + if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, LPM)) + std::replace(Latches.begin(), Latches.end(), Dest, Fold); + } + } + + if (LPM) { + // FIXME: Reconstruct dom info, because it is not preserved properly. + // Incrementally updating domtree after loop unrolling would be easy. + if (DominatorTree *DT = LPM->getAnalysisIfAvailable<DominatorTree>()) + DT->runOnFunction(*L->getHeader()->getParent()); + + // Simplify any new induction variables in the partially unrolled loop. + ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>(); + if (SE && !CompletelyUnroll) { + SmallVector<WeakVH, 16> DeadInsts; + simplifyLoopIVs(L, SE, LPM, DeadInsts); + + // Aggressively clean up dead instructions that simplifyLoopIVs already + // identified. Any remaining should be cleaned up below. + while (!DeadInsts.empty()) + if (Instruction *Inst = + dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val())) + RecursivelyDeleteTriviallyDeadInstructions(Inst); + } + } + // At this point, the code is well formed. We now do a quick sweep over the + // inserted code, doing constant propagation and dead code elimination as we + // go. + const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks(); + for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(), + BBE = NewLoopBlocks.end(); BB != BBE; ++BB) + for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ) { + Instruction *Inst = I++; + + if (isInstructionTriviallyDead(Inst)) + (*BB)->getInstList().erase(Inst); + else if (Value *V = SimplifyInstruction(Inst)) + if (LI->replacementPreservesLCSSAForm(Inst, V)) { + Inst->replaceAllUsesWith(V); + (*BB)->getInstList().erase(Inst); + } + } + + NumCompletelyUnrolled += CompletelyUnroll; + ++NumUnrolled; + // Remove the loop from the LoopPassManager if it's completely removed. + if (CompletelyUnroll && LPM != NULL) + LPM->deleteLoopFromQueue(L); + + return true; +} diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp new file mode 100644 index 000000000000..d801d5f2c2a4 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -0,0 +1,374 @@ +//===-- UnrollLoopRuntime.cpp - Runtime Loop unrolling utilities ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements some loop unrolling utilities for loops with run-time +// trip counts. See LoopUnroll.cpp for unrolling loops with compile-time +// trip counts. +// +// The functions in this file are used to generate extra code when the +// run-time trip count modulo the unroll factor is not 0. When this is the +// case, we need to generate code to execute these 'left over' iterations. +// +// The current strategy generates an if-then-else sequence prior to the +// unrolled loop to execute the 'left over' iterations. Other strategies +// include generate a loop before or after the unrolled loop. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "loop-unroll" +#include "llvm/Transforms/Utils/UnrollLoop.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/LoopIterator.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include <algorithm> + +using namespace llvm; + +STATISTIC(NumRuntimeUnrolled, + "Number of loops unrolled with run-time trip counts"); + +/// Connect the unrolling prolog code to the original loop. +/// The unrolling prolog code contains code to execute the +/// 'extra' iterations if the run-time trip count modulo the +/// unroll count is non-zero. +/// +/// This function performs the following: +/// - Create PHI nodes at prolog end block to combine values +/// that exit the prolog code and jump around the prolog. +/// - Add a PHI operand to a PHI node at the loop exit block +/// for values that exit the prolog and go around the loop. +/// - Branch around the original loop if the trip count is less +/// than the unroll factor. +/// +static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count, + BasicBlock *LastPrologBB, BasicBlock *PrologEnd, + BasicBlock *OrigPH, BasicBlock *NewPH, + ValueToValueMapTy &LVMap, Pass *P) { + BasicBlock *Latch = L->getLoopLatch(); + assert(Latch != 0 && "Loop must have a latch"); + + // Create a PHI node for each outgoing value from the original loop + // (which means it is an outgoing value from the prolog code too). + // The new PHI node is inserted in the prolog end basic block. + // The new PHI name is added as an operand of a PHI node in either + // the loop header or the loop exit block. + for (succ_iterator SBI = succ_begin(Latch), SBE = succ_end(Latch); + SBI != SBE; ++SBI) { + for (BasicBlock::iterator BBI = (*SBI)->begin(); + PHINode *PN = dyn_cast<PHINode>(BBI); ++BBI) { + + // Add a new PHI node to the prolog end block and add the + // appropriate incoming values. + PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName()+".unr", + PrologEnd->getTerminator()); + // Adding a value to the new PHI node from the original loop preheader. + // This is the value that skips all the prolog code. + if (L->contains(PN)) { + NewPN->addIncoming(PN->getIncomingValueForBlock(NewPH), OrigPH); + } else { + NewPN->addIncoming(Constant::getNullValue(PN->getType()), OrigPH); + } + + Value *V = PN->getIncomingValueForBlock(Latch); + if (Instruction *I = dyn_cast<Instruction>(V)) { + if (L->contains(I)) { + V = LVMap[I]; + } + } + // Adding a value to the new PHI node from the last prolog block + // that was created. + NewPN->addIncoming(V, LastPrologBB); + + // Update the existing PHI node operand with the value from the + // new PHI node. How this is done depends on if the existing + // PHI node is in the original loop block, or the exit block. + if (L->contains(PN)) { + PN->setIncomingValue(PN->getBasicBlockIndex(NewPH), NewPN); + } else { + PN->addIncoming(NewPN, PrologEnd); + } + } + } + + // Create a branch around the orignal loop, which is taken if the + // trip count is less than the unroll factor. + Instruction *InsertPt = PrologEnd->getTerminator(); + Instruction *BrLoopExit = + new ICmpInst(InsertPt, ICmpInst::ICMP_ULT, TripCount, + ConstantInt::get(TripCount->getType(), Count)); + BasicBlock *Exit = L->getUniqueExitBlock(); + assert(Exit != 0 && "Loop must have a single exit block only"); + // Split the exit to maintain loop canonicalization guarantees + SmallVector<BasicBlock*, 4> Preds(pred_begin(Exit), pred_end(Exit)); + if (!Exit->isLandingPad()) { + SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", P); + } else { + SmallVector<BasicBlock*, 2> NewBBs; + SplitLandingPadPredecessors(Exit, Preds, ".unr1-lcssa", ".unr2-lcssa", + P, NewBBs); + } + // Add the branch to the exit block (around the unrolled loop) + BranchInst::Create(Exit, NewPH, BrLoopExit, InsertPt); + InsertPt->eraseFromParent(); +} + +/// Create a clone of the blocks in a loop and connect them together. +/// This function doesn't create a clone of the loop structure. +/// +/// There are two value maps that are defined and used. VMap is +/// for the values in the current loop instance. LVMap contains +/// the values from the last loop instance. We need the LVMap values +/// to update the initial values for the current loop instance. +/// +static void CloneLoopBlocks(Loop *L, + bool FirstCopy, + BasicBlock *InsertTop, + BasicBlock *InsertBot, + std::vector<BasicBlock *> &NewBlocks, + LoopBlocksDFS &LoopBlocks, + ValueToValueMapTy &VMap, + ValueToValueMapTy &LVMap, + LoopInfo *LI) { + + BasicBlock *Preheader = L->getLoopPreheader(); + BasicBlock *Header = L->getHeader(); + BasicBlock *Latch = L->getLoopLatch(); + Function *F = Header->getParent(); + LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO(); + LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO(); + // For each block in the original loop, create a new copy, + // and update the value map with the newly created values. + for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { + BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".unr", F); + NewBlocks.push_back(NewBB); + + if (Loop *ParentLoop = L->getParentLoop()) + ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase()); + + VMap[*BB] = NewBB; + if (Header == *BB) { + // For the first block, add a CFG connection to this newly + // created block + InsertTop->getTerminator()->setSuccessor(0, NewBB); + + // Change the incoming values to the ones defined in the + // previously cloned loop. + for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { + PHINode *NewPHI = cast<PHINode>(VMap[I]); + if (FirstCopy) { + // We replace the first phi node with the value from the preheader + VMap[I] = NewPHI->getIncomingValueForBlock(Preheader); + NewBB->getInstList().erase(NewPHI); + } else { + // Update VMap with values from the previous block + unsigned idx = NewPHI->getBasicBlockIndex(Latch); + Value *InVal = NewPHI->getIncomingValue(idx); + if (Instruction *I = dyn_cast<Instruction>(InVal)) + if (L->contains(I)) + InVal = LVMap[InVal]; + NewPHI->setIncomingValue(idx, InVal); + NewPHI->setIncomingBlock(idx, InsertTop); + } + } + } + + if (Latch == *BB) { + VMap.erase((*BB)->getTerminator()); + NewBB->getTerminator()->eraseFromParent(); + BranchInst::Create(InsertBot, NewBB); + } + } + // LastValueMap is updated with the values for the current loop + // which are used the next time this function is called. + for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end(); + VI != VE; ++VI) { + LVMap[VI->first] = VI->second; + } +} + +/// Insert code in the prolog code when unrolling a loop with a +/// run-time trip-count. +/// +/// This method assumes that the loop unroll factor is total number +/// of loop bodes in the loop after unrolling. (Some folks refer +/// to the unroll factor as the number of *extra* copies added). +/// We assume also that the loop unroll factor is a power-of-two. So, after +/// unrolling the loop, the number of loop bodies executed is 2, +/// 4, 8, etc. Note - LLVM converts the if-then-sequence to a switch +/// instruction in SimplifyCFG.cpp. Then, the backend decides how code for +/// the switch instruction is generated. +/// +/// extraiters = tripcount % loopfactor +/// if (extraiters == 0) jump Loop: +/// if (extraiters == loopfactor) jump L1 +/// if (extraiters == loopfactor-1) jump L2 +/// ... +/// L1: LoopBody; +/// L2: LoopBody; +/// ... +/// if tripcount < loopfactor jump End +/// Loop: +/// ... +/// End: +/// +bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, + LPPassManager *LPM) { + // for now, only unroll loops that contain a single exit + if (!L->getExitingBlock()) + return false; + + // Make sure the loop is in canonical form, and there is a single + // exit block only. + if (!L->isLoopSimplifyForm() || L->getUniqueExitBlock() == 0) + return false; + + // Use Scalar Evolution to compute the trip count. This allows more + // loops to be unrolled than relying on induction var simplification + if (!LPM) + return false; + ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>(); + if (SE == 0) + return false; + + // Only unroll loops with a computable trip count and the trip count needs + // to be an int value (allowing a pointer type is a TODO item) + const SCEV *BECount = SE->getBackedgeTakenCount(L); + if (isa<SCEVCouldNotCompute>(BECount) || !BECount->getType()->isIntegerTy()) + return false; + + // Add 1 since the backedge count doesn't include the first loop iteration + const SCEV *TripCountSC = + SE->getAddExpr(BECount, SE->getConstant(BECount->getType(), 1)); + if (isa<SCEVCouldNotCompute>(TripCountSC)) + return false; + + // We only handle cases when the unroll factor is a power of 2. + // Count is the loop unroll factor, the number of extra copies added + 1. + if ((Count & (Count-1)) != 0) + return false; + + // If this loop is nested, then the loop unroller changes the code in + // parent loop, so the Scalar Evolution pass needs to be run again + if (Loop *ParentLoop = L->getParentLoop()) + SE->forgetLoop(ParentLoop); + + BasicBlock *PH = L->getLoopPreheader(); + BasicBlock *Header = L->getHeader(); + BasicBlock *Latch = L->getLoopLatch(); + // It helps to splits the original preheader twice, one for the end of the + // prolog code and one for a new loop preheader + BasicBlock *PEnd = SplitEdge(PH, Header, LPM->getAsPass()); + BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), LPM->getAsPass()); + BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator()); + + // Compute the number of extra iterations required, which is: + // extra iterations = run-time trip count % (loop unroll factor + 1) + SCEVExpander Expander(*SE, "loop-unroll"); + Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(), + PreHeaderBR); + Type *CountTy = TripCount->getType(); + BinaryOperator *ModVal = + BinaryOperator::CreateURem(TripCount, + ConstantInt::get(CountTy, Count), + "xtraiter"); + ModVal->insertBefore(PreHeaderBR); + + // Check if for no extra iterations, then jump to unrolled loop + Value *BranchVal = new ICmpInst(PreHeaderBR, + ICmpInst::ICMP_NE, ModVal, + ConstantInt::get(CountTy, 0), "lcmp"); + // Branch to either the extra iterations or the unrolled loop + // We will fix up the true branch label when adding loop body copies + BranchInst::Create(PEnd, PEnd, BranchVal, PreHeaderBR); + assert(PreHeaderBR->isUnconditional() && + PreHeaderBR->getSuccessor(0) == PEnd && + "CFG edges in Preheader are not correct"); + PreHeaderBR->eraseFromParent(); + + ValueToValueMapTy LVMap; + Function *F = Header->getParent(); + // These variables are used to update the CFG links in each iteration + BasicBlock *CompareBB = 0; + BasicBlock *LastLoopBB = PH; + // Get an ordered list of blocks in the loop to help with the ordering of the + // cloned blocks in the prolog code + LoopBlocksDFS LoopBlocks(L); + LoopBlocks.perform(LI); + + // + // For each extra loop iteration, create a copy of the loop's basic blocks + // and generate a condition that branches to the copy depending on the + // number of 'left over' iterations. + // + for (unsigned leftOverIters = Count-1; leftOverIters > 0; --leftOverIters) { + std::vector<BasicBlock*> NewBlocks; + ValueToValueMapTy VMap; + + // Clone all the basic blocks in the loop, but we don't clone the loop + // This function adds the appropriate CFG connections. + CloneLoopBlocks(L, (leftOverIters == Count-1), LastLoopBB, PEnd, NewBlocks, + LoopBlocks, VMap, LVMap, LI); + LastLoopBB = cast<BasicBlock>(VMap[Latch]); + + // Insert the cloned blocks into function just before the original loop + F->getBasicBlockList().splice(PEnd, F->getBasicBlockList(), + NewBlocks[0], F->end()); + + // Generate the code for the comparison which determines if the loop + // prolog code needs to be executed. + if (leftOverIters == Count-1) { + // There is no compare block for the fall-thru case when for the last + // left over iteration + CompareBB = NewBlocks[0]; + } else { + // Create a new block for the comparison + BasicBlock *NewBB = BasicBlock::Create(CompareBB->getContext(), "unr.cmp", + F, CompareBB); + if (Loop *ParentLoop = L->getParentLoop()) { + // Add the new block to the parent loop, if needed + ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase()); + } + + // The comparison w/ the extra iteration value and branch + Value *BranchVal = new ICmpInst(*NewBB, ICmpInst::ICMP_EQ, ModVal, + ConstantInt::get(CountTy, leftOverIters), + "un.tmp"); + // Branch to either the extra iterations or the unrolled loop + BranchInst::Create(NewBlocks[0], CompareBB, + BranchVal, NewBB); + CompareBB = NewBB; + PH->getTerminator()->setSuccessor(0, NewBB); + VMap[NewPH] = CompareBB; + } + + // Rewrite the cloned instruction operands to use the values + // created when the clone is created. + for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) { + for (BasicBlock::iterator I = NewBlocks[i]->begin(), + E = NewBlocks[i]->end(); I != E; ++I) { + RemapInstruction(I, VMap, + RF_NoModuleLevelChanges|RF_IgnoreMissingEntries); + } + } + } + + // Connect the prolog code to the original loop and update the + // PHI functions. + ConnectProlog(L, TripCount, Count, LastLoopBB, PEnd, PH, NewPH, LVMap, + LPM->getAsPass()); + NumRuntimeUnrolled++; + return true; +} diff --git a/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp new file mode 100644 index 000000000000..e017f501209d --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp @@ -0,0 +1,174 @@ +//===- LowerExpectIntrinsic.cpp - Lower expect intrinsic ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass lowers the 'expect' intrinsic to LLVM metadata. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "lower-expect-intrinsic" +#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Metadata.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include <vector> + +using namespace llvm; + +STATISTIC(IfHandled, "Number of 'expect' intrinsic instructions handled"); + +static cl::opt<uint32_t> +LikelyBranchWeight("likely-branch-weight", cl::Hidden, cl::init(64), + cl::desc("Weight of the branch likely to be taken (default = 64)")); +static cl::opt<uint32_t> +UnlikelyBranchWeight("unlikely-branch-weight", cl::Hidden, cl::init(4), + cl::desc("Weight of the branch unlikely to be taken (default = 4)")); + +namespace { + + class LowerExpectIntrinsic : public FunctionPass { + + bool HandleSwitchExpect(SwitchInst *SI); + + bool HandleIfExpect(BranchInst *BI); + + public: + static char ID; + LowerExpectIntrinsic() : FunctionPass(ID) { + initializeLowerExpectIntrinsicPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F); + }; +} + + +bool LowerExpectIntrinsic::HandleSwitchExpect(SwitchInst *SI) { + CallInst *CI = dyn_cast<CallInst>(SI->getCondition()); + if (!CI) + return false; + + Function *Fn = CI->getCalledFunction(); + if (!Fn || Fn->getIntrinsicID() != Intrinsic::expect) + return false; + + Value *ArgValue = CI->getArgOperand(0); + ConstantInt *ExpectedValue = dyn_cast<ConstantInt>(CI->getArgOperand(1)); + if (!ExpectedValue) + return false; + + SwitchInst::CaseIt Case = SI->findCaseValue(ExpectedValue); + unsigned n = SI->getNumCases(); // +1 for default case. + std::vector<uint32_t> Weights(n + 1); + + Weights[0] = Case == SI->case_default() ? LikelyBranchWeight + : UnlikelyBranchWeight; + for (unsigned i = 0; i != n; ++i) + Weights[i + 1] = i == Case.getCaseIndex() ? LikelyBranchWeight + : UnlikelyBranchWeight; + + SI->setMetadata(LLVMContext::MD_prof, + MDBuilder(CI->getContext()).createBranchWeights(Weights)); + + SI->setCondition(ArgValue); + return true; +} + + +bool LowerExpectIntrinsic::HandleIfExpect(BranchInst *BI) { + if (BI->isUnconditional()) + return false; + + // Handle non-optimized IR code like: + // %expval = call i64 @llvm.expect.i64.i64(i64 %conv1, i64 1) + // %tobool = icmp ne i64 %expval, 0 + // br i1 %tobool, label %if.then, label %if.end + + ICmpInst *CmpI = dyn_cast<ICmpInst>(BI->getCondition()); + if (!CmpI || CmpI->getPredicate() != CmpInst::ICMP_NE) + return false; + + CallInst *CI = dyn_cast<CallInst>(CmpI->getOperand(0)); + if (!CI) + return false; + + Function *Fn = CI->getCalledFunction(); + if (!Fn || Fn->getIntrinsicID() != Intrinsic::expect) + return false; + + Value *ArgValue = CI->getArgOperand(0); + ConstantInt *ExpectedValue = dyn_cast<ConstantInt>(CI->getArgOperand(1)); + if (!ExpectedValue) + return false; + + MDBuilder MDB(CI->getContext()); + MDNode *Node; + + // If expect value is equal to 1 it means that we are more likely to take + // branch 0, in other case more likely is branch 1. + if (ExpectedValue->isOne()) + Node = MDB.createBranchWeights(LikelyBranchWeight, UnlikelyBranchWeight); + else + Node = MDB.createBranchWeights(UnlikelyBranchWeight, LikelyBranchWeight); + + BI->setMetadata(LLVMContext::MD_prof, Node); + + CmpI->setOperand(0, ArgValue); + return true; +} + + +bool LowerExpectIntrinsic::runOnFunction(Function &F) { + for (Function::iterator I = F.begin(), E = F.end(); I != E;) { + BasicBlock *BB = I++; + + // Create "block_weights" metadata. + if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) { + if (HandleIfExpect(BI)) + IfHandled++; + } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) { + if (HandleSwitchExpect(SI)) + IfHandled++; + } + + // remove llvm.expect intrinsics. + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); + BI != BE; ) { + CallInst *CI = dyn_cast<CallInst>(BI++); + if (!CI) + continue; + + Function *Fn = CI->getCalledFunction(); + if (Fn && Fn->getIntrinsicID() == Intrinsic::expect) { + Value *Exp = CI->getArgOperand(0); + CI->replaceAllUsesWith(Exp); + CI->eraseFromParent(); + } + } + } + + return false; +} + + +char LowerExpectIntrinsic::ID = 0; +INITIALIZE_PASS(LowerExpectIntrinsic, "lower-expect", "Lower 'expect' " + "Intrinsics", false, false) + +FunctionPass *llvm::createLowerExpectIntrinsicPass() { + return new LowerExpectIntrinsic(); +} diff --git a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp new file mode 100644 index 000000000000..9799a30f6a0f --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp @@ -0,0 +1,579 @@ +//===- LowerInvoke.cpp - Eliminate Invoke & Unwind instructions -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This transformation is designed for use by code generators which do not yet +// support stack unwinding. This pass supports two models of exception handling +// lowering, the 'cheap' support and the 'expensive' support. +// +// 'Cheap' exception handling support gives the program the ability to execute +// any program which does not "throw an exception", by turning 'invoke' +// instructions into calls and by turning 'unwind' instructions into calls to +// abort(). If the program does dynamically use the unwind instruction, the +// program will print a message then abort. +// +// 'Expensive' exception handling support gives the full exception handling +// support to the program at the cost of making the 'invoke' instruction +// really expensive. It basically inserts setjmp/longjmp calls to emulate the +// exception handling as necessary. +// +// Because the 'expensive' support slows down programs a lot, and EH is only +// used for a subset of the programs, it must be specifically enabled by an +// option. +// +// Note that after this pass runs the CFG is not entirely accurate (exceptional +// control flow edges are not correct anymore) so only very simple things should +// be done after the lowerinvoke pass has run (like generation of native code). +// This should not be used as a general purpose "my LLVM-to-LLVM pass doesn't +// support the invoke instruction yet" lowering pass. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "lowerinvoke" +#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include <csetjmp> +#include <set> +using namespace llvm; + +STATISTIC(NumInvokes, "Number of invokes replaced"); +STATISTIC(NumSpilled, "Number of registers live across unwind edges"); + +static cl::opt<bool> ExpensiveEHSupport("enable-correct-eh-support", + cl::desc("Make the -lowerinvoke pass insert expensive, but correct, EH code")); + +namespace { + class LowerInvoke : public FunctionPass { + const TargetMachine *TM; + + // Used for both models. + Constant *AbortFn; + + // Used for expensive EH support. + StructType *JBLinkTy; + GlobalVariable *JBListHead; + Constant *SetJmpFn, *LongJmpFn, *StackSaveFn, *StackRestoreFn; + bool useExpensiveEHSupport; + + public: + static char ID; // Pass identification, replacement for typeid + explicit LowerInvoke(const TargetMachine *TM = 0, + bool useExpensiveEHSupport = ExpensiveEHSupport) + : FunctionPass(ID), TM(TM), + useExpensiveEHSupport(useExpensiveEHSupport) { + initializeLowerInvokePass(*PassRegistry::getPassRegistry()); + } + bool doInitialization(Module &M); + bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + // This is a cluster of orthogonal Transforms + AU.addPreserved("mem2reg"); + AU.addPreservedID(LowerSwitchID); + } + + private: + bool insertCheapEHSupport(Function &F); + void splitLiveRangesLiveAcrossInvokes(SmallVectorImpl<InvokeInst*>&Invokes); + void rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo, + AllocaInst *InvokeNum, AllocaInst *StackPtr, + SwitchInst *CatchSwitch); + bool insertExpensiveEHSupport(Function &F); + }; +} + +char LowerInvoke::ID = 0; +INITIALIZE_PASS(LowerInvoke, "lowerinvoke", + "Lower invoke and unwind, for unwindless code generators", + false, false) + +char &llvm::LowerInvokePassID = LowerInvoke::ID; + +// Public Interface To the LowerInvoke pass. +FunctionPass *llvm::createLowerInvokePass(const TargetMachine *TM, + bool useExpensiveEHSupport) { + return new LowerInvoke(TM, useExpensiveEHSupport || ExpensiveEHSupport); +} + +// doInitialization - Make sure that there is a prototype for abort in the +// current module. +bool LowerInvoke::doInitialization(Module &M) { + Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); + if (useExpensiveEHSupport) { + // Insert a type for the linked list of jump buffers. + const TargetLowering *TLI = TM ? TM->getTargetLowering() : 0; + unsigned JBSize = TLI ? TLI->getJumpBufSize() : 0; + JBSize = JBSize ? JBSize : 200; + Type *JmpBufTy = ArrayType::get(VoidPtrTy, JBSize); + + JBLinkTy = StructType::create(M.getContext(), "llvm.sjljeh.jmpbufty"); + Type *Elts[] = { JmpBufTy, PointerType::getUnqual(JBLinkTy) }; + JBLinkTy->setBody(Elts); + + Type *PtrJBList = PointerType::getUnqual(JBLinkTy); + + // Now that we've done that, insert the jmpbuf list head global, unless it + // already exists. + if (!(JBListHead = M.getGlobalVariable("llvm.sjljeh.jblist", PtrJBList))) { + JBListHead = new GlobalVariable(M, PtrJBList, false, + GlobalValue::LinkOnceAnyLinkage, + Constant::getNullValue(PtrJBList), + "llvm.sjljeh.jblist"); + } + +// VisualStudio defines setjmp as _setjmp +#if defined(_MSC_VER) && defined(setjmp) && \ + !defined(setjmp_undefined_for_msvc) +# pragma push_macro("setjmp") +# undef setjmp +# define setjmp_undefined_for_msvc +#endif + + SetJmpFn = Intrinsic::getDeclaration(&M, Intrinsic::setjmp); + +#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc) + // let's return it to _setjmp state +# pragma pop_macro("setjmp") +# undef setjmp_undefined_for_msvc +#endif + + LongJmpFn = Intrinsic::getDeclaration(&M, Intrinsic::longjmp); + StackSaveFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave); + StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore); + } + + // We need the 'write' and 'abort' functions for both models. + AbortFn = M.getOrInsertFunction("abort", Type::getVoidTy(M.getContext()), + (Type *)0); + return true; +} + +bool LowerInvoke::insertCheapEHSupport(Function &F) { + bool Changed = false; + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) { + SmallVector<Value*,16> CallArgs(II->op_begin(), II->op_end() - 3); + // Insert a normal call instruction... + CallInst *NewCall = CallInst::Create(II->getCalledValue(), + CallArgs, "", II); + NewCall->takeName(II); + NewCall->setCallingConv(II->getCallingConv()); + NewCall->setAttributes(II->getAttributes()); + NewCall->setDebugLoc(II->getDebugLoc()); + II->replaceAllUsesWith(NewCall); + + // Insert an unconditional branch to the normal destination. + BranchInst::Create(II->getNormalDest(), II); + + // Remove any PHI node entries from the exception destination. + II->getUnwindDest()->removePredecessor(BB); + + // Remove the invoke instruction now. + BB->getInstList().erase(II); + + ++NumInvokes; Changed = true; + } + return Changed; +} + +/// rewriteExpensiveInvoke - Insert code and hack the function to replace the +/// specified invoke instruction with a call. +void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo, + AllocaInst *InvokeNum, + AllocaInst *StackPtr, + SwitchInst *CatchSwitch) { + ConstantInt *InvokeNoC = ConstantInt::get(Type::getInt32Ty(II->getContext()), + InvokeNo); + + // If the unwind edge has phi nodes, split the edge. + if (isa<PHINode>(II->getUnwindDest()->begin())) { + SplitCriticalEdge(II, 1, this); + + // If there are any phi nodes left, they must have a single predecessor. + while (PHINode *PN = dyn_cast<PHINode>(II->getUnwindDest()->begin())) { + PN->replaceAllUsesWith(PN->getIncomingValue(0)); + PN->eraseFromParent(); + } + } + + // Insert a store of the invoke num before the invoke and store zero into the + // location afterward. + new StoreInst(InvokeNoC, InvokeNum, true, II); // volatile + + // Insert a store of the stack ptr before the invoke, so we can restore it + // later in the exception case. + CallInst* StackSaveRet = CallInst::Create(StackSaveFn, "ssret", II); + new StoreInst(StackSaveRet, StackPtr, true, II); // volatile + + BasicBlock::iterator NI = II->getNormalDest()->getFirstInsertionPt(); + // nonvolatile. + new StoreInst(Constant::getNullValue(Type::getInt32Ty(II->getContext())), + InvokeNum, false, NI); + + Instruction* StackPtrLoad = + new LoadInst(StackPtr, "stackptr.restore", true, + II->getUnwindDest()->getFirstInsertionPt()); + CallInst::Create(StackRestoreFn, StackPtrLoad, "")->insertAfter(StackPtrLoad); + + // Add a switch case to our unwind block. + CatchSwitch->addCase(InvokeNoC, II->getUnwindDest()); + + // Insert a normal call instruction. + SmallVector<Value*,16> CallArgs(II->op_begin(), II->op_end() - 3); + CallInst *NewCall = CallInst::Create(II->getCalledValue(), + CallArgs, "", II); + NewCall->takeName(II); + NewCall->setCallingConv(II->getCallingConv()); + NewCall->setAttributes(II->getAttributes()); + NewCall->setDebugLoc(II->getDebugLoc()); + II->replaceAllUsesWith(NewCall); + + // Replace the invoke with an uncond branch. + BranchInst::Create(II->getNormalDest(), NewCall->getParent()); + II->eraseFromParent(); +} + +/// MarkBlocksLiveIn - Insert BB and all of its predescessors into LiveBBs until +/// we reach blocks we've already seen. +static void MarkBlocksLiveIn(BasicBlock *BB, std::set<BasicBlock*> &LiveBBs) { + if (!LiveBBs.insert(BB).second) return; // already been here. + + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + MarkBlocksLiveIn(*PI, LiveBBs); +} + +// First thing we need to do is scan the whole function for values that are +// live across unwind edges. Each value that is live across an unwind edge +// we spill into a stack location, guaranteeing that there is nothing live +// across the unwind edge. This process also splits all critical edges +// coming out of invoke's. +void LowerInvoke:: +splitLiveRangesLiveAcrossInvokes(SmallVectorImpl<InvokeInst*> &Invokes) { + // First step, split all critical edges from invoke instructions. + for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { + InvokeInst *II = Invokes[i]; + SplitCriticalEdge(II, 0, this); + SplitCriticalEdge(II, 1, this); + assert(!isa<PHINode>(II->getNormalDest()) && + !isa<PHINode>(II->getUnwindDest()) && + "critical edge splitting left single entry phi nodes?"); + } + + Function *F = Invokes.back()->getParent()->getParent(); + + // To avoid having to handle incoming arguments specially, we lower each arg + // to a copy instruction in the entry block. This ensures that the argument + // value itself cannot be live across the entry block. + BasicBlock::iterator AfterAllocaInsertPt = F->begin()->begin(); + while (isa<AllocaInst>(AfterAllocaInsertPt) && + isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsertPt)->getArraySize())) + ++AfterAllocaInsertPt; + for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); + AI != E; ++AI) { + Type *Ty = AI->getType(); + // Aggregate types can't be cast, but are legal argument types, so we have + // to handle them differently. We use an extract/insert pair as a + // lightweight method to achieve the same goal. + if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) { + Instruction *EI = ExtractValueInst::Create(AI, 0, "",AfterAllocaInsertPt); + Instruction *NI = InsertValueInst::Create(AI, EI, 0); + NI->insertAfter(EI); + AI->replaceAllUsesWith(NI); + // Set the operand of the instructions back to the AllocaInst. + EI->setOperand(0, AI); + NI->setOperand(0, AI); + } else { + // This is always a no-op cast because we're casting AI to AI->getType() + // so src and destination types are identical. BitCast is the only + // possibility. + CastInst *NC = new BitCastInst( + AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt); + AI->replaceAllUsesWith(NC); + // Set the operand of the cast instruction back to the AllocaInst. + // Normally it's forbidden to replace a CastInst's operand because it + // could cause the opcode to reflect an illegal conversion. However, + // we're replacing it here with the same value it was constructed with. + // We do this because the above replaceAllUsesWith() clobbered the + // operand, but we want this one to remain. + NC->setOperand(0, AI); + } + } + + // Finally, scan the code looking for instructions with bad live ranges. + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) + for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { + // Ignore obvious cases we don't have to handle. In particular, most + // instructions either have no uses or only have a single use inside the + // current block. Ignore them quickly. + Instruction *Inst = II; + if (Inst->use_empty()) continue; + if (Inst->hasOneUse() && + cast<Instruction>(Inst->use_back())->getParent() == BB && + !isa<PHINode>(Inst->use_back())) continue; + + // If this is an alloca in the entry block, it's not a real register + // value. + if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst)) + if (isa<ConstantInt>(AI->getArraySize()) && BB == F->begin()) + continue; + + // Avoid iterator invalidation by copying users to a temporary vector. + SmallVector<Instruction*,16> Users; + for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end(); + UI != E; ++UI) { + Instruction *User = cast<Instruction>(*UI); + if (User->getParent() != BB || isa<PHINode>(User)) + Users.push_back(User); + } + + // Scan all of the uses and see if the live range is live across an unwind + // edge. If we find a use live across an invoke edge, create an alloca + // and spill the value. + + // Find all of the blocks that this value is live in. + std::set<BasicBlock*> LiveBBs; + LiveBBs.insert(Inst->getParent()); + while (!Users.empty()) { + Instruction *U = Users.back(); + Users.pop_back(); + + if (!isa<PHINode>(U)) { + MarkBlocksLiveIn(U->getParent(), LiveBBs); + } else { + // Uses for a PHI node occur in their predecessor block. + PHINode *PN = cast<PHINode>(U); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingValue(i) == Inst) + MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs); + } + } + + // Now that we know all of the blocks that this thing is live in, see if + // it includes any of the unwind locations. + bool NeedsSpill = false; + for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { + BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); + if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) { + NeedsSpill = true; + } + } + + // If we decided we need a spill, do it. + if (NeedsSpill) { + ++NumSpilled; + DemoteRegToStack(*Inst, true); + } + } +} + +bool LowerInvoke::insertExpensiveEHSupport(Function &F) { + SmallVector<ReturnInst*,16> Returns; + SmallVector<InvokeInst*,16> Invokes; + UnreachableInst* UnreachablePlaceholder = 0; + + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { + // Remember all return instructions in case we insert an invoke into this + // function. + Returns.push_back(RI); + } else if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) { + Invokes.push_back(II); + } + + if (Invokes.empty()) return false; + + NumInvokes += Invokes.size(); + + // TODO: This is not an optimal way to do this. In particular, this always + // inserts setjmp calls into the entries of functions with invoke instructions + // even though there are possibly paths through the function that do not + // execute any invokes. In particular, for functions with early exits, e.g. + // the 'addMove' method in hexxagon, it would be nice to not have to do the + // setjmp stuff on the early exit path. This requires a bit of dataflow, but + // would not be too hard to do. + + // If we have an invoke instruction, insert a setjmp that dominates all + // invokes. After the setjmp, use a cond branch that goes to the original + // code path on zero, and to a designated 'catch' block of nonzero. + Value *OldJmpBufPtr = 0; + if (!Invokes.empty()) { + // First thing we need to do is scan the whole function for values that are + // live across unwind edges. Each value that is live across an unwind edge + // we spill into a stack location, guaranteeing that there is nothing live + // across the unwind edge. This process also splits all critical edges + // coming out of invoke's. + splitLiveRangesLiveAcrossInvokes(Invokes); + + BasicBlock *EntryBB = F.begin(); + + // Create an alloca for the incoming jump buffer ptr and the new jump buffer + // that needs to be restored on all exits from the function. This is an + // alloca because the value needs to be live across invokes. + const TargetLowering *TLI = TM ? TM->getTargetLowering() : 0; + unsigned Align = TLI ? TLI->getJumpBufAlignment() : 0; + AllocaInst *JmpBuf = + new AllocaInst(JBLinkTy, 0, Align, + "jblink", F.begin()->begin()); + + Value *Idx[] = { Constant::getNullValue(Type::getInt32Ty(F.getContext())), + ConstantInt::get(Type::getInt32Ty(F.getContext()), 1) }; + OldJmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx, "OldBuf", + EntryBB->getTerminator()); + + // Copy the JBListHead to the alloca. + Value *OldBuf = new LoadInst(JBListHead, "oldjmpbufptr", true, + EntryBB->getTerminator()); + new StoreInst(OldBuf, OldJmpBufPtr, true, EntryBB->getTerminator()); + + // Add the new jumpbuf to the list. + new StoreInst(JmpBuf, JBListHead, true, EntryBB->getTerminator()); + + // Create the catch block. The catch block is basically a big switch + // statement that goes to all of the invoke catch blocks. + BasicBlock *CatchBB = + BasicBlock::Create(F.getContext(), "setjmp.catch", &F); + + // Create an alloca which keeps track of the stack pointer before every + // invoke, this allows us to properly restore the stack pointer after + // long jumping. + AllocaInst *StackPtr = new AllocaInst(Type::getInt8PtrTy(F.getContext()), 0, + "stackptr", EntryBB->begin()); + + // Create an alloca which keeps track of which invoke is currently + // executing. For normal calls it contains zero. + AllocaInst *InvokeNum = new AllocaInst(Type::getInt32Ty(F.getContext()), 0, + "invokenum",EntryBB->begin()); + new StoreInst(ConstantInt::get(Type::getInt32Ty(F.getContext()), 0), + InvokeNum, true, EntryBB->getTerminator()); + + // Insert a load in the Catch block, and a switch on its value. By default, + // we go to a block that just does an unwind (which is the correct action + // for a standard call). We insert an unreachable instruction here and + // modify the block to jump to the correct unwinding pad later. + BasicBlock *UnwindBB = BasicBlock::Create(F.getContext(), "unwindbb", &F); + UnreachablePlaceholder = new UnreachableInst(F.getContext(), UnwindBB); + + Value *CatchLoad = new LoadInst(InvokeNum, "invoke.num", true, CatchBB); + SwitchInst *CatchSwitch = + SwitchInst::Create(CatchLoad, UnwindBB, Invokes.size(), CatchBB); + + // Now that things are set up, insert the setjmp call itself. + + // Split the entry block to insert the conditional branch for the setjmp. + BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(), + "setjmp.cont"); + + Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 0); + Value *JmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx, "TheJmpBuf", + EntryBB->getTerminator()); + JmpBufPtr = new BitCastInst(JmpBufPtr, + Type::getInt8PtrTy(F.getContext()), + "tmp", EntryBB->getTerminator()); + Value *SJRet = CallInst::Create(SetJmpFn, JmpBufPtr, "sjret", + EntryBB->getTerminator()); + + // Compare the return value to zero. + Value *IsNormal = new ICmpInst(EntryBB->getTerminator(), + ICmpInst::ICMP_EQ, SJRet, + Constant::getNullValue(SJRet->getType()), + "notunwind"); + // Nuke the uncond branch. + EntryBB->getTerminator()->eraseFromParent(); + + // Put in a new condbranch in its place. + BranchInst::Create(ContBlock, CatchBB, IsNormal, EntryBB); + + // At this point, we are all set up, rewrite each invoke instruction. + for (unsigned i = 0, e = Invokes.size(); i != e; ++i) + rewriteExpensiveInvoke(Invokes[i], i+1, InvokeNum, StackPtr, CatchSwitch); + } + + // We know that there is at least one unwind. + + // Create three new blocks, the block to load the jmpbuf ptr and compare + // against null, the block to do the longjmp, and the error block for if it + // is null. Add them at the end of the function because they are not hot. + BasicBlock *UnwindHandler = BasicBlock::Create(F.getContext(), + "dounwind", &F); + BasicBlock *UnwindBlock = BasicBlock::Create(F.getContext(), "unwind", &F); + BasicBlock *TermBlock = BasicBlock::Create(F.getContext(), "unwinderror", &F); + + // If this function contains an invoke, restore the old jumpbuf ptr. + Value *BufPtr; + if (OldJmpBufPtr) { + // Before the return, insert a copy from the saved value to the new value. + BufPtr = new LoadInst(OldJmpBufPtr, "oldjmpbufptr", UnwindHandler); + new StoreInst(BufPtr, JBListHead, UnwindHandler); + } else { + BufPtr = new LoadInst(JBListHead, "ehlist", UnwindHandler); + } + + // Load the JBList, if it's null, then there was no catch! + Value *NotNull = new ICmpInst(*UnwindHandler, ICmpInst::ICMP_NE, BufPtr, + Constant::getNullValue(BufPtr->getType()), + "notnull"); + BranchInst::Create(UnwindBlock, TermBlock, NotNull, UnwindHandler); + + // Create the block to do the longjmp. + // Get a pointer to the jmpbuf and longjmp. + Value *Idx[] = { Constant::getNullValue(Type::getInt32Ty(F.getContext())), + ConstantInt::get(Type::getInt32Ty(F.getContext()), 0) }; + Idx[0] = GetElementPtrInst::Create(BufPtr, Idx, "JmpBuf", UnwindBlock); + Idx[0] = new BitCastInst(Idx[0], + Type::getInt8PtrTy(F.getContext()), + "tmp", UnwindBlock); + Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1); + CallInst::Create(LongJmpFn, Idx, "", UnwindBlock); + new UnreachableInst(F.getContext(), UnwindBlock); + + // Set up the term block ("throw without a catch"). + new UnreachableInst(F.getContext(), TermBlock); + + // Insert a call to abort() + CallInst::Create(AbortFn, "", + TermBlock->getTerminator())->setTailCall(); + + // Replace the inserted unreachable with a branch to the unwind handler. + if (UnreachablePlaceholder) { + BranchInst::Create(UnwindHandler, UnreachablePlaceholder); + UnreachablePlaceholder->eraseFromParent(); + } + + // Finally, for any returns from this function, if this function contains an + // invoke, restore the old jmpbuf pointer to its input value. + if (OldJmpBufPtr) { + for (unsigned i = 0, e = Returns.size(); i != e; ++i) { + ReturnInst *R = Returns[i]; + + // Before the return, insert a copy from the saved value to the new value. + Value *OldBuf = new LoadInst(OldJmpBufPtr, "oldjmpbufptr", true, R); + new StoreInst(OldBuf, JBListHead, true, R); + } + } + + return true; +} + +bool LowerInvoke::runOnFunction(Function &F) { + if (useExpensiveEHSupport) + return insertExpensiveEHSupport(F); + else + return insertCheapEHSupport(F); +} diff --git a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp new file mode 100644 index 000000000000..2d2a8a54a0f2 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp @@ -0,0 +1,323 @@ +//===- LowerSwitch.cpp - Eliminate Switch instructions --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The LowerSwitch transformation rewrites switch instructions with a sequence +// of branches, which allows targets to get away with not implementing the +// switch instruction until it is convenient. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Pass.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" +#include <algorithm> +using namespace llvm; + +namespace { + /// LowerSwitch Pass - Replace all SwitchInst instructions with chained branch + /// instructions. + class LowerSwitch : public FunctionPass { + public: + static char ID; // Pass identification, replacement for typeid + LowerSwitch() : FunctionPass(ID) { + initializeLowerSwitchPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + // This is a cluster of orthogonal Transforms + AU.addPreserved<UnifyFunctionExitNodes>(); + AU.addPreserved("mem2reg"); + AU.addPreservedID(LowerInvokePassID); + } + + struct CaseRange { + Constant* Low; + Constant* High; + BasicBlock* BB; + + CaseRange(Constant *low = 0, Constant *high = 0, BasicBlock *bb = 0) : + Low(low), High(high), BB(bb) { } + }; + + typedef std::vector<CaseRange> CaseVector; + typedef std::vector<CaseRange>::iterator CaseItr; + private: + void processSwitchInst(SwitchInst *SI); + + BasicBlock* switchConvert(CaseItr Begin, CaseItr End, Value* Val, + BasicBlock* OrigBlock, BasicBlock* Default); + BasicBlock* newLeafBlock(CaseRange& Leaf, Value* Val, + BasicBlock* OrigBlock, BasicBlock* Default); + unsigned Clusterify(CaseVector& Cases, SwitchInst *SI); + }; + + /// The comparison function for sorting the switch case values in the vector. + /// WARNING: Case ranges should be disjoint! + struct CaseCmp { + bool operator () (const LowerSwitch::CaseRange& C1, + const LowerSwitch::CaseRange& C2) { + + const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low); + const ConstantInt* CI2 = cast<const ConstantInt>(C2.High); + return CI1->getValue().slt(CI2->getValue()); + } + }; +} + +char LowerSwitch::ID = 0; +INITIALIZE_PASS(LowerSwitch, "lowerswitch", + "Lower SwitchInst's to branches", false, false) + +// Publicly exposed interface to pass... +char &llvm::LowerSwitchID = LowerSwitch::ID; +// createLowerSwitchPass - Interface to this file... +FunctionPass *llvm::createLowerSwitchPass() { + return new LowerSwitch(); +} + +bool LowerSwitch::runOnFunction(Function &F) { + bool Changed = false; + + for (Function::iterator I = F.begin(), E = F.end(); I != E; ) { + BasicBlock *Cur = I++; // Advance over block so we don't traverse new blocks + + if (SwitchInst *SI = dyn_cast<SwitchInst>(Cur->getTerminator())) { + Changed = true; + processSwitchInst(SI); + } + } + + return Changed; +} + +// operator<< - Used for debugging purposes. +// +static raw_ostream& operator<<(raw_ostream &O, + const LowerSwitch::CaseVector &C) + LLVM_ATTRIBUTE_USED; +static raw_ostream& operator<<(raw_ostream &O, + const LowerSwitch::CaseVector &C) { + O << "["; + + for (LowerSwitch::CaseVector::const_iterator B = C.begin(), + E = C.end(); B != E; ) { + O << *B->Low << " -" << *B->High; + if (++B != E) O << ", "; + } + + return O << "]"; +} + +// switchConvert - Convert the switch statement into a binary lookup of +// the case values. The function recursively builds this tree. +// +BasicBlock* LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, + Value* Val, BasicBlock* OrigBlock, + BasicBlock* Default) +{ + unsigned Size = End - Begin; + + if (Size == 1) + return newLeafBlock(*Begin, Val, OrigBlock, Default); + + unsigned Mid = Size / 2; + std::vector<CaseRange> LHS(Begin, Begin + Mid); + DEBUG(dbgs() << "LHS: " << LHS << "\n"); + std::vector<CaseRange> RHS(Begin + Mid, End); + DEBUG(dbgs() << "RHS: " << RHS << "\n"); + + CaseRange& Pivot = *(Begin + Mid); + DEBUG(dbgs() << "Pivot ==> " + << cast<ConstantInt>(Pivot.Low)->getValue() << " -" + << cast<ConstantInt>(Pivot.High)->getValue() << "\n"); + + BasicBlock* LBranch = switchConvert(LHS.begin(), LHS.end(), Val, + OrigBlock, Default); + BasicBlock* RBranch = switchConvert(RHS.begin(), RHS.end(), Val, + OrigBlock, Default); + + // Create a new node that checks if the value is < pivot. Go to the + // left branch if it is and right branch if not. + Function* F = OrigBlock->getParent(); + BasicBlock* NewNode = BasicBlock::Create(Val->getContext(), "NodeBlock"); + Function::iterator FI = OrigBlock; + F->getBasicBlockList().insert(++FI, NewNode); + + ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_SLT, + Val, Pivot.Low, "Pivot"); + NewNode->getInstList().push_back(Comp); + BranchInst::Create(LBranch, RBranch, Comp, NewNode); + return NewNode; +} + +// newLeafBlock - Create a new leaf block for the binary lookup tree. It +// checks if the switch's value == the case's value. If not, then it +// jumps to the default branch. At this point in the tree, the value +// can't be another valid case value, so the jump to the "default" branch +// is warranted. +// +BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val, + BasicBlock* OrigBlock, + BasicBlock* Default) +{ + Function* F = OrigBlock->getParent(); + BasicBlock* NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock"); + Function::iterator FI = OrigBlock; + F->getBasicBlockList().insert(++FI, NewLeaf); + + // Emit comparison + ICmpInst* Comp = NULL; + if (Leaf.Low == Leaf.High) { + // Make the seteq instruction... + Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_EQ, Val, + Leaf.Low, "SwitchLeaf"); + } else { + // Make range comparison + if (cast<ConstantInt>(Leaf.Low)->isMinValue(true /*isSigned*/)) { + // Val >= Min && Val <= Hi --> Val <= Hi + Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SLE, Val, Leaf.High, + "SwitchLeaf"); + } else if (cast<ConstantInt>(Leaf.Low)->isZero()) { + // Val >= 0 && Val <= Hi --> Val <=u Hi + Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High, + "SwitchLeaf"); + } else { + // Emit V-Lo <=u Hi-Lo + Constant* NegLo = ConstantExpr::getNeg(Leaf.Low); + Instruction* Add = BinaryOperator::CreateAdd(Val, NegLo, + Val->getName()+".off", + NewLeaf); + Constant *UpperBound = ConstantExpr::getAdd(NegLo, Leaf.High); + Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Add, UpperBound, + "SwitchLeaf"); + } + } + + // Make the conditional branch... + BasicBlock* Succ = Leaf.BB; + BranchInst::Create(Succ, Default, Comp, NewLeaf); + + // If there were any PHI nodes in this successor, rewrite one entry + // from OrigBlock to come from NewLeaf. + for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) { + PHINode* PN = cast<PHINode>(I); + // Remove all but one incoming entries from the cluster + uint64_t Range = cast<ConstantInt>(Leaf.High)->getSExtValue() - + cast<ConstantInt>(Leaf.Low)->getSExtValue(); + for (uint64_t j = 0; j < Range; ++j) { + PN->removeIncomingValue(OrigBlock); + } + + int BlockIdx = PN->getBasicBlockIndex(OrigBlock); + assert(BlockIdx != -1 && "Switch didn't go to this successor??"); + PN->setIncomingBlock((unsigned)BlockIdx, NewLeaf); + } + + return NewLeaf; +} + +// Clusterify - Transform simple list of Cases into list of CaseRange's +unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) { + unsigned numCmps = 0; + + // Start with "simple" cases + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) + Cases.push_back(CaseRange(i.getCaseValue(), i.getCaseValue(), + i.getCaseSuccessor())); + + std::sort(Cases.begin(), Cases.end(), CaseCmp()); + + // Merge case into clusters + if (Cases.size()>=2) + for (CaseItr I=Cases.begin(), J=llvm::next(Cases.begin()); J!=Cases.end(); ) { + int64_t nextValue = cast<ConstantInt>(J->Low)->getSExtValue(); + int64_t currentValue = cast<ConstantInt>(I->High)->getSExtValue(); + BasicBlock* nextBB = J->BB; + BasicBlock* currentBB = I->BB; + + // If the two neighboring cases go to the same destination, merge them + // into a single case. + if ((nextValue-currentValue==1) && (currentBB == nextBB)) { + I->High = J->High; + J = Cases.erase(J); + } else { + I = J++; + } + } + + for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) { + if (I->Low != I->High) + // A range counts double, since it requires two compares. + ++numCmps; + } + + return numCmps; +} + +// processSwitchInst - Replace the specified switch instruction with a sequence +// of chained if-then insts in a balanced binary search. +// +void LowerSwitch::processSwitchInst(SwitchInst *SI) { + BasicBlock *CurBlock = SI->getParent(); + BasicBlock *OrigBlock = CurBlock; + Function *F = CurBlock->getParent(); + Value *Val = SI->getCondition(); // The value we are switching on... + BasicBlock* Default = SI->getDefaultDest(); + + // If there is only the default destination, don't bother with the code below. + if (!SI->getNumCases()) { + BranchInst::Create(SI->getDefaultDest(), CurBlock); + CurBlock->getInstList().erase(SI); + return; + } + + // Create a new, empty default block so that the new hierarchy of + // if-then statements go to this and the PHI nodes are happy. + BasicBlock* NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault"); + F->getBasicBlockList().insert(Default, NewDefault); + + BranchInst::Create(Default, NewDefault); + + // If there is an entry in any PHI nodes for the default edge, make sure + // to update them as well. + for (BasicBlock::iterator I = Default->begin(); isa<PHINode>(I); ++I) { + PHINode *PN = cast<PHINode>(I); + int BlockIdx = PN->getBasicBlockIndex(OrigBlock); + assert(BlockIdx != -1 && "Switch didn't go to this successor??"); + PN->setIncomingBlock((unsigned)BlockIdx, NewDefault); + } + + // Prepare cases vector. + CaseVector Cases; + unsigned numCmps = Clusterify(Cases, SI); + + DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size() + << ". Total compares: " << numCmps << "\n"); + DEBUG(dbgs() << "Cases: " << Cases << "\n"); + (void)numCmps; + + BasicBlock* SwitchBlock = switchConvert(Cases.begin(), Cases.end(), Val, + OrigBlock, NewDefault); + + // Branch to our shiny new if-then stuff... + BranchInst::Create(SwitchBlock, OrigBlock); + + // We are now done with the switch instruction, delete it. + CurBlock->getInstList().erase(SI); +} diff --git a/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp new file mode 100644 index 000000000000..61b3965d8f11 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp @@ -0,0 +1,90 @@ +//===- Mem2Reg.cpp - The -mem2reg pass, a wrapper around the Utils lib ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass is a simple pass wrapper around the PromoteMemToReg function call +// exposed by the Utils library. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mem2reg" +#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Transforms/Utils/PromoteMemToReg.h" +#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" +using namespace llvm; + +STATISTIC(NumPromoted, "Number of alloca's promoted"); + +namespace { + struct PromotePass : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + PromotePass() : FunctionPass(ID) { + initializePromotePassPass(*PassRegistry::getPassRegistry()); + } + + // runOnFunction - To run this pass, first we calculate the alloca + // instructions that are safe for promotion, then we promote each one. + // + virtual bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<DominatorTree>(); + AU.setPreservesCFG(); + // This is a cluster of orthogonal Transforms + AU.addPreserved<UnifyFunctionExitNodes>(); + AU.addPreservedID(LowerSwitchID); + AU.addPreservedID(LowerInvokePassID); + } + }; +} // end of anonymous namespace + +char PromotePass::ID = 0; +INITIALIZE_PASS_BEGIN(PromotePass, "mem2reg", "Promote Memory to Register", + false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_END(PromotePass, "mem2reg", "Promote Memory to Register", + false, false) + +bool PromotePass::runOnFunction(Function &F) { + std::vector<AllocaInst*> Allocas; + + BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function + + bool Changed = false; + + DominatorTree &DT = getAnalysis<DominatorTree>(); + + while (1) { + Allocas.clear(); + + // Find allocas that are safe to promote, by looking at all instructions in + // the entry node + for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I) + if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca? + if (isAllocaPromotable(AI)) + Allocas.push_back(AI); + + if (Allocas.empty()) break; + + PromoteMemToReg(Allocas, DT); + NumPromoted += Allocas.size(); + Changed = true; + } + + return Changed; +} + +// createPromoteMemoryToRegister - Provide an entry point to create this pass. +// +FunctionPass *llvm::createPromoteMemoryToRegisterPass() { + return new PromotePass(); +} diff --git a/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp b/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp new file mode 100644 index 000000000000..c37045312296 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp @@ -0,0 +1,145 @@ +//===- MetaRenamer.cpp - Rename everything with metasyntatic names --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass renames everything with metasyntatic names. The intent is to use +// this pass after bugpoint reduction to conceal the nature of the original +// program. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/TypeFinder.h" +#include "llvm/Pass.h" +using namespace llvm; + +namespace { + + // This PRNG is from the ISO C spec. It is intentionally simple and + // unsuitable for cryptographic use. We're just looking for enough + // variety to surprise and delight users. + struct PRNG { + unsigned long next; + + void srand(unsigned int seed) { + next = seed; + } + + int rand() { + next = next * 1103515245 + 12345; + return (unsigned int)(next / 65536) % 32768; + } + }; + + struct MetaRenamer : public ModulePass { + static char ID; // Pass identification, replacement for typeid + MetaRenamer() : ModulePass(ID) { + initializeMetaRenamerPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + + bool runOnModule(Module &M) { + static const char *const metaNames[] = { + // See http://en.wikipedia.org/wiki/Metasyntactic_variable + "foo", "bar", "baz", "quux", "barney", "snork", "zot", "blam", "hoge", + "wibble", "wobble", "widget", "wombat", "ham", "eggs", "pluto", "spam" + }; + + // Seed our PRNG with simple additive sum of ModuleID. We're looking to + // simply avoid always having the same function names, and we need to + // remain deterministic. + unsigned int randSeed = 0; + for (std::string::const_iterator I = M.getModuleIdentifier().begin(), + E = M.getModuleIdentifier().end(); I != E; ++I) + randSeed += *I; + + PRNG prng; + prng.srand(randSeed); + + // Rename all aliases + for (Module::alias_iterator AI = M.alias_begin(), AE = M.alias_end(); + AI != AE; ++AI) { + StringRef Name = AI->getName(); + if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1)) + continue; + + AI->setName("alias"); + } + + // Rename all global variables + for (Module::global_iterator GI = M.global_begin(), GE = M.global_end(); + GI != GE; ++GI) { + StringRef Name = GI->getName(); + if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1)) + continue; + + GI->setName("global"); + } + + // Rename all struct types + TypeFinder StructTypes; + StructTypes.run(M, true); + for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) { + StructType *STy = StructTypes[i]; + if (STy->isLiteral() || STy->getName().empty()) continue; + + SmallString<128> NameStorage; + STy->setName((Twine("struct.") + metaNames[prng.rand() % + array_lengthof(metaNames)]).toStringRef(NameStorage)); + } + + // Rename all functions + for (Module::iterator FI = M.begin(), FE = M.end(); + FI != FE; ++FI) { + StringRef Name = FI->getName(); + if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1)) + continue; + + FI->setName(metaNames[prng.rand() % array_lengthof(metaNames)]); + runOnFunction(*FI); + } + return true; + } + + bool runOnFunction(Function &F) { + for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); + AI != AE; ++AI) + if (!AI->getType()->isVoidTy()) + AI->setName("arg"); + + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + BB->setName("bb"); + + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (!I->getType()->isVoidTy()) + I->setName("tmp"); + } + return true; + } + }; +} + +char MetaRenamer::ID = 0; +INITIALIZE_PASS(MetaRenamer, "metarenamer", + "Assign new names to everything", false, false) +//===----------------------------------------------------------------------===// +// +// MetaRenamer - Rename everything with metasyntactic names. +// +ModulePass *llvm::createMetaRenamerPass() { + return new MetaRenamer(); +} diff --git a/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp new file mode 100644 index 000000000000..ff6e6f9c60d3 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp @@ -0,0 +1,82 @@ +//===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This family of functions perform manipulations on Modules. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/ModuleUtils.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Module.h" + +using namespace llvm; + +static void appendToGlobalArray(const char *Array, + Module &M, Function *F, int Priority) { + IRBuilder<> IRB(M.getContext()); + FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false); + StructType *Ty = StructType::get( + IRB.getInt32Ty(), PointerType::getUnqual(FnTy), NULL); + + Constant *RuntimeCtorInit = ConstantStruct::get( + Ty, IRB.getInt32(Priority), F, NULL); + + // Get the current set of static global constructors and add the new ctor + // to the list. + SmallVector<Constant *, 16> CurrentCtors; + if (GlobalVariable * GVCtor = M.getNamedGlobal(Array)) { + if (Constant *Init = GVCtor->getInitializer()) { + unsigned n = Init->getNumOperands(); + CurrentCtors.reserve(n + 1); + for (unsigned i = 0; i != n; ++i) + CurrentCtors.push_back(cast<Constant>(Init->getOperand(i))); + } + GVCtor->eraseFromParent(); + } + + CurrentCtors.push_back(RuntimeCtorInit); + + // Create a new initializer. + ArrayType *AT = ArrayType::get(RuntimeCtorInit->getType(), + CurrentCtors.size()); + Constant *NewInit = ConstantArray::get(AT, CurrentCtors); + + // Create the new global variable and replace all uses of + // the old global variable with the new one. + (void)new GlobalVariable(M, NewInit->getType(), false, + GlobalValue::AppendingLinkage, NewInit, Array); +} + +void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority) { + appendToGlobalArray("llvm.global_ctors", M, F, Priority); +} + +void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority) { + appendToGlobalArray("llvm.global_dtors", M, F, Priority); +} + +GlobalVariable * +llvm::collectUsedGlobalVariables(Module &M, SmallPtrSet<GlobalValue *, 8> &Set, + bool CompilerUsed) { + const char *Name = CompilerUsed ? "llvm.compiler.used" : "llvm.used"; + GlobalVariable *GV = M.getGlobalVariable(Name); + if (!GV || !GV->hasInitializer()) + return GV; + + const ConstantArray *Init = cast<ConstantArray>(GV->getInitializer()); + for (unsigned I = 0, E = Init->getNumOperands(); I != E; ++I) { + Value *Op = Init->getOperand(I); + GlobalValue *G = cast<GlobalValue>(Op->stripPointerCastsNoFollowAliases()); + Set.insert(G); + } + return GV; +} diff --git a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp new file mode 100644 index 000000000000..8f6eee3510d1 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -0,0 +1,1079 @@ +//===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file promotes memory references to be register references. It promotes +// alloca instructions which only have loads and stores as uses. An alloca is +// transformed by using iterated dominator frontiers to place PHI nodes, then +// traversing the function in depth-first order to rewrite loads and stores as +// appropriate. +// +// The algorithm used here is based on: +// +// Sreedhar and Gao. A linear time algorithm for placing phi-nodes. +// In Proceedings of the 22nd ACM SIGPLAN-SIGACT Symposium on Principles of +// Programming Languages +// POPL '95. ACM, New York, NY, 62-73. +// +// It has been modified to not explicitly use the DJ graph data structure and to +// directly compute pruned SSA using per-variable liveness information. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mem2reg" +#include "llvm/Transforms/Utils/PromoteMemToReg.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/DIBuilder.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Metadata.h" +#include "llvm/Support/CFG.h" +#include "llvm/Transforms/Utils/Local.h" +#include <algorithm> +#include <queue> +using namespace llvm; + +STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block"); +STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store"); +STATISTIC(NumDeadAlloca, "Number of dead alloca's removed"); +STATISTIC(NumPHIInsert, "Number of PHI nodes inserted"); + +bool llvm::isAllocaPromotable(const AllocaInst *AI) { + // FIXME: If the memory unit is of pointer or integer type, we can permit + // assignments to subsections of the memory unit. + + // Only allow direct and non-volatile loads and stores... + for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end(); + UI != UE; ++UI) { // Loop over all of the uses of the alloca + const User *U = *UI; + if (const LoadInst *LI = dyn_cast<LoadInst>(U)) { + // Note that atomic loads can be transformed; atomic semantics do + // not have any meaning for a local alloca. + if (LI->isVolatile()) + return false; + } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { + if (SI->getOperand(0) == AI) + return false; // Don't allow a store OF the AI, only INTO the AI. + // Note that atomic stores can be transformed; atomic semantics do + // not have any meaning for a local alloca. + if (SI->isVolatile()) + return false; + } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) { + if (II->getIntrinsicID() != Intrinsic::lifetime_start && + II->getIntrinsicID() != Intrinsic::lifetime_end) + return false; + } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { + if (BCI->getType() != Type::getInt8PtrTy(U->getContext())) + return false; + if (!onlyUsedByLifetimeMarkers(BCI)) + return false; + } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) { + if (GEPI->getType() != Type::getInt8PtrTy(U->getContext())) + return false; + if (!GEPI->hasAllZeroIndices()) + return false; + if (!onlyUsedByLifetimeMarkers(GEPI)) + return false; + } else { + return false; + } + } + + return true; +} + +namespace { + +struct AllocaInfo { + SmallVector<BasicBlock *, 32> DefiningBlocks; + SmallVector<BasicBlock *, 32> UsingBlocks; + + StoreInst *OnlyStore; + BasicBlock *OnlyBlock; + bool OnlyUsedInOneBlock; + + Value *AllocaPointerVal; + DbgDeclareInst *DbgDeclare; + + void clear() { + DefiningBlocks.clear(); + UsingBlocks.clear(); + OnlyStore = 0; + OnlyBlock = 0; + OnlyUsedInOneBlock = true; + AllocaPointerVal = 0; + DbgDeclare = 0; + } + + /// Scan the uses of the specified alloca, filling in the AllocaInfo used + /// by the rest of the pass to reason about the uses of this alloca. + void AnalyzeAlloca(AllocaInst *AI) { + clear(); + + // As we scan the uses of the alloca instruction, keep track of stores, + // and decide whether all of the loads and stores to the alloca are within + // the same basic block. + for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); + UI != E;) { + Instruction *User = cast<Instruction>(*UI++); + + if (StoreInst *SI = dyn_cast<StoreInst>(User)) { + // Remember the basic blocks which define new values for the alloca + DefiningBlocks.push_back(SI->getParent()); + AllocaPointerVal = SI->getOperand(0); + OnlyStore = SI; + } else { + LoadInst *LI = cast<LoadInst>(User); + // Otherwise it must be a load instruction, keep track of variable + // reads. + UsingBlocks.push_back(LI->getParent()); + AllocaPointerVal = LI; + } + + if (OnlyUsedInOneBlock) { + if (OnlyBlock == 0) + OnlyBlock = User->getParent(); + else if (OnlyBlock != User->getParent()) + OnlyUsedInOneBlock = false; + } + } + + DbgDeclare = FindAllocaDbgDeclare(AI); + } +}; + +// Data package used by RenamePass() +class RenamePassData { +public: + typedef std::vector<Value *> ValVector; + + RenamePassData() : BB(NULL), Pred(NULL), Values() {} + RenamePassData(BasicBlock *B, BasicBlock *P, const ValVector &V) + : BB(B), Pred(P), Values(V) {} + BasicBlock *BB; + BasicBlock *Pred; + ValVector Values; + + void swap(RenamePassData &RHS) { + std::swap(BB, RHS.BB); + std::swap(Pred, RHS.Pred); + Values.swap(RHS.Values); + } +}; + +/// \brief This assigns and keeps a per-bb relative ordering of load/store +/// instructions in the block that directly load or store an alloca. +/// +/// This functionality is important because it avoids scanning large basic +/// blocks multiple times when promoting many allocas in the same block. +class LargeBlockInfo { + /// \brief For each instruction that we track, keep the index of the + /// instruction. + /// + /// The index starts out as the number of the instruction from the start of + /// the block. + DenseMap<const Instruction *, unsigned> InstNumbers; + +public: + + /// This code only looks at accesses to allocas. + static bool isInterestingInstruction(const Instruction *I) { + return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) || + (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1))); + } + + /// Get or calculate the index of the specified instruction. + unsigned getInstructionIndex(const Instruction *I) { + assert(isInterestingInstruction(I) && + "Not a load/store to/from an alloca?"); + + // If we already have this instruction number, return it. + DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I); + if (It != InstNumbers.end()) + return It->second; + + // Scan the whole block to get the instruction. This accumulates + // information for every interesting instruction in the block, in order to + // avoid gratuitus rescans. + const BasicBlock *BB = I->getParent(); + unsigned InstNo = 0; + for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end(); BBI != E; + ++BBI) + if (isInterestingInstruction(BBI)) + InstNumbers[BBI] = InstNo++; + It = InstNumbers.find(I); + + assert(It != InstNumbers.end() && "Didn't insert instruction?"); + return It->second; + } + + void deleteValue(const Instruction *I) { InstNumbers.erase(I); } + + void clear() { InstNumbers.clear(); } +}; + +struct PromoteMem2Reg { + /// The alloca instructions being promoted. + std::vector<AllocaInst *> Allocas; + DominatorTree &DT; + DIBuilder DIB; + + /// An AliasSetTracker object to update. If null, don't update it. + AliasSetTracker *AST; + + /// Reverse mapping of Allocas. + DenseMap<AllocaInst *, unsigned> AllocaLookup; + + /// \brief The PhiNodes we're adding. + /// + /// That map is used to simplify some Phi nodes as we iterate over it, so + /// it should have deterministic iterators. We could use a MapVector, but + /// since we already maintain a map from BasicBlock* to a stable numbering + /// (BBNumbers), the DenseMap is more efficient (also supports removal). + DenseMap<std::pair<unsigned, unsigned>, PHINode *> NewPhiNodes; + + /// For each PHI node, keep track of which entry in Allocas it corresponds + /// to. + DenseMap<PHINode *, unsigned> PhiToAllocaMap; + + /// If we are updating an AliasSetTracker, then for each alloca that is of + /// pointer type, we keep track of what to copyValue to the inserted PHI + /// nodes here. + std::vector<Value *> PointerAllocaValues; + + /// For each alloca, we keep track of the dbg.declare intrinsic that + /// describes it, if any, so that we can convert it to a dbg.value + /// intrinsic if the alloca gets promoted. + SmallVector<DbgDeclareInst *, 8> AllocaDbgDeclares; + + /// The set of basic blocks the renamer has already visited. + /// + SmallPtrSet<BasicBlock *, 16> Visited; + + /// Contains a stable numbering of basic blocks to avoid non-determinstic + /// behavior. + DenseMap<BasicBlock *, unsigned> BBNumbers; + + /// Maps DomTreeNodes to their level in the dominator tree. + DenseMap<DomTreeNode *, unsigned> DomLevels; + + /// Lazily compute the number of predecessors a block has. + DenseMap<const BasicBlock *, unsigned> BBNumPreds; + +public: + PromoteMem2Reg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT, + AliasSetTracker *AST) + : Allocas(Allocas.begin(), Allocas.end()), DT(DT), + DIB(*DT.getRoot()->getParent()->getParent()), AST(AST) {} + + void run(); + +private: + void RemoveFromAllocasList(unsigned &AllocaIdx) { + Allocas[AllocaIdx] = Allocas.back(); + Allocas.pop_back(); + --AllocaIdx; + } + + unsigned getNumPreds(const BasicBlock *BB) { + unsigned &NP = BBNumPreds[BB]; + if (NP == 0) + NP = std::distance(pred_begin(BB), pred_end(BB)) + 1; + return NP - 1; + } + + void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum, + AllocaInfo &Info); + void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info, + const SmallPtrSet<BasicBlock *, 32> &DefBlocks, + SmallPtrSet<BasicBlock *, 32> &LiveInBlocks); + void RenamePass(BasicBlock *BB, BasicBlock *Pred, + RenamePassData::ValVector &IncVals, + std::vector<RenamePassData> &Worklist); + bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version); +}; + +} // end of anonymous namespace + +static void removeLifetimeIntrinsicUsers(AllocaInst *AI) { + // Knowing that this alloca is promotable, we know that it's safe to kill all + // instructions except for load and store. + + for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end(); + UI != UE;) { + Instruction *I = cast<Instruction>(*UI); + ++UI; + if (isa<LoadInst>(I) || isa<StoreInst>(I)) + continue; + + if (!I->getType()->isVoidTy()) { + // The only users of this bitcast/GEP instruction are lifetime intrinsics. + // Follow the use/def chain to erase them now instead of leaving it for + // dead code elimination later. + for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); + UI != UE;) { + Instruction *Inst = cast<Instruction>(*UI); + ++UI; + Inst->eraseFromParent(); + } + } + I->eraseFromParent(); + } +} + +/// \brief Rewrite as many loads as possible given a single store. +/// +/// When there is only a single store, we can use the domtree to trivially +/// replace all of the dominated loads with the stored value. Do so, and return +/// true if this has successfully promoted the alloca entirely. If this returns +/// false there were some loads which were not dominated by the single store +/// and thus must be phi-ed with undef. We fall back to the standard alloca +/// promotion algorithm in that case. +static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, + LargeBlockInfo &LBI, + DominatorTree &DT, + AliasSetTracker *AST) { + StoreInst *OnlyStore = Info.OnlyStore; + bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0)); + BasicBlock *StoreBB = OnlyStore->getParent(); + int StoreIndex = -1; + + // Clear out UsingBlocks. We will reconstruct it here if needed. + Info.UsingBlocks.clear(); + + for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) { + Instruction *UserInst = cast<Instruction>(*UI++); + if (!isa<LoadInst>(UserInst)) { + assert(UserInst == OnlyStore && "Should only have load/stores"); + continue; + } + LoadInst *LI = cast<LoadInst>(UserInst); + + // Okay, if we have a load from the alloca, we want to replace it with the + // only value stored to the alloca. We can do this if the value is + // dominated by the store. If not, we use the rest of the mem2reg machinery + // to insert the phi nodes as needed. + if (!StoringGlobalVal) { // Non-instructions are always dominated. + if (LI->getParent() == StoreBB) { + // If we have a use that is in the same block as the store, compare the + // indices of the two instructions to see which one came first. If the + // load came before the store, we can't handle it. + if (StoreIndex == -1) + StoreIndex = LBI.getInstructionIndex(OnlyStore); + + if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) { + // Can't handle this load, bail out. + Info.UsingBlocks.push_back(StoreBB); + continue; + } + + } else if (LI->getParent() != StoreBB && + !DT.dominates(StoreBB, LI->getParent())) { + // If the load and store are in different blocks, use BB dominance to + // check their relationships. If the store doesn't dom the use, bail + // out. + Info.UsingBlocks.push_back(LI->getParent()); + continue; + } + } + + // Otherwise, we *can* safely rewrite this load. + Value *ReplVal = OnlyStore->getOperand(0); + // If the replacement value is the load, this must occur in unreachable + // code. + if (ReplVal == LI) + ReplVal = UndefValue::get(LI->getType()); + LI->replaceAllUsesWith(ReplVal); + if (AST && LI->getType()->isPointerTy()) + AST->deleteValue(LI); + LI->eraseFromParent(); + LBI.deleteValue(LI); + } + + // Finally, after the scan, check to see if the store is all that is left. + if (!Info.UsingBlocks.empty()) + return false; // If not, we'll have to fall back for the remainder. + + // Record debuginfo for the store and remove the declaration's + // debuginfo. + if (DbgDeclareInst *DDI = Info.DbgDeclare) { + DIBuilder DIB(*AI->getParent()->getParent()->getParent()); + ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, DIB); + DDI->eraseFromParent(); + LBI.deleteValue(DDI); + } + // Remove the (now dead) store and alloca. + Info.OnlyStore->eraseFromParent(); + LBI.deleteValue(Info.OnlyStore); + + if (AST) + AST->deleteValue(AI); + AI->eraseFromParent(); + LBI.deleteValue(AI); + return true; +} + +/// Many allocas are only used within a single basic block. If this is the +/// case, avoid traversing the CFG and inserting a lot of potentially useless +/// PHI nodes by just performing a single linear pass over the basic block +/// using the Alloca. +/// +/// If we cannot promote this alloca (because it is read before it is written), +/// return true. This is necessary in cases where, due to control flow, the +/// alloca is potentially undefined on some control flow paths. e.g. code like +/// this is potentially correct: +/// +/// for (...) { if (c) { A = undef; undef = B; } } +/// +/// ... so long as A is not used before undef is set. +static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, + LargeBlockInfo &LBI, + AliasSetTracker *AST) { + // The trickiest case to handle is when we have large blocks. Because of this, + // this code is optimized assuming that large blocks happen. This does not + // significantly pessimize the small block case. This uses LargeBlockInfo to + // make it efficient to get the index of various operations in the block. + + // Walk the use-def list of the alloca, getting the locations of all stores. + typedef SmallVector<std::pair<unsigned, StoreInst *>, 64> StoresByIndexTy; + StoresByIndexTy StoresByIndex; + + for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; + ++UI) + if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) + StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI)); + + // Sort the stores by their index, making it efficient to do a lookup with a + // binary search. + std::sort(StoresByIndex.begin(), StoresByIndex.end(), less_first()); + + // Walk all of the loads from this alloca, replacing them with the nearest + // store above them, if any. + for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) { + LoadInst *LI = dyn_cast<LoadInst>(*UI++); + if (!LI) + continue; + + unsigned LoadIdx = LBI.getInstructionIndex(LI); + + // Find the nearest store that has a lower index than this load. + StoresByIndexTy::iterator I = + std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(), + std::make_pair(LoadIdx, static_cast<StoreInst *>(0)), + less_first()); + + if (I == StoresByIndex.begin()) + // If there is no store before this load, the load takes the undef value. + LI->replaceAllUsesWith(UndefValue::get(LI->getType())); + else + // Otherwise, there was a store before this load, the load takes its value. + LI->replaceAllUsesWith(llvm::prior(I)->second->getOperand(0)); + + if (AST && LI->getType()->isPointerTy()) + AST->deleteValue(LI); + LI->eraseFromParent(); + LBI.deleteValue(LI); + } + + // Remove the (now dead) stores and alloca. + while (!AI->use_empty()) { + StoreInst *SI = cast<StoreInst>(AI->use_back()); + // Record debuginfo for the store before removing it. + if (DbgDeclareInst *DDI = Info.DbgDeclare) { + DIBuilder DIB(*AI->getParent()->getParent()->getParent()); + ConvertDebugDeclareToDebugValue(DDI, SI, DIB); + } + SI->eraseFromParent(); + LBI.deleteValue(SI); + } + + if (AST) + AST->deleteValue(AI); + AI->eraseFromParent(); + LBI.deleteValue(AI); + + // The alloca's debuginfo can be removed as well. + if (DbgDeclareInst *DDI = Info.DbgDeclare) { + DDI->eraseFromParent(); + LBI.deleteValue(DDI); + } + + ++NumLocalPromoted; +} + +void PromoteMem2Reg::run() { + Function &F = *DT.getRoot()->getParent(); + + if (AST) + PointerAllocaValues.resize(Allocas.size()); + AllocaDbgDeclares.resize(Allocas.size()); + + AllocaInfo Info; + LargeBlockInfo LBI; + + for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) { + AllocaInst *AI = Allocas[AllocaNum]; + + assert(isAllocaPromotable(AI) && "Cannot promote non-promotable alloca!"); + assert(AI->getParent()->getParent() == &F && + "All allocas should be in the same function, which is same as DF!"); + + removeLifetimeIntrinsicUsers(AI); + + if (AI->use_empty()) { + // If there are no uses of the alloca, just delete it now. + if (AST) + AST->deleteValue(AI); + AI->eraseFromParent(); + + // Remove the alloca from the Allocas list, since it has been processed + RemoveFromAllocasList(AllocaNum); + ++NumDeadAlloca; + continue; + } + + // Calculate the set of read and write-locations for each alloca. This is + // analogous to finding the 'uses' and 'definitions' of each variable. + Info.AnalyzeAlloca(AI); + + // If there is only a single store to this value, replace any loads of + // it that are directly dominated by the definition with the value stored. + if (Info.DefiningBlocks.size() == 1) { + if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AST)) { + // The alloca has been processed, move on. + RemoveFromAllocasList(AllocaNum); + ++NumSingleStore; + continue; + } + } + + // If the alloca is only read and written in one basic block, just perform a + // linear sweep over the block to eliminate it. + if (Info.OnlyUsedInOneBlock) { + promoteSingleBlockAlloca(AI, Info, LBI, AST); + + // The alloca has been processed, move on. + RemoveFromAllocasList(AllocaNum); + continue; + } + + // If we haven't computed dominator tree levels, do so now. + if (DomLevels.empty()) { + SmallVector<DomTreeNode *, 32> Worklist; + + DomTreeNode *Root = DT.getRootNode(); + DomLevels[Root] = 0; + Worklist.push_back(Root); + + while (!Worklist.empty()) { + DomTreeNode *Node = Worklist.pop_back_val(); + unsigned ChildLevel = DomLevels[Node] + 1; + for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); + CI != CE; ++CI) { + DomLevels[*CI] = ChildLevel; + Worklist.push_back(*CI); + } + } + } + + // If we haven't computed a numbering for the BB's in the function, do so + // now. + if (BBNumbers.empty()) { + unsigned ID = 0; + for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) + BBNumbers[I] = ID++; + } + + // If we have an AST to keep updated, remember some pointer value that is + // stored into the alloca. + if (AST) + PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal; + + // Remember the dbg.declare intrinsic describing this alloca, if any. + if (Info.DbgDeclare) + AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare; + + // Keep the reverse mapping of the 'Allocas' array for the rename pass. + AllocaLookup[Allocas[AllocaNum]] = AllocaNum; + + // At this point, we're committed to promoting the alloca using IDF's, and + // the standard SSA construction algorithm. Determine which blocks need PHI + // nodes and see if we can optimize out some work by avoiding insertion of + // dead phi nodes. + DetermineInsertionPoint(AI, AllocaNum, Info); + } + + if (Allocas.empty()) + return; // All of the allocas must have been trivial! + + LBI.clear(); + + // Set the incoming values for the basic block to be null values for all of + // the alloca's. We do this in case there is a load of a value that has not + // been stored yet. In this case, it will get this null value. + // + RenamePassData::ValVector Values(Allocas.size()); + for (unsigned i = 0, e = Allocas.size(); i != e; ++i) + Values[i] = UndefValue::get(Allocas[i]->getAllocatedType()); + + // Walks all basic blocks in the function performing the SSA rename algorithm + // and inserting the phi nodes we marked as necessary + // + std::vector<RenamePassData> RenamePassWorkList; + RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values)); + do { + RenamePassData RPD; + RPD.swap(RenamePassWorkList.back()); + RenamePassWorkList.pop_back(); + // RenamePass may add new worklist entries. + RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList); + } while (!RenamePassWorkList.empty()); + + // The renamer uses the Visited set to avoid infinite loops. Clear it now. + Visited.clear(); + + // Remove the allocas themselves from the function. + for (unsigned i = 0, e = Allocas.size(); i != e; ++i) { + Instruction *A = Allocas[i]; + + // If there are any uses of the alloca instructions left, they must be in + // unreachable basic blocks that were not processed by walking the dominator + // tree. Just delete the users now. + if (!A->use_empty()) + A->replaceAllUsesWith(UndefValue::get(A->getType())); + if (AST) + AST->deleteValue(A); + A->eraseFromParent(); + } + + // Remove alloca's dbg.declare instrinsics from the function. + for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i) + if (DbgDeclareInst *DDI = AllocaDbgDeclares[i]) + DDI->eraseFromParent(); + + // Loop over all of the PHI nodes and see if there are any that we can get + // rid of because they merge all of the same incoming values. This can + // happen due to undef values coming into the PHI nodes. This process is + // iterative, because eliminating one PHI node can cause others to be removed. + bool EliminatedAPHI = true; + while (EliminatedAPHI) { + EliminatedAPHI = false; + + // Iterating over NewPhiNodes is deterministic, so it is safe to try to + // simplify and RAUW them as we go. If it was not, we could add uses to + // the values we replace with in a non deterministic order, thus creating + // non deterministic def->use chains. + for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator + I = NewPhiNodes.begin(), + E = NewPhiNodes.end(); + I != E;) { + PHINode *PN = I->second; + + // If this PHI node merges one value and/or undefs, get the value. + if (Value *V = SimplifyInstruction(PN, 0, 0, &DT)) { + if (AST && PN->getType()->isPointerTy()) + AST->deleteValue(PN); + PN->replaceAllUsesWith(V); + PN->eraseFromParent(); + NewPhiNodes.erase(I++); + EliminatedAPHI = true; + continue; + } + ++I; + } + } + + // At this point, the renamer has added entries to PHI nodes for all reachable + // code. Unfortunately, there may be unreachable blocks which the renamer + // hasn't traversed. If this is the case, the PHI nodes may not + // have incoming values for all predecessors. Loop over all PHI nodes we have + // created, inserting undef values if they are missing any incoming values. + // + for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator + I = NewPhiNodes.begin(), + E = NewPhiNodes.end(); + I != E; ++I) { + // We want to do this once per basic block. As such, only process a block + // when we find the PHI that is the first entry in the block. + PHINode *SomePHI = I->second; + BasicBlock *BB = SomePHI->getParent(); + if (&BB->front() != SomePHI) + continue; + + // Only do work here if there the PHI nodes are missing incoming values. We + // know that all PHI nodes that were inserted in a block will have the same + // number of incoming values, so we can just check any of them. + if (SomePHI->getNumIncomingValues() == getNumPreds(BB)) + continue; + + // Get the preds for BB. + SmallVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB)); + + // Ok, now we know that all of the PHI nodes are missing entries for some + // basic blocks. Start by sorting the incoming predecessors for efficient + // access. + std::sort(Preds.begin(), Preds.end()); + + // Now we loop through all BB's which have entries in SomePHI and remove + // them from the Preds list. + for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) { + // Do a log(n) search of the Preds list for the entry we want. + SmallVectorImpl<BasicBlock *>::iterator EntIt = std::lower_bound( + Preds.begin(), Preds.end(), SomePHI->getIncomingBlock(i)); + assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i) && + "PHI node has entry for a block which is not a predecessor!"); + + // Remove the entry + Preds.erase(EntIt); + } + + // At this point, the blocks left in the preds list must have dummy + // entries inserted into every PHI nodes for the block. Update all the phi + // nodes in this block that we are inserting (there could be phis before + // mem2reg runs). + unsigned NumBadPreds = SomePHI->getNumIncomingValues(); + BasicBlock::iterator BBI = BB->begin(); + while ((SomePHI = dyn_cast<PHINode>(BBI++)) && + SomePHI->getNumIncomingValues() == NumBadPreds) { + Value *UndefVal = UndefValue::get(SomePHI->getType()); + for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred) + SomePHI->addIncoming(UndefVal, Preds[pred]); + } + } + + NewPhiNodes.clear(); +} + +/// \brief Determine which blocks the value is live in. +/// +/// These are blocks which lead to uses. Knowing this allows us to avoid +/// inserting PHI nodes into blocks which don't lead to uses (thus, the +/// inserted phi nodes would be dead). +void PromoteMem2Reg::ComputeLiveInBlocks( + AllocaInst *AI, AllocaInfo &Info, + const SmallPtrSet<BasicBlock *, 32> &DefBlocks, + SmallPtrSet<BasicBlock *, 32> &LiveInBlocks) { + + // To determine liveness, we must iterate through the predecessors of blocks + // where the def is live. Blocks are added to the worklist if we need to + // check their predecessors. Start with all the using blocks. + SmallVector<BasicBlock *, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(), + Info.UsingBlocks.end()); + + // If any of the using blocks is also a definition block, check to see if the + // definition occurs before or after the use. If it happens before the use, + // the value isn't really live-in. + for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) { + BasicBlock *BB = LiveInBlockWorklist[i]; + if (!DefBlocks.count(BB)) + continue; + + // Okay, this is a block that both uses and defines the value. If the first + // reference to the alloca is a def (store), then we know it isn't live-in. + for (BasicBlock::iterator I = BB->begin();; ++I) { + if (StoreInst *SI = dyn_cast<StoreInst>(I)) { + if (SI->getOperand(1) != AI) + continue; + + // We found a store to the alloca before a load. The alloca is not + // actually live-in here. + LiveInBlockWorklist[i] = LiveInBlockWorklist.back(); + LiveInBlockWorklist.pop_back(); + --i, --e; + break; + } + + if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + if (LI->getOperand(0) != AI) + continue; + + // Okay, we found a load before a store to the alloca. It is actually + // live into this block. + break; + } + } + } + + // Now that we have a set of blocks where the phi is live-in, recursively add + // their predecessors until we find the full region the value is live. + while (!LiveInBlockWorklist.empty()) { + BasicBlock *BB = LiveInBlockWorklist.pop_back_val(); + + // The block really is live in here, insert it into the set. If already in + // the set, then it has already been processed. + if (!LiveInBlocks.insert(BB)) + continue; + + // Since the value is live into BB, it is either defined in a predecessor or + // live into it to. Add the preds to the worklist unless they are a + // defining block. + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + BasicBlock *P = *PI; + + // The value is not live into a predecessor if it defines the value. + if (DefBlocks.count(P)) + continue; + + // Otherwise it is, add to the worklist. + LiveInBlockWorklist.push_back(P); + } + } +} + +/// At this point, we're committed to promoting the alloca using IDF's, and the +/// standard SSA construction algorithm. Determine which blocks need phi nodes +/// and see if we can optimize out some work by avoiding insertion of dead phi +/// nodes. +void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum, + AllocaInfo &Info) { + // Unique the set of defining blocks for efficient lookup. + SmallPtrSet<BasicBlock *, 32> DefBlocks; + DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end()); + + // Determine which blocks the value is live in. These are blocks which lead + // to uses. + SmallPtrSet<BasicBlock *, 32> LiveInBlocks; + ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks); + + // Use a priority queue keyed on dominator tree level so that inserted nodes + // are handled from the bottom of the dominator tree upwards. + typedef std::pair<DomTreeNode *, unsigned> DomTreeNodePair; + typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>, + less_second> IDFPriorityQueue; + IDFPriorityQueue PQ; + + for (SmallPtrSet<BasicBlock *, 32>::const_iterator I = DefBlocks.begin(), + E = DefBlocks.end(); + I != E; ++I) { + if (DomTreeNode *Node = DT.getNode(*I)) + PQ.push(std::make_pair(Node, DomLevels[Node])); + } + + SmallVector<std::pair<unsigned, BasicBlock *>, 32> DFBlocks; + SmallPtrSet<DomTreeNode *, 32> Visited; + SmallVector<DomTreeNode *, 32> Worklist; + while (!PQ.empty()) { + DomTreeNodePair RootPair = PQ.top(); + PQ.pop(); + DomTreeNode *Root = RootPair.first; + unsigned RootLevel = RootPair.second; + + // Walk all dominator tree children of Root, inspecting their CFG edges with + // targets elsewhere on the dominator tree. Only targets whose level is at + // most Root's level are added to the iterated dominance frontier of the + // definition set. + + Worklist.clear(); + Worklist.push_back(Root); + + while (!Worklist.empty()) { + DomTreeNode *Node = Worklist.pop_back_val(); + BasicBlock *BB = Node->getBlock(); + + for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; + ++SI) { + DomTreeNode *SuccNode = DT.getNode(*SI); + + // Quickly skip all CFG edges that are also dominator tree edges instead + // of catching them below. + if (SuccNode->getIDom() == Node) + continue; + + unsigned SuccLevel = DomLevels[SuccNode]; + if (SuccLevel > RootLevel) + continue; + + if (!Visited.insert(SuccNode)) + continue; + + BasicBlock *SuccBB = SuccNode->getBlock(); + if (!LiveInBlocks.count(SuccBB)) + continue; + + DFBlocks.push_back(std::make_pair(BBNumbers[SuccBB], SuccBB)); + if (!DefBlocks.count(SuccBB)) + PQ.push(std::make_pair(SuccNode, SuccLevel)); + } + + for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); CI != CE; + ++CI) { + if (!Visited.count(*CI)) + Worklist.push_back(*CI); + } + } + } + + if (DFBlocks.size() > 1) + std::sort(DFBlocks.begin(), DFBlocks.end()); + + unsigned CurrentVersion = 0; + for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i) + QueuePhiNode(DFBlocks[i].second, AllocaNum, CurrentVersion); +} + +/// \brief Queue a phi-node to be added to a basic-block for a specific Alloca. +/// +/// Returns true if there wasn't already a phi-node for that variable +bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, + unsigned &Version) { + // Look up the basic-block in question. + PHINode *&PN = NewPhiNodes[std::make_pair(BBNumbers[BB], AllocaNo)]; + + // If the BB already has a phi node added for the i'th alloca then we're done! + if (PN) + return false; + + // Create a PhiNode using the dereferenced type... and add the phi-node to the + // BasicBlock. + PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB), + Allocas[AllocaNo]->getName() + "." + Twine(Version++), + BB->begin()); + ++NumPHIInsert; + PhiToAllocaMap[PN] = AllocaNo; + + if (AST && PN->getType()->isPointerTy()) + AST->copyValue(PointerAllocaValues[AllocaNo], PN); + + return true; +} + +/// \brief Recursively traverse the CFG of the function, renaming loads and +/// stores to the allocas which we are promoting. +/// +/// IncomingVals indicates what value each Alloca contains on exit from the +/// predecessor block Pred. +void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred, + RenamePassData::ValVector &IncomingVals, + std::vector<RenamePassData> &Worklist) { +NextIteration: + // If we are inserting any phi nodes into this BB, they will already be in the + // block. + if (PHINode *APN = dyn_cast<PHINode>(BB->begin())) { + // If we have PHI nodes to update, compute the number of edges from Pred to + // BB. + if (PhiToAllocaMap.count(APN)) { + // We want to be able to distinguish between PHI nodes being inserted by + // this invocation of mem2reg from those phi nodes that already existed in + // the IR before mem2reg was run. We determine that APN is being inserted + // because it is missing incoming edges. All other PHI nodes being + // inserted by this pass of mem2reg will have the same number of incoming + // operands so far. Remember this count. + unsigned NewPHINumOperands = APN->getNumOperands(); + + unsigned NumEdges = std::count(succ_begin(Pred), succ_end(Pred), BB); + assert(NumEdges && "Must be at least one edge from Pred to BB!"); + + // Add entries for all the phis. + BasicBlock::iterator PNI = BB->begin(); + do { + unsigned AllocaNo = PhiToAllocaMap[APN]; + + // Add N incoming values to the PHI node. + for (unsigned i = 0; i != NumEdges; ++i) + APN->addIncoming(IncomingVals[AllocaNo], Pred); + + // The currently active variable for this block is now the PHI. + IncomingVals[AllocaNo] = APN; + + // Get the next phi node. + ++PNI; + APN = dyn_cast<PHINode>(PNI); + if (APN == 0) + break; + + // Verify that it is missing entries. If not, it is not being inserted + // by this mem2reg invocation so we want to ignore it. + } while (APN->getNumOperands() == NewPHINumOperands); + } + } + + // Don't revisit blocks. + if (!Visited.insert(BB)) + return; + + for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II);) { + Instruction *I = II++; // get the instruction, increment iterator + + if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand()); + if (!Src) + continue; + + DenseMap<AllocaInst *, unsigned>::iterator AI = AllocaLookup.find(Src); + if (AI == AllocaLookup.end()) + continue; + + Value *V = IncomingVals[AI->second]; + + // Anything using the load now uses the current value. + LI->replaceAllUsesWith(V); + if (AST && LI->getType()->isPointerTy()) + AST->deleteValue(LI); + BB->getInstList().erase(LI); + } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { + // Delete this instruction and mark the name as the current holder of the + // value + AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand()); + if (!Dest) + continue; + + DenseMap<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest); + if (ai == AllocaLookup.end()) + continue; + + // what value were we writing? + IncomingVals[ai->second] = SI->getOperand(0); + // Record debuginfo for the store before removing it. + if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second]) + ConvertDebugDeclareToDebugValue(DDI, SI, DIB); + BB->getInstList().erase(SI); + } + } + + // 'Recurse' to our successors. + succ_iterator I = succ_begin(BB), E = succ_end(BB); + if (I == E) + return; + + // Keep track of the successors so we don't visit the same successor twice + SmallPtrSet<BasicBlock *, 8> VisitedSuccs; + + // Handle the first successor without using the worklist. + VisitedSuccs.insert(*I); + Pred = BB; + BB = *I; + ++I; + + for (; I != E; ++I) + if (VisitedSuccs.insert(*I)) + Worklist.push_back(RenamePassData(*I, Pred, IncomingVals)); + + goto NextIteration; +} + +void llvm::PromoteMemToReg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT, + AliasSetTracker *AST) { + // If there is nothing to do, bail out... + if (Allocas.empty()) + return; + + PromoteMem2Reg(Allocas, DT, AST).run(); +} diff --git a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp new file mode 100644 index 000000000000..30adbfac058f --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp @@ -0,0 +1,492 @@ +//===- SSAUpdater.cpp - Unstructured SSA Update Tool ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SSAUpdater class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "ssaupdater" +#include "llvm/Transforms/Utils/SSAUpdater.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/TinyPtrVector.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/AlignOf.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/SSAUpdaterImpl.h" + +using namespace llvm; + +typedef DenseMap<BasicBlock*, Value*> AvailableValsTy; +static AvailableValsTy &getAvailableVals(void *AV) { + return *static_cast<AvailableValsTy*>(AV); +} + +SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI) + : AV(0), ProtoType(0), ProtoName(), InsertedPHIs(NewPHI) {} + +SSAUpdater::~SSAUpdater() { + delete static_cast<AvailableValsTy*>(AV); +} + +void SSAUpdater::Initialize(Type *Ty, StringRef Name) { + if (AV == 0) + AV = new AvailableValsTy(); + else + getAvailableVals(AV).clear(); + ProtoType = Ty; + ProtoName = Name; +} + +bool SSAUpdater::HasValueForBlock(BasicBlock *BB) const { + return getAvailableVals(AV).count(BB); +} + +void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) { + assert(ProtoType != 0 && "Need to initialize SSAUpdater"); + assert(ProtoType == V->getType() && + "All rewritten values must have the same type"); + getAvailableVals(AV)[BB] = V; +} + +static bool IsEquivalentPHI(PHINode *PHI, + SmallDenseMap<BasicBlock*, Value*, 8> &ValueMapping) { + unsigned PHINumValues = PHI->getNumIncomingValues(); + if (PHINumValues != ValueMapping.size()) + return false; + + // Scan the phi to see if it matches. + for (unsigned i = 0, e = PHINumValues; i != e; ++i) + if (ValueMapping[PHI->getIncomingBlock(i)] != + PHI->getIncomingValue(i)) { + return false; + } + + return true; +} + +Value *SSAUpdater::GetValueAtEndOfBlock(BasicBlock *BB) { + Value *Res = GetValueAtEndOfBlockInternal(BB); + return Res; +} + +Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { + // If there is no definition of the renamed variable in this block, just use + // GetValueAtEndOfBlock to do our work. + if (!HasValueForBlock(BB)) + return GetValueAtEndOfBlock(BB); + + // Otherwise, we have the hard case. Get the live-in values for each + // predecessor. + SmallVector<std::pair<BasicBlock*, Value*>, 8> PredValues; + Value *SingularValue = 0; + + // We can get our predecessor info by walking the pred_iterator list, but it + // is relatively slow. If we already have PHI nodes in this block, walk one + // of them to get the predecessor list instead. + if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) { + for (unsigned i = 0, e = SomePhi->getNumIncomingValues(); i != e; ++i) { + BasicBlock *PredBB = SomePhi->getIncomingBlock(i); + Value *PredVal = GetValueAtEndOfBlock(PredBB); + PredValues.push_back(std::make_pair(PredBB, PredVal)); + + // Compute SingularValue. + if (i == 0) + SingularValue = PredVal; + else if (PredVal != SingularValue) + SingularValue = 0; + } + } else { + bool isFirstPred = true; + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + BasicBlock *PredBB = *PI; + Value *PredVal = GetValueAtEndOfBlock(PredBB); + PredValues.push_back(std::make_pair(PredBB, PredVal)); + + // Compute SingularValue. + if (isFirstPred) { + SingularValue = PredVal; + isFirstPred = false; + } else if (PredVal != SingularValue) + SingularValue = 0; + } + } + + // If there are no predecessors, just return undef. + if (PredValues.empty()) + return UndefValue::get(ProtoType); + + // Otherwise, if all the merged values are the same, just use it. + if (SingularValue != 0) + return SingularValue; + + // Otherwise, we do need a PHI: check to see if we already have one available + // in this block that produces the right value. + if (isa<PHINode>(BB->begin())) { + SmallDenseMap<BasicBlock*, Value*, 8> ValueMapping(PredValues.begin(), + PredValues.end()); + PHINode *SomePHI; + for (BasicBlock::iterator It = BB->begin(); + (SomePHI = dyn_cast<PHINode>(It)); ++It) { + if (IsEquivalentPHI(SomePHI, ValueMapping)) + return SomePHI; + } + } + + // Ok, we have no way out, insert a new one now. + PHINode *InsertedPHI = PHINode::Create(ProtoType, PredValues.size(), + ProtoName, &BB->front()); + + // Fill in all the predecessors of the PHI. + for (unsigned i = 0, e = PredValues.size(); i != e; ++i) + InsertedPHI->addIncoming(PredValues[i].second, PredValues[i].first); + + // See if the PHI node can be merged to a single value. This can happen in + // loop cases when we get a PHI of itself and one other value. + if (Value *V = SimplifyInstruction(InsertedPHI)) { + InsertedPHI->eraseFromParent(); + return V; + } + + // Set the DebugLoc of the inserted PHI, if available. + DebugLoc DL; + if (const Instruction *I = BB->getFirstNonPHI()) + DL = I->getDebugLoc(); + InsertedPHI->setDebugLoc(DL); + + // If the client wants to know about all new instructions, tell it. + if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); + + DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n"); + return InsertedPHI; +} + +void SSAUpdater::RewriteUse(Use &U) { + Instruction *User = cast<Instruction>(U.getUser()); + + Value *V; + if (PHINode *UserPN = dyn_cast<PHINode>(User)) + V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U)); + else + V = GetValueInMiddleOfBlock(User->getParent()); + + // Notify that users of the existing value that it is being replaced. + Value *OldVal = U.get(); + if (OldVal != V && OldVal->hasValueHandle()) + ValueHandleBase::ValueIsRAUWd(OldVal, V); + + U.set(V); +} + +void SSAUpdater::RewriteUseAfterInsertions(Use &U) { + Instruction *User = cast<Instruction>(U.getUser()); + + Value *V; + if (PHINode *UserPN = dyn_cast<PHINode>(User)) + V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U)); + else + V = GetValueAtEndOfBlock(User->getParent()); + + U.set(V); +} + +namespace llvm { +template<> +class SSAUpdaterTraits<SSAUpdater> { +public: + typedef BasicBlock BlkT; + typedef Value *ValT; + typedef PHINode PhiT; + + typedef succ_iterator BlkSucc_iterator; + static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return succ_begin(BB); } + static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return succ_end(BB); } + + class PHI_iterator { + private: + PHINode *PHI; + unsigned idx; + + public: + explicit PHI_iterator(PHINode *P) // begin iterator + : PHI(P), idx(0) {} + PHI_iterator(PHINode *P, bool) // end iterator + : PHI(P), idx(PHI->getNumIncomingValues()) {} + + PHI_iterator &operator++() { ++idx; return *this; } + bool operator==(const PHI_iterator& x) const { return idx == x.idx; } + bool operator!=(const PHI_iterator& x) const { return !operator==(x); } + Value *getIncomingValue() { return PHI->getIncomingValue(idx); } + BasicBlock *getIncomingBlock() { return PHI->getIncomingBlock(idx); } + }; + + static PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); } + static PHI_iterator PHI_end(PhiT *PHI) { + return PHI_iterator(PHI, true); + } + + /// FindPredecessorBlocks - Put the predecessors of Info->BB into the Preds + /// vector, set Info->NumPreds, and allocate space in Info->Preds. + static void FindPredecessorBlocks(BasicBlock *BB, + SmallVectorImpl<BasicBlock*> *Preds) { + // We can get our predecessor info by walking the pred_iterator list, + // but it is relatively slow. If we already have PHI nodes in this + // block, walk one of them to get the predecessor list instead. + if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) { + for (unsigned PI = 0, E = SomePhi->getNumIncomingValues(); PI != E; ++PI) + Preds->push_back(SomePhi->getIncomingBlock(PI)); + } else { + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + Preds->push_back(*PI); + } + } + + /// GetUndefVal - Get an undefined value of the same type as the value + /// being handled. + static Value *GetUndefVal(BasicBlock *BB, SSAUpdater *Updater) { + return UndefValue::get(Updater->ProtoType); + } + + /// CreateEmptyPHI - Create a new PHI instruction in the specified block. + /// Reserve space for the operands but do not fill them in yet. + static Value *CreateEmptyPHI(BasicBlock *BB, unsigned NumPreds, + SSAUpdater *Updater) { + PHINode *PHI = PHINode::Create(Updater->ProtoType, NumPreds, + Updater->ProtoName, &BB->front()); + return PHI; + } + + /// AddPHIOperand - Add the specified value as an operand of the PHI for + /// the specified predecessor block. + static void AddPHIOperand(PHINode *PHI, Value *Val, BasicBlock *Pred) { + PHI->addIncoming(Val, Pred); + } + + /// InstrIsPHI - Check if an instruction is a PHI. + /// + static PHINode *InstrIsPHI(Instruction *I) { + return dyn_cast<PHINode>(I); + } + + /// ValueIsPHI - Check if a value is a PHI. + /// + static PHINode *ValueIsPHI(Value *Val, SSAUpdater *Updater) { + return dyn_cast<PHINode>(Val); + } + + /// ValueIsNewPHI - Like ValueIsPHI but also check if the PHI has no source + /// operands, i.e., it was just added. + static PHINode *ValueIsNewPHI(Value *Val, SSAUpdater *Updater) { + PHINode *PHI = ValueIsPHI(Val, Updater); + if (PHI && PHI->getNumIncomingValues() == 0) + return PHI; + return 0; + } + + /// GetPHIValue - For the specified PHI instruction, return the value + /// that it defines. + static Value *GetPHIValue(PHINode *PHI) { + return PHI; + } +}; + +} // End llvm namespace + +/// Check to see if AvailableVals has an entry for the specified BB and if so, +/// return it. If not, construct SSA form by first calculating the required +/// placement of PHIs and then inserting new PHIs where needed. +Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { + AvailableValsTy &AvailableVals = getAvailableVals(AV); + if (Value *V = AvailableVals[BB]) + return V; + + SSAUpdaterImpl<SSAUpdater> Impl(this, &AvailableVals, InsertedPHIs); + return Impl.GetValue(BB); +} + +//===----------------------------------------------------------------------===// +// LoadAndStorePromoter Implementation +//===----------------------------------------------------------------------===// + +LoadAndStorePromoter:: +LoadAndStorePromoter(const SmallVectorImpl<Instruction*> &Insts, + SSAUpdater &S, StringRef BaseName) : SSA(S) { + if (Insts.empty()) return; + + Value *SomeVal; + if (LoadInst *LI = dyn_cast<LoadInst>(Insts[0])) + SomeVal = LI; + else + SomeVal = cast<StoreInst>(Insts[0])->getOperand(0); + + if (BaseName.empty()) + BaseName = SomeVal->getName(); + SSA.Initialize(SomeVal->getType(), BaseName); +} + + +void LoadAndStorePromoter:: +run(const SmallVectorImpl<Instruction*> &Insts) const { + + // First step: bucket up uses of the alloca by the block they occur in. + // This is important because we have to handle multiple defs/uses in a block + // ourselves: SSAUpdater is purely for cross-block references. + DenseMap<BasicBlock*, TinyPtrVector<Instruction*> > UsesByBlock; + + for (unsigned i = 0, e = Insts.size(); i != e; ++i) { + Instruction *User = Insts[i]; + UsesByBlock[User->getParent()].push_back(User); + } + + // Okay, now we can iterate over all the blocks in the function with uses, + // processing them. Keep track of which loads are loading a live-in value. + // Walk the uses in the use-list order to be determinstic. + SmallVector<LoadInst*, 32> LiveInLoads; + DenseMap<Value*, Value*> ReplacedLoads; + + for (unsigned i = 0, e = Insts.size(); i != e; ++i) { + Instruction *User = Insts[i]; + BasicBlock *BB = User->getParent(); + TinyPtrVector<Instruction*> &BlockUses = UsesByBlock[BB]; + + // If this block has already been processed, ignore this repeat use. + if (BlockUses.empty()) continue; + + // Okay, this is the first use in the block. If this block just has a + // single user in it, we can rewrite it trivially. + if (BlockUses.size() == 1) { + // If it is a store, it is a trivial def of the value in the block. + if (StoreInst *SI = dyn_cast<StoreInst>(User)) { + updateDebugInfo(SI); + SSA.AddAvailableValue(BB, SI->getOperand(0)); + } else + // Otherwise it is a load, queue it to rewrite as a live-in load. + LiveInLoads.push_back(cast<LoadInst>(User)); + BlockUses.clear(); + continue; + } + + // Otherwise, check to see if this block is all loads. + bool HasStore = false; + for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) { + if (isa<StoreInst>(BlockUses[i])) { + HasStore = true; + break; + } + } + + // If so, we can queue them all as live in loads. We don't have an + // efficient way to tell which on is first in the block and don't want to + // scan large blocks, so just add all loads as live ins. + if (!HasStore) { + for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) + LiveInLoads.push_back(cast<LoadInst>(BlockUses[i])); + BlockUses.clear(); + continue; + } + + // Otherwise, we have mixed loads and stores (or just a bunch of stores). + // Since SSAUpdater is purely for cross-block values, we need to determine + // the order of these instructions in the block. If the first use in the + // block is a load, then it uses the live in value. The last store defines + // the live out value. We handle this by doing a linear scan of the block. + Value *StoredValue = 0; + for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { + if (LoadInst *L = dyn_cast<LoadInst>(II)) { + // If this is a load from an unrelated pointer, ignore it. + if (!isInstInList(L, Insts)) continue; + + // If we haven't seen a store yet, this is a live in use, otherwise + // use the stored value. + if (StoredValue) { + replaceLoadWithValue(L, StoredValue); + L->replaceAllUsesWith(StoredValue); + ReplacedLoads[L] = StoredValue; + } else { + LiveInLoads.push_back(L); + } + continue; + } + + if (StoreInst *SI = dyn_cast<StoreInst>(II)) { + // If this is a store to an unrelated pointer, ignore it. + if (!isInstInList(SI, Insts)) continue; + updateDebugInfo(SI); + + // Remember that this is the active value in the block. + StoredValue = SI->getOperand(0); + } + } + + // The last stored value that happened is the live-out for the block. + assert(StoredValue && "Already checked that there is a store in block"); + SSA.AddAvailableValue(BB, StoredValue); + BlockUses.clear(); + } + + // Okay, now we rewrite all loads that use live-in values in the loop, + // inserting PHI nodes as necessary. + for (unsigned i = 0, e = LiveInLoads.size(); i != e; ++i) { + LoadInst *ALoad = LiveInLoads[i]; + Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent()); + replaceLoadWithValue(ALoad, NewVal); + + // Avoid assertions in unreachable code. + if (NewVal == ALoad) NewVal = UndefValue::get(NewVal->getType()); + ALoad->replaceAllUsesWith(NewVal); + ReplacedLoads[ALoad] = NewVal; + } + + // Allow the client to do stuff before we start nuking things. + doExtraRewritesBeforeFinalDeletion(); + + // Now that everything is rewritten, delete the old instructions from the + // function. They should all be dead now. + for (unsigned i = 0, e = Insts.size(); i != e; ++i) { + Instruction *User = Insts[i]; + + // If this is a load that still has uses, then the load must have been added + // as a live value in the SSAUpdate data structure for a block (e.g. because + // the loaded value was stored later). In this case, we need to recursively + // propagate the updates until we get to the real value. + if (!User->use_empty()) { + Value *NewVal = ReplacedLoads[User]; + assert(NewVal && "not a replaced load?"); + + // Propagate down to the ultimate replacee. The intermediately loads + // could theoretically already have been deleted, so we don't want to + // dereference the Value*'s. + DenseMap<Value*, Value*>::iterator RLI = ReplacedLoads.find(NewVal); + while (RLI != ReplacedLoads.end()) { + NewVal = RLI->second; + RLI = ReplacedLoads.find(NewVal); + } + + replaceLoadWithValue(cast<LoadInst>(User), NewVal); + User->replaceAllUsesWith(NewVal); + } + + instructionDeleted(User); + User->eraseFromParent(); + } +} + +bool +LoadAndStorePromoter::isInstInList(Instruction *I, + const SmallVectorImpl<Instruction*> &Insts) + const { + return std::find(Insts.begin(), Insts.end(), I) != Insts.end(); +} diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp new file mode 100644 index 000000000000..ff50b12cdb3f --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -0,0 +1,4156 @@ +//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Peephole optimize the CFG. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "simplifycfg" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ConstantRange.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/NoFolder.h" +#include "llvm/Support/PatternMatch.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include <algorithm> +#include <map> +#include <set> +using namespace llvm; +using namespace PatternMatch; + +static cl::opt<unsigned> +PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(1), + cl::desc("Control the amount of phi node folding to perform (default = 1)")); + +static cl::opt<bool> +DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false), + cl::desc("Duplicate return instructions into unconditional branches")); + +static cl::opt<bool> +SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), + cl::desc("Sink common instructions down to the end block")); + +static cl::opt<bool> +HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), + cl::desc("Hoist conditional stores if an unconditional store preceeds")); + +STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); +STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables"); +STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block"); +STATISTIC(NumSpeculations, "Number of speculative executed instructions"); + +namespace { + /// ValueEqualityComparisonCase - Represents a case of a switch. + struct ValueEqualityComparisonCase { + ConstantInt *Value; + BasicBlock *Dest; + + ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest) + : Value(Value), Dest(Dest) {} + + bool operator<(ValueEqualityComparisonCase RHS) const { + // Comparing pointers is ok as we only rely on the order for uniquing. + return Value < RHS.Value; + } + + bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; } + }; + +class SimplifyCFGOpt { + const TargetTransformInfo &TTI; + const DataLayout *const TD; + Value *isValueEqualityComparison(TerminatorInst *TI); + BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI, + std::vector<ValueEqualityComparisonCase> &Cases); + bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, + BasicBlock *Pred, + IRBuilder<> &Builder); + bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI, + IRBuilder<> &Builder); + + bool SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder); + bool SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder); + bool SimplifyUnreachable(UnreachableInst *UI); + bool SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder); + bool SimplifyIndirectBr(IndirectBrInst *IBI); + bool SimplifyUncondBranch(BranchInst *BI, IRBuilder <> &Builder); + bool SimplifyCondBranch(BranchInst *BI, IRBuilder <>&Builder); + +public: + SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout *TD) + : TTI(TTI), TD(TD) {} + bool run(BasicBlock *BB); +}; +} + +/// SafeToMergeTerminators - Return true if it is safe to merge these two +/// terminator instructions together. +/// +static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) { + if (SI1 == SI2) return false; // Can't merge with self! + + // It is not safe to merge these two switch instructions if they have a common + // successor, and if that successor has a PHI node, and if *that* PHI node has + // conflicting incoming values from the two switch blocks. + BasicBlock *SI1BB = SI1->getParent(); + BasicBlock *SI2BB = SI2->getParent(); + SmallPtrSet<BasicBlock*, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB)); + + for (succ_iterator I = succ_begin(SI2BB), E = succ_end(SI2BB); I != E; ++I) + if (SI1Succs.count(*I)) + for (BasicBlock::iterator BBI = (*I)->begin(); + isa<PHINode>(BBI); ++BBI) { + PHINode *PN = cast<PHINode>(BBI); + if (PN->getIncomingValueForBlock(SI1BB) != + PN->getIncomingValueForBlock(SI2BB)) + return false; + } + + return true; +} + +/// isProfitableToFoldUnconditional - Return true if it is safe and profitable +/// to merge these two terminator instructions together, where SI1 is an +/// unconditional branch. PhiNodes will store all PHI nodes in common +/// successors. +/// +static bool isProfitableToFoldUnconditional(BranchInst *SI1, + BranchInst *SI2, + Instruction *Cond, + SmallVectorImpl<PHINode*> &PhiNodes) { + if (SI1 == SI2) return false; // Can't merge with self! + assert(SI1->isUnconditional() && SI2->isConditional()); + + // We fold the unconditional branch if we can easily update all PHI nodes in + // common successors: + // 1> We have a constant incoming value for the conditional branch; + // 2> We have "Cond" as the incoming value for the unconditional branch; + // 3> SI2->getCondition() and Cond have same operands. + CmpInst *Ci2 = dyn_cast<CmpInst>(SI2->getCondition()); + if (!Ci2) return false; + if (!(Cond->getOperand(0) == Ci2->getOperand(0) && + Cond->getOperand(1) == Ci2->getOperand(1)) && + !(Cond->getOperand(0) == Ci2->getOperand(1) && + Cond->getOperand(1) == Ci2->getOperand(0))) + return false; + + BasicBlock *SI1BB = SI1->getParent(); + BasicBlock *SI2BB = SI2->getParent(); + SmallPtrSet<BasicBlock*, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB)); + for (succ_iterator I = succ_begin(SI2BB), E = succ_end(SI2BB); I != E; ++I) + if (SI1Succs.count(*I)) + for (BasicBlock::iterator BBI = (*I)->begin(); + isa<PHINode>(BBI); ++BBI) { + PHINode *PN = cast<PHINode>(BBI); + if (PN->getIncomingValueForBlock(SI1BB) != Cond || + !isa<ConstantInt>(PN->getIncomingValueForBlock(SI2BB))) + return false; + PhiNodes.push_back(PN); + } + return true; +} + +/// AddPredecessorToBlock - Update PHI nodes in Succ to indicate that there will +/// now be entries in it from the 'NewPred' block. The values that will be +/// flowing into the PHI nodes will be the same as those coming in from +/// ExistPred, an existing predecessor of Succ. +static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, + BasicBlock *ExistPred) { + if (!isa<PHINode>(Succ->begin())) return; // Quick exit if nothing to do + + PHINode *PN; + for (BasicBlock::iterator I = Succ->begin(); + (PN = dyn_cast<PHINode>(I)); ++I) + PN->addIncoming(PN->getIncomingValueForBlock(ExistPred), NewPred); +} + +/// ComputeSpeculationCost - Compute an abstract "cost" of speculating the +/// given instruction, which is assumed to be safe to speculate. 1 means +/// cheap, 2 means less cheap, and UINT_MAX means prohibitively expensive. +static unsigned ComputeSpeculationCost(const User *I) { + assert(isSafeToSpeculativelyExecute(I) && + "Instruction is not safe to speculatively execute!"); + switch (Operator::getOpcode(I)) { + default: + // In doubt, be conservative. + return UINT_MAX; + case Instruction::GetElementPtr: + // GEPs are cheap if all indices are constant. + if (!cast<GEPOperator>(I)->hasAllConstantIndices()) + return UINT_MAX; + return 1; + case Instruction::Load: + case Instruction::Add: + case Instruction::Sub: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::ICmp: + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + return 1; // These are all cheap. + + case Instruction::Call: + case Instruction::Select: + return 2; + } +} + +/// DominatesMergePoint - If we have a merge point of an "if condition" as +/// accepted above, return true if the specified value dominates the block. We +/// don't handle the true generality of domination here, just a special case +/// which works well enough for us. +/// +/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to +/// see if V (which must be an instruction) and its recursive operands +/// that do not dominate BB have a combined cost lower than CostRemaining and +/// are non-trapping. If both are true, the instruction is inserted into the +/// set and true is returned. +/// +/// The cost for most non-trapping instructions is defined as 1 except for +/// Select whose cost is 2. +/// +/// After this function returns, CostRemaining is decreased by the cost of +/// V plus its non-dominating operands. If that cost is greater than +/// CostRemaining, false is returned and CostRemaining is undefined. +static bool DominatesMergePoint(Value *V, BasicBlock *BB, + SmallPtrSet<Instruction*, 4> *AggressiveInsts, + unsigned &CostRemaining) { + Instruction *I = dyn_cast<Instruction>(V); + if (!I) { + // Non-instructions all dominate instructions, but not all constantexprs + // can be executed unconditionally. + if (ConstantExpr *C = dyn_cast<ConstantExpr>(V)) + if (C->canTrap()) + return false; + return true; + } + BasicBlock *PBB = I->getParent(); + + // We don't want to allow weird loops that might have the "if condition" in + // the bottom of this block. + if (PBB == BB) return false; + + // If this instruction is defined in a block that contains an unconditional + // branch to BB, then it must be in the 'conditional' part of the "if + // statement". If not, it definitely dominates the region. + BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator()); + if (BI == 0 || BI->isConditional() || BI->getSuccessor(0) != BB) + return true; + + // If we aren't allowing aggressive promotion anymore, then don't consider + // instructions in the 'if region'. + if (AggressiveInsts == 0) return false; + + // If we have seen this instruction before, don't count it again. + if (AggressiveInsts->count(I)) return true; + + // Okay, it looks like the instruction IS in the "condition". Check to + // see if it's a cheap instruction to unconditionally compute, and if it + // only uses stuff defined outside of the condition. If so, hoist it out. + if (!isSafeToSpeculativelyExecute(I)) + return false; + + unsigned Cost = ComputeSpeculationCost(I); + + if (Cost > CostRemaining) + return false; + + CostRemaining -= Cost; + + // Okay, we can only really hoist these out if their operands do + // not take us over the cost threshold. + for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) + if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining)) + return false; + // Okay, it's safe to do this! Remember this instruction. + AggressiveInsts->insert(I); + return true; +} + +/// GetConstantInt - Extract ConstantInt from value, looking through IntToPtr +/// and PointerNullValue. Return NULL if value is not a constant int. +static ConstantInt *GetConstantInt(Value *V, const DataLayout *TD) { + // Normal constant int. + ConstantInt *CI = dyn_cast<ConstantInt>(V); + if (CI || !TD || !isa<Constant>(V) || !V->getType()->isPointerTy()) + return CI; + + // This is some kind of pointer constant. Turn it into a pointer-sized + // ConstantInt if possible. + IntegerType *PtrTy = cast<IntegerType>(TD->getIntPtrType(V->getType())); + + // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*). + if (isa<ConstantPointerNull>(V)) + return ConstantInt::get(PtrTy, 0); + + // IntToPtr const int. + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) + if (CE->getOpcode() == Instruction::IntToPtr) + if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) { + // The constant is very likely to have the right type already. + if (CI->getType() == PtrTy) + return CI; + else + return cast<ConstantInt> + (ConstantExpr::getIntegerCast(CI, PtrTy, /*isSigned=*/false)); + } + return 0; +} + +/// GatherConstantCompares - Given a potentially 'or'd or 'and'd together +/// collection of icmp eq/ne instructions that compare a value against a +/// constant, return the value being compared, and stick the constant into the +/// Values vector. +static Value * +GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra, + const DataLayout *TD, bool isEQ, unsigned &UsedICmps) { + Instruction *I = dyn_cast<Instruction>(V); + if (I == 0) return 0; + + // If this is an icmp against a constant, handle this as one of the cases. + if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) { + if (ConstantInt *C = GetConstantInt(I->getOperand(1), TD)) { + Value *RHSVal; + ConstantInt *RHSC; + + if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ:ICmpInst::ICMP_NE)) { + // (x & ~2^x) == y --> x == y || x == y|2^x + // This undoes a transformation done by instcombine to fuse 2 compares. + if (match(ICI->getOperand(0), + m_And(m_Value(RHSVal), m_ConstantInt(RHSC)))) { + APInt Not = ~RHSC->getValue(); + if (Not.isPowerOf2()) { + Vals.push_back(C); + Vals.push_back( + ConstantInt::get(C->getContext(), C->getValue() | Not)); + UsedICmps++; + return RHSVal; + } + } + + UsedICmps++; + Vals.push_back(C); + return I->getOperand(0); + } + + // If we have "x ult 3" comparison, for example, then we can add 0,1,2 to + // the set. + ConstantRange Span = + ConstantRange::makeICmpRegion(ICI->getPredicate(), C->getValue()); + + // Shift the range if the compare is fed by an add. This is the range + // compare idiom as emitted by instcombine. + bool hasAdd = + match(I->getOperand(0), m_Add(m_Value(RHSVal), m_ConstantInt(RHSC))); + if (hasAdd) + Span = Span.subtract(RHSC->getValue()); + + // If this is an and/!= check then we want to optimize "x ugt 2" into + // x != 0 && x != 1. + if (!isEQ) + Span = Span.inverse(); + + // If there are a ton of values, we don't want to make a ginormous switch. + if (Span.getSetSize().ugt(8) || Span.isEmptySet()) + return 0; + + for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp) + Vals.push_back(ConstantInt::get(V->getContext(), Tmp)); + UsedICmps++; + return hasAdd ? RHSVal : I->getOperand(0); + } + return 0; + } + + // Otherwise, we can only handle an | or &, depending on isEQ. + if (I->getOpcode() != (isEQ ? Instruction::Or : Instruction::And)) + return 0; + + unsigned NumValsBeforeLHS = Vals.size(); + unsigned UsedICmpsBeforeLHS = UsedICmps; + if (Value *LHS = GatherConstantCompares(I->getOperand(0), Vals, Extra, TD, + isEQ, UsedICmps)) { + unsigned NumVals = Vals.size(); + unsigned UsedICmpsBeforeRHS = UsedICmps; + if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, TD, + isEQ, UsedICmps)) { + if (LHS == RHS) + return LHS; + Vals.resize(NumVals); + UsedICmps = UsedICmpsBeforeRHS; + } + + // The RHS of the or/and can't be folded in and we haven't used "Extra" yet, + // set it and return success. + if (Extra == 0 || Extra == I->getOperand(1)) { + Extra = I->getOperand(1); + return LHS; + } + + Vals.resize(NumValsBeforeLHS); + UsedICmps = UsedICmpsBeforeLHS; + return 0; + } + + // If the LHS can't be folded in, but Extra is available and RHS can, try to + // use LHS as Extra. + if (Extra == 0 || Extra == I->getOperand(0)) { + Value *OldExtra = Extra; + Extra = I->getOperand(0); + if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, TD, + isEQ, UsedICmps)) + return RHS; + assert(Vals.size() == NumValsBeforeLHS); + Extra = OldExtra; + } + + return 0; +} + +static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) { + Instruction *Cond = 0; + if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { + Cond = dyn_cast<Instruction>(SI->getCondition()); + } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { + if (BI->isConditional()) + Cond = dyn_cast<Instruction>(BI->getCondition()); + } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) { + Cond = dyn_cast<Instruction>(IBI->getAddress()); + } + + TI->eraseFromParent(); + if (Cond) RecursivelyDeleteTriviallyDeadInstructions(Cond); +} + +/// isValueEqualityComparison - Return true if the specified terminator checks +/// to see if a value is equal to constant integer value. +Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) { + Value *CV = 0; + if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { + // Do not permit merging of large switch instructions into their + // predecessors unless there is only one predecessor. + if (SI->getNumSuccessors()*std::distance(pred_begin(SI->getParent()), + pred_end(SI->getParent())) <= 128) + CV = SI->getCondition(); + } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) + if (BI->isConditional() && BI->getCondition()->hasOneUse()) + if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) + if (ICI->isEquality() && GetConstantInt(ICI->getOperand(1), TD)) + CV = ICI->getOperand(0); + + // Unwrap any lossless ptrtoint cast. + if (TD && CV) { + if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) { + Value *Ptr = PTII->getPointerOperand(); + if (PTII->getType() == TD->getIntPtrType(Ptr->getType())) + CV = Ptr; + } + } + return CV; +} + +/// GetValueEqualityComparisonCases - Given a value comparison instruction, +/// decode all of the 'cases' that it represents and return the 'default' block. +BasicBlock *SimplifyCFGOpt:: +GetValueEqualityComparisonCases(TerminatorInst *TI, + std::vector<ValueEqualityComparisonCase> + &Cases) { + if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { + Cases.reserve(SI->getNumCases()); + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) + Cases.push_back(ValueEqualityComparisonCase(i.getCaseValue(), + i.getCaseSuccessor())); + return SI->getDefaultDest(); + } + + BranchInst *BI = cast<BranchInst>(TI); + ICmpInst *ICI = cast<ICmpInst>(BI->getCondition()); + BasicBlock *Succ = BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_NE); + Cases.push_back(ValueEqualityComparisonCase(GetConstantInt(ICI->getOperand(1), + TD), + Succ)); + return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ); +} + + +/// EliminateBlockCases - Given a vector of bb/value pairs, remove any entries +/// in the list that match the specified block. +static void EliminateBlockCases(BasicBlock *BB, + std::vector<ValueEqualityComparisonCase> &Cases) { + Cases.erase(std::remove(Cases.begin(), Cases.end(), BB), Cases.end()); +} + +/// ValuesOverlap - Return true if there are any keys in C1 that exist in C2 as +/// well. +static bool +ValuesOverlap(std::vector<ValueEqualityComparisonCase> &C1, + std::vector<ValueEqualityComparisonCase > &C2) { + std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2; + + // Make V1 be smaller than V2. + if (V1->size() > V2->size()) + std::swap(V1, V2); + + if (V1->size() == 0) return false; + if (V1->size() == 1) { + // Just scan V2. + ConstantInt *TheVal = (*V1)[0].Value; + for (unsigned i = 0, e = V2->size(); i != e; ++i) + if (TheVal == (*V2)[i].Value) + return true; + } + + // Otherwise, just sort both lists and compare element by element. + array_pod_sort(V1->begin(), V1->end()); + array_pod_sort(V2->begin(), V2->end()); + unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size(); + while (i1 != e1 && i2 != e2) { + if ((*V1)[i1].Value == (*V2)[i2].Value) + return true; + if ((*V1)[i1].Value < (*V2)[i2].Value) + ++i1; + else + ++i2; + } + return false; +} + +/// SimplifyEqualityComparisonWithOnlyPredecessor - If TI is known to be a +/// terminator instruction and its block is known to only have a single +/// predecessor block, check to see if that predecessor is also a value +/// comparison with the same value, and if that comparison determines the +/// outcome of this comparison. If so, simplify TI. This does a very limited +/// form of jump threading. +bool SimplifyCFGOpt:: +SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, + BasicBlock *Pred, + IRBuilder<> &Builder) { + Value *PredVal = isValueEqualityComparison(Pred->getTerminator()); + if (!PredVal) return false; // Not a value comparison in predecessor. + + Value *ThisVal = isValueEqualityComparison(TI); + assert(ThisVal && "This isn't a value comparison!!"); + if (ThisVal != PredVal) return false; // Different predicates. + + // TODO: Preserve branch weight metadata, similarly to how + // FoldValueComparisonIntoPredecessors preserves it. + + // Find out information about when control will move from Pred to TI's block. + std::vector<ValueEqualityComparisonCase> PredCases; + BasicBlock *PredDef = GetValueEqualityComparisonCases(Pred->getTerminator(), + PredCases); + EliminateBlockCases(PredDef, PredCases); // Remove default from cases. + + // Find information about how control leaves this block. + std::vector<ValueEqualityComparisonCase> ThisCases; + BasicBlock *ThisDef = GetValueEqualityComparisonCases(TI, ThisCases); + EliminateBlockCases(ThisDef, ThisCases); // Remove default from cases. + + // If TI's block is the default block from Pred's comparison, potentially + // simplify TI based on this knowledge. + if (PredDef == TI->getParent()) { + // If we are here, we know that the value is none of those cases listed in + // PredCases. If there are any cases in ThisCases that are in PredCases, we + // can simplify TI. + if (!ValuesOverlap(PredCases, ThisCases)) + return false; + + if (isa<BranchInst>(TI)) { + // Okay, one of the successors of this condbr is dead. Convert it to a + // uncond br. + assert(ThisCases.size() == 1 && "Branch can only have one case!"); + // Insert the new branch. + Instruction *NI = Builder.CreateBr(ThisDef); + (void) NI; + + // Remove PHI node entries for the dead edge. + ThisCases[0].Dest->removePredecessor(TI->getParent()); + + DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() + << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n"); + + EraseTerminatorInstAndDCECond(TI); + return true; + } + + SwitchInst *SI = cast<SwitchInst>(TI); + // Okay, TI has cases that are statically dead, prune them away. + SmallPtrSet<Constant*, 16> DeadCases; + for (unsigned i = 0, e = PredCases.size(); i != e; ++i) + DeadCases.insert(PredCases[i].Value); + + DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() + << "Through successor TI: " << *TI); + + // Collect branch weights into a vector. + SmallVector<uint32_t, 8> Weights; + MDNode* MD = SI->getMetadata(LLVMContext::MD_prof); + bool HasWeight = MD && (MD->getNumOperands() == 2 + SI->getNumCases()); + if (HasWeight) + for (unsigned MD_i = 1, MD_e = MD->getNumOperands(); MD_i < MD_e; + ++MD_i) { + ConstantInt* CI = dyn_cast<ConstantInt>(MD->getOperand(MD_i)); + assert(CI); + Weights.push_back(CI->getValue().getZExtValue()); + } + for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) { + --i; + if (DeadCases.count(i.getCaseValue())) { + if (HasWeight) { + std::swap(Weights[i.getCaseIndex()+1], Weights.back()); + Weights.pop_back(); + } + i.getCaseSuccessor()->removePredecessor(TI->getParent()); + SI->removeCase(i); + } + } + if (HasWeight && Weights.size() >= 2) + SI->setMetadata(LLVMContext::MD_prof, + MDBuilder(SI->getParent()->getContext()). + createBranchWeights(Weights)); + + DEBUG(dbgs() << "Leaving: " << *TI << "\n"); + return true; + } + + // Otherwise, TI's block must correspond to some matched value. Find out + // which value (or set of values) this is. + ConstantInt *TIV = 0; + BasicBlock *TIBB = TI->getParent(); + for (unsigned i = 0, e = PredCases.size(); i != e; ++i) + if (PredCases[i].Dest == TIBB) { + if (TIV != 0) + return false; // Cannot handle multiple values coming to this block. + TIV = PredCases[i].Value; + } + assert(TIV && "No edge from pred to succ?"); + + // Okay, we found the one constant that our value can be if we get into TI's + // BB. Find out which successor will unconditionally be branched to. + BasicBlock *TheRealDest = 0; + for (unsigned i = 0, e = ThisCases.size(); i != e; ++i) + if (ThisCases[i].Value == TIV) { + TheRealDest = ThisCases[i].Dest; + break; + } + + // If not handled by any explicit cases, it is handled by the default case. + if (TheRealDest == 0) TheRealDest = ThisDef; + + // Remove PHI node entries for dead edges. + BasicBlock *CheckEdge = TheRealDest; + for (succ_iterator SI = succ_begin(TIBB), e = succ_end(TIBB); SI != e; ++SI) + if (*SI != CheckEdge) + (*SI)->removePredecessor(TIBB); + else + CheckEdge = 0; + + // Insert the new branch. + Instruction *NI = Builder.CreateBr(TheRealDest); + (void) NI; + + DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() + << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n"); + + EraseTerminatorInstAndDCECond(TI); + return true; +} + +namespace { + /// ConstantIntOrdering - This class implements a stable ordering of constant + /// integers that does not depend on their address. This is important for + /// applications that sort ConstantInt's to ensure uniqueness. + struct ConstantIntOrdering { + bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const { + return LHS->getValue().ult(RHS->getValue()); + } + }; +} + +static int ConstantIntSortPredicate(ConstantInt *const *P1, + ConstantInt *const *P2) { + const ConstantInt *LHS = *P1; + const ConstantInt *RHS = *P2; + if (LHS->getValue().ult(RHS->getValue())) + return 1; + if (LHS->getValue() == RHS->getValue()) + return 0; + return -1; +} + +static inline bool HasBranchWeights(const Instruction* I) { + MDNode* ProfMD = I->getMetadata(LLVMContext::MD_prof); + if (ProfMD && ProfMD->getOperand(0)) + if (MDString* MDS = dyn_cast<MDString>(ProfMD->getOperand(0))) + return MDS->getString().equals("branch_weights"); + + return false; +} + +/// Get Weights of a given TerminatorInst, the default weight is at the front +/// of the vector. If TI is a conditional eq, we need to swap the branch-weight +/// metadata. +static void GetBranchWeights(TerminatorInst *TI, + SmallVectorImpl<uint64_t> &Weights) { + MDNode* MD = TI->getMetadata(LLVMContext::MD_prof); + assert(MD); + for (unsigned i = 1, e = MD->getNumOperands(); i < e; ++i) { + ConstantInt* CI = dyn_cast<ConstantInt>(MD->getOperand(i)); + assert(CI); + Weights.push_back(CI->getValue().getZExtValue()); + } + + // If TI is a conditional eq, the default case is the false case, + // and the corresponding branch-weight data is at index 2. We swap the + // default weight to be the first entry. + if (BranchInst* BI = dyn_cast<BranchInst>(TI)) { + assert(Weights.size() == 2); + ICmpInst *ICI = cast<ICmpInst>(BI->getCondition()); + if (ICI->getPredicate() == ICmpInst::ICMP_EQ) + std::swap(Weights.front(), Weights.back()); + } +} + +/// Sees if any of the weights are too big for a uint32_t, and halves all the +/// weights if any are. +static void FitWeights(MutableArrayRef<uint64_t> Weights) { + bool Halve = false; + for (unsigned i = 0; i < Weights.size(); ++i) + if (Weights[i] > UINT_MAX) { + Halve = true; + break; + } + + if (! Halve) + return; + + for (unsigned i = 0; i < Weights.size(); ++i) + Weights[i] /= 2; +} + +/// FoldValueComparisonIntoPredecessors - The specified terminator is a value +/// equality comparison instruction (either a switch or a branch on "X == c"). +/// See if any of the predecessors of the terminator block are value comparisons +/// on the same value. If so, and if safe to do so, fold them together. +bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, + IRBuilder<> &Builder) { + BasicBlock *BB = TI->getParent(); + Value *CV = isValueEqualityComparison(TI); // CondVal + assert(CV && "Not a comparison?"); + bool Changed = false; + + SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB)); + while (!Preds.empty()) { + BasicBlock *Pred = Preds.pop_back_val(); + + // See if the predecessor is a comparison with the same value. + TerminatorInst *PTI = Pred->getTerminator(); + Value *PCV = isValueEqualityComparison(PTI); // PredCondVal + + if (PCV == CV && SafeToMergeTerminators(TI, PTI)) { + // Figure out which 'cases' to copy from SI to PSI. + std::vector<ValueEqualityComparisonCase> BBCases; + BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases); + + std::vector<ValueEqualityComparisonCase> PredCases; + BasicBlock *PredDefault = GetValueEqualityComparisonCases(PTI, PredCases); + + // Based on whether the default edge from PTI goes to BB or not, fill in + // PredCases and PredDefault with the new switch cases we would like to + // build. + SmallVector<BasicBlock*, 8> NewSuccessors; + + // Update the branch weight metadata along the way + SmallVector<uint64_t, 8> Weights; + bool PredHasWeights = HasBranchWeights(PTI); + bool SuccHasWeights = HasBranchWeights(TI); + + if (PredHasWeights) { + GetBranchWeights(PTI, Weights); + // branch-weight metadata is inconsistent here. + if (Weights.size() != 1 + PredCases.size()) + PredHasWeights = SuccHasWeights = false; + } else if (SuccHasWeights) + // If there are no predecessor weights but there are successor weights, + // populate Weights with 1, which will later be scaled to the sum of + // successor's weights + Weights.assign(1 + PredCases.size(), 1); + + SmallVector<uint64_t, 8> SuccWeights; + if (SuccHasWeights) { + GetBranchWeights(TI, SuccWeights); + // branch-weight metadata is inconsistent here. + if (SuccWeights.size() != 1 + BBCases.size()) + PredHasWeights = SuccHasWeights = false; + } else if (PredHasWeights) + SuccWeights.assign(1 + BBCases.size(), 1); + + if (PredDefault == BB) { + // If this is the default destination from PTI, only the edges in TI + // that don't occur in PTI, or that branch to BB will be activated. + std::set<ConstantInt*, ConstantIntOrdering> PTIHandled; + for (unsigned i = 0, e = PredCases.size(); i != e; ++i) + if (PredCases[i].Dest != BB) + PTIHandled.insert(PredCases[i].Value); + else { + // The default destination is BB, we don't need explicit targets. + std::swap(PredCases[i], PredCases.back()); + + if (PredHasWeights || SuccHasWeights) { + // Increase weight for the default case. + Weights[0] += Weights[i+1]; + std::swap(Weights[i+1], Weights.back()); + Weights.pop_back(); + } + + PredCases.pop_back(); + --i; --e; + } + + // Reconstruct the new switch statement we will be building. + if (PredDefault != BBDefault) { + PredDefault->removePredecessor(Pred); + PredDefault = BBDefault; + NewSuccessors.push_back(BBDefault); + } + + unsigned CasesFromPred = Weights.size(); + uint64_t ValidTotalSuccWeight = 0; + for (unsigned i = 0, e = BBCases.size(); i != e; ++i) + if (!PTIHandled.count(BBCases[i].Value) && + BBCases[i].Dest != BBDefault) { + PredCases.push_back(BBCases[i]); + NewSuccessors.push_back(BBCases[i].Dest); + if (SuccHasWeights || PredHasWeights) { + // The default weight is at index 0, so weight for the ith case + // should be at index i+1. Scale the cases from successor by + // PredDefaultWeight (Weights[0]). + Weights.push_back(Weights[0] * SuccWeights[i+1]); + ValidTotalSuccWeight += SuccWeights[i+1]; + } + } + + if (SuccHasWeights || PredHasWeights) { + ValidTotalSuccWeight += SuccWeights[0]; + // Scale the cases from predecessor by ValidTotalSuccWeight. + for (unsigned i = 1; i < CasesFromPred; ++i) + Weights[i] *= ValidTotalSuccWeight; + // Scale the default weight by SuccDefaultWeight (SuccWeights[0]). + Weights[0] *= SuccWeights[0]; + } + } else { + // If this is not the default destination from PSI, only the edges + // in SI that occur in PSI with a destination of BB will be + // activated. + std::set<ConstantInt*, ConstantIntOrdering> PTIHandled; + std::map<ConstantInt*, uint64_t> WeightsForHandled; + for (unsigned i = 0, e = PredCases.size(); i != e; ++i) + if (PredCases[i].Dest == BB) { + PTIHandled.insert(PredCases[i].Value); + + if (PredHasWeights || SuccHasWeights) { + WeightsForHandled[PredCases[i].Value] = Weights[i+1]; + std::swap(Weights[i+1], Weights.back()); + Weights.pop_back(); + } + + std::swap(PredCases[i], PredCases.back()); + PredCases.pop_back(); + --i; --e; + } + + // Okay, now we know which constants were sent to BB from the + // predecessor. Figure out where they will all go now. + for (unsigned i = 0, e = BBCases.size(); i != e; ++i) + if (PTIHandled.count(BBCases[i].Value)) { + // If this is one we are capable of getting... + if (PredHasWeights || SuccHasWeights) + Weights.push_back(WeightsForHandled[BBCases[i].Value]); + PredCases.push_back(BBCases[i]); + NewSuccessors.push_back(BBCases[i].Dest); + PTIHandled.erase(BBCases[i].Value);// This constant is taken care of + } + + // If there are any constants vectored to BB that TI doesn't handle, + // they must go to the default destination of TI. + for (std::set<ConstantInt*, ConstantIntOrdering>::iterator I = + PTIHandled.begin(), + E = PTIHandled.end(); I != E; ++I) { + if (PredHasWeights || SuccHasWeights) + Weights.push_back(WeightsForHandled[*I]); + PredCases.push_back(ValueEqualityComparisonCase(*I, BBDefault)); + NewSuccessors.push_back(BBDefault); + } + } + + // Okay, at this point, we know which new successor Pred will get. Make + // sure we update the number of entries in the PHI nodes for these + // successors. + for (unsigned i = 0, e = NewSuccessors.size(); i != e; ++i) + AddPredecessorToBlock(NewSuccessors[i], Pred, BB); + + Builder.SetInsertPoint(PTI); + // Convert pointer to int before we switch. + if (CV->getType()->isPointerTy()) { + assert(TD && "Cannot switch on pointer without DataLayout"); + CV = Builder.CreatePtrToInt(CV, TD->getIntPtrType(CV->getType()), + "magicptr"); + } + + // Now that the successors are updated, create the new Switch instruction. + SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, + PredCases.size()); + NewSI->setDebugLoc(PTI->getDebugLoc()); + for (unsigned i = 0, e = PredCases.size(); i != e; ++i) + NewSI->addCase(PredCases[i].Value, PredCases[i].Dest); + + if (PredHasWeights || SuccHasWeights) { + // Halve the weights if any of them cannot fit in an uint32_t + FitWeights(Weights); + + SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end()); + + NewSI->setMetadata(LLVMContext::MD_prof, + MDBuilder(BB->getContext()). + createBranchWeights(MDWeights)); + } + + EraseTerminatorInstAndDCECond(PTI); + + // Okay, last check. If BB is still a successor of PSI, then we must + // have an infinite loop case. If so, add an infinitely looping block + // to handle the case to preserve the behavior of the code. + BasicBlock *InfLoopBlock = 0; + for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i) + if (NewSI->getSuccessor(i) == BB) { + if (InfLoopBlock == 0) { + // Insert it at the end of the function, because it's either code, + // or it won't matter if it's hot. :) + InfLoopBlock = BasicBlock::Create(BB->getContext(), + "infloop", BB->getParent()); + BranchInst::Create(InfLoopBlock, InfLoopBlock); + } + NewSI->setSuccessor(i, InfLoopBlock); + } + + Changed = true; + } + } + return Changed; +} + +// isSafeToHoistInvoke - If we would need to insert a select that uses the +// value of this invoke (comments in HoistThenElseCodeToIf explain why we +// would need to do this), we can't hoist the invoke, as there is nowhere +// to put the select in this case. +static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, + Instruction *I1, Instruction *I2) { + for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) { + PHINode *PN; + for (BasicBlock::iterator BBI = SI->begin(); + (PN = dyn_cast<PHINode>(BBI)); ++BBI) { + Value *BB1V = PN->getIncomingValueForBlock(BB1); + Value *BB2V = PN->getIncomingValueForBlock(BB2); + if (BB1V != BB2V && (BB1V==I1 || BB2V==I2)) { + return false; + } + } + } + return true; +} + +/// HoistThenElseCodeToIf - Given a conditional branch that goes to BB1 and +/// BB2, hoist any common code in the two blocks up into the branch block. The +/// caller of this function guarantees that BI's block dominates BB1 and BB2. +static bool HoistThenElseCodeToIf(BranchInst *BI) { + // This does very trivial matching, with limited scanning, to find identical + // instructions in the two blocks. In particular, we don't want to get into + // O(M*N) situations here where M and N are the sizes of BB1 and BB2. As + // such, we currently just scan for obviously identical instructions in an + // identical order. + BasicBlock *BB1 = BI->getSuccessor(0); // The true destination. + BasicBlock *BB2 = BI->getSuccessor(1); // The false destination + + BasicBlock::iterator BB1_Itr = BB1->begin(); + BasicBlock::iterator BB2_Itr = BB2->begin(); + + Instruction *I1 = BB1_Itr++, *I2 = BB2_Itr++; + // Skip debug info if it is not identical. + DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1); + DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2); + if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) { + while (isa<DbgInfoIntrinsic>(I1)) + I1 = BB1_Itr++; + while (isa<DbgInfoIntrinsic>(I2)) + I2 = BB2_Itr++; + } + if (isa<PHINode>(I1) || !I1->isIdenticalToWhenDefined(I2) || + (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))) + return false; + + BasicBlock *BIParent = BI->getParent(); + + bool Changed = false; + do { + // If we are hoisting the terminator instruction, don't move one (making a + // broken BB), instead clone it, and remove BI. + if (isa<TerminatorInst>(I1)) + goto HoistTerminator; + + // For a normal instruction, we just move one to right before the branch, + // then replace all uses of the other with the first. Finally, we remove + // the now redundant second instruction. + BIParent->getInstList().splice(BI, BB1->getInstList(), I1); + if (!I2->use_empty()) + I2->replaceAllUsesWith(I1); + I1->intersectOptionalDataWith(I2); + I2->eraseFromParent(); + Changed = true; + + I1 = BB1_Itr++; + I2 = BB2_Itr++; + // Skip debug info if it is not identical. + DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1); + DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2); + if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) { + while (isa<DbgInfoIntrinsic>(I1)) + I1 = BB1_Itr++; + while (isa<DbgInfoIntrinsic>(I2)) + I2 = BB2_Itr++; + } + } while (I1->isIdenticalToWhenDefined(I2)); + + return true; + +HoistTerminator: + // It may not be possible to hoist an invoke. + if (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)) + return Changed; + + for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) { + PHINode *PN; + for (BasicBlock::iterator BBI = SI->begin(); + (PN = dyn_cast<PHINode>(BBI)); ++BBI) { + Value *BB1V = PN->getIncomingValueForBlock(BB1); + Value *BB2V = PN->getIncomingValueForBlock(BB2); + if (BB1V == BB2V) + continue; + + if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V)) + return Changed; + if (isa<ConstantExpr>(BB2V) && !isSafeToSpeculativelyExecute(BB2V)) + return Changed; + } + } + + // Okay, it is safe to hoist the terminator. + Instruction *NT = I1->clone(); + BIParent->getInstList().insert(BI, NT); + if (!NT->getType()->isVoidTy()) { + I1->replaceAllUsesWith(NT); + I2->replaceAllUsesWith(NT); + NT->takeName(I1); + } + + IRBuilder<true, NoFolder> Builder(NT); + // Hoisting one of the terminators from our successor is a great thing. + // Unfortunately, the successors of the if/else blocks may have PHI nodes in + // them. If they do, all PHI entries for BB1/BB2 must agree for all PHI + // nodes, so we insert select instruction to compute the final result. + std::map<std::pair<Value*,Value*>, SelectInst*> InsertedSelects; + for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) { + PHINode *PN; + for (BasicBlock::iterator BBI = SI->begin(); + (PN = dyn_cast<PHINode>(BBI)); ++BBI) { + Value *BB1V = PN->getIncomingValueForBlock(BB1); + Value *BB2V = PN->getIncomingValueForBlock(BB2); + if (BB1V == BB2V) continue; + + // These values do not agree. Insert a select instruction before NT + // that determines the right value. + SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)]; + if (SI == 0) + SI = cast<SelectInst> + (Builder.CreateSelect(BI->getCondition(), BB1V, BB2V, + BB1V->getName()+"."+BB2V->getName())); + + // Make the PHI node use the select for all incoming values for BB1/BB2 + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingBlock(i) == BB1 || PN->getIncomingBlock(i) == BB2) + PN->setIncomingValue(i, SI); + } + } + + // Update any PHI nodes in our new successors. + for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) + AddPredecessorToBlock(*SI, BIParent, BB1); + + EraseTerminatorInstAndDCECond(BI); + return true; +} + +/// SinkThenElseCodeToEnd - Given an unconditional branch that goes to BBEnd, +/// check whether BBEnd has only two predecessors and the other predecessor +/// ends with an unconditional branch. If it is true, sink any common code +/// in the two predecessors to BBEnd. +static bool SinkThenElseCodeToEnd(BranchInst *BI1) { + assert(BI1->isUnconditional()); + BasicBlock *BB1 = BI1->getParent(); + BasicBlock *BBEnd = BI1->getSuccessor(0); + + // Check that BBEnd has two predecessors and the other predecessor ends with + // an unconditional branch. + pred_iterator PI = pred_begin(BBEnd), PE = pred_end(BBEnd); + BasicBlock *Pred0 = *PI++; + if (PI == PE) // Only one predecessor. + return false; + BasicBlock *Pred1 = *PI++; + if (PI != PE) // More than two predecessors. + return false; + BasicBlock *BB2 = (Pred0 == BB1) ? Pred1 : Pred0; + BranchInst *BI2 = dyn_cast<BranchInst>(BB2->getTerminator()); + if (!BI2 || !BI2->isUnconditional()) + return false; + + // Gather the PHI nodes in BBEnd. + std::map<Value*, std::pair<Value*, PHINode*> > MapValueFromBB1ToBB2; + Instruction *FirstNonPhiInBBEnd = 0; + for (BasicBlock::iterator I = BBEnd->begin(), E = BBEnd->end(); + I != E; ++I) { + if (PHINode *PN = dyn_cast<PHINode>(I)) { + Value *BB1V = PN->getIncomingValueForBlock(BB1); + Value *BB2V = PN->getIncomingValueForBlock(BB2); + MapValueFromBB1ToBB2[BB1V] = std::make_pair(BB2V, PN); + } else { + FirstNonPhiInBBEnd = &*I; + break; + } + } + if (!FirstNonPhiInBBEnd) + return false; + + + // This does very trivial matching, with limited scanning, to find identical + // instructions in the two blocks. We scan backward for obviously identical + // instructions in an identical order. + BasicBlock::InstListType::reverse_iterator RI1 = BB1->getInstList().rbegin(), + RE1 = BB1->getInstList().rend(), RI2 = BB2->getInstList().rbegin(), + RE2 = BB2->getInstList().rend(); + // Skip debug info. + while (RI1 != RE1 && isa<DbgInfoIntrinsic>(&*RI1)) ++RI1; + if (RI1 == RE1) + return false; + while (RI2 != RE2 && isa<DbgInfoIntrinsic>(&*RI2)) ++RI2; + if (RI2 == RE2) + return false; + // Skip the unconditional branches. + ++RI1; + ++RI2; + + bool Changed = false; + while (RI1 != RE1 && RI2 != RE2) { + // Skip debug info. + while (RI1 != RE1 && isa<DbgInfoIntrinsic>(&*RI1)) ++RI1; + if (RI1 == RE1) + return Changed; + while (RI2 != RE2 && isa<DbgInfoIntrinsic>(&*RI2)) ++RI2; + if (RI2 == RE2) + return Changed; + + Instruction *I1 = &*RI1, *I2 = &*RI2; + // I1 and I2 should have a single use in the same PHI node, and they + // perform the same operation. + // Cannot move control-flow-involving, volatile loads, vaarg, etc. + if (isa<PHINode>(I1) || isa<PHINode>(I2) || + isa<TerminatorInst>(I1) || isa<TerminatorInst>(I2) || + isa<LandingPadInst>(I1) || isa<LandingPadInst>(I2) || + isa<AllocaInst>(I1) || isa<AllocaInst>(I2) || + I1->mayHaveSideEffects() || I2->mayHaveSideEffects() || + I1->mayReadOrWriteMemory() || I2->mayReadOrWriteMemory() || + !I1->hasOneUse() || !I2->hasOneUse() || + MapValueFromBB1ToBB2.find(I1) == MapValueFromBB1ToBB2.end() || + MapValueFromBB1ToBB2[I1].first != I2) + return Changed; + + // Check whether we should swap the operands of ICmpInst. + ICmpInst *ICmp1 = dyn_cast<ICmpInst>(I1), *ICmp2 = dyn_cast<ICmpInst>(I2); + bool SwapOpnds = false; + if (ICmp1 && ICmp2 && + ICmp1->getOperand(0) != ICmp2->getOperand(0) && + ICmp1->getOperand(1) != ICmp2->getOperand(1) && + (ICmp1->getOperand(0) == ICmp2->getOperand(1) || + ICmp1->getOperand(1) == ICmp2->getOperand(0))) { + ICmp2->swapOperands(); + SwapOpnds = true; + } + if (!I1->isSameOperationAs(I2)) { + if (SwapOpnds) + ICmp2->swapOperands(); + return Changed; + } + + // The operands should be either the same or they need to be generated + // with a PHI node after sinking. We only handle the case where there is + // a single pair of different operands. + Value *DifferentOp1 = 0, *DifferentOp2 = 0; + unsigned Op1Idx = 0; + for (unsigned I = 0, E = I1->getNumOperands(); I != E; ++I) { + if (I1->getOperand(I) == I2->getOperand(I)) + continue; + // Early exit if we have more-than one pair of different operands or + // the different operand is already in MapValueFromBB1ToBB2. + // Early exit if we need a PHI node to replace a constant. + if (DifferentOp1 || + MapValueFromBB1ToBB2.find(I1->getOperand(I)) != + MapValueFromBB1ToBB2.end() || + isa<Constant>(I1->getOperand(I)) || + isa<Constant>(I2->getOperand(I))) { + // If we can't sink the instructions, undo the swapping. + if (SwapOpnds) + ICmp2->swapOperands(); + return Changed; + } + DifferentOp1 = I1->getOperand(I); + Op1Idx = I; + DifferentOp2 = I2->getOperand(I); + } + + // We insert the pair of different operands to MapValueFromBB1ToBB2 and + // remove (I1, I2) from MapValueFromBB1ToBB2. + if (DifferentOp1) { + PHINode *NewPN = PHINode::Create(DifferentOp1->getType(), 2, + DifferentOp1->getName() + ".sink", + BBEnd->begin()); + MapValueFromBB1ToBB2[DifferentOp1] = std::make_pair(DifferentOp2, NewPN); + // I1 should use NewPN instead of DifferentOp1. + I1->setOperand(Op1Idx, NewPN); + NewPN->addIncoming(DifferentOp1, BB1); + NewPN->addIncoming(DifferentOp2, BB2); + DEBUG(dbgs() << "Create PHI node " << *NewPN << "\n";); + } + PHINode *OldPN = MapValueFromBB1ToBB2[I1].second; + MapValueFromBB1ToBB2.erase(I1); + + DEBUG(dbgs() << "SINK common instructions " << *I1 << "\n";); + DEBUG(dbgs() << " " << *I2 << "\n";); + // We need to update RE1 and RE2 if we are going to sink the first + // instruction in the basic block down. + bool UpdateRE1 = (I1 == BB1->begin()), UpdateRE2 = (I2 == BB2->begin()); + // Sink the instruction. + BBEnd->getInstList().splice(FirstNonPhiInBBEnd, BB1->getInstList(), I1); + if (!OldPN->use_empty()) + OldPN->replaceAllUsesWith(I1); + OldPN->eraseFromParent(); + + if (!I2->use_empty()) + I2->replaceAllUsesWith(I1); + I1->intersectOptionalDataWith(I2); + I2->eraseFromParent(); + + if (UpdateRE1) + RE1 = BB1->getInstList().rend(); + if (UpdateRE2) + RE2 = BB2->getInstList().rend(); + FirstNonPhiInBBEnd = I1; + NumSinkCommons++; + Changed = true; + } + return Changed; +} + +/// \brief Determine if we can hoist sink a sole store instruction out of a +/// conditional block. +/// +/// We are looking for code like the following: +/// BrBB: +/// store i32 %add, i32* %arrayidx2 +/// ... // No other stores or function calls (we could be calling a memory +/// ... // function). +/// %cmp = icmp ult %x, %y +/// br i1 %cmp, label %EndBB, label %ThenBB +/// ThenBB: +/// store i32 %add5, i32* %arrayidx2 +/// br label EndBB +/// EndBB: +/// ... +/// We are going to transform this into: +/// BrBB: +/// store i32 %add, i32* %arrayidx2 +/// ... // +/// %cmp = icmp ult %x, %y +/// %add.add5 = select i1 %cmp, i32 %add, %add5 +/// store i32 %add.add5, i32* %arrayidx2 +/// ... +/// +/// \return The pointer to the value of the previous store if the store can be +/// hoisted into the predecessor block. 0 otherwise. +static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, + BasicBlock *StoreBB, BasicBlock *EndBB) { + StoreInst *StoreToHoist = dyn_cast<StoreInst>(I); + if (!StoreToHoist) + return 0; + + // Volatile or atomic. + if (!StoreToHoist->isSimple()) + return 0; + + Value *StorePtr = StoreToHoist->getPointerOperand(); + + // Look for a store to the same pointer in BrBB. + unsigned MaxNumInstToLookAt = 10; + for (BasicBlock::reverse_iterator RI = BrBB->rbegin(), + RE = BrBB->rend(); RI != RE && (--MaxNumInstToLookAt); ++RI) { + Instruction *CurI = &*RI; + + // Could be calling an instruction that effects memory like free(). + if (CurI->mayHaveSideEffects() && !isa<StoreInst>(CurI)) + return 0; + + StoreInst *SI = dyn_cast<StoreInst>(CurI); + // Found the previous store make sure it stores to the same location. + if (SI && SI->getPointerOperand() == StorePtr) + // Found the previous store, return its value operand. + return SI->getValueOperand(); + else if (SI) + return 0; // Unknown store. + } + + return 0; +} + +/// \brief Speculate a conditional basic block flattening the CFG. +/// +/// Note that this is a very risky transform currently. Speculating +/// instructions like this is most often not desirable. Instead, there is an MI +/// pass which can do it with full awareness of the resource constraints. +/// However, some cases are "obvious" and we should do directly. An example of +/// this is speculating a single, reasonably cheap instruction. +/// +/// There is only one distinct advantage to flattening the CFG at the IR level: +/// it makes very common but simplistic optimizations such as are common in +/// instcombine and the DAG combiner more powerful by removing CFG edges and +/// modeling their effects with easier to reason about SSA value graphs. +/// +/// +/// An illustration of this transform is turning this IR: +/// \code +/// BB: +/// %cmp = icmp ult %x, %y +/// br i1 %cmp, label %EndBB, label %ThenBB +/// ThenBB: +/// %sub = sub %x, %y +/// br label BB2 +/// EndBB: +/// %phi = phi [ %sub, %ThenBB ], [ 0, %EndBB ] +/// ... +/// \endcode +/// +/// Into this IR: +/// \code +/// BB: +/// %cmp = icmp ult %x, %y +/// %sub = sub %x, %y +/// %cond = select i1 %cmp, 0, %sub +/// ... +/// \endcode +/// +/// \returns true if the conditional block is removed. +static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { + // Be conservative for now. FP select instruction can often be expensive. + Value *BrCond = BI->getCondition(); + if (isa<FCmpInst>(BrCond)) + return false; + + BasicBlock *BB = BI->getParent(); + BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0); + + // If ThenBB is actually on the false edge of the conditional branch, remember + // to swap the select operands later. + bool Invert = false; + if (ThenBB != BI->getSuccessor(0)) { + assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?"); + Invert = true; + } + assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block"); + + // Keep a count of how many times instructions are used within CondBB when + // they are candidates for sinking into CondBB. Specifically: + // - They are defined in BB, and + // - They have no side effects, and + // - All of their uses are in CondBB. + SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts; + + unsigned SpeculationCost = 0; + Value *SpeculatedStoreValue = 0; + StoreInst *SpeculatedStore = 0; + for (BasicBlock::iterator BBI = ThenBB->begin(), + BBE = llvm::prior(ThenBB->end()); + BBI != BBE; ++BBI) { + Instruction *I = BBI; + // Skip debug info. + if (isa<DbgInfoIntrinsic>(I)) + continue; + + // Only speculatively execution a single instruction (not counting the + // terminator) for now. + ++SpeculationCost; + if (SpeculationCost > 1) + return false; + + // Don't hoist the instruction if it's unsafe or expensive. + if (!isSafeToSpeculativelyExecute(I) && + !(HoistCondStores && + (SpeculatedStoreValue = isSafeToSpeculateStore(I, BB, ThenBB, + EndBB)))) + return false; + if (!SpeculatedStoreValue && + ComputeSpeculationCost(I) > PHINodeFoldingThreshold) + return false; + + // Store the store speculation candidate. + if (SpeculatedStoreValue) + SpeculatedStore = cast<StoreInst>(I); + + // Do not hoist the instruction if any of its operands are defined but not + // used in BB. The transformation will prevent the operand from + // being sunk into the use block. + for (User::op_iterator i = I->op_begin(), e = I->op_end(); + i != e; ++i) { + Instruction *OpI = dyn_cast<Instruction>(*i); + if (!OpI || OpI->getParent() != BB || + OpI->mayHaveSideEffects()) + continue; // Not a candidate for sinking. + + ++SinkCandidateUseCounts[OpI]; + } + } + + // Consider any sink candidates which are only used in CondBB as costs for + // speculation. Note, while we iterate over a DenseMap here, we are summing + // and so iteration order isn't significant. + for (SmallDenseMap<Instruction *, unsigned, 4>::iterator I = + SinkCandidateUseCounts.begin(), E = SinkCandidateUseCounts.end(); + I != E; ++I) + if (I->first->getNumUses() == I->second) { + ++SpeculationCost; + if (SpeculationCost > 1) + return false; + } + + // Check that the PHI nodes can be converted to selects. + bool HaveRewritablePHIs = false; + for (BasicBlock::iterator I = EndBB->begin(); + PHINode *PN = dyn_cast<PHINode>(I); ++I) { + Value *OrigV = PN->getIncomingValueForBlock(BB); + Value *ThenV = PN->getIncomingValueForBlock(ThenBB); + + // FIXME: Try to remove some of the duplication with HoistThenElseCodeToIf. + // Skip PHIs which are trivial. + if (ThenV == OrigV) + continue; + + HaveRewritablePHIs = true; + ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV); + ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV); + if (!OrigCE && !ThenCE) + continue; // Known safe and cheap. + + if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) || + (OrigCE && !isSafeToSpeculativelyExecute(OrigCE))) + return false; + unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE) : 0; + unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE) : 0; + if (OrigCost + ThenCost > 2 * PHINodeFoldingThreshold) + return false; + + // Account for the cost of an unfolded ConstantExpr which could end up + // getting expanded into Instructions. + // FIXME: This doesn't account for how many operations are combined in the + // constant expression. + ++SpeculationCost; + if (SpeculationCost > 1) + return false; + } + + // If there are no PHIs to process, bail early. This helps ensure idempotence + // as well. + if (!HaveRewritablePHIs && !(HoistCondStores && SpeculatedStoreValue)) + return false; + + // If we get here, we can hoist the instruction and if-convert. + DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";); + + // Insert a select of the value of the speculated store. + if (SpeculatedStoreValue) { + IRBuilder<true, NoFolder> Builder(BI); + Value *TrueV = SpeculatedStore->getValueOperand(); + Value *FalseV = SpeculatedStoreValue; + if (Invert) + std::swap(TrueV, FalseV); + Value *S = Builder.CreateSelect(BrCond, TrueV, FalseV, TrueV->getName() + + "." + FalseV->getName()); + SpeculatedStore->setOperand(0, S); + } + + // Hoist the instructions. + BB->getInstList().splice(BI, ThenBB->getInstList(), ThenBB->begin(), + llvm::prior(ThenBB->end())); + + // Insert selects and rewrite the PHI operands. + IRBuilder<true, NoFolder> Builder(BI); + for (BasicBlock::iterator I = EndBB->begin(); + PHINode *PN = dyn_cast<PHINode>(I); ++I) { + unsigned OrigI = PN->getBasicBlockIndex(BB); + unsigned ThenI = PN->getBasicBlockIndex(ThenBB); + Value *OrigV = PN->getIncomingValue(OrigI); + Value *ThenV = PN->getIncomingValue(ThenI); + + // Skip PHIs which are trivial. + if (OrigV == ThenV) + continue; + + // Create a select whose true value is the speculatively executed value and + // false value is the preexisting value. Swap them if the branch + // destinations were inverted. + Value *TrueV = ThenV, *FalseV = OrigV; + if (Invert) + std::swap(TrueV, FalseV); + Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, + TrueV->getName() + "." + FalseV->getName()); + PN->setIncomingValue(OrigI, V); + PN->setIncomingValue(ThenI, V); + } + + ++NumSpeculations; + return true; +} + +/// \returns True if this block contains a CallInst with the NoDuplicate +/// attribute. +static bool HasNoDuplicateCall(const BasicBlock *BB) { + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + const CallInst *CI = dyn_cast<CallInst>(I); + if (!CI) + continue; + if (CI->cannotDuplicate()) + return true; + } + return false; +} + +/// BlockIsSimpleEnoughToThreadThrough - Return true if we can thread a branch +/// across this block. +static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { + BranchInst *BI = cast<BranchInst>(BB->getTerminator()); + unsigned Size = 0; + + for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) { + if (isa<DbgInfoIntrinsic>(BBI)) + continue; + if (Size > 10) return false; // Don't clone large BB's. + ++Size; + + // We can only support instructions that do not define values that are + // live outside of the current basic block. + for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end(); + UI != E; ++UI) { + Instruction *U = cast<Instruction>(*UI); + if (U->getParent() != BB || isa<PHINode>(U)) return false; + } + + // Looks ok, continue checking. + } + + return true; +} + +/// FoldCondBranchOnPHI - If we have a conditional branch on a PHI node value +/// that is defined in the same block as the branch and if any PHI entries are +/// constants, thread edges corresponding to that entry to be branches to their +/// ultimate destination. +static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *TD) { + BasicBlock *BB = BI->getParent(); + PHINode *PN = dyn_cast<PHINode>(BI->getCondition()); + // NOTE: we currently cannot transform this case if the PHI node is used + // outside of the block. + if (!PN || PN->getParent() != BB || !PN->hasOneUse()) + return false; + + // Degenerate case of a single entry PHI. + if (PN->getNumIncomingValues() == 1) { + FoldSingleEntryPHINodes(PN->getParent()); + return true; + } + + // Now we know that this block has multiple preds and two succs. + if (!BlockIsSimpleEnoughToThreadThrough(BB)) return false; + + if (HasNoDuplicateCall(BB)) return false; + + // Okay, this is a simple enough basic block. See if any phi values are + // constants. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + ConstantInt *CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i)); + if (CB == 0 || !CB->getType()->isIntegerTy(1)) continue; + + // Okay, we now know that all edges from PredBB should be revectored to + // branch to RealDest. + BasicBlock *PredBB = PN->getIncomingBlock(i); + BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue()); + + if (RealDest == BB) continue; // Skip self loops. + // Skip if the predecessor's terminator is an indirect branch. + if (isa<IndirectBrInst>(PredBB->getTerminator())) continue; + + // The dest block might have PHI nodes, other predecessors and other + // difficult cases. Instead of being smart about this, just insert a new + // block that jumps to the destination block, effectively splitting + // the edge we are about to create. + BasicBlock *EdgeBB = BasicBlock::Create(BB->getContext(), + RealDest->getName()+".critedge", + RealDest->getParent(), RealDest); + BranchInst::Create(RealDest, EdgeBB); + + // Update PHI nodes. + AddPredecessorToBlock(RealDest, EdgeBB, BB); + + // BB may have instructions that are being threaded over. Clone these + // instructions into EdgeBB. We know that there will be no uses of the + // cloned instructions outside of EdgeBB. + BasicBlock::iterator InsertPt = EdgeBB->begin(); + DenseMap<Value*, Value*> TranslateMap; // Track translated values. + for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) { + if (PHINode *PN = dyn_cast<PHINode>(BBI)) { + TranslateMap[PN] = PN->getIncomingValueForBlock(PredBB); + continue; + } + // Clone the instruction. + Instruction *N = BBI->clone(); + if (BBI->hasName()) N->setName(BBI->getName()+".c"); + + // Update operands due to translation. + for (User::op_iterator i = N->op_begin(), e = N->op_end(); + i != e; ++i) { + DenseMap<Value*, Value*>::iterator PI = TranslateMap.find(*i); + if (PI != TranslateMap.end()) + *i = PI->second; + } + + // Check for trivial simplification. + if (Value *V = SimplifyInstruction(N, TD)) { + TranslateMap[BBI] = V; + delete N; // Instruction folded away, don't need actual inst + } else { + // Insert the new instruction into its new home. + EdgeBB->getInstList().insert(InsertPt, N); + if (!BBI->use_empty()) + TranslateMap[BBI] = N; + } + } + + // Loop over all of the edges from PredBB to BB, changing them to branch + // to EdgeBB instead. + TerminatorInst *PredBBTI = PredBB->getTerminator(); + for (unsigned i = 0, e = PredBBTI->getNumSuccessors(); i != e; ++i) + if (PredBBTI->getSuccessor(i) == BB) { + BB->removePredecessor(PredBB); + PredBBTI->setSuccessor(i, EdgeBB); + } + + // Recurse, simplifying any other constants. + return FoldCondBranchOnPHI(BI, TD) | true; + } + + return false; +} + +/// FoldTwoEntryPHINode - Given a BB that starts with the specified two-entry +/// PHI node, see if we can eliminate it. +static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *TD) { + // Ok, this is a two entry PHI node. Check to see if this is a simple "if + // statement", which has a very simple dominance structure. Basically, we + // are trying to find the condition that is being branched on, which + // subsequently causes this merge to happen. We really want control + // dependence information for this check, but simplifycfg can't keep it up + // to date, and this catches most of the cases we care about anyway. + BasicBlock *BB = PN->getParent(); + BasicBlock *IfTrue, *IfFalse; + Value *IfCond = GetIfCondition(BB, IfTrue, IfFalse); + if (!IfCond || + // Don't bother if the branch will be constant folded trivially. + isa<ConstantInt>(IfCond)) + return false; + + // Okay, we found that we can merge this two-entry phi node into a select. + // Doing so would require us to fold *all* two entry phi nodes in this block. + // At some point this becomes non-profitable (particularly if the target + // doesn't support cmov's). Only do this transformation if there are two or + // fewer PHI nodes in this block. + unsigned NumPhis = 0; + for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I) + if (NumPhis > 2) + return false; + + // Loop over the PHI's seeing if we can promote them all to select + // instructions. While we are at it, keep track of the instructions + // that need to be moved to the dominating block. + SmallPtrSet<Instruction*, 4> AggressiveInsts; + unsigned MaxCostVal0 = PHINodeFoldingThreshold, + MaxCostVal1 = PHINodeFoldingThreshold; + + for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) { + PHINode *PN = cast<PHINode>(II++); + if (Value *V = SimplifyInstruction(PN, TD)) { + PN->replaceAllUsesWith(V); + PN->eraseFromParent(); + continue; + } + + if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts, + MaxCostVal0) || + !DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts, + MaxCostVal1)) + return false; + } + + // If we folded the first phi, PN dangles at this point. Refresh it. If + // we ran out of PHIs then we simplified them all. + PN = dyn_cast<PHINode>(BB->begin()); + if (PN == 0) return true; + + // Don't fold i1 branches on PHIs which contain binary operators. These can + // often be turned into switches and other things. + if (PN->getType()->isIntegerTy(1) && + (isa<BinaryOperator>(PN->getIncomingValue(0)) || + isa<BinaryOperator>(PN->getIncomingValue(1)) || + isa<BinaryOperator>(IfCond))) + return false; + + // If we all PHI nodes are promotable, check to make sure that all + // instructions in the predecessor blocks can be promoted as well. If + // not, we won't be able to get rid of the control flow, so it's not + // worth promoting to select instructions. + BasicBlock *DomBlock = 0; + BasicBlock *IfBlock1 = PN->getIncomingBlock(0); + BasicBlock *IfBlock2 = PN->getIncomingBlock(1); + if (cast<BranchInst>(IfBlock1->getTerminator())->isConditional()) { + IfBlock1 = 0; + } else { + DomBlock = *pred_begin(IfBlock1); + for (BasicBlock::iterator I = IfBlock1->begin();!isa<TerminatorInst>(I);++I) + if (!AggressiveInsts.count(I) && !isa<DbgInfoIntrinsic>(I)) { + // This is not an aggressive instruction that we can promote. + // Because of this, we won't be able to get rid of the control + // flow, so the xform is not worth it. + return false; + } + } + + if (cast<BranchInst>(IfBlock2->getTerminator())->isConditional()) { + IfBlock2 = 0; + } else { + DomBlock = *pred_begin(IfBlock2); + for (BasicBlock::iterator I = IfBlock2->begin();!isa<TerminatorInst>(I);++I) + if (!AggressiveInsts.count(I) && !isa<DbgInfoIntrinsic>(I)) { + // This is not an aggressive instruction that we can promote. + // Because of this, we won't be able to get rid of the control + // flow, so the xform is not worth it. + return false; + } + } + + DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond << " T: " + << IfTrue->getName() << " F: " << IfFalse->getName() << "\n"); + + // If we can still promote the PHI nodes after this gauntlet of tests, + // do all of the PHI's now. + Instruction *InsertPt = DomBlock->getTerminator(); + IRBuilder<true, NoFolder> Builder(InsertPt); + + // Move all 'aggressive' instructions, which are defined in the + // conditional parts of the if's up to the dominating block. + if (IfBlock1) + DomBlock->getInstList().splice(InsertPt, + IfBlock1->getInstList(), IfBlock1->begin(), + IfBlock1->getTerminator()); + if (IfBlock2) + DomBlock->getInstList().splice(InsertPt, + IfBlock2->getInstList(), IfBlock2->begin(), + IfBlock2->getTerminator()); + + while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) { + // Change the PHI node into a select instruction. + Value *TrueVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse); + Value *FalseVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue); + + SelectInst *NV = + cast<SelectInst>(Builder.CreateSelect(IfCond, TrueVal, FalseVal, "")); + PN->replaceAllUsesWith(NV); + NV->takeName(PN); + PN->eraseFromParent(); + } + + // At this point, IfBlock1 and IfBlock2 are both empty, so our if statement + // has been flattened. Change DomBlock to jump directly to our new block to + // avoid other simplifycfg's kicking in on the diamond. + TerminatorInst *OldTI = DomBlock->getTerminator(); + Builder.SetInsertPoint(OldTI); + Builder.CreateBr(BB); + OldTI->eraseFromParent(); + return true; +} + +/// SimplifyCondBranchToTwoReturns - If we found a conditional branch that goes +/// to two returning blocks, try to merge them together into one return, +/// introducing a select if the return values disagree. +static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, + IRBuilder<> &Builder) { + assert(BI->isConditional() && "Must be a conditional branch"); + BasicBlock *TrueSucc = BI->getSuccessor(0); + BasicBlock *FalseSucc = BI->getSuccessor(1); + ReturnInst *TrueRet = cast<ReturnInst>(TrueSucc->getTerminator()); + ReturnInst *FalseRet = cast<ReturnInst>(FalseSucc->getTerminator()); + + // Check to ensure both blocks are empty (just a return) or optionally empty + // with PHI nodes. If there are other instructions, merging would cause extra + // computation on one path or the other. + if (!TrueSucc->getFirstNonPHIOrDbg()->isTerminator()) + return false; + if (!FalseSucc->getFirstNonPHIOrDbg()->isTerminator()) + return false; + + Builder.SetInsertPoint(BI); + // Okay, we found a branch that is going to two return nodes. If + // there is no return value for this function, just change the + // branch into a return. + if (FalseRet->getNumOperands() == 0) { + TrueSucc->removePredecessor(BI->getParent()); + FalseSucc->removePredecessor(BI->getParent()); + Builder.CreateRetVoid(); + EraseTerminatorInstAndDCECond(BI); + return true; + } + + // Otherwise, figure out what the true and false return values are + // so we can insert a new select instruction. + Value *TrueValue = TrueRet->getReturnValue(); + Value *FalseValue = FalseRet->getReturnValue(); + + // Unwrap any PHI nodes in the return blocks. + if (PHINode *TVPN = dyn_cast_or_null<PHINode>(TrueValue)) + if (TVPN->getParent() == TrueSucc) + TrueValue = TVPN->getIncomingValueForBlock(BI->getParent()); + if (PHINode *FVPN = dyn_cast_or_null<PHINode>(FalseValue)) + if (FVPN->getParent() == FalseSucc) + FalseValue = FVPN->getIncomingValueForBlock(BI->getParent()); + + // In order for this transformation to be safe, we must be able to + // unconditionally execute both operands to the return. This is + // normally the case, but we could have a potentially-trapping + // constant expression that prevents this transformation from being + // safe. + if (ConstantExpr *TCV = dyn_cast_or_null<ConstantExpr>(TrueValue)) + if (TCV->canTrap()) + return false; + if (ConstantExpr *FCV = dyn_cast_or_null<ConstantExpr>(FalseValue)) + if (FCV->canTrap()) + return false; + + // Okay, we collected all the mapped values and checked them for sanity, and + // defined to really do this transformation. First, update the CFG. + TrueSucc->removePredecessor(BI->getParent()); + FalseSucc->removePredecessor(BI->getParent()); + + // Insert select instructions where needed. + Value *BrCond = BI->getCondition(); + if (TrueValue) { + // Insert a select if the results differ. + if (TrueValue == FalseValue || isa<UndefValue>(FalseValue)) { + } else if (isa<UndefValue>(TrueValue)) { + TrueValue = FalseValue; + } else { + TrueValue = Builder.CreateSelect(BrCond, TrueValue, + FalseValue, "retval"); + } + } + + Value *RI = !TrueValue ? + Builder.CreateRetVoid() : Builder.CreateRet(TrueValue); + + (void) RI; + + DEBUG(dbgs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:" + << "\n " << *BI << "NewRet = " << *RI + << "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: "<< *FalseSucc); + + EraseTerminatorInstAndDCECond(BI); + + return true; +} + +/// ExtractBranchMetadata - Given a conditional BranchInstruction, retrieve the +/// probabilities of the branch taking each edge. Fills in the two APInt +/// parameters and return true, or returns false if no or invalid metadata was +/// found. +static bool ExtractBranchMetadata(BranchInst *BI, + uint64_t &ProbTrue, uint64_t &ProbFalse) { + assert(BI->isConditional() && + "Looking for probabilities on unconditional branch?"); + MDNode *ProfileData = BI->getMetadata(LLVMContext::MD_prof); + if (!ProfileData || ProfileData->getNumOperands() != 3) return false; + ConstantInt *CITrue = dyn_cast<ConstantInt>(ProfileData->getOperand(1)); + ConstantInt *CIFalse = dyn_cast<ConstantInt>(ProfileData->getOperand(2)); + if (!CITrue || !CIFalse) return false; + ProbTrue = CITrue->getValue().getZExtValue(); + ProbFalse = CIFalse->getValue().getZExtValue(); + return true; +} + +/// checkCSEInPredecessor - Return true if the given instruction is available +/// in its predecessor block. If yes, the instruction will be removed. +/// +static bool checkCSEInPredecessor(Instruction *Inst, BasicBlock *PB) { + if (!isa<BinaryOperator>(Inst) && !isa<CmpInst>(Inst)) + return false; + for (BasicBlock::iterator I = PB->begin(), E = PB->end(); I != E; I++) { + Instruction *PBI = &*I; + // Check whether Inst and PBI generate the same value. + if (Inst->isIdenticalTo(PBI)) { + Inst->replaceAllUsesWith(PBI); + Inst->eraseFromParent(); + return true; + } + } + return false; +} + +/// FoldBranchToCommonDest - If this basic block is simple enough, and if a +/// predecessor branches to us and one of our successors, fold the block into +/// the predecessor and use logical operations to pick the right destination. +bool llvm::FoldBranchToCommonDest(BranchInst *BI) { + BasicBlock *BB = BI->getParent(); + + Instruction *Cond = 0; + if (BI->isConditional()) + Cond = dyn_cast<Instruction>(BI->getCondition()); + else { + // For unconditional branch, check for a simple CFG pattern, where + // BB has a single predecessor and BB's successor is also its predecessor's + // successor. If such pattern exisits, check for CSE between BB and its + // predecessor. + if (BasicBlock *PB = BB->getSinglePredecessor()) + if (BranchInst *PBI = dyn_cast<BranchInst>(PB->getTerminator())) + if (PBI->isConditional() && + (BI->getSuccessor(0) == PBI->getSuccessor(0) || + BI->getSuccessor(0) == PBI->getSuccessor(1))) { + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); + I != E; ) { + Instruction *Curr = I++; + if (isa<CmpInst>(Curr)) { + Cond = Curr; + break; + } + // Quit if we can't remove this instruction. + if (!checkCSEInPredecessor(Curr, PB)) + return false; + } + } + + if (Cond == 0) + return false; + } + + if (Cond == 0 || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) || + Cond->getParent() != BB || !Cond->hasOneUse()) + return false; + + // Only allow this if the condition is a simple instruction that can be + // executed unconditionally. It must be in the same block as the branch, and + // must be at the front of the block. + BasicBlock::iterator FrontIt = BB->front(); + + // Ignore dbg intrinsics. + while (isa<DbgInfoIntrinsic>(FrontIt)) ++FrontIt; + + // Allow a single instruction to be hoisted in addition to the compare + // that feeds the branch. We later ensure that any values that _it_ uses + // were also live in the predecessor, so that we don't unnecessarily create + // register pressure or inhibit out-of-order execution. + Instruction *BonusInst = 0; + if (&*FrontIt != Cond && + FrontIt->hasOneUse() && *FrontIt->use_begin() == Cond && + isSafeToSpeculativelyExecute(FrontIt)) { + BonusInst = &*FrontIt; + ++FrontIt; + + // Ignore dbg intrinsics. + while (isa<DbgInfoIntrinsic>(FrontIt)) ++FrontIt; + } + + // Only a single bonus inst is allowed. + if (&*FrontIt != Cond) + return false; + + // Make sure the instruction after the condition is the cond branch. + BasicBlock::iterator CondIt = Cond; ++CondIt; + + // Ingore dbg intrinsics. + while (isa<DbgInfoIntrinsic>(CondIt)) ++CondIt; + + if (&*CondIt != BI) + return false; + + // Cond is known to be a compare or binary operator. Check to make sure that + // neither operand is a potentially-trapping constant expression. + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(0))) + if (CE->canTrap()) + return false; + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(1))) + if (CE->canTrap()) + return false; + + // Finally, don't infinitely unroll conditional loops. + BasicBlock *TrueDest = BI->getSuccessor(0); + BasicBlock *FalseDest = (BI->isConditional()) ? BI->getSuccessor(1) : 0; + if (TrueDest == BB || FalseDest == BB) + return false; + + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + BasicBlock *PredBlock = *PI; + BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator()); + + // Check that we have two conditional branches. If there is a PHI node in + // the common successor, verify that the same value flows in from both + // blocks. + SmallVector<PHINode*, 4> PHIs; + if (PBI == 0 || PBI->isUnconditional() || + (BI->isConditional() && + !SafeToMergeTerminators(BI, PBI)) || + (!BI->isConditional() && + !isProfitableToFoldUnconditional(BI, PBI, Cond, PHIs))) + continue; + + // Determine if the two branches share a common destination. + Instruction::BinaryOps Opc = Instruction::BinaryOpsEnd; + bool InvertPredCond = false; + + if (BI->isConditional()) { + if (PBI->getSuccessor(0) == TrueDest) + Opc = Instruction::Or; + else if (PBI->getSuccessor(1) == FalseDest) + Opc = Instruction::And; + else if (PBI->getSuccessor(0) == FalseDest) + Opc = Instruction::And, InvertPredCond = true; + else if (PBI->getSuccessor(1) == TrueDest) + Opc = Instruction::Or, InvertPredCond = true; + else + continue; + } else { + if (PBI->getSuccessor(0) != TrueDest && PBI->getSuccessor(1) != TrueDest) + continue; + } + + // Ensure that any values used in the bonus instruction are also used + // by the terminator of the predecessor. This means that those values + // must already have been resolved, so we won't be inhibiting the + // out-of-order core by speculating them earlier. We also allow + // instructions that are used by the terminator's condition because it + // exposes more merging opportunities. + bool UsedByBranch = (BonusInst && BonusInst->hasOneUse() && + *BonusInst->use_begin() == Cond); + + if (BonusInst && !UsedByBranch) { + // Collect the values used by the bonus inst + SmallPtrSet<Value*, 4> UsedValues; + for (Instruction::op_iterator OI = BonusInst->op_begin(), + OE = BonusInst->op_end(); OI != OE; ++OI) { + Value *V = *OI; + if (!isa<Constant>(V) && !isa<Argument>(V)) + UsedValues.insert(V); + } + + SmallVector<std::pair<Value*, unsigned>, 4> Worklist; + Worklist.push_back(std::make_pair(PBI->getOperand(0), 0)); + + // Walk up to four levels back up the use-def chain of the predecessor's + // terminator to see if all those values were used. The choice of four + // levels is arbitrary, to provide a compile-time-cost bound. + while (!Worklist.empty()) { + std::pair<Value*, unsigned> Pair = Worklist.back(); + Worklist.pop_back(); + + if (Pair.second >= 4) continue; + UsedValues.erase(Pair.first); + if (UsedValues.empty()) break; + + if (Instruction *I = dyn_cast<Instruction>(Pair.first)) { + for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end(); + OI != OE; ++OI) + Worklist.push_back(std::make_pair(OI->get(), Pair.second+1)); + } + } + + if (!UsedValues.empty()) return false; + } + + DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB); + IRBuilder<> Builder(PBI); + + // If we need to invert the condition in the pred block to match, do so now. + if (InvertPredCond) { + Value *NewCond = PBI->getCondition(); + + if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) { + CmpInst *CI = cast<CmpInst>(NewCond); + CI->setPredicate(CI->getInversePredicate()); + } else { + NewCond = Builder.CreateNot(NewCond, + PBI->getCondition()->getName()+".not"); + } + + PBI->setCondition(NewCond); + PBI->swapSuccessors(); + } + + // If we have a bonus inst, clone it into the predecessor block. + Instruction *NewBonus = 0; + if (BonusInst) { + NewBonus = BonusInst->clone(); + PredBlock->getInstList().insert(PBI, NewBonus); + NewBonus->takeName(BonusInst); + BonusInst->setName(BonusInst->getName()+".old"); + } + + // Clone Cond into the predecessor basic block, and or/and the + // two conditions together. + Instruction *New = Cond->clone(); + if (BonusInst) New->replaceUsesOfWith(BonusInst, NewBonus); + PredBlock->getInstList().insert(PBI, New); + New->takeName(Cond); + Cond->setName(New->getName()+".old"); + + if (BI->isConditional()) { + Instruction *NewCond = + cast<Instruction>(Builder.CreateBinOp(Opc, PBI->getCondition(), + New, "or.cond")); + PBI->setCondition(NewCond); + + uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight; + bool PredHasWeights = ExtractBranchMetadata(PBI, PredTrueWeight, + PredFalseWeight); + bool SuccHasWeights = ExtractBranchMetadata(BI, SuccTrueWeight, + SuccFalseWeight); + SmallVector<uint64_t, 8> NewWeights; + + if (PBI->getSuccessor(0) == BB) { + if (PredHasWeights && SuccHasWeights) { + // PBI: br i1 %x, BB, FalseDest + // BI: br i1 %y, TrueDest, FalseDest + //TrueWeight is TrueWeight for PBI * TrueWeight for BI. + NewWeights.push_back(PredTrueWeight * SuccTrueWeight); + //FalseWeight is FalseWeight for PBI * TotalWeight for BI + + // TrueWeight for PBI * FalseWeight for BI. + // We assume that total weights of a BranchInst can fit into 32 bits. + // Therefore, we will not have overflow using 64-bit arithmetic. + NewWeights.push_back(PredFalseWeight * (SuccFalseWeight + + SuccTrueWeight) + PredTrueWeight * SuccFalseWeight); + } + AddPredecessorToBlock(TrueDest, PredBlock, BB); + PBI->setSuccessor(0, TrueDest); + } + if (PBI->getSuccessor(1) == BB) { + if (PredHasWeights && SuccHasWeights) { + // PBI: br i1 %x, TrueDest, BB + // BI: br i1 %y, TrueDest, FalseDest + //TrueWeight is TrueWeight for PBI * TotalWeight for BI + + // FalseWeight for PBI * TrueWeight for BI. + NewWeights.push_back(PredTrueWeight * (SuccFalseWeight + + SuccTrueWeight) + PredFalseWeight * SuccTrueWeight); + //FalseWeight is FalseWeight for PBI * FalseWeight for BI. + NewWeights.push_back(PredFalseWeight * SuccFalseWeight); + } + AddPredecessorToBlock(FalseDest, PredBlock, BB); + PBI->setSuccessor(1, FalseDest); + } + if (NewWeights.size() == 2) { + // Halve the weights if any of them cannot fit in an uint32_t + FitWeights(NewWeights); + + SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(),NewWeights.end()); + PBI->setMetadata(LLVMContext::MD_prof, + MDBuilder(BI->getContext()). + createBranchWeights(MDWeights)); + } else + PBI->setMetadata(LLVMContext::MD_prof, NULL); + } else { + // Update PHI nodes in the common successors. + for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { + ConstantInt *PBI_C = cast<ConstantInt>( + PHIs[i]->getIncomingValueForBlock(PBI->getParent())); + assert(PBI_C->getType()->isIntegerTy(1)); + Instruction *MergedCond = 0; + if (PBI->getSuccessor(0) == TrueDest) { + // Create (PBI_Cond and PBI_C) or (!PBI_Cond and BI_Value) + // PBI_C is true: PBI_Cond or (!PBI_Cond and BI_Value) + // is false: !PBI_Cond and BI_Value + Instruction *NotCond = + cast<Instruction>(Builder.CreateNot(PBI->getCondition(), + "not.cond")); + MergedCond = + cast<Instruction>(Builder.CreateBinOp(Instruction::And, + NotCond, New, + "and.cond")); + if (PBI_C->isOne()) + MergedCond = + cast<Instruction>(Builder.CreateBinOp(Instruction::Or, + PBI->getCondition(), MergedCond, + "or.cond")); + } else { + // Create (PBI_Cond and BI_Value) or (!PBI_Cond and PBI_C) + // PBI_C is true: (PBI_Cond and BI_Value) or (!PBI_Cond) + // is false: PBI_Cond and BI_Value + MergedCond = + cast<Instruction>(Builder.CreateBinOp(Instruction::And, + PBI->getCondition(), New, + "and.cond")); + if (PBI_C->isOne()) { + Instruction *NotCond = + cast<Instruction>(Builder.CreateNot(PBI->getCondition(), + "not.cond")); + MergedCond = + cast<Instruction>(Builder.CreateBinOp(Instruction::Or, + NotCond, MergedCond, + "or.cond")); + } + } + // Update PHI Node. + PHIs[i]->setIncomingValue(PHIs[i]->getBasicBlockIndex(PBI->getParent()), + MergedCond); + } + // Change PBI from Conditional to Unconditional. + BranchInst *New_PBI = BranchInst::Create(TrueDest, PBI); + EraseTerminatorInstAndDCECond(PBI); + PBI = New_PBI; + } + + // TODO: If BB is reachable from all paths through PredBlock, then we + // could replace PBI's branch probabilities with BI's. + + // Copy any debug value intrinsics into the end of PredBlock. + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (isa<DbgInfoIntrinsic>(*I)) + I->clone()->insertBefore(PBI); + + return true; + } + return false; +} + +/// SimplifyCondBranchToCondBranch - If we have a conditional branch as a +/// predecessor of another block, this function tries to simplify it. We know +/// that PBI and BI are both conditional branches, and BI is in one of the +/// successor blocks of PBI - PBI branches to BI. +static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { + assert(PBI->isConditional() && BI->isConditional()); + BasicBlock *BB = BI->getParent(); + + // If this block ends with a branch instruction, and if there is a + // predecessor that ends on a branch of the same condition, make + // this conditional branch redundant. + if (PBI->getCondition() == BI->getCondition() && + PBI->getSuccessor(0) != PBI->getSuccessor(1)) { + // Okay, the outcome of this conditional branch is statically + // knowable. If this block had a single pred, handle specially. + if (BB->getSinglePredecessor()) { + // Turn this into a branch on constant. + bool CondIsTrue = PBI->getSuccessor(0) == BB; + BI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()), + CondIsTrue)); + return true; // Nuke the branch on constant. + } + + // Otherwise, if there are multiple predecessors, insert a PHI that merges + // in the constant and simplify the block result. Subsequent passes of + // simplifycfg will thread the block. + if (BlockIsSimpleEnoughToThreadThrough(BB)) { + pred_iterator PB = pred_begin(BB), PE = pred_end(BB); + PHINode *NewPN = PHINode::Create(Type::getInt1Ty(BB->getContext()), + std::distance(PB, PE), + BI->getCondition()->getName() + ".pr", + BB->begin()); + // Okay, we're going to insert the PHI node. Since PBI is not the only + // predecessor, compute the PHI'd conditional value for all of the preds. + // Any predecessor where the condition is not computable we keep symbolic. + for (pred_iterator PI = PB; PI != PE; ++PI) { + BasicBlock *P = *PI; + if ((PBI = dyn_cast<BranchInst>(P->getTerminator())) && + PBI != BI && PBI->isConditional() && + PBI->getCondition() == BI->getCondition() && + PBI->getSuccessor(0) != PBI->getSuccessor(1)) { + bool CondIsTrue = PBI->getSuccessor(0) == BB; + NewPN->addIncoming(ConstantInt::get(Type::getInt1Ty(BB->getContext()), + CondIsTrue), P); + } else { + NewPN->addIncoming(BI->getCondition(), P); + } + } + + BI->setCondition(NewPN); + return true; + } + } + + // If this is a conditional branch in an empty block, and if any + // predecessors is a conditional branch to one of our destinations, + // fold the conditions into logical ops and one cond br. + BasicBlock::iterator BBI = BB->begin(); + // Ignore dbg intrinsics. + while (isa<DbgInfoIntrinsic>(BBI)) + ++BBI; + if (&*BBI != BI) + return false; + + + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BI->getCondition())) + if (CE->canTrap()) + return false; + + int PBIOp, BIOp; + if (PBI->getSuccessor(0) == BI->getSuccessor(0)) + PBIOp = BIOp = 0; + else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) + PBIOp = 0, BIOp = 1; + else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) + PBIOp = 1, BIOp = 0; + else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) + PBIOp = BIOp = 1; + else + return false; + + // Check to make sure that the other destination of this branch + // isn't BB itself. If so, this is an infinite loop that will + // keep getting unwound. + if (PBI->getSuccessor(PBIOp) == BB) + return false; + + // Do not perform this transformation if it would require + // insertion of a large number of select instructions. For targets + // without predication/cmovs, this is a big pessimization. + BasicBlock *CommonDest = PBI->getSuccessor(PBIOp); + + unsigned NumPhis = 0; + for (BasicBlock::iterator II = CommonDest->begin(); + isa<PHINode>(II); ++II, ++NumPhis) + if (NumPhis > 2) // Disable this xform. + return false; + + // Finally, if everything is ok, fold the branches to logical ops. + BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1); + + DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent() + << "AND: " << *BI->getParent()); + + + // If OtherDest *is* BB, then BB is a basic block with a single conditional + // branch in it, where one edge (OtherDest) goes back to itself but the other + // exits. We don't *know* that the program avoids the infinite loop + // (even though that seems likely). If we do this xform naively, we'll end up + // recursively unpeeling the loop. Since we know that (after the xform is + // done) that the block *is* infinite if reached, we just make it an obviously + // infinite loop with no cond branch. + if (OtherDest == BB) { + // Insert it at the end of the function, because it's either code, + // or it won't matter if it's hot. :) + BasicBlock *InfLoopBlock = BasicBlock::Create(BB->getContext(), + "infloop", BB->getParent()); + BranchInst::Create(InfLoopBlock, InfLoopBlock); + OtherDest = InfLoopBlock; + } + + DEBUG(dbgs() << *PBI->getParent()->getParent()); + + // BI may have other predecessors. Because of this, we leave + // it alone, but modify PBI. + + // Make sure we get to CommonDest on True&True directions. + Value *PBICond = PBI->getCondition(); + IRBuilder<true, NoFolder> Builder(PBI); + if (PBIOp) + PBICond = Builder.CreateNot(PBICond, PBICond->getName()+".not"); + + Value *BICond = BI->getCondition(); + if (BIOp) + BICond = Builder.CreateNot(BICond, BICond->getName()+".not"); + + // Merge the conditions. + Value *Cond = Builder.CreateOr(PBICond, BICond, "brmerge"); + + // Modify PBI to branch on the new condition to the new dests. + PBI->setCondition(Cond); + PBI->setSuccessor(0, CommonDest); + PBI->setSuccessor(1, OtherDest); + + // Update branch weight for PBI. + uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight; + bool PredHasWeights = ExtractBranchMetadata(PBI, PredTrueWeight, + PredFalseWeight); + bool SuccHasWeights = ExtractBranchMetadata(BI, SuccTrueWeight, + SuccFalseWeight); + if (PredHasWeights && SuccHasWeights) { + uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight; + uint64_t PredOther = PBIOp ?PredTrueWeight : PredFalseWeight; + uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight; + uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight; + // The weight to CommonDest should be PredCommon * SuccTotal + + // PredOther * SuccCommon. + // The weight to OtherDest should be PredOther * SuccOther. + SmallVector<uint64_t, 2> NewWeights; + NewWeights.push_back(PredCommon * (SuccCommon + SuccOther) + + PredOther * SuccCommon); + NewWeights.push_back(PredOther * SuccOther); + // Halve the weights if any of them cannot fit in an uint32_t + FitWeights(NewWeights); + + SmallVector<uint32_t, 2> MDWeights(NewWeights.begin(),NewWeights.end()); + PBI->setMetadata(LLVMContext::MD_prof, + MDBuilder(BI->getContext()). + createBranchWeights(MDWeights)); + } + + // OtherDest may have phi nodes. If so, add an entry from PBI's + // block that are identical to the entries for BI's block. + AddPredecessorToBlock(OtherDest, PBI->getParent(), BB); + + // We know that the CommonDest already had an edge from PBI to + // it. If it has PHIs though, the PHIs may have different + // entries for BB and PBI's BB. If so, insert a select to make + // them agree. + PHINode *PN; + for (BasicBlock::iterator II = CommonDest->begin(); + (PN = dyn_cast<PHINode>(II)); ++II) { + Value *BIV = PN->getIncomingValueForBlock(BB); + unsigned PBBIdx = PN->getBasicBlockIndex(PBI->getParent()); + Value *PBIV = PN->getIncomingValue(PBBIdx); + if (BIV != PBIV) { + // Insert a select in PBI to pick the right value. + Value *NV = cast<SelectInst> + (Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName()+".mux")); + PN->setIncomingValue(PBBIdx, NV); + } + } + + DEBUG(dbgs() << "INTO: " << *PBI->getParent()); + DEBUG(dbgs() << *PBI->getParent()->getParent()); + + // This basic block is probably dead. We know it has at least + // one fewer predecessor. + return true; +} + +// SimplifyTerminatorOnSelect - Simplifies a terminator by replacing it with a +// branch to TrueBB if Cond is true or to FalseBB if Cond is false. +// Takes care of updating the successors and removing the old terminator. +// Also makes sure not to introduce new successors by assuming that edges to +// non-successor TrueBBs and FalseBBs aren't reachable. +static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond, + BasicBlock *TrueBB, BasicBlock *FalseBB, + uint32_t TrueWeight, + uint32_t FalseWeight){ + // Remove any superfluous successor edges from the CFG. + // First, figure out which successors to preserve. + // If TrueBB and FalseBB are equal, only try to preserve one copy of that + // successor. + BasicBlock *KeepEdge1 = TrueBB; + BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : 0; + + // Then remove the rest. + for (unsigned I = 0, E = OldTerm->getNumSuccessors(); I != E; ++I) { + BasicBlock *Succ = OldTerm->getSuccessor(I); + // Make sure only to keep exactly one copy of each edge. + if (Succ == KeepEdge1) + KeepEdge1 = 0; + else if (Succ == KeepEdge2) + KeepEdge2 = 0; + else + Succ->removePredecessor(OldTerm->getParent()); + } + + IRBuilder<> Builder(OldTerm); + Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc()); + + // Insert an appropriate new terminator. + if ((KeepEdge1 == 0) && (KeepEdge2 == 0)) { + if (TrueBB == FalseBB) + // We were only looking for one successor, and it was present. + // Create an unconditional branch to it. + Builder.CreateBr(TrueBB); + else { + // We found both of the successors we were looking for. + // Create a conditional branch sharing the condition of the select. + BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB); + if (TrueWeight != FalseWeight) + NewBI->setMetadata(LLVMContext::MD_prof, + MDBuilder(OldTerm->getContext()). + createBranchWeights(TrueWeight, FalseWeight)); + } + } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) { + // Neither of the selected blocks were successors, so this + // terminator must be unreachable. + new UnreachableInst(OldTerm->getContext(), OldTerm); + } else { + // One of the selected values was a successor, but the other wasn't. + // Insert an unconditional branch to the one that was found; + // the edge to the one that wasn't must be unreachable. + if (KeepEdge1 == 0) + // Only TrueBB was found. + Builder.CreateBr(TrueBB); + else + // Only FalseBB was found. + Builder.CreateBr(FalseBB); + } + + EraseTerminatorInstAndDCECond(OldTerm); + return true; +} + +// SimplifySwitchOnSelect - Replaces +// (switch (select cond, X, Y)) on constant X, Y +// with a branch - conditional if X and Y lead to distinct BBs, +// unconditional otherwise. +static bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) { + // Check for constant integer values in the select. + ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue()); + ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue()); + if (!TrueVal || !FalseVal) + return false; + + // Find the relevant condition and destinations. + Value *Condition = Select->getCondition(); + BasicBlock *TrueBB = SI->findCaseValue(TrueVal).getCaseSuccessor(); + BasicBlock *FalseBB = SI->findCaseValue(FalseVal).getCaseSuccessor(); + + // Get weight for TrueBB and FalseBB. + uint32_t TrueWeight = 0, FalseWeight = 0; + SmallVector<uint64_t, 8> Weights; + bool HasWeights = HasBranchWeights(SI); + if (HasWeights) { + GetBranchWeights(SI, Weights); + if (Weights.size() == 1 + SI->getNumCases()) { + TrueWeight = (uint32_t)Weights[SI->findCaseValue(TrueVal). + getSuccessorIndex()]; + FalseWeight = (uint32_t)Weights[SI->findCaseValue(FalseVal). + getSuccessorIndex()]; + } + } + + // Perform the actual simplification. + return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, + TrueWeight, FalseWeight); +} + +// SimplifyIndirectBrOnSelect - Replaces +// (indirectbr (select cond, blockaddress(@fn, BlockA), +// blockaddress(@fn, BlockB))) +// with +// (br cond, BlockA, BlockB). +static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) { + // Check that both operands of the select are block addresses. + BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue()); + BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue()); + if (!TBA || !FBA) + return false; + + // Extract the actual blocks. + BasicBlock *TrueBB = TBA->getBasicBlock(); + BasicBlock *FalseBB = FBA->getBasicBlock(); + + // Perform the actual simplification. + return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB, + 0, 0); +} + +/// TryToSimplifyUncondBranchWithICmpInIt - This is called when we find an icmp +/// instruction (a seteq/setne with a constant) as the only instruction in a +/// block that ends with an uncond branch. We are looking for a very specific +/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In +/// this case, we merge the first two "or's of icmp" into a switch, but then the +/// default value goes to an uncond block with a seteq in it, we get something +/// like: +/// +/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ] +/// DEFAULT: +/// %tmp = icmp eq i8 %A, 92 +/// br label %end +/// end: +/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ] +/// +/// We prefer to split the edge to 'end' so that there is a true/false entry to +/// the PHI, merging the third icmp into the switch. +static bool TryToSimplifyUncondBranchWithICmpInIt( + ICmpInst *ICI, IRBuilder<> &Builder, const TargetTransformInfo &TTI, + const DataLayout *TD) { + BasicBlock *BB = ICI->getParent(); + + // If the block has any PHIs in it or the icmp has multiple uses, it is too + // complex. + if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse()) return false; + + Value *V = ICI->getOperand(0); + ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1)); + + // The pattern we're looking for is where our only predecessor is a switch on + // 'V' and this block is the default case for the switch. In this case we can + // fold the compared value into the switch to simplify things. + BasicBlock *Pred = BB->getSinglePredecessor(); + if (Pred == 0 || !isa<SwitchInst>(Pred->getTerminator())) return false; + + SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator()); + if (SI->getCondition() != V) + return false; + + // If BB is reachable on a non-default case, then we simply know the value of + // V in this block. Substitute it and constant fold the icmp instruction + // away. + if (SI->getDefaultDest() != BB) { + ConstantInt *VVal = SI->findCaseDest(BB); + assert(VVal && "Should have a unique destination value"); + ICI->setOperand(0, VVal); + + if (Value *V = SimplifyInstruction(ICI, TD)) { + ICI->replaceAllUsesWith(V); + ICI->eraseFromParent(); + } + // BB is now empty, so it is likely to simplify away. + return SimplifyCFG(BB, TTI, TD) | true; + } + + // Ok, the block is reachable from the default dest. If the constant we're + // comparing exists in one of the other edges, then we can constant fold ICI + // and zap it. + if (SI->findCaseValue(Cst) != SI->case_default()) { + Value *V; + if (ICI->getPredicate() == ICmpInst::ICMP_EQ) + V = ConstantInt::getFalse(BB->getContext()); + else + V = ConstantInt::getTrue(BB->getContext()); + + ICI->replaceAllUsesWith(V); + ICI->eraseFromParent(); + // BB is now empty, so it is likely to simplify away. + return SimplifyCFG(BB, TTI, TD) | true; + } + + // The use of the icmp has to be in the 'end' block, by the only PHI node in + // the block. + BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0); + PHINode *PHIUse = dyn_cast<PHINode>(ICI->use_back()); + if (PHIUse == 0 || PHIUse != &SuccBlock->front() || + isa<PHINode>(++BasicBlock::iterator(PHIUse))) + return false; + + // If the icmp is a SETEQ, then the default dest gets false, the new edge gets + // true in the PHI. + Constant *DefaultCst = ConstantInt::getTrue(BB->getContext()); + Constant *NewCst = ConstantInt::getFalse(BB->getContext()); + + if (ICI->getPredicate() == ICmpInst::ICMP_EQ) + std::swap(DefaultCst, NewCst); + + // Replace ICI (which is used by the PHI for the default value) with true or + // false depending on if it is EQ or NE. + ICI->replaceAllUsesWith(DefaultCst); + ICI->eraseFromParent(); + + // Okay, the switch goes to this block on a default value. Add an edge from + // the switch to the merge point on the compared value. + BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "switch.edge", + BB->getParent(), BB); + SmallVector<uint64_t, 8> Weights; + bool HasWeights = HasBranchWeights(SI); + if (HasWeights) { + GetBranchWeights(SI, Weights); + if (Weights.size() == 1 + SI->getNumCases()) { + // Split weight for default case to case for "Cst". + Weights[0] = (Weights[0]+1) >> 1; + Weights.push_back(Weights[0]); + + SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end()); + SI->setMetadata(LLVMContext::MD_prof, + MDBuilder(SI->getContext()). + createBranchWeights(MDWeights)); + } + } + SI->addCase(Cst, NewBB); + + // NewBB branches to the phi block, add the uncond branch and the phi entry. + Builder.SetInsertPoint(NewBB); + Builder.SetCurrentDebugLocation(SI->getDebugLoc()); + Builder.CreateBr(SuccBlock); + PHIUse->addIncoming(NewCst, NewBB); + return true; +} + +/// SimplifyBranchOnICmpChain - The specified branch is a conditional branch. +/// Check to see if it is branching on an or/and chain of icmp instructions, and +/// fold it into a switch instruction if so. +static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *TD, + IRBuilder<> &Builder) { + Instruction *Cond = dyn_cast<Instruction>(BI->getCondition()); + if (Cond == 0) return false; + + + // Change br (X == 0 | X == 1), T, F into a switch instruction. + // If this is a bunch of seteq's or'd together, or if it's a bunch of + // 'setne's and'ed together, collect them. + Value *CompVal = 0; + std::vector<ConstantInt*> Values; + bool TrueWhenEqual = true; + Value *ExtraCase = 0; + unsigned UsedICmps = 0; + + if (Cond->getOpcode() == Instruction::Or) { + CompVal = GatherConstantCompares(Cond, Values, ExtraCase, TD, true, + UsedICmps); + } else if (Cond->getOpcode() == Instruction::And) { + CompVal = GatherConstantCompares(Cond, Values, ExtraCase, TD, false, + UsedICmps); + TrueWhenEqual = false; + } + + // If we didn't have a multiply compared value, fail. + if (CompVal == 0) return false; + + // Avoid turning single icmps into a switch. + if (UsedICmps <= 1) + return false; + + // There might be duplicate constants in the list, which the switch + // instruction can't handle, remove them now. + array_pod_sort(Values.begin(), Values.end(), ConstantIntSortPredicate); + Values.erase(std::unique(Values.begin(), Values.end()), Values.end()); + + // If Extra was used, we require at least two switch values to do the + // transformation. A switch with one value is just an cond branch. + if (ExtraCase && Values.size() < 2) return false; + + // TODO: Preserve branch weight metadata, similarly to how + // FoldValueComparisonIntoPredecessors preserves it. + + // Figure out which block is which destination. + BasicBlock *DefaultBB = BI->getSuccessor(1); + BasicBlock *EdgeBB = BI->getSuccessor(0); + if (!TrueWhenEqual) std::swap(DefaultBB, EdgeBB); + + BasicBlock *BB = BI->getParent(); + + DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size() + << " cases into SWITCH. BB is:\n" << *BB); + + // If there are any extra values that couldn't be folded into the switch + // then we evaluate them with an explicit branch first. Split the block + // right before the condbr to handle it. + if (ExtraCase) { + BasicBlock *NewBB = BB->splitBasicBlock(BI, "switch.early.test"); + // Remove the uncond branch added to the old block. + TerminatorInst *OldTI = BB->getTerminator(); + Builder.SetInsertPoint(OldTI); + + if (TrueWhenEqual) + Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB); + else + Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB); + + OldTI->eraseFromParent(); + + // If there are PHI nodes in EdgeBB, then we need to add a new entry to them + // for the edge we just added. + AddPredecessorToBlock(EdgeBB, BB, NewBB); + + DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase + << "\nEXTRABB = " << *BB); + BB = NewBB; + } + + Builder.SetInsertPoint(BI); + // Convert pointer to int before we switch. + if (CompVal->getType()->isPointerTy()) { + assert(TD && "Cannot switch on pointer without DataLayout"); + CompVal = Builder.CreatePtrToInt(CompVal, + TD->getIntPtrType(CompVal->getType()), + "magicptr"); + } + + // Create the new switch instruction now. + SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size()); + + // Add all of the 'cases' to the switch instruction. + for (unsigned i = 0, e = Values.size(); i != e; ++i) + New->addCase(Values[i], EdgeBB); + + // We added edges from PI to the EdgeBB. As such, if there were any + // PHI nodes in EdgeBB, they need entries to be added corresponding to + // the number of edges added. + for (BasicBlock::iterator BBI = EdgeBB->begin(); + isa<PHINode>(BBI); ++BBI) { + PHINode *PN = cast<PHINode>(BBI); + Value *InVal = PN->getIncomingValueForBlock(BB); + for (unsigned i = 0, e = Values.size()-1; i != e; ++i) + PN->addIncoming(InVal, BB); + } + + // Erase the old branch instruction. + EraseTerminatorInstAndDCECond(BI); + + DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n'); + return true; +} + +bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) { + // If this is a trivial landing pad that just continues unwinding the caught + // exception then zap the landing pad, turning its invokes into calls. + BasicBlock *BB = RI->getParent(); + LandingPadInst *LPInst = dyn_cast<LandingPadInst>(BB->getFirstNonPHI()); + if (RI->getValue() != LPInst) + // Not a landing pad, or the resume is not unwinding the exception that + // caused control to branch here. + return false; + + // Check that there are no other instructions except for debug intrinsics. + BasicBlock::iterator I = LPInst, E = RI; + while (++I != E) + if (!isa<DbgInfoIntrinsic>(I)) + return false; + + // Turn all invokes that unwind here into calls and delete the basic block. + bool InvokeRequiresTableEntry = false; + bool Changed = false; + for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) { + InvokeInst *II = cast<InvokeInst>((*PI++)->getTerminator()); + + if (II->hasFnAttr(Attribute::UWTable)) { + // Don't remove an `invoke' instruction if the ABI requires an entry into + // the table. + InvokeRequiresTableEntry = true; + continue; + } + + SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3); + + // Insert a call instruction before the invoke. + CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II); + Call->takeName(II); + Call->setCallingConv(II->getCallingConv()); + Call->setAttributes(II->getAttributes()); + Call->setDebugLoc(II->getDebugLoc()); + + // Anything that used the value produced by the invoke instruction now uses + // the value produced by the call instruction. Note that we do this even + // for void functions and calls with no uses so that the callgraph edge is + // updated. + II->replaceAllUsesWith(Call); + BB->removePredecessor(II->getParent()); + + // Insert a branch to the normal destination right before the invoke. + BranchInst::Create(II->getNormalDest(), II); + + // Finally, delete the invoke instruction! + II->eraseFromParent(); + Changed = true; + } + + if (!InvokeRequiresTableEntry) + // The landingpad is now unreachable. Zap it. + BB->eraseFromParent(); + + return Changed; +} + +bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { + BasicBlock *BB = RI->getParent(); + if (!BB->getFirstNonPHIOrDbg()->isTerminator()) return false; + + // Find predecessors that end with branches. + SmallVector<BasicBlock*, 8> UncondBranchPreds; + SmallVector<BranchInst*, 8> CondBranchPreds; + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + BasicBlock *P = *PI; + TerminatorInst *PTI = P->getTerminator(); + if (BranchInst *BI = dyn_cast<BranchInst>(PTI)) { + if (BI->isUnconditional()) + UncondBranchPreds.push_back(P); + else + CondBranchPreds.push_back(BI); + } + } + + // If we found some, do the transformation! + if (!UncondBranchPreds.empty() && DupRet) { + while (!UncondBranchPreds.empty()) { + BasicBlock *Pred = UncondBranchPreds.pop_back_val(); + DEBUG(dbgs() << "FOLDING: " << *BB + << "INTO UNCOND BRANCH PRED: " << *Pred); + (void)FoldReturnIntoUncondBranch(RI, BB, Pred); + } + + // If we eliminated all predecessors of the block, delete the block now. + if (pred_begin(BB) == pred_end(BB)) + // We know there are no successors, so just nuke the block. + BB->eraseFromParent(); + + return true; + } + + // Check out all of the conditional branches going to this return + // instruction. If any of them just select between returns, change the + // branch itself into a select/return pair. + while (!CondBranchPreds.empty()) { + BranchInst *BI = CondBranchPreds.pop_back_val(); + + // Check to see if the non-BB successor is also a return block. + if (isa<ReturnInst>(BI->getSuccessor(0)->getTerminator()) && + isa<ReturnInst>(BI->getSuccessor(1)->getTerminator()) && + SimplifyCondBranchToTwoReturns(BI, Builder)) + return true; + } + return false; +} + +bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { + BasicBlock *BB = UI->getParent(); + + bool Changed = false; + + // If there are any instructions immediately before the unreachable that can + // be removed, do so. + while (UI != BB->begin()) { + BasicBlock::iterator BBI = UI; + --BBI; + // Do not delete instructions that can have side effects which might cause + // the unreachable to not be reachable; specifically, calls and volatile + // operations may have this effect. + if (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI)) break; + + if (BBI->mayHaveSideEffects()) { + if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) { + if (SI->isVolatile()) + break; + } else if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) { + if (LI->isVolatile()) + break; + } else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(BBI)) { + if (RMWI->isVolatile()) + break; + } else if (AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(BBI)) { + if (CXI->isVolatile()) + break; + } else if (!isa<FenceInst>(BBI) && !isa<VAArgInst>(BBI) && + !isa<LandingPadInst>(BBI)) { + break; + } + // Note that deleting LandingPad's here is in fact okay, although it + // involves a bit of subtle reasoning. If this inst is a LandingPad, + // all the predecessors of this block will be the unwind edges of Invokes, + // and we can therefore guarantee this block will be erased. + } + + // Delete this instruction (any uses are guaranteed to be dead) + if (!BBI->use_empty()) + BBI->replaceAllUsesWith(UndefValue::get(BBI->getType())); + BBI->eraseFromParent(); + Changed = true; + } + + // If the unreachable instruction is the first in the block, take a gander + // at all of the predecessors of this instruction, and simplify them. + if (&BB->front() != UI) return Changed; + + SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB)); + for (unsigned i = 0, e = Preds.size(); i != e; ++i) { + TerminatorInst *TI = Preds[i]->getTerminator(); + IRBuilder<> Builder(TI); + if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { + if (BI->isUnconditional()) { + if (BI->getSuccessor(0) == BB) { + new UnreachableInst(TI->getContext(), TI); + TI->eraseFromParent(); + Changed = true; + } + } else { + if (BI->getSuccessor(0) == BB) { + Builder.CreateBr(BI->getSuccessor(1)); + EraseTerminatorInstAndDCECond(BI); + } else if (BI->getSuccessor(1) == BB) { + Builder.CreateBr(BI->getSuccessor(0)); + EraseTerminatorInstAndDCECond(BI); + Changed = true; + } + } + } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); + i != e; ++i) + if (i.getCaseSuccessor() == BB) { + BB->removePredecessor(SI->getParent()); + SI->removeCase(i); + --i; --e; + Changed = true; + } + // If the default value is unreachable, figure out the most popular + // destination and make it the default. + if (SI->getDefaultDest() == BB) { + std::map<BasicBlock*, std::pair<unsigned, unsigned> > Popularity; + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); + i != e; ++i) { + std::pair<unsigned, unsigned> &entry = + Popularity[i.getCaseSuccessor()]; + if (entry.first == 0) { + entry.first = 1; + entry.second = i.getCaseIndex(); + } else { + entry.first++; + } + } + + // Find the most popular block. + unsigned MaxPop = 0; + unsigned MaxIndex = 0; + BasicBlock *MaxBlock = 0; + for (std::map<BasicBlock*, std::pair<unsigned, unsigned> >::iterator + I = Popularity.begin(), E = Popularity.end(); I != E; ++I) { + if (I->second.first > MaxPop || + (I->second.first == MaxPop && MaxIndex > I->second.second)) { + MaxPop = I->second.first; + MaxIndex = I->second.second; + MaxBlock = I->first; + } + } + if (MaxBlock) { + // Make this the new default, allowing us to delete any explicit + // edges to it. + SI->setDefaultDest(MaxBlock); + Changed = true; + + // If MaxBlock has phinodes in it, remove MaxPop-1 entries from + // it. + if (isa<PHINode>(MaxBlock->begin())) + for (unsigned i = 0; i != MaxPop-1; ++i) + MaxBlock->removePredecessor(SI->getParent()); + + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); + i != e; ++i) + if (i.getCaseSuccessor() == MaxBlock) { + SI->removeCase(i); + --i; --e; + } + } + } + } else if (InvokeInst *II = dyn_cast<InvokeInst>(TI)) { + if (II->getUnwindDest() == BB) { + // Convert the invoke to a call instruction. This would be a good + // place to note that the call does not throw though. + BranchInst *BI = Builder.CreateBr(II->getNormalDest()); + II->removeFromParent(); // Take out of symbol table + + // Insert the call now... + SmallVector<Value*, 8> Args(II->op_begin(), II->op_end()-3); + Builder.SetInsertPoint(BI); + CallInst *CI = Builder.CreateCall(II->getCalledValue(), + Args, II->getName()); + CI->setCallingConv(II->getCallingConv()); + CI->setAttributes(II->getAttributes()); + // If the invoke produced a value, the call does now instead. + II->replaceAllUsesWith(CI); + delete II; + Changed = true; + } + } + } + + // If this block is now dead, remove it. + if (pred_begin(BB) == pred_end(BB) && + BB != &BB->getParent()->getEntryBlock()) { + // We know there are no successors, so just nuke the block. + BB->eraseFromParent(); + return true; + } + + return Changed; +} + +/// TurnSwitchRangeIntoICmp - Turns a switch with that contains only a +/// integer range comparison into a sub, an icmp and a branch. +static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { + assert(SI->getNumCases() > 1 && "Degenerate switch?"); + + // Make sure all cases point to the same destination and gather the values. + SmallVector<ConstantInt *, 16> Cases; + SwitchInst::CaseIt I = SI->case_begin(); + Cases.push_back(I.getCaseValue()); + SwitchInst::CaseIt PrevI = I++; + for (SwitchInst::CaseIt E = SI->case_end(); I != E; PrevI = I++) { + if (PrevI.getCaseSuccessor() != I.getCaseSuccessor()) + return false; + Cases.push_back(I.getCaseValue()); + } + assert(Cases.size() == SI->getNumCases() && "Not all cases gathered"); + + // Sort the case values, then check if they form a range we can transform. + array_pod_sort(Cases.begin(), Cases.end(), ConstantIntSortPredicate); + for (unsigned I = 1, E = Cases.size(); I != E; ++I) { + if (Cases[I-1]->getValue() != Cases[I]->getValue()+1) + return false; + } + + Constant *Offset = ConstantExpr::getNeg(Cases.back()); + Constant *NumCases = ConstantInt::get(Offset->getType(), SI->getNumCases()); + + Value *Sub = SI->getCondition(); + if (!Offset->isNullValue()) + Sub = Builder.CreateAdd(Sub, Offset, Sub->getName()+".off"); + Value *Cmp; + // If NumCases overflowed, then all possible values jump to the successor. + if (NumCases->isNullValue() && SI->getNumCases() != 0) + Cmp = ConstantInt::getTrue(SI->getContext()); + else + Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch"); + BranchInst *NewBI = Builder.CreateCondBr( + Cmp, SI->case_begin().getCaseSuccessor(), SI->getDefaultDest()); + + // Update weight for the newly-created conditional branch. + SmallVector<uint64_t, 8> Weights; + bool HasWeights = HasBranchWeights(SI); + if (HasWeights) { + GetBranchWeights(SI, Weights); + if (Weights.size() == 1 + SI->getNumCases()) { + // Combine all weights for the cases to be the true weight of NewBI. + // We assume that the sum of all weights for a Terminator can fit into 32 + // bits. + uint32_t NewTrueWeight = 0; + for (unsigned I = 1, E = Weights.size(); I != E; ++I) + NewTrueWeight += (uint32_t)Weights[I]; + NewBI->setMetadata(LLVMContext::MD_prof, + MDBuilder(SI->getContext()). + createBranchWeights(NewTrueWeight, + (uint32_t)Weights[0])); + } + } + + // Prune obsolete incoming values off the successor's PHI nodes. + for (BasicBlock::iterator BBI = SI->case_begin().getCaseSuccessor()->begin(); + isa<PHINode>(BBI); ++BBI) { + for (unsigned I = 0, E = SI->getNumCases()-1; I != E; ++I) + cast<PHINode>(BBI)->removeIncomingValue(SI->getParent()); + } + SI->eraseFromParent(); + + return true; +} + +/// EliminateDeadSwitchCases - Compute masked bits for the condition of a switch +/// and use it to remove dead cases. +static bool EliminateDeadSwitchCases(SwitchInst *SI) { + Value *Cond = SI->getCondition(); + unsigned Bits = Cond->getType()->getIntegerBitWidth(); + APInt KnownZero(Bits, 0), KnownOne(Bits, 0); + ComputeMaskedBits(Cond, KnownZero, KnownOne); + + // Gather dead cases. + SmallVector<ConstantInt*, 8> DeadCases; + for (SwitchInst::CaseIt I = SI->case_begin(), E = SI->case_end(); I != E; ++I) { + if ((I.getCaseValue()->getValue() & KnownZero) != 0 || + (I.getCaseValue()->getValue() & KnownOne) != KnownOne) { + DeadCases.push_back(I.getCaseValue()); + DEBUG(dbgs() << "SimplifyCFG: switch case '" + << I.getCaseValue() << "' is dead.\n"); + } + } + + SmallVector<uint64_t, 8> Weights; + bool HasWeight = HasBranchWeights(SI); + if (HasWeight) { + GetBranchWeights(SI, Weights); + HasWeight = (Weights.size() == 1 + SI->getNumCases()); + } + + // Remove dead cases from the switch. + for (unsigned I = 0, E = DeadCases.size(); I != E; ++I) { + SwitchInst::CaseIt Case = SI->findCaseValue(DeadCases[I]); + assert(Case != SI->case_default() && + "Case was not found. Probably mistake in DeadCases forming."); + if (HasWeight) { + std::swap(Weights[Case.getCaseIndex()+1], Weights.back()); + Weights.pop_back(); + } + + // Prune unused values from PHI nodes. + Case.getCaseSuccessor()->removePredecessor(SI->getParent()); + SI->removeCase(Case); + } + if (HasWeight) { + SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end()); + SI->setMetadata(LLVMContext::MD_prof, + MDBuilder(SI->getParent()->getContext()). + createBranchWeights(MDWeights)); + } + + return !DeadCases.empty(); +} + +/// FindPHIForConditionForwarding - If BB would be eligible for simplification +/// by TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated +/// by an unconditional branch), look at the phi node for BB in the successor +/// block and see if the incoming value is equal to CaseValue. If so, return +/// the phi node, and set PhiIndex to BB's index in the phi node. +static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue, + BasicBlock *BB, + int *PhiIndex) { + if (BB->getFirstNonPHIOrDbg() != BB->getTerminator()) + return NULL; // BB must be empty to be a candidate for simplification. + if (!BB->getSinglePredecessor()) + return NULL; // BB must be dominated by the switch. + + BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator()); + if (!Branch || !Branch->isUnconditional()) + return NULL; // Terminator must be unconditional branch. + + BasicBlock *Succ = Branch->getSuccessor(0); + + BasicBlock::iterator I = Succ->begin(); + while (PHINode *PHI = dyn_cast<PHINode>(I++)) { + int Idx = PHI->getBasicBlockIndex(BB); + assert(Idx >= 0 && "PHI has no entry for predecessor?"); + + Value *InValue = PHI->getIncomingValue(Idx); + if (InValue != CaseValue) continue; + + *PhiIndex = Idx; + return PHI; + } + + return NULL; +} + +/// ForwardSwitchConditionToPHI - Try to forward the condition of a switch +/// instruction to a phi node dominated by the switch, if that would mean that +/// some of the destination blocks of the switch can be folded away. +/// Returns true if a change is made. +static bool ForwardSwitchConditionToPHI(SwitchInst *SI) { + typedef DenseMap<PHINode*, SmallVector<int,4> > ForwardingNodesMap; + ForwardingNodesMap ForwardingNodes; + + for (SwitchInst::CaseIt I = SI->case_begin(), E = SI->case_end(); I != E; ++I) { + ConstantInt *CaseValue = I.getCaseValue(); + BasicBlock *CaseDest = I.getCaseSuccessor(); + + int PhiIndex; + PHINode *PHI = FindPHIForConditionForwarding(CaseValue, CaseDest, + &PhiIndex); + if (!PHI) continue; + + ForwardingNodes[PHI].push_back(PhiIndex); + } + + bool Changed = false; + + for (ForwardingNodesMap::iterator I = ForwardingNodes.begin(), + E = ForwardingNodes.end(); I != E; ++I) { + PHINode *Phi = I->first; + SmallVectorImpl<int> &Indexes = I->second; + + if (Indexes.size() < 2) continue; + + for (size_t I = 0, E = Indexes.size(); I != E; ++I) + Phi->setIncomingValue(Indexes[I], SI->getCondition()); + Changed = true; + } + + return Changed; +} + +/// ValidLookupTableConstant - Return true if the backend will be able to handle +/// initializing an array of constants like C. +static bool ValidLookupTableConstant(Constant *C) { + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) + return CE->isGEPWithNoNotionalOverIndexing(); + + return isa<ConstantFP>(C) || + isa<ConstantInt>(C) || + isa<ConstantPointerNull>(C) || + isa<GlobalValue>(C) || + isa<UndefValue>(C); +} + +/// LookupConstant - If V is a Constant, return it. Otherwise, try to look up +/// its constant value in ConstantPool, returning 0 if it's not there. +static Constant *LookupConstant(Value *V, + const SmallDenseMap<Value*, Constant*>& ConstantPool) { + if (Constant *C = dyn_cast<Constant>(V)) + return C; + return ConstantPool.lookup(V); +} + +/// ConstantFold - Try to fold instruction I into a constant. This works for +/// simple instructions such as binary operations where both operands are +/// constant or can be replaced by constants from the ConstantPool. Returns the +/// resulting constant on success, 0 otherwise. +static Constant * +ConstantFold(Instruction *I, + const SmallDenseMap<Value *, Constant *> &ConstantPool, + const DataLayout *DL) { + if (SelectInst *Select = dyn_cast<SelectInst>(I)) { + Constant *A = LookupConstant(Select->getCondition(), ConstantPool); + if (!A) + return 0; + if (A->isAllOnesValue()) + return LookupConstant(Select->getTrueValue(), ConstantPool); + if (A->isNullValue()) + return LookupConstant(Select->getFalseValue(), ConstantPool); + return 0; + } + + SmallVector<Constant *, 4> COps; + for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) { + if (Constant *A = LookupConstant(I->getOperand(N), ConstantPool)) + COps.push_back(A); + else + return 0; + } + + if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) + return ConstantFoldCompareInstOperands(Cmp->getPredicate(), COps[0], + COps[1], DL); + + return ConstantFoldInstOperands(I->getOpcode(), I->getType(), COps, DL); +} + +/// GetCaseResults - Try to determine the resulting constant values in phi nodes +/// at the common destination basic block, *CommonDest, for one of the case +/// destionations CaseDest corresponding to value CaseVal (0 for the default +/// case), of a switch instruction SI. +static bool +GetCaseResults(SwitchInst *SI, + ConstantInt *CaseVal, + BasicBlock *CaseDest, + BasicBlock **CommonDest, + SmallVectorImpl<std::pair<PHINode *, Constant *> > &Res, + const DataLayout *DL) { + // The block from which we enter the common destination. + BasicBlock *Pred = SI->getParent(); + + // If CaseDest is empty except for some side-effect free instructions through + // which we can constant-propagate the CaseVal, continue to its successor. + SmallDenseMap<Value*, Constant*> ConstantPool; + ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal)); + for (BasicBlock::iterator I = CaseDest->begin(), E = CaseDest->end(); I != E; + ++I) { + if (TerminatorInst *T = dyn_cast<TerminatorInst>(I)) { + // If the terminator is a simple branch, continue to the next block. + if (T->getNumSuccessors() != 1) + return false; + Pred = CaseDest; + CaseDest = T->getSuccessor(0); + } else if (isa<DbgInfoIntrinsic>(I)) { + // Skip debug intrinsic. + continue; + } else if (Constant *C = ConstantFold(I, ConstantPool, DL)) { + // Instruction is side-effect free and constant. + ConstantPool.insert(std::make_pair(I, C)); + } else { + break; + } + } + + // If we did not have a CommonDest before, use the current one. + if (!*CommonDest) + *CommonDest = CaseDest; + // If the destination isn't the common one, abort. + if (CaseDest != *CommonDest) + return false; + + // Get the values for this case from phi nodes in the destination block. + BasicBlock::iterator I = (*CommonDest)->begin(); + while (PHINode *PHI = dyn_cast<PHINode>(I++)) { + int Idx = PHI->getBasicBlockIndex(Pred); + if (Idx == -1) + continue; + + Constant *ConstVal = LookupConstant(PHI->getIncomingValue(Idx), + ConstantPool); + if (!ConstVal) + return false; + + // Note: If the constant comes from constant-propagating the case value + // through the CaseDest basic block, it will be safe to remove the + // instructions in that block. They cannot be used (except in the phi nodes + // we visit) outside CaseDest, because that block does not dominate its + // successor. If it did, we would not be in this phi node. + + // Be conservative about which kinds of constants we support. + if (!ValidLookupTableConstant(ConstVal)) + return false; + + Res.push_back(std::make_pair(PHI, ConstVal)); + } + + return true; +} + +namespace { + /// SwitchLookupTable - This class represents a lookup table that can be used + /// to replace a switch. + class SwitchLookupTable { + public: + /// SwitchLookupTable - Create a lookup table to use as a switch replacement + /// with the contents of Values, using DefaultValue to fill any holes in the + /// table. + SwitchLookupTable(Module &M, + uint64_t TableSize, + ConstantInt *Offset, + const SmallVectorImpl<std::pair<ConstantInt*, Constant*> >& Values, + Constant *DefaultValue, + const DataLayout *TD); + + /// BuildLookup - Build instructions with Builder to retrieve the value at + /// the position given by Index in the lookup table. + Value *BuildLookup(Value *Index, IRBuilder<> &Builder); + + /// WouldFitInRegister - Return true if a table with TableSize elements of + /// type ElementType would fit in a target-legal register. + static bool WouldFitInRegister(const DataLayout *TD, + uint64_t TableSize, + const Type *ElementType); + + private: + // Depending on the contents of the table, it can be represented in + // different ways. + enum { + // For tables where each element contains the same value, we just have to + // store that single value and return it for each lookup. + SingleValueKind, + + // For small tables with integer elements, we can pack them into a bitmap + // that fits into a target-legal register. Values are retrieved by + // shift and mask operations. + BitMapKind, + + // The table is stored as an array of values. Values are retrieved by load + // instructions from the table. + ArrayKind + } Kind; + + // For SingleValueKind, this is the single value. + Constant *SingleValue; + + // For BitMapKind, this is the bitmap. + ConstantInt *BitMap; + IntegerType *BitMapElementTy; + + // For ArrayKind, this is the array. + GlobalVariable *Array; + }; +} + +SwitchLookupTable::SwitchLookupTable(Module &M, + uint64_t TableSize, + ConstantInt *Offset, + const SmallVectorImpl<std::pair<ConstantInt*, Constant*> >& Values, + Constant *DefaultValue, + const DataLayout *TD) + : SingleValue(0), BitMap(0), BitMapElementTy(0), Array(0) { + assert(Values.size() && "Can't build lookup table without values!"); + assert(TableSize >= Values.size() && "Can't fit values in table!"); + + // If all values in the table are equal, this is that value. + SingleValue = Values.begin()->second; + + // Build up the table contents. + SmallVector<Constant*, 64> TableContents(TableSize); + for (size_t I = 0, E = Values.size(); I != E; ++I) { + ConstantInt *CaseVal = Values[I].first; + Constant *CaseRes = Values[I].second; + assert(CaseRes->getType() == DefaultValue->getType()); + + uint64_t Idx = (CaseVal->getValue() - Offset->getValue()) + .getLimitedValue(); + TableContents[Idx] = CaseRes; + + if (CaseRes != SingleValue) + SingleValue = 0; + } + + // Fill in any holes in the table with the default result. + if (Values.size() < TableSize) { + for (uint64_t I = 0; I < TableSize; ++I) { + if (!TableContents[I]) + TableContents[I] = DefaultValue; + } + + if (DefaultValue != SingleValue) + SingleValue = 0; + } + + // If each element in the table contains the same value, we only need to store + // that single value. + if (SingleValue) { + Kind = SingleValueKind; + return; + } + + // If the type is integer and the table fits in a register, build a bitmap. + if (WouldFitInRegister(TD, TableSize, DefaultValue->getType())) { + IntegerType *IT = cast<IntegerType>(DefaultValue->getType()); + APInt TableInt(TableSize * IT->getBitWidth(), 0); + for (uint64_t I = TableSize; I > 0; --I) { + TableInt <<= IT->getBitWidth(); + // Insert values into the bitmap. Undef values are set to zero. + if (!isa<UndefValue>(TableContents[I - 1])) { + ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]); + TableInt |= Val->getValue().zext(TableInt.getBitWidth()); + } + } + BitMap = ConstantInt::get(M.getContext(), TableInt); + BitMapElementTy = IT; + Kind = BitMapKind; + ++NumBitMaps; + return; + } + + // Store the table in an array. + ArrayType *ArrayTy = ArrayType::get(DefaultValue->getType(), TableSize); + Constant *Initializer = ConstantArray::get(ArrayTy, TableContents); + + Array = new GlobalVariable(M, ArrayTy, /*constant=*/ true, + GlobalVariable::PrivateLinkage, + Initializer, + "switch.table"); + Array->setUnnamedAddr(true); + Kind = ArrayKind; +} + +Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) { + switch (Kind) { + case SingleValueKind: + return SingleValue; + case BitMapKind: { + // Type of the bitmap (e.g. i59). + IntegerType *MapTy = BitMap->getType(); + + // Cast Index to the same type as the bitmap. + // Note: The Index is <= the number of elements in the table, so + // truncating it to the width of the bitmask is safe. + Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast"); + + // Multiply the shift amount by the element width. + ShiftAmt = Builder.CreateMul(ShiftAmt, + ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()), + "switch.shiftamt"); + + // Shift down. + Value *DownShifted = Builder.CreateLShr(BitMap, ShiftAmt, + "switch.downshift"); + // Mask off. + return Builder.CreateTrunc(DownShifted, BitMapElementTy, + "switch.masked"); + } + case ArrayKind: { + Value *GEPIndices[] = { Builder.getInt32(0), Index }; + Value *GEP = Builder.CreateInBoundsGEP(Array, GEPIndices, + "switch.gep"); + return Builder.CreateLoad(GEP, "switch.load"); + } + } + llvm_unreachable("Unknown lookup table kind!"); +} + +bool SwitchLookupTable::WouldFitInRegister(const DataLayout *TD, + uint64_t TableSize, + const Type *ElementType) { + if (!TD) + return false; + const IntegerType *IT = dyn_cast<IntegerType>(ElementType); + if (!IT) + return false; + // FIXME: If the type is wider than it needs to be, e.g. i8 but all values + // are <= 15, we could try to narrow the type. + + // Avoid overflow, fitsInLegalInteger uses unsigned int for the width. + if (TableSize >= UINT_MAX/IT->getBitWidth()) + return false; + return TD->fitsInLegalInteger(TableSize * IT->getBitWidth()); +} + +/// ShouldBuildLookupTable - Determine whether a lookup table should be built +/// for this switch, based on the number of cases, size of the table and the +/// types of the results. +static bool ShouldBuildLookupTable(SwitchInst *SI, + uint64_t TableSize, + const TargetTransformInfo &TTI, + const DataLayout *TD, + const SmallDenseMap<PHINode*, Type*>& ResultTypes) { + if (SI->getNumCases() > TableSize || TableSize >= UINT64_MAX / 10) + return false; // TableSize overflowed, or mul below might overflow. + + bool AllTablesFitInRegister = true; + bool HasIllegalType = false; + for (SmallDenseMap<PHINode*, Type*>::const_iterator I = ResultTypes.begin(), + E = ResultTypes.end(); I != E; ++I) { + Type *Ty = I->second; + + // Saturate this flag to true. + HasIllegalType = HasIllegalType || !TTI.isTypeLegal(Ty); + + // Saturate this flag to false. + AllTablesFitInRegister = AllTablesFitInRegister && + SwitchLookupTable::WouldFitInRegister(TD, TableSize, Ty); + + // If both flags saturate, we're done. NOTE: This *only* works with + // saturating flags, and all flags have to saturate first due to the + // non-deterministic behavior of iterating over a dense map. + if (HasIllegalType && !AllTablesFitInRegister) + break; + } + + // If each table would fit in a register, we should build it anyway. + if (AllTablesFitInRegister) + return true; + + // Don't build a table that doesn't fit in-register if it has illegal types. + if (HasIllegalType) + return false; + + // The table density should be at least 40%. This is the same criterion as for + // jump tables, see SelectionDAGBuilder::handleJTSwitchCase. + // FIXME: Find the best cut-off. + return SI->getNumCases() * 10 >= TableSize * 4; +} + +/// SwitchToLookupTable - If the switch is only used to initialize one or more +/// phi nodes in a common successor block with different constant values, +/// replace the switch with lookup tables. +static bool SwitchToLookupTable(SwitchInst *SI, + IRBuilder<> &Builder, + const TargetTransformInfo &TTI, + const DataLayout* TD) { + assert(SI->getNumCases() > 1 && "Degenerate switch?"); + + // Only build lookup table when we have a target that supports it. + if (!TTI.shouldBuildLookupTables()) + return false; + + // FIXME: If the switch is too sparse for a lookup table, perhaps we could + // split off a dense part and build a lookup table for that. + + // FIXME: This creates arrays of GEPs to constant strings, which means each + // GEP needs a runtime relocation in PIC code. We should just build one big + // string and lookup indices into that. + + // Ignore the switch if the number of cases is too small. + // This is similar to the check when building jump tables in + // SelectionDAGBuilder::handleJTSwitchCase. + // FIXME: Determine the best cut-off. + if (SI->getNumCases() < 4) + return false; + + // Figure out the corresponding result for each case value and phi node in the + // common destination, as well as the the min and max case values. + assert(SI->case_begin() != SI->case_end()); + SwitchInst::CaseIt CI = SI->case_begin(); + ConstantInt *MinCaseVal = CI.getCaseValue(); + ConstantInt *MaxCaseVal = CI.getCaseValue(); + + BasicBlock *CommonDest = 0; + typedef SmallVector<std::pair<ConstantInt*, Constant*>, 4> ResultListTy; + SmallDenseMap<PHINode*, ResultListTy> ResultLists; + SmallDenseMap<PHINode*, Constant*> DefaultResults; + SmallDenseMap<PHINode*, Type*> ResultTypes; + SmallVector<PHINode*, 4> PHIs; + + for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) { + ConstantInt *CaseVal = CI.getCaseValue(); + if (CaseVal->getValue().slt(MinCaseVal->getValue())) + MinCaseVal = CaseVal; + if (CaseVal->getValue().sgt(MaxCaseVal->getValue())) + MaxCaseVal = CaseVal; + + // Resulting value at phi nodes for this case value. + typedef SmallVector<std::pair<PHINode*, Constant*>, 4> ResultsTy; + ResultsTy Results; + if (!GetCaseResults(SI, CaseVal, CI.getCaseSuccessor(), &CommonDest, + Results, TD)) + return false; + + // Append the result from this case to the list for each phi. + for (ResultsTy::iterator I = Results.begin(), E = Results.end(); I!=E; ++I) { + if (!ResultLists.count(I->first)) + PHIs.push_back(I->first); + ResultLists[I->first].push_back(std::make_pair(CaseVal, I->second)); + } + } + + // Get the resulting values for the default case. + SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList; + if (!GetCaseResults(SI, 0, SI->getDefaultDest(), &CommonDest, + DefaultResultsList, TD)) + return false; + for (size_t I = 0, E = DefaultResultsList.size(); I != E; ++I) { + PHINode *PHI = DefaultResultsList[I].first; + Constant *Result = DefaultResultsList[I].second; + DefaultResults[PHI] = Result; + ResultTypes[PHI] = Result->getType(); + } + + APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue(); + uint64_t TableSize = RangeSpread.getLimitedValue() + 1; + if (!ShouldBuildLookupTable(SI, TableSize, TTI, TD, ResultTypes)) + return false; + + // Create the BB that does the lookups. + Module &Mod = *CommonDest->getParent()->getParent(); + BasicBlock *LookupBB = BasicBlock::Create(Mod.getContext(), + "switch.lookup", + CommonDest->getParent(), + CommonDest); + + // Compute the table index value. + Builder.SetInsertPoint(SI); + Value *TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal, + "switch.tableidx"); + + // Compute the maximum table size representable by the integer type we are + // switching upon. + unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits(); + uint64_t MaxTableSize = CaseSize > 63? UINT64_MAX : 1ULL << CaseSize; + assert(MaxTableSize >= TableSize && + "It is impossible for a switch to have more entries than the max " + "representable value of its input integer type's size."); + + // If we have a fully covered lookup table, unconditionally branch to the + // lookup table BB. Otherwise, check if the condition value is within the case + // range. If it is so, branch to the new BB. Otherwise branch to SI's default + // destination. + const bool GeneratingCoveredLookupTable = MaxTableSize == TableSize; + if (GeneratingCoveredLookupTable) { + Builder.CreateBr(LookupBB); + SI->getDefaultDest()->removePredecessor(SI->getParent()); + } else { + Value *Cmp = Builder.CreateICmpULT(TableIndex, ConstantInt::get( + MinCaseVal->getType(), TableSize)); + Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest()); + } + + // Populate the BB that does the lookups. + Builder.SetInsertPoint(LookupBB); + bool ReturnedEarly = false; + for (size_t I = 0, E = PHIs.size(); I != E; ++I) { + PHINode *PHI = PHIs[I]; + + SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultLists[PHI], + DefaultResults[PHI], TD); + + Value *Result = Table.BuildLookup(TableIndex, Builder); + + // If the result is used to return immediately from the function, we want to + // do that right here. + if (PHI->hasOneUse() && isa<ReturnInst>(*PHI->use_begin()) && + *PHI->use_begin() == CommonDest->getFirstNonPHIOrDbg()) { + Builder.CreateRet(Result); + ReturnedEarly = true; + break; + } + + PHI->addIncoming(Result, LookupBB); + } + + if (!ReturnedEarly) + Builder.CreateBr(CommonDest); + + // Remove the switch. + for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) { + BasicBlock *Succ = SI->getSuccessor(i); + + if (Succ == SI->getDefaultDest()) + continue; + Succ->removePredecessor(SI->getParent()); + } + SI->eraseFromParent(); + + ++NumLookupTables; + return true; +} + +bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { + BasicBlock *BB = SI->getParent(); + + if (isValueEqualityComparison(SI)) { + // If we only have one predecessor, and if it is a branch on this value, + // see if that predecessor totally determines the outcome of this switch. + if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) + if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder)) + return SimplifyCFG(BB, TTI, TD) | true; + + Value *Cond = SI->getCondition(); + if (SelectInst *Select = dyn_cast<SelectInst>(Cond)) + if (SimplifySwitchOnSelect(SI, Select)) + return SimplifyCFG(BB, TTI, TD) | true; + + // If the block only contains the switch, see if we can fold the block + // away into any preds. + BasicBlock::iterator BBI = BB->begin(); + // Ignore dbg intrinsics. + while (isa<DbgInfoIntrinsic>(BBI)) + ++BBI; + if (SI == &*BBI) + if (FoldValueComparisonIntoPredecessors(SI, Builder)) + return SimplifyCFG(BB, TTI, TD) | true; + } + + // Try to transform the switch into an icmp and a branch. + if (TurnSwitchRangeIntoICmp(SI, Builder)) + return SimplifyCFG(BB, TTI, TD) | true; + + // Remove unreachable cases. + if (EliminateDeadSwitchCases(SI)) + return SimplifyCFG(BB, TTI, TD) | true; + + if (ForwardSwitchConditionToPHI(SI)) + return SimplifyCFG(BB, TTI, TD) | true; + + if (SwitchToLookupTable(SI, Builder, TTI, TD)) + return SimplifyCFG(BB, TTI, TD) | true; + + return false; +} + +bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { + BasicBlock *BB = IBI->getParent(); + bool Changed = false; + + // Eliminate redundant destinations. + SmallPtrSet<Value *, 8> Succs; + for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { + BasicBlock *Dest = IBI->getDestination(i); + if (!Dest->hasAddressTaken() || !Succs.insert(Dest)) { + Dest->removePredecessor(BB); + IBI->removeDestination(i); + --i; --e; + Changed = true; + } + } + + if (IBI->getNumDestinations() == 0) { + // If the indirectbr has no successors, change it to unreachable. + new UnreachableInst(IBI->getContext(), IBI); + EraseTerminatorInstAndDCECond(IBI); + return true; + } + + if (IBI->getNumDestinations() == 1) { + // If the indirectbr has one successor, change it to a direct branch. + BranchInst::Create(IBI->getDestination(0), IBI); + EraseTerminatorInstAndDCECond(IBI); + return true; + } + + if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) { + if (SimplifyIndirectBrOnSelect(IBI, SI)) + return SimplifyCFG(BB, TTI, TD) | true; + } + return Changed; +} + +bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ + BasicBlock *BB = BI->getParent(); + + if (SinkCommon && SinkThenElseCodeToEnd(BI)) + return true; + + // If the Terminator is the only non-phi instruction, simplify the block. + BasicBlock::iterator I = BB->getFirstNonPHIOrDbgOrLifetime(); + if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() && + TryToSimplifyUncondBranchFromEmptyBlock(BB)) + return true; + + // If the only instruction in the block is a seteq/setne comparison + // against a constant, try to simplify the block. + if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) + if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) { + for (++I; isa<DbgInfoIntrinsic>(I); ++I) + ; + if (I->isTerminator() && + TryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, TTI, TD)) + return true; + } + + // If this basic block is ONLY a compare and a branch, and if a predecessor + // branches to us and our successor, fold the comparison into the + // predecessor and use logical operations to update the incoming value + // for PHI nodes in common successor. + if (FoldBranchToCommonDest(BI)) + return SimplifyCFG(BB, TTI, TD) | true; + return false; +} + + +bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { + BasicBlock *BB = BI->getParent(); + + // Conditional branch + if (isValueEqualityComparison(BI)) { + // If we only have one predecessor, and if it is a branch on this value, + // see if that predecessor totally determines the outcome of this + // switch. + if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) + if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder)) + return SimplifyCFG(BB, TTI, TD) | true; + + // This block must be empty, except for the setcond inst, if it exists. + // Ignore dbg intrinsics. + BasicBlock::iterator I = BB->begin(); + // Ignore dbg intrinsics. + while (isa<DbgInfoIntrinsic>(I)) + ++I; + if (&*I == BI) { + if (FoldValueComparisonIntoPredecessors(BI, Builder)) + return SimplifyCFG(BB, TTI, TD) | true; + } else if (&*I == cast<Instruction>(BI->getCondition())){ + ++I; + // Ignore dbg intrinsics. + while (isa<DbgInfoIntrinsic>(I)) + ++I; + if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder)) + return SimplifyCFG(BB, TTI, TD) | true; + } + } + + // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction. + if (SimplifyBranchOnICmpChain(BI, TD, Builder)) + return true; + + // If this basic block is ONLY a compare and a branch, and if a predecessor + // branches to us and one of our successors, fold the comparison into the + // predecessor and use logical operations to pick the right destination. + if (FoldBranchToCommonDest(BI)) + return SimplifyCFG(BB, TTI, TD) | true; + + // We have a conditional branch to two blocks that are only reachable + // from BI. We know that the condbr dominates the two blocks, so see if + // there is any identical code in the "then" and "else" blocks. If so, we + // can hoist it up to the branching block. + if (BI->getSuccessor(0)->getSinglePredecessor() != 0) { + if (BI->getSuccessor(1)->getSinglePredecessor() != 0) { + if (HoistThenElseCodeToIf(BI)) + return SimplifyCFG(BB, TTI, TD) | true; + } else { + // If Successor #1 has multiple preds, we may be able to conditionally + // execute Successor #0 if it branches to successor #1. + TerminatorInst *Succ0TI = BI->getSuccessor(0)->getTerminator(); + if (Succ0TI->getNumSuccessors() == 1 && + Succ0TI->getSuccessor(0) == BI->getSuccessor(1)) + if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0))) + return SimplifyCFG(BB, TTI, TD) | true; + } + } else if (BI->getSuccessor(1)->getSinglePredecessor() != 0) { + // If Successor #0 has multiple preds, we may be able to conditionally + // execute Successor #1 if it branches to successor #0. + TerminatorInst *Succ1TI = BI->getSuccessor(1)->getTerminator(); + if (Succ1TI->getNumSuccessors() == 1 && + Succ1TI->getSuccessor(0) == BI->getSuccessor(0)) + if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1))) + return SimplifyCFG(BB, TTI, TD) | true; + } + + // If this is a branch on a phi node in the current block, thread control + // through this block if any PHI node entries are constants. + if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition())) + if (PN->getParent() == BI->getParent()) + if (FoldCondBranchOnPHI(BI, TD)) + return SimplifyCFG(BB, TTI, TD) | true; + + // Scan predecessor blocks for conditional branches. + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) + if (PBI != BI && PBI->isConditional()) + if (SimplifyCondBranchToCondBranch(PBI, BI)) + return SimplifyCFG(BB, TTI, TD) | true; + + return false; +} + +/// Check if passing a value to an instruction will cause undefined behavior. +static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) { + Constant *C = dyn_cast<Constant>(V); + if (!C) + return false; + + if (I->use_empty()) + return false; + + if (C->isNullValue()) { + // Only look at the first use, avoid hurting compile time with long uselists + User *Use = *I->use_begin(); + + // Now make sure that there are no instructions in between that can alter + // control flow (eg. calls) + for (BasicBlock::iterator i = ++BasicBlock::iterator(I); &*i != Use; ++i) + if (i == I->getParent()->end() || i->mayHaveSideEffects()) + return false; + + // Look through GEPs. A load from a GEP derived from NULL is still undefined + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Use)) + if (GEP->getPointerOperand() == I) + return passingValueIsAlwaysUndefined(V, GEP); + + // Look through bitcasts. + if (BitCastInst *BC = dyn_cast<BitCastInst>(Use)) + return passingValueIsAlwaysUndefined(V, BC); + + // Load from null is undefined. + if (LoadInst *LI = dyn_cast<LoadInst>(Use)) + if (!LI->isVolatile()) + return LI->getPointerAddressSpace() == 0; + + // Store to null is undefined. + if (StoreInst *SI = dyn_cast<StoreInst>(Use)) + if (!SI->isVolatile()) + return SI->getPointerAddressSpace() == 0 && SI->getPointerOperand() == I; + } + return false; +} + +/// If BB has an incoming value that will always trigger undefined behavior +/// (eg. null pointer dereference), remove the branch leading here. +static bool removeUndefIntroducingPredecessor(BasicBlock *BB) { + for (BasicBlock::iterator i = BB->begin(); + PHINode *PHI = dyn_cast<PHINode>(i); ++i) + for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) + if (passingValueIsAlwaysUndefined(PHI->getIncomingValue(i), PHI)) { + TerminatorInst *T = PHI->getIncomingBlock(i)->getTerminator(); + IRBuilder<> Builder(T); + if (BranchInst *BI = dyn_cast<BranchInst>(T)) { + BB->removePredecessor(PHI->getIncomingBlock(i)); + // Turn uncoditional branches into unreachables and remove the dead + // destination from conditional branches. + if (BI->isUnconditional()) + Builder.CreateUnreachable(); + else + Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1) : + BI->getSuccessor(0)); + BI->eraseFromParent(); + return true; + } + // TODO: SwitchInst. + } + + return false; +} + +bool SimplifyCFGOpt::run(BasicBlock *BB) { + bool Changed = false; + + assert(BB && BB->getParent() && "Block not embedded in function!"); + assert(BB->getTerminator() && "Degenerate basic block encountered!"); + + // Remove basic blocks that have no predecessors (except the entry block)... + // or that just have themself as a predecessor. These are unreachable. + if ((pred_begin(BB) == pred_end(BB) && + BB != &BB->getParent()->getEntryBlock()) || + BB->getSinglePredecessor() == BB) { + DEBUG(dbgs() << "Removing BB: \n" << *BB); + DeleteDeadBlock(BB); + return true; + } + + // Check to see if we can constant propagate this terminator instruction + // away... + Changed |= ConstantFoldTerminator(BB, true); + + // Check for and eliminate duplicate PHI nodes in this block. + Changed |= EliminateDuplicatePHINodes(BB); + + // Check for and remove branches that will always cause undefined behavior. + Changed |= removeUndefIntroducingPredecessor(BB); + + // Merge basic blocks into their predecessor if there is only one distinct + // pred, and if there is only one distinct successor of the predecessor, and + // if there are no PHI nodes. + // + if (MergeBlockIntoPredecessor(BB)) + return true; + + IRBuilder<> Builder(BB); + + // If there is a trivial two-entry PHI node in this basic block, and we can + // eliminate it, do so now. + if (PHINode *PN = dyn_cast<PHINode>(BB->begin())) + if (PN->getNumIncomingValues() == 2) + Changed |= FoldTwoEntryPHINode(PN, TD); + + Builder.SetInsertPoint(BB->getTerminator()); + if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) { + if (BI->isUnconditional()) { + if (SimplifyUncondBranch(BI, Builder)) return true; + } else { + if (SimplifyCondBranch(BI, Builder)) return true; + } + } else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { + if (SimplifyReturn(RI, Builder)) return true; + } else if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) { + if (SimplifyResume(RI, Builder)) return true; + } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) { + if (SimplifySwitch(SI, Builder)) return true; + } else if (UnreachableInst *UI = + dyn_cast<UnreachableInst>(BB->getTerminator())) { + if (SimplifyUnreachable(UI)) return true; + } else if (IndirectBrInst *IBI = + dyn_cast<IndirectBrInst>(BB->getTerminator())) { + if (SimplifyIndirectBr(IBI)) return true; + } + + return Changed; +} + +/// SimplifyCFG - This function is used to do simplification of a CFG. For +/// example, it adjusts branches to branches to eliminate the extra hop, it +/// eliminates unreachable basic blocks, and does other "peephole" optimization +/// of the CFG. It returns true if a modification was made. +/// +bool llvm::SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, + const DataLayout *TD) { + return SimplifyCFGOpt(TTI, TD).run(BB); +} diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp new file mode 100644 index 000000000000..bf3442aeaaad --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -0,0 +1,393 @@ +//===-- SimplifyIndVar.cpp - Induction variable simplification ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements induction variable simplification. It does +// not define any actual pass or policy, but provides a single function to +// simplify a loop's induction variables based on ScalarEvolution. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "indvars" + +#include "llvm/Transforms/Utils/SimplifyIndVar.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/IVUsers.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +STATISTIC(NumElimIdentity, "Number of IV identities eliminated"); +STATISTIC(NumElimOperand, "Number of IV operands folded into a use"); +STATISTIC(NumElimRem , "Number of IV remainder operations eliminated"); +STATISTIC(NumElimCmp , "Number of IV comparisons eliminated"); + +namespace { + /// SimplifyIndvar - This is a utility for simplifying induction variables + /// based on ScalarEvolution. It is the primary instrument of the + /// IndvarSimplify pass, but it may also be directly invoked to cleanup after + /// other loop passes that preserve SCEV. + class SimplifyIndvar { + Loop *L; + LoopInfo *LI; + ScalarEvolution *SE; + const DataLayout *TD; // May be NULL + + SmallVectorImpl<WeakVH> &DeadInsts; + + bool Changed; + + public: + SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, LPPassManager *LPM, + SmallVectorImpl<WeakVH> &Dead, IVUsers *IVU = NULL) : + L(Loop), + LI(LPM->getAnalysisIfAvailable<LoopInfo>()), + SE(SE), + TD(LPM->getAnalysisIfAvailable<DataLayout>()), + DeadInsts(Dead), + Changed(false) { + assert(LI && "IV simplification requires LoopInfo"); + } + + bool hasChanged() const { return Changed; } + + /// Iteratively perform simplification on a worklist of users of the + /// specified induction variable. This is the top-level driver that applies + /// all simplicitions to users of an IV. + void simplifyUsers(PHINode *CurrIV, IVVisitor *V = NULL); + + Value *foldIVUser(Instruction *UseInst, Instruction *IVOperand); + + bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand); + void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand); + void eliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand, + bool IsSigned); + }; +} + +/// foldIVUser - Fold an IV operand into its use. This removes increments of an +/// aligned IV when used by a instruction that ignores the low bits. +/// +/// IVOperand is guaranteed SCEVable, but UseInst may not be. +/// +/// Return the operand of IVOperand for this induction variable if IVOperand can +/// be folded (in case more folding opportunities have been exposed). +/// Otherwise return null. +Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) { + Value *IVSrc = 0; + unsigned OperIdx = 0; + const SCEV *FoldedExpr = 0; + switch (UseInst->getOpcode()) { + default: + return 0; + case Instruction::UDiv: + case Instruction::LShr: + // We're only interested in the case where we know something about + // the numerator and have a constant denominator. + if (IVOperand != UseInst->getOperand(OperIdx) || + !isa<ConstantInt>(UseInst->getOperand(1))) + return 0; + + // Attempt to fold a binary operator with constant operand. + // e.g. ((I + 1) >> 2) => I >> 2 + if (!isa<BinaryOperator>(IVOperand) + || !isa<ConstantInt>(IVOperand->getOperand(1))) + return 0; + + IVSrc = IVOperand->getOperand(0); + // IVSrc must be the (SCEVable) IV, since the other operand is const. + assert(SE->isSCEVable(IVSrc->getType()) && "Expect SCEVable IV operand"); + + ConstantInt *D = cast<ConstantInt>(UseInst->getOperand(1)); + if (UseInst->getOpcode() == Instruction::LShr) { + // Get a constant for the divisor. See createSCEV. + uint32_t BitWidth = cast<IntegerType>(UseInst->getType())->getBitWidth(); + if (D->getValue().uge(BitWidth)) + return 0; + + D = ConstantInt::get(UseInst->getContext(), + APInt::getOneBitSet(BitWidth, D->getZExtValue())); + } + FoldedExpr = SE->getUDivExpr(SE->getSCEV(IVSrc), SE->getSCEV(D)); + } + // We have something that might fold it's operand. Compare SCEVs. + if (!SE->isSCEVable(UseInst->getType())) + return 0; + + // Bypass the operand if SCEV can prove it has no effect. + if (SE->getSCEV(UseInst) != FoldedExpr) + return 0; + + DEBUG(dbgs() << "INDVARS: Eliminated IV operand: " << *IVOperand + << " -> " << *UseInst << '\n'); + + UseInst->setOperand(OperIdx, IVSrc); + assert(SE->getSCEV(UseInst) == FoldedExpr && "bad SCEV with folded oper"); + + ++NumElimOperand; + Changed = true; + if (IVOperand->use_empty()) + DeadInsts.push_back(IVOperand); + return IVSrc; +} + +/// eliminateIVComparison - SimplifyIVUsers helper for eliminating useless +/// comparisons against an induction variable. +void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) { + unsigned IVOperIdx = 0; + ICmpInst::Predicate Pred = ICmp->getPredicate(); + if (IVOperand != ICmp->getOperand(0)) { + // Swapped + assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand"); + IVOperIdx = 1; + Pred = ICmpInst::getSwappedPredicate(Pred); + } + + // Get the SCEVs for the ICmp operands. + const SCEV *S = SE->getSCEV(ICmp->getOperand(IVOperIdx)); + const SCEV *X = SE->getSCEV(ICmp->getOperand(1 - IVOperIdx)); + + // Simplify unnecessary loops away. + const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent()); + S = SE->getSCEVAtScope(S, ICmpLoop); + X = SE->getSCEVAtScope(X, ICmpLoop); + + // If the condition is always true or always false, replace it with + // a constant value. + if (SE->isKnownPredicate(Pred, S, X)) + ICmp->replaceAllUsesWith(ConstantInt::getTrue(ICmp->getContext())); + else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X)) + ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext())); + else + return; + + DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); + ++NumElimCmp; + Changed = true; + DeadInsts.push_back(ICmp); +} + +/// eliminateIVRemainder - SimplifyIVUsers helper for eliminating useless +/// remainder operations operating on an induction variable. +void SimplifyIndvar::eliminateIVRemainder(BinaryOperator *Rem, + Value *IVOperand, + bool IsSigned) { + // We're only interested in the case where we know something about + // the numerator. + if (IVOperand != Rem->getOperand(0)) + return; + + // Get the SCEVs for the ICmp operands. + const SCEV *S = SE->getSCEV(Rem->getOperand(0)); + const SCEV *X = SE->getSCEV(Rem->getOperand(1)); + + // Simplify unnecessary loops away. + const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent()); + S = SE->getSCEVAtScope(S, ICmpLoop); + X = SE->getSCEVAtScope(X, ICmpLoop); + + // i % n --> i if i is in [0,n). + if ((!IsSigned || SE->isKnownNonNegative(S)) && + SE->isKnownPredicate(IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, + S, X)) + Rem->replaceAllUsesWith(Rem->getOperand(0)); + else { + // (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n). + const SCEV *LessOne = + SE->getMinusSCEV(S, SE->getConstant(S->getType(), 1)); + if (IsSigned && !SE->isKnownNonNegative(LessOne)) + return; + + if (!SE->isKnownPredicate(IsSigned ? + ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, + LessOne, X)) + return; + + ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ, + Rem->getOperand(0), Rem->getOperand(1)); + SelectInst *Sel = + SelectInst::Create(ICmp, + ConstantInt::get(Rem->getType(), 0), + Rem->getOperand(0), "tmp", Rem); + Rem->replaceAllUsesWith(Sel); + } + + DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n'); + ++NumElimRem; + Changed = true; + DeadInsts.push_back(Rem); +} + +/// eliminateIVUser - Eliminate an operation that consumes a simple IV and has +/// no observable side-effect given the range of IV values. +/// IVOperand is guaranteed SCEVable, but UseInst may not be. +bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst, + Instruction *IVOperand) { + if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) { + eliminateIVComparison(ICmp, IVOperand); + return true; + } + if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) { + bool IsSigned = Rem->getOpcode() == Instruction::SRem; + if (IsSigned || Rem->getOpcode() == Instruction::URem) { + eliminateIVRemainder(Rem, IVOperand, IsSigned); + return true; + } + } + + // Eliminate any operation that SCEV can prove is an identity function. + if (!SE->isSCEVable(UseInst->getType()) || + (UseInst->getType() != IVOperand->getType()) || + (SE->getSCEV(UseInst) != SE->getSCEV(IVOperand))) + return false; + + DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n'); + + UseInst->replaceAllUsesWith(IVOperand); + ++NumElimIdentity; + Changed = true; + DeadInsts.push_back(UseInst); + return true; +} + +/// pushIVUsers - Add all uses of Def to the current IV's worklist. +/// +static void pushIVUsers( + Instruction *Def, + SmallPtrSet<Instruction*,16> &Simplified, + SmallVectorImpl< std::pair<Instruction*,Instruction*> > &SimpleIVUsers) { + + for (Value::use_iterator UI = Def->use_begin(), E = Def->use_end(); + UI != E; ++UI) { + Instruction *User = cast<Instruction>(*UI); + + // Avoid infinite or exponential worklist processing. + // Also ensure unique worklist users. + // If Def is a LoopPhi, it may not be in the Simplified set, so check for + // self edges first. + if (User != Def && Simplified.insert(User)) + SimpleIVUsers.push_back(std::make_pair(User, Def)); + } +} + +/// isSimpleIVUser - Return true if this instruction generates a simple SCEV +/// expression in terms of that IV. +/// +/// This is similar to IVUsers' isInteresting() but processes each instruction +/// non-recursively when the operand is already known to be a simpleIVUser. +/// +static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) { + if (!SE->isSCEVable(I->getType())) + return false; + + // Get the symbolic expression for this instruction. + const SCEV *S = SE->getSCEV(I); + + // Only consider affine recurrences. + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S); + if (AR && AR->getLoop() == L) + return true; + + return false; +} + +/// simplifyUsers - Iteratively perform simplification on a worklist of users +/// of the specified induction variable. Each successive simplification may push +/// more users which may themselves be candidates for simplification. +/// +/// This algorithm does not require IVUsers analysis. Instead, it simplifies +/// instructions in-place during analysis. Rather than rewriting induction +/// variables bottom-up from their users, it transforms a chain of IVUsers +/// top-down, updating the IR only when it encouters a clear optimization +/// opportunitiy. +/// +/// Once DisableIVRewrite is default, LSR will be the only client of IVUsers. +/// +void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) { + if (!SE->isSCEVable(CurrIV->getType())) + return; + + // Instructions processed by SimplifyIndvar for CurrIV. + SmallPtrSet<Instruction*,16> Simplified; + + // Use-def pairs if IV users waiting to be processed for CurrIV. + SmallVector<std::pair<Instruction*, Instruction*>, 8> SimpleIVUsers; + + // Push users of the current LoopPhi. In rare cases, pushIVUsers may be + // called multiple times for the same LoopPhi. This is the proper thing to + // do for loop header phis that use each other. + pushIVUsers(CurrIV, Simplified, SimpleIVUsers); + + while (!SimpleIVUsers.empty()) { + std::pair<Instruction*, Instruction*> UseOper = + SimpleIVUsers.pop_back_val(); + // Bypass back edges to avoid extra work. + if (UseOper.first == CurrIV) continue; + + Instruction *IVOperand = UseOper.second; + for (unsigned N = 0; IVOperand; ++N) { + assert(N <= Simplified.size() && "runaway iteration"); + + Value *NewOper = foldIVUser(UseOper.first, IVOperand); + if (!NewOper) + break; // done folding + IVOperand = dyn_cast<Instruction>(NewOper); + } + if (!IVOperand) + continue; + + if (eliminateIVUser(UseOper.first, IVOperand)) { + pushIVUsers(IVOperand, Simplified, SimpleIVUsers); + continue; + } + CastInst *Cast = dyn_cast<CastInst>(UseOper.first); + if (V && Cast) { + V->visitCast(Cast); + continue; + } + if (isSimpleIVUser(UseOper.first, L, SE)) { + pushIVUsers(UseOper.first, Simplified, SimpleIVUsers); + } + } +} + +namespace llvm { + +void IVVisitor::anchor() { } + +/// simplifyUsersOfIV - Simplify instructions that use this induction variable +/// by using ScalarEvolution to analyze the IV's recurrence. +bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, LPPassManager *LPM, + SmallVectorImpl<WeakVH> &Dead, IVVisitor *V) +{ + LoopInfo *LI = &LPM->getAnalysis<LoopInfo>(); + SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, LPM, Dead); + SIV.simplifyUsers(CurrIV, V); + return SIV.hasChanged(); +} + +/// simplifyLoopIVs - Simplify users of induction variables within this +/// loop. This does not actually change or add IVs. +bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, LPPassManager *LPM, + SmallVectorImpl<WeakVH> &Dead) { + bool Changed = false; + for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { + Changed |= simplifyUsersOfIV(cast<PHINode>(I), SE, LPM, Dead); + } + return Changed; +} + +} // namespace llvm diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp new file mode 100644 index 000000000000..f9687e4d5890 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp @@ -0,0 +1,100 @@ +//===------ SimplifyInstructions.cpp - Remove redundant instructions ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is a utility pass used for testing the InstructionSimplify analysis. +// The analysis is applied to every instruction, and if it simplifies then the +// instruction is replaced by the simplification. If you are looking for a pass +// that performs serious instruction folding, use the instcombine pass instead. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "instsimplify" +#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Type.h" +#include "llvm/Pass.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Transforms/Utils/Local.h" +using namespace llvm; + +STATISTIC(NumSimplified, "Number of redundant instructions removed"); + +namespace { + struct InstSimplifier : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + InstSimplifier() : FunctionPass(ID) { + initializeInstSimplifierPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<TargetLibraryInfo>(); + } + + /// runOnFunction - Remove instructions that simplify. + bool runOnFunction(Function &F) { + const DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>(); + const DataLayout *TD = getAnalysisIfAvailable<DataLayout>(); + const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>(); + SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2; + bool Changed = false; + + do { + for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()), + DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) + for (BasicBlock::iterator BI = DI->begin(), BE = DI->end(); BI != BE;) { + Instruction *I = BI++; + // The first time through the loop ToSimplify is empty and we try to + // simplify all instructions. On later iterations ToSimplify is not + // empty and we only bother simplifying instructions that are in it. + if (!ToSimplify->empty() && !ToSimplify->count(I)) + continue; + // Don't waste time simplifying unused instructions. + if (!I->use_empty()) + if (Value *V = SimplifyInstruction(I, TD, TLI, DT)) { + // Mark all uses for resimplification next time round the loop. + for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); + UI != UE; ++UI) + Next->insert(cast<Instruction>(*UI)); + I->replaceAllUsesWith(V); + ++NumSimplified; + Changed = true; + } + Changed |= RecursivelyDeleteTriviallyDeadInstructions(I, TLI); + } + + // Place the list of instructions to simplify on the next loop iteration + // into ToSimplify. + std::swap(ToSimplify, Next); + Next->clear(); + } while (!ToSimplify->empty()); + + return Changed; + } + }; +} + +char InstSimplifier::ID = 0; +INITIALIZE_PASS_BEGIN(InstSimplifier, "instsimplify", + "Remove redundant instructions", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) +INITIALIZE_PASS_END(InstSimplifier, "instsimplify", + "Remove redundant instructions", false, false) +char &llvm::InstructionSimplifierID = InstSimplifier::ID; + +// Public interface to the simplify instructions pass. +FunctionPass *llvm::createInstructionSimplifierPass() { + return new InstSimplifier(); +} diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp new file mode 100644 index 000000000000..15b3e66f94ad --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -0,0 +1,2253 @@ +//===------ SimplifyLibCalls.cpp - Library calls simplifier ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is a utility pass used for testing the InstructionSimplify analysis. +// The analysis is applied to every instruction, and if it simplifies then the +// instruction is replaced by the simplification. If you are looking for a pass +// that performs serious instruction folding, use the instcombine pass instead. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/SimplifyLibCalls.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Transforms/Utils/BuildLibCalls.h" + +using namespace llvm; + +static cl::opt<bool> +ColdErrorCalls("error-reporting-is-cold", cl::init(true), + cl::Hidden, cl::desc("Treat error-reporting calls as cold")); + +/// This class is the abstract base class for the set of optimizations that +/// corresponds to one library call. +namespace { +class LibCallOptimization { +protected: + Function *Caller; + const DataLayout *TD; + const TargetLibraryInfo *TLI; + const LibCallSimplifier *LCS; + LLVMContext* Context; +public: + LibCallOptimization() { } + virtual ~LibCallOptimization() {} + + /// callOptimizer - This pure virtual method is implemented by base classes to + /// do various optimizations. If this returns null then no transformation was + /// performed. If it returns CI, then it transformed the call and CI is to be + /// deleted. If it returns something else, replace CI with the new value and + /// delete CI. + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) + =0; + + /// ignoreCallingConv - Returns false if this transformation could possibly + /// change the calling convention. + virtual bool ignoreCallingConv() { return false; } + + Value *optimizeCall(CallInst *CI, const DataLayout *TD, + const TargetLibraryInfo *TLI, + const LibCallSimplifier *LCS, IRBuilder<> &B) { + Caller = CI->getParent()->getParent(); + this->TD = TD; + this->TLI = TLI; + this->LCS = LCS; + if (CI->getCalledFunction()) + Context = &CI->getCalledFunction()->getContext(); + + // We never change the calling convention. + if (!ignoreCallingConv() && CI->getCallingConv() != llvm::CallingConv::C) + return NULL; + + return callOptimizer(CI->getCalledFunction(), CI, B); + } +}; + +//===----------------------------------------------------------------------===// +// Helper Functions +//===----------------------------------------------------------------------===// + +/// isOnlyUsedInZeroEqualityComparison - Return true if it only matters that the +/// value is equal or not-equal to zero. +static bool isOnlyUsedInZeroEqualityComparison(Value *V) { + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); + UI != E; ++UI) { + if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI)) + if (IC->isEquality()) + if (Constant *C = dyn_cast<Constant>(IC->getOperand(1))) + if (C->isNullValue()) + continue; + // Unknown instruction. + return false; + } + return true; +} + +/// isOnlyUsedInEqualityComparison - Return true if it is only used in equality +/// comparisons with With. +static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) { + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); + UI != E; ++UI) { + if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI)) + if (IC->isEquality() && IC->getOperand(1) == With) + continue; + // Unknown instruction. + return false; + } + return true; +} + +static bool callHasFloatingPointArgument(const CallInst *CI) { + for (CallInst::const_op_iterator it = CI->op_begin(), e = CI->op_end(); + it != e; ++it) { + if ((*it)->getType()->isFloatingPointTy()) + return true; + } + return false; +} + +/// \brief Check whether the overloaded unary floating point function +/// corresponing to \a Ty is available. +static bool hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty, + LibFunc::Func DoubleFn, LibFunc::Func FloatFn, + LibFunc::Func LongDoubleFn) { + switch (Ty->getTypeID()) { + case Type::FloatTyID: + return TLI->has(FloatFn); + case Type::DoubleTyID: + return TLI->has(DoubleFn); + default: + return TLI->has(LongDoubleFn); + } +} + +//===----------------------------------------------------------------------===// +// Fortified Library Call Optimizations +//===----------------------------------------------------------------------===// + +struct FortifiedLibCallOptimization : public LibCallOptimization { +protected: + virtual bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, + bool isString) const = 0; +}; + +struct InstFortifiedLibCallOptimization : public FortifiedLibCallOptimization { + CallInst *CI; + + bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const { + if (CI->getArgOperand(SizeCIOp) == CI->getArgOperand(SizeArgOp)) + return true; + if (ConstantInt *SizeCI = + dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) { + if (SizeCI->isAllOnesValue()) + return true; + if (isString) { + uint64_t Len = GetStringLength(CI->getArgOperand(SizeArgOp)); + // If the length is 0 we don't know how long it is and so we can't + // remove the check. + if (Len == 0) return false; + return SizeCI->getZExtValue() >= Len; + } + if (ConstantInt *Arg = dyn_cast<ConstantInt>( + CI->getArgOperand(SizeArgOp))) + return SizeCI->getZExtValue() >= Arg->getZExtValue(); + } + return false; + } +}; + +struct MemCpyChkOpt : public InstFortifiedLibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + this->CI = CI; + FunctionType *FT = Callee->getFunctionType(); + LLVMContext &Context = CI->getParent()->getContext(); + + // Check if this has the right signature. + if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + FT->getParamType(2) != TD->getIntPtrType(Context) || + FT->getParamType(3) != TD->getIntPtrType(Context)) + return 0; + + if (isFoldable(3, 2, false)) { + B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1); + return CI->getArgOperand(0); + } + return 0; + } +}; + +struct MemMoveChkOpt : public InstFortifiedLibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + this->CI = CI; + FunctionType *FT = Callee->getFunctionType(); + LLVMContext &Context = CI->getParent()->getContext(); + + // Check if this has the right signature. + if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + FT->getParamType(2) != TD->getIntPtrType(Context) || + FT->getParamType(3) != TD->getIntPtrType(Context)) + return 0; + + if (isFoldable(3, 2, false)) { + B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1); + return CI->getArgOperand(0); + } + return 0; + } +}; + +struct MemSetChkOpt : public InstFortifiedLibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + this->CI = CI; + FunctionType *FT = Callee->getFunctionType(); + LLVMContext &Context = CI->getParent()->getContext(); + + // Check if this has the right signature. + if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isIntegerTy() || + FT->getParamType(2) != TD->getIntPtrType(Context) || + FT->getParamType(3) != TD->getIntPtrType(Context)) + return 0; + + if (isFoldable(3, 2, false)) { + Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), + false); + B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1); + return CI->getArgOperand(0); + } + return 0; + } +}; + +struct StrCpyChkOpt : public InstFortifiedLibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + this->CI = CI; + StringRef Name = Callee->getName(); + FunctionType *FT = Callee->getFunctionType(); + LLVMContext &Context = CI->getParent()->getContext(); + + // Check if this has the right signature. + if (FT->getNumParams() != 3 || + FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != Type::getInt8PtrTy(Context) || + FT->getParamType(2) != TD->getIntPtrType(Context)) + return 0; + + Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); + if (Dst == Src) // __strcpy_chk(x,x) -> x + return Src; + + // If a) we don't have any length information, or b) we know this will + // fit then just lower to a plain strcpy. Otherwise we'll keep our + // strcpy_chk call which may fail at runtime if the size is too long. + // TODO: It might be nice to get a maximum length out of the possible + // string lengths for varying. + if (isFoldable(2, 1, true)) { + Value *Ret = EmitStrCpy(Dst, Src, B, TD, TLI, Name.substr(2, 6)); + return Ret; + } else { + // Maybe we can stil fold __strcpy_chk to __memcpy_chk. + uint64_t Len = GetStringLength(Src); + if (Len == 0) return 0; + + // This optimization require DataLayout. + if (!TD) return 0; + + Value *Ret = + EmitMemCpyChk(Dst, Src, + ConstantInt::get(TD->getIntPtrType(Context), Len), + CI->getArgOperand(2), B, TD, TLI); + return Ret; + } + return 0; + } +}; + +struct StpCpyChkOpt : public InstFortifiedLibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + this->CI = CI; + StringRef Name = Callee->getName(); + FunctionType *FT = Callee->getFunctionType(); + LLVMContext &Context = CI->getParent()->getContext(); + + // Check if this has the right signature. + if (FT->getNumParams() != 3 || + FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != Type::getInt8PtrTy(Context) || + FT->getParamType(2) != TD->getIntPtrType(FT->getParamType(0))) + return 0; + + Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); + if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x) + Value *StrLen = EmitStrLen(Src, B, TD, TLI); + return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : 0; + } + + // If a) we don't have any length information, or b) we know this will + // fit then just lower to a plain stpcpy. Otherwise we'll keep our + // stpcpy_chk call which may fail at runtime if the size is too long. + // TODO: It might be nice to get a maximum length out of the possible + // string lengths for varying. + if (isFoldable(2, 1, true)) { + Value *Ret = EmitStrCpy(Dst, Src, B, TD, TLI, Name.substr(2, 6)); + return Ret; + } else { + // Maybe we can stil fold __stpcpy_chk to __memcpy_chk. + uint64_t Len = GetStringLength(Src); + if (Len == 0) return 0; + + // This optimization require DataLayout. + if (!TD) return 0; + + Type *PT = FT->getParamType(0); + Value *LenV = ConstantInt::get(TD->getIntPtrType(PT), Len); + Value *DstEnd = B.CreateGEP(Dst, + ConstantInt::get(TD->getIntPtrType(PT), + Len - 1)); + if (!EmitMemCpyChk(Dst, Src, LenV, CI->getArgOperand(2), B, TD, TLI)) + return 0; + return DstEnd; + } + return 0; + } +}; + +struct StrNCpyChkOpt : public InstFortifiedLibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + this->CI = CI; + StringRef Name = Callee->getName(); + FunctionType *FT = Callee->getFunctionType(); + LLVMContext &Context = CI->getParent()->getContext(); + + // Check if this has the right signature. + if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != Type::getInt8PtrTy(Context) || + !FT->getParamType(2)->isIntegerTy() || + FT->getParamType(3) != TD->getIntPtrType(Context)) + return 0; + + if (isFoldable(3, 2, false)) { + Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), B, TD, TLI, + Name.substr(2, 7)); + return Ret; + } + return 0; + } +}; + +//===----------------------------------------------------------------------===// +// String and Memory Library Call Optimizations +//===----------------------------------------------------------------------===// + +struct StrCatOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Verify the "strcat" function prototype. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getReturnType() != B.getInt8PtrTy() || + FT->getParamType(0) != FT->getReturnType() || + FT->getParamType(1) != FT->getReturnType()) + return 0; + + // Extract some information from the instruction + Value *Dst = CI->getArgOperand(0); + Value *Src = CI->getArgOperand(1); + + // See if we can get the length of the input string. + uint64_t Len = GetStringLength(Src); + if (Len == 0) return 0; + --Len; // Unbias length. + + // Handle the simple, do-nothing case: strcat(x, "") -> x + if (Len == 0) + return Dst; + + // These optimizations require DataLayout. + if (!TD) return 0; + + return emitStrLenMemCpy(Src, Dst, Len, B); + } + + Value *emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, + IRBuilder<> &B) { + // We need to find the end of the destination string. That's where the + // memory is to be moved to. We just generate a call to strlen. + Value *DstLen = EmitStrLen(Dst, B, TD, TLI); + if (!DstLen) + return 0; + + // Now that we have the destination's length, we must index into the + // destination's pointer to get the actual memcpy destination (end of + // the string .. we're concatenating). + Value *CpyDst = B.CreateGEP(Dst, DstLen, "endptr"); + + // We have enough information to now generate the memcpy call to do the + // concatenation for us. Make a memcpy to copy the nul byte with align = 1. + B.CreateMemCpy(CpyDst, Src, + ConstantInt::get(TD->getIntPtrType(*Context), Len + 1), 1); + return Dst; + } +}; + +struct StrNCatOpt : public StrCatOpt { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Verify the "strncat" function prototype. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || + FT->getReturnType() != B.getInt8PtrTy() || + FT->getParamType(0) != FT->getReturnType() || + FT->getParamType(1) != FT->getReturnType() || + !FT->getParamType(2)->isIntegerTy()) + return 0; + + // Extract some information from the instruction + Value *Dst = CI->getArgOperand(0); + Value *Src = CI->getArgOperand(1); + uint64_t Len; + + // We don't do anything if length is not constant + if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2))) + Len = LengthArg->getZExtValue(); + else + return 0; + + // See if we can get the length of the input string. + uint64_t SrcLen = GetStringLength(Src); + if (SrcLen == 0) return 0; + --SrcLen; // Unbias length. + + // Handle the simple, do-nothing cases: + // strncat(x, "", c) -> x + // strncat(x, c, 0) -> x + if (SrcLen == 0 || Len == 0) return Dst; + + // These optimizations require DataLayout. + if (!TD) return 0; + + // We don't optimize this case + if (Len < SrcLen) return 0; + + // strncat(x, s, c) -> strcat(x, s) + // s is constant so the strcat can be optimized further + return emitStrLenMemCpy(Src, Dst, SrcLen, B); + } +}; + +struct StrChrOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Verify the "strchr" function prototype. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getReturnType() != B.getInt8PtrTy() || + FT->getParamType(0) != FT->getReturnType() || + !FT->getParamType(1)->isIntegerTy(32)) + return 0; + + Value *SrcStr = CI->getArgOperand(0); + + // If the second operand is non-constant, see if we can compute the length + // of the input string and turn this into memchr. + ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); + if (CharC == 0) { + // These optimizations require DataLayout. + if (!TD) return 0; + + uint64_t Len = GetStringLength(SrcStr); + if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32))// memchr needs i32. + return 0; + + return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul. + ConstantInt::get(TD->getIntPtrType(*Context), Len), + B, TD, TLI); + } + + // Otherwise, the character is a constant, see if the first argument is + // a string literal. If so, we can constant fold. + StringRef Str; + if (!getConstantStringInfo(SrcStr, Str)) + return 0; + + // Compute the offset, make sure to handle the case when we're searching for + // zero (a weird way to spell strlen). + size_t I = (0xFF & CharC->getSExtValue()) == 0 ? + Str.size() : Str.find(CharC->getSExtValue()); + if (I == StringRef::npos) // Didn't find the char. strchr returns null. + return Constant::getNullValue(CI->getType()); + + // strchr(s+n,c) -> gep(s+n+i,c) + return B.CreateGEP(SrcStr, B.getInt64(I), "strchr"); + } +}; + +struct StrRChrOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Verify the "strrchr" function prototype. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getReturnType() != B.getInt8PtrTy() || + FT->getParamType(0) != FT->getReturnType() || + !FT->getParamType(1)->isIntegerTy(32)) + return 0; + + Value *SrcStr = CI->getArgOperand(0); + ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); + + // Cannot fold anything if we're not looking for a constant. + if (!CharC) + return 0; + + StringRef Str; + if (!getConstantStringInfo(SrcStr, Str)) { + // strrchr(s, 0) -> strchr(s, 0) + if (TD && CharC->isZero()) + return EmitStrChr(SrcStr, '\0', B, TD, TLI); + return 0; + } + + // Compute the offset. + size_t I = (0xFF & CharC->getSExtValue()) == 0 ? + Str.size() : Str.rfind(CharC->getSExtValue()); + if (I == StringRef::npos) // Didn't find the char. Return null. + return Constant::getNullValue(CI->getType()); + + // strrchr(s+n,c) -> gep(s+n+i,c) + return B.CreateGEP(SrcStr, B.getInt64(I), "strrchr"); + } +}; + +struct StrCmpOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Verify the "strcmp" function prototype. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + !FT->getReturnType()->isIntegerTy(32) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != B.getInt8PtrTy()) + return 0; + + Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1); + if (Str1P == Str2P) // strcmp(x,x) -> 0 + return ConstantInt::get(CI->getType(), 0); + + StringRef Str1, Str2; + bool HasStr1 = getConstantStringInfo(Str1P, Str1); + bool HasStr2 = getConstantStringInfo(Str2P, Str2); + + // strcmp(x, y) -> cnst (if both x and y are constant strings) + if (HasStr1 && HasStr2) + return ConstantInt::get(CI->getType(), Str1.compare(Str2)); + + if (HasStr1 && Str1.empty()) // strcmp("", x) -> -*x + return B.CreateNeg(B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), + CI->getType())); + + if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x + return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType()); + + // strcmp(P, "x") -> memcmp(P, "x", 2) + uint64_t Len1 = GetStringLength(Str1P); + uint64_t Len2 = GetStringLength(Str2P); + if (Len1 && Len2) { + // These optimizations require DataLayout. + if (!TD) return 0; + + return EmitMemCmp(Str1P, Str2P, + ConstantInt::get(TD->getIntPtrType(*Context), + std::min(Len1, Len2)), B, TD, TLI); + } + + return 0; + } +}; + +struct StrNCmpOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Verify the "strncmp" function prototype. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || + !FT->getReturnType()->isIntegerTy(32) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != B.getInt8PtrTy() || + !FT->getParamType(2)->isIntegerTy()) + return 0; + + Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1); + if (Str1P == Str2P) // strncmp(x,x,n) -> 0 + return ConstantInt::get(CI->getType(), 0); + + // Get the length argument if it is constant. + uint64_t Length; + if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2))) + Length = LengthArg->getZExtValue(); + else + return 0; + + if (Length == 0) // strncmp(x,y,0) -> 0 + return ConstantInt::get(CI->getType(), 0); + + if (TD && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1) + return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD, TLI); + + StringRef Str1, Str2; + bool HasStr1 = getConstantStringInfo(Str1P, Str1); + bool HasStr2 = getConstantStringInfo(Str2P, Str2); + + // strncmp(x, y) -> cnst (if both x and y are constant strings) + if (HasStr1 && HasStr2) { + StringRef SubStr1 = Str1.substr(0, Length); + StringRef SubStr2 = Str2.substr(0, Length); + return ConstantInt::get(CI->getType(), SubStr1.compare(SubStr2)); + } + + if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> -*x + return B.CreateNeg(B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), + CI->getType())); + + if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x + return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType()); + + return 0; + } +}; + +struct StrCpyOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Verify the "strcpy" function prototype. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != B.getInt8PtrTy()) + return 0; + + Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); + if (Dst == Src) // strcpy(x,x) -> x + return Src; + + // These optimizations require DataLayout. + if (!TD) return 0; + + // See if we can get the length of the input string. + uint64_t Len = GetStringLength(Src); + if (Len == 0) return 0; + + // We have enough information to now generate the memcpy call to do the + // copy for us. Make a memcpy to copy the nul byte with align = 1. + B.CreateMemCpy(Dst, Src, + ConstantInt::get(TD->getIntPtrType(*Context), Len), 1); + return Dst; + } +}; + +struct StpCpyOpt: public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Verify the "stpcpy" function prototype. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != B.getInt8PtrTy()) + return 0; + + // These optimizations require DataLayout. + if (!TD) return 0; + + Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); + if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x) + Value *StrLen = EmitStrLen(Src, B, TD, TLI); + return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : 0; + } + + // See if we can get the length of the input string. + uint64_t Len = GetStringLength(Src); + if (Len == 0) return 0; + + Type *PT = FT->getParamType(0); + Value *LenV = ConstantInt::get(TD->getIntPtrType(PT), Len); + Value *DstEnd = B.CreateGEP(Dst, + ConstantInt::get(TD->getIntPtrType(PT), + Len - 1)); + + // We have enough information to now generate the memcpy call to do the + // copy for us. Make a memcpy to copy the nul byte with align = 1. + B.CreateMemCpy(Dst, Src, LenV, 1); + return DstEnd; + } +}; + +struct StrNCpyOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != B.getInt8PtrTy() || + !FT->getParamType(2)->isIntegerTy()) + return 0; + + Value *Dst = CI->getArgOperand(0); + Value *Src = CI->getArgOperand(1); + Value *LenOp = CI->getArgOperand(2); + + // See if we can get the length of the input string. + uint64_t SrcLen = GetStringLength(Src); + if (SrcLen == 0) return 0; + --SrcLen; + + if (SrcLen == 0) { + // strncpy(x, "", y) -> memset(x, '\0', y, 1) + B.CreateMemSet(Dst, B.getInt8('\0'), LenOp, 1); + return Dst; + } + + uint64_t Len; + if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(LenOp)) + Len = LengthArg->getZExtValue(); + else + return 0; + + if (Len == 0) return Dst; // strncpy(x, y, 0) -> x + + // These optimizations require DataLayout. + if (!TD) return 0; + + // Let strncpy handle the zero padding + if (Len > SrcLen+1) return 0; + + Type *PT = FT->getParamType(0); + // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant] + B.CreateMemCpy(Dst, Src, + ConstantInt::get(TD->getIntPtrType(PT), Len), 1); + + return Dst; + } +}; + +struct StrLenOpt : public LibCallOptimization { + virtual bool ignoreCallingConv() { return true; } + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 1 || + FT->getParamType(0) != B.getInt8PtrTy() || + !FT->getReturnType()->isIntegerTy()) + return 0; + + Value *Src = CI->getArgOperand(0); + + // Constant folding: strlen("xyz") -> 3 + if (uint64_t Len = GetStringLength(Src)) + return ConstantInt::get(CI->getType(), Len-1); + + // strlen(x) != 0 --> *x != 0 + // strlen(x) == 0 --> *x == 0 + if (isOnlyUsedInZeroEqualityComparison(CI)) + return B.CreateZExt(B.CreateLoad(Src, "strlenfirst"), CI->getType()); + return 0; + } +}; + +struct StrPBrkOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getParamType(0) != B.getInt8PtrTy() || + FT->getParamType(1) != FT->getParamType(0) || + FT->getReturnType() != FT->getParamType(0)) + return 0; + + StringRef S1, S2; + bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); + bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2); + + // strpbrk(s, "") -> NULL + // strpbrk("", s) -> NULL + if ((HasS1 && S1.empty()) || (HasS2 && S2.empty())) + return Constant::getNullValue(CI->getType()); + + // Constant folding. + if (HasS1 && HasS2) { + size_t I = S1.find_first_of(S2); + if (I == StringRef::npos) // No match. + return Constant::getNullValue(CI->getType()); + + return B.CreateGEP(CI->getArgOperand(0), B.getInt64(I), "strpbrk"); + } + + // strpbrk(s, "a") -> strchr(s, 'a') + if (TD && HasS2 && S2.size() == 1) + return EmitStrChr(CI->getArgOperand(0), S2[0], B, TD, TLI); + + return 0; + } +}; + +struct StrToOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if ((FT->getNumParams() != 2 && FT->getNumParams() != 3) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy()) + return 0; + + Value *EndPtr = CI->getArgOperand(1); + if (isa<ConstantPointerNull>(EndPtr)) { + // With a null EndPtr, this function won't capture the main argument. + // It would be readonly too, except that it still may write to errno. + CI->addAttribute(1, Attribute::NoCapture); + } + + return 0; + } +}; + +struct StrSpnOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getParamType(0) != B.getInt8PtrTy() || + FT->getParamType(1) != FT->getParamType(0) || + !FT->getReturnType()->isIntegerTy()) + return 0; + + StringRef S1, S2; + bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); + bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2); + + // strspn(s, "") -> 0 + // strspn("", s) -> 0 + if ((HasS1 && S1.empty()) || (HasS2 && S2.empty())) + return Constant::getNullValue(CI->getType()); + + // Constant folding. + if (HasS1 && HasS2) { + size_t Pos = S1.find_first_not_of(S2); + if (Pos == StringRef::npos) Pos = S1.size(); + return ConstantInt::get(CI->getType(), Pos); + } + + return 0; + } +}; + +struct StrCSpnOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getParamType(0) != B.getInt8PtrTy() || + FT->getParamType(1) != FT->getParamType(0) || + !FT->getReturnType()->isIntegerTy()) + return 0; + + StringRef S1, S2; + bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); + bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2); + + // strcspn("", s) -> 0 + if (HasS1 && S1.empty()) + return Constant::getNullValue(CI->getType()); + + // Constant folding. + if (HasS1 && HasS2) { + size_t Pos = S1.find_first_of(S2); + if (Pos == StringRef::npos) Pos = S1.size(); + return ConstantInt::get(CI->getType(), Pos); + } + + // strcspn(s, "") -> strlen(s) + if (TD && HasS2 && S2.empty()) + return EmitStrLen(CI->getArgOperand(0), B, TD, TLI); + + return 0; + } +}; + +struct StrStrOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + !FT->getReturnType()->isPointerTy()) + return 0; + + // fold strstr(x, x) -> x. + if (CI->getArgOperand(0) == CI->getArgOperand(1)) + return B.CreateBitCast(CI->getArgOperand(0), CI->getType()); + + // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0 + if (TD && isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) { + Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD, TLI); + if (!StrLen) + return 0; + Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1), + StrLen, B, TD, TLI); + if (!StrNCmp) + return 0; + for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end(); + UI != UE; ) { + ICmpInst *Old = cast<ICmpInst>(*UI++); + Value *Cmp = B.CreateICmp(Old->getPredicate(), StrNCmp, + ConstantInt::getNullValue(StrNCmp->getType()), + "cmp"); + LCS->replaceAllUsesWith(Old, Cmp); + } + return CI; + } + + // See if either input string is a constant string. + StringRef SearchStr, ToFindStr; + bool HasStr1 = getConstantStringInfo(CI->getArgOperand(0), SearchStr); + bool HasStr2 = getConstantStringInfo(CI->getArgOperand(1), ToFindStr); + + // fold strstr(x, "") -> x. + if (HasStr2 && ToFindStr.empty()) + return B.CreateBitCast(CI->getArgOperand(0), CI->getType()); + + // If both strings are known, constant fold it. + if (HasStr1 && HasStr2) { + size_t Offset = SearchStr.find(ToFindStr); + + if (Offset == StringRef::npos) // strstr("foo", "bar") -> null + return Constant::getNullValue(CI->getType()); + + // strstr("abcd", "bc") -> gep((char*)"abcd", 1) + Value *Result = CastToCStr(CI->getArgOperand(0), B); + Result = B.CreateConstInBoundsGEP1_64(Result, Offset, "strstr"); + return B.CreateBitCast(Result, CI->getType()); + } + + // fold strstr(x, "y") -> strchr(x, 'y'). + if (HasStr2 && ToFindStr.size() == 1) { + Value *StrChr= EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TD, TLI); + return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : 0; + } + return 0; + } +}; + +struct MemCmpOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + !FT->getReturnType()->isIntegerTy(32)) + return 0; + + Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1); + + if (LHS == RHS) // memcmp(s,s,x) -> 0 + return Constant::getNullValue(CI->getType()); + + // Make sure we have a constant length. + ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); + if (!LenC) return 0; + uint64_t Len = LenC->getZExtValue(); + + if (Len == 0) // memcmp(s1,s2,0) -> 0 + return Constant::getNullValue(CI->getType()); + + // memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS + if (Len == 1) { + Value *LHSV = B.CreateZExt(B.CreateLoad(CastToCStr(LHS, B), "lhsc"), + CI->getType(), "lhsv"); + Value *RHSV = B.CreateZExt(B.CreateLoad(CastToCStr(RHS, B), "rhsc"), + CI->getType(), "rhsv"); + return B.CreateSub(LHSV, RHSV, "chardiff"); + } + + // Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant) + StringRef LHSStr, RHSStr; + if (getConstantStringInfo(LHS, LHSStr) && + getConstantStringInfo(RHS, RHSStr)) { + // Make sure we're not reading out-of-bounds memory. + if (Len > LHSStr.size() || Len > RHSStr.size()) + return 0; + // Fold the memcmp and normalize the result. This way we get consistent + // results across multiple platforms. + uint64_t Ret = 0; + int Cmp = memcmp(LHSStr.data(), RHSStr.data(), Len); + if (Cmp < 0) + Ret = -1; + else if (Cmp > 0) + Ret = 1; + return ConstantInt::get(CI->getType(), Ret); + } + + return 0; + } +}; + +struct MemCpyOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // These optimizations require DataLayout. + if (!TD) return 0; + + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + FT->getParamType(2) != TD->getIntPtrType(*Context)) + return 0; + + // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1) + B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1); + return CI->getArgOperand(0); + } +}; + +struct MemMoveOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // These optimizations require DataLayout. + if (!TD) return 0; + + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + FT->getParamType(2) != TD->getIntPtrType(*Context)) + return 0; + + // memmove(x, y, n) -> llvm.memmove(x, y, n, 1) + B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1); + return CI->getArgOperand(0); + } +}; + +struct MemSetOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // These optimizations require DataLayout. + if (!TD) return 0; + + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isIntegerTy() || + FT->getParamType(2) != TD->getIntPtrType(FT->getParamType(0))) + return 0; + + // memset(p, v, n) -> llvm.memset(p, v, n, 1) + Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false); + B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1); + return CI->getArgOperand(0); + } +}; + +//===----------------------------------------------------------------------===// +// Math Library Optimizations +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Double -> Float Shrinking Optimizations for Unary Functions like 'floor' + +struct UnaryDoubleFPOpt : public LibCallOptimization { + bool CheckRetType; + UnaryDoubleFPOpt(bool CheckReturnType): CheckRetType(CheckReturnType) {} + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() || + !FT->getParamType(0)->isDoubleTy()) + return 0; + + if (CheckRetType) { + // Check if all the uses for function like 'sin' are converted to float. + for (Value::use_iterator UseI = CI->use_begin(); UseI != CI->use_end(); + ++UseI) { + FPTruncInst *Cast = dyn_cast<FPTruncInst>(*UseI); + if (Cast == 0 || !Cast->getType()->isFloatTy()) + return 0; + } + } + + // If this is something like 'floor((double)floatval)', convert to floorf. + FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0)); + if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy()) + return 0; + + // floor((double)floatval) -> (double)floorf(floatval) + Value *V = Cast->getOperand(0); + V = EmitUnaryFloatFnCall(V, Callee->getName(), B, Callee->getAttributes()); + return B.CreateFPExt(V, B.getDoubleTy()); + } +}; + +struct UnsafeFPLibCallOptimization : public LibCallOptimization { + bool UnsafeFPShrink; + UnsafeFPLibCallOptimization(bool UnsafeFPShrink) { + this->UnsafeFPShrink = UnsafeFPShrink; + } +}; + +struct CosOpt : public UnsafeFPLibCallOptimization { + CosOpt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {} + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *Ret = NULL; + if (UnsafeFPShrink && Callee->getName() == "cos" && + TLI->has(LibFunc::cosf)) { + UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); + Ret = UnsafeUnaryDoubleFP.callOptimizer(Callee, CI, B); + } + + FunctionType *FT = Callee->getFunctionType(); + // Just make sure this has 1 argument of FP type, which matches the + // result type. + if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isFloatingPointTy()) + return Ret; + + // cos(-x) -> cos(x) + Value *Op1 = CI->getArgOperand(0); + if (BinaryOperator::isFNeg(Op1)) { + BinaryOperator *BinExpr = cast<BinaryOperator>(Op1); + return B.CreateCall(Callee, BinExpr->getOperand(1), "cos"); + } + return Ret; + } +}; + +struct PowOpt : public UnsafeFPLibCallOptimization { + PowOpt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {} + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *Ret = NULL; + if (UnsafeFPShrink && Callee->getName() == "pow" && + TLI->has(LibFunc::powf)) { + UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); + Ret = UnsafeUnaryDoubleFP.callOptimizer(Callee, CI, B); + } + + FunctionType *FT = Callee->getFunctionType(); + // Just make sure this has 2 arguments of the same FP type, which match the + // result type. + if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + !FT->getParamType(0)->isFloatingPointTy()) + return Ret; + + Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1); + if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) { + // pow(1.0, x) -> 1.0 + if (Op1C->isExactlyValue(1.0)) + return Op1C; + // pow(2.0, x) -> exp2(x) + if (Op1C->isExactlyValue(2.0) && + hasUnaryFloatFn(TLI, Op1->getType(), LibFunc::exp2, LibFunc::exp2f, + LibFunc::exp2l)) + return EmitUnaryFloatFnCall(Op2, "exp2", B, Callee->getAttributes()); + } + + ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2); + if (Op2C == 0) return Ret; + + if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0 + return ConstantFP::get(CI->getType(), 1.0); + + if (Op2C->isExactlyValue(0.5) && + hasUnaryFloatFn(TLI, Op2->getType(), LibFunc::sqrt, LibFunc::sqrtf, + LibFunc::sqrtl) && + hasUnaryFloatFn(TLI, Op2->getType(), LibFunc::fabs, LibFunc::fabsf, + LibFunc::fabsl)) { + // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))). + // This is faster than calling pow, and still handles negative zero + // and negative infinity correctly. + // TODO: In fast-math mode, this could be just sqrt(x). + // TODO: In finite-only mode, this could be just fabs(sqrt(x)). + Value *Inf = ConstantFP::getInfinity(CI->getType()); + Value *NegInf = ConstantFP::getInfinity(CI->getType(), true); + Value *Sqrt = EmitUnaryFloatFnCall(Op1, "sqrt", B, + Callee->getAttributes()); + Value *FAbs = EmitUnaryFloatFnCall(Sqrt, "fabs", B, + Callee->getAttributes()); + Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf); + Value *Sel = B.CreateSelect(FCmp, Inf, FAbs); + return Sel; + } + + if (Op2C->isExactlyValue(1.0)) // pow(x, 1.0) -> x + return Op1; + if (Op2C->isExactlyValue(2.0)) // pow(x, 2.0) -> x*x + return B.CreateFMul(Op1, Op1, "pow2"); + if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x + return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), + Op1, "powrecip"); + return 0; + } +}; + +struct Exp2Opt : public UnsafeFPLibCallOptimization { + Exp2Opt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {} + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *Ret = NULL; + if (UnsafeFPShrink && Callee->getName() == "exp2" && + TLI->has(LibFunc::exp2f)) { + UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); + Ret = UnsafeUnaryDoubleFP.callOptimizer(Callee, CI, B); + } + + FunctionType *FT = Callee->getFunctionType(); + // Just make sure this has 1 argument of FP type, which matches the + // result type. + if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isFloatingPointTy()) + return Ret; + + Value *Op = CI->getArgOperand(0); + // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32 + // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32 + Value *LdExpArg = 0; + if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) { + if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32) + LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty()); + } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) { + if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32) + LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty()); + } + + if (LdExpArg) { + const char *Name; + if (Op->getType()->isFloatTy()) + Name = "ldexpf"; + else if (Op->getType()->isDoubleTy()) + Name = "ldexp"; + else + Name = "ldexpl"; + + Constant *One = ConstantFP::get(*Context, APFloat(1.0f)); + if (!Op->getType()->isFloatTy()) + One = ConstantExpr::getFPExtend(One, Op->getType()); + + Module *M = Caller->getParent(); + Value *Callee = M->getOrInsertFunction(Name, Op->getType(), + Op->getType(), + B.getInt32Ty(), NULL); + CallInst *CI = B.CreateCall2(Callee, One, LdExpArg); + if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; + } + return Ret; + } +}; + +struct SinCosPiOpt : public LibCallOptimization { + SinCosPiOpt() {} + + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Make sure the prototype is as expected, otherwise the rest of the + // function is probably invalid and likely to abort. + if (!isTrigLibCall(CI)) + return 0; + + Value *Arg = CI->getArgOperand(0); + SmallVector<CallInst *, 1> SinCalls; + SmallVector<CallInst *, 1> CosCalls; + SmallVector<CallInst *, 1> SinCosCalls; + + bool IsFloat = Arg->getType()->isFloatTy(); + + // Look for all compatible sinpi, cospi and sincospi calls with the same + // argument. If there are enough (in some sense) we can make the + // substitution. + for (Value::use_iterator UI = Arg->use_begin(), UE = Arg->use_end(); + UI != UE; ++UI) + classifyArgUse(*UI, CI->getParent(), IsFloat, SinCalls, CosCalls, + SinCosCalls); + + // It's only worthwhile if both sinpi and cospi are actually used. + if (SinCosCalls.empty() && (SinCalls.empty() || CosCalls.empty())) + return 0; + + Value *Sin, *Cos, *SinCos; + insertSinCosCall(B, CI->getCalledFunction(), Arg, IsFloat, Sin, Cos, + SinCos); + + replaceTrigInsts(SinCalls, Sin); + replaceTrigInsts(CosCalls, Cos); + replaceTrigInsts(SinCosCalls, SinCos); + + return 0; + } + + bool isTrigLibCall(CallInst *CI) { + Function *Callee = CI->getCalledFunction(); + FunctionType *FT = Callee->getFunctionType(); + + // We can only hope to do anything useful if we can ignore things like errno + // and floating-point exceptions. + bool AttributesSafe = CI->hasFnAttr(Attribute::NoUnwind) && + CI->hasFnAttr(Attribute::ReadNone); + + // Other than that we need float(float) or double(double) + return AttributesSafe && FT->getNumParams() == 1 && + FT->getReturnType() == FT->getParamType(0) && + (FT->getParamType(0)->isFloatTy() || + FT->getParamType(0)->isDoubleTy()); + } + + void classifyArgUse(Value *Val, BasicBlock *BB, bool IsFloat, + SmallVectorImpl<CallInst *> &SinCalls, + SmallVectorImpl<CallInst *> &CosCalls, + SmallVectorImpl<CallInst *> &SinCosCalls) { + CallInst *CI = dyn_cast<CallInst>(Val); + + if (!CI) + return; + + Function *Callee = CI->getCalledFunction(); + StringRef FuncName = Callee->getName(); + LibFunc::Func Func; + if (!TLI->getLibFunc(FuncName, Func) || !TLI->has(Func) || + !isTrigLibCall(CI)) + return; + + if (IsFloat) { + if (Func == LibFunc::sinpif) + SinCalls.push_back(CI); + else if (Func == LibFunc::cospif) + CosCalls.push_back(CI); + else if (Func == LibFunc::sincospi_stretf) + SinCosCalls.push_back(CI); + } else { + if (Func == LibFunc::sinpi) + SinCalls.push_back(CI); + else if (Func == LibFunc::cospi) + CosCalls.push_back(CI); + else if (Func == LibFunc::sincospi_stret) + SinCosCalls.push_back(CI); + } + } + + void replaceTrigInsts(SmallVectorImpl<CallInst*> &Calls, Value *Res) { + for (SmallVectorImpl<CallInst*>::iterator I = Calls.begin(), + E = Calls.end(); + I != E; ++I) { + LCS->replaceAllUsesWith(*I, Res); + } + } + + void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg, + bool UseFloat, Value *&Sin, Value *&Cos, + Value *&SinCos) { + Type *ArgTy = Arg->getType(); + Type *ResTy; + StringRef Name; + + Triple T(OrigCallee->getParent()->getTargetTriple()); + if (UseFloat) { + Name = "__sincospi_stretf"; + + assert(T.getArch() != Triple::x86 && "x86 messy and unsupported for now"); + // x86_64 can't use {float, float} since that would be returned in both + // xmm0 and xmm1, which isn't what a real struct would do. + ResTy = T.getArch() == Triple::x86_64 + ? static_cast<Type *>(VectorType::get(ArgTy, 2)) + : static_cast<Type *>(StructType::get(ArgTy, ArgTy, NULL)); + } else { + Name = "__sincospi_stret"; + ResTy = StructType::get(ArgTy, ArgTy, NULL); + } + + Module *M = OrigCallee->getParent(); + Value *Callee = M->getOrInsertFunction(Name, OrigCallee->getAttributes(), + ResTy, ArgTy, NULL); + + if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) { + // If the argument is an instruction, it must dominate all uses so put our + // sincos call there. + BasicBlock::iterator Loc = ArgInst; + B.SetInsertPoint(ArgInst->getParent(), ++Loc); + } else { + // Otherwise (e.g. for a constant) the beginning of the function is as + // good a place as any. + BasicBlock &EntryBB = B.GetInsertBlock()->getParent()->getEntryBlock(); + B.SetInsertPoint(&EntryBB, EntryBB.begin()); + } + + SinCos = B.CreateCall(Callee, Arg, "sincospi"); + + if (SinCos->getType()->isStructTy()) { + Sin = B.CreateExtractValue(SinCos, 0, "sinpi"); + Cos = B.CreateExtractValue(SinCos, 1, "cospi"); + } else { + Sin = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 0), + "sinpi"); + Cos = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 1), + "cospi"); + } + } + +}; + +//===----------------------------------------------------------------------===// +// Integer Library Call Optimizations +//===----------------------------------------------------------------------===// + +struct FFSOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + // Just make sure this has 2 arguments of the same FP type, which match the + // result type. + if (FT->getNumParams() != 1 || + !FT->getReturnType()->isIntegerTy(32) || + !FT->getParamType(0)->isIntegerTy()) + return 0; + + Value *Op = CI->getArgOperand(0); + + // Constant fold. + if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { + if (CI->isZero()) // ffs(0) -> 0. + return B.getInt32(0); + // ffs(c) -> cttz(c)+1 + return B.getInt32(CI->getValue().countTrailingZeros() + 1); + } + + // ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0 + Type *ArgType = Op->getType(); + Value *F = Intrinsic::getDeclaration(Callee->getParent(), + Intrinsic::cttz, ArgType); + Value *V = B.CreateCall2(F, Op, B.getFalse(), "cttz"); + V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1)); + V = B.CreateIntCast(V, B.getInt32Ty(), false); + + Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType)); + return B.CreateSelect(Cond, V, B.getInt32(0)); + } +}; + +struct AbsOpt : public LibCallOptimization { + virtual bool ignoreCallingConv() { return true; } + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + // We require integer(integer) where the types agree. + if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || + FT->getParamType(0) != FT->getReturnType()) + return 0; + + // abs(x) -> x >s -1 ? x : -x + Value *Op = CI->getArgOperand(0); + Value *Pos = B.CreateICmpSGT(Op, Constant::getAllOnesValue(Op->getType()), + "ispos"); + Value *Neg = B.CreateNeg(Op, "neg"); + return B.CreateSelect(Pos, Op, Neg); + } +}; + +struct IsDigitOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + // We require integer(i32) + if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || + !FT->getParamType(0)->isIntegerTy(32)) + return 0; + + // isdigit(c) -> (c-'0') <u 10 + Value *Op = CI->getArgOperand(0); + Op = B.CreateSub(Op, B.getInt32('0'), "isdigittmp"); + Op = B.CreateICmpULT(Op, B.getInt32(10), "isdigit"); + return B.CreateZExt(Op, CI->getType()); + } +}; + +struct IsAsciiOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + // We require integer(i32) + if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || + !FT->getParamType(0)->isIntegerTy(32)) + return 0; + + // isascii(c) -> c <u 128 + Value *Op = CI->getArgOperand(0); + Op = B.CreateICmpULT(Op, B.getInt32(128), "isascii"); + return B.CreateZExt(Op, CI->getType()); + } +}; + +struct ToAsciiOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + // We require i32(i32) + if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isIntegerTy(32)) + return 0; + + // toascii(c) -> c & 0x7f + return B.CreateAnd(CI->getArgOperand(0), + ConstantInt::get(CI->getType(),0x7F)); + } +}; + +//===----------------------------------------------------------------------===// +// Formatting and IO Library Call Optimizations +//===----------------------------------------------------------------------===// + +struct ErrorReportingOpt : public LibCallOptimization { + ErrorReportingOpt(int S = -1) : StreamArg(S) {} + + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &) { + // Error reporting calls should be cold, mark them as such. + // This applies even to non-builtin calls: it is only a hint and applies to + // functions that the frontend might not understand as builtins. + + // This heuristic was suggested in: + // Improving Static Branch Prediction in a Compiler + // Brian L. Deitrich, Ben-Chung Cheng, Wen-mei W. Hwu + // Proceedings of PACT'98, Oct. 1998, IEEE + + if (!CI->hasFnAttr(Attribute::Cold) && isReportingError(Callee, CI)) { + CI->addAttribute(AttributeSet::FunctionIndex, Attribute::Cold); + } + + return 0; + } + +protected: + bool isReportingError(Function *Callee, CallInst *CI) { + if (!ColdErrorCalls) + return false; + + if (!Callee || !Callee->isDeclaration()) + return false; + + if (StreamArg < 0) + return true; + + // These functions might be considered cold, but only if their stream + // argument is stderr. + + if (StreamArg >= (int) CI->getNumArgOperands()) + return false; + LoadInst *LI = dyn_cast<LoadInst>(CI->getArgOperand(StreamArg)); + if (!LI) + return false; + GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getPointerOperand()); + if (!GV || !GV->isDeclaration()) + return false; + return GV->getName() == "stderr"; + } + + int StreamArg; +}; + +struct PrintFOpt : public LibCallOptimization { + Value *optimizeFixedFormatString(Function *Callee, CallInst *CI, + IRBuilder<> &B) { + // Check for a fixed format string. + StringRef FormatStr; + if (!getConstantStringInfo(CI->getArgOperand(0), FormatStr)) + return 0; + + // Empty format string -> noop. + if (FormatStr.empty()) // Tolerate printf's declared void. + return CI->use_empty() ? (Value*)CI : + ConstantInt::get(CI->getType(), 0); + + // Do not do any of the following transformations if the printf return value + // is used, in general the printf return value is not compatible with either + // putchar() or puts(). + if (!CI->use_empty()) + return 0; + + // printf("x") -> putchar('x'), even for '%'. + if (FormatStr.size() == 1) { + Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TD, TLI); + if (CI->use_empty() || !Res) return Res; + return B.CreateIntCast(Res, CI->getType(), true); + } + + // printf("foo\n") --> puts("foo") + if (FormatStr[FormatStr.size()-1] == '\n' && + FormatStr.find('%') == StringRef::npos) { // No format characters. + // Create a string literal with no \n on it. We expect the constant merge + // pass to be run after this pass, to merge duplicate strings. + FormatStr = FormatStr.drop_back(); + Value *GV = B.CreateGlobalString(FormatStr, "str"); + Value *NewCI = EmitPutS(GV, B, TD, TLI); + return (CI->use_empty() || !NewCI) ? + NewCI : + ConstantInt::get(CI->getType(), FormatStr.size()+1); + } + + // Optimize specific format strings. + // printf("%c", chr) --> putchar(chr) + if (FormatStr == "%c" && CI->getNumArgOperands() > 1 && + CI->getArgOperand(1)->getType()->isIntegerTy()) { + Value *Res = EmitPutChar(CI->getArgOperand(1), B, TD, TLI); + + if (CI->use_empty() || !Res) return Res; + return B.CreateIntCast(Res, CI->getType(), true); + } + + // printf("%s\n", str) --> puts(str) + if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 && + CI->getArgOperand(1)->getType()->isPointerTy()) { + return EmitPutS(CI->getArgOperand(1), B, TD, TLI); + } + return 0; + } + + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Require one fixed pointer argument and an integer/void result. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() || + !(FT->getReturnType()->isIntegerTy() || + FT->getReturnType()->isVoidTy())) + return 0; + + if (Value *V = optimizeFixedFormatString(Callee, CI, B)) { + return V; + } + + // printf(format, ...) -> iprintf(format, ...) if no floating point + // arguments. + if (TLI->has(LibFunc::iprintf) && !callHasFloatingPointArgument(CI)) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + Constant *IPrintFFn = + M->getOrInsertFunction("iprintf", FT, Callee->getAttributes()); + CallInst *New = cast<CallInst>(CI->clone()); + New->setCalledFunction(IPrintFFn); + B.Insert(New); + return New; + } + return 0; + } +}; + +struct SPrintFOpt : public LibCallOptimization { + Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI, + IRBuilder<> &B) { + // Check for a fixed format string. + StringRef FormatStr; + if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr)) + return 0; + + // If we just have a format string (nothing else crazy) transform it. + if (CI->getNumArgOperands() == 2) { + // Make sure there's no % in the constant array. We could try to handle + // %% -> % in the future if we cared. + for (unsigned i = 0, e = FormatStr.size(); i != e; ++i) + if (FormatStr[i] == '%') + return 0; // we found a format specifier, bail out. + + // These optimizations require DataLayout. + if (!TD) return 0; + + // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1) + B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), + ConstantInt::get(TD->getIntPtrType(*Context), // Copy the + FormatStr.size() + 1), 1); // nul byte. + return ConstantInt::get(CI->getType(), FormatStr.size()); + } + + // The remaining optimizations require the format string to be "%s" or "%c" + // and have an extra operand. + if (FormatStr.size() != 2 || FormatStr[0] != '%' || + CI->getNumArgOperands() < 3) + return 0; + + // Decode the second character of the format string. + if (FormatStr[1] == 'c') { + // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0 + if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0; + Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char"); + Value *Ptr = CastToCStr(CI->getArgOperand(0), B); + B.CreateStore(V, Ptr); + Ptr = B.CreateGEP(Ptr, B.getInt32(1), "nul"); + B.CreateStore(B.getInt8(0), Ptr); + + return ConstantInt::get(CI->getType(), 1); + } + + if (FormatStr[1] == 's') { + // These optimizations require DataLayout. + if (!TD) return 0; + + // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1) + if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0; + + Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD, TLI); + if (!Len) + return 0; + Value *IncLen = B.CreateAdd(Len, + ConstantInt::get(Len->getType(), 1), + "leninc"); + B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1); + + // The sprintf result is the unincremented number of bytes in the string. + return B.CreateIntCast(Len, CI->getType(), false); + } + return 0; + } + + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Require two fixed pointer arguments and an integer result. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + !FT->getReturnType()->isIntegerTy()) + return 0; + + if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) { + return V; + } + + // sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating + // point arguments. + if (TLI->has(LibFunc::siprintf) && !callHasFloatingPointArgument(CI)) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + Constant *SIPrintFFn = + M->getOrInsertFunction("siprintf", FT, Callee->getAttributes()); + CallInst *New = cast<CallInst>(CI->clone()); + New->setCalledFunction(SIPrintFFn); + B.Insert(New); + return New; + } + return 0; + } +}; + +struct FPrintFOpt : public LibCallOptimization { + Value *optimizeFixedFormatString(Function *Callee, CallInst *CI, + IRBuilder<> &B) { + ErrorReportingOpt ER(/* StreamArg = */ 0); + (void) ER.callOptimizer(Callee, CI, B); + + // All the optimizations depend on the format string. + StringRef FormatStr; + if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr)) + return 0; + + // Do not do any of the following transformations if the fprintf return + // value is used, in general the fprintf return value is not compatible + // with fwrite(), fputc() or fputs(). + if (!CI->use_empty()) + return 0; + + // fprintf(F, "foo") --> fwrite("foo", 3, 1, F) + if (CI->getNumArgOperands() == 2) { + for (unsigned i = 0, e = FormatStr.size(); i != e; ++i) + if (FormatStr[i] == '%') // Could handle %% -> % if we cared. + return 0; // We found a format specifier. + + // These optimizations require DataLayout. + if (!TD) return 0; + + return EmitFWrite(CI->getArgOperand(1), + ConstantInt::get(TD->getIntPtrType(*Context), + FormatStr.size()), + CI->getArgOperand(0), B, TD, TLI); + } + + // The remaining optimizations require the format string to be "%s" or "%c" + // and have an extra operand. + if (FormatStr.size() != 2 || FormatStr[0] != '%' || + CI->getNumArgOperands() < 3) + return 0; + + // Decode the second character of the format string. + if (FormatStr[1] == 'c') { + // fprintf(F, "%c", chr) --> fputc(chr, F) + if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0; + return EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI); + } + + if (FormatStr[1] == 's') { + // fprintf(F, "%s", str) --> fputs(str, F) + if (!CI->getArgOperand(2)->getType()->isPointerTy()) + return 0; + return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI); + } + return 0; + } + + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Require two fixed paramters as pointers and integer result. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + !FT->getReturnType()->isIntegerTy()) + return 0; + + if (Value *V = optimizeFixedFormatString(Callee, CI, B)) { + return V; + } + + // fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no + // floating point arguments. + if (TLI->has(LibFunc::fiprintf) && !callHasFloatingPointArgument(CI)) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + Constant *FIPrintFFn = + M->getOrInsertFunction("fiprintf", FT, Callee->getAttributes()); + CallInst *New = cast<CallInst>(CI->clone()); + New->setCalledFunction(FIPrintFFn); + B.Insert(New); + return New; + } + return 0; + } +}; + +struct FWriteOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + ErrorReportingOpt ER(/* StreamArg = */ 3); + (void) ER.callOptimizer(Callee, CI, B); + + // Require a pointer, an integer, an integer, a pointer, returning integer. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 4 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isIntegerTy() || + !FT->getParamType(2)->isIntegerTy() || + !FT->getParamType(3)->isPointerTy() || + !FT->getReturnType()->isIntegerTy()) + return 0; + + // Get the element size and count. + ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); + ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); + if (!SizeC || !CountC) return 0; + uint64_t Bytes = SizeC->getZExtValue()*CountC->getZExtValue(); + + // If this is writing zero records, remove the call (it's a noop). + if (Bytes == 0) + return ConstantInt::get(CI->getType(), 0); + + // If this is writing one byte, turn it into fputc. + // This optimisation is only valid, if the return value is unused. + if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F) + Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char"); + Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, TD, TLI); + return NewCI ? ConstantInt::get(CI->getType(), 1) : 0; + } + + return 0; + } +}; + +struct FPutsOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + ErrorReportingOpt ER(/* StreamArg = */ 1); + (void) ER.callOptimizer(Callee, CI, B); + + // These optimizations require DataLayout. + if (!TD) return 0; + + // Require two pointers. Also, we can't optimize if return value is used. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + !CI->use_empty()) + return 0; + + // fputs(s,F) --> fwrite(s,1,strlen(s),F) + uint64_t Len = GetStringLength(CI->getArgOperand(0)); + if (!Len) return 0; + // Known to have no uses (see above). + return EmitFWrite(CI->getArgOperand(0), + ConstantInt::get(TD->getIntPtrType(*Context), Len-1), + CI->getArgOperand(1), B, TD, TLI); + } +}; + +struct PutsOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Require one fixed pointer argument and an integer/void result. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() || + !(FT->getReturnType()->isIntegerTy() || + FT->getReturnType()->isVoidTy())) + return 0; + + // Check for a constant string. + StringRef Str; + if (!getConstantStringInfo(CI->getArgOperand(0), Str)) + return 0; + + if (Str.empty() && CI->use_empty()) { + // puts("") -> putchar('\n') + Value *Res = EmitPutChar(B.getInt32('\n'), B, TD, TLI); + if (CI->use_empty() || !Res) return Res; + return B.CreateIntCast(Res, CI->getType(), true); + } + + return 0; + } +}; + +} // End anonymous namespace. + +namespace llvm { + +class LibCallSimplifierImpl { + const DataLayout *TD; + const TargetLibraryInfo *TLI; + const LibCallSimplifier *LCS; + bool UnsafeFPShrink; + + // Math library call optimizations. + CosOpt Cos; + PowOpt Pow; + Exp2Opt Exp2; +public: + LibCallSimplifierImpl(const DataLayout *TD, const TargetLibraryInfo *TLI, + const LibCallSimplifier *LCS, + bool UnsafeFPShrink = false) + : Cos(UnsafeFPShrink), Pow(UnsafeFPShrink), Exp2(UnsafeFPShrink) { + this->TD = TD; + this->TLI = TLI; + this->LCS = LCS; + this->UnsafeFPShrink = UnsafeFPShrink; + } + + Value *optimizeCall(CallInst *CI); + LibCallOptimization *lookupOptimization(CallInst *CI); + bool hasFloatVersion(StringRef FuncName); +}; + +bool LibCallSimplifierImpl::hasFloatVersion(StringRef FuncName) { + LibFunc::Func Func; + SmallString<20> FloatFuncName = FuncName; + FloatFuncName += 'f'; + if (TLI->getLibFunc(FloatFuncName, Func)) + return TLI->has(Func); + return false; +} + +// Fortified library call optimizations. +static MemCpyChkOpt MemCpyChk; +static MemMoveChkOpt MemMoveChk; +static MemSetChkOpt MemSetChk; +static StrCpyChkOpt StrCpyChk; +static StpCpyChkOpt StpCpyChk; +static StrNCpyChkOpt StrNCpyChk; + +// String library call optimizations. +static StrCatOpt StrCat; +static StrNCatOpt StrNCat; +static StrChrOpt StrChr; +static StrRChrOpt StrRChr; +static StrCmpOpt StrCmp; +static StrNCmpOpt StrNCmp; +static StrCpyOpt StrCpy; +static StpCpyOpt StpCpy; +static StrNCpyOpt StrNCpy; +static StrLenOpt StrLen; +static StrPBrkOpt StrPBrk; +static StrToOpt StrTo; +static StrSpnOpt StrSpn; +static StrCSpnOpt StrCSpn; +static StrStrOpt StrStr; + +// Memory library call optimizations. +static MemCmpOpt MemCmp; +static MemCpyOpt MemCpy; +static MemMoveOpt MemMove; +static MemSetOpt MemSet; + +// Math library call optimizations. +static UnaryDoubleFPOpt UnaryDoubleFP(false); +static UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); +static SinCosPiOpt SinCosPi; + + // Integer library call optimizations. +static FFSOpt FFS; +static AbsOpt Abs; +static IsDigitOpt IsDigit; +static IsAsciiOpt IsAscii; +static ToAsciiOpt ToAscii; + +// Formatting and IO library call optimizations. +static ErrorReportingOpt ErrorReporting; +static ErrorReportingOpt ErrorReporting0(0); +static ErrorReportingOpt ErrorReporting1(1); +static PrintFOpt PrintF; +static SPrintFOpt SPrintF; +static FPrintFOpt FPrintF; +static FWriteOpt FWrite; +static FPutsOpt FPuts; +static PutsOpt Puts; + +LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) { + LibFunc::Func Func; + Function *Callee = CI->getCalledFunction(); + StringRef FuncName = Callee->getName(); + + // Next check for intrinsics. + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) { + switch (II->getIntrinsicID()) { + case Intrinsic::pow: + return &Pow; + case Intrinsic::exp2: + return &Exp2; + default: + return 0; + } + } + + // Then check for known library functions. + if (TLI->getLibFunc(FuncName, Func) && TLI->has(Func)) { + switch (Func) { + case LibFunc::strcat: + return &StrCat; + case LibFunc::strncat: + return &StrNCat; + case LibFunc::strchr: + return &StrChr; + case LibFunc::strrchr: + return &StrRChr; + case LibFunc::strcmp: + return &StrCmp; + case LibFunc::strncmp: + return &StrNCmp; + case LibFunc::strcpy: + return &StrCpy; + case LibFunc::stpcpy: + return &StpCpy; + case LibFunc::strncpy: + return &StrNCpy; + case LibFunc::strlen: + return &StrLen; + case LibFunc::strpbrk: + return &StrPBrk; + case LibFunc::strtol: + case LibFunc::strtod: + case LibFunc::strtof: + case LibFunc::strtoul: + case LibFunc::strtoll: + case LibFunc::strtold: + case LibFunc::strtoull: + return &StrTo; + case LibFunc::strspn: + return &StrSpn; + case LibFunc::strcspn: + return &StrCSpn; + case LibFunc::strstr: + return &StrStr; + case LibFunc::memcmp: + return &MemCmp; + case LibFunc::memcpy: + return &MemCpy; + case LibFunc::memmove: + return &MemMove; + case LibFunc::memset: + return &MemSet; + case LibFunc::cosf: + case LibFunc::cos: + case LibFunc::cosl: + return &Cos; + case LibFunc::sinpif: + case LibFunc::sinpi: + case LibFunc::cospif: + case LibFunc::cospi: + return &SinCosPi; + case LibFunc::powf: + case LibFunc::pow: + case LibFunc::powl: + return &Pow; + case LibFunc::exp2l: + case LibFunc::exp2: + case LibFunc::exp2f: + return &Exp2; + case LibFunc::ffs: + case LibFunc::ffsl: + case LibFunc::ffsll: + return &FFS; + case LibFunc::abs: + case LibFunc::labs: + case LibFunc::llabs: + return &Abs; + case LibFunc::isdigit: + return &IsDigit; + case LibFunc::isascii: + return &IsAscii; + case LibFunc::toascii: + return &ToAscii; + case LibFunc::printf: + return &PrintF; + case LibFunc::sprintf: + return &SPrintF; + case LibFunc::fprintf: + return &FPrintF; + case LibFunc::fwrite: + return &FWrite; + case LibFunc::fputs: + return &FPuts; + case LibFunc::puts: + return &Puts; + case LibFunc::perror: + return &ErrorReporting; + case LibFunc::vfprintf: + case LibFunc::fiprintf: + return &ErrorReporting0; + case LibFunc::fputc: + return &ErrorReporting1; + case LibFunc::ceil: + case LibFunc::fabs: + case LibFunc::floor: + case LibFunc::rint: + case LibFunc::round: + case LibFunc::nearbyint: + case LibFunc::trunc: + if (hasFloatVersion(FuncName)) + return &UnaryDoubleFP; + return 0; + case LibFunc::acos: + case LibFunc::acosh: + case LibFunc::asin: + case LibFunc::asinh: + case LibFunc::atan: + case LibFunc::atanh: + case LibFunc::cbrt: + case LibFunc::cosh: + case LibFunc::exp: + case LibFunc::exp10: + case LibFunc::expm1: + case LibFunc::log: + case LibFunc::log10: + case LibFunc::log1p: + case LibFunc::log2: + case LibFunc::logb: + case LibFunc::sin: + case LibFunc::sinh: + case LibFunc::sqrt: + case LibFunc::tan: + case LibFunc::tanh: + if (UnsafeFPShrink && hasFloatVersion(FuncName)) + return &UnsafeUnaryDoubleFP; + return 0; + case LibFunc::memcpy_chk: + return &MemCpyChk; + default: + return 0; + } + } + + // Finally check for fortified library calls. + if (FuncName.endswith("_chk")) { + if (FuncName == "__memmove_chk") + return &MemMoveChk; + else if (FuncName == "__memset_chk") + return &MemSetChk; + else if (FuncName == "__strcpy_chk") + return &StrCpyChk; + else if (FuncName == "__stpcpy_chk") + return &StpCpyChk; + else if (FuncName == "__strncpy_chk") + return &StrNCpyChk; + else if (FuncName == "__stpncpy_chk") + return &StrNCpyChk; + } + + return 0; + +} + +Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) { + LibCallOptimization *LCO = lookupOptimization(CI); + if (LCO) { + IRBuilder<> Builder(CI); + return LCO->optimizeCall(CI, TD, TLI, LCS, Builder); + } + return 0; +} + +LibCallSimplifier::LibCallSimplifier(const DataLayout *TD, + const TargetLibraryInfo *TLI, + bool UnsafeFPShrink) { + Impl = new LibCallSimplifierImpl(TD, TLI, this, UnsafeFPShrink); +} + +LibCallSimplifier::~LibCallSimplifier() { + delete Impl; +} + +Value *LibCallSimplifier::optimizeCall(CallInst *CI) { + if (CI->isNoBuiltin()) return 0; + return Impl->optimizeCall(CI); +} + +void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) const { + I->replaceAllUsesWith(With); + I->eraseFromParent(); +} + +} + +// TODO: +// Additional cases that we need to add to this file: +// +// cbrt: +// * cbrt(expN(X)) -> expN(x/3) +// * cbrt(sqrt(x)) -> pow(x,1/6) +// * cbrt(sqrt(x)) -> pow(x,1/9) +// +// exp, expf, expl: +// * exp(log(x)) -> x +// +// log, logf, logl: +// * log(exp(x)) -> x +// * log(x**y) -> y*log(x) +// * log(exp(y)) -> y*log(e) +// * log(exp2(y)) -> y*log(2) +// * log(exp10(y)) -> y*log(10) +// * log(sqrt(x)) -> 0.5*log(x) +// * log(pow(x,y)) -> y*log(x) +// +// lround, lroundf, lroundl: +// * lround(cnst) -> cnst' +// +// pow, powf, powl: +// * pow(exp(x),y) -> exp(x*y) +// * pow(sqrt(x),y) -> pow(x,y*0.5) +// * pow(pow(x,y),z)-> pow(x,y*z) +// +// round, roundf, roundl: +// * round(cnst) -> cnst' +// +// signbit: +// * signbit(cnst) -> cnst' +// * signbit(nncst) -> 0 (if pstv is a non-negative constant) +// +// sqrt, sqrtf, sqrtl: +// * sqrt(expN(x)) -> expN(x*0.5) +// * sqrt(Nroot(x)) -> pow(x,1/(2*N)) +// * sqrt(pow(x,y)) -> pow(|x|,y*0.5) +// +// strchr: +// * strchr(p, 0) -> strlen(p) +// tan, tanf, tanl: +// * tan(atan(x)) -> x +// +// trunc, truncf, truncl: +// * trunc(cnst) -> cnst' +// +// diff --git a/contrib/llvm/lib/Transforms/Utils/SpecialCaseList.cpp b/contrib/llvm/lib/Transforms/Utils/SpecialCaseList.cpp new file mode 100644 index 000000000000..2ef692c564c6 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/SpecialCaseList.cpp @@ -0,0 +1,222 @@ +//===-- SpecialCaseList.cpp - special case list for sanitizers ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is a utility class for instrumentation passes (like AddressSanitizer +// or ThreadSanitizer) to avoid instrumenting some functions or global +// variables, or to instrument some functions or global variables in a specific +// way, based on a user-supplied list. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/SpecialCaseList.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Regex.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/system_error.h" +#include <string> +#include <utility> + +namespace llvm { + +/// Represents a set of regular expressions. Regular expressions which are +/// "literal" (i.e. no regex metacharacters) are stored in Strings, while all +/// others are represented as a single pipe-separated regex in RegEx. The +/// reason for doing so is efficiency; StringSet is much faster at matching +/// literal strings than Regex. +struct SpecialCaseList::Entry { + StringSet<> Strings; + Regex *RegEx; + + Entry() : RegEx(0) {} + + bool match(StringRef Query) const { + return Strings.count(Query) || (RegEx && RegEx->match(Query)); + } +}; + +SpecialCaseList::SpecialCaseList() : Entries() {} + +SpecialCaseList *SpecialCaseList::create( + const StringRef Path, std::string &Error) { + if (Path.empty()) + return new SpecialCaseList(); + OwningPtr<MemoryBuffer> File; + if (error_code EC = MemoryBuffer::getFile(Path, File)) { + Error = (Twine("Can't open file '") + Path + "': " + EC.message()).str(); + return 0; + } + return create(File.get(), Error); +} + +SpecialCaseList *SpecialCaseList::create( + const MemoryBuffer *MB, std::string &Error) { + OwningPtr<SpecialCaseList> SCL(new SpecialCaseList()); + if (!SCL->parse(MB, Error)) + return 0; + return SCL.take(); +} + +SpecialCaseList *SpecialCaseList::createOrDie(const StringRef Path) { + std::string Error; + if (SpecialCaseList *SCL = create(Path, Error)) + return SCL; + report_fatal_error(Error); +} + +bool SpecialCaseList::parse(const MemoryBuffer *MB, std::string &Error) { + // Iterate through each line in the blacklist file. + SmallVector<StringRef, 16> Lines; + SplitString(MB->getBuffer(), Lines, "\n\r"); + StringMap<StringMap<std::string> > Regexps; + assert(Entries.empty() && + "parse() should be called on an empty SpecialCaseList"); + int LineNo = 1; + for (SmallVectorImpl<StringRef>::iterator I = Lines.begin(), E = Lines.end(); + I != E; ++I, ++LineNo) { + // Ignore empty lines and lines starting with "#" + if (I->empty() || I->startswith("#")) + continue; + // Get our prefix and unparsed regexp. + std::pair<StringRef, StringRef> SplitLine = I->split(":"); + StringRef Prefix = SplitLine.first; + if (SplitLine.second.empty()) { + // Missing ':' in the line. + Error = (Twine("Malformed line ") + Twine(LineNo) + ": '" + + SplitLine.first + "'").str(); + return false; + } + + std::pair<StringRef, StringRef> SplitRegexp = SplitLine.second.split("="); + std::string Regexp = SplitRegexp.first; + StringRef Category = SplitRegexp.second; + + // Backwards compatibility. + if (Prefix == "global-init") { + Prefix = "global"; + Category = "init"; + } else if (Prefix == "global-init-type") { + Prefix = "type"; + Category = "init"; + } else if (Prefix == "global-init-src") { + Prefix = "src"; + Category = "init"; + } + + // See if we can store Regexp in Strings. + if (Regex::isLiteralERE(Regexp)) { + Entries[Prefix][Category].Strings.insert(Regexp); + continue; + } + + // Replace * with .* + for (size_t pos = 0; (pos = Regexp.find("*", pos)) != std::string::npos; + pos += strlen(".*")) { + Regexp.replace(pos, strlen("*"), ".*"); + } + + // Check that the regexp is valid. + Regex CheckRE(Regexp); + std::string REError; + if (!CheckRE.isValid(REError)) { + Error = (Twine("Malformed regex in line ") + Twine(LineNo) + ": '" + + SplitLine.second + "': " + REError).str(); + return false; + } + + // Add this regexp into the proper group by its prefix. + if (!Regexps[Prefix][Category].empty()) + Regexps[Prefix][Category] += "|"; + Regexps[Prefix][Category] += "^" + Regexp + "$"; + } + + // Iterate through each of the prefixes, and create Regexs for them. + for (StringMap<StringMap<std::string> >::const_iterator I = Regexps.begin(), + E = Regexps.end(); + I != E; ++I) { + for (StringMap<std::string>::const_iterator II = I->second.begin(), + IE = I->second.end(); + II != IE; ++II) { + Entries[I->getKey()][II->getKey()].RegEx = new Regex(II->getValue()); + } + } + return true; +} + +SpecialCaseList::~SpecialCaseList() { + for (StringMap<StringMap<Entry> >::iterator I = Entries.begin(), + E = Entries.end(); + I != E; ++I) { + for (StringMap<Entry>::const_iterator II = I->second.begin(), + IE = I->second.end(); + II != IE; ++II) { + delete II->second.RegEx; + } + } +} + +bool SpecialCaseList::isIn(const Function& F, const StringRef Category) const { + return isIn(*F.getParent(), Category) || + inSectionCategory("fun", F.getName(), Category); +} + +static StringRef GetGlobalTypeString(const GlobalValue &G) { + // Types of GlobalVariables are always pointer types. + Type *GType = G.getType()->getElementType(); + // For now we support blacklisting struct types only. + if (StructType *SGType = dyn_cast<StructType>(GType)) { + if (!SGType->isLiteral()) + return SGType->getName(); + } + return "<unknown type>"; +} + +bool SpecialCaseList::isIn(const GlobalVariable &G, + const StringRef Category) const { + return isIn(*G.getParent(), Category) || + inSectionCategory("global", G.getName(), Category) || + inSectionCategory("type", GetGlobalTypeString(G), Category); +} + +bool SpecialCaseList::isIn(const GlobalAlias &GA, + const StringRef Category) const { + if (isIn(*GA.getParent(), Category)) + return true; + + if (isa<FunctionType>(GA.getType()->getElementType())) + return inSectionCategory("fun", GA.getName(), Category); + + return inSectionCategory("global", GA.getName(), Category) || + inSectionCategory("type", GetGlobalTypeString(GA), Category); +} + +bool SpecialCaseList::isIn(const Module &M, const StringRef Category) const { + return inSectionCategory("src", M.getModuleIdentifier(), Category); +} + +bool SpecialCaseList::inSectionCategory(const StringRef Section, + const StringRef Query, + const StringRef Category) const { + StringMap<StringMap<Entry> >::const_iterator I = Entries.find(Section); + if (I == Entries.end()) return false; + StringMap<Entry>::const_iterator II = I->second.find(Category); + if (II == I->second.end()) return false; + + return II->getValue().match(Query); +} + +} // namespace llvm diff --git a/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp new file mode 100644 index 000000000000..560f58160753 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp @@ -0,0 +1,122 @@ +//===- UnifyFunctionExitNodes.cpp - Make all functions have a single exit -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass is used to ensure that functions have at most one return +// instruction in them. Additionally, it keeps track of which node is the new +// exit node of the CFG. If there are no exit nodes in the CFG, the getExitNode +// method will return a null pointer. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Type.h" +#include "llvm/Transforms/Scalar.h" +using namespace llvm; + +char UnifyFunctionExitNodes::ID = 0; +INITIALIZE_PASS(UnifyFunctionExitNodes, "mergereturn", + "Unify function exit nodes", false, false) + +Pass *llvm::createUnifyFunctionExitNodesPass() { + return new UnifyFunctionExitNodes(); +} + +void UnifyFunctionExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{ + // We preserve the non-critical-edgeness property + AU.addPreservedID(BreakCriticalEdgesID); + // This is a cluster of orthogonal Transforms + AU.addPreserved("mem2reg"); + AU.addPreservedID(LowerSwitchID); +} + +// UnifyAllExitNodes - Unify all exit nodes of the CFG by creating a new +// BasicBlock, and converting all returns to unconditional branches to this +// new basic block. The singular exit node is returned. +// +// If there are no return stmts in the Function, a null pointer is returned. +// +bool UnifyFunctionExitNodes::runOnFunction(Function &F) { + // Loop over all of the blocks in a function, tracking all of the blocks that + // return. + // + std::vector<BasicBlock*> ReturningBlocks; + std::vector<BasicBlock*> UnreachableBlocks; + for(Function::iterator I = F.begin(), E = F.end(); I != E; ++I) + if (isa<ReturnInst>(I->getTerminator())) + ReturningBlocks.push_back(I); + else if (isa<UnreachableInst>(I->getTerminator())) + UnreachableBlocks.push_back(I); + + // Then unreachable blocks. + if (UnreachableBlocks.empty()) { + UnreachableBlock = 0; + } else if (UnreachableBlocks.size() == 1) { + UnreachableBlock = UnreachableBlocks.front(); + } else { + UnreachableBlock = BasicBlock::Create(F.getContext(), + "UnifiedUnreachableBlock", &F); + new UnreachableInst(F.getContext(), UnreachableBlock); + + for (std::vector<BasicBlock*>::iterator I = UnreachableBlocks.begin(), + E = UnreachableBlocks.end(); I != E; ++I) { + BasicBlock *BB = *I; + BB->getInstList().pop_back(); // Remove the unreachable inst. + BranchInst::Create(UnreachableBlock, BB); + } + } + + // Now handle return blocks. + if (ReturningBlocks.empty()) { + ReturnBlock = 0; + return false; // No blocks return + } else if (ReturningBlocks.size() == 1) { + ReturnBlock = ReturningBlocks.front(); // Already has a single return block + return false; + } + + // Otherwise, we need to insert a new basic block into the function, add a PHI + // nodes (if the function returns values), and convert all of the return + // instructions into unconditional branches. + // + BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(), + "UnifiedReturnBlock", &F); + + PHINode *PN = 0; + if (F.getReturnType()->isVoidTy()) { + ReturnInst::Create(F.getContext(), NULL, NewRetBlock); + } else { + // If the function doesn't return void... add a PHI node to the block... + PN = PHINode::Create(F.getReturnType(), ReturningBlocks.size(), + "UnifiedRetVal"); + NewRetBlock->getInstList().push_back(PN); + ReturnInst::Create(F.getContext(), PN, NewRetBlock); + } + + // Loop over all of the blocks, replacing the return instruction with an + // unconditional branch. + // + for (std::vector<BasicBlock*>::iterator I = ReturningBlocks.begin(), + E = ReturningBlocks.end(); I != E; ++I) { + BasicBlock *BB = *I; + + // Add an incoming element to the PHI node for every return instruction that + // is merging into this new block... + if (PN) + PN->addIncoming(BB->getTerminator()->getOperand(0), BB); + + BB->getInstList().pop_back(); // Remove the return insn + BranchInst::Create(NewRetBlock, BB); + } + ReturnBlock = NewRetBlock; + return true; +} diff --git a/contrib/llvm/lib/Transforms/Utils/Utils.cpp b/contrib/llvm/lib/Transforms/Utils/Utils.cpp new file mode 100644 index 000000000000..c3df215c294a --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/Utils.cpp @@ -0,0 +1,39 @@ +//===-- Utils.cpp - TransformUtils Infrastructure -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the common initialization infrastructure for the +// TransformUtils library. +// +//===----------------------------------------------------------------------===// + +#include "llvm/InitializePasses.h" +#include "llvm/PassRegistry.h" +#include "llvm-c/Initialization.h" + +using namespace llvm; + +/// initializeTransformUtils - Initialize all passes in the TransformUtils +/// library. +void llvm::initializeTransformUtils(PassRegistry &Registry) { + initializeBreakCriticalEdgesPass(Registry); + initializeInstNamerPass(Registry); + initializeLCSSAPass(Registry); + initializeLoopSimplifyPass(Registry); + initializeLowerInvokePass(Registry); + initializeLowerSwitchPass(Registry); + initializePromotePassPass(Registry); + initializeUnifyFunctionExitNodesPass(Registry); + initializeInstSimplifierPass(Registry); + initializeMetaRenamerPass(Registry); +} + +/// LLVMInitializeTransformUtils - C binding for initializeTransformUtilsPasses. +void LLVMInitializeTransformUtils(LLVMPassRegistryRef R) { + initializeTransformUtils(*unwrap(R)); +} diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp new file mode 100644 index 000000000000..457fc80e1ea5 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp @@ -0,0 +1,224 @@ +//===- ValueMapper.cpp - Interface shared by lib/Transforms/Utils ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the MapValue function, which is shared by various parts of +// the lib/Transforms/Utils library. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/ValueMapper.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Metadata.h" +using namespace llvm; + +// Out of line method to get vtable etc for class. +void ValueMapTypeRemapper::anchor() {} +void ValueMaterializer::anchor() {} + +Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, + ValueMapTypeRemapper *TypeMapper, + ValueMaterializer *Materializer) { + ValueToValueMapTy::iterator I = VM.find(V); + + // If the value already exists in the map, use it. + if (I != VM.end() && I->second) return I->second; + + // If we have a materializer and it can materialize a value, use that. + if (Materializer) { + if (Value *NewV = Materializer->materializeValueFor(const_cast<Value*>(V))) + return VM[V] = NewV; + } + + // Global values do not need to be seeded into the VM if they + // are using the identity mapping. + if (isa<GlobalValue>(V) || isa<MDString>(V)) + return VM[V] = const_cast<Value*>(V); + + if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) { + // Inline asm may need *type* remapping. + FunctionType *NewTy = IA->getFunctionType(); + if (TypeMapper) { + NewTy = cast<FunctionType>(TypeMapper->remapType(NewTy)); + + if (NewTy != IA->getFunctionType()) + V = InlineAsm::get(NewTy, IA->getAsmString(), IA->getConstraintString(), + IA->hasSideEffects(), IA->isAlignStack()); + } + + return VM[V] = const_cast<Value*>(V); + } + + + if (const MDNode *MD = dyn_cast<MDNode>(V)) { + // If this is a module-level metadata and we know that nothing at the module + // level is changing, then use an identity mapping. + if (!MD->isFunctionLocal() && (Flags & RF_NoModuleLevelChanges)) + return VM[V] = const_cast<Value*>(V); + + // Create a dummy node in case we have a metadata cycle. + MDNode *Dummy = MDNode::getTemporary(V->getContext(), None); + VM[V] = Dummy; + + // Check all operands to see if any need to be remapped. + for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) { + Value *OP = MD->getOperand(i); + if (OP == 0) continue; + Value *Mapped_OP = MapValue(OP, VM, Flags, TypeMapper, Materializer); + // Use identity map if Mapped_Op is null and we can ignore missing + // entries. + if (Mapped_OP == OP || + (Mapped_OP == 0 && (Flags & RF_IgnoreMissingEntries))) + continue; + + // Ok, at least one operand needs remapping. + SmallVector<Value*, 4> Elts; + Elts.reserve(MD->getNumOperands()); + for (i = 0; i != e; ++i) { + Value *Op = MD->getOperand(i); + if (Op == 0) + Elts.push_back(0); + else { + Value *Mapped_Op = MapValue(Op, VM, Flags, TypeMapper, Materializer); + // Use identity map if Mapped_Op is null and we can ignore missing + // entries. + if (Mapped_Op == 0 && (Flags & RF_IgnoreMissingEntries)) + Mapped_Op = Op; + Elts.push_back(Mapped_Op); + } + } + MDNode *NewMD = MDNode::get(V->getContext(), Elts); + Dummy->replaceAllUsesWith(NewMD); + VM[V] = NewMD; + MDNode::deleteTemporary(Dummy); + return NewMD; + } + + VM[V] = const_cast<Value*>(V); + MDNode::deleteTemporary(Dummy); + + // No operands needed remapping. Use an identity mapping. + return const_cast<Value*>(V); + } + + // Okay, this either must be a constant (which may or may not be mappable) or + // is something that is not in the mapping table. + Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V)); + if (C == 0) + return 0; + + if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) { + Function *F = + cast<Function>(MapValue(BA->getFunction(), VM, Flags, TypeMapper, Materializer)); + BasicBlock *BB = cast_or_null<BasicBlock>(MapValue(BA->getBasicBlock(), VM, + Flags, TypeMapper, Materializer)); + return VM[V] = BlockAddress::get(F, BB ? BB : BA->getBasicBlock()); + } + + // Otherwise, we have some other constant to remap. Start by checking to see + // if all operands have an identity remapping. + unsigned OpNo = 0, NumOperands = C->getNumOperands(); + Value *Mapped = 0; + for (; OpNo != NumOperands; ++OpNo) { + Value *Op = C->getOperand(OpNo); + Mapped = MapValue(Op, VM, Flags, TypeMapper, Materializer); + if (Mapped != C) break; + } + + // See if the type mapper wants to remap the type as well. + Type *NewTy = C->getType(); + if (TypeMapper) + NewTy = TypeMapper->remapType(NewTy); + + // If the result type and all operands match up, then just insert an identity + // mapping. + if (OpNo == NumOperands && NewTy == C->getType()) + return VM[V] = C; + + // Okay, we need to create a new constant. We've already processed some or + // all of the operands, set them all up now. + SmallVector<Constant*, 8> Ops; + Ops.reserve(NumOperands); + for (unsigned j = 0; j != OpNo; ++j) + Ops.push_back(cast<Constant>(C->getOperand(j))); + + // If one of the operands mismatch, push it and the other mapped operands. + if (OpNo != NumOperands) { + Ops.push_back(cast<Constant>(Mapped)); + + // Map the rest of the operands that aren't processed yet. + for (++OpNo; OpNo != NumOperands; ++OpNo) + Ops.push_back(MapValue(cast<Constant>(C->getOperand(OpNo)), VM, + Flags, TypeMapper, Materializer)); + } + + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) + return VM[V] = CE->getWithOperands(Ops, NewTy); + if (isa<ConstantArray>(C)) + return VM[V] = ConstantArray::get(cast<ArrayType>(NewTy), Ops); + if (isa<ConstantStruct>(C)) + return VM[V] = ConstantStruct::get(cast<StructType>(NewTy), Ops); + if (isa<ConstantVector>(C)) + return VM[V] = ConstantVector::get(Ops); + // If this is a no-operand constant, it must be because the type was remapped. + if (isa<UndefValue>(C)) + return VM[V] = UndefValue::get(NewTy); + if (isa<ConstantAggregateZero>(C)) + return VM[V] = ConstantAggregateZero::get(NewTy); + assert(isa<ConstantPointerNull>(C)); + return VM[V] = ConstantPointerNull::get(cast<PointerType>(NewTy)); +} + +/// RemapInstruction - Convert the instruction operands from referencing the +/// current values into those specified by VMap. +/// +void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap, + RemapFlags Flags, ValueMapTypeRemapper *TypeMapper, + ValueMaterializer *Materializer){ + // Remap operands. + for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) { + Value *V = MapValue(*op, VMap, Flags, TypeMapper, Materializer); + // If we aren't ignoring missing entries, assert that something happened. + if (V != 0) + *op = V; + else + assert((Flags & RF_IgnoreMissingEntries) && + "Referenced value not in value map!"); + } + + // Remap phi nodes' incoming blocks. + if (PHINode *PN = dyn_cast<PHINode>(I)) { + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *V = MapValue(PN->getIncomingBlock(i), VMap, Flags); + // If we aren't ignoring missing entries, assert that something happened. + if (V != 0) + PN->setIncomingBlock(i, cast<BasicBlock>(V)); + else + assert((Flags & RF_IgnoreMissingEntries) && + "Referenced block not in value map!"); + } + } + + // Remap attached metadata. + SmallVector<std::pair<unsigned, MDNode *>, 4> MDs; + I->getAllMetadata(MDs); + for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator + MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI) { + MDNode *Old = MI->second; + MDNode *New = MapValue(Old, VMap, Flags, TypeMapper, Materializer); + if (New != Old) + I->setMetadata(MI->first, New); + } + + // If the instruction's type is being remapped, do so now. + if (TypeMapper) + I->mutateType(TypeMapper->remapType(I->getType())); +} |