aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm/lib/Transforms/Utils
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Transforms/Utils')
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp771
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp370
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp597
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp262
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp574
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CloneModule.cpp122
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp96
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp779
-rw-r--r--contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp149
-rw-r--r--contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp486
-rw-r--r--contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp183
-rw-r--r--contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp932
-rw-r--r--contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp64
-rw-r--r--contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp524
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LCSSA.cpp281
-rw-r--r--contrib/llvm/lib/Transforms/Utils/Local.cpp1273
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp801
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp457
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp374
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp174
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp579
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp323
-rw-r--r--contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp90
-rw-r--r--contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp145
-rw-r--r--contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp82
-rw-r--r--contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp1079
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp492
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp4156
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp393
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp100
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp2253
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SpecialCaseList.cpp222
-rw-r--r--contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp122
-rw-r--r--contrib/llvm/lib/Transforms/Utils/Utils.cpp39
-rw-r--r--contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp224
35 files changed, 19568 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
new file mode 100644
index 000000000000..12de9eed4b85
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -0,0 +1,771 @@
+//===-- BasicBlockUtils.cpp - BasicBlock Utilities -------------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions performs manipulations on basic blocks, and
+// instructions contained within basic blocks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <algorithm>
+using namespace llvm;
+
+/// DeleteDeadBlock - Erase a block that can no longer be reached.  The block
+/// must have no predecessors; the one exception is a block whose single
+/// predecessor is itself (a self loop).
+void llvm::DeleteDeadBlock(BasicBlock *BB) {
+  assert((pred_begin(BB) == pred_end(BB) ||
+          // Can delete self loop.
+          BB->getSinglePredecessor() == BB) && "Block is not dead!");
+
+  // Let every successor know that the edge coming from this block is about
+  // to disappear.
+  TerminatorInst *Term = BB->getTerminator();
+  unsigned NumSuccs = Term->getNumSuccessors();
+  for (unsigned SuccIdx = 0; SuccIdx != NumSuccs; ++SuccIdx) {
+    BasicBlock *Succ = Term->getSuccessor(SuccIdx);
+    Succ->removePredecessor(BB);
+  }
+
+  // Drain the block from the back.  An instruction that still has users gets
+  // those users pointed at undef first: control flow cannot reach this block,
+  // so the replacement value is irrelevant, and any such users are themselves
+  // dead and will eventually be removed.
+  for (; !BB->empty(); BB->getInstList().pop_back()) {
+    Instruction &Last = BB->back();
+    if (Last.use_empty())
+      continue;
+    Last.replaceAllUsesWith(UndefValue::get(Last.getType()));
+  }
+
+  // Finally remove the now-empty block from its function.
+  BB->eraseFromParent();
+}
+
+/// FoldSingleEntryPHINodes - The caller guarantees that every entry of each
+/// PHI node in BB is equal, e.g. because BB has exactly one predecessor.  Each
+/// leading PHI node is replaced by its first incoming value (or by undef when
+/// that value is the PHI itself) and erased.  If P is non-null, the
+/// MemoryDependenceAnalysis or AliasAnalysis it has available is notified.
+void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, Pass *P) {
+  // Nothing to fold, and no analyses to look up, unless BB starts with a PHI.
+  if (!isa<PHINode>(BB->begin()))
+    return;
+
+  AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0;
+  MemoryDependenceAnalysis *MemDep =
+      P ? P->getAnalysisIfAvailable<MemoryDependenceAnalysis>() : 0;
+
+  // Each iteration erases the leading PHI, so keep re-reading the block's
+  // first instruction until it is no longer a PHI.
+  for (PHINode *PN = dyn_cast<PHINode>(BB->begin()); PN;
+       PN = dyn_cast<PHINode>(BB->begin())) {
+    Value *Replacement = PN->getIncomingValue(0);
+    if (Replacement == PN)
+      Replacement = UndefValue::get(PN->getType());
+    PN->replaceAllUsesWith(Replacement);
+
+    if (MemDep) {
+      // Memdep updates AA itself.
+      MemDep->removeInstruction(PN);
+    } else if (AA && isa<PointerType>(PN->getType())) {
+      AA->deleteValue(PN);
+    }
+
+    PN->eraseFromParent();
+  }
+}
+
+
+/// DeleteDeadPHIs - Visit every PHI node at the top of BB and hand it to
+/// RecursivelyDeleteDeadPHINode (forwarding TLI).  Returns true if any of
+/// those calls reported a change.
+bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) {
+  // Snapshot the PHIs through weak handles: deleting one PHI recursively may
+  // delete other PHIs of this block or RAUW them with undef, and a weak
+  // handle lets us notice that instead of touching a dangling pointer.
+  SmallVector<WeakVH, 8> Candidates;
+  BasicBlock::iterator It = BB->begin();
+  while (PHINode *PN = dyn_cast<PHINode>(It)) {
+    Candidates.push_back(PN);
+    ++It;
+  }
+
+  bool Changed = false;
+  for (unsigned Idx = 0, NumCandidates = Candidates.size();
+       Idx != NumCandidates; ++Idx) {
+    // The handle yields null or a non-PHI value if the PHI is already gone.
+    Value *Tracked = Candidates[Idx];
+    PHINode *PN = dyn_cast_or_null<PHINode>(Tracked);
+    if (!PN)
+      continue;
+    if (RecursivelyDeleteDeadPHINode(PN, TLI))
+      Changed = true;
+  }
+
+  return Changed;
+}
+
+/// MergeBlockIntoPredecessor - Attempts to merge a block into its predecessor,
+/// if possible.  The return value indicates success or failure.
+///
+/// The merge is refused when BB has its address taken, when BB does not have
+/// a unique predecessor, when that predecessor is BB itself, when the
+/// predecessor ends in an invoke or has any successor other than BB, or when
+/// a PHI node in BB uses itself as an incoming value.
+///
+/// If P is non-null, the DominatorTree, LoopInfo and MemoryDependenceAnalysis
+/// it has available are each updated independently of the others.
+bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
+  // Don't merge away blocks who have their address taken.
+  if (BB->hasAddressTaken()) return false;
+
+  // Can't merge if there are multiple predecessors, or no predecessors.
+  BasicBlock *PredBB = BB->getUniquePredecessor();
+  if (!PredBB) return false;
+
+  // Don't break self-loops.
+  if (PredBB == BB) return false;
+  // Don't break invokes.
+  if (isa<InvokeInst>(PredBB->getTerminator())) return false;
+
+  succ_iterator SI(succ_begin(PredBB)), SE(succ_end(PredBB));
+  BasicBlock *OnlySucc = BB;
+  for (; SI != SE; ++SI)
+    if (*SI != OnlySucc) {
+      OnlySucc = 0;     // There are multiple distinct successors!
+      break;
+    }
+
+  // Can't merge if there are multiple successors.
+  if (!OnlySucc) return false;
+
+  // Can't merge if there is PHI loop.
+  for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) {
+    if (PHINode *PN = dyn_cast<PHINode>(BI)) {
+      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+        if (PN->getIncomingValue(i) == PN)
+          return false;
+    } else
+      break;
+  }
+
+  // Begin by getting rid of unneeded PHIs.
+  if (isa<PHINode>(BB->front()))
+    FoldSingleEntryPHINodes(BB, P);
+
+  // Delete the unconditional branch from the predecessor...
+  PredBB->getInstList().pop_back();
+
+  // Make all PHI nodes that referred to BB now refer to Pred as their
+  // source...
+  BB->replaceAllUsesWith(PredBB);
+
+  // Move all definitions in the successor to the predecessor...
+  PredBB->getInstList().splice(PredBB->end(), BB->getInstList());
+
+  // If the predecessor is unnamed, let it take over the name of the block
+  // that was merged into it.
+  if (!PredBB->hasName())
+    PredBB->takeName(BB);
+
+  // Finally, update whatever analyses are available and erase the old block.
+  // Each analysis is handled on its own: LoopInfo and MemoryDependenceAnalysis
+  // must not be left stale just because no DominatorTree happens to be
+  // available.
+  if (P) {
+    if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) {
+      if (DomTreeNode *DTN = DT->getNode(BB)) {
+        // Re-parent BB's dominator tree children onto PredBB.  Work on a copy
+        // of the child list because changeImmediateDominator edits it.
+        DomTreeNode *PredDTN = DT->getNode(PredBB);
+        SmallVector<DomTreeNode*, 8> Children(DTN->begin(), DTN->end());
+        for (SmallVectorImpl<DomTreeNode *>::iterator DI = Children.begin(),
+             DE = Children.end(); DI != DE; ++DI)
+          DT->changeImmediateDominator(*DI, PredDTN);
+
+        DT->eraseNode(BB);
+      }
+    }
+
+    if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>())
+      LI->removeBlock(BB);
+
+    if (MemoryDependenceAnalysis *MD =
+          P->getAnalysisIfAvailable<MemoryDependenceAnalysis>())
+      MD->invalidateCachedPredecessors();
+  }
+
+  BB->eraseFromParent();
+  return true;
+}
+
+/// ReplaceInstWithValue - Redirect every use of the instruction at BI to V,
+/// then erase that instruction from BIL.  On return BI holds the iterator
+/// that BIL.erase() handed back.
+///
+void llvm::ReplaceInstWithValue(BasicBlock::InstListType &BIL,
+                                BasicBlock::iterator &BI, Value *V) {
+  Instruction &Old = *BI;
+
+  // All users of the old instruction now use V instead.
+  Old.replaceAllUsesWith(V);
+
+  // Hand the old instruction's name over to V, unless V already has one.
+  if (Old.hasName()) {
+    if (!V->hasName())
+      V->takeName(&Old);
+  }
+
+  // The old instruction is no longer needed.
+  BI = BIL.erase(BI);
+}
+
+
+/// ReplaceInstWithInst - Swap the instruction at BI for the not-yet-inserted
+/// instruction I.  The old instruction is deleted, its uses are redirected to
+/// I, and BI ends up pointing at I.
+///
+void llvm::ReplaceInstWithInst(BasicBlock::InstListType &BIL,
+                               BasicBlock::iterator &BI, Instruction *I) {
+  assert(I->getParent() == 0 &&
+         "ReplaceInstWithInst: Instruction already inserted into basic block!");
+
+  // Put the replacement in front of the instruction being replaced and
+  // remember where it landed.
+  BasicBlock::iterator Inserted = BIL.insert(BI, I);
+
+  // Redirect the users and delete the old instruction.  This reassigns BI.
+  ReplaceInstWithValue(BIL, BI, I);
+
+  // Leave BI on the newly inserted instruction.
+  BI = Inserted;
+}
+
+/// ReplaceInstWithInst - Convenience form: replace From, wherever it lives,
+/// with the instruction To by forwarding to the list-based overload.
+///
+void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) {
+  BasicBlock::InstListType &Insts = From->getParent()->getInstList();
+  BasicBlock::iterator Pos(From);
+  ReplaceInstWithInst(Insts, Pos, To);
+}
+
+/// SplitEdge - Split the edge from BB to Succ by placing a block on it, and
+/// return the block produced by the split.  Pass P must not be NULL.
+BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
+  TerminatorInst *Term = BB->getTerminator();
+  unsigned EdgeIdx = GetSuccessorNumber(BB, Succ);
+
+  // A critical edge is handled entirely by SplitCriticalEdge; the block it
+  // inserted is now the successor at the same index.
+  if (SplitCriticalEdge(Term, EdgeIdx, P))
+    return Term->getSuccessor(EdgeIdx);
+
+  // The edge is not critical, so either BB has a single successor or Succ has
+  // a single predecessor.
+  if (Succ->getSinglePredecessor() == 0) {
+    // Succ has other predecessors, so BB must have a single successor: split
+    // at the bottom of BB.
+    assert(BB->getTerminator()->getNumSuccessors() == 1 &&
+           "Should have a single succ!");
+    return SplitBlock(BB, BB->getTerminator(), P);
+  }
+
+  // Succ has a single predecessor: split at the top of Succ.
+  assert(Succ->getSinglePredecessor() == BB && "CFG broken");
+  return SplitBlock(Succ, Succ->begin(), P);
+}
+
+/// SplitBlock - Split the specified block at the specified instruction - every
+/// thing before SplitPt stays in Old and everything starting with SplitPt moves
+/// to a new block.  The two blocks are joined by an unconditional branch.  If
+/// SplitPt is a PHI node or a landingpad, the split point is first advanced
+/// past all such instructions.
+///
+/// P may be null, in which case no analysis is updated.  Otherwise LoopInfo
+/// and DominatorTree are updated if P has them available.  Returns the new
+/// block.
+///
+BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
+  BasicBlock::iterator SplitIt = SplitPt;
+  while (isa<PHINode>(SplitIt) || isa<LandingPadInst>(SplitIt))
+    ++SplitIt;
+  BasicBlock *New = Old->splitBasicBlock(SplitIt, Old->getName()+".split");
+
+  // Without a pass there are no analyses to keep up to date.  The other
+  // helpers in this file accept a null pass as well.
+  if (!P)
+    return New;
+
+  // The new block lives in whichever loop the old one did.  This preserves
+  // LCSSA as well, because we force the split point to be after any PHI nodes.
+  if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>())
+    if (Loop *L = LI->getLoopFor(Old))
+      L->addBasicBlockToLoop(New, LI->getBase());
+
+  if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) {
+    // Old dominates New. New node dominates all other nodes dominated by Old.
+    if (DomTreeNode *OldNode = DT->getNode(Old)) {
+      // Copy the child list first: re-parenting children below edits the
+      // list we would otherwise be iterating over.
+      std::vector<DomTreeNode *> Children(OldNode->begin(), OldNode->end());
+
+      DomTreeNode *NewNode = DT->addNewBlock(New, Old);
+      for (std::vector<DomTreeNode *>::iterator I = Children.begin(),
+             E = Children.end(); I != E; ++I)
+        DT->changeImmediateDominator(*I, NewNode);
+    }
+  }
+
+  return New;
+}
+
+/// UpdateAnalysisInformation - Update DominatorTree, LoopInfo, and LCSSA
+/// analysis information for NewBB, the new block between Preds and OldBB.
+static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
+ ArrayRef<BasicBlock *> Preds,
+ Pass *P, bool &HasLoopExit) { // HasLoopExit is only ever set to true here.
+ if (!P) return; // No pass means there are no analyses to update.
+
+ LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>();
+ Loop *L = LI ? LI->getLoopFor(OldBB) : 0; // Null if LoopInfo is unavailable.
+
+ // If we need to preserve loop analyses, collect some information about how
+ // this split will affect loops.
+ bool IsLoopEntry = !!L; // Cleared below as soon as one pred is inside L.
+ bool SplitMakesNewLoopHeader = false; // Set below if a pred is outside L.
+ if (LI) {
+ bool PreserveLCSSA = P->mustPreserveAnalysisID(LCSSAID);
+ for (ArrayRef<BasicBlock*>::iterator
+ i = Preds.begin(), e = Preds.end(); i != e; ++i) {
+ BasicBlock *Pred = *i;
+
+ // If we need to preserve LCSSA, determine if any of the preds is a loop
+ // exit, i.e. lies in a loop that does not contain OldBB.
+ if (PreserveLCSSA)
+ if (Loop *PL = LI->getLoopFor(Pred))
+ if (!PL->contains(OldBB))
+ HasLoopExit = true;
+
+ // If we need to preserve LoopInfo, note whether any of the preds crosses
+ // an interesting loop boundary.
+ if (!L) continue;
+ if (L->contains(Pred))
+ IsLoopEntry = false;
+ else
+ SplitMakesNewLoopHeader = true;
+ }
+ }
+
+ // Update dominator tree if available.
+ DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
+ if (DT)
+ DT->splitBlock(NewBB);
+
+ if (!L) return; // OldBB is in no known loop, so LoopInfo needs no update.
+
+ if (IsLoopEntry) {
+ // Add the new block to the nearest enclosing loop (and not an adjacent
+ // loop). To find this, examine each of the predecessors and determine which
+ // loops enclose them, and select the most-nested loop which contains the
+ // loop containing the block being split.
+ Loop *InnermostPredLoop = 0;
+ for (ArrayRef<BasicBlock*>::iterator
+ i = Preds.begin(), e = Preds.end(); i != e; ++i) {
+ BasicBlock *Pred = *i;
+ if (Loop *PredLoop = LI->getLoopFor(Pred)) {
+ // Seek a loop which actually contains the block being split (to avoid
+ // adjacent loops).
+ while (PredLoop && !PredLoop->contains(OldBB))
+ PredLoop = PredLoop->getParentLoop();
+
+ // Select the most-nested of these loops which contains the block.
+ if (PredLoop && PredLoop->contains(OldBB) &&
+ (!InnermostPredLoop ||
+ InnermostPredLoop->getLoopDepth() < PredLoop->getLoopDepth()))
+ InnermostPredLoop = PredLoop;
+ }
+ }
+
+ if (InnermostPredLoop) // Otherwise NewBB is not added to any loop.
+ InnermostPredLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+ } else {
+ L->addBasicBlockToLoop(NewBB, LI->getBase()); // Some pred is inside L.
+ if (SplitMakesNewLoopHeader)
+ L->moveToHeader(NewBB);
+ }
+}
+
+/// UpdatePHINodes - Update the PHI nodes in OrigBB to include the values coming
+/// from NewBB. This also updates AliasAnalysis, if available. Preds is non-empty.
+static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
+ ArrayRef<BasicBlock*> Preds, BranchInst *BI, // BI: insertion point for new PHIs.
+ Pass *P, bool HasLoopExit) {
+ // For each PHI in OrigBB, fold its entries for Preds into one entry for NewBB.
+ AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0;
+ for (BasicBlock::iterator I = OrigBB->begin(); isa<PHINode>(I); ) {
+ PHINode *PN = cast<PHINode>(I++);
+
+ // Check to see if all of the values coming in are the same. If so, we
+ // don't need to create a new PHI node, unless it's needed for LCSSA.
+ Value *InVal = 0;
+ if (!HasLoopExit) {
+ InVal = PN->getIncomingValueForBlock(Preds[0]);
+ for (unsigned i = 1, e = Preds.size(); i != e; ++i)
+ if (InVal != PN->getIncomingValueForBlock(Preds[i])) {
+ InVal = 0; // The incoming values differ, so a new PHI is required.
+ break;
+ }
+ }
+
+ if (InVal) {
+ // If all incoming values for the new PHI would be the same, just don't
+ // make a new PHI. Instead, just remove the incoming values from the old
+ // PHI.
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
+ // Explicitly check the BB index here to handle duplicates in Preds.
+ int Idx = PN->getBasicBlockIndex(Preds[i]);
+ if (Idx >= 0)
+ PN->removeIncomingValue(Idx, false);
+ }
+ } else {
+ // If the values coming into the block are not the same, we need a PHI.
+ // Create the new PHI node and insert it in front of BI.
+ PHINode *NewPHI =
+ PHINode::Create(PN->getType(), Preds.size(), PN->getName() + ".ph", BI);
+ if (AA) AA->copyValue(PN, NewPHI);
+
+ // Move all of the PHI values for 'Preds' to the new PHI.
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
+ Value *V = PN->removeIncomingValue(Preds[i], false);
+ NewPHI->addIncoming(V, Preds[i]);
+ }
+
+ InVal = NewPHI;
+ }
+
+ // Add the incoming value for the new edge from NewBB to the original PHI:
+ // either the common value found above or the newly created PHI.
+ PN->addIncoming(InVal, NewBB);
+ }
+}
+
+/// SplitBlockPredecessors - Introduce a new basic block in front of BB and
+/// redirect the edges coming from the blocks listed in Preds so that they
+/// enter the new block, which in turn branches unconditionally to BB.  The new
+/// block is named after BB with 'Suffix' appended, and is returned.
+///
+/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree,
+/// LoopInfo, and LCSSA but no other analyses. In particular, it does not
+/// preserve LoopSimplify (because it's complicated to handle the case where one
+/// of the edges being split is an exit of a loop with other exits).
+///
+BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
+                                         ArrayRef<BasicBlock*> Preds,
+                                         const char *Suffix, Pass *P) {
+  // The new block is placed directly ahead of BB in the function and ends in
+  // an unconditional branch to BB.
+  BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix,
+                                         BB->getParent(), BB);
+  BranchInst *BI = BranchInst::Create(BB, NewBB);
+
+  // Retarget each listed predecessor from BB to NewBB.
+  for (ArrayRef<BasicBlock*>::iterator PI = Preds.begin(), PE = Preds.end();
+       PI != PE; ++PI) {
+    TerminatorInst *PredTerm = (*PI)->getTerminator();
+    // This is slightly more strict than necessary; the minimum requirement
+    // is that there be no more than one indirectbr branching to BB. And
+    // all BlockAddress uses would need to be updated.
+    assert(!isa<IndirectBrInst>(PredTerm) &&
+           "Cannot split an edge from an IndirectBrInst");
+    PredTerm->replaceUsesOfWith(BB, NewBB);
+  }
+
+  if (!Preds.empty()) {
+    // Update DominatorTree, LoopInfo, and LCSSA analysis information, then
+    // route the PHI values of BB that came from Preds through NewBB.
+    bool HasLoopExit = false;
+    UpdateAnalysisInformation(BB, NewBB, Preds, P, HasLoopExit);
+    UpdatePHINodes(BB, NewBB, Preds, BI, P, HasLoopExit);
+    return NewBB;
+  }
+
+  // No predecessor was moved, yet NewBB is a brand new predecessor of BB:
+  // give every PHI node in BB a dummy (undef) entry for it.
+  BasicBlock::iterator I = BB->begin();
+  while (PHINode *PN = dyn_cast<PHINode>(I)) {
+    PN->addIncoming(UndefValue::get(PN->getType()), NewBB);
+    ++I;
+  }
+  return NewBB;
+}
+
+/// SplitLandingPadPredecessors - This method transforms the landing pad,
+/// OrigBB, by introducing up to two new basic blocks into the function. The
+/// first new block gets the predecessors listed in Preds. The second, created
+/// only if any remain, gets the other predecessors of OrigBB. The landingpad
+/// instruction of OrigBB is cloned into each new basic block. The new blocks
+/// get the suffixes 'Suffix1' and 'Suffix2', and are appended to NewBBs.
+///
+/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree,
+/// LoopInfo, and LCSSA but no other analyses. In particular,
+/// it does not preserve LoopSimplify (because it's complicated to handle the
+/// case where one of the edges being split is an exit of a loop with other
+/// exits).
+///
+void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
+ ArrayRef<BasicBlock*> Preds,
+ const char *Suffix1, const char *Suffix2,
+ Pass *P,
+ SmallVectorImpl<BasicBlock*> &NewBBs) {
+ assert(OrigBB->isLandingPad() && "Trying to split a non-landing pad!");
+
+ // Create a new basic block for OrigBB's predecessors listed in Preds. Insert
+ // it right before the original block.
+ BasicBlock *NewBB1 = BasicBlock::Create(OrigBB->getContext(),
+ OrigBB->getName() + Suffix1,
+ OrigBB->getParent(), OrigBB);
+ NewBBs.push_back(NewBB1);
+
+ // The new block unconditionally branches to the old block.
+ BranchInst *BI1 = BranchInst::Create(OrigBB, NewBB1);
+
+ // Move the edges from Preds to point to NewBB1 instead of OrigBB.
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
+ // This is slightly more strict than necessary; the minimum requirement
+ // is that there be no more than one indirectbr branching to BB. And
+ // all BlockAddress uses would need to be updated.
+ assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) &&
+ "Cannot split an edge from an IndirectBrInst");
+ Preds[i]->getTerminator()->replaceUsesOfWith(OrigBB, NewBB1);
+ }
+
+ // Update DominatorTree, LoopInfo, and LCSSA analysis information.
+ bool HasLoopExit = false;
+ UpdateAnalysisInformation(OrigBB, NewBB1, Preds, P, HasLoopExit);
+
+ // Update the PHI nodes in OrigBB with the values coming from NewBB1.
+ UpdatePHINodes(OrigBB, NewBB1, Preds, BI1, P, HasLoopExit);
+
+ // Collect the remaining predecessors of OrigBB; they are moved to NewBB2.
+ SmallVector<BasicBlock*, 8> NewBB2Preds;
+ for (pred_iterator i = pred_begin(OrigBB), e = pred_end(OrigBB);
+ i != e; ) {
+ BasicBlock *Pred = *i++;
+ if (Pred == NewBB1) continue; // NewBB1 already branches to OrigBB; skip it.
+ assert(!isa<IndirectBrInst>(Pred->getTerminator()) &&
+ "Cannot split an edge from an IndirectBrInst");
+ NewBB2Preds.push_back(Pred);
+ e = pred_end(OrigBB); // NOTE(review): refreshed every iteration; reason not visible here.
+ }
+
+ BasicBlock *NewBB2 = 0; // Stays null when Preds covered every predecessor.
+ if (!NewBB2Preds.empty()) {
+ // Create another basic block for the rest of OrigBB's predecessors.
+ NewBB2 = BasicBlock::Create(OrigBB->getContext(),
+ OrigBB->getName() + Suffix2,
+ OrigBB->getParent(), OrigBB);
+ NewBBs.push_back(NewBB2);
+
+ // The new block unconditionally branches to the old block.
+ BranchInst *BI2 = BranchInst::Create(OrigBB, NewBB2);
+
+ // Move the remaining edges from OrigBB to point to NewBB2.
+ for (SmallVectorImpl<BasicBlock*>::iterator
+ i = NewBB2Preds.begin(), e = NewBB2Preds.end(); i != e; ++i)
+ (*i)->getTerminator()->replaceUsesOfWith(OrigBB, NewBB2);
+
+ // Update DominatorTree, LoopInfo, and LCSSA analysis information.
+ HasLoopExit = false;
+ UpdateAnalysisInformation(OrigBB, NewBB2, NewBB2Preds, P, HasLoopExit);
+
+ // Update the PHI nodes in OrigBB with the values coming from NewBB2.
+ UpdatePHINodes(OrigBB, NewBB2, NewBB2Preds, BI2, P, HasLoopExit);
+ }
+
+ LandingPadInst *LPad = OrigBB->getLandingPadInst();
+ Instruction *Clone1 = LPad->clone(); // NewBB1 gets its own landingpad copy.
+ Clone1->setName(Twine("lpad") + Suffix1);
+ NewBB1->getInstList().insert(NewBB1->getFirstInsertionPt(), Clone1);
+
+ if (NewBB2) {
+ Instruction *Clone2 = LPad->clone();
+ Clone2->setName(Twine("lpad") + Suffix2);
+ NewBB2->getInstList().insert(NewBB2->getFirstInsertionPt(), Clone2);
+
+ // Create a PHI node for the two cloned landingpad instructions.
+ PHINode *PN = PHINode::Create(LPad->getType(), 2, "lpad.phi", LPad);
+ PN->addIncoming(Clone1, NewBB1);
+ PN->addIncoming(Clone2, NewBB2);
+ LPad->replaceAllUsesWith(PN);
+ LPad->eraseFromParent();
+ } else {
+ // There is no second clone. Just replace the landing pad with the first
+ // clone.
+ LPad->replaceAllUsesWith(Clone1);
+ LPad->eraseFromParent();
+ }
+}
+
+/// FoldReturnIntoUncondBranch - Copy the return instruction RI (located in
+/// BB) to the end of Pred, a predecessor of BB ending in an unconditional
+/// branch, and remove that branch.  Returned values that are PHI nodes of BB,
+/// possibly looked at through a bitcast, are replaced by the value the PHI
+/// receives from Pred.  The new return instruction is returned.
+ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
+                                             BasicBlock *Pred) {
+  // Grab the branch before appending: afterwards the clone is the terminator.
+  Instruction *OldBranch = Pred->getTerminator();
+  Instruction *NewRet = RI->clone();
+  Pred->getInstList().push_back(NewRet);
+
+  for (unsigned OpIdx = 0, NumOps = NewRet->getNumOperands(); OpIdx != NumOps;
+       ++OpIdx) {
+    Value *V = NewRet->getOperand(OpIdx);
+
+    // The returned value might be bitcasted.  If so, give Pred its own copy of
+    // the cast, placed right before the new return, and look through it.
+    Instruction *NewBC = 0;
+    if (BitCastInst *BCI = dyn_cast<BitCastInst>(V)) {
+      V = BCI->getOperand(0);
+      NewBC = BCI->clone();
+      Pred->getInstList().insert(NewRet, NewBC);
+      NewRet->setOperand(OpIdx, NewBC);
+    }
+
+    // Only PHI nodes that live in BB need translating.
+    PHINode *PN = dyn_cast<PHINode>(V);
+    if (!PN || PN->getParent() != BB)
+      continue;
+
+    // Feed the value arriving from Pred into the cloned cast if there is one,
+    // otherwise straight into the return.
+    Value *FromPred = PN->getIncomingValueForBlock(Pred);
+    if (NewBC)
+      NewBC->setOperand(0, FromPred);
+    else
+      NewRet->setOperand(OpIdx, FromPred);
+  }
+
+  // Pred no longer branches to BB: update BB's PHI nodes accordingly and get
+  // rid of the old branch.
+  BB->removePredecessor(Pred);
+  OldBranch->eraseFromParent();
+  return cast<ReturnInst>(NewRet);
+}
+
+/// SplitBlockAndInsertIfThen - Split the block containing Cmp right after
+/// Cmp: everything up to and including Cmp stays in the old block (Head),
+/// everything after it moves to a new block (Tail).  A new, otherwise empty
+/// block (ThenBlock) is created, and Head is terminated by a conditional
+/// branch on Cmp that enters ThenBlock when Cmp is true and Tail otherwise.
+/// Before:
+///   Head
+///   Cmp
+///   Tail
+/// After:
+///   Head
+///   Cmp
+///   if (Cmp)
+///     ThenBlock
+///   Tail
+///
+/// If Unreachable is true, ThenBlock ends with an UnreachableInst, otherwise
+/// it branches to Tail.  BranchWeights is attached to the conditional branch
+/// as MD_prof metadata.
+/// Returns the terminator of ThenBlock.
+
+TerminatorInst *llvm::SplitBlockAndInsertIfThen(Instruction *Cmp,
+    bool Unreachable, MDNode *BranchWeights) {
+  // Cut the block immediately after Cmp.
+  Instruction *FirstOfTail = Cmp->getNextNode();
+  BasicBlock *Head = FirstOfTail->getParent();
+  BasicBlock *Tail = Head->splitBasicBlock(FirstOfTail);
+
+  // Head's current terminator is replaced by the conditional branch below.
+  TerminatorInst *OldHeadTerm = Head->getTerminator();
+
+  // Create the 'then' block ahead of Tail and give it its terminator.
+  LLVMContext &Ctx = Head->getContext();
+  BasicBlock *ThenBlock = BasicBlock::Create(Ctx, "", Head->getParent(), Tail);
+  TerminatorInst *ThenTerm = 0;
+  if (!Unreachable)
+    ThenTerm = BranchInst::Create(Tail, ThenBlock);
+  else
+    ThenTerm = new UnreachableInst(Ctx, ThenBlock);
+
+  // Build the conditional branch and swap it in for Head's old terminator.
+  BranchInst *CondBr =
+      BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cmp);
+  CondBr->setMetadata(LLVMContext::MD_prof, BranchWeights);
+  ReplaceInstWithInst(OldHeadTerm, CondBr);
+  return ThenTerm;
+}
+
+/// GetIfCondition - Given a basic block (BB) with two predecessors,
+/// check to see if the merge at this block is due
+/// to an "if condition". If so, return the boolean condition that determines
+/// which entry into BB will be taken. Also, return by references the block
+/// that will be entered from if the condition is true, and the block that will
+/// be entered if the condition is false. Returns null if no such condition is found.
+///
+/// This does no checking to see if the true/false blocks have large or unsavory
+/// instructions in them.
+Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
+ BasicBlock *&IfFalse) {
+ PHINode *SomePHI = dyn_cast<PHINode>(BB->begin());
+ BasicBlock *Pred1 = NULL;
+ BasicBlock *Pred2 = NULL;
+
+ if (SomePHI) { // With a leading PHI, take the predecessors from its entries.
+ if (SomePHI->getNumIncomingValues() != 2)
+ return NULL;
+ Pred1 = SomePHI->getIncomingBlock(0);
+ Pred2 = SomePHI->getIncomingBlock(1);
+ } else { // Otherwise walk the predecessor list; it must have exactly two.
+ pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+ if (PI == PE) // No predecessor
+ return NULL;
+ Pred1 = *PI++;
+ if (PI == PE) // Only one predecessor
+ return NULL;
+ Pred2 = *PI++;
+ if (PI != PE) // More than two predecessors
+ return NULL;
+ }
+
+ // We can only handle branches. Other control flow will be lowered to
+ // branches if possible anyway.
+ BranchInst *Pred1Br = dyn_cast<BranchInst>(Pred1->getTerminator());
+ BranchInst *Pred2Br = dyn_cast<BranchInst>(Pred2->getTerminator());
+ if (Pred1Br == 0 || Pred2Br == 0)
+ return 0; // A predecessor does not end in a branch instruction.
+
+ // Eliminate code duplication by ensuring that Pred1Br is conditional if
+ // either are.
+ if (Pred2Br->isConditional()) {
+ // If both branches are conditional, we don't have an "if statement". In
+ // reality, we could transform this case, but since the condition will be
+ // required anyway, we stand no chance of eliminating it, so the xform is
+ // probably not profitable.
+ if (Pred1Br->isConditional())
+ return 0;
+
+ std::swap(Pred1, Pred2);
+ std::swap(Pred1Br, Pred2Br);
+ }
+
+ if (Pred1Br->isConditional()) { // Here Pred2Br is known to be unconditional.
+ // The only thing we have to watch out for here is to make sure that Pred2
+ // doesn't have incoming edges from other blocks. If it does, the condition
+ // doesn't dominate BB.
+ if (Pred2->getSinglePredecessor() == 0)
+ return 0;
+
+ // If we found a conditional branch predecessor, make sure that it branches
+ // to BB and Pred2. If it doesn't, this isn't an "if statement".
+ if (Pred1Br->getSuccessor(0) == BB &&
+ Pred1Br->getSuccessor(1) == Pred2) {
+ IfTrue = Pred1; // The true edge of Pred1 enters BB directly.
+ IfFalse = Pred2;
+ } else if (Pred1Br->getSuccessor(0) == Pred2 &&
+ Pred1Br->getSuccessor(1) == BB) {
+ IfTrue = Pred2; // The true edge of Pred1 reaches BB through Pred2.
+ IfFalse = Pred1;
+ } else {
+ // We know that one arm of the conditional goes to BB, so the other must
+ // go somewhere unrelated, and this must not be an "if statement".
+ return 0;
+ }
+
+ return Pred1Br->getCondition();
+ }
+
+ // Ok, if we got here, both predecessors end with an unconditional branch to
+ // BB. Don't panic! If both blocks only have a single (identical)
+ // predecessor, and THAT is a conditional branch, then we're all ok!
+ BasicBlock *CommonPred = Pred1->getSinglePredecessor();
+ if (CommonPred == 0 || CommonPred != Pred2->getSinglePredecessor())
+ return 0;
+
+ // Otherwise, if this is a conditional branch, then we can use it!
+ BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator());
+ if (BI == 0) return 0; // CommonPred ends in something other than a branch.
+
+ assert(BI->isConditional() && "Two successors but not conditional?");
+ if (BI->getSuccessor(0) == Pred1) { // Successor 0 is the 'true' destination.
+ IfTrue = Pred1;
+ IfFalse = Pred2;
+ } else {
+ IfTrue = Pred2;
+ IfFalse = Pred1;
+ }
+ return BI->getCondition();
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
new file mode 100644
index 000000000000..0e7f7f784401
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -0,0 +1,370 @@
+//===- BreakCriticalEdges.cpp - Critical Edge Elimination Pass ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// BreakCriticalEdges pass - Break all of the critical edges in the CFG by
+// inserting a dummy basic block. This pass may be "required" by passes that
+// cannot deal with critical edges. For this usage, the structure type is
+// forward declared. This pass obviously invalidates the CFG, but can update
+// dominator trees.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "break-crit-edges"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+using namespace llvm;
+
+STATISTIC(NumBroken, "Number of blocks inserted");
+
+ namespace {
+   /// BreakCriticalEdges - Function pass that visits every terminator of a
+   /// function and asks SplitCriticalEdge to split each outgoing edge that is
+   /// critical.
+   class BreakCriticalEdges : public FunctionPass {
+   public:
+     static char ID; // Pass identification, replacement for typeid
+
+     BreakCriticalEdges() : FunctionPass(ID) {
+       PassRegistry &Registry = *PassRegistry::getPassRegistry();
+       initializeBreakCriticalEdgesPass(Registry);
+     }
+
+     virtual bool runOnFunction(Function &F);
+
+     /// Record the analyses this pass keeps up to date.
+     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+       AU.addPreserved<DominatorTree>();
+       AU.addPreserved<LoopInfo>();
+
+       // Splitting edges breaks no loop canonicalization guarantee.
+       AU.addPreservedID(LoopSimplifyID);
+     }
+   };
+ }
+
+ char BreakCriticalEdges::ID = 0;
+ INITIALIZE_PASS(BreakCriticalEdges, "break-crit-edges",
+                 "Break critical edges in CFG", false, false)
+
+ // Public handles for the pass: its ID and a factory function.
+ char &llvm::BreakCriticalEdgesID = BreakCriticalEdges::ID;
+
+ /// createBreakCriticalEdgesPass - Allocate a new instance of the pass. The
+ /// object is created with new and handed to the caller.
+ FunctionPass *llvm::createBreakCriticalEdgesPass() {
+   FunctionPass *NewPass = new BreakCriticalEdges();
+   return NewPass;
+ }
+
+ // runOnFunction - Visit the terminator of every block in F and try to split
+ // each of its outgoing edges. Returns true if at least one edge was split.
+ //
+ bool BreakCriticalEdges::runOnFunction(Function &F) {
+   bool MadeChange = false;
+   for (Function::iterator BB = F.begin(), BBEnd = F.end(); BB != BBEnd; ++BB) {
+     TerminatorInst *Term = BB->getTerminator();
+
+     // Only terminators with several successors are considered, and edges
+     // leaving an indirectbr are never split.
+     if (Term->getNumSuccessors() <= 1 || isa<IndirectBrInst>(Term))
+       continue;
+
+     for (unsigned Succ = 0, NumSuccs = Term->getNumSuccessors();
+          Succ != NumSuccs; ++Succ) {
+       // SplitCriticalEdge returns null when it did not split the edge.
+       if (!SplitCriticalEdge(Term, Succ, this))
+         continue;
+       ++NumBroken;
+       MadeChange = true;
+     }
+   }
+
+   return MadeChange;
+ }
+
+//===----------------------------------------------------------------------===//
+// Implementation of the external critical edge manipulation functions
+//===----------------------------------------------------------------------===//
+
+ /// createPHIsForSplitLoopExit - After a loop exit edge has been split, LCSSA
+ /// form may need additional PHIs in the new exit block; this inserts them
+ /// where required. Preds lists the predecessors inside the loop, SplitBB is
+ /// the new loop exit block, and DestBB is the old loop exit, which is now
+ /// the successor of SplitBB.
+ static void createPHIsForSplitLoopExit(ArrayRef<BasicBlock *> Preds,
+                                        BasicBlock *SplitBB,
+                                        BasicBlock *DestBB) {
+   // SplitBB shouldn't have anything non-trivial in it yet.
+   assert((SplitBB->getFirstNonPHI() == SplitBB->getTerminator() ||
+           SplitBB->isLandingPad()) && "SplitBB has non-PHI nodes!");
+
+   // Walk the leading PHI nodes of the destination block.
+   for (BasicBlock::iterator It = DestBB->begin(); isa<PHINode>(It); ++It) {
+     PHINode *DestPN = cast<PHINode>(It);
+     unsigned SplitIdx = DestPN->getBasicBlockIndex(SplitBB);
+     Value *Incoming = DestPN->getIncomingValue(SplitIdx);
+
+     // An incoming PHI that already lives in SplitBB satisfies LCSSA as is.
+     const PHINode *IncomingPN = dyn_cast<PHINode>(Incoming);
+     if (IncomingPN && IncomingPN->getParent() == SplitBB)
+       continue;
+
+     // A new PHI is needed. The insertion point is recomputed on every
+     // iteration because, for a landing pad, the front of the block changes
+     // each time a PHI is inserted there.
+     Instruction *InsertPt;
+     if (SplitBB->isLandingPad())
+       InsertPt = SplitBB->begin();
+     else
+       InsertPt = SplitBB->getTerminator();
+
+     PHINode *SplitPN =
+       PHINode::Create(DestPN->getType(), Preds.size(), "split", InsertPt);
+     for (unsigned Idx = 0, NumPreds = Preds.size(); Idx != NumPreds; ++Idx)
+       SplitPN->addIncoming(Incoming, Preds[Idx]);
+
+     // Route the original PHI through the new one.
+     DestPN->setIncomingValue(SplitIdx, SplitPN);
+   }
+ }
+
+ /// SplitCriticalEdge - If the edge from TI to its successor number SuccNum is
+ /// a critical edge, insert a new block to split it. DominatorTree and
+ /// LoopInfo are updated when the pass P makes them available, so calling this
+ /// does not invalidate either of them. Returns the new block if the edge was
+ /// split, null otherwise.
+ ///
+ /// If MergeIdenticalEdges is true (not the default), *all* edges from TI to the
+ /// specified successor will be merged into the same critical edge block.
+ /// This is most commonly interesting with switch instructions, which may
+ /// have many edges to any one destination. This ensures that all edges to that
+ /// dest go to one block instead of each going to a different block, but isn't
+ /// the standard definition of a "critical edge".
+ ///
+ /// It is invalid to call this function on a critical edge that starts at an
+ /// IndirectBrInst. Splitting these edges will almost always create an invalid
+ /// program because the address of the new block won't be the one that is jumped
+ /// to.
+ ///
+ /// P may be null, in which case no analysis is updated. Edges whose
+ /// destination is a landing pad are never split here; SplitLandingPads only
+ /// controls whether landing-pad *exit blocks* of the source loop are split
+ /// while restoring LoopSimplify form.
+ ///
+ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
+                                     Pass *P, bool MergeIdenticalEdges,
+                                     bool DontDeleteUselessPhis,
+                                     bool SplitLandingPads) {
+   if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return 0;
+
+   assert(!isa<IndirectBrInst>(TI) &&
+          "Cannot split critical edge from IndirectBrInst");
+
+   BasicBlock *TIBB = TI->getParent();
+   BasicBlock *DestBB = TI->getSuccessor(SuccNum);
+
+   // Splitting the critical edge to a landing pad block is non-trivial. Don't do
+   // it in this generic function.
+   if (DestBB->isLandingPad()) return 0;
+
+   // Create a new basic block, linking it into the CFG.
+   BasicBlock *NewBB = BasicBlock::Create(TI->getContext(),
+                       TIBB->getName() + "." + DestBB->getName() + "_crit_edge");
+   // Create our unconditional branch.
+   BranchInst *NewBI = BranchInst::Create(DestBB, NewBB);
+   NewBI->setDebugLoc(TI->getDebugLoc());
+
+   // Branch to the new block, breaking the edge.
+   TI->setSuccessor(SuccNum, NewBB);
+
+   // Insert the block into the function... right after the block TI lives in.
+   Function &F = *TIBB->getParent();
+   Function::iterator FBBI = TIBB;
+   F.getBasicBlockList().insert(++FBBI, NewBB);
+
+   // If there are any PHI nodes in DestBB, we need to update them so that they
+   // merge incoming values from NewBB instead of from TIBB.
+   {
+     unsigned BBIdx = 0;
+     for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) {
+       // We no longer enter through TIBB, now we come in through NewBB.
+       // Revector exactly one entry in the PHI node that used to come from
+       // TIBB to come from NewBB.
+       PHINode *PN = cast<PHINode>(I);
+
+       // Reuse the previous value of BBIdx if it lines up. In cases where we
+       // have multiple phi nodes with *lots* of predecessors, this is a speed
+       // win because we don't have to scan the PHI looking for TIBB. This
+       // happens because the BB list of PHI nodes are usually in the same
+       // order.
+       if (PN->getIncomingBlock(BBIdx) != TIBB)
+         BBIdx = PN->getBasicBlockIndex(TIBB);
+       PN->setIncomingBlock(BBIdx, NewBB);
+     }
+   }
+
+   // If there are any other edges from TIBB to DestBB, update those to go
+   // through the split block, making those edges non-critical as well (and
+   // reducing the number of phi entries in the DestBB if relevant).
+   if (MergeIdenticalEdges) {
+     for (unsigned i = SuccNum+1, e = TI->getNumSuccessors(); i != e; ++i) {
+       if (TI->getSuccessor(i) != DestBB) continue;
+
+       // Remove an entry for TIBB from DestBB phi nodes.
+       DestBB->removePredecessor(TIBB, DontDeleteUselessPhis);
+
+       // We found another edge to DestBB, go to NewBB instead.
+       TI->setSuccessor(i, NewBB);
+     }
+   }
+
+   // If we don't have a pass object, we can't update anything...
+   if (P == 0) return NewBB;
+
+   DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
+   LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>();
+
+   // If we have nothing to update, just return.
+   if (DT == 0 && LI == 0)
+     return NewBB;
+
+   // Now update analysis information. Since the only predecessor of NewBB is
+   // the TIBB, TIBB clearly dominates NewBB. NewBB usually doesn't dominate
+   // anything, as there are other predecessors of DestBB. However, if all other
+   // predecessors of DestBB are already dominated by DestBB (e.g. DestBB is a
+   // loop header) then NewBB dominates DestBB.
+   SmallVector<BasicBlock*, 8> OtherPreds;
+
+   // If there is a PHI in the block, loop over predecessors with it, which is
+   // faster than iterating pred_begin/end.
+   if (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {
+     for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+       if (PN->getIncomingBlock(i) != NewBB)
+         OtherPreds.push_back(PN->getIncomingBlock(i));
+   } else {
+     for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB);
+          I != E; ++I) {
+       // Named DestPred rather than P so the Pass parameter is not shadowed.
+       BasicBlock *DestPred = *I;
+       if (DestPred != NewBB)
+         OtherPreds.push_back(DestPred);
+     }
+   }
+
+   bool NewBBDominatesDestBB = true;
+
+   // Should we update DominatorTree information?
+   if (DT) {
+     DomTreeNode *TINode = DT->getNode(TIBB);
+
+     // The new block is not the immediate dominator for any other nodes, but
+     // TINode is the immediate dominator for the new node.
+     //
+     if (TINode) {       // Don't break unreachable code!
+       DomTreeNode *NewBBNode = DT->addNewBlock(NewBB, TIBB);
+       DomTreeNode *DestBBNode = 0;
+
+       // If NewBBDominatesDestBB hasn't been computed yet, do so with DT.
+       if (!OtherPreds.empty()) {
+         DestBBNode = DT->getNode(DestBB);
+         while (!OtherPreds.empty() && NewBBDominatesDestBB) {
+           if (DomTreeNode *OPNode = DT->getNode(OtherPreds.back()))
+             NewBBDominatesDestBB = DT->dominates(DestBBNode, OPNode);
+           OtherPreds.pop_back();
+         }
+         OtherPreds.clear();
+       }
+
+       // If NewBBDominatesDestBB, then NewBB dominates DestBB, otherwise it
+       // doesn't dominate anything.
+       if (NewBBDominatesDestBB) {
+         if (!DestBBNode) DestBBNode = DT->getNode(DestBB);
+         DT->changeImmediateDominator(DestBBNode, NewBBNode);
+       }
+     }
+   }
+
+   // Update LoopInfo if it is around.
+   if (LI) {
+     if (Loop *TIL = LI->getLoopFor(TIBB)) {
+       // If one or the other blocks were not in a loop, the new block is not
+       // either, and thus LI doesn't need to be updated.
+       if (Loop *DestLoop = LI->getLoopFor(DestBB)) {
+         if (TIL == DestLoop) {
+           // Both in the same loop, the NewBB joins loop.
+           DestLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+         } else if (TIL->contains(DestLoop)) {
+           // Edge from an outer loop to an inner loop.  Add to the outer loop.
+           TIL->addBasicBlockToLoop(NewBB, LI->getBase());
+         } else if (DestLoop->contains(TIL)) {
+           // Edge from an inner loop to an outer loop.  Add to the outer loop.
+           DestLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+         } else {
+           // Edge from two loops with no containment relation.  Because these
+           // are natural loops, we know that the destination block must be the
+           // header of its loop (adding a branch into a loop elsewhere would
+           // create an irreducible loop).
+           assert(DestLoop->getHeader() == DestBB &&
+                  "Should not create irreducible loops!");
+           if (Loop *ParentLoop = DestLoop->getParentLoop())
+             ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+         }
+       }
+       // If TIBB is in a loop and DestBB is outside of that loop, split the
+       // other exit blocks of the loop that also have predecessors outside
+       // the loop, to maintain a LoopSimplify guarantee.
+       if (!TIL->contains(DestBB) &&
+           P->mustPreserveAnalysisID(LoopSimplifyID)) {
+         assert(!TIL->contains(NewBB) &&
+                "Split point for loop exit is contained in loop!");
+
+         // Update LCSSA form in the newly created exit block.
+         if (P->mustPreserveAnalysisID(LCSSAID))
+           createPHIsForSplitLoopExit(TIBB, NewBB, DestBB);
+
+         // For each unique exit block...
+         // FIXME: This code is functionally equivalent to the corresponding
+         // loop in LoopSimplify.
+         SmallVector<BasicBlock *, 4> ExitBlocks;
+         TIL->getExitBlocks(ExitBlocks);
+         for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+           // Collect all the preds that are inside the loop, and note
+           // whether there are any preds outside the loop.
+           SmallVector<BasicBlock *, 4> Preds;
+           bool HasPredOutsideOfLoop = false;
+           BasicBlock *Exit = ExitBlocks[i];
+           for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit);
+                I != E; ++I) {
+             // Named ExitPred rather than P so the Pass parameter, which is
+             // used just below this loop, is not shadowed.
+             BasicBlock *ExitPred = *I;
+             if (TIL->contains(ExitPred)) {
+               // Edges out of an indirectbr cannot be split; give up on
+               // this exit block entirely.
+               if (isa<IndirectBrInst>(ExitPred->getTerminator())) {
+                 Preds.clear();
+                 break;
+               }
+               Preds.push_back(ExitPred);
+             } else {
+               HasPredOutsideOfLoop = true;
+             }
+           }
+           // If there are any preds not in the loop, we'll need to split
+           // the edges. The Preds.empty() check is needed because a block
+           // may appear multiple times in the list. We can't use
+           // getUniqueExitBlocks above because that depends on LoopSimplify
+           // form, which we're in the process of restoring!
+           if (!Preds.empty() && HasPredOutsideOfLoop) {
+             if (!Exit->isLandingPad()) {
+               BasicBlock *NewExitBB =
+                 SplitBlockPredecessors(Exit, Preds, "split", P);
+               if (P->mustPreserveAnalysisID(LCSSAID))
+                 createPHIsForSplitLoopExit(Preds, NewExitBB, Exit);
+             } else if (SplitLandingPads) {
+               SmallVector<BasicBlock*, 8> NewBBs;
+               SplitLandingPadPredecessors(Exit, Preds,
+                                           ".split1", ".split2",
+                                           P, NewBBs);
+               if (P->mustPreserveAnalysisID(LCSSAID))
+                 createPHIsForSplitLoopExit(Preds, NewBBs[0], Exit);
+             }
+           }
+         }
+       }
+       // LCSSA form was updated above for the case where LoopSimplify is
+       // available, which means that all predecessors of loop exit blocks
+       // are within the loop. Without LoopSimplify form, it would be
+       // necessary to insert a new phi.
+       assert((!P->mustPreserveAnalysisID(LCSSAID) ||
+               P->mustPreserveAnalysisID(LoopSimplifyID)) &&
+              "SplitCriticalEdge doesn't know how to update LCSSA form "
+              "without LoopSimplify!");
+     }
+   }
+
+   return NewBB;
+ }
diff --git a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
new file mode 100644
index 000000000000..6d13217df55d
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -0,0 +1,597 @@
+//===- BuildLibCalls.cpp - Utility builder for libcalls -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements some functions that will create standard C libcalls.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+
+using namespace llvm;
+
+ /// CastToCStr - Bitcast V to i8* through the builder B; the resulting value
+ /// is named "cstr".
+ Value *llvm::CastToCStr(Value *V, IRBuilder<> &B) {
+   Type *CStrTy = B.getInt8PtrTy();
+   return B.CreateBitCast(V, CStrTy, "cstr");
+ }
+
+ /// EmitStrLen - Emit a call to strlen on Ptr through the builder B. The call
+ /// returns a value of the target's intptr type. Returns null, emitting
+ /// nothing, when TLI reports strlen as unavailable.
+ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD,
+                         const TargetLibraryInfo *TLI) {
+   if (!TLI->has(LibFunc::strlen))
+     return 0;
+
+   Module *M = B.GetInsertBlock()->getParent()->getParent();
+   LLVMContext &ModCtx = M->getContext();
+
+   // Argument 1 is nocapture; the function is readonly and nounwind.
+   Attribute::AttrKind FnAttrs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
+   AttributeSet AS[2];
+   AS[0] = AttributeSet::get(ModCtx, 1, Attribute::NoCapture);
+   AS[1] = AttributeSet::get(ModCtx, AttributeSet::FunctionIndex,
+                             ArrayRef<Attribute::AttrKind>(FnAttrs, 2));
+
+   LLVMContext &Context = B.GetInsertBlock()->getContext();
+   Type *SizeTy = TD->getIntPtrType(Context);
+   Constant *StrLen = M->getOrInsertFunction("strlen",
+                                             AttributeSet::get(ModCtx, AS),
+                                             SizeTy,
+                                             B.getInt8PtrTy(),
+                                             NULL);
+   CallInst *Call = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen");
+
+   // Mirror the callee's calling convention when it resolves to a Function.
+   const Function *Fn = dyn_cast<Function>(StrLen->stripPointerCasts());
+   if (Fn)
+     Call->setCallingConv(Fn->getCallingConv());
+
+   return Call;
+ }
+
+ /// EmitStrNLen - Emit a call to strnlen(Ptr, MaxLen) through the builder B.
+ /// Ptr is cast to i8*; MaxLen is passed through unchanged and is expected to
+ /// be of size_t type. The call returns a value of the target's intptr type.
+ /// Returns null when TLI reports strnlen as unavailable.
+ Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B,
+                          const DataLayout *TD, const TargetLibraryInfo *TLI) {
+   if (!TLI->has(LibFunc::strnlen))
+     return 0;
+
+   Module *M = B.GetInsertBlock()->getParent()->getParent();
+   LLVMContext &ModCtx = M->getContext();
+
+   // Argument 1 is nocapture; the function is readonly and nounwind.
+   Attribute::AttrKind FnAttrs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
+   AttributeSet AS[2];
+   AS[0] = AttributeSet::get(ModCtx, 1, Attribute::NoCapture);
+   AS[1] = AttributeSet::get(ModCtx, AttributeSet::FunctionIndex,
+                             ArrayRef<Attribute::AttrKind>(FnAttrs, 2));
+
+   LLVMContext &Context = B.GetInsertBlock()->getContext();
+   Type *SizeTy = TD->getIntPtrType(Context);
+   Constant *StrNLen = M->getOrInsertFunction("strnlen",
+                                              AttributeSet::get(ModCtx, AS),
+                                              SizeTy,
+                                              B.getInt8PtrTy(),
+                                              SizeTy,
+                                              NULL);
+   CallInst *Call = B.CreateCall2(StrNLen, CastToCStr(Ptr, B), MaxLen,
+                                  "strnlen");
+
+   // Mirror the callee's calling convention when it resolves to a Function.
+   const Function *Fn = dyn_cast<Function>(StrNLen->stripPointerCasts());
+   if (Fn)
+     Call->setCallingConv(Fn->getCallingConv());
+
+   return Call;
+ }
+
+ /// EmitStrChr - Emit a call to strchr(Ptr, C) through the builder B. Ptr is
+ /// cast to i8*, C is passed as an i32 constant, and the call returns i8*.
+ /// Returns null when TLI reports strchr as unavailable. TD is not used.
+ Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B,
+                         const DataLayout *TD, const TargetLibraryInfo *TLI) {
+   if (!TLI->has(LibFunc::strchr))
+     return 0;
+
+   Module *M = B.GetInsertBlock()->getParent()->getParent();
+   LLVMContext &ModCtx = M->getContext();
+
+   // The function is readonly and nounwind.
+   Attribute::AttrKind FnAttrs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
+   AttributeSet AS =
+     AttributeSet::get(ModCtx, AttributeSet::FunctionIndex,
+                       ArrayRef<Attribute::AttrKind>(FnAttrs, 2));
+
+   Type *I8Ptr = B.getInt8PtrTy();
+   Type *I32Ty = B.getInt32Ty();
+   Constant *StrChr = M->getOrInsertFunction("strchr",
+                                             AttributeSet::get(ModCtx, AS),
+                                             I8Ptr, I8Ptr, I32Ty, NULL);
+   CallInst *Call = B.CreateCall2(StrChr, CastToCStr(Ptr, B),
+                                  ConstantInt::get(I32Ty, C), "strchr");
+
+   // Mirror the callee's calling convention when it resolves to a Function.
+   const Function *Fn = dyn_cast<Function>(StrChr->stripPointerCasts());
+   if (Fn)
+     Call->setCallingConv(Fn->getCallingConv());
+   return Call;
+ }
+
+ /// EmitStrNCmp - Emit a call to strncmp(Ptr1, Ptr2, Len) through the builder
+ /// B. Both pointers are cast to i8*; Len is passed through unchanged. The
+ /// call returns i32. Returns null when TLI reports strncmp as unavailable.
+ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len,
+                          IRBuilder<> &B, const DataLayout *TD,
+                          const TargetLibraryInfo *TLI) {
+   if (!TLI->has(LibFunc::strncmp))
+     return 0;
+
+   Module *M = B.GetInsertBlock()->getParent()->getParent();
+   LLVMContext &ModCtx = M->getContext();
+
+   // Arguments 1 and 2 are nocapture; the function is readonly and nounwind.
+   Attribute::AttrKind FnAttrs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
+   AttributeSet AS[3];
+   AS[0] = AttributeSet::get(ModCtx, 1, Attribute::NoCapture);
+   AS[1] = AttributeSet::get(ModCtx, 2, Attribute::NoCapture);
+   AS[2] = AttributeSet::get(ModCtx, AttributeSet::FunctionIndex,
+                             ArrayRef<Attribute::AttrKind>(FnAttrs, 2));
+
+   LLVMContext &Context = B.GetInsertBlock()->getContext();
+   Value *StrNCmp = M->getOrInsertFunction("strncmp",
+                                           AttributeSet::get(ModCtx, AS),
+                                           B.getInt32Ty(),
+                                           B.getInt8PtrTy(),
+                                           B.getInt8PtrTy(),
+                                           TD->getIntPtrType(Context), NULL);
+   CallInst *Call = B.CreateCall3(StrNCmp, CastToCStr(Ptr1, B),
+                                  CastToCStr(Ptr2, B), Len, "strncmp");
+
+   // Mirror the callee's calling convention when it resolves to a Function.
+   const Function *Fn = dyn_cast<Function>(StrNCmp->stripPointerCasts());
+   if (Fn)
+     Call->setCallingConv(Fn->getCallingConv());
+
+   return Call;
+ }
+
+ /// EmitStrCpy - Emit a call to the function called Name, declared with the
+ /// strcpy signature i8*(i8*, i8*), through the builder B. Dst and Src are
+ /// cast to i8*. Returns null when TLI reports strcpy as unavailable. TD is
+ /// not used.
+ /// NOTE(review): availability is checked for strcpy only, even though Name
+ /// may select another function (fold() passes "stpcpy") - confirm intended.
+ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
+                         const DataLayout *TD, const TargetLibraryInfo *TLI,
+                         StringRef Name) {
+   if (!TLI->has(LibFunc::strcpy))
+     return 0;
+
+   Module *M = B.GetInsertBlock()->getParent()->getParent();
+   LLVMContext &ModCtx = M->getContext();
+
+   // Argument 2 is nocapture; the function is nounwind.
+   AttributeSet AS[2];
+   AS[0] = AttributeSet::get(ModCtx, 2, Attribute::NoCapture);
+   AS[1] = AttributeSet::get(ModCtx, AttributeSet::FunctionIndex,
+                             Attribute::NoUnwind);
+
+   Type *I8Ptr = B.getInt8PtrTy();
+   Value *StrCpy = M->getOrInsertFunction(Name,
+                                          AttributeSet::get(ModCtx, AS),
+                                          I8Ptr, I8Ptr, I8Ptr, NULL);
+   CallInst *Call = B.CreateCall2(StrCpy, CastToCStr(Dst, B),
+                                  CastToCStr(Src, B), Name);
+
+   // Mirror the callee's calling convention when it resolves to a Function.
+   const Function *Fn = dyn_cast<Function>(StrCpy->stripPointerCasts());
+   if (Fn)
+     Call->setCallingConv(Fn->getCallingConv());
+   return Call;
+ }
+
+ /// EmitStrNCpy - Emit a call to the function called Name, declared with the
+ /// strncpy signature i8*(i8*, i8*, typeof(Len)), through the builder B. Dst
+ /// and Src are cast to i8*; Len is passed through unchanged. Returns null
+ /// when TLI reports strncpy as unavailable. TD is not used.
+ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,
+                          IRBuilder<> &B, const DataLayout *TD,
+                          const TargetLibraryInfo *TLI, StringRef Name) {
+   if (!TLI->has(LibFunc::strncpy))
+     return 0;
+
+   Module *M = B.GetInsertBlock()->getParent()->getParent();
+   LLVMContext &ModCtx = M->getContext();
+
+   // Argument 2 is nocapture; the function is nounwind.
+   AttributeSet AS[2];
+   AS[0] = AttributeSet::get(ModCtx, 2, Attribute::NoCapture);
+   AS[1] = AttributeSet::get(ModCtx, AttributeSet::FunctionIndex,
+                             Attribute::NoUnwind);
+
+   Type *I8Ptr = B.getInt8PtrTy();
+   Value *StrNCpy = M->getOrInsertFunction(Name,
+                                           AttributeSet::get(ModCtx, AS),
+                                           I8Ptr, I8Ptr, I8Ptr,
+                                           Len->getType(), NULL);
+   // The call result is always named "strncpy", whatever Name is.
+   CallInst *Call = B.CreateCall3(StrNCpy, CastToCStr(Dst, B),
+                                  CastToCStr(Src, B), Len, "strncpy");
+
+   // Mirror the callee's calling convention when it resolves to a Function.
+   const Function *Fn = dyn_cast<Function>(StrNCpy->stripPointerCasts());
+   if (Fn)
+     Call->setCallingConv(Fn->getCallingConv());
+   return Call;
+ }
+
+ /// EmitMemCpyChk - Emit a call to __memcpy_chk(Dst, Src, Len, ObjSize)
+ /// through the builder B. Dst and Src are cast to i8*; Len and ObjSize are
+ /// passed through unchanged and are expected to have the target's intptr
+ /// type. Returns null when TLI reports memcpy_chk as unavailable.
+ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
+                            IRBuilder<> &B, const DataLayout *TD,
+                            const TargetLibraryInfo *TLI) {
+   if (!TLI->has(LibFunc::memcpy_chk))
+     return 0;
+
+   Module *M = B.GetInsertBlock()->getParent()->getParent();
+   LLVMContext &ModCtx = M->getContext();
+
+   // The function is nounwind.
+   AttributeSet AS = AttributeSet::get(ModCtx, AttributeSet::FunctionIndex,
+                                       Attribute::NoUnwind);
+
+   LLVMContext &Context = B.GetInsertBlock()->getContext();
+   Type *SizeTy = TD->getIntPtrType(Context);
+   Value *MemCpy = M->getOrInsertFunction("__memcpy_chk",
+                                          AttributeSet::get(ModCtx, AS),
+                                          B.getInt8PtrTy(),
+                                          B.getInt8PtrTy(),
+                                          B.getInt8PtrTy(),
+                                          SizeTy,
+                                          SizeTy, NULL);
+
+   // Cast the destination first, then the source.
+   Dst = CastToCStr(Dst, B);
+   Src = CastToCStr(Src, B);
+   CallInst *Call = B.CreateCall4(MemCpy, Dst, Src, Len, ObjSize);
+
+   // Mirror the callee's calling convention when it resolves to a Function.
+   const Function *Fn = dyn_cast<Function>(MemCpy->stripPointerCasts());
+   if (Fn)
+     Call->setCallingConv(Fn->getCallingConv());
+   return Call;
+ }
+
+ /// EmitMemChr - Emit a call to memchr(Ptr, Val, Len) through the builder B.
+ /// Ptr is cast to i8*; Val (expected to be i32) and Len (expected to have
+ /// the target's intptr type) are passed through unchanged. Returns null
+ /// when TLI reports memchr as unavailable.
+ Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
+                         Value *Len, IRBuilder<> &B, const DataLayout *TD,
+                         const TargetLibraryInfo *TLI) {
+   if (!TLI->has(LibFunc::memchr))
+     return 0;
+
+   Module *M = B.GetInsertBlock()->getParent()->getParent();
+   LLVMContext &ModCtx = M->getContext();
+
+   // The function is readonly and nounwind.
+   Attribute::AttrKind FnAttrs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
+   AttributeSet AS = AttributeSet::get(ModCtx, AttributeSet::FunctionIndex,
+                                       ArrayRef<Attribute::AttrKind>(FnAttrs, 2));
+
+   LLVMContext &Context = B.GetInsertBlock()->getContext();
+   Value *MemChr = M->getOrInsertFunction("memchr",
+                                          AttributeSet::get(ModCtx, AS),
+                                          B.getInt8PtrTy(),
+                                          B.getInt8PtrTy(),
+                                          B.getInt32Ty(),
+                                          TD->getIntPtrType(Context),
+                                          NULL);
+   CallInst *Call = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len,
+                                  "memchr");
+
+   // Mirror the callee's calling convention when it resolves to a Function.
+   const Function *Fn = dyn_cast<Function>(MemChr->stripPointerCasts());
+   if (Fn)
+     Call->setCallingConv(Fn->getCallingConv());
+
+   return Call;
+ }
+
+ /// EmitMemCmp - Emit a call to memcmp(Ptr1, Ptr2, Len) through the builder B.
+ /// Both pointers are cast to i8*; Len is passed through unchanged. The call
+ /// returns i32. Returns null when TLI reports memcmp as unavailable.
+ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
+                         Value *Len, IRBuilder<> &B, const DataLayout *TD,
+                         const TargetLibraryInfo *TLI) {
+   if (!TLI->has(LibFunc::memcmp))
+     return 0;
+
+   Module *M = B.GetInsertBlock()->getParent()->getParent();
+   LLVMContext &ModCtx = M->getContext();
+
+   // Arguments 1 and 2 are nocapture; the function is readonly and nounwind.
+   Attribute::AttrKind FnAttrs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
+   AttributeSet AS[3];
+   AS[0] = AttributeSet::get(ModCtx, 1, Attribute::NoCapture);
+   AS[1] = AttributeSet::get(ModCtx, 2, Attribute::NoCapture);
+   AS[2] = AttributeSet::get(ModCtx, AttributeSet::FunctionIndex,
+                             ArrayRef<Attribute::AttrKind>(FnAttrs, 2));
+
+   LLVMContext &Context = B.GetInsertBlock()->getContext();
+   Value *MemCmp = M->getOrInsertFunction("memcmp",
+                                          AttributeSet::get(ModCtx, AS),
+                                          B.getInt32Ty(),
+                                          B.getInt8PtrTy(),
+                                          B.getInt8PtrTy(),
+                                          TD->getIntPtrType(Context), NULL);
+   CallInst *Call = B.CreateCall3(MemCmp, CastToCStr(Ptr1, B),
+                                  CastToCStr(Ptr2, B), Len, "memcmp");
+
+   // Mirror the callee's calling convention when it resolves to a Function.
+   const Function *Fn = dyn_cast<Function>(MemCmp->stripPointerCasts());
+   if (Fn)
+     Call->setCallingConv(Fn->getCallingConv());
+
+   return Call;
+ }
+
+ /// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name'
+ /// (e.g. 'floor'). The function is declared as taking a single argument of
+ /// Op's type and returning a value of that same type. When 'Op' is a float
+ /// an 'f' suffix is appended to the name; when it is any other non-double
+ /// type (e.g. long double) an 'l' suffix is appended. Attrs is applied to
+ /// the emitted call.
+ Value *llvm::EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
+                                   const AttributeSet &Attrs) {
+   SmallString<20> NameBuffer;
+   Type *OpTy = Op->getType();
+   if (!OpTy->isDoubleTy()) {
+     // A suffix is needed, so build the final name in NameBuffer.
+     NameBuffer += Name;
+     NameBuffer += OpTy->isFloatTy() ? 'f'   // floorf
+                                     : 'l';  // floorl
+     Name = NameBuffer;
+   }
+
+   Module *M = B.GetInsertBlock()->getParent()->getParent();
+   Value *Callee = M->getOrInsertFunction(Name, OpTy, OpTy, NULL);
+   CallInst *Call = B.CreateCall(Callee, Op, Name);
+   Call->setAttributes(Attrs);
+
+   // Mirror the callee's calling convention when it resolves to a Function.
+   const Function *Fn = dyn_cast<Function>(Callee->stripPointerCasts());
+   if (Fn)
+     Call->setCallingConv(Fn->getCallingConv());
+
+   return Call;
+ }
+
+ /// EmitPutChar - Emit a call to putchar through the builder B. Char must be
+ /// an integer; it is converted to i32 with a signed integer cast first.
+ /// Returns null when TLI reports putchar as unavailable. TD is not used.
+ Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const DataLayout *TD,
+                          const TargetLibraryInfo *TLI) {
+   if (!TLI->has(LibFunc::putchar))
+     return 0;
+
+   Module *M = B.GetInsertBlock()->getParent()->getParent();
+   Type *I32Ty = B.getInt32Ty();
+   Value *PutChar = M->getOrInsertFunction("putchar", I32Ty, I32Ty, NULL);
+
+   Value *CharAsInt = B.CreateIntCast(Char, I32Ty, /*isSigned*/true, "chari");
+   CallInst *Call = B.CreateCall(PutChar, CharAsInt, "putchar");
+
+   // Mirror the callee's calling convention when it resolves to a Function.
+   const Function *Fn = dyn_cast<Function>(PutChar->stripPointerCasts());
+   if (Fn)
+     Call->setCallingConv(Fn->getCallingConv());
+   return Call;
+ }
+
+ /// EmitPutS - Emit a call to puts(Str) through the builder B. Str must be a
+ /// pointer; it is cast to i8*. Returns null when TLI reports puts as
+ /// unavailable. TD is not used.
+ Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD,
+                       const TargetLibraryInfo *TLI) {
+   if (!TLI->has(LibFunc::puts))
+     return 0;
+
+   Module *M = B.GetInsertBlock()->getParent()->getParent();
+   LLVMContext &ModCtx = M->getContext();
+
+   // Argument 1 is nocapture; the function is nounwind.
+   AttributeSet AS[2];
+   AS[0] = AttributeSet::get(ModCtx, 1, Attribute::NoCapture);
+   AS[1] = AttributeSet::get(ModCtx, AttributeSet::FunctionIndex,
+                             Attribute::NoUnwind);
+
+   Value *PutS = M->getOrInsertFunction("puts",
+                                        AttributeSet::get(ModCtx, AS),
+                                        B.getInt32Ty(),
+                                        B.getInt8PtrTy(),
+                                        NULL);
+   CallInst *Call = B.CreateCall(PutS, CastToCStr(Str, B), "puts");
+
+   // Mirror the callee's calling convention when it resolves to a Function.
+   const Function *Fn = dyn_cast<Function>(PutS->stripPointerCasts());
+   if (Fn)
+     Call->setCallingConv(Fn->getCallingConv());
+   return Call;
+ }
+
+ /// EmitFPutC - Emit a call to fputc(Char, File) through the builder B. Char
+ /// must be an integer and is converted to i32 with a signed integer cast;
+ /// File is expected to be a pointer to FILE. Returns null when TLI reports
+ /// fputc as unavailable. TD is not used.
+ Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
+                        const DataLayout *TD, const TargetLibraryInfo *TLI) {
+   if (!TLI->has(LibFunc::fputc))
+     return 0;
+
+   Module *M = B.GetInsertBlock()->getParent()->getParent();
+   LLVMContext &ModCtx = M->getContext();
+
+   // Argument 2 is nocapture; the function is nounwind.
+   AttributeSet AS[2];
+   AS[0] = AttributeSet::get(ModCtx, 2, Attribute::NoCapture);
+   AS[1] = AttributeSet::get(ModCtx, AttributeSet::FunctionIndex,
+                             Attribute::NoUnwind);
+
+   // The attributes are attached only when File really has pointer type.
+   Constant *F;
+   if (!File->getType()->isPointerTy())
+     F = M->getOrInsertFunction("fputc",
+                                B.getInt32Ty(),
+                                B.getInt32Ty(),
+                                File->getType(), NULL);
+   else
+     F = M->getOrInsertFunction("fputc",
+                                AttributeSet::get(ModCtx, AS),
+                                B.getInt32Ty(),
+                                B.getInt32Ty(), File->getType(),
+                                NULL);
+
+   Char = B.CreateIntCast(Char, B.getInt32Ty(), /*isSigned*/true,
+                          "chari");
+   CallInst *Call = B.CreateCall2(F, Char, File, "fputc");
+
+   // Mirror the callee's calling convention when it resolves to a Function.
+   const Function *Fn = dyn_cast<Function>(F->stripPointerCasts());
+   if (Fn)
+     Call->setCallingConv(Fn->getCallingConv());
+   return Call;
+ }
+
+ /// EmitFPutS - Emit a call to fputs(Str, File) through the builder B, using
+ /// the name TLI reports for fputs. Str must be a pointer and is cast to
+ /// i8*; File is expected to be a pointer to FILE. Returns null when TLI
+ /// reports fputs as unavailable. TD is not used.
+ Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
+                        const DataLayout *TD, const TargetLibraryInfo *TLI) {
+   if (!TLI->has(LibFunc::fputs))
+     return 0;
+
+   Module *M = B.GetInsertBlock()->getParent()->getParent();
+   LLVMContext &ModCtx = M->getContext();
+
+   // Arguments 1 and 2 are nocapture; the function is nounwind.
+   AttributeSet AS[3];
+   AS[0] = AttributeSet::get(ModCtx, 1, Attribute::NoCapture);
+   AS[1] = AttributeSet::get(ModCtx, 2, Attribute::NoCapture);
+   AS[2] = AttributeSet::get(ModCtx, AttributeSet::FunctionIndex,
+                             Attribute::NoUnwind);
+
+   StringRef FPutsName = TLI->getName(LibFunc::fputs);
+
+   // The attributes are attached only when File really has pointer type.
+   Constant *F;
+   if (!File->getType()->isPointerTy())
+     F = M->getOrInsertFunction(FPutsName, B.getInt32Ty(),
+                                B.getInt8PtrTy(),
+                                File->getType(), NULL);
+   else
+     F = M->getOrInsertFunction(FPutsName,
+                                AttributeSet::get(ModCtx, AS),
+                                B.getInt32Ty(),
+                                B.getInt8PtrTy(),
+                                File->getType(), NULL);
+
+   CallInst *Call = B.CreateCall2(F, CastToCStr(Str, B), File, "fputs");
+
+   // Mirror the callee's calling convention when it resolves to a Function.
+   const Function *Fn = dyn_cast<Function>(F->stripPointerCasts());
+   if (Fn)
+     Call->setCallingConv(Fn->getCallingConv());
+   return Call;
+ }
+
+ /// EmitFWrite - Emit a call to fwrite(Ptr, Size, 1, File) through the builder
+ /// B, using the name TLI reports for fwrite. Ptr must be a pointer and is
+ /// cast to i8*; Size is expected to have the target's intptr type; File is
+ /// expected to be a pointer to FILE. Returns null when TLI reports fwrite
+ /// as unavailable.
+ Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
+                         IRBuilder<> &B, const DataLayout *TD,
+                         const TargetLibraryInfo *TLI) {
+   if (!TLI->has(LibFunc::fwrite))
+     return 0;
+
+   Module *M = B.GetInsertBlock()->getParent()->getParent();
+   LLVMContext &ModCtx = M->getContext();
+
+   // Arguments 1 and 4 are nocapture; the function is nounwind.
+   AttributeSet AS[3];
+   AS[0] = AttributeSet::get(ModCtx, 1, Attribute::NoCapture);
+   AS[1] = AttributeSet::get(ModCtx, 4, Attribute::NoCapture);
+   AS[2] = AttributeSet::get(ModCtx, AttributeSet::FunctionIndex,
+                             Attribute::NoUnwind);
+
+   LLVMContext &Context = B.GetInsertBlock()->getContext();
+   Type *SizeTy = TD->getIntPtrType(Context);
+   StringRef FWriteName = TLI->getName(LibFunc::fwrite);
+
+   // The attributes are attached only when File really has pointer type.
+   Constant *F;
+   if (!File->getType()->isPointerTy())
+     F = M->getOrInsertFunction(FWriteName, SizeTy,
+                                B.getInt8PtrTy(),
+                                SizeTy,
+                                SizeTy,
+                                File->getType(), NULL);
+   else
+     F = M->getOrInsertFunction(FWriteName,
+                                AttributeSet::get(ModCtx, AS),
+                                SizeTy,
+                                B.getInt8PtrTy(),
+                                SizeTy,
+                                SizeTy,
+                                File->getType(), NULL);
+
+   // The element count is the constant 1.
+   CallInst *Call = B.CreateCall4(F, CastToCStr(Ptr, B), Size,
+                                  ConstantInt::get(SizeTy, 1), File);
+
+   // Mirror the callee's calling convention when it resolves to a Function.
+   const Function *Fn = dyn_cast<Function>(F->stripPointerCasts());
+   if (Fn)
+     Call->setCallingConv(Fn->getCallingConv());
+   return Call;
+ }
+
+ // Out-of-line destructor definition; the body is intentionally empty.
+ SimplifyFortifiedLibCalls::~SimplifyFortifiedLibCalls() {
+ }
+
+ /// fold - Try to replace the fortified libcall CI (__memcpy_chk,
+ /// __memmove_chk, __memset_chk, __st[rp]cpy_chk, __st[rp]ncpy_chk) with its
+ /// unchecked equivalent. The callee's prototype is validated first, and the
+ /// replacement only happens when isFoldable() accepts the call's size
+ /// arguments. Returns true if the call was replaced (through replaceCall),
+ /// false otherwise. DataLayout is required; a null TD always yields false.
+ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const DataLayout *TD,
+                                      const TargetLibraryInfo *TLI) {
+   // We really need DataLayout for later.
+   if (!TD) return false;
+
+   this->CI = CI;
+   Function *Callee = CI->getCalledFunction();
+   // getCalledFunction() is null for an indirect call; such a call cannot be
+   // matched by name, so there is nothing to fold.
+   if (!Callee) return false;
+
+   StringRef Name = Callee->getName();
+   FunctionType *FT = Callee->getFunctionType();
+   LLVMContext &Context = CI->getParent()->getContext();
+   Type *IntPtrTy = TD->getIntPtrType(Context);
+   IRBuilder<> B(CI);
+
+   if (Name == "__memcpy_chk") {
+     // Check if this has the right signature.
+     if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+         !FT->getParamType(0)->isPointerTy() ||
+         !FT->getParamType(1)->isPointerTy() ||
+         FT->getParamType(2) != IntPtrTy ||
+         FT->getParamType(3) != IntPtrTy)
+       return false;
+
+     if (isFoldable(3, 2, false)) {
+       B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+                      CI->getArgOperand(2), 1);
+       replaceCall(CI->getArgOperand(0));
+       return true;
+     }
+     return false;
+   }
+
+   // Should be similar to memcpy.
+   if (Name == "__mempcpy_chk") {
+     return false;
+   }
+
+   if (Name == "__memmove_chk") {
+     // Check if this has the right signature.
+     if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+         !FT->getParamType(0)->isPointerTy() ||
+         !FT->getParamType(1)->isPointerTy() ||
+         FT->getParamType(2) != IntPtrTy ||
+         FT->getParamType(3) != IntPtrTy)
+       return false;
+
+     if (isFoldable(3, 2, false)) {
+       B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
+                       CI->getArgOperand(2), 1);
+       replaceCall(CI->getArgOperand(0));
+       return true;
+     }
+     return false;
+   }
+
+   if (Name == "__memset_chk") {
+     // Check if this has the right signature.
+     if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+         !FT->getParamType(0)->isPointerTy() ||
+         !FT->getParamType(1)->isIntegerTy() ||
+         FT->getParamType(2) != IntPtrTy ||
+         FT->getParamType(3) != IntPtrTy)
+       return false;
+
+     if (isFoldable(3, 2, false)) {
+       // The fill value is converted to i8 with an unsigned integer cast.
+       Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(),
+                                    false);
+       B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
+       replaceCall(CI->getArgOperand(0));
+       return true;
+     }
+     return false;
+   }
+
+   if (Name == "__strcpy_chk" || Name == "__stpcpy_chk") {
+     // Check if this has the right signature.
+     if (FT->getNumParams() != 3 ||
+         FT->getReturnType() != FT->getParamType(0) ||
+         FT->getParamType(0) != FT->getParamType(1) ||
+         FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
+         FT->getParamType(2) != IntPtrTy)
+       return false;
+
+     // If a) we don't have any length information, or b) we know this will
+     // fit then just lower to a plain st[rp]cpy. Otherwise we'll keep our
+     // st[rp]cpy_chk call which may fail at runtime if the size is too long.
+     // TODO: It might be nice to get a maximum length out of the possible
+     // string lengths for varying.
+     if (isFoldable(2, 1, true)) {
+       // Name.substr(2, 6) strips the leading "__" and trailing "_chk",
+       // leaving "strcpy" or "stpcpy".
+       Value *Ret = EmitStrCpy(CI->getArgOperand(0), CI->getArgOperand(1), B, TD,
+                               TLI, Name.substr(2, 6));
+       if (!Ret)
+         return false;
+       replaceCall(Ret);
+       return true;
+     }
+     return false;
+   }
+
+   if (Name == "__strncpy_chk" || Name == "__stpncpy_chk") {
+     // Check if this has the right signature.
+     if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+         FT->getParamType(0) != FT->getParamType(1) ||
+         FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
+         !FT->getParamType(2)->isIntegerTy() ||
+         FT->getParamType(3) != IntPtrTy)
+       return false;
+
+     if (isFoldable(3, 2, false)) {
+       // Name.substr(2, 7) strips the leading "__" and trailing "_chk",
+       // leaving "strncpy" or "stpncpy".
+       Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+                                CI->getArgOperand(2), B, TD, TLI,
+                                Name.substr(2, 7));
+       if (!Ret)
+         return false;
+       replaceCall(Ret);
+       return true;
+     }
+     return false;
+   }
+
+   // Recognized but not folded.
+   if (Name == "__strcat_chk") {
+     return false;
+   }
+
+   if (Name == "__strncat_chk") {
+     return false;
+   }
+
+   return false;
+ }
diff --git a/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp b/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
new file mode 100644
index 000000000000..1f517d038d19
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -0,0 +1,262 @@
+//===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains an optimization for div and rem on architectures that
+// execute short instructions significantly faster than longer instructions.
+// For example, on Intel Atom 32-bit divides are slow enough that during
+// runtime it is profitable to check the value of the operands, and if they are
+// positive and less than 256 use an unsigned 8-bit divide.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "bypass-slow-division"
+#include "llvm/Transforms/Utils/BypassSlowDivision.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+
+using namespace llvm;
+
+namespace {
+  /// DivOpInfo - Identifies one div/rem computation by its signedness and its
+  /// two operands. Used as the key of the per-block division cache.
+  struct DivOpInfo {
+    bool SignedOp;
+    Value *Dividend;
+    Value *Divisor;
+
+    DivOpInfo(bool IsSigned, Value *Num, Value *Den)
+      : SignedOp(IsSigned), Dividend(Num), Divisor(Den) {}
+  };
+
+  /// DivPhiNodes - The pair of phi nodes that carry the quotient and the
+  /// remainder of an already-bypassed division.
+  struct DivPhiNodes {
+    PHINode *Quotient;
+    PHINode *Remainder;
+
+    DivPhiNodes(PHINode *QuoPhi, PHINode *RemPhi)
+      : Quotient(QuoPhi), Remainder(RemPhi) {}
+  };
+}
+
+namespace llvm {
+  /// DenseMapInfo specialization that lets DivOpInfo be used as a DenseMap key.
+  template<>
+  struct DenseMapInfo<DivOpInfo> {
+    /// Sentinel for a never-used slot: unsigned op with both operands null.
+    static DivOpInfo getEmptyKey() {
+      return DivOpInfo(false, 0, 0);
+    }
+
+    /// Sentinel for an erased slot: signed op with both operands null.
+    static DivOpInfo getTombstoneKey() {
+      return DivOpInfo(true, 0, 0);
+    }
+
+    /// XOR of the two operand addresses, truncated to unsigned and then XORed
+    /// with the signedness flag.
+    static unsigned getHashValue(const DivOpInfo &Val) {
+      uintptr_t OperandBits = reinterpret_cast<uintptr_t>(Val.Dividend) ^
+                              reinterpret_cast<uintptr_t>(Val.Divisor);
+      return (unsigned)OperandBits ^ (unsigned)Val.SignedOp;
+    }
+
+    /// Two keys match only when signedness and both operands are identical.
+    static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) {
+      if (Val1.SignedOp != Val2.SignedOp)
+        return false;
+      if (Val1.Dividend != Val2.Dividend)
+        return false;
+      return Val1.Divisor == Val2.Divisor;
+    }
+  };
+
+  typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy;
+}
+
+// insertFastDiv - Substitutes the div/rem instruction at J with code that
+// checks the value of the operands at run time and uses a shorter-faster
+// div/rem instruction when possible and the longer-slower div/rem instruction
+// otherwise.
+//
+// The block at I is split in front of the instruction. A "fast" block
+// (operands truncated to BypassType, unsigned divide, results zero-extended)
+// and a "slow" block (divide at the original width) are created, and both
+// branch to the split-off successor block, which receives one phi node for the
+// quotient and one for the remainder. The original instruction is replaced by
+// the phi selected through UseDivOp and erased.
+//
+// On a successful rewrite I is advanced to the successor block, J is pointed
+// at that block's first instruction, and the two phi nodes are stored in
+// PerBBDivCache under (UseSignedOp, dividend, divisor).
+//
+// Returns false without touching the IR when the divisor is a constant, and
+// true otherwise.
+static bool insertFastDiv(Function &F,
+                          Function::iterator &I,
+                          BasicBlock::iterator &J,
+                          IntegerType *BypassType,
+                          bool UseDivOp,
+                          bool UseSignedOp,
+                          DivCacheTy &PerBBDivCache) {
+  // Get instruction operands
+  Instruction *Instr = J;
+  Value *Dividend = Instr->getOperand(0);
+  Value *Divisor = Instr->getOperand(1);
+
+  // Operations with an immediate divisor (which also covers the case where
+  // both operands are immediates) should have been solved and replaced during
+  // compile time, so they are not bypassed.
+  if (isa<ConstantInt>(Divisor))
+    return false;
+
+  // Basic Block is split before divide
+  BasicBlock *MainBB = I;
+  BasicBlock *SuccessorBB = I->splitBasicBlock(J);
+  ++I; //advance iterator I to successorBB
+
+  // Add new basic block for slow divide operation
+  BasicBlock *SlowBB = BasicBlock::Create(F.getContext(), "",
+                                          MainBB->getParent(), SuccessorBB);
+  SlowBB->moveBefore(SuccessorBB);
+  IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin());
+  Value *SlowQuotientV;
+  Value *SlowRemainderV;
+  if (UseSignedOp) {
+    SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor);
+    SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor);
+  } else {
+    SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor);
+    SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor);
+  }
+  SlowBuilder.CreateBr(SuccessorBB);
+
+  // Add new basic block for fast divide operation
+  BasicBlock *FastBB = BasicBlock::Create(F.getContext(), "",
+                                          MainBB->getParent(), SuccessorBB);
+  FastBB->moveBefore(SlowBB);
+  IRBuilder<> FastBuilder(FastBB, FastBB->begin());
+  Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor,
+                                                BypassType);
+  Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend,
+                                                 BypassType);
+
+  // udiv/urem because optimization only handles positive numbers.
+  // The udiv must not be marked 'exact': the operands are arbitrary run-time
+  // values, so the division may leave a remainder, and an exact udiv whose
+  // remainder is nonzero produces a poison value.
+  Value *ShortQuotientV = FastBuilder.CreateUDiv(ShortDividendV,
+                                                 ShortDivisorV);
+  Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV,
+                                                  ShortDivisorV);
+  Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt,
+                                                ShortQuotientV,
+                                                Dividend->getType());
+  Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt,
+                                                 ShortRemainderV,
+                                                 Dividend->getType());
+  FastBuilder.CreateBr(SuccessorBB);
+
+  // Phi nodes for result of div and rem
+  IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin());
+  PHINode *QuoPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
+  QuoPhi->addIncoming(SlowQuotientV, SlowBB);
+  QuoPhi->addIncoming(FastQuotientV, FastBB);
+  PHINode *RemPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
+  RemPhi->addIncoming(SlowRemainderV, SlowBB);
+  RemPhi->addIncoming(FastRemainderV, FastBB);
+
+  // Replace Instr with appropriate phi node
+  if (UseDivOp)
+    Instr->replaceAllUsesWith(QuoPhi);
+  else
+    Instr->replaceAllUsesWith(RemPhi);
+  Instr->eraseFromParent();
+
+  // Drop the last instruction of MainBB (left behind by the split) so the
+  // operand test and conditional branch can be appended in its place.
+  // Combine operands into a single value with OR for value testing below
+  MainBB->getInstList().back().eraseFromParent();
+  IRBuilder<> MainBuilder(MainBB, MainBB->end());
+  Value *OrV = MainBuilder.CreateOr(Dividend, Divisor);
+
+  // BitMask is inverted to check if the operands are
+  // larger than the bypass type
+  uint64_t BitMask = ~BypassType->getBitMask();
+  Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
+
+  // Compare operand values and branch
+  Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0);
+  Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
+  MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
+
+  // point iterator J at first instruction of successorBB
+  J = I->begin();
+
+  // Cache phi nodes to be used later in place of other instances
+  // of div or rem with the same sign, dividend, and divisor
+  DivOpInfo Key(UseSignedOp, Dividend, Divisor);
+  DivPhiNodes Phis(QuoPhi, RemPhi);
+  PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Phis));
+  return true;
+}
+
+// reuseOrInsertFastDiv - If an operation with the same signedness, dividend
+// and divisor has already been bypassed, replaces the instruction at J with the
+// cached quotient or remainder phi node, erases it, and leaves J on the
+// following instruction. Otherwise calls insertFastDiv to perform the
+// optimization and cache the resulting quotient and remainder.
+static bool reuseOrInsertFastDiv(Function &F,
+                                 Function::iterator &I,
+                                 BasicBlock::iterator &J,
+                                 IntegerType *BypassType,
+                                 bool UseDivOp,
+                                 bool UseSignedOp,
+                                 DivCacheTy &PerBBDivCache) {
+  // Build the cache key from the instruction's operands
+  Instruction *Instr = J;
+  DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1));
+  DivCacheTy::iterator CacheI = PerBBDivCache.find(Key);
+
+  if (CacheI != PerBBDivCache.end()) {
+    // A previous instance exists: forward every use to the phi node that
+    // matches the kind of operation (quotient for div, remainder for rem).
+    DivPhiNodes &Cached = CacheI->second;
+    PHINode *Replacement = UseDivOp ? Cached.Quotient : Cached.Remainder;
+    Instr->replaceAllUsesWith(Replacement);
+
+    // Step the iterator off the instruction first, then remove the now
+    // redundant operation.
+    ++J;
+    Instr->eraseFromParent();
+    return true;
+  }
+
+  // No previous instance: insert fast div
+  return insertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp,
+                       PerBBDivCache);
+}
+
+// bypassSlowDivision - Walks the instructions of the basic block at I looking
+// for integer div/rem instructions whose bit width has an entry in
+// BypassWidths, and hands each one to reuseOrInsertFastDiv together with the
+// integer type of the mapped (bypass) width. Returns true if any instruction
+// was rewritten.
+bool llvm::bypassSlowDivision(Function &F,
+                              Function::iterator &I,
+                              const DenseMap<unsigned int, unsigned int> &BypassWidths) {
+  typedef DenseMap<unsigned int, unsigned int>::const_iterator WidthIter;
+
+  DivCacheTy DivCache;
+  bool MadeChange = false;
+
+  // I->end() is re-read on every iteration because a rewrite may move I to a
+  // different block.
+  for (BasicBlock::iterator J = I->begin(); J != I->end(); J++) {
+    // Classify the opcode; only div and rem ops are optimized.
+    bool UseDivOp = false;
+    bool UseSignedOp = false;
+    switch (J->getOpcode()) {
+    case Instruction::SDiv:
+      UseDivOp = true;
+      UseSignedOp = true;
+      break;
+    case Instruction::UDiv:
+      UseDivOp = true;
+      break;
+    case Instruction::SRem:
+      UseSignedOp = true;
+      break;
+    case Instruction::URem:
+      break;
+    default:
+      continue;
+    }
+
+    // Skip division on vector types, only optimize integer instructions
+    if (!J->getType()->isIntegerTy())
+      continue;
+
+    // Look up the bypass width registered for this instruction's bit width
+    unsigned int bitwidth = cast<IntegerType>(J->getType())->getBitWidth();
+    WidthIter BI = BypassWidths.find(bitwidth);
+    if (BI == BypassWidths.end())
+      continue;
+
+    // Integer type with the bypass bit width
+    IntegerType *BT = IntegerType::get(J->getContext(), BI->second);
+
+    if (reuseOrInsertFastDiv(F, I, J, BT, UseDivOp, UseSignedOp, DivCache))
+      MadeChange = true;
+  }
+
+  return MadeChange;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
new file mode 100644
index 000000000000..d105f5e24a2b
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -0,0 +1,574 @@
+//===- CloneFunction.cpp - Clone a function into another function ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CloneFunctionInto interface, which is used as the
+// low-level function cloner. This is used by the CloneFunction and function
+// inliner to do the dirty work of copying the body of a function around.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include <map>
+using namespace llvm;
+
+// CloneBasicBlock - See comments in Cloning.h
+//
+// Creates a new block in F, copies every instruction of BB into it (names get
+// NameSuffix appended), records each old->new instruction pair in VMap, and,
+// when CodeInfo is supplied, accumulates call/alloca facts into it.
+BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB,
+                                  ValueToValueMapTy &VMap,
+                                  const Twine &NameSuffix, Function *F,
+                                  ClonedCodeInfo *CodeInfo) {
+  BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F);
+  if (BB->hasName())
+    NewBB->setName(BB->getName()+NameSuffix);
+
+  bool SawCall = false;
+  bool SawDynamicAlloca = false;
+  bool SawStaticAlloca = false;
+
+  // Copy the instructions one by one, in order.
+  for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end();
+       II != IE; ++II) {
+    Instruction *Copy = II->clone();
+    if (II->hasName())
+      Copy->setName(II->getName()+NameSuffix);
+    NewBB->getInstList().push_back(Copy);
+    VMap[II] = Copy;                // Map the original onto its clone.
+
+    // Debug-info intrinsics are not counted as calls.
+    if (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II))
+      SawCall = true;
+
+    // An alloca is static when its array size is a constant integer.
+    if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
+      if (isa<ConstantInt>(AI->getArraySize()))
+        SawStaticAlloca = true;
+      else
+        SawDynamicAlloca = true;
+    }
+  }
+
+  if (!CodeInfo)
+    return NewBB;
+
+  if (SawCall)
+    CodeInfo->ContainsCalls = true;
+  if (SawDynamicAlloca)
+    CodeInfo->ContainsDynamicAllocas = true;
+  // A static alloca outside the entry block is reported as dynamic too.
+  if (SawStaticAlloca && BB != &BB->getParent()->getEntryBlock())
+    CodeInfo->ContainsDynamicAllocas = true;
+  return NewBB;
+}
+
+// Clone OldFunc into NewFunc, transforming the old arguments into references to
+// VMap values.
+//
+// Every argument of OldFunc must already have an entry in VMap (checked by
+// assertion). Parameter, return and function attributes are copied, each
+// basic block is cloned with NameSuffix appended to names, cloned return
+// instructions are appended to Returns, and finally all cloned instructions
+// have their operands remapped through VMap.
+//
+void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
+                             ValueToValueMapTy &VMap,
+                             bool ModuleLevelChanges,
+                             SmallVectorImpl<ReturnInst*> &Returns,
+                             const char *NameSuffix, ClonedCodeInfo *CodeInfo,
+                             ValueMapTypeRemapper *TypeMapper,
+                             ValueMaterializer *Materializer) {
+  assert(NameSuffix && "NameSuffix cannot be null!");
+
+#ifndef NDEBUG
+  for (Function::const_arg_iterator I = OldFunc->arg_begin(),
+       E = OldFunc->arg_end(); I != E; ++I)
+    assert(VMap.count(I) && "No mapping from source argument specified!");
+#endif
+
+  AttributeSet OldAttrs = OldFunc->getAttributes();
+
+  // Copy parameter attributes onto those old arguments that VMap maps to an
+  // Argument.
+  for (Function::const_arg_iterator I = OldFunc->arg_begin(),
+                                    E = OldFunc->arg_end();
+       I != E; ++I) {
+    Argument *NewArg = dyn_cast<Argument>(VMap[I]);
+    if (!NewArg)
+      continue;
+    AttributeSet ParamAttrs = OldAttrs.getParamAttributes(I->getArgNo() + 1);
+    if (ParamAttrs.getNumSlots() > 0)
+      NewArg->addAttr(ParamAttrs);
+  }
+
+  // Merge in the return attributes, then the function attributes.
+  LLVMContext &Ctx = NewFunc->getContext();
+  AttributeSet WithRetAttrs =
+    NewFunc->getAttributes().addAttributes(Ctx, AttributeSet::ReturnIndex,
+                                           OldAttrs.getRetAttributes());
+  NewFunc->setAttributes(WithRetAttrs);
+  AttributeSet WithFnAttrs =
+    NewFunc->getAttributes().addAttributes(Ctx, AttributeSet::FunctionIndex,
+                                           OldAttrs.getFnAttributes());
+  NewFunc->setAttributes(WithFnAttrs);
+
+  // Loop over all of the basic blocks in the function, cloning them as
+  // appropriate.  Note that we save BE this way in order to handle cloning of
+  // recursive functions into themselves.
+  //
+  for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end();
+       BI != BE; ++BI) {
+    const BasicBlock &OldBB = *BI;
+
+    // Clone the block's instructions into a fresh block and record the
+    // block mapping.
+    BasicBlock *ClonedBB =
+      CloneBasicBlock(&OldBB, VMap, NameSuffix, NewFunc, CodeInfo);
+    VMap[&OldBB] = ClonedBB;
+
+    // It is only legal to clone a function if a block address within that
+    // function is never referenced outside of the function.  Given that, we
+    // want to map block addresses from the old function to block addresses in
+    // the clone. (This is different from the generic ValueMapper
+    // implementation, which generates an invalid blockaddress when
+    // cloning a function.)
+    if (OldBB.hasAddressTaken()) {
+      Constant *OldBBAddr = BlockAddress::get(const_cast<Function*>(OldFunc),
+                                              const_cast<BasicBlock*>(&OldBB));
+      VMap[OldBBAddr] = BlockAddress::get(NewFunc, ClonedBB);
+    }
+
+    // Hand cloned return instructions back to the caller.
+    ReturnInst *RI = dyn_cast<ReturnInst>(ClonedBB->getTerminator());
+    if (RI)
+      Returns.push_back(RI);
+  }
+
+  // Starting at the clone of the first old block, walk every instruction up
+  // to the end of NewFunc and fix up its operand references through VMap.
+  RemapFlags Flags = ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges;
+  for (Function::iterator BB = cast<BasicBlock>(VMap[OldFunc->begin()]),
+         BE = NewFunc->end(); BB != BE; ++BB) {
+    for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II)
+      RemapInstruction(II, VMap, Flags, TypeMapper, Materializer);
+  }
+}
+
+/// CloneFunction - Return a copy of the specified function, but without
+/// embedding the function into another module.  References specified in the
+/// VMap are changed to refer to their mapped value instead of the original
+/// one.  Arguments of F that already have an entry in the VMap are left out
+/// of the new function's signature; the remaining ones are recreated with the
+/// same names and added to the VMap.  On return the VMap also contains the
+/// mappings for all instructions and basic blocks of the function.
+///
+Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap,
+                              bool ModuleLevelChanges,
+                              ClonedCodeInfo *CodeInfo) {
+  // Collect the types of the arguments the caller has not mapped away.
+  std::vector<Type*> ArgTypes;
+  for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+       I != E; ++I) {
+    if (VMap.count(I))
+      continue;                     // Already mapped: drop from signature.
+    ArgTypes.push_back(I->getType());
+  }
+
+  // Build the new function type from the old return type and vararg-ness.
+  FunctionType *OldFTy = F->getFunctionType();
+  FunctionType *FTy = FunctionType::get(OldFTy->getReturnType(), ArgTypes,
+                                        OldFTy->isVarArg());
+
+  // Create the new function; it is not inserted into any module here.
+  Function *NewF = Function::Create(FTy, F->getLinkage(), F->getName());
+
+  // Pair each preserved old argument with the next new argument, copying
+  // its name and recording the mapping.
+  Function::arg_iterator DestI = NewF->arg_begin();
+  for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+       I != E; ++I) {
+    if (VMap.count(I))
+      continue;
+    DestI->setName(I->getName());
+    VMap[I] = DestI;
+    ++DestI;
+  }
+
+  SmallVector<ReturnInst*, 8> Returns;  // Collected but not used here.
+  CloneFunctionInto(NewF, F, VMap, ModuleLevelChanges, Returns, "", CodeInfo);
+  return NewF;
+}
+
+
+
+namespace {
+  /// PruningFunctionCloner - Private helper that implements the
+  /// CloneAndPruneFunctionInto method.  It only bundles the parameters of one
+  /// cloning operation so CloneBlock can reach them.
+  struct PruningFunctionCloner {
+    Function *NewFunc;
+    const Function *OldFunc;
+    ValueToValueMapTy &VMap;
+    bool ModuleLevelChanges;
+    const char *NameSuffix;
+    ClonedCodeInfo *CodeInfo;
+    const DataLayout *TD;
+
+    PruningFunctionCloner(Function *Dst, const Function *Src,
+                          ValueToValueMapTy &Map,
+                          bool ModuleLevel,
+                          const char *Suffix,
+                          ClonedCodeInfo *Info,
+                          const DataLayout *Layout)
+      : NewFunc(Dst), OldFunc(Src),
+        VMap(Map), ModuleLevelChanges(ModuleLevel),
+        NameSuffix(Suffix), CodeInfo(Info), TD(Layout) {
+    }
+
+    /// CloneBlock - The specified block is found to be reachable, clone it and
+    /// anything that it can reach.
+    void CloneBlock(const BasicBlock *BB,
+                    std::vector<const BasicBlock*> &ToClone);
+  };
+}
+
+/// CloneBlock - The specified block is found to be reachable, clone it and
+/// anything that it can reach.
+///
+/// The successors are not cloned here directly: every block that can still be
+/// reached through the (possibly folded) terminator is appended to ToClone for
+/// the caller to process. A block that already has a VMap entry is skipped.
+/// The new block is created without a parent function.
+void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
+ std::vector<const BasicBlock*> &ToClone){
+ WeakVH &BBEntry = VMap[BB];
+
+ // Have we already cloned this block?
+ if (BBEntry) return;
+
+ // Nope, clone it now.
+ BasicBlock *NewBB;
+ BBEntry = NewBB = BasicBlock::Create(BB->getContext());
+ if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix);
+
+ // It is only legal to clone a function if a block address within that
+ // function is never referenced outside of the function. Given that, we
+ // want to map block addresses from the old function to block addresses in
+ // the clone. (This is different from the generic ValueMapper
+ // implementation, which generates an invalid blockaddress when
+ // cloning a function.)
+ //
+ // Note that we don't need to fix the mapping for unreachable blocks;
+ // the default mapping there is safe.
+ if (BB->hasAddressTaken()) {
+ Constant *OldBBAddr = BlockAddress::get(const_cast<Function*>(OldFunc),
+ const_cast<BasicBlock*>(BB));
+ VMap[OldBBAddr] = BlockAddress::get(NewFunc, NewBB);
+ }
+
+
+ bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false;
+
+ // Loop over all instructions, and copy them over, DCE'ing as we go. This
+ // loop doesn't include the terminator.
+ for (BasicBlock::const_iterator II = BB->begin(), IE = --BB->end();
+ II != IE; ++II) {
+ Instruction *NewInst = II->clone();
+
+ // Eagerly remap operands to the newly cloned instruction, except for PHI
+ // nodes for which we defer processing until we update the CFG.
+ if (!isa<PHINode>(NewInst)) {
+ RemapInstruction(NewInst, VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
+
+ // If we can simplify this instruction to some other value, simply add
+ // a mapping to that value rather than inserting a new instruction into
+ // the basic block.
+ if (Value *V = SimplifyInstruction(NewInst, TD)) {
+ // On the off-chance that this simplifies to an instruction in the old
+ // function, map it back into the new function.
+ if (Value *MappedV = VMap.lookup(V))
+ V = MappedV;
+
+ VMap[II] = V;
+ // The clone has not been inserted into any block yet, so it is simply
+ // deleted.
+ delete NewInst;
+ continue;
+ }
+ }
+
+ if (II->hasName())
+ NewInst->setName(II->getName()+NameSuffix);
+ VMap[II] = NewInst; // Add instruction map to value.
+ NewBB->getInstList().push_back(NewInst);
+ // Debug-info intrinsics are not counted as calls.
+ hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
+ // An alloca with a constant array size is treated as static.
+ if (isa<ConstantInt>(AI->getArraySize()))
+ hasStaticAllocas = true;
+ else
+ hasDynamicAllocas = true;
+ }
+ }
+
+ // Finally, clone over the terminator.
+ const TerminatorInst *OldTI = BB->getTerminator();
+ // Set once the terminator has been replaced by a folded unconditional branch.
+ bool TerminatorDone = false;
+ if (const BranchInst *BI = dyn_cast<BranchInst>(OldTI)) {
+ if (BI->isConditional()) {
+ // If the condition was a known constant in the callee...
+ ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
+ // Or is a known constant in the caller...
+ if (Cond == 0) {
+ Value *V = VMap[BI->getCondition()];
+ Cond = dyn_cast_or_null<ConstantInt>(V);
+ }
+
+ // Constant fold to uncond branch!
+ if (Cond) {
+ // A nonzero condition selects successor 0, a zero condition successor 1.
+ // The new branch still targets the old block; only that successor is
+ // queued for cloning.
+ BasicBlock *Dest = BI->getSuccessor(!Cond->getZExtValue());
+ VMap[OldTI] = BranchInst::Create(Dest, NewBB);
+ ToClone.push_back(Dest);
+ TerminatorDone = true;
+ }
+ }
+ } else if (const SwitchInst *SI = dyn_cast<SwitchInst>(OldTI)) {
+ // If switching on a value known constant in the caller.
+ ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition());
+ if (Cond == 0) { // Or known constant after constant prop in the callee...
+ Value *V = VMap[SI->getCondition()];
+ Cond = dyn_cast_or_null<ConstantInt>(V);
+ }
+ if (Cond) { // Constant fold to uncond branch!
+ SwitchInst::ConstCaseIt Case = SI->findCaseValue(Cond);
+ BasicBlock *Dest = const_cast<BasicBlock*>(Case.getCaseSuccessor());
+ VMap[OldTI] = BranchInst::Create(Dest, NewBB);
+ ToClone.push_back(Dest);
+ TerminatorDone = true;
+ }
+ }
+
+ // The terminator could not be folded: clone it unchanged and queue every
+ // successor.
+ if (!TerminatorDone) {
+ Instruction *NewInst = OldTI->clone();
+ if (OldTI->hasName())
+ NewInst->setName(OldTI->getName()+NameSuffix);
+ NewBB->getInstList().push_back(NewInst);
+ VMap[OldTI] = NewInst; // Add instruction map to value.
+
+ // Recursively clone any reachable successor blocks.
+ const TerminatorInst *TI = BB->getTerminator();
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ ToClone.push_back(TI->getSuccessor(i));
+ }
+
+ if (CodeInfo) {
+ CodeInfo->ContainsCalls |= hasCalls;
+ CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas;
+ // A static alloca outside the function's first block is reported as
+ // dynamic.
+ CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas &&
+ BB != &BB->getParent()->front();
+ }
+}
+
+/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto,
+/// except that it does some simple constant prop and DCE on the fly. The
+/// effect of this is to copy significantly less code in cases where (for
+/// example) a function call with constant arguments is inlined, and those
+/// constant arguments cause a significant amount of code in the callee to be
+/// dead. Since this doesn't produce an exact copy of the input, it can't be
+/// used for things like CloneFunction or CloneModule.
+///
+/// Every argument of OldFunc must already be mapped in VMap (asserted). The
+/// return instructions that survive folding are appended to Returns. TD is
+/// passed on to the instruction simplifier. TheCall is not referenced in this
+/// function's body.
+void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
+ ValueToValueMapTy &VMap,
+ bool ModuleLevelChanges,
+ SmallVectorImpl<ReturnInst*> &Returns,
+ const char *NameSuffix,
+ ClonedCodeInfo *CodeInfo,
+ const DataLayout *TD,
+ Instruction *TheCall) {
+ assert(NameSuffix && "NameSuffix cannot be null!");
+
+#ifndef NDEBUG
+ for (Function::const_arg_iterator II = OldFunc->arg_begin(),
+ E = OldFunc->arg_end(); II != E; ++II)
+ assert(VMap.count(II) && "No mapping from source argument specified!");
+#endif
+
+ PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges,
+ NameSuffix, CodeInfo, TD);
+
+ // Clone the entry block, and anything recursively reachable from it.
+ // The worklist is used as a stack: the most recently queued block is cloned
+ // next.
+ std::vector<const BasicBlock*> CloneWorklist;
+ CloneWorklist.push_back(&OldFunc->getEntryBlock());
+ while (!CloneWorklist.empty()) {
+ const BasicBlock *BB = CloneWorklist.back();
+ CloneWorklist.pop_back();
+ PFC.CloneBlock(BB, CloneWorklist);
+ }
+
+ // Loop over all of the basic blocks in the old function. If the block was
+ // reachable, we have cloned it and the old block is now in the value map:
+ // insert it into the new function in the right order. If not, ignore it.
+ //
+ // Defer PHI resolution until rest of function is resolved.
+ SmallVector<const PHINode*, 16> PHIToResolve;
+ for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end();
+ BI != BE; ++BI) {
+ Value *V = VMap[BI];
+ BasicBlock *NewBB = cast_or_null<BasicBlock>(V);
+ if (NewBB == 0) continue; // Dead block.
+
+ // Add the new block to the new function.
+ NewFunc->getBasicBlockList().push_back(NewBB);
+
+ // Handle PHI nodes specially, as we have to remove references to dead
+ // blocks.
+ // Only the leading run of PHI nodes is collected; the scan stops at the
+ // first non-PHI instruction.
+ for (BasicBlock::const_iterator I = BI->begin(), E = BI->end(); I != E; ++I)
+ if (const PHINode *PN = dyn_cast<PHINode>(I))
+ PHIToResolve.push_back(PN);
+ else
+ break;
+
+ // Finally, remap the terminator instructions, as those can't be remapped
+ // until all BBs are mapped.
+ RemapInstruction(NewBB->getTerminator(), VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
+ }
+
+ // Defer PHI resolution until rest of function is resolved, PHI resolution
+ // requires the CFG to be up-to-date.
+ // phino is advanced by the inner loop, which consumes all PHIs that belong
+ // to the same old block in one outer iteration.
+ for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) {
+ const PHINode *OPN = PHIToResolve[phino];
+ unsigned NumPreds = OPN->getNumIncomingValues();
+ const BasicBlock *OldBB = OPN->getParent();
+ BasicBlock *NewBB = cast<BasicBlock>(VMap[OldBB]);
+
+ // Map operands for blocks that are live and remove operands for blocks
+ // that are dead.
+ for (; phino != PHIToResolve.size() &&
+ PHIToResolve[phino]->getParent() == OldBB; ++phino) {
+ OPN = PHIToResolve[phino];
+ PHINode *PN = cast<PHINode>(VMap[OPN]);
+ for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) {
+ Value *V = VMap[PN->getIncomingBlock(pred)];
+ if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(V)) {
+ Value *InVal = MapValue(PN->getIncomingValue(pred),
+ VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
+ assert(InVal && "Unknown input value?");
+ PN->setIncomingValue(pred, InVal);
+ PN->setIncomingBlock(pred, MappedBlock);
+ } else {
+ PN->removeIncomingValue(pred, false);
+ --pred, --e; // Revisit the next entry.
+ }
+ }
+ }
+
+ // The loop above has removed PHI entries for those blocks that are dead
+ // and has updated others. However, if a block is live (i.e. copied over)
+ // but its terminator has been changed to not go to this block, then our
+ // phi nodes will have invalid entries. Update the PHI nodes in this
+ // case.
+ PHINode *PN = cast<PHINode>(NewBB->begin());
+ NumPreds = std::distance(pred_begin(NewBB), pred_end(NewBB));
+ if (NumPreds != PN->getNumIncomingValues()) {
+ assert(NumPreds < PN->getNumIncomingValues());
+ // Count how many times each predecessor comes to this block.
+ // The counters are unsigned, so the decrements below wrap around; the
+ // increments in the following loop bring each entry back up to the
+ // number of surplus PHI entries for that predecessor.
+ std::map<BasicBlock*, unsigned> PredCount;
+ for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB);
+ PI != E; ++PI)
+ --PredCount[*PI];
+
+ // Figure out how many entries to remove from each PHI.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ ++PredCount[PN->getIncomingBlock(i)];
+
+ // At this point, the excess predecessor entries are positive in the
+ // map. Loop over all of the PHIs and remove excess predecessor
+ // entries.
+ BasicBlock::iterator I = NewBB->begin();
+ for (; (PN = dyn_cast<PHINode>(I)); ++I) {
+ for (std::map<BasicBlock*, unsigned>::iterator PCI =PredCount.begin(),
+ E = PredCount.end(); PCI != E; ++PCI) {
+ BasicBlock *Pred = PCI->first;
+ for (unsigned NumToRemove = PCI->second; NumToRemove; --NumToRemove)
+ PN->removeIncomingValue(Pred, false);
+ }
+ }
+ }
+
+ // If the loops above have made these phi nodes have 0 or 1 operand,
+ // replace them with undef or the input value. We must do this for
+ // correctness, because 0-operand phis are not valid.
+ // Only the 0-operand case is handled here, by replacing the PHI with undef
+ // and updating the VMap entry of the corresponding old PHI.
+ PN = cast<PHINode>(NewBB->begin());
+ if (PN->getNumIncomingValues() == 0) {
+ BasicBlock::iterator I = NewBB->begin();
+ BasicBlock::const_iterator OldI = OldBB->begin();
+ while ((PN = dyn_cast<PHINode>(I++))) {
+ Value *NV = UndefValue::get(PN->getType());
+ PN->replaceAllUsesWith(NV);
+ assert(VMap[OldI] == PN && "VMap mismatch");
+ VMap[OldI] = NV;
+ PN->eraseFromParent();
+ ++OldI;
+ }
+ }
+ }
+
+ // Make a second pass over the PHINodes now that all of them have been
+ // remapped into the new function, simplifying the PHINode and performing any
+ // recursive simplifications exposed. This will transparently update the
+ // WeakVH in the VMap. Notably, we rely on that so that if we coalesce
+ // two PHINodes, the iteration over the old PHIs remains valid, and the
+ // mapping will just map us to the new node (which may not even be a PHI
+ // node).
+ for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx)
+ if (PHINode *PN = dyn_cast<PHINode>(VMap[PHIToResolve[Idx]]))
+ recursivelySimplifyInstruction(PN, TD);
+
+ // Now that the inlined function body has been fully constructed, go through
+ // and zap unconditional fall-through branches. This happens all the time when
+ // specializing code: code specialization turns conditional branches into
+ // uncond branches, and this code folds them.
+ Function::iterator Begin = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]);
+ Function::iterator I = Begin;
+ while (I != NewFunc->end()) {
+ // Check if this block has become dead during inlining or other
+ // simplifications. Note that the first block will appear dead, as it has
+ // not yet been wired up properly.
+ // A block whose only predecessor is itself is treated as dead as well.
+ if (I != Begin && (pred_begin(I) == pred_end(I) ||
+ I->getSinglePredecessor() == I)) {
+ // Advance I before deleting so the iterator stays valid.
+ BasicBlock *DeadBB = I++;
+ DeleteDeadBlock(DeadBB);
+ continue;
+ }
+
+ // We need to simplify conditional branches and switches with a constant
+ // operand. We try to prune these out when cloning, but if the
+ // simplification required looking through PHI nodes, those are only
+ // available after forming the full basic block. That may leave some here,
+ // and we still want to prune the dead code as early as possible.
+ ConstantFoldTerminator(I);
+
+ // Only a block ending in an unconditional branch to a block with a single
+ // predecessor is merged with its successor.
+ BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator());
+ if (!BI || BI->isConditional()) { ++I; continue; }
+
+ BasicBlock *Dest = BI->getSuccessor(0);
+ if (!Dest->getSinglePredecessor()) {
+ ++I; continue;
+ }
+
+ // We shouldn't be able to get single-entry PHI nodes here, as instsimplify
+ // above should have zapped all of them..
+ assert(!isa<PHINode>(Dest->begin()));
+
+ // We know all single-entry PHI nodes in the inlined function have been
+ // removed, so we just need to splice the blocks.
+ BI->eraseFromParent();
+
+ // Make all PHI nodes that referred to Dest now refer to I as their source.
+ Dest->replaceAllUsesWith(I);
+
+ // Move all the instructions in the succ to the pred.
+ I->getInstList().splice(I->end(), Dest->getInstList());
+
+ // Remove the dest block.
+ Dest->eraseFromParent();
+
+ // Do not increment I, iteratively merge all things this block branches to.
+ }
+
+ // Make a final pass over the basic blocks from the old function to gather
+ // any return instructions which survived folding. We have to do this here
+ // because we can iteratively remove and merge returns above.
+ for (Function::iterator I = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]),
+ E = NewFunc->end();
+ I != E; ++I)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator()))
+ Returns.push_back(RI);
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
new file mode 100644
index 000000000000..64df089e1b81
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
@@ -0,0 +1,122 @@
+//===- CloneModule.cpp - Clone an entire module ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CloneModule interface which makes a copy of an
+// entire module.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+using namespace llvm;
+
+ /// CloneModule - Build and return a duplicate of the given module. The copy
+ /// is not a trivial one: global variables and functions must be recreated
+ /// first so that initializers and function bodies can then be rewritten to
+ /// reference the new globals rather than the originals.
+ ///
+ Module *llvm::CloneModule(const Module *M) {
+   // This overload does not expose the old-to-new mapping, so a local value
+   // map that lives only for the duration of the clone is sufficient.
+   ValueToValueMapTy ScratchMap;
+   Module *Copy = CloneModule(M, ScratchMap);
+   return Copy;
+ }
+
+ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
+   // Begin with an empty module that carries the same identifier, context,
+   // data layout, target triple and module-level inline asm as the source.
+   Module *Clone = new Module(M->getModuleIdentifier(), M->getContext());
+   Clone->setDataLayout(M->getDataLayout());
+   Clone->setTargetTriple(M->getTargetTriple());
+   Clone->setModuleInlineAsm(M->getModuleInlineAsm());
+
+   // Step 1: create a shell for every global variable in the new module and
+   // record it in VMap. Initializers are deliberately left empty here; they
+   // are filled in once everything they may refer to exists.
+   for (Module::const_global_iterator GI = M->global_begin(),
+                                      GE = M->global_end();
+        GI != GE; ++GI) {
+     GlobalVariable *NewGV = new GlobalVariable(*Clone,
+                                                GI->getType()->getElementType(),
+                                                GI->isConstant(),
+                                                GI->getLinkage(),
+                                                (Constant*) 0, GI->getName(),
+                                                (GlobalVariable*) 0,
+                                                GI->getThreadLocalMode(),
+                                                GI->getType()->getAddressSpace());
+     NewGV->copyAttributesFrom(GI);
+     VMap[GI] = NewGV;
+   }
+
+   // Step 2: declare every function (bodies come later) and record it.
+   for (Module::const_iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI) {
+     FunctionType *FTy = cast<FunctionType>(FI->getType()->getElementType());
+     Function *NewF =
+       Function::Create(FTy, FI->getLinkage(), FI->getName(), Clone);
+     NewF->copyAttributesFrom(FI);
+     VMap[FI] = NewF;
+   }
+
+   // Step 3: create every alias with no aliasee yet and record it.
+   for (Module::const_alias_iterator AI = M->alias_begin(),
+                                     AE = M->alias_end();
+        AI != AE; ++AI) {
+     GlobalAlias *NewGA = new GlobalAlias(AI->getType(), AI->getLinkage(),
+                                          AI->getName(), NULL, Clone);
+     NewGA->copyAttributesFrom(AI);
+     VMap[AI] = NewGA;
+   }
+
+   // Step 4: everything an initializer can reference now has a counterpart in
+   // the new module, so map the initializers across.
+   for (Module::const_global_iterator GI = M->global_begin(),
+                                      GE = M->global_end();
+        GI != GE; ++GI) {
+     GlobalVariable *NewGV = cast<GlobalVariable>(VMap[GI]);
+     if (!GI->hasInitializer())
+       continue;
+     NewGV->setInitializer(MapValue(GI->getInitializer(), VMap));
+   }
+
+   // Step 5: clone the body of every function that has a definition.
+   for (Module::const_iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI) {
+     Function *NewF = cast<Function>(VMap[FI]);
+     if (FI->isDeclaration())
+       continue;
+
+     // Name the new arguments after the old ones and map old -> new.
+     Function::arg_iterator NewArg = NewF->arg_begin();
+     for (Function::const_arg_iterator OldArg = FI->arg_begin();
+          OldArg != FI->arg_end(); ++OldArg) {
+       NewArg->setName(OldArg->getName());
+       VMap[OldArg] = NewArg++;
+     }
+
+     // The list of cloned return instructions is required by the callee but
+     // is of no use here.
+     SmallVector<ReturnInst*, 8> IgnoredReturns;
+     CloneFunctionInto(NewF, FI, VMap, /*ModuleLevelChanges=*/true,
+                       IgnoredReturns);
+   }
+
+   // Step 6: point each new alias at the mapped aliasee, when there is one.
+   for (Module::const_alias_iterator AI = M->alias_begin(),
+                                     AE = M->alias_end();
+        AI != AE; ++AI) {
+     GlobalAlias *NewGA = cast<GlobalAlias>(VMap[AI]);
+     const Constant *Aliasee = AI->getAliasee();
+     if (Aliasee)
+       NewGA->setAliasee(MapValue(Aliasee, VMap));
+   }
+
+   // Step 7: recreate the named metadata, mapping every operand.
+   for (Module::const_named_metadata_iterator NI = M->named_metadata_begin(),
+                                              NE = M->named_metadata_end();
+        NI != NE; ++NI) {
+     const NamedMDNode &Src = *NI;
+     NamedMDNode *Dst = Clone->getOrInsertNamedMetadata(Src.getName());
+     for (unsigned Op = 0, NumOps = Src.getNumOperands(); Op != NumOps; ++Op)
+       Dst->addOperand(MapValue(Src.getOperand(Op), VMap));
+   }
+
+   return Clone;
+ }
diff --git a/contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp b/contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp
new file mode 100644
index 000000000000..8fa412a18b99
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp
@@ -0,0 +1,96 @@
+//===- CmpInstAnalysis.cpp - Utils to help fold compares ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file holds routines to help analyse compare instructions
+// and fold them into constants or other compare instructions
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/CmpInstAnalysis.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+
+using namespace llvm;
+
+/// getICmpCode - Encode a icmp predicate into a three bit mask. These bits
+/// are carefully arranged to allow folding of expressions such as:
+///
+/// (A < B) | (A > B) --> (A != B)
+///
+ /// Note that this is only valid if the first and second predicates have the
+ /// same sign. It is illegal to do: (A u< B) | (A s> B)
+///
+/// Three bits are used to represent the condition, as follows:
+/// 0 A > B
+/// 1 A == B
+/// 2 A < B
+///
+/// <=> Value Definition
+/// 000 0 Always false
+/// 001 1 A > B
+/// 010 2 A == B
+/// 011 3 A >= B
+/// 100 4 A < B
+/// 101 5 A != B
+/// 110 6 A <= B
+/// 111 7 Always true
+///
+ unsigned llvm::getICmpCode(const ICmpInst *ICI, bool InvertPred) {
+   ICmpInst::Predicate P = ICI->getPredicate();
+   if (InvertPred)
+     P = ICI->getInversePredicate();
+
+   // Signed and unsigned flavours of a relation share one code; the codes 0
+   // (always false) and 7 (always true) have no icmp predicate and are never
+   // produced here.
+   switch (P) {
+   case ICmpInst::ICMP_UGT:
+   case ICmpInst::ICMP_SGT:
+     return 1; // 001
+   case ICmpInst::ICMP_EQ:
+     return 2; // 010
+   case ICmpInst::ICMP_UGE:
+   case ICmpInst::ICMP_SGE:
+     return 3; // 011
+   case ICmpInst::ICMP_ULT:
+   case ICmpInst::ICMP_SLT:
+     return 4; // 100
+   case ICmpInst::ICMP_NE:
+     return 5; // 101
+   case ICmpInst::ICMP_ULE:
+   case ICmpInst::ICMP_SLE:
+     return 6; // 110
+   default:
+     llvm_unreachable("Invalid ICmp predicate!");
+   }
+ }
+
+ /// getICmpValue - The inverse of getICmpCode: given a three bit code and the
+ /// two operands, produce either a constant or the predicate of a new icmp.
+ /// \p Sign selects the signed or unsigned flavour of ordered predicates.
+ /// A non-NULL result is the constant true or false (codes 7 and 0).
+ /// A NULL result means a new ICmp has to be created by the caller; its
+ /// predicate is written to \p NewICmpPred.
+ Value *llvm::getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
+                           CmpInst::Predicate &NewICmpPred) {
+   // The two degenerate codes fold to a constant of the compare result type.
+   if (Code == 0 || Code == 7)
+     return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()),
+                             Code == 7 ? 1 : 0);
+
+   // Every remaining legal code names a real predicate.
+   switch (Code) {
+   case 1:
+     NewICmpPred = Sign ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+     break;
+   case 2:
+     NewICmpPred = ICmpInst::ICMP_EQ;
+     break;
+   case 3:
+     NewICmpPred = Sign ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
+     break;
+   case 4:
+     NewICmpPred = Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
+     break;
+   case 5:
+     NewICmpPred = ICmpInst::ICMP_NE;
+     break;
+   case 6:
+     NewICmpPred = Sign ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
+     break;
+   default:
+     llvm_unreachable("Illegal ICmp code!");
+   }
+   return NULL;
+ }
+
+ /// PredicatesFoldable - Two predicates can be folded together when their
+ /// signedness agrees, or when one is signed and the other is an equality
+ /// comparison (equality comparisons carry no sign).
+ bool llvm::PredicatesFoldable(ICmpInst::Predicate p1, ICmpInst::Predicate p2) {
+   const bool Signed1 = CmpInst::isSigned(p1);
+   const bool Signed2 = CmpInst::isSigned(p2);
+   if (Signed1 == Signed2)
+     return true;
+
+   // Exactly one predicate is signed here; the other one must be an equality.
+   if (Signed1)
+     return ICmpInst::isEquality(p2);
+   return ICmpInst::isEquality(p1);
+ }
diff --git a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
new file mode 100644
index 000000000000..6f0086443693
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -0,0 +1,779 @@
+//===- CodeExtractor.cpp - Pull code region into a new function -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the interface to tear out a code region, such as an
+// individual loop or a parallel section, into a new function, replacing it with
+// a call to the new function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/CodeExtractor.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/RegionInfo.h"
+#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <algorithm>
+#include <set>
+using namespace llvm;
+
+// Provide a command-line option to aggregate function arguments into a struct
+// for functions produced by the code extractor. This is useful when converting
+// extracted functions to pthread-based code, as only one argument (void*) can
+// be passed in to pthread_create().
+static cl::opt<bool>
+AggregateArgsOpt("aggregate-extracted-args", cl::Hidden,
+ cl::desc("Aggregate arguments to code-extracted functions"));
+
+ /// \brief Decide whether a single basic block may be part of an extracted
+ /// region.
+ static bool isBlockValidForExtraction(const BasicBlock &BB) {
+   // A landing pad has to stay in the function it was inserted into.
+   if (BB.isLandingPad())
+     return false;
+
+   // Reject blocks containing an alloca, an invoke, or a call to va_start.
+   for (BasicBlock::const_iterator Inst = BB.begin(), End = BB.end();
+        Inst != End; ++Inst) {
+     if (isa<AllocaInst>(Inst) || isa<InvokeInst>(Inst))
+       return false;
+
+     const CallInst *Call = dyn_cast<CallInst>(Inst);
+     if (!Call)
+       continue;
+
+     const Function *Callee = Call->getCalledFunction();
+     if (Callee && Callee->getIntrinsicID() == Intrinsic::vastart)
+       return false;
+   }
+
+   return true;
+ }
+
+ /// \brief Collect the given blocks into an ordered set, or return an empty
+ /// set when any of them cannot be extracted.
+ template <typename IteratorT>
+ static SetVector<BasicBlock *> buildExtractionBlockSet(IteratorT BBBegin,
+                                                        IteratorT BBEnd) {
+   SetVector<BasicBlock *> Region;
+
+   assert(BBBegin != BBEnd);
+
+   // Add the blocks one by one. A duplicate is a caller bug; an invalid block
+   // makes the whole region non-viable, which is signalled by an empty set.
+   for (IteratorT It = BBBegin, End = BBEnd; It != End; ++It) {
+     const bool Inserted = Region.insert(*It);
+     if (!Inserted)
+       llvm_unreachable("Repeated basic blocks in extraction input");
+
+     if (isBlockValidForExtraction(**It))
+       continue;
+
+     Region.clear();
+     return Region;
+   }
+
+ #ifndef NDEBUG
+   // Debug-only sanity check: only the first block may be entered from
+   // outside the region.
+   for (SetVector<BasicBlock *>::iterator BI = llvm::next(Region.begin()),
+                                          BE = Region.end();
+        BI != BE; ++BI)
+     for (pred_iterator Pred = pred_begin(*BI), PredEnd = pred_end(*BI);
+          Pred != PredEnd; ++Pred)
+       assert(Region.count(*Pred) &&
+              "No blocks in this region may have entries from outside the region"
+              " except for the first block!");
+ #endif
+
+   return Region;
+ }
+
+ /// \brief Convenience overload: build the extraction set from an ArrayRef by
+ /// forwarding its iterator range.
+ static SetVector<BasicBlock *>
+ buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs) {
+   SetVector<BasicBlock *> Region =
+     buildExtractionBlockSet(BBs.begin(), BBs.end());
+   return Region;
+ }
+
+ /// \brief Convenience overload: build the extraction set from a RegionNode,
+ /// which is either a whole sub-region or one basic block.
+ static SetVector<BasicBlock *>
+ buildExtractionBlockSet(const RegionNode &RN) {
+   if (RN.isSubRegion()) {
+     // A nested region contributes all of its blocks.
+     const Region &SubRegion = *RN.getNodeAs<Region>();
+     return buildExtractionBlockSet(SubRegion.block_begin(),
+                                    SubRegion.block_end());
+   }
+
+   // Otherwise the node wraps exactly one BasicBlock.
+   return buildExtractionBlockSet(RN.getNodeAs<BasicBlock>());
+ }
+
+ // All constructors share the same pattern: aggregation is enabled either by
+ // the caller or by the -aggregate-extracted-args option, the block set is
+ // built (and validated) from the input, and NumExitBlocks starts out as ~0U.
+
+ // Extract a single basic block; no dominator tree is available.
+ CodeExtractor::CodeExtractor(BasicBlock *BB, bool AggregateArgs)
+     : DT(0),
+       AggregateArgs(AggregateArgs || AggregateArgsOpt),
+       Blocks(buildExtractionBlockSet(BB)),
+       NumExitBlocks(~0U) {}
+
+ // Extract an explicit list of blocks, with an optional dominator tree.
+ CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
+                              bool AggregateArgs)
+     : DT(DT),
+       AggregateArgs(AggregateArgs || AggregateArgsOpt),
+       Blocks(buildExtractionBlockSet(BBs)),
+       NumExitBlocks(~0U) {}
+
+ // Extract all the blocks of a loop.
+ CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs)
+     : DT(&DT),
+       AggregateArgs(AggregateArgs || AggregateArgsOpt),
+       Blocks(buildExtractionBlockSet(L.getBlocks())),
+       NumExitBlocks(~0U) {}
+
+ // Extract a region node (a sub-region or a single block).
+ CodeExtractor::CodeExtractor(DominatorTree &DT, const RegionNode &RN,
+                              bool AggregateArgs)
+     : DT(&DT),
+       AggregateArgs(AggregateArgs || AggregateArgsOpt),
+       Blocks(buildExtractionBlockSet(RN)),
+       NumExitBlocks(~0U) {}
+
+ /// definedInRegion - True when \p V is an instruction whose parent block is
+ /// one of the blocks being extracted.
+ static bool definedInRegion(const SetVector<BasicBlock *> &Blocks, Value *V) {
+   Instruction *Def = dyn_cast<Instruction>(V);
+   if (!Def)
+     return false; // Only instructions live inside a block.
+   return Blocks.count(Def->getParent()) != 0;
+ }
+
+ /// definedInCaller - True when \p V is defined in the function being code
+ /// extracted but outside of the extracted region: either a function argument
+ /// or an instruction in a block that is not extracted. Such values have to be
+ /// passed to the new function as live-ins.
+ static bool definedInCaller(const SetVector<BasicBlock *> &Blocks, Value *V) {
+   if (isa<Argument>(V))
+     return true;
+
+   Instruction *Def = dyn_cast<Instruction>(V);
+   if (!Def)
+     return false;
+   return Blocks.count(Def->getParent()) == 0;
+ }
+
+ void CodeExtractor::findInputsOutputs(ValueSet &Inputs,
+                                       ValueSet &Outputs) const {
+   for (SetVector<BasicBlock *>::const_iterator BlockIt = Blocks.begin(),
+                                                BlockEnd = Blocks.end();
+        BlockIt != BlockEnd; ++BlockIt) {
+     BasicBlock *Block = *BlockIt;
+
+     for (BasicBlock::iterator Inst = Block->begin(), InstEnd = Block->end();
+          Inst != InstEnd; ++Inst) {
+       // Inputs: operands whose definition lives outside the region.
+       for (User::op_iterator Op = Inst->op_begin(), OpEnd = Inst->op_end();
+            Op != OpEnd; ++Op) {
+         if (!definedInCaller(Blocks, *Op))
+           continue;
+         Inputs.insert(*Op);
+       }
+
+       // Outputs: the instruction itself, as soon as one user is found that
+       // is not inside the region. One such user is enough.
+       for (Value::use_iterator Use = Inst->use_begin(),
+                                UseEnd = Inst->use_end();
+            Use != UseEnd; ++Use) {
+         if (definedInRegion(Blocks, *Use))
+           continue;
+         Outputs.insert(Inst);
+         break;
+       }
+     }
+   }
+ }
+
+ /// severSplitPHINodes - If the header's PHI nodes have more than one incoming
+ /// edge from outside the region, or the header is the function's entry block,
+ /// split it after its PHIs; \p Header is updated to the new, extractable half.
+ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
+ unsigned NumPredsFromRegion = 0;
+ unsigned NumPredsOutsideRegion = 0;
+
+ if (Header != &Header->getParent()->getEntryBlock()) { // entry: always split
+ PHINode *PN = dyn_cast<PHINode>(Header->begin());
+ if (!PN) return; // No PHI nodes, nothing to sever.
+
+ // Classify each incoming edge of the first PHI node as coming from inside
+ // or from outside the region. More than one edge from outside means the
+ // header block has to be severed into two.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (Blocks.count(PN->getIncomingBlock(i)))
+ ++NumPredsFromRegion;
+ else
+ ++NumPredsOutsideRegion;
+
+ // With one (or fewer) predecessor from outside the region there is nothing
+ // special to do.
+ if (NumPredsOutsideRegion <= 1) return;
+ }
+
+ // Otherwise, split the header block into two pieces: the first keeps the
+ // PHI nodes merging values from outside of the region, and the second
+ // contains all of the remaining code of the block and merges back any
+ // incoming values from inside of the region.
+ BasicBlock::iterator AfterPHIs = Header->getFirstNonPHI();
+ BasicBlock *NewBB = Header->splitBasicBlock(AfterPHIs,
+ Header->getName()+".ce");
+
+ // Only the second block is code extracted from now on, so it replaces the
+ // old header in the block set and becomes the new header of the region.
+ BasicBlock *OldPred = Header;
+ Blocks.remove(OldPred);
+ Blocks.insert(NewBB);
+ Header = NewBB;
+
+ // Keep the dominator tree, when one was supplied, consistent with the split
+ // that was just performed.
+ if (DT)
+ DT->splitBlock(NewBB);
+
+ // Now adjust the PHI nodes and any branches from within the region so that
+ // they go to the new header block instead of the old header block.
+ if (NumPredsFromRegion) {
+ PHINode *PN = cast<PHINode>(OldPred->begin());
+ // Loop over the predecessors of OldPred that are in the region (as listed
+ // by its first PHI node), changing them to branch to NewBB instead.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (Blocks.count(PN->getIncomingBlock(i))) {
+ TerminatorInst *TI = PN->getIncomingBlock(i)->getTerminator();
+ TI->replaceUsesOfWith(OldPred, NewBB);
+ }
+
+ // Everything within the region is now branching to the right block; what is
+ // left is to update the PHI nodes, inserting new PHI nodes into NewBB.
+ for (AfterPHIs = OldPred->begin(); isa<PHINode>(AfterPHIs); ++AfterPHIs) {
+ PHINode *PN = cast<PHINode>(AfterPHIs);
+ // Create a new PHI node in the new header, whose first incoming value is
+ // PN itself, arriving from OldPred.
+ PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion,
+ PN->getName()+".ce", NewBB->begin());
+ NewPN->addIncoming(PN, OldPred);
+
+ // Loop over all of the incoming values in PN, moving them to NewPN if they
+ // come from the extracted region.
+ for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
+ if (Blocks.count(PN->getIncomingBlock(i))) {
+ NewPN->addIncoming(PN->getIncomingValue(i), PN->getIncomingBlock(i));
+ PN->removeIncomingValue(i);
+ --i; // An entry was removed: look at index i again on the next round.
+ }
+ }
+ }
+ }
+ }
+
+ void CodeExtractor::splitReturnBlocks() {
+   for (SetVector<BasicBlock *>::iterator BlockIt = Blocks.begin(),
+                                          BlockEnd = Blocks.end();
+        BlockIt != BlockEnd; ++BlockIt) {
+     BasicBlock *Old = *BlockIt;
+     ReturnInst *Ret = dyn_cast<ReturnInst>(Old->getTerminator());
+     if (!Ret)
+       continue;
+
+     // Move the return into a fresh block named "<block>.ret".
+     BasicBlock *RetBlock = Old->splitBasicBlock(Ret, Old->getName()+".ret");
+     if (!DT)
+       continue;
+
+     // Update the dominator tree: Old dominates the new block, and the new
+     // block takes over every node that Old used to immediately dominate.
+     // The children are snapshotted before the new node is added.
+     DomTreeNode *OldNode = DT->getNode(Old);
+     SmallVector<DomTreeNode*, 8> FormerChildren;
+     for (DomTreeNode::iterator Child = OldNode->begin(),
+                                ChildEnd = OldNode->end();
+          Child != ChildEnd; ++Child)
+       FormerChildren.push_back(*Child);
+
+     DomTreeNode *RetNode = DT->addNewBlock(RetBlock, Old);
+
+     for (SmallVectorImpl<DomTreeNode *>::iterator
+              Moved = FormerChildren.begin(),
+              MovedEnd = FormerChildren.end();
+          Moved != MovedEnd; ++Moved)
+       DT->changeImmediateDominator(*Moved, RetNode);
+   }
+ }
+
+ /// constructFunction - Create the new function for the region. Its parameters
+ /// are f(in0, ..., inN, out0, ..., outN), or one struct pointer if aggregating.
+ ///
+ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
+ const ValueSet &outputs,
+ BasicBlock *header,
+ BasicBlock *newRootNode,
+ BasicBlock *newHeader,
+ Function *oldFunction,
+ Module *M) {
+ DEBUG(dbgs() << "inputs: " << inputs.size() << "\n");
+ DEBUG(dbgs() << "outputs: " << outputs.size() << "\n");
+
+ // Return type by exit count: void for 0/1, i1 for 2, i16 otherwise.
+ switch (NumExitBlocks) {
+ case 0:
+ case 1: RetTy = Type::getVoidTy(header->getContext()); break;
+ case 2: RetTy = Type::getInt1Ty(header->getContext()); break;
+ default: RetTy = Type::getInt16Ty(header->getContext()); break;
+ }
+
+ std::vector<Type*> paramTy;
+
+ // Add the types of the input values to the function's argument list.
+ for (ValueSet::const_iterator i = inputs.begin(), e = inputs.end();
+ i != e; ++i) {
+ const Value *value = *i;
+ DEBUG(dbgs() << "value used in func: " << *value << "\n");
+ paramTy.push_back(value->getType());
+ }
+
+ // Add the types of the output values to the function's argument list: by
+ // value when aggregating (struct field), otherwise as a pointer.
+ for (ValueSet::const_iterator I = outputs.begin(), E = outputs.end();
+ I != E; ++I) {
+ DEBUG(dbgs() << "instr used in func: " << **I << "\n");
+ if (AggregateArgs)
+ paramTy.push_back((*I)->getType());
+ else
+ paramTy.push_back(PointerType::getUnqual((*I)->getType()));
+ }
+
+ DEBUG(dbgs() << "Function type: " << *RetTy << " f(");
+ for (std::vector<Type*>::iterator i = paramTy.begin(),
+ e = paramTy.end(); i != e; ++i)
+ DEBUG(dbgs() << **i << ", ");
+ DEBUG(dbgs() << ")\n");
+ // When aggregating, collapse all parameters into one pointer-to-struct.
+ if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
+ PointerType *StructPtr =
+ PointerType::getUnqual(StructType::get(M->getContext(), paramTy));
+ paramTy.clear();
+ paramTy.push_back(StructPtr);
+ }
+ FunctionType *funcType =
+ FunctionType::get(RetTy, paramTy, false);
+
+ // Create the new function, internal and named "<oldFunction>_<header>".
+ Function *newFunction = Function::Create(funcType,
+ GlobalValue::InternalLinkage,
+ oldFunction->getName() + "_" +
+ header->getName(), M);
+ // If the old function is no-throw, so is the new one.
+ if (oldFunction->doesNotThrow())
+ newFunction->setDoesNotThrow();
+
+ newFunction->getBasicBlockList().push_back(newRootNode);
+
+ // Create an iterator over the arguments of the new function.
+ Function::arg_iterator AI = newFunction->arg_begin();
+
+ // Rewrite all users of the inputs in the extracted region to use the
+ // arguments (or a load from the appropriate struct field) instead.
+ for (unsigned i = 0, e = inputs.size(); i != e; ++i) {
+ Value *RewriteVal;
+ if (AggregateArgs) {
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext()));
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i);
+ TerminatorInst *TI = newFunction->begin()->getTerminator();
+ GetElementPtrInst *GEP =
+ GetElementPtrInst::Create(AI, Idx, "gep_" + inputs[i]->getName(), TI);
+ RewriteVal = new LoadInst(GEP, "loadgep_" + inputs[i]->getName(), TI);
+ } else
+ RewriteVal = AI++;
+
+ std::vector<User*> Users(inputs[i]->use_begin(), inputs[i]->use_end());
+ for (std::vector<User*>::iterator use = Users.begin(), useE = Users.end();
+ use != useE; ++use)
+ if (Instruction* inst = dyn_cast<Instruction>(*use))
+ if (Blocks.count(inst->getParent()))
+ inst->replaceUsesOfWith(inputs[i], RewriteVal);
+ }
+
+ // Set names for input and output arguments (outputs get a ".out" suffix).
+ if (!AggregateArgs) {
+ AI = newFunction->arg_begin();
+ for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++AI)
+ AI->setName(inputs[i]->getName());
+ for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++AI)
+ AI->setName(outputs[i]->getName()+".out");
+ }
+
+ // Rewrite terminators in the old function that lie outside of the region
+ // and refer to the header so that they refer to newHeader instead. This must
+ // be done before we lose track of which blocks were in the code region.
+ std::vector<User*> Users(header->use_begin(), header->use_end());
+ for (unsigned i = 0, e = Users.size(); i != e; ++i)
+ // If the block which contains the terminator is not in the region (and
+ // still belongs to oldFunction), retarget it to the new block.
+ if (TerminatorInst *TI = dyn_cast<TerminatorInst>(Users[i]))
+ if (!Blocks.count(TI->getParent()) &&
+ TI->getParent()->getParent() == oldFunction)
+ TI->replaceUsesOfWith(header, newHeader);
+
+ return newFunction;
+ }
+
+ /// FindPhiPredForUseInBlock - Look through the uses of \p Used for a PHI node
+ /// located in \p BB. For the first such use, return the incoming block that
+ /// the PHI associates with it; return 0 when no PHI in \p BB uses the value.
+ static BasicBlock* FindPhiPredForUseInBlock(Value* Used, BasicBlock* BB) {
+   for (Value::use_iterator Use = Used->use_begin(), UseEnd = Used->use_end();
+        Use != UseEnd; ++Use) {
+     PHINode *Phi = dyn_cast<PHINode>(*Use);
+     if (!Phi || Phi->getParent() != BB)
+       continue; // Not a PHI, or a PHI in some other block.
+     return Phi->getIncomingBlock(Use);
+   }
+
+   return 0;
+ }
+
+ /// emitCallAndSwitchStatement - This method sets up the caller side: it adds
+ /// the call to the new function in codeReplacer, reloads the outputs, and
+ /// dispatches to the original exit blocks based on the call's result.
+ void CodeExtractor::
+ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
+ ValueSet &inputs, ValueSet &outputs) {
+ // Emit a call to the new function, passing in: *pointer to struct (if
+ // aggregating parameters), or plain inputs and allocated memory for outputs.
+ std::vector<Value*> params, StructValues, ReloadOutputs, Reloads;
+
+ LLVMContext &Context = newFunction->getContext();
+
+ // Add inputs as params, or to the list of values to be put into the struct.
+ for (ValueSet::iterator i = inputs.begin(), e = inputs.end(); i != e; ++i)
+ if (AggregateArgs)
+ StructValues.push_back(*i);
+ else
+ params.push_back(*i);
+
+ // Create allocas for the outputs (at the start of the caller's first block).
+ for (ValueSet::iterator i = outputs.begin(), e = outputs.end(); i != e; ++i) {
+ if (AggregateArgs) {
+ StructValues.push_back(*i);
+ } else {
+ AllocaInst *alloca =
+ new AllocaInst((*i)->getType(), 0, (*i)->getName()+".loc",
+ codeReplacer->getParent()->begin()->begin());
+ ReloadOutputs.push_back(alloca);
+ params.push_back(alloca);
+ }
+ }
+
+ AllocaInst *Struct = 0;
+ if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
+ std::vector<Type*> ArgTypes;
+ for (ValueSet::iterator v = StructValues.begin(),
+ ve = StructValues.end(); v != ve; ++v)
+ ArgTypes.push_back((*v)->getType());
+
+ // Allocate a struct at the beginning of this function.
+ Type *StructArgTy = StructType::get(newFunction->getContext(), ArgTypes);
+ Struct =
+ new AllocaInst(StructArgTy, 0, "structArg",
+ codeReplacer->getParent()->begin()->begin());
+ params.push_back(Struct);
+
+ for (unsigned i = 0, e = inputs.size(); i != e; ++i) { // store inputs only
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i);
+ GetElementPtrInst *GEP =
+ GetElementPtrInst::Create(Struct, Idx,
+ "gep_" + StructValues[i]->getName());
+ codeReplacer->getInstList().push_back(GEP);
+ StoreInst *SI = new StoreInst(StructValues[i], GEP);
+ codeReplacer->getInstList().push_back(SI);
+ }
+ }
+
+ // Emit the call to the function; it is only named when its value is used.
+ CallInst *call = CallInst::Create(newFunction, params,
+ NumExitBlocks > 1 ? "targetBlock" : "");
+ codeReplacer->getInstList().push_back(call);
+
+ Function::arg_iterator OutputArgBegin = newFunction->arg_begin();
+ unsigned FirstOut = inputs.size(); // Index of the first output field/arg.
+ if (!AggregateArgs)
+ std::advance(OutputArgBegin, inputs.size());
+
+ // Reload the outputs passed in by reference, and make users outside of the
+ // region use the reloaded value.
+ for (unsigned i = 0, e = outputs.size(); i != e; ++i) {
+ Value *Output = 0;
+ if (AggregateArgs) {
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i);
+ GetElementPtrInst *GEP
+ = GetElementPtrInst::Create(Struct, Idx,
+ "gep_reload_" + outputs[i]->getName());
+ codeReplacer->getInstList().push_back(GEP);
+ Output = GEP;
+ } else {
+ Output = ReloadOutputs[i];
+ }
+ LoadInst *load = new LoadInst(Output, outputs[i]->getName()+".reload");
+ Reloads.push_back(load);
+ codeReplacer->getInstList().push_back(load);
+ std::vector<User*> Users(outputs[i]->use_begin(), outputs[i]->use_end());
+ for (unsigned u = 0, e = Users.size(); u != e; ++u) {
+ Instruction *inst = cast<Instruction>(Users[u]);
+ if (!Blocks.count(inst->getParent()))
+ inst->replaceUsesOfWith(outputs[i], load);
+ }
+ }
+
+ // Now we can emit a switch statement; its condition is a placeholder here.
+ SwitchInst *TheSwitch =
+ SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)),
+ codeReplacer, 0, codeReplacer);
+
+ // Since there may be multiple exits from the original region, make the new
+ // function return an unsigned, switch on that number. This loop iterates
+ // over all of the blocks in the extracted region, updating any terminator
+ // instructions in the to-be-extracted region that branch to blocks that are
+ // not in the region to be extracted.
+ std::map<BasicBlock*, BasicBlock*> ExitBlockMap;
+
+ unsigned switchVal = 0;
+ for (SetVector<BasicBlock*>::const_iterator i = Blocks.begin(),
+ e = Blocks.end(); i != e; ++i) {
+ TerminatorInst *TI = (*i)->getTerminator();
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) // shadows i, e
+ if (!Blocks.count(TI->getSuccessor(i))) {
+ BasicBlock *OldTarget = TI->getSuccessor(i);
+ // Look up (or create below) the basic block which returns the value
+ // identifying this exit.
+ BasicBlock *&NewTarget = ExitBlockMap[OldTarget];
+ if (!NewTarget) {
+ // If we don't already have an exit stub for this non-extracted
+ // destination, create one now!
+ NewTarget = BasicBlock::Create(Context,
+ OldTarget->getName() + ".exitStub",
+ newFunction);
+ unsigned SuccNum = switchVal++;
+
+ Value *brVal = 0;
+ switch (NumExitBlocks) {
+ case 0:
+ case 1: break; // No value needed.
+ case 2: // Conditional branch, return a bool
+ brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum);
+ break;
+ default:
+ brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum);
+ break;
+ }
+
+ ReturnInst *NTRet = ReturnInst::Create(Context, brVal, NewTarget);
+
+ // Update the switch instruction in the caller with this exit.
+ TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context),
+ SuccNum),
+ OldTarget);
+
+ // Store the output values just before we exit through this stub.
+ Function::arg_iterator OAI = OutputArgBegin;
+ for (unsigned out = 0, e = outputs.size(); out != e; ++out) {
+ // For an invoke, the normal destination is the only one that is
+ // dominated by the result of the invocation
+ BasicBlock *DefBlock = cast<Instruction>(outputs[out])->getParent();
+
+ bool DominatesDef = true;
+
+ if (InvokeInst *Invoke = dyn_cast<InvokeInst>(outputs[out])) {
+ DefBlock = Invoke->getNormalDest();
+
+ // Make sure we are looking at the original successor block, not
+ // at a newly inserted exit block, which won't be in the dominator
+ // info.
+ for (std::map<BasicBlock*, BasicBlock*>::iterator I =
+ ExitBlockMap.begin(), E = ExitBlockMap.end(); I != E; ++I)
+ if (DefBlock == I->second) {
+ DefBlock = I->first;
+ break;
+ }
+
+ // In the extract block case, if the block we are extracting ends
+ // with an invoke instruction, make sure that we don't emit a
+ // store of the invoke value for the unwind block.
+ if (!DT && DefBlock != OldTarget)
+ DominatesDef = false;
+ }
+
+ if (DT) {
+ DominatesDef = DT->dominates(DefBlock, OldTarget);
+
+ // If the output value is used by a phi in the target block,
+ // then we need to test for dominance of the phi's predecessor
+ // instead. Unfortunately, this is a little complicated since we
+ // have already rewritten uses of the value to uses of the reload.
+ BasicBlock* pred = FindPhiPredForUseInBlock(Reloads[out],
+ OldTarget);
+ if (pred && DT && DT->dominates(DefBlock, pred))
+ DominatesDef = true;
+ }
+
+ if (DominatesDef) {
+ if (AggregateArgs) {
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(Context),
+ FirstOut+out);
+ GetElementPtrInst *GEP =
+ GetElementPtrInst::Create(OAI, Idx,
+ "gep_" + outputs[out]->getName(),
+ NTRet);
+ new StoreInst(outputs[out], GEP, NTRet);
+ } else {
+ new StoreInst(outputs[out], OAI, NTRet);
+ }
+ }
+ // Advance output iterator even if we don't emit a store
+ if (!AggregateArgs) ++OAI;
+ }
+ }
+
+ // Rewrite the original terminator to branch to the exit stub instead.
+ TI->setSuccessor(i, NewTarget);
+ }
+ }
+
+ // Now that we've done the deed, simplify the switch instruction.
+ Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType();
+ switch (NumExitBlocks) {
+ case 0:
+ // There are no successors (the block containing the switch itself), which
+ // means that previously this was the last part of the function, and hence
+ // this should be rewritten as a `ret'
+
+ // Check if the function should return a value
+ if (OldFnRetTy->isVoidTy()) {
+ ReturnInst::Create(Context, 0, TheSwitch); // Return void
+ } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) {
+ // Return what we have: the switch condition has the right type.
+ ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch);
+ } else {
+ // Otherwise we must have code extracted an unwind or something, just
+ // return whatever we want.
+ ReturnInst::Create(Context,
+ Constant::getNullValue(OldFnRetTy), TheSwitch);
+ }
+
+ TheSwitch->eraseFromParent();
+ break;
+ case 1:
+ // Only a single destination, change the switch into an unconditional
+ // branch.
+ BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch);
+ TheSwitch->eraseFromParent();
+ break;
+ case 2: // Two destinations: a conditional branch on the call's i1 result.
+ BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2),
+ call, TheSwitch);
+ TheSwitch->eraseFromParent();
+ break;
+ default:
+ // Otherwise, switch on the call's result and make the default destination
+ // of the switch instruction be one of the other successors.
+ TheSwitch->setCondition(call);
+ TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks));
+ // Remove the case made redundant by the new default destination.
+ TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1));
+ break;
+ }
+ }
+
+void CodeExtractor::moveCodeToFunction(Function *newFunction) {
+ // The region's blocks are taken out of the function that owns the first one.
+ Function *srcFunc = (*Blocks.begin())->getParent();
+ Function::BasicBlockListType &srcList = srcFunc->getBasicBlockList();
+ Function::BasicBlockListType &dstList = newFunction->getBasicBlockList();
+
+ // Walk the region in SetVector order: unlink each block from the old
+ // function's block list, then append it to the new function's list.
+ SetVector<BasicBlock*>::const_iterator cur = Blocks.begin();
+ SetVector<BasicBlock*>::const_iterator last = Blocks.end();
+ while (cur != last) {
+ BasicBlock *BB = *cur;
+ ++cur;
+ srcList.remove(BB);
+ dstList.push_back(BB);
+ }
+}
+
+Function *CodeExtractor::extractCodeRegion() {
+ if (!isEligible()) // 0 tells the caller that no function was extracted.
+ return 0;
+
+ ValueSet inputs, outputs;
+
+ // Assumption: this is a single-entry code region, and the header is the first
+ // block in the region.
+ BasicBlock *header = *Blocks.begin();
+
+ // If we have to split PHI nodes or the entry block, do so now.
+ severSplitPHINodes(header);
+
+ // If we have any return instructions in the region, split those blocks so
+ // that the return is not in the region.
+ splitReturnBlocks();
+
+ Function *oldFunction = header->getParent();
+
+ // This block takes the place of the extracted region in the old function.
+ BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(),
+ "codeRepl", oldFunction,
+ header);
+
+ // The new function needs a root node because other nodes can branch to the
+ // head of the region, but the entry node of a function cannot have preds.
+ BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(),
+ "newFuncRoot");
+ newFuncRoot->getInstList().push_back(BranchInst::Create(header));
+
+ // Find inputs to, outputs from the code region.
+ findInputsOutputs(inputs, outputs);
+ // Count the distinct successor blocks that lie outside the region.
+ SmallPtrSet<BasicBlock *, 1> ExitBlocks;
+ for (SetVector<BasicBlock *>::iterator I = Blocks.begin(), E = Blocks.end();
+ I != E; ++I)
+ for (succ_iterator SI = succ_begin(*I), SE = succ_end(*I); SI != SE; ++SI)
+ if (!Blocks.count(*SI))
+ ExitBlocks.insert(*SI);
+ NumExitBlocks = ExitBlocks.size();
+
+ // Construct new function based on inputs/outputs & add allocas for all defs.
+ Function *newFunction = constructFunction(inputs, outputs, header,
+ newFuncRoot,
+ codeReplacer, oldFunction,
+ oldFunction->getParent());
+ // Emit the call in codeReplacer, then move the region's blocks across.
+ emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs);
+
+ moveCodeToFunction(newFunction);
+
+ // Loop over all of the PHI nodes in the header block, and change any
+ // references to the old incoming edge to be the new incoming edge.
+ for (BasicBlock::iterator I = header->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (!Blocks.count(PN->getIncomingBlock(i)))
+ PN->setIncomingBlock(i, newFuncRoot);
+ }
+
+ // Look at all successors of the codeReplacer block. If any of these blocks
+ // had PHI nodes in them, we need to update the "from" block to be the code
+ // replacer, not the original block in the extracted region.
+ std::vector<BasicBlock*> Succs(succ_begin(codeReplacer),
+ succ_end(codeReplacer));
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i)
+ for (BasicBlock::iterator I = Succs[i]->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ std::set<BasicBlock*> ProcessedPreds; // region blocks already seen for PN
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) // NOTE: shadows the outer i/e
+ if (Blocks.count(PN->getIncomingBlock(i))) {
+ if (ProcessedPreds.insert(PN->getIncomingBlock(i)).second)
+ PN->setIncomingBlock(i, codeReplacer);
+ else {
+ // There were multiple entries in the PHI for this block, now there
+ // is only one, so remove the duplicated entries.
+ PN->removeIncomingValue(i, false);
+ --i; --e; // one entry fewer: revisit slot i and shrink the bound
+ }
+ }
+ }
+
+ //cerr << "NEW FUNCTION: " << *newFunction;
+ // verifyFunction(*newFunction);
+
+ // cerr << "OLD FUNCTION: " << *oldFunction;
+ // verifyFunction(*oldFunction);
+
+ DEBUG(if (verifyFunction(*newFunction))
+ report_fatal_error("verifyFunction failed!"));
+ return newFunction;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
new file mode 100644
index 000000000000..0723b3534c2b
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -0,0 +1,149 @@
+//===- DemoteRegToStack.cpp - Move a virtual register to the stack --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
+using namespace llvm;
+
+/// DemoteRegToStack - This function takes a virtual register computed by an
+/// Instruction and replaces it with a slot in the stack frame, allocated via
+/// alloca. This allows the CFG to be changed around without fear of
+/// invalidating the SSA information for the value. It returns the pointer to
+/// the alloca inserted to create a stack slot for I, or 0 if I had no uses.
+AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
+ Instruction *AllocaPoint) {
+ if (I.use_empty()) { // Nothing reads I, so it is erased instead of demoted.
+ I.eraseFromParent();
+ return 0;
+ }
+
+ // Create a stack slot to hold the value.
+ AllocaInst *Slot;
+ if (AllocaPoint) { // The caller chose the insertion point for the alloca.
+ Slot = new AllocaInst(I.getType(), 0,
+ I.getName()+".reg2mem", AllocaPoint);
+ } else { // Default: the beginning of the function's entry block.
+ Function *F = I.getParent()->getParent();
+ Slot = new AllocaInst(I.getType(), 0, I.getName()+".reg2mem",
+ F->getEntryBlock().begin());
+ }
+
+ // Change all of the users of the instruction to read from the stack slot.
+ while (!I.use_empty()) { // each pass removes every use of I held by U
+ Instruction *U = cast<Instruction>(I.use_back());
+ if (PHINode *PN = dyn_cast<PHINode>(U)) {
+ // If this is a PHI node, we can't insert a load of the value before the
+ // use. Instead insert the load in the predecessor block corresponding
+ // to the incoming value.
+ //
+ // Note that if there are multiple edges from a basic block to this PHI
+ // node that we cannot have multiple loads. The problem is that the
+ // resulting PHI node will have multiple values (from each load) coming in
+ // from the same block, which is illegal SSA form. For this reason, we
+ // keep track of and reuse loads we insert.
+ DenseMap<BasicBlock*, Value*> Loads; // incoming block -> reload made for it
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == &I) {
+ Value *&V = Loads[PN->getIncomingBlock(i)];
+ if (V == 0) {
+ // Insert the load into the predecessor block
+ V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads,
+ PN->getIncomingBlock(i)->getTerminator());
+ }
+ PN->setIncomingValue(i, V);
+ }
+
+ } else {
+ // If this is a normal instruction, just insert a load.
+ Value *V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads, U);
+ U->replaceUsesOfWith(&I, V);
+ }
+ }
+
+
+ // Insert stores of the computed value into the stack slot. We have to be
+ // careful if I is an invoke instruction, because we can't insert the store
+ // AFTER the terminator instruction.
+ BasicBlock::iterator InsertPt;
+ if (!isa<TerminatorInst>(I)) {
+ InsertPt = &I;
+ ++InsertPt;
+ } else { // I is a terminator; the cast below requires it to be an invoke.
+ InvokeInst &II = cast<InvokeInst>(I);
+ if (II.getNormalDest()->getSinglePredecessor())
+ InsertPt = II.getNormalDest()->getFirstInsertionPt();
+ else {
+ // We cannot demote invoke instructions to the stack if their normal edge
+ // is critical. Therefore, split the critical edge and insert the store
+ // in the newly created basic block.
+ unsigned SuccNum = GetSuccessorNumber(I.getParent(), II.getNormalDest());
+ TerminatorInst *TI = &cast<TerminatorInst>(I);
+ assert (isCriticalEdge(TI, SuccNum) &&
+ "Expected a critical edge!");
+ BasicBlock *BB = SplitCriticalEdge(TI, SuccNum);
+ assert (BB && "Unable to split critical edge.");
+ InsertPt = BB->getFirstInsertionPt();
+ }
+ }
+
+ for (; isa<PHINode>(InsertPt) || isa<LandingPadInst>(InsertPt); ++InsertPt)
+ /* empty */; // Don't insert before PHI nodes or landingpad instrs.
+
+ new StoreInst(&I, Slot, InsertPt);
+ return Slot;
+}
+
+/// DemotePHIToStack - This function takes a virtual register computed by a PHI
+/// node and replaces it with a slot in the stack frame allocated via alloca.
+/// The PHI node is deleted. It returns the alloca inserted, or 0 if P was dead.
+AllocaInst *llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) {
+ if (P->use_empty()) { // Unused PHI: erase it; no stack slot is created.
+ P->eraseFromParent();
+ return 0;
+ }
+
+ // Create a stack slot to hold the value.
+ AllocaInst *Slot;
+ if (AllocaPoint) { // The caller chose the insertion point for the alloca.
+ Slot = new AllocaInst(P->getType(), 0,
+ P->getName()+".reg2mem", AllocaPoint);
+ } else { // Default: the beginning of the function's entry block.
+ Function *F = P->getParent()->getParent();
+ Slot = new AllocaInst(P->getType(), 0, P->getName()+".reg2mem",
+ F->getEntryBlock().begin());
+ }
+
+ // Iterate over each operand inserting a store in each predecessor.
+ for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) {
+ if (InvokeInst *II = dyn_cast<InvokeInst>(P->getIncomingValue(i))) {
+ assert(II->getParent() != P->getIncomingBlock(i) &&
+ "Invoke edge not supported yet"); (void)II; // (void): II is otherwise unused without asserts
+ }
+ new StoreInst(P->getIncomingValue(i), Slot,
+ P->getIncomingBlock(i)->getTerminator());
+ }
+
+ // Insert a load in place of the PHI and replace all uses.
+ BasicBlock::iterator InsertPt = P;
+
+ for (; isa<PHINode>(InsertPt) || isa<LandingPadInst>(InsertPt); ++InsertPt)
+ /* empty */; // Don't insert before PHI nodes or landingpad instrs.
+
+ Value *V = new LoadInst(Slot, P->getName()+".reload", InsertPt);
+ P->replaceAllUsesWith(V);
+
+ // Delete PHI.
+ P->eraseFromParent();
+ return Slot;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp b/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp
new file mode 100644
index 000000000000..1da226bfcbe1
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp
@@ -0,0 +1,486 @@
+//===- FlattenCFG.cpp - Code to perform CFG flattening ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Reduce conditional branches in CFG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "flattencfg"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+using namespace llvm;
+
+namespace {
+class FlattenCFGOpt {
+ AliasAnalysis *AA; // May be null; CompareIfRegionBlock tests !AA before use.
+ /// \brief Use parallel-and or parallel-or to generate conditions for
+ /// conditional branches. \p P is not referenced by the definition.
+ bool FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = 0);
+ /// \brief If \p BB is the merge block of an if-region, attempt to merge
+ /// the if-region with an adjacent if-region upstream if two if-regions
+ /// contain identical instructions. \p P is not referenced by the definition.
+ bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = 0);
+ /// \brief Compare a pair of blocks: \p Block1 and \p Block2, which
+ /// are from two if-regions whose entry blocks are \p Head1 and \p
+ /// Head2. \returns true if \p Block1 and \p Block2 contain identical
+ /// instructions, and have no memory reference alias with \p Head2.
+ /// This is used as a legality check for merging if-regions.
+ bool CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2,
+ BasicBlock *Block1, BasicBlock *Block2);
+
+public:
+ FlattenCFGOpt(AliasAnalysis *AA) : AA(AA) {}
+ bool run(BasicBlock *BB); // true if either transformation above applied
+};
+} // end anonymous namespace
+
+/// If \param [in] BB has more than one predecessor that is a conditional
+/// branch, attempt to use parallel and/or for the branch condition. \returns
+/// true on success. It also returns true when it only performed the
+/// preparatory branch inversion (see below) and inverted at least one branch.
+///
+/// Before:
+/// ......
+/// %cmp10 = fcmp une float %tmp1, %tmp2
+/// br i1 %cmp10, label %if.then, label %lor.rhs
+///
+/// lor.rhs:
+/// ......
+/// %cmp11 = fcmp une float %tmp3, %tmp4
+/// br i1 %cmp11, label %if.then, label %if.end
+///
+/// if.end: // the merge block
+/// ......
+///
+/// if.then: // has two predecessors, both of them contains conditional branch.
+/// ......
+/// br label %if.end;
+///
+/// After:
+/// ......
+/// %cmp10 = fcmp une float %tmp1, %tmp2
+/// ......
+/// %cmp11 = fcmp une float %tmp3, %tmp4
+/// %cmp12 = or i1 %cmp10, %cmp11 // parallel-or mode.
+/// br i1 %cmp12, label %if.then, label %if.end
+///
+/// if.end:
+/// ......
+///
+/// if.then:
+/// ......
+/// br label %if.end;
+///
+/// Current implementation handles two cases.
+/// Case 1: \param BB is on the else-path.
+///
+/// BB1
+/// / |
+/// BB2 |
+/// / \ |
+/// BB3 \ | where, BB1, BB2 contain conditional branches.
+/// \ | / BB3 contains unconditional branch.
+/// \ | / BB4 corresponds to \param BB which is also the merge.
+/// BB => BB4
+///
+///
+/// Corresponding source code:
+///
+/// if (a == b && c == d)
+/// statement; // BB3
+///
+/// Case 2: \param BB is on the then-path.
+///
+/// BB1
+/// / |
+/// | BB2
+/// \ / | where BB1, BB2 contain conditional branches.
+/// BB => BB3 | BB3 contains unconditional branch and corresponds
+/// \ / to \param BB. BB4 is the merge.
+/// BB4
+///
+/// Corresponding source code:
+///
+/// if (a == b || c == d)
+/// statement; // BB3
+///
+/// In both cases, \param BB is the common successor of conditional branches.
+/// In Case 1, \param BB (BB4) has an unconditional branch (BB3) as
+/// its predecessor. In Case 2, \param BB (BB3) only has conditional branches
+/// as its predecessors.
+///
+/// \param P is not used by this function.
+///
+bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder,
+ Pass *P) {
+ PHINode *PHI = dyn_cast<PHINode>(BB->begin());
+ if (PHI)
+ return false; // For simplicity, avoid cases containing PHI nodes.
+
+ BasicBlock *LastCondBlock = NULL;
+ BasicBlock *FirstCondBlock = NULL;
+ BasicBlock *UnCondBlock = NULL;
+ int Idx = -1; // Successor index of BB shared by all conditional preds.
+
+ // Check predecessors of \param BB.
+ SmallPtrSet<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
+ for (SmallPtrSetIterator<BasicBlock *> PI = Preds.begin(), PE = Preds.end();
+ PI != PE; ++PI) {
+ BasicBlock *Pred = *PI;
+ BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator());
+
+ // All predecessors should terminate with a branch.
+ if (!PBI)
+ return false;
+
+ BasicBlock *PP = Pred->getSinglePredecessor();
+
+ if (PBI->isUnconditional()) {
+ // Case 1: Pred (BB3) is an unconditional block, it should
+ // have a single predecessor (BB2) that is also a predecessor
+ // of \param BB (BB4) and should not have address-taken.
+ // There should exist only one such unconditional
+ // branch among the predecessors.
+ if (UnCondBlock || !PP || (Preds.count(PP) == 0) ||
+ Pred->hasAddressTaken())
+ return false;
+
+ UnCondBlock = Pred;
+ continue;
+ }
+
+ // Only conditional branches are allowed beyond this point.
+ assert(PBI->isConditional());
+
+ // Condition's unique use should be the branch instruction.
+ Value *PC = PBI->getCondition();
+ if (!PC || !PC->hasOneUse())
+ return false;
+
+ if (PP && Preds.count(PP)) {
+ // These are internal condition blocks to be merged from, e.g.,
+ // BB2 in both cases.
+ // Should not be address-taken.
+ if (Pred->hasAddressTaken())
+ return false;
+
+ // Instructions in the internal condition blocks should be safe
+ // to hoist up.
+ for (BasicBlock::iterator BI = Pred->begin(), BE = PBI; BI != BE;) {
+ Instruction *CI = BI++;
+ if (isa<PHINode>(CI) || !isSafeToSpeculativelyExecute(CI))
+ return false;
+ }
+ } else {
+ // This is the condition block to be merged into, e.g. BB1 in
+ // both cases.
+ if (FirstCondBlock)
+ return false;
+ FirstCondBlock = Pred;
+ }
+
+ // Find whether BB is uniformly on the true (or false) path
+ // for all of its predecessors.
+ BasicBlock *PS1 = PBI->getSuccessor(0);
+ BasicBlock *PS2 = PBI->getSuccessor(1);
+ BasicBlock *PS = (PS1 == BB) ? PS2 : PS1;
+ int CIdx = (PS1 == BB) ? 0 : 1;
+
+ if (Idx == -1)
+ Idx = CIdx;
+ else if (CIdx != Idx)
+ return false;
+
+ // PS is the successor which is not BB. Check successors to identify
+ // the last conditional branch.
+ if (Preds.count(PS) == 0) {
+ // Case 2.
+ LastCondBlock = Pred;
+ } else {
+ // Case 1
+ BranchInst *BPS = dyn_cast<BranchInst>(PS->getTerminator());
+ if (BPS && BPS->isUnconditional()) {
+ // Case 1: PS(BB3) should be an unconditional branch.
+ LastCondBlock = Pred;
+ }
+ }
+ }
+
+ if (!FirstCondBlock || !LastCondBlock || (FirstCondBlock == LastCondBlock))
+ return false;
+
+ TerminatorInst *TBB = LastCondBlock->getTerminator();
+ BasicBlock *PS1 = TBB->getSuccessor(0);
+ BasicBlock *PS2 = TBB->getSuccessor(1);
+ BranchInst *PBI1 = dyn_cast<BranchInst>(PS1->getTerminator());
+ BranchInst *PBI2 = dyn_cast<BranchInst>(PS2->getTerminator());
+
+ // If PS1 does not jump into PS2, but PS2 jumps into PS1,
+ // attempt branch inversion.
+ if (!PBI1 || !PBI1->isUnconditional() ||
+ (PS1->getTerminator()->getSuccessor(0) != PS2)) {
+ // Check whether PS2 jumps into PS1.
+ if (!PBI2 || !PBI2->isUnconditional() ||
+ (PS2->getTerminator()->getSuccessor(0) != PS1))
+ return false;
+
+ // Do branch inversion.
+ BasicBlock *CurrBlock = LastCondBlock;
+ bool EverChanged = false;
+ while (1) {
+ // The blocks walked here are predecessors of BB, all of which were
+ // checked above to terminate with a BranchInst.
+ BranchInst *BI = cast<BranchInst>(CurrBlock->getTerminator());
+ // The branch condition is not necessarily a compare (it only has to be
+ // a single-use value); a non-compare condition is left untouched rather
+ // than dereferencing a null CmpInst.
+ if (CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
+ CmpInst::Predicate Predicate = CI->getPredicate();
+ // Canonicalize icmp_ne -> icmp_eq, fcmp_one -> fcmp_ueq
+ if ((Predicate == CmpInst::ICMP_NE) ||
+ (Predicate == CmpInst::FCMP_ONE)) {
+ CI->setPredicate(ICmpInst::getInversePredicate(Predicate));
+ BI->swapSuccessors();
+ EverChanged = true;
+ }
+ }
+ if (CurrBlock == FirstCondBlock)
+ break;
+ CurrBlock = CurrBlock->getSinglePredecessor();
+ }
+ return EverChanged;
+ }
+
+ // PS1 must have an unconditional branch. (This is already implied by the
+ // test above; it is kept as a cheap guard.)
+ if (!PBI1 || !PBI1->isUnconditional())
+ return false;
+
+ // PS2 should not contain PHI node.
+ PHI = dyn_cast<PHINode>(PS2->begin());
+ if (PHI)
+ return false;
+
+ // Do the transformation.
+ BasicBlock *CB;
+ // FirstCondBlock was recorded only after its terminator was seen to be a
+ // conditional BranchInst, so cast<> is the right tool here.
+ BranchInst *PBI = cast<BranchInst>(FirstCondBlock->getTerminator());
+ bool Iteration = true;
+ IRBuilder<>::InsertPointGuard Guard(Builder);
+ Value *PC = PBI->getCondition();
+
+ do {
+ CB = PBI->getSuccessor(1 - Idx);
+ // Delete the conditional branch.
+ FirstCondBlock->getInstList().pop_back();
+ FirstCondBlock->getInstList()
+ .splice(FirstCondBlock->end(), CB->getInstList());
+ PBI = cast<BranchInst>(FirstCondBlock->getTerminator());
+ Value *CC = PBI->getCondition();
+ // Merge conditions.
+ Builder.SetInsertPoint(PBI);
+ Value *NC;
+ if (Idx == 0)
+ // Case 2, use parallel or.
+ NC = Builder.CreateOr(PC, CC);
+ else
+ // Case 1, use parallel and.
+ NC = Builder.CreateAnd(PC, CC);
+
+ PBI->replaceUsesOfWith(CC, NC);
+ PC = NC;
+ if (CB == LastCondBlock)
+ Iteration = false;
+ // Remove internal conditional branches.
+ CB->dropAllReferences();
+ // make CB unreachable and let downstream to delete the block.
+ new UnreachableInst(CB->getContext(), CB);
+ } while (Iteration);
+
+ DEBUG(dbgs() << "Use parallel and/or in:\n" << *FirstCondBlock);
+ return true;
+}
+
+/// Compare blocks from two if-regions, where \param Head1 is the entry of the
+/// 1st if-region. \param Head2 is the entry of the 2nd if-region. \param
+/// Block1 is a block in the 1st if-region to compare. \param Block2 is a block
+/// in the 2nd if-region to compare. \returns true if \param Block1 and \param
+/// Block2 have identical instructions and do not have memory reference alias
+/// with \param Head2.
+/// With a null AA, a write that faces any memory access in Head2 is rejected.
+bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2,
+ BasicBlock *Block1,
+ BasicBlock *Block2) {
+ TerminatorInst *PTI2 = Head2->getTerminator();
+ Instruction *PBI2 = Head2->begin();
+ // A block equal to its region's head stands for an empty path.
+ bool eq1 = (Block1 == Head1);
+ bool eq2 = (Block2 == Head2);
+ if (eq1 || eq2) {
+ // An empty then-path or else-path.
+ return (eq1 == eq2);
+ }
+
+ // Check whether instructions in Block1 and Block2 are identical
+ // and do not alias with instructions in Head2.
+ BasicBlock::iterator iter1 = Block1->begin();
+ BasicBlock::iterator end1 = Block1->getTerminator();
+ BasicBlock::iterator iter2 = Block2->begin();
+ BasicBlock::iterator end2 = Block2->getTerminator();
+ // Walk both blocks in lockstep; the terminators themselves are not compared.
+ while (1) {
+ if (iter1 == end1) {
+ if (iter2 != end2)
+ return false;
+ break;
+ }
+
+ if (!iter1->isIdenticalTo(iter2))
+ return false;
+
+ // Illegal to remove instructions with side effects except
+ // non-volatile stores.
+ if (iter1->mayHaveSideEffects()) {
+ Instruction *CurI = &*iter1;
+ StoreInst *SI = dyn_cast<StoreInst>(CurI);
+ if (!SI || SI->isVolatile())
+ return false;
+ }
+
+ // For simplicity and speed, data dependency check can be
+ // avoided if read from memory doesn't exist.
+ if (iter1->mayReadFromMemory())
+ return false;
+
+ if (iter1->mayWriteToMemory()) {
+ for (BasicBlock::iterator BI = PBI2, BE = PTI2; BI != BE; ++BI) {
+ if (BI->mayReadFromMemory() || BI->mayWriteToMemory()) {
+ // Check alias with Head2.
+ if (!AA || AA->alias(iter1, BI)) // presumably only NoAlias (0) passes
+ return false;
+ }
+ }
+ }
+ ++iter1;
+ ++iter2;
+ }
+
+ return true;
+}
+
+/// Check whether \param BB is the merge block of a if-region. If yes, check
+/// whether there exists an adjacent if-region upstream, the two if-regions
+/// contain identical instructions and can be legally merged. \returns true if
+/// the two if-regions are merged.
+///
+/// From:
+/// if (a)
+/// statement;
+/// if (b)
+/// statement;
+///
+/// To:
+/// if (a || b)
+/// statement;
+/// \param P is not used by this function.
+bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder,
+ Pass *P) {
+ BasicBlock *IfTrue2, *IfFalse2;
+ Value *IfCond2 = GetIfCondition(BB, IfTrue2, IfFalse2);
+ Instruction *CInst2 = dyn_cast_or_null<Instruction>(IfCond2);
+ if (!CInst2) // no if-condition found, or it is not an instruction
+ return false;
+ // The block defining the 2nd condition is treated as the 2nd region's entry.
+ BasicBlock *SecondEntryBlock = CInst2->getParent();
+ if (SecondEntryBlock->hasAddressTaken())
+ return false;
+ // Look for the upstream if-region that merges into SecondEntryBlock.
+ BasicBlock *IfTrue1, *IfFalse1;
+ Value *IfCond1 = GetIfCondition(SecondEntryBlock, IfTrue1, IfFalse1);
+ Instruction *CInst1 = dyn_cast_or_null<Instruction>(IfCond1);
+ if (!CInst1)
+ return false;
+
+ BasicBlock *FirstEntryBlock = CInst1->getParent();
+
+ // Either then-path or else-path should be empty.
+ if ((IfTrue1 != FirstEntryBlock) && (IfFalse1 != FirstEntryBlock))
+ return false;
+ if ((IfTrue2 != SecondEntryBlock) && (IfFalse2 != SecondEntryBlock))
+ return false;
+
+ TerminatorInst *PTI2 = SecondEntryBlock->getTerminator();
+ Instruction *PBI2 = SecondEntryBlock->begin();
+ // Corresponding paths of the two regions must match instruction by instruction.
+ if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfTrue1,
+ IfTrue2))
+ return false;
+
+ if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfFalse1,
+ IfFalse2))
+ return false;
+
+ // Check whether \param SecondEntryBlock has side-effect and is safe to
+ // speculate.
+ for (BasicBlock::iterator BI = PBI2, BE = PTI2; BI != BE; ++BI) {
+ Instruction *CI = BI;
+ if (isa<PHINode>(CI) || CI->mayHaveSideEffects() ||
+ !isSafeToSpeculativelyExecute(CI))
+ return false;
+ }
+
+ // Merge \param SecondEntryBlock into \param FirstEntryBlock.
+ FirstEntryBlock->getInstList().pop_back();
+ FirstEntryBlock->getInstList()
+ .splice(FirstEntryBlock->end(), SecondEntryBlock->getInstList());
+ BranchInst *PBI = dyn_cast<BranchInst>(FirstEntryBlock->getTerminator()); // NOTE(review): used unchecked below; presumably always a conditional branch here - confirm
+ Value *CC = PBI->getCondition();
+ BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); // saved so the builder can be restored below
+ BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ Builder.SetInsertPoint(PBI);
+ Value *NC = Builder.CreateOr(CInst1, CC);
+ PBI->replaceUsesOfWith(CC, NC);
+ Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt);
+
+ // Remove IfTrue1
+ if (IfTrue1 != FirstEntryBlock) {
+ IfTrue1->dropAllReferences();
+ IfTrue1->eraseFromParent();
+ }
+
+ // Remove IfFalse1
+ if (IfFalse1 != FirstEntryBlock) {
+ IfFalse1->dropAllReferences();
+ IfFalse1->eraseFromParent();
+ }
+
+ // Remove \param SecondEntryBlock
+ SecondEntryBlock->dropAllReferences();
+ SecondEntryBlock->eraseFromParent();
+ DEBUG(dbgs() << "If conditions merged into:\n" << *FirstEntryBlock);
+ return true;
+}
+
+bool FlattenCFGOpt::run(BasicBlock *BB) {
+ assert(BB && BB->getParent() && "Block not embedded in function!");
+ assert(BB->getTerminator() && "Degenerate basic block encountered!");
+
+ IRBuilder<> Builder(BB);
+
+ // Try the two transformations in order. The second one is attempted only
+ // when the first reported no change; the result is true if either applied.
+ return FlattenParallelAndOr(BB, Builder) || MergeIfRegion(BB, Builder);
+}
+
+/// FlattenCFG - Public entry point for CFG flattening around \p BB. It builds
+/// a FlattenCFGOpt around \p AA and runs it on \p BB, so that parallel-and /
+/// parallel-or can collapse if-conditions and if-regions with identical
+/// statements can be merged. Returns whatever FlattenCFGOpt::run returns.
+///
+bool llvm::FlattenCFG(BasicBlock *BB, AliasAnalysis *AA) {
+ FlattenCFGOpt Opt(AA);
+ return Opt.run(BB);
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp
new file mode 100644
index 000000000000..5f0a563ceec0
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp
@@ -0,0 +1,183 @@
+//===-- GlobalStatus.cpp - Compute status info for globals -----------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Transforms/Utils/GlobalStatus.h"
+
+using namespace llvm;
+
+/// Pick the stronger of two atomic orderings. Acquire paired with Release (in
+/// either argument order) combines to AcquireRelease; in every other case the
+/// larger of the two enumerators is returned.
+///
+static AtomicOrdering strongerOrdering(AtomicOrdering X, AtomicOrdering Y) {
+ const bool AcquireWithRelease = (X == Acquire && Y == Release) ||
+ (X == Release && Y == Acquire);
+ if (AcquireWithRelease)
+ return AcquireRelease;
+ return (X < Y) ? Y : X;
+}
+
+/// A constant may be destroyed only when every user, transitively, is itself
+/// a constant and none of them is a GlobalValue. Constants cannot be cyclic,
+/// so a plain recursive walk over the use lists is sufficient.
+///
+bool llvm::isSafeToDestroyConstant(const Constant *C) {
+ // Global values are never reported as destroyable.
+ if (isa<GlobalValue>(C))
+ return false;
+
+ Value::const_use_iterator Cur = C->use_begin();
+ Value::const_use_iterator End = C->use_end();
+ for (; Cur != End; ++Cur) {
+ // A non-constant user, or a constant user that is itself not destroyable,
+ // keeps C alive.
+ const Constant *UserC = dyn_cast<Constant>(*Cur);
+ if (!UserC || !isSafeToDestroyConstant(UserC))
+ return false;
+ }
+ return true;
+}
+
+static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
+ SmallPtrSet<const PHINode *, 16> &PhiUsers) { // true = a use was not handled
+ for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
+ ++UI) {
+ const User *U = *UI;
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
+ GS.HasNonInstructionUser = true;
+
+ // If the result of the constantexpr isn't pointer type, then we won't
+ // know to expect it in various places. Just reject early.
+ if (!isa<PointerType>(CE->getType()))
+ return true;
+ // Otherwise the users of the constant expression are analyzed as well.
+ if (analyzeGlobalAux(CE, GS, PhiUsers))
+ return true;
+ } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
+ if (!GS.HasMultipleAccessingFunctions) { // track the accessing function until a second one shows up
+ const Function *F = I->getParent()->getParent();
+ if (GS.AccessingFunction == 0)
+ GS.AccessingFunction = F;
+ else if (GS.AccessingFunction != F)
+ GS.HasMultipleAccessingFunctions = true;
+ }
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ GS.IsLoaded = true;
+ // Don't hack on volatile loads.
+ if (LI->isVolatile())
+ return true;
+ GS.Ordering = strongerOrdering(GS.Ordering, LI->getOrdering());
+ } else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ // Don't allow a store OF the address, only stores TO the address.
+ if (SI->getOperand(0) == V)
+ return true;
+
+ // Don't hack on volatile stores.
+ if (SI->isVolatile())
+ return true;
+
+ GS.Ordering = strongerOrdering(GS.Ordering, SI->getOrdering());
+
+ // If this is a direct store to the global (i.e., the global is a scalar
+ // value, not an aggregate), keep more specific information about
+ // stores.
+ if (GS.StoredType != GlobalStatus::Stored) {
+ if (const GlobalVariable *GV =
+ dyn_cast<GlobalVariable>(SI->getOperand(1))) {
+ Value *StoredVal = SI->getOperand(0);
+
+ if (Constant *C = dyn_cast<Constant>(StoredVal)) {
+ if (C->isThreadDependent()) {
+ // The stored value changes between threads; don't track it.
+ return true;
+ }
+ }
+ // StoredType only ever moves towards GlobalStatus::Stored below.
+ if (StoredVal == GV->getInitializer()) {
+ if (GS.StoredType < GlobalStatus::InitializerStored)
+ GS.StoredType = GlobalStatus::InitializerStored;
+ } else if (isa<LoadInst>(StoredVal) &&
+ cast<LoadInst>(StoredVal)->getOperand(0) == GV) { // storing back a value loaded from GV
+ if (GS.StoredType < GlobalStatus::InitializerStored)
+ GS.StoredType = GlobalStatus::InitializerStored;
+ } else if (GS.StoredType < GlobalStatus::StoredOnce) {
+ GS.StoredType = GlobalStatus::StoredOnce;
+ GS.StoredOnceValue = StoredVal;
+ } else if (GS.StoredType == GlobalStatus::StoredOnce &&
+ GS.StoredOnceValue == StoredVal) {
+ // noop.
+ } else {
+ GS.StoredType = GlobalStatus::Stored;
+ }
+ } else { // store through something other than the global itself
+ GS.StoredType = GlobalStatus::Stored;
+ }
+ }
+ } else if (isa<BitCastInst>(I)) { // casts, GEPs and selects: analyze their users too
+ if (analyzeGlobalAux(I, GS, PhiUsers))
+ return true;
+ } else if (isa<GetElementPtrInst>(I)) {
+ if (analyzeGlobalAux(I, GS, PhiUsers))
+ return true;
+ } else if (isa<SelectInst>(I)) {
+ if (analyzeGlobalAux(I, GS, PhiUsers))
+ return true;
+ } else if (const PHINode *PN = dyn_cast<PHINode>(I)) {
+ // PHI nodes we can check just like select or GEP instructions, but we
+ // have to be careful about infinite recursion.
+ if (PhiUsers.insert(PN)) // Not already visited.
+ if (analyzeGlobalAux(I, GS, PhiUsers))
+ return true;
+ } else if (isa<CmpInst>(I)) {
+ GS.IsCompared = true;
+ } else if (const MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) {
+ if (MTI->isVolatile())
+ return true;
+ if (MTI->getArgOperand(0) == V) // V as argument 0 counts as a store
+ GS.StoredType = GlobalStatus::Stored;
+ if (MTI->getArgOperand(1) == V) // V as argument 1 counts as a load
+ GS.IsLoaded = true;
+ } else if (const MemSetInst *MSI = dyn_cast<MemSetInst>(I)) {
+ assert(MSI->getArgOperand(0) == V && "Memset only takes one pointer!");
+ if (MSI->isVolatile())
+ return true;
+ GS.StoredType = GlobalStatus::Stored;
+ } else if (ImmutableCallSite C = I) {
+ if (!C.isCallee(UI)) // V is used by the call as something other than the callee
+ return true;
+ GS.IsLoaded = true;
+ } else {
+ return true; // Any other non-load instruction might take address!
+ }
+ } else if (const Constant *C = dyn_cast<Constant>(U)) {
+ GS.HasNonInstructionUser = true;
+ // We might have a dead and dangling constant hanging off of here.
+ if (!isSafeToDestroyConstant(C))
+ return true;
+ } else {
+ GS.HasNonInstructionUser = true;
+ // Otherwise must be some other user.
+ return true;
+ }
+ }
+ // Every use was handled and folded into GS.
+ return false;
+}
+
+bool GlobalStatus::analyzeGlobal(const Value *V, GlobalStatus &GS) {
+ // Start the walk with an empty set of visited PHI nodes; the helper uses it
+ // to avoid analyzing the same PHI twice.
+ SmallPtrSet<const PHINode *, 16> VisitedPhis;
+ const bool GaveUp = analyzeGlobalAux(V, GS, VisitedPhis);
+ return GaveUp;
+}
+
+// Default state: every flag false, no stored-once value, no accessing
+// function, StoredType NotStored and Ordering NotAtomic.
+GlobalStatus::GlobalStatus()
+ : IsCompared(false),
+ IsLoaded(false),
+ StoredType(NotStored),
+ StoredOnceValue(0),
+ AccessingFunction(0),
+ HasMultipleAccessingFunctions(false),
+ HasNonInstructionUser(false),
+ Ordering(NotAtomic) {
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
new file mode 100644
index 000000000000..d021bcef4027
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -0,0 +1,932 @@
+//===- InlineFunction.cpp - Code to perform function inlining -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements inlining of a function into a call site, resolving
+// parameters and the return value as appropriate.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI, // Convenience overload for a call instruction.
+ bool InsertLifetime) {
+ return InlineFunction(CallSite(CI), IFI, InsertLifetime); // Wrap CI in a CallSite and defer to the CallSite overload.
+}
+bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI, // Convenience overload for an invoke instruction.
+ bool InsertLifetime) {
+ return InlineFunction(CallSite(II), IFI, InsertLifetime); // Wrap II in a CallSite and defer to the CallSite overload.
+}
+
+namespace {
+ /// A class for recording information about inlining through an invoke.
+ class InvokeInliningInfo {
+ BasicBlock *OuterResumeDest; ///< Destination of the invoke's unwind.
+ BasicBlock *InnerResumeDest; ///< Destination for the callee's resume; null until getInnerResumeDest() creates it.
+ LandingPadInst *CallerLPad; ///< LandingPadInst associated with the invoke.
+ PHINode *InnerEHValuesPHI; ///< PHI for EH values from landingpad insts; created by getInnerResumeDest().
+ SmallVector<Value*, 8> UnwindDestPHIValues; ///< Values the unwind destination's PHIs receive from the invoke's block, in PHI order.
+
+ public:
+ InvokeInliningInfo(InvokeInst *II)
+ : OuterResumeDest(II->getUnwindDest()), InnerResumeDest(0),
+ CallerLPad(0), InnerEHValuesPHI(0) {
+ // If there are PHI nodes in the unwind destination block, we need to keep
+ // track of which values came into them from the invoke before removing
+ // the edge from this block.
+ llvm::BasicBlock *InvokeBB = II->getParent();
+ BasicBlock::iterator I = OuterResumeDest->begin();
+ for (; isa<PHINode>(I); ++I) {
+ // Save the value to use for this edge.
+ PHINode *PHI = cast<PHINode>(I);
+ UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB));
+ }
+
+ CallerLPad = cast<LandingPadInst>(I); // The first non-PHI instruction of the unwind destination is taken to be its landingpad.
+ }
+
+ /// getOuterResumeDest - The outer unwind destination is the target of
+ /// unwind edges introduced for calls within the inlined function.
+ BasicBlock *getOuterResumeDest() const {
+ return OuterResumeDest;
+ }
+
+ BasicBlock *getInnerResumeDest(); // Defined out of line; returns the cached block or creates it on first use.
+
+ LandingPadInst *getLandingPadInst() const { return CallerLPad; }
+
+ /// forwardResume - Forward the 'resume' instruction to the caller's landing
+ /// pad block: the resume is replaced by a branch to the block returned by
+ /// getInnerResumeDest(), the PHIs there receive an entry for the resume's
+ /// block, and the clauses of the caller's landingpad are appended to every
+ /// landingpad in InlinedLPads.
+ void forwardResume(ResumeInst *RI,
+ SmallPtrSet<LandingPadInst*, 16> &InlinedLPads);
+
+ /// addIncomingPHIValuesFor - Add incoming-PHI values to the unwind
+ /// destination block for the given basic block, using the values for the
+ /// original invoke's source block.
+ void addIncomingPHIValuesFor(BasicBlock *BB) const {
+ addIncomingPHIValuesForInto(BB, OuterResumeDest);
+ }
+
+ void addIncomingPHIValuesForInto(BasicBlock *src, BasicBlock *dest) const { // Adds an incoming (saved value, src) pair to each of the first UnwindDestPHIValues.size() PHIs of dest.
+ BasicBlock::iterator I = dest->begin();
+ for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
+ PHINode *phi = cast<PHINode>(I);
+ phi->addIncoming(UnwindDestPHIValues[i], src);
+ }
+ }
+ };
+}
+
+/// getInnerResumeDest - Get or create a target for the branch from ResumeInsts. On first use the caller's landing pad block is split right after its landingpad instruction; later calls return the cached block.
+BasicBlock *InvokeInliningInfo::getInnerResumeDest() {
+ if (InnerResumeDest) return InnerResumeDest; // Already created by an earlier call.
+
+ // Split the landing pad.
+ BasicBlock::iterator SplitPoint = CallerLPad; ++SplitPoint;
+ InnerResumeDest =
+ OuterResumeDest->splitBasicBlock(SplitPoint,
+ OuterResumeDest->getName() + ".body");
+
+ // The number of incoming edges reserved for each PHI created below.
+ const unsigned PHICapacity = 2;
+
+ // Create corresponding new PHIs for all the PHIs in the outer landing pad. Each new PHI takes over the uses of its outer PHI and then receives the outer PHI as its incoming value from OuterResumeDest.
+ BasicBlock::iterator InsertPoint = InnerResumeDest->begin();
+ BasicBlock::iterator I = OuterResumeDest->begin();
+ for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
+ PHINode *OuterPHI = cast<PHINode>(I);
+ PHINode *InnerPHI = PHINode::Create(OuterPHI->getType(), PHICapacity,
+ OuterPHI->getName() + ".lpad-body",
+ InsertPoint);
+ OuterPHI->replaceAllUsesWith(InnerPHI);
+ InnerPHI->addIncoming(OuterPHI, OuterResumeDest);
+ }
+
+ // Create a PHI for the exception values. It takes over the uses of the caller's landingpad and receives the landingpad as its incoming value from OuterResumeDest.
+ InnerEHValuesPHI = PHINode::Create(CallerLPad->getType(), PHICapacity,
+ "eh.lpad-body", InsertPoint);
+ CallerLPad->replaceAllUsesWith(InnerEHValuesPHI);
+ InnerEHValuesPHI->addIncoming(CallerLPad, OuterResumeDest);
+
+ // All done.
+ return InnerResumeDest;
+}
+
+/// forwardResume - Forward the 'resume' instruction to the caller's landing pad
+/// block: RI is replaced by a branch to the block returned by
+/// getInnerResumeDest(), the PHIs there gain an entry for RI's block, and the
+/// outer landingpad's clauses are appended to every landingpad in InlinedLPads.
+void InvokeInliningInfo::forwardResume(ResumeInst *RI,
+ SmallPtrSet<LandingPadInst*, 16> &InlinedLPads) {
+ BasicBlock *Dest = getInnerResumeDest();
+ LandingPadInst *OuterLPad = getLandingPadInst();
+ BasicBlock *Src = RI->getParent();
+
+ BranchInst::Create(Dest, Src); // New branch from Src to Dest; RI itself is erased below.
+
+ // Update the PHIs in the destination. They were inserted in an order which
+ // makes this work.
+ addIncomingPHIValuesForInto(Src, Dest);
+
+ InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src); // RI's operand is the EH value arriving over this edge.
+ RI->eraseFromParent();
+
+ // Append the clauses from the outer landing pad instruction into the inlined
+ // landing pad instructions. NOTE(review): this loop runs on every call, so the clauses are appended once per forwarded resume - confirm that is intended.
+ for (SmallPtrSet<LandingPadInst*, 16>::iterator I = InlinedLPads.begin(),
+ E = InlinedLPads.end(); I != E; ++I) {
+ LandingPadInst *InlinedLPad = *I;
+ for (unsigned OuterIdx = 0, OuterNum = OuterLPad->getNumClauses();
+ OuterIdx != OuterNum; ++OuterIdx)
+ InlinedLPad->addClause(OuterLPad->getClause(OuterIdx));
+ }
+}
+
+/// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into
+/// an invoke, we have to turn all of the calls that can throw into
+/// invokes. This function analyzes BB to see if there are any such calls, and if so,
+/// it rewrites the first one to be an invoke that unwinds to Invoke's outer resume destination and fills in the PHI
+/// nodes in that block with the values recorded in Invoke. Landingpads met in BB before that point get the caller landingpad's clauses appended.
+///
+/// The return value is meant to request that the next block be skipped, but every return in this implementation yields false.
+static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
+ InvokeInliningInfo &Invoke) {
+ LandingPadInst *LPI = Invoke.getLandingPadInst();
+
+ for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
+ Instruction *I = BBI++; // Advance first: the block may be split at I below.
+
+ if (LandingPadInst *L = dyn_cast<LandingPadInst>(I)) {
+ unsigned NumClauses = LPI->getNumClauses();
+ L->reserveClauses(NumClauses);
+ for (unsigned i = 0; i != NumClauses; ++i)
+ L->addClause(LPI->getClause(i));
+ }
+
+ // We only need to check for function calls: inlined invoke
+ // instructions require no special handling.
+ CallInst *CI = dyn_cast<CallInst>(I);
+
+ // If this call cannot unwind, don't convert it to an invoke.
+ // Inline asm calls cannot throw.
+ if (!CI || CI->doesNotThrow() || isa<InlineAsm>(CI->getCalledValue()))
+ continue;
+
+ // Convert this function call into an invoke instruction. First, split the
+ // basic block.
+ BasicBlock *Split = BB->splitBasicBlock(CI, CI->getName()+".noexc");
+
+ // Delete the unconditional branch inserted by splitBasicBlock
+ BB->getInstList().pop_back();
+
+ // Create the new invoke instruction.
+ ImmutableCallSite CS(CI);
+ SmallVector<Value*, 8> InvokeArgs(CS.arg_begin(), CS.arg_end());
+ InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), Split,
+ Invoke.getOuterResumeDest(),
+ InvokeArgs, CI->getName(), BB);
+ II->setCallingConv(CI->getCallingConv());
+ II->setAttributes(CI->getAttributes());
+
+ // Make sure that anything using the call now uses the invoke! This also
+ // updates the CallGraph if present, because it uses a WeakVH.
+ CI->replaceAllUsesWith(II);
+
+ // Delete the original call
+ Split->getInstList().pop_front(); // The block was split at CI, so CI is the first instruction of Split.
+
+ // Update any PHI nodes in the exceptional block to indicate that there is
+ // now a new entry in them.
+ Invoke.addIncomingPHIValuesFor(BB);
+ return false; // Stop after the first converted call; the instructions that followed it now live in Split.
+ }
+
+ return false;
+}
+
+/// HandleInlinedInvoke - If we inlined an invoke site, we need to convert calls
+/// in the body of the inlined function into invokes.
+///
+/// II is the invoke instruction being inlined. FirstNewBlock is the first
+/// block of the inlined code (the last block is the end of the function),
+/// and InlinedCodeInfo is information about the code that got inlined.
+static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
+ ClonedCodeInfo &InlinedCodeInfo) {
+ BasicBlock *InvokeDest = II->getUnwindDest();
+
+ Function *Caller = FirstNewBlock->getParent();
+
+ // The inlined code is currently at the end of the function, scan from the
+ // start of the inlined code to its end, checking for stuff we need to
+ // rewrite.
+ InvokeInliningInfo Invoke(II);
+
+ // Get all of the inlined landing pad instructions.
+ SmallPtrSet<LandingPadInst*, 16> InlinedLPads;
+ for (Function::iterator I = FirstNewBlock, E = Caller->end(); I != E; ++I)
+ if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) // NOTE: this II shadows the function parameter II within the 'if'.
+ InlinedLPads.insert(II->getLandingPadInst());
+
+ for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){
+ if (InlinedCodeInfo.ContainsCalls)
+ if (HandleCallsInBlockInlinedThroughInvoke(BB, Invoke)) {
+ // Honor a request to skip the next block. NOTE(review): with the loop's own ++BB this would step past E when BB is the last block; currently unreachable because the callee only ever returns false.
+ ++BB;
+ continue;
+ }
+
+ // Forward any resumes that are remaining here.
+ if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator()))
+ Invoke.forwardResume(RI, InlinedLPads);
+ }
+
+ // Now that everything is happy, we have one final detail. The PHI nodes in
+ // the exception destination block still have entries due to the original
+ // invoke instruction. Eliminate these entries (which might even delete the
+ // PHI node) now.
+ InvokeDest->removePredecessor(II->getParent());
+}
+
+/// UpdateCallGraphAfterInlining - Once we have cloned code over from a callee
+/// into the caller, update the specified callgraph to reflect the changes we
+/// made. Note that it's possible that not all code was copied over, so only
+/// some edges of the callgraph may remain. Every inlined call that survived as an instruction is also recorded in IFI.InlinedCalls. FirstNewBlock is not used by the body.
+static void UpdateCallGraphAfterInlining(CallSite CS,
+ Function::iterator FirstNewBlock,
+ ValueToValueMapTy &VMap,
+ InlineFunctionInfo &IFI) {
+ CallGraph &CG = *IFI.CG;
+ const Function *Caller = CS.getInstruction()->getParent()->getParent();
+ const Function *Callee = CS.getCalledFunction();
+ CallGraphNode *CalleeNode = CG[Callee];
+ CallGraphNode *CallerNode = CG[Caller];
+
+ // Since we inlined some uninlined call sites in the callee into the caller,
+ // add edges from the caller to all of the callees of the callee.
+ CallGraphNode::iterator I = CalleeNode->begin(), E = CalleeNode->end();
+
+ // Consider the case where CalleeNode == CallerNode: the loop below adds edges to CallerNode, so iterate over a copy of the edge list instead of the list being modified.
+ CallGraphNode::CalledFunctionsVector CallCache;
+ if (CalleeNode == CallerNode) {
+ CallCache.assign(I, E);
+ I = CallCache.begin();
+ E = CallCache.end();
+ }
+
+ for (; I != E; ++I) {
+ const Value *OrigCall = I->first;
+
+ ValueToValueMapTy::iterator VMI = VMap.find(OrigCall);
+ // Only copy the edge if the call was inlined!
+ if (VMI == VMap.end() || VMI->second == 0)
+ continue;
+
+ // If the call was inlined, but then constant folded, there is no edge to
+ // add. Check for this case.
+ Instruction *NewCall = dyn_cast<Instruction>(VMI->second);
+ if (NewCall == 0) continue;
+
+ // Remember that this call site got inlined for the client of
+ // InlineFunction.
+ IFI.InlinedCalls.push_back(NewCall);
+
+ // It's possible that inlining the callsite will cause it to go from an
+ // indirect to a direct call by resolving a function pointer. If this
+ // happens, set the callee of the new call site to a more precise
+ // destination. This can also happen if the call graph node of the caller
+ // was just unnecessarily imprecise.
+ if (I->second->getFunction() == 0)
+ if (Function *F = CallSite(NewCall).getCalledFunction()) {
+ // Indirect call site resolved to direct call.
+ CallerNode->addCalledFunction(CallSite(NewCall), CG[F]);
+
+ continue;
+ }
+
+ CallerNode->addCalledFunction(CallSite(NewCall), I->second);
+ }
+
+ // Update the call graph by deleting the caller's edge for the inlined call site CS. We must
+ // do this after the loop above in case Caller and Callee are the same.
+ CallerNode->removeCallEdgeFor(CS);
+}
+
+/// HandleByValArgument - When inlining a call site that has a byval argument,
+/// we have to make the implicit memcpy explicit by adding it. Returns the value the inlined body should use for the argument: Arg itself when the copy can be elided, otherwise a new alloca that receives a copy of *Arg.
+static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
+ const Function *CalledFunc,
+ InlineFunctionInfo &IFI,
+ unsigned ByValAlignment) {
+ Type *AggTy = cast<PointerType>(Arg->getType())->getElementType();
+
+ // If the called function is readonly, then it could not mutate the caller's
+ // copy of the byval'd memory. In this case, it is safe to elide the copy and
+ // temporary.
+ if (CalledFunc->onlyReadsMemory()) {
+ // If the byval argument has a specified alignment that is greater than the
+ // passed in pointer, then we either have to round up the input pointer or
+ // give up on this transformation.
+ if (ByValAlignment <= 1) // 0 = unspecified, 1 = no particular alignment.
+ return Arg;
+
+ // If the pointer is already known to be sufficiently aligned, or if we can
+ // round it up to a larger alignment, then we don't need a temporary.
+ if (getOrEnforceKnownAlignment(Arg, ByValAlignment,
+ IFI.TD) >= ByValAlignment)
+ return Arg;
+
+ // Otherwise, we have to make a memcpy to get a safe alignment. This is bad
+ // for code quality, but rarely happens and is required for correctness.
+ }
+
+ LLVMContext &Context = Arg->getContext();
+
+ Type *VoidPtrTy = Type::getInt8PtrTy(Context);
+
+ // Create the alloca. If we have DataLayout, use nice alignment.
+ unsigned Align = 1;
+ if (IFI.TD)
+ Align = IFI.TD->getPrefTypeAlignment(AggTy);
+
+ // If the byval had an alignment specified, we *must* use at least that
+ // alignment, as it is required by the byval argument (and uses of the
+ // pointer inside the callee).
+ Align = std::max(Align, ByValAlignment);
+
+ Function *Caller = TheCall->getParent()->getParent();
+
+ Value *NewAlloca = new AllocaInst(AggTy, 0, Align, Arg->getName(),
+ &*Caller->begin()->begin()); // Positioned relative to the first instruction of the caller's first block.
+ // Emit a memcpy.
+ Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)};
+ Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(),
+ Intrinsic::memcpy,
+ Tys);
+ Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall);
+ Value *SrcCast = new BitCastInst(Arg, VoidPtrTy, "tmp", TheCall);
+
+ Value *Size; // Copy size: a sizeof constant expression without DataLayout, otherwise the type's store size as an i64 constant.
+ if (IFI.TD == 0)
+ Size = ConstantExpr::getSizeOf(AggTy);
+ else
+ Size = ConstantInt::get(Type::getInt64Ty(Context),
+ IFI.TD->getTypeStoreSize(AggTy));
+
+ // Always generate a memcpy of alignment 1 here because we don't know
+ // the alignment of the src pointer. Other optimizations can infer
+ // better alignment.
+ Value *CallArgs[] = {
+ DestCast, SrcCast, Size,
+ ConstantInt::get(Type::getInt32Ty(Context), 1),
+ ConstantInt::getFalse(Context) // isVolatile
+ };
+ IRBuilder<>(TheCall).CreateCall(MemCpyFn, CallArgs);
+
+ // Uses of the argument in the function should use our new alloca
+ // instead.
+ return NewAlloca;
+}
+
+// isUsedByLifetimeMarker - Return true if at least one user of V is a call
+// to the lifetime_start or lifetime_end intrinsic, and false when no such
+// user exists.
+static bool isUsedByLifetimeMarker(Value *V) {
+  for (Value::use_iterator Cur = V->use_begin(), Last = V->use_end();
+       Cur != Last; ++Cur) {
+    // Only intrinsic calls can be lifetime markers; skip every other user.
+    IntrinsicInst *Call = dyn_cast<IntrinsicInst>(*Cur);
+    if (!Call)
+      continue;
+    if (Call->getIntrinsicID() == Intrinsic::lifetime_start ||
+        Call->getIntrinsicID() == Intrinsic::lifetime_end)
+      return true;
+  }
+  return false;
+}
+
+// hasLifetimeMarkers - Report whether AI is already used by a lifetime
+// marker, either directly (when AI itself is an i8*) or through an i8* cast.
+static bool hasLifetimeMarkers(AllocaInst *AI) {
+  Type *BytePtrTy = Type::getInt8PtrTy(AI->getType()->getContext());
+  // When AI itself has type i8*, check its users directly.
+  if (AI->getType() == BytePtrTy)
+    return isUsedByLifetimeMarker(AI);
+  // Otherwise look at every i8*-typed user that strips back to AI.
+  Value::use_iterator Cast = AI->use_begin(), CastEnd = AI->use_end();
+  for (; Cast != CastEnd; ++Cast) {
+    const bool IsCastOfAI =
+        Cast->getType() == BytePtrTy && Cast->stripPointerCasts() == AI;
+    if (IsCastOfAI && isUsedByLifetimeMarker(*Cast))
+      return true;
+  }
+  return false;
+}
+
+/// updateInlinedAtInfo - Rebuild DL so that the far end of its inlined-at
+/// chain refers to InlinedAtDL (recursive helper for fixupLineNumbers).
+static DebugLoc updateInlinedAtInfo(const DebugLoc &DL,
+                                    const DebugLoc &InlinedAtDL,
+                                    LLVMContext &Ctx) {
+  MDNode *Chain = DL.getInlinedAt(Ctx);
+  // No existing chain: attach InlinedAtDL directly. Otherwise rebuild the
+  // existing chain first, so that InlinedAtDL ends up at its far end.
+  DebugLoc Parent = Chain ? updateInlinedAtInfo(
+                                DebugLoc::getFromDILocation(Chain),
+                                InlinedAtDL, Ctx)
+                          : InlinedAtDL;
+  return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx),
+                       Parent.getAsMDNode(Ctx));
+}
+
+/// fixupLineNumbers - Update inlined instructions' line numbers
+/// to encode the location where these instructions are inlined. Covers every block from FI to the end of Fn; does nothing when TheCall has no debug location.
+static void fixupLineNumbers(Function *Fn, Function::iterator FI,
+ Instruction *TheCall) {
+ DebugLoc TheCallDL = TheCall->getDebugLoc();
+ if (TheCallDL.isUnknown())
+ return;
+
+ for (; FI != Fn->end(); ++FI) {
+ for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
+ BI != BE; ++BI) {
+ DebugLoc DL = BI->getDebugLoc();
+ if (!DL.isUnknown()) { // Instructions without a debug location are left untouched.
+ BI->setDebugLoc(updateInlinedAtInfo(DL, TheCallDL, BI->getContext()));
+ if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(BI)) { // Also rewrite the variable operand of a dbg.value using the new inlined-at location.
+ LLVMContext &Ctx = BI->getContext();
+ MDNode *InlinedAt = BI->getDebugLoc().getInlinedAt(Ctx);
+ DVI->setOperand(2, createInlinedVariable(DVI->getVariable(),
+ InlinedAt, Ctx));
+ }
+ }
+ }
+ }
+}
+
+/// InlineFunction - This function inlines the called function into the basic
+/// block of the caller. This returns false if it is not possible to inline
+/// this call: the callee is unknown (indirect call), is only a declaration, is vararg, or uses a GC or personality function that differs from the caller's.
+/// The program is still in a well defined state if this occurs though.
+///
+/// Note that this only does one level of inlining. For example, if the
+/// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
+/// exists in the instruction stream. Similarly this will inline a recursive
+/// function by one level.
+bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
+ bool InsertLifetime) {
+ Instruction *TheCall = CS.getInstruction();
+ assert(TheCall->getParent() && TheCall->getParent()->getParent() &&
+ "Instruction not in function!");
+
+ // If IFI has any state in it, zap it before we fill it in. This happens before any of the 'return false' checks below.
+ IFI.reset();
+
+ const Function *CalledFunc = CS.getCalledFunction();
+ if (CalledFunc == 0 || // Can't inline external function or indirect
+ CalledFunc->isDeclaration() || // call, or call to a vararg function!
+ CalledFunc->getFunctionType()->isVarArg()) return false;
+
+ // If the call to the callee is not a tail call, we must clear the 'tail'
+ // flags on any calls that we inline.
+ bool MustClearTailCallFlags =
+ !(isa<CallInst>(TheCall) && cast<CallInst>(TheCall)->isTailCall());
+
+ // If the call to the callee cannot throw, set the 'nounwind' flag on any
+ // calls that we inline.
+ bool MarkNoUnwind = CS.doesNotThrow();
+
+ BasicBlock *OrigBB = TheCall->getParent();
+ Function *Caller = OrigBB->getParent();
+
+ // GC poses two hazards to inlining, which only occur when the callee has GC:
+ // 1. If the caller has no GC, then the callee's GC must be propagated to the
+ // caller.
+ // 2. If the caller has a differing GC, it is invalid to inline.
+ if (CalledFunc->hasGC()) {
+ if (!Caller->hasGC())
+ Caller->setGC(CalledFunc->getGC()); // NOTE(review): this change to the caller is kept even if the personality check below returns false.
+ else if (CalledFunc->getGC() != Caller->getGC())
+ return false;
+ }
+
+ // Get the personality function from the callee if it contains a landing pad.
+ Value *CalleePersonality = 0;
+ for (Function::const_iterator I = CalledFunc->begin(), E = CalledFunc->end();
+ I != E; ++I)
+ if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
+ const BasicBlock *BB = II->getUnwindDest();
+ const LandingPadInst *LP = BB->getLandingPadInst();
+ CalleePersonality = LP->getPersonalityFn();
+ break;
+ }
+
+ // Find the personality function used by the landing pads of the caller. If it
+ // exists, then check to see that it matches the personality function used in
+ // the callee.
+ if (CalleePersonality) {
+ for (Function::const_iterator I = Caller->begin(), E = Caller->end();
+ I != E; ++I)
+ if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
+ const BasicBlock *BB = II->getUnwindDest();
+ const LandingPadInst *LP = BB->getLandingPadInst();
+
+ // If the personality functions match, then we can perform the
+ // inlining. Otherwise, we can't inline.
+ // TODO: This isn't 100% true. Some personality functions are proper
+ // supersets of others and can be used in place of the other.
+ if (LP->getPersonalityFn() != CalleePersonality)
+ return false;
+
+ break;
+ }
+ }
+
+ // Get an iterator to the last basic block in the function, which will have
+ // the new function inlined after it.
+ Function::iterator LastBlock = &Caller->back();
+
+ // Make sure to capture all of the return instructions from the cloned
+ // function.
+ SmallVector<ReturnInst*, 8> Returns;
+ ClonedCodeInfo InlinedFunctionInfo;
+ Function::iterator FirstNewBlock;
+
+ { // Scope to destroy VMap after cloning.
+ ValueToValueMapTy VMap;
+
+ assert(CalledFunc->arg_size() == CS.arg_size() &&
+ "No varargs calls can be inlined!");
+
+ // Calculate the vector of arguments to pass into the function cloner, which
+ // matches up the formal to the actual argument values.
+ CallSite::arg_iterator AI = CS.arg_begin();
+ unsigned ArgNo = 0;
+ for (Function::const_arg_iterator I = CalledFunc->arg_begin(),
+ E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {
+ Value *ActualArg = *AI;
+
+ // When byval arguments are actually inlined, we need to make the copy implied
+ // by them explicit. However, we don't do this if the callee is readonly
+ // or readnone, because the copy would be unneeded: the callee doesn't
+ // modify the struct.
+ if (CS.isByValArgument(ArgNo)) {
+ ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI,
+ CalledFunc->getParamAlignment(ArgNo+1));
+
+ // Calls that we inline may use the new alloca, so we need to clear
+ // their 'tail' flags if HandleByValArgument introduced a new alloca and
+ // the callee has calls.
+ MustClearTailCallFlags |= ActualArg != *AI;
+ }
+
+ VMap[I] = ActualArg;
+ }
+
+ // We want the inliner to prune the code as it copies. We would LOVE to
+ // have no dead or constant instructions leftover after inlining occurs
+ // (which can happen, e.g., because an argument was constant), but we'll be
+ // happy with whatever the cloner can do.
+ CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,
+ /*ModuleLevelChanges=*/false, Returns, ".i",
+ &InlinedFunctionInfo, IFI.TD, TheCall);
+
+ // Remember the first block that is newly cloned over: the block that follows what was the caller's last block before cloning.
+ FirstNewBlock = LastBlock; ++FirstNewBlock;
+
+ // Update the callgraph if requested.
+ if (IFI.CG)
+ UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI);
+
+ // Update inlined instructions' line number information.
+ fixupLineNumbers(Caller, FirstNewBlock, TheCall);
+ }
+
+ // If there are any alloca instructions in the block that used to be the entry
+ // block for the callee, move them to the entry block of the caller. First
+ // calculate which instruction they should be inserted before. We insert the
+ // instructions at the end of the current alloca list.
+ {
+ BasicBlock::iterator InsertPoint = Caller->begin()->begin();
+ for (BasicBlock::iterator I = FirstNewBlock->begin(),
+ E = FirstNewBlock->end(); I != E; ) {
+ AllocaInst *AI = dyn_cast<AllocaInst>(I++);
+ if (AI == 0) continue;
+
+ // If the alloca is now dead, remove it. This often occurs due to code
+ // specialization.
+ if (AI->use_empty()) {
+ AI->eraseFromParent();
+ continue;
+ }
+
+ if (!isa<Constant>(AI->getArraySize())) // Allocas with a non-constant array size stay where they are.
+ continue;
+
+ // Keep track of the static allocas that we inline into the caller.
+ IFI.StaticAllocas.push_back(AI);
+
+ // Scan for the block of allocas that we can move over, and move them
+ // all at once.
+ while (isa<AllocaInst>(I) &&
+ isa<Constant>(cast<AllocaInst>(I)->getArraySize())) {
+ IFI.StaticAllocas.push_back(cast<AllocaInst>(I));
+ ++I;
+ }
+
+ // Transfer all of the allocas over in a block. Using splice means
+ // that the instructions aren't removed from the symbol table, then
+ // reinserted.
+ Caller->getEntryBlock().getInstList().splice(InsertPoint,
+ FirstNewBlock->getInstList(),
+ AI, I);
+ }
+ }
+
+ // Leave lifetime markers for the static allocas, scoping them to the
+ // function we just inlined.
+ if (InsertLifetime && !IFI.StaticAllocas.empty()) {
+ IRBuilder<> builder(FirstNewBlock->begin());
+ for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) {
+ AllocaInst *AI = IFI.StaticAllocas[ai];
+
+ // If the alloca is already scoped to something smaller than the whole
+ // function then there's no need to add redundant, less accurate markers.
+ if (hasLifetimeMarkers(AI))
+ continue;
+
+ // Try to determine the size of the allocation. AllocaSize stays null when it cannot be determined.
+ ConstantInt *AllocaSize = 0;
+ if (ConstantInt *AIArraySize =
+ dyn_cast<ConstantInt>(AI->getArraySize())) {
+ if (IFI.TD) {
+ Type *AllocaType = AI->getAllocatedType();
+ uint64_t AllocaTypeSize = IFI.TD->getTypeAllocSize(AllocaType);
+ uint64_t AllocaArraySize = AIArraySize->getLimitedValue();
+ assert(AllocaArraySize > 0 && "array size of AllocaInst is zero");
+ // Check that array size doesn't saturate uint64_t and doesn't
+ // overflow when it's multiplied by type size.
+ if (AllocaArraySize != ~0ULL &&
+ UINT64_MAX / AllocaArraySize >= AllocaTypeSize) {
+ AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()),
+ AllocaArraySize * AllocaTypeSize);
+ }
+ }
+ }
+
+ builder.CreateLifetimeStart(AI, AllocaSize);
+ for (unsigned ri = 0, re = Returns.size(); ri != re; ++ri) {
+ IRBuilder<> builder(Returns[ri]); // NOTE: shadows the outer 'builder'; this one is positioned at the cloned return.
+ builder.CreateLifetimeEnd(AI, AllocaSize);
+ }
+ }
+ }
+
+ // If the inlined code contained dynamic alloca instructions, wrap the inlined
+ // code with llvm.stacksave/llvm.stackrestore intrinsics.
+ if (InlinedFunctionInfo.ContainsDynamicAllocas) {
+ Module *M = Caller->getParent();
+ // Get the two intrinsics we care about.
+ Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave);
+ Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore);
+
+ // Insert the llvm.stacksave.
+ CallInst *SavedPtr = IRBuilder<>(FirstNewBlock, FirstNewBlock->begin())
+ .CreateCall(StackSave, "savedstack");
+
+ // Insert a call to llvm.stackrestore before any return instructions in the
+ // inlined function.
+ for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
+ IRBuilder<>(Returns[i]).CreateCall(StackRestore, SavedPtr);
+ }
+ }
+
+ // If we are inlining a tail call instruction through a call site that isn't
+ // marked 'tail', we must remove the tail marker for any calls in the inlined
+ // code. Also, calls inlined through a 'nounwind' call site should be marked
+ // 'nounwind'.
+ if (InlinedFunctionInfo.ContainsCalls &&
+ (MustClearTailCallFlags || MarkNoUnwind)) {
+ for (Function::iterator BB = FirstNewBlock, E = Caller->end();
+ BB != E; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (MustClearTailCallFlags)
+ CI->setTailCall(false);
+ if (MarkNoUnwind)
+ CI->setDoesNotThrow();
+ }
+ }
+
+ // If we are inlining for an invoke instruction, we must make sure to rewrite
+ // any call instructions into invoke instructions.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
+ HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo);
+
+ // If we cloned in _exactly one_ basic block, and if that block ends in a
+ // return instruction, we splice the body of the inlined callee directly into
+ // the calling basic block.
+ if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) {
+ // Move all of the instructions right before the call.
+ OrigBB->getInstList().splice(TheCall, FirstNewBlock->getInstList(),
+ FirstNewBlock->begin(), FirstNewBlock->end());
+ // Remove the cloned basic block.
+ Caller->getBasicBlockList().pop_back();
+
+ // If the call site was an invoke instruction, add a branch to the normal
+ // destination.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
+ BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall);
+ NewBr->setDebugLoc(Returns[0]->getDebugLoc());
+ }
+
+ // If the return instruction returned a value, replace uses of the call with
+ // uses of the returned value.
+ if (!TheCall->use_empty()) {
+ ReturnInst *R = Returns[0];
+ if (TheCall == R->getReturnValue()) // The returned value is the call being removed, so substitute undef.
+ TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
+ else
+ TheCall->replaceAllUsesWith(R->getReturnValue());
+ }
+ // Since we are now done with the Call/Invoke, we can delete it.
+ TheCall->eraseFromParent();
+
+ // Since we are now done with the return instruction, delete it also.
+ Returns[0]->eraseFromParent();
+
+ // We are now done with the inlining.
+ return true;
+ }
+
+ // Otherwise, we have the normal case, of more than one block to inline or
+ // multiple return sites.
+
+ // We want to clone the entire callee function into the hole between the
+ // "starter" and "ender" blocks. How we accomplish this depends on whether
+ // this is an invoke instruction or a call instruction.
+ BasicBlock *AfterCallBB;
+ BranchInst *CreatedBranchToNormalDest = NULL; // Stays null for a plain call; set only on the invoke path below.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
+
+ // Add an unconditional branch to make this look like the CallInst case...
+ CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), TheCall);
+
+ // Split the basic block. This guarantees that no PHI nodes will have to be
+ // updated due to new incoming edges, and make the invoke case more
+ // symmetric to the call case.
+ AfterCallBB = OrigBB->splitBasicBlock(CreatedBranchToNormalDest,
+ CalledFunc->getName()+".exit");
+
+ } else { // It's a call
+ // If this is a call instruction, we need to split the basic block that
+ // the call lives in.
+ //
+ AfterCallBB = OrigBB->splitBasicBlock(TheCall,
+ CalledFunc->getName()+".exit");
+ }
+
+ // Change the branch that used to go to AfterCallBB to branch to the first
+ // basic block of the inlined function.
+ //
+ TerminatorInst *Br = OrigBB->getTerminator();
+ assert(Br && Br->getOpcode() == Instruction::Br &&
+ "splitBasicBlock broken!");
+ Br->setOperand(0, FirstNewBlock);
+
+
+ // Now that the function is correct, make it a little bit nicer. In
+ // particular, move the basic blocks inserted from the end of the function
+ // into the space made by splitting the source basic block.
+ Caller->getBasicBlockList().splice(AfterCallBB, Caller->getBasicBlockList(),
+ FirstNewBlock, Caller->end());
+
+ // Handle all of the return instructions that we just cloned in, and eliminate
+ // any users of the original call/invoke instruction.
+ Type *RTy = CalledFunc->getReturnType();
+
+ PHINode *PHI = 0; // Created only when there are several returns and the call's result is used.
+ if (Returns.size() > 1) {
+ // The PHI node should go at the front of the new basic block to merge all
+ // possible incoming values.
+ if (!TheCall->use_empty()) {
+ PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(),
+ AfterCallBB->begin());
+ // Anything that used the result of the function call should now use the
+ // PHI node as their operand.
+ TheCall->replaceAllUsesWith(PHI);
+ }
+
+ // Loop over all of the return instructions adding entries to the PHI node
+ // as appropriate.
+ if (PHI) {
+ for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
+ ReturnInst *RI = Returns[i];
+ assert(RI->getReturnValue()->getType() == PHI->getType() &&
+ "Ret value not consistent in function!");
+ PHI->addIncoming(RI->getReturnValue(), RI->getParent());
+ }
+ }
+
+
+ // Add a branch to the merge points and remove return instructions.
+ DebugLoc Loc; // Ends up holding the debug location of the last return processed.
+ for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
+ ReturnInst *RI = Returns[i];
+ BranchInst* BI = BranchInst::Create(AfterCallBB, RI);
+ Loc = RI->getDebugLoc();
+ BI->setDebugLoc(Loc);
+ RI->eraseFromParent();
+ }
+ // We need to set the debug location to *somewhere* inside the
+ // inlined function. The line number may be nonsensical, but the
+ // instruction will at least be associated with the right
+ // function.
+ if (CreatedBranchToNormalDest)
+ CreatedBranchToNormalDest->setDebugLoc(Loc);
+ } else if (!Returns.empty()) {
+ // Otherwise, if there is exactly one return value, just replace anything
+ // using the return value of the call with the computed value.
+ if (!TheCall->use_empty()) {
+ if (TheCall == Returns[0]->getReturnValue()) // The returned value is the call being removed, so substitute undef.
+ TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
+ else
+ TheCall->replaceAllUsesWith(Returns[0]->getReturnValue());
+ }
+
+ // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
+ BasicBlock *ReturnBB = Returns[0]->getParent();
+ ReturnBB->replaceAllUsesWith(AfterCallBB);
+
+ // Splice the code from the return block into the block that it will return
+ // to, which contains the code that was after the call.
+ AfterCallBB->getInstList().splice(AfterCallBB->begin(),
+ ReturnBB->getInstList());
+
+ if (CreatedBranchToNormalDest)
+ CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc());
+
+ // Delete the return instruction now and empty ReturnBB now.
+ Returns[0]->eraseFromParent();
+ ReturnBB->eraseFromParent();
+ } else if (!TheCall->use_empty()) {
+ // No returns, but something is using the return value of the call. Just
+ // nuke the result.
+ TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
+ }
+
+ // Since we are now done with the Call/Invoke, we can delete it.
+ TheCall->eraseFromParent();
+
+ // We should always be able to fold the entry block of the function into the
+ // single predecessor of the block...
+ assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!");
+ BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0);
+
+ // Splice the code entry block into calling block, right before the
+ // unconditional branch.
+ CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes
+ OrigBB->getInstList().splice(Br, CalleeEntry->getInstList());
+
+ // Remove the unconditional branch.
+ OrigBB->getInstList().erase(Br);
+
+ // Now we can remove the CalleeEntry block, which is now empty.
+ Caller->getBasicBlockList().erase(CalleeEntry);
+
+ // If we inserted a phi node, check to see if it has a single value (e.g. all
+ // the entries are the same or undef). If so, remove the PHI so it doesn't
+ // block other optimizations.
+ if (PHI) {
+ if (Value *V = SimplifyInstruction(PHI, IFI.TD)) {
+ PHI->replaceAllUsesWith(V);
+ PHI->eraseFromParent();
+ }
+ }
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp b/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp
new file mode 100644
index 000000000000..a020bc7398f5
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp
@@ -0,0 +1,64 @@
+//===- InstructionNamer.cpp - Give anonymous instructions names -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a little utility pass that gives instructions names. This is mostly
+// useful when diffing the effect of an optimization, because deleting an
+// unnamed instruction can change all other instruction numbering, making the
+// diff very noisy.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+namespace {
+  /// InstNamer - Function pass that hands out names to anonymous values:
+  /// unnamed non-void arguments become "arg", unnamed basic blocks become "bb"
+  /// and unnamed non-void instructions become "tmp".
+  struct InstNamer : public FunctionPass {
+    static char ID; // Pass identification, replacement for typeid
+    InstNamer() : FunctionPass(ID) {
+      initializeInstNamerPass(*PassRegistry::getPassRegistry());
+    }
+
+    /// Declare that this pass preserves every analysis.
+    void getAnalysisUsage(AnalysisUsage &Info) const {
+      Info.setPreservesAll();
+    }
+
+    /// Name every anonymous argument, block and instruction of F.
+    /// Unconditionally returns true.
+    bool runOnFunction(Function &F) {
+      // Arguments first.
+      for (Function::arg_iterator Arg = F.arg_begin(), ArgEnd = F.arg_end();
+           Arg != ArgEnd; ++Arg) {
+        if (Arg->hasName() || Arg->getType()->isVoidTy())
+          continue;
+        Arg->setName("arg");
+      }
+
+      // Then each block, followed by the instructions it contains.
+      for (Function::iterator Block = F.begin(), BlockEnd = F.end();
+           Block != BlockEnd; ++Block) {
+        if (!Block->hasName())
+          Block->setName("bb");
+
+        for (BasicBlock::iterator Inst = Block->begin(),
+                                  InstEnd = Block->end();
+             Inst != InstEnd; ++Inst) {
+          if (Inst->hasName() || Inst->getType()->isVoidTy())
+            continue;
+          Inst->setName("tmp");
+        }
+      }
+
+      return true;
+    }
+  };
+
+  char InstNamer::ID = 0;
+}
+
+// Pass registration for InstNamer, using the argument string "instnamer" and
+// the description below.
+INITIALIZE_PASS(InstNamer, "instnamer",
+ "Assign names to anonymous instructions", false, false)
+// Publish the pass ID under the llvm namespace; InstNamer itself is declared
+// in an anonymous namespace and cannot be named from other files.
+char &llvm::InstructionNamerID = InstNamer::ID;
+//===----------------------------------------------------------------------===//
+//
+// InstructionNamer - Give any unnamed non-void instructions "tmp" names.
+//
+// Returns a newly allocated InstNamer pass object.
+//
+FunctionPass *llvm::createInstructionNamerPass() {
+ return new InstNamer();
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp b/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp
new file mode 100644
index 000000000000..3cb8ded8506a
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp
@@ -0,0 +1,524 @@
+//===-- IntegerDivision.cpp - Expand integer division ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains an implementation of 32bit scalar integer division for
+// targets that don't have native support. It's largely derived from
+// compiler-rt's implementation of __udivsi3, but hand-tuned to reduce the
+// amount of control flow.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "integer-division"
+#include "llvm/Transforms/Utils/IntegerDivision.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+
+using namespace llvm;
+
+/// Emit, at Builder's current insert point, the sign handling for a signed
+/// 32-bit remainder and return the resulting value, which carries the sign of
+/// the dividend. A urem is created in the process; if it exists as an
+/// instruction (i.e. it was not folded), Builder's insert point is moved onto
+/// that urem so the caller can expand it next. Otherwise the insert point is
+/// left untouched.
+static Value *generateSignedRemainderCode(Value *Dividend, Value *Divisor,
+                                          IRBuilder<> &Builder) {
+  ConstantInt *SignShift = Builder.getInt32(31);
+
+  // The sequence emitted below is:
+  // ;   %dividend_sgn = ashr i32 %dividend, 31
+  // ;   %divisor_sgn  = ashr i32 %divisor, 31
+  // ;   %dvd_xor      = xor i32 %dividend, %dividend_sgn
+  // ;   %dvs_xor      = xor i32 %divisor, %divisor_sgn
+  // ;   %u_dividend   = sub i32 %dvd_xor, %dividend_sgn
+  // ;   %u_divisor    = sub i32 %dvs_xor, %divisor_sgn
+  // ;   %urem         = urem i32 %u_dividend, %u_divisor
+  // ;   %xored        = xor i32 %urem, %dividend_sgn
+  // ;   %srem         = sub i32 %xored, %dividend_sgn
+  Value *DvdSign = Builder.CreateAShr(Dividend, SignShift);
+  Value *DvsSign = Builder.CreateAShr(Divisor, SignShift);
+  Value *DvdFlipped = Builder.CreateXor(Dividend, DvdSign);
+  Value *DvsFlipped = Builder.CreateXor(Divisor, DvsSign);
+  Value *AbsDividend = Builder.CreateSub(DvdFlipped, DvdSign);
+  Value *AbsDivisor = Builder.CreateSub(DvsFlipped, DvsSign);
+  Value *UnsignedRem = Builder.CreateURem(AbsDividend, AbsDivisor);
+  Value *RemFlipped = Builder.CreateXor(UnsignedRem, DvdSign);
+  Value *SignedRem = Builder.CreateSub(RemFlipped, DvdSign);
+
+  // Only reposition the builder when the urem is a real instruction.
+  Instruction *URemInst = dyn_cast<Instruction>(UnsignedRem);
+  if (URemInst)
+    Builder.SetInsertPoint(URemInst);
+
+  return SignedRem;
+}
+
+
+/// Emit, at Builder's current insert point, the computation of an unsigned
+/// remainder as Dividend - (Dividend udiv Divisor) * Divisor, and return the
+/// remainder value. If the udiv exists as an instruction (i.e. it was not
+/// folded), Builder's insert point is moved onto it so the caller can expand
+/// it next. Otherwise the insert point is left untouched.
+static Value *generatedUnsignedRemainderCode(Value *Dividend, Value *Divisor,
+                                             IRBuilder<> &Builder) {
+  // The sequence emitted below is:
+  // ;   %quotient  = udiv i32 %dividend, %divisor
+  // ;   %product   = mul i32 %divisor, %quotient
+  // ;   %remainder = sub i32 %dividend, %product
+  Value *Quot = Builder.CreateUDiv(Dividend, Divisor);
+  Value *QuotTimesDivisor = Builder.CreateMul(Divisor, Quot);
+  Value *Rem = Builder.CreateSub(Dividend, QuotTimesDivisor);
+
+  // Only reposition the builder when the udiv is a real instruction.
+  Instruction *UDivInst = dyn_cast<Instruction>(Quot);
+  if (UDivInst)
+    Builder.SetInsertPoint(UDivInst);
+
+  return Rem;
+}
+
+/// Emit, at Builder's current insert point, the sign handling for a signed
+/// 32-bit division and return the quotient value, rounded towards 0. A udiv is
+/// created in the process; if it exists as an instruction (i.e. it was not
+/// folded), Builder's insert point is moved onto that udiv so the caller can
+/// expand it next. Otherwise the insert point is left untouched.
+static Value *generateSignedDivisionCode(Value *Dividend, Value *Divisor,
+                                         IRBuilder<> &Builder) {
+  // Implementation taken from compiler-rt's __divsi3
+
+  ConstantInt *SignShift = Builder.getInt32(31);
+
+  // The sequence emitted below is:
+  // ;   %tmp    = ashr i32 %dividend, 31
+  // ;   %tmp1   = ashr i32 %divisor, 31
+  // ;   %tmp2   = xor i32 %tmp, %dividend
+  // ;   %u_dvnd = sub i32 %tmp2, %tmp
+  // ;   %tmp3   = xor i32 %tmp1, %divisor
+  // ;   %u_dvsr = sub i32 %tmp3, %tmp1
+  // ;   %q_sgn  = xor i32 %tmp1, %tmp
+  // ;   %q_mag  = udiv i32 %u_dvnd, %u_dvsr
+  // ;   %tmp4   = xor i32 %q_mag, %q_sgn
+  // ;   %q      = sub i32 %tmp4, %q_sgn
+  Value *DvdSign = Builder.CreateAShr(Dividend, SignShift);
+  Value *DvsSign = Builder.CreateAShr(Divisor, SignShift);
+  Value *DvdFlipped = Builder.CreateXor(DvdSign, Dividend);
+  Value *AbsDividend = Builder.CreateSub(DvdFlipped, DvdSign);
+  Value *DvsFlipped = Builder.CreateXor(DvsSign, Divisor);
+  Value *AbsDivisor = Builder.CreateSub(DvsFlipped, DvsSign);
+  Value *QuotSign = Builder.CreateXor(DvsSign, DvdSign);
+  Value *QuotMag = Builder.CreateUDiv(AbsDividend, AbsDivisor);
+  Value *QuotFlipped = Builder.CreateXor(QuotMag, QuotSign);
+  Value *Quot = Builder.CreateSub(QuotFlipped, QuotSign);
+
+  // Only reposition the builder when the udiv is a real instruction.
+  Instruction *UDivInst = dyn_cast<Instruction>(QuotMag);
+  if (UDivInst)
+    Builder.SetInsertPoint(UDivInst);
+
+  return Quot;
+}
+
+/// Generates code to divide two unsigned scalar 32-bit integers. Returns the
+/// quotient, rounded towards 0. Builder's insert point should be pointing where
+/// the caller wants code generated, e.g. at the udiv instruction.
+///
+/// The block holding the insert point is split at that point. The first half
+/// is renamed with a "_udiv-special-cases" suffix and receives the early-out
+/// checks; four new blocks (udiv-bb1, udiv-preheader, udiv-do-while and
+/// udiv-loop-exit) implement the shift-subtract loop; the second half
+/// ("udiv-end") starts with the phi that is returned as the quotient. On
+/// return Builder is left inserting into udiv-end.
+static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
+ IRBuilder<> &Builder) {
+ // The basic algorithm can be found in the compiler-rt project's
+ // implementation of __udivsi3.c. Here, we do a lower-level IR based approach
+ // that's been hand-tuned to lessen the amount of control flow involved.
+
+ // NOTE: the "; ..." IR sketches in the comments below are illustrative.
+ // Flags shown there such as `nsw` or `tail` are not set by the builder calls
+ // in this function.
+
+ // Some helper values
+ IntegerType *I32Ty = Builder.getInt32Ty();
+
+ ConstantInt *Zero = Builder.getInt32(0);
+ ConstantInt *One = Builder.getInt32(1);
+ ConstantInt *ThirtyOne = Builder.getInt32(31);
+ ConstantInt *NegOne = ConstantInt::getSigned(I32Ty, -1);
+ ConstantInt *True = Builder.getTrue();
+
+ // Fetch the i32 overload of llvm.ctlz from the module that owns the function
+ // we are inserting into.
+ BasicBlock *IBB = Builder.GetInsertBlock();
+ Function *F = IBB->getParent();
+ Function *CTLZi32 = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
+ I32Ty);
+
+ // Our CFG is going to look like:
+ // +---------------------+
+ // | special-cases |
+ // | ... |
+ // +---------------------+
+ // | |
+ // | +----------+
+ // | | bb1 |
+ // | | ... |
+ // | +----------+
+ // | | |
+ // | | +------------+
+ // | | | preheader |
+ // | | | ... |
+ // | | +------------+
+ // | | |
+ // | | | +---+
+ // | | | | |
+ // | | +------------+ |
+ // | | | do-while | |
+ // | | | ... | |
+ // | | +------------+ |
+ // | | | | |
+ // | +-----------+ +---+
+ // | | loop-exit |
+ // | | ... |
+ // | +-----------+
+ // | |
+ // +-------+
+ // | ... |
+ // | end |
+ // +-------+
+ // The current block becomes "special-cases"; everything from the insert
+ // point onwards is split off into "udiv-end". The four new blocks are all
+ // created with End as their insert-before position.
+ BasicBlock *SpecialCases = Builder.GetInsertBlock();
+ SpecialCases->setName(Twine(SpecialCases->getName(), "_udiv-special-cases"));
+ BasicBlock *End = SpecialCases->splitBasicBlock(Builder.GetInsertPoint(),
+ "udiv-end");
+ BasicBlock *LoopExit = BasicBlock::Create(Builder.getContext(),
+ "udiv-loop-exit", F, End);
+ BasicBlock *DoWhile = BasicBlock::Create(Builder.getContext(),
+ "udiv-do-while", F, End);
+ BasicBlock *Preheader = BasicBlock::Create(Builder.getContext(),
+ "udiv-preheader", F, End);
+ BasicBlock *BB1 = BasicBlock::Create(Builder.getContext(),
+ "udiv-bb1", F, End);
+
+ // We'll be overwriting the terminator to insert our extra blocks
+ SpecialCases->getTerminator()->eraseFromParent();
+
+ // First off, check for special cases: dividend or divisor is zero, divisor
+ // is greater than dividend, and divisor is 1.
+ // ; special-cases:
+ // ; %ret0_1 = icmp eq i32 %divisor, 0
+ // ; %ret0_2 = icmp eq i32 %dividend, 0
+ // ; %ret0_3 = or i1 %ret0_1, %ret0_2
+ // ; %tmp0 = tail call i32 @llvm.ctlz.i32(i32 %divisor, i1 true)
+ // ; %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %dividend, i1 true)
+ // ; %sr = sub nsw i32 %tmp0, %tmp1
+ // ; %ret0_4 = icmp ugt i32 %sr, 31
+ // ; %ret0 = or i1 %ret0_3, %ret0_4
+ // ; %retDividend = icmp eq i32 %sr, 31
+ // ; %retVal = select i1 %ret0, i32 0, i32 %dividend
+ // ; %earlyRet = or i1 %ret0, %retDividend
+ // ; br i1 %earlyRet, label %end, label %bb1
+ // %sr is the difference between the operands' leading-zero counts. The
+ // early exit yields 0 when %ret0 is set and the dividend itself when
+ // %sr == 31.
+ Builder.SetInsertPoint(SpecialCases);
+ Value *Ret0_1 = Builder.CreateICmpEQ(Divisor, Zero);
+ Value *Ret0_2 = Builder.CreateICmpEQ(Dividend, Zero);
+ Value *Ret0_3 = Builder.CreateOr(Ret0_1, Ret0_2);
+ Value *Tmp0 = Builder.CreateCall2(CTLZi32, Divisor, True);
+ Value *Tmp1 = Builder.CreateCall2(CTLZi32, Dividend, True);
+ Value *SR = Builder.CreateSub(Tmp0, Tmp1);
+ Value *Ret0_4 = Builder.CreateICmpUGT(SR, ThirtyOne);
+ Value *Ret0 = Builder.CreateOr(Ret0_3, Ret0_4);
+ Value *RetDividend = Builder.CreateICmpEQ(SR, ThirtyOne);
+ Value *RetVal = Builder.CreateSelect(Ret0, Zero, Dividend);
+ Value *EarlyRet = Builder.CreateOr(Ret0, RetDividend);
+ Builder.CreateCondBr(EarlyRet, End, BB1);
+
+ // ; bb1: ; preds = %special-cases
+ // ; %sr_1 = add i32 %sr, 1
+ // ; %tmp2 = sub i32 31, %sr
+ // ; %q = shl i32 %dividend, %tmp2
+ // ; %skipLoop = icmp eq i32 %sr_1, 0
+ // ; br i1 %skipLoop, label %loop-exit, label %preheader
+ Builder.SetInsertPoint(BB1);
+ Value *SR_1 = Builder.CreateAdd(SR, One);
+ Value *Tmp2 = Builder.CreateSub(ThirtyOne, SR);
+ Value *Q = Builder.CreateShl(Dividend, Tmp2);
+ Value *SkipLoop = Builder.CreateICmpEQ(SR_1, Zero);
+ Builder.CreateCondBr(SkipLoop, LoopExit, Preheader);
+
+ // ; preheader: ; preds = %bb1
+ // ; %tmp3 = lshr i32 %dividend, %sr_1
+ // ; %tmp4 = add i32 %divisor, -1
+ // ; br label %do-while
+ Builder.SetInsertPoint(Preheader);
+ Value *Tmp3 = Builder.CreateLShr(Dividend, SR_1);
+ Value *Tmp4 = Builder.CreateAdd(Divisor, NegOne);
+ Builder.CreateBr(DoWhile);
+
+ // ; do-while: ; preds = %do-while, %preheader
+ // ; %carry_1 = phi i32 [ 0, %preheader ], [ %carry, %do-while ]
+ // ; %sr_3 = phi i32 [ %sr_1, %preheader ], [ %sr_2, %do-while ]
+ // ; %r_1 = phi i32 [ %tmp3, %preheader ], [ %r, %do-while ]
+ // ; %q_2 = phi i32 [ %q, %preheader ], [ %q_1, %do-while ]
+ // ; %tmp5 = shl i32 %r_1, 1
+ // ; %tmp6 = lshr i32 %q_2, 31
+ // ; %tmp7 = or i32 %tmp5, %tmp6
+ // ; %tmp8 = shl i32 %q_2, 1
+ // ; %q_1 = or i32 %carry_1, %tmp8
+ // ; %tmp9 = sub i32 %tmp4, %tmp7
+ // ; %tmp10 = ashr i32 %tmp9, 31
+ // ; %carry = and i32 %tmp10, 1
+ // ; %tmp11 = and i32 %tmp10, %divisor
+ // ; %r = sub i32 %tmp7, %tmp11
+ // ; %sr_2 = add i32 %sr_3, -1
+ // ; %tmp12 = icmp eq i32 %sr_2, 0
+ // ; br i1 %tmp12, label %loop-exit, label %do-while
+ // The phis are created empty here; their incoming values are filled in at
+ // the end of the function, once every value they refer to exists.
+ Builder.SetInsertPoint(DoWhile);
+ PHINode *Carry_1 = Builder.CreatePHI(I32Ty, 2);
+ PHINode *SR_3 = Builder.CreatePHI(I32Ty, 2);
+ PHINode *R_1 = Builder.CreatePHI(I32Ty, 2);
+ PHINode *Q_2 = Builder.CreatePHI(I32Ty, 2);
+ Value *Tmp5 = Builder.CreateShl(R_1, One);
+ Value *Tmp6 = Builder.CreateLShr(Q_2, ThirtyOne);
+ Value *Tmp7 = Builder.CreateOr(Tmp5, Tmp6);
+ Value *Tmp8 = Builder.CreateShl(Q_2, One);
+ Value *Q_1 = Builder.CreateOr(Carry_1, Tmp8);
+ Value *Tmp9 = Builder.CreateSub(Tmp4, Tmp7);
+ Value *Tmp10 = Builder.CreateAShr(Tmp9, 31);
+ Value *Carry = Builder.CreateAnd(Tmp10, One);
+ Value *Tmp11 = Builder.CreateAnd(Tmp10, Divisor);
+ Value *R = Builder.CreateSub(Tmp7, Tmp11);
+ Value *SR_2 = Builder.CreateAdd(SR_3, NegOne);
+ Value *Tmp12 = Builder.CreateICmpEQ(SR_2, Zero);
+ Builder.CreateCondBr(Tmp12, LoopExit, DoWhile);
+
+ // ; loop-exit: ; preds = %do-while, %bb1
+ // ; %carry_2 = phi i32 [ 0, %bb1 ], [ %carry, %do-while ]
+ // ; %q_3 = phi i32 [ %q, %bb1 ], [ %q_1, %do-while ]
+ // ; %tmp13 = shl i32 %q_3, 1
+ // ; %q_4 = or i32 %carry_2, %tmp13
+ // ; br label %end
+ Builder.SetInsertPoint(LoopExit);
+ PHINode *Carry_2 = Builder.CreatePHI(I32Ty, 2);
+ PHINode *Q_3 = Builder.CreatePHI(I32Ty, 2);
+ Value *Tmp13 = Builder.CreateShl(Q_3, One);
+ Value *Q_4 = Builder.CreateOr(Carry_2, Tmp13);
+ Builder.CreateBr(End);
+
+ // ; end: ; preds = %loop-exit, %special-cases
+ // ; %q_5 = phi i32 [ %q_4, %loop-exit ], [ %retVal, %special-cases ]
+ // ; ret i32 %q_5
+ // The result phi goes at the very beginning of End.
+ Builder.SetInsertPoint(End, End->begin());
+ PHINode *Q_5 = Builder.CreatePHI(I32Ty, 2);
+
+ // Populate the Phis, since all values have now been created. Our Phis were:
+ // ; %carry_1 = phi i32 [ 0, %preheader ], [ %carry, %do-while ]
+ Carry_1->addIncoming(Zero, Preheader);
+ Carry_1->addIncoming(Carry, DoWhile);
+ // ; %sr_3 = phi i32 [ %sr_1, %preheader ], [ %sr_2, %do-while ]
+ SR_3->addIncoming(SR_1, Preheader);
+ SR_3->addIncoming(SR_2, DoWhile);
+ // ; %r_1 = phi i32 [ %tmp3, %preheader ], [ %r, %do-while ]
+ R_1->addIncoming(Tmp3, Preheader);
+ R_1->addIncoming(R, DoWhile);
+ // ; %q_2 = phi i32 [ %q, %preheader ], [ %q_1, %do-while ]
+ Q_2->addIncoming(Q, Preheader);
+ Q_2->addIncoming(Q_1, DoWhile);
+ // ; %carry_2 = phi i32 [ 0, %bb1 ], [ %carry, %do-while ]
+ Carry_2->addIncoming(Zero, BB1);
+ Carry_2->addIncoming(Carry, DoWhile);
+ // ; %q_3 = phi i32 [ %q, %bb1 ], [ %q_1, %do-while ]
+ Q_3->addIncoming(Q, BB1);
+ Q_3->addIncoming(Q_1, DoWhile);
+ // ; %q_5 = phi i32 [ %q_4, %loop-exit ], [ %retVal, %special-cases ]
+ Q_5->addIncoming(Q_4, LoopExit);
+ Q_5->addIncoming(RetVal, SpecialCases);
+
+ return Q_5;
+}
+
+/// Generate code to calculate the remainder of two integers, replacing Rem with
+/// the generated code. This currently generates code using the udiv expansion,
+/// but future work includes generating more specialized code, e.g. when more
+/// information about the operands are known. Currently only implements 32bit
+/// scalar division (due to udiv's limitation), but future work is removing this
+/// limitation.
+///
+/// Rem must be an srem or urem instruction; it is erased by this call. Always
+/// returns true.
+///
+/// @brief Replace Rem with generated code.
+bool llvm::expandRemainder(BinaryOperator *Rem) {
+  assert((Rem->getOpcode() == Instruction::SRem ||
+          Rem->getOpcode() == Instruction::URem) &&
+         "Trying to expand remainder from a non-remainder function");
+
+  IRBuilder<> Builder(Rem);
+
+  // First prepare the sign if it's a signed remainder
+  if (Rem->getOpcode() == Instruction::SRem) {
+    Value *Remainder = generateSignedRemainderCode(Rem->getOperand(0),
+                                                   Rem->getOperand(1), Builder);
+
+    // The helper only moves the insert point when it really emitted a urem
+    // instruction. Inspect the insert point while Rem is still alive: if it
+    // still designates Rem, the urem was folded away, and once Rem is erased
+    // below the insert point would refer to a deleted instruction.
+    BinaryOperator *URem = 0;
+    if (Builder.GetInsertPoint() != BasicBlock::iterator(Rem))
+      URem = dyn_cast<BinaryOperator>(Builder.GetInsertPoint());
+
+    Rem->replaceAllUsesWith(Remainder);
+    Rem->dropAllReferences();
+    Rem->eraseFromParent();
+
+    // If we didn't actually generate a urem instruction, we're done
+    if (!URem || URem->getOpcode() != Instruction::URem)
+      return true;
+
+    Rem = URem;
+  }
+
+  Value *Remainder = generatedUnsignedRemainderCode(Rem->getOperand(0),
+                                                    Rem->getOperand(1),
+                                                    Builder);
+
+  // Same precaution for the udiv: find out whether one was emitted before Rem
+  // (the insert point when nothing was emitted) is destroyed.
+  BinaryOperator *UDiv = 0;
+  if (Builder.GetInsertPoint() != BasicBlock::iterator(Rem))
+    UDiv = dyn_cast<BinaryOperator>(Builder.GetInsertPoint());
+
+  Rem->replaceAllUsesWith(Remainder);
+  Rem->dropAllReferences();
+  Rem->eraseFromParent();
+
+  // Expand the udiv
+  if (UDiv) {
+    assert(UDiv->getOpcode() == Instruction::UDiv && "Non-udiv in expansion?");
+    expandDivision(UDiv);
+  }
+
+  return true;
+}
+
+
+/// Generate code to divide two integers, replacing Div with the generated
+/// code. This currently generates code similarly to compiler-rt's
+/// implementations, but future work includes generating more specialized code
+/// when more information about the operands are known. Currently only
+/// implements 32bit scalar division, but future work is removing this
+/// limitation.
+///
+/// Div must be an sdiv or udiv instruction of non-vector type; it is erased by
+/// this call. Always returns true.
+///
+/// @brief Replace Div with generated code.
+bool llvm::expandDivision(BinaryOperator *Div) {
+  assert((Div->getOpcode() == Instruction::SDiv ||
+          Div->getOpcode() == Instruction::UDiv) &&
+         "Trying to expand division from a non-division function");
+
+  IRBuilder<> Builder(Div);
+
+  if (Div->getType()->isVectorTy())
+    llvm_unreachable("Div over vectors not supported");
+
+  // First prepare the sign if it's a signed division
+  if (Div->getOpcode() == Instruction::SDiv) {
+    // Lower the code to unsigned division, and reset Div to point to the udiv.
+    Value *Quotient = generateSignedDivisionCode(Div->getOperand(0),
+                                                 Div->getOperand(1), Builder);
+
+    // The helper only moves the insert point when it really emitted a udiv
+    // instruction. Inspect the insert point while Div is still alive: if it
+    // still designates Div, the udiv was folded away, and once Div is erased
+    // below the insert point would refer to a deleted instruction.
+    BinaryOperator *UDiv = 0;
+    if (Builder.GetInsertPoint() != BasicBlock::iterator(Div))
+      UDiv = dyn_cast<BinaryOperator>(Builder.GetInsertPoint());
+
+    Div->replaceAllUsesWith(Quotient);
+    Div->dropAllReferences();
+    Div->eraseFromParent();
+
+    // If we didn't actually generate a udiv instruction, we're done
+    if (!UDiv || UDiv->getOpcode() != Instruction::UDiv)
+      return true;
+
+    Div = UDiv;
+  }
+
+  // Insert the unsigned division code
+  Value *Quotient = generateUnsignedDivisionCode(Div->getOperand(0),
+                                                 Div->getOperand(1),
+                                                 Builder);
+  Div->replaceAllUsesWith(Quotient);
+  Div->dropAllReferences();
+  Div->eraseFromParent();
+
+  return true;
+}
+
+/// Generate code to compute the remainder of two integers of bitwidth up to
+/// 32 bits. Uses the above routines and extends the inputs/truncates the
+/// outputs to operate in 32 bits; that is, these routines are good for targets
+/// that have no or very little support for smaller than 32 bit integer
+/// arithmetic.
+///
+/// Rem must be an srem or urem instruction of non-vector integer type no wider
+/// than 32 bits; it is erased by this call. Signed remainders are
+/// sign-extended, unsigned ones zero-extended.
+///
+/// @brief Replace Rem with emulation code.
+bool llvm::expandRemainderUpTo32Bits(BinaryOperator *Rem) {
+  assert((Rem->getOpcode() == Instruction::SRem ||
+          Rem->getOpcode() == Instruction::URem) &&
+         "Trying to expand remainder from a non-remainder function");
+
+  Type *RemTy = Rem->getType();
+  if (RemTy->isVectorTy())
+    llvm_unreachable("Div over vectors not supported");
+
+  unsigned RemTyBitWidth = RemTy->getIntegerBitWidth();
+
+  if (RemTyBitWidth > 32)
+    llvm_unreachable("Div of bitwidth greater than 32 not supported");
+
+  if (RemTyBitWidth == 32)
+    return expandRemainder(Rem);
+
+  // If bitwidth smaller than 32 extend inputs, truncate output and proceed
+  // with 32 bit division.
+  IRBuilder<> Builder(Rem);
+
+  Value *ExtDividend;
+  Value *ExtDivisor;
+  Value *ExtRem;
+  Value *Trunc;
+  Type *Int32Ty = Builder.getInt32Ty();
+
+  if (Rem->getOpcode() == Instruction::SRem) {
+    ExtDividend = Builder.CreateSExt(Rem->getOperand(0), Int32Ty);
+    ExtDivisor = Builder.CreateSExt(Rem->getOperand(1), Int32Ty);
+    ExtRem = Builder.CreateSRem(ExtDividend, ExtDivisor);
+  } else {
+    ExtDividend = Builder.CreateZExt(Rem->getOperand(0), Int32Ty);
+    ExtDivisor = Builder.CreateZExt(Rem->getOperand(1), Int32Ty);
+    ExtRem = Builder.CreateURem(ExtDividend, ExtDivisor);
+  }
+  Trunc = Builder.CreateTrunc(ExtRem, RemTy);
+
+  Rem->replaceAllUsesWith(Trunc);
+  Rem->dropAllReferences();
+  Rem->eraseFromParent();
+
+  // With constant operands the builder folds the widened remainder instead of
+  // emitting an instruction. There is then nothing left to expand, and
+  // cast<BinaryOperator> would be invalid on the folded value.
+  BinaryOperator *ExtRemInst = dyn_cast<BinaryOperator>(ExtRem);
+  if (!ExtRemInst)
+    return true;
+
+  return expandRemainder(ExtRemInst);
+}
+
+
+/// Generate code to divide two integers of bitwidth up to 32 bits. Uses the
+/// above routines and extends the inputs/truncates the outputs to operate
+/// in 32 bits; that is, these routines are good for targets that have no
+/// or very little support for smaller than 32 bit integer arithmetic.
+///
+/// Div must be an sdiv or udiv instruction of non-vector integer type no wider
+/// than 32 bits; it is erased by this call. Signed divisions are
+/// sign-extended, unsigned ones zero-extended.
+///
+/// @brief Replace Div with emulation code.
+bool llvm::expandDivisionUpTo32Bits(BinaryOperator *Div) {
+  assert((Div->getOpcode() == Instruction::SDiv ||
+          Div->getOpcode() == Instruction::UDiv) &&
+         "Trying to expand division from a non-division function");
+
+  Type *DivTy = Div->getType();
+  if (DivTy->isVectorTy())
+    llvm_unreachable("Div over vectors not supported");
+
+  unsigned DivTyBitWidth = DivTy->getIntegerBitWidth();
+
+  if (DivTyBitWidth > 32)
+    llvm_unreachable("Div of bitwidth greater than 32 not supported");
+
+  if (DivTyBitWidth == 32)
+    return expandDivision(Div);
+
+  // If bitwidth smaller than 32 extend inputs, truncate output and proceed
+  // with 32 bit division.
+  IRBuilder<> Builder(Div);
+
+  Value *ExtDividend;
+  Value *ExtDivisor;
+  Value *ExtDiv;
+  Value *Trunc;
+  Type *Int32Ty = Builder.getInt32Ty();
+
+  if (Div->getOpcode() == Instruction::SDiv) {
+    ExtDividend = Builder.CreateSExt(Div->getOperand(0), Int32Ty);
+    ExtDivisor = Builder.CreateSExt(Div->getOperand(1), Int32Ty);
+    ExtDiv = Builder.CreateSDiv(ExtDividend, ExtDivisor);
+  } else {
+    ExtDividend = Builder.CreateZExt(Div->getOperand(0), Int32Ty);
+    ExtDivisor = Builder.CreateZExt(Div->getOperand(1), Int32Ty);
+    ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor);
+  }
+  Trunc = Builder.CreateTrunc(ExtDiv, DivTy);
+
+  Div->replaceAllUsesWith(Trunc);
+  Div->dropAllReferences();
+  Div->eraseFromParent();
+
+  // With constant operands the builder folds the widened division instead of
+  // emitting an instruction. There is then nothing left to expand, and
+  // cast<BinaryOperator> would be invalid on the folded value.
+  BinaryOperator *ExtDivInst = dyn_cast<BinaryOperator>(ExtDiv);
+  if (!ExtDivInst)
+    return true;
+
+  return expandDivision(ExtDivInst);
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
new file mode 100644
index 000000000000..f15e8d59276b
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
@@ -0,0 +1,281 @@
+//===-- LCSSA.cpp - Convert loops into loop-closed SSA form ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass transforms loops by placing phi nodes at the end of the loops for
+// all values that are live across the loop boundary. For example, it turns
+// the left into the right code:
+//
+// for (...) for (...)
+// if (c) if (c)
+// X1 = ... X1 = ...
+// else else
+// X2 = ... X2 = ...
+// X3 = phi(X1, X2) X3 = phi(X1, X2)
+// ... = X3 + 4 X4 = phi(X3)
+// ... = X4 + 4
+//
+// This is still valid LLVM; the extra phi nodes are purely redundant, and will
+// be trivially eliminated by InstCombine. The major benefit of this
+// transformation is that it makes many other loop optimizations, such as
+// LoopUnswitching, simpler.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lcssa"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/PredIteratorCache.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+using namespace llvm;
+
+STATISTIC(NumLCSSA, "Number of live out of a loop variables");
+
+namespace {
+ /// LCSSA - Loop pass that rewrites uses, outside a loop, of values defined
+ /// inside it so that they go through PHI nodes placed in the loop's exit
+ /// blocks (loop-closed SSA form).
+ struct LCSSA : public LoopPass {
+ static char ID; // Pass identification, replacement for typeid
+ LCSSA() : LoopPass(ID) {
+ initializeLCSSAPass(*PassRegistry::getPassRegistry());
+ }
+
+ // Cached analysis information for the current function.
+ // All of these are (re)assigned at the start of every runOnLoop call.
+ DominatorTree *DT;
+ LoopInfo *LI;
+ // Fetched with getAnalysisIfAvailable, so this may be null.
+ ScalarEvolution *SE;
+ // Predecessor cache used while building exit-block PHIs; cleared at the end
+ // of runOnLoop.
+ PredIteratorCache PredCache;
+ // The loop currently being processed.
+ Loop *L;
+
+ virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+ /// This transformation requires natural loop information & requires that
+ /// loop preheaders be inserted into the CFG. It maintains both of these,
+ /// as well as the CFG. It also requires dominator information.
+ ///
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<LoopInfo>();
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addPreserved<ScalarEvolution>();
+ }
+ private:
+ /// Rewrite the uses of Inst that lie outside the current loop; returns true
+ /// if anything was rewritten.
+ bool ProcessInstruction(Instruction *Inst,
+ const SmallVectorImpl<BasicBlock*> &ExitBlocks);
+
+ /// verifyAnalysis() - Verify loop nest.
+ virtual void verifyAnalysis() const {
+ // Check the special guarantees that LCSSA makes.
+ assert(L->isLCSSAForm(*DT) && "LCSSA form not preserved!");
+ }
+ };
+}
+
+// Storage for the pass ID.
+char LCSSA::ID = 0;
+// Pass registration for LCSSA under the argument string "lcssa", declaring
+// DominatorTree and LoopInfo as its dependencies.
+INITIALIZE_PASS_BEGIN(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_END(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
+
+// Factory returning a newly allocated LCSSA pass, and the pass ID published
+// under the llvm namespace (the LCSSA type itself is in an anonymous
+// namespace).
+Pass *llvm::createLCSSAPass() { return new LCSSA(); }
+char &llvm::LCSSAID = LCSSA::ID;
+
+
+/// BlockDominatesAnExit - Return true if, according to DT, BB dominates at
+/// least one of the blocks listed in ExitBlocks.
+static bool BlockDominatesAnExit(BasicBlock *BB,
+                     const SmallVectorImpl<BasicBlock*> &ExitBlocks,
+                                 DominatorTree *DT) {
+  DomTreeNode *BBNode = DT->getNode(BB);
+
+  for (SmallVectorImpl<BasicBlock*>::const_iterator Exit = ExitBlocks.begin(),
+         ExitEnd = ExitBlocks.end(); Exit != ExitEnd; ++Exit) {
+    DomTreeNode *ExitNode = DT->getNode(*Exit);
+    if (DT->dominates(BBNode, ExitNode))
+      return true;
+  }
+
+  return false;
+}
+
+
+/// runOnLoop - Put TheLoop into loop-closed SSA form: every instruction of the
+/// loop that is used outside of it gets those uses rewritten through PHI nodes
+/// in the exit blocks. Returns true if the IR was changed.
+bool LCSSA::runOnLoop(Loop *TheLoop, LPPassManager &LPM) {
+  L = TheLoop;
+
+  DT = &getAnalysis<DominatorTree>();
+  LI = &getAnalysis<LoopInfo>();
+  SE = getAnalysisIfAvailable<ScalarEvolution>();
+
+  // Collect the loop's exit blocks; a loop without any has nothing to rewrite.
+  SmallVector<BasicBlock*, 8> ExitBlocks;
+  L->getExitBlocks(ExitBlocks);
+  if (ExitBlocks.empty())
+    return false;
+
+  // Scan the loop's instructions for ones used outside the loop and rewrite
+  // those uses.
+  bool Changed = false;
+
+  for (Loop::block_iterator BlockIt = L->block_begin(),
+         BlockEnd = L->block_end(); BlockIt != BlockEnd; ++BlockIt) {
+    BasicBlock *BB = *BlockIt;
+
+    // Dominance shortcut for large loops: a block that dominates none of the
+    // loop exits cannot define a value that is used outside the loop, so its
+    // instructions need not be scanned.
+    if (!BlockDominatesAnExit(BB, ExitBlocks, DT))
+      continue;
+
+    for (BasicBlock::iterator I = BB->begin(), IEnd = BB->end();
+         I != IEnd; ++I) {
+      // Fast reject #1: no uses at all (e.g. stores).
+      if (I->use_empty())
+        continue;
+
+      // Fast reject #2: a single use by a non-PHI instruction of this block.
+      if (I->hasOneUse()) {
+        Instruction *OnlyUser = I->use_back();
+        if (OnlyUser->getParent() == BB && !isa<PHINode>(OnlyUser))
+          continue;
+      }
+
+      if (ProcessInstruction(I, ExitBlocks))
+        Changed = true;
+    }
+  }
+
+  // If we modified the code, remove any caches about the loop from SCEV to
+  // avoid dangling entries.
+  // FIXME: This is a big hammer, can we clear the cache more selectively?
+  if (SE && Changed)
+    SE->forgetLoop(L);
+
+  assert(L->isLCSSAForm(*DT));
+  PredCache.clear();
+
+  return Changed;
+}
+
+/// isExitBlock - Return true if BB appears in ExitBlocks.
+static bool isExitBlock(BasicBlock *BB,
+                        const SmallVectorImpl<BasicBlock*> &ExitBlocks) {
+  for (SmallVectorImpl<BasicBlock*>::const_iterator Exit = ExitBlocks.begin(),
+         ExitEnd = ExitBlocks.end(); Exit != ExitEnd; ++Exit) {
+    if (*Exit == BB)
+      return true;
+  }
+  return false;
+}
+
+/// ProcessInstruction - Given an instruction in the loop, check to see if it
+/// has any uses that are outside the current loop. If so, insert LCSSA PHI
+/// nodes and rewrite the uses.
+///
+/// \param Inst        the instruction, located in the current loop L.
+/// \param ExitBlocks  the exit blocks of L.
+/// \returns true if at least one use was found outside the loop (and was
+///          rewritten), false if nothing had to be done.
+bool LCSSA::ProcessInstruction(Instruction *Inst,
+ const SmallVectorImpl<BasicBlock*> &ExitBlocks) {
+ SmallVector<Use*, 16> UsesToRewrite;
+
+ BasicBlock *InstBB = Inst->getParent();
+
+ // Collect every use whose using block is outside the loop. For a PHI user
+ // the relevant block is the incoming block of this particular use, not the
+ // block that contains the PHI.
+ for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end();
+ UI != E; ++UI) {
+ User *U = *UI;
+ BasicBlock *UserBB = cast<Instruction>(U)->getParent();
+ if (PHINode *PN = dyn_cast<PHINode>(U))
+ UserBB = PN->getIncomingBlock(UI);
+
+ if (InstBB != UserBB && !L->contains(UserBB))
+ UsesToRewrite.push_back(&UI.getUse());
+ }
+
+ // If there are no uses outside the loop, exit with no change.
+ if (UsesToRewrite.empty()) return false;
+
+ ++NumLCSSA; // We are applying the transformation
+
+ // Invoke instructions are special in that their result value is not available
+ // along their unwind edge. The code below tests to see whether DomBB dominates
+ // the value, so adjust DomBB to the normal destination block, which is
+ // effectively where the value is first usable.
+ BasicBlock *DomBB = Inst->getParent();
+ if (InvokeInst *Inv = dyn_cast<InvokeInst>(Inst))
+ DomBB = Inv->getNormalDest();
+
+ DomTreeNode *DomNode = DT->getNode(DomBB);
+
+ // PHIs created below, remembered so that unused ones can be deleted at the
+ // end.
+ SmallVector<PHINode*, 16> AddedPHIs;
+
+ SSAUpdater SSAUpdate;
+ SSAUpdate.Initialize(Inst->getType(), Inst->getName());
+
+ // Insert the LCSSA phi's into all of the exit blocks dominated by the
+ // value, and add them to the Phi's map.
+ for (SmallVectorImpl<BasicBlock*>::const_iterator BBI = ExitBlocks.begin(),
+ BBE = ExitBlocks.end(); BBI != BBE; ++BBI) {
+ BasicBlock *ExitBB = *BBI;
+ if (!DT->dominates(DomNode, DT->getNode(ExitBB))) continue;
+
+ // If we already inserted something for this BB, don't reprocess it.
+ if (SSAUpdate.HasValueForBlock(ExitBB)) continue;
+
+ // The new PHI is named "<inst name>.lcssa" and goes at the start of the
+ // exit block.
+ PHINode *PN = PHINode::Create(Inst->getType(),
+ PredCache.GetNumPreds(ExitBB),
+ Inst->getName()+".lcssa",
+ ExitBB->begin());
+
+ // Add inputs from inside the loop for this PHI.
+ for (BasicBlock **PI = PredCache.GetPreds(ExitBB); *PI; ++PI) {
+ PN->addIncoming(Inst, *PI);
+
+ // If the exit block has a predecessor not within the loop, arrange for
+ // the incoming value use corresponding to that predecessor to be
+ // rewritten in terms of a different LCSSA PHI.
+ if (!L->contains(*PI))
+ UsesToRewrite.push_back(
+ &PN->getOperandUse(
+ PN->getOperandNumForIncomingValue(PN->getNumIncomingValues()-1)));
+ }
+
+ AddedPHIs.push_back(PN);
+
+ // Remember that this phi makes the value alive in this block.
+ SSAUpdate.AddAvailableValue(ExitBB, PN);
+ }
+
+ // Rewrite all uses outside the loop in terms of the new PHIs we just
+ // inserted.
+ for (unsigned i = 0, e = UsesToRewrite.size(); i != e; ++i) {
+ // If this use is in an exit block, rewrite to use the newly inserted PHI.
+ // This is required for correctness because SSAUpdate doesn't handle uses in
+ // the same block. It assumes the PHI we inserted is at the end of the
+ // block.
+ Instruction *User = cast<Instruction>(UsesToRewrite[i]->getUser());
+ BasicBlock *UserBB = User->getParent();
+ if (PHINode *PN = dyn_cast<PHINode>(User))
+ UserBB = PN->getIncomingBlock(*UsesToRewrite[i]);
+
+ // The use is redirected to the first instruction of UserBB, which the
+ // isa<PHINode> check guarantees is a PHI.
+ if (isa<PHINode>(UserBB->begin()) &&
+ isExitBlock(UserBB, ExitBlocks)) {
+ // Tell the VHs that the uses changed. This updates SCEV's caches.
+ if (UsesToRewrite[i]->get()->hasValueHandle())
+ ValueHandleBase::ValueIsRAUWd(*UsesToRewrite[i], UserBB->begin());
+ UsesToRewrite[i]->set(UserBB->begin());
+ continue;
+ }
+
+ // Otherwise, do full PHI insertion.
+ SSAUpdate.RewriteUse(*UsesToRewrite[i]);
+ }
+
+ // Remove PHI nodes that did not have any uses rewritten.
+ for (unsigned i = 0, e = AddedPHIs.size(); i != e; ++i) {
+ if (AddedPHIs[i]->use_empty())
+ AddedPHIs[i]->eraseFromParent();
+ }
+
+ return true;
+}
+
diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp
new file mode 100644
index 000000000000..2768041fb2b9
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp
@@ -0,0 +1,1273 @@
+//===-- Local.cpp - Functions to perform local transformations ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions performs various local transformations to the
+// program.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/DIBuilder.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+// Statistic counter declared through the STATISTIC macro. None of the
+// functions in this part of the file touch it; presumably it is bumped by the
+// unreachable-block removal code further down -- confirm before relying on it.
+STATISTIC(NumRemoved, "Number of unreachable basic blocks removed");
+
+//===----------------------------------------------------------------------===//
+// Local constant propagation.
+//
+
+/// ConstantFoldTerminator - If a terminator instruction is predicated on a
+/// constant value, convert it into an unconditional branch to the constant
+/// destination. This is a nontrivial operation because the successors of this
+/// basic block must have their PHI nodes updated.
+/// Also calls RecursivelyDeleteTriviallyDeadInstructions() on any branch/switch
+/// conditions and indirectbr addresses this might make dead if
+/// DeleteDeadConditions is true.
+///
+/// Three terminator kinds are handled: conditional branches, switches and
+/// indirect branches. Any other terminator is left alone.
+///
+/// \param BB The block whose terminator is examined.
+/// \param DeleteDeadConditions Whether to clean up a condition/address that
+/// becomes unused once the terminator has been replaced.
+/// \param TLI Forwarded to RecursivelyDeleteTriviallyDeadInstructions().
+/// \returns true if the terminator was rewritten, false if nothing changed.
+bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
+ const TargetLibraryInfo *TLI) {
+ TerminatorInst *T = BB->getTerminator();
+ // Builder is constructed from the old terminator; every replacement
+ // instruction below is created through it before the old one is erased.
+ IRBuilder<> Builder(T);
+
+ // Branch - See if we are conditional jumping on constant
+ if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
+ if (BI->isUnconditional()) return false; // Can't optimize uncond branch
+ BasicBlock *Dest1 = BI->getSuccessor(0);
+ BasicBlock *Dest2 = BI->getSuccessor(1);
+
+ if (ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition())) {
+ // Are we branching on constant?
+ // YES. Change to unconditional branch...
+ // A non-zero condition selects successor 0, zero selects successor 1.
+ BasicBlock *Destination = Cond->getZExtValue() ? Dest1 : Dest2;
+ BasicBlock *OldDest = Cond->getZExtValue() ? Dest2 : Dest1;
+
+ //cerr << "Function: " << T->getParent()->getParent()
+ // << "\nRemoving branch from " << T->getParent()
+ // << "\n\nTo: " << OldDest << endl;
+
+ // Let the basic block know that we are letting go of it. Based on this,
+ // it will adjust its PHI nodes.
+ OldDest->removePredecessor(BB);
+
+ // Replace the conditional branch with an unconditional one.
+ // (The condition is a constant here, so there is nothing to clean up.)
+ Builder.CreateBr(Destination);
+ BI->eraseFromParent();
+ return true;
+ }
+
+ if (Dest2 == Dest1) { // Conditional branch to same location?
+ // This branch matches something like this:
+ // br bool %cond, label %Dest, label %Dest
+ // and changes it into: br label %Dest
+
+ // Let the basic block know that we are letting go of one copy of it.
+ assert(BI->getParent() && "Terminator not inserted in block!");
+ Dest1->removePredecessor(BI->getParent());
+
+ // Replace the conditional branch with an unconditional one.
+ Builder.CreateBr(Dest1);
+ // Grab the condition before erasing the branch so it can still be
+ // inspected for deadness afterwards.
+ Value *Cond = BI->getCondition();
+ BI->eraseFromParent();
+ if (DeleteDeadConditions)
+ RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI);
+ return true;
+ }
+ return false;
+ }
+
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
+ // If we are switching on a constant, we can convert the switch into a
+ // single branch instruction!
+ // CI is null when the switch condition is not a constant integer.
+ ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition());
+ BasicBlock *TheOnlyDest = SI->getDefaultDest();
+ BasicBlock *DefaultDest = TheOnlyDest;
+
+ // Figure out which case it goes to.
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i) {
+ // Found case matching a constant operand?
+ if (i.getCaseValue() == CI) {
+ TheOnlyDest = i.getCaseSuccessor();
+ break;
+ }
+
+ // Check to see if this branch is going to the same place as the default
+ // dest. If so, eliminate it as an explicit compare.
+ if (i.getCaseSuccessor() == DefaultDest) {
+ MDNode* MD = SI->getMetadata(LLVMContext::MD_prof);
+ // MD should have 2 + NumCases operands.
+ // Operand 0 is skipped by the loop below, which starts at index 1; the
+ // weights that follow are treated as default first, then one per case.
+ if (MD && MD->getNumOperands() == 2 + SI->getNumCases()) {
+ // Collect branch weights into a vector.
+ SmallVector<uint32_t, 8> Weights;
+ for (unsigned MD_i = 1, MD_e = MD->getNumOperands(); MD_i < MD_e;
+ ++MD_i) {
+ // NOTE: this CI shadows the switch-condition CI declared above; it is
+ // only visible inside this loop body.
+ ConstantInt* CI = dyn_cast<ConstantInt>(MD->getOperand(MD_i));
+ assert(CI);
+ Weights.push_back(CI->getValue().getZExtValue());
+ }
+ // Merge weight of this case to the default weight.
+ unsigned idx = i.getCaseIndex();
+ Weights[0] += Weights[idx+1];
+ // Remove weight for this case.
+ // Swap-with-last then pop; presumably this mirrors how removeCase()
+ // reorders the remaining cases -- confirm against SwitchInst.
+ std::swap(Weights[idx+1], Weights.back());
+ Weights.pop_back();
+ SI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(BB->getContext()).
+ createBranchWeights(Weights));
+ }
+ // Remove this entry.
+ DefaultDest->removePredecessor(SI->getParent());
+ SI->removeCase(i);
+ // Step the cursor and the end marker back by one so that, after the
+ // loop's ++i, the slot just vacated is examined again and the shortened
+ // case list is not overrun.
+ --i; --e;
+ continue;
+ }
+
+ // Otherwise, check to see if the switch only branches to one destination.
+ // We do this by resetting "TheOnlyDest" to null when we find two non-equal
+ // destinations.
+ if (i.getCaseSuccessor() != TheOnlyDest) TheOnlyDest = 0;
+ }
+
+ if (CI && !TheOnlyDest) {
+ // Branching on a constant, but not any of the cases, go to the default
+ // successor.
+ TheOnlyDest = SI->getDefaultDest();
+ }
+
+ // If we found a single destination that we can fold the switch into, do so
+ // now.
+ if (TheOnlyDest) {
+ // Insert the new branch.
+ Builder.CreateBr(TheOnlyDest);
+ // NOTE: this local BB shadows the function parameter of the same name.
+ BasicBlock *BB = SI->getParent();
+
+ // Remove entries from PHI nodes which we no longer branch to...
+ for (unsigned i = 0, e = SI->getNumSuccessors(); i != e; ++i) {
+ // Found case matching a constant operand?
+ BasicBlock *Succ = SI->getSuccessor(i);
+ // The first edge to TheOnlyDest is kept (it is now the new branch's
+ // edge). Clearing TheOnlyDest afterwards makes every later edge, including
+ // duplicates to the same block, get removed as a predecessor.
+ if (Succ == TheOnlyDest)
+ TheOnlyDest = 0; // Don't modify the first branch to TheOnlyDest
+ else
+ Succ->removePredecessor(BB);
+ }
+
+ // Delete the old switch.
+ Value *Cond = SI->getCondition();
+ SI->eraseFromParent();
+ if (DeleteDeadConditions)
+ RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI);
+ return true;
+ }
+
+ if (SI->getNumCases() == 1) {
+ // Otherwise, we can fold this switch into a conditional branch
+ // instruction if it has only one non-default destination.
+ SwitchInst::CaseIt FirstCase = SI->case_begin();
+ Value *Cond = Builder.CreateICmpEQ(SI->getCondition(),
+ FirstCase.getCaseValue(), "cond");
+
+ // Insert the new branch.
+ BranchInst *NewBr = Builder.CreateCondBr(Cond,
+ FirstCase.getCaseSuccessor(),
+ SI->getDefaultDest());
+ MDNode* MD = SI->getMetadata(LLVMContext::MD_prof);
+ // Three operands: one leading operand, the default weight (operand 1)
+ // and the weight of the single case (operand 2).
+ if (MD && MD->getNumOperands() == 3) {
+ ConstantInt *SICase = dyn_cast<ConstantInt>(MD->getOperand(2));
+ ConstantInt *SIDef = dyn_cast<ConstantInt>(MD->getOperand(1));
+ assert(SICase && SIDef);
+ // The TrueWeight should be the weight for the single case of SI.
+ NewBr->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(BB->getContext()).
+ createBranchWeights(SICase->getValue().getZExtValue(),
+ SIDef->getValue().getZExtValue()));
+ }
+
+ // Delete the old switch.
+ // The switch condition is still used by the new compare, so no dead
+ // condition cleanup is attempted here.
+ SI->eraseFromParent();
+ return true;
+ }
+ return false;
+ }
+
+ if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(T)) {
+ // indirectbr blockaddress(@F, @BB) -> br label @BB
+ if (BlockAddress *BA =
+ dyn_cast<BlockAddress>(IBI->getAddress()->stripPointerCasts())) {
+ BasicBlock *TheOnlyDest = BA->getBasicBlock();
+ // Insert the new branch.
+ Builder.CreateBr(TheOnlyDest);
+
+ // Keep the first listed edge to the target and drop every other edge.
+ // TheOnlyDest is cleared once the target has been seen, so it doubles as a
+ // "target was found in the destination list" flag afterwards.
+ for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
+ if (IBI->getDestination(i) == TheOnlyDest)
+ TheOnlyDest = 0;
+ else
+ IBI->getDestination(i)->removePredecessor(IBI->getParent());
+ }
+ Value *Address = IBI->getAddress();
+ IBI->eraseFromParent();
+ if (DeleteDeadConditions)
+ RecursivelyDeleteTriviallyDeadInstructions(Address, TLI);
+
+ // If we didn't find our destination in the IBI successor list, then we
+ // have undefined behavior. Replace the unconditional branch with an
+ // 'unreachable' instruction.
+ if (TheOnlyDest) {
+ BB->getTerminator()->eraseFromParent();
+ new UnreachableInst(BB->getContext(), BB);
+ }
+
+ return true;
+ }
+ }
+
+ return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Local dead code elimination.
+//
+
+/// isInstructionTriviallyDead - Decide whether \p I can simply be deleted:
+/// its result must be unused and removing it must not drop an observable
+/// effect.
+///
+/// \param I The instruction to inspect.
+/// \param TLI Passed on to the allocation/free recognisers.
+/// \returns true if the instruction is unused and safe to erase.
+bool llvm::isInstructionTriviallyDead(Instruction *I,
+                                      const TargetLibraryInfo *TLI) {
+  // Terminators and anything that still has users must stay.
+  if (isa<TerminatorInst>(I) || !I->use_empty())
+    return false;
+
+  // Landing pads are never removed by a utility this general.
+  if (isa<LandingPadInst>(I))
+    return false;
+
+  // Debug intrinsics are likewise preserved, unless they no longer describe
+  // anything (no address / no value attached).
+  if (DbgDeclareInst *Declare = dyn_cast<DbgDeclareInst>(I))
+    return !Declare->getAddress();
+  if (DbgValueInst *DbgVal = dyn_cast<DbgValueInst>(I))
+    return !DbgVal->getValue();
+
+  if (!I->mayHaveSideEffects())
+    return true;
+
+  // A few intrinsics are flagged as having side effects yet are still safe to
+  // drop once nobody uses them.
+  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+    switch (II->getIntrinsicID()) {
+    case Intrinsic::stacksave:
+      // An unused llvm.stacksave can always go.
+      return true;
+    case Intrinsic::lifetime_start:
+    case Intrinsic::lifetime_end:
+      // Lifetime markers are dead exactly when their pointer operand is undef.
+      return isa<UndefValue>(II->getArgOperand(1));
+    default:
+      break;
+    }
+  }
+
+  // An allocation whose result is never used.
+  if (isAllocLikeFn(I, TLI))
+    return true;
+
+  // free(null) and free(undef) do nothing.
+  if (CallInst *FreeCall = isFreeCall(I, TLI)) {
+    Constant *Freed = dyn_cast<Constant>(FreeCall->getArgOperand(0));
+    if (Freed)
+      return Freed->isNullValue() || isa<UndefValue>(Freed);
+  }
+
+  return false;
+}
+
+/// RecursivelyDeleteTriviallyDeadInstructions - Erase \p V if it is a
+/// trivially dead instruction, then keep erasing any operand instructions
+/// that become trivially dead as a consequence.
+///
+/// \param V Candidate value; non-instructions are ignored.
+/// \param TLI Forwarded to isInstructionTriviallyDead().
+/// \returns true if at least one instruction was deleted.
+bool
+llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V,
+                                                 const TargetLibraryInfo *TLI) {
+  Instruction *Root = dyn_cast<Instruction>(V);
+  if (!Root)
+    return false;
+  if (!Root->use_empty() || !isInstructionTriviallyDead(Root, TLI))
+    return false;
+
+  // Worklist of instructions already known to be dead.
+  SmallVector<Instruction*, 16> Worklist;
+  Worklist.push_back(Root);
+
+  while (!Worklist.empty()) {
+    Instruction *Dead = Worklist.pop_back_val();
+
+    // Detach the operands one at a time; each detachment may leave the
+    // operand without users, which makes it a deletion candidate itself.
+    const unsigned NumOps = Dead->getNumOperands();
+    for (unsigned OpIdx = 0; OpIdx != NumOps; ++OpIdx) {
+      Value *Operand = Dead->getOperand(OpIdx);
+      Dead->setOperand(OpIdx, 0);
+
+      if (Operand->use_empty()) {
+        // Only instructions that are themselves trivially dead are queued;
+        // they are erased on a later trip around the outer loop.
+        Instruction *OperandInst = dyn_cast<Instruction>(Operand);
+        if (OperandInst && isInstructionTriviallyDead(OperandInst, TLI))
+          Worklist.push_back(OperandInst);
+      }
+    }
+
+    Dead->eraseFromParent();
+  }
+
+  return true;
+}
+
+/// areAllUsesEqual - Return true when every use of \p I comes from one and
+/// the same user. Unlike Instruction::hasOneUse(), this also holds when
+/// there are no uses at all, or several uses that all belong to that single
+/// user.
+static bool areAllUsesEqual(Instruction *I) {
+  User *OnlyUser = 0;
+  for (Value::use_iterator It = I->use_begin(), End = I->use_end();
+       It != End; ++It) {
+    if (!OnlyUser)
+      OnlyUser = *It;           // First use seen: remember who it belongs to.
+    else if (*It != OnlyUser)
+      return false;             // A second, different user.
+  }
+  return true;
+}
+
+/// RecursivelyDeleteDeadPHINode - Delete \p PN if it is effectively dead,
+/// i.e. it heads a chain of side-effect-free, single-user instructions that
+/// either ends in an unused instruction or loops back on itself. Operands
+/// that become trivially dead are deleted as well.
+///
+/// \param PN The PHI node to start from.
+/// \param TLI Forwarded to RecursivelyDeleteTriviallyDeadInstructions().
+/// \returns true if anything was changed.
+bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN,
+                                        const TargetLibraryInfo *TLI) {
+  SmallPtrSet<Instruction*, 4> Seen;
+  Instruction *Cur = PN;
+
+  // Follow the unique user of each instruction for as long as the chain
+  // stays single-user and free of side effects.
+  while (areAllUsesEqual(Cur) && !Cur->mayHaveSideEffects()) {
+    // End of the chain: nothing uses this instruction.
+    if (Cur->use_empty())
+      return RecursivelyDeleteTriviallyDeadInstructions(Cur, TLI);
+
+    // Coming back to an instruction already visited means the chain is a
+    // cycle. Break it by replacing the instruction with undef, then delete
+    // it together with whatever operands that makes dead.
+    if (!Seen.insert(Cur)) {
+      Cur->replaceAllUsesWith(UndefValue::get(Cur->getType()));
+      (void)RecursivelyDeleteTriviallyDeadInstructions(Cur, TLI);
+      return true;
+    }
+
+    Cur = cast<Instruction>(*Cur->use_begin());
+  }
+  return false;
+}
+
+/// SimplifyInstructionsInBlock - Walk the non-terminator instructions of
+/// \p BB, simplifying each one where possible and recursively deleting
+/// instructions that turn out to be dead.
+///
+/// \returns true if the code was changed. Note that instructions in other
+/// blocks may be deleted too, not only those in \p BB.
+bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const DataLayout *TD,
+                                       const TargetLibraryInfo *TLI) {
+#ifndef NDEBUG
+  // Debug builds verify that the terminator survives untouched:
+  // simplification must not introduce new instructions, and a terminator
+  // cannot be replaced without doing so.
+  AssertingVH<Instruction> TerminatorVH(--BB->end());
+#endif
+
+  bool Changed = false;
+  BasicBlock::iterator Cursor = BB->begin();
+  BasicBlock::iterator Term = --BB->end();
+
+  while (Cursor != Term) {
+    assert(!Cursor->isTerminator());
+    Instruction *Cur = Cursor;
+    ++Cursor;
+
+    // Track the instruction we intend to visit next, so we can tell whether
+    // the work below removed or replaced it.
+    WeakVH NextHandle(Cursor);
+
+    if (recursivelySimplifyInstruction(Cur, TD, TLI))
+      Changed = true;
+    else if (RecursivelyDeleteTriviallyDeadInstructions(Cur, TLI))
+      Changed = true;
+
+    // The handle no longer matching means the cursor is stale; rescan the
+    // block from the top.
+    if (NextHandle != Cursor)
+      Cursor = BB->begin();
+  }
+  return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// Control Flow Graph Restructuring.
+//
+
+
+/// RemovePredecessorAndSimplify - Like BasicBlock::removePredecessor, this
+/// method is called when we're about to delete Pred as a predecessor of BB. If
+/// BB contains any PHI nodes, this drops the entries in the PHI nodes for Pred.
+///
+/// Unlike the removePredecessor method, this attempts to simplify uses of PHI
+/// nodes that collapse into identity values. For example, if we have:
+/// x = phi(1, 0, 0, 0)
+/// y = and x, z
+///
+/// .. and delete the predecessor corresponding to the '1', this will attempt to
+/// recursively fold the and to 0.
+///
+/// \param BB The block losing a predecessor.
+/// \param Pred The predecessor being removed.
+/// \param TD Forwarded to recursivelySimplifyInstruction().
+void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
+ DataLayout *TD) {
+ // This only adjusts blocks with PHI nodes.
+ if (!isa<PHINode>(BB->begin()))
+ return;
+
+ // Remove the entries for Pred from the PHI nodes in BB, but do not simplify
+ // them down. This will leave us with single entry phi nodes and other phis
+ // that can be removed.
+ BB->removePredecessor(Pred, true);
+
+ // PhiIt is a value handle rather than a plain iterator, so that the loop can
+ // detect when the instruction it refers to is changed by simplification.
+ WeakVH PhiIt = &BB->front();
+ while (PHINode *PN = dyn_cast<PHINode>(PhiIt)) {
+ // Step to the instruction following PN *before* simplifying PN, and keep a
+ // plain-pointer copy of that position for the comparison below.
+ PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt));
+ Value *OldPhiIt = PhiIt;
+
+ if (!recursivelySimplifyInstruction(PN, TD))
+ continue;
+
+ // If recursive simplification ended up deleting the next PHI node we would
+ // iterate to, then our iterator is invalid, restart scanning from the top
+ // of the block.
+ if (PhiIt != OldPhiIt) PhiIt = &BB->front();
+ }
+}
+
+
+/// MergeBasicBlockIntoOnlyPred - DestBB is a block with one predecessor and its
+/// predecessor is known to have one successor (DestBB!). Eliminate the edge
+/// between them, moving the instructions in the predecessor into DestBB and
+/// deleting the predecessor block.
+///
+/// If the predecessor is the function's entry block, DestBB is repositioned so
+/// that it becomes the entry block once the predecessor has been erased.
+///
+/// \param DestBB The block that survives the merge.
+/// \param P Optional pass. When non-null and it has a DominatorTree available,
+/// the tree is updated to reflect the merge.
+void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
+  // If BB has single-entry PHI nodes, fold them.
+  while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {
+    Value *NewVal = PN->getIncomingValue(0);
+    // Replace self referencing PHI with undef, it must be dead.
+    if (NewVal == PN) NewVal = UndefValue::get(PN->getType());
+    PN->replaceAllUsesWith(NewVal);
+    PN->eraseFromParent();
+  }
+
+  BasicBlock *PredBB = DestBB->getSinglePredecessor();
+  assert(PredBB && "Block doesn't have a single predecessor!");
+
+  // Zap anything that took the address of DestBB. Not doing this will give the
+  // address an invalid value.
+  if (DestBB->hasAddressTaken()) {
+    BlockAddress *BA = BlockAddress::get(DestBB);
+    Constant *Replacement =
+      ConstantInt::get(llvm::Type::getInt32Ty(BA->getContext()), 1);
+    BA->replaceAllUsesWith(ConstantExpr::getIntToPtr(Replacement,
+                                                     BA->getType()));
+    BA->destroyConstant();
+  }
+
+  // Anything that branched to PredBB now branches to DestBB.
+  PredBB->replaceAllUsesWith(DestBB);
+
+  // Splice all the instructions from PredBB to DestBB.
+  PredBB->getTerminator()->eraseFromParent();
+  DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList());
+
+  // A function's entry block is whichever block comes first in its block
+  // list. If PredBB is that block, erasing it below would promote the block
+  // that merely happens to be next in the list, which need not be DestBB.
+  // Move DestBB directly behind PredBB so DestBB takes over as entry block.
+  if (PredBB == &DestBB->getParent()->getEntryBlock())
+    DestBB->moveAfter(PredBB);
+
+  if (P) {
+    DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
+    if (DT) {
+      // NOTE(review): this dereferences PredBB's immediate dominator without
+      // a null check, so it assumes PredBB is not the root of the dominator
+      // tree -- confirm for callers that pass a Pass and merge into the
+      // entry block.
+      BasicBlock *PredBBIDom = DT->getNode(PredBB)->getIDom()->getBlock();
+      DT->changeImmediateDominator(DestBB, PredBBIDom);
+      DT->eraseNode(PredBB);
+    }
+  }
+  // Nuke BB.
+  PredBB->eraseFromParent();
+}
+
+/// CanMergeValues - Return true if one of the two values can stand in for the
+/// other: either they are the same value, or at least one of them is undef.
+/// When merging, the non-undef value is always the one that is kept.
+static bool CanMergeValues(Value *First, Value *Second) {
+  if (First == Second)
+    return true;
+  return isa<UndefValue>(First) || isa<UndefValue>(Second);
+}
+
+/// CanPropagatePredecessorsForPHIs - Return true if BB, an almost-empty block
+/// ending in an unconditional branch to Succ, can be folded into Succ without
+/// creating conflicting PHI inputs.
+///
+/// Assumption: Succ is the single successor for BB.
+///
+static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
+  assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!");
+
+  DEBUG(dbgs() << "Looking to fold " << BB->getName() << " into "
+        << Succ->getName() << "\n");
+
+  // With a single predecessor, that predecessor must be BB and merging is
+  // always safe.
+  if (Succ->getSinglePredecessor())
+    return true;
+
+  // The set of blocks that branch to BB.
+  SmallPtrSet<BasicBlock*, 16> PredsOfBB(pred_begin(BB), pred_end(BB));
+
+  // Every PHI in Succ must agree, for each block that is a predecessor of
+  // both BB and Succ, on the value that block would deliver after the merge.
+  for (BasicBlock::iterator It = Succ->begin(); isa<PHINode>(It); ++It) {
+    PHINode *SuccPhi = cast<PHINode>(It);
+    Value *FromBB = SuccPhi->getIncomingValueForBlock(BB);
+
+    // If the value arriving from BB is itself a PHI living in BB, the two
+    // PHIs get merged, so the comparison is made per predecessor against
+    // that inner PHI; otherwise the one value from BB is compared directly.
+    PHINode *InnerPhi = dyn_cast<PHINode>(FromBB);
+    const bool ThroughInnerPhi = InnerPhi && InnerPhi->getParent() == BB;
+
+    for (unsigned Idx = 0, NumIn = SuccPhi->getNumIncomingValues();
+         Idx != NumIn; ++Idx) {
+      BasicBlock *Incoming = SuccPhi->getIncomingBlock(Idx);
+      if (!PredsOfBB.count(Incoming))
+        continue; // Not a common predecessor; cannot conflict.
+
+      Value *ViaBB =
+        ThroughInnerPhi ? InnerPhi->getIncomingValueForBlock(Incoming) : FromBB;
+      if (CanMergeValues(ViaBB, SuccPhi->getIncomingValue(Idx)))
+        continue;
+
+      if (ThroughInnerPhi) {
+        DEBUG(dbgs() << "Can't fold, phi node " << SuccPhi->getName() << " in "
+              << Succ->getName() << " is conflicting with "
+              << InnerPhi->getName() << " with regard to common predecessor "
+              << Incoming->getName() << "\n");
+      } else {
+        DEBUG(dbgs() << "Can't fold, phi node " << SuccPhi->getName() << " in "
+              << Succ->getName() << " is conflicting with regard to common "
+              << "predecessor " << Incoming->getName() << "\n");
+      }
+      return false;
+    }
+  }
+
+  return true;
+}
+
+// A list of basic blocks; used below to hold the predecessors of a block.
+typedef SmallVector<BasicBlock *, 16> PredBlockVector;
+// Maps an incoming block of a phi to the value that flows in from that block.
+typedef DenseMap<BasicBlock *, Value *> IncomingValueMap;
+
+/// \brief Pick the value to feed into a phi for the edge coming from \p BB.
+///
+/// A non-undef \p OldVal always wins and is recorded in \p IncomingValues.
+/// An undef \p OldVal is replaced by the value already recorded for \p BB,
+/// if there is one; otherwise it is returned unchanged.
+///
+/// \param OldVal The value we are considering selecting.
+/// \param BB The block that the value flows in from.
+/// \param IncomingValues A map from block-to-value for other phi inputs
+/// that we have examined.
+///
+/// \returns the selected value.
+static Value *selectIncomingValueForBlock(Value *OldVal, BasicBlock *BB,
+                                          IncomingValueMap &IncomingValues) {
+  if (isa<UndefValue>(OldVal)) {
+    // Undef carries no information: prefer whatever is already known for BB.
+    IncomingValueMap::const_iterator Known = IncomingValues.find(BB);
+    return Known == IncomingValues.end() ? OldVal : Known->second;
+  }
+
+  assert((!IncomingValues.count(BB) ||
+          IncomingValues.find(BB)->second == OldVal) &&
+         "Expected OldVal to match incoming value from BB!");
+
+  // Record the concrete value (a no-op if BB is already in the map).
+  IncomingValues.insert(std::make_pair(BB, OldVal));
+  return OldVal;
+}
+
+/// \brief Collect the non-undef inputs of a phi into a block-to-value map.
+///
+/// For every incoming edge of \p PN whose value is not undef, record the
+/// (block, value) pair. A block that is already present in the map keeps its
+/// existing entry.
+///
+/// \param PN The phi we are collecting the map for.
+/// \param IncomingValues [out] The map from block to value for this phi.
+static void gatherIncomingValuesToPhi(PHINode *PN,
+                                      IncomingValueMap &IncomingValues) {
+  const unsigned NumIn = PN->getNumIncomingValues();
+  for (unsigned Idx = 0; Idx != NumIn; ++Idx) {
+    BasicBlock *From = PN->getIncomingBlock(Idx);
+    Value *In = PN->getIncomingValue(Idx);
+    if (isa<UndefValue>(In))
+      continue;
+    IncomingValues.insert(std::make_pair(From, In));
+  }
+}
+
+/// \brief Fill in the undef inputs of a phi from a block-to-value map.
+///
+/// Each incoming undef whose block has an entry in \p IncomingValues is
+/// overwritten with the mapped value; all other inputs are left untouched.
+///
+/// \param PN The phi we are replacing the undefs in.
+/// \param IncomingValues A map from block to value.
+static void replaceUndefValuesInPhi(PHINode *PN,
+                                    const IncomingValueMap &IncomingValues) {
+  const unsigned NumIn = PN->getNumIncomingValues();
+  for (unsigned Idx = 0; Idx != NumIn; ++Idx) {
+    if (!isa<UndefValue>(PN->getIncomingValue(Idx)))
+      continue;
+
+    IncomingValueMap::const_iterator Known =
+      IncomingValues.find(PN->getIncomingBlock(Idx));
+    if (Known != IncomingValues.end())
+      PN->setIncomingValue(Idx, Known->second);
+  }
+}
+
+/// \brief Replace the single input a phi receives from \p BB with one input
+/// per predecessor of \p BB.
+///
+/// \param BB The block with the value flowing into the phi.
+/// \param BBPreds The predecessors of BB.
+/// \param PN The phi that we are updating.
+static void redirectValuesFromPredecessorsToPhi(BasicBlock *BB,
+                                                const PredBlockVector &BBPreds,
+                                                PHINode *PN) {
+  Value *OldVal = PN->removeIncomingValue(BB, false);
+  assert(OldVal && "No entry in PHI for Pred BB!");
+
+  // BB is being merged into the block that holds PN, so the edges from BB's
+  // predecessors get redirected to that block and PN needs matching inputs.
+  // Merging is permitted even when both blocks share predecessors and some
+  // of those deliver undef into PN; such undefs are rewritten so that they
+  // agree with the non-undef values. Start by recording the concrete inputs
+  // PN still has.
+  IncomingValueMap IncomingValues;
+  gatherIncomingValuesToPhi(PN, IncomingValues);
+
+  PHINode *OldValPN = dyn_cast<PHINode>(OldVal);
+  if (OldValPN && OldValPN->getParent() == BB) {
+    // The old input was a phi inside BB: its inputs become PN's new inputs.
+    //
+    // Because BB and the successor may share predecessors, this can leave PN
+    // with several entries for the same incoming block. That is cleaned up
+    // later; doing it here, without also simplifying the corresponding
+    // conditional branch, would trigger asserts.
+    const unsigned NumIn = OldValPN->getNumIncomingValues();
+    for (unsigned Idx = 0; Idx != NumIn; ++Idx) {
+      BasicBlock *PredBB = OldValPN->getIncomingBlock(Idx);
+      Value *PredVal = OldValPN->getIncomingValue(Idx);
+      Value *Chosen =
+        selectIncomingValueForBlock(PredVal, PredBB, IncomingValues);
+
+      // New input for the retargeted edge from this predecessor.
+      PN->addIncoming(Chosen, PredBB);
+    }
+  } else {
+    // Otherwise every predecessor of BB delivers the same old value.
+    const unsigned NumPreds = BBPreds.size();
+    for (unsigned Idx = 0; Idx != NumPreds; ++Idx) {
+      BasicBlock *PredBB = BBPreds[Idx];
+      Value *Chosen =
+        selectIncomingValueForBlock(OldVal, PredBB, IncomingValues);
+
+      // New input for the retargeted edge from this predecessor.
+      PN->addIncoming(Chosen, PredBB);
+    }
+  }
+
+  // Finally patch any remaining undef inputs with the values learned above.
+  replaceUndefValuesInPhi(PN, IncomingValues);
+}
+
+/// TryToSimplifyUncondBranchFromEmptyBlock - BB is known to contain an
+/// unconditional branch, and contains no instructions other than PHI nodes,
+/// potential side-effect free intrinsics and the branch. If possible,
+/// eliminate BB by rewriting all the predecessors to branch to the successor
+/// block and return true. If we can't transform, return false.
+///
+/// \param BB The almost-empty block to eliminate; must not be the entry block
+/// of its function (asserted).
+/// \returns true if BB was folded into its successor and deleted.
+bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
+ assert(BB != &BB->getParent()->getEntryBlock() &&
+ "TryToSimplifyUncondBranchFromEmptyBlock called on entry block!");
+
+ // We can't eliminate infinite loops.
+ BasicBlock *Succ = cast<BranchInst>(BB->getTerminator())->getSuccessor(0);
+ if (BB == Succ) return false;
+
+ // Check to see if merging these blocks would cause conflicts for any of the
+ // phi nodes in BB or Succ. If not, we can safely merge.
+ if (!CanPropagatePredecessorsForPHIs(BB, Succ)) return false;
+
+ // Check for cases where Succ has multiple predecessors and a PHI node in BB
+ // has uses which will not disappear when the PHI nodes are merged. It is
+ // possible to handle such cases, but difficult: it requires checking whether
+ // BB dominates Succ, which is non-trivial to calculate in the case where
+ // Succ has multiple predecessors. Also, it requires checking whether
+ // constructing the necessary self-referential PHI node doesn't introduce any
+ // conflicts; this isn't too difficult, but the previous code for doing this
+ // was incorrect.
+ //
+ // Note that if this check finds a live use, BB dominates Succ, so BB is
+ // something like a loop pre-header (or rarely, a part of an irreducible CFG);
+ // folding the branch isn't profitable in that case anyway.
+ if (!Succ->getSinglePredecessor()) {
+ BasicBlock::iterator BBI = BB->begin();
+ while (isa<PHINode>(*BBI)) {
+ // The only acceptable use of a PHI in BB is as the input a PHI receives
+ // along the edge coming from BB; anything else makes us give up.
+ for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end();
+ UI != E; ++UI) {
+ if (PHINode* PN = dyn_cast<PHINode>(*UI)) {
+ if (PN->getIncomingBlock(UI) != BB)
+ return false;
+ } else {
+ return false;
+ }
+ }
+ ++BBI;
+ }
+ }
+
+ DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB);
+
+ if (isa<PHINode>(Succ->begin())) {
+ // If there is more than one pred of succ, and there are PHI nodes in
+ // the successor, then we need to add incoming edges for the PHI nodes
+ //
+ // Snapshot BB's predecessors once; every PHI in Succ is updated against
+ // the same list.
+ const PredBlockVector BBPreds(pred_begin(BB), pred_end(BB));
+
+ // Loop over all of the PHI nodes in the successor of BB.
+ for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+
+ redirectValuesFromPredecessorsToPhi(BB, BBPreds, PN);
+ }
+ }
+
+ if (Succ->getSinglePredecessor()) {
+ // BB is the only predecessor of Succ, so Succ will end up with exactly
+ // the same predecessors BB had.
+
+ // Copy over any phi, debug or lifetime instruction.
+ BB->getTerminator()->eraseFromParent();
+ Succ->getInstList().splice(Succ->getFirstNonPHI(), BB->getInstList());
+ } else {
+ while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
+ // Remaining uses were ruled out by the use scan earlier in this function
+ // (the loop guarded by !Succ->getSinglePredecessor()), and the uses from
+ // Succ's PHIs were dropped when their inputs were redirected above.
+ assert(PN->use_empty() && "There shouldn't be any uses here!");
+ PN->eraseFromParent();
+ }
+ }
+
+ // Everything that jumped to BB now goes to Succ.
+ BB->replaceAllUsesWith(Succ);
+ // Preserve BB's name on Succ if Succ has none of its own.
+ if (!Succ->hasName()) Succ->takeName(BB);
+ BB->eraseFromParent(); // Delete the old basic block.
+ return true;
+}
+
+/// EliminateDuplicatePHINodes - Check for and eliminate duplicate PHI
+/// nodes in this block. This doesn't try to be clever about PHI nodes
+/// which differ only in the order of the incoming values, but instcombine
+/// orders them so it usually won't matter.
+///
+/// \param BB The block whose leading PHI nodes are examined.
+/// \returns true if at least one duplicate PHI was removed.
+///
+bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
+ bool Changed = false;
+
+ // This implementation doesn't currently consider undef operands
+ // specially. Theoretically, two phis which are identical except for
+ // one having an undef where the other doesn't could be collapsed.
+
+ // Map from PHI hash values to PHI nodes. If multiple PHIs have
+ // the same hash value, the element is the first PHI in the
+ // linked list in CollisionMap.
+ DenseMap<uintptr_t, PHINode *> HashMap;
+
+ // Maintain linked lists of PHI nodes with common hash values.
+ // Each entry maps a PHI to the next PHI in its chain.
+ DenseMap<PHINode *, PHINode *> CollisionMap;
+
+ // Examine each PHI.
+ // The iterator is advanced in the loop condition, before the body runs, so
+ // erasing PN inside the body does not disturb the iteration.
+ for (BasicBlock::iterator I = BB->begin();
+ PHINode *PN = dyn_cast<PHINode>(I++); ) {
+ // Compute a hash value on the operands. Instcombine will likely have sorted
+ // them, which helps expose duplicates, but we have to check all the
+ // operands to be safe in case instcombine hasn't run.
+ uintptr_t Hash = 0;
+ // This hash algorithm is quite weak as hash functions go, but it seems
+ // to do a good enough job for this particular purpose, and is very quick.
+ // Each step XORs in a pointer value and rotates the hash left by 7 bits.
+ for (User::op_iterator I = PN->op_begin(), E = PN->op_end(); I != E; ++I) {
+ Hash ^= reinterpret_cast<uintptr_t>(static_cast<Value *>(*I));
+ Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7));
+ }
+ // The incoming blocks take part in the hash as well as the values.
+ for (PHINode::block_iterator I = PN->block_begin(), E = PN->block_end();
+ I != E; ++I) {
+ Hash ^= reinterpret_cast<uintptr_t>(static_cast<BasicBlock *>(*I));
+ Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7));
+ }
+ // Avoid colliding with the DenseMap sentinels ~0 and ~0-1.
+ Hash >>= 1;
+ // If we've never seen this hash value before, it's a unique PHI.
+ std::pair<DenseMap<uintptr_t, PHINode *>::iterator, bool> Pair =
+ HashMap.insert(std::make_pair(Hash, PN));
+ if (Pair.second) continue;
+ // Otherwise it's either a duplicate or a hash collision.
+ // Walk the chain of PHIs sharing this hash, starting from its head.
+ for (PHINode *OtherPN = Pair.first->second; ; ) {
+ if (OtherPN->isIdenticalTo(PN)) {
+ // A duplicate. Replace this PHI with its duplicate.
+ PN->replaceAllUsesWith(OtherPN);
+ PN->eraseFromParent();
+ Changed = true;
+ break;
+ }
+ // A non-duplicate hash collision.
+ DenseMap<PHINode *, PHINode *>::iterator I = CollisionMap.find(OtherPN);
+ if (I == CollisionMap.end()) {
+ // Set this PHI to be the head of the linked list of colliding PHIs.
+ // The previous head becomes PN's successor in the chain.
+ PHINode *Old = Pair.first->second;
+ Pair.first->second = PN;
+ CollisionMap[PN] = Old;
+ break;
+ }
+ // Proceed to the next PHI in the list.
+ OtherPN = I->second;
+ }
+ }
+
+ return Changed;
+}
+
+/// enforceKnownAlignment - Try to raise the alignment of the object that
+/// \p V points to up to \p PrefAlign, which is only possible when that object
+/// is an alloca or a global we are allowed to modify. This often fails; if
+/// alignment matters, aligning all globals and allocations to their preferred
+/// alignment up front is the more reliable approach.
+///
+/// \param V Pointer whose underlying object is examined (pointer casts are
+/// stripped first).
+/// \param Align The alignment already known for \p V; returned unchanged
+/// whenever nothing better can be established.
+/// \param PrefAlign The alignment we would like the object to have.
+/// \param TD Optional data layout, consulted for the natural stack alignment.
+/// \returns the alignment that holds for the object afterwards.
+static unsigned enforceKnownAlignment(Value *V, unsigned Align,
+                                      unsigned PrefAlign, const DataLayout *TD) {
+  Value *Base = V->stripPointerCasts();
+
+  if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {
+    // Going beyond the natural stack alignment would force dynamic stack
+    // realignment, so leave the alloca alone in that case.
+    if (TD && TD->exceedsNaturalStackAlignment(PrefAlign))
+      return Align;
+
+    // Already aligned well enough: report what it has.
+    if (AI->getAlignment() >= PrefAlign)
+      return AI->getAlignment();
+
+    // Otherwise round the alloca up to the requested alignment.
+    AI->setAlignment(PrefAlign);
+    return PrefAlign;
+  }
+
+  if (GlobalValue *GV = dyn_cast<GlobalValue>(Base)) {
+    // A declaration is not ours to change, and for a definition that is weak
+    // for the linker the memory we set aside may not be the memory the final
+    // program uses; the preferred alignment cannot be enforced reliably in
+    // either case.
+    if (GV->isDeclaration() || GV->isWeakForLinker())
+      return Align;
+
+    // The alignment is only bumped when the global is below the preferred
+    // alignment and either has no explicit alignment or is not assigned to a
+    // section. Inside a section it may be densely packed with other objects,
+    // and extra padding could cause problems.
+    if (GV->getAlignment() < PrefAlign &&
+        (!GV->hasSection() || GV->getAlignment() == 0))
+      GV->setAlignment(PrefAlign);
+    return GV->getAlignment();
+  }
+
+  return Align;
+}
+
+/// getOrEnforceKnownAlignment - Compute the alignment that can be proven for
+/// the pointer V from its known-zero low bits. If PrefAlign is larger than
+/// that, additionally try to raise the alignment of the underlying object
+/// (see enforceKnownAlignment) and return whatever alignment results.
+///
+/// \param V         Pointer value to analyse; must have pointer type.
+/// \param PrefAlign Desired alignment.
+/// \param DL        Optional target data; without it pointers are assumed to
+///                  be 64 bits wide.
+unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
+                                          const DataLayout *DL) {
+  assert(V->getType()->isPointerTy() &&
+         "getOrEnforceKnownAlignment expects a pointer!");
+  unsigned PtrBits = 64;
+  if (DL)
+    PtrBits = DL->getPointerTypeSizeInBits(V->getType());
+
+  APInt KnownZero(PtrBits, 0), KnownOne(PtrBits, 0);
+  ComputeMaskedBits(V, KnownZero, KnownOne, DL);
+
+  // The number of known-zero low bits can be huge (e.g. for a null pointer).
+  // Clamp it so that the shift below stays within 'unsigned' and within the
+  // pointer width.
+  unsigned MaxShift = unsigned(sizeof(unsigned) * CHAR_BIT - 1);
+  unsigned LowZeroBits = std::min(KnownZero.countTrailingOnes(), MaxShift);
+  LowZeroBits = std::min(PtrBits - 1, LowZeroBits);
+
+  // LLVM doesn't support alignments larger than MaximumAlignment currently.
+  unsigned Align = std::min(1u << LowZeroBits, +Value::MaximumAlignment);
+
+  // Only touch the underlying object when the caller wants more than what is
+  // already known.
+  if (Align < PrefAlign)
+    return enforceKnownAlignment(V, Align, PrefAlign, DL);
+
+  return Align;
+}
+
+///===---------------------------------------------------------------------===//
+/// Dbg Intrinsic utilities
+///
+
+/// Return true if the instruction immediately preceding I is a dbg.value
+/// intrinsic that already describes DIVar, at offset 0, with operand 0 of I
+/// as its value.
+static bool LdStHasDebugValue(DIVariable &DIVar, Instruction *I) {
+  // LowerDbgDeclare() is not guaranteed to remove the original dbg.declare
+  // intrinsic, so the same load/store may be visited more than once. This
+  // check keeps us from inserting the same dbg.value over and over.
+  llvm::BasicBlock::InstListType::iterator Prev(I);
+  if (Prev == I->getParent()->getInstList().begin())
+    return false; // I is the first instruction; nothing precedes it.
+
+  --Prev;
+  DbgValueInst *DVI = dyn_cast<DbgValueInst>(Prev);
+  if (!DVI)
+    return false;
+
+  return DVI->getValue() == I->getOperand(0) &&
+         DVI->getOffset() == 0 &&
+         DVI->getVariable() == DIVar;
+}
+
+/// Insert an llvm.dbg.value intrinsic in front of the store SI, describing the
+/// variable of the llvm.dbg.declare DDI with the value being stored.
+///
+/// \returns false if DDI carries no variable; true otherwise (including the
+/// case where an equivalent dbg.value already precedes the store).
+bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
+                                           StoreInst *SI, DIBuilder &Builder) {
+  DIVariable DIVar(DDI->getVariable());
+  assert((!DIVar || DIVar.isVariable()) &&
+         "Variable in DbgDeclareInst should be either null or a DIVariable.");
+  if (!DIVar)
+    return false;
+
+  if (LdStHasDebugValue(DIVar, SI))
+    return true;
+
+  // If the stored value is a zero- or sign-extended function argument,
+  // describe the variable with the argument itself: the extension may be
+  // zapped by a later optimization pass.
+  Value *Stored = SI->getOperand(0);
+  Argument *ExtendedArg = NULL;
+  if (ZExtInst *ZExt = dyn_cast<ZExtInst>(Stored))
+    ExtendedArg = dyn_cast<Argument>(ZExt->getOperand(0));
+  if (SExtInst *SExt = dyn_cast<SExtInst>(Stored))
+    ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0));
+
+  Value *Described = Stored;
+  if (ExtendedArg)
+    Described = ExtendedArg;
+  Instruction *DbgVal =
+      Builder.insertDbgValueIntrinsic(Described, 0, DIVar, SI);
+
+  // Prefer the store's own debug location; fall back to the one on the
+  // dbg.declare when the store has none.
+  DebugLoc SIDL = SI->getDebugLoc();
+  if (SIDL.isUnknown())
+    DbgVal->setDebugLoc(DDI->getDebugLoc());
+  else
+    DbgVal->setDebugLoc(SIDL);
+  return true;
+}
+
+/// Insert an llvm.dbg.value intrinsic in front of the load LI, describing the
+/// variable of the llvm.dbg.declare DDI with operand 0 of the load.
+///
+/// \returns false if DDI carries no variable; true otherwise (including the
+/// case where an equivalent dbg.value already precedes the load).
+bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
+                                           LoadInst *LI, DIBuilder &Builder) {
+  DIVariable DIVar(DDI->getVariable());
+  assert((!DIVar || DIVar.isVariable()) &&
+         "Variable in DbgDeclareInst should be either null or a DIVariable.");
+  if (!DIVar)
+    return false;
+
+  if (LdStHasDebugValue(DIVar, LI))
+    return true;
+
+  Value *Described = LI->getOperand(0);
+  Instruction *DbgVal =
+      Builder.insertDbgValueIntrinsic(Described, 0, DIVar, LI);
+
+  // Prefer the load's own debug location; fall back to the one on the
+  // dbg.declare when the load has none.
+  DebugLoc LIDL = LI->getDebugLoc();
+  if (LIDL.isUnknown())
+    DbgVal->setDebugLoc(DDI->getDebugLoc());
+  else
+    DbgVal->setDebugLoc(LIDL);
+  return true;
+}
+
+/// LowerDbgDeclare - Lower the llvm.dbg.declare intrinsics in F into
+/// llvm.dbg.value intrinsics placed at the loads and stores of the described
+/// alloca.
+///
+/// \returns false if F contains no dbg.declare at all, true otherwise.
+bool llvm::LowerDbgDeclare(Function &F) {
+  DIBuilder DIB(*F.getParent());
+
+  // Collect the declares up front so that the instruction lists are not being
+  // walked while dbg.value intrinsics are inserted into them.
+  SmallVector<DbgDeclareInst *, 4> Declares;
+  for (Function::iterator BB = F.begin(), BBEnd = F.end(); BB != BBEnd; ++BB)
+    for (BasicBlock::iterator Inst = BB->begin(), InstEnd = BB->end();
+         Inst != InstEnd; ++Inst)
+      if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(Inst))
+        Declares.push_back(DDI);
+
+  if (Declares.empty())
+    return false;
+
+  for (unsigned Idx = 0, NumDeclares = Declares.size(); Idx != NumDeclares;
+       ++Idx) {
+    DbgDeclareInst *DDI = Declares[Idx];
+    AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress());
+    // Only allocas for scalar (non-array) variables are handled.
+    if (!AI || AI->isArrayAllocation())
+      continue;
+
+    // Insert a dbg.value at every load and store of the alloca. The
+    // dbg.declare may only go away if every single use was a load or store.
+    bool AllUsesConverted = true;
+    for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end();
+         UI != UE; ++UI) {
+      if (StoreInst *SI = dyn_cast<StoreInst>(*UI))
+        ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
+      else if (LoadInst *LI = dyn_cast<LoadInst>(*UI))
+        ConvertDebugDeclareToDebugValue(DDI, LI, DIB);
+      else
+        AllUsesConverted = false;
+    }
+    if (AllUsesConverted)
+      DDI->eraseFromParent();
+  }
+  return true;
+}
+
+/// FindAllocaDbgDeclare - Return the first llvm.dbg.declare intrinsic found
+/// among the users of the metadata node wrapping 'V', or null if no such node
+/// or no such user exists.
+DbgDeclareInst *llvm::FindAllocaDbgDeclare(Value *V) {
+  MDNode *DebugNode = MDNode::getIfExists(V->getContext(), V);
+  if (!DebugNode)
+    return 0;
+
+  for (Value::use_iterator UI = DebugNode->use_begin(),
+                           UE = DebugNode->use_end();
+       UI != UE; ++UI)
+    if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI))
+      return DDI;
+
+  return 0;
+}
+
+/// replaceDbgDeclareForAlloca - Replace the llvm.dbg.declare describing the
+/// alloca AI with a new one that refers to NewAllocaAddress, a value that
+/// stores the address of the variable's memory. The variable's address
+/// expression gets a trailing "deref" operation to account for the extra
+/// indirection.
+///
+/// \returns false if AI has no dbg.declare or the declare has no variable;
+/// true once the declare has been replaced.
+bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
+                                      DIBuilder &Builder) {
+  DbgDeclareInst *OldDeclare = FindAllocaDbgDeclare(AI);
+  if (!OldDeclare)
+    return false;
+  DIVariable DIVar(OldDeclare->getVariable());
+  assert((!DIVar || DIVar.isVariable()) &&
+         "Variable in DbgDeclareInst should be either null or a DIVariable.");
+  if (!DIVar)
+    return false;
+
+  // Copy any existing address elements of the variable, then append the
+  // "deref" operation: the new llvm.dbg.declare takes a value holding the
+  // address of the variable's memory rather than the alloca itself.
+  Type *Int64Ty = Type::getInt64Ty(AI->getContext());
+  SmallVector<Value*, 4> AddrOps;
+  if (DIVar.hasComplexAddress()) {
+    unsigned NumElts = DIVar.getNumAddrElements();
+    for (unsigned Idx = 0; Idx != NumElts; ++Idx)
+      AddrOps.push_back(ConstantInt::get(Int64Ty, DIVar.getAddrElement(Idx)));
+  }
+  AddrOps.push_back(ConstantInt::get(Int64Ty, DIBuilder::OpDeref));
+
+  DIVariable NewDIVar = Builder.createComplexVariable(
+      DIVar.getTag(), DIVar.getContext(), DIVar.getName(),
+      DIVar.getFile(), DIVar.getLineNumber(), DIVar.getType(),
+      AddrOps, DIVar.getArgNumber());
+
+  // The new declare goes into the basic block of the original alloca; the old
+  // declare is then dropped.
+  Builder.insertDeclare(NewAllocaAddress, NewDIVar, AI->getParent());
+  OldDeclare->eraseFromParent();
+  return true;
+}
+
+/// changeToUnreachable - Place an unreachable instruction in front of I and
+/// delete I together with everything after it in its block. When UseLLVMTrap
+/// is set, a call to llvm.trap is emitted just before the unreachable.
+static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) {
+  BasicBlock *Parent = I->getParent();
+
+  // The block is about to lose all of its outgoing edges, so take it out of
+  // the PHI nodes of every successor.
+  for (succ_iterator Succ = succ_begin(Parent), SuccEnd = succ_end(Parent);
+       Succ != SuccEnd; ++Succ)
+    (*Succ)->removePredecessor(Parent);
+
+  // A call to llvm.trap turns the undefined behavior into a hard fail instead
+  // of falling through into random code.
+  if (UseLLVMTrap) {
+    Function *TrapFn = Intrinsic::getDeclaration(
+        Parent->getParent()->getParent(), Intrinsic::trap);
+    CallInst *Trap = CallInst::Create(TrapFn, "", I);
+    Trap->setDebugLoc(I->getDebugLoc());
+  }
+  new UnreachableInst(I->getContext(), I);
+
+  // Everything from I to the end of the block is now dead. Any remaining
+  // users get undef in place of the deleted values.
+  BasicBlock::iterator Dead = I;
+  BasicBlock::iterator End = Parent->end();
+  while (Dead != End) {
+    if (!Dead->use_empty())
+      Dead->replaceAllUsesWith(UndefValue::get(Dead->getType()));
+    Dead = Parent->getInstList().erase(Dead);
+  }
+}
+
+/// changeToCall - Replace the invoke II with an equivalent call followed by
+/// an unconditional branch to the invoke's normal destination.
+static void changeToCall(InvokeInst *II) {
+  // The trailing three operands of the invoke are not call arguments and are
+  // left out. NOTE(review): presumably the callee and the two destination
+  // blocks -- confirm against the InvokeInst operand layout.
+  SmallVector<Value*, 8> CallArgs(II->op_begin(), II->op_end() - 3);
+  CallInst *Call = CallInst::Create(II->getCalledValue(), CallArgs, "", II);
+
+  // Carry over everything that identifies the original call site.
+  Call->setCallingConv(II->getCallingConv());
+  Call->setAttributes(II->getAttributes());
+  Call->setDebugLoc(II->getDebugLoc());
+  Call->takeName(II);
+  II->replaceAllUsesWith(Call);
+
+  // Control continues at the normal destination; the unwind edge disappears,
+  // so the unwind destination's PHI nodes must forget this block.
+  BasicBlock *NormalDest = II->getNormalDest();
+  BasicBlock *UnwindDest = II->getUnwindDest();
+  BranchInst::Create(NormalDest, II);
+  UnwindDest->removePredecessor(II->getParent());
+  II->eraseFromParent();
+}
+
+/// markAliveBlocks - Walk the CFG starting at BB using a worklist, adding
+/// every block that is reached to Reachable. While visiting a block, code
+/// that is obviously never executed past (anything following a no-return
+/// call, a non-volatile store to undef or to null in address space 0, an
+/// invoke of a null/undef callee) is turned into 'unreachable', invokes that
+/// cannot throw are turned into calls or removed, and the terminator is
+/// constant folded. Returns true if any instruction was changed.
+static bool markAliveBlocks(BasicBlock *BB,
+ SmallPtrSet<BasicBlock*, 128> &Reachable) {
+
+ SmallVector<BasicBlock*, 128> Worklist;
+ Worklist.push_back(BB);
+ Reachable.insert(BB);
+ bool Changed = false;
+ do {
+ BB = Worklist.pop_back_val();
+
+ // Do a quick scan of the basic block, turning any obviously unreachable
+ // instructions into LLVM unreachable insts. The instruction combining pass
+ // canonicalizes unreachable insts into stores to null or undef.
+ for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;++BBI){
+ if (CallInst *CI = dyn_cast<CallInst>(BBI)) {
+ if (CI->doesNotReturn()) {
+ // If we found a call to a no-return function, insert an unreachable
+ // instruction after it. Make sure there isn't *already* one there
+ // though.
+ ++BBI;
+ if (!isa<UnreachableInst>(BBI)) {
+ // Don't insert a call to llvm.trap right before the unreachable.
+ changeToUnreachable(BBI, false);
+ Changed = true;
+ }
+ // Nothing after the no-return call remains to be scanned.
+ break;
+ }
+ }
+
+ // Store to undef and store to null are undefined and used to signal that
+ // they should be changed to unreachable by passes that can't modify the
+ // CFG.
+ if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
+ // Don't touch volatile stores.
+ if (SI->isVolatile()) continue;
+
+ Value *Ptr = SI->getOperand(1);
+
+ // A null pointer only counts when the store is to address space 0.
+ if (isa<UndefValue>(Ptr) ||
+ (isa<ConstantPointerNull>(Ptr) &&
+ SI->getPointerAddressSpace() == 0)) {
+ changeToUnreachable(SI, true);
+ Changed = true;
+ // The store and everything after it were erased; stop scanning.
+ break;
+ }
+ }
+ }
+
+ // Turn invokes that call 'nounwind' functions into ordinary calls.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+ Value *Callee = II->getCalledValue();
+ if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
+ // Invoking null or undef: replace the invoke with trap + unreachable.
+ changeToUnreachable(II, true);
+ Changed = true;
+ } else if (II->doesNotThrow()) {
+ if (II->use_empty() && II->onlyReadsMemory()) {
+ // The result is unused and the callee only reads memory, so the
+ // invoke is dropped entirely rather than converted to a call.
+ // jump to the normal destination branch.
+ BranchInst::Create(II->getNormalDest(), II);
+ II->getUnwindDest()->removePredecessor(II->getParent());
+ II->eraseFromParent();
+ } else
+ changeToCall(II);
+ Changed = true;
+ }
+ }
+
+ Changed |= ConstantFoldTerminator(BB, true);
+ // Queue each successor the first time it is seen; the insert() result is
+ // used to tell whether the block was newly added to Reachable.
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
+ if (Reachable.insert(*SI))
+ Worklist.push_back(*SI);
+ } while (!Worklist.empty());
+ return Changed;
+}
+
+/// removeUnreachableBlocks - Delete every block of F that cannot be reached
+/// from the entry block, even if the unreachable blocks form a cycle among
+/// themselves. Return true if a change was made, false otherwise.
+bool llvm::removeUnreachableBlocks(Function &F) {
+  SmallPtrSet<BasicBlock*, 128> Live;
+  bool Changed = markAliveBlocks(F.begin(), Live);
+
+  // Every block is reachable: only report what markAliveBlocks changed.
+  if (Live.size() == F.size())
+    return Changed;
+
+  assert(Live.size() < F.size());
+  NumRemoved += F.size()-Live.size();
+
+  // First pass: detach each dead block from the live part of the CFG and drop
+  // all of its internal references. The entry block is always live and is
+  // therefore skipped.
+  for (Function::iterator BB = ++F.begin(), End = F.end(); BB != End; ++BB) {
+    if (Live.count(BB))
+      continue;
+
+    for (succ_iterator Succ = succ_begin(BB), SuccEnd = succ_end(BB);
+         Succ != SuccEnd; ++Succ)
+      if (Live.count(*Succ))
+        (*Succ)->removePredecessor(BB);
+    BB->dropAllReferences();
+  }
+
+  // Second pass: actually erase the dead blocks.
+  Function::iterator Cur = ++F.begin();
+  while (Cur != F.end()) {
+    if (Live.count(Cur)) {
+      ++Cur;
+      continue;
+    }
+    Cur = F.getBasicBlockList().erase(Cur);
+  }
+
+  return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
new file mode 100644
index 000000000000..6d5f16ca333b
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
@@ -0,0 +1,801 @@
+//===- LoopSimplify.cpp - Loop Canonicalization Pass ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs several transformations to transform natural loops into a
+// simpler form, which makes subsequent analyses and transformations simpler and
+// more effective.
+//
+// Loop pre-header insertion guarantees that there is a single, non-critical
+// entry edge from outside of the loop to the loop header. This simplifies a
+// number of analyses and transformations, such as LICM.
+//
+// Loop exit-block insertion guarantees that all exit blocks from the loop
+// (blocks which are outside of the loop that have predecessors inside of the
+// loop) only have predecessors from inside of the loop (and are thus dominated
+// by the loop header). This simplifies transformations such as store-sinking
+// that are built into LICM.
+//
+// This pass also guarantees that loops will have exactly one backedge.
+//
+// Indirectbr instructions introduce several complications. If the loop
+// contains or is entered by an indirectbr instruction, it may not be possible
+// to transform the loop and make these guarantees. Client code should check
+// that these conditions are true before relying on them.
+//
+// Note that the simplifycfg pass will clean up blocks which are split out but
+// end up being unnecessary, so usage of this pass should not pessimize
+// generated code.
+//
+// This pass obviously modifies the CFG, but updates loop information and
+// dominator information.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-simplify"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+using namespace llvm;
+
+STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted");
+STATISTIC(NumNested , "Number of nested loops split out");
+
+namespace {
+ /// LoopSimplify - Loop pass that canonicalizes a natural loop. The work is
+ /// done by ProcessLoop and the private helpers declared below.
+ struct LoopSimplify : public LoopPass {
+ static char ID; // Pass identification, replacement for typeid
+ LoopSimplify() : LoopPass(ID) {
+ initializeLoopSimplifyPass(*PassRegistry::getPassRegistry());
+ }
+
+ // AA - If we have an alias analysis object to update, this is it, otherwise
+ // this is null.
+ AliasAnalysis *AA;
+ // Analyses required by this pass; fetched at the start of runOnLoop.
+ LoopInfo *LI;
+ DominatorTree *DT;
+ // SE - ScalarEvolution if it is available, otherwise null.
+ ScalarEvolution *SE;
+ // L - The loop handed to the most recent runOnLoop call.
+ Loop *L;
+ virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ // We need loop information to identify the loops...
+ // Dominator and loop information are both required and kept up to date.
+ AU.addRequired<DominatorTree>();
+ AU.addPreserved<DominatorTree>();
+
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
+
+ // These are not required, but are declared preserved.
+ AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<DependenceAnalysis>();
+ AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added.
+ }
+
+ /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees.
+ void verifyAnalysis() const;
+
+ private:
+ /// Canonicalize a single loop; returns true if anything changed.
+ bool ProcessLoop(Loop *L, LPPassManager &LPM);
+ /// Split Exit so that the loop gets a dedicated exit block.
+ BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit);
+ /// Try to pull one of several backedges out into a nested loop.
+ Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM,
+ BasicBlock *Preheader);
+ /// Insert a single block that all backedges of the loop target.
+ BasicBlock *InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader);
+ };
+}
+
+// Forward declaration; the definition appears further down in this file.
+static void PlaceSplitBlockCarefully(BasicBlock *NewBB,
+ SmallVectorImpl<BasicBlock*> &SplitPreds,
+ Loop *L);
+
+// Pass registration under the name "loop-simplify", listing DominatorTree and
+// LoopInfo as dependencies.
+char LoopSimplify::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify",
+ "Canonicalize natural loops", true, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_END(LoopSimplify, "loop-simplify",
+ "Canonicalize natural loops", true, false)
+
+// Publicly exposed interface to pass...
+// LoopSimplifyID aliases the pass ID; createLoopSimplifyPass returns a newly
+// allocated pass instance.
+char &llvm::LoopSimplifyID = LoopSimplify::ID;
+Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
+
+/// runOnLoop - Pass entry point for one loop. Records the loop, fetches the
+/// analyses this pass works with, and hands the loop to ProcessLoop.
+/// Returns true if ProcessLoop changed anything.
+///
+bool LoopSimplify::runOnLoop(Loop *l, LPPassManager &LPM) {
+  L = l;
+
+  // LoopInfo and DominatorTree are required; AliasAnalysis and
+  // ScalarEvolution are only used (and updated) when they happen to be
+  // available, and are null otherwise.
+  LI = &getAnalysis<LoopInfo>();
+  AA = getAnalysisIfAvailable<AliasAnalysis>();
+  DT = &getAnalysis<DominatorTree>();
+  SE = getAnalysisIfAvailable<ScalarEvolution>();
+
+  return ProcessLoop(L, LPM);
+}
+
+/// ProcessLoop - Canonicalize the loop L. In order, this:
+///  - cuts edges into non-header loop blocks that come from outside the loop,
+///  - resolves conditional branches on undef in exiting blocks so they exit,
+///  - inserts a preheader if there is none,
+///  - gives the loop dedicated exit blocks,
+///  - ensures a single backedge (by splitting out a nested loop or inserting
+///    a unique backedge block),
+///  - simplifies PHI nodes in the header, and
+///  - when all exits go to the same block, tries to fold exiting blocks that
+///    contain only a comparison and a conditional branch.
+/// Returns true if anything was changed.
+///
+bool LoopSimplify::ProcessLoop(Loop *L, LPPassManager &LPM) {
+ bool Changed = false;
+ // Re-entered via goto after a nested loop has been split out of L.
+ReprocessLoop:
+
+ // Check to see that no blocks (other than the header) in this loop have
+ // predecessors that are not in the loop. This is not valid for natural
+ // loops, but can occur if the blocks are unreachable. Since they are
+ // unreachable we can just shamelessly delete those CFG edges!
+ for (Loop::block_iterator BB = L->block_begin(), E = L->block_end();
+ BB != E; ++BB) {
+ if (*BB == L->getHeader()) continue;
+
+ // Collect the offending predecessors in a set first, so that each one is
+ // handled once and the predecessor list is not modified while iterating.
+ SmallPtrSet<BasicBlock*, 4> BadPreds;
+ for (pred_iterator PI = pred_begin(*BB),
+ PE = pred_end(*BB); PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ if (!L->contains(P))
+ BadPreds.insert(P);
+ }
+
+ // Delete each unique out-of-loop (and thus dead) predecessor.
+ for (SmallPtrSet<BasicBlock*, 4>::iterator I = BadPreds.begin(),
+ E = BadPreds.end(); I != E; ++I) {
+
+ DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor "
+ << (*I)->getName() << "\n");
+
+ // Inform each successor of each dead pred.
+ for (succ_iterator SI = succ_begin(*I), SE = succ_end(*I); SI != SE; ++SI)
+ (*SI)->removePredecessor(*I);
+ // Zap the dead pred's terminator and replace it with unreachable.
+ TerminatorInst *TI = (*I)->getTerminator();
+ TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
+ (*I)->getTerminator()->eraseFromParent();
+ new UnreachableInst((*I)->getContext(), *I);
+ Changed = true;
+ }
+ }
+
+ // If there are exiting blocks with branches on undef, resolve the undef in
+ // the direction which will exit the loop. This will help simplify loop
+ // trip count computations.
+ SmallVector<BasicBlock*, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
+ E = ExitingBlocks.end(); I != E; ++I)
+ if (BranchInst *BI = dyn_cast<BranchInst>((*I)->getTerminator()))
+ if (BI->isConditional()) {
+ if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) {
+
+ DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in "
+ << (*I)->getName() << "\n");
+
+ // The condition becomes true exactly when successor 0 lies outside
+ // the loop, i.e. the branch is steered towards the exit.
+ BI->setCondition(ConstantInt::get(Cond->getType(),
+ !L->contains(BI->getSuccessor(0))));
+
+ // This may make the loop analyzable, force SCEV recomputation.
+ if (SE)
+ SE->forgetLoop(L);
+
+ Changed = true;
+ }
+ }
+
+ // Does the loop already have a preheader? If so, don't insert one.
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) {
+ // Insertion can fail (null result); Preheader then stays null.
+ Preheader = InsertPreheaderForLoop(L, this);
+ if (Preheader) {
+ ++NumInserted;
+ Changed = true;
+ }
+ }
+
+ // Next, check to make sure that all exit nodes of the loop only have
+ // predecessors that are inside of the loop. This check guarantees that the
+ // loop preheader/header will dominate the exit blocks. If the exit block has
+ // predecessors from outside of the loop, split the edge now.
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+
+ // Iterate over a set built from the exit block list so that each distinct
+ // exit block is visited once.
+ SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(),
+ ExitBlocks.end());
+ for (SmallSetVector<BasicBlock *, 8>::iterator I = ExitBlockSet.begin(),
+ E = ExitBlockSet.end(); I != E; ++I) {
+ BasicBlock *ExitBlock = *I;
+ for (pred_iterator PI = pred_begin(ExitBlock), PE = pred_end(ExitBlock);
+ PI != PE; ++PI)
+ // Must be exactly this loop: no subloops, parent loops, or non-loop preds
+ // allowed.
+ if (!L->contains(*PI)) {
+ if (RewriteLoopExitBlock(L, ExitBlock)) {
+ ++NumInserted;
+ Changed = true;
+ }
+ // One rewrite attempt per exit block; stop scanning its predecessors.
+ break;
+ }
+ }
+
+ // If the header has more than two predecessors at this point (from the
+ // preheader and from multiple backedges), we must adjust the loop.
+ BasicBlock *LoopLatch = L->getLoopLatch();
+ if (!LoopLatch) {
+ // If this is really a nested loop, rip it out into a child loop. Don't do
+ // this for loops with a giant number of backedges, just factor them into a
+ // common backedge instead.
+ if (L->getNumBackEdges() < 8) {
+ if (SeparateNestedLoop(L, LPM, Preheader)) {
+ ++NumNested;
+ // This is a big restructuring change, reprocess the whole loop.
+ Changed = true;
+ // GCC doesn't tail recursion eliminate this.
+ goto ReprocessLoop;
+ }
+ }
+
+ // If we either couldn't, or didn't want to, identify nesting of the loops,
+ // insert a new block that all backedges target, then make it jump to the
+ // loop header.
+ LoopLatch = InsertUniqueBackedgeBlock(L, Preheader);
+ if (LoopLatch) {
+ ++NumInserted;
+ Changed = true;
+ }
+ }
+
+ // Scan over the PHI nodes in the loop header. Since they now have only two
+ // incoming values (the loop is canonicalized), we may have simplified the PHI
+ // down to 'X = phi [X, Y]', which should be replaced with 'Y'.
+ PHINode *PN;
+ // The iterator is advanced before PN may be erased, so it stays valid.
+ for (BasicBlock::iterator I = L->getHeader()->begin();
+ (PN = dyn_cast<PHINode>(I++)); )
+ if (Value *V = SimplifyInstruction(PN, 0, 0, DT)) {
+ if (AA) AA->deleteValue(PN);
+ if (SE) SE->forgetValue(PN);
+ PN->replaceAllUsesWith(V);
+ PN->eraseFromParent();
+ }
+
+ // If this loop has multiple exits and the exits all go to the same
+ // block, attempt to merge the exits. This helps several passes, such
+ // as LoopRotation, which do not support loops with multiple exits.
+ // SimplifyCFG also does this (and this code uses the same utility
+ // function), however this code is loop-aware, where SimplifyCFG is
+ // not. That gives it the advantage of being able to hoist
+ // loop-invariant instructions out of the way to open up more
+ // opportunities, and the disadvantage of having the responsibility
+ // to preserve dominator information.
+ // Note: this uses the ExitBlocks and ExitingBlocks lists collected above.
+ bool UniqueExit = true;
+ if (!ExitBlocks.empty())
+ for (unsigned i = 1, e = ExitBlocks.size(); i != e; ++i)
+ if (ExitBlocks[i] != ExitBlocks[0]) {
+ UniqueExit = false;
+ break;
+ }
+ if (UniqueExit) {
+ for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitingBlock = ExitingBlocks[i];
+ // Only consider blocks with a single predecessor that end in a
+ // conditional branch on a comparison defined in the same block.
+ if (!ExitingBlock->getSinglePredecessor()) continue;
+ BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+ if (!BI || !BI->isConditional()) continue;
+ CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
+ if (!CI || CI->getParent() != ExitingBlock) continue;
+
+ // Attempt to hoist out all instructions except for the
+ // comparison and the branch.
+ bool AllInvariant = true;
+ for (BasicBlock::iterator I = ExitingBlock->begin(); &*I != BI; ) {
+ Instruction *Inst = I++;
+ // Skip debug info intrinsics.
+ if (isa<DbgInfoIntrinsic>(Inst))
+ continue;
+ if (Inst == CI)
+ continue;
+ if (!L->makeLoopInvariant(Inst, Changed,
+ Preheader ? Preheader->getTerminator() : 0)) {
+ AllInvariant = false;
+ break;
+ }
+ }
+ if (!AllInvariant) continue;
+
+ // The block has now been cleared of all instructions except for
+ // a comparison and a conditional branch. SimplifyCFG may be able
+ // to fold it now.
+ if (!FoldBranchToCommonDest(BI)) continue;
+
+ // Success. The block is now dead, so remove it from the loop,
+ // update the dominator tree and delete it.
+ DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block "
+ << ExitingBlock->getName() << "\n");
+
+ // If any reachable control flow within this loop has changed, notify
+ // ScalarEvolution. Currently assume the parent loop doesn't change
+ // (splitting edges doesn't count). If blocks, CFG edges, or other values
+ // in the parent loop change, then we need call to forgetLoop() for the
+ // parent instead.
+ if (SE)
+ SE->forgetLoop(L);
+
+ assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock));
+ Changed = true;
+ LI->removeBlock(ExitingBlock);
+
+ // Hand the dead block's dominator-tree children over to its immediate
+ // dominator. The loop relies on changeImmediateDominator removing the
+ // child from Children, which is a reference to the node's live list.
+ DomTreeNode *Node = DT->getNode(ExitingBlock);
+ const std::vector<DomTreeNodeBase<BasicBlock> *> &Children =
+ Node->getChildren();
+ while (!Children.empty()) {
+ DomTreeNode *Child = Children.front();
+ DT->changeImmediateDominator(Child, Node->getIDom());
+ }
+ DT->eraseNode(ExitingBlock);
+
+ BI->getSuccessor(0)->removePredecessor(ExitingBlock);
+ BI->getSuccessor(1)->removePredecessor(ExitingBlock);
+ ExitingBlock->eraseFromParent();
+ }
+ }
+
+ return Changed;
+}
+
+/// InsertPreheaderForLoop - Create a preheader for L by splitting all edges
+/// that enter the loop header from outside the loop into a new block.
+///
+/// \param L  Loop that is missing a preheader.
+/// \param PP Pass handed on to the block-splitting utilities.
+/// \returns the new preheader, or null if the loop is entered through an
+/// indirect branch (such edges cannot be split).
+///
+BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) {
+  BasicBlock *Header = L->getHeader();
+
+  // Gather the header's predecessors that live outside of the loop.
+  SmallVector<BasicBlock*, 8> OutsideBlocks;
+  for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
+       PI != PE; ++PI) {
+    BasicBlock *Pred = *PI;
+    if (L->contains(Pred))
+      continue; // A backedge, not an entry edge.
+
+    // An indirect branch into the loop prohibits edge splitting, so the loop
+    // cannot be fully transformed.
+    if (isa<IndirectBrInst>(Pred->getTerminator()))
+      return 0;
+
+    OutsideBlocks.push_back(Pred);
+  }
+
+  // Split out the loop pre-header. Landing pads need the dedicated splitting
+  // routine; its first new block serves as the preheader.
+  BasicBlock *PreheaderBB;
+  if (Header->isLandingPad()) {
+    SmallVector<BasicBlock*, 2> NewBBs;
+    SplitLandingPadPredecessors(Header, OutsideBlocks, ".preheader",
+                                ".split-lp", PP, NewBBs);
+    PreheaderBB = NewBBs[0];
+  } else {
+    PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader",
+                                         PP);
+  }
+
+  // The preheader's terminator takes the debug location of the first non-PHI
+  // instruction in the header.
+  PreheaderBB->getTerminator()->setDebugLoc(
+      Header->getFirstNonPHI()->getDebugLoc());
+  DEBUG(dbgs() << "LoopSimplify: Creating pre-header "
+               << PreheaderBB->getName() << "\n");
+
+  // Put the new block someplace sensible so code layout is not messed up too
+  // horribly.
+  PlaceSplitBlockCarefully(PreheaderBB, OutsideBlocks, L);
+
+  return PreheaderBB;
+}
+
+/// RewriteLoopExitBlock - Give the loop a dedicated exit block in place of
+/// Exit by splitting off all of Exit's predecessors that are inside the loop,
+/// so that the loop preheader dominates the resulting exit block.
+///
+/// \returns the new exit block, or null if the loop is exited through an
+/// indirect branch.
+BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) {
+  // Collect the predecessors of Exit that belong to the loop.
+  SmallVector<BasicBlock*, 8> InLoopPreds;
+  for (pred_iterator PI = pred_begin(Exit), PE = pred_end(Exit); PI != PE;
+       ++PI) {
+    BasicBlock *Pred = *PI;
+    if (!L->contains(Pred))
+      continue;
+
+    // Don't do this if the loop is exited via an indirect branch.
+    if (isa<IndirectBrInst>(Pred->getTerminator()))
+      return 0;
+
+    InLoopPreds.push_back(Pred);
+  }
+
+  // Fires when Exit has no predecessor inside the loop at all.
+  assert(!InLoopPreds.empty() && "No edges coming in from outside the loop?");
+
+  BasicBlock *NewExitBB = 0;
+  if (!Exit->isLandingPad()) {
+    NewExitBB = SplitBlockPredecessors(Exit, InLoopPreds, ".loopexit", this);
+  } else {
+    // Landing pads need the dedicated splitting routine; its first new block
+    // is the one receiving the in-loop predecessors.
+    SmallVector<BasicBlock*, 2> NewBBs;
+    SplitLandingPadPredecessors(Exit, InLoopPreds,
+                                ".loopexit", ".nonloopexit",
+                                this, NewBBs);
+    NewExitBB = NewBBs[0];
+  }
+
+  DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
+               << NewExitBB->getName() << "\n");
+  return NewExitBB;
+}
+
+/// AddBlockAndPredsToSet - Insert InputBB and, transitively, its predecessors
+/// into Blocks. The backwards walk does not continue past StopBlock (which is
+/// itself inserted when reached) nor past blocks already present in the set.
+static void AddBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock,
+                                  std::set<BasicBlock*> &Blocks) {
+  std::vector<BasicBlock *> Pending(1, InputBB);
+  while (!Pending.empty()) {
+    BasicBlock *Current = Pending.back();
+    Pending.pop_back();
+
+    // Skip blocks that were already in the set.
+    bool NewlyInserted = Blocks.insert(Current).second;
+    if (!NewlyInserted || Current == StopBlock)
+      continue;
+
+    // First visit of a block other than the stop block: queue its
+    // predecessors.
+    for (pred_iterator PI = pred_begin(Current), PE = pred_end(Current);
+         PI != PE; ++PI)
+      Pending.push_back(*PI);
+  }
+}
+
+/// FindPHIToPartitionLoops - First step of loop-nestification: look through
+/// the PHI nodes of the loop header for one that receives itself as an
+/// incoming value from a block inside the loop. PHI nodes that simplify to an
+/// existing value are replaced and erased along the way.
+///
+/// \returns the PHI node to partition on, or null if there is none.
+static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT,
+                                        AliasAnalysis *AA, LoopInfo *LI) {
+  BasicBlock::iterator Cursor = L->getHeader()->begin();
+  while (isa<PHINode>(Cursor)) {
+    PHINode *PN = cast<PHINode>(Cursor);
+    // Step past PN now, since it may be erased below.
+    ++Cursor;
+
+    if (Value *Simplified = SimplifyInstruction(PN, 0, 0, DT)) {
+      // This is a degenerate PHI already: fold it away instead of using it.
+      PN->replaceAllUsesWith(Simplified);
+      if (AA) AA->deleteValue(PN);
+      PN->eraseFromParent();
+      continue;
+    }
+
+    // Look for an incoming edge from within the loop that carries the PHI
+    // node itself.
+    unsigned NumIncoming = PN->getNumIncomingValues();
+    for (unsigned Idx = 0; Idx != NumIncoming; ++Idx) {
+      if (PN->getIncomingValue(Idx) != PN)
+        continue;
+      if (L->contains(PN->getIncomingBlock(Idx)))
+        return PN;
+    }
+  }
+  return 0;
+}
+
+// PlaceSplitBlockCarefully - Unless NewBB already directly follows one of the
+// SplitPreds blocks, move it to right after one. This keeps the preheader from
+// being placed inside the loop body, e.g. when the loop hasn't been rotated.
+void PlaceSplitBlockCarefully(BasicBlock *NewBB,
+ SmallVectorImpl<BasicBlock*> &SplitPreds,
+ Loop *L) {
+ // Check to see if NewBB is already well placed.
+ Function::iterator BBI = NewBB; --BBI; // Block laid out immediately before NewBB.
+ for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
+ if (&*BBI == SplitPreds[i])
+ return;
+ }
+
+ // If it isn't already after an outside block, move it after one. This is
+ // always good as it makes the uncond branch from the outside block into a
+ // fall-through.
+
+ // Figure out *which* outside block to put this after. Prefer an outside
+ // block that neighbors a BB actually in the loop.
+ BasicBlock *FoundBB = 0;
+ for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
+ Function::iterator BBI = SplitPreds[i]; // Shadows the outer BBI.
+ if (++BBI != NewBB->getParent()->end() &&
+ L->contains(BBI)) { // The block laid out after this pred is in the loop.
+ FoundBB = SplitPreds[i];
+ break;
+ }
+ }
+
+ // If our heuristic for a *good* bb to place this after doesn't find
+ // anything, just pick something. It's likely better than leaving it within
+ // the loop.
+ if (!FoundBB)
+ FoundBB = SplitPreds[0];
+ NewBB->moveAfter(FoundBB);
+}
+
+
+/// SeparateNestedLoop - If this loop has multiple backedges, try to pull one of
+/// them out into a nested loop. This is important for code that looks like
+/// this:
+///
+/// Loop:
+/// ...
+/// br cond, Loop, Next
+/// ...
+/// br cond2, Loop, Out
+///
+/// To identify this common case, we look at the PHI nodes in the header of the
+/// loop. PHI nodes with unchanging values on one backedge correspond to values
+/// that change in the "outer" loop, but not in the "inner" loop.
+///
+/// If we are able to separate out a loop, return the new outer loop that was
+/// created. Return null if there is no preheader, no partitioning PHI, or an
+/// indirectbr edge that would have to be split.
+Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM,
+ BasicBlock *Preheader) {
+ // Don't try to separate loops without a preheader.
+ if (!Preheader)
+ return 0;
+
+ // The header is not a landing pad; preheader insertion should ensure this.
+ assert(!L->getHeader()->isLandingPad() &&
+ "Can't insert backedge to landing pad");
+
+ PHINode *PN = FindPHIToPartitionLoops(L, DT, AA, LI);
+ if (PN == 0) return 0; // No known way to partition.
+
+ // Pull out all predecessors that have varying values in the loop. This
+ // handles the case when a PHI node has multiple instances of itself as
+ // arguments.
+ SmallVector<BasicBlock*, 8> OuterLoopPreds;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ if (PN->getIncomingValue(i) != PN ||
+ !L->contains(PN->getIncomingBlock(i))) {
+ // We can't split indirectbr edges.
+ if (isa<IndirectBrInst>(PN->getIncomingBlock(i)->getTerminator()))
+ return 0;
+ OuterLoopPreds.push_back(PN->getIncomingBlock(i));
+ }
+ }
+ DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n");
+
+ // If ScalarEvolution is around and knows anything about values in
+ // this loop, tell it to forget them, because we're about to
+ // substantially change it.
+ if (SE)
+ SE->forgetLoop(L);
+
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *NewBB =
+ SplitBlockPredecessors(Header, OuterLoopPreds, ".outer", this);
+
+ // Make sure that NewBB is put someplace intelligent, which doesn't mess up
+ // code layout too horribly.
+ PlaceSplitBlockCarefully(NewBB, OuterLoopPreds, L);
+
+ // Create the new outer loop.
+ Loop *NewOuter = new Loop(); // Linked into the loop tree just below.
+
+ // Change the parent loop to use the outer loop as its child now.
+ if (Loop *Parent = L->getParentLoop())
+ Parent->replaceChildLoopWith(L, NewOuter);
+ else
+ LI->changeTopLevelLoop(L, NewOuter);
+
+ // L is now a subloop of our outer loop.
+ NewOuter->addChildLoop(L);
+
+ // Add the new loop to the pass manager queue.
+ LPM.insertLoopIntoQueue(NewOuter);
+
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I)
+ NewOuter->addBlockEntry(*I); // The outer loop starts with every block of L.
+
+ // Now reset the header in L, which had been moved by
+ // SplitBlockPredecessors for the outer loop.
+ L->moveToHeader(Header);
+
+ // Determine which blocks should stay in L and which should be moved out to
+ // the Outer loop now.
+ std::set<BasicBlock*> BlocksInL;
+ for (pred_iterator PI=pred_begin(Header), E = pred_end(Header); PI!=E; ++PI) {
+ BasicBlock *P = *PI;
+ if (DT->dominates(Header, P)) // Header dominates P, so P->Header is a backedge.
+ AddBlockAndPredsToSet(P, Header, BlocksInL);
+ }
+
+ // Scan all of the loop children of L, moving them to OuterLoop if they are
+ // not part of the inner loop.
+ const std::vector<Loop*> &SubLoops = L->getSubLoops();
+ for (size_t I = 0; I != SubLoops.size(); ) // I advances only when a subloop stays in L.
+ if (BlocksInL.count(SubLoops[I]->getHeader()))
+ ++I; // Loop remains in L
+ else
+ NewOuter->addChildLoop(L->removeChildLoop(SubLoops.begin() + I));
+
+ // Now that we know which blocks are in L and which need to be moved to
+ // OuterLoop, move any blocks that need it.
+ for (unsigned i = 0; i != L->getBlocks().size(); ++i) {
+ BasicBlock *BB = L->getBlocks()[i];
+ if (!BlocksInL.count(BB)) {
+ // Move this block to the parent, updating the exit blocks sets
+ L->removeBlockFromLoop(BB);
+ if ((*LI)[BB] == L)
+ LI->changeLoopFor(BB, NewOuter);
+ --i; // Stay on this index after the removal (an unsigned wrap at 0 is undone by ++i).
+ }
+ }
+
+ return NewOuter;
+}
+
+
+
+/// InsertUniqueBackedgeBlock - This method is called when the specified loop
+/// has more than one backedge in it. If this occurs, revector all of these
+/// backedges to target a new basic block and have that block branch to the loop
+/// header. This ensures that loops have exactly one backedge.
+/// Returns the new block, or null without a preheader or with an indirectbr pred.
+BasicBlock *
+LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {
+ assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!");
+
+ // Get information about the loop
+ BasicBlock *Header = L->getHeader();
+ Function *F = Header->getParent();
+
+ // Unique backedge insertion currently depends on having a preheader.
+ if (!Preheader)
+ return 0;
+
+ // The header is not a landing pad; preheader insertion should ensure this.
+ assert(!Header->isLandingPad() && "Can't insert backedge to landing pad");
+
+ // Figure out which basic blocks contain back-edges to the loop header.
+ std::vector<BasicBlock*> BackedgeBlocks;
+ for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){
+ BasicBlock *P = *I;
+
+ // Indirectbr edges cannot be split, so we must fail if we find one.
+ if (isa<IndirectBrInst>(P->getTerminator()))
+ return 0;
+
+ if (P != Preheader) BackedgeBlocks.push_back(P); // Every non-preheader pred is a backedge source.
+ }
+
+ // Create and insert the new backedge block...
+ BasicBlock *BEBlock = BasicBlock::Create(Header->getContext(),
+ Header->getName()+".backedge", F);
+ BranchInst *BETerminator = BranchInst::Create(Header, BEBlock);
+
+ DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block "
+ << BEBlock->getName() << "\n");
+
+ // Move the new backedge block to right after the last backedge block.
+ Function::iterator InsertPos = BackedgeBlocks.back(); ++InsertPos;
+ F->getBasicBlockList().splice(InsertPos, F->getBasicBlockList(), BEBlock);
+
+ // Now that the block has been inserted into the function, create PHI nodes in
+ // the backedge block which correspond to any PHI nodes in the header block.
+ for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ PHINode *NewPN = PHINode::Create(PN->getType(), BackedgeBlocks.size(),
+ PN->getName()+".be", BETerminator);
+ if (AA) AA->copyValue(PN, NewPN);
+
+ // Loop over the PHI node, moving all entries except the one for the
+ // preheader over to the new PHI node.
+ unsigned PreheaderIdx = ~0U; // ~0U means "not found yet"; asserted below.
+ bool HasUniqueIncomingValue = true;
+ Value *UniqueValue = 0;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *IBB = PN->getIncomingBlock(i);
+ Value *IV = PN->getIncomingValue(i);
+ if (IBB == Preheader) {
+ PreheaderIdx = i;
+ } else {
+ NewPN->addIncoming(IV, IBB);
+ if (HasUniqueIncomingValue) {
+ if (UniqueValue == 0)
+ UniqueValue = IV;
+ else if (UniqueValue != IV)
+ HasUniqueIncomingValue = false;
+ }
+ }
+ }
+
+ // Delete all of the incoming values from the old PN except the preheader's
+ assert(PreheaderIdx != ~0U && "PHI has no preheader entry??");
+ if (PreheaderIdx != 0) {
+ PN->setIncomingValue(0, PN->getIncomingValue(PreheaderIdx));
+ PN->setIncomingBlock(0, PN->getIncomingBlock(PreheaderIdx));
+ }
+ // Nuke all entries except the zero'th.
+ for (unsigned i = 0, e = PN->getNumIncomingValues()-1; i != e; ++i)
+ PN->removeIncomingValue(e-i, false); // Visits indices e, e-1, ..., 1 (back to front).
+
+ // Finally, add the newly constructed PHI node as the entry for the BEBlock.
+ PN->addIncoming(NewPN, BEBlock);
+
+ // As an optimization, if all incoming values in the new PhiNode (which is a
+ // subset of the incoming values of the old PHI node) have the same value,
+ // eliminate the PHI Node.
+ if (HasUniqueIncomingValue) {
+ NewPN->replaceAllUsesWith(UniqueValue);
+ if (AA) AA->deleteValue(NewPN);
+ BEBlock->getInstList().erase(NewPN);
+ }
+ }
+
+ // Now that all of the PHI nodes have been inserted and adjusted, modify the
+ // backedge blocks to jump to the BEBlock instead of the header.
+ for (unsigned i = 0, e = BackedgeBlocks.size(); i != e; ++i) {
+ TerminatorInst *TI = BackedgeBlocks[i]->getTerminator();
+ for (unsigned Op = 0, e = TI->getNumSuccessors(); Op != e; ++Op) // This 'e' shadows the outer bound.
+ if (TI->getSuccessor(Op) == Header)
+ TI->setSuccessor(Op, BEBlock);
+ }
+
+ //===--- Update all analyses which we must preserve now -----------------===//
+
+ // Update Loop Information - we know that this block is now in the current
+ // loop and all parent loops.
+ L->addBasicBlockToLoop(BEBlock, LI->getBase());
+
+ // Update dominator information
+ DT->splitBlock(BEBlock);
+
+ return BEBlock;
+}
+
+void LoopSimplify::verifyAnalysis() const {
+ // Sanity-check (by assertion) the loop L. It used to be possible to just
+ // assert L->isLoopSimplifyForm(), however with the introduction of indirectbr,
+ // there are now cases where it's not possible to transform a loop as necessary.
+ // We can at least check that there is an indirectbr near any time there's trouble.
+
+ // Indirectbr can interfere with preheader and unique backedge insertion.
+ if (!L->getLoopPreheader() || !L->getLoopLatch()) {
+ bool HasIndBrPred = false;
+ for (pred_iterator PI = pred_begin(L->getHeader()),
+ PE = pred_end(L->getHeader()); PI != PE; ++PI)
+ if (isa<IndirectBrInst>((*PI)->getTerminator())) {
+ HasIndBrPred = true;
+ break;
+ }
+ assert(HasIndBrPred &&
+ "LoopSimplify has no excuse for missing loop header info!");
+ (void)HasIndBrPred; // Keeps the variable "used" when the assert compiles away.
+ }
+
+ // Indirectbr can interfere with exit block canonicalization.
+ if (!L->hasDedicatedExits()) {
+ bool HasIndBrExiting = false;
+ SmallVector<BasicBlock*, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
+ if (isa<IndirectBrInst>((ExitingBlocks[i])->getTerminator())) {
+ HasIndBrExiting = true;
+ break;
+ }
+ }
+
+ assert(HasIndBrExiting &&
+ "LoopSimplify has no excuse for missing exit block info!");
+ (void)HasIndBrExiting; // Keeps the variable "used" when the assert compiles away.
+ }
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
new file mode 100644
index 000000000000..162807d03c62
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -0,0 +1,457 @@
+//===-- LoopUnroll.cpp - Loop unrolling utilities -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements some loop unrolling utilities. It does not define any
+// actual pass or policy, but provides a single function to perform loop
+// unrolling.
+//
+// The process of unrolling can produce extraneous basic blocks linked with
+// unconditional branches. This will be corrected in the future.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-unroll"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SimplifyIndVar.h"
+using namespace llvm;
+
+// TODO: Should these be here or in LoopUnroll?
+STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled");
+STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
+
+/// RemapInstruction - Convert the instruction operands from referencing the
+/// current values into those specified by VMap (PHI incoming blocks included).
+static inline void RemapInstruction(Instruction *I,
+ ValueToValueMapTy &VMap) {
+ for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
+ Value *Op = I->getOperand(op);
+ ValueToValueMapTy::iterator It = VMap.find(Op);
+ if (It != VMap.end()) // Operands with no entry in VMap are left as they are.
+ I->setOperand(op, It->second);
+ }
+
+ if (PHINode *PN = dyn_cast<PHINode>(I)) { // Incoming blocks are looked up in VMap as well.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ ValueToValueMapTy::iterator It = VMap.find(PN->getIncomingBlock(i));
+ if (It != VMap.end())
+ PN->setIncomingBlock(i, cast<BasicBlock>(It->second));
+ }
+ }
+}
+
+/// FoldBlockIntoPredecessor - Folds a basic block into its predecessor if it
+/// only has one predecessor, and that predecessor only has one successor.
+/// The LoopInfo Analysis that is passed will be kept consistent.
+/// Returns the new combined block, or null if BB was not folded.
+static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI,
+ LPPassManager *LPM) { // LPM may be null; LI is dereferenced unconditionally.
+ // Merge basic blocks into their predecessor if there is only one distinct
+ // pred, and if there is only one distinct successor of the predecessor, and
+ // if there are no PHI nodes.
+ BasicBlock *OnlyPred = BB->getSinglePredecessor();
+ if (!OnlyPred) return 0;
+
+ if (OnlyPred->getTerminator()->getNumSuccessors() != 1)
+ return 0;
+
+ DEBUG(dbgs() << "Merging: " << *BB << "into: " << *OnlyPred);
+
+ // Resolve any PHI nodes at the start of the block. They are all
+ // guaranteed to have exactly one entry if they exist, unless there are
+ // multiple duplicate (but guaranteed to be equal) entries for the
+ // incoming edges. This occurs when there are multiple edges from
+ // OnlyPred to OnlySucc.
+ FoldSingleEntryPHINodes(BB);
+
+ // Delete the unconditional branch from the predecessor...
+ OnlyPred->getInstList().pop_back();
+
+ // Make all PHI nodes that referred to BB now refer to Pred as their
+ // source...
+ BB->replaceAllUsesWith(OnlyPred);
+
+ // Move all definitions in the successor to the predecessor...
+ OnlyPred->getInstList().splice(OnlyPred->end(), BB->getInstList());
+
+ // OldName will be valid until erased.
+ StringRef OldName = BB->getName();
+
+ // Erase basic block from the function...
+
+ // ScalarEvolution holds references to loop exit blocks.
+ if (LPM) {
+ if (ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>()) {
+ if (Loop *L = LI->getLoopFor(BB))
+ SE->forgetLoop(L);
+ }
+ }
+ LI->removeBlock(BB);
+
+ // Give the predecessor BB's old name if the predecessor has no name itself...
+ if (!OldName.empty() && !OnlyPred->hasName())
+ OnlyPred->setName(OldName);
+
+ BB->eraseFromParent();
+
+ return OnlyPred;
+}
+
+/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true
+/// if unrolling was successful, or false if the loop was unmodified. Unrolling
+/// can only fail when the loop's latch block is not terminated by a conditional
+/// branch instruction. However, if the trip count (and multiple) are not known,
+/// loop unrolling will mostly produce more code that is no faster.
+///
+/// TripCount is generally defined as the number of times the loop header
+/// executes. UnrollLoop relaxes the definition to permit early exits: here
+/// TripCount is the iteration on which control exits LatchBlock if no early
+/// exits were taken. Note that UnrollLoop assumes that the loop counter test
+/// terminates LatchBlock in order to remove unnecessary instances of the
+/// test. In other words, control may exit the loop prior to TripCount
+/// iterations via an early branch, but control may not exit the loop from the
+/// LatchBlock's terminator prior to TripCount iterations.
+///
+/// Similarly, TripMultiple divides the number of times that the LatchBlock may
+/// execute without exiting the loop.
+///
+/// The LoopInfo Analysis that is passed will be kept consistent.
+///
+/// If a LoopPassManager is passed in, and the loop is fully removed, it will be
+/// removed from the LoopPassManager as well. LPM can also be NULL.
+///
+/// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are
+/// available it must also preserve those analyses.
+bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
+ bool AllowRuntime, unsigned TripMultiple,
+ LoopInfo *LI, LPPassManager *LPM) {
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) {
+ DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
+ return false;
+ }
+
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ if (!LatchBlock) { // NOTE(review): this tests for a latch; the message below mentions exit blocks.
+ DEBUG(dbgs() << " Can't unroll; loop exit-block-insertion failed.\n");
+ return false;
+ }
+
+ // Loops with indirectbr cannot be cloned.
+ if (!L->isSafeToClone()) {
+ DEBUG(dbgs() << " Can't unroll; Loop body cannot be cloned.\n");
+ return false;
+ }
+
+ BasicBlock *Header = L->getHeader();
+ BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
+
+ if (!BI || BI->isUnconditional()) {
+ // The loop-rotate pass can be helpful to avoid this in many cases.
+ DEBUG(dbgs() <<
+ " Can't unroll; loop not terminated by a conditional branch.\n");
+ return false;
+ }
+
+ if (Header->hasAddressTaken()) {
+ // The loop-rotate pass can be helpful to avoid this in many cases.
+ DEBUG(dbgs() <<
+ " Won't unroll loop: address of header block is taken.\n");
+ return false;
+ }
+
+ if (TripCount != 0)
+ DEBUG(dbgs() << " Trip Count = " << TripCount << "\n");
+ if (TripMultiple != 1)
+ DEBUG(dbgs() << " Trip Multiple = " << TripMultiple << "\n");
+
+ // Effectively "DCE" unrolled iterations that are beyond the tripcount
+ // and will never be executed.
+ if (TripCount != 0 && Count > TripCount)
+ Count = TripCount;
+
+ // Don't enter the unroll code if there is nothing to do. This way we don't
+ // need to support "partial unrolling by 1".
+ if (TripCount == 0 && Count < 2)
+ return false;
+
+ assert(Count > 0);
+ assert(TripMultiple > 0);
+ assert(TripCount == 0 || TripCount % TripMultiple == 0);
+
+ // Are we eliminating the loop control altogether?
+ bool CompletelyUnroll = Count == TripCount;
+
+ // We assume a run-time trip count if the compiler cannot
+ // figure out the loop trip count and the unroll-runtime
+ // flag is specified.
+ bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);
+
+ if (RuntimeTripCount && !UnrollRuntimeLoopProlog(L, Count, LI, LPM))
+ return false;
+
+ // Notify ScalarEvolution that the loop will be substantially changed,
+ // if not outright eliminated.
+ if (LPM) {
+ ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
+ if (SE)
+ SE->forgetLoop(L);
+ }
+
+ // If we know the trip count, we know the multiple...
+ unsigned BreakoutTrip = 0;
+ if (TripCount != 0) {
+ BreakoutTrip = TripCount % Count;
+ TripMultiple = 0; // 0 makes the latch rewrite below rely on BreakoutTrip alone.
+ } else {
+ // Figure out what multiple to use.
+ BreakoutTrip = TripMultiple =
+ (unsigned)GreatestCommonDivisor64(Count, TripMultiple);
+ }
+
+ if (CompletelyUnroll) {
+ DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName()
+ << " with trip count " << TripCount << "!\n");
+ } else {
+ DEBUG(dbgs() << "UNROLLING loop %" << Header->getName()
+ << " by " << Count);
+ if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
+ DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
+ } else if (TripMultiple != 1) {
+ DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
+ } else if (RuntimeTripCount) {
+ DEBUG(dbgs() << " with run-time trip count");
+ }
+ DEBUG(dbgs() << "!\n");
+ }
+
+ bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
+ BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue); // The bool indexes the other successor (true -> 1, false -> 0).
+
+ // For the first iteration of the loop, we should use the precloned values for
+ // PHI nodes. Insert associations now.
+ ValueToValueMapTy LastValueMap;
+ std::vector<PHINode*> OrigPHINode;
+ for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
+ OrigPHINode.push_back(cast<PHINode>(I));
+ }
+
+ std::vector<BasicBlock*> Headers;
+ std::vector<BasicBlock*> Latches;
+ Headers.push_back(Header);
+ Latches.push_back(LatchBlock);
+
+ // The current on-the-fly SSA update requires blocks to be processed in
+ // reverse postorder so that LastValueMap contains the correct value at each
+ // exit.
+ LoopBlocksDFS DFS(L);
+ DFS.perform(LI);
+
+ // Stash the DFS iterators before adding blocks to the loop.
+ LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO();
+ LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();
+
+ for (unsigned It = 1; It != Count; ++It) { // The original body is copy 0; clone Count-1 more.
+ std::vector<BasicBlock*> NewBlocks;
+
+ for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
+ ValueToValueMapTy VMap;
+ BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
+ Header->getParent()->getBasicBlockList().push_back(New);
+
+ // Loop over all of the PHI nodes in the block, changing them to use the
+ // incoming values from the previous block.
+ if (*BB == Header)
+ for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
+ PHINode *NewPHI = cast<PHINode>(VMap[OrigPHINode[i]]);
+ Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock);
+ if (Instruction *InValI = dyn_cast<Instruction>(InVal))
+ if (It > 1 && L->contains(InValI))
+ InVal = LastValueMap[InValI];
+ VMap[OrigPHINode[i]] = InVal;
+ New->getInstList().erase(NewPHI);
+ }
+
+ // Update our running map of newest clones
+ LastValueMap[*BB] = New;
+ for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
+ VI != VE; ++VI)
+ LastValueMap[VI->first] = VI->second;
+
+ L->addBasicBlockToLoop(New, LI->getBase());
+
+ // Add phi entries for newly created values to all exit blocks.
+ for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB);
+ SI != SE; ++SI) {
+ if (L->contains(*SI))
+ continue;
+ for (BasicBlock::iterator BBI = (*SI)->begin();
+ PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) {
+ Value *Incoming = phi->getIncomingValueForBlock(*BB);
+ ValueToValueMapTy::iterator It = LastValueMap.find(Incoming); // Shadows the unroll counter 'It'.
+ if (It != LastValueMap.end())
+ Incoming = It->second;
+ phi->addIncoming(Incoming, New);
+ }
+ }
+ // Keep track of new headers and latches as we create them, so that
+ // we can insert the proper branches later.
+ if (*BB == Header)
+ Headers.push_back(New);
+ if (*BB == LatchBlock)
+ Latches.push_back(New);
+
+ NewBlocks.push_back(New);
+ }
+
+ // Remap all instructions in the most recent iteration
+ for (unsigned i = 0; i < NewBlocks.size(); ++i)
+ for (BasicBlock::iterator I = NewBlocks[i]->begin(),
+ E = NewBlocks[i]->end(); I != E; ++I)
+ ::RemapInstruction(I, LastValueMap);
+ }
+
+ // Loop over the PHI nodes in the original block, setting incoming values.
+ for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
+ PHINode *PN = OrigPHINode[i];
+ if (CompletelyUnroll) {
+ PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
+ Header->getInstList().erase(PN);
+ }
+ else if (Count > 1) {
+ Value *InVal = PN->removeIncomingValue(LatchBlock, false);
+ // If this value was defined in the loop, take the value defined by the
+ // last iteration of the loop.
+ if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
+ if (L->contains(InValI))
+ InVal = LastValueMap[InVal];
+ }
+ assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch");
+ PN->addIncoming(InVal, Latches.back());
+ }
+ }
+
+ // Now that all the basic blocks for the unrolled iterations are in place,
+ // set up the branches to connect them.
+ for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
+ // The original branch was replicated in each unrolled iteration.
+ BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
+
+ // The branch destination.
+ unsigned j = (i + 1) % e; // The last latch wraps around to Headers[0].
+ BasicBlock *Dest = Headers[j];
+ bool NeedConditional = true;
+
+ if (RuntimeTripCount && j != 0) {
+ NeedConditional = false;
+ }
+
+ // For a complete unroll, make the last iteration end with a branch
+ // to the exit block.
+ if (CompletelyUnroll && j == 0) {
+ Dest = LoopExit;
+ NeedConditional = false;
+ }
+
+ // If we know the trip count or a multiple of it, we can safely use an
+ // unconditional branch for some iterations.
+ if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) {
+ NeedConditional = false;
+ }
+
+ if (NeedConditional) {
+ // Update the conditional branch's successor for the following
+ // iteration.
+ Term->setSuccessor(!ContinueOnTrue, Dest); // !ContinueOnTrue is the index of the in-loop successor.
+ } else {
+ // Remove phi operands at this loop exit
+ if (Dest != LoopExit) {
+ BasicBlock *BB = Latches[i];
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
+ SI != SE; ++SI) {
+ if (*SI == Headers[i])
+ continue;
+ for (BasicBlock::iterator BBI = (*SI)->begin();
+ PHINode *Phi = dyn_cast<PHINode>(BBI); ++BBI) {
+ Phi->removeIncomingValue(BB, false);
+ }
+ }
+ }
+ // Replace the conditional branch with an unconditional one.
+ BranchInst::Create(Dest, Term);
+ Term->eraseFromParent();
+ }
+ }
+
+ // Merge adjacent basic blocks, if possible.
+ for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
+ BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
+ if (Term->isUnconditional()) {
+ BasicBlock *Dest = Term->getSuccessor(0);
+ if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, LPM))
+ std::replace(Latches.begin(), Latches.end(), Dest, Fold); // Dest was erased; later entries must name Fold.
+ }
+ }
+
+ if (LPM) {
+ // FIXME: Reconstruct dom info, because it is not preserved properly.
+ // Incrementally updating domtree after loop unrolling would be easy.
+ if (DominatorTree *DT = LPM->getAnalysisIfAvailable<DominatorTree>())
+ DT->runOnFunction(*L->getHeader()->getParent());
+
+ // Simplify any new induction variables in the partially unrolled loop.
+ ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
+ if (SE && !CompletelyUnroll) {
+ SmallVector<WeakVH, 16> DeadInsts;
+ simplifyLoopIVs(L, SE, LPM, DeadInsts);
+
+ // Aggressively clean up dead instructions that simplifyLoopIVs already
+ // identified. Any remaining should be cleaned up below.
+ while (!DeadInsts.empty())
+ if (Instruction *Inst =
+ dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
+ RecursivelyDeleteTriviallyDeadInstructions(Inst);
+ }
+ }
+ // At this point, the code is well formed. We now do a quick sweep over the
+ // inserted code, doing constant propagation and dead code elimination as we
+ // go.
+ const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks();
+ for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(),
+ BBE = NewLoopBlocks.end(); BB != BBE; ++BB)
+ for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ) {
+ Instruction *Inst = I++; // Advance first: Inst may be erased below.
+
+ if (isInstructionTriviallyDead(Inst))
+ (*BB)->getInstList().erase(Inst);
+ else if (Value *V = SimplifyInstruction(Inst))
+ if (LI->replacementPreservesLCSSAForm(Inst, V)) {
+ Inst->replaceAllUsesWith(V);
+ (*BB)->getInstList().erase(Inst);
+ }
+ }
+
+ NumCompletelyUnrolled += CompletelyUnroll;
+ ++NumUnrolled;
+ // Remove the loop from the LoopPassManager if it's completely removed.
+ if (CompletelyUnroll && LPM != NULL)
+ LPM->deleteLoopFromQueue(L);
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
new file mode 100644
index 000000000000..d801d5f2c2a4
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -0,0 +1,374 @@
+ //===-- LoopUnrollRuntime.cpp - Runtime Loop unrolling utilities ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements some loop unrolling utilities for loops with run-time
+// trip counts. See LoopUnroll.cpp for unrolling loops with compile-time
+// trip counts.
+//
+// The functions in this file are used to generate extra code when the
+// run-time trip count modulo the unroll factor is not 0. When this is the
+// case, we need to generate code to execute these 'left over' iterations.
+//
+ // The current strategy generates an if-then-else sequence prior to the
+ // unrolled loop to execute the 'left over' iterations. Other strategies
+ // include generating a loop before or after the unrolled loop.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-unroll"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include <algorithm>
+
+using namespace llvm;
+
+ STATISTIC(NumRuntimeUnrolled,
+ "Number of loops unrolled with run-time trip counts"); // bumped once per successful UnrollRuntimeLoopProlog
+
+ /// Connect the unrolling prolog code to the original loop.
+ /// The unrolling prolog code contains code to execute the
+ /// 'extra' iterations if the run-time trip count modulo the
+ /// unroll count is non-zero.
+ ///
+ /// This function performs the following:
+ /// - Create PHI nodes at the prolog end block (PrologEnd) to combine values
+ /// that exit the prolog code (from LastPrologBB) and jump around the prolog (from OrigPH).
+ /// - Add a PHI operand to a PHI node at the loop exit block
+ /// for values that exit the prolog and go around the loop.
+ /// - Branch around the original loop (to its unique exit block instead of NewPH)
+ /// if the trip count (TripCount) is less than the unroll factor (Count).
+ /// LVMap is looked up for loop-defined values leaving LastPrologBB; P is forwarded to the block-splitting helpers.
+ static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count,
+ BasicBlock *LastPrologBB, BasicBlock *PrologEnd,
+ BasicBlock *OrigPH, BasicBlock *NewPH,
+ ValueToValueMapTy &LVMap, Pass *P) {
+ BasicBlock *Latch = L->getLoopLatch();
+ assert(Latch != 0 && "Loop must have a latch");
+
+ // Create a PHI node for each outgoing value from the original loop
+ // (which means it is an outgoing value from the prolog code too).
+ // The new PHI node is inserted in the prolog end basic block.
+ // The new PHI name is added as an operand of a PHI node in either
+ // the loop header or the loop exit block.
+ for (succ_iterator SBI = succ_begin(Latch), SBE = succ_end(Latch);
+ SBI != SBE; ++SBI) {
+ for (BasicBlock::iterator BBI = (*SBI)->begin();
+ PHINode *PN = dyn_cast<PHINode>(BBI); ++BBI) {
+
+ // Add a new PHI node to the prolog end block and add the
+ // appropriate incoming values.
+ PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName()+".unr",
+ PrologEnd->getTerminator());
+ // Adding a value to the new PHI node from the original loop preheader.
+ // This is the value that skips all the prolog code.
+ if (L->contains(PN)) {
+ NewPN->addIncoming(PN->getIncomingValueForBlock(NewPH), OrigPH);
+ } else {
+ NewPN->addIncoming(Constant::getNullValue(PN->getType()), OrigPH); // PN is outside the loop: a null constant stands in on the path that bypasses the prolog
+ }
+ // Value PN receives along the latch edge; if the loop defines it, switch to the copy recorded in LVMap.
+ Value *V = PN->getIncomingValueForBlock(Latch);
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ if (L->contains(I)) {
+ V = LVMap[I];
+ }
+ }
+ // Adding a value to the new PHI node from the last prolog block
+ // that was created.
+ NewPN->addIncoming(V, LastPrologBB);
+
+ // Update the existing PHI node operand with the value from the
+ // new PHI node. How this is done depends on if the existing
+ // PHI node is in the original loop block, or the exit block.
+ if (L->contains(PN)) {
+ PN->setIncomingValue(PN->getBasicBlockIndex(NewPH), NewPN);
+ } else {
+ PN->addIncoming(NewPN, PrologEnd);
+ }
+ }
+ }
+
+ // Create a branch around the original loop, which is taken if the
+ // trip count is less than the unroll factor.
+ Instruction *InsertPt = PrologEnd->getTerminator();
+ Instruction *BrLoopExit =
+ new ICmpInst(InsertPt, ICmpInst::ICMP_ULT, TripCount,
+ ConstantInt::get(TripCount->getType(), Count));
+ BasicBlock *Exit = L->getUniqueExitBlock();
+ assert(Exit != 0 && "Loop must have a single exit block only");
+ // Split the exit to maintain loop canonicalization guarantees
+ SmallVector<BasicBlock*, 4> Preds(pred_begin(Exit), pred_end(Exit));
+ if (!Exit->isLandingPad()) {
+ SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", P);
+ } else {
+ SmallVector<BasicBlock*, 2> NewBBs;
+ SplitLandingPadPredecessors(Exit, Preds, ".unr1-lcssa", ".unr2-lcssa",
+ P, NewBBs);
+ }
+ // Add the branch to the exit block (around the unrolled loop)
+ BranchInst::Create(Exit, NewPH, BrLoopExit, InsertPt);
+ InsertPt->eraseFromParent(); // the previous terminator of PrologEnd is superseded by the conditional branch above
+ }
+
+ /// Create a clone of the blocks in a loop and connect them together.
+ /// This function doesn't create a clone of the loop structure.
+ /// Clones go into NewBlocks in LoopBlocks' RPO order, are entered from InsertTop and leave to InsertBot.
+ /// There are two value maps that are defined and used. VMap is
+ /// for the values in the current loop instance. LVMap contains
+ /// the values from the last loop instance. We need the LVMap values
+ /// to update the initial values for the current loop instance.
+ /// If FirstCopy, cloned header PHIs are replaced by their preheader value; LI registers clones in a parent loop.
+ static void CloneLoopBlocks(Loop *L,
+ bool FirstCopy,
+ BasicBlock *InsertTop,
+ BasicBlock *InsertBot,
+ std::vector<BasicBlock *> &NewBlocks,
+ LoopBlocksDFS &LoopBlocks,
+ ValueToValueMapTy &VMap,
+ ValueToValueMapTy &LVMap,
+ LoopInfo *LI) {
+ // Preheader, Header and Latch below always refer to the ORIGINAL loop, never to the clones.
+ BasicBlock *Preheader = L->getLoopPreheader();
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *Latch = L->getLoopLatch();
+ Function *F = Header->getParent();
+ LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
+ LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
+ // For each block in the original loop, create a new copy,
+ // and update the value map with the newly created values.
+ for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
+ BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".unr", F);
+ NewBlocks.push_back(NewBB);
+
+ if (Loop *ParentLoop = L->getParentLoop())
+ ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+
+ VMap[*BB] = NewBB;
+ if (Header == *BB) {
+ // For the first block, add a CFG connection to this newly
+ // created block
+ InsertTop->getTerminator()->setSuccessor(0, NewBB);
+
+ // Change the incoming values to the ones defined in the
+ // previously cloned loop.
+ for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
+ PHINode *NewPHI = cast<PHINode>(VMap[I]);
+ if (FirstCopy) {
+ // The first copy needs no PHI: map the original PHI straight to the preheader value
+ VMap[I] = NewPHI->getIncomingValueForBlock(Preheader);
+ NewBB->getInstList().erase(NewPHI);
+ } else {
+ // Feed the PHI's latch operand from the previous copy (LVMap) and make it arrive from InsertTop
+ unsigned idx = NewPHI->getBasicBlockIndex(Latch);
+ Value *InVal = NewPHI->getIncomingValue(idx);
+ if (Instruction *I = dyn_cast<Instruction>(InVal))
+ if (L->contains(I))
+ InVal = LVMap[InVal];
+ NewPHI->setIncomingValue(idx, InVal);
+ NewPHI->setIncomingBlock(idx, InsertTop);
+ }
+ }
+ }
+ // The cloned latch must not loop back: drop its terminator and branch to InsertBot instead.
+ if (Latch == *BB) {
+ VMap.erase((*BB)->getTerminator()); // forget the mapping to the terminator clone erased on the next line
+ NewBB->getTerminator()->eraseFromParent();
+ BranchInst::Create(InsertBot, NewBB);
+ }
+ }
+ // LVMap is updated with the values for the current loop instance,
+ // which are used the next time this function is called.
+ for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
+ VI != VE; ++VI) {
+ LVMap[VI->first] = VI->second;
+ }
+ }
+
+ /// Insert code in the prolog code when unrolling a loop with a
+ /// run-time trip-count.
+ /// Returns false, before touching the IR, when the loop is not eligible; returns true once the prolog is built.
+ /// This method assumes that the loop unroll factor is total number
+ /// of loop bodies in the loop after unrolling. (Some folks refer
+ /// to the unroll factor as the number of *extra* copies added).
+ /// We assume also that the loop unroll factor is a power-of-two. So, after
+ /// unrolling the loop, the number of loop bodies executed is 2,
+ /// 4, 8, etc. Note - LLVM converts the if-then-sequence to a switch
+ /// instruction in SimplifyCFG.cpp. Then, the backend decides how code for
+ /// the switch instruction is generated.
+ ///
+ /// extraiters = tripcount % loopfactor
+ /// if (extraiters == 0) jump Loop:
+ /// if (extraiters == loopfactor-1) jump L1
+ /// if (extraiters == loopfactor-2) jump L2
+ /// ...
+ /// L1: LoopBody;
+ /// L2: LoopBody;
+ /// ...
+ /// if tripcount < loopfactor jump End
+ /// Loop:
+ /// ...
+ /// End:
+ ///
+ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
+ LPPassManager *LPM) {
+ // for now, only unroll loops that contain a single exit
+ if (!L->getExitingBlock())
+ return false;
+
+ // Make sure the loop is in canonical form, and there is a single
+ // exit block only.
+ if (!L->isLoopSimplifyForm() || L->getUniqueExitBlock() == 0)
+ return false;
+
+ // Use Scalar Evolution to compute the trip count. This allows more
+ // loops to be unrolled than relying on induction var simplification
+ if (!LPM)
+ return false;
+ ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
+ if (SE == 0)
+ return false;
+
+ // Only unroll loops with a computable trip count and the trip count needs
+ // to be an int value (allowing a pointer type is a TODO item)
+ const SCEV *BECount = SE->getBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(BECount) || !BECount->getType()->isIntegerTy())
+ return false;
+ // NOTE(review): the +1 below is formed in BECount's own type; presumably a maximal backedge count would wrap to 0 - confirm.
+ // Add 1 since the backedge count doesn't include the first loop iteration
+ const SCEV *TripCountSC =
+ SE->getAddExpr(BECount, SE->getConstant(BECount->getType(), 1));
+ if (isa<SCEVCouldNotCompute>(TripCountSC))
+ return false;
+ // NOTE(review): Count == 0 and Count == 1 also pass the power-of-2 test below; presumably callers pass Count >= 2 - confirm.
+ // We only handle cases when the unroll factor is a power of 2.
+ // Count is the loop unroll factor, the number of extra copies added + 1.
+ if ((Count & (Count-1)) != 0)
+ return false;
+
+ // If this loop is nested, then the loop unroller changes the code in
+ // parent loop, so the Scalar Evolution pass needs to be run again
+ if (Loop *ParentLoop = L->getParentLoop())
+ SE->forgetLoop(ParentLoop);
+
+ BasicBlock *PH = L->getLoopPreheader();
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *Latch = L->getLoopLatch();
+ // It helps to split the original preheader twice, once for the end of the
+ // prolog code and once for a new loop preheader
+ BasicBlock *PEnd = SplitEdge(PH, Header, LPM->getAsPass());
+ BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), LPM->getAsPass());
+ BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator());
+
+ // Compute the number of extra iterations required, which is:
+ // extra iterations = run-time trip count % loop unroll factor (Count)
+ SCEVExpander Expander(*SE, "loop-unroll");
+ Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
+ PreHeaderBR);
+ Type *CountTy = TripCount->getType();
+ BinaryOperator *ModVal =
+ BinaryOperator::CreateURem(TripCount,
+ ConstantInt::get(CountTy, Count),
+ "xtraiter");
+ ModVal->insertBefore(PreHeaderBR);
+
+ // Test for extra iterations; when there are none, jump straight to the unrolled loop
+ Value *BranchVal = new ICmpInst(PreHeaderBR,
+ ICmpInst::ICMP_NE, ModVal,
+ ConstantInt::get(CountTy, 0), "lcmp");
+ // Branch to either the extra iterations or the unrolled loop
+ // We will fix up the true branch label when adding loop body copies
+ BranchInst::Create(PEnd, PEnd, BranchVal, PreHeaderBR);
+ assert(PreHeaderBR->isUnconditional() &&
+ PreHeaderBR->getSuccessor(0) == PEnd &&
+ "CFG edges in Preheader are not correct");
+ PreHeaderBR->eraseFromParent();
+
+ ValueToValueMapTy LVMap;
+ Function *F = Header->getParent();
+ // These variables are used to update the CFG links in each iteration
+ BasicBlock *CompareBB = 0;
+ BasicBlock *LastLoopBB = PH; // block whose terminator gets wired to the next cloned header
+ // Get an ordered list of blocks in the loop to help with the ordering of the
+ // cloned blocks in the prolog code
+ LoopBlocksDFS LoopBlocks(L);
+ LoopBlocks.perform(LI);
+
+ //
+ // For each extra loop iteration, create a copy of the loop's basic blocks
+ // and generate a condition that branches to the copy depending on the
+ // number of 'left over' iterations.
+ //
+ for (unsigned leftOverIters = Count-1; leftOverIters > 0; --leftOverIters) {
+ std::vector<BasicBlock*> NewBlocks;
+ ValueToValueMapTy VMap;
+
+ // Clone all the basic blocks in the loop, but we don't clone the loop
+ // This function adds the appropriate CFG connections.
+ CloneLoopBlocks(L, (leftOverIters == Count-1), LastLoopBB, PEnd, NewBlocks,
+ LoopBlocks, VMap, LVMap, LI);
+ LastLoopBB = cast<BasicBlock>(VMap[Latch]);
+
+ // Insert the cloned blocks into function just before the original loop
+ F->getBasicBlockList().splice(PEnd, F->getBasicBlockList(),
+ NewBlocks[0], F->end());
+
+ // Generate the code for the comparison which determines if the loop
+ // prolog code needs to be executed.
+ if (leftOverIters == Count-1) {
+ // The first copy (the most left-over iterations) needs no compare
+ // block: it is the fall-through target of the compare chain
+ CompareBB = NewBlocks[0];
+ } else {
+ // Create a new block for the comparison
+ BasicBlock *NewBB = BasicBlock::Create(CompareBB->getContext(), "unr.cmp",
+ F, CompareBB);
+ if (Loop *ParentLoop = L->getParentLoop()) {
+ // Add the new block to the parent loop, if needed
+ ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+ }
+
+ // The comparison w/ the extra iteration value and branch
+ Value *BranchVal = new ICmpInst(*NewBB, ICmpInst::ICMP_EQ, ModVal,
+ ConstantInt::get(CountTy, leftOverIters),
+ "un.tmp");
+ // Branch to this copy on a match, otherwise fall to the previous compare/copy
+ BranchInst::Create(NewBlocks[0], CompareBB,
+ BranchVal, NewBB);
+ CompareBB = NewBB;
+ PH->getTerminator()->setSuccessor(0, NewBB); // the preheader's first successor now enters the newest compare block
+ VMap[NewPH] = CompareBB;
+ }
+
+ // Rewrite the cloned instruction operands to use the values
+ // created when the clone is created.
+ for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) {
+ for (BasicBlock::iterator I = NewBlocks[i]->begin(),
+ E = NewBlocks[i]->end(); I != E; ++I) {
+ RemapInstruction(I, VMap,
+ RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
+ }
+ }
+ }
+
+ // Connect the prolog code to the original loop and update the
+ // PHI functions.
+ ConnectProlog(L, TripCount, Count, LastLoopBB, PEnd, PH, NewPH, LVMap,
+ LPM->getAsPass());
+ NumRuntimeUnrolled++;
+ return true;
+ }
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
new file mode 100644
index 000000000000..e017f501209d
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
@@ -0,0 +1,174 @@
+//===- LowerExpectIntrinsic.cpp - Lower expect intrinsic ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers the 'expect' intrinsic to LLVM metadata.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lower-expect-intrinsic"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include <vector>
+
+using namespace llvm;
+
+ STATISTIC(IfHandled, "Number of 'expect' intrinsic instructions handled"); // bumped for each branch or switch that received weights
+ // Weights written into the MD_prof metadata below; both are hidden command-line options.
+ static cl::opt<uint32_t>
+ LikelyBranchWeight("likely-branch-weight", cl::Hidden, cl::init(64),
+ cl::desc("Weight of the branch likely to be taken (default = 64)"));
+ static cl::opt<uint32_t>
+ UnlikelyBranchWeight("unlikely-branch-weight", cl::Hidden, cl::init(4),
+ cl::desc("Weight of the branch unlikely to be taken (default = 4)"));
+
+ namespace {
+ // Function pass that turns llvm.expect calls feeding branch/switch conditions into MD_prof weights, then erases the calls.
+ class LowerExpectIntrinsic : public FunctionPass {
+ // Attach branch weights to a switch whose condition is an llvm.expect call; returns true if metadata was set.
+ bool HandleSwitchExpect(SwitchInst *SI);
+ // Attach branch weights to a conditional branch on 'icmp ne (llvm.expect ...)'; returns true if metadata was set.
+ bool HandleIfExpect(BranchInst *BI);
+
+ public:
+ static char ID;
+ LowerExpectIntrinsic() : FunctionPass(ID) {
+ initializeLowerExpectIntrinsicPass(*PassRegistry::getPassRegistry());
+ }
+ // Visit every block of F: annotate its terminator, then remove all llvm.expect calls in the block.
+ bool runOnFunction(Function &F);
+ };
+ } // end anonymous namespace
+
+
+ bool LowerExpectIntrinsic::HandleSwitchExpect(SwitchInst *SI) {
+   // The switch has to be controlled directly by a call to llvm.expect.
+   CallInst *ExpectCall = dyn_cast<CallInst>(SI->getCondition());
+   if (!ExpectCall)
+     return false;
+
+   Function *Callee = ExpectCall->getCalledFunction();
+   if (!Callee || Callee->getIntrinsicID() != Intrinsic::expect)
+     return false;
+
+   // Only a constant expected value can be matched against the case labels.
+   ConstantInt *Expected = dyn_cast<ConstantInt>(ExpectCall->getArgOperand(1));
+   if (!Expected)
+     return false;
+   Value *RealCondition = ExpectCall->getArgOperand(0);
+
+   SwitchInst::CaseIt LikelyCase = SI->findCaseValue(Expected);
+   const unsigned NumCases = SI->getNumCases();
+   const uint32_t LikelyW = LikelyBranchWeight;
+   const uint32_t UnlikelyW = UnlikelyBranchWeight;
+
+   // Slot 0 belongs to the default destination, slot i + 1 to case i.
+   // Everything starts out unlikely; the expected destination is then raised.
+   std::vector<uint32_t> Weights(NumCases + 1, UnlikelyW);
+   if (LikelyCase == SI->case_default())
+     Weights[0] = LikelyW;
+   else
+     Weights[LikelyCase.getCaseIndex() + 1] = LikelyW;
+
+   MDBuilder Builder(ExpectCall->getContext());
+   SI->setMetadata(LLVMContext::MD_prof, Builder.createBranchWeights(Weights));
+
+   // The switch now tests the value the intrinsic wrapped.
+   SI->setCondition(RealCondition);
+   return true;
+ }
+
+
+ /// Attach MD_prof branch weights to a conditional branch whose condition is
+ /// 'icmp ne (call @llvm.expect(V, E)), C' with constant E and C, and make the
+ /// compare read V directly. Returns true if the branch was annotated, false
+ /// if the pattern did not match (nothing is modified in that case).
+ bool LowerExpectIntrinsic::HandleIfExpect(BranchInst *BI) {
+   if (BI->isUnconditional())
+     return false;
+
+   // Handle non-optimized IR code like:
+   //   %expval = call i64 @llvm.expect.i64.i64(i64 %conv1, i64 1)
+   //   %tobool = icmp ne i64 %expval, 0
+   //   br i1 %tobool, label %if.then, label %if.end
+
+   ICmpInst *CmpI = dyn_cast<ICmpInst>(BI->getCondition());
+   if (!CmpI || CmpI->getPredicate() != CmpInst::ICMP_NE)
+     return false;
+
+   CallInst *CI = dyn_cast<CallInst>(CmpI->getOperand(0));
+   if (!CI)
+     return false;
+
+   Function *Fn = CI->getCalledFunction();
+   if (!Fn || Fn->getIntrinsicID() != Intrinsic::expect)
+     return false;
+
+   Value *ArgValue = CI->getArgOperand(0);
+   ConstantInt *ExpectedValue = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+   if (!ExpectedValue)
+     return false;
+
+   // The outcome of the compare for the expected value can only be predicted
+   // when the value it is compared against is a constant as well.
+   ConstantInt *ComparedTo = dyn_cast<ConstantInt>(CmpI->getOperand(1));
+   if (!ComparedTo)
+     return false;
+
+   MDBuilder MDB(CI->getContext());
+   MDNode *Node;
+
+   // 'icmp ne' is true, i.e. successor 0 is taken, exactly when the expected
+   // value differs from the constant on the right-hand side. Both operands of
+   // the compare have the same integer type, so the APInt widths agree.
+   if (ExpectedValue->getValue() != ComparedTo->getValue())
+     Node = MDB.createBranchWeights(LikelyBranchWeight, UnlikelyBranchWeight);
+   else
+     Node = MDB.createBranchWeights(UnlikelyBranchWeight, LikelyBranchWeight);
+
+   BI->setMetadata(LLVMContext::MD_prof, Node);
+
+   // The compare now reads the real value instead of the intrinsic's result.
+   CmpI->setOperand(0, ArgValue);
+   return true;
+ }
+
+
+ /// Lower every llvm.expect in F: annotate the terminator of each block with
+ /// branch weights where the pattern matches, then replace each remaining
+ /// llvm.expect call by its first argument and erase it.
+ /// Returns true if any metadata was attached or any call was removed, so the
+ /// pass manager knows the function was modified.
+ bool LowerExpectIntrinsic::runOnFunction(Function &F) {
+   bool Changed = false;
+
+   for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
+     BasicBlock *BB = I++;
+
+     // Create "block_weights" metadata.
+     if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
+       if (HandleIfExpect(BI)) {
+         IfHandled++;
+         Changed = true;
+       }
+     } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
+       if (HandleSwitchExpect(SI)) {
+         IfHandled++;
+         Changed = true;
+       }
+     }
+
+     // remove llvm.expect intrinsics.
+     for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
+          BI != BE; ) {
+       // Advance before a possible erase so the iterator stays valid.
+       CallInst *CI = dyn_cast<CallInst>(BI++);
+       if (!CI)
+         continue;
+
+       Function *Fn = CI->getCalledFunction();
+       if (Fn && Fn->getIntrinsicID() == Intrinsic::expect) {
+         Value *Exp = CI->getArgOperand(0);
+         CI->replaceAllUsesWith(Exp);
+         CI->eraseFromParent();
+         Changed = true;
+       }
+     }
+   }
+
+   return Changed;
+ }
+
+
+ char LowerExpectIntrinsic::ID = 0; // definition of the static member that the constructor passes to FunctionPass(ID)
+ INITIALIZE_PASS(LowerExpectIntrinsic, "lower-expect", "Lower 'expect' "
+ "Intrinsics", false, false) // the two string literals concatenate to "Lower 'expect' Intrinsics"
+
+ FunctionPass *llvm::createLowerExpectIntrinsicPass() {
+   // Public factory: the caller receives a freshly allocated pass object.
+   FunctionPass *NewPass = new LowerExpectIntrinsic();
+   return NewPass;
+ }
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp
new file mode 100644
index 000000000000..9799a30f6a0f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp
@@ -0,0 +1,579 @@
+//===- LowerInvoke.cpp - Eliminate Invoke & Unwind instructions -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation is designed for use by code generators which do not yet
+// support stack unwinding. This pass supports two models of exception handling
+// lowering, the 'cheap' support and the 'expensive' support.
+//
+// 'Cheap' exception handling support gives the program the ability to execute
+// any program which does not "throw an exception", by turning 'invoke'
+// instructions into calls and by turning 'unwind' instructions into calls to
+// abort(). If the program does dynamically use the unwind instruction, the
+// program will print a message then abort.
+//
+// 'Expensive' exception handling support gives the full exception handling
+// support to the program at the cost of making the 'invoke' instruction
+// really expensive. It basically inserts setjmp/longjmp calls to emulate the
+// exception handling as necessary.
+//
+// Because the 'expensive' support slows down programs a lot, and EH is only
+// used for a subset of the programs, it must be specifically enabled by an
+// option.
+//
+// Note that after this pass runs the CFG is not entirely accurate (exceptional
+// control flow edges are not correct anymore) so only very simple things should
+// be done after the lowerinvoke pass has run (like generation of native code).
+// This should not be used as a general purpose "my LLVM-to-LLVM pass doesn't
+// support the invoke instruction yet" lowering pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lowerinvoke"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <csetjmp>
+#include <set>
+using namespace llvm;
+
+ STATISTIC(NumInvokes, "Number of invokes replaced");
+ STATISTIC(NumSpilled, "Number of registers live across unwind edges");
+ // Command-line switch for the expensive model; createLowerInvokePass ORs it with its own argument.
+ static cl::opt<bool> ExpensiveEHSupport("enable-correct-eh-support",
+ cl::desc("Make the -lowerinvoke pass insert expensive, but correct, EH code"));
+
+ namespace {
+ class LowerInvoke : public FunctionPass {
+ const TargetMachine *TM; // may be null (default argument); doInitialization consults it only for the jump-buffer size
+
+ // Used for both models.
+ Constant *AbortFn;
+
+ // Used for expensive EH support.
+ StructType *JBLinkTy;
+ GlobalVariable *JBListHead;
+ Constant *SetJmpFn, *LongJmpFn, *StackSaveFn, *StackRestoreFn;
+ bool useExpensiveEHSupport; // fixed at construction; doInitialization sets up the members above only when true
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit LowerInvoke(const TargetMachine *TM = 0,
+ bool useExpensiveEHSupport = ExpensiveEHSupport)
+ : FunctionPass(ID), TM(TM),
+ useExpensiveEHSupport(useExpensiveEHSupport) {
+ initializeLowerInvokePass(*PassRegistry::getPassRegistry());
+ }
+ bool doInitialization(Module &M);
+ bool runOnFunction(Function &F);
+ // Declares that this pass preserves mem2reg and LowerSwitch.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ // This is a cluster of orthogonal Transforms
+ AU.addPreserved("mem2reg");
+ AU.addPreservedID(LowerSwitchID);
+ }
+
+ private:
+ bool insertCheapEHSupport(Function &F);
+ void splitLiveRangesLiveAcrossInvokes(SmallVectorImpl<InvokeInst*>&Invokes);
+ void rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,
+ AllocaInst *InvokeNum, AllocaInst *StackPtr,
+ SwitchInst *CatchSwitch);
+ bool insertExpensiveEHSupport(Function &F);
+ };
+ } // end anonymous namespace
+
+ char LowerInvoke::ID = 0;
+ INITIALIZE_PASS(LowerInvoke, "lowerinvoke",
+ "Lower invoke and unwind, for unwindless code generators",
+ false, false)
+ // Reference to the pass ID that is visible outside this file's anonymous namespace.
+ char &llvm::LowerInvokePassID = LowerInvoke::ID;
+
+ // Factory for the LowerInvoke pass. The expensive model is selected when
+ // either the caller asks for it or -enable-correct-eh-support is set.
+ FunctionPass *llvm::createLowerInvokePass(const TargetMachine *TM,
+                                           bool useExpensiveEHSupport) {
+   const bool WantExpensive = useExpensiveEHSupport || ExpensiveEHSupport;
+   return new LowerInvoke(TM, WantExpensive);
+ }
+
+ // doInitialization - Make sure that there is a prototype for abort in the
+ // current module; for the expensive model also set up the jump-buffer list type/global and intrinsic declarations.
+ bool LowerInvoke::doInitialization(Module &M) {
+ Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
+ if (useExpensiveEHSupport) {
+ // Insert a type for the linked list of jump buffers.
+ const TargetLowering *TLI = TM ? TM->getTargetLowering() : 0;
+ unsigned JBSize = TLI ? TLI->getJumpBufSize() : 0;
+ JBSize = JBSize ? JBSize : 200; // fall back to 200 elements when no size is available (no TargetLowering, or it reports 0)
+ Type *JmpBufTy = ArrayType::get(VoidPtrTy, JBSize);
+ // List node layout: { jump buffer, pointer to another node of the same type }.
+ JBLinkTy = StructType::create(M.getContext(), "llvm.sjljeh.jmpbufty");
+ Type *Elts[] = { JmpBufTy, PointerType::getUnqual(JBLinkTy) };
+ JBLinkTy->setBody(Elts);
+
+ Type *PtrJBList = PointerType::getUnqual(JBLinkTy);
+
+ // Now that we've done that, insert the jmpbuf list head global, unless it
+ // already exists.
+ if (!(JBListHead = M.getGlobalVariable("llvm.sjljeh.jblist", PtrJBList))) {
+ JBListHead = new GlobalVariable(M, PtrJBList, false,
+ GlobalValue::LinkOnceAnyLinkage,
+ Constant::getNullValue(PtrJBList),
+ "llvm.sjljeh.jblist");
+ }
+
+ // VisualStudio defines setjmp as _setjmp
+ #if defined(_MSC_VER) && defined(setjmp) && \
+ !defined(setjmp_undefined_for_msvc)
+ # pragma push_macro("setjmp")
+ # undef setjmp
+ # define setjmp_undefined_for_msvc
+ #endif
+ // With the macro out of the way, Intrinsic::setjmp names the enumerator rather than expanding.
+ SetJmpFn = Intrinsic::getDeclaration(&M, Intrinsic::setjmp);
+
+ #if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
+ // let's return it to _setjmp state
+ # pragma pop_macro("setjmp")
+ # undef setjmp_undefined_for_msvc
+ #endif
+
+ LongJmpFn = Intrinsic::getDeclaration(&M, Intrinsic::longjmp);
+ StackSaveFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave);
+ StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore);
+ }
+
+ // We need the 'abort' function for both models.
+ AbortFn = M.getOrInsertFunction("abort", Type::getVoidTy(M.getContext()),
+ (Type *)0);
+ return true;
+ }
+
+ bool LowerInvoke::insertCheapEHSupport(Function &F) {
+   // Cheap model: every block that ends in an invoke gets a plain call
+   // followed by an unconditional branch to the normal destination.
+   bool MadeChange = false;
+   for (Function::iterator Block = F.begin(), End = F.end(); Block != End;
+        ++Block) {
+     InvokeInst *Invoke = dyn_cast<InvokeInst>(Block->getTerminator());
+     if (!Invoke)
+       continue;
+
+     // All operands except the last three are forwarded as call arguments
+     // (presumably the trailing ones are the callee and the two destinations).
+     SmallVector<Value*,16> Args(Invoke->op_begin(), Invoke->op_end() - 3);
+
+     // Build the replacement call right in front of the invoke and let it
+     // inherit the invoke's name, calling convention, attributes and debug
+     // location.
+     CallInst *Call = CallInst::Create(Invoke->getCalledValue(), Args, "",
+                                       Invoke);
+     Call->takeName(Invoke);
+     Call->setCallingConv(Invoke->getCallingConv());
+     Call->setAttributes(Invoke->getAttributes());
+     Call->setDebugLoc(Invoke->getDebugLoc());
+     Invoke->replaceAllUsesWith(Call);
+
+     // Control continues unconditionally at the normal destination.
+     BranchInst::Create(Invoke->getNormalDest(), Invoke);
+
+     // This block no longer reaches the unwind destination; drop its PHI
+     // entries there.
+     Invoke->getUnwindDest()->removePredecessor(Block);
+
+     // Finally get rid of the invoke itself.
+     Block->getInstList().erase(Invoke);
+
+     ++NumInvokes;
+     MadeChange = true;
+   }
+   return MadeChange;
+ }
+
+ /// rewriteExpensiveInvoke - Insert code and hack the function to replace the
+ /// specified invoke instruction with a call. InvokeNum and StackPtr are the slots written before the call.
+ void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,
+ AllocaInst *InvokeNum,
+ AllocaInst *StackPtr,
+ SwitchInst *CatchSwitch) {
+ ConstantInt *InvokeNoC = ConstantInt::get(Type::getInt32Ty(II->getContext()),
+ InvokeNo);
+ // InvokeNoC is both the value stored to InvokeNum before the call and the CatchSwitch case that selects II's unwind block.
+ // If the unwind edge has phi nodes, split the edge.
+ if (isa<PHINode>(II->getUnwindDest()->begin())) {
+ SplitCriticalEdge(II, 1, this);
+
+ // If there are any phi nodes left, they must have a single predecessor.
+ while (PHINode *PN = dyn_cast<PHINode>(II->getUnwindDest()->begin())) {
+ PN->replaceAllUsesWith(PN->getIncomingValue(0));
+ PN->eraseFromParent();
+ }
+ }
+
+ // Insert a store of the invoke num before the invoke and store zero into the
+ // location afterward.
+ new StoreInst(InvokeNoC, InvokeNum, true, II); // volatile
+
+ // Insert a store of the stack ptr before the invoke, so we can restore it
+ // later in the exception case.
+ CallInst* StackSaveRet = CallInst::Create(StackSaveFn, "ssret", II);
+ new StoreInst(StackSaveRet, StackPtr, true, II); // volatile
+ // The zero store goes at the first insertion point of the normal destination.
+ BasicBlock::iterator NI = II->getNormalDest()->getFirstInsertionPt();
+ // nonvolatile.
+ new StoreInst(Constant::getNullValue(Type::getInt32Ty(II->getContext())),
+ InvokeNum, false, NI);
+ // On the unwind path, reload the saved stack pointer and hand it to StackRestoreFn.
+ Instruction* StackPtrLoad =
+ new LoadInst(StackPtr, "stackptr.restore", true,
+ II->getUnwindDest()->getFirstInsertionPt());
+ CallInst::Create(StackRestoreFn, StackPtrLoad, "")->insertAfter(StackPtrLoad);
+
+ // Add a switch case to our unwind block.
+ CatchSwitch->addCase(InvokeNoC, II->getUnwindDest());
+
+ // Insert a normal call instruction.
+ SmallVector<Value*,16> CallArgs(II->op_begin(), II->op_end() - 3);
+ CallInst *NewCall = CallInst::Create(II->getCalledValue(),
+ CallArgs, "", II);
+ NewCall->takeName(II);
+ NewCall->setCallingConv(II->getCallingConv());
+ NewCall->setAttributes(II->getAttributes());
+ NewCall->setDebugLoc(II->getDebugLoc());
+ II->replaceAllUsesWith(NewCall);
+
+ // Replace the invoke with an uncond branch.
+ BranchInst::Create(II->getNormalDest(), NewCall->getParent());
+ II->eraseFromParent();
+ }
+
+ /// MarkBlocksLiveIn - Add BB and, transitively, all of its predecessors to
+ /// LiveBBs. The walk does not continue past blocks that are already in the
+ /// set.
+ static void MarkBlocksLiveIn(BasicBlock *BB, std::set<BasicBlock*> &LiveBBs) {
+   // Explicit worklist instead of recursion; the resulting set is the same.
+   SmallVector<BasicBlock*, 16> Pending;
+   Pending.push_back(BB);
+
+   while (!Pending.empty()) {
+     BasicBlock *Current = Pending.pop_back_val();
+
+     // A failed insert means this block was handled earlier.
+     if (!LiveBBs.insert(Current).second)
+       continue;
+
+     for (pred_iterator Pred = pred_begin(Current), PredEnd = pred_end(Current);
+          Pred != PredEnd; ++Pred)
+       Pending.push_back(*Pred);
+   }
+ }
+
+// splitLiveRangesLiveAcrossInvokes - Prepare the function that owns 'Invokes'
+// for setjmp/longjmp based lowering.
+//
+// First thing we need to do is scan the whole function for values that are
+// live across unwind edges.  Each value that is live across an unwind edge
+// we spill into a stack location, guaranteeing that there is nothing live
+// across the unwind edge.  This process also splits all critical edges
+// coming out of invoke's.
+//
+// 'Invokes' must be non-empty: the enclosing function is looked up through
+// Invokes.back().
+void LowerInvoke::
+splitLiveRangesLiveAcrossInvokes(SmallVectorImpl<InvokeInst*> &Invokes) {
+  // First step, split all critical edges from invoke instructions.
+  for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
+    InvokeInst *II = Invokes[i];
+    SplitCriticalEdge(II, 0, this);
+    SplitCriticalEdge(II, 1, this);
+    assert(!isa<PHINode>(II->getNormalDest()) &&
+           !isa<PHINode>(II->getUnwindDest()) &&
+           "critical edge splitting left single entry phi nodes?");
+  }
+
+  Function *F = Invokes.back()->getParent()->getParent();
+
+  // To avoid having to handle incoming arguments specially, we lower each arg
+  // to a copy instruction in the entry block.  This ensures that the argument
+  // value itself cannot be live across the entry block.
+  BasicBlock::iterator AfterAllocaInsertPt = F->begin()->begin();
+  while (isa<AllocaInst>(AfterAllocaInsertPt) &&
+        isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsertPt)->getArraySize()))
+    ++AfterAllocaInsertPt;
+  for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
+       AI != E; ++AI) {
+    Type *Ty = AI->getType();
+    // Aggregate types can't be cast, but are legal argument types, so we have
+    // to handle them differently. We use an extract/insert pair as a
+    // lightweight method to achieve the same goal.
+    if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
+      Instruction *EI = ExtractValueInst::Create(AI, 0, "",AfterAllocaInsertPt);
+      Instruction *NI = InsertValueInst::Create(AI, EI, 0);
+      NI->insertAfter(EI);
+      AI->replaceAllUsesWith(NI);
+      // Set the aggregate operand of both instructions back to the argument:
+      // the replaceAllUsesWith() above rewrote those uses as well.
+      EI->setOperand(0, AI);
+      NI->setOperand(0, AI);
+    } else {
+      // This is always a no-op cast because we're casting AI to AI->getType()
+      // so src and destination types are identical. BitCast is the only
+      // possibility.
+      CastInst *NC = new BitCastInst(
+        AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt);
+      AI->replaceAllUsesWith(NC);
+      // Set the operand of the cast instruction back to the argument.
+      // Normally it's forbidden to replace a CastInst's operand because it
+      // could cause the opcode to reflect an illegal conversion. However,
+      // we're replacing it here with the same value it was constructed with.
+      // We do this because the above replaceAllUsesWith() clobbered the
+      // operand, but we want this one to remain.
+      NC->setOperand(0, AI);
+    }
+  }
+
+  // Finally, scan the code looking for instructions with bad live ranges.
+  for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+    for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
+      // Ignore obvious cases we don't have to handle. In particular, most
+      // instructions either have no uses or only have a single use inside the
+      // current block. Ignore them quickly.
+      Instruction *Inst = II;
+      if (Inst->use_empty()) continue;
+      if (Inst->hasOneUse() &&
+          cast<Instruction>(Inst->use_back())->getParent() == BB &&
+          !isa<PHINode>(Inst->use_back())) continue;
+
+      // If this is an alloca in the entry block, it's not a real register
+      // value.
+      if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst))
+        if (isa<ConstantInt>(AI->getArraySize()) && BB == F->begin())
+          continue;
+
+      // Avoid iterator invalidation by copying users to a temporary vector.
+      // Only users outside this block, or PHI users, can extend the live range
+      // beyond the defining block.
+      SmallVector<Instruction*,16> Users;
+      for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end();
+           UI != E; ++UI) {
+        Instruction *User = cast<Instruction>(*UI);
+        if (User->getParent() != BB || isa<PHINode>(User))
+          Users.push_back(User);
+      }
+
+      // Scan all of the uses and see if the live range is live across an unwind
+      // edge. If we find a use live across an invoke edge, create an alloca
+      // and spill the value.
+
+      // Find all of the blocks that this value is live in.
+      std::set<BasicBlock*> LiveBBs;
+      LiveBBs.insert(Inst->getParent());
+      while (!Users.empty()) {
+        Instruction *U = Users.back();
+        Users.pop_back();
+
+        if (!isa<PHINode>(U)) {
+          MarkBlocksLiveIn(U->getParent(), LiveBBs);
+        } else {
+          // Uses for a PHI node occur in their predecessor block.
+          PHINode *PN = cast<PHINode>(U);
+          for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+            if (PN->getIncomingValue(i) == Inst)
+              MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs);
+        }
+      }
+
+      // Now that we know all of the blocks that this thing is live in, see if
+      // it includes any of the unwind locations.
+      bool NeedsSpill = false;
+      for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
+        BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest();
+        if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) {
+          NeedsSpill = true;
+          // One live unwind destination is enough; no need to look further.
+          break;
+        }
+      }
+
+      // If we decided we need a spill, do it.
+      if (NeedsSpill) {
+        ++NumSpilled;
+        DemoteRegToStack(*Inst, true);
+      }
+    }
+}
+
+// insertExpensiveEHSupport - Lower the invokes of F using a setjmp in the
+// entry block and a longjmp-based unwind path. Returns false, leaving F
+// untouched, when F has no invoke terminators; returns true otherwise.
+bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
+ SmallVector<ReturnInst*,16> Returns;
+ SmallVector<InvokeInst*,16> Invokes;
+ UnreachableInst* UnreachablePlaceholder = 0;
+
+ // Collect the return and invoke terminators of every block.
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ // Remember all return instructions in case we insert an invoke into this
+ // function.
+ Returns.push_back(RI);
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+ Invokes.push_back(II);
+ }
+
+ if (Invokes.empty()) return false;
+
+ NumInvokes += Invokes.size();
+
+ // TODO: This is not an optimal way to do this. In particular, this always
+ // inserts setjmp calls into the entries of functions with invoke instructions
+ // even though there are possibly paths through the function that do not
+ // execute any invokes. In particular, for functions with early exits, e.g.
+ // the 'addMove' method in hexxagon, it would be nice to not have to do the
+ // setjmp stuff on the early exit path. This requires a bit of dataflow, but
+ // would not be too hard to do.
+
+ // If we have an invoke instruction, insert a setjmp that dominates all
+ // invokes. After the setjmp, use a cond branch that goes to the original
+ // code path on zero, and to a designated 'catch' block of nonzero.
+ Value *OldJmpBufPtr = 0;
+ // NOTE: this condition always holds here, because the empty case already
+ // returned above.
+ if (!Invokes.empty()) {
+ // First thing we need to do is scan the whole function for values that are
+ // live across unwind edges. Each value that is live across an unwind edge
+ // we spill into a stack location, guaranteeing that there is nothing live
+ // across the unwind edge. This process also splits all critical edges
+ // coming out of invoke's.
+ splitLiveRangesLiveAcrossInvokes(Invokes);
+
+ BasicBlock *EntryBB = F.begin();
+
+ // Create an alloca for the incoming jump buffer ptr and the new jump buffer
+ // that needs to be restored on all exits from the function. This is an
+ // alloca because the value needs to be live across invokes.
+ const TargetLowering *TLI = TM ? TM->getTargetLowering() : 0;
+ unsigned Align = TLI ? TLI->getJumpBufAlignment() : 0;
+ AllocaInst *JmpBuf =
+ new AllocaInst(JBLinkTy, 0, Align,
+ "jblink", F.begin()->begin());
+
+ // Index {0, 1}: the element of the jblink alloca used to save the old list
+ // head.
+ Value *Idx[] = { Constant::getNullValue(Type::getInt32Ty(F.getContext())),
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), 1) };
+ OldJmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx, "OldBuf",
+ EntryBB->getTerminator());
+
+ // Copy the JBListHead to the alloca.
+ Value *OldBuf = new LoadInst(JBListHead, "oldjmpbufptr", true,
+ EntryBB->getTerminator());
+ new StoreInst(OldBuf, OldJmpBufPtr, true, EntryBB->getTerminator());
+
+ // Add the new jumpbuf to the list.
+ new StoreInst(JmpBuf, JBListHead, true, EntryBB->getTerminator());
+
+ // Create the catch block. The catch block is basically a big switch
+ // statement that goes to all of the invoke catch blocks.
+ BasicBlock *CatchBB =
+ BasicBlock::Create(F.getContext(), "setjmp.catch", &F);
+
+ // Create an alloca which keeps track of the stack pointer before every
+ // invoke, this allows us to properly restore the stack pointer after
+ // long jumping.
+ AllocaInst *StackPtr = new AllocaInst(Type::getInt8PtrTy(F.getContext()), 0,
+ "stackptr", EntryBB->begin());
+
+ // Create an alloca which keeps track of which invoke is currently
+ // executing. For normal calls it contains zero.
+ AllocaInst *InvokeNum = new AllocaInst(Type::getInt32Ty(F.getContext()), 0,
+ "invokenum",EntryBB->begin());
+ new StoreInst(ConstantInt::get(Type::getInt32Ty(F.getContext()), 0),
+ InvokeNum, true, EntryBB->getTerminator());
+
+ // Insert a load in the Catch block, and a switch on its value. By default,
+ // we go to a block that just does an unwind (which is the correct action
+ // for a standard call). We insert an unreachable instruction here and
+ // modify the block to jump to the correct unwinding pad later.
+ BasicBlock *UnwindBB = BasicBlock::Create(F.getContext(), "unwindbb", &F);
+ UnreachablePlaceholder = new UnreachableInst(F.getContext(), UnwindBB);
+
+ Value *CatchLoad = new LoadInst(InvokeNum, "invoke.num", true, CatchBB);
+ SwitchInst *CatchSwitch =
+ SwitchInst::Create(CatchLoad, UnwindBB, Invokes.size(), CatchBB);
+
+ // Now that things are set up, insert the setjmp call itself.
+
+ // Split the entry block to insert the conditional branch for the setjmp.
+ BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(),
+ "setjmp.cont");
+
+ // Index {0, 0}: the jump buffer element itself.
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 0);
+ Value *JmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx, "TheJmpBuf",
+ EntryBB->getTerminator());
+ JmpBufPtr = new BitCastInst(JmpBufPtr,
+ Type::getInt8PtrTy(F.getContext()),
+ "tmp", EntryBB->getTerminator());
+ Value *SJRet = CallInst::Create(SetJmpFn, JmpBufPtr, "sjret",
+ EntryBB->getTerminator());
+
+ // Compare the return value to zero.
+ Value *IsNormal = new ICmpInst(EntryBB->getTerminator(),
+ ICmpInst::ICMP_EQ, SJRet,
+ Constant::getNullValue(SJRet->getType()),
+ "notunwind");
+ // Nuke the uncond branch.
+ EntryBB->getTerminator()->eraseFromParent();
+
+ // Put in a new condbranch in its place.
+ BranchInst::Create(ContBlock, CatchBB, IsNormal, EntryBB);
+
+ // At this point, we are all set up, rewrite each invoke instruction.
+ // Invoke numbers start at 1; zero is the value stored in InvokeNum above.
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i)
+ rewriteExpensiveInvoke(Invokes[i], i+1, InvokeNum, StackPtr, CatchSwitch);
+ }
+
+ // We know that there is at least one unwind.
+
+ // Create three new blocks, the block to load the jmpbuf ptr and compare
+ // against null, the block to do the longjmp, and the error block for if it
+ // is null. Add them at the end of the function because they are not hot.
+ BasicBlock *UnwindHandler = BasicBlock::Create(F.getContext(),
+ "dounwind", &F);
+ BasicBlock *UnwindBlock = BasicBlock::Create(F.getContext(), "unwind", &F);
+ BasicBlock *TermBlock = BasicBlock::Create(F.getContext(), "unwinderror", &F);
+
+ // If this function contains an invoke, restore the old jumpbuf ptr.
+ // NOTE(review): OldJmpBufPtr is only assigned inside the block above, which
+ // always runs once we get past the early return, so the else arm below looks
+ // unreachable from this function -- confirm before relying on it.
+ Value *BufPtr;
+ if (OldJmpBufPtr) {
+ // Before the return, insert a copy from the saved value to the new value.
+ // NOTE(review): unlike the load/store emitted before each return at the end
+ // of this function, these two pass no 'true' flag (presumably the volatile
+ // flag) -- confirm whether that difference is intended.
+ BufPtr = new LoadInst(OldJmpBufPtr, "oldjmpbufptr", UnwindHandler);
+ new StoreInst(BufPtr, JBListHead, UnwindHandler);
+ } else {
+ BufPtr = new LoadInst(JBListHead, "ehlist", UnwindHandler);
+ }
+
+ // Load the JBList, if it's null, then there was no catch!
+ Value *NotNull = new ICmpInst(*UnwindHandler, ICmpInst::ICMP_NE, BufPtr,
+ Constant::getNullValue(BufPtr->getType()),
+ "notnull");
+ BranchInst::Create(UnwindBlock, TermBlock, NotNull, UnwindHandler);
+
+ // Create the block to do the longjmp.
+ // Get a pointer to the jmpbuf and longjmp.
+ // Idx is first used as the GEP index list {0, 0}, then reused as the two
+ // arguments of the longjmp call: the i8* buffer pointer and the constant 1.
+ Value *Idx[] = { Constant::getNullValue(Type::getInt32Ty(F.getContext())),
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), 0) };
+ Idx[0] = GetElementPtrInst::Create(BufPtr, Idx, "JmpBuf", UnwindBlock);
+ Idx[0] = new BitCastInst(Idx[0],
+ Type::getInt8PtrTy(F.getContext()),
+ "tmp", UnwindBlock);
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1);
+ CallInst::Create(LongJmpFn, Idx, "", UnwindBlock);
+ new UnreachableInst(F.getContext(), UnwindBlock);
+
+ // Set up the term block ("throw without a catch").
+ new UnreachableInst(F.getContext(), TermBlock);
+
+ // Insert a call to abort()
+ CallInst::Create(AbortFn, "",
+ TermBlock->getTerminator())->setTailCall();
+
+ // Replace the inserted unreachable with a branch to the unwind handler.
+ if (UnreachablePlaceholder) {
+ BranchInst::Create(UnwindHandler, UnreachablePlaceholder);
+ UnreachablePlaceholder->eraseFromParent();
+ }
+
+ // Finally, for any returns from this function, if this function contains an
+ // invoke, restore the old jmpbuf pointer to its input value.
+ if (OldJmpBufPtr) {
+ for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
+ ReturnInst *R = Returns[i];
+
+ // Before the return, insert a copy from the saved value to the new value.
+ Value *OldBuf = new LoadInst(OldJmpBufPtr, "oldjmpbufptr", true, R);
+ new StoreInst(OldBuf, JBListHead, true, R);
+ }
+ }
+
+ return true;
+}
+
+// runOnFunction - Pass entry point.  Dispatches to the lowering strategy
+// selected by useExpensiveEHSupport and forwards its "changed" result.
+bool LowerInvoke::runOnFunction(Function &F) {
+  return useExpensiveEHSupport ? insertExpensiveEHSupport(F)
+                               : insertCheapEHSupport(F);
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
new file mode 100644
index 000000000000..2d2a8a54a0f2
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
@@ -0,0 +1,323 @@
+//===- LowerSwitch.cpp - Eliminate Switch instructions --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The LowerSwitch transformation rewrites switch instructions with a sequence
+// of branches, which allows targets to get away with not implementing the
+// switch instruction until it is convenient.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
+#include <algorithm>
+using namespace llvm;
+
+namespace {
+ /// LowerSwitch Pass - Replace all SwitchInst instructions with chained branch
+ /// instructions.
+ class LowerSwitch : public FunctionPass {
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ LowerSwitch() : FunctionPass(ID) {
+ initializeLowerSwitchPass(*PassRegistry::getPassRegistry());
+ }
+
+ // Lowers every switch terminator found in F; returns true if at least one
+ // switch was rewritten.
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ // This is a cluster of orthogonal Transforms
+ AU.addPreserved<UnifyFunctionExitNodes>();
+ AU.addPreserved("mem2reg");
+ AU.addPreservedID(LowerInvokePassID);
+ }
+
+ // A cluster of case values, bounded by Low and High, that all branch to BB.
+ // A single case value is represented with Low == High.
+ struct CaseRange {
+ Constant* Low;
+ Constant* High;
+ BasicBlock* BB;
+
+ CaseRange(Constant *low = 0, Constant *high = 0, BasicBlock *bb = 0) :
+ Low(low), High(high), BB(bb) { }
+ };
+
+ typedef std::vector<CaseRange> CaseVector;
+ typedef std::vector<CaseRange>::iterator CaseItr;
+ private:
+ // Replaces one switch instruction with a tree of compare-and-branch blocks.
+ void processSwitchInst(SwitchInst *SI);
+
+ // Recursively builds the binary lookup over [Begin, End) and returns its
+ // root block.
+ BasicBlock* switchConvert(CaseItr Begin, CaseItr End, Value* Val,
+ BasicBlock* OrigBlock, BasicBlock* Default);
+ // Builds the block that tests Val against a single CaseRange.
+ BasicBlock* newLeafBlock(CaseRange& Leaf, Value* Val,
+ BasicBlock* OrigBlock, BasicBlock* Default);
+ // Fills Cases from SI, merges adjacent cases that share a destination, and
+ // returns the number of compares the result needs.
+ unsigned Clusterify(CaseVector& Cases, SwitchInst *SI);
+ };
+
+  /// The comparison function for sorting the switch case values in the vector.
+  /// C1 orders before C2 when the whole of C1 lies below C2, i.e. when
+  /// C1.High is (signed) less than C2.Low.  For single-value cases
+  /// (Low == High) this is the plain signed "less than" on the case value.
+  /// Comparing High against Low keeps the predicate irreflexive for real
+  /// ranges as well, as std::sort requires of a strict weak ordering.
+  /// WARNING: Case ranges should be disjoint!
+  struct CaseCmp {
+    bool operator () (const LowerSwitch::CaseRange& C1,
+                      const LowerSwitch::CaseRange& C2) const {
+
+      const ConstantInt* CI1 = cast<const ConstantInt>(C1.High);
+      const ConstantInt* CI2 = cast<const ConstantInt>(C2.Low);
+      return CI1->getValue().slt(CI2->getValue());
+    }
+  };
+}
+
+// Storage for the pass identifier; its address is what identifies the pass.
+char LowerSwitch::ID = 0;
+// Register the pass under the command-line name "lowerswitch".
+INITIALIZE_PASS(LowerSwitch, "lowerswitch",
+ "Lower SwitchInst's to branches", false, false)
+
+// Publicly exposed interface to pass...
+char &llvm::LowerSwitchID = LowerSwitch::ID;
+// createLowerSwitchPass - Interface to this file...
+// Returns a newly allocated LowerSwitch pass.
+FunctionPass *llvm::createLowerSwitchPass() {
+ return new LowerSwitch();
+}
+
+// runOnFunction - Lower the switch terminator of every block that was in F
+// when the walk reached it.  Returns true if any switch was lowered.
+bool LowerSwitch::runOnFunction(Function &F) {
+  bool MadeChange = false;
+
+  Function::iterator BBI = F.begin(), BBE = F.end();
+  while (BBI != BBE) {
+    // Step past the block before lowering it, so that the blocks created by
+    // processSwitchInst are never visited.
+    BasicBlock *BB = BBI++;
+
+    SwitchInst *Switch = dyn_cast<SwitchInst>(BB->getTerminator());
+    if (!Switch)
+      continue;
+
+    MadeChange = true;
+    processSwitchInst(Switch);
+  }
+
+  return MadeChange;
+}
+
+// operator<< - Debug printer for a CaseVector.  Emits the ranges as
+// "[<low> -<high>, <low> -<high>, ...]".
+//
+static raw_ostream& operator<<(raw_ostream &O,
+                               const LowerSwitch::CaseVector &C)
+    LLVM_ATTRIBUTE_USED;
+static raw_ostream& operator<<(raw_ostream &O,
+                               const LowerSwitch::CaseVector &C) {
+  O << "[";
+
+  for (unsigned Idx = 0, NumRanges = C.size(); Idx != NumRanges; ++Idx) {
+    // Separate entries with ", " -- nothing before the first one.
+    if (Idx != 0)
+      O << ", ";
+    const LowerSwitch::CaseRange &Range = C[Idx];
+    O << *Range.Low << " -" << *Range.High;
+  }
+
+  return O << "]";
+}
+
+// switchConvert - Build a binary search over the sorted case ranges in
+// [Begin, End) and return the block that roots it.  A single range becomes a
+// leaf block; otherwise the range is halved, both halves are converted
+// recursively, and a node block comparing Val against the low bound of the
+// first range of the upper half selects between them.  The caller must pass
+// a non-empty range.
+//
+BasicBlock* LowerSwitch::switchConvert(CaseItr Begin, CaseItr End,
+                                       Value* Val, BasicBlock* OrigBlock,
+                                       BasicBlock* Default)
+{
+  const unsigned NumRanges = End - Begin;
+  if (NumRanges == 1)
+    return newLeafBlock(*Begin, Val, OrigBlock, Default);
+
+  const CaseItr Split = Begin + NumRanges / 2;
+
+  std::vector<CaseRange> LowerHalf(Begin, Split);
+  DEBUG(dbgs() << "LHS: " << LowerHalf << "\n");
+  std::vector<CaseRange> UpperHalf(Split, End);
+  DEBUG(dbgs() << "RHS: " << UpperHalf << "\n");
+
+  // The pivot lives in the caller's range, not in the copies above, so it
+  // stays valid across the recursive calls.
+  CaseRange& Pivot = *Split;
+  DEBUG(dbgs() << "Pivot ==> "
+               << cast<ConstantInt>(Pivot.Low)->getValue() << " -"
+               << cast<ConstantInt>(Pivot.High)->getValue() << "\n");
+
+  // Convert the left half before the right half: every new block is inserted
+  // right after OrigBlock, so creation order fixes the block layout.
+  BasicBlock* LeftRoot = switchConvert(LowerHalf.begin(), LowerHalf.end(), Val,
+                                       OrigBlock, Default);
+  BasicBlock* RightRoot = switchConvert(UpperHalf.begin(), UpperHalf.end(), Val,
+                                        OrigBlock, Default);
+
+  // Create a new node that checks if the value is < pivot. Go to the
+  // left branch if it is and right branch if not.
+  BasicBlock* NewNode = BasicBlock::Create(Val->getContext(), "NodeBlock");
+  Function::iterator InsertPos = OrigBlock;
+  OrigBlock->getParent()->getBasicBlockList().insert(++InsertPos, NewNode);
+
+  ICmpInst* Comp = new ICmpInst(*NewNode, ICmpInst::ICMP_SLT,
+                                Val, Pivot.Low, "Pivot");
+  BranchInst::Create(LeftRoot, RightRoot, Comp, NewNode);
+  return NewNode;
+}
+
+// newLeafBlock - Create a leaf of the binary lookup tree.  The leaf tests
+// whether Val falls inside Leaf's [Low, High] range: if so it branches to
+// Leaf.BB, otherwise to Default.  At this point in the tree the value cannot
+// match any other case, which is why the fall-through goes to "default".
+// PHI nodes in Leaf.BB are patched so that the edge that used to come from
+// OrigBlock now comes from the new leaf.
+//
+BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
+                                      BasicBlock* OrigBlock,
+                                      BasicBlock* Default)
+{
+  BasicBlock* NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock");
+  Function::iterator InsertPos = OrigBlock;
+  OrigBlock->getParent()->getBasicBlockList().insert(++InsertPos, NewLeaf);
+
+  // Emit comparison
+  ICmpInst* Comp = NULL;
+  if (Leaf.Low != Leaf.High) {
+    // Make range comparison
+    ConstantInt* LowCI = cast<ConstantInt>(Leaf.Low);
+    if (LowCI->isMinValue(true /*isSigned*/)) {
+      // Val >= Min && Val <= Hi --> Val <= Hi
+      Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SLE, Val, Leaf.High,
+                          "SwitchLeaf");
+    } else if (LowCI->isZero()) {
+      // Val >= 0 && Val <= Hi --> Val <=u Hi
+      Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High,
+                          "SwitchLeaf");
+    } else {
+      // Emit V-Lo <=u Hi-Lo
+      Constant* NegLo = ConstantExpr::getNeg(Leaf.Low);
+      Instruction* Offset = BinaryOperator::CreateAdd(Val, NegLo,
+                                                      Val->getName()+".off",
+                                                      NewLeaf);
+      Constant *UpperBound = ConstantExpr::getAdd(NegLo, Leaf.High);
+      Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Offset, UpperBound,
+                          "SwitchLeaf");
+    }
+  } else {
+    // Single value: make the seteq instruction...
+    Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_EQ, Val,
+                        Leaf.Low, "SwitchLeaf");
+  }
+
+  // Make the conditional branch...
+  BasicBlock* Succ = Leaf.BB;
+  BranchInst::Create(Succ, Default, Comp, NewLeaf);
+
+  // If there were any PHI nodes in this successor, rewrite one entry
+  // from OrigBlock to come from NewLeaf.
+  for (BasicBlock::iterator I = Succ->begin();
+       PHINode* PN = dyn_cast<PHINode>(I); ++I) {
+    // A cluster of N case values arrives with N entries for OrigBlock;
+    // remove all but one of them.
+    uint64_t Extra = cast<ConstantInt>(Leaf.High)->getSExtValue() -
+                     cast<ConstantInt>(Leaf.Low)->getSExtValue();
+    for (uint64_t Remaining = Extra; Remaining != 0; --Remaining)
+      PN->removeIncomingValue(OrigBlock);
+
+    int BlockIdx = PN->getBasicBlockIndex(OrigBlock);
+    assert(BlockIdx != -1 && "Switch didn't go to this successor??");
+    PN->setIncomingBlock((unsigned)BlockIdx, NewLeaf);
+  }
+
+  return NewLeaf;
+}
+
+// Clusterify - Transform simple list of Cases into list of CaseRange's.
+//
+// Appends one single-value CaseRange per case of SI to Cases, sorts them by
+// signed case value, and merges neighbours whose values are consecutive and
+// whose destination block is the same.  Returns the number of compares needed
+// for the resulting clusters: one per single value, two per real range.
+unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
+  unsigned numCmps = 0;
+
+  // Start with "simple" cases
+  for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i)
+    Cases.push_back(CaseRange(i.getCaseValue(), i.getCaseValue(),
+                              i.getCaseSuccessor()));
+
+  std::sort(Cases.begin(), Cases.end(), CaseCmp());
+
+  // Merge case into clusters.  I is the cluster being grown; surviving
+  // entries are compacted towards the front and the tail is erased once, so
+  // the merge is linear instead of erasing from the middle each time.
+  if (Cases.size() >= 2) {
+    CaseItr I = Cases.begin();
+    for (CaseItr J = llvm::next(I), E = Cases.end(); J != E; ++J) {
+      int64_t nextValue = cast<ConstantInt>(J->Low)->getSExtValue();
+      int64_t currentValue = cast<ConstantInt>(I->High)->getSExtValue();
+      BasicBlock* nextBB = J->BB;
+      BasicBlock* currentBB = I->BB;
+
+      // Case values are unique and sorted, so currentValue + 1 cannot
+      // overflow, unlike nextValue - currentValue for far-apart values.
+      assert(nextValue > currentValue && "Cases should be strictly ascending");
+
+      // If the two neighboring cases go to the same destination, merge them
+      // into a single case.
+      if ((nextValue == currentValue + 1) && (currentBB == nextBB)) {
+        I->High = J->High;
+      } else if (++I != J) {
+        *I = *J;
+      }
+    }
+    Cases.erase(llvm::next(I), Cases.end());
+  }
+
+  for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) {
+    if (I->Low != I->High)
+      // A range counts double, since it requires two compares.
+      ++numCmps;
+  }
+
+  return numCmps;
+}
+
+// processSwitchInst - Replace the specified switch instruction with a sequence
+// of chained if-then insts in a balanced binary search.  A switch without any
+// case simply becomes an unconditional branch to its default destination.
+//
+void LowerSwitch::processSwitchInst(SwitchInst *SI) {
+  BasicBlock *OrigBlock = SI->getParent();
+  BasicBlock *Default = SI->getDefaultDest();
+
+  // If there is only the default destination, don't bother with the code below.
+  if (SI->getNumCases() == 0) {
+    BranchInst::Create(Default, OrigBlock);
+    SI->eraseFromParent();
+    return;
+  }
+
+  Value *Val = SI->getCondition();  // The value we are switching on...
+
+  // Create a new, empty default block so that the new hierarchy of
+  // if-then statements go to this and the PHI nodes are happy.
+  BasicBlock *NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault");
+  OrigBlock->getParent()->getBasicBlockList().insert(Default, NewDefault);
+
+  BranchInst::Create(Default, NewDefault);
+
+  // If there is an entry in any PHI nodes for the default edge, make sure
+  // to update them as well.
+  for (BasicBlock::iterator I = Default->begin();
+       PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+    int BlockIdx = PN->getBasicBlockIndex(OrigBlock);
+    assert(BlockIdx != -1 && "Switch didn't go to this successor??");
+    PN->setIncomingBlock((unsigned)BlockIdx, NewDefault);
+  }
+
+  // Prepare cases vector.
+  CaseVector Cases;
+  unsigned numCmps = Clusterify(Cases, SI);
+
+  DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
+               << ". Total compares: " << numCmps << "\n");
+  DEBUG(dbgs() << "Cases: " << Cases << "\n");
+  (void)numCmps;
+
+  BasicBlock *TreeRoot = switchConvert(Cases.begin(), Cases.end(), Val,
+                                       OrigBlock, NewDefault);
+
+  // Branch to our shiny new if-then stuff...
+  BranchInst::Create(TreeRoot, OrigBlock);
+
+  // We are now done with the switch instruction, delete it.
+  SI->eraseFromParent();
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp
new file mode 100644
index 000000000000..61b3965d8f11
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp
@@ -0,0 +1,90 @@
+//===- Mem2Reg.cpp - The -mem2reg pass, a wrapper around the Utils lib ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is a simple pass wrapper around the PromoteMemToReg function call
+// exposed by the Utils library.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mem2reg"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
+using namespace llvm;
+
+STATISTIC(NumPromoted, "Number of alloca's promoted");
+
+namespace {
+ // PromotePass - FunctionPass wrapper that promotes the promotable allocas
+ // found in a function's entry block.
+ struct PromotePass : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ PromotePass() : FunctionPass(ID) {
+ initializePromotePassPass(*PassRegistry::getPassRegistry());
+ }
+
+ // runOnFunction - To run this pass, first we calculate the alloca
+ // instructions that are safe for promotion, then we promote each one.
+ //
+ virtual bool runOnFunction(Function &F);
+
+ // Requires the dominator tree, which runOnFunction passes to
+ // PromoteMemToReg.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTree>();
+ AU.setPreservesCFG();
+ // This is a cluster of orthogonal Transforms
+ AU.addPreserved<UnifyFunctionExitNodes>();
+ AU.addPreservedID(LowerSwitchID);
+ AU.addPreservedID(LowerInvokePassID);
+ }
+ };
+} // end of anonymous namespace
+
+// Storage for the pass identifier; its address is what identifies the pass.
+char PromotePass::ID = 0;
+// Register the pass under the name "mem2reg", declaring its dependency on
+// the DominatorTree analysis.
+INITIALIZE_PASS_BEGIN(PromotePass, "mem2reg", "Promote Memory to Register",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(PromotePass, "mem2reg", "Promote Memory to Register",
+ false, false)
+
+// runOnFunction - Repeatedly collect the promotable allocas of the entry
+// block and promote them, until a scan finds none.  Returns true if at least
+// one round of promotion happened.
+bool PromotePass::runOnFunction(Function &F) {
+  BasicBlock &Entry = F.getEntryBlock();  // Get the entry node for the function
+  DominatorTree &DT = getAnalysis<DominatorTree>();
+
+  std::vector<AllocaInst*> Promotable;
+  bool MadeChange = false;
+
+  while (true) {
+    Promotable.clear();
+
+    // Find allocas that are safe to promote, by looking at all instructions in
+    // the entry node except the last one.
+    BasicBlock::iterator Inst = Entry.begin(), Last = --Entry.end();
+    for (; Inst != Last; ++Inst) {
+      AllocaInst *AI = dyn_cast<AllocaInst>(Inst);
+      if (AI && isAllocaPromotable(AI))
+        Promotable.push_back(AI);
+    }
+
+    if (Promotable.empty())
+      break;
+
+    PromoteMemToReg(Promotable, DT);
+    NumPromoted += Promotable.size();
+    MadeChange = true;
+  }
+
+  return MadeChange;
+}
+
+// createPromoteMemoryToRegister - Provide an entry point to create this pass.
+// Returns a newly allocated PromotePass.
+//
+FunctionPass *llvm::createPromoteMemoryToRegisterPass() {
+ return new PromotePass();
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp b/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp
new file mode 100644
index 000000000000..c37045312296
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp
@@ -0,0 +1,145 @@
+//===- MetaRenamer.cpp - Rename everything with metasyntactic names -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass renames everything with metasyntactic names. The intent is to use
+// this pass after bugpoint reduction to conceal the nature of the original
+// program.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/TypeFinder.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+namespace {
+
+  // Linear congruential generator lifted from the sample rand()/srand() in
+  // the ISO C spec.  Deliberately simple and unsuitable for cryptographic
+  // use; it only has to provide enough variety to surprise and delight users.
+  struct PRNG {
+    unsigned long next;
+
+    // Reset the generator state to 'seed'.
+    void srand(unsigned int seed) {
+      next = seed;
+    }
+
+    // Advance the state and return the next value, in the range [0, 32767].
+    int rand() {
+      const unsigned long advanced = next * 1103515245 + 12345;
+      next = advanced;
+      const unsigned int upper = (unsigned int)(advanced / 65536);
+      return upper % 32768;
+    }
+  };
+
+  // MetaRenamer - Module pass that overwrites the names of aliases, globals,
+  // named struct types, functions, arguments, basic blocks and non-void
+  // instructions with generic or metasyntactic names.
+  struct MetaRenamer : public ModulePass {
+    static char ID; // Pass identification, replacement for typeid
+    MetaRenamer() : ModulePass(ID) {
+      initializeMetaRenamerPass(*PassRegistry::getPassRegistry());
+    }
+
+    void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesAll();
+    }
+
+    // Rename everything in M.  Always reports a change.
+    bool runOnModule(Module &M) {
+      static const char *const metaNames[] = {
+        // See http://en.wikipedia.org/wiki/Metasyntactic_variable
+        "foo", "bar", "baz", "quux", "barney", "snork", "zot", "blam", "hoge",
+        "wibble", "wobble", "widget", "wombat", "ham", "eggs", "pluto", "spam"
+      };
+
+      // Seed our PRNG with simple additive sum of ModuleID. We're looking to
+      // simply avoid always having the same function names, and we need to
+      // remain deterministic.
+      const std::string &ModuleID = M.getModuleIdentifier();
+      unsigned int randSeed = 0;
+      for (std::string::const_iterator Ch = ModuleID.begin(),
+           ChEnd = ModuleID.end(); Ch != ChEnd; ++Ch)
+        randSeed += *Ch;
+
+      PRNG prng;
+      prng.srand(randSeed);
+
+      // Rename all aliases
+      for (Module::alias_iterator AI = M.alias_begin(), AE = M.alias_end();
+           AI != AE; ++AI)
+        if (!mustKeepName(AI->getName()))
+          AI->setName("alias");
+
+      // Rename all global variables
+      for (Module::global_iterator GI = M.global_begin(), GE = M.global_end();
+           GI != GE; ++GI)
+        if (!mustKeepName(GI->getName()))
+          GI->setName("global");
+
+      // Rename all struct types
+      TypeFinder StructTypes;
+      StructTypes.run(M, true);
+      for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) {
+        StructType *STy = StructTypes[i];
+        // Literal and unnamed struct types have no name to replace.
+        if (STy->isLiteral() || STy->getName().empty()) continue;
+
+        const char *Meta = metaNames[prng.rand() % array_lengthof(metaNames)];
+        SmallString<128> NameStorage;
+        STy->setName((Twine("struct.") + Meta).toStringRef(NameStorage));
+      }
+
+      // Rename all functions
+      for (Module::iterator FI = M.begin(), FE = M.end();
+           FI != FE; ++FI) {
+        if (mustKeepName(FI->getName()))
+          continue;
+
+        FI->setName(metaNames[prng.rand() % array_lengthof(metaNames)]);
+        runOnFunction(*FI);
+      }
+      return true;
+    }
+
+    // Rename the arguments, basic blocks and instructions of F.  Values of
+    // void type are left unnamed.  Always reports a change.
+    bool runOnFunction(Function &F) {
+      for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end();
+           AI != AE; ++AI) {
+        if (AI->getType()->isVoidTy())
+          continue;
+        AI->setName("arg");
+      }
+
+      for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+        BB->setName("bb");
+
+        for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+          if (I->getType()->isVoidTy())
+            continue;
+          I->setName("tmp");
+        }
+      }
+      return true;
+    }
+
+  private:
+    // True for names the pass leaves alone: those starting with "llvm." and
+    // those whose first character is '\1'.
+    static bool mustKeepName(StringRef Name) {
+      if (Name.startswith("llvm."))
+        return true;
+      return !Name.empty() && Name[0] == 1;
+    }
+  };
+}
+
+// Storage for the pass identifier; its address is what identifies the pass.
+char MetaRenamer::ID = 0;
+// Register the pass under the command-line name "metarenamer".
+INITIALIZE_PASS(MetaRenamer, "metarenamer",
+ "Assign new names to everything", false, false)
+//===----------------------------------------------------------------------===//
+//
+// MetaRenamer - Rename everything with metasyntactic names.
+//
+// Returns a newly allocated MetaRenamer pass.
+ModulePass *llvm::createMetaRenamerPass() {
+ return new MetaRenamer();
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp
new file mode 100644
index 000000000000..ff6e6f9c60d3
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp
@@ -0,0 +1,82 @@
+//===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions perform manipulations on Modules.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+
+using namespace llvm;
+
+// appendToGlobalArray - Append the entry { i32 Priority, void()* F } to the
+// module-level array named 'Array'.
+//
+// If a global called 'Array' already exists, the operands of its initializer
+// are carried over and the old global is erased; a new appending-linkage
+// global with the extended initializer is then created under the same name.
+// An existing global without an initializer contributes no entries.
+static void appendToGlobalArray(const char *Array,
+                                Module &M, Function *F, int Priority) {
+  IRBuilder<> IRB(M.getContext());
+  FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false);
+  StructType *Ty = StructType::get(
+      IRB.getInt32Ty(), PointerType::getUnqual(FnTy), NULL);
+
+  Constant *RuntimeCtorInit = ConstantStruct::get(
+      Ty, IRB.getInt32(Priority), F, NULL);
+
+  // Get the current set of static global constructors and add the new ctor
+  // to the list.
+  SmallVector<Constant *, 16> CurrentCtors;
+  if (GlobalVariable * GVCtor = M.getNamedGlobal(Array)) {
+    // getInitializer() must only be called on a global that has one.
+    if (GVCtor->hasInitializer()) {
+      Constant *Init = GVCtor->getInitializer();
+      unsigned n = Init->getNumOperands();
+      CurrentCtors.reserve(n + 1);
+      for (unsigned i = 0; i != n; ++i)
+        CurrentCtors.push_back(cast<Constant>(Init->getOperand(i)));
+    }
+    GVCtor->eraseFromParent();
+  }
+
+  CurrentCtors.push_back(RuntimeCtorInit);
+
+  // Create a new initializer.
+  ArrayType *AT = ArrayType::get(RuntimeCtorInit->getType(),
+                                 CurrentCtors.size());
+  Constant *NewInit = ConstantArray::get(AT, CurrentCtors);
+
+  // Create the new global variable under the old name.  The old global, if
+  // any, was erased above; no use replacement is performed here.
+  (void)new GlobalVariable(M, NewInit->getType(), false,
+                           GlobalValue::AppendingLinkage, NewInit, Array);
+}
+
+// Append F with the given Priority to the module's "llvm.global_ctors" array.
+void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority) {
+ appendToGlobalArray("llvm.global_ctors", M, F, Priority);
+}
+
+// Append F with the given Priority to the module's "llvm.global_dtors" array.
+void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority) {
+ appendToGlobalArray("llvm.global_dtors", M, F, Priority);
+}
+
+// collectUsedGlobalVariables - Look up "llvm.used" (or "llvm.compiler.used"
+// when CompilerUsed is set) in M and insert every global value it lists into
+// Set.  Returns the list global itself, or null if the module has none.
+// Nothing is inserted when that global has no initializer.
+GlobalVariable *
+llvm::collectUsedGlobalVariables(Module &M, SmallPtrSet<GlobalValue *, 8> &Set,
+                                 bool CompilerUsed) {
+  const char *Name = "llvm.used";
+  if (CompilerUsed)
+    Name = "llvm.compiler.used";
+
+  GlobalVariable *GV = M.getGlobalVariable(Name);
+  if (GV && GV->hasInitializer()) {
+    const ConstantArray *Init = cast<ConstantArray>(GV->getInitializer());
+    const unsigned NumEntries = Init->getNumOperands();
+    for (unsigned Idx = 0; Idx != NumEntries; ++Idx) {
+      // Entries are reached through pointer casts; strip them without
+      // following aliases.
+      Value *Stripped = Init->getOperand(Idx)->stripPointerCastsNoFollowAliases();
+      Set.insert(cast<GlobalValue>(Stripped));
+    }
+  }
+  return GV;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
new file mode 100644
index 000000000000..8f6eee3510d1
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -0,0 +1,1079 @@
+//===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file promotes memory references to be register references. It promotes
+// alloca instructions which only have loads and stores as uses. An alloca is
+// transformed by using iterated dominator frontiers to place PHI nodes, then
+// traversing the function in depth-first order to rewrite loads and stores as
+// appropriate.
+//
+// The algorithm used here is based on:
+//
+// Sreedhar and Gao. A linear time algorithm for placing phi-nodes.
+// In Proceedings of the 22nd ACM SIGPLAN-SIGACT Symposium on Principles of
+// Programming Languages
+// POPL '95. ACM, New York, NY, 62-73.
+//
+// It has been modified to not explicitly use the DJ graph data structure and to
+// directly compute pruned SSA using per-variable liveness information.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mem2reg"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/DIBuilder.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <algorithm>
+#include <queue>
+using namespace llvm;
+
+STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block");
+STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store");
+STATISTIC(NumDeadAlloca, "Number of dead alloca's removed");
+STATISTIC(NumPHIInsert, "Number of PHI nodes inserted");
+
+/// Return true if every user of \p AI is one of the forms handled below:
+///  - a non-volatile load from it,
+///  - a non-volatile store into it (storing the alloca's own address
+///    disqualifies it),
+///  - a lifetime.start / lifetime.end intrinsic call,
+///  - an i8* bitcast, or an all-zero-index i8* GEP, that is itself only used
+///    by lifetime markers.
+/// Any other user makes the alloca non-promotable.
+bool llvm::isAllocaPromotable(const AllocaInst *AI) {
+  // FIXME: If the memory unit is of pointer or integer type, we can permit
+  // assignments to subsections of the memory unit.
+
+  for (Value::const_use_iterator It = AI->use_begin(), End = AI->use_end();
+       It != End; ++It) {
+    const User *U = *It;
+
+    // Atomic loads and stores are acceptable: atomic semantics do not have
+    // any meaning for a local alloca. Volatile accesses are not.
+    if (const LoadInst *Load = dyn_cast<LoadInst>(U)) {
+      if (Load->isVolatile())
+        return false;
+      continue;
+    }
+
+    if (const StoreInst *Store = dyn_cast<StoreInst>(U)) {
+      // Only stores INTO the alloca are allowed, never stores OF its address.
+      if (Store->getOperand(0) == AI)
+        return false;
+      if (Store->isVolatile())
+        return false;
+      continue;
+    }
+
+    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
+      switch (II->getIntrinsicID()) {
+      case Intrinsic::lifetime_start:
+      case Intrinsic::lifetime_end:
+        continue; // Advances the enclosing use loop.
+      default:
+        return false;
+      }
+    }
+
+    if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
+      if (BCI->getType() != Type::getInt8PtrTy(U->getContext()))
+        return false;
+      if (!onlyUsedByLifetimeMarkers(BCI))
+        return false;
+      continue;
+    }
+
+    if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
+      if (GEPI->getType() != Type::getInt8PtrTy(U->getContext()))
+        return false;
+      if (!GEPI->hasAllZeroIndices())
+        return false;
+      if (!onlyUsedByLifetimeMarkers(GEPI))
+        return false;
+      continue;
+    }
+
+    // Anything else is a use we do not know how to rewrite.
+    return false;
+  }
+
+  return true;
+}
+
+namespace {
+
+/// Summary of how a single alloca is used, recomputed by AnalyzeAlloca() for
+/// each alloca that is examined.
+struct AllocaInfo {
+  /// Parent block of each store into the alloca (one entry per store).
+  SmallVector<BasicBlock *, 32> DefiningBlocks;
+  /// Parent block of each load from the alloca (one entry per load).
+  SmallVector<BasicBlock *, 32> UsingBlocks;
+
+  /// The store most recently seen by AnalyzeAlloca(); null if there was none.
+  StoreInst *OnlyStore;
+  /// Parent block of the first user visited; null if there were no users.
+  BasicBlock *OnlyBlock;
+  /// True while every user visited so far lives in OnlyBlock.
+  bool OnlyUsedInOneBlock;
+
+  /// The value operand of the last store visited, or the last load visited,
+  /// whichever came later in the use list.
+  Value *AllocaPointerVal;
+  /// Result of FindAllocaDbgDeclare() for the analyzed alloca.
+  DbgDeclareInst *DbgDeclare;
+
+  /// Reset every field to its "nothing seen yet" state.
+  void clear() {
+    DefiningBlocks.clear();
+    UsingBlocks.clear();
+    OnlyStore = 0;
+    OnlyBlock = 0;
+    OnlyUsedInOneBlock = true;
+    AllocaPointerVal = 0;
+    DbgDeclare = 0;
+  }
+
+  /// Scan the uses of the specified alloca, filling in the AllocaInfo used
+  /// by the rest of the pass to reason about the uses of this alloca.
+  ///
+  /// Every user is expected to be a store or a load; anything else trips the
+  /// cast<LoadInst> below.
+  void AnalyzeAlloca(AllocaInst *AI) {
+    clear();
+
+    Value::use_iterator UI = AI->use_begin();
+    Value::use_iterator UE = AI->use_end();
+    while (UI != UE) {
+      Instruction *UserInst = cast<Instruction>(*UI);
+      ++UI;
+      BasicBlock *UserBB = UserInst->getParent();
+
+      if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
+        // A store defines a new value for the alloca in its block.
+        DefiningBlocks.push_back(UserBB);
+        AllocaPointerVal = Store->getOperand(0);
+        OnlyStore = Store;
+      } else {
+        // Otherwise it must be a load, i.e. a read of the variable.
+        LoadInst *Load = cast<LoadInst>(UserInst);
+        UsingBlocks.push_back(UserBB);
+        AllocaPointerVal = Load;
+      }
+
+      // Track whether all loads and stores stay within one basic block.
+      if (!OnlyUsedInOneBlock)
+        continue;
+      if (!OnlyBlock)
+        OnlyBlock = UserBB;
+      else if (OnlyBlock != UserBB)
+        OnlyUsedInOneBlock = false;
+    }
+
+    DbgDeclare = FindAllocaDbgDeclare(AI);
+  }
+};
+
+/// Work item consumed by RenamePass(): the block to process, the predecessor
+/// it is entered from, and the value vector that accompanies that entry.
+class RenamePassData {
+public:
+  typedef std::vector<Value *> ValVector;
+
+  BasicBlock *BB;
+  BasicBlock *Pred;
+  ValVector Values;
+
+  RenamePassData() : BB(NULL), Pred(NULL), Values() {}
+  RenamePassData(BasicBlock *B, BasicBlock *P, const ValVector &V)
+      : BB(B), Pred(P), Values(V) {}
+
+  /// Exchange the contents of this item with \p RHS. The value vectors are
+  /// swapped in place rather than copied.
+  void swap(RenamePassData &RHS) {
+    Values.swap(RHS.Values);
+    std::swap(Pred, RHS.Pred);
+    std::swap(BB, RHS.BB);
+  }
+};
+
+/// \brief Caches, per basic block, the relative order of the loads and stores
+/// that directly access an alloca.
+///
+/// A block is numbered in a single scan the first time one of its
+/// instructions is queried, so promoting many allocas that live in the same
+/// large block does not rescan that block for every query.
+class LargeBlockInfo {
+  typedef DenseMap<const Instruction *, unsigned> InstNumberMap;
+
+  /// \brief Index of each tracked instruction.
+  ///
+  /// The index is the instruction's position among the interesting
+  /// instructions of its block, counted from the start of the block.
+  InstNumberMap InstNumbers;
+
+public:
+  /// This code only looks at accesses to allocas: loads whose pointer operand
+  /// is an alloca and stores whose destination operand is an alloca.
+  static bool isInterestingInstruction(const Instruction *I) {
+    if (isa<LoadInst>(I))
+      return isa<AllocaInst>(I->getOperand(0));
+    if (isa<StoreInst>(I))
+      return isa<AllocaInst>(I->getOperand(1));
+    return false;
+  }
+
+  /// Get or calculate the index of the specified instruction.
+  unsigned getInstructionIndex(const Instruction *I) {
+    assert(isInterestingInstruction(I) &&
+           "Not a load/store to/from an alloca?");
+
+    InstNumberMap::iterator Known = InstNumbers.find(I);
+    if (Known == InstNumbers.end()) {
+      // Not numbered yet. Scan the whole block and record an index for every
+      // interesting instruction in it, in order to avoid gratuitous rescans.
+      const BasicBlock *Parent = I->getParent();
+      unsigned NextIndex = 0;
+      for (BasicBlock::const_iterator Cur = Parent->begin(),
+                                      End = Parent->end();
+           Cur != End; ++Cur) {
+        if (!isInterestingInstruction(Cur))
+          continue;
+        InstNumbers[Cur] = NextIndex;
+        ++NextIndex;
+      }
+
+      Known = InstNumbers.find(I);
+      assert(Known != InstNumbers.end() && "Didn't insert instruction?");
+    }
+    return Known->second;
+  }
+
+  /// Forget the cached index of \p I, if any.
+  void deleteValue(const Instruction *I) { InstNumbers.erase(I); }
+
+  /// Drop every cached index.
+  void clear() { InstNumbers.clear(); }
+};
+
+/// Holds the working state for promoting a list of allocas and declares the
+/// steps (run, PHI placement, liveness, renaming) that operate on it.
+struct PromoteMem2Reg {
+ /// The alloca instructions being promoted.
+ std::vector<AllocaInst *> Allocas;
+ DominatorTree &DT;
+ DIBuilder DIB;
+
+ /// An AliasSetTracker object to update. If null, don't update it.
+ AliasSetTracker *AST;
+
+ /// Reverse mapping of Allocas.
+ DenseMap<AllocaInst *, unsigned> AllocaLookup;
+
+ /// \brief The PhiNodes we're adding.
+ ///
+ /// That map is used to simplify some Phi nodes as we iterate over it, so
+ /// it should have deterministic iterators. We could use a MapVector, but
+ /// since we already maintain a map from BasicBlock* to a stable numbering
+ /// (BBNumbers), the DenseMap is more efficient (also supports removal).
+ ///
+ /// The key is the pair (BBNumbers[block], index into Allocas).
+ DenseMap<std::pair<unsigned, unsigned>, PHINode *> NewPhiNodes;
+
+ /// For each PHI node, keep track of which entry in Allocas it corresponds
+ /// to.
+ DenseMap<PHINode *, unsigned> PhiToAllocaMap;
+
+ /// If we are updating an AliasSetTracker, then for each alloca that is of
+ /// pointer type, we keep track of what to copyValue to the inserted PHI
+ /// nodes here.
+ std::vector<Value *> PointerAllocaValues;
+
+ /// For each alloca, we keep track of the dbg.declare intrinsic that
+ /// describes it, if any, so that we can convert it to a dbg.value
+ /// intrinsic if the alloca gets promoted.
+ SmallVector<DbgDeclareInst *, 8> AllocaDbgDeclares;
+
+ /// The set of basic blocks the renamer has already visited.
+ ///
+ SmallPtrSet<BasicBlock *, 16> Visited;
+
+ /// Contains a stable numbering of basic blocks to avoid non-deterministic
+ /// behavior.
+ DenseMap<BasicBlock *, unsigned> BBNumbers;
+
+ /// Maps DomTreeNodes to their level in the dominator tree.
+ DenseMap<DomTreeNode *, unsigned> DomLevels;
+
+ /// Lazily compute the number of predecessors a block has.
+ /// Entries hold the predecessor count plus one (see getNumPreds), so that a
+ /// default-constructed zero means "not computed yet".
+ DenseMap<const BasicBlock *, unsigned> BBNumPreds;
+
+public:
+ /// Copies \p Allocas into the working list. The DIBuilder is constructed
+ /// for the module that contains the function of the dominator tree's root
+ /// block.
+ PromoteMem2Reg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
+ AliasSetTracker *AST)
+ : Allocas(Allocas.begin(), Allocas.end()), DT(DT),
+ DIB(*DT.getRoot()->getParent()->getParent()), AST(AST) {}
+
+ void run();
+
+private:
+ /// Remove entry \p AllocaIdx by overwriting it with the last entry and
+ /// shrinking the list. \p AllocaIdx is decremented so that a caller that
+ /// increments it afterwards revisits the slot holding the moved entry.
+ void RemoveFromAllocasList(unsigned &AllocaIdx) {
+ Allocas[AllocaIdx] = Allocas.back();
+ Allocas.pop_back();
+ --AllocaIdx;
+ }
+
+ /// Return the number of predecessors of \p BB, caching the answer in
+ /// BBNumPreds.
+ unsigned getNumPreds(const BasicBlock *BB) {
+ unsigned &NP = BBNumPreds[BB];
+ // Zero means no cached value; the cache stores count + 1 so that a block
+ // with no predecessors is still distinguishable from "not computed".
+ if (NP == 0)
+ NP = std::distance(pred_begin(BB), pred_end(BB)) + 1;
+ return NP - 1;
+ }
+
+ void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
+ AllocaInfo &Info);
+ void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
+ const SmallPtrSet<BasicBlock *, 32> &DefBlocks,
+ SmallPtrSet<BasicBlock *, 32> &LiveInBlocks);
+ void RenamePass(BasicBlock *BB, BasicBlock *Pred,
+ RenamePassData::ValVector &IncVals,
+ std::vector<RenamePassData> &Worklist);
+ bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version);
+};
+
+} // end of anonymous namespace
+
+/// Erase every user of \p AI that is not a load or a store.
+///
+/// Knowing that this alloca is promotable, we know that it's safe to kill all
+/// instructions except for load and store. A non-void user is a bitcast/GEP
+/// whose only users are lifetime intrinsics; those are erased first, then the
+/// bitcast/GEP itself, instead of leaving them for dead code elimination.
+static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
+  Value::use_iterator AllocaUse = AI->use_begin();
+  Value::use_iterator AllocaUseEnd = AI->use_end();
+  while (AllocaUse != AllocaUseEnd) {
+    // Step past this use before the user is (possibly) erased.
+    Instruction *UserInst = cast<Instruction>(*AllocaUse);
+    ++AllocaUse;
+
+    if (isa<LoadInst>(UserInst) || isa<StoreInst>(UserInst))
+      continue;
+
+    if (!UserInst->getType()->isVoidTy()) {
+      Value::use_iterator MarkerUse = UserInst->use_begin();
+      Value::use_iterator MarkerUseEnd = UserInst->use_end();
+      while (MarkerUse != MarkerUseEnd) {
+        Instruction *Marker = cast<Instruction>(*MarkerUse);
+        ++MarkerUse;
+        Marker->eraseFromParent();
+      }
+    }
+
+    UserInst->eraseFromParent();
+  }
+}
+
+/// \brief Rewrite as many loads as possible given a single store.
+///
+/// With exactly one store, every load that the store dominates can simply be
+/// replaced by the stored value. Returns true when that handled every load,
+/// in which case the store, the alloca and its dbg.declare (converted to a
+/// dbg.value first) have been erased. Returns false when some loads were not
+/// dominated by the store; their blocks are left in Info.UsingBlocks and the
+/// caller falls back to the standard promotion algorithm, which will phi
+/// them with undef.
+static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
+                                     LargeBlockInfo &LBI,
+                                     DominatorTree &DT,
+                                     AliasSetTracker *AST) {
+  StoreInst *TheStore = Info.OnlyStore;
+  BasicBlock *StoreBB = TheStore->getParent();
+  // Non-instruction values are always dominated, so only an instruction
+  // operand needs a dominance check against each load.
+  const bool NeedsDominanceCheck = isa<Instruction>(TheStore->getOperand(0));
+  // Position of the store inside StoreBB; -1 until it is first needed.
+  int StoreIndex = -1;
+
+  // Clear out UsingBlocks. We will reconstruct it here if needed.
+  Info.UsingBlocks.clear();
+
+  Value::use_iterator UI = AI->use_begin();
+  Value::use_iterator UE = AI->use_end();
+  while (UI != UE) {
+    // Advance first: the load visited here may be erased below.
+    Instruction *UserInst = cast<Instruction>(*UI);
+    ++UI;
+
+    LoadInst *LI = dyn_cast<LoadInst>(UserInst);
+    if (!LI) {
+      assert(UserInst == TheStore && "Should only have load/stores");
+      continue;
+    }
+
+    if (NeedsDominanceCheck) {
+      BasicBlock *LoadBB = LI->getParent();
+      if (LoadBB == StoreBB) {
+        // Same block: compare instruction indices. A load that comes before
+        // the store cannot be rewritten here.
+        if (StoreIndex == -1)
+          StoreIndex = LBI.getInstructionIndex(TheStore);
+
+        if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) {
+          Info.UsingBlocks.push_back(StoreBB);
+          continue;
+        }
+      } else if (!DT.dominates(StoreBB, LoadBB)) {
+        // Different blocks: the store's block must dominate the load's.
+        Info.UsingBlocks.push_back(LoadBB);
+        continue;
+      }
+    }
+
+    // This load can safely be rewritten. The stored operand is re-read on
+    // every iteration because the replaceAllUsesWith below can rewrite it
+    // when the stored value is itself one of these loads.
+    Value *Replacement = TheStore->getOperand(0);
+    // If the replacement value is the load, this must occur in unreachable
+    // code.
+    if (Replacement == LI)
+      Replacement = UndefValue::get(LI->getType());
+    LI->replaceAllUsesWith(Replacement);
+    if (AST && LI->getType()->isPointerTy())
+      AST->deleteValue(LI);
+    LI->eraseFromParent();
+    LBI.deleteValue(LI);
+  }
+
+  // Any block recorded above still has a load we could not rewrite.
+  if (!Info.UsingBlocks.empty())
+    return false;
+
+  // Record debuginfo for the store and remove the declaration's debuginfo.
+  if (DbgDeclareInst *DDI = Info.DbgDeclare) {
+    DIBuilder DIB(*AI->getParent()->getParent()->getParent());
+    ConvertDebugDeclareToDebugValue(DDI, TheStore, DIB);
+    DDI->eraseFromParent();
+    LBI.deleteValue(DDI);
+  }
+
+  // Remove the (now dead) store and alloca.
+  TheStore->eraseFromParent();
+  LBI.deleteValue(TheStore);
+
+  if (AST)
+    AST->deleteValue(AI);
+  AI->eraseFromParent();
+  LBI.deleteValue(AI);
+  return true;
+}
+
+/// Many allocas are only used within a single basic block. If this is the
+/// case, avoid traversing the CFG and inserting a lot of potentially useless
+/// PHI nodes by just performing a single linear pass over the basic block
+/// using the Alloca.
+///
+/// Each load is replaced with the value of the nearest store above it in the
+/// block. A load with no store above it reads the alloca before it is written
+/// and therefore takes the undef value. This function always completes the
+/// promotion: the loads, the stores, the alloca and its dbg.declare (after a
+/// dbg.value has been recorded for each store) are all erased.
+static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
+                                     LargeBlockInfo &LBI,
+                                     AliasSetTracker *AST) {
+  // The trickiest case to handle is when we have large blocks. Because of this,
+  // this code is optimized assuming that large blocks happen. This does not
+  // significantly pessimize the small block case. This uses LargeBlockInfo to
+  // make it efficient to get the index of various operations in the block.
+
+  // Walk the use-def list of the alloca, getting the locations of all stores.
+  typedef SmallVector<std::pair<unsigned, StoreInst *>, 64> StoresByIndexTy;
+  StoresByIndexTy StoresByIndex;
+
+  for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;
+       ++UI)
+    if (StoreInst *SI = dyn_cast<StoreInst>(*UI))
+      StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI));
+
+  // Sort the stores by their index, making it efficient to do a lookup with a
+  // binary search.
+  std::sort(StoresByIndex.begin(), StoresByIndex.end(), less_first());
+
+  // Walk all of the loads from this alloca, replacing them with the nearest
+  // store above them, if any.
+  for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) {
+    // Advance before touching the load: it is erased at the end of the body.
+    LoadInst *LI = dyn_cast<LoadInst>(*UI++);
+    if (!LI)
+      continue;
+
+    unsigned LoadIdx = LBI.getInstructionIndex(LI);
+
+    // Find the nearest store that has a lower index than this load.
+    StoresByIndexTy::iterator I =
+        std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(),
+                         std::make_pair(LoadIdx, static_cast<StoreInst *>(0)),
+                         less_first());
+
+    Value *ReplVal;
+    if (I == StoresByIndex.begin()) {
+      // If there is no store before this load, the load takes the undef value.
+      ReplVal = UndefValue::get(LI->getType());
+    } else {
+      // Otherwise, there was a store before this load, the load takes its
+      // value.
+      ReplVal = llvm::prior(I)->second->getOperand(0);
+      // If the replacement value is the load, this must occur in unreachable
+      // code. Replacing a value with itself is not valid, so use undef, as
+      // rewriteSingleStoreAlloca does in the same situation.
+      if (ReplVal == LI)
+        ReplVal = UndefValue::get(LI->getType());
+    }
+    LI->replaceAllUsesWith(ReplVal);
+
+    if (AST && LI->getType()->isPointerTy())
+      AST->deleteValue(LI);
+    LI->eraseFromParent();
+    LBI.deleteValue(LI);
+  }
+
+  // Remove the (now dead) stores and alloca.
+  while (!AI->use_empty()) {
+    StoreInst *SI = cast<StoreInst>(AI->use_back());
+    // Record debuginfo for the store before removing it.
+    if (DbgDeclareInst *DDI = Info.DbgDeclare) {
+      DIBuilder DIB(*AI->getParent()->getParent()->getParent());
+      ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
+    }
+    SI->eraseFromParent();
+    LBI.deleteValue(SI);
+  }
+
+  if (AST)
+    AST->deleteValue(AI);
+  AI->eraseFromParent();
+  LBI.deleteValue(AI);
+
+  // The alloca's debuginfo can be removed as well.
+  if (DbgDeclareInst *DDI = Info.DbgDeclare) {
+    DDI->eraseFromParent();
+    LBI.deleteValue(DDI);
+  }
+
+  ++NumLocalPromoted;
+}
+
+/// Promote every alloca in Allocas.
+///
+/// Dead allocas, single-store allocas whose loads are all dominated by the
+/// store, and allocas used in only one block are handled by fast paths and
+/// dropped from the list. For the remaining allocas PHI nodes are placed, the
+/// rename worklist is run starting at the function's first block, the allocas
+/// and their dbg.declare intrinsics are erased, PHI nodes that simplify away
+/// are removed, and the surviving PHI nodes receive undef entries for any
+/// predecessor that never supplied an incoming value.
+void PromoteMem2Reg::run() {
+ Function &F = *DT.getRoot()->getParent();
+
+ if (AST)
+ PointerAllocaValues.resize(Allocas.size());
+ AllocaDbgDeclares.resize(Allocas.size());
+
+ AllocaInfo Info;
+ LargeBlockInfo LBI;
+
+ // Note: RemoveFromAllocasList() shrinks Allocas and decrements AllocaNum, so
+ // the ++AllocaNum of this loop then revisits the slot that received the
+ // entry moved in from the back of the list.
+ for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
+ AllocaInst *AI = Allocas[AllocaNum];
+
+ assert(isAllocaPromotable(AI) && "Cannot promote non-promotable alloca!");
+ assert(AI->getParent()->getParent() == &F &&
+ "All allocas should be in the same function, which is same as DF!");
+
+ removeLifetimeIntrinsicUsers(AI);
+
+ if (AI->use_empty()) {
+ // If there are no uses of the alloca, just delete it now.
+ if (AST)
+ AST->deleteValue(AI);
+ AI->eraseFromParent();
+
+ // Remove the alloca from the Allocas list, since it has been processed
+ RemoveFromAllocasList(AllocaNum);
+ ++NumDeadAlloca;
+ continue;
+ }
+
+ // Calculate the set of read and write-locations for each alloca. This is
+ // analogous to finding the 'uses' and 'definitions' of each variable.
+ Info.AnalyzeAlloca(AI);
+
+ // If there is only a single store to this value, replace any loads of
+ // it that are directly dominated by the definition with the value stored.
+ if (Info.DefiningBlocks.size() == 1) {
+ if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AST)) {
+ // The alloca has been processed, move on.
+ RemoveFromAllocasList(AllocaNum);
+ ++NumSingleStore;
+ continue;
+ }
+ }
+
+ // If the alloca is only read and written in one basic block, just perform a
+ // linear sweep over the block to eliminate it.
+ if (Info.OnlyUsedInOneBlock) {
+ promoteSingleBlockAlloca(AI, Info, LBI, AST);
+
+ // The alloca has been processed, move on.
+ RemoveFromAllocasList(AllocaNum);
+ continue;
+ }
+
+ // If we haven't computed dominator tree levels, do so now.
+ if (DomLevels.empty()) {
+ SmallVector<DomTreeNode *, 32> Worklist;
+
+ DomTreeNode *Root = DT.getRootNode();
+ DomLevels[Root] = 0;
+ Worklist.push_back(Root);
+
+ // Each child is assigned its parent's level plus one.
+ while (!Worklist.empty()) {
+ DomTreeNode *Node = Worklist.pop_back_val();
+ unsigned ChildLevel = DomLevels[Node] + 1;
+ for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end();
+ CI != CE; ++CI) {
+ DomLevels[*CI] = ChildLevel;
+ Worklist.push_back(*CI);
+ }
+ }
+ }
+
+ // If we haven't computed a numbering for the BB's in the function, do so
+ // now.
+ if (BBNumbers.empty()) {
+ unsigned ID = 0;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+ BBNumbers[I] = ID++;
+ }
+
+ // If we have an AST to keep updated, remember some pointer value that is
+ // stored into the alloca.
+ if (AST)
+ PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal;
+
+ // Remember the dbg.declare intrinsic describing this alloca, if any.
+ if (Info.DbgDeclare)
+ AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare;
+
+ // Keep the reverse mapping of the 'Allocas' array for the rename pass.
+ AllocaLookup[Allocas[AllocaNum]] = AllocaNum;
+
+ // At this point, we're committed to promoting the alloca using IDF's, and
+ // the standard SSA construction algorithm. Determine which blocks need PHI
+ // nodes and see if we can optimize out some work by avoiding insertion of
+ // dead phi nodes.
+ DetermineInsertionPoint(AI, AllocaNum, Info);
+ }
+
+ if (Allocas.empty())
+ return; // All of the allocas must have been trivial!
+
+ LBI.clear();
+
+ // Set the incoming values for the basic block to be undef values for all of
+ // the alloca's. We do this in case there is a load of a value that has not
+ // been stored yet. In this case, it will get this undef value.
+ //
+ RenamePassData::ValVector Values(Allocas.size());
+ for (unsigned i = 0, e = Allocas.size(); i != e; ++i)
+ Values[i] = UndefValue::get(Allocas[i]->getAllocatedType());
+
+ // Walks all basic blocks in the function performing the SSA rename algorithm
+ // and inserting the phi nodes we marked as necessary
+ //
+ std::vector<RenamePassData> RenamePassWorkList;
+ RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values));
+ do {
+ // Swap the last work item out of the list instead of copying it.
+ RenamePassData RPD;
+ RPD.swap(RenamePassWorkList.back());
+ RenamePassWorkList.pop_back();
+ // RenamePass may add new worklist entries.
+ RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList);
+ } while (!RenamePassWorkList.empty());
+
+ // The renamer uses the Visited set to avoid infinite loops. Clear it now.
+ Visited.clear();
+
+ // Remove the allocas themselves from the function.
+ for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
+ Instruction *A = Allocas[i];
+
+ // If there are any uses of the alloca instructions left, they must be in
+ // unreachable basic blocks that were not processed by walking the dominator
+ // tree. Point those uses at undef so the alloca can be erased.
+ if (!A->use_empty())
+ A->replaceAllUsesWith(UndefValue::get(A->getType()));
+ if (AST)
+ AST->deleteValue(A);
+ A->eraseFromParent();
+ }
+
+ // Remove alloca's dbg.declare intrinsics from the function.
+ for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i)
+ if (DbgDeclareInst *DDI = AllocaDbgDeclares[i])
+ DDI->eraseFromParent();
+
+ // Loop over all of the PHI nodes and see if there are any that we can get
+ // rid of because they merge all of the same incoming values. This can
+ // happen due to undef values coming into the PHI nodes. This process is
+ // iterative, because eliminating one PHI node can cause others to be removed.
+ bool EliminatedAPHI = true;
+ while (EliminatedAPHI) {
+ EliminatedAPHI = false;
+
+ // Iterating over NewPhiNodes is deterministic, so it is safe to try to
+ // simplify and RAUW them as we go. If it was not, we could add uses to
+ // the values we replace with in a non deterministic order, thus creating
+ // non deterministic def->use chains.
+ for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator
+ I = NewPhiNodes.begin(),
+ E = NewPhiNodes.end();
+ I != E;) {
+ PHINode *PN = I->second;
+
+ // If this PHI node merges one value and/or undefs, get the value.
+ if (Value *V = SimplifyInstruction(PN, 0, 0, &DT)) {
+ if (AST && PN->getType()->isPointerTy())
+ AST->deleteValue(PN);
+ PN->replaceAllUsesWith(V);
+ PN->eraseFromParent();
+ // Post-increment so the iterator has moved on before its entry is erased.
+ NewPhiNodes.erase(I++);
+ EliminatedAPHI = true;
+ continue;
+ }
+ ++I;
+ }
+ }
+
+ // At this point, the renamer has added entries to PHI nodes for all reachable
+ // code. Unfortunately, there may be unreachable blocks which the renamer
+ // hasn't traversed. If this is the case, the PHI nodes may not
+ // have incoming values for all predecessors. Loop over all PHI nodes we have
+ // created, inserting undef values if they are missing any incoming values.
+ //
+ for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator
+ I = NewPhiNodes.begin(),
+ E = NewPhiNodes.end();
+ I != E; ++I) {
+ // We want to do this once per basic block. As such, only process a block
+ // when we find the PHI that is the first entry in the block.
+ PHINode *SomePHI = I->second;
+ BasicBlock *BB = SomePHI->getParent();
+ if (&BB->front() != SomePHI)
+ continue;
+
+ // Only do work here if the PHI nodes are missing incoming values. We
+ // know that all PHI nodes that were inserted in a block will have the same
+ // number of incoming values, so we can just check any of them.
+ if (SomePHI->getNumIncomingValues() == getNumPreds(BB))
+ continue;
+
+ // Get the preds for BB.
+ SmallVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
+
+ // Ok, now we know that all of the PHI nodes are missing entries for some
+ // basic blocks. Start by sorting the incoming predecessors for efficient
+ // access.
+ std::sort(Preds.begin(), Preds.end());
+
+ // Now we loop through all BB's which have entries in SomePHI and remove
+ // them from the Preds list.
+ for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
+ // Do a log(n) search of the Preds list for the entry we want.
+ SmallVectorImpl<BasicBlock *>::iterator EntIt = std::lower_bound(
+ Preds.begin(), Preds.end(), SomePHI->getIncomingBlock(i));
+ assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i) &&
+ "PHI node has entry for a block which is not a predecessor!");
+
+ // Remove the entry
+ Preds.erase(EntIt);
+ }
+
+ // At this point, the blocks left in the preds list must have dummy
+ // entries inserted into every PHI nodes for the block. Update all the phi
+ // nodes in this block that we are inserting (there could be phis before
+ // mem2reg runs).
+ // NumBadPreds is the (too small) incoming-value count shared by the PHI
+ // nodes that still need patching; the walk below stops at the first
+ // non-PHI instruction or at the first PHI with a different count.
+ unsigned NumBadPreds = SomePHI->getNumIncomingValues();
+ BasicBlock::iterator BBI = BB->begin();
+ while ((SomePHI = dyn_cast<PHINode>(BBI++)) &&
+ SomePHI->getNumIncomingValues() == NumBadPreds) {
+ Value *UndefVal = UndefValue::get(SomePHI->getType());
+ for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred)
+ SomePHI->addIncoming(UndefVal, Preds[pred]);
+ }
+ }
+
+ NewPhiNodes.clear();
+}
+
+/// \brief Determine which blocks the value is live in.
+///
+/// These are the blocks that lead to uses. Knowing them lets PHI placement
+/// skip blocks that don't lead to uses, where an inserted phi node would be
+/// dead. The result is accumulated into \p LiveInBlocks.
+void PromoteMem2Reg::ComputeLiveInBlocks(
+    AllocaInst *AI, AllocaInfo &Info,
+    const SmallPtrSet<BasicBlock *, 32> &DefBlocks,
+    SmallPtrSet<BasicBlock *, 32> &LiveInBlocks) {
+
+  // Liveness is found by walking backwards through predecessors, starting
+  // from every block that uses the value.
+  SmallVector<BasicBlock *, 64> Pending(Info.UsingBlocks.begin(),
+                                        Info.UsingBlocks.end());
+
+  // A using block that is also a defining block is only live-in if it reads
+  // the alloca before writing it. Drop the ones whose first reference to the
+  // alloca is a store.
+  unsigned Idx = 0;
+  unsigned NumCandidates = Pending.size();
+  while (Idx != NumCandidates) {
+    BasicBlock *Candidate = Pending[Idx];
+    bool StoredBeforeLoaded = false;
+
+    if (DefBlocks.count(Candidate)) {
+      // The block both uses and defines the value, so it is guaranteed to
+      // contain a matching load or store; scan until the first one is found.
+      for (BasicBlock::iterator I = Candidate->begin();; ++I) {
+        if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+          if (SI->getOperand(1) == AI) {
+            // A store to the alloca comes first: not live-in here.
+            StoredBeforeLoaded = true;
+            break;
+          }
+        } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+          if (LI->getOperand(0) == AI) {
+            // A load from the alloca comes first: really live-in here.
+            break;
+          }
+        }
+      }
+    }
+
+    if (StoredBeforeLoaded) {
+      // Remove by overwriting with the last entry; the slot is re-examined.
+      Pending[Idx] = Pending.back();
+      Pending.pop_back();
+      --NumCandidates;
+    } else {
+      ++Idx;
+    }
+  }
+
+  // Now that we have the blocks where the value is live-in, keep adding their
+  // predecessors until the full live region has been found.
+  while (!Pending.empty()) {
+    BasicBlock *LiveBB = Pending.pop_back_val();
+
+    // A failed insert means the block was already processed.
+    if (!LiveInBlocks.insert(LiveBB))
+      continue;
+
+    // Since the value is live into LiveBB, it is either defined in a
+    // predecessor or live into it too. A predecessor that defines the value
+    // does not have it live-in; all others go onto the worklist.
+    for (pred_iterator PI = pred_begin(LiveBB), PE = pred_end(LiveBB);
+         PI != PE; ++PI) {
+      BasicBlock *PredBB = *PI;
+      if (!DefBlocks.count(PredBB))
+        Pending.push_back(PredBB);
+    }
+  }
+}
+
+/// At this point, we're committed to promoting the alloca using IDF's, and the
+/// standard SSA construction algorithm. Determine which blocks need phi nodes
+/// and see if we can optimize out some work by avoiding insertion of dead phi
+/// nodes.
+///
+/// \p AllocaNum is the index of \p AI in Allocas and is passed through to
+/// QueuePhiNode for every block selected here. \p Info supplies the defining
+/// blocks and (via ComputeLiveInBlocks) the using blocks.
+void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
+ AllocaInfo &Info) {
+ // Unique the set of defining blocks for efficient lookup.
+ SmallPtrSet<BasicBlock *, 32> DefBlocks;
+ DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());
+
+ // Determine which blocks the value is live in. These are blocks which lead
+ // to uses.
+ SmallPtrSet<BasicBlock *, 32> LiveInBlocks;
+ ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);
+
+ // Use a priority queue keyed on dominator tree level so that inserted nodes
+ // are handled from the bottom of the dominator tree upwards.
+ typedef std::pair<DomTreeNode *, unsigned> DomTreeNodePair;
+ typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>,
+ less_second> IDFPriorityQueue;
+ IDFPriorityQueue PQ;
+
+ // Seed the queue with the defining blocks that have a dominator tree node;
+ // blocks for which DT.getNode() returns null are skipped.
+ for (SmallPtrSet<BasicBlock *, 32>::const_iterator I = DefBlocks.begin(),
+ E = DefBlocks.end();
+ I != E; ++I) {
+ if (DomTreeNode *Node = DT.getNode(*I))
+ PQ.push(std::make_pair(Node, DomLevels[Node]));
+ }
+
+ SmallVector<std::pair<unsigned, BasicBlock *>, 32> DFBlocks;
+ // Note: this local set of dominator tree nodes shadows the member 'Visited'
+ // (a set of basic blocks used by the renamer); the member is not touched
+ // in this function.
+ SmallPtrSet<DomTreeNode *, 32> Visited;
+ SmallVector<DomTreeNode *, 32> Worklist;
+ while (!PQ.empty()) {
+ DomTreeNodePair RootPair = PQ.top();
+ PQ.pop();
+ DomTreeNode *Root = RootPair.first;
+ unsigned RootLevel = RootPair.second;
+
+ // Walk all dominator tree children of Root, inspecting their CFG edges with
+ // targets elsewhere on the dominator tree. Only targets whose level is at
+ // most Root's level are added to the iterated dominance frontier of the
+ // definition set.
+
+ Worklist.clear();
+ Worklist.push_back(Root);
+
+ while (!Worklist.empty()) {
+ DomTreeNode *Node = Worklist.pop_back_val();
+ BasicBlock *BB = Node->getBlock();
+
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE;
+ ++SI) {
+ DomTreeNode *SuccNode = DT.getNode(*SI);
+
+ // Quickly skip all CFG edges that are also dominator tree edges instead
+ // of catching them below.
+ if (SuccNode->getIDom() == Node)
+ continue;
+
+ unsigned SuccLevel = DomLevels[SuccNode];
+ if (SuccLevel > RootLevel)
+ continue;
+
+ // Each frontier candidate is considered only once, even if it is
+ // rejected by the liveness check below.
+ if (!Visited.insert(SuccNode))
+ continue;
+
+ BasicBlock *SuccBB = SuccNode->getBlock();
+ if (!LiveInBlocks.count(SuccBB))
+ continue;
+
+ // Record the block together with its stable number so the list can be
+ // sorted into a deterministic order afterwards.
+ DFBlocks.push_back(std::make_pair(BBNumbers[SuccBB], SuccBB));
+ // A block that receives a phi is a new definition point, so it is
+ // queued as a root too unless it already was a defining block.
+ if (!DefBlocks.count(SuccBB))
+ PQ.push(std::make_pair(SuccNode, SuccLevel));
+ }
+
+ for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); CI != CE;
+ ++CI) {
+ if (!Visited.count(*CI))
+ Worklist.push_back(*CI);
+ }
+ }
+ }
+
+ // Sort by basic block number (the first member of each pair).
+ if (DFBlocks.size() > 1)
+ std::sort(DFBlocks.begin(), DFBlocks.end());
+
+ // CurrentVersion is advanced by QueuePhiNode each time it creates a phi and
+ // is used there as the numeric suffix of the phi's name.
+ unsigned CurrentVersion = 0;
+ for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i)
+ QueuePhiNode(DFBlocks[i].second, AllocaNum, CurrentVersion);
+}
+
+/// \brief Queue a phi-node to be added to a basic-block for a specific Alloca.
+/// Version supplies the numeric suffix of a newly created phi's name.
+/// Returns true if there wasn't already a phi-node for that variable.
+bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
+ unsigned &Version) {
+ // PN is a reference into NewPhiNodes, so assigning it below fills the slot.
+ PHINode *&PN = NewPhiNodes[std::make_pair(BBNumbers[BB], AllocaNo)];
+
+ // If the BB already has a phi node added for the i'th alloca then we're done!
+ if (PN)
+ return false;
+
+ // Create a PhiNode using the dereferenced type... and add the phi-node to the
+ // BasicBlock.
+ PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB),
+ Allocas[AllocaNo]->getName() + "." + Twine(Version++),
+ BB->begin());
+ ++NumPHIInsert;
+ PhiToAllocaMap[PN] = AllocaNo; // lets RenamePass map the phi to its alloca
+
+ if (AST && PN->getType()->isPointerTy()) // only when an alias tracker is set
+ AST->copyValue(PointerAllocaValues[AllocaNo], PN);
+
+ return true;
+}
+
+/// \brief Recursively traverse the CFG of the function, renaming loads and
+/// stores to the allocas which we are promoting.
+///
+/// IncomingVals indicates what value each Alloca contains on exit from the
+/// predecessor block Pred.
+void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
+ RenamePassData::ValVector &IncomingVals,
+ std::vector<RenamePassData> &Worklist) {
+NextIteration:
+ // If we are inserting any phi nodes into this BB, they will already be in the
+ // block.
+ if (PHINode *APN = dyn_cast<PHINode>(BB->begin())) {
+ // If we have PHI nodes to update, compute the number of edges from Pred to
+ // BB.
+ if (PhiToAllocaMap.count(APN)) {
+ // We want to be able to distinguish between PHI nodes being inserted by
+ // this invocation of mem2reg from those phi nodes that already existed in
+ // the IR before mem2reg was run. We determine that APN is being inserted
+ // because it is missing incoming edges. All other PHI nodes being
+ // inserted by this pass of mem2reg will have the same number of incoming
+ // operands so far. Remember this count.
+ unsigned NewPHINumOperands = APN->getNumOperands();
+
+ unsigned NumEdges = std::count(succ_begin(Pred), succ_end(Pred), BB);
+ assert(NumEdges && "Must be at least one edge from Pred to BB!");
+
+ // Add entries for all the phis.
+ BasicBlock::iterator PNI = BB->begin();
+ do {
+ unsigned AllocaNo = PhiToAllocaMap[APN];
+
+ // Add N incoming values to the PHI node.
+ for (unsigned i = 0; i != NumEdges; ++i)
+ APN->addIncoming(IncomingVals[AllocaNo], Pred);
+
+ // The currently active variable for this block is now the PHI.
+ IncomingVals[AllocaNo] = APN;
+
+ // Get the next phi node.
+ ++PNI;
+ APN = dyn_cast<PHINode>(PNI);
+ if (APN == 0)
+ break;
+
+ // Verify that it is missing entries. If not, it is not being inserted
+ // by this mem2reg invocation so we want to ignore it.
+ } while (APN->getNumOperands() == NewPHINumOperands);
+ }
+ }
+
+ // Don't revisit blocks.
+ if (!Visited.insert(BB)) // phi operands above were still added for this edge
+ return;
+
+ for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II);) {
+ Instruction *I = II++; // get the instruction, increment iterator
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand());
+ if (!Src)
+ continue; // pointer operand is not an alloca
+
+ DenseMap<AllocaInst *, unsigned>::iterator AI = AllocaLookup.find(Src);
+ if (AI == AllocaLookup.end())
+ continue; // alloca has no entry in AllocaLookup
+
+ Value *V = IncomingVals[AI->second];
+
+ // Anything using the load now uses the current value.
+ LI->replaceAllUsesWith(V);
+ if (AST && LI->getType()->isPointerTy())
+ AST->deleteValue(LI);
+ BB->getInstList().erase(LI);
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ // Delete this instruction and mark the stored value as the current holder
+ // of the alloca's value.
+ AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand());
+ if (!Dest)
+ continue; // pointer operand is not an alloca
+
+ DenseMap<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest);
+ if (ai == AllocaLookup.end())
+ continue; // alloca has no entry in AllocaLookup
+
+ // what value were we writing?
+ IncomingVals[ai->second] = SI->getOperand(0);
+ // Record debuginfo for the store before removing it.
+ if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second])
+ ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
+ BB->getInstList().erase(SI);
+ }
+ }
+
+ // 'Recurse' to our successors.
+ succ_iterator I = succ_begin(BB), E = succ_end(BB);
+ if (I == E)
+ return;
+
+ // Keep track of the successors so we don't visit the same successor twice
+ SmallPtrSet<BasicBlock *, 8> VisitedSuccs;
+
+ // Handle the first successor without using the worklist.
+ VisitedSuccs.insert(*I);
+ Pred = BB; // the block just processed becomes the predecessor
+ BB = *I;
+ ++I;
+
+ for (; I != E; ++I)
+ if (VisitedSuccs.insert(*I)) // queue each remaining distinct successor once
+ Worklist.push_back(RenamePassData(*I, Pred, IncomingVals));
+
+ goto NextIteration; // continue with the first successor in place
+}
+
+void llvm::PromoteMemToReg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
+ AliasSetTracker *AST) {
+ // If there is nothing to do, bail out without building a promoter.
+ if (Allocas.empty())
+ return;
+
+ PromoteMem2Reg(Allocas, DT, AST).run(); // temporary promoter, run once
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp
new file mode 100644
index 000000000000..30adbfac058f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp
@@ -0,0 +1,492 @@
+//===- SSAUpdater.cpp - Unstructured SSA Update Tool ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SSAUpdater class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ssaupdater"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/TinyPtrVector.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/AlignOf.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdaterImpl.h"
+
+using namespace llvm;
+
+typedef DenseMap<BasicBlock*, Value*> AvailableValsTy; // block -> its value
+static AvailableValsTy &getAvailableVals(void *AV) { // AV is held as void*
+ return *static_cast<AvailableValsTy*>(AV); // caller must pass a non-null AV
+}
+
+SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI) // NewPHI may be null
+ : AV(0), ProtoType(0), ProtoName(), InsertedPHIs(NewPHI) {} // AV set later
+
+SSAUpdater::~SSAUpdater() {
+ delete static_cast<AvailableValsTy*>(AV); // cast back from void* to delete
+}
+
+void SSAUpdater::Initialize(Type *Ty, StringRef Name) { // reset for a new value
+ if (AV == 0)
+ AV = new AvailableValsTy(); // first use: allocate the per-block map
+ else
+ getAvailableVals(AV).clear(); // reuse: drop values from the previous run
+ ProtoType = Ty; // type that every available value must have
+ ProtoName = Name; // name given to PHI nodes this updater creates
+}
+
+bool SSAUpdater::HasValueForBlock(BasicBlock *BB) const {
+ return getAvailableVals(AV).count(BB); // true if the map has an entry for BB
+}
+
+void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) {
+ assert(ProtoType != 0 && "Need to initialize SSAUpdater");
+ assert(ProtoType == V->getType() &&
+ "All rewritten values must have the same type");
+ getAvailableVals(AV)[BB] = V; // overwrites any value already recorded for BB
+}
+
+/// Return true if PHI has exactly one incoming value per entry of ValueMapping
+/// and each incoming value equals the value mapped for its incoming block.
+static bool IsEquivalentPHI(PHINode *PHI,
+ SmallDenseMap<BasicBlock*, Value*, 8> &ValueMapping) {
+ unsigned PHINumValues = PHI->getNumIncomingValues();
+ if (PHINumValues != ValueMapping.size())
+ return false;
+ // Use lookup() rather than operator[]: a miss must not insert a null entry,
+ // which would grow the caller's map and skew the size check for later PHIs.
+ for (unsigned i = 0, e = PHINumValues; i != e; ++i)
+ if (ValueMapping.lookup(PHI->getIncomingBlock(i)) !=
+ PHI->getIncomingValue(i))
+ return false;
+ return true;
+}
+
+Value *SSAUpdater::GetValueAtEndOfBlock(BasicBlock *BB) {
+ // Thin public wrapper: the internal helper does all of the work.
+ return GetValueAtEndOfBlockInternal(BB);
+}
+
+Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
+ // If there is no definition of the renamed variable in this block, just use
+ // GetValueAtEndOfBlock to do our work.
+ if (!HasValueForBlock(BB))
+ return GetValueAtEndOfBlock(BB);
+
+ // Otherwise, we have the hard case. Get the live-in values for each
+ // predecessor.
+ SmallVector<std::pair<BasicBlock*, Value*>, 8> PredValues;
+ Value *SingularValue = 0; // stays non-null only while all preds agree
+
+ // We can get our predecessor info by walking the pred_iterator list, but it
+ // is relatively slow. If we already have PHI nodes in this block, walk one
+ // of them to get the predecessor list instead.
+ if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) {
+ for (unsigned i = 0, e = SomePhi->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *PredBB = SomePhi->getIncomingBlock(i);
+ Value *PredVal = GetValueAtEndOfBlock(PredBB);
+ PredValues.push_back(std::make_pair(PredBB, PredVal));
+
+ // Compute SingularValue.
+ if (i == 0)
+ SingularValue = PredVal;
+ else if (PredVal != SingularValue)
+ SingularValue = 0;
+ }
+ } else {
+ bool isFirstPred = true;
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *PredBB = *PI;
+ Value *PredVal = GetValueAtEndOfBlock(PredBB);
+ PredValues.push_back(std::make_pair(PredBB, PredVal));
+
+ // Compute SingularValue.
+ if (isFirstPred) {
+ SingularValue = PredVal;
+ isFirstPred = false;
+ } else if (PredVal != SingularValue)
+ SingularValue = 0;
+ }
+ }
+
+ // If there are no predecessors, just return undef.
+ if (PredValues.empty())
+ return UndefValue::get(ProtoType);
+
+ // Otherwise, if all the merged values are the same, just use it.
+ if (SingularValue != 0)
+ return SingularValue;
+
+ // Otherwise, we do need a PHI: check to see if we already have one available
+ // in this block that produces the right value.
+ if (isa<PHINode>(BB->begin())) {
+ SmallDenseMap<BasicBlock*, Value*, 8> ValueMapping(PredValues.begin(),
+ PredValues.end());
+ PHINode *SomePHI;
+ for (BasicBlock::iterator It = BB->begin();
+ (SomePHI = dyn_cast<PHINode>(It)); ++It) {
+ if (IsEquivalentPHI(SomePHI, ValueMapping))
+ return SomePHI;
+ }
+ }
+
+ // Ok, we have no way out, insert a new one now.
+ PHINode *InsertedPHI = PHINode::Create(ProtoType, PredValues.size(),
+ ProtoName, &BB->front());
+
+ // Fill in all the predecessors of the PHI.
+ for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
+ InsertedPHI->addIncoming(PredValues[i].second, PredValues[i].first);
+
+ // See if the PHI node can be merged to a single value. This can happen in
+ // loop cases when we get a PHI of itself and one other value.
+ if (Value *V = SimplifyInstruction(InsertedPHI)) {
+ InsertedPHI->eraseFromParent(); // not reported through InsertedPHIs
+ return V;
+ }
+
+ // Set the DebugLoc of the inserted PHI, if available.
+ DebugLoc DL;
+ if (const Instruction *I = BB->getFirstNonPHI())
+ DL = I->getDebugLoc();
+ InsertedPHI->setDebugLoc(DL);
+
+ // If the client wants to know about all new instructions, tell it.
+ if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
+
+ DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n");
+ return InsertedPHI;
+}
+
+void SSAUpdater::RewriteUse(Use &U) { // point U at the value live at its user
+ Instruction *User = cast<Instruction>(U.getUser());
+
+ Value *V;
+ if (PHINode *UserPN = dyn_cast<PHINode>(User)) // PHI use: use the edge's block
+ V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U));
+ else
+ V = GetValueInMiddleOfBlock(User->getParent());
+
+ // Notify users of the existing value that it is being replaced.
+ Value *OldVal = U.get();
+ if (OldVal != V && OldVal->hasValueHandle())
+ ValueHandleBase::ValueIsRAUWd(OldVal, V);
+
+ U.set(V);
+}
+
+void SSAUpdater::RewriteUseAfterInsertions(Use &U) { // sibling of RewriteUse
+ Instruction *User = cast<Instruction>(U.getUser());
+
+ Value *V;
+ if (PHINode *UserPN = dyn_cast<PHINode>(User)) // PHI use: use the edge's block
+ V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U));
+ else // unlike RewriteUse, take the end-of-block value of the user's block
+ V = GetValueAtEndOfBlock(User->getParent());
+
+ U.set(V); // no value-handle notification here, unlike RewriteUse
+}
+
+namespace llvm {
+template<> // traits specialization consumed by SSAUpdaterImpl<SSAUpdater>
+class SSAUpdaterTraits<SSAUpdater> {
+public:
+ typedef BasicBlock BlkT;
+ typedef Value *ValT;
+ typedef PHINode PhiT;
+
+ typedef succ_iterator BlkSucc_iterator;
+ static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return succ_begin(BB); }
+ static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return succ_end(BB); }
+
+ class PHI_iterator { // index-based cursor over a PHI's incoming entries
+ private:
+ PHINode *PHI;
+ unsigned idx;
+
+ public:
+ explicit PHI_iterator(PHINode *P) // begin iterator
+ : PHI(P), idx(0) {}
+ PHI_iterator(PHINode *P, bool) // end iterator
+ : PHI(P), idx(PHI->getNumIncomingValues()) {}
+
+ PHI_iterator &operator++() { ++idx; return *this; }
+ bool operator==(const PHI_iterator& x) const { return idx == x.idx; }
+ bool operator!=(const PHI_iterator& x) const { return !operator==(x); }
+ Value *getIncomingValue() { return PHI->getIncomingValue(idx); }
+ BasicBlock *getIncomingBlock() { return PHI->getIncomingBlock(idx); }
+ };
+
+ static PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); }
+ static PHI_iterator PHI_end(PhiT *PHI) {
+ return PHI_iterator(PHI, true);
+ }
+
+ /// FindPredecessorBlocks - Append the predecessors of BB to the Preds
+ /// vector, in PHI operand order when BB starts with a PHI node.
+ static void FindPredecessorBlocks(BasicBlock *BB,
+ SmallVectorImpl<BasicBlock*> *Preds) {
+ // We can get our predecessor info by walking the pred_iterator list,
+ // but it is relatively slow. If we already have PHI nodes in this
+ // block, walk one of them to get the predecessor list instead.
+ if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) {
+ for (unsigned PI = 0, E = SomePhi->getNumIncomingValues(); PI != E; ++PI)
+ Preds->push_back(SomePhi->getIncomingBlock(PI));
+ } else {
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ Preds->push_back(*PI);
+ }
+ }
+
+ /// GetUndefVal - Get an undefined value of the same type as the value
+ /// being handled. BB is not used.
+ static Value *GetUndefVal(BasicBlock *BB, SSAUpdater *Updater) {
+ return UndefValue::get(Updater->ProtoType);
+ }
+
+ /// CreateEmptyPHI - Create a new PHI instruction in the specified block.
+ /// Reserve space for the operands but do not fill them in yet.
+ static Value *CreateEmptyPHI(BasicBlock *BB, unsigned NumPreds,
+ SSAUpdater *Updater) {
+ PHINode *PHI = PHINode::Create(Updater->ProtoType, NumPreds,
+ Updater->ProtoName, &BB->front());
+ return PHI;
+ }
+
+ /// AddPHIOperand - Add the specified value as an operand of the PHI for
+ /// the specified predecessor block.
+ static void AddPHIOperand(PHINode *PHI, Value *Val, BasicBlock *Pred) {
+ PHI->addIncoming(Val, Pred);
+ }
+
+ /// InstrIsPHI - Check if an instruction is a PHI.
+ /// Returns the instruction as a PHINode, or null if it is not one.
+ static PHINode *InstrIsPHI(Instruction *I) {
+ return dyn_cast<PHINode>(I);
+ }
+
+ /// ValueIsPHI - Check if a value is a PHI.
+ /// Returns the value as a PHINode, or null; Updater is not used.
+ static PHINode *ValueIsPHI(Value *Val, SSAUpdater *Updater) {
+ return dyn_cast<PHINode>(Val);
+ }
+
+ /// ValueIsNewPHI - Like ValueIsPHI but also check if the PHI has no source
+ /// operands, i.e., it was just added.
+ static PHINode *ValueIsNewPHI(Value *Val, SSAUpdater *Updater) {
+ PHINode *PHI = ValueIsPHI(Val, Updater);
+ if (PHI && PHI->getNumIncomingValues() == 0)
+ return PHI;
+ return 0;
+ }
+
+ /// GetPHIValue - For the specified PHI instruction, return the value
+ /// that it defines.
+ static Value *GetPHIValue(PHINode *PHI) {
+ return PHI; // the PHI instruction is itself the value
+ }
+};
+
+} // End llvm namespace
+
+/// Check to see if AvailableVals has an entry for the specified BB and if so,
+/// return it. If not, construct SSA form by first calculating the required
+/// placement of PHIs and then inserting new PHIs where needed.
+Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
+ AvailableValsTy &AvailableVals = getAvailableVals(AV);
+ if (Value *V = AvailableVals[BB]) // a null mapped value counts as "no entry"
+ return V;
+
+ SSAUpdaterImpl<SSAUpdater> Impl(this, &AvailableVals, InsertedPHIs);
+ return Impl.GetValue(BB); // the implementation class does the PHI placement
+}
+
+//===----------------------------------------------------------------------===//
+// LoadAndStorePromoter Implementation
+//===----------------------------------------------------------------------===//
+
+LoadAndStorePromoter::
+LoadAndStorePromoter(const SmallVectorImpl<Instruction*> &Insts,
+ SSAUpdater &S, StringRef BaseName) : SSA(S) {
+ if (Insts.empty()) return; // nothing to derive a type from; SSA is untouched
+
+ Value *SomeVal; // any one value of the promoted type, taken from Insts[0]
+ if (LoadInst *LI = dyn_cast<LoadInst>(Insts[0]))
+ SomeVal = LI;
+ else // Insts[0] must be a store: cast<> asserts otherwise
+ SomeVal = cast<StoreInst>(Insts[0])->getOperand(0);
+
+ if (BaseName.empty()) // fall back to the sample value's own name
+ BaseName = SomeVal->getName();
+ SSA.Initialize(SomeVal->getType(), BaseName);
+}
+
+
+void LoadAndStorePromoter::
+run(const SmallVectorImpl<Instruction*> &Insts) const {
+// Rewrite the loads in Insts to SSA values, then erase every inst in Insts.
+ // First step: bucket up uses of the alloca by the block they occur in.
+ // This is important because we have to handle multiple defs/uses in a block
+ // ourselves: SSAUpdater is purely for cross-block references.
+ DenseMap<BasicBlock*, TinyPtrVector<Instruction*> > UsesByBlock;
+
+ for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
+ Instruction *User = Insts[i];
+ UsesByBlock[User->getParent()].push_back(User);
+ }
+
+ // Okay, now we can iterate over all the blocks in the function with uses,
+ // processing them. Keep track of which loads are loading a live-in value.
+ // Walk the uses in the use-list order to be deterministic.
+ SmallVector<LoadInst*, 32> LiveInLoads;
+ DenseMap<Value*, Value*> ReplacedLoads; // load -> value that replaced it
+
+ for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
+ Instruction *User = Insts[i];
+ BasicBlock *BB = User->getParent();
+ TinyPtrVector<Instruction*> &BlockUses = UsesByBlock[BB];
+
+ // If this block has already been processed, ignore this repeat use.
+ if (BlockUses.empty()) continue;
+
+ // Okay, this is the first use in the block. If this block just has a
+ // single user in it, we can rewrite it trivially.
+ if (BlockUses.size() == 1) {
+ // If it is a store, it is a trivial def of the value in the block.
+ if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ updateDebugInfo(SI);
+ SSA.AddAvailableValue(BB, SI->getOperand(0));
+ } else
+ // Otherwise it is a load, queue it to rewrite as a live-in load.
+ LiveInLoads.push_back(cast<LoadInst>(User));
+ BlockUses.clear(); // an empty bucket marks the block as processed
+ continue;
+ }
+
+ // Otherwise, check to see if this block is all loads.
+ bool HasStore = false;
+ for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) { // shadows outer i
+ if (isa<StoreInst>(BlockUses[i])) {
+ HasStore = true;
+ break;
+ }
+ }
+
+ // If so, we can queue them all as live in loads. We don't have an
+ // efficient way to tell which one is first in the block and don't want to
+ // scan large blocks, so just add all loads as live ins.
+ if (!HasStore) {
+ for (unsigned i = 0, e = BlockUses.size(); i != e; ++i)
+ LiveInLoads.push_back(cast<LoadInst>(BlockUses[i]));
+ BlockUses.clear();
+ continue;
+ }
+
+ // Otherwise, we have mixed loads and stores (or just a bunch of stores).
+ // Since SSAUpdater is purely for cross-block values, we need to determine
+ // the order of these instructions in the block. If the first use in the
+ // block is a load, then it uses the live in value. The last store defines
+ // the live out value. We handle this by doing a linear scan of the block.
+ Value *StoredValue = 0;
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
+ if (LoadInst *L = dyn_cast<LoadInst>(II)) {
+ // If this is a load from an unrelated pointer, ignore it.
+ if (!isInstInList(L, Insts)) continue;
+
+ // If we haven't seen a store yet, this is a live in use, otherwise
+ // use the stored value.
+ if (StoredValue) {
+ replaceLoadWithValue(L, StoredValue);
+ L->replaceAllUsesWith(StoredValue);
+ ReplacedLoads[L] = StoredValue;
+ } else {
+ LiveInLoads.push_back(L);
+ }
+ continue;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(II)) {
+ // If this is a store to an unrelated pointer, ignore it.
+ if (!isInstInList(SI, Insts)) continue;
+ updateDebugInfo(SI);
+
+ // Remember that this is the active value in the block.
+ StoredValue = SI->getOperand(0);
+ }
+ }
+
+ // The last stored value that happened is the live-out for the block.
+ assert(StoredValue && "Already checked that there is a store in block");
+ SSA.AddAvailableValue(BB, StoredValue);
+ BlockUses.clear();
+ }
+
+ // Okay, now we rewrite all loads that use live-in values in the loop,
+ // inserting PHI nodes as necessary.
+ for (unsigned i = 0, e = LiveInLoads.size(); i != e; ++i) {
+ LoadInst *ALoad = LiveInLoads[i];
+ Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent());
+ replaceLoadWithValue(ALoad, NewVal);
+
+ // Avoid assertions in unreachable code.
+ if (NewVal == ALoad) NewVal = UndefValue::get(NewVal->getType());
+ ALoad->replaceAllUsesWith(NewVal);
+ ReplacedLoads[ALoad] = NewVal;
+ }
+
+ // Allow the client to do stuff before we start nuking things.
+ doExtraRewritesBeforeFinalDeletion();
+
+ // Now that everything is rewritten, delete the old instructions from the
+ // function. They should all be dead now.
+ for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
+ Instruction *User = Insts[i];
+
+ // If this is a load that still has uses, then the load must have been added
+ // as a live value in the SSAUpdate data structure for a block (e.g. because
+ // the loaded value was stored later). In this case, we need to recursively
+ // propagate the updates until we get to the real value.
+ if (!User->use_empty()) {
+ Value *NewVal = ReplacedLoads[User];
+ assert(NewVal && "not a replaced load?");
+
+ // Propagate down to the ultimate replacee. The intermediate loads
+ // could theoretically already have been deleted, so we don't want to
+ // dereference the Value*'s.
+ DenseMap<Value*, Value*>::iterator RLI = ReplacedLoads.find(NewVal);
+ while (RLI != ReplacedLoads.end()) {
+ NewVal = RLI->second;
+ RLI = ReplacedLoads.find(NewVal);
+ }
+
+ replaceLoadWithValue(cast<LoadInst>(User), NewVal);
+ User->replaceAllUsesWith(NewVal);
+ }
+
+ instructionDeleted(User); // notify the client hook before erasing
+ User->eraseFromParent();
+ }
+}
+
+bool LoadAndStorePromoter::isInstInList(
+ Instruction *I, const SmallVectorImpl<Instruction*> &Insts) const {
+ // Plain linear membership test over the caller-supplied instruction list.
+ SmallVectorImpl<Instruction*>::const_iterator End = Insts.end();
+ return std::find(Insts.begin(), End, I) != End;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
new file mode 100644
index 000000000000..ff50b12cdb3f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -0,0 +1,4156 @@
+//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Peephole optimize the CFG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "simplifycfg"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/NoFolder.h"
+#include "llvm/Support/PatternMatch.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <algorithm>
+#include <map>
+#include <set>
+using namespace llvm;
+using namespace PatternMatch;
+
+static cl::opt<unsigned> // hidden option, default 1
+PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(1),
+ cl::desc("Control the amount of phi node folding to perform (default = 1)"));
+
+static cl::opt<bool> // hidden option, off by default
+DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false),
+ cl::desc("Duplicate return instructions into unconditional branches"));
+
+static cl::opt<bool> // hidden option, on by default
+SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
+ cl::desc("Sink common instructions down to the end block"));
+
+static cl::opt<bool> // NOTE(review): help text misspells "precedes"
+HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
+ cl::desc("Hoist conditional stores if an unconditional store preceeds"));
+// Statistic counters for this file.
+STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
+STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables");
+STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block");
+STATISTIC(NumSpeculations, "Number of speculative executed instructions");
+
+namespace {
+ /// ValueEqualityComparisonCase - Represents a case of a switch.
+ struct ValueEqualityComparisonCase {
+ ConstantInt *Value; // constant the case compares against
+ BasicBlock *Dest; // destination block paired with Value
+
+ ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
+ : Value(Value), Dest(Dest) {}
+
+ bool operator<(ValueEqualityComparisonCase RHS) const {
+ // Comparing pointers is ok as we only rely on the order for uniquing.
+ return Value < RHS.Value;
+ }
+
+ bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
+ };
+// SimplifyCFGOpt - Holds the per-terminator simplification routines.
+class SimplifyCFGOpt {
+ const TargetTransformInfo &TTI;
+ const DataLayout *const TD; // stored as a pointer
+ Value *isValueEqualityComparison(TerminatorInst *TI);
+ BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI,
+ std::vector<ValueEqualityComparisonCase> &Cases);
+ bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
+ BasicBlock *Pred,
+ IRBuilder<> &Builder);
+ bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
+ IRBuilder<> &Builder);
+// One Simplify* routine per kind of terminator instruction.
+ bool SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder);
+ bool SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
+ bool SimplifyUnreachable(UnreachableInst *UI);
+ bool SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
+ bool SimplifyIndirectBr(IndirectBrInst *IBI);
+ bool SimplifyUncondBranch(BranchInst *BI, IRBuilder <> &Builder);
+ bool SimplifyCondBranch(BranchInst *BI, IRBuilder <>&Builder);
+
+public:
+ SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout *TD)
+ : TTI(TTI), TD(TD) {}
+ bool run(BasicBlock *BB); // entry point; defined elsewhere in this file
+};
+}
+
+/// SafeToMergeTerminators - Return true if it is safe to merge these two
+/// terminator instructions together.
+/// Returns false when SI1 == SI2 or a shared successor's PHI values conflict.
+static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) {
+ if (SI1 == SI2) return false; // Can't merge with self!
+
+ // It is not safe to merge these two switch instructions if they have a common
+ // successor, and if that successor has a PHI node, and if *that* PHI node has
+ // conflicting incoming values from the two switch blocks.
+ BasicBlock *SI1BB = SI1->getParent();
+ BasicBlock *SI2BB = SI2->getParent();
+ SmallPtrSet<BasicBlock*, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
+
+ for (succ_iterator I = succ_begin(SI2BB), E = succ_end(SI2BB); I != E; ++I)
+ if (SI1Succs.count(*I)) // *I is a successor of both blocks
+ for (BasicBlock::iterator BBI = (*I)->begin();
+ isa<PHINode>(BBI); ++BBI) {
+ PHINode *PN = cast<PHINode>(BBI);
+ if (PN->getIncomingValueForBlock(SI1BB) !=
+ PN->getIncomingValueForBlock(SI2BB))
+ return false;
+ }
+
+ return true;
+}
+
+/// isProfitableToFoldUnconditional - Return true if it is safe and profitable
+/// to merge these two terminator instructions together, where SI1 is an
+/// unconditional branch. PhiNodes will store all PHI nodes in common
+/// successors. PhiNodes may be partially filled when false is returned.
+///
+static bool isProfitableToFoldUnconditional(BranchInst *SI1,
+ BranchInst *SI2,
+ Instruction *Cond,
+ SmallVectorImpl<PHINode*> &PhiNodes) {
+ if (SI1 == SI2) return false; // Can't merge with self!
+ assert(SI1->isUnconditional() && SI2->isConditional());
+
+ // We fold the unconditional branch if we can easily update all PHI nodes in
+ // common successors:
+ // 1> We have a constant incoming value for the conditional branch;
+ // 2> We have "Cond" as the incoming value for the unconditional branch;
+ // 3> SI2->getCondition() and Cond have same operands.
+ CmpInst *Ci2 = dyn_cast<CmpInst>(SI2->getCondition());
+ if (!Ci2) return false;
+ if (!(Cond->getOperand(0) == Ci2->getOperand(0) && // same operand order
+ Cond->getOperand(1) == Ci2->getOperand(1)) &&
+ !(Cond->getOperand(0) == Ci2->getOperand(1) && // or swapped operands
+ Cond->getOperand(1) == Ci2->getOperand(0)))
+ return false;
+
+ BasicBlock *SI1BB = SI1->getParent();
+ BasicBlock *SI2BB = SI2->getParent();
+ SmallPtrSet<BasicBlock*, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
+ for (succ_iterator I = succ_begin(SI2BB), E = succ_end(SI2BB); I != E; ++I)
+ if (SI1Succs.count(*I)) // *I is a successor of both blocks
+ for (BasicBlock::iterator BBI = (*I)->begin();
+ isa<PHINode>(BBI); ++BBI) {
+ PHINode *PN = cast<PHINode>(BBI);
+ if (PN->getIncomingValueForBlock(SI1BB) != Cond || // rule 2 above
+ !isa<ConstantInt>(PN->getIncomingValueForBlock(SI2BB))) // rule 1
+ return false;
+ PhiNodes.push_back(PN);
+ }
+ return true;
+}
+
+/// AddPredecessorToBlock - Register NewPred as an additional predecessor in
+/// every PHI node of Succ. For each PHI, the value recorded for NewPred is a
+/// copy of the value that PHI already receives from ExistPred, which must be
+/// an existing predecessor of Succ.
+static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
+ BasicBlock *ExistPred) {
+ // PHI nodes are grouped at the top of a block, so stop at the first non-PHI.
+ for (BasicBlock::iterator It = Succ->begin(); isa<PHINode>(It); ++It) {
+ PHINode *Phi = cast<PHINode>(It);
+ Value *Incoming = Phi->getIncomingValueForBlock(ExistPred);
+ Phi->addIncoming(Incoming, NewPred);
+ }
+}
+
+/// ComputeSpeculationCost - Compute an abstract "cost" of speculating the
+/// given instruction, which is assumed to be safe to speculate. 1 means
+/// cheap, 2 means less cheap, and UINT_MAX means prohibitively expensive.
+static unsigned ComputeSpeculationCost(const User *I) {
+ assert(isSafeToSpeculativelyExecute(I) &&
+ "Instruction is not safe to speculatively execute!");
+ switch (Operator::getOpcode(I)) {
+ default:
+ // In doubt, be conservative.
+ return UINT_MAX;
+ case Instruction::GetElementPtr:
+ // GEPs are cheap if all indices are constant.
+ if (!cast<GEPOperator>(I)->hasAllConstantIndices())
+ return UINT_MAX;
+ return 1;
+ case Instruction::Load:
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::ICmp:
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ return 1; // These are all cheap.
+
+ case Instruction::Call:
+ case Instruction::Select:
+ return 2; // the "less cheap" tier from the header comment
+ }
+}
+
+/// DominatesMergePoint - For the merge point \p BB of an "if" pattern, decide
+/// whether \p V may be treated as available in (dominating) BB. This is a
+/// deliberately restricted check, not a general dominance query.
+///
+/// Non-instructions always qualify, except constant expressions that can
+/// trap. An instruction defined in BB itself never qualifies. An instruction
+/// whose block does not end in an unconditional branch to BB is assumed to
+/// dominate the region.
+///
+/// Otherwise V lives in the conditional part of the "if". When
+/// \p AggressiveInsts is non-null, V is accepted if it is safe to speculate
+/// and its cost, plus the cost of every operand that is also in the
+/// conditional part, fits within \p CostRemaining; V is then added to the
+/// set. Instructions already in the set are accepted without being charged
+/// again.
+///
+/// On success CostRemaining has been reduced by the cost charged. When false
+/// is returned, CostRemaining should be treated as undefined.
+static bool DominatesMergePoint(Value *V, BasicBlock *BB,
+                                SmallPtrSet<Instruction*, 4> *AggressiveInsts,
+                                unsigned &CostRemaining) {
+  Instruction *Inst = dyn_cast<Instruction>(V);
+  if (Inst == 0) {
+    // Arguments, globals and constants are always available; the only thing
+    // to worry about is a constant expression that might trap.
+    ConstantExpr *CE = dyn_cast<ConstantExpr>(V);
+    return !(CE && CE->canTrap());
+  }
+
+  // Reject values defined in the merge block itself (odd loop shapes).
+  BasicBlock *DefBB = Inst->getParent();
+  if (DefBB == BB)
+    return false;
+
+  // The defining block is part of the "if" body only when it ends in an
+  // unconditional branch straight to BB.
+  BranchInst *Term = dyn_cast<BranchInst>(DefBB->getTerminator());
+  const bool InIfRegion =
+      Term != 0 && !Term->isConditional() && Term->getSuccessor(0) == BB;
+  if (!InIfRegion)
+    return true;
+
+  // Speculation disabled by the caller.
+  if (AggressiveInsts == 0)
+    return false;
+
+  // Already accepted (and paid for) earlier.
+  if (AggressiveInsts->count(Inst))
+    return true;
+
+  if (!isSafeToSpeculativelyExecute(Inst))
+    return false;
+
+  const unsigned Cost = ComputeSpeculationCost(Inst);
+  if (Cost > CostRemaining)
+    return false;
+  CostRemaining -= Cost;
+
+  // Every operand must itself be available without blowing the budget.
+  for (User::op_iterator Op = Inst->op_begin(), OpEnd = Inst->op_end();
+       Op != OpEnd; ++Op) {
+    if (!DominatesMergePoint(*Op, BB, AggressiveInsts, CostRemaining))
+      return false;
+  }
+
+  // All checks passed; remember the instruction so it is not charged twice.
+  AggressiveInsts->insert(Inst);
+  return true;
+}
+
+/// GetConstantInt - Return \p V as a ConstantInt if it is one, or if it is a
+/// pointer constant that can be expressed as a pointer-sized integer (a null
+/// pointer, or an inttoptr of a constant integer). The pointer cases require
+/// a non-null \p TD. Returns null when V is none of these.
+static ConstantInt *GetConstantInt(Value *V, const DataLayout *TD) {
+  // The common case: V already is a constant integer.
+  if (ConstantInt *Direct = dyn_cast<ConstantInt>(V))
+    return Direct;
+
+  // Anything else must be a pointer-typed constant, and we need DataLayout
+  // to know how wide the matching integer type is.
+  if (!TD || !isa<Constant>(V) || !V->getType()->isPointerTy())
+    return 0;
+
+  IntegerType *IntPtrTy = cast<IntegerType>(TD->getIntPtrType(V->getType()));
+
+  // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
+  if (isa<ConstantPointerNull>(V))
+    return ConstantInt::get(IntPtrTy, 0);
+
+  // The only other form handled is inttoptr(<constant int>).
+  ConstantExpr *CE = dyn_cast<ConstantExpr>(V);
+  if (!CE || CE->getOpcode() != Instruction::IntToPtr)
+    return 0;
+
+  ConstantInt *Src = dyn_cast<ConstantInt>(CE->getOperand(0));
+  if (!Src)
+    return 0;
+
+  // Usually the source already has pointer width; otherwise resize it as an
+  // unsigned value.
+  if (Src->getType() == IntPtrTy)
+    return Src;
+  return cast<ConstantInt>
+    (ConstantExpr::getIntegerCast(Src, IntPtrTy, /*isSigned=*/false));
+}
+
+/// GatherConstantCompares - Given a potentially 'or'd (isEQ) or 'and'd (!isEQ)
+/// together collection of icmp instructions that compare a value against a
+/// constant, return the value being compared, and append the matched constants
+/// to Vals. Returns null if V cannot be decomposed. At most one non-foldable operand is recorded in Extra; UsedICmps counts the icmps consumed.
+static Value *
+GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
+ const DataLayout *TD, bool isEQ, unsigned &UsedICmps) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0) return 0; // Only instructions can be icmp / or / and nodes.
+
+ // If this is an icmp against a constant, handle this as one of the cases.
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
+ if (ConstantInt *C = GetConstantInt(I->getOperand(1), TD)) {
+ Value *RHSVal; // Bound by the pattern matchers below.
+ ConstantInt *RHSC; // Bound by the pattern matchers below.
+
+ if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ:ICmpInst::ICMP_NE)) { // Predicate matches the mode directly.
+ // (x & ~2^x) == y --> x == y || x == y|2^x
+ // This undoes a transformation done by instcombine to fuse 2 compares.
+ if (match(ICI->getOperand(0),
+ m_And(m_Value(RHSVal), m_ConstantInt(RHSC)))) {
+ APInt Not = ~RHSC->getValue(); // The bits cleared by the mask.
+ if (Not.isPowerOf2()) { // Exactly one bit is masked off.
+ Vals.push_back(C);
+ Vals.push_back(
+ ConstantInt::get(C->getContext(), C->getValue() | Not));
+ UsedICmps++;
+ return RHSVal; // The value underneath the 'and'.
+ }
+ }
+
+ UsedICmps++; // Plain "x ==/!= C".
+ Vals.push_back(C);
+ return I->getOperand(0);
+ }
+
+ // If we have "x ult 3" comparison, for example, then we can add 0,1,2 to
+ // the set.
+ ConstantRange Span =
+ ConstantRange::makeICmpRegion(ICI->getPredicate(), C->getValue());
+
+ // Shift the range if the compare is fed by an add. This is the range
+ // compare idiom as emitted by instcombine.
+ bool hasAdd =
+ match(I->getOperand(0), m_Add(m_Value(RHSVal), m_ConstantInt(RHSC)));
+ if (hasAdd)
+ Span = Span.subtract(RHSC->getValue());
+
+ // If this is an and/!= check then we want to optimize "x ugt 2" into
+ // x != 0 && x != 1.
+ if (!isEQ)
+ Span = Span.inverse();
+
+ // If there are a ton of values (more than 8), we don't want to make a ginormous switch.
+ if (Span.getSetSize().ugt(8) || Span.isEmptySet())
+ return 0;
+
+ for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp) // Enumerate every value in the range.
+ Vals.push_back(ConstantInt::get(V->getContext(), Tmp));
+ UsedICmps++;
+ return hasAdd ? RHSVal : I->getOperand(0); // Look through the add if there was one.
+ }
+ return 0; // icmp whose right-hand side is not a usable constant.
+ }
+
+ // Otherwise, we can only handle an | or &, depending on isEQ.
+ if (I->getOpcode() != (isEQ ? Instruction::Or : Instruction::And))
+ return 0;
+
+ unsigned NumValsBeforeLHS = Vals.size(); // Snapshot so a failed attempt can be rolled back.
+ unsigned UsedICmpsBeforeLHS = UsedICmps; // Snapshot so a failed attempt can be rolled back.
+ if (Value *LHS = GatherConstantCompares(I->getOperand(0), Vals, Extra, TD,
+ isEQ, UsedICmps)) {
+ unsigned NumVals = Vals.size(); // State after the LHS alone.
+ unsigned UsedICmpsBeforeRHS = UsedICmps; // State after the LHS alone.
+ if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, TD,
+ isEQ, UsedICmps)) {
+ if (LHS == RHS) // Both sides test the same value: success.
+ return LHS;
+ Vals.resize(NumVals); // Different values: discard what the RHS contributed.
+ UsedICmps = UsedICmpsBeforeRHS;
+ }
+
+ // The RHS of the or/and can't be folded in and we haven't used "Extra" yet
+ // (or it already is this operand), set it and return success.
+ if (Extra == 0 || Extra == I->getOperand(1)) {
+ Extra = I->getOperand(1);
+ return LHS;
+ }
+
+ Vals.resize(NumValsBeforeLHS); // Extra is taken by something else: undo the LHS too and fail.
+ UsedICmps = UsedICmpsBeforeLHS;
+ return 0;
+ }
+
+ // If the LHS can't be folded in, but Extra is available and RHS can, try to
+ // use LHS as Extra.
+ if (Extra == 0 || Extra == I->getOperand(0)) {
+ Value *OldExtra = Extra; // Saved so Extra can be restored on failure.
+ Extra = I->getOperand(0);
+ if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, TD,
+ isEQ, UsedICmps))
+ return RHS;
+ assert(Vals.size() == NumValsBeforeLHS); // A failed gather is expected to leave Vals unchanged.
+ Extra = OldExtra;
+ }
+
+ return 0;
+}
+
+/// EraseTerminatorInstAndDCECond - Erase the terminator \p TI from its block
+/// and then recursively delete the value it branched on (switch condition,
+/// conditional-branch condition, or indirectbr address) if that value is an
+/// instruction that has become trivially dead.
+static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) {
+  // Pick out the controlling value before TI goes away.
+  Value *Controlling = 0;
+  if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+    Controlling = SI->getCondition();
+  } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+    // Unconditional branches have no condition to clean up.
+    if (BI->isConditional())
+      Controlling = BI->getCondition();
+  } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
+    Controlling = IBI->getAddress();
+  }
+
+  // Only instructions can be dead code; constants and arguments are ignored.
+  Instruction *Cond = Controlling ? dyn_cast<Instruction>(Controlling) : 0;
+
+  TI->eraseFromParent();
+  if (Cond)
+    RecursivelyDeleteTriviallyDeadInstructions(Cond);
+}
+
+/// isValueEqualityComparison - If the terminator \p TI tests a value for
+/// equality against constant integers, return the value being tested;
+/// otherwise return null. Recognized forms are a switch (subject to a size
+/// limit) and a conditional branch on a single-use icmp eq/ne whose second
+/// operand is a constant. When DataLayout is available, a ptrtoint to the
+/// pointer-sized integer type is looked through and the pointer is returned.
+Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) {
+  Value *Compared = 0;
+
+  if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+    // Keep large switches from being merged into many predecessors: only
+    // accept when (#successors * #predecessors) stays within 128.
+    BasicBlock *Parent = SI->getParent();
+    if (SI->getNumSuccessors()*std::distance(pred_begin(Parent),
+                                             pred_end(Parent)) <= 128)
+      Compared = SI->getCondition();
+  } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+    if (BI->isConditional() && BI->getCondition()->hasOneUse()) {
+      ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition());
+      if (ICI && ICI->isEquality() && GetConstantInt(ICI->getOperand(1), TD))
+        Compared = ICI->getOperand(0);
+    }
+  }
+
+  // Nothing to unwrap without a value or without target data.
+  if (!TD || !Compared)
+    return Compared;
+
+  // Unwrap a ptrtoint only when it is lossless, i.e. the result type is
+  // exactly the pointer-sized integer type.
+  PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(Compared);
+  if (!PTII)
+    return Compared;
+
+  Value *Ptr = PTII->getPointerOperand();
+  if (PTII->getType() == TD->getIntPtrType(Ptr->getType()))
+    return Ptr;
+  return Compared;
+}
+
+/// GetValueEqualityComparisonCases - Decode the value comparison performed by
+/// \p TI (a switch, or a conditional branch on an icmp eq/ne against a
+/// constant) into (constant, destination) pairs appended to \p Cases, and
+/// return the block reached when no listed constant matches.
+BasicBlock *SimplifyCFGOpt::
+GetValueEqualityComparisonCases(TerminatorInst *TI,
+                                std::vector<ValueEqualityComparisonCase>
+                                                                       &Cases) {
+  if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+    Cases.reserve(SI->getNumCases());
+    SwitchInst::CaseIt It = SI->case_begin(), End = SI->case_end();
+    while (It != End) {
+      Cases.push_back(ValueEqualityComparisonCase(It.getCaseValue(),
+                                                  It.getCaseSuccessor()));
+      ++It;
+    }
+    return SI->getDefaultDest();
+  }
+
+  // Otherwise this is "br (icmp eq/ne X, C), T, F".
+  BranchInst *BI = cast<BranchInst>(TI);
+  ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
+
+  // For 'eq' the matching edge is successor 0 and the default is successor 1;
+  // for 'ne' the roles are swapped. The bools are used as successor indices.
+  const bool IsNE = ICI->getPredicate() == ICmpInst::ICMP_NE;
+  const bool IsEQ = ICI->getPredicate() == ICmpInst::ICMP_EQ;
+  BasicBlock *MatchDest = BI->getSuccessor(IsNE);
+
+  ConstantInt *CaseVal = GetConstantInt(ICI->getOperand(1), TD);
+  Cases.push_back(ValueEqualityComparisonCase(CaseVal, MatchDest));
+  return BI->getSuccessor(IsEQ);
+}
+
+
+/// EliminateBlockCases - Drop from \p Cases every entry that compares equal
+/// to the block \p BB, keeping the remaining entries in their original order.
+static void EliminateBlockCases(BasicBlock *BB,
+                              std::vector<ValueEqualityComparisonCase> &Cases) {
+  // Classic erase/remove: compact the survivors, then chop off the tail.
+  std::vector<ValueEqualityComparisonCase>::iterator NewEnd =
+      std::remove(Cases.begin(), Cases.end(), BB);
+  Cases.erase(NewEnd, Cases.end());
+}
+
+/// ValuesOverlap - Return true if some case value appears in both \p C1 and
+/// \p C2. Note that, unless an early exit is taken, both vectors are sorted
+/// in place as a side effect.
+static bool
+ValuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
+              std::vector<ValueEqualityComparisonCase > &C2) {
+  typedef std::vector<ValueEqualityComparisonCase> CaseVector;
+
+  // Work with the shorter list as "Small" and the longer one as "Large".
+  const bool FirstIsLarger = C1.size() > C2.size();
+  CaseVector &Small = FirstIsLarger ? C2 : C1;
+  CaseVector &Large = FirstIsLarger ? C1 : C2;
+
+  if (Small.size() == 0)
+    return false;
+
+  if (Small.size() == 1) {
+    // A single key: a linear scan of the other list may find it directly.
+    ConstantInt *Only = Small[0].Value;
+    for (unsigned Idx = 0, End = Large.size(); Idx != End; ++Idx) {
+      if (Large[Idx].Value == Only)
+        return true;
+    }
+    // Not found: fall through to the general path below.
+  }
+
+  // General case: sort both lists and walk them in lock step.
+  array_pod_sort(Small.begin(), Small.end());
+  array_pod_sort(Large.begin(), Large.end());
+
+  unsigned SmallIdx = 0, LargeIdx = 0;
+  const unsigned SmallEnd = Small.size(), LargeEnd = Large.size();
+  while (SmallIdx != SmallEnd && LargeIdx != LargeEnd) {
+    ConstantInt *A = Small[SmallIdx].Value;
+    ConstantInt *B = Large[LargeIdx].Value;
+    if (A == B)
+      return true;
+    // Advance whichever side currently holds the smaller key.
+    if (A < B)
+      ++SmallIdx;
+    else
+      ++LargeIdx;
+  }
+  return false;
+}
+
+/// SimplifyEqualityComparisonWithOnlyPredecessor - If TI is known to be a
+/// terminator instruction and its block is known to only have a single
+/// predecessor block, check to see if that predecessor is also a value
+/// comparison with the same value, and if that comparison determines the
+/// outcome of this comparison. If so, simplify TI. This does a very limited
+/// form of jump threading. Returns true if TI was simplified; Builder is used to create any replacement branch.
+bool SimplifyCFGOpt::
+SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
+ BasicBlock *Pred,
+ IRBuilder<> &Builder) {
+ Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
+ if (!PredVal) return false; // Not a value comparison in predecessor.
+
+ Value *ThisVal = isValueEqualityComparison(TI);
+ assert(ThisVal && "This isn't a value comparison!!");
+ if (ThisVal != PredVal) return false; // Different values being compared.
+
+ // TODO: Preserve branch weight metadata, similarly to how
+ // FoldValueComparisonIntoPredecessors preserves it.
+
+ // Find out information about when control will move from Pred to TI's block.
+ std::vector<ValueEqualityComparisonCase> PredCases;
+ BasicBlock *PredDef = GetValueEqualityComparisonCases(Pred->getTerminator(),
+ PredCases);
+ EliminateBlockCases(PredDef, PredCases); // Remove default from cases.
+
+ // Find information about how control leaves this block.
+ std::vector<ValueEqualityComparisonCase> ThisCases;
+ BasicBlock *ThisDef = GetValueEqualityComparisonCases(TI, ThisCases);
+ EliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.
+
+ // If TI's block is the default block from Pred's comparison, potentially
+ // simplify TI based on this knowledge.
+ if (PredDef == TI->getParent()) {
+ // If we are here, we know that the value is none of those cases listed in
+ // PredCases. If there are any cases in ThisCases that are in PredCases, we
+ // can simplify TI.
+ if (!ValuesOverlap(PredCases, ThisCases))
+ return false;
+
+ if (isa<BranchInst>(TI)) {
+ // Okay, one of the successors of this condbr is dead. Convert it to an
+ // uncond br.
+ assert(ThisCases.size() == 1 && "Branch can only have one case!");
+ // Insert the new branch.
+ Instruction *NI = Builder.CreateBr(ThisDef);
+ (void) NI; // NI is only read inside DEBUG(); silence unused warnings otherwise.
+
+ // Remove PHI node entries for the dead edge.
+ ThisCases[0].Dest->removePredecessor(TI->getParent());
+
+ DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
+ << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n");
+
+ EraseTerminatorInstAndDCECond(TI);
+ return true;
+ }
+
+ SwitchInst *SI = cast<SwitchInst>(TI);
+ // Okay, TI has cases that are statically dead, prune them away.
+ SmallPtrSet<Constant*, 16> DeadCases;
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ DeadCases.insert(PredCases[i].Value);
+
+ DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
+ << "Through successor TI: " << *TI);
+
+ // Collect branch weights into a vector.
+ SmallVector<uint32_t, 8> Weights;
+ MDNode* MD = SI->getMetadata(LLVMContext::MD_prof);
+ bool HasWeight = MD && (MD->getNumOperands() == 2 + SI->getNumCases()); // Operand 0 plus one weight for the default and each case.
+ if (HasWeight)
+ for (unsigned MD_i = 1, MD_e = MD->getNumOperands(); MD_i < MD_e;
+ ++MD_i) {
+ ConstantInt* CI = dyn_cast<ConstantInt>(MD->getOperand(MD_i));
+ assert(CI);
+ Weights.push_back(CI->getValue().getZExtValue());
+ }
+ for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) { // Walk the cases from last to first.
+ --i;
+ if (DeadCases.count(i.getCaseValue())) {
+ if (HasWeight) {
+ std::swap(Weights[i.getCaseIndex()+1], Weights.back()); // +1 skips the default weight at index 0.
+ Weights.pop_back();
+ }
+ i.getCaseSuccessor()->removePredecessor(TI->getParent());
+ SI->removeCase(i);
+ }
+ }
+ if (HasWeight && Weights.size() >= 2) // Only re-attach metadata if at least two weights remain.
+ SI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(SI->getParent()->getContext()).
+ createBranchWeights(Weights));
+
+ DEBUG(dbgs() << "Leaving: " << *TI << "\n");
+ return true;
+ }
+
+ // Otherwise, TI's block must correspond to some matched value. Find out
+ // which value (or set of values) this is.
+ ConstantInt *TIV = 0;
+ BasicBlock *TIBB = TI->getParent();
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ if (PredCases[i].Dest == TIBB) {
+ if (TIV != 0)
+ return false; // Cannot handle multiple values coming to this block.
+ TIV = PredCases[i].Value;
+ }
+ assert(TIV && "No edge from pred to succ?");
+
+ // Okay, we found the one constant that our value can be if we get into TI's
+ // BB. Find out which successor will unconditionally be branched to.
+ BasicBlock *TheRealDest = 0;
+ for (unsigned i = 0, e = ThisCases.size(); i != e; ++i)
+ if (ThisCases[i].Value == TIV) {
+ TheRealDest = ThisCases[i].Dest;
+ break;
+ }
+
+ // If not handled by any explicit cases, it is handled by the default case.
+ if (TheRealDest == 0) TheRealDest = ThisDef;
+
+ // Remove PHI node entries for dead edges.
+ BasicBlock *CheckEdge = TheRealDest;
+ for (succ_iterator SI = succ_begin(TIBB), e = succ_end(TIBB); SI != e; ++SI)
+ if (*SI != CheckEdge)
+ (*SI)->removePredecessor(TIBB);
+ else
+ CheckEdge = 0; // Keep the first edge to TheRealDest; any further duplicate edges are removed too.
+
+ // Insert the new branch.
+ Instruction *NI = Builder.CreateBr(TheRealDest);
+ (void) NI; // NI is only read inside DEBUG(); silence unused warnings otherwise.
+
+ DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
+ << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n");
+
+ EraseTerminatorInstAndDCECond(TI);
+ return true;
+}
+
+namespace {
+  /// ConstantIntOrdering - Strict weak ordering on ConstantInt pointers based
+  /// on the unsigned magnitude of the integers they hold rather than on the
+  /// pointer values, so containers keyed with it iterate in an order that
+  /// does not depend on addresses.
+  struct ConstantIntOrdering {
+    bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
+      const APInt &L = LHS->getValue();
+      const APInt &R = RHS->getValue();
+      return L.ult(R);
+    }
+  };
+}
+
+/// ConstantIntSortPredicate - Three-way comparator over ConstantInt pointers
+/// using unsigned value comparison. Note the sign convention: it returns 1
+/// when *P1 is smaller than *P2, 0 when they are equal and -1 otherwise.
+static int ConstantIntSortPredicate(ConstantInt *const *P1,
+                                    ConstantInt *const *P2) {
+  const APInt &L = (*P1)->getValue();
+  const APInt &R = (*P2)->getValue();
+  if (L.ult(R))
+    return 1;
+  return L == R ? 0 : -1;
+}
+
+/// HasBranchWeights - Return true if \p I carries !prof metadata whose first
+/// operand is the string "branch_weights".
+static inline bool HasBranchWeights(const Instruction* I) {
+  MDNode *ProfMD = I->getMetadata(LLVMContext::MD_prof);
+  if (!ProfMD || !ProfMD->getOperand(0))
+    return false;
+
+  MDString *Tag = dyn_cast<MDString>(ProfMD->getOperand(0));
+  return Tag && Tag->getString().equals("branch_weights");
+}
+
+/// GetBranchWeights - Append the branch weights stored in the !prof metadata
+/// of \p TI to \p Weights, arranged so that the weight of the default
+/// destination comes first. \p TI must have !prof metadata. For a conditional
+/// branch on an 'icmp eq' the default destination is the false edge, so the
+/// two weights are exchanged to put it in front.
+static void GetBranchWeights(TerminatorInst *TI,
+                             SmallVectorImpl<uint64_t> &Weights) {
+  MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
+  assert(MD);
+
+  // Operand 0 is skipped; every following operand is one weight.
+  const unsigned NumOps = MD->getNumOperands();
+  for (unsigned Idx = 1; Idx < NumOps; ++Idx) {
+    ConstantInt *Weight = dyn_cast<ConstantInt>(MD->getOperand(Idx));
+    assert(Weight);
+    Weights.push_back(Weight->getValue().getZExtValue());
+  }
+
+  // Switches already list the default weight first.
+  BranchInst *BI = dyn_cast<BranchInst>(TI);
+  if (!BI)
+    return;
+
+  assert(Weights.size() == 2);
+  ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
+  if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
+    std::swap(Weights.front(), Weights.back());
+}
+
+/// FitWeights - Branch weights end up stored as 32-bit values. If any entry of
+/// \p Weights exceeds UINT_MAX, scale every entry down by the same power of
+/// two, chosen as the smallest one that makes the largest entry fit;
+/// otherwise \p Weights is left untouched.
+///
+/// A single halving is not sufficient in general: the weights handed to this
+/// function are built as products of two weights and can therefore need far
+/// more than 33 bits, in which case halving once would still leave values
+/// that get truncated when narrowed to uint32_t.
+static void FitWeights(MutableArrayRef<uint64_t> Weights) {
+  // The largest weight alone decides how far everything must be scaled.
+  uint64_t Max = 0;
+  for (unsigned i = 0; i < Weights.size(); ++i)
+    if (Weights[i] > Max)
+      Max = Weights[i];
+
+  if (Max <= UINT_MAX)
+    return;
+
+  // Smallest right shift that brings Max into range. When one halving is
+  // enough this yields Shift == 1, i.e. exactly the previous behaviour.
+  unsigned Shift = 1;
+  while ((Max >> Shift) > UINT_MAX)
+    ++Shift;
+
+  // Apply the same shift everywhere so the relative proportions are kept.
+  for (unsigned i = 0; i < Weights.size(); ++i)
+    Weights[i] >>= Shift;
+}
+
+/// FoldValueComparisonIntoPredecessors - The specified terminator is a value
+/// equality comparison instruction (either a switch or a branch on "X == c").
+/// See if any of the predecessors of the terminator block are value comparisons
+/// on the same value. If so, and if safe to do so, fold them together. Returns true if any predecessor was rewritten.
+bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
+ IRBuilder<> &Builder) {
+ BasicBlock *BB = TI->getParent();
+ Value *CV = isValueEqualityComparison(TI); // CondVal
+ assert(CV && "Not a comparison?");
+ bool Changed = false;
+
+ SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB)); // Snapshot: the predecessor list changes as we rewrite.
+ while (!Preds.empty()) {
+ BasicBlock *Pred = Preds.pop_back_val();
+
+ // See if the predecessor is a comparison with the same value.
+ TerminatorInst *PTI = Pred->getTerminator();
+ Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
+
+ if (PCV == CV && SafeToMergeTerminators(TI, PTI)) {
+ // Figure out which 'cases' to copy from TI to PTI.
+ std::vector<ValueEqualityComparisonCase> BBCases;
+ BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases);
+
+ std::vector<ValueEqualityComparisonCase> PredCases;
+ BasicBlock *PredDefault = GetValueEqualityComparisonCases(PTI, PredCases);
+
+ // Based on whether the default edge from PTI goes to BB or not, fill in
+ // PredCases and PredDefault with the new switch cases we would like to
+ // build.
+ SmallVector<BasicBlock*, 8> NewSuccessors;
+
+ // Update the branch weight metadata along the way
+ SmallVector<uint64_t, 8> Weights; // Weights[0] is the default; Weights[i+1] belongs to PredCases[i].
+ bool PredHasWeights = HasBranchWeights(PTI);
+ bool SuccHasWeights = HasBranchWeights(TI);
+
+ if (PredHasWeights) {
+ GetBranchWeights(PTI, Weights);
+ // branch-weight metadata is inconsistent here.
+ if (Weights.size() != 1 + PredCases.size())
+ PredHasWeights = SuccHasWeights = false;
+ } else if (SuccHasWeights)
+ // If there are no predecessor weights but there are successor weights,
+ // populate Weights with 1, which will later be scaled to the sum of
+ // successor's weights
+ Weights.assign(1 + PredCases.size(), 1);
+
+ SmallVector<uint64_t, 8> SuccWeights; // Same layout as Weights, but for TI / BBCases.
+ if (SuccHasWeights) {
+ GetBranchWeights(TI, SuccWeights);
+ // branch-weight metadata is inconsistent here.
+ if (SuccWeights.size() != 1 + BBCases.size())
+ PredHasWeights = SuccHasWeights = false;
+ } else if (PredHasWeights)
+ SuccWeights.assign(1 + BBCases.size(), 1); // Mirror of the above: give TI uniform weights of 1.
+
+ if (PredDefault == BB) {
+ // If this is the default destination from PTI, only the edges in TI
+ // that don't occur in PTI, or that branch to BB will be activated.
+ std::set<ConstantInt*, ConstantIntOrdering> PTIHandled;
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ if (PredCases[i].Dest != BB)
+ PTIHandled.insert(PredCases[i].Value);
+ else {
+ // The default destination is BB, we don't need explicit targets.
+ std::swap(PredCases[i], PredCases.back());
+
+ if (PredHasWeights || SuccHasWeights) {
+ // Increase weight for the default case.
+ Weights[0] += Weights[i+1];
+ std::swap(Weights[i+1], Weights.back());
+ Weights.pop_back();
+ }
+
+ PredCases.pop_back();
+ --i; --e; // Re-examine slot i, which now holds the element swapped in from the back.
+ }
+
+ // Reconstruct the new switch statement we will be building.
+ if (PredDefault != BBDefault) {
+ PredDefault->removePredecessor(Pred);
+ PredDefault = BBDefault;
+ NewSuccessors.push_back(BBDefault);
+ }
+
+ unsigned CasesFromPred = Weights.size(); // Entries below this index came from PTI.
+ uint64_t ValidTotalSuccWeight = 0;
+ for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
+ if (!PTIHandled.count(BBCases[i].Value) &&
+ BBCases[i].Dest != BBDefault) {
+ PredCases.push_back(BBCases[i]);
+ NewSuccessors.push_back(BBCases[i].Dest);
+ if (SuccHasWeights || PredHasWeights) {
+ // The default weight is at index 0, so weight for the ith case
+ // should be at index i+1. Scale the cases from successor by
+ // PredDefaultWeight (Weights[0]).
+ Weights.push_back(Weights[0] * SuccWeights[i+1]);
+ ValidTotalSuccWeight += SuccWeights[i+1];
+ }
+ }
+
+ if (SuccHasWeights || PredHasWeights) {
+ ValidTotalSuccWeight += SuccWeights[0];
+ // Scale the cases from predecessor by ValidTotalSuccWeight.
+ for (unsigned i = 1; i < CasesFromPred; ++i)
+ Weights[i] *= ValidTotalSuccWeight;
+ // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
+ Weights[0] *= SuccWeights[0];
+ }
+ } else {
+ // If this is not the default destination from PTI, only the edges
+ // in TI that occur in PTI with a destination of BB will be
+ // activated.
+ std::set<ConstantInt*, ConstantIntOrdering> PTIHandled;
+ std::map<ConstantInt*, uint64_t> WeightsForHandled; // Weight of each PTI case that used to target BB.
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ if (PredCases[i].Dest == BB) {
+ PTIHandled.insert(PredCases[i].Value);
+
+ if (PredHasWeights || SuccHasWeights) {
+ WeightsForHandled[PredCases[i].Value] = Weights[i+1];
+ std::swap(Weights[i+1], Weights.back());
+ Weights.pop_back();
+ }
+
+ std::swap(PredCases[i], PredCases.back());
+ PredCases.pop_back();
+ --i; --e; // Re-examine slot i, which now holds the element swapped in from the back.
+ }
+
+ // Okay, now we know which constants were sent to BB from the
+ // predecessor. Figure out where they will all go now.
+ for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
+ if (PTIHandled.count(BBCases[i].Value)) {
+ // If this is one we are capable of getting...
+ if (PredHasWeights || SuccHasWeights)
+ Weights.push_back(WeightsForHandled[BBCases[i].Value]);
+ PredCases.push_back(BBCases[i]);
+ NewSuccessors.push_back(BBCases[i].Dest);
+ PTIHandled.erase(BBCases[i].Value);// This constant is taken care of
+ }
+
+ // If there are any constants vectored to BB that TI doesn't handle,
+ // they must go to the default destination of TI.
+ for (std::set<ConstantInt*, ConstantIntOrdering>::iterator I =
+ PTIHandled.begin(),
+ E = PTIHandled.end(); I != E; ++I) {
+ if (PredHasWeights || SuccHasWeights)
+ Weights.push_back(WeightsForHandled[*I]);
+ PredCases.push_back(ValueEqualityComparisonCase(*I, BBDefault));
+ NewSuccessors.push_back(BBDefault);
+ }
+ }
+
+ // Okay, at this point, we know which new successor Pred will get. Make
+ // sure we update the number of entries in the PHI nodes for these
+ // successors.
+ for (unsigned i = 0, e = NewSuccessors.size(); i != e; ++i)
+ AddPredecessorToBlock(NewSuccessors[i], Pred, BB);
+
+ Builder.SetInsertPoint(PTI);
+ // Convert pointer to int before we switch.
+ if (CV->getType()->isPointerTy()) {
+ assert(TD && "Cannot switch on pointer without DataLayout");
+ CV = Builder.CreatePtrToInt(CV, TD->getIntPtrType(CV->getType()),
+ "magicptr");
+ }
+
+ // Now that the successors are updated, create the new Switch instruction.
+ SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault,
+ PredCases.size());
+ NewSI->setDebugLoc(PTI->getDebugLoc());
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ NewSI->addCase(PredCases[i].Value, PredCases[i].Dest);
+
+ if (PredHasWeights || SuccHasWeights) {
+ // Scale the weights down if any of them cannot fit in an uint32_t
+ FitWeights(Weights);
+
+ SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end()); // Narrows each 64-bit weight to 32 bits.
+
+ NewSI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(BB->getContext()).
+ createBranchWeights(MDWeights));
+ }
+
+ EraseTerminatorInstAndDCECond(PTI);
+
+ // Okay, last check. If BB is still a successor of PTI's replacement, then we must
+ // have an infinite loop case. If so, add an infinitely looping block
+ // to handle the case to preserve the behavior of the code.
+ BasicBlock *InfLoopBlock = 0; // Created lazily, at most once per predecessor.
+ for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
+ if (NewSI->getSuccessor(i) == BB) {
+ if (InfLoopBlock == 0) {
+ // Insert it at the end of the function, because it's either code,
+ // or it won't matter if it's hot. :)
+ InfLoopBlock = BasicBlock::Create(BB->getContext(),
+ "infloop", BB->getParent());
+ BranchInst::Create(InfLoopBlock, InfLoopBlock); // A block that branches to itself.
+ }
+ NewSI->setSuccessor(i, InfLoopBlock);
+ }
+
+ Changed = true;
+ }
+ }
+ return Changed;
+}
+
+// isSafeToHoistInvoke - Return false if hoisting the invokes I1 (from BB1)
+// and I2 (from BB2) would force us to insert a select that uses the invoke's
+// own result: that happens when some PHI in a successor of BB1 receives
+// different values from BB1 and BB2 and one of them is the corresponding
+// invoke. There is nowhere to place such a select, so the hoist must be
+// refused (see the comments in HoistThenElseCodeToIf).
+static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2,
+                                Instruction *I1, Instruction *I2) {
+  for (succ_iterator Succ = succ_begin(BB1), SuccEnd = succ_end(BB1);
+       Succ != SuccEnd; ++Succ) {
+    // Inspect every PHI at the top of this successor.
+    BasicBlock::iterator It = Succ->begin();
+    while (PHINode *Phi = dyn_cast<PHINode>(It)) {
+      Value *FromBB1 = Phi->getIncomingValueForBlock(BB1);
+      Value *FromBB2 = Phi->getIncomingValueForBlock(BB2);
+      const bool NeedsSelect = FromBB1 != FromBB2;
+      if (NeedsSelect && (FromBB1 == I1 || FromBB2 == I2))
+        return false;
+      ++It;
+    }
+  }
+  return true;
+}
+
+/// HoistThenElseCodeToIf - Given a conditional branch that goes to BB1 and
+/// BB2, hoist any common code in the two blocks up into the branch block. The
+/// caller of this function guarantees that BI's block dominates BB1 and BB2. Returns true if anything was hoisted.
+static bool HoistThenElseCodeToIf(BranchInst *BI) {
+ // This does very trivial matching, with limited scanning, to find identical
+ // instructions in the two blocks. In particular, we don't want to get into
+ // O(M*N) situations here where M and N are the sizes of BB1 and BB2. As
+ // such, we currently just scan for obviously identical instructions in an
+ // identical order.
+ BasicBlock *BB1 = BI->getSuccessor(0); // The true destination.
+ BasicBlock *BB2 = BI->getSuccessor(1); // The false destination
+
+ BasicBlock::iterator BB1_Itr = BB1->begin();
+ BasicBlock::iterator BB2_Itr = BB2->begin();
+
+ Instruction *I1 = BB1_Itr++, *I2 = BB2_Itr++; // The iterators always point one past I1 / I2.
+ // Skip debug info if it is not identical.
+ DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1);
+ DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2);
+ if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) {
+ while (isa<DbgInfoIntrinsic>(I1))
+ I1 = BB1_Itr++;
+ while (isa<DbgInfoIntrinsic>(I2))
+ I2 = BB2_Itr++;
+ }
+ if (isa<PHINode>(I1) || !I1->isIdenticalToWhenDefined(I2) ||
+ (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
+ return false; // The very first pair already fails: nothing can be hoisted.
+
+ BasicBlock *BIParent = BI->getParent();
+
+ bool Changed = false;
+ do {
+ // If we are hoisting the terminator instruction, don't move one (making a
+ // broken BB), instead clone it, and remove BI.
+ if (isa<TerminatorInst>(I1))
+ goto HoistTerminator;
+
+ // For a normal instruction, we just move one to right before the branch,
+ // then replace all uses of the other with the first. Finally, we remove
+ // the now redundant second instruction.
+ BIParent->getInstList().splice(BI, BB1->getInstList(), I1);
+ if (!I2->use_empty())
+ I2->replaceAllUsesWith(I1);
+ I1->intersectOptionalDataWith(I2);
+ I2->eraseFromParent();
+ Changed = true;
+
+ I1 = BB1_Itr++; // Advance to the next candidate pair.
+ I2 = BB2_Itr++;
+ // Skip debug info if it is not identical.
+ DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1);
+ DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2);
+ if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) {
+ while (isa<DbgInfoIntrinsic>(I1))
+ I1 = BB1_Itr++;
+ while (isa<DbgInfoIntrinsic>(I2))
+ I2 = BB2_Itr++;
+ }
+ } while (I1->isIdenticalToWhenDefined(I2));
+
+ return true; // The loop body ran at least once, so something was hoisted.
+
+HoistTerminator:
+ // It may not be possible to hoist an invoke.
+ if (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))
+ return Changed;
+
+ for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) { // Pre-check: can every select we would need be created safely?
+ PHINode *PN;
+ for (BasicBlock::iterator BBI = SI->begin();
+ (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
+ Value *BB1V = PN->getIncomingValueForBlock(BB1);
+ Value *BB2V = PN->getIncomingValueForBlock(BB2);
+ if (BB1V == BB2V)
+ continue;
+
+ if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V)) // A select would evaluate this constant expression unconditionally.
+ return Changed;
+ if (isa<ConstantExpr>(BB2V) && !isSafeToSpeculativelyExecute(BB2V)) // Likewise for the value coming from BB2.
+ return Changed;
+ }
+ }
+
+ // Okay, it is safe to hoist the terminator.
+ Instruction *NT = I1->clone();
+ BIParent->getInstList().insert(BI, NT);
+ if (!NT->getType()->isVoidTy()) {
+ I1->replaceAllUsesWith(NT);
+ I2->replaceAllUsesWith(NT);
+ NT->takeName(I1);
+ }
+
+ IRBuilder<true, NoFolder> Builder(NT); // New selects are inserted before the cloned terminator.
+ // Hoisting one of the terminators from our successor is a great thing.
+ // Unfortunately, the successors of the if/else blocks may have PHI nodes in
+ // them. If they do, all PHI entries for BB1/BB2 must agree for all PHI
+ // nodes, so we insert select instruction to compute the final result.
+ std::map<std::pair<Value*,Value*>, SelectInst*> InsertedSelects; // One select per distinct (BB1 value, BB2 value) pair.
+ for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) {
+ PHINode *PN;
+ for (BasicBlock::iterator BBI = SI->begin();
+ (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
+ Value *BB1V = PN->getIncomingValueForBlock(BB1);
+ Value *BB2V = PN->getIncomingValueForBlock(BB2);
+ if (BB1V == BB2V) continue;
+
+ // These values do not agree. Insert a select instruction before NT
+ // that determines the right value.
+ SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)]; // NOTE: this SI shadows the successor iterator of the enclosing loop.
+ if (SI == 0)
+ SI = cast<SelectInst>
+ (Builder.CreateSelect(BI->getCondition(), BB1V, BB2V,
+ BB1V->getName()+"."+BB2V->getName()));
+
+ // Make the PHI node use the select for all incoming values for BB1/BB2
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingBlock(i) == BB1 || PN->getIncomingBlock(i) == BB2)
+ PN->setIncomingValue(i, SI);
+ }
+ }
+
+ // Update any PHI nodes in our new successors.
+ for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI)
+ AddPredecessorToBlock(*SI, BIParent, BB1);
+
+ EraseTerminatorInstAndDCECond(BI); // The cloned terminator NT now ends BIParent.
+ return true;
+}
+
+/// SinkThenElseCodeToEnd - Given an unconditional branch that goes to BBEnd,
+/// check whether BBEnd has only two predecessors and the other predecessor
+/// ends with an unconditional branch. If it is true, sink any common code
+/// in the two predecessors to BBEnd.
+///
+/// Both predecessors are scanned backward from their terminators. A pair of
+/// instructions (one from each predecessor) is sunk only when each has a
+/// single use, the pair supplies the two incoming values of one PHI node in
+/// BBEnd, both perform the same operation, and at most one operand position
+/// differs (a new PHI is created in BBEnd to merge that operand). Scanning
+/// stops at the first pair that does not qualify.
+///
+/// \param BI1 An unconditional branch (asserted); its parent is one
+/// predecessor and its only successor is BBEnd.
+/// \returns true if at least one instruction was sunk into BBEnd.
+static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
+ assert(BI1->isUnconditional());
+ BasicBlock *BB1 = BI1->getParent();
+ BasicBlock *BBEnd = BI1->getSuccessor(0);
+
+ // Check that BBEnd has two predecessors and the other predecessor ends with
+ // an unconditional branch.
+ pred_iterator PI = pred_begin(BBEnd), PE = pred_end(BBEnd);
+ BasicBlock *Pred0 = *PI++;
+ if (PI == PE) // Only one predecessor.
+ return false;
+ BasicBlock *Pred1 = *PI++;
+ if (PI != PE) // More than two predecessors.
+ return false;
+ // Whichever predecessor is not BB1 is the other block to scan.
+ BasicBlock *BB2 = (Pred0 == BB1) ? Pred1 : Pred0;
+ BranchInst *BI2 = dyn_cast<BranchInst>(BB2->getTerminator());
+ if (!BI2 || !BI2->isUnconditional())
+ return false;
+
+ // Gather the PHI nodes in BBEnd.
+ // The map is keyed by the value incoming from BB1 and stores the pair
+ // (value incoming from BB2, the PHI node that merges the two).
+ std::map<Value*, std::pair<Value*, PHINode*> > MapValueFromBB1ToBB2;
+ Instruction *FirstNonPhiInBBEnd = 0;
+ for (BasicBlock::iterator I = BBEnd->begin(), E = BBEnd->end();
+ I != E; ++I) {
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ Value *BB1V = PN->getIncomingValueForBlock(BB1);
+ Value *BB2V = PN->getIncomingValueForBlock(BB2);
+ MapValueFromBB1ToBB2[BB1V] = std::make_pair(BB2V, PN);
+ } else {
+ FirstNonPhiInBBEnd = &*I;
+ break;
+ }
+ }
+ // No non-PHI instruction was found in BBEnd, so there is no position to
+ // sink instructions in front of.
+ if (!FirstNonPhiInBBEnd)
+ return false;
+
+
+ // This does very trivial matching, with limited scanning, to find identical
+ // instructions in the two blocks. We scan backward for obviously identical
+ // instructions in an identical order.
+ BasicBlock::InstListType::reverse_iterator RI1 = BB1->getInstList().rbegin(),
+ RE1 = BB1->getInstList().rend(), RI2 = BB2->getInstList().rbegin(),
+ RE2 = BB2->getInstList().rend();
+ // Skip debug info.
+ while (RI1 != RE1 && isa<DbgInfoIntrinsic>(&*RI1)) ++RI1;
+ if (RI1 == RE1)
+ return false;
+ while (RI2 != RE2 && isa<DbgInfoIntrinsic>(&*RI2)) ++RI2;
+ if (RI2 == RE2)
+ return false;
+ // Skip the unconditional branches.
+ ++RI1;
+ ++RI2;
+
+ bool Changed = false;
+ while (RI1 != RE1 && RI2 != RE2) {
+ // Skip debug info.
+ while (RI1 != RE1 && isa<DbgInfoIntrinsic>(&*RI1)) ++RI1;
+ if (RI1 == RE1)
+ return Changed;
+ while (RI2 != RE2 && isa<DbgInfoIntrinsic>(&*RI2)) ++RI2;
+ if (RI2 == RE2)
+ return Changed;
+
+ Instruction *I1 = &*RI1, *I2 = &*RI2;
+ // I1 and I2 should have a single use in the same PHI node, and they
+ // perform the same operation.
+ // Cannot move control-flow-involving, volatile loads, vaarg, etc.
+ // The last two conditions require that I1 is the BB1 incoming value of a
+ // PHI in BBEnd and that I2 is the BB2 incoming value of that same PHI.
+ if (isa<PHINode>(I1) || isa<PHINode>(I2) ||
+ isa<TerminatorInst>(I1) || isa<TerminatorInst>(I2) ||
+ isa<LandingPadInst>(I1) || isa<LandingPadInst>(I2) ||
+ isa<AllocaInst>(I1) || isa<AllocaInst>(I2) ||
+ I1->mayHaveSideEffects() || I2->mayHaveSideEffects() ||
+ I1->mayReadOrWriteMemory() || I2->mayReadOrWriteMemory() ||
+ !I1->hasOneUse() || !I2->hasOneUse() ||
+ MapValueFromBB1ToBB2.find(I1) == MapValueFromBB1ToBB2.end() ||
+ MapValueFromBB1ToBB2[I1].first != I2)
+ return Changed;
+
+ // Check whether we should swap the operands of ICmpInst.
+ // The swap is applied to ICmp2 in place; every bail-out path below that
+ // leaves I2 in its block undoes it again.
+ ICmpInst *ICmp1 = dyn_cast<ICmpInst>(I1), *ICmp2 = dyn_cast<ICmpInst>(I2);
+ bool SwapOpnds = false;
+ if (ICmp1 && ICmp2 &&
+ ICmp1->getOperand(0) != ICmp2->getOperand(0) &&
+ ICmp1->getOperand(1) != ICmp2->getOperand(1) &&
+ (ICmp1->getOperand(0) == ICmp2->getOperand(1) ||
+ ICmp1->getOperand(1) == ICmp2->getOperand(0))) {
+ ICmp2->swapOperands();
+ SwapOpnds = true;
+ }
+ if (!I1->isSameOperationAs(I2)) {
+ if (SwapOpnds)
+ ICmp2->swapOperands();
+ return Changed;
+ }
+
+ // The operands should be either the same or they need to be generated
+ // with a PHI node after sinking. We only handle the case where there is
+ // a single pair of different operands.
+ Value *DifferentOp1 = 0, *DifferentOp2 = 0;
+ unsigned Op1Idx = 0;
+ for (unsigned I = 0, E = I1->getNumOperands(); I != E; ++I) {
+ if (I1->getOperand(I) == I2->getOperand(I))
+ continue;
+ // Early exit if we have more-than one pair of different operands or
+ // the different operand is already in MapValueFromBB1ToBB2.
+ // Early exit if we need a PHI node to replace a constant.
+ if (DifferentOp1 ||
+ MapValueFromBB1ToBB2.find(I1->getOperand(I)) !=
+ MapValueFromBB1ToBB2.end() ||
+ isa<Constant>(I1->getOperand(I)) ||
+ isa<Constant>(I2->getOperand(I))) {
+ // If we can't sink the instructions, undo the swapping.
+ if (SwapOpnds)
+ ICmp2->swapOperands();
+ return Changed;
+ }
+ DifferentOp1 = I1->getOperand(I);
+ Op1Idx = I;
+ DifferentOp2 = I2->getOperand(I);
+ }
+
+ // We insert the pair of different operands to MapValueFromBB1ToBB2 and
+ // remove (I1, I2) from MapValueFromBB1ToBB2.
+ if (DifferentOp1) {
+ PHINode *NewPN = PHINode::Create(DifferentOp1->getType(), 2,
+ DifferentOp1->getName() + ".sink",
+ BBEnd->begin());
+ MapValueFromBB1ToBB2[DifferentOp1] = std::make_pair(DifferentOp2, NewPN);
+ // I1 should use NewPN instead of DifferentOp1.
+ I1->setOperand(Op1Idx, NewPN);
+ NewPN->addIncoming(DifferentOp1, BB1);
+ NewPN->addIncoming(DifferentOp2, BB2);
+ DEBUG(dbgs() << "Create PHI node " << *NewPN << "\n";);
+ }
+ // Remember the PHI that merged I1 and I2 before dropping the map entry; it
+ // is replaced by the sunk I1 below.
+ PHINode *OldPN = MapValueFromBB1ToBB2[I1].second;
+ MapValueFromBB1ToBB2.erase(I1);
+
+ DEBUG(dbgs() << "SINK common instructions " << *I1 << "\n";);
+ DEBUG(dbgs() << " " << *I2 << "\n";);
+ // We need to update RE1 and RE2 if we are going to sink the first
+ // instruction in the basic block down.
+ bool UpdateRE1 = (I1 == BB1->begin()), UpdateRE2 = (I2 == BB2->begin());
+ // Sink the instruction.
+ // I1 is moved (not cloned) to just before the first non-PHI instruction of
+ // BBEnd; I2 is then redundant and is erased.
+ BBEnd->getInstList().splice(FirstNonPhiInBBEnd, BB1->getInstList(), I1);
+ if (!OldPN->use_empty())
+ OldPN->replaceAllUsesWith(I1);
+ OldPN->eraseFromParent();
+
+ if (!I2->use_empty())
+ I2->replaceAllUsesWith(I1);
+ I1->intersectOptionalDataWith(I2);
+ I2->eraseFromParent();
+
+ if (UpdateRE1)
+ RE1 = BB1->getInstList().rend();
+ if (UpdateRE2)
+ RE2 = BB2->getInstList().rend();
+ // The next sunk instruction must be placed before the one just sunk, which
+ // keeps the sunk instructions in their original relative order.
+ FirstNonPhiInBBEnd = I1;
+ NumSinkCommons++;
+ Changed = true;
+ }
+ return Changed;
+}
+
+/// \brief Determine if we can hoist a sole store instruction out of a
+/// conditional block.
+///
+/// We are looking for code like the following:
+///   BrBB:
+///     store i32 %add, i32* %arrayidx2
+///     ... // No other stores or function calls (we could be calling a memory
+///     ... // function).
+///     %cmp = icmp ult %x, %y
+///     br i1 %cmp, label %EndBB, label %ThenBB
+///   ThenBB:
+///     store i32 %add5, i32* %arrayidx2
+///     br label EndBB
+///   EndBB:
+///     ...
+/// We are going to transform this into:
+///   BrBB:
+///     store i32 %add, i32* %arrayidx2
+///     ... //
+///     %cmp = icmp ult %x, %y
+///     %add.add5 = select i1 %cmp, i32 %add, %add5
+///     store i32 %add.add5, i32* %arrayidx2
+///     ...
+///
+/// BrBB is scanned backward from its end. At most nine non-debug instructions
+/// are inspected; debug intrinsics are skipped and do not count toward that
+/// limit, so the answer does not depend on whether debug info is present.
+///
+/// \param I The candidate instruction; anything other than a simple
+/// (non-volatile, non-atomic) store is rejected.
+/// \param BrBB The block that is searched for an earlier store to the same
+/// pointer operand.
+/// \param StoreBB Not used by this function.
+/// \param EndBB Not used by this function.
+///
+/// \return The value operand of the earlier store in BrBB if the store can be
+/// hoisted into the predecessor block. 0 otherwise.
+static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
+                                     BasicBlock *StoreBB, BasicBlock *EndBB) {
+  StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
+  if (!StoreToHoist)
+    return 0;
+
+  // Volatile or atomic.
+  if (!StoreToHoist->isSimple())
+    return 0;
+
+  Value *StorePtr = StoreToHoist->getPointerOperand();
+
+  // Look for a store to the same pointer in BrBB. The budget counts only
+  // non-debug instructions.
+  unsigned MaxNumInstToLookAt = 9;
+  for (BasicBlock::reverse_iterator RI = BrBB->rbegin(),
+         RE = BrBB->rend(); RI != RE; ++RI) {
+    if (!MaxNumInstToLookAt)
+      break;
+
+    Instruction *CurI = &*RI;
+
+    // Skip debug info so that it cannot exhaust the look-back budget.
+    if (isa<DbgInfoIntrinsic>(CurI))
+      continue;
+    --MaxNumInstToLookAt;
+
+    // Could be calling an instruction that affects memory like free().
+    if (CurI->mayHaveSideEffects() && !isa<StoreInst>(CurI))
+      return 0;
+
+    if (StoreInst *SI = dyn_cast<StoreInst>(CurI)) {
+      // Found the previous store; make sure it stores to the same location.
+      if (SI->getPointerOperand() == StorePtr)
+        // Found the previous store, return its value operand.
+        return SI->getValueOperand();
+      return 0; // Unknown store.
+    }
+  }
+
+  return 0;
+}
+
+/// \brief Speculate a conditional basic block flattening the CFG.
+///
+/// Note that this is a very risky transform currently. Speculating
+/// instructions like this is most often not desirable. Instead, there is an MI
+/// pass which can do it with full awareness of the resource constraints.
+/// However, some cases are "obvious" and we should do directly. An example of
+/// this is speculating a single, reasonably cheap instruction.
+///
+/// There is only one distinct advantage to flattening the CFG at the IR level:
+/// it makes very common but simplistic optimizations such as are common in
+/// instcombine and the DAG combiner more powerful by removing CFG edges and
+/// modeling their effects with easier to reason about SSA value graphs.
+///
+///
+/// An illustration of this transform is turning this IR:
+/// \code
+/// BB:
+/// %cmp = icmp ult %x, %y
+/// br i1 %cmp, label %EndBB, label %ThenBB
+/// ThenBB:
+/// %sub = sub %x, %y
+/// br label %EndBB
+/// EndBB:
+/// %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
+/// ...
+/// \endcode
+///
+/// Into this IR:
+/// \code
+/// BB:
+/// %cmp = icmp ult %x, %y
+/// %sub = sub %x, %y
+/// %cond = select i1 %cmp, 0, %sub
+/// ...
+/// \endcode
+///
+/// \param BI The conditional branch terminating the block that ThenBB's
+/// instructions are hoisted into.
+/// \param ThenBB The conditionally executed block; one successor of BI. Its
+/// terminator's first successor must be the other successor of BI (asserted).
+///
+/// \returns true if the instructions of ThenBB were hoisted in front of BI and
+/// the affected PHI nodes in the end block were rewritten to use selects.
+static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
+ // Be conservative for now. FP select instruction can often be expensive.
+ Value *BrCond = BI->getCondition();
+ if (isa<FCmpInst>(BrCond))
+ return false;
+
+ BasicBlock *BB = BI->getParent();
+ BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
+
+ // If ThenBB is actually on the false edge of the conditional branch, remember
+ // to swap the select operands later.
+ bool Invert = false;
+ if (ThenBB != BI->getSuccessor(0)) {
+ assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
+ Invert = true;
+ }
+ assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
+
+ // Keep a count of how many times instructions are used within ThenBB when
+ // they are candidates for sinking into ThenBB. Specifically:
+ // - They are defined in BB, and
+ // - They have no side effects, and
+ // - All of their uses are in ThenBB.
+ SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
+
+ unsigned SpeculationCost = 0;
+ Value *SpeculatedStoreValue = 0;
+ StoreInst *SpeculatedStore = 0;
+ // Walk every instruction of ThenBB except its terminator.
+ for (BasicBlock::iterator BBI = ThenBB->begin(),
+ BBE = llvm::prior(ThenBB->end());
+ BBI != BBE; ++BBI) {
+ Instruction *I = BBI;
+ // Skip debug info.
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+
+ // Only speculatively execute a single instruction (not counting the
+ // terminator) for now.
+ ++SpeculationCost;
+ if (SpeculationCost > 1)
+ return false;
+
+ // Don't hoist the instruction if it's unsafe or expensive.
+ // An otherwise unsafe instruction is still accepted when HoistCondStores is
+ // set and it is a store that isSafeToSpeculateStore approves; in that case
+ // SpeculatedStoreValue receives the value of the earlier store in BB.
+ if (!isSafeToSpeculativelyExecute(I) &&
+ !(HoistCondStores &&
+ (SpeculatedStoreValue = isSafeToSpeculateStore(I, BB, ThenBB,
+ EndBB))))
+ return false;
+ if (!SpeculatedStoreValue &&
+ ComputeSpeculationCost(I) > PHINodeFoldingThreshold)
+ return false;
+
+ // Store the store speculation candidate.
+ if (SpeculatedStoreValue)
+ SpeculatedStore = cast<StoreInst>(I);
+
+ // Do not hoist the instruction if any of its operands are defined but not
+ // used in BB. The transformation will prevent the operand from
+ // being sunk into the use block.
+ for (User::op_iterator i = I->op_begin(), e = I->op_end();
+ i != e; ++i) {
+ Instruction *OpI = dyn_cast<Instruction>(*i);
+ if (!OpI || OpI->getParent() != BB ||
+ OpI->mayHaveSideEffects())
+ continue; // Not a candidate for sinking.
+
+ ++SinkCandidateUseCounts[OpI];
+ }
+ }
+
+ // Consider any sink candidates which are only used in ThenBB as costs for
+ // speculation. Note, while we iterate over a DenseMap here, we are summing
+ // and so iteration order isn't significant.
+ for (SmallDenseMap<Instruction *, unsigned, 4>::iterator I =
+ SinkCandidateUseCounts.begin(), E = SinkCandidateUseCounts.end();
+ I != E; ++I)
+ if (I->first->getNumUses() == I->second) {
+ ++SpeculationCost;
+ if (SpeculationCost > 1)
+ return false;
+ }
+
+ // Check that the PHI nodes can be converted to selects.
+ bool HaveRewritablePHIs = false;
+ for (BasicBlock::iterator I = EndBB->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ Value *OrigV = PN->getIncomingValueForBlock(BB);
+ Value *ThenV = PN->getIncomingValueForBlock(ThenBB);
+
+ // FIXME: Try to remove some of the duplication with HoistThenElseCodeToIf.
+ // Skip PHIs which are trivial.
+ if (ThenV == OrigV)
+ continue;
+
+ HaveRewritablePHIs = true;
+ ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
+ ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
+ if (!OrigCE && !ThenCE)
+ continue; // Known safe and cheap.
+
+ if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) ||
+ (OrigCE && !isSafeToSpeculativelyExecute(OrigCE)))
+ return false;
+ unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE) : 0;
+ unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE) : 0;
+ if (OrigCost + ThenCost > 2 * PHINodeFoldingThreshold)
+ return false;
+
+ // Account for the cost of an unfolded ConstantExpr which could end up
+ // getting expanded into Instructions.
+ // FIXME: This doesn't account for how many operations are combined in the
+ // constant expression.
+ ++SpeculationCost;
+ if (SpeculationCost > 1)
+ return false;
+ }
+
+ // If there are no PHIs to process, bail early. This helps ensure idempotence
+ // as well.
+ if (!HaveRewritablePHIs && !(HoistCondStores && SpeculatedStoreValue))
+ return false;
+
+ // If we get here, we can hoist the instruction and if-convert.
+ DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
+
+ // Insert a select of the value of the speculated store.
+ // The select chooses between the value ThenBB wanted to store and the value
+ // of the earlier store in BB, so the hoisted store can run unconditionally.
+ if (SpeculatedStoreValue) {
+ IRBuilder<true, NoFolder> Builder(BI);
+ Value *TrueV = SpeculatedStore->getValueOperand();
+ Value *FalseV = SpeculatedStoreValue;
+ if (Invert)
+ std::swap(TrueV, FalseV);
+ Value *S = Builder.CreateSelect(BrCond, TrueV, FalseV, TrueV->getName() +
+ "." + FalseV->getName());
+ SpeculatedStore->setOperand(0, S);
+ }
+
+ // Hoist the instructions.
+ // Everything in ThenBB except its terminator is moved in front of BI.
+ BB->getInstList().splice(BI, ThenBB->getInstList(), ThenBB->begin(),
+ llvm::prior(ThenBB->end()));
+
+ // Insert selects and rewrite the PHI operands.
+ IRBuilder<true, NoFolder> Builder(BI);
+ for (BasicBlock::iterator I = EndBB->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ unsigned OrigI = PN->getBasicBlockIndex(BB);
+ unsigned ThenI = PN->getBasicBlockIndex(ThenBB);
+ Value *OrigV = PN->getIncomingValue(OrigI);
+ Value *ThenV = PN->getIncomingValue(ThenI);
+
+ // Skip PHIs which are trivial.
+ if (OrigV == ThenV)
+ continue;
+
+ // Create a select whose true value is the speculatively executed value and
+ // false value is the preexisting value. Swap them if the branch
+ // destinations were inverted.
+ Value *TrueV = ThenV, *FalseV = OrigV;
+ if (Invert)
+ std::swap(TrueV, FalseV);
+ Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV,
+ TrueV->getName() + "." + FalseV->getName());
+ // Both incoming edges now carry the same select, making the PHI trivial.
+ PN->setIncomingValue(OrigI, V);
+ PN->setIncomingValue(ThenI, V);
+ }
+
+ ++NumSpeculations;
+ return true;
+}
+
+/// Scan \p BB for a call that must not be duplicated.
+///
+/// \returns True if some CallInst in the block reports cannotDuplicate();
+/// false if there is no such call.
+static bool HasNoDuplicateCall(const BasicBlock *BB) {
+  BasicBlock::const_iterator Cur = BB->begin(), End = BB->end();
+  while (Cur != End) {
+    // Instructions that are not calls produce a null pointer and are ignored.
+    if (const CallInst *Call = dyn_cast<CallInst>(Cur)) {
+      if (Call->cannotDuplicate())
+        return true;
+    }
+    ++Cur;
+  }
+  return false;
+}
+
+/// BlockIsSimpleEnoughToThreadThrough - Decide whether a branch can be
+/// threaded across \p BB.
+///
+/// The block must end in a BranchInst. Every instruction before that branch
+/// (debug intrinsics are ignored) must be used only inside BB and never by a
+/// PHI node, and the block must not be too large to clone.
+///
+/// \returns true if all of those conditions hold.
+static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
+  BranchInst *Term = cast<BranchInst>(BB->getTerminator());
+  unsigned NumCounted = 0;
+
+  BasicBlock::iterator Cur = BB->begin();
+  while (&*Cur != Term) {
+    if (!isa<DbgInfoIntrinsic>(Cur)) {
+      // Don't clone large BB's: give up on reaching another instruction once
+      // more than ten have already been counted.
+      if (NumCounted > 10)
+        return false;
+      ++NumCounted;
+
+      // A value that is live outside of this block, or that feeds a PHI node,
+      // cannot be supported.
+      for (Value::use_iterator UseIt = Cur->use_begin(),
+             UseEnd = Cur->use_end(); UseIt != UseEnd; ++UseIt) {
+        Instruction *UseInst = cast<Instruction>(*UseIt);
+        bool IsLocalNonPHIUse =
+          UseInst->getParent() == BB && !isa<PHINode>(UseInst);
+        if (!IsLocalNonPHIUse)
+          return false;
+      }
+    }
+    ++Cur;
+  }
+
+  return true;
+}
+
+/// FoldCondBranchOnPHI - If we have a conditional branch on a PHI node value
+/// that is defined in the same block as the branch and if any PHI entries are
+/// constants, thread edges corresponding to that entry to be branches to their
+/// ultimate destination.
+///
+/// \param BI The conditional branch whose condition is examined.
+/// \param TD Passed through to SimplifyInstruction when cloned instructions
+/// are simplified.
+/// \returns true if a single-entry PHI was folded or at least one edge was
+/// threaded; false if nothing was changed.
+static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *TD) {
+ BasicBlock *BB = BI->getParent();
+ PHINode *PN = dyn_cast<PHINode>(BI->getCondition());
+ // NOTE: we currently cannot transform this case if the PHI node is used
+ // outside of the block.
+ if (!PN || PN->getParent() != BB || !PN->hasOneUse())
+ return false;
+
+ // Degenerate case of a single entry PHI.
+ if (PN->getNumIncomingValues() == 1) {
+ FoldSingleEntryPHINodes(PN->getParent());
+ return true;
+ }
+
+ // Now we know that this block has multiple preds and two succs.
+ if (!BlockIsSimpleEnoughToThreadThrough(BB)) return false;
+
+ // The instructions of BB are cloned below, which is not allowed for a call
+ // that cannot be duplicated.
+ if (HasNoDuplicateCall(BB)) return false;
+
+ // Okay, this is a simple enough basic block. See if any phi values are
+ // constants.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ ConstantInt *CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i));
+ if (CB == 0 || !CB->getType()->isIntegerTy(1)) continue;
+
+ // Okay, we now know that all edges from PredBB should be revectored to
+ // branch to RealDest.
+ // A constant of 1 selects successor 0 and a constant of 0 selects
+ // successor 1.
+ BasicBlock *PredBB = PN->getIncomingBlock(i);
+ BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
+
+ if (RealDest == BB) continue; // Skip self loops.
+ // Skip if the predecessor's terminator is an indirect branch.
+ if (isa<IndirectBrInst>(PredBB->getTerminator())) continue;
+
+ // The dest block might have PHI nodes, other predecessors and other
+ // difficult cases. Instead of being smart about this, just insert a new
+ // block that jumps to the destination block, effectively splitting
+ // the edge we are about to create.
+ BasicBlock *EdgeBB = BasicBlock::Create(BB->getContext(),
+ RealDest->getName()+".critedge",
+ RealDest->getParent(), RealDest);
+ BranchInst::Create(RealDest, EdgeBB);
+
+ // Update PHI nodes.
+ AddPredecessorToBlock(RealDest, EdgeBB, BB);
+
+ // BB may have instructions that are being threaded over. Clone these
+ // instructions into EdgeBB. We know that there will be no uses of the
+ // cloned instructions outside of EdgeBB.
+ // InsertPt refers to the branch just created in EdgeBB, so clones are
+ // placed in front of it in their original order.
+ BasicBlock::iterator InsertPt = EdgeBB->begin();
+ DenseMap<Value*, Value*> TranslateMap; // Track translated values.
+ for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
+ // Note: this PN shadows the outer PN (the branch condition) for the
+ // duration of the if statement. A PHI is not cloned; on this edge it is
+ // simply the value incoming from PredBB.
+ if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
+ TranslateMap[PN] = PN->getIncomingValueForBlock(PredBB);
+ continue;
+ }
+ // Clone the instruction.
+ Instruction *N = BBI->clone();
+ if (BBI->hasName()) N->setName(BBI->getName()+".c");
+
+ // Update operands due to translation.
+ for (User::op_iterator i = N->op_begin(), e = N->op_end();
+ i != e; ++i) {
+ DenseMap<Value*, Value*>::iterator PI = TranslateMap.find(*i);
+ if (PI != TranslateMap.end())
+ *i = PI->second;
+ }
+
+ // Check for trivial simplification.
+ if (Value *V = SimplifyInstruction(N, TD)) {
+ TranslateMap[BBI] = V;
+ delete N; // Instruction folded away, don't need actual inst
+ } else {
+ // Insert the new instruction into its new home.
+ EdgeBB->getInstList().insert(InsertPt, N);
+ if (!BBI->use_empty())
+ TranslateMap[BBI] = N;
+ }
+ }
+
+ // Loop over all of the edges from PredBB to BB, changing them to branch
+ // to EdgeBB instead.
+ TerminatorInst *PredBBTI = PredBB->getTerminator();
+ for (unsigned i = 0, e = PredBBTI->getNumSuccessors(); i != e; ++i)
+ if (PredBBTI->getSuccessor(i) == BB) {
+ BB->removePredecessor(PredBB);
+ PredBBTI->setSuccessor(i, EdgeBB);
+ }
+
+ // Recurse, simplifying any other constants.
+ // The CFG was modified above, so the scan is restarted through recursion
+ // instead of continuing this loop. The result is forced to true because
+ // an edge has already been threaded.
+ return FoldCondBranchOnPHI(BI, TD) | true;
+ }
+
+ return false;
+}
+
+/// FoldTwoEntryPHINode - Given a BB that starts with the specified two-entry
+/// PHI node, see if we can eliminate it.
+///
+/// On success every PHI node in the block has either been simplified away or
+/// replaced by a select on the controlling condition, the instructions of the
+/// conditional blocks have been moved into the dominating block, and that
+/// block branches directly to BB.
+///
+/// \param PN A two-entry PHI node; its parent is the block that is processed.
+/// \param TD Passed through to SimplifyInstruction.
+/// \returns true if the block was changed as described above (or all of its
+/// PHI nodes were simplified away); false if the transform was not applied.
+/// NOTE(review): a false return can still follow the erasure of PHIs that
+/// SimplifyInstruction folded in the loop below.
+static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *TD) {
+ // Ok, this is a two entry PHI node. Check to see if this is a simple "if
+ // statement", which has a very simple dominance structure. Basically, we
+ // are trying to find the condition that is being branched on, which
+ // subsequently causes this merge to happen. We really want control
+ // dependence information for this check, but simplifycfg can't keep it up
+ // to date, and this catches most of the cases we care about anyway.
+ BasicBlock *BB = PN->getParent();
+ BasicBlock *IfTrue, *IfFalse;
+ Value *IfCond = GetIfCondition(BB, IfTrue, IfFalse);
+ if (!IfCond ||
+ // Don't bother if the branch will be constant folded trivially.
+ isa<ConstantInt>(IfCond))
+ return false;
+
+ // Okay, we found that we can merge this two-entry phi node into a select.
+ // Doing so would require us to fold *all* two entry phi nodes in this block.
+ // At some point this becomes non-profitable (particularly if the target
+ // doesn't support cmov's). Only do this transformation if there are two or
+ // fewer PHI nodes in this block.
+ // NOTE(review): as written, the loop below returns false only when a fourth
+ // PHI node is reached (NumPhis == 3), so blocks with up to three PHI nodes
+ // are accepted, which does not match "two or fewer" above.
+ unsigned NumPhis = 0;
+ for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
+ if (NumPhis > 2)
+ return false;
+
+ // Loop over the PHI's seeing if we can promote them all to select
+ // instructions. While we are at it, keep track of the instructions
+ // that need to be moved to the dominating block.
+ SmallPtrSet<Instruction*, 4> AggressiveInsts;
+ unsigned MaxCostVal0 = PHINodeFoldingThreshold,
+ MaxCostVal1 = PHINodeFoldingThreshold;
+
+ // The iterator is advanced before the PHI may be erased so that it stays
+ // valid. This PN shadows the function parameter inside the loop.
+ for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
+ PHINode *PN = cast<PHINode>(II++);
+ if (Value *V = SimplifyInstruction(PN, TD)) {
+ PN->replaceAllUsesWith(V);
+ PN->eraseFromParent();
+ continue;
+ }
+
+ if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts,
+ MaxCostVal0) ||
+ !DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts,
+ MaxCostVal1))
+ return false;
+ }
+
+ // If we folded the first phi, PN dangles at this point. Refresh it. If
+ // we ran out of PHIs then we simplified them all.
+ PN = dyn_cast<PHINode>(BB->begin());
+ if (PN == 0) return true;
+
+ // Don't fold i1 branches on PHIs which contain binary operators. These can
+ // often be turned into switches and other things.
+ if (PN->getType()->isIntegerTy(1) &&
+ (isa<BinaryOperator>(PN->getIncomingValue(0)) ||
+ isa<BinaryOperator>(PN->getIncomingValue(1)) ||
+ isa<BinaryOperator>(IfCond)))
+ return false;
+
+ // If we all PHI nodes are promotable, check to make sure that all
+ // instructions in the predecessor blocks can be promoted as well. If
+ // not, we won't be able to get rid of the control flow, so it's not
+ // worth promoting to select instructions.
+ // An incoming block that ends in a conditional branch is recorded as null
+ // below; only blocks ending in an unconditional branch are checked and,
+ // later, emptied into DomBlock.
+ BasicBlock *DomBlock = 0;
+ BasicBlock *IfBlock1 = PN->getIncomingBlock(0);
+ BasicBlock *IfBlock2 = PN->getIncomingBlock(1);
+ if (cast<BranchInst>(IfBlock1->getTerminator())->isConditional()) {
+ IfBlock1 = 0;
+ } else {
+ DomBlock = *pred_begin(IfBlock1);
+ for (BasicBlock::iterator I = IfBlock1->begin();!isa<TerminatorInst>(I);++I)
+ if (!AggressiveInsts.count(I) && !isa<DbgInfoIntrinsic>(I)) {
+ // This is not an aggressive instruction that we can promote.
+ // Because of this, we won't be able to get rid of the control
+ // flow, so the xform is not worth it.
+ return false;
+ }
+ }
+
+ if (cast<BranchInst>(IfBlock2->getTerminator())->isConditional()) {
+ IfBlock2 = 0;
+ } else {
+ DomBlock = *pred_begin(IfBlock2);
+ for (BasicBlock::iterator I = IfBlock2->begin();!isa<TerminatorInst>(I);++I)
+ if (!AggressiveInsts.count(I) && !isa<DbgInfoIntrinsic>(I)) {
+ // This is not an aggressive instruction that we can promote.
+ // Because of this, we won't be able to get rid of the control
+ // flow, so the xform is not worth it.
+ return false;
+ }
+ }
+
+ DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond << " T: "
+ << IfTrue->getName() << " F: " << IfFalse->getName() << "\n");
+
+ // If we can still promote the PHI nodes after this gauntlet of tests,
+ // do all of the PHI's now.
+ // NOTE(review): DomBlock is assigned only when at least one incoming block
+ // ends in an unconditional branch; presumably GetIfCondition guarantees
+ // that, otherwise this would dereference null. Confirm against its contract.
+ Instruction *InsertPt = DomBlock->getTerminator();
+ IRBuilder<true, NoFolder> Builder(InsertPt);
+
+ // Move all 'aggressive' instructions, which are defined in the
+ // conditional parts of the if's up to the dominating block.
+ if (IfBlock1)
+ DomBlock->getInstList().splice(InsertPt,
+ IfBlock1->getInstList(), IfBlock1->begin(),
+ IfBlock1->getTerminator());
+ if (IfBlock2)
+ DomBlock->getInstList().splice(InsertPt,
+ IfBlock2->getInstList(), IfBlock2->begin(),
+ IfBlock2->getTerminator());
+
+ while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
+ // Change the PHI node into a select instruction.
+ // The comparisons yield index 0 or 1: the true value is incoming value 1
+ // when incoming block 0 is IfFalse and incoming value 0 otherwise; the
+ // false value is chosen the same way using IfTrue.
+ Value *TrueVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse);
+ Value *FalseVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue);
+
+ SelectInst *NV =
+ cast<SelectInst>(Builder.CreateSelect(IfCond, TrueVal, FalseVal, ""));
+ PN->replaceAllUsesWith(NV);
+ NV->takeName(PN);
+ PN->eraseFromParent();
+ }
+
+ // At this point, IfBlock1 and IfBlock2 are both empty, so our if statement
+ // has been flattened. Change DomBlock to jump directly to our new block to
+ // avoid other simplifycfg's kicking in on the diamond.
+ TerminatorInst *OldTI = DomBlock->getTerminator();
+ Builder.SetInsertPoint(OldTI);
+ Builder.CreateBr(BB);
+ OldTI->eraseFromParent();
+ return true;
+}
+
+/// SimplifyCondBranchToTwoReturns - Merge the two return blocks targeted by a
+/// conditional branch into a single return placed at the branch itself. When
+/// the two returned values differ, a select on the branch condition picks the
+/// value to return.
+///
+/// \param BI A conditional branch (asserted) whose two successors both end in
+/// a return instruction.
+/// \param Builder Used to emit the new select and return in front of BI; its
+/// insertion point is changed.
+/// \returns true if BI was replaced by a return, false if either successor
+/// holds anything besides PHI nodes, debug intrinsics and its return, or if a
+/// returned constant expression could trap.
+static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
+                                           IRBuilder<> &Builder) {
+  assert(BI->isConditional() && "Must be a conditional branch");
+  BasicBlock *TrueBB = BI->getSuccessor(0);
+  BasicBlock *FalseBB = BI->getSuccessor(1);
+  ReturnInst *RetOnTrue = cast<ReturnInst>(TrueBB->getTerminator());
+  ReturnInst *RetOnFalse = cast<ReturnInst>(FalseBB->getTerminator());
+
+  // Both blocks must consist of nothing but (optional) PHI nodes and the
+  // return; otherwise merging would add computation to one of the paths.
+  if (!(TrueBB->getFirstNonPHIOrDbg()->isTerminator() &&
+        FalseBB->getFirstNonPHIOrDbg()->isTerminator()))
+    return false;
+
+  Builder.SetInsertPoint(BI);
+  BasicBlock *BranchBB = BI->getParent();
+
+  // A function without a return value needs no select: the branch simply
+  // becomes a 'ret void'.
+  if (RetOnFalse->getNumOperands() == 0) {
+    TrueBB->removePredecessor(BranchBB);
+    FalseBB->removePredecessor(BranchBB);
+    Builder.CreateRetVoid();
+    EraseTerminatorInstAndDCECond(BI);
+    return true;
+  }
+
+  // Work out which value is returned along each edge, looking through a PHI
+  // node that lives in the return block itself.
+  Value *TrueValue = RetOnTrue->getReturnValue();
+  Value *FalseValue = RetOnFalse->getReturnValue();
+
+  PHINode *TruePhi = dyn_cast_or_null<PHINode>(TrueValue);
+  if (TruePhi && TruePhi->getParent() == TrueBB)
+    TrueValue = TruePhi->getIncomingValueForBlock(BranchBB);
+  PHINode *FalsePhi = dyn_cast_or_null<PHINode>(FalseValue);
+  if (FalsePhi && FalsePhi->getParent() == FalseBB)
+    FalseValue = FalsePhi->getIncomingValueForBlock(BranchBB);
+
+  // Both values are about to be evaluated unconditionally, so neither may be
+  // a constant expression that can trap.
+  ConstantExpr *TrueCE = dyn_cast_or_null<ConstantExpr>(TrueValue);
+  if (TrueCE && TrueCE->canTrap())
+    return false;
+  ConstantExpr *FalseCE = dyn_cast_or_null<ConstantExpr>(FalseValue);
+  if (FalseCE && FalseCE->canTrap())
+    return false;
+
+  // All checks passed; commit to the transformation. Update the CFG first.
+  TrueBB->removePredecessor(BranchBB);
+  FalseBB->removePredecessor(BranchBB);
+
+  // Pick the value to return. Nothing needs to be done when both values agree
+  // or the false value is undef; an undef true value is replaced by the false
+  // value; otherwise a select is required.
+  Value *BrCond = BI->getCondition();
+  if (TrueValue && TrueValue != FalseValue && !isa<UndefValue>(FalseValue)) {
+    if (isa<UndefValue>(TrueValue))
+      TrueValue = FalseValue;
+    else
+      TrueValue = Builder.CreateSelect(BrCond, TrueValue,
+                                       FalseValue, "retval");
+  }
+
+  Value *RI;
+  if (TrueValue)
+    RI = Builder.CreateRet(TrueValue);
+  else
+    RI = Builder.CreateRetVoid();
+
+  // RI is only read by the debug output below.
+  (void) RI;
+
+  DEBUG(dbgs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:"
+               << "\n " << *BI << "NewRet = " << *RI
+               << "TRUEBLOCK: " << *TrueBB << "FALSEBLOCK: "<< *FalseBB);
+
+  EraseTerminatorInstAndDCECond(BI);
+
+  return true;
+}
+
+/// ExtractBranchMetadata - Given a conditional BranchInstruction, retrieve the
+/// values recorded for each edge in its MD_prof metadata node.
+///
+/// The node must have exactly three operands, and operands 1 and 2 must be
+/// ConstantInts; they are taken as the values for the true and the false edge
+/// respectively. Operand 0 is not inspected.
+///
+/// \param BI A conditional branch (asserted).
+/// \param ProbTrue Receives the zero-extended value of operand 1.
+/// \param ProbFalse Receives the zero-extended value of operand 2.
+/// \returns true if both uint64_t outputs were filled in; false if no or
+/// invalid metadata was found, in which case the outputs are left untouched.
+static bool ExtractBranchMetadata(BranchInst *BI,
+                                  uint64_t &ProbTrue, uint64_t &ProbFalse) {
+  assert(BI->isConditional() &&
+         "Looking for probabilities on unconditional branch?");
+  MDNode *ProfileData = BI->getMetadata(LLVMContext::MD_prof);
+  if (!ProfileData || ProfileData->getNumOperands() != 3) return false;
+  // A metadata operand may be null; treat that as invalid metadata instead of
+  // tripping the null-pointer assertion inside dyn_cast.
+  ConstantInt *CITrue =
+    dyn_cast_or_null<ConstantInt>(ProfileData->getOperand(1));
+  ConstantInt *CIFalse =
+    dyn_cast_or_null<ConstantInt>(ProfileData->getOperand(2));
+  if (!CITrue || !CIFalse) return false;
+  ProbTrue = CITrue->getValue().getZExtValue();
+  ProbFalse = CIFalse->getValue().getZExtValue();
+  return true;
+}
+
+/// checkCSEInPredecessor - Look in \p PB for an instruction identical to
+/// \p Inst. Only binary operators and compares are considered.
+///
+/// \returns true if an identical instruction was found; in that case every
+/// use of Inst has been redirected to it and Inst has been erased. Returns
+/// false, leaving Inst untouched, otherwise.
+static bool checkCSEInPredecessor(Instruction *Inst, BasicBlock *PB) {
+  if (!(isa<BinaryOperator>(Inst) || isa<CmpInst>(Inst)))
+    return false;
+
+  // Find the first instruction in PB that generates the same value as Inst.
+  Instruction *Equivalent = 0;
+  for (BasicBlock::iterator It = PB->begin(), End = PB->end(); It != End;
+       It++) {
+    Instruction *Candidate = &*It;
+    if (Inst->isIdenticalTo(Candidate)) {
+      Equivalent = Candidate;
+      break;
+    }
+  }
+  if (!Equivalent)
+    return false;
+
+  Inst->replaceAllUsesWith(Equivalent);
+  Inst->eraseFromParent();
+  return true;
+}
+
+/// FoldBranchToCommonDest - If this basic block is simple enough, and if a
+/// predecessor branches to us and one of our successors, fold the block into
+/// the predecessor and use logical operations to pick the right destination. Returns true as soon as one predecessor has been rewritten (at most one per call); returns false if no fold was performed.
+bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
+ BasicBlock *BB = BI->getParent();
+
+ Instruction *Cond = 0;
+ if (BI->isConditional())
+ Cond = dyn_cast<Instruction>(BI->getCondition());
+ else {
+ // For unconditional branch, check for a simple CFG pattern, where
+ // BB has a single predecessor and BB's successor is also its predecessor's
+ // successor. If such a pattern exists, check for CSE between BB and its
+ // predecessor. The first compare found in BB becomes Cond.
+ if (BasicBlock *PB = BB->getSinglePredecessor())
+ if (BranchInst *PBI = dyn_cast<BranchInst>(PB->getTerminator()))
+ if (PBI->isConditional() &&
+ (BI->getSuccessor(0) == PBI->getSuccessor(0) ||
+ BI->getSuccessor(0) == PBI->getSuccessor(1))) {
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end();
+ I != E; ) {
+ Instruction *Curr = I++; // Advance first: Curr may be erased below.
+ if (isa<CmpInst>(Curr)) {
+ Cond = Curr;
+ break;
+ }
+ // Quit if we can't remove this instruction. Note that instructions erased by earlier successful calls stay erased even though we return false.
+ if (!checkCSEInPredecessor(Curr, PB))
+ return false;
+ }
+ }
+
+ if (Cond == 0)
+ return false;
+ }
+
+ if (Cond == 0 || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) ||
+ Cond->getParent() != BB || !Cond->hasOneUse())
+ return false;
+
+ // Only allow this if the condition is a simple instruction that can be
+ // executed unconditionally. It must be in the same block as the branch, and
+ // must be at the front of the block.
+ BasicBlock::iterator FrontIt = BB->front();
+
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(FrontIt)) ++FrontIt;
+
+ // Allow a single instruction to be hoisted in addition to the compare
+ // that feeds the branch. We later ensure that any values that _it_ uses
+ // were also live in the predecessor, so that we don't unnecessarily create
+ // register pressure or inhibit out-of-order execution.
+ Instruction *BonusInst = 0;
+ if (&*FrontIt != Cond &&
+ FrontIt->hasOneUse() && *FrontIt->use_begin() == Cond &&
+ isSafeToSpeculativelyExecute(FrontIt)) {
+ BonusInst = &*FrontIt;
+ ++FrontIt;
+
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(FrontIt)) ++FrontIt;
+ }
+
+ // Only a single bonus inst is allowed.
+ if (&*FrontIt != Cond)
+ return false;
+
+ // Make sure the instruction after the condition is the cond branch.
+ BasicBlock::iterator CondIt = Cond; ++CondIt;
+
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(CondIt)) ++CondIt;
+
+ if (&*CondIt != BI)
+ return false;
+
+ // Cond is known to be a compare or binary operator. Check to make sure that
+ // neither operand is a potentially-trapping constant expression.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(0)))
+ if (CE->canTrap())
+ return false;
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(1)))
+ if (CE->canTrap())
+ return false;
+
+ // Finally, don't infinitely unroll conditional loops.
+ BasicBlock *TrueDest = BI->getSuccessor(0);
+ BasicBlock *FalseDest = (BI->isConditional()) ? BI->getSuccessor(1) : 0; // Null for an unconditional BI.
+ if (TrueDest == BB || FalseDest == BB)
+ return false;
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *PredBlock = *PI;
+ BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
+
+ // Check that we have two conditional branches. If there is a PHI node in
+ // the common successor, verify that the same value flows in from both
+ // blocks. For an unconditional BI, PHIs is handed to isProfitableToFoldUnconditional and is consumed in the PHI update further down.
+ SmallVector<PHINode*, 4> PHIs;
+ if (PBI == 0 || PBI->isUnconditional() ||
+ (BI->isConditional() &&
+ !SafeToMergeTerminators(BI, PBI)) ||
+ (!BI->isConditional() &&
+ !isProfitableToFoldUnconditional(BI, PBI, Cond, PHIs)))
+ continue;
+
+ // Determine if the two branches share a common destination.
+ Instruction::BinaryOps Opc = Instruction::BinaryOpsEnd;
+ bool InvertPredCond = false;
+
+ if (BI->isConditional()) {
+ if (PBI->getSuccessor(0) == TrueDest)
+ Opc = Instruction::Or;
+ else if (PBI->getSuccessor(1) == FalseDest)
+ Opc = Instruction::And;
+ else if (PBI->getSuccessor(0) == FalseDest)
+ Opc = Instruction::And, InvertPredCond = true;
+ else if (PBI->getSuccessor(1) == TrueDest)
+ Opc = Instruction::Or, InvertPredCond = true;
+ else
+ continue;
+ } else {
+ if (PBI->getSuccessor(0) != TrueDest && PBI->getSuccessor(1) != TrueDest)
+ continue;
+ }
+
+ // Ensure that any values used in the bonus instruction are also used
+ // by the terminator of the predecessor. This means that those values
+ // must already have been resolved, so we won't be inhibiting the
+ // out-of-order core by speculating them earlier. We also allow
+ // instructions that are used by the terminator's condition because it
+ // exposes more merging opportunities. NOTE(review): BonusInst is only set above when its single use is Cond, so UsedByBranch looks always true when BonusInst is non-null - confirm.
+ bool UsedByBranch = (BonusInst && BonusInst->hasOneUse() &&
+ *BonusInst->use_begin() == Cond);
+
+ if (BonusInst && !UsedByBranch) {
+ // Collect the values used by the bonus inst (constants and arguments are skipped).
+ SmallPtrSet<Value*, 4> UsedValues;
+ for (Instruction::op_iterator OI = BonusInst->op_begin(),
+ OE = BonusInst->op_end(); OI != OE; ++OI) {
+ Value *V = *OI;
+ if (!isa<Constant>(V) && !isa<Argument>(V))
+ UsedValues.insert(V);
+ }
+
+ SmallVector<std::pair<Value*, unsigned>, 4> Worklist;
+ Worklist.push_back(std::make_pair(PBI->getOperand(0), 0));
+
+ // Walk up to four levels back up the use-def chain of the predecessor's
+ // terminator to see if all those values were used. The choice of four
+ // levels is arbitrary, to provide a compile-time-cost bound.
+ while (!Worklist.empty()) {
+ std::pair<Value*, unsigned> Pair = Worklist.back();
+ Worklist.pop_back();
+
+ if (Pair.second >= 4) continue;
+ UsedValues.erase(Pair.first);
+ if (UsedValues.empty()) break;
+
+ if (Instruction *I = dyn_cast<Instruction>(Pair.first)) {
+ for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
+ OI != OE; ++OI)
+ Worklist.push_back(std::make_pair(OI->get(), Pair.second+1));
+ }
+ }
+
+ if (!UsedValues.empty()) return false; // Gives up on all remaining predecessors, not just this one.
+ }
+
+ DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
+ IRBuilder<> Builder(PBI);
+
+ // If we need to invert the condition in the pred block to match, do so now.
+ if (InvertPredCond) {
+ Value *NewCond = PBI->getCondition();
+
+ if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) {
+ CmpInst *CI = cast<CmpInst>(NewCond);
+ CI->setPredicate(CI->getInversePredicate());
+ } else {
+ NewCond = Builder.CreateNot(NewCond,
+ PBI->getCondition()->getName()+".not");
+ }
+
+ PBI->setCondition(NewCond);
+ PBI->swapSuccessors();
+ }
+
+ // If we have a bonus inst, clone it into the predecessor block.
+ Instruction *NewBonus = 0;
+ if (BonusInst) {
+ NewBonus = BonusInst->clone();
+ PredBlock->getInstList().insert(PBI, NewBonus);
+ NewBonus->takeName(BonusInst);
+ BonusInst->setName(BonusInst->getName()+".old");
+ }
+
+ // Clone Cond into the predecessor basic block, and or/and the
+ // two conditions together.
+ Instruction *New = Cond->clone();
+ if (BonusInst) New->replaceUsesOfWith(BonusInst, NewBonus);
+ PredBlock->getInstList().insert(PBI, New);
+ New->takeName(Cond);
+ Cond->setName(New->getName()+".old");
+
+ if (BI->isConditional()) {
+ Instruction *NewCond =
+ cast<Instruction>(Builder.CreateBinOp(Opc, PBI->getCondition(),
+ New, "or.cond"));
+ PBI->setCondition(NewCond);
+
+ uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
+ bool PredHasWeights = ExtractBranchMetadata(PBI, PredTrueWeight,
+ PredFalseWeight);
+ bool SuccHasWeights = ExtractBranchMetadata(BI, SuccTrueWeight,
+ SuccFalseWeight);
+ SmallVector<uint64_t, 8> NewWeights;
+
+ if (PBI->getSuccessor(0) == BB) {
+ if (PredHasWeights && SuccHasWeights) {
+ // PBI: br i1 %x, BB, FalseDest
+ // BI: br i1 %y, TrueDest, FalseDest
+ // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
+ NewWeights.push_back(PredTrueWeight * SuccTrueWeight);
+ // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
+ // TrueWeight for PBI * FalseWeight for BI.
+ // We assume that total weights of a BranchInst can fit into 32 bits.
+ // Therefore, we will not have overflow using 64-bit arithmetic.
+ NewWeights.push_back(PredFalseWeight * (SuccFalseWeight +
+ SuccTrueWeight) + PredTrueWeight * SuccFalseWeight);
+ }
+ AddPredecessorToBlock(TrueDest, PredBlock, BB);
+ PBI->setSuccessor(0, TrueDest);
+ }
+ if (PBI->getSuccessor(1) == BB) {
+ if (PredHasWeights && SuccHasWeights) {
+ // PBI: br i1 %x, TrueDest, BB
+ // BI: br i1 %y, TrueDest, FalseDest
+ // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
+ // FalseWeight for PBI * TrueWeight for BI.
+ NewWeights.push_back(PredTrueWeight * (SuccFalseWeight +
+ SuccTrueWeight) + PredFalseWeight * SuccTrueWeight);
+ // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
+ NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
+ }
+ AddPredecessorToBlock(FalseDest, PredBlock, BB);
+ PBI->setSuccessor(1, FalseDest);
+ }
+ if (NewWeights.size() == 2) {
+ // Halve the weights if any of them cannot fit in an uint32_t
+ FitWeights(NewWeights);
+
+ SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(),NewWeights.end());
+ PBI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(BI->getContext()).
+ createBranchWeights(MDWeights));
+ } else
+ PBI->setMetadata(LLVMContext::MD_prof, NULL); // No merged weights were computed: clear the profile metadata on PBI.
+ } else {
+ // Update PHI nodes in the common successors.
+ for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
+ ConstantInt *PBI_C = cast<ConstantInt>(
+ PHIs[i]->getIncomingValueForBlock(PBI->getParent()));
+ assert(PBI_C->getType()->isIntegerTy(1));
+ Instruction *MergedCond = 0;
+ if (PBI->getSuccessor(0) == TrueDest) {
+ // Create (PBI_Cond and PBI_C) or (!PBI_Cond and BI_Value)
+ // PBI_C is true: PBI_Cond or (!PBI_Cond and BI_Value)
+ // is false: !PBI_Cond and BI_Value
+ Instruction *NotCond =
+ cast<Instruction>(Builder.CreateNot(PBI->getCondition(),
+ "not.cond"));
+ MergedCond =
+ cast<Instruction>(Builder.CreateBinOp(Instruction::And,
+ NotCond, New,
+ "and.cond"));
+ if (PBI_C->isOne())
+ MergedCond =
+ cast<Instruction>(Builder.CreateBinOp(Instruction::Or,
+ PBI->getCondition(), MergedCond,
+ "or.cond"));
+ } else {
+ // Create (PBI_Cond and BI_Value) or (!PBI_Cond and PBI_C)
+ // PBI_C is true: (PBI_Cond and BI_Value) or (!PBI_Cond)
+ // is false: PBI_Cond and BI_Value
+ MergedCond =
+ cast<Instruction>(Builder.CreateBinOp(Instruction::And,
+ PBI->getCondition(), New,
+ "and.cond"));
+ if (PBI_C->isOne()) {
+ Instruction *NotCond =
+ cast<Instruction>(Builder.CreateNot(PBI->getCondition(),
+ "not.cond"));
+ MergedCond =
+ cast<Instruction>(Builder.CreateBinOp(Instruction::Or,
+ NotCond, MergedCond,
+ "or.cond"));
+ }
+ }
+ // Update PHI Node.
+ PHIs[i]->setIncomingValue(PHIs[i]->getBasicBlockIndex(PBI->getParent()),
+ MergedCond);
+ }
+ // Change PBI from Conditional to Unconditional.
+ BranchInst *New_PBI = BranchInst::Create(TrueDest, PBI);
+ EraseTerminatorInstAndDCECond(PBI);
+ PBI = New_PBI; // Later code inserts before PBI, so point it at the new terminator.
+ }
+
+ // TODO: If BB is reachable from all paths through PredBlock, then we
+ // could replace PBI's branch probabilities with BI's.
+
+ // Copy any debug value intrinsics into the end of PredBlock.
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (isa<DbgInfoIntrinsic>(*I))
+ I->clone()->insertBefore(PBI);
+
+ return true; // Only one predecessor is folded per call.
+ }
+ return false;
+}
+
+/// SimplifyCondBranchToCondBranch - If we have a conditional branch as a
+/// predecessor of another block, this function tries to simplify it. We know
+/// that PBI and BI are both conditional branches, and BI is in one of the
+/// successor blocks of PBI - PBI branches to BI. Returns true if the IR was changed, false otherwise.
+static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
+ assert(PBI->isConditional() && BI->isConditional());
+ BasicBlock *BB = BI->getParent();
+
+ // If this block ends with a branch instruction, and if there is a
+ // predecessor that ends on a branch of the same condition, make
+ // this conditional branch redundant.
+ if (PBI->getCondition() == BI->getCondition() &&
+ PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
+ // Okay, the outcome of this conditional branch is statically
+ // knowable. If this block had a single pred, handle specially.
+ if (BB->getSinglePredecessor()) {
+ // Turn this into a branch on constant.
+ bool CondIsTrue = PBI->getSuccessor(0) == BB;
+ BI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()),
+ CondIsTrue));
+ return true; // Nuke the branch on constant.
+ }
+
+ // Otherwise, if there are multiple predecessors, insert a PHI that merges
+ // in the constant and simplify the block result. Subsequent passes of
+ // simplifycfg will thread the block.
+ if (BlockIsSimpleEnoughToThreadThrough(BB)) {
+ pred_iterator PB = pred_begin(BB), PE = pred_end(BB);
+ PHINode *NewPN = PHINode::Create(Type::getInt1Ty(BB->getContext()),
+ std::distance(PB, PE),
+ BI->getCondition()->getName() + ".pr",
+ BB->begin());
+ // Okay, we're going to insert the PHI node. Since PBI is not the only
+ // predecessor, compute the PHI'd conditional value for all of the preds.
+ // Any predecessor where the condition is not computable we keep symbolic. Note that the PBI parameter is reused as a scratch variable in the loop below; we return right after it.
+ for (pred_iterator PI = PB; PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ if ((PBI = dyn_cast<BranchInst>(P->getTerminator())) &&
+ PBI != BI && PBI->isConditional() &&
+ PBI->getCondition() == BI->getCondition() &&
+ PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
+ bool CondIsTrue = PBI->getSuccessor(0) == BB;
+ NewPN->addIncoming(ConstantInt::get(Type::getInt1Ty(BB->getContext()),
+ CondIsTrue), P);
+ } else {
+ NewPN->addIncoming(BI->getCondition(), P);
+ }
+ }
+
+ BI->setCondition(NewPN);
+ return true;
+ }
+ }
+
+ // If this is a conditional branch in an empty block, and if any
+ // predecessor is a conditional branch to one of our destinations,
+ // fold the conditions into logical ops and one cond br.
+ BasicBlock::iterator BBI = BB->begin();
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(BBI))
+ ++BBI;
+ if (&*BBI != BI)
+ return false;
+
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BI->getCondition()))
+ if (CE->canTrap())
+ return false;
+
+ int PBIOp, BIOp; // Successor indices (0 or 1) of PBI and BI that lead to the shared destination.
+ if (PBI->getSuccessor(0) == BI->getSuccessor(0))
+ PBIOp = BIOp = 0;
+ else if (PBI->getSuccessor(0) == BI->getSuccessor(1))
+ PBIOp = 0, BIOp = 1;
+ else if (PBI->getSuccessor(1) == BI->getSuccessor(0))
+ PBIOp = 1, BIOp = 0;
+ else if (PBI->getSuccessor(1) == BI->getSuccessor(1))
+ PBIOp = BIOp = 1;
+ else
+ return false;
+
+ // Check to make sure that the common destination of the two branches
+ // isn't BB itself. If so, this is an infinite loop that will
+ // keep getting unwound.
+ if (PBI->getSuccessor(PBIOp) == BB)
+ return false;
+
+ // Do not perform this transformation if it would require
+ // insertion of a large number of select instructions. For targets
+ // without predication/cmovs, this is a big pessimization.
+ BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
+
+ unsigned NumPhis = 0;
+ for (BasicBlock::iterator II = CommonDest->begin();
+ isa<PHINode>(II); ++II, ++NumPhis)
+ if (NumPhis > 2) // Disable this xform (checked before the increment, so a fourth PHI trips it; up to three are tolerated).
+ return false;
+
+ // Finally, if everything is ok, fold the branches to logical ops.
+ BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
+
+ DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
+ << "AND: " << *BI->getParent());
+
+
+ // If OtherDest *is* BB, then BB is a basic block with a single conditional
+ // branch in it, where one edge (OtherDest) goes back to itself but the other
+ // exits. We don't *know* that the program avoids the infinite loop
+ // (even though that seems likely). If we do this xform naively, we'll end up
+ // recursively unpeeling the loop. Since we know that (after the xform is
+ // done) that the block *is* infinite if reached, we just make it an obviously
+ // infinite loop with no cond branch.
+ if (OtherDest == BB) {
+ // Insert it at the end of the function, because it's either code,
+ // or it won't matter if it's hot. :)
+ BasicBlock *InfLoopBlock = BasicBlock::Create(BB->getContext(),
+ "infloop", BB->getParent());
+ BranchInst::Create(InfLoopBlock, InfLoopBlock);
+ OtherDest = InfLoopBlock;
+ }
+
+ DEBUG(dbgs() << *PBI->getParent()->getParent());
+
+ // BI may have other predecessors. Because of this, we leave
+ // it alone, but modify PBI.
+
+ // Make sure we get to CommonDest on True&True directions.
+ Value *PBICond = PBI->getCondition();
+ IRBuilder<true, NoFolder> Builder(PBI);
+ if (PBIOp)
+ PBICond = Builder.CreateNot(PBICond, PBICond->getName()+".not");
+
+ Value *BICond = BI->getCondition();
+ if (BIOp)
+ BICond = Builder.CreateNot(BICond, BICond->getName()+".not");
+
+ // Merge the conditions.
+ Value *Cond = Builder.CreateOr(PBICond, BICond, "brmerge");
+
+ // Modify PBI to branch on the new condition to the new dests.
+ PBI->setCondition(Cond);
+ PBI->setSuccessor(0, CommonDest);
+ PBI->setSuccessor(1, OtherDest);
+
+ // Update branch weight for PBI. Nothing is written unless both branches carry weights.
+ uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
+ bool PredHasWeights = ExtractBranchMetadata(PBI, PredTrueWeight,
+ PredFalseWeight);
+ bool SuccHasWeights = ExtractBranchMetadata(BI, SuccTrueWeight,
+ SuccFalseWeight);
+ if (PredHasWeights && SuccHasWeights) {
+ uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
+ uint64_t PredOther = PBIOp ?PredTrueWeight : PredFalseWeight;
+ uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
+ uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
+ // The weight to CommonDest should be PredCommon * SuccTotal +
+ // PredOther * SuccCommon.
+ // The weight to OtherDest should be PredOther * SuccOther.
+ SmallVector<uint64_t, 2> NewWeights;
+ NewWeights.push_back(PredCommon * (SuccCommon + SuccOther) +
+ PredOther * SuccCommon);
+ NewWeights.push_back(PredOther * SuccOther);
+ // Halve the weights if any of them cannot fit in an uint32_t
+ FitWeights(NewWeights);
+
+ SmallVector<uint32_t, 2> MDWeights(NewWeights.begin(),NewWeights.end());
+ PBI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(BI->getContext()).
+ createBranchWeights(MDWeights));
+ }
+
+ // OtherDest may have phi nodes. If so, add an entry from PBI's
+ // block that are identical to the entries for BI's block.
+ AddPredecessorToBlock(OtherDest, PBI->getParent(), BB);
+
+ // We know that the CommonDest already had an edge from PBI to
+ // it. If it has PHIs though, the PHIs may have different
+ // entries for BB and PBI's BB. If so, insert a select to make
+ // them agree.
+ PHINode *PN;
+ for (BasicBlock::iterator II = CommonDest->begin();
+ (PN = dyn_cast<PHINode>(II)); ++II) {
+ Value *BIV = PN->getIncomingValueForBlock(BB);
+ unsigned PBBIdx = PN->getBasicBlockIndex(PBI->getParent());
+ Value *PBIV = PN->getIncomingValue(PBBIdx);
+ if (BIV != PBIV) {
+ // Insert a select in PBI to pick the right value: PBIV when PBICond holds, BIV otherwise.
+ Value *NV = cast<SelectInst>
+ (Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName()+".mux"));
+ PN->setIncomingValue(PBBIdx, NV);
+ }
+ }
+
+ DEBUG(dbgs() << "INTO: " << *PBI->getParent());
+ DEBUG(dbgs() << *PBI->getParent()->getParent());
+
+ // This basic block is probably dead. We know it has at least
+ // one fewer predecessor.
+ return true;
+}
+
+// SimplifyTerminatorOnSelect - Replace the terminator OldTerm with a branch
+// that goes to TrueBB when Cond is true and to FalseBB when Cond is false.
+// Successor edges of OldTerm that lead anywhere else are removed, and OldTerm
+// itself is erased.  No new successor edge is ever introduced: if TrueBB or
+// FalseBB is not already a successor of OldTerm, the edge to it is treated as
+// unreachable.  TrueWeight and FalseWeight are attached as branch weights to a
+// newly created conditional branch, but only when they differ.
+// Always returns true.
+static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond,
+                                       BasicBlock *TrueBB, BasicBlock *FalseBB,
+                                       uint32_t TrueWeight,
+                                       uint32_t FalseWeight){
+  const bool SameDest = (TrueBB == FalseBB);
+
+  // Destinations we still have to find among OldTerm's successors.  Each one
+  // is cleared the first time it is seen, so exactly one copy of each wanted
+  // edge survives.  With a single shared destination only one is tracked.
+  BasicBlock *PendingTrue = TrueBB;
+  BasicBlock *PendingFalse = SameDest ? 0 : FalseBB;
+
+  const unsigned NumSuccs = OldTerm->getNumSuccessors();
+  for (unsigned Idx = 0; Idx < NumSuccs; ++Idx) {
+    BasicBlock *Dest = OldTerm->getSuccessor(Idx);
+    if (Dest == PendingTrue) {
+      PendingTrue = 0;
+      continue;
+    }
+    if (Dest == PendingFalse) {
+      PendingFalse = 0;
+      continue;
+    }
+    // A superfluous (or duplicate) edge: drop it from the CFG.
+    Dest->removePredecessor(OldTerm->getParent());
+  }
+
+  const bool FoundTrue = (PendingTrue == 0);
+  const bool FoundFalse = (PendingFalse == 0);
+
+  IRBuilder<> Builder(OldTerm);
+  Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());
+
+  // Emit the replacement terminator in front of OldTerm.
+  if (FoundTrue && FoundFalse) {
+    if (SameDest) {
+      // A single destination was wanted and it is a successor.
+      Builder.CreateBr(TrueBB);
+    } else {
+      // Both destinations are successors: branch on the select's condition.
+      BranchInst *CondBr = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
+      if (TrueWeight != FalseWeight)
+        CondBr->setMetadata(LLVMContext::MD_prof,
+                            MDBuilder(OldTerm->getContext()).
+                            createBranchWeights(TrueWeight, FalseWeight));
+    }
+  } else if (!FoundTrue && (SameDest || !FoundFalse)) {
+    // None of the wanted destinations is a successor, so this terminator
+    // must be unreachable.
+    new UnreachableInst(OldTerm->getContext(), OldTerm);
+  } else {
+    // Exactly one of the two destinations is a successor.  Branch to it
+    // unconditionally; the other edge must be unreachable.
+    Builder.CreateBr(FoundTrue ? TrueBB : FalseBB);
+  }
+
+  EraseTerminatorInstAndDCECond(OldTerm);
+  return true;
+}
+
+// SimplifySwitchOnSelect - Handles
+//   (switch (select cond, X, Y))
+// where X and Y are both constant integers, by replacing the switch with a
+// branch on cond: conditional when X and Y lead to distinct blocks,
+// unconditional otherwise.  Returns false, leaving the switch untouched, if
+// either arm of the select is not a ConstantInt.
+static bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) {
+  ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
+  ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
+  if (TrueVal == 0 || FalseVal == 0)
+    return false;
+
+  // Destinations the switch takes for each arm of the select.
+  BasicBlock *TrueBB = SI->findCaseValue(TrueVal).getCaseSuccessor();
+  BasicBlock *FalseBB = SI->findCaseValue(FalseVal).getCaseSuccessor();
+
+  // Pick up the weights of the two selected edges.  They stay zero unless the
+  // switch has branch weights and their count is the number of cases plus one.
+  uint32_t TrueWeight = 0;
+  uint32_t FalseWeight = 0;
+  if (HasBranchWeights(SI)) {
+    SmallVector<uint64_t, 8> Weights;
+    GetBranchWeights(SI, Weights);
+    if (Weights.size() == 1 + SI->getNumCases()) {
+      unsigned TrueIdx = SI->findCaseValue(TrueVal).getSuccessorIndex();
+      unsigned FalseIdx = SI->findCaseValue(FalseVal).getSuccessorIndex();
+      TrueWeight = (uint32_t)Weights[TrueIdx];
+      FalseWeight = (uint32_t)Weights[FalseIdx];
+    }
+  }
+
+  // Hand off to the shared terminator rewrite.
+  return SimplifyTerminatorOnSelect(SI, Select->getCondition(), TrueBB,
+                                    FalseBB, TrueWeight, FalseWeight);
+}
+
+// SimplifyIndirectBrOnSelect - Rewrites
+//   (indirectbr (select cond, blockaddress(@fn, BlockA),
+//                             blockaddress(@fn, BlockB)))
+// into
+//   (br cond, BlockA, BlockB).
+// Returns false without changing anything unless both arms of the select are
+// block addresses.
+static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) {
+  BlockAddress *TrueAddr = dyn_cast<BlockAddress>(SI->getTrueValue());
+  BlockAddress *FalseAddr = dyn_cast<BlockAddress>(SI->getFalseValue());
+  if (TrueAddr == 0 || FalseAddr == 0)
+    return false;
+
+  // Branch directly to the blocks the two addresses refer to.  No branch
+  // weights are supplied (both are passed as zero).
+  return SimplifyTerminatorOnSelect(IBI, SI->getCondition(),
+                                    TrueAddr->getBasicBlock(),
+                                    FalseAddr->getBasicBlock(), 0, 0);
+}
+
+/// TryToSimplifyUncondBranchWithICmpInIt - This is called when we find an icmp
+/// instruction (a seteq/setne with a constant) as the only instruction in a
+/// block that ends with an uncond branch. We are looking for a very specific
+/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
+/// this case, we merge the first two "or's of icmp" into a switch, but then the
+/// default value goes to an uncond block with a seteq in it, we get something
+/// like:
+///
+/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
+/// DEFAULT:
+/// %tmp = icmp eq i8 %A, 92
+/// br label %end
+/// end:
+/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
+///
+/// We prefer to split the edge to 'end' so that there is a true/false entry to
+/// the PHI, merging the third icmp into the switch. Returns true if the IR was changed, false if the pattern did not match.
+static bool TryToSimplifyUncondBranchWithICmpInIt(
+ ICmpInst *ICI, IRBuilder<> &Builder, const TargetTransformInfo &TTI,
+ const DataLayout *TD) {
+ BasicBlock *BB = ICI->getParent();
+
+ // If the block has any PHIs in it or the icmp has multiple uses, it is too
+ // complex.
+ if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse()) return false;
+
+ Value *V = ICI->getOperand(0);
+ ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1)); // cast<>, not dyn_cast<>: operand 1 is required to be a ConstantInt here.
+
+ // The pattern we're looking for is where our only predecessor is a switch on
+ // 'V' and this block is the default case for the switch. In this case we can
+ // fold the compared value into the switch to simplify things.
+ BasicBlock *Pred = BB->getSinglePredecessor();
+ if (Pred == 0 || !isa<SwitchInst>(Pred->getTerminator())) return false;
+
+ SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
+ if (SI->getCondition() != V)
+ return false;
+
+ // If BB is reachable on a non-default case, then we simply know the value of
+ // V in this block. Substitute it and constant fold the icmp instruction
+ // away.
+ if (SI->getDefaultDest() != BB) {
+ ConstantInt *VVal = SI->findCaseDest(BB);
+ assert(VVal && "Should have a unique destination value");
+ ICI->setOperand(0, VVal);
+
+ if (Value *V = SimplifyInstruction(ICI, TD)) { // This V shadows the outer V for the scope of the if.
+ ICI->replaceAllUsesWith(V);
+ ICI->eraseFromParent();
+ }
+ // BB is now empty, so it is likely to simplify away.
+ return SimplifyCFG(BB, TTI, TD) | true; // Always true: ICI's operand was already rewritten above.
+ }
+
+ // Ok, the block is reachable from the default dest. If the constant we're
+ // comparing exists in one of the other edges, then we can constant fold ICI
+ // and zap it.
+ if (SI->findCaseValue(Cst) != SI->case_default()) {
+ Value *V;
+ if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
+ V = ConstantInt::getFalse(BB->getContext());
+ else
+ V = ConstantInt::getTrue(BB->getContext());
+
+ ICI->replaceAllUsesWith(V);
+ ICI->eraseFromParent();
+ // BB is now empty, so it is likely to simplify away.
+ return SimplifyCFG(BB, TTI, TD) | true; // Always true: ICI was erased above.
+ }
+
+ // The use of the icmp has to be in the 'end' block, by the only PHI node in
+ // the block.
+ BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
+ PHINode *PHIUse = dyn_cast<PHINode>(ICI->use_back());
+ if (PHIUse == 0 || PHIUse != &SuccBlock->front() ||
+ isa<PHINode>(++BasicBlock::iterator(PHIUse)))
+ return false;
+
+ // If the icmp is a SETEQ, then the default dest gets false, the new edge gets
+ // true in the PHI. Any other predicate gets the opposite assignment.
+ Constant *DefaultCst = ConstantInt::getTrue(BB->getContext());
+ Constant *NewCst = ConstantInt::getFalse(BB->getContext());
+
+ if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
+ std::swap(DefaultCst, NewCst);
+
+ // Replace ICI (which is used by the PHI for the default value) with true or
+ // false depending on if it is EQ or NE.
+ ICI->replaceAllUsesWith(DefaultCst);
+ ICI->eraseFromParent();
+
+ // Okay, the switch goes to this block on a default value. Add an edge from
+ // the switch to the merge point on the compared value.
+ BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "switch.edge",
+ BB->getParent(), BB);
+ SmallVector<uint64_t, 8> Weights;
+ bool HasWeights = HasBranchWeights(SI);
+ if (HasWeights) {
+ GetBranchWeights(SI, Weights);
+ if (Weights.size() == 1 + SI->getNumCases()) {
+ // Split weight for default case to case for "Cst".
+ Weights[0] = (Weights[0]+1) >> 1; // Halve, rounding up.
+ Weights.push_back(Weights[0]);
+
+ SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
+ SI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(SI->getContext()).
+ createBranchWeights(MDWeights));
+ }
+ }
+ SI->addCase(Cst, NewBB);
+
+ // NewBB branches to the phi block, add the uncond branch and the phi entry.
+ Builder.SetInsertPoint(NewBB);
+ Builder.SetCurrentDebugLocation(SI->getDebugLoc());
+ Builder.CreateBr(SuccBlock);
+ PHIUse->addIncoming(NewCst, NewBB);
+ return true;
+}
+
+/// SimplifyBranchOnICmpChain - The specified branch is a conditional branch.
+/// Check to see if it is branching on an or/and chain of icmp instructions, and
+/// fold it into a switch instruction if so. Returns true if the branch was replaced by a switch, false if nothing was changed.
+static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *TD,
+ IRBuilder<> &Builder) {
+ Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
+ if (Cond == 0) return false;
+
+
+ // Change br (X == 0 | X == 1), T, F into a switch instruction.
+ // If this is a bunch of seteq's or'd together, or if it's a bunch of
+ // 'setne's and'ed together, collect them.
+ Value *CompVal = 0;
+ std::vector<ConstantInt*> Values;
+ bool TrueWhenEqual = true;
+ Value *ExtraCase = 0;
+ unsigned UsedICmps = 0;
+
+ if (Cond->getOpcode() == Instruction::Or) {
+ CompVal = GatherConstantCompares(Cond, Values, ExtraCase, TD, true,
+ UsedICmps);
+ } else if (Cond->getOpcode() == Instruction::And) {
+ CompVal = GatherConstantCompares(Cond, Values, ExtraCase, TD, false,
+ UsedICmps);
+ TrueWhenEqual = false;
+ }
+
+ // If no common compared value was found (or Cond is neither an 'or' nor an 'and'), fail.
+ if (CompVal == 0) return false;
+
+ // Avoid turning single icmps into a switch.
+ if (UsedICmps <= 1)
+ return false;
+
+ // There might be duplicate constants in the list, which the switch
+ // instruction can't handle, remove them now.
+ array_pod_sort(Values.begin(), Values.end(), ConstantIntSortPredicate);
+ Values.erase(std::unique(Values.begin(), Values.end()), Values.end());
+
+ // If Extra was used, we require at least two switch values to do the
+ // transformation. A switch with one value is just a cond branch.
+ if (ExtraCase && Values.size() < 2) return false;
+
+ // TODO: Preserve branch weight metadata, similarly to how
+ // FoldValueComparisonIntoPredecessors preserves it.
+
+ // Figure out which block is which destination.
+ BasicBlock *DefaultBB = BI->getSuccessor(1);
+ BasicBlock *EdgeBB = BI->getSuccessor(0);
+ if (!TrueWhenEqual) std::swap(DefaultBB, EdgeBB);
+
+ BasicBlock *BB = BI->getParent();
+
+ DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
+ << " cases into SWITCH. BB is:\n" << *BB);
+
+ // If there are any extra values that couldn't be folded into the switch
+ // then we evaluate them with an explicit branch first. Split the block
+ // right before the condbr to handle it.
+ if (ExtraCase) {
+ BasicBlock *NewBB = BB->splitBasicBlock(BI, "switch.early.test");
+ // Remove the uncond branch added to the old block.
+ TerminatorInst *OldTI = BB->getTerminator();
+ Builder.SetInsertPoint(OldTI);
+
+ if (TrueWhenEqual)
+ Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB);
+ else
+ Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
+
+ OldTI->eraseFromParent();
+
+ // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
+ // for the edge we just added.
+ AddPredecessorToBlock(EdgeBB, BB, NewBB);
+
+ DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
+ << "\nEXTRABB = " << *BB);
+ BB = NewBB; // From here on BB is the block that holds BI.
+ }
+
+ Builder.SetInsertPoint(BI);
+ // Convert pointer to int before we switch.
+ if (CompVal->getType()->isPointerTy()) {
+ assert(TD && "Cannot switch on pointer without DataLayout");
+ CompVal = Builder.CreatePtrToInt(CompVal,
+ TD->getIntPtrType(CompVal->getType()),
+ "magicptr");
+ }
+
+ // Create the new switch instruction now.
+ SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
+
+ // Add all of the 'cases' to the switch instruction.
+ for (unsigned i = 0, e = Values.size(); i != e; ++i)
+ New->addCase(Values[i], EdgeBB);
+
+ // We added edges from BB to the EdgeBB. As such, if there were any
+ // PHI nodes in EdgeBB, they need entries to be added corresponding to
+ // the number of edges added. The loop adds Values.size()-1 entries per PHI. NOTE(review): Values.size()-1 is unsigned and would wrap if Values were empty - presumably excluded by the UsedICmps check; confirm.
+ for (BasicBlock::iterator BBI = EdgeBB->begin();
+ isa<PHINode>(BBI); ++BBI) {
+ PHINode *PN = cast<PHINode>(BBI);
+ Value *InVal = PN->getIncomingValueForBlock(BB);
+ for (unsigned i = 0, e = Values.size()-1; i != e; ++i)
+ PN->addIncoming(InVal, BB);
+ }
+
+ // Erase the old branch instruction.
+ EraseTerminatorInstAndDCECond(BI);
+
+ DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
+ return true;
+}
+
+/// SimplifyResume - Handle a block that ends in a 'resume'.  If the block is a
+/// trivial landing pad (a landingpad instruction, optional debug intrinsics,
+/// and a resume of that same landingpad value), every invoke that unwinds to
+/// it is rewritten as a call followed by a branch to the invoke's normal
+/// destination, and the landing pad block is deleted.
+///
+/// Invokes carrying the UWTable function attribute are left untouched; if any
+/// such invoke is seen, the landing pad block is kept.
+///
+/// Builder is not used here.  Returns true if the function was modified.
+bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
+  // If this is a trivial landing pad that just continues unwinding the caught
+  // exception then zap the landing pad, turning its invokes into calls.
+  BasicBlock *BB = RI->getParent();
+  LandingPadInst *LPInst = dyn_cast<LandingPadInst>(BB->getFirstNonPHI());
+  if (RI->getValue() != LPInst)
+    // Not a landing pad, or the resume is not unwinding the exception that
+    // caused control to branch here.
+    return false;
+
+  // Check that there are no other instructions except for debug intrinsics.
+  BasicBlock::iterator I = LPInst, E = RI;
+  while (++I != E)
+    if (!isa<DbgInfoIntrinsic>(I))
+      return false;
+
+  // Turn all invokes that unwind here into calls and delete the basic block.
+  bool InvokeRequiresTableEntry = false;
+  bool Changed = false;
+  for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) {
+    // Advance the iterator before touching the predecessor: rewriting the
+    // invoke below removes this edge.  cast<> requires every predecessor to
+    // end in an invoke.
+    InvokeInst *II = cast<InvokeInst>((*PI++)->getTerminator());
+
+    if (II->hasFnAttr(Attribute::UWTable)) {
+      // Don't remove an `invoke' instruction if the ABI requires an entry into
+      // the table.
+      InvokeRequiresTableEntry = true;
+      continue;
+    }
+
+    // The trailing three operands of the invoke are not call arguments
+    // (presumably the callee and the two destinations), so drop them.
+    SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
+
+    // Insert a call instruction before the invoke.
+    CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II);
+    Call->takeName(II);
+    Call->setCallingConv(II->getCallingConv());
+    Call->setAttributes(II->getAttributes());
+    Call->setDebugLoc(II->getDebugLoc());
+
+    // Anything that used the value produced by the invoke instruction now uses
+    // the value produced by the call instruction.  Note that we do this even
+    // for void functions and calls with no uses so that the callgraph edge is
+    // updated.
+    II->replaceAllUsesWith(Call);
+    BB->removePredecessor(II->getParent());
+
+    // Insert a branch to the normal destination right before the invoke.
+    BranchInst::Create(II->getNormalDest(), II);
+
+    // Finally, delete the invoke instruction!
+    II->eraseFromParent();
+    Changed = true;
+  }
+
+  if (!InvokeRequiresTableEntry) {
+    // The landingpad is now unreachable.  Zap it.  Deleting the block is a
+    // modification in its own right, so report it even when no invoke had to
+    // be rewritten (e.g. the block had no predecessors).
+    BB->eraseFromParent();
+    Changed = true;
+  }
+
+  return Changed;
+}
+
+ /// SimplifyReturn - Try to simplify a block whose first non-PHI, non-debug
+ /// instruction is already its terminator. When DupRet is set, the return is
+ /// folded into every predecessor that ends in an unconditional branch.
+ /// Otherwise conditional-branch predecessors whose two successors both end in
+ /// a return are handed to SimplifyCondBranchToTwoReturns. Returns true if a
+ /// transformation was applied.
+ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
+   BasicBlock *RetBB = RI->getParent();
+   if (!RetBB->getFirstNonPHIOrDbg()->isTerminator())
+     return false;
+
+   // Sort the branch-terminated predecessors into two buckets.
+   SmallVector<BasicBlock*, 8> UncondPreds;
+   SmallVector<BranchInst*, 8> CondBranches;
+   for (pred_iterator PI = pred_begin(RetBB), PE = pred_end(RetBB);
+        PI != PE; ++PI) {
+     BasicBlock *PredBB = *PI;
+     BranchInst *Br = dyn_cast<BranchInst>(PredBB->getTerminator());
+     if (!Br)
+       continue;
+     if (Br->isUnconditional())
+       UncondPreds.push_back(PredBB);
+     else
+       CondBranches.push_back(Br);
+   }
+
+   // Duplicate the return into unconditional-branch predecessors.
+   if (DupRet && !UncondPreds.empty()) {
+     do {
+       BasicBlock *Pred = UncondPreds.pop_back_val();
+       DEBUG(dbgs() << "FOLDING: " << *RetBB
+                    << "INTO UNCOND BRANCH PRED: " << *Pred);
+       (void)FoldReturnIntoUncondBranch(RI, RetBB, Pred);
+     } while (!UncondPreds.empty());
+
+     // A return block has no successors, so once its last predecessor is gone
+     // it can simply be deleted.
+     if (pred_begin(RetBB) == pred_end(RetBB))
+       RetBB->eraseFromParent();
+
+     return true;
+   }
+
+   // Look for a conditional branch that merely chooses between two returns and
+   // turn it into a select/return pair.
+   while (!CondBranches.empty()) {
+     BranchInst *Br = CondBranches.pop_back_val();
+
+     if (!isa<ReturnInst>(Br->getSuccessor(0)->getTerminator()))
+       continue;
+     if (!isa<ReturnInst>(Br->getSuccessor(1)->getTerminator()))
+       continue;
+     if (SimplifyCondBranchToTwoReturns(Br, Builder))
+       return true;
+   }
+   return false;
+ }
+
+ /// SimplifyUnreachable - Simplify the code around an 'unreachable' terminator.
+ ///
+ /// First, instructions immediately preceding UI are deleted as long as they
+ /// cannot prevent control from reaching UI. If UI then is the first
+ /// instruction of its block, the terminator of every predecessor is rewritten
+ /// so that it no longer targets the block, and the block is erased once it has
+ /// no predecessors left (unless it is the function's entry block).
+ ///
+ /// Returns true if the IR was modified.
+ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
+   BasicBlock *BB = UI->getParent();
+
+   bool Changed = false;
+
+   // If there are any instructions immediately before the unreachable that can
+   // be removed, do so.
+   while (UI != BB->begin()) {
+     BasicBlock::iterator BBI = UI;
+     --BBI;
+     // Do not delete instructions that can have side effects which might cause
+     // the unreachable to not be reachable; specifically, calls and volatile
+     // operations may have this effect.
+     if (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI)) break;
+
+     if (BBI->mayHaveSideEffects()) {
+       if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
+         if (SI->isVolatile())
+           break;
+       } else if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
+         if (LI->isVolatile())
+           break;
+       } else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(BBI)) {
+         if (RMWI->isVolatile())
+           break;
+       } else if (AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(BBI)) {
+         if (CXI->isVolatile())
+           break;
+       } else if (!isa<FenceInst>(BBI) && !isa<VAArgInst>(BBI) &&
+                  !isa<LandingPadInst>(BBI)) {
+         break;
+       }
+       // Note that deleting LandingPad's here is in fact okay, although it
+       // involves a bit of subtle reasoning. If this inst is a LandingPad,
+       // all the predecessors of this block will be the unwind edges of Invokes,
+       // and we can therefore guarantee this block will be erased.
+     }
+
+     // Delete this instruction (any uses are guaranteed to be dead)
+     if (!BBI->use_empty())
+       BBI->replaceAllUsesWith(UndefValue::get(BBI->getType()));
+     BBI->eraseFromParent();
+     Changed = true;
+   }
+
+   // If the unreachable instruction is the first in the block, take a gander
+   // at all of the predecessors of this instruction, and simplify them.
+   if (&BB->front() != UI) return Changed;
+
+   // Work on a snapshot of the predecessor list: the rewrites below change it.
+   SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB));
+   for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
+     TerminatorInst *TI = Preds[i]->getTerminator();
+     IRBuilder<> Builder(TI);
+     if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+       if (BI->isUnconditional()) {
+         // A block that can only jump to BB is itself unreachable.
+         if (BI->getSuccessor(0) == BB) {
+           new UnreachableInst(TI->getContext(), TI);
+           TI->eraseFromParent();
+           Changed = true;
+         }
+       } else {
+         // Replace the conditional branch by an unconditional branch to the
+         // successor that is not BB. Either arm rewrites the terminator, so
+         // either arm must report the change.
+         if (BI->getSuccessor(0) == BB) {
+           Builder.CreateBr(BI->getSuccessor(1));
+           EraseTerminatorInstAndDCECond(BI);
+           Changed = true;
+         } else if (BI->getSuccessor(1) == BB) {
+           Builder.CreateBr(BI->getSuccessor(0));
+           EraseTerminatorInstAndDCECond(BI);
+           Changed = true;
+         }
+       }
+     } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+       // Drop every explicit case that leads to BB.
+       for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+            i != e; ++i)
+         if (i.getCaseSuccessor() == BB) {
+           BB->removePredecessor(SI->getParent());
+           SI->removeCase(i);
+           // Step both iterators back so the slot just vacated is revisited.
+           --i; --e;
+           Changed = true;
+         }
+       // If the default value is unreachable, figure out the most popular
+       // destination and make it the default.
+       if (SI->getDefaultDest() == BB) {
+         // Maps destination -> (number of cases, index of its first case).
+         std::map<BasicBlock*, std::pair<unsigned, unsigned> > Popularity;
+         for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+              i != e; ++i) {
+           std::pair<unsigned, unsigned> &entry =
+               Popularity[i.getCaseSuccessor()];
+           if (entry.first == 0) {
+             entry.first = 1;
+             entry.second = i.getCaseIndex();
+           } else {
+             entry.first++;
+           }
+         }
+
+         // Find the most popular block. Ties go to the block whose first case
+         // has the smaller index.
+         unsigned MaxPop = 0;
+         unsigned MaxIndex = 0;
+         BasicBlock *MaxBlock = 0;
+         for (std::map<BasicBlock*, std::pair<unsigned, unsigned> >::iterator
+              I = Popularity.begin(), E = Popularity.end(); I != E; ++I) {
+           if (I->second.first > MaxPop ||
+               (I->second.first == MaxPop && MaxIndex > I->second.second)) {
+             MaxPop = I->second.first;
+             MaxIndex = I->second.second;
+             MaxBlock = I->first;
+           }
+         }
+         if (MaxBlock) {
+           // Make this the new default, allowing us to delete any explicit
+           // edges to it.
+           SI->setDefaultDest(MaxBlock);
+           Changed = true;
+
+           // If MaxBlock has phinodes in it, remove MaxPop-1 entries from
+           // it.
+           if (isa<PHINode>(MaxBlock->begin()))
+             for (unsigned i = 0; i != MaxPop-1; ++i)
+               MaxBlock->removePredecessor(SI->getParent());
+
+           for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+                i != e; ++i)
+             if (i.getCaseSuccessor() == MaxBlock) {
+               SI->removeCase(i);
+               --i; --e;
+             }
+         }
+       }
+     } else if (InvokeInst *II = dyn_cast<InvokeInst>(TI)) {
+       if (II->getUnwindDest() == BB) {
+         // Convert the invoke to a call instruction. This would be a good
+         // place to note that the call does not throw though.
+         BranchInst *BI = Builder.CreateBr(II->getNormalDest());
+         II->removeFromParent(); // Take out of symbol table
+
+         // Insert the call now...
+         SmallVector<Value*, 8> Args(II->op_begin(), II->op_end()-3);
+         Builder.SetInsertPoint(BI);
+         CallInst *CI = Builder.CreateCall(II->getCalledValue(),
+                                           Args, II->getName());
+         CI->setCallingConv(II->getCallingConv());
+         CI->setAttributes(II->getAttributes());
+         // If the invoke produced a value, the call does now instead.
+         II->replaceAllUsesWith(CI);
+         // The invoke was unlinked from its block above, so plain delete is
+         // used instead of eraseFromParent.
+         delete II;
+         Changed = true;
+       }
+     }
+   }
+
+   // If this block is now dead, remove it.
+   if (pred_begin(BB) == pred_end(BB) &&
+       BB != &BB->getParent()->getEntryBlock()) {
+     // We know there are no successors, so just nuke the block.
+     BB->eraseFromParent();
+     return true;
+   }
+
+   return Changed;
+ }
+
+ /// TurnSwitchRangeIntoICmp - Turn a switch that contains only an integer range
+ /// comparison into an add, an icmp and a conditional branch. This applies when
+ /// every case shares one destination and the case values are consecutive.
+ /// Returns true if the switch was replaced.
+ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
+   assert(SI->getNumCases() > 1 && "Degenerate switch?");
+
+   // Collect the case values, giving up as soon as two cases disagree on their
+   // destination.
+   BasicBlock *RangeDest = SI->case_begin().getCaseSuccessor();
+   SmallVector<ConstantInt *, 16> Cases;
+   for (SwitchInst::CaseIt CI = SI->case_begin(), CE = SI->case_end();
+        CI != CE; ++CI) {
+     if (CI.getCaseSuccessor() != RangeDest)
+       return false;
+     Cases.push_back(CI.getCaseValue());
+   }
+   assert(Cases.size() == SI->getNumCases() && "Not all cases gathered");
+
+   // After sorting, each value must be exactly one more than the value that
+   // follows it; otherwise the cases do not form a contiguous range.
+   array_pod_sort(Cases.begin(), Cases.end(), ConstantIntSortPredicate);
+   for (unsigned Idx = 1, End = Cases.size(); Idx != End; ++Idx)
+     if (Cases[Idx-1]->getValue() != Cases[Idx]->getValue()+1)
+       return false;
+
+   Constant *Offset = ConstantExpr::getNeg(Cases.back());
+   Constant *NumCases = ConstantInt::get(Offset->getType(), SI->getNumCases());
+
+   // Shift the condition by the negated last sorted value, unless that is zero.
+   Value *Cond = SI->getCondition();
+   Value *Shifted = Cond;
+   if (!Offset->isNullValue())
+     Shifted = Builder.CreateAdd(Cond, Offset, Cond->getName()+".off");
+
+   // If NumCases overflowed, then all possible values jump to the successor.
+   const bool CoversEverything =
+     NumCases->isNullValue() && SI->getNumCases() != 0;
+   Value *Cmp = CoversEverything
+     ? static_cast<Value*>(ConstantInt::getTrue(SI->getContext()))
+     : Builder.CreateICmpULT(Shifted, NumCases, "switch");
+   BranchInst *NewBI =
+     Builder.CreateCondBr(Cmp, RangeDest, SI->getDefaultDest());
+
+   // Carry branch weights over to the newly-created conditional branch.
+   SmallVector<uint64_t, 8> Weights;
+   if (HasBranchWeights(SI)) {
+     GetBranchWeights(SI, Weights);
+     if (Weights.size() == 1 + SI->getNumCases()) {
+       // The true weight is the sum of all case weights. We assume that the
+       // sum of all weights for a Terminator can fit into 32 bits.
+       uint32_t TrueWeight = 0;
+       for (unsigned Idx = 1, End = Weights.size(); Idx != End; ++Idx)
+         TrueWeight += static_cast<uint32_t>(Weights[Idx]);
+       const uint32_t FalseWeight = static_cast<uint32_t>(Weights[0]);
+       NewBI->setMetadata(LLVMContext::MD_prof,
+                          MDBuilder(SI->getContext()).
+                          createBranchWeights(TrueWeight, FalseWeight));
+     }
+   }
+
+   // The successor used to have one incoming edge per case and now has a single
+   // edge, so prune the surplus PHI entries.
+   BasicBlock *SwitchBB = SI->getParent();
+   const unsigned SurplusEdges = SI->getNumCases()-1;
+   for (BasicBlock::iterator BBI = RangeDest->begin(); isa<PHINode>(BBI);
+        ++BBI) {
+     PHINode *PN = cast<PHINode>(BBI);
+     for (unsigned Idx = 0; Idx != SurplusEdges; ++Idx)
+       PN->removeIncomingValue(SwitchBB);
+   }
+   SI->eraseFromParent();
+
+   return true;
+ }
+
+ /// EliminateDeadSwitchCases - Compute masked bits for the condition of a switch
+ /// and use it to remove dead cases.
+ ///
+ /// A case is dead when its value has a one in a bit position that is known to
+ /// be zero in the condition, or lacks a one in a position known to be one.
+ /// Branch weight metadata, when present and well formed, is updated to match
+ /// the remaining cases. Returns true if at least one case was removed.
+ static bool EliminateDeadSwitchCases(SwitchInst *SI) {
+   Value *Cond = SI->getCondition();
+   unsigned Bits = Cond->getType()->getIntegerBitWidth();
+   APInt KnownZero(Bits, 0), KnownOne(Bits, 0);
+   ComputeMaskedBits(Cond, KnownZero, KnownOne);
+
+   // Gather dead cases.
+   SmallVector<ConstantInt*, 8> DeadCases;
+   for (SwitchInst::CaseIt I = SI->case_begin(), E = SI->case_end(); I != E; ++I) {
+     const APInt &CaseVal = I.getCaseValue()->getValue();
+     if ((CaseVal & KnownZero) != 0 ||
+         (CaseVal & KnownOne) != KnownOne) {
+       DeadCases.push_back(I.getCaseValue());
+       // Stream the APInt, not the ConstantInt pointer, so that the message
+       // shows the case value instead of an address.
+       DEBUG(dbgs() << "SimplifyCFG: switch case '"
+                    << CaseVal << "' is dead.\n");
+     }
+   }
+
+   // Only keep the weights if there is exactly one per successor (the default
+   // destination plus every case).
+   SmallVector<uint64_t, 8> Weights;
+   bool HasWeight = HasBranchWeights(SI);
+   if (HasWeight) {
+     GetBranchWeights(SI, Weights);
+     HasWeight = (Weights.size() == 1 + SI->getNumCases());
+   }
+
+   // Remove dead cases from the switch.
+   for (unsigned I = 0, E = DeadCases.size(); I != E; ++I) {
+     SwitchInst::CaseIt Case = SI->findCaseValue(DeadCases[I]);
+     assert(Case != SI->case_default() &&
+            "Case was not found. Probably mistake in DeadCases forming.");
+     if (HasWeight) {
+       // Weights[0] belongs to the default destination, hence the +1. The
+       // dead entry is swapped with the last one and then popped.
+       std::swap(Weights[Case.getCaseIndex()+1], Weights.back());
+       Weights.pop_back();
+     }
+
+     // Prune unused values from PHI nodes.
+     Case.getCaseSuccessor()->removePredecessor(SI->getParent());
+     SI->removeCase(Case);
+   }
+   if (HasWeight) {
+     SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
+     SI->setMetadata(LLVMContext::MD_prof,
+                     MDBuilder(SI->getParent()->getContext()).
+                     createBranchWeights(MDWeights));
+   }
+
+   return !DeadCases.empty();
+ }
+
+ /// FindPHIForConditionForwarding - Check whether BB is an empty block that has
+ /// a single predecessor and ends in an unconditional branch (i.e. a block that
+ /// TryToSimplifyUncondBranchFromEmptyBlock could fold). If so, search the phi
+ /// nodes of its successor for one whose incoming value for BB is CaseValue.
+ /// On success the phi node is returned and *PhiIndex is set to BB's index in
+ /// that phi node; otherwise NULL is returned and *PhiIndex is left untouched.
+ static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue,
+                                               BasicBlock *BB,
+                                               int *PhiIndex) {
+   // BB must be empty to be a candidate for simplification.
+   if (BB->getFirstNonPHIOrDbg() != BB->getTerminator())
+     return NULL;
+
+   // BB must be dominated by the switch.
+   if (!BB->getSinglePredecessor())
+     return NULL;
+
+   // Terminator must be unconditional branch.
+   BranchInst *Br = dyn_cast<BranchInst>(BB->getTerminator());
+   if (!Br)
+     return NULL;
+   if (!Br->isUnconditional())
+     return NULL;
+
+   BasicBlock *Target = Br->getSuccessor(0);
+   for (BasicBlock::iterator It = Target->begin(); isa<PHINode>(It); ++It) {
+     PHINode *Candidate = cast<PHINode>(It);
+     const int Slot = Candidate->getBasicBlockIndex(BB);
+     assert(Slot >= 0 && "PHI has no entry for predecessor?");
+
+     if (Candidate->getIncomingValue(Slot) == CaseValue) {
+       *PhiIndex = Slot;
+       return Candidate;
+     }
+   }
+
+   return NULL;
+ }
+
+ /// ForwardSwitchConditionToPHI - Try to forward the condition of a switch
+ /// instruction to a phi node dominated by the switch, if that would mean that
+ /// some of the destination blocks of the switch can be folded away. A phi node
+ /// is only rewritten when at least two of its incoming entries qualify.
+ /// Returns true if a change is made.
+ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {
+   typedef DenseMap<PHINode*, SmallVector<int,4> > ForwardingNodesMap;
+   ForwardingNodesMap Candidates;
+
+   // For each case, record which phi entry (if any) already carries the case
+   // value through an otherwise empty destination block.
+   for (SwitchInst::CaseIt CI = SI->case_begin(), CE = SI->case_end();
+        CI != CE; ++CI) {
+     int Slot;
+     if (PHINode *PHI = FindPHIForConditionForwarding(CI.getCaseValue(),
+                                                      CI.getCaseSuccessor(),
+                                                      &Slot))
+       Candidates[PHI].push_back(Slot);
+   }
+
+   bool MadeChange = false;
+   for (ForwardingNodesMap::iterator MI = Candidates.begin(),
+        ME = Candidates.end(); MI != ME; ++MI) {
+     SmallVectorImpl<int> &Slots = MI->second;
+     if (Slots.size() >= 2) {
+       PHINode *PHI = MI->first;
+       // Each recorded entry equals its case value, so the switch condition
+       // can stand in for all of them.
+       for (size_t N = 0, NumSlots = Slots.size(); N != NumSlots; ++N)
+         PHI->setIncomingValue(Slots[N], SI->getCondition());
+       MadeChange = true;
+     }
+   }
+
+   return MadeChange;
+ }
+
+ /// ValidLookupTableConstant - Return true if the backend will be able to handle
+ /// initializing an array of constants like C. Constant expressions qualify
+ /// only when they are GEPs with no notional over-indexing; apart from those,
+ /// FP and integer constants, null pointers, global values and undef qualify.
+ static bool ValidLookupTableConstant(Constant *C) {
+   // Constant expressions are decided solely by the GEP test.
+   if (isa<ConstantExpr>(C))
+     return cast<ConstantExpr>(C)->isGEPWithNoNotionalOverIndexing();
+
+   if (isa<ConstantFP>(C) || isa<ConstantInt>(C))
+     return true;
+   if (isa<ConstantPointerNull>(C) || isa<GlobalValue>(C))
+     return true;
+   return isa<UndefValue>(C);
+ }
+
+ /// LookupConstant - Resolve V to a constant. A value that already is a
+ /// Constant is returned as is; anything else is looked up in ConstantPool,
+ /// which yields 0 when V has no entry there.
+ static Constant *LookupConstant(Value *V,
+                      const SmallDenseMap<Value*, Constant*>& ConstantPool) {
+   Constant *Direct = dyn_cast<Constant>(V);
+   return Direct ? Direct : ConstantPool.lookup(V);
+ }
+
+ /// ConstantFold - Try to fold instruction I into a constant. This works for
+ /// simple instructions such as binary operations where both operands are
+ /// constant or can be replaced by constants from the ConstantPool. A select
+ /// only needs its condition and the chosen arm to be constant. Returns the
+ /// resulting constant on success, 0 otherwise.
+ static Constant *
+ ConstantFold(Instruction *I,
+              const SmallDenseMap<Value *, Constant *> &ConstantPool,
+              const DataLayout *DL) {
+   // Selects are resolved by hand so that the arm not taken is never looked up.
+   if (SelectInst *Sel = dyn_cast<SelectInst>(I)) {
+     Constant *CondC = LookupConstant(Sel->getCondition(), ConstantPool);
+     if (!CondC)
+       return 0;
+
+     Value *Chosen = 0;
+     if (CondC->isAllOnesValue())
+       Chosen = Sel->getTrueValue();
+     else if (CondC->isNullValue())
+       Chosen = Sel->getFalseValue();
+
+     return Chosen ? LookupConstant(Chosen, ConstantPool) : 0;
+   }
+
+   // Every operand of any other instruction has to resolve to a constant.
+   SmallVector<Constant *, 4> ConstOps;
+   for (unsigned OpNo = 0, NumOps = I->getNumOperands(); OpNo != NumOps;
+        ++OpNo) {
+     Constant *OpC = LookupConstant(I->getOperand(OpNo), ConstantPool);
+     if (!OpC)
+       return 0;
+     ConstOps.push_back(OpC);
+   }
+
+   // Compares need their predicate passed along explicitly.
+   if (CmpInst *Cmp = dyn_cast<CmpInst>(I))
+     return ConstantFoldCompareInstOperands(Cmp->getPredicate(), ConstOps[0],
+                                            ConstOps[1], DL);
+
+   return ConstantFoldInstOperands(I->getOpcode(), I->getType(), ConstOps, DL);
+ }
+
+ /// GetCaseResults - Try to determine the resulting constant values in phi nodes
+ /// at the common destination basic block, *CommonDest, for one of the case
+ /// destinations CaseDest corresponding to value CaseVal (0 for the default
+ /// case), of a switch instruction SI.
+ ///
+ /// If *CommonDest is null on entry it is set to the destination found for this
+ /// case. One (phi node, constant) pair is appended to Res for every phi node
+ /// in the common destination that has an entry for the incoming block.
+ /// Returns false if the case does not lead to *CommonDest, or if an incoming
+ /// value is not a constant accepted by ValidLookupTableConstant.
+ static bool
+ GetCaseResults(SwitchInst *SI,
+ ConstantInt *CaseVal,
+ BasicBlock *CaseDest,
+ BasicBlock **CommonDest,
+ SmallVectorImpl<std::pair<PHINode *, Constant *> > &Res,
+ const DataLayout *DL) {
+ // The block from which we enter the common destination.
+ BasicBlock *Pred = SI->getParent();
+
+ // If CaseDest is empty except for some side-effect free instructions through
+ // which we can constant-propagate the CaseVal, continue to its successor.
+ // The pool starts out knowing only that the switch condition equals CaseVal.
+ SmallDenseMap<Value*, Constant*> ConstantPool;
+ ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
+ for (BasicBlock::iterator I = CaseDest->begin(), E = CaseDest->end(); I != E;
+ ++I) {
+ if (TerminatorInst *T = dyn_cast<TerminatorInst>(I)) {
+ // If the terminator is a simple branch, continue to the next block.
+ if (T->getNumSuccessors() != 1)
+ return false;
+ // From here on Pred is the block we walked through and CaseDest is its
+ // sole successor. E still refers to the old block, so the loop ends here.
+ Pred = CaseDest;
+ CaseDest = T->getSuccessor(0);
+ } else if (isa<DbgInfoIntrinsic>(I)) {
+ // Skip debug intrinsic.
+ continue;
+ } else if (Constant *C = ConstantFold(I, ConstantPool, DL)) {
+ // Instruction is side-effect free and constant.
+ ConstantPool.insert(std::make_pair(I, C));
+ } else {
+ // Not foldable: stop walking. Pred and CaseDest keep their entry values.
+ break;
+ }
+ }
+
+ // If we did not have a CommonDest before, use the current one.
+ if (!*CommonDest)
+ *CommonDest = CaseDest;
+ // If the destination isn't the common one, abort.
+ if (CaseDest != *CommonDest)
+ return false;
+
+ // Get the values for this case from phi nodes in the destination block.
+ BasicBlock::iterator I = (*CommonDest)->begin();
+ while (PHINode *PHI = dyn_cast<PHINode>(I++)) {
+ int Idx = PHI->getBasicBlockIndex(Pred);
+ // Phi nodes without an entry for Pred are simply ignored.
+ if (Idx == -1)
+ continue;
+
+ Constant *ConstVal = LookupConstant(PHI->getIncomingValue(Idx),
+ ConstantPool);
+ if (!ConstVal)
+ return false;
+
+ // Note: If the constant comes from constant-propagating the case value
+ // through the CaseDest basic block, it will be safe to remove the
+ // instructions in that block. They cannot be used (except in the phi nodes
+ // we visit) outside CaseDest, because that block does not dominate its
+ // successor. If it did, we would not be in this phi node.
+
+ // Be conservative about which kinds of constants we support.
+ if (!ValidLookupTableConstant(ConstVal))
+ return false;
+
+ Res.push_back(std::make_pair(PHI, ConstVal));
+ }
+
+ return true;
+ }
+
+ namespace {
+ /// SwitchLookupTable - This class represents a lookup table that can be used
+ /// to replace a switch.
+ class SwitchLookupTable {
+ public:
+ /// SwitchLookupTable - Create a lookup table to use as a switch replacement
+ /// with the contents of Values, using DefaultValue to fill any holes in the
+ /// table.
+ /// Each entry of Values pairs a case value with its result; Offset is
+ /// subtracted from the case value to obtain the table index.
+ SwitchLookupTable(Module &M,
+ uint64_t TableSize,
+ ConstantInt *Offset,
+ const SmallVectorImpl<std::pair<ConstantInt*, Constant*> >& Values,
+ Constant *DefaultValue,
+ const DataLayout *TD);
+
+ /// BuildLookup - Build instructions with Builder to retrieve the value at
+ /// the position given by Index in the lookup table.
+ Value *BuildLookup(Value *Index, IRBuilder<> &Builder);
+
+ /// WouldFitInRegister - Return true if a table with TableSize elements of
+ /// type ElementType would fit in a target-legal register.
+ static bool WouldFitInRegister(const DataLayout *TD,
+ uint64_t TableSize,
+ const Type *ElementType);
+
+ private:
+ // Depending on the contents of the table, it can be represented in
+ // different ways.
+ enum {
+ // For tables where each element contains the same value, we just have to
+ // store that single value and return it for each lookup.
+ SingleValueKind,
+
+ // For small tables with integer elements, we can pack them into a bitmap
+ // that fits into a target-legal register. Values are retrieved by
+ // shift and mask operations.
+ BitMapKind,
+
+ // The table is stored as an array of values. Values are retrieved by load
+ // instructions from the table.
+ ArrayKind
+ } Kind;
+
+ // The members below are all zero-initialized by the constructor; only the
+ // ones matching Kind are filled in afterwards.
+
+ // For SingleValueKind, this is the single value.
+ Constant *SingleValue;
+
+ // For BitMapKind, this is the bitmap.
+ ConstantInt *BitMap;
+ // For BitMapKind, the integer type of one table element.
+ IntegerType *BitMapElementTy;
+
+ // For ArrayKind, this is the array.
+ GlobalVariable *Array;
+ };
+ } // end anonymous namespace
+
+ /// Build the table contents from Values, plug any unused slots with
+ /// DefaultValue, and pick the cheapest representation: a single value when
+ /// every slot is equal, a bitmap when the table would fit in a register
+ /// according to WouldFitInRegister, and a private constant global array
+ /// otherwise.
+ SwitchLookupTable::SwitchLookupTable(Module &M,
+                                      uint64_t TableSize,
+                                      ConstantInt *Offset,
+     const SmallVectorImpl<std::pair<ConstantInt*, Constant*> >& Values,
+                                      Constant *DefaultValue,
+                                      const DataLayout *TD)
+   : SingleValue(0), BitMap(0), BitMapElementTy(0), Array(0) {
+   assert(Values.size() && "Can't build lookup table without values!");
+   assert(TableSize >= Values.size() && "Can't fit values in table!");
+
+   // Candidate for the "all slots equal" case; cleared on the first mismatch.
+   Constant *Uniform = Values.begin()->second;
+
+   // Place every case result in its slot (case value minus Offset).
+   SmallVector<Constant*, 64> Slots(TableSize);
+   for (size_t N = 0, NumValues = Values.size(); N != NumValues; ++N) {
+     const std::pair<ConstantInt*, Constant*> &Entry = Values[N];
+     assert(Entry.second->getType() == DefaultValue->getType());
+
+     const uint64_t SlotNo =
+       (Entry.first->getValue() - Offset->getValue()).getLimitedValue();
+     Slots[SlotNo] = Entry.second;
+
+     if (Entry.second != Uniform)
+       Uniform = 0;
+   }
+
+   // Fewer values than slots means there are holes; they get the default.
+   if (Values.size() < TableSize) {
+     for (uint64_t N = 0; N < TableSize; ++N)
+       if (!Slots[N])
+         Slots[N] = DefaultValue;
+
+     if (DefaultValue != Uniform)
+       Uniform = 0;
+   }
+
+   // A table of identical elements only needs to remember that one element.
+   SingleValue = Uniform;
+   if (SingleValue) {
+     Kind = SingleValueKind;
+     return;
+   }
+
+   // Integer tables that fit in a register are packed into one wide integer,
+   // slot 0 in the least significant bits.
+   Type *ElemTy = DefaultValue->getType();
+   if (WouldFitInRegister(TD, TableSize, ElemTy)) {
+     IntegerType *IT = cast<IntegerType>(ElemTy);
+     APInt Packed(TableSize * IT->getBitWidth(), 0);
+     uint64_t N = TableSize;
+     while (N > 0) {
+       --N;
+       Packed <<= IT->getBitWidth();
+       // Undef slots contribute zero bits.
+       if (isa<UndefValue>(Slots[N]))
+         continue;
+       ConstantInt *SlotVal = cast<ConstantInt>(Slots[N]);
+       Packed |= SlotVal->getValue().zext(Packed.getBitWidth());
+     }
+     BitMap = ConstantInt::get(M.getContext(), Packed);
+     BitMapElementTy = IT;
+     Kind = BitMapKind;
+     ++NumBitMaps;
+     return;
+   }
+
+   // Fall back to a constant global array holding the slots.
+   ArrayType *TableTy = ArrayType::get(ElemTy, TableSize);
+   Constant *Init = ConstantArray::get(TableTy, Slots);
+
+   Array = new GlobalVariable(M, TableTy, /*constant=*/ true,
+                              GlobalVariable::PrivateLinkage,
+                              Init,
+                              "switch.table");
+   Array->setUnnamedAddr(true);
+   Kind = ArrayKind;
+ }
+
+ /// Emit, through Builder, the instructions that fetch element Index of this
+ /// table and return the resulting value. A single-value table needs no
+ /// instructions, a bitmap is shifted and truncated, and an array is indexed
+ /// with a GEP and loaded.
+ Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
+   if (Kind == SingleValueKind)
+     return SingleValue;
+
+   if (Kind == BitMapKind) {
+     // Type of the bitmap (e.g. i59).
+     IntegerType *MapTy = BitMap->getType();
+
+     // Bring Index to the bitmap's type. Note: The Index is <= the number of
+     // elements in the table, so truncating it to the width of the bitmask is
+     // safe.
+     Value *Widened = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");
+
+     // The element sits at bit offset Index * element width.
+     Constant *ElemBits =
+       ConstantInt::get(MapTy, BitMapElementTy->getBitWidth());
+     Value *ShiftAmt = Builder.CreateMul(Widened, ElemBits, "switch.shiftamt");
+
+     // Move the element down to bit zero, then cut off everything above it.
+     Value *Shifted = Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift");
+     return Builder.CreateTrunc(Shifted, BitMapElementTy, "switch.masked");
+   }
+
+   if (Kind == ArrayKind) {
+     // The leading zero index steps through the pointer to the global array.
+     Value *GEPIndices[] = { Builder.getInt32(0), Index };
+     Value *ElemPtr = Builder.CreateInBoundsGEP(Array, GEPIndices,
+                                                "switch.gep");
+     return Builder.CreateLoad(ElemPtr, "switch.load");
+   }
+
+   llvm_unreachable("Unknown lookup table kind!");
+ }
+
+ /// Decide whether TableSize elements of ElementType could be packed into one
+ /// integer that TD reports as fitting in a legal integer. The answer is
+ /// always false without a DataLayout or for a non-integer element type.
+ bool SwitchLookupTable::WouldFitInRegister(const DataLayout *TD,
+                                            uint64_t TableSize,
+                                            const Type *ElementType) {
+   const IntegerType *IT = TD ? dyn_cast<IntegerType>(ElementType) : 0;
+   if (!IT)
+     return false;
+   // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
+   // are <= 15, we could try to narrow the type.
+
+   const unsigned ElemBits = IT->getBitWidth();
+
+   // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
+   const bool WidthRepresentable = TableSize < UINT_MAX/ElemBits;
+   return WidthRepresentable && TD->fitsInLegalInteger(TableSize * ElemBits);
+ }
+
+ /// ShouldBuildLookupTable - Determine whether a lookup table should be built
+ /// for this switch, based on the number of cases, size of the table and the
+ /// types of the results. Tables that all fit in a register are always built;
+ /// otherwise every result type must be legal and the table must be at least
+ /// 40% dense.
+ static bool ShouldBuildLookupTable(SwitchInst *SI,
+                                    uint64_t TableSize,
+                                    const TargetTransformInfo &TTI,
+                                    const DataLayout *TD,
+                             const SmallDenseMap<PHINode*, Type*>& ResultTypes) {
+   const unsigned NumCases = SI->getNumCases();
+
+   // TableSize overflowed, or mul below might overflow.
+   if (NumCases > TableSize)
+     return false;
+   if (TableSize >= UINT64_MAX / 10)
+     return false;
+
+   bool EveryTableFitsInRegister = true;
+   bool SawIllegalType = false;
+   typedef SmallDenseMap<PHINode*, Type*>::const_iterator TypeIter;
+   for (TypeIter TI = ResultTypes.begin(), TE = ResultTypes.end(); TI != TE;
+        ++TI) {
+     Type *ResultTy = TI->second;
+
+     // This flag only ever goes from false to true.
+     if (!SawIllegalType && !TTI.isTypeLegal(ResultTy))
+       SawIllegalType = true;
+
+     // This flag only ever goes from true to false.
+     if (EveryTableFitsInRegister &&
+         !SwitchLookupTable::WouldFitInRegister(TD, TableSize, ResultTy))
+       EveryTableFitsInRegister = false;
+
+     // Once both flags have flipped nothing can change anymore. NOTE: This
+     // *only* works with saturating flags, and all flags have to saturate
+     // first due to the non-deterministic behavior of iterating over a dense
+     // map.
+     if (SawIllegalType && !EveryTableFitsInRegister)
+       break;
+   }
+
+   // If each table would fit in a register, we should build it anyway.
+   if (EveryTableFitsInRegister)
+     return true;
+
+   // Don't build a table that doesn't fit in-register if it has illegal types.
+   if (SawIllegalType)
+     return false;
+
+   // The table density should be at least 40%. This is the same criterion as
+   // for jump tables, see SelectionDAGBuilder::handleJTSwitchCase.
+   // FIXME: Find the best cut-off.
+   return NumCases * 10 >= TableSize * 4;
+ }
+
+ /// SwitchToLookupTable - If the switch is only used to initialize one or more
+ /// phi nodes in a common successor block with different constant values,
+ /// replace the switch with lookup tables.
+ ///
+ /// On success a new "switch.lookup" block is created in front of the common
+ /// destination, the switch is erased, and true is returned. Returns false,
+ /// leaving the IR untouched, when the target does not want lookup tables, the
+ /// switch has fewer than four cases, some case result is not a suitable
+ /// constant, or ShouldBuildLookupTable rejects the table.
+ static bool SwitchToLookupTable(SwitchInst *SI,
+ IRBuilder<> &Builder,
+ const TargetTransformInfo &TTI,
+ const DataLayout* TD) {
+ assert(SI->getNumCases() > 1 && "Degenerate switch?");
+
+ // Only build lookup table when we have a target that supports it.
+ if (!TTI.shouldBuildLookupTables())
+ return false;
+
+ // FIXME: If the switch is too sparse for a lookup table, perhaps we could
+ // split off a dense part and build a lookup table for that.
+
+ // FIXME: This creates arrays of GEPs to constant strings, which means each
+ // GEP needs a runtime relocation in PIC code. We should just build one big
+ // string and lookup indices into that.
+
+ // Ignore the switch if the number of cases is too small.
+ // This is similar to the check when building jump tables in
+ // SelectionDAGBuilder::handleJTSwitchCase.
+ // FIXME: Determine the best cut-off.
+ if (SI->getNumCases() < 4)
+ return false;
+
+ // Figure out the corresponding result for each case value and phi node in the
+ // common destination, as well as the min and max case values.
+ assert(SI->case_begin() != SI->case_end());
+ SwitchInst::CaseIt CI = SI->case_begin();
+ ConstantInt *MinCaseVal = CI.getCaseValue();
+ ConstantInt *MaxCaseVal = CI.getCaseValue();
+
+ BasicBlock *CommonDest = 0;
+ typedef SmallVector<std::pair<ConstantInt*, Constant*>, 4> ResultListTy;
+ SmallDenseMap<PHINode*, ResultListTy> ResultLists;
+ SmallDenseMap<PHINode*, Constant*> DefaultResults;
+ SmallDenseMap<PHINode*, Type*> ResultTypes;
+ // PHIs records the phi nodes in first-seen order, giving the table-building
+ // loop below a stable iteration order.
+ SmallVector<PHINode*, 4> PHIs;
+
+ for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
+ ConstantInt *CaseVal = CI.getCaseValue();
+ // Min and max are tracked with signed comparisons.
+ if (CaseVal->getValue().slt(MinCaseVal->getValue()))
+ MinCaseVal = CaseVal;
+ if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
+ MaxCaseVal = CaseVal;
+
+ // Resulting value at phi nodes for this case value.
+ typedef SmallVector<std::pair<PHINode*, Constant*>, 4> ResultsTy;
+ ResultsTy Results;
+ if (!GetCaseResults(SI, CaseVal, CI.getCaseSuccessor(), &CommonDest,
+ Results, TD))
+ return false;
+
+ // Append the result from this case to the list for each phi.
+ for (ResultsTy::iterator I = Results.begin(), E = Results.end(); I!=E; ++I) {
+ if (!ResultLists.count(I->first))
+ PHIs.push_back(I->first);
+ ResultLists[I->first].push_back(std::make_pair(CaseVal, I->second));
+ }
+ }
+
+ // Get the resulting values for the default case.
+ SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList;
+ if (!GetCaseResults(SI, 0, SI->getDefaultDest(), &CommonDest,
+ DefaultResultsList, TD))
+ return false;
+ for (size_t I = 0, E = DefaultResultsList.size(); I != E; ++I) {
+ PHINode *PHI = DefaultResultsList[I].first;
+ Constant *Result = DefaultResultsList[I].second;
+ DefaultResults[PHI] = Result;
+ ResultTypes[PHI] = Result->getType();
+ }
+
+ // The table needs one slot for every value from the min to the max case.
+ APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue();
+ uint64_t TableSize = RangeSpread.getLimitedValue() + 1;
+ if (!ShouldBuildLookupTable(SI, TableSize, TTI, TD, ResultTypes))
+ return false;
+
+ // Create the BB that does the lookups.
+ Module &Mod = *CommonDest->getParent()->getParent();
+ BasicBlock *LookupBB = BasicBlock::Create(Mod.getContext(),
+ "switch.lookup",
+ CommonDest->getParent(),
+ CommonDest);
+
+ // Compute the table index value.
+ Builder.SetInsertPoint(SI);
+ Value *TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal,
+ "switch.tableidx");
+
+ // Compute the maximum table size representable by the integer type we are
+ // switching upon.
+ unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
+ uint64_t MaxTableSize = CaseSize > 63? UINT64_MAX : 1ULL << CaseSize;
+ assert(MaxTableSize >= TableSize &&
+ "It is impossible for a switch to have more entries than the max "
+ "representable value of its input integer type's size.");
+
+ // If we have a fully covered lookup table, unconditionally branch to the
+ // lookup table BB. Otherwise, check if the condition value is within the case
+ // range. If it is so, branch to the new BB. Otherwise branch to SI's default
+ // destination.
+ const bool GeneratingCoveredLookupTable = MaxTableSize == TableSize;
+ if (GeneratingCoveredLookupTable) {
+ Builder.CreateBr(LookupBB);
+ SI->getDefaultDest()->removePredecessor(SI->getParent());
+ } else {
+ Value *Cmp = Builder.CreateICmpULT(TableIndex, ConstantInt::get(
+ MinCaseVal->getType(), TableSize));
+ Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
+ }
+
+ // Populate the BB that does the lookups.
+ Builder.SetInsertPoint(LookupBB);
+ bool ReturnedEarly = false;
+ for (size_t I = 0, E = PHIs.size(); I != E; ++I) {
+ PHINode *PHI = PHIs[I];
+
+ SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultLists[PHI],
+ DefaultResults[PHI], TD);
+
+ Value *Result = Table.BuildLookup(TableIndex, Builder);
+
+ // If the result is used to return immediately from the function, we want to
+ // do that right here.
+ if (PHI->hasOneUse() && isa<ReturnInst>(*PHI->use_begin()) &&
+ *PHI->use_begin() == CommonDest->getFirstNonPHIOrDbg()) {
+ Builder.CreateRet(Result);
+ ReturnedEarly = true;
+ break;
+ }
+
+ PHI->addIncoming(Result, LookupBB);
+ }
+
+ // Without an early return the lookup block falls through to CommonDest.
+ if (!ReturnedEarly)
+ Builder.CreateBr(CommonDest);
+
+ // Remove the switch.
+ for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
+ BasicBlock *Succ = SI->getSuccessor(i);
+
+ // The default destination is skipped here: its edge was either removed
+ // above (covered table) or is reused by the new conditional branch.
+ if (Succ == SI->getDefaultDest())
+ continue;
+ Succ->removePredecessor(SI->getParent());
+ }
+ SI->eraseFromParent();
+
+ ++NumLookupTables;
+ return true;
+ }
+
+/// Try each switch simplification in turn. As soon as one of them succeeds,
+/// SimplifyCFG is re-run on the switch's block and true is returned; false
+/// means that none of the transformations applied.
+bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
+  // Captured once up front; every re-run below uses this block.
+  BasicBlock *Parent = SI->getParent();
+
+  if (isValueEqualityComparison(SI)) {
+    // With a single predecessor that branches on this value, see if that
+    // predecessor totally determines the outcome of this switch.
+    BasicBlock *OnlyPred = Parent->getSinglePredecessor();
+    if (OnlyPred &&
+        SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder)) {
+      SimplifyCFG(Parent, TTI, TD);
+      return true;
+    }
+
+    // A switch whose condition is a select gets its own simplification.
+    SelectInst *Select = dyn_cast<SelectInst>(SI->getCondition());
+    if (Select && SimplifySwitchOnSelect(SI, Select)) {
+      SimplifyCFG(Parent, TTI, TD);
+      return true;
+    }
+
+    // If the block holds nothing but the switch (debug intrinsics aside), try
+    // to fold the block away into its predecessors.
+    BasicBlock::iterator FirstReal = Parent->begin();
+    for (; isa<DbgInfoIntrinsic>(FirstReal); ++FirstReal)
+      ;
+    if (SI == &*FirstReal &&
+        FoldValueComparisonIntoPredecessors(SI, Builder)) {
+      SimplifyCFG(Parent, TTI, TD);
+      return true;
+    }
+  }
+
+  // The remaining transformations are attempted in this order and the first
+  // success wins (|| short-circuits, so later ones are not attempted):
+  //   - turn the switch into an icmp and a branch,
+  //   - remove unreachable cases,
+  //   - forward the switch condition to a PHI,
+  //   - replace the switch with a lookup table.
+  if (TurnSwitchRangeIntoICmp(SI, Builder) ||
+      EliminateDeadSwitchCases(SI) ||
+      ForwardSwitchConditionToPHI(SI) ||
+      SwitchToLookupTable(SI, Builder, TTI, TD)) {
+    SimplifyCFG(Parent, TTI, TD);
+    return true;
+  }
+
+  return false;
+}
+
+/// Simplify an indirectbr terminator. Destinations that are duplicates, or
+/// whose block does not have its address taken, are dropped. An indirectbr
+/// left with no destinations becomes unreachable and one left with a single
+/// destination becomes a direct branch. Returns true if anything changed.
+bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) {
+  BasicBlock *BB = IBI->getParent();
+  bool Changed = false;
+
+  // Eliminate redundant destinations. The index only advances past a
+  // destination that is kept; after a removal the same slot is looked at
+  // again and the bound shrinks by one.
+  SmallPtrSet<Value *, 8> Succs;
+  unsigned Idx = 0, NumDests = IBI->getNumDestinations();
+  while (Idx != NumDests) {
+    BasicBlock *Dest = IBI->getDestination(Idx);
+    if (Dest->hasAddressTaken() && Succs.insert(Dest)) {
+      ++Idx;
+      continue;
+    }
+
+    Dest->removePredecessor(BB);
+    IBI->removeDestination(Idx);
+    --NumDests;
+    Changed = true;
+  }
+
+  const unsigned Remaining = IBI->getNumDestinations();
+
+  // If the indirectbr has no successors, change it to unreachable.
+  if (Remaining == 0) {
+    new UnreachableInst(IBI->getContext(), IBI);
+    EraseTerminatorInstAndDCECond(IBI);
+    return true;
+  }
+
+  // If the indirectbr has one successor, change it to a direct branch.
+  if (Remaining == 1) {
+    BranchInst::Create(IBI->getDestination(0), IBI);
+    EraseTerminatorInstAndDCECond(IBI);
+    return true;
+  }
+
+  // An indirectbr on a select address has a dedicated simplification.
+  SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress());
+  if (SI && SimplifyIndirectBrOnSelect(IBI, SI)) {
+    SimplifyCFG(BB, TTI, TD);
+    return true;
+  }
+
+  return Changed;
+}
+
+/// Run the simplifications that apply to a block ending in an unconditional
+/// branch. Returns true if the CFG was changed.
+bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
+  BasicBlock *BB = BI->getParent();
+
+  // Sinking of common code is only attempted when SinkCommon is set.
+  if (SinkCommon && SinkThenElseCodeToEnd(BI))
+    return true;
+
+  // If the terminator is the only instruction left after PHIs, debug
+  // intrinsics and lifetime markers, simplify the block. The entry block is
+  // never treated this way.
+  BasicBlock::iterator I = BB->getFirstNonPHIOrDbgOrLifetime();
+  if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
+      TryToSimplifyUncondBranchFromEmptyBlock(BB))
+    return true;
+
+  // If the only instruction in the block is a seteq/setne comparison
+  // against a constant, try to simplify the block.
+  ICmpInst *ICI = dyn_cast<ICmpInst>(I);
+  if (ICI && ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
+    // Step past the compare and any debug intrinsics that follow it.
+    ++I;
+    while (isa<DbgInfoIntrinsic>(I))
+      ++I;
+    if (I->isTerminator() &&
+        TryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, TTI, TD))
+      return true;
+  }
+
+  // If this basic block is ONLY a compare and a branch, and if a predecessor
+  // branches to us and our successor, fold the comparison into the
+  // predecessor and use logical operations to update the incoming value
+  // for PHI nodes in common successor.
+  if (!FoldBranchToCommonDest(BI))
+    return false;
+
+  SimplifyCFG(BB, TTI, TD);
+  return true;
+}
+
+
+/// Run the conditional-branch simplifications on BI in a fixed order and
+/// return true as soon as one of them changes the CFG. Every case except the
+/// icmp-chain one re-runs SimplifyCFG on BI's block before returning.
+bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
+  BasicBlock *BB = BI->getParent();
+
+  if (isValueEqualityComparison(BI)) {
+    // If we only have one predecessor, and if it is a branch on this value,
+    // see if that predecessor totally determines the outcome of this
+    // comparison.
+    BasicBlock *OnlyPred = BB->getSinglePredecessor();
+    if (OnlyPred &&
+        SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder)) {
+      SimplifyCFG(BB, TTI, TD);
+      return true;
+    }
+
+    // The block must be empty apart from the branch and, optionally, the
+    // compare feeding it. Debug intrinsics are ignored throughout.
+    BasicBlock::iterator I = BB->begin();
+    while (isa<DbgInfoIntrinsic>(I))
+      ++I;
+
+    // If the first real instruction is the branch condition rather than the
+    // branch itself, step over it and over any debug intrinsics after it.
+    if (&*I != BI && &*I == cast<Instruction>(BI->getCondition())) {
+      ++I;
+      while (isa<DbgInfoIntrinsic>(I))
+        ++I;
+    }
+
+    if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder)) {
+      SimplifyCFG(BB, TTI, TD);
+      return true;
+    }
+  }
+
+  // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
+  if (SimplifyBranchOnICmpChain(BI, TD, Builder))
+    return true;
+
+  // If this basic block is ONLY a compare and a branch, and if a predecessor
+  // branches to us and one of our successors, fold the comparison into the
+  // predecessor and use logical operations to pick the right destination.
+  if (FoldBranchToCommonDest(BI)) {
+    SimplifyCFG(BB, TTI, TD);
+    return true;
+  }
+
+  BasicBlock *Succ0 = BI->getSuccessor(0);
+  BasicBlock *Succ1 = BI->getSuccessor(1);
+  const bool Succ0SinglePred = Succ0->getSinglePredecessor() != 0;
+  const bool Succ1SinglePred = Succ1->getSinglePredecessor() != 0;
+
+  if (Succ0SinglePred && Succ1SinglePred) {
+    // We have a conditional branch to two blocks that are only reachable
+    // from BI. We know that the condbr dominates the two blocks, so see if
+    // there is any identical code in the "then" and "else" blocks. If so, we
+    // can hoist it up to the branching block.
+    if (HoistThenElseCodeToIf(BI)) {
+      SimplifyCFG(BB, TTI, TD);
+      return true;
+    }
+  } else if (Succ0SinglePred || Succ1SinglePred) {
+    // Exactly one successor has BI as its only predecessor. If that
+    // successor simply branches to the other one, we may be able to execute
+    // it conditionally.
+    BasicBlock *OwnedSucc = Succ0SinglePred ? Succ0 : Succ1;
+    BasicBlock *OtherSucc = Succ0SinglePred ? Succ1 : Succ0;
+    TerminatorInst *OwnedTI = OwnedSucc->getTerminator();
+    if (OwnedTI->getNumSuccessors() == 1 &&
+        OwnedTI->getSuccessor(0) == OtherSucc &&
+        SpeculativelyExecuteBB(BI, OwnedSucc)) {
+      SimplifyCFG(BB, TTI, TD);
+      return true;
+    }
+  }
+
+  // If this is a branch on a phi node in the current block, thread control
+  // through this block if any PHI node entries are constants.
+  PHINode *PN = dyn_cast<PHINode>(BI->getCondition());
+  if (PN && PN->getParent() == BI->getParent() &&
+      FoldCondBranchOnPHI(BI, TD)) {
+    SimplifyCFG(BB, TTI, TD);
+    return true;
+  }
+
+  // Scan predecessor blocks for conditional branches other than BI itself.
+  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+    BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator());
+    if (!PBI || PBI == BI || !PBI->isConditional())
+      continue;
+    if (SimplifyCondBranchToCondBranch(PBI, BI)) {
+      SimplifyCFG(BB, TTI, TD);
+      return true;
+    }
+  }
+
+  return false;
+}
+
+/// Check if passing a value to an instruction will cause undefined behavior.
+///
+/// \param V  The value being passed; only a null constant can make this
+///           return true.
+/// \param I  The instruction receiving V. Only its first use is inspected.
+///
+/// \returns true when the first use of I is a non-volatile load from, or
+/// store to, address space 0 through I, possibly reached through a GEP whose
+/// pointer operand is I or through a bitcast, and no instruction between I
+/// and that use may have side effects. Returns false in every other case.
+static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) {
+  Constant *C = dyn_cast<Constant>(V);
+  if (!C)
+    return false;
+
+  if (I->use_empty())
+    return false;
+
+  if (C->isNullValue()) {
+    // Only look at the first use, avoid hurting compile time with long uselists
+    User *Use = *I->use_begin();
+
+    // Now make sure that there are no instructions in between that can alter
+    // control flow (eg. calls). The end-of-block test comes first so that the
+    // block's end iterator is never dereferenced when the use is not found
+    // after I in this block.
+    BasicBlock::iterator End = I->getParent()->end();
+    for (BasicBlock::iterator i = ++BasicBlock::iterator(I); ; ++i) {
+      if (i == End)
+        return false;
+      if (&*i == Use)
+        break;
+      if (i->mayHaveSideEffects())
+        return false;
+    }
+
+    // Look through GEPs. A load from a GEP derived from NULL is still undefined
+    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Use))
+      if (GEP->getPointerOperand() == I)
+        return passingValueIsAlwaysUndefined(V, GEP);
+
+    // Look through bitcasts.
+    if (BitCastInst *BC = dyn_cast<BitCastInst>(Use))
+      return passingValueIsAlwaysUndefined(V, BC);
+
+    // Load from null is undefined.
+    if (LoadInst *LI = dyn_cast<LoadInst>(Use))
+      if (!LI->isVolatile())
+        return LI->getPointerAddressSpace() == 0;
+
+    // Store to null is undefined. I must be the address being stored to, not
+    // the value being stored.
+    if (StoreInst *SI = dyn_cast<StoreInst>(Use))
+      if (!SI->isVolatile())
+        return SI->getPointerAddressSpace() == 0 && SI->getPointerOperand() == I;
+  }
+  return false;
+}
+
+/// If a PHI node in BB has an incoming value that will always trigger
+/// undefined behavior (eg. null pointer dereference), remove the branch that
+/// delivers it. At most one branch is removed per call; returns true if one
+/// was removed.
+static bool removeUndefIntroducingPredecessor(BasicBlock *BB) {
+  BasicBlock::iterator It = BB->begin();
+  while (PHINode *PHI = dyn_cast<PHINode>(It)) {
+    for (unsigned Idx = 0, NumIncoming = PHI->getNumIncomingValues();
+         Idx != NumIncoming; ++Idx) {
+      if (!passingValueIsAlwaysUndefined(PHI->getIncomingValue(Idx), PHI))
+        continue;
+
+      BasicBlock *Pred = PHI->getIncomingBlock(Idx);
+      TerminatorInst *T = Pred->getTerminator();
+      IRBuilder<> Builder(T);
+
+      // Only branch terminators are handled.
+      // TODO: SwitchInst.
+      BranchInst *BI = dyn_cast<BranchInst>(T);
+      if (!BI)
+        continue;
+
+      BB->removePredecessor(Pred);
+      // Turn unconditional branches into unreachables and remove the dead
+      // destination from conditional branches.
+      if (BI->isUnconditional()) {
+        Builder.CreateUnreachable();
+      } else {
+        BasicBlock *Survivor =
+          BI->getSuccessor(0) == BB ? BI->getSuccessor(1) : BI->getSuccessor(0);
+        Builder.CreateBr(Survivor);
+      }
+      BI->eraseFromParent();
+      return true;
+    }
+    ++It;
+  }
+
+  return false;
+}
+
+/// Simplify a single basic block: delete it if it is unreachable, clean up
+/// its terminator and PHI nodes, merge it into its predecessor when possible,
+/// and finally dispatch to the simplification matching its terminator kind.
+/// Returns true if any change was made.
+bool SimplifyCFGOpt::run(BasicBlock *BB) {
+  bool Changed = false;
+
+  assert(BB && BB->getParent() && "Block not embedded in function!");
+  assert(BB->getTerminator() && "Degenerate basic block encountered!");
+
+  // A block is unreachable when it has no predecessors (unless it is the
+  // entry block) or when its only predecessor is itself. Remove such blocks.
+  const bool Orphaned = pred_begin(BB) == pred_end(BB) &&
+                        BB != &BB->getParent()->getEntryBlock();
+  if (Orphaned || BB->getSinglePredecessor() == BB) {
+    DEBUG(dbgs() << "Removing BB: \n" << *BB);
+    DeleteDeadBlock(BB);
+    return true;
+  }
+
+  // Check to see if we can constant propagate this terminator instruction
+  // away...
+  if (ConstantFoldTerminator(BB, true))
+    Changed = true;
+
+  // Check for and eliminate duplicate PHI nodes in this block.
+  if (EliminateDuplicatePHINodes(BB))
+    Changed = true;
+
+  // Check for and remove branches that will always cause undefined behavior.
+  if (removeUndefIntroducingPredecessor(BB))
+    Changed = true;
+
+  // Merge basic blocks into their predecessor if there is only one distinct
+  // pred, and if there is only one distinct successor of the predecessor, and
+  // if there are no PHI nodes.
+  if (MergeBlockIntoPredecessor(BB))
+    return true;
+
+  IRBuilder<> Builder(BB);
+
+  // If there is a trivial two-entry PHI node in this basic block, and we can
+  // eliminate it, do so now.
+  PHINode *PN = dyn_cast<PHINode>(BB->begin());
+  if (PN && PN->getNumIncomingValues() == 2)
+    Changed |= FoldTwoEntryPHINode(PN, TD);
+
+  // Fetch the terminator only now, after the transformations above, and
+  // dispatch on its kind.
+  TerminatorInst *Term = BB->getTerminator();
+  Builder.SetInsertPoint(Term);
+
+  bool Simplified = false;
+  if (BranchInst *BI = dyn_cast<BranchInst>(Term))
+    Simplified = BI->isUnconditional() ? SimplifyUncondBranch(BI, Builder)
+                                       : SimplifyCondBranch(BI, Builder);
+  else if (ReturnInst *Ret = dyn_cast<ReturnInst>(Term))
+    Simplified = SimplifyReturn(Ret, Builder);
+  else if (ResumeInst *Resume = dyn_cast<ResumeInst>(Term))
+    Simplified = SimplifyResume(Resume, Builder);
+  else if (SwitchInst *SI = dyn_cast<SwitchInst>(Term))
+    Simplified = SimplifySwitch(SI, Builder);
+  else if (UnreachableInst *UI = dyn_cast<UnreachableInst>(Term))
+    Simplified = SimplifyUnreachable(UI);
+  else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(Term))
+    Simplified = SimplifyIndirectBr(IBI);
+
+  // A successful terminator simplification always reports a change;
+  // otherwise report whatever the earlier cleanups did.
+  return Simplified || Changed;
+}
+
+/// SimplifyCFG - Entry point for simplifying the CFG around one basic block.
+/// For example, it adjusts branches to branches to eliminate the extra hop,
+/// it eliminates unreachable basic blocks, and does other "peephole"
+/// optimization of the CFG. It returns true if a modification was made.
+///
+bool llvm::SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
+                       const DataLayout *TD) {
+  // All of the work is done by a short-lived SimplifyCFGOpt object.
+  SimplifyCFGOpt Simplifier(TTI, TD);
+  return Simplifier.run(BB);
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
new file mode 100644
index 000000000000..bf3442aeaaad
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -0,0 +1,393 @@
+//===-- SimplifyIndVar.cpp - Induction variable simplification ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements induction variable simplification. It does
+// not define any actual pass or policy, but provides a single function to
+// simplify a loop's induction variables based on ScalarEvolution.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "indvars"
+
+#include "llvm/Transforms/Utils/SimplifyIndVar.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/IVUsers.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+STATISTIC(NumElimIdentity, "Number of IV identities eliminated");
+STATISTIC(NumElimOperand, "Number of IV operands folded into a use");
+STATISTIC(NumElimRem , "Number of IV remainder operations eliminated");
+STATISTIC(NumElimCmp , "Number of IV comparisons eliminated");
+
+namespace {
+  /// SimplifyIndvar - Utility that simplifies the users of an induction
+  /// variable with the help of ScalarEvolution. It is the primary instrument
+  /// of the IndvarSimplify pass, but it may also be directly invoked to clean
+  /// up after other loop passes that preserve SCEV.
+  class SimplifyIndvar {
+    Loop *L;              // Loop whose induction variables are simplified.
+    LoopInfo *LI;         // Fetched from the pass manager; must be available.
+    ScalarEvolution *SE;
+    const DataLayout *TD; // May be NULL
+
+    // Caller-owned list that collects instructions made dead here.
+    SmallVectorImpl<WeakVH> &DeadInsts;
+
+    // Set once any simplification has modified the IR.
+    bool Changed;
+
+  public:
+    /// LoopInfo and DataLayout are looked up through \p LPM. \p IVU is
+    /// accepted but not used by this constructor.
+    SimplifyIndvar(Loop *TheLoop, ScalarEvolution *Evolution,
+                   LPPassManager *LPM, SmallVectorImpl<WeakVH> &Dead,
+                   IVUsers *IVU = NULL)
+      : L(TheLoop),
+        LI(LPM->getAnalysisIfAvailable<LoopInfo>()),
+        SE(Evolution),
+        TD(LPM->getAnalysisIfAvailable<DataLayout>()),
+        DeadInsts(Dead),
+        Changed(false) {
+      assert(LI && "IV simplification requires LoopInfo");
+    }
+
+    /// True once any simplification has been applied.
+    bool hasChanged() const { return Changed; }
+
+    /// Iteratively perform simplification on a worklist of users of the
+    /// specified induction variable. This is the top-level driver that
+    /// applies all simplifications to users of an IV.
+    void simplifyUsers(PHINode *CurrIV, IVVisitor *V = NULL);
+
+    /// Fold an IV operand into its use; returns the new operand or null.
+    Value *foldIVUser(Instruction *UseInst, Instruction *IVOperand);
+
+    /// Helpers that remove IV users: compares, remainders and identities.
+    bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
+    void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
+    void eliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand,
+                              bool IsSigned);
+  };
+}
+
+/// foldIVUser - Fold an IV operand into its use. This removes increments of
+/// an aligned IV when used by an instruction that ignores the low bits.
+///
+/// IVOperand is guaranteed SCEVable, but UseInst may not be.
+///
+/// Return the operand of IVOperand for this induction variable if IVOperand
+/// can be folded (in case more folding opportunities have been exposed).
+/// Otherwise return null.
+Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) {
+  // Only unsigned division and logical shift right are candidates.
+  const unsigned Opcode = UseInst->getOpcode();
+  if (Opcode != Instruction::UDiv && Opcode != Instruction::LShr)
+    return 0;
+
+  // We're only interested in the case where we know something about
+  // the numerator and have a constant denominator.
+  const unsigned NumeratorIdx = 0;
+  if (UseInst->getOperand(NumeratorIdx) != IVOperand)
+    return 0;
+  if (!isa<ConstantInt>(UseInst->getOperand(1)))
+    return 0;
+
+  // Attempt to fold a binary operator with constant operand.
+  // e.g. ((I + 1) >> 2) => I >> 2
+  if (!isa<BinaryOperator>(IVOperand))
+    return 0;
+  if (!isa<ConstantInt>(IVOperand->getOperand(1)))
+    return 0;
+
+  // IVSrc must be the (SCEVable) IV, since the other operand is const.
+  Value *IVSrc = IVOperand->getOperand(0);
+  assert(SE->isSCEVable(IVSrc->getType()) && "Expect SCEVable IV operand");
+
+  ConstantInt *Divisor = cast<ConstantInt>(UseInst->getOperand(1));
+  if (Opcode == Instruction::LShr) {
+    // Get a constant for the divisor: a shift by C is modelled as a division
+    // by the value with only bit C set. See createSCEV.
+    uint32_t BitWidth = cast<IntegerType>(UseInst->getType())->getBitWidth();
+    if (Divisor->getValue().uge(BitWidth))
+      return 0;
+
+    Divisor = ConstantInt::get(UseInst->getContext(),
+                               APInt::getOneBitSet(BitWidth,
+                                                   Divisor->getZExtValue()));
+  }
+  const SCEV *FoldedExpr =
+    SE->getUDivExpr(SE->getSCEV(IVSrc), SE->getSCEV(Divisor));
+
+  // We have something that might fold its operand. Compare SCEVs.
+  if (!SE->isSCEVable(UseInst->getType()))
+    return 0;
+
+  // Bypass the operand if SCEV can prove it has no effect.
+  if (SE->getSCEV(UseInst) != FoldedExpr)
+    return 0;
+
+  DEBUG(dbgs() << "INDVARS: Eliminated IV operand: " << *IVOperand
+        << " -> " << *UseInst << '\n');
+
+  UseInst->setOperand(NumeratorIdx, IVSrc);
+  assert(SE->getSCEV(UseInst) == FoldedExpr && "bad SCEV with folded oper");
+
+  ++NumElimOperand;
+  Changed = true;
+  // The bypassed operand is queued for deletion once nothing uses it.
+  if (IVOperand->use_empty())
+    DeadInsts.push_back(IVOperand);
+  return IVSrc;
+}
+
+/// eliminateIVComparison - SimplifyIVUsers helper for eliminating useless
+/// comparisons against an induction variable. A comparison that SCEV proves
+/// always true or always false has its uses replaced by the matching
+/// constant and is queued on DeadInsts.
+void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
+  // Normalize so that the IV is on the left-hand side of the predicate.
+  ICmpInst::Predicate Pred = ICmp->getPredicate();
+  unsigned IVOperIdx = 0;
+  if (ICmp->getOperand(0) != IVOperand) {
+    // Swapped
+    assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand");
+    IVOperIdx = 1;
+    Pred = ICmpInst::getSwappedPredicate(Pred);
+  }
+
+  // Get the SCEVs for the ICmp operands.
+  const SCEV *S = SE->getSCEV(ICmp->getOperand(IVOperIdx));
+  const SCEV *X = SE->getSCEV(ICmp->getOperand(1 - IVOperIdx));
+
+  // Simplify unnecessary loops away.
+  const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent());
+  S = SE->getSCEVAtScope(S, ICmpLoop);
+  X = SE->getSCEVAtScope(X, ICmpLoop);
+
+  // If the condition is always true or always false, replace it with
+  // a constant value. Otherwise there is nothing to do.
+  ConstantInt *Folded = 0;
+  if (SE->isKnownPredicate(Pred, S, X))
+    Folded = ConstantInt::getTrue(ICmp->getContext());
+  else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X))
+    Folded = ConstantInt::getFalse(ICmp->getContext());
+  else
+    return;
+
+  ICmp->replaceAllUsesWith(Folded);
+
+  DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
+  ++NumElimCmp;
+  Changed = true;
+  DeadInsts.push_back(ICmp);
+}
+
+/// eliminateIVRemainder - SimplifyIVUsers helper for eliminating useless
+/// remainder operations operating on an induction variable. IsSigned selects
+/// between signed and unsigned reasoning about the operands.
+void SimplifyIndvar::eliminateIVRemainder(BinaryOperator *Rem,
+                                          Value *IVOperand,
+                                          bool IsSigned) {
+  // We're only interested in the case where we know something about
+  // the numerator.
+  if (Rem->getOperand(0) != IVOperand)
+    return;
+
+  // Get the SCEVs for the remainder's operands.
+  const SCEV *S = SE->getSCEV(Rem->getOperand(0));
+  const SCEV *X = SE->getSCEV(Rem->getOperand(1));
+
+  // Simplify unnecessary loops away.
+  const Loop *RemLoop = LI->getLoopFor(Rem->getParent());
+  S = SE->getSCEVAtScope(S, RemLoop);
+  X = SE->getSCEVAtScope(X, RemLoop);
+
+  // "Less than" in the signedness of the remainder.
+  const ICmpInst::Predicate LessThan =
+    IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
+
+  if ((!IsSigned || SE->isKnownNonNegative(S)) &&
+      SE->isKnownPredicate(LessThan, S, X)) {
+    // i % n --> i if i is in [0,n).
+    Rem->replaceAllUsesWith(Rem->getOperand(0));
+  } else {
+    // (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n).
+    const SCEV *LessOne =
+      SE->getMinusSCEV(S, SE->getConstant(S->getType(), 1));
+    if (IsSigned && !SE->isKnownNonNegative(LessOne))
+      return;
+
+    if (!SE->isKnownPredicate(LessThan, LessOne, X))
+      return;
+
+    // Both new instructions are inserted right before the remainder.
+    ICmpInst *IsDivisor = new ICmpInst(Rem, ICmpInst::ICMP_EQ,
+                                       Rem->getOperand(0), Rem->getOperand(1));
+    SelectInst *Sel =
+      SelectInst::Create(IsDivisor,
+                         ConstantInt::get(Rem->getType(), 0),
+                         Rem->getOperand(0), "tmp", Rem);
+    Rem->replaceAllUsesWith(Sel);
+  }
+
+  DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
+  ++NumElimRem;
+  Changed = true;
+  DeadInsts.push_back(Rem);
+}
+
+/// eliminateIVUser - Eliminate an operation that consumes a simple IV and has
+/// no observable side-effect given the range of IV values.
+/// IVOperand is guaranteed SCEVable, but UseInst may not be.
+///
+/// Integer compares and srem/urem are always handed to their helper and
+/// reported as handled (true), whether or not the helper changed anything.
+bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst,
+                                     Instruction *IVOperand) {
+  if (isa<ICmpInst>(UseInst)) {
+    eliminateIVComparison(cast<ICmpInst>(UseInst), IVOperand);
+    return true;
+  }
+
+  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(UseInst)) {
+    const unsigned Opcode = BinOp->getOpcode();
+    if (Opcode == Instruction::SRem || Opcode == Instruction::URem) {
+      eliminateIVRemainder(BinOp, IVOperand, Opcode == Instruction::SRem);
+      return true;
+    }
+  }
+
+  // Eliminate any operation that SCEV can prove is an identity function:
+  // same type as the operand and the same SCEV expression.
+  if (!SE->isSCEVable(UseInst->getType()))
+    return false;
+  if (UseInst->getType() != IVOperand->getType())
+    return false;
+  if (SE->getSCEV(UseInst) != SE->getSCEV(IVOperand))
+    return false;
+
+  DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n');
+
+  UseInst->replaceAllUsesWith(IVOperand);
+  ++NumElimIdentity;
+  Changed = true;
+  DeadInsts.push_back(UseInst);
+  return true;
+}
+
+/// pushIVUsers - Add all uses of Def to the current IV's worklist. Each user
+/// is queued as a (user, Def) pair, and only the first time it is seen.
+///
+static void pushIVUsers(
+  Instruction *Def,
+  SmallPtrSet<Instruction*,16> &Simplified,
+  SmallVectorImpl< std::pair<Instruction*,Instruction*> > &SimpleIVUsers) {
+
+  for (Value::use_iterator UI = Def->use_begin(), E = Def->use_end();
+       UI != E; ++UI) {
+    Instruction *User = cast<Instruction>(*UI);
+
+    // If Def is a LoopPhi, it may not be in the Simplified set, so check for
+    // self edges first.
+    if (User == Def)
+      continue;
+
+    // Avoid infinite or exponential worklist processing and keep worklist
+    // users unique: a user already recorded in Simplified is not pushed again.
+    if (!Simplified.insert(User))
+      continue;
+
+    SimpleIVUsers.push_back(std::make_pair(User, Def));
+  }
+}
+
+/// isSimpleIVUser - Return true if this instruction generates a simple SCEV
+/// expression in terms of that IV, i.e. its SCEV is an add recurrence whose
+/// loop is L.
+///
+/// This is similar to IVUsers' isInteresting() but processes each instruction
+/// non-recursively when the operand is already known to be a simpleIVUser.
+///
+static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) {
+  if (!SE->isSCEVable(I->getType()))
+    return false;
+
+  // Only add recurrences on the given loop qualify.
+  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(I));
+  return AR && AR->getLoop() == L;
+}
+
+/// simplifyUsers - Iteratively perform simplification on a worklist of users
+/// of the specified induction variable. Each successive simplification may
+/// push more users which may themselves be candidates for simplification.
+///
+/// This algorithm does not require IVUsers analysis. Instead, it simplifies
+/// instructions in-place during analysis. Rather than rewriting induction
+/// variables bottom-up from their users, it transforms a chain of IVUsers
+/// top-down, updating the IR only when it encounters a clear optimization
+/// opportunity.
+///
+/// Once DisableIVRewrite is default, LSR will be the only client of IVUsers.
+///
+void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
+  if (!SE->isSCEVable(CurrIV->getType()))
+    return;
+
+  typedef std::pair<Instruction*, Instruction*> UseDefPair;
+
+  // Instructions processed by SimplifyIndvar for CurrIV.
+  SmallPtrSet<Instruction*,16> Simplified;
+
+  // Use-def pairs of IV users waiting to be processed for CurrIV.
+  SmallVector<UseDefPair, 8> SimpleIVUsers;
+
+  // Push users of the current LoopPhi. In rare cases, pushIVUsers may be
+  // called multiple times for the same LoopPhi. This is the proper thing to
+  // do for loop header phis that use each other.
+  pushIVUsers(CurrIV, Simplified, SimpleIVUsers);
+
+  while (!SimpleIVUsers.empty()) {
+    UseDefPair UseOper = SimpleIVUsers.pop_back_val();
+    Instruction *UseInst = UseOper.first;
+
+    // Bypass back edges to avoid extra work.
+    if (UseInst == CurrIV)
+      continue;
+
+    // Keep folding the operand into the use for as long as foldIVUser hands
+    // back a replacement operand that is itself an instruction.
+    Instruction *IVOperand = UseOper.second;
+    for (unsigned N = 0; IVOperand; ++N) {
+      assert(N <= Simplified.size() && "runaway iteration");
+
+      Value *NewOper = foldIVUser(UseInst, IVOperand);
+      if (!NewOper)
+        break; // done folding
+      IVOperand = dyn_cast<Instruction>(NewOper);
+    }
+    // Folding ended on an operand that is not an instruction.
+    if (!IVOperand)
+      continue;
+
+    if (eliminateIVUser(UseInst, IVOperand)) {
+      // The use was handled; look at the operand's users next.
+      pushIVUsers(IVOperand, Simplified, SimpleIVUsers);
+    } else {
+      // Casts go to the visitor, when there is one; any other simple IV user
+      // has its own users queued.
+      CastInst *Cast = dyn_cast<CastInst>(UseInst);
+      if (V && Cast)
+        V->visitCast(Cast);
+      else if (isSimpleIVUser(UseInst, L, SE))
+        pushIVUsers(UseInst, Simplified, SimpleIVUsers);
+    }
+  }
+}
+
+namespace llvm {
+
+// Out-of-line definition of IVVisitor's anchor method.
+void IVVisitor::anchor() { }
+
+/// simplifyUsersOfIV - Simplify instructions that use this induction variable
+/// by using ScalarEvolution to analyze the IV's recurrence. Returns true if
+/// any instruction was changed.
+bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, LPPassManager *LPM,
+                       SmallVectorImpl<WeakVH> &Dead, IVVisitor *V)
+{
+  // The simplifier works on the loop that contains the phi's block.
+  LoopInfo *LI = &LPM->getAnalysis<LoopInfo>();
+  Loop *IVLoop = LI->getLoopFor(CurrIV->getParent());
+
+  SimplifyIndvar SIV(IVLoop, SE, LPM, Dead);
+  SIV.simplifyUsers(CurrIV, V);
+  return SIV.hasChanged();
+}
+
+/// simplifyLoopIVs - Simplify users of induction variables within this
+/// loop. This does not actually change or add IVs.
+bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, LPPassManager *LPM,
+                     SmallVectorImpl<WeakVH> &Dead) {
+  bool Changed = false;
+  // Visit every PHI node at the top of the loop header.
+  BasicBlock::iterator I = L->getHeader()->begin();
+  while (isa<PHINode>(I)) {
+    if (simplifyUsersOfIV(cast<PHINode>(I), SE, LPM, Dead))
+      Changed = true;
+    ++I;
+  }
+  return Changed;
+}
+
+} // namespace llvm
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp
new file mode 100644
index 000000000000..f9687e4d5890
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -0,0 +1,100 @@
+//===------ SimplifyInstructions.cpp - Remove redundant instructions ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a utility pass used for testing the InstructionSimplify analysis.
+// The analysis is applied to every instruction, and if it simplifies then the
+// instruction is replaced by the simplification. If you are looking for a pass
+// that performs serious instruction folding, use the instcombine pass instead.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "instsimplify"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+STATISTIC(NumSimplified, "Number of redundant instructions removed");
+
+namespace {
+  /// Function pass that replaces every instruction SimplifyInstruction can
+  /// simplify with the simplified value, and then deletes instructions that
+  /// have become trivially dead.
+  struct InstSimplifier : public FunctionPass {
+    static char ID; // Pass identification, replacement for typeid
+    InstSimplifier() : FunctionPass(ID) {
+      initializeInstSimplifierPass(*PassRegistry::getPassRegistry());
+    }
+
+    void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      AU.addRequired<TargetLibraryInfo>();
+    }
+
+    /// runOnFunction - Remove instructions that simplify.
+    bool runOnFunction(Function &F) {
+      // The dominator tree and data layout are optional; TLI is required.
+      const DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
+      const DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
+      const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
+
+      // Two sets that swap roles each round: the instructions to visit in the
+      // current round, and the ones collected for the next round.
+      SmallPtrSet<const Instruction*, 8> SetA, SetB;
+      SmallPtrSet<const Instruction*, 8> *ToSimplify = &SetA;
+      SmallPtrSet<const Instruction*, 8> *Next = &SetB;
+      bool Changed = false;
+
+      do {
+        for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
+             DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
+          BasicBlock::iterator BI = DI->begin(), BE = DI->end();
+          while (BI != BE) {
+            // Advance the iterator before I is touched below.
+            Instruction *I = BI++;
+
+            // The first time through the loop ToSimplify is empty and we try
+            // to simplify all instructions. On later iterations ToSimplify is
+            // not empty and we only bother simplifying instructions that are
+            // in it.
+            if (!ToSimplify->empty() && !ToSimplify->count(I))
+              continue;
+
+            // Don't waste time simplifying unused instructions.
+            Value *V = I->use_empty() ? 0 : SimplifyInstruction(I, TD, TLI, DT);
+            if (V) {
+              // Mark all uses for resimplification next time round the loop.
+              for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+                   UI != UE; ++UI)
+                Next->insert(cast<Instruction>(*UI));
+              I->replaceAllUsesWith(V);
+              ++NumSimplified;
+              Changed = true;
+            }
+
+            if (RecursivelyDeleteTriviallyDeadInstructions(I, TLI))
+              Changed = true;
+          }
+        }
+
+        // Place the list of instructions to simplify on the next loop
+        // iteration into ToSimplify, and start collecting afresh.
+        SmallPtrSet<const Instruction*, 8> *Finished = ToSimplify;
+        ToSimplify = Next;
+        Next = Finished;
+        Next->clear();
+      } while (!ToSimplify->empty());
+
+      return Changed;
+    }
+  };
+}
+
+char InstSimplifier::ID = 0;
+INITIALIZE_PASS_BEGIN(InstSimplifier, "instsimplify",
+ "Remove redundant instructions", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(InstSimplifier, "instsimplify",
+ "Remove redundant instructions", false, false)
+char &llvm::InstructionSimplifierID = InstSimplifier::ID;
+
+// Public interface to the simplify instructions pass: returns a newly
+// allocated InstSimplifier.
+FunctionPass *llvm::createInstructionSimplifierPass() {
+  FunctionPass *Pass = new InstSimplifier();
+  return Pass;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
new file mode 100644
index 000000000000..15b3e66f94ad
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -0,0 +1,2253 @@
+//===------ SimplifyLibCalls.cpp - Library calls simplifier ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the library call simplifier: a collection of
+// optimizations, one per recognized library function, that rewrite calls to
+// well-known routines (fortified *_chk variants, string and memory functions,
+// ...) into cheaper equivalent code when the arguments allow it.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
+
+using namespace llvm;
+
+// Hidden boolean command-line option "error-reporting-is-cold", enabled by
+// default. Per its description it selects whether error-reporting calls are
+// treated as cold; the code consuming it is outside this part of the file.
+static cl::opt<bool>
+ColdErrorCalls("error-reporting-is-cold", cl::init(true),
+ cl::Hidden, cl::desc("Treat error-reporting calls as cold"));
+
+/// This class is the abstract base class for the set of optimizations that
+/// corresponds to one library call.
+namespace {
+class LibCallOptimization {
+protected:
+  // Per-call state. optimizeCall() refreshes all of these before it hands
+  // control to callOptimizer(), so implementations may rely on them.
+  Function *Caller;              // Function containing the call.
+  const DataLayout *TD;          // May be null when no DataLayout is known.
+  const TargetLibraryInfo *TLI;
+  const LibCallSimplifier *LCS;
+  LLVMContext* Context;          // Context of the call being optimized.
+public:
+  LibCallOptimization() : Caller(0), TD(0), TLI(0), LCS(0), Context(0) { }
+  virtual ~LibCallOptimization() {}
+
+  /// callOptimizer - This pure virtual method is implemented by derived
+  /// classes to do various optimizations. If this returns null then no
+  /// transformation was performed. If it returns CI, then it transformed the
+  /// call and CI is to be deleted. If it returns something else, replace CI
+  /// with the new value and delete CI.
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B)
+    =0;
+
+  /// ignoreCallingConv - Return true if the optimization is safe for calls
+  /// using any calling convention. When this returns false (the default),
+  /// optimizeCall() only processes calls that use the C calling convention.
+  virtual bool ignoreCallingConv() { return false; }
+
+  /// optimizeCall - Record the environment for \p CI and run callOptimizer()
+  /// on it. Returns null when the call is not eligible (indirect call, or a
+  /// non-C calling convention that the optimization does not ignore) or when
+  /// callOptimizer() performed no transformation.
+  Value *optimizeCall(CallInst *CI, const DataLayout *TD,
+                      const TargetLibraryInfo *TLI,
+                      const LibCallSimplifier *LCS, IRBuilder<> &B) {
+    Caller = CI->getParent()->getParent();
+    this->TD = TD;
+    this->TLI = TLI;
+    this->LCS = LCS;
+    // Take the context from the call itself so that it is valid for every
+    // call rather than left over from a previous invocation.
+    Context = &CI->getContext();
+
+    // Every optimization inspects the callee's prototype, so a call without a
+    // directly known callee cannot be handled.
+    Function *Callee = CI->getCalledFunction();
+    if (!Callee)
+      return NULL;
+
+    // We never change the calling convention.
+    if (!ignoreCallingConv() && CI->getCallingConv() != llvm::CallingConv::C)
+      return NULL;
+
+    return callOptimizer(Callee, CI, B);
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// Helper Functions
+//===----------------------------------------------------------------------===//
+
+/// isOnlyUsedInZeroEqualityComparison - Return true when every user of V is an
+/// equality icmp whose second operand is a null constant, i.e. only the
+/// zero / non-zero distinction of V is ever observed.
+static bool isOnlyUsedInZeroEqualityComparison(Value *V) {
+  Value::use_iterator It = V->use_begin();
+  Value::use_iterator End = V->use_end();
+  while (It != End) {
+    // Anything other than an equality comparison disqualifies V.
+    ICmpInst *Cmp = dyn_cast<ICmpInst>(*It);
+    if (!Cmp || !Cmp->isEquality())
+      return false;
+
+    // The comparison must be against a null constant on the right-hand side.
+    Constant *RHS = dyn_cast<Constant>(Cmp->getOperand(1));
+    if (!RHS || !RHS->isNullValue())
+      return false;
+
+    ++It;
+  }
+  return true;
+}
+
+/// isOnlyUsedInEqualityComparison - Return true when every user of V is an
+/// equality icmp whose second operand is exactly With.
+static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) {
+  Value::use_iterator It = V->use_begin();
+  Value::use_iterator End = V->use_end();
+  while (It != End) {
+    ICmpInst *Cmp = dyn_cast<ICmpInst>(*It);
+    // Reject non-comparisons, non-equality predicates and comparisons whose
+    // right-hand operand is something other than With.
+    if (!Cmp || !Cmp->isEquality() || Cmp->getOperand(1) != With)
+      return false;
+    ++It;
+  }
+  return true;
+}
+
+/// Return true if at least one operand of the call instruction has a
+/// floating point type.
+static bool callHasFloatingPointArgument(const CallInst *CI) {
+  CallInst::const_op_iterator Op = CI->op_begin();
+  CallInst::const_op_iterator OpEnd = CI->op_end();
+  for (; Op != OpEnd; ++Op)
+    if ((*Op)->getType()->isFloatingPointTy())
+      return true;
+  return false;
+}
+
+/// \brief Check whether the overloaded unary floating point function
+/// corresponding to \a Ty is available.
+///
+/// FloatFn is consulted for float, DoubleFn for double, and LongDoubleFn for
+/// every other type.
+static bool hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
+                            LibFunc::Func DoubleFn, LibFunc::Func FloatFn,
+                            LibFunc::Func LongDoubleFn) {
+  const Type::TypeID ID = Ty->getTypeID();
+
+  // Pick the variant matching the type, then do a single availability query.
+  LibFunc::Func Selected = LongDoubleFn;
+  if (ID == Type::FloatTyID)
+    Selected = FloatFn;
+  else if (ID == Type::DoubleTyID)
+    Selected = DoubleFn;
+
+  return TLI->has(Selected);
+}
+
+//===----------------------------------------------------------------------===//
+// Fortified Library Call Optimizations
+//===----------------------------------------------------------------------===//
+
+/// Common base of the optimizations for fortified (*_chk) library calls.
+struct FortifiedLibCallOptimization : public LibCallOptimization {
+protected:
+  /// Decide whether the checked call may be replaced by its unchecked form.
+  /// SizeCIOp and SizeArgOp are argument indices of the call under
+  /// consideration; isString selects string-length based reasoning for the
+  /// argument at SizeArgOp. The exact policy is up to the implementation.
+  virtual bool isFoldable(unsigned SizeCIOp,
+                          unsigned SizeArgOp,
+                          bool isString) const = 0;
+};
+
+/// isFoldable() implementation that reasons directly about the arguments of
+/// the call instruction stored in CI.
+struct InstFortifiedLibCallOptimization : public FortifiedLibCallOptimization {
+  // The call under examination; the concrete optimizations assign it at the
+  // start of callOptimizer(), before isFoldable() is consulted.
+  CallInst *CI;
+
+  /// Return true when the argument at index SizeCIOp (the object size) is
+  /// known to be large enough for the amount described by the argument at
+  /// index SizeArgOp. With isString set, that argument is a string whose
+  /// length is obtained through GetStringLength; otherwise it must be a
+  /// constant integer.
+  bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const {
+    Value *ObjSize = CI->getArgOperand(SizeCIOp);
+    Value *Amount = CI->getArgOperand(SizeArgOp);
+
+    // The very same value on both sides trivially fits.
+    if (ObjSize == Amount)
+      return true;
+
+    // Everything below needs a constant object size.
+    ConstantInt *ObjSizeC = dyn_cast<ConstantInt>(ObjSize);
+    if (!ObjSizeC)
+      return false;
+
+    // NOTE(review): an all-ones size is presumably the "unknown object size"
+    // marker, for which the check can never fire -- confirm.
+    if (ObjSizeC->isAllOnesValue())
+      return true;
+
+    if (isString) {
+      // A reported length of 0 means the length is unknown, in which case
+      // the check cannot be removed.
+      const uint64_t StrLen = GetStringLength(Amount);
+      return StrLen != 0 && ObjSizeC->getZExtValue() >= StrLen;
+    }
+
+    if (ConstantInt *AmountC = dyn_cast<ConstantInt>(Amount))
+      return ObjSizeC->getZExtValue() >= AmountC->getZExtValue();
+
+    return false;
+  }
+};
+
+/// Lowers a checked memcpy call taking (dst, src, len, objsize) to a plain
+/// memcpy when isFoldable() shows the object-size check cannot fail.
+struct MemCpyChkOpt : public InstFortifiedLibCallOptimization {
+  /// Returns the destination pointer as the replacement value for CI on
+  /// success, or null if nothing was changed.
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    this->CI = CI;
+
+    // The prototype check queries the pointer-sized integer type, which
+    // requires DataLayout; bail out rather than dereference a null pointer.
+    if (!TD) return 0;
+
+    FunctionType *FT = Callee->getFunctionType();
+    LLVMContext &Context = CI->getParent()->getContext();
+
+    // Check if this has the right signature.
+    if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+        !FT->getParamType(0)->isPointerTy() ||
+        !FT->getParamType(1)->isPointerTy() ||
+        FT->getParamType(2) != TD->getIntPtrType(Context) ||
+        FT->getParamType(3) != TD->getIntPtrType(Context))
+      return 0;
+
+    // Argument 3 is the object size, argument 2 the number of bytes copied.
+    if (isFoldable(3, 2, false)) {
+      B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+                     CI->getArgOperand(2), 1);
+      return CI->getArgOperand(0);
+    }
+    return 0;
+  }
+};
+
+/// Lowers a checked memmove call taking (dst, src, len, objsize) to a plain
+/// memmove when isFoldable() shows the object-size check cannot fail.
+struct MemMoveChkOpt : public InstFortifiedLibCallOptimization {
+  /// Returns the destination pointer as the replacement value for CI on
+  /// success, or null if nothing was changed.
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    this->CI = CI;
+
+    // The prototype check queries the pointer-sized integer type, which
+    // requires DataLayout; bail out rather than dereference a null pointer.
+    if (!TD) return 0;
+
+    FunctionType *FT = Callee->getFunctionType();
+    LLVMContext &Context = CI->getParent()->getContext();
+
+    // Check if this has the right signature.
+    if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+        !FT->getParamType(0)->isPointerTy() ||
+        !FT->getParamType(1)->isPointerTy() ||
+        FT->getParamType(2) != TD->getIntPtrType(Context) ||
+        FT->getParamType(3) != TD->getIntPtrType(Context))
+      return 0;
+
+    // Argument 3 is the object size, argument 2 the number of bytes moved.
+    if (isFoldable(3, 2, false)) {
+      B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
+                      CI->getArgOperand(2), 1);
+      return CI->getArgOperand(0);
+    }
+    return 0;
+  }
+};
+
+/// Lowers a checked memset call taking (dst, value, len, objsize) to a plain
+/// memset when isFoldable() shows the object-size check cannot fail.
+struct MemSetChkOpt : public InstFortifiedLibCallOptimization {
+  /// Returns the destination pointer as the replacement value for CI on
+  /// success, or null if nothing was changed.
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    this->CI = CI;
+
+    // The prototype check queries the pointer-sized integer type, which
+    // requires DataLayout; bail out rather than dereference a null pointer.
+    if (!TD) return 0;
+
+    FunctionType *FT = Callee->getFunctionType();
+    LLVMContext &Context = CI->getParent()->getContext();
+
+    // Check if this has the right signature.
+    if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+        !FT->getParamType(0)->isPointerTy() ||
+        !FT->getParamType(1)->isIntegerTy() ||
+        FT->getParamType(2) != TD->getIntPtrType(Context) ||
+        FT->getParamType(3) != TD->getIntPtrType(Context))
+      return 0;
+
+    // Argument 3 is the object size, argument 2 the number of bytes written.
+    if (isFoldable(3, 2, false)) {
+      // The fill value is cast to i8 (unsigned) before the memset is built.
+      Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(),
+                                   false);
+      B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
+      return CI->getArgOperand(0);
+    }
+    return 0;
+  }
+};
+
+/// Simplifies __strcpy_chk(dst, src, objsize): folds it to the unchecked copy
+/// when that is known to be safe, or to __memcpy_chk when only the source
+/// length is known.
+struct StrCpyChkOpt : public InstFortifiedLibCallOptimization {
+  /// Returns the replacement value for CI, or null if nothing was changed.
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    this->CI = CI;
+
+    // Both the prototype check and the __memcpy_chk fold need the
+    // pointer-sized integer type, hence DataLayout. Check once, up front,
+    // before TD is dereferenced.
+    if (!TD) return 0;
+
+    StringRef Name = Callee->getName();
+    FunctionType *FT = Callee->getFunctionType();
+    LLVMContext &Context = CI->getParent()->getContext();
+
+    // Check if this has the right signature.
+    if (FT->getNumParams() != 3 ||
+        FT->getReturnType() != FT->getParamType(0) ||
+        FT->getParamType(0) != FT->getParamType(1) ||
+        FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
+        FT->getParamType(2) != TD->getIntPtrType(Context))
+      return 0;
+
+    Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
+    if (Dst == Src)      // __strcpy_chk(x,x)  -> x
+      return Src;
+
+    // If a) we don't have any length information, or b) we know this will
+    // fit then just lower to a plain strcpy. Otherwise we'll keep our
+    // strcpy_chk call which may fail at runtime if the size is too long.
+    // TODO: It might be nice to get a maximum length out of the possible
+    // string lengths for varying.
+    // Name.substr(2, 6) strips the leading two characters and the suffix of
+    // the checked name, leaving the name of the unchecked function.
+    if (isFoldable(2, 1, true))
+      return EmitStrCpy(Dst, Src, B, TD, TLI, Name.substr(2, 6));
+
+    // Maybe we can still fold __strcpy_chk to __memcpy_chk. That needs a
+    // known source length (0 means unknown).
+    uint64_t Len = GetStringLength(Src);
+    if (Len == 0) return 0;
+
+    return EmitMemCpyChk(Dst, Src,
+                         ConstantInt::get(TD->getIntPtrType(Context), Len),
+                         CI->getArgOperand(2), B, TD, TLI);
+  }
+};
+
+/// Simplifies __stpcpy_chk(dst, src, objsize): folds it to the unchecked copy
+/// when that is known to be safe, or to __memcpy_chk plus an end pointer when
+/// only the source length is known.
+struct StpCpyChkOpt : public InstFortifiedLibCallOptimization {
+  /// Returns the replacement value for CI, or null if nothing was changed.
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    this->CI = CI;
+
+    // Both the prototype check and the __memcpy_chk fold need the
+    // pointer-sized integer type, hence DataLayout. Check once, up front,
+    // before TD is dereferenced.
+    if (!TD) return 0;
+
+    StringRef Name = Callee->getName();
+    FunctionType *FT = Callee->getFunctionType();
+    LLVMContext &Context = CI->getParent()->getContext();
+
+    // Check if this has the right signature.
+    if (FT->getNumParams() != 3 ||
+        FT->getReturnType() != FT->getParamType(0) ||
+        FT->getParamType(0) != FT->getParamType(1) ||
+        FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
+        FT->getParamType(2) != TD->getIntPtrType(FT->getParamType(0)))
+      return 0;
+
+    Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
+    if (Dst == Src) {  // __stpcpy_chk(x,x)  -> x+strlen(x)
+      Value *StrLen = EmitStrLen(Src, B, TD, TLI);
+      return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : 0;
+    }
+
+    // If a) we don't have any length information, or b) we know this will
+    // fit then just lower to a plain stpcpy. Otherwise we'll keep our
+    // stpcpy_chk call which may fail at runtime if the size is too long.
+    // TODO: It might be nice to get a maximum length out of the possible
+    // string lengths for varying.
+    // Name.substr(2, 6) strips the leading two characters and the suffix of
+    // the checked name, leaving the name of the unchecked function.
+    if (isFoldable(2, 1, true))
+      return EmitStrCpy(Dst, Src, B, TD, TLI, Name.substr(2, 6));
+
+    // Maybe we can still fold __stpcpy_chk to __memcpy_chk. That needs a
+    // known source length (0 means unknown).
+    uint64_t Len = GetStringLength(Src);
+    if (Len == 0) return 0;
+
+    Type *PT = FT->getParamType(0);
+    Value *LenV = ConstantInt::get(TD->getIntPtrType(PT), Len);
+    // The result points at the copied nul terminator: Dst + (Len - 1).
+    Value *DstEnd = B.CreateGEP(Dst,
+                                ConstantInt::get(TD->getIntPtrType(PT),
+                                                 Len - 1));
+    if (!EmitMemCpyChk(Dst, Src, LenV, CI->getArgOperand(2), B, TD, TLI))
+      return 0;
+    return DstEnd;
+  }
+};
+
+/// Lowers a checked strncpy-style call taking (dst, src, len, objsize) to its
+/// unchecked counterpart when isFoldable() shows the check cannot fail.
+struct StrNCpyChkOpt : public InstFortifiedLibCallOptimization {
+  /// Returns the value produced by EmitStrNCpy as the replacement for CI, or
+  /// null if nothing was changed.
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    this->CI = CI;
+
+    // The prototype check queries the pointer-sized integer type, which
+    // requires DataLayout; bail out rather than dereference a null pointer.
+    if (!TD) return 0;
+
+    StringRef Name = Callee->getName();
+    FunctionType *FT = Callee->getFunctionType();
+    LLVMContext &Context = CI->getParent()->getContext();
+
+    // Check if this has the right signature.
+    if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+        FT->getParamType(0) != FT->getParamType(1) ||
+        FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
+        !FT->getParamType(2)->isIntegerTy() ||
+        FT->getParamType(3) != TD->getIntPtrType(Context))
+      return 0;
+
+    // Argument 3 is the object size, argument 2 the maximum number of
+    // characters copied. Name.substr(2, 7) strips the leading two characters
+    // and the suffix of the checked name.
+    if (isFoldable(3, 2, false)) {
+      Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+                               CI->getArgOperand(2), B, TD, TLI,
+                               Name.substr(2, 7));
+      return Ret;
+    }
+    return 0;
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// String and Memory Library Call Optimizations
+//===----------------------------------------------------------------------===//
+
+/// Simplifies strcat(dst, src) when the length of src is known.
+struct StrCatOpt : public LibCallOptimization {
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Expected prototype: i8* strcat(i8*, i8*).
+    FunctionType *FT = Callee->getFunctionType();
+    const bool ProtoOK = FT->getNumParams() == 2 &&
+                         FT->getReturnType() == B.getInt8PtrTy() &&
+                         FT->getParamType(0) == FT->getReturnType() &&
+                         FT->getParamType(1) == FT->getReturnType();
+    if (!ProtoOK)
+      return 0;
+
+    Value *Dst = CI->getArgOperand(0);
+    Value *Src = CI->getArgOperand(1);
+
+    // GetStringLength counts the nul terminator; 0 means "unknown".
+    const uint64_t SrcLenWithNul = GetStringLength(Src);
+    if (SrcLenWithNul == 0)
+      return 0;
+
+    // strcat(x, "") -> x
+    if (SrcLenWithNul == 1)
+      return Dst;
+
+    // The remaining transformation requires DataLayout.
+    if (!TD)
+      return 0;
+
+    return emitStrLenMemCpy(Src, Dst, SrcLenWithNul - 1, B);
+  }
+
+  /// Emit strlen(Dst) followed by a memcpy of Len + 1 bytes (the characters
+  /// plus the nul terminator) from Src to the end of Dst. Returns Dst, or
+  /// null when the strlen call could not be emitted.
+  Value *emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len,
+                          IRBuilder<> &B) {
+    // The copy has to start at the end of the destination string, which is
+    // located with a call to strlen.
+    Value *DstLen = EmitStrLen(Dst, B, TD, TLI);
+    if (DstLen == 0)
+      return 0;
+
+    // Index into the destination to obtain the actual copy target.
+    Value *EndOfDst = B.CreateGEP(Dst, DstLen, "endptr");
+
+    // Copy the source including its nul byte, with align = 1.
+    Value *NumBytes = ConstantInt::get(TD->getIntPtrType(*Context), Len + 1);
+    B.CreateMemCpy(EndOfDst, Src, NumBytes, 1);
+    return Dst;
+  }
+};
+
+/// Simplifies strncat(dst, src, n) when n is constant and the length of src
+/// is known.
+struct StrNCatOpt : public StrCatOpt {
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Expected prototype: i8* strncat(i8*, i8*, <integer>).
+    FunctionType *FT = Callee->getFunctionType();
+    const bool ProtoOK = FT->getNumParams() == 3 &&
+                         FT->getReturnType() == B.getInt8PtrTy() &&
+                         FT->getParamType(0) == FT->getReturnType() &&
+                         FT->getParamType(1) == FT->getReturnType() &&
+                         FT->getParamType(2)->isIntegerTy();
+    if (!ProtoOK)
+      return 0;
+
+    Value *Dst = CI->getArgOperand(0);
+    Value *Src = CI->getArgOperand(1);
+
+    // Nothing can be done unless the bound is a constant.
+    ConstantInt *BoundC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+    if (!BoundC)
+      return 0;
+    const uint64_t Bound = BoundC->getZExtValue();
+
+    // GetStringLength counts the nul terminator; 0 means "unknown".
+    const uint64_t SrcLenWithNul = GetStringLength(Src);
+    if (SrcLenWithNul == 0)
+      return 0;
+    const uint64_t SrcLen = SrcLenWithNul - 1;
+
+    // Do-nothing cases:
+    //   strncat(x, "", c) -> x
+    //   strncat(x,  c, 0) -> x
+    if (SrcLen == 0 || Bound == 0)
+      return Dst;
+
+    // The remaining transformation requires DataLayout.
+    if (!TD)
+      return 0;
+
+    // A bound shorter than the source would truncate it; not handled.
+    if (Bound < SrcLen)
+      return 0;
+
+    // strncat(x, s, c) -> strcat(x, s), with s constant so that the strcat
+    // expansion applies.
+    return emitStrLenMemCpy(Src, Dst, SrcLen, B);
+  }
+};
+
+/// Simplifies strchr(s, c): turns it into memchr when only the string length
+/// is known, or folds it completely when both arguments are constant.
+struct StrChrOpt : public LibCallOptimization {
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Expected prototype: i8* strchr(i8*, i32).
+    FunctionType *FT = Callee->getFunctionType();
+    const bool ProtoOK = FT->getNumParams() == 2 &&
+                         FT->getReturnType() == B.getInt8PtrTy() &&
+                         FT->getParamType(0) == FT->getReturnType() &&
+                         FT->getParamType(1)->isIntegerTy(32);
+    if (!ProtoOK)
+      return 0;
+
+    Value *Haystack = CI->getArgOperand(0);
+    Value *Needle = CI->getArgOperand(1);
+    ConstantInt *NeedleC = dyn_cast<ConstantInt>(Needle);
+
+    if (!NeedleC) {
+      // Non-constant character: if the string length is known, search the
+      // string (nul terminator included) with memchr instead.
+      // This requires DataLayout.
+      if (!TD)
+        return 0;
+
+      const uint64_t LenWithNul = GetStringLength(Haystack);
+      if (LenWithNul == 0)
+        return 0;
+      if (!FT->getParamType(1)->isIntegerTy(32)) // memchr needs i32.
+        return 0;
+
+      Value *NumBytes =
+        ConstantInt::get(TD->getIntPtrType(*Context), LenWithNul);
+      return EmitMemChr(Haystack, Needle, NumBytes, B, TD, TLI);
+    }
+
+    // Constant character: fold only if the string is a literal as well.
+    StringRef Str;
+    if (!getConstantStringInfo(Haystack, Str))
+      return 0;
+
+    // Searching for the nul character (a weird way to spell strlen) yields
+    // the end of the string; otherwise look the character up.
+    size_t Offset;
+    if ((0xFF & NeedleC->getSExtValue()) == 0)
+      Offset = Str.size();
+    else
+      Offset = Str.find(NeedleC->getSExtValue());
+
+    // Character not present: strchr returns null.
+    if (Offset == StringRef::npos)
+      return Constant::getNullValue(CI->getType());
+
+    // strchr(s+n,c)  -> gep(s+n+i,c)
+    return B.CreateGEP(Haystack, B.getInt64(Offset), "strchr");
+  }
+};
+
+/// Simplifies strrchr(s, c) for a constant character c.
+struct StrRChrOpt : public LibCallOptimization {
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Expected prototype: i8* strrchr(i8*, i32).
+    FunctionType *FT = Callee->getFunctionType();
+    const bool ProtoOK = FT->getNumParams() == 2 &&
+                         FT->getReturnType() == B.getInt8PtrTy() &&
+                         FT->getParamType(0) == FT->getReturnType() &&
+                         FT->getParamType(1)->isIntegerTy(32);
+    if (!ProtoOK)
+      return 0;
+
+    Value *Haystack = CI->getArgOperand(0);
+
+    // Nothing to fold unless the searched character is a constant.
+    ConstantInt *NeedleC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+    if (NeedleC == 0)
+      return 0;
+
+    StringRef Str;
+    const bool IsLiteral = getConstantStringInfo(Haystack, Str);
+    if (!IsLiteral) {
+      // strrchr(s, 0) -> strchr(s, 0)
+      if (TD && NeedleC->isZero())
+        return EmitStrChr(Haystack, '\0', B, TD, TLI);
+      return 0;
+    }
+
+    // Position of the last occurrence; the nul character maps to the end of
+    // the string.
+    size_t Offset;
+    if ((0xFF & NeedleC->getSExtValue()) == 0)
+      Offset = Str.size();
+    else
+      Offset = Str.rfind(NeedleC->getSExtValue());
+
+    // Character not present: the result is null.
+    if (Offset == StringRef::npos)
+      return Constant::getNullValue(CI->getType());
+
+    // strrchr(s+n,c) -> gep(s+n+i,c)
+    return B.CreateGEP(Haystack, B.getInt64(Offset), "strrchr");
+  }
+};
+
+/// Simplifies strcmp(a, b) when the operands are identical, constant, empty,
+/// or of known length.
+struct StrCmpOpt : public LibCallOptimization {
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Expected prototype: i32 strcmp(i8*, i8*).
+    FunctionType *FT = Callee->getFunctionType();
+    const bool ProtoOK = FT->getNumParams() == 2 &&
+                         FT->getReturnType()->isIntegerTy(32) &&
+                         FT->getParamType(0) == FT->getParamType(1) &&
+                         FT->getParamType(0) == B.getInt8PtrTy();
+    if (!ProtoOK)
+      return 0;
+
+    Value *LHS = CI->getArgOperand(0);
+    Value *RHS = CI->getArgOperand(1);
+
+    // strcmp(x,x) -> 0
+    if (LHS == RHS)
+      return ConstantInt::get(CI->getType(), 0);
+
+    StringRef LHSStr, RHSStr;
+    const bool LHSKnown = getConstantStringInfo(LHS, LHSStr);
+    const bool RHSKnown = getConstantStringInfo(RHS, RHSStr);
+
+    // strcmp(x, y)  -> cnst  (if both x and y are constant strings)
+    if (LHSKnown && RHSKnown)
+      return ConstantInt::get(CI->getType(), LHSStr.compare(RHSStr));
+
+    // strcmp("", x) -> -*x
+    if (LHSKnown && LHSStr.empty()) {
+      Value *FirstChar = B.CreateLoad(RHS, "strcmpload");
+      Value *Widened = B.CreateZExt(FirstChar, CI->getType());
+      return B.CreateNeg(Widened);
+    }
+
+    // strcmp(x,"") -> *x
+    if (RHSKnown && RHSStr.empty()) {
+      Value *FirstChar = B.CreateLoad(LHS, "strcmpload");
+      return B.CreateZExt(FirstChar, CI->getType());
+    }
+
+    // strcmp(P, "x") -> memcmp(P, "x", 2)
+    // Applies only when both lengths (nul included) are known, i.e. non-zero.
+    const uint64_t LHSLen = GetStringLength(LHS);
+    const uint64_t RHSLen = GetStringLength(RHS);
+    if (LHSLen == 0 || RHSLen == 0)
+      return 0;
+
+    // This transformation requires DataLayout.
+    if (!TD)
+      return 0;
+
+    const uint64_t Shorter = RHSLen < LHSLen ? RHSLen : LHSLen;
+    Value *NumBytes = ConstantInt::get(TD->getIntPtrType(*Context), Shorter);
+    return EmitMemCmp(LHS, RHS, NumBytes, B, TD, TLI);
+  }
+};
+
+/// Simplifies strncmp(a, b, n) for a constant bound n.
+struct StrNCmpOpt : public LibCallOptimization {
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Expected prototype: i32 strncmp(i8*, i8*, <integer>).
+    FunctionType *FT = Callee->getFunctionType();
+    const bool ProtoOK = FT->getNumParams() == 3 &&
+                         FT->getReturnType()->isIntegerTy(32) &&
+                         FT->getParamType(0) == FT->getParamType(1) &&
+                         FT->getParamType(0) == B.getInt8PtrTy() &&
+                         FT->getParamType(2)->isIntegerTy();
+    if (!ProtoOK)
+      return 0;
+
+    Value *LHS = CI->getArgOperand(0);
+    Value *RHS = CI->getArgOperand(1);
+
+    // strncmp(x,x,n)  -> 0
+    if (LHS == RHS)
+      return ConstantInt::get(CI->getType(), 0);
+
+    // Everything below needs a constant bound.
+    ConstantInt *BoundC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+    if (!BoundC)
+      return 0;
+    const uint64_t Bound = BoundC->getZExtValue();
+
+    // strncmp(x,y,0)   -> 0
+    if (Bound == 0)
+      return ConstantInt::get(CI->getType(), 0);
+
+    // strncmp(x,y,1) -> memcmp(x,y,1)
+    if (TD && Bound == 1)
+      return EmitMemCmp(LHS, RHS, CI->getArgOperand(2), B, TD, TLI);
+
+    StringRef LHSStr, RHSStr;
+    const bool LHSKnown = getConstantStringInfo(LHS, LHSStr);
+    const bool RHSKnown = getConstantStringInfo(RHS, RHSStr);
+
+    // strncmp(x, y)  -> cnst  (if both x and y are constant strings)
+    if (LHSKnown && RHSKnown) {
+      // Only the first Bound characters of each string take part.
+      StringRef LHSPrefix = LHSStr.substr(0, Bound);
+      StringRef RHSPrefix = RHSStr.substr(0, Bound);
+      return ConstantInt::get(CI->getType(), LHSPrefix.compare(RHSPrefix));
+    }
+
+    // strncmp("", x, n) -> -*x
+    if (LHSKnown && LHSStr.empty()) {
+      Value *FirstChar = B.CreateLoad(RHS, "strcmpload");
+      Value *Widened = B.CreateZExt(FirstChar, CI->getType());
+      return B.CreateNeg(Widened);
+    }
+
+    // strncmp(x, "", n) -> *x
+    if (RHSKnown && RHSStr.empty()) {
+      Value *FirstChar = B.CreateLoad(LHS, "strcmpload");
+      return B.CreateZExt(FirstChar, CI->getType());
+    }
+
+    return 0;
+  }
+};
+
+/// Simplifies strcpy(dst, src): a self-copy becomes dst, a copy from a string
+/// of known length becomes memcpy.
+struct StrCpyOpt : public LibCallOptimization {
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Expected prototype: i8* strcpy(i8*, i8*).
+    FunctionType *FT = Callee->getFunctionType();
+    const bool ProtoOK = FT->getNumParams() == 2 &&
+                         FT->getReturnType() == FT->getParamType(0) &&
+                         FT->getParamType(0) == FT->getParamType(1) &&
+                         FT->getParamType(0) == B.getInt8PtrTy();
+    if (!ProtoOK)
+      return 0;
+
+    Value *Dst = CI->getArgOperand(0);
+    Value *Src = CI->getArgOperand(1);
+
+    // strcpy(x,x)  -> x
+    if (Dst == Src)
+      return Src;
+
+    // The memcpy lowering requires DataLayout.
+    if (!TD)
+      return 0;
+
+    // Length including the nul terminator; 0 means "unknown".
+    const uint64_t LenWithNul = GetStringLength(Src);
+    if (LenWithNul == 0)
+      return 0;
+
+    // Copy the string together with its nul byte, using align = 1.
+    Value *NumBytes = ConstantInt::get(TD->getIntPtrType(*Context), LenWithNul);
+    B.CreateMemCpy(Dst, Src, NumBytes, 1);
+    return Dst;
+  }
+};
+
+/// Simplifies stpcpy(dst, src): a self-copy becomes dst + strlen(dst), a copy
+/// from a string of known length becomes memcpy plus an end pointer.
+struct StpCpyOpt: public LibCallOptimization {
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Expected prototype: i8* stpcpy(i8*, i8*).
+    FunctionType *FT = Callee->getFunctionType();
+    const bool ProtoOK = FT->getNumParams() == 2 &&
+                         FT->getReturnType() == FT->getParamType(0) &&
+                         FT->getParamType(0) == FT->getParamType(1) &&
+                         FT->getParamType(0) == B.getInt8PtrTy();
+    if (!ProtoOK)
+      return 0;
+
+    // All transformations below require DataLayout.
+    if (!TD)
+      return 0;
+
+    Value *Dst = CI->getArgOperand(0);
+    Value *Src = CI->getArgOperand(1);
+
+    // stpcpy(x,x)  -> x+strlen(x)
+    if (Dst == Src) {
+      Value *Length = EmitStrLen(Src, B, TD, TLI);
+      if (!Length)
+        return 0;
+      return B.CreateInBoundsGEP(Dst, Length);
+    }
+
+    // Length including the nul terminator; 0 means "unknown".
+    const uint64_t LenWithNul = GetStringLength(Src);
+    if (LenWithNul == 0)
+      return 0;
+
+    Type *SizeTy = TD->getIntPtrType(FT->getParamType(0));
+    Value *NumBytes = ConstantInt::get(SizeTy, LenWithNul);
+    // The result points at the copied nul terminator.
+    Value *EndOfCopy =
+      B.CreateGEP(Dst, ConstantInt::get(SizeTy, LenWithNul - 1));
+
+    // Copy the string together with its nul byte, using align = 1.
+    B.CreateMemCpy(Dst, Src, NumBytes, 1);
+    return EndOfCopy;
+  }
+};
+
+/// Simplifies strncpy(dst, src, n) when the length of src is known.
+struct StrNCpyOpt : public LibCallOptimization {
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Expected prototype: i8* strncpy(i8*, i8*, <integer>).
+    FunctionType *FT = Callee->getFunctionType();
+    const bool ProtoOK = FT->getNumParams() == 3 &&
+                         FT->getReturnType() == FT->getParamType(0) &&
+                         FT->getParamType(0) == FT->getParamType(1) &&
+                         FT->getParamType(0) == B.getInt8PtrTy() &&
+                         FT->getParamType(2)->isIntegerTy();
+    if (!ProtoOK)
+      return 0;
+
+    Value *Dst = CI->getArgOperand(0);
+    Value *Src = CI->getArgOperand(1);
+    Value *Bound = CI->getArgOperand(2);
+
+    // Length including the nul terminator; 0 means "unknown".
+    const uint64_t SrcLenWithNul = GetStringLength(Src);
+    if (SrcLenWithNul == 0)
+      return 0;
+
+    if (SrcLenWithNul == 1) {
+      // strncpy(x, "", y) -> memset(x, '\0', y, 1)
+      B.CreateMemSet(Dst, B.getInt8('\0'), Bound, 1);
+      return Dst;
+    }
+
+    // The remaining folds need a constant bound.
+    ConstantInt *BoundC = dyn_cast<ConstantInt>(Bound);
+    if (!BoundC)
+      return 0;
+    const uint64_t NumChars = BoundC->getZExtValue();
+
+    // strncpy(x, y, 0) -> x
+    if (NumChars == 0)
+      return Dst;
+
+    // The memcpy lowering requires DataLayout.
+    if (!TD)
+      return 0;
+
+    // A bound beyond the terminator needs zero padding; leave that to
+    // strncpy itself.
+    if (NumChars > SrcLenWithNul)
+      return 0;
+
+    // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant]
+    Type *SizeTy = TD->getIntPtrType(FT->getParamType(0));
+    B.CreateMemCpy(Dst, Src, ConstantInt::get(SizeTy, NumChars), 1);
+    return Dst;
+  }
+};
+
+/// Simplifies strlen(s): constant-folds known lengths and reduces
+/// zero-equality tests of the result to a test of the first character.
+struct StrLenOpt : public LibCallOptimization {
+  // This optimization is applied regardless of the call's calling convention.
+  virtual bool ignoreCallingConv() { return true; }
+
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Expected prototype: <integer> strlen(i8*).
+    FunctionType *FT = Callee->getFunctionType();
+    const bool ProtoOK = FT->getNumParams() == 1 &&
+                         FT->getParamType(0) == B.getInt8PtrTy() &&
+                         FT->getReturnType()->isIntegerTy();
+    if (!ProtoOK)
+      return 0;
+
+    Value *Str = CI->getArgOperand(0);
+
+    // Constant folding: strlen("xyz") -> 3
+    // (GetStringLength counts the nul terminator; 0 means "unknown".)
+    const uint64_t LenWithNul = GetStringLength(Str);
+    if (LenWithNul != 0)
+      return ConstantInt::get(CI->getType(), LenWithNul - 1);
+
+    // strlen(x) != 0 --> *x != 0
+    // strlen(x) == 0 --> *x == 0
+    if (!isOnlyUsedInZeroEqualityComparison(CI))
+      return 0;
+
+    Value *FirstChar = B.CreateLoad(Str, "strlenfirst");
+    return B.CreateZExt(FirstChar, CI->getType());
+  }
+};
+
+/// Simplifies strpbrk(s, accept) when one or both arguments are constant
+/// strings.
+struct StrPBrkOpt : public LibCallOptimization {
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Expected prototype: i8* strpbrk(i8*, i8*).
+    FunctionType *FT = Callee->getFunctionType();
+    const bool ProtoOK = FT->getNumParams() == 2 &&
+                         FT->getParamType(0) == B.getInt8PtrTy() &&
+                         FT->getParamType(1) == FT->getParamType(0) &&
+                         FT->getReturnType() == FT->getParamType(0);
+    if (!ProtoOK)
+      return 0;
+
+    Value *Haystack = CI->getArgOperand(0);
+    Value *Accept = CI->getArgOperand(1);
+
+    StringRef HaystackStr, AcceptStr;
+    const bool HaystackKnown = getConstantStringInfo(Haystack, HaystackStr);
+    const bool AcceptKnown = getConstantStringInfo(Accept, AcceptStr);
+
+    // strpbrk(s, "") -> NULL
+    // strpbrk("", s) -> NULL
+    const bool HaystackEmpty = HaystackKnown && HaystackStr.empty();
+    const bool AcceptEmpty = AcceptKnown && AcceptStr.empty();
+    if (HaystackEmpty || AcceptEmpty)
+      return Constant::getNullValue(CI->getType());
+
+    // Both strings known: fold to a pointer into the haystack, or to null
+    // when no character matches.
+    if (HaystackKnown && AcceptKnown) {
+      const size_t Offset = HaystackStr.find_first_of(AcceptStr);
+      if (Offset == StringRef::npos)
+        return Constant::getNullValue(CI->getType());
+      return B.CreateGEP(Haystack, B.getInt64(Offset), "strpbrk");
+    }
+
+    // strpbrk(s, "a") -> strchr(s, 'a')
+    if (TD && AcceptKnown && AcceptStr.size() == 1)
+      return EmitStrChr(Haystack, AcceptStr[0], B, TD, TLI);
+
+    return 0;
+  }
+};
+
+/// Annotates calls to the strto* family; the call itself is never replaced.
+struct StrToOpt : public LibCallOptimization {
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Accept two- or three-parameter prototypes whose first two parameters
+    // are pointers.
+    FunctionType *FT = Callee->getFunctionType();
+    const unsigned NumParams = FT->getNumParams();
+    const bool ProtoOK = (NumParams == 2 || NumParams == 3) &&
+                         FT->getParamType(0)->isPointerTy() &&
+                         FT->getParamType(1)->isPointerTy();
+    if (!ProtoOK)
+      return 0;
+
+    // With a null EndPtr, this function won't capture the main argument.
+    // It would be readonly too, except that it still may write to errno.
+    if (isa<ConstantPointerNull>(CI->getArgOperand(1)))
+      CI->addAttribute(1, Attribute::NoCapture);
+
+    // Only an attribute was (possibly) added; report "no transformation".
+    return 0;
+  }
+};
+
+/// Simplifies strspn(s, accept) when the result is known at compile time.
+struct StrSpnOpt : public LibCallOptimization {
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Expected prototype: <integer> strspn(i8*, i8*).
+    FunctionType *FT = Callee->getFunctionType();
+    const bool ProtoOK = FT->getNumParams() == 2 &&
+                         FT->getParamType(0) == B.getInt8PtrTy() &&
+                         FT->getParamType(1) == FT->getParamType(0) &&
+                         FT->getReturnType()->isIntegerTy();
+    if (!ProtoOK)
+      return 0;
+
+    StringRef Str, Accept;
+    const bool StrKnown = getConstantStringInfo(CI->getArgOperand(0), Str);
+    const bool AcceptKnown =
+      getConstantStringInfo(CI->getArgOperand(1), Accept);
+
+    // strspn(s, "") -> 0
+    // strspn("", s) -> 0
+    const bool StrEmpty = StrKnown && Str.empty();
+    const bool AcceptEmpty = AcceptKnown && Accept.empty();
+    if (StrEmpty || AcceptEmpty)
+      return Constant::getNullValue(CI->getType());
+
+    // Constant folding requires both strings.
+    if (!StrKnown || !AcceptKnown)
+      return 0;
+
+    // Length of the leading run made only of accepted characters; the whole
+    // string when every character is accepted.
+    size_t SpanLen = Str.find_first_not_of(Accept);
+    if (SpanLen == StringRef::npos)
+      SpanLen = Str.size();
+    return ConstantInt::get(CI->getType(), SpanLen);
+  }
+};
+
+/// Simplifies strcspn(s, reject) when the result is known at compile time or
+/// degenerates to strlen.
+struct StrCSpnOpt : public LibCallOptimization {
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Expected prototype: <integer> strcspn(i8*, i8*).
+    FunctionType *FT = Callee->getFunctionType();
+    const bool ProtoOK = FT->getNumParams() == 2 &&
+                         FT->getParamType(0) == B.getInt8PtrTy() &&
+                         FT->getParamType(1) == FT->getParamType(0) &&
+                         FT->getReturnType()->isIntegerTy();
+    if (!ProtoOK)
+      return 0;
+
+    StringRef Str, Reject;
+    const bool StrKnown = getConstantStringInfo(CI->getArgOperand(0), Str);
+    const bool RejectKnown =
+      getConstantStringInfo(CI->getArgOperand(1), Reject);
+
+    // strcspn("", s) -> 0
+    if (StrKnown && Str.empty())
+      return Constant::getNullValue(CI->getType());
+
+    // Both strings known: length of the leading run free of rejected
+    // characters; the whole string when none of them occurs.
+    if (StrKnown && RejectKnown) {
+      size_t SpanLen = Str.find_first_of(Reject);
+      if (SpanLen == StringRef::npos)
+        SpanLen = Str.size();
+      return ConstantInt::get(CI->getType(), SpanLen);
+    }
+
+    // strcspn(s, "") -> strlen(s)
+    if (TD && RejectKnown && Reject.empty())
+      return EmitStrLen(CI->getArgOperand(0), B, TD, TLI);
+
+    return 0;
+  }
+};
+
+// Simplifies strstr(haystack, needle). Handled cases, in order: identical
+// arguments, results only compared for equality against the haystack, an
+// empty needle, two constant strings, and a single-character needle.
+struct StrStrOpt : public LibCallOptimization {
+ // Returns the replacement value, CI itself when the users of CI were
+ // rewritten in place, or null when nothing was changed.
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ FunctionType *FT = Callee->getFunctionType();
+ // Prototype check: two pointer parameters and a pointer result.
+ if (FT->getNumParams() != 2 ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !FT->getReturnType()->isPointerTy())
+ return 0;
+
+ // fold strstr(x, x) -> x.
+ if (CI->getArgOperand(0) == CI->getArgOperand(1))
+ return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
+
+ // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0
+ if (TD && isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) {
+ Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD, TLI);
+ if (!StrLen)
+ return 0;
+ Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1),
+ StrLen, B, TD, TLI);
+ if (!StrNCmp)
+ return 0;
+ // Rewrite every comparison that uses CI. The iterator is advanced before
+ // the current user is replaced, so replacing that user does not disturb
+ // the traversal. The cast<> is justified by the
+ // isOnlyUsedInEqualityComparison() check above.
+ for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end();
+ UI != UE; ) {
+ ICmpInst *Old = cast<ICmpInst>(*UI++);
+ Value *Cmp = B.CreateICmp(Old->getPredicate(), StrNCmp,
+ ConstantInt::getNullValue(StrNCmp->getType()),
+ "cmp");
+ LCS->replaceAllUsesWith(Old, Cmp);
+ }
+ // Returning CI signals that the call was transformed and is to be deleted.
+ return CI;
+ }
+
+ // See if either input string is a constant string.
+ StringRef SearchStr, ToFindStr;
+ bool HasStr1 = getConstantStringInfo(CI->getArgOperand(0), SearchStr);
+ bool HasStr2 = getConstantStringInfo(CI->getArgOperand(1), ToFindStr);
+
+ // fold strstr(x, "") -> x.
+ if (HasStr2 && ToFindStr.empty())
+ return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
+
+ // If both strings are known, constant fold it.
+ if (HasStr1 && HasStr2) {
+ size_t Offset = SearchStr.find(ToFindStr);
+
+ if (Offset == StringRef::npos) // strstr("foo", "bar") -> null
+ return Constant::getNullValue(CI->getType());
+
+ // strstr("abcd", "bc") -> gep((char*)"abcd", 1)
+ Value *Result = CastToCStr(CI->getArgOperand(0), B);
+ Result = B.CreateConstInBoundsGEP1_64(Result, Offset, "strstr");
+ return B.CreateBitCast(Result, CI->getType());
+ }
+
+ // fold strstr(x, "y") -> strchr(x, 'y').
+ // NOTE(review): unlike the other EmitStrChr callers in this file, this path
+ // is not guarded by a TD check -- confirm EmitStrChr tolerates a null TD.
+ if (HasStr2 && ToFindStr.size() == 1) {
+ Value *StrChr= EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TD, TLI);
+ return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : 0;
+ }
+ return 0;
+ }
+};
+
+/// Simplifies memcmp(lhs, rhs, n) for identical operands and for small or
+/// fully constant inputs.
+struct MemCmpOpt : public LibCallOptimization {
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Expected prototype: i32 memcmp(<pointer>, <pointer>, <any>).
+    FunctionType *FT = Callee->getFunctionType();
+    const bool ProtoOK = FT->getNumParams() == 3 &&
+                         FT->getParamType(0)->isPointerTy() &&
+                         FT->getParamType(1)->isPointerTy() &&
+                         FT->getReturnType()->isIntegerTy(32);
+    if (!ProtoOK)
+      return 0;
+
+    Value *LHS = CI->getArgOperand(0);
+    Value *RHS = CI->getArgOperand(1);
+
+    // memcmp(s,s,x) -> 0
+    if (LHS == RHS)
+      return Constant::getNullValue(CI->getType());
+
+    // Everything below needs a constant length.
+    ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+    if (LenC == 0)
+      return 0;
+    const uint64_t NumBytes = LenC->getZExtValue();
+
+    // memcmp(s1,s2,0) -> 0
+    if (NumBytes == 0)
+      return Constant::getNullValue(CI->getType());
+
+    // memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS
+    if (NumBytes == 1) {
+      Value *LHSByte = B.CreateLoad(CastToCStr(LHS, B), "lhsc");
+      Value *LHSWide = B.CreateZExt(LHSByte, CI->getType(), "lhsv");
+      Value *RHSByte = B.CreateLoad(CastToCStr(RHS, B), "rhsc");
+      Value *RHSWide = B.CreateZExt(RHSByte, CI->getType(), "rhsv");
+      return B.CreateSub(LHSWide, RHSWide, "chardiff");
+    }
+
+    // Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant)
+    StringRef LHSStr, RHSStr;
+    if (!getConstantStringInfo(LHS, LHSStr) ||
+        !getConstantStringInfo(RHS, RHSStr))
+      return 0;
+
+    // Never read past the end of either constant.
+    if (NumBytes > LHSStr.size() || NumBytes > RHSStr.size())
+      return 0;
+
+    // Normalize the host memcmp result to -1 / 0 / 1 so that the folded
+    // constant is the same across platforms.
+    const int Cmp = memcmp(LHSStr.data(), RHSStr.data(), NumBytes);
+    uint64_t Normalized = 0;
+    if (Cmp < 0)
+      Normalized = -1;
+    else if (Cmp > 0)
+      Normalized = 1;
+    return ConstantInt::get(CI->getType(), Normalized);
+  }
+};
+
+struct MemCpyOpt : public LibCallOptimization {
+  /// Emit an llvm.memcpy (alignment 1) for a memcpy libcall and return the
+  /// destination operand. Returns 0 if DataLayout is unavailable or the
+  /// prototype does not match.
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // The size parameter's type is checked against DataLayout below.
+    if (!TD)
+      return 0;
+
+    // Expected prototype: the result has the destination's pointer type, the
+    // source is a pointer, and the size is the pointer-sized integer.
+    FunctionType *ProtoTy = Callee->getFunctionType();
+    bool ProtoOK = ProtoTy->getNumParams() == 3 &&
+                   ProtoTy->getReturnType() == ProtoTy->getParamType(0) &&
+                   ProtoTy->getParamType(0)->isPointerTy() &&
+                   ProtoTy->getParamType(1)->isPointerTy() &&
+                   ProtoTy->getParamType(2) == TD->getIntPtrType(*Context);
+    if (!ProtoOK)
+      return 0;
+
+    Value *Dst = CI->getArgOperand(0);
+    Value *Src = CI->getArgOperand(1);
+    Value *Size = CI->getArgOperand(2);
+
+    // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1)
+    B.CreateMemCpy(Dst, Src, Size, 1);
+    return Dst;
+  }
+};
+
+struct MemMoveOpt : public LibCallOptimization {
+  /// Emit an llvm.memmove (alignment 1) for a memmove libcall and return the
+  /// destination operand. Returns 0 if DataLayout is unavailable or the
+  /// prototype does not match.
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // The size parameter's type is checked against DataLayout below.
+    if (!TD)
+      return 0;
+
+    // Expected prototype: the result has the destination's pointer type, the
+    // source is a pointer, and the size is the pointer-sized integer.
+    FunctionType *ProtoTy = Callee->getFunctionType();
+    bool ProtoOK = ProtoTy->getNumParams() == 3 &&
+                   ProtoTy->getReturnType() == ProtoTy->getParamType(0) &&
+                   ProtoTy->getParamType(0)->isPointerTy() &&
+                   ProtoTy->getParamType(1)->isPointerTy() &&
+                   ProtoTy->getParamType(2) == TD->getIntPtrType(*Context);
+    if (!ProtoOK)
+      return 0;
+
+    Value *Dst = CI->getArgOperand(0);
+    Value *Src = CI->getArgOperand(1);
+    Value *Size = CI->getArgOperand(2);
+
+    // memmove(x, y, n) -> llvm.memmove(x, y, n, 1)
+    B.CreateMemMove(Dst, Src, Size, 1);
+    return Dst;
+  }
+};
+
+struct MemSetOpt : public LibCallOptimization {
+  /// Emit an llvm.memset (alignment 1) for a memset libcall and return the
+  /// destination operand. Returns 0 if DataLayout is unavailable or the
+  /// prototype does not match.
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // The size parameter's type is checked against DataLayout below.
+    if (!TD)
+      return 0;
+
+    // Expected prototype: the result has the destination's pointer type, the
+    // fill value is an integer, and the size is the integer DataLayout
+    // associates with the destination pointer type.
+    FunctionType *ProtoTy = Callee->getFunctionType();
+    bool ProtoOK =
+        ProtoTy->getNumParams() == 3 &&
+        ProtoTy->getReturnType() == ProtoTy->getParamType(0) &&
+        ProtoTy->getParamType(0)->isPointerTy() &&
+        ProtoTy->getParamType(1)->isIntegerTy() &&
+        ProtoTy->getParamType(2) == TD->getIntPtrType(ProtoTy->getParamType(0));
+    if (!ProtoOK)
+      return 0;
+
+    Value *Dst = CI->getArgOperand(0);
+
+    // memset(p, v, n) -> llvm.memset(p, v, n, 1)
+    // The fill value is converted to i8 with an unsigned integer cast first.
+    Value *FillByte = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
+    B.CreateMemSet(Dst, FillByte, CI->getArgOperand(2), 1);
+    return Dst;
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// Math Library Optimizations
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Double -> Float Shrinking Optimizations for Unary Functions like 'floor'
+
+struct UnaryDoubleFPOpt : public LibCallOptimization {
+  // When set, the shrink is only performed if every user of the call is an
+  // fptrunc to float.
+  bool CheckRetType;
+  UnaryDoubleFPOpt(bool CheckReturnType): CheckRetType(CheckReturnType) {}
+
+  /// Rewrite fn((double)floatval) as (double)fn_float(floatval) for a
+  /// double(double) callee. Returns 0 when the pattern does not match.
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    FunctionType *ProtoTy = Callee->getFunctionType();
+    bool IsDoubleUnary = ProtoTy->getNumParams() == 1 &&
+                         ProtoTy->getReturnType()->isDoubleTy() &&
+                         ProtoTy->getParamType(0)->isDoubleTy();
+    if (!IsDoubleUnary)
+      return 0;
+
+    if (CheckRetType) {
+      // Check if all the uses for function like 'sin' are converted to float.
+      for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end();
+           UI != UE; ++UI) {
+        FPTruncInst *Trunc = dyn_cast<FPTruncInst>(*UI);
+        if (!Trunc || !Trunc->getType()->isFloatTy())
+          return 0;
+      }
+    }
+
+    // If this is something like 'floor((double)floatval)', convert to floorf.
+    FPExtInst *Ext = dyn_cast<FPExtInst>(CI->getArgOperand(0));
+    if (!Ext)
+      return 0;
+    Value *FloatArg = Ext->getOperand(0);
+    if (!FloatArg->getType()->isFloatTy())
+      return 0;
+
+    // floor((double)floatval) -> (double)floorf(floatval)
+    Value *FloatCall = EmitUnaryFloatFnCall(FloatArg, Callee->getName(), B,
+                                            Callee->getAttributes());
+    return B.CreateFPExt(FloatCall, B.getDoubleTy());
+  }
+};
+
+/// Base for the math optimizers in this file that consult UnsafeFPShrink
+/// before attempting a double -> float shrink.
+struct UnsafeFPLibCallOptimization : public LibCallOptimization {
+  bool UnsafeFPShrink;
+  UnsafeFPLibCallOptimization(bool UnsafeFPShrink)
+    : UnsafeFPShrink(UnsafeFPShrink) {}
+};
+
+struct CosOpt : public UnsafeFPLibCallOptimization {
+  CosOpt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {}
+
+  /// Optionally shrink cos to cosf, and fold cos(-x) to cos(x). Returns the
+  /// shrink result (possibly null) when the negation fold does not apply.
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    Value *Shrunk = NULL;
+    if (UnsafeFPShrink && Callee->getName() == "cos" &&
+        TLI->has(LibFunc::cosf)) {
+      UnaryDoubleFPOpt Shrinker(true);
+      Shrunk = Shrinker.callOptimizer(Callee, CI, B);
+    }
+
+    // The negation fold needs one FP parameter whose type is also the result
+    // type.
+    FunctionType *ProtoTy = Callee->getFunctionType();
+    bool IsFPUnary = ProtoTy->getNumParams() == 1 &&
+                     ProtoTy->getReturnType() == ProtoTy->getParamType(0) &&
+                     ProtoTy->getParamType(0)->isFloatingPointTy();
+    if (!IsFPUnary)
+      return Shrunk;
+
+    Value *Arg = CI->getArgOperand(0);
+    if (!BinaryOperator::isFNeg(Arg))
+      return Shrunk;
+
+    // cos(-x) -> cos(x)
+    BinaryOperator *Negation = cast<BinaryOperator>(Arg);
+    return B.CreateCall(Callee, Negation->getOperand(1), "cos");
+  }
+};
+
+struct PowOpt : public UnsafeFPLibCallOptimization {
+  PowOpt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {}
+
+  /// Fold pow calls with a constant base (1.0, 2.0) or a constant exponent
+  /// (0.0, 0.5, 1.0, 2.0, -1.0). Returns 0 or the shrink result when nothing
+  /// applies.
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // NOTE(review): UnaryDoubleFPOpt bails out unless the callee has exactly
+    // one parameter, so for a two-argument pow this attempt looks like it can
+    // never produce a value - confirm before relying on Shrunk being non-null.
+    Value *Shrunk = NULL;
+    if (UnsafeFPShrink && Callee->getName() == "pow" &&
+        TLI->has(LibFunc::powf)) {
+      UnaryDoubleFPOpt Shrinker(true);
+      Shrunk = Shrinker.callOptimizer(Callee, CI, B);
+    }
+
+    // Require two parameters of one FP type that is also the result type.
+    FunctionType *ProtoTy = Callee->getFunctionType();
+    bool ProtoOK = ProtoTy->getNumParams() == 2 &&
+                   ProtoTy->getReturnType() == ProtoTy->getParamType(0) &&
+                   ProtoTy->getParamType(0) == ProtoTy->getParamType(1) &&
+                   ProtoTy->getParamType(0)->isFloatingPointTy();
+    if (!ProtoOK)
+      return Shrunk;
+
+    Value *Base = CI->getArgOperand(0);
+    Value *Exponent = CI->getArgOperand(1);
+
+    if (ConstantFP *BaseC = dyn_cast<ConstantFP>(Base)) {
+      // pow(1.0, x) -> 1.0
+      if (BaseC->isExactlyValue(1.0))
+        return BaseC;
+      // pow(2.0, x) -> exp2(x)
+      if (BaseC->isExactlyValue(2.0) &&
+          hasUnaryFloatFn(TLI, Base->getType(), LibFunc::exp2, LibFunc::exp2f,
+                          LibFunc::exp2l))
+        return EmitUnaryFloatFnCall(Exponent, "exp2", B,
+                                    Callee->getAttributes());
+    }
+
+    // The remaining folds all key off a constant exponent.
+    ConstantFP *ExpC = dyn_cast<ConstantFP>(Exponent);
+    if (!ExpC)
+      return Shrunk;
+
+    // pow(x, 0.0) -> 1.0
+    if (ExpC->getValueAPF().isZero())
+      return ConstantFP::get(CI->getType(), 1.0);
+
+    if (ExpC->isExactlyValue(0.5) &&
+        hasUnaryFloatFn(TLI, Exponent->getType(), LibFunc::sqrt,
+                        LibFunc::sqrtf, LibFunc::sqrtl) &&
+        hasUnaryFloatFn(TLI, Exponent->getType(), LibFunc::fabs,
+                        LibFunc::fabsf, LibFunc::fabsl)) {
+      // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))).
+      // This is faster than calling pow, and still handles negative zero
+      // and negative infinity correctly.
+      // TODO: In fast-math mode, this could be just sqrt(x).
+      // TODO: In finite-only mode, this could be just fabs(sqrt(x)).
+      Value *PosInf = ConstantFP::getInfinity(CI->getType());
+      Value *NegInf = ConstantFP::getInfinity(CI->getType(), true);
+      Value *Root = EmitUnaryFloatFnCall(Base, "sqrt", B,
+                                         Callee->getAttributes());
+      Value *AbsRoot = EmitUnaryFloatFnCall(Root, "fabs", B,
+                                            Callee->getAttributes());
+      Value *IsNegInf = B.CreateFCmpOEQ(Base, NegInf);
+      return B.CreateSelect(IsNegInf, PosInf, AbsRoot);
+    }
+
+    // pow(x, 1.0) -> x
+    if (ExpC->isExactlyValue(1.0))
+      return Base;
+    // pow(x, 2.0) -> x*x
+    if (ExpC->isExactlyValue(2.0))
+      return B.CreateFMul(Base, Base, "pow2");
+    // pow(x, -1.0) -> 1.0/x
+    if (ExpC->isExactlyValue(-1.0)) {
+      Value *OnePointZero = ConstantFP::get(CI->getType(), 1.0);
+      return B.CreateFDiv(OnePointZero, Base, "powrecip");
+    }
+    return 0;
+  }
+};
+
+struct Exp2Opt : public UnsafeFPLibCallOptimization {
+  Exp2Opt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {}
+
+  /// Optionally shrink exp2 to exp2f, and turn exp2 of an int-to-FP
+  /// conversion into an ldexp call. Returns the shrink result (possibly
+  /// null) when the ldexp rewrite does not apply.
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    Value *Shrunk = NULL;
+    if (UnsafeFPShrink && Callee->getName() == "exp2" &&
+        TLI->has(LibFunc::exp2f)) {
+      UnaryDoubleFPOpt Shrinker(true);
+      Shrunk = Shrinker.callOptimizer(Callee, CI, B);
+    }
+
+    // Require one FP parameter whose type is also the result type.
+    FunctionType *ProtoTy = Callee->getFunctionType();
+    bool IsFPUnary = ProtoTy->getNumParams() == 1 &&
+                     ProtoTy->getReturnType() == ProtoTy->getParamType(0) &&
+                     ProtoTy->getParamType(0)->isFloatingPointTy();
+    if (!IsFPUnary)
+      return Shrunk;
+
+    Value *Arg = CI->getArgOperand(0);
+    Type *ArgTy = Arg->getType();
+
+    // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if x is at most 32 bits wide
+    // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if x is under 32 bits wide
+    Value *LdExpArg = 0;
+    if (SIToFPInst *Signed = dyn_cast<SIToFPInst>(Arg)) {
+      Value *IntSrc = Signed->getOperand(0);
+      if (IntSrc->getType()->getPrimitiveSizeInBits() <= 32)
+        LdExpArg = B.CreateSExt(IntSrc, B.getInt32Ty());
+    } else if (UIToFPInst *Unsigned = dyn_cast<UIToFPInst>(Arg)) {
+      Value *IntSrc = Unsigned->getOperand(0);
+      if (IntSrc->getType()->getPrimitiveSizeInBits() < 32)
+        LdExpArg = B.CreateZExt(IntSrc, B.getInt32Ty());
+    }
+
+    if (!LdExpArg)
+      return Shrunk;
+
+    // Select the ldexp variant from the argument's FP type.
+    const char *LdExpName = "ldexpl";
+    if (ArgTy->isFloatTy())
+      LdExpName = "ldexpf";
+    else if (ArgTy->isDoubleTy())
+      LdExpName = "ldexp";
+
+    // Build the constant 1.0 as a float, extending it for wider types.
+    Constant *One = ConstantFP::get(*Context, APFloat(1.0f));
+    if (!ArgTy->isFloatTy())
+      One = ConstantExpr::getFPExtend(One, ArgTy);
+
+    Module *M = Caller->getParent();
+    Value *LdExpFn = M->getOrInsertFunction(LdExpName, ArgTy, ArgTy,
+                                            B.getInt32Ty(), NULL);
+    CallInst *LdExpCall = B.CreateCall2(LdExpFn, One, LdExpArg);
+    // Copy the calling convention when the callee resolves to a function.
+    if (const Function *F = dyn_cast<Function>(LdExpFn->stripPointerCasts()))
+      LdExpCall->setCallingConv(F->getCallingConv());
+
+    return LdExpCall;
+  }
+};
+
+/// Combines sinpi/cospi (and existing sincospi_stret) calls that share one
+/// argument into a single __sincospi_stret[f] call.
+struct SinCosPiOpt : public LibCallOptimization {
+  SinCosPiOpt() {}
+
+  /// Collects the trig calls using CI's argument, and when both a sine and a
+  /// cosine are wanted, inserts one combined call and redirects all uses to
+  /// its results. Always returns 0; the replacements are performed through
+  /// LCS->replaceAllUsesWith.
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Make sure the prototype is as expected, otherwise the rest of the
+    // function is probably invalid and likely to abort.
+    if (!isTrigLibCall(CI))
+      return 0;
+
+    Value *Arg = CI->getArgOperand(0);
+    SmallVector<CallInst *, 1> SinCalls;
+    SmallVector<CallInst *, 1> CosCalls;
+    SmallVector<CallInst *, 1> SinCosCalls;
+
+    bool IsFloat = Arg->getType()->isFloatTy();
+
+    // Look for all compatible sinpi, cospi and sincospi calls with the same
+    // argument. If there are enough (in some sense) we can make the
+    // substitution.
+    for (Value::use_iterator UI = Arg->use_begin(), UE = Arg->use_end();
+         UI != UE; ++UI)
+      classifyArgUse(*UI, CI->getParent(), IsFloat, SinCalls, CosCalls,
+                     SinCosCalls);
+
+    // It's only worthwhile if both sinpi and cospi are actually used.
+    if (SinCosCalls.empty() && (SinCalls.empty() || CosCalls.empty()))
+      return 0;
+
+    // All three are assigned by insertSinCosCall.
+    Value *Sin, *Cos, *SinCos;
+    insertSinCosCall(B, CI->getCalledFunction(), Arg, IsFloat, Sin, Cos,
+                     SinCos);
+
+    replaceTrigInsts(SinCalls, Sin);
+    replaceTrigInsts(CosCalls, Cos);
+    replaceTrigInsts(SinCosCalls, SinCos);
+
+    return 0;
+  }
+
+  /// Returns true if CI is a direct, nounwind + readnone call to a
+  /// float(float) or double(double) function.
+  bool isTrigLibCall(CallInst *CI) {
+    // getCalledFunction() yields null for indirect calls; those can never be
+    // recognised library calls, and dereferencing the null would crash.
+    Function *Callee = CI->getCalledFunction();
+    if (!Callee)
+      return false;
+    FunctionType *FT = Callee->getFunctionType();
+
+    // We can only hope to do anything useful if we can ignore things like errno
+    // and floating-point exceptions.
+    bool AttributesSafe = CI->hasFnAttr(Attribute::NoUnwind) &&
+                          CI->hasFnAttr(Attribute::ReadNone);
+
+    // Other than that we need float(float) or double(double)
+    return AttributesSafe && FT->getNumParams() == 1 &&
+           FT->getReturnType() == FT->getParamType(0) &&
+           (FT->getParamType(0)->isFloatTy() ||
+            FT->getParamType(0)->isDoubleTy());
+  }
+
+  /// If Val is a call to an available sinpi/cospi/sincospi_stret variant of
+  /// the requested precision, appends it to the matching list; otherwise does
+  /// nothing. BB is currently not consulted.
+  void classifyArgUse(Value *Val, BasicBlock *BB, bool IsFloat,
+                      SmallVectorImpl<CallInst *> &SinCalls,
+                      SmallVectorImpl<CallInst *> &CosCalls,
+                      SmallVectorImpl<CallInst *> &SinCosCalls) {
+    CallInst *CI = dyn_cast<CallInst>(Val);
+
+    if (!CI)
+      return;
+
+    // The argument may also be passed to an indirect call, which has no
+    // called function to name; skip such users instead of dereferencing null.
+    Function *Callee = CI->getCalledFunction();
+    if (!Callee)
+      return;
+
+    StringRef FuncName = Callee->getName();
+    LibFunc::Func Func;
+    if (!TLI->getLibFunc(FuncName, Func) || !TLI->has(Func) ||
+        !isTrigLibCall(CI))
+      return;
+
+    if (IsFloat) {
+      if (Func == LibFunc::sinpif)
+        SinCalls.push_back(CI);
+      else if (Func == LibFunc::cospif)
+        CosCalls.push_back(CI);
+      else if (Func == LibFunc::sincospi_stretf)
+        SinCosCalls.push_back(CI);
+    } else {
+      if (Func == LibFunc::sinpi)
+        SinCalls.push_back(CI);
+      else if (Func == LibFunc::cospi)
+        CosCalls.push_back(CI);
+      else if (Func == LibFunc::sincospi_stret)
+        SinCosCalls.push_back(CI);
+    }
+  }
+
+  /// Redirects every use of each call in Calls to Res.
+  void replaceTrigInsts(SmallVectorImpl<CallInst*> &Calls, Value *Res) {
+    for (SmallVectorImpl<CallInst*>::iterator I = Calls.begin(),
+                                              E = Calls.end();
+         I != E; ++I) {
+      LCS->replaceAllUsesWith(*I, Res);
+    }
+  }
+
+  /// Declares (if needed) and calls __sincospi_stret[f] on Arg, then extracts
+  /// the two results. On return SinCos is the call, Sin is element 0 and Cos
+  /// is element 1. Note that this moves B's insertion point.
+  void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg,
+                        bool UseFloat, Value *&Sin, Value *&Cos,
+                        Value *&SinCos) {
+    Type *ArgTy = Arg->getType();
+    Type *ResTy;
+    StringRef Name;
+
+    Triple T(OrigCallee->getParent()->getTargetTriple());
+    if (UseFloat) {
+      Name = "__sincospi_stretf";
+
+      assert(T.getArch() != Triple::x86 && "x86 messy and unsupported for now");
+      // x86_64 can't use {float, float} since that would be returned in both
+      // xmm0 and xmm1, which isn't what a real struct would do.
+      ResTy = T.getArch() == Triple::x86_64
+                  ? static_cast<Type *>(VectorType::get(ArgTy, 2))
+                  : static_cast<Type *>(StructType::get(ArgTy, ArgTy, NULL));
+    } else {
+      Name = "__sincospi_stret";
+      ResTy = StructType::get(ArgTy, ArgTy, NULL);
+    }
+
+    Module *M = OrigCallee->getParent();
+    Value *Callee = M->getOrInsertFunction(Name, OrigCallee->getAttributes(),
+                                           ResTy, ArgTy, NULL);
+
+    if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) {
+      // If the argument is an instruction, it must dominate all uses so put our
+      // sincos call there.
+      BasicBlock::iterator Loc = ArgInst;
+      B.SetInsertPoint(ArgInst->getParent(), ++Loc);
+    } else {
+      // Otherwise (e.g. for a constant) the beginning of the function is as
+      // good a place as any.
+      BasicBlock &EntryBB = B.GetInsertBlock()->getParent()->getEntryBlock();
+      B.SetInsertPoint(&EntryBB, EntryBB.begin());
+    }
+
+    SinCos = B.CreateCall(Callee, Arg, "sincospi");
+
+    // A struct result is unpacked with extractvalue, a vector result with
+    // extractelement.
+    if (SinCos->getType()->isStructTy()) {
+      Sin = B.CreateExtractValue(SinCos, 0, "sinpi");
+      Cos = B.CreateExtractValue(SinCos, 1, "cospi");
+    } else {
+      Sin = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 0),
+                                   "sinpi");
+      Cos = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 1),
+                                   "cospi");
+    }
+  }
+
+};
+
+//===----------------------------------------------------------------------===//
+// Integer Library Call Optimizations
+//===----------------------------------------------------------------------===//
+
+struct FFSOpt : public LibCallOptimization {
+  /// Fold ffs of a constant, or expand ffs(x) into a select around
+  /// llvm.cttz(x) + 1. Returns 0 if the prototype does not match.
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    FunctionType *FT = Callee->getFunctionType();
+    // Require exactly one integer argument and an i32 result.
+    if (FT->getNumParams() != 1 ||
+        !FT->getReturnType()->isIntegerTy(32) ||
+        !FT->getParamType(0)->isIntegerTy())
+      return 0;
+
+    Value *Op = CI->getArgOperand(0);
+
+    // Constant fold.
+    if (ConstantInt *OpC = dyn_cast<ConstantInt>(Op)) {
+      if (OpC->isZero()) // ffs(0) -> 0.
+        return B.getInt32(0);
+      // ffs(c) -> cttz(c)+1
+      return B.getInt32(OpC->getValue().countTrailingZeros() + 1);
+    }
+
+    // ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0
+    Type *ArgType = Op->getType();
+    Value *F = Intrinsic::getDeclaration(Callee->getParent(),
+                                         Intrinsic::cttz, ArgType);
+    Value *V = B.CreateCall2(F, Op, B.getFalse(), "cttz");
+    V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1));
+    // The sum is computed in the argument's width, then cast to i32.
+    V = B.CreateIntCast(V, B.getInt32Ty(), false);
+
+    Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType));
+    return B.CreateSelect(Cond, V, B.getInt32(0));
+  }
+};
+
+struct AbsOpt : public LibCallOptimization {
+  virtual bool ignoreCallingConv() { return true; }
+
+  /// Expand an integer abs-style call into compare + negate + select.
+  /// Returns 0 if the prototype does not match.
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // We require integer(integer) where the types agree.
+    FunctionType *ProtoTy = Callee->getFunctionType();
+    bool ProtoOK = ProtoTy->getNumParams() == 1 &&
+                   ProtoTy->getReturnType()->isIntegerTy() &&
+                   ProtoTy->getParamType(0) == ProtoTy->getReturnType();
+    if (!ProtoOK)
+      return 0;
+
+    // abs(x) -> x >s -1 ? x : -x
+    Value *Arg = CI->getArgOperand(0);
+    Value *MinusOne = Constant::getAllOnesValue(Arg->getType());
+    Value *IsNonNegative = B.CreateICmpSGT(Arg, MinusOne, "ispos");
+    Value *Negated = B.CreateNeg(Arg, "neg");
+    return B.CreateSelect(IsNonNegative, Arg, Negated);
+  }
+};
+
+struct IsDigitOpt : public LibCallOptimization {
+  /// Expand isdigit(c) into an unsigned range check, zero-extended to the
+  /// call's result type. Returns 0 if the prototype does not match.
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // We require integer(i32)
+    FunctionType *ProtoTy = Callee->getFunctionType();
+    bool ProtoOK = ProtoTy->getNumParams() == 1 &&
+                   ProtoTy->getReturnType()->isIntegerTy() &&
+                   ProtoTy->getParamType(0)->isIntegerTy(32);
+    if (!ProtoOK)
+      return 0;
+
+    // isdigit(c) -> (c-'0') <u 10
+    Value *Ch = CI->getArgOperand(0);
+    Value *Offset = B.CreateSub(Ch, B.getInt32('0'), "isdigittmp");
+    Value *InRange = B.CreateICmpULT(Offset, B.getInt32(10), "isdigit");
+    return B.CreateZExt(InRange, CI->getType());
+  }
+};
+
+struct IsAsciiOpt : public LibCallOptimization {
+  /// Expand isascii(c) into an unsigned compare against 128, zero-extended
+  /// to the call's result type. Returns 0 if the prototype does not match.
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // We require integer(i32)
+    FunctionType *ProtoTy = Callee->getFunctionType();
+    bool ProtoOK = ProtoTy->getNumParams() == 1 &&
+                   ProtoTy->getReturnType()->isIntegerTy() &&
+                   ProtoTy->getParamType(0)->isIntegerTy(32);
+    if (!ProtoOK)
+      return 0;
+
+    // isascii(c) -> c <u 128
+    Value *Ch = CI->getArgOperand(0);
+    Value *Below128 = B.CreateICmpULT(Ch, B.getInt32(128), "isascii");
+    return B.CreateZExt(Below128, CI->getType());
+  }
+};
+
+struct ToAsciiOpt : public LibCallOptimization {
+  /// Expand toascii(c) into a mask with 0x7F. Returns 0 if the prototype
+  /// does not match.
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // We require i32(i32)
+    FunctionType *ProtoTy = Callee->getFunctionType();
+    bool ProtoOK = ProtoTy->getNumParams() == 1 &&
+                   ProtoTy->getReturnType() == ProtoTy->getParamType(0) &&
+                   ProtoTy->getParamType(0)->isIntegerTy(32);
+    if (!ProtoOK)
+      return 0;
+
+    // toascii(c) -> c & 0x7f
+    Value *Ch = CI->getArgOperand(0);
+    Value *LowSevenBits = ConstantInt::get(CI->getType(),0x7F);
+    return B.CreateAnd(Ch, LowSevenBits);
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// Formatting and IO Library Call Optimizations
+//===----------------------------------------------------------------------===//
+
+struct ErrorReportingOpt : public LibCallOptimization {
+  /// S is the index of the call's stream argument; a negative value means
+  /// the call has no stream argument to inspect.
+  ErrorReportingOpt(int S = -1) : StreamArg(S) {}
+
+  /// Adds the Cold attribute to CI when it looks like an error report.
+  /// Never replaces the call: always returns 0.
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &) {
+    // Error reporting calls should be cold, mark them as such.
+    // This applies even to non-builtin calls: it is only a hint and applies to
+    // functions that the frontend might not understand as builtins.
+
+    // This heuristic was suggested in:
+    // Improving Static Branch Prediction in a Compiler
+    // Brian L. Deitrich, Ben-Chung Cheng, Wen-mei W. Hwu
+    // Proceedings of PACT'98, Oct. 1998, IEEE
+
+    bool AlreadyCold = CI->hasFnAttr(Attribute::Cold);
+    if (!AlreadyCold && isReportingError(Callee, CI))
+      CI->addAttribute(AttributeSet::FunctionIndex, Attribute::Cold);
+
+    return 0;
+  }
+
+protected:
+  /// Decides whether CI should be treated as an error-reporting call.
+  bool isReportingError(Function *Callee, CallInst *CI) {
+    // The whole heuristic is gated on ColdErrorCalls.
+    if (!ColdErrorCalls)
+      return false;
+
+    // Only calls to declarations (no body in this module) qualify.
+    if (!Callee || !Callee->isDeclaration())
+      return false;
+
+    // No stream argument to inspect: the call qualifies as-is.
+    if (StreamArg < 0)
+      return true;
+
+    // These functions might be considered cold, but only if their stream
+    // argument is stderr.
+
+    if (StreamArg >= (int) CI->getNumArgOperands())
+      return false;
+
+    // The stream must be a load from a declared global named "stderr".
+    LoadInst *StreamLoad = dyn_cast<LoadInst>(CI->getArgOperand(StreamArg));
+    if (!StreamLoad)
+      return false;
+    GlobalVariable *StreamGV =
+        dyn_cast<GlobalVariable>(StreamLoad->getPointerOperand());
+    if (!StreamGV || !StreamGV->isDeclaration())
+      return false;
+    return StreamGV->getName() == "stderr";
+  }
+
+  int StreamArg;
+};
+
+struct PrintFOpt : public LibCallOptimization {
+  /// Simplify printf calls whose format string is a known constant into
+  /// putchar/puts, or drop them when the format is empty. Returns 0 when no
+  /// rewrite applies.
+  Value *optimizeFixedFormatString(Function *Callee, CallInst *CI,
+                                   IRBuilder<> &B) {
+    // Check for a fixed format string.
+    StringRef FormatStr;
+    if (!getConstantStringInfo(CI->getArgOperand(0), FormatStr))
+      return 0;
+
+    // Empty format string -> noop.
+    if (FormatStr.empty())  // Tolerate printf's declared void.
+      return CI->use_empty() ? (Value*)CI :
+                               ConstantInt::get(CI->getType(), 0);
+
+    // Do not do any of the following transformations if the printf return value
+    // is used, in general the printf return value is not compatible with either
+    // putchar() or puts().
+    if (!CI->use_empty())
+      return 0;
+
+    // From here on the call is known to have no users, so each rewrite simply
+    // hands back the emitted replacement (or null if it could not be
+    // emitted); no result conversion is needed.
+
+    // printf("x") -> putchar('x'), even for '%'.
+    if (FormatStr.size() == 1)
+      return EmitPutChar(B.getInt32(FormatStr[0]), B, TD, TLI);
+
+    // printf("foo\n") --> puts("foo")
+    if (FormatStr[FormatStr.size()-1] == '\n' &&
+        FormatStr.find('%') == StringRef::npos) { // No format characters.
+      // Create a string literal with no \n on it.  We expect the constant merge
+      // pass to be run after this pass, to merge duplicate strings.
+      FormatStr = FormatStr.drop_back();
+      Value *GV = B.CreateGlobalString(FormatStr, "str");
+      return EmitPutS(GV, B, TD, TLI);
+    }
+
+    // Optimize specific format strings.
+    // printf("%c", chr) --> putchar(chr)
+    if (FormatStr == "%c" && CI->getNumArgOperands() > 1 &&
+        CI->getArgOperand(1)->getType()->isIntegerTy())
+      return EmitPutChar(CI->getArgOperand(1), B, TD, TLI);
+
+    // printf("%s\n", str) --> puts(str)
+    if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 &&
+        CI->getArgOperand(1)->getType()->isPointerTy())
+      return EmitPutS(CI->getArgOperand(1), B, TD, TLI);
+
+    return 0;
+  }
+
+  /// Entry point: try the fixed-format rewrites first, then fall back to
+  /// iprintf when the target has it and no floating point value is passed.
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Require one fixed pointer argument and an integer/void result.
+    FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
+        !(FT->getReturnType()->isIntegerTy() ||
+          FT->getReturnType()->isVoidTy()))
+      return 0;
+
+    if (Value *V = optimizeFixedFormatString(Callee, CI, B)) {
+      return V;
+    }
+
+    // printf(format, ...) -> iprintf(format, ...) if no floating point
+    // arguments.
+    if (TLI->has(LibFunc::iprintf) && !callHasFloatingPointArgument(CI)) {
+      Module *M = B.GetInsertBlock()->getParent()->getParent();
+      Constant *IPrintFFn =
+        M->getOrInsertFunction("iprintf", FT, Callee->getAttributes());
+      // Clone the original call so its operands carry over, then retarget it.
+      CallInst *New = cast<CallInst>(CI->clone());
+      New->setCalledFunction(IPrintFFn);
+      B.Insert(New);
+      return New;
+    }
+    return 0;
+  }
+};
+
+struct SPrintFOpt : public LibCallOptimization {
+  /// Simplify sprintf calls whose format string is a known constant: a
+  /// '%'-free format becomes a memcpy, "%c" becomes two byte stores and "%s"
+  /// becomes strlen + memcpy. Returns 0 when no rewrite applies.
+  Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI,
+                                   IRBuilder<> &B) {
+    // Check for a fixed format string.
+    StringRef Fmt;
+    if (!getConstantStringInfo(CI->getArgOperand(1), Fmt))
+      return 0;
+
+    Value *Dest = CI->getArgOperand(0);
+
+    // If we just have a format string (nothing else crazy) transform it.
+    if (CI->getNumArgOperands() == 2) {
+      // Make sure there's no % in the constant array.  We could try to handle
+      // %% -> % in the future if we cared.
+      if (Fmt.find('%') != StringRef::npos)
+        return 0; // we found a format specifier, bail out.
+
+      // These optimizations require DataLayout.
+      if (!TD)
+        return 0;
+
+      // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
+      // The extra byte copies the terminating nul.
+      Value *CopySize =
+          ConstantInt::get(TD->getIntPtrType(*Context), Fmt.size() + 1);
+      B.CreateMemCpy(Dest, CI->getArgOperand(1), CopySize, 1);
+      return ConstantInt::get(CI->getType(), Fmt.size());
+    }
+
+    // The remaining optimizations require the format string to be "%s" or "%c"
+    // and have an extra operand.
+    bool IsTwoCharSpec = Fmt.size() == 2 && Fmt[0] == '%';
+    if (!IsTwoCharSpec || CI->getNumArgOperands() < 3)
+      return 0;
+
+    Value *Extra = CI->getArgOperand(2);
+
+    // Decode the second character of the format string.
+    if (Fmt[1] == 'c') {
+      // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
+      if (!Extra->getType()->isIntegerTy())
+        return 0;
+      Value *Ch = B.CreateTrunc(Extra, B.getInt8Ty(), "char");
+      Value *DestBytes = CastToCStr(Dest, B);
+      B.CreateStore(Ch, DestBytes);
+      Value *NulSlot = B.CreateGEP(DestBytes, B.getInt32(1), "nul");
+      B.CreateStore(B.getInt8(0), NulSlot);
+
+      return ConstantInt::get(CI->getType(), 1);
+    }
+
+    if (Fmt[1] == 's') {
+      // These optimizations require DataLayout.
+      if (!TD)
+        return 0;
+
+      // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)
+      if (!Extra->getType()->isPointerTy())
+        return 0;
+
+      Value *StrLen = EmitStrLen(Extra, B, TD, TLI);
+      if (!StrLen)
+        return 0;
+      Value *One = ConstantInt::get(StrLen->getType(), 1);
+      Value *CopySize = B.CreateAdd(StrLen, One, "leninc");
+      B.CreateMemCpy(Dest, Extra, CopySize, 1);
+
+      // The sprintf result is the unincremented number of bytes in the string.
+      return B.CreateIntCast(StrLen, CI->getType(), false);
+    }
+    return 0;
+  }
+
+  /// Entry point: try the fixed-format rewrites first, then fall back to
+  /// siprintf when the target has it and no floating point value is passed.
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Require two fixed pointer arguments and an integer result.
+    FunctionType *ProtoTy = Callee->getFunctionType();
+    bool ProtoOK = ProtoTy->getNumParams() == 2 &&
+                   ProtoTy->getParamType(0)->isPointerTy() &&
+                   ProtoTy->getParamType(1)->isPointerTy() &&
+                   ProtoTy->getReturnType()->isIntegerTy();
+    if (!ProtoOK)
+      return 0;
+
+    if (Value *Folded = OptimizeFixedFormatString(Callee, CI, B))
+      return Folded;
+
+    // sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating
+    // point arguments.
+    if (!TLI->has(LibFunc::siprintf) || callHasFloatingPointArgument(CI))
+      return 0;
+
+    Module *M = B.GetInsertBlock()->getParent()->getParent();
+    Constant *SIPrintFFn =
+      M->getOrInsertFunction("siprintf", ProtoTy, Callee->getAttributes());
+    // Clone the original call so its operands carry over, then retarget it.
+    CallInst *Replacement = cast<CallInst>(CI->clone());
+    Replacement->setCalledFunction(SIPrintFFn);
+    B.Insert(Replacement);
+    return Replacement;
+  }
+};
+
+struct FPrintFOpt : public LibCallOptimization {
+  /// Simplify fprintf calls whose format string is a known constant into
+  /// fwrite/fputc/fputs. Also lets ErrorReportingOpt inspect the call, with
+  /// operand 0 as the stream. Returns 0 when no rewrite applies.
+  Value *optimizeFixedFormatString(Function *Callee, CallInst *CI,
+                                   IRBuilder<> &B) {
+    ErrorReportingOpt ER(/* StreamArg = */ 0);
+    (void) ER.callOptimizer(Callee, CI, B);
+
+    // All the optimizations depend on the format string.
+    StringRef Fmt;
+    if (!getConstantStringInfo(CI->getArgOperand(1), Fmt))
+      return 0;
+
+    // Do not do any of the following transformations if the fprintf return
+    // value is used, in general the fprintf return value is not compatible
+    // with fwrite(), fputc() or fputs().
+    if (!CI->use_empty())
+      return 0;
+
+    Value *Stream = CI->getArgOperand(0);
+
+    // fprintf(F, "foo") --> fwrite("foo", 3, 1, F)
+    if (CI->getNumArgOperands() == 2) {
+      // Could handle %% -> % if we cared.
+      if (Fmt.find('%') != StringRef::npos)
+        return 0; // We found a format specifier.
+
+      // These optimizations require DataLayout.
+      if (!TD)
+        return 0;
+
+      Value *NumBytes =
+          ConstantInt::get(TD->getIntPtrType(*Context), Fmt.size());
+      return EmitFWrite(CI->getArgOperand(1), NumBytes, Stream, B, TD, TLI);
+    }
+
+    // The remaining optimizations require the format string to be "%s" or "%c"
+    // and have an extra operand.
+    bool IsTwoCharSpec = Fmt.size() == 2 && Fmt[0] == '%';
+    if (!IsTwoCharSpec || CI->getNumArgOperands() < 3)
+      return 0;
+
+    Value *Extra = CI->getArgOperand(2);
+
+    // Decode the second character of the format string.
+    if (Fmt[1] == 'c') {
+      // fprintf(F, "%c", chr) --> fputc(chr, F)
+      if (!Extra->getType()->isIntegerTy())
+        return 0;
+      return EmitFPutC(Extra, Stream, B, TD, TLI);
+    }
+
+    if (Fmt[1] == 's') {
+      // fprintf(F, "%s", str) --> fputs(str, F)
+      if (!Extra->getType()->isPointerTy())
+        return 0;
+      return EmitFPutS(Extra, Stream, B, TD, TLI);
+    }
+    return 0;
+  }
+
+  /// Entry point: try the fixed-format rewrites first, then fall back to
+  /// fiprintf when the target has it and no floating point value is passed.
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Require two fixed parameters as pointers and integer result.
+    FunctionType *ProtoTy = Callee->getFunctionType();
+    bool ProtoOK = ProtoTy->getNumParams() == 2 &&
+                   ProtoTy->getParamType(0)->isPointerTy() &&
+                   ProtoTy->getParamType(1)->isPointerTy() &&
+                   ProtoTy->getReturnType()->isIntegerTy();
+    if (!ProtoOK)
+      return 0;
+
+    if (Value *Folded = optimizeFixedFormatString(Callee, CI, B))
+      return Folded;
+
+    // fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no
+    // floating point arguments.
+    if (!TLI->has(LibFunc::fiprintf) || callHasFloatingPointArgument(CI))
+      return 0;
+
+    Module *M = B.GetInsertBlock()->getParent()->getParent();
+    Constant *FIPrintFFn =
+      M->getOrInsertFunction("fiprintf", ProtoTy, Callee->getAttributes());
+    // Clone the original call so its operands carry over, then retarget it.
+    CallInst *Replacement = cast<CallInst>(CI->clone());
+    Replacement->setCalledFunction(FIPrintFFn);
+    B.Insert(Replacement);
+    return Replacement;
+  }
+};
+
+struct FWriteOpt : public LibCallOptimization {
+  /// Simplify fwrite calls with constant size and count: a zero-byte write
+  /// folds to 0 and a one-byte write whose result is unused becomes fputc.
+  /// Also lets ErrorReportingOpt inspect the call, with operand 3 as the
+  /// stream. Returns 0 when no rewrite applies.
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    ErrorReportingOpt ER(/* StreamArg = */ 3);
+    (void) ER.callOptimizer(Callee, CI, B);
+
+    // Require a pointer, an integer, an integer, a pointer, returning integer.
+    FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 4 || !FT->getParamType(0)->isPointerTy() ||
+        !FT->getParamType(1)->isIntegerTy() ||
+        !FT->getParamType(2)->isIntegerTy() ||
+        !FT->getParamType(3)->isPointerTy() ||
+        !FT->getReturnType()->isIntegerTy())
+      return 0;
+
+    // Get the element size and count.
+    ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+    ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+    if (!SizeC || !CountC) return 0;
+
+    // Test the two operands individually rather than their product: the
+    // 64-bit product can wrap around to 0 or 1 for very large operands, which
+    // would misclassify a huge write as an empty or single-byte one.
+    uint64_t Size = SizeC->getZExtValue();
+    uint64_t Count = CountC->getZExtValue();
+
+    // If this is writing zero records, remove the call (it's a noop).
+    if (Size == 0 || Count == 0)
+      return ConstantInt::get(CI->getType(), 0);
+
+    // If this is writing one byte, turn it into fputc.
+    // This optimisation is only valid, if the return value is unused.
+    if (Size == 1 && Count == 1 && CI->use_empty()) {
+      // fwrite(S,1,1,F) -> fputc(S[0],F)
+      Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char");
+      Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, TD, TLI);
+      return NewCI ? ConstantInt::get(CI->getType(), 1) : 0;
+    }
+
+    return 0;
+  }
+};
+
+struct FPutsOpt : public LibCallOptimization {
+  /// Rewrite fputs of a string of known length, whose result is unused, as
+  /// an fwrite. Also lets ErrorReportingOpt inspect the call, with operand 1
+  /// as the stream. Returns 0 when no rewrite applies.
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    ErrorReportingOpt ER(/* StreamArg = */ 1);
+    (void) ER.callOptimizer(Callee, CI, B);
+
+    // These optimizations require DataLayout.
+    if (!TD)
+      return 0;
+
+    // Require two pointers.  Also, we can't optimize if return value is used.
+    FunctionType *ProtoTy = Callee->getFunctionType();
+    bool Eligible = ProtoTy->getNumParams() == 2 &&
+                    ProtoTy->getParamType(0)->isPointerTy() &&
+                    ProtoTy->getParamType(1)->isPointerTy() &&
+                    CI->use_empty();
+    if (!Eligible)
+      return 0;
+
+    Value *Str = CI->getArgOperand(0);
+    Value *Stream = CI->getArgOperand(1);
+
+    // fputs(s,F) --> fwrite(s,1,strlen(s),F)
+    // A zero result from GetStringLength means no rewrite; otherwise one
+    // less than the result is written.
+    uint64_t LenWithNul = GetStringLength(Str);
+    if (LenWithNul == 0)
+      return 0;
+
+    // Known to have no uses (see above).
+    Value *NumBytes =
+        ConstantInt::get(TD->getIntPtrType(*Context), LenWithNul - 1);
+    return EmitFWrite(Str, NumBytes, Stream, B, TD, TLI);
+  }
+};
+
+struct PutsOpt : public LibCallOptimization {
+  /// Rewrite puts("") as putchar('\n') when the call's result is unused.
+  /// Returns 0 when the rewrite does not apply.
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Require one fixed pointer argument and an integer/void result.
+    FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
+        !(FT->getReturnType()->isIntegerTy() ||
+          FT->getReturnType()->isVoidTy()))
+      return 0;
+
+    // Check for a constant string.
+    StringRef Str;
+    if (!getConstantStringInfo(CI->getArgOperand(0), Str))
+      return 0;
+
+    // Only the empty string is handled, and only when nothing reads the
+    // result of the call.
+    if (!Str.empty() || !CI->use_empty())
+      return 0;
+
+    // puts("") -> putchar('\n')
+    // The call has no users here, so the putchar result (or null if it could
+    // not be emitted) is returned as-is; no cast to the puts type is needed.
+    return EmitPutChar(B.getInt32('\n'), B, TD, TLI);
+  }
+};
+
+} // End anonymous namespace.
+
+namespace llvm {
+
+/// Implementation object behind LibCallSimplifier. It stores the DataLayout,
+/// TargetLibraryInfo and owning simplifier pointers, plus the math optimizers
+/// that are constructed with the UnsafeFPShrink setting.
+class LibCallSimplifierImpl {
+  const DataLayout *TD;
+  const TargetLibraryInfo *TLI;
+  const LibCallSimplifier *LCS;
+  bool UnsafeFPShrink;
+
+  // Math library call optimizations.
+  CosOpt Cos;
+  PowOpt Pow;
+  Exp2Opt Exp2;
+
+public:
+  LibCallSimplifierImpl(const DataLayout *TD, const TargetLibraryInfo *TLI,
+                        const LibCallSimplifier *LCS,
+                        bool UnsafeFPShrink = false)
+    : TD(TD), TLI(TLI), LCS(LCS), UnsafeFPShrink(UnsafeFPShrink),
+      Cos(UnsafeFPShrink), Pow(UnsafeFPShrink), Exp2(UnsafeFPShrink) {}
+
+  Value *optimizeCall(CallInst *CI);
+  LibCallOptimization *lookupOptimization(CallInst *CI);
+  bool hasFloatVersion(StringRef FuncName);
+};
+
+/// Returns true when the function named FuncName with an 'f' suffix is a
+/// library function known to TLI and reported as available by it.
+bool LibCallSimplifierImpl::hasFloatVersion(StringRef FuncName) {
+  // Form the candidate name by appending 'f'.
+  SmallString<20> FloatName = FuncName;
+  FloatName += 'f';
+
+  LibFunc::Func FloatFunc;
+  return TLI->getLibFunc(FloatName, FloatFunc) && TLI->has(FloatFunc);
+}
+
+// File-scope optimizer objects. lookupOptimization() returns pointers to
+// these, so a single instance of each is shared by every caller.
+//
+// Fortified library call optimizations.
+static MemCpyChkOpt MemCpyChk;
+static MemMoveChkOpt MemMoveChk;
+static MemSetChkOpt MemSetChk;
+static StrCpyChkOpt StrCpyChk;
+static StpCpyChkOpt StpCpyChk;
+static StrNCpyChkOpt StrNCpyChk;
+
+// String library call optimizations.
+static StrCatOpt StrCat;
+static StrNCatOpt StrNCat;
+static StrChrOpt StrChr;
+static StrRChrOpt StrRChr;
+static StrCmpOpt StrCmp;
+static StrNCmpOpt StrNCmp;
+static StrCpyOpt StrCpy;
+static StpCpyOpt StpCpy;
+static StrNCpyOpt StrNCpy;
+static StrLenOpt StrLen;
+static StrPBrkOpt StrPBrk;
+static StrToOpt StrTo;
+static StrSpnOpt StrSpn;
+static StrCSpnOpt StrCSpn;
+static StrStrOpt StrStr;
+
+// Memory library call optimizations.
+static MemCmpOpt MemCmp;
+static MemCpyOpt MemCpy;
+static MemMoveOpt MemMove;
+static MemSetOpt MemSet;
+
+// Math library call optimizations.
+// The constructor argument distinguishes the variant returned for the
+// always-considered functions (false) from the one returned only when
+// UnsafeFPShrink is set (true); see lookupOptimization().
+static UnaryDoubleFPOpt UnaryDoubleFP(false);
+static UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
+static SinCosPiOpt SinCosPi;
+
+// Integer library call optimizations.
+static FFSOpt FFS;
+static AbsOpt Abs;
+static IsDigitOpt IsDigit;
+static IsAsciiOpt IsAscii;
+static ToAsciiOpt ToAscii;
+
+// Formatting and IO library call optimizations.
+// The ErrorReportingOpt constructor argument is the index of the stream
+// argument of the call (0 or 1). The default-constructed instance is the one
+// used for perror.
+// NOTE(review): the default stream index is defined elsewhere -- confirm.
+static ErrorReportingOpt ErrorReporting;
+static ErrorReportingOpt ErrorReporting0(0);
+static ErrorReportingOpt ErrorReporting1(1);
+static PrintFOpt PrintF;
+static SPrintFOpt SPrintF;
+static FPrintFOpt FPrintF;
+static FWriteOpt FWrite;
+static FPutsOpt FPuts;
+static PutsOpt Puts;
+
+// Selects the optimizer object responsible for the call CI, or returns null
+// when no optimizer applies. Three lookups are tried in order: intrinsic ID,
+// library function recognised by TLI, and finally the "_chk" function names
+// that are matched by string.
+// NOTE(review): Callee is dereferenced without a null check; presumably
+// callers only pass direct calls -- confirm.
+LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) {
+ LibFunc::Func Func;
+ Function *Callee = CI->getCalledFunction();
+ StringRef FuncName = Callee->getName();
+
+ // First check for intrinsics. An intrinsic call never falls through to the
+ // library-function lookups below: unhandled intrinsics return null.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::pow:
+ return &Pow;
+ case Intrinsic::exp2:
+ return &Exp2;
+ default:
+ return 0;
+ }
+ }
+
+ // Then check for known library functions. A function that TLI recognises
+ // and reports as available is fully decided here (the default case returns
+ // null), so the name-based "_chk" lookup is not reached for it.
+ if (TLI->getLibFunc(FuncName, Func) && TLI->has(Func)) {
+ switch (Func) {
+ case LibFunc::strcat:
+ return &StrCat;
+ case LibFunc::strncat:
+ return &StrNCat;
+ case LibFunc::strchr:
+ return &StrChr;
+ case LibFunc::strrchr:
+ return &StrRChr;
+ case LibFunc::strcmp:
+ return &StrCmp;
+ case LibFunc::strncmp:
+ return &StrNCmp;
+ case LibFunc::strcpy:
+ return &StrCpy;
+ case LibFunc::stpcpy:
+ return &StpCpy;
+ case LibFunc::strncpy:
+ return &StrNCpy;
+ case LibFunc::strlen:
+ return &StrLen;
+ case LibFunc::strpbrk:
+ return &StrPBrk;
+ case LibFunc::strtol:
+ case LibFunc::strtod:
+ case LibFunc::strtof:
+ case LibFunc::strtoul:
+ case LibFunc::strtoll:
+ case LibFunc::strtold:
+ case LibFunc::strtoull:
+ return &StrTo;
+ case LibFunc::strspn:
+ return &StrSpn;
+ case LibFunc::strcspn:
+ return &StrCSpn;
+ case LibFunc::strstr:
+ return &StrStr;
+ case LibFunc::memcmp:
+ return &MemCmp;
+ case LibFunc::memcpy:
+ return &MemCpy;
+ case LibFunc::memmove:
+ return &MemMove;
+ case LibFunc::memset:
+ return &MemSet;
+ case LibFunc::cosf:
+ case LibFunc::cos:
+ case LibFunc::cosl:
+ return &Cos;
+ case LibFunc::sinpif:
+ case LibFunc::sinpi:
+ case LibFunc::cospif:
+ case LibFunc::cospi:
+ return &SinCosPi;
+ case LibFunc::powf:
+ case LibFunc::pow:
+ case LibFunc::powl:
+ return &Pow;
+ case LibFunc::exp2l:
+ case LibFunc::exp2:
+ case LibFunc::exp2f:
+ return &Exp2;
+ case LibFunc::ffs:
+ case LibFunc::ffsl:
+ case LibFunc::ffsll:
+ return &FFS;
+ case LibFunc::abs:
+ case LibFunc::labs:
+ case LibFunc::llabs:
+ return &Abs;
+ case LibFunc::isdigit:
+ return &IsDigit;
+ case LibFunc::isascii:
+ return &IsAscii;
+ case LibFunc::toascii:
+ return &ToAscii;
+ case LibFunc::printf:
+ return &PrintF;
+ case LibFunc::sprintf:
+ return &SPrintF;
+ case LibFunc::fprintf:
+ return &FPrintF;
+ case LibFunc::fwrite:
+ return &FWrite;
+ case LibFunc::fputs:
+ return &FPuts;
+ case LibFunc::puts:
+ return &Puts;
+ case LibFunc::perror:
+ return &ErrorReporting;
+ case LibFunc::vfprintf:
+ case LibFunc::fiprintf:
+ return &ErrorReporting0;
+ case LibFunc::fputc:
+ return &ErrorReporting1;
+ // These double-precision functions are handled only when TLI also has
+ // the corresponding 'f'-suffixed function.
+ case LibFunc::ceil:
+ case LibFunc::fabs:
+ case LibFunc::floor:
+ case LibFunc::rint:
+ case LibFunc::round:
+ case LibFunc::nearbyint:
+ case LibFunc::trunc:
+ if (hasFloatVersion(FuncName))
+ return &UnaryDoubleFP;
+ return 0;
+ // Same requirement as above, and additionally gated on UnsafeFPShrink.
+ case LibFunc::acos:
+ case LibFunc::acosh:
+ case LibFunc::asin:
+ case LibFunc::asinh:
+ case LibFunc::atan:
+ case LibFunc::atanh:
+ case LibFunc::cbrt:
+ case LibFunc::cosh:
+ case LibFunc::exp:
+ case LibFunc::exp10:
+ case LibFunc::expm1:
+ case LibFunc::log:
+ case LibFunc::log10:
+ case LibFunc::log1p:
+ case LibFunc::log2:
+ case LibFunc::logb:
+ case LibFunc::sin:
+ case LibFunc::sinh:
+ case LibFunc::sqrt:
+ case LibFunc::tan:
+ case LibFunc::tanh:
+ if (UnsafeFPShrink && hasFloatVersion(FuncName))
+ return &UnsafeUnaryDoubleFP;
+ return 0;
+ case LibFunc::memcpy_chk:
+ return &MemCpyChk;
+ default:
+ return 0;
+ }
+ }
+
+ // Finally check for fortified library calls, which are matched by name.
+ // Note that __stpncpy_chk shares the StrNCpyChk optimizer with
+ // __strncpy_chk.
+ if (FuncName.endswith("_chk")) {
+ if (FuncName == "__memmove_chk")
+ return &MemMoveChk;
+ else if (FuncName == "__memset_chk")
+ return &MemSetChk;
+ else if (FuncName == "__strcpy_chk")
+ return &StrCpyChk;
+ else if (FuncName == "__stpcpy_chk")
+ return &StpCpyChk;
+ else if (FuncName == "__strncpy_chk")
+ return &StrNCpyChk;
+ else if (FuncName == "__stpncpy_chk")
+ return &StrNCpyChk;
+ }
+
+ return 0;
+
+}
+
+/// Looks up the optimizer for CI and, if there is one, runs it with an
+/// IRBuilder positioned at CI. Returns null when no optimizer applies.
+Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) {
+  LibCallOptimization *Optimizer = lookupOptimization(CI);
+  if (!Optimizer)
+    return 0;
+
+  IRBuilder<> Builder(CI);
+  return Optimizer->optimizeCall(CI, TD, TLI, LCS, Builder);
+}
+
+/// Allocates the implementation object, handing it this simplifier together
+/// with the supplied DataLayout, TargetLibraryInfo and UnsafeFPShrink flag.
+LibCallSimplifier::LibCallSimplifier(const DataLayout *TD,
+                                     const TargetLibraryInfo *TLI,
+                                     bool UnsafeFPShrink)
+  : Impl(new LibCallSimplifierImpl(TD, TLI, this, UnsafeFPShrink)) {}
+
+// Releases the LibCallSimplifierImpl object that the constructor allocated.
+LibCallSimplifier::~LibCallSimplifier() {
+ delete Impl;
+}
+
+/// Public entry point: calls marked nobuiltin are never optimized; all other
+/// calls are forwarded to the implementation object.
+Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
+  if (!CI->isNoBuiltin())
+    return Impl->optimizeCall(CI);
+  return 0;
+}
+
+// Replaces every use of instruction I with the value With, then erases I
+// from its parent. I must not be used by the caller afterwards.
+void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) const {
+ I->replaceAllUsesWith(With);
+ I->eraseFromParent();
+}
+
+}
+
+// TODO:
+// Additional cases that we need to add to this file:
+//
+// cbrt:
+// * cbrt(expN(X)) -> expN(x/3)
+// * cbrt(sqrt(x)) -> pow(x,1/6)
+// * cbrt(cbrt(x)) -> pow(x,1/9)
+//
+// exp, expf, expl:
+// * exp(log(x)) -> x
+//
+// log, logf, logl:
+// * log(exp(x)) -> x
+// * log(x**y) -> y*log(x)
+// * log(exp(y)) -> y*log(e)
+// * log(exp2(y)) -> y*log(2)
+// * log(exp10(y)) -> y*log(10)
+// * log(sqrt(x)) -> 0.5*log(x)
+// * log(pow(x,y)) -> y*log(x)
+//
+// lround, lroundf, lroundl:
+// * lround(cnst) -> cnst'
+//
+// pow, powf, powl:
+// * pow(exp(x),y) -> exp(x*y)
+// * pow(sqrt(x),y) -> pow(x,y*0.5)
+// * pow(pow(x,y),z)-> pow(x,y*z)
+//
+// round, roundf, roundl:
+// * round(cnst) -> cnst'
+//
+// signbit:
+// * signbit(cnst) -> cnst'
+// * signbit(nncst) -> 0 (if nncst is a non-negative constant)
+//
+// sqrt, sqrtf, sqrtl:
+// * sqrt(expN(x)) -> expN(x*0.5)
+// * sqrt(Nroot(x)) -> pow(x,1/(2*N))
+// * sqrt(pow(x,y)) -> pow(|x|,y*0.5)
+//
+// strchr:
+// * strchr(p, 0) -> p + strlen(p)
+// tan, tanf, tanl:
+// * tan(atan(x)) -> x
+//
+// trunc, truncf, truncl:
+// * trunc(cnst) -> cnst'
+//
+//
diff --git a/contrib/llvm/lib/Transforms/Utils/SpecialCaseList.cpp b/contrib/llvm/lib/Transforms/Utils/SpecialCaseList.cpp
new file mode 100644
index 000000000000..2ef692c564c6
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/SpecialCaseList.cpp
@@ -0,0 +1,222 @@
+//===-- SpecialCaseList.cpp - special case list for sanitizers ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a utility class for instrumentation passes (like AddressSanitizer
+// or ThreadSanitizer) to avoid instrumenting some functions or global
+// variables, or to instrument some functions or global variables in a specific
+// way, based on a user-supplied list.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/SpecialCaseList.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+#include <string>
+#include <utility>
+
+namespace llvm {
+
+/// A set of patterns for one (section, category) pair. Patterns without
+/// regex metacharacters ("literal" ones) are kept in Strings; all remaining
+/// patterns are combined into one pipe-separated regular expression held by
+/// RegEx. The split exists for efficiency: StringSet matches literal strings
+/// much faster than Regex does.
+struct SpecialCaseList::Entry {
+  StringSet<> Strings;
+  Regex *RegEx;
+
+  Entry() : RegEx(0) {}
+
+  /// True if Query is one of the literal strings or is matched by RegEx
+  /// (when a regex has been set).
+  bool match(StringRef Query) const {
+    if (Strings.count(Query))
+      return true;
+    return RegEx && RegEx->match(Query);
+  }
+};
+
+// Constructs a list with no entries; parse() fills it in.
+SpecialCaseList::SpecialCaseList() : Entries() {}
+
+/// Builds a SpecialCaseList from the file at Path. An empty Path produces an
+/// empty list. If the file cannot be opened, Error is set and null is
+/// returned; otherwise the result of parsing the file contents is returned.
+SpecialCaseList *SpecialCaseList::create(
+    const StringRef Path, std::string &Error) {
+  if (Path.empty())
+    return new SpecialCaseList();
+
+  OwningPtr<MemoryBuffer> Buffer;
+  error_code EC = MemoryBuffer::getFile(Path, Buffer);
+  if (!EC)
+    return create(Buffer.get(), Error);
+
+  Error = (Twine("Can't open file '") + Path + "': " + EC.message()).str();
+  return 0;
+}
+
+SpecialCaseList *SpecialCaseList::create(
+ const MemoryBuffer *MB, std::string &Error) {
+ OwningPtr<SpecialCaseList> SCL(new SpecialCaseList());
+ if (!SCL->parse(MB, Error))
+ return 0;
+ return SCL.take();
+}
+
+SpecialCaseList *SpecialCaseList::createOrDie(const StringRef Path) {
+ std::string Error;
+ if (SpecialCaseList *SCL = create(Path, Error))
+ return SCL;
+ report_fatal_error(Error);
+}
+
+// Parses the contents of MB into Entries. Each non-comment line has the form
+// "prefix:pattern" or "prefix:pattern=category". Literal patterns are stored
+// directly; all other patterns are validated, anchored with ^...$ and joined
+// with '|' into one regex per (prefix, category). Returns false and sets
+// Error on a malformed line or an invalid regex.
+bool SpecialCaseList::parse(const MemoryBuffer *MB, std::string &Error) {
+ // Iterate through each line in the blacklist file.
+ SmallVector<StringRef, 16> Lines;
+ SplitString(MB->getBuffer(), Lines, "\n\r");
+ // Accumulated regex source text, keyed by prefix and then by category.
+ StringMap<StringMap<std::string> > Regexps;
+ assert(Entries.empty() &&
+ "parse() should be called on an empty SpecialCaseList");
+ // NOTE(review): LineNo counts the fragments produced by SplitString. If
+ // SplitString drops empty fragments (blank lines, "\r\n" pairs), the line
+ // numbers in error messages will be lower than the real ones -- confirm.
+ int LineNo = 1;
+ for (SmallVectorImpl<StringRef>::iterator I = Lines.begin(), E = Lines.end();
+ I != E; ++I, ++LineNo) {
+ // Ignore empty lines and lines starting with "#"
+ if (I->empty() || I->startswith("#"))
+ continue;
+ // Get our prefix and unparsed regexp.
+ std::pair<StringRef, StringRef> SplitLine = I->split(":");
+ StringRef Prefix = SplitLine.first;
+ if (SplitLine.second.empty()) {
+ // Missing ':' in the line, or nothing after it.
+ Error = (Twine("Malformed line ") + Twine(LineNo) + ": '" +
+ SplitLine.first + "'").str();
+ return false;
+ }
+
+ // An optional "=category" suffix selects the category; without it the
+ // category is the empty string.
+ std::pair<StringRef, StringRef> SplitRegexp = SplitLine.second.split("=");
+ std::string Regexp = SplitRegexp.first;
+ StringRef Category = SplitRegexp.second;
+
+ // Backwards compatibility: the old "global-init*" prefixes are rewritten
+ // to the corresponding section with category "init". Any category given
+ // on the line is overwritten.
+ if (Prefix == "global-init") {
+ Prefix = "global";
+ Category = "init";
+ } else if (Prefix == "global-init-type") {
+ Prefix = "type";
+ Category = "init";
+ } else if (Prefix == "global-init-src") {
+ Prefix = "src";
+ Category = "init";
+ }
+
+ // See if we can store Regexp in Strings.
+ if (Regex::isLiteralERE(Regexp)) {
+ Entries[Prefix][Category].Strings.insert(Regexp);
+ continue;
+ }
+
+ // Replace * with .*
+ // pos is advanced past the inserted ".*" so the new '*' is not expanded
+ // again.
+ for (size_t pos = 0; (pos = Regexp.find("*", pos)) != std::string::npos;
+ pos += strlen(".*")) {
+ Regexp.replace(pos, strlen("*"), ".*");
+ }
+
+ // Check that the regexp is valid.
+ Regex CheckRE(Regexp);
+ std::string REError;
+ if (!CheckRE.isValid(REError)) {
+ Error = (Twine("Malformed regex in line ") + Twine(LineNo) + ": '" +
+ SplitLine.second + "': " + REError).str();
+ return false;
+ }
+
+ // Add this regexp into the proper group by its prefix.
+ if (!Regexps[Prefix][Category].empty())
+ Regexps[Prefix][Category] += "|";
+ Regexps[Prefix][Category] += "^" + Regexp + "$";
+ }
+
+ // Iterate through each of the prefixes, and create Regexs for them.
+ // The Regex objects are heap-allocated here and deleted in the destructor.
+ for (StringMap<StringMap<std::string> >::const_iterator I = Regexps.begin(),
+ E = Regexps.end();
+ I != E; ++I) {
+ for (StringMap<std::string>::const_iterator II = I->second.begin(),
+ IE = I->second.end();
+ II != IE; ++II) {
+ Entries[I->getKey()][II->getKey()].RegEx = new Regex(II->getValue());
+ }
+ }
+ return true;
+}
+
+/// Deletes the Regex object of every entry (deleting a null RegEx is a
+/// no-op).
+SpecialCaseList::~SpecialCaseList() {
+  typedef StringMap<StringMap<Entry> >::iterator SectionIter;
+  typedef StringMap<Entry>::const_iterator CategoryIter;
+
+  for (SectionIter Section = Entries.begin(), SectionEnd = Entries.end();
+       Section != SectionEnd; ++Section) {
+    CategoryIter Cat = Section->second.begin();
+    CategoryIter CatEnd = Section->second.end();
+    for (; Cat != CatEnd; ++Cat)
+      delete Cat->second.RegEx;
+  }
+}
+
+/// A function is listed if its module is listed, or if its name matches the
+/// "fun" section for Category.
+bool SpecialCaseList::isIn(const Function& F, const StringRef Category) const {
+  if (isIn(*F.getParent(), Category))
+    return true;
+  return inSectionCategory("fun", F.getName(), Category);
+}
+
+/// Returns the name of the struct type that G's pointer type points to.
+/// Only non-literal struct types are supported; for anything else the
+/// placeholder "<unknown type>" is returned.
+static StringRef GetGlobalTypeString(const GlobalValue &G) {
+  // The type of G is a pointer type; look at the pointee.
+  Type *PointeeTy = G.getType()->getElementType();
+  StructType *STy = dyn_cast<StructType>(PointeeTy);
+  if (!STy || STy->isLiteral())
+    return "<unknown type>";
+  return STy->getName();
+}
+
+/// A global variable is listed if its module is listed, if its name matches
+/// the "global" section, or if its type string matches the "type" section.
+bool SpecialCaseList::isIn(const GlobalVariable &G,
+                           const StringRef Category) const {
+  if (isIn(*G.getParent(), Category))
+    return true;
+  if (inSectionCategory("global", G.getName(), Category))
+    return true;
+  return inSectionCategory("type", GetGlobalTypeString(G), Category);
+}
+
+/// An alias is listed if its module is listed. Otherwise an alias whose
+/// pointee type is a function type is looked up in the "fun" section only,
+/// and any other alias is looked up in the "global" and "type" sections.
+bool SpecialCaseList::isIn(const GlobalAlias &GA,
+                           const StringRef Category) const {
+  if (isIn(*GA.getParent(), Category))
+    return true;
+
+  const bool AliasesFunction =
+      isa<FunctionType>(GA.getType()->getElementType());
+  if (AliasesFunction)
+    return inSectionCategory("fun", GA.getName(), Category);
+
+  if (inSectionCategory("global", GA.getName(), Category))
+    return true;
+  return inSectionCategory("type", GetGlobalTypeString(GA), Category);
+}
+
+// A module is listed if its module identifier matches the "src" section for
+// Category.
+bool SpecialCaseList::isIn(const Module &M, const StringRef Category) const {
+ return inSectionCategory("src", M.getModuleIdentifier(), Category);
+}
+
+/// Returns true if Query matches the entry stored for (Section, Category).
+/// A missing section or category means no match.
+bool SpecialCaseList::inSectionCategory(const StringRef Section,
+                                        const StringRef Query,
+                                        const StringRef Category) const {
+  StringMap<StringMap<Entry> >::const_iterator SectionIt =
+      Entries.find(Section);
+  if (SectionIt != Entries.end()) {
+    StringMap<Entry>::const_iterator CategoryIt =
+        SectionIt->second.find(Category);
+    if (CategoryIt != SectionIt->second.end())
+      return CategoryIt->getValue().match(Query);
+  }
+  return false;
+}
+
+} // namespace llvm
diff --git a/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
new file mode 100644
index 000000000000..560f58160753
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -0,0 +1,122 @@
+//===- UnifyFunctionExitNodes.cpp - Make all functions have a single exit -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is used to ensure that functions have at most one return
+// instruction in them. Additionally, it keeps track of which node is the new
+// exit node of the CFG. If there are no exit nodes in the CFG, the getExitNode
+// method will return a null pointer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Transforms/Scalar.h"
+using namespace llvm;
+
+// Definition of the pass's static ID member, followed by the pass
+// registration under the name "mergereturn" with the description
+// "Unify function exit nodes".
+// NOTE(review): the two trailing `false` arguments are presumably the
+// CFG-only and is-analysis flags of INITIALIZE_PASS -- confirm.
+char UnifyFunctionExitNodes::ID = 0;
+INITIALIZE_PASS(UnifyFunctionExitNodes, "mergereturn",
+ "Unify function exit nodes", false, false)
+
+// Factory: returns a newly allocated UnifyFunctionExitNodes pass.
+Pass *llvm::createUnifyFunctionExitNodesPass() {
+ return new UnifyFunctionExitNodes();
+}
+
+// Declares which passes remain valid after this pass runs: critical-edge
+// breaking, mem2reg (referred to by name) and switch lowering.
+void UnifyFunctionExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{
+ // We preserve the non-critical-edgeness property
+ AU.addPreservedID(BreakCriticalEdgesID);
+ // This is a cluster of orthogonal Transforms
+ AU.addPreserved("mem2reg");
+ AU.addPreservedID(LowerSwitchID);
+}
+
+// runOnFunction - Unify the exit nodes of F. If more than one block ends in
+// 'unreachable', those blocks are redirected to a single new
+// "UnifiedUnreachableBlock"; if more than one block returns, the returns are
+// converted to unconditional branches to a single new "UnifiedReturnBlock"
+// (with a PHI node merging the return values for non-void functions).
+//
+// On exit, UnreachableBlock and ReturnBlock point at the unique block of each
+// kind, or are null if F has no block of that kind.
+//
+// Returns true if the function was modified by either unification step.
+//
+bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
+  // Set as soon as any block is created or any terminator is rewritten.
+  bool Changed = false;
+
+  // Loop over all of the blocks in a function, tracking all of the blocks that
+  // return.
+  //
+  std::vector<BasicBlock*> ReturningBlocks;
+  std::vector<BasicBlock*> UnreachableBlocks;
+  for(Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+    if (isa<ReturnInst>(I->getTerminator()))
+      ReturningBlocks.push_back(I);
+    else if (isa<UnreachableInst>(I->getTerminator()))
+      UnreachableBlocks.push_back(I);
+
+  // Then unreachable blocks.
+  if (UnreachableBlocks.empty()) {
+    UnreachableBlock = 0;
+  } else if (UnreachableBlocks.size() == 1) {
+    UnreachableBlock = UnreachableBlocks.front();
+  } else {
+    UnreachableBlock = BasicBlock::Create(F.getContext(),
+                                          "UnifiedUnreachableBlock", &F);
+    new UnreachableInst(F.getContext(), UnreachableBlock);
+
+    for (std::vector<BasicBlock*>::iterator I = UnreachableBlocks.begin(),
+           E = UnreachableBlocks.end(); I != E; ++I) {
+      BasicBlock *BB = *I;
+      BB->getInstList().pop_back();  // Remove the unreachable inst.
+      BranchInst::Create(UnreachableBlock, BB);
+    }
+    // The IR was modified even if the return blocks below need no work.
+    Changed = true;
+  }
+
+  // Now handle return blocks.
+  if (ReturningBlocks.empty()) {
+    ReturnBlock = 0;
+    return Changed;                        // No blocks return
+  } else if (ReturningBlocks.size() == 1) {
+    ReturnBlock = ReturningBlocks.front(); // Already has a single return block
+    return Changed;
+  }
+
+  // Otherwise, we need to insert a new basic block into the function, add a PHI
+  // nodes (if the function returns values), and convert all of the return
+  // instructions into unconditional branches.
+  //
+  BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(),
+                                               "UnifiedReturnBlock", &F);
+
+  PHINode *PN = 0;
+  if (F.getReturnType()->isVoidTy()) {
+    ReturnInst::Create(F.getContext(), NULL, NewRetBlock);
+  } else {
+    // If the function doesn't return void... add a PHI node to the block...
+    PN = PHINode::Create(F.getReturnType(), ReturningBlocks.size(),
+                         "UnifiedRetVal");
+    NewRetBlock->getInstList().push_back(PN);
+    ReturnInst::Create(F.getContext(), PN, NewRetBlock);
+  }
+
+  // Loop over all of the blocks, replacing the return instruction with an
+  // unconditional branch.
+  //
+  for (std::vector<BasicBlock*>::iterator I = ReturningBlocks.begin(),
+         E = ReturningBlocks.end(); I != E; ++I) {
+    BasicBlock *BB = *I;
+
+    // Add an incoming element to the PHI node for every return instruction that
+    // is merging into this new block...
+    if (PN)
+      PN->addIncoming(BB->getTerminator()->getOperand(0), BB);
+
+    BB->getInstList().pop_back();  // Remove the return insn
+    BranchInst::Create(NewRetBlock, BB);
+  }
+  ReturnBlock = NewRetBlock;
+  return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/Utils.cpp b/contrib/llvm/lib/Transforms/Utils/Utils.cpp
new file mode 100644
index 000000000000..c3df215c294a
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/Utils.cpp
@@ -0,0 +1,39 @@
+//===-- Utils.cpp - TransformUtils Infrastructure -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the common initialization infrastructure for the
+// TransformUtils library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm/PassRegistry.h"
+#include "llvm-c/Initialization.h"
+
+using namespace llvm;
+
+/// initializeTransformUtils - Initialize all passes in the TransformUtils
+/// library by calling each pass's initialize*Pass function with the given
+/// registry.
+void llvm::initializeTransformUtils(PassRegistry &Registry) {
+ initializeBreakCriticalEdgesPass(Registry);
+ initializeInstNamerPass(Registry);
+ initializeLCSSAPass(Registry);
+ initializeLoopSimplifyPass(Registry);
+ initializeLowerInvokePass(Registry);
+ initializeLowerSwitchPass(Registry);
+ initializePromotePassPass(Registry);
+ initializeUnifyFunctionExitNodesPass(Registry);
+ initializeInstSimplifierPass(Registry);
+ initializeMetaRenamerPass(Registry);
+}
+
+/// LLVMInitializeTransformUtils - C binding for initializeTransformUtils.
+/// Unwraps the C registry handle R and forwards to the C++ initializer.
+void LLVMInitializeTransformUtils(LLVMPassRegistryRef R) {
+ initializeTransformUtils(*unwrap(R));
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
new file mode 100644
index 000000000000..457fc80e1ea5
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
@@ -0,0 +1,224 @@
+//===- ValueMapper.cpp - Interface shared by lib/Transforms/Utils ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MapValue function, which is shared by various parts of
+// the lib/Transforms/Utils library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Metadata.h"
+using namespace llvm;
+
+// Out of line methods to get the vtable etc. for each class. The bodies are
+// intentionally empty.
+void ValueMapTypeRemapper::anchor() {}
+void ValueMaterializer::anchor() {}
+
+/// MapValue - Return the value that V maps to under VM, computing and caching
+/// the mapping when it is not already present.
+///
+/// The lookup proceeds in this order: an existing non-null entry in VM; a
+/// value produced by Materializer (if given); identity for global values and
+/// metadata strings; inline asm (re-created if TypeMapper changes its function
+/// type); metadata nodes (rebuilt if any operand maps to something else);
+/// and finally constants, which are rebuilt when an operand or the type
+/// changes. Returns null for a non-constant value that has no mapping.
+///
+/// TypeMapper and Materializer may be null.
+Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
+                      ValueMapTypeRemapper *TypeMapper,
+                      ValueMaterializer *Materializer) {
+  ValueToValueMapTy::iterator I = VM.find(V);
+
+  // If the value already exists in the map, use it.
+  if (I != VM.end() && I->second) return I->second;
+
+  // If we have a materializer and it can materialize a value, use that.
+  if (Materializer) {
+    if (Value *NewV = Materializer->materializeValueFor(const_cast<Value*>(V)))
+      return VM[V] = NewV;
+  }
+
+  // Global values do not need to be seeded into the VM if they
+  // are using the identity mapping.
+  if (isa<GlobalValue>(V) || isa<MDString>(V))
+    return VM[V] = const_cast<Value*>(V);
+
+  if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) {
+    // Inline asm may need *type* remapping.
+    FunctionType *NewTy = IA->getFunctionType();
+    if (TypeMapper) {
+      NewTy = cast<FunctionType>(TypeMapper->remapType(NewTy));
+
+      if (NewTy != IA->getFunctionType())
+        V = InlineAsm::get(NewTy, IA->getAsmString(), IA->getConstraintString(),
+                           IA->hasSideEffects(), IA->isAlignStack());
+    }
+
+    return VM[V] = const_cast<Value*>(V);
+  }
+
+
+  if (const MDNode *MD = dyn_cast<MDNode>(V)) {
+    // If this is a module-level metadata and we know that nothing at the module
+    // level is changing, then use an identity mapping.
+    if (!MD->isFunctionLocal() && (Flags & RF_NoModuleLevelChanges))
+      return VM[V] = const_cast<Value*>(V);
+
+    // Create a dummy node in case we have a metadata cycle.
+    MDNode *Dummy = MDNode::getTemporary(V->getContext(), None);
+    VM[V] = Dummy;
+
+    // Check all operands to see if any need to be remapped.
+    for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) {
+      Value *OP = MD->getOperand(i);
+      if (OP == 0) continue;
+      Value *Mapped_OP = MapValue(OP, VM, Flags, TypeMapper, Materializer);
+      // Use identity map if Mapped_Op is null and we can ignore missing
+      // entries.
+      if (Mapped_OP == OP ||
+          (Mapped_OP == 0 && (Flags & RF_IgnoreMissingEntries)))
+        continue;
+
+      // Ok, at least one operand needs remapping. Rebuild the whole operand
+      // list from the start; this path always returns, so reusing i is safe.
+      SmallVector<Value*, 4> Elts;
+      Elts.reserve(MD->getNumOperands());
+      for (i = 0; i != e; ++i) {
+        Value *Op = MD->getOperand(i);
+        if (Op == 0)
+          Elts.push_back(0);
+        else {
+          Value *Mapped_Op = MapValue(Op, VM, Flags, TypeMapper, Materializer);
+          // Use identity map if Mapped_Op is null and we can ignore missing
+          // entries.
+          if (Mapped_Op == 0 && (Flags & RF_IgnoreMissingEntries))
+            Mapped_Op = Op;
+          Elts.push_back(Mapped_Op);
+        }
+      }
+      MDNode *NewMD = MDNode::get(V->getContext(), Elts);
+      Dummy->replaceAllUsesWith(NewMD);
+      VM[V] = NewMD;
+      MDNode::deleteTemporary(Dummy);
+      return NewMD;
+    }
+
+    VM[V] = const_cast<Value*>(V);
+    MDNode::deleteTemporary(Dummy);
+
+    // No operands needed remapping.  Use an identity mapping.
+    return const_cast<Value*>(V);
+  }
+
+  // Okay, this either must be a constant (which may or may not be mappable) or
+  // is something that is not in the mapping table.
+  Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V));
+  if (C == 0)
+    return 0;
+
+  if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) {
+    Function *F =
+      cast<Function>(MapValue(BA->getFunction(), VM, Flags, TypeMapper, Materializer));
+    BasicBlock *BB = cast_or_null<BasicBlock>(MapValue(BA->getBasicBlock(), VM,
+                                                       Flags, TypeMapper, Materializer));
+    return VM[V] = BlockAddress::get(F, BB ? BB : BA->getBasicBlock());
+  }
+
+  // Otherwise, we have some other constant to remap.  Start by checking to see
+  // if all operands have an identity remapping.
+  unsigned OpNo = 0, NumOperands = C->getNumOperands();
+  Value *Mapped = 0;
+  for (; OpNo != NumOperands; ++OpNo) {
+    Value *Op = C->getOperand(OpNo);
+    Mapped = MapValue(Op, VM, Flags, TypeMapper, Materializer);
+    // Stop at the first operand that does not map to itself. The comparison
+    // must be against the operand, not against C: a mapped operand never
+    // equals the constant that contains it, so comparing with C would stop
+    // on the first operand every time and defeat the identity fast path.
+    if (Mapped != Op) break;
+  }
+
+  // See if the type mapper wants to remap the type as well.
+  Type *NewTy = C->getType();
+  if (TypeMapper)
+    NewTy = TypeMapper->remapType(NewTy);
+
+  // If the result type and all operands match up, then just insert an identity
+  // mapping.
+  if (OpNo == NumOperands && NewTy == C->getType())
+    return VM[V] = C;
+
+  // Okay, we need to create a new constant.  We've already processed some or
+  // all of the operands, set them all up now.
+  SmallVector<Constant*, 8> Ops;
+  Ops.reserve(NumOperands);
+  for (unsigned j = 0; j != OpNo; ++j)
+    Ops.push_back(cast<Constant>(C->getOperand(j)));
+
+  // If one of the operands mismatch, push it and the other mapped operands.
+  if (OpNo != NumOperands) {
+    Ops.push_back(cast<Constant>(Mapped));
+
+    // Map the rest of the operands that aren't processed yet.
+    for (++OpNo; OpNo != NumOperands; ++OpNo)
+      Ops.push_back(MapValue(cast<Constant>(C->getOperand(OpNo)), VM,
+                             Flags, TypeMapper, Materializer));
+  }
+
+  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+    return VM[V] = CE->getWithOperands(Ops, NewTy);
+  if (isa<ConstantArray>(C))
+    return VM[V] = ConstantArray::get(cast<ArrayType>(NewTy), Ops);
+  if (isa<ConstantStruct>(C))
+    return VM[V] = ConstantStruct::get(cast<StructType>(NewTy), Ops);
+  if (isa<ConstantVector>(C))
+    return VM[V] = ConstantVector::get(Ops);
+  // If this is a no-operand constant, it must be because the type was remapped.
+  if (isa<UndefValue>(C))
+    return VM[V] = UndefValue::get(NewTy);
+  if (isa<ConstantAggregateZero>(C))
+    return VM[V] = ConstantAggregateZero::get(NewTy);
+  assert(isa<ConstantPointerNull>(C));
+  return VM[V] = ConstantPointerNull::get(cast<PointerType>(NewTy));
+}
+
+/// RemapInstruction - Rewrite instruction I in place so that its operands,
+/// the incoming blocks of a PHI node, and its attached metadata refer to the
+/// values VMap maps them to. If TypeMapper is given, the instruction's own
+/// type is remapped as well.
+///
+void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
+                            RemapFlags Flags, ValueMapTypeRemapper *TypeMapper,
+                            ValueMaterializer *Materializer){
+  // Operands first. An operand without a mapping is left untouched, which is
+  // only legal when missing entries are being ignored.
+  for (User::op_iterator Use = I->op_begin(), UseEnd = I->op_end();
+       Use != UseEnd; ++Use) {
+    Value *NewOperand = MapValue(*Use, VMap, Flags, TypeMapper, Materializer);
+    if (NewOperand == 0) {
+      assert((Flags & RF_IgnoreMissingEntries) &&
+             "Referenced value not in value map!");
+      continue;
+    }
+    *Use = NewOperand;
+  }
+
+  // PHI nodes additionally reference their incoming blocks.
+  if (PHINode *Phi = dyn_cast<PHINode>(I)) {
+    const unsigned NumIncoming = Phi->getNumIncomingValues();
+    for (unsigned Idx = 0; Idx != NumIncoming; ++Idx) {
+      Value *NewBlock = MapValue(Phi->getIncomingBlock(Idx), VMap, Flags);
+      if (NewBlock == 0) {
+        assert((Flags & RF_IgnoreMissingEntries) &&
+               "Referenced block not in value map!");
+        continue;
+      }
+      Phi->setIncomingBlock(Idx, cast<BasicBlock>(NewBlock));
+    }
+  }
+
+  // Attached metadata: replace only the nodes whose mapping differs.
+  typedef std::pair<unsigned, MDNode *> KindAndNode;
+  SmallVector<KindAndNode, 4> Attachments;
+  I->getAllMetadata(Attachments);
+  for (SmallVectorImpl<KindAndNode>::iterator
+         It = Attachments.begin(), End = Attachments.end(); It != End; ++It) {
+    MDNode *OldNode = It->second;
+    MDNode *NewNode = MapValue(OldNode, VMap, Flags, TypeMapper, Materializer);
+    if (NewNode != OldNode)
+      I->setMetadata(It->first, NewNode);
+  }
+
+  // Finally, remap the instruction's own type if a type mapper was supplied.
+  if (TypeMapper)
+    I->mutateType(TypeMapper->remapType(I->getType()));
+}