aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2016-08-16 21:02:59 +0000
committerDimitry Andric <dim@FreeBSD.org>2016-08-16 21:02:59 +0000
commit3ca95b020283db6244cab92ede73c969253b6a31 (patch)
treed16e791e58694facd8f68d3e2797a1eaa8018afc /contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
parent27067774dce3388702a4cf744d7096c6fb71b688 (diff)
parentc3aee98e721333f265a88d6bf348e6e468f027d4 (diff)
downloadsrc-3ca95b020283db6244cab92ede73c969253b6a31.tar.gz
src-3ca95b020283db6244cab92ede73c969253b6a31.zip
Update llvm to release_39 branch r276489, and resolve conflicts.
Notes
Notes: svn path=/projects/clang390-import/; revision=304240
Diffstat (limited to 'contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp')
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp497
1 files changed, 380 insertions, 117 deletions
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 0d68f18ad0e5..861a50cf354d 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -16,8 +16,8 @@
// case, we need to generate code to execute these 'left over' iterations.
//
// The current strategy generates an if-then-else sequence prior to the
-// unrolled loop to execute the 'left over' iterations. Other strategies
-// include generate a loop before or after the unrolled loop.
+// unrolled loop to execute the 'left over' iterations before or after the
+// unrolled loop.
//
//===----------------------------------------------------------------------===//
@@ -60,91 +60,220 @@ STATISTIC(NumRuntimeUnrolled,
/// than the unroll factor.
///
static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
- BasicBlock *LastPrologBB, BasicBlock *PrologEnd,
- BasicBlock *OrigPH, BasicBlock *NewPH,
- ValueToValueMapTy &VMap, DominatorTree *DT,
- LoopInfo *LI, bool PreserveLCSSA) {
+ BasicBlock *PrologExit, BasicBlock *PreHeader,
+ BasicBlock *NewPreHeader, ValueToValueMapTy &VMap,
+ DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA) {
BasicBlock *Latch = L->getLoopLatch();
assert(Latch && "Loop must have a latch");
+ BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]);
// Create a PHI node for each outgoing value from the original loop
// (which means it is an outgoing value from the prolog code too).
// The new PHI node is inserted in the prolog end basic block.
- // The new PHI name is added as an operand of a PHI node in either
+ // The new PHI node value is added as an operand of a PHI node in either
// the loop header or the loop exit block.
- for (succ_iterator SBI = succ_begin(Latch), SBE = succ_end(Latch);
- SBI != SBE; ++SBI) {
- for (BasicBlock::iterator BBI = (*SBI)->begin();
- PHINode *PN = dyn_cast<PHINode>(BBI); ++BBI) {
-
+ for (BasicBlock *Succ : successors(Latch)) {
+ for (Instruction &BBI : *Succ) {
+ PHINode *PN = dyn_cast<PHINode>(&BBI);
+ // Exit when we passed all PHI nodes.
+ if (!PN)
+ break;
// Add a new PHI node to the prolog end block and add the
// appropriate incoming values.
- PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName()+".unr",
- PrologEnd->getTerminator());
+ PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName() + ".unr",
+ PrologExit->getFirstNonPHI());
// Adding a value to the new PHI node from the original loop preheader.
// This is the value that skips all the prolog code.
if (L->contains(PN)) {
- NewPN->addIncoming(PN->getIncomingValueForBlock(NewPH), OrigPH);
+ NewPN->addIncoming(PN->getIncomingValueForBlock(NewPreHeader),
+ PreHeader);
} else {
- NewPN->addIncoming(UndefValue::get(PN->getType()), OrigPH);
+ NewPN->addIncoming(UndefValue::get(PN->getType()), PreHeader);
}
Value *V = PN->getIncomingValueForBlock(Latch);
if (Instruction *I = dyn_cast<Instruction>(V)) {
if (L->contains(I)) {
- V = VMap[I];
+ V = VMap.lookup(I);
}
}
// Adding a value to the new PHI node from the last prolog block
// that was created.
- NewPN->addIncoming(V, LastPrologBB);
+ NewPN->addIncoming(V, PrologLatch);
// Update the existing PHI node operand with the value from the
// new PHI node. How this is done depends on if the existing
// PHI node is in the original loop block, or the exit block.
if (L->contains(PN)) {
- PN->setIncomingValue(PN->getBasicBlockIndex(NewPH), NewPN);
+ PN->setIncomingValue(PN->getBasicBlockIndex(NewPreHeader), NewPN);
} else {
- PN->addIncoming(NewPN, PrologEnd);
+ PN->addIncoming(NewPN, PrologExit);
}
}
}
- // Create a branch around the orignal loop, which is taken if there are no
+ // Create a branch around the original loop, which is taken if there are no
// iterations remaining to be executed after running the prologue.
- Instruction *InsertPt = PrologEnd->getTerminator();
+ Instruction *InsertPt = PrologExit->getTerminator();
IRBuilder<> B(InsertPt);
assert(Count != 0 && "nonsensical Count!");
- // If BECount <u (Count - 1) then (BECount + 1) & (Count - 1) == (BECount + 1)
- // (since Count is a power of 2). This means %xtraiter is (BECount + 1) and
- // and all of the iterations of this loop were executed by the prologue. Note
- // that if BECount <u (Count - 1) then (BECount + 1) cannot unsigned-overflow.
+ // If BECount <u (Count - 1) then (BECount + 1) % Count == (BECount + 1)
+ // This means %xtraiter is (BECount + 1) and all of the iterations of this
+ // loop were executed by the prologue. Note that if BECount <u (Count - 1)
+ // then (BECount + 1) cannot unsigned-overflow.
Value *BrLoopExit =
B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1));
BasicBlock *Exit = L->getUniqueExitBlock();
assert(Exit && "Loop must have a single exit block only");
// Split the exit to maintain loop canonicalization guarantees
- SmallVector<BasicBlock*, 4> Preds(pred_begin(Exit), pred_end(Exit));
+ SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", DT, LI,
PreserveLCSSA);
// Add the branch to the exit block (around the unrolled loop)
- B.CreateCondBr(BrLoopExit, Exit, NewPH);
+ B.CreateCondBr(BrLoopExit, Exit, NewPreHeader);
+ InsertPt->eraseFromParent();
+}
+
+/// Connect the unrolling epilog code to the original loop.
+/// The unrolling epilog code contains code to execute the
+/// 'extra' iterations if the run-time trip count modulo the
+/// unroll count is non-zero.
+///
+/// This function performs the following:
+/// - Update PHI nodes at the unrolling loop exit and epilog loop exit
+/// - Create PHI nodes at the unrolling loop exit to combine
+/// values that exit the unrolling loop code and jump around it.
+/// - Update PHI operands in the epilog loop by the new PHI nodes
+/// - Branch around the epilog loop if extra iters (ModVal) is zero.
+///
+static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
+ BasicBlock *Exit, BasicBlock *PreHeader,
+ BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader,
+ ValueToValueMapTy &VMap, DominatorTree *DT,
+ LoopInfo *LI, bool PreserveLCSSA) {
+ BasicBlock *Latch = L->getLoopLatch();
+ assert(Latch && "Loop must have a latch");
+ BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]);
+
+ // Loop structure should be the following:
+ //
+ // PreHeader
+ // NewPreHeader
+ // Header
+ // ...
+ // Latch
+ // NewExit (PN)
+ // EpilogPreHeader
+ // EpilogHeader
+ // ...
+ // EpilogLatch
+ // Exit (EpilogPN)
+
+ // Update PHI nodes at NewExit and Exit.
+ for (Instruction &BBI : *NewExit) {
+ PHINode *PN = dyn_cast<PHINode>(&BBI);
+ // Exit when we passed all PHI nodes.
+ if (!PN)
+ break;
+ // PN should be used in another PHI located in Exit block as
+ // Exit was split by SplitBlockPredecessors into Exit and NewExit
+ // Basicaly it should look like:
+ // NewExit:
+ // PN = PHI [I, Latch]
+ // ...
+ // Exit:
+ // EpilogPN = PHI [PN, EpilogPreHeader]
+ //
+ // There is EpilogPreHeader incoming block instead of NewExit as
+ // NewExit was spilt 1 more time to get EpilogPreHeader.
+ assert(PN->hasOneUse() && "The phi should have 1 use");
+ PHINode *EpilogPN = cast<PHINode> (PN->use_begin()->getUser());
+ assert(EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block");
+
+ // Add incoming PreHeader from branch around the Loop
+ PN->addIncoming(UndefValue::get(PN->getType()), PreHeader);
+
+ Value *V = PN->getIncomingValueForBlock(Latch);
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I && L->contains(I))
+ // If value comes from an instruction in the loop add VMap value.
+ V = VMap.lookup(I);
+ // For the instruction out of the loop, constant or undefined value
+ // insert value itself.
+ EpilogPN->addIncoming(V, EpilogLatch);
+
+ assert(EpilogPN->getBasicBlockIndex(EpilogPreHeader) >= 0 &&
+ "EpilogPN should have EpilogPreHeader incoming block");
+ // Change EpilogPreHeader incoming block to NewExit.
+ EpilogPN->setIncomingBlock(EpilogPN->getBasicBlockIndex(EpilogPreHeader),
+ NewExit);
+ // Now PHIs should look like:
+ // NewExit:
+ // PN = PHI [I, Latch], [undef, PreHeader]
+ // ...
+ // Exit:
+ // EpilogPN = PHI [PN, NewExit], [VMap[I], EpilogLatch]
+ }
+
+ // Create PHI nodes at NewExit (from the unrolling loop Latch and PreHeader).
+ // Update corresponding PHI nodes in epilog loop.
+ for (BasicBlock *Succ : successors(Latch)) {
+ // Skip this as we already updated phis in exit blocks.
+ if (!L->contains(Succ))
+ continue;
+ for (Instruction &BBI : *Succ) {
+ PHINode *PN = dyn_cast<PHINode>(&BBI);
+ // Exit when we passed all PHI nodes.
+ if (!PN)
+ break;
+ // Add new PHI nodes to the loop exit block and update epilog
+ // PHIs with the new PHI values.
+ PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName() + ".unr",
+ NewExit->getFirstNonPHI());
+ // Adding a value to the new PHI node from the unrolling loop preheader.
+ NewPN->addIncoming(PN->getIncomingValueForBlock(NewPreHeader), PreHeader);
+ // Adding a value to the new PHI node from the unrolling loop latch.
+ NewPN->addIncoming(PN->getIncomingValueForBlock(Latch), Latch);
+
+ // Update the existing PHI node operand with the value from the new PHI
+ // node. Corresponding instruction in epilog loop should be PHI.
+ PHINode *VPN = cast<PHINode>(VMap[&BBI]);
+ VPN->setIncomingValue(VPN->getBasicBlockIndex(EpilogPreHeader), NewPN);
+ }
+ }
+
+ Instruction *InsertPt = NewExit->getTerminator();
+ IRBuilder<> B(InsertPt);
+ Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod");
+ assert(Exit && "Loop must have a single exit block only");
+ // Split the exit to maintain loop canonicalization guarantees
+ SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
+ SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI,
+ PreserveLCSSA);
+ // Add the branch to the exit block (around the unrolling loop)
+ B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit);
InsertPt->eraseFromParent();
}
/// Create a clone of the blocks in a loop and connect them together.
-/// If UnrollProlog is true, loop structure will not be cloned, otherwise a new
-/// loop will be created including all cloned blocks, and the iterator of it
-/// switches to count NewIter down to 0.
+/// If CreateRemainderLoop is false, loop structure will not be cloned,
+/// otherwise a new loop will be created including all cloned blocks, and the
+/// iterator of it switches to count NewIter down to 0.
+/// The cloned blocks should be inserted between InsertTop and InsertBot.
+/// If loop structure is cloned InsertTop should be new preheader, InsertBot
+/// new loop exit.
///
-static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
+static void CloneLoopBlocks(Loop *L, Value *NewIter,
+ const bool CreateRemainderLoop,
+ const bool UseEpilogRemainder,
BasicBlock *InsertTop, BasicBlock *InsertBot,
+ BasicBlock *Preheader,
std::vector<BasicBlock *> &NewBlocks,
LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
LoopInfo *LI) {
- BasicBlock *Preheader = L->getLoopPreheader();
+ StringRef suffix = UseEpilogRemainder ? "epil" : "prol";
BasicBlock *Header = L->getHeader();
BasicBlock *Latch = L->getLoopLatch();
Function *F = Header->getParent();
@@ -152,7 +281,7 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
Loop *NewLoop = nullptr;
Loop *ParentLoop = L->getParentLoop();
- if (!UnrollProlog) {
+ if (CreateRemainderLoop) {
NewLoop = new Loop();
if (ParentLoop)
ParentLoop->addChildLoop(NewLoop);
@@ -163,7 +292,7 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
// For each block in the original loop, create a new copy,
// and update the value map with the newly created values.
for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
- BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".prol", F);
+ BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F);
NewBlocks.push_back(NewBB);
if (NewLoop)
@@ -176,19 +305,20 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
// For the first block, add a CFG connection to this newly
// created block.
InsertTop->getTerminator()->setSuccessor(0, NewBB);
-
}
+
if (Latch == *BB) {
- // For the last block, if UnrollProlog is true, create a direct jump to
- // InsertBot. If not, create a loop back to cloned head.
+ // For the last block, if CreateRemainderLoop is false, create a direct
+ // jump to InsertBot. If not, create a loop back to cloned head.
VMap.erase((*BB)->getTerminator());
BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);
BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator());
IRBuilder<> Builder(LatchBR);
- if (UnrollProlog) {
+ if (!CreateRemainderLoop) {
Builder.CreateBr(InsertBot);
} else {
- PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2, "prol.iter",
+ PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2,
+ suffix + ".iter",
FirstLoopBB->getFirstNonPHI());
Value *IdxSub =
Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
@@ -207,9 +337,15 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
// cloned loop.
for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
- if (UnrollProlog) {
- VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader);
- cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
+ if (!CreateRemainderLoop) {
+ if (UseEpilogRemainder) {
+ unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
+ NewPHI->setIncomingBlock(idx, InsertTop);
+ NewPHI->removeIncomingValue(Latch, false);
+ } else {
+ VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader);
+ cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
+ }
} else {
unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
NewPHI->setIncomingBlock(idx, InsertTop);
@@ -217,8 +353,8 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
idx = NewPHI->getBasicBlockIndex(Latch);
Value *InVal = NewPHI->getIncomingValue(idx);
NewPHI->setIncomingBlock(idx, NewLatch);
- if (VMap[InVal])
- NewPHI->setIncomingValue(idx, VMap[InVal]);
+ if (Value *V = VMap.lookup(InVal))
+ NewPHI->setIncomingValue(idx, V);
}
}
if (NewLoop) {
@@ -254,11 +390,11 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
}
}
-/// Insert code in the prolog code when unrolling a loop with a
+/// Insert code in the prolog/epilog code when unrolling a loop with a
/// run-time trip-count.
///
/// This method assumes that the loop unroll factor is total number
-/// of loop bodes in the loop after unrolling. (Some folks refer
+/// of loop bodies in the loop after unrolling. (Some folks refer
/// to the unroll factor as the number of *extra* copies added).
/// We assume also that the loop unroll factor is a power-of-two. So, after
/// unrolling the loop, the number of loop bodies executed is 2,
@@ -266,37 +402,56 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
/// instruction in SimplifyCFG.cpp. Then, the backend decides how code for
/// the switch instruction is generated.
///
+/// ***Prolog case***
/// extraiters = tripcount % loopfactor
/// if (extraiters == 0) jump Loop:
-/// else jump Prol
+/// else jump Prol:
/// Prol: LoopBody;
/// extraiters -= 1 // Omitted if unroll factor is 2.
/// if (extraiters != 0) jump Prol: // Omitted if unroll factor is 2.
-/// if (tripcount < loopfactor) jump End
+/// if (tripcount < loopfactor) jump End:
/// Loop:
/// ...
/// End:
///
-bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
- bool AllowExpensiveTripCount, LoopInfo *LI,
- ScalarEvolution *SE, DominatorTree *DT,
- bool PreserveLCSSA) {
+/// ***Epilog case***
+/// extraiters = tripcount % loopfactor
+/// if (tripcount < loopfactor) jump LoopExit:
+/// unroll_iters = tripcount - extraiters
+/// Loop: LoopBody; (executes unroll_iter times);
+/// unroll_iter -= 1
+/// if (unroll_iter != 0) jump Loop:
+/// LoopExit:
+/// if (extraiters == 0) jump EpilExit:
+/// Epil: LoopBody; (executes extraiters times)
+/// extraiters -= 1 // Omitted if unroll factor is 2.
+/// if (extraiters != 0) jump Epil: // Omitted if unroll factor is 2.
+/// EpilExit:
+
+bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
+ bool AllowExpensiveTripCount,
+ bool UseEpilogRemainder,
+ LoopInfo *LI, ScalarEvolution *SE,
+ DominatorTree *DT, bool PreserveLCSSA) {
// for now, only unroll loops that contain a single exit
if (!L->getExitingBlock())
return false;
// Make sure the loop is in canonical form, and there is a single
// exit block only.
- if (!L->isLoopSimplifyForm() || !L->getUniqueExitBlock())
+ if (!L->isLoopSimplifyForm())
+ return false;
+ BasicBlock *Exit = L->getUniqueExitBlock(); // successor out of loop
+ if (!Exit)
return false;
- // Use Scalar Evolution to compute the trip count. This allows more
- // loops to be unrolled than relying on induction var simplification
+ // Use Scalar Evolution to compute the trip count. This allows more loops to
+ // be unrolled than relying on induction var simplification.
if (!SE)
return false;
- // Only unroll loops with a computable trip count and the trip count needs
- // to be an int value (allowing a pointer type is a TODO item)
+ // Only unroll loops with a computable trip count, and the trip count needs
+ // to be an int value (allowing a pointer type is a TODO item).
const SCEV *BECountSC = SE->getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BECountSC) ||
!BECountSC->getType()->isIntegerTy())
@@ -304,21 +459,19 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth();
- // Add 1 since the backedge count doesn't include the first loop iteration
+ // Add 1 since the backedge count doesn't include the first loop iteration.
const SCEV *TripCountSC =
SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));
if (isa<SCEVCouldNotCompute>(TripCountSC))
return false;
BasicBlock *Header = L->getHeader();
+ BasicBlock *PreHeader = L->getLoopPreheader();
+ BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
const DataLayout &DL = Header->getModule()->getDataLayout();
SCEVExpander Expander(*SE, DL, "loop-unroll");
- if (!AllowExpensiveTripCount && Expander.isHighCostExpansion(TripCountSC, L))
- return false;
-
- // We only handle cases when the unroll factor is a power of 2.
- // Count is the loop unroll factor, the number of extra copies added + 1.
- if (!isPowerOf2_32(Count))
+ if (!AllowExpensiveTripCount &&
+ Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR))
return false;
// This constraint lets us deal with an overflowing trip count easily; see the
@@ -326,51 +479,115 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
if (Log2_32(Count) > BEWidth)
return false;
- // If this loop is nested, then the loop unroller changes the code in
- // parent loop, so the Scalar Evolution pass needs to be run again
+ // If this loop is nested, then the loop unroller changes the code in the
+ // parent loop, so the Scalar Evolution pass needs to be run again.
if (Loop *ParentLoop = L->getParentLoop())
SE->forgetLoop(ParentLoop);
- BasicBlock *PH = L->getLoopPreheader();
BasicBlock *Latch = L->getLoopLatch();
- // It helps to splits the original preheader twice, one for the end of the
- // prolog code and one for a new loop preheader
- BasicBlock *PEnd = SplitEdge(PH, Header, DT, LI);
- BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), DT, LI);
- BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator());
+ // Loop structure is the following:
+ //
+ // PreHeader
+ // Header
+ // ...
+ // Latch
+ // Exit
+
+ BasicBlock *NewPreHeader;
+ BasicBlock *NewExit = nullptr;
+ BasicBlock *PrologExit = nullptr;
+ BasicBlock *EpilogPreHeader = nullptr;
+ BasicBlock *PrologPreHeader = nullptr;
+
+ if (UseEpilogRemainder) {
+ // If epilog remainder
+ // Split PreHeader to insert a branch around loop for unrolling.
+ NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI);
+ NewPreHeader->setName(PreHeader->getName() + ".new");
+ // Split Exit to create phi nodes from branch above.
+ SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
+ NewExit = SplitBlockPredecessors(Exit, Preds, ".unr-lcssa",
+ DT, LI, PreserveLCSSA);
+ // Split NewExit to insert epilog remainder loop.
+ EpilogPreHeader = SplitBlock(NewExit, NewExit->getTerminator(), DT, LI);
+ EpilogPreHeader->setName(Header->getName() + ".epil.preheader");
+ } else {
+ // If prolog remainder
+ // Split the original preheader twice to insert prolog remainder loop
+ PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI);
+ PrologPreHeader->setName(Header->getName() + ".prol.preheader");
+ PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(),
+ DT, LI);
+ PrologExit->setName(Header->getName() + ".prol.loopexit");
+ // Split PrologExit to get NewPreHeader.
+ NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI);
+ NewPreHeader->setName(PreHeader->getName() + ".new");
+ }
+ // Loop structure should be the following:
+ // Epilog Prolog
+ //
+ // PreHeader PreHeader
+ // *NewPreHeader *PrologPreHeader
+ // Header *PrologExit
+ // ... *NewPreHeader
+ // Latch Header
+ // *NewExit ...
+ // *EpilogPreHeader Latch
+ // Exit Exit
+
+ // Calculate conditions for branch around loop for unrolling
+ // in epilog case and around prolog remainder loop in prolog case.
// Compute the number of extra iterations required, which is:
- // extra iterations = run-time trip count % (loop unroll factor + 1)
+ // extra iterations = run-time trip count % loop unroll factor
+ PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
PreHeaderBR);
Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
PreHeaderBR);
-
IRBuilder<> B(PreHeaderBR);
- Value *ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");
-
- // If ModVal is zero, we know that either
- // 1. there are no iteration to be run in the prologue loop
- // OR
- // 2. the addition computing TripCount overflowed
- //
- // If (2) is true, we know that TripCount really is (1 << BEWidth) and so the
- // number of iterations that remain to be run in the original loop is a
- // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we
- // explicitly check this above).
-
- Value *BranchVal = B.CreateIsNotNull(ModVal, "lcmp.mod");
-
- // Branch to either the extra iterations or the cloned/unrolled loop
- // We will fix up the true branch label when adding loop body copies
- B.CreateCondBr(BranchVal, PEnd, PEnd);
- assert(PreHeaderBR->isUnconditional() &&
- PreHeaderBR->getSuccessor(0) == PEnd &&
- "CFG edges in Preheader are not correct");
+ Value *ModVal;
+ // Calculate ModVal = (BECount + 1) % Count.
+ // Note that TripCount is BECount + 1.
+ if (isPowerOf2_32(Count)) {
+ // When Count is power of 2 we don't BECount for epilog case, however we'll
+ // need it for a branch around unrolling loop for prolog case.
+ ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");
+ // 1. There are no iterations to be run in the prolog/epilog loop.
+ // OR
+ // 2. The addition computing TripCount overflowed.
+ //
+ // If (2) is true, we know that TripCount really is (1 << BEWidth) and so
+ // the number of iterations that remain to be run in the original loop is a
+ // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we
+ // explicitly check this above).
+ } else {
+ // As (BECount + 1) can potentially unsigned overflow we count
+ // (BECount % Count) + 1 which is overflow safe as BECount % Count < Count.
+ Value *ModValTmp = B.CreateURem(BECount,
+ ConstantInt::get(BECount->getType(),
+ Count));
+ Value *ModValAdd = B.CreateAdd(ModValTmp,
+ ConstantInt::get(ModValTmp->getType(), 1));
+ // At that point (BECount % Count) + 1 could be equal to Count.
+ // To handle this case we need to take mod by Count one more time.
+ ModVal = B.CreateURem(ModValAdd,
+ ConstantInt::get(BECount->getType(), Count),
+ "xtraiter");
+ }
+ Value *BranchVal =
+ UseEpilogRemainder ? B.CreateICmpULT(BECount,
+ ConstantInt::get(BECount->getType(),
+ Count - 1)) :
+ B.CreateIsNotNull(ModVal, "lcmp.mod");
+ BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader;
+ BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit;
+ // Branch to either remainder (extra iterations) loop or unrolling loop.
+ B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop);
PreHeaderBR->eraseFromParent();
Function *F = Header->getParent();
// Get an ordered list of blocks in the loop to help with the ordering of the
- // cloned blocks in the prolog code
+ // cloned blocks in the prolog/epilog code
LoopBlocksDFS LoopBlocks(L);
LoopBlocks.perform(LI);
@@ -382,34 +599,80 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
std::vector<BasicBlock *> NewBlocks;
ValueToValueMapTy VMap;
- bool UnrollPrologue = Count == 2;
+ // For unroll factor 2 remainder loop will have 1 iterations.
+ // Do not create 1 iteration loop.
+ bool CreateRemainderLoop = (Count != 2);
// Clone all the basic blocks in the loop. If Count is 2, we don't clone
// the loop, otherwise we create a cloned loop to execute the extra
// iterations. This function adds the appropriate CFG connections.
- CloneLoopBlocks(L, ModVal, UnrollPrologue, PH, PEnd, NewBlocks, LoopBlocks,
- VMap, LI);
-
- // Insert the cloned blocks into function just before the original loop
- F->getBasicBlockList().splice(PEnd->getIterator(), F->getBasicBlockList(),
- NewBlocks[0]->getIterator(), F->end());
-
- // Rewrite the cloned instruction operands to use the values
- // created when the clone is created.
- for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) {
- for (BasicBlock::iterator I = NewBlocks[i]->begin(),
- E = NewBlocks[i]->end();
- I != E; ++I) {
- RemapInstruction(&*I, VMap,
- RF_NoModuleLevelChanges | RF_IgnoreMissingEntries);
+ BasicBlock *InsertBot = UseEpilogRemainder ? Exit : PrologExit;
+ BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
+ CloneLoopBlocks(L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop,
+ InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, LI);
+
+ // Insert the cloned blocks into the function.
+ F->getBasicBlockList().splice(InsertBot->getIterator(),
+ F->getBasicBlockList(),
+ NewBlocks[0]->getIterator(),
+ F->end());
+
+ // Loop structure should be the following:
+ // Epilog Prolog
+ //
+ // PreHeader PreHeader
+ // NewPreHeader PrologPreHeader
+ // Header PrologHeader
+ // ... ...
+ // Latch PrologLatch
+ // NewExit PrologExit
+ // EpilogPreHeader NewPreHeader
+ // EpilogHeader Header
+ // ... ...
+ // EpilogLatch Latch
+ // Exit Exit
+
+ // Rewrite the cloned instruction operands to use the values created when the
+ // clone is created.
+ for (BasicBlock *BB : NewBlocks) {
+ for (Instruction &I : *BB) {
+ RemapInstruction(&I, VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
}
}
- // Connect the prolog code to the original loop and update the
- // PHI functions.
- BasicBlock *LastLoopBB = cast<BasicBlock>(VMap[Latch]);
- ConnectProlog(L, BECount, Count, LastLoopBB, PEnd, PH, NewPH, VMap, DT, LI,
- PreserveLCSSA);
+ if (UseEpilogRemainder) {
+ // Connect the epilog code to the original loop and update the
+ // PHI functions.
+ ConnectEpilog(L, ModVal, NewExit, Exit, PreHeader,
+ EpilogPreHeader, NewPreHeader, VMap, DT, LI,
+ PreserveLCSSA);
+
+ // Update counter in loop for unrolling.
+ // I should be multiply of Count.
+ IRBuilder<> B2(NewPreHeader->getTerminator());
+ Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter");
+ BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
+ B2.SetInsertPoint(LatchBR);
+ PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter",
+ Header->getFirstNonPHI());
+ Value *IdxSub =
+ B2.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
+ NewIdx->getName() + ".nsub");
+ Value *IdxCmp;
+ if (LatchBR->getSuccessor(0) == Header)
+ IdxCmp = B2.CreateIsNotNull(IdxSub, NewIdx->getName() + ".ncmp");
+ else
+ IdxCmp = B2.CreateIsNull(IdxSub, NewIdx->getName() + ".ncmp");
+ NewIdx->addIncoming(TestVal, NewPreHeader);
+ NewIdx->addIncoming(IdxSub, Latch);
+ LatchBR->setCondition(IdxCmp);
+ } else {
+ // Connect the prolog code to the original loop and update the
+ // PHI functions.
+ ConnectProlog(L, BECount, Count, PrologExit, PreHeader, NewPreHeader,
+ VMap, DT, LI, PreserveLCSSA);
+ }
NumRuntimeUnrolled++;
return true;
}