src - FreeBSD source tree

diff options


context:
space:
mode:

author	Dimitry Andric <dim@FreeBSD.org>	2016-08-16 21:02:59 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2016-08-16 21:02:59 +0000
commit	3ca95b020283db6244cab92ede73c969253b6a31 (patch)
tree	d16e791e58694facd8f68d3e2797a1eaa8018afc /contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
parent	27067774dce3388702a4cf744d7096c6fb71b688 (diff)
parent	c3aee98e721333f265a88d6bf348e6e468f027d4 (diff)
download	src-3ca95b020283db6244cab92ede73c969253b6a31.tar.gz src-3ca95b020283db6244cab92ede73c969253b6a31.zip

Update llvm to release_39 branch r276489, and resolve conflicts.

Notes

Notes: svn path=/projects/clang390-import/; revision=304240

Diffstat (limited to 'contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp')

-rw-r--r--

contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp

497

1 files changed, 380 insertions, 117 deletions

diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 0d68f18ad0e5..861a50cf354d 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp

@@ -16,8 +16,8 @@

// case, we need to generate code to execute these 'left over' iterations.

// The current strategy generates an if-then-else sequence prior to the

-// unrolled loop to execute the 'left over' iterations. Other strategies

-// include generate a loop before or after the unrolled loop.

+// unrolled loop to execute the 'left over' iterations before or after the

+// unrolled loop.

//===----------------------------------------------------------------------===//

@@ -60,91 +60,220 @@ STATISTIC(NumRuntimeUnrolled,

/// than the unroll factor.

///

static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,

- BasicBlock *LastPrologBB, BasicBlock *PrologEnd,

- BasicBlock *OrigPH, BasicBlock *NewPH,

- ValueToValueMapTy &VMap, DominatorTree *DT,

- LoopInfo *LI, bool PreserveLCSSA) {

+ BasicBlock *PrologExit, BasicBlock *PreHeader,

+ BasicBlock *NewPreHeader, ValueToValueMapTy &VMap,

+ DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA) {

BasicBlock *Latch = L->getLoopLatch();

assert(Latch && "Loop must have a latch");

+ BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]);

// Create a PHI node for each outgoing value from the original loop

// (which means it is an outgoing value from the prolog code too).

// The new PHI node is inserted in the prolog end basic block.

- // The new PHI name is added as an operand of a PHI node in either

+ // The new PHI node value is added as an operand of a PHI node in either

// the loop header or the loop exit block.

- for (succ_iterator SBI = succ_begin(Latch), SBE = succ_end(Latch);

- SBI != SBE; ++SBI) {

- for (BasicBlock::iterator BBI = (*SBI)->begin();

- PHINode *PN = dyn_cast<PHINode>(BBI); ++BBI) {

+ for (BasicBlock *Succ : successors(Latch)) {

+ for (Instruction &BBI : *Succ) {

+ PHINode *PN = dyn_cast<PHINode>(&BBI);

+ // Exit when we passed all PHI nodes.

+ if (!PN)

+ break;

// Add a new PHI node to the prolog end block and add the

// appropriate incoming values.

- PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName()+".unr",

- PrologEnd->getTerminator());

+ PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName() + ".unr",

+ PrologExit->getFirstNonPHI());

// Adding a value to the new PHI node from the original loop preheader.

// This is the value that skips all the prolog code.

if (L->contains(PN)) {

- NewPN->addIncoming(PN->getIncomingValueForBlock(NewPH), OrigPH);

+ NewPN->addIncoming(PN->getIncomingValueForBlock(NewPreHeader),

+ PreHeader);

} else {

- NewPN->addIncoming(UndefValue::get(PN->getType()), OrigPH);

+ NewPN->addIncoming(UndefValue::get(PN->getType()), PreHeader);

}

Value *V = PN->getIncomingValueForBlock(Latch);

if (Instruction *I = dyn_cast<Instruction>(V)) {

if (L->contains(I)) {

- V = VMap[I];

+ V = VMap.lookup(I);

}

// Adding a value to the new PHI node from the last prolog block

// that was created.

- NewPN->addIncoming(V, LastPrologBB);

+ NewPN->addIncoming(V, PrologLatch);

// Update the existing PHI node operand with the value from the

// new PHI node. How this is done depends on if the existing

// PHI node is in the original loop block, or the exit block.

if (L->contains(PN)) {

- PN->setIncomingValue(PN->getBasicBlockIndex(NewPH), NewPN);

+ PN->setIncomingValue(PN->getBasicBlockIndex(NewPreHeader), NewPN);

} else {

- PN->addIncoming(NewPN, PrologEnd);

+ PN->addIncoming(NewPN, PrologExit);

}

- // Create a branch around the orignal loop, which is taken if there are no

+ // Create a branch around the original loop, which is taken if there are no

// iterations remaining to be executed after running the prologue.

- Instruction *InsertPt = PrologEnd->getTerminator();

+ Instruction *InsertPt = PrologExit->getTerminator();

IRBuilder<> B(InsertPt);

assert(Count != 0 && "nonsensical Count!");

- // If BECount <u (Count - 1) then (BECount + 1) & (Count - 1) == (BECount + 1)

- // (since Count is a power of 2). This means %xtraiter is (BECount + 1) and

- // and all of the iterations of this loop were executed by the prologue. Note

- // that if BECount <u (Count - 1) then (BECount + 1) cannot unsigned-overflow.

+ // If BECount <u (Count - 1) then (BECount + 1) % Count == (BECount + 1)

+ // This means %xtraiter is (BECount + 1) and all of the iterations of this

+ // loop were executed by the prologue. Note that if BECount <u (Count - 1)

+ // then (BECount + 1) cannot unsigned-overflow.

Value *BrLoopExit =

B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1));

BasicBlock *Exit = L->getUniqueExitBlock();

assert(Exit && "Loop must have a single exit block only");

// Split the exit to maintain loop canonicalization guarantees

- SmallVector<BasicBlock*, 4> Preds(pred_begin(Exit), pred_end(Exit));

+ SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));

SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", DT, LI,

PreserveLCSSA);

// Add the branch to the exit block (around the unrolled loop)

- B.CreateCondBr(BrLoopExit, Exit, NewPH);

+ B.CreateCondBr(BrLoopExit, Exit, NewPreHeader);

+ InsertPt->eraseFromParent();

+/// Connect the unrolling epilog code to the original loop.

+/// The unrolling epilog code contains code to execute the

+/// 'extra' iterations if the run-time trip count modulo the

+/// unroll count is non-zero.

+///

+/// This function performs the following:

+/// - Update PHI nodes at the unrolling loop exit and epilog loop exit

+/// - Create PHI nodes at the unrolling loop exit to combine

+/// values that exit the unrolling loop code and jump around it.

+/// - Update PHI operands in the epilog loop by the new PHI nodes

+/// - Branch around the epilog loop if extra iters (ModVal) is zero.

+///

+static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,

+ BasicBlock *Exit, BasicBlock *PreHeader,

+ BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader,

+ ValueToValueMapTy &VMap, DominatorTree *DT,

+ LoopInfo *LI, bool PreserveLCSSA) {

+ BasicBlock *Latch = L->getLoopLatch();

+ assert(Latch && "Loop must have a latch");

+ BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]);

+ // Loop structure should be the following:

+ //

+ // PreHeader

+ // NewPreHeader

+ // Header

+ // ...

+ // Latch

+ // NewExit (PN)

+ // EpilogPreHeader

+ // EpilogHeader

+ // ...

+ // EpilogLatch

+ // Exit (EpilogPN)

+ // Update PHI nodes at NewExit and Exit.

+ for (Instruction &BBI : *NewExit) {

+ PHINode *PN = dyn_cast<PHINode>(&BBI);

+ // Exit when we passed all PHI nodes.

+ if (!PN)

+ break;

+ // PN should be used in another PHI located in Exit block as

+ // Exit was split by SplitBlockPredecessors into Exit and NewExit

+ // Basically it should look like:

+ // NewExit:

+ // PN = PHI [I, Latch]

+ // ...

+ // Exit:

+ // EpilogPN = PHI [PN, EpilogPreHeader]

+ //

+ // There is EpilogPreHeader incoming block instead of NewExit as

+ // NewExit was split 1 more time to get EpilogPreHeader.

+ assert(PN->hasOneUse() && "The phi should have 1 use");

+ PHINode *EpilogPN = cast<PHINode> (PN->use_begin()->getUser());

+ assert(EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block");

+ // Add incoming PreHeader from branch around the Loop

+ PN->addIncoming(UndefValue::get(PN->getType()), PreHeader);

+ Value *V = PN->getIncomingValueForBlock(Latch);

+ Instruction *I = dyn_cast<Instruction>(V);

+ if (I && L->contains(I))

+ // If value comes from an instruction in the loop add VMap value.

+ V = VMap.lookup(I);

+ // For the instruction out of the loop, constant or undefined value

+ // insert value itself.

+ EpilogPN->addIncoming(V, EpilogLatch);

+ assert(EpilogPN->getBasicBlockIndex(EpilogPreHeader) >= 0 &&

+ "EpilogPN should have EpilogPreHeader incoming block");

+ // Change EpilogPreHeader incoming block to NewExit.

+ EpilogPN->setIncomingBlock(EpilogPN->getBasicBlockIndex(EpilogPreHeader),

+ NewExit);

+ // Now PHIs should look like:

+ // NewExit:

+ // PN = PHI [I, Latch], [undef, PreHeader]

+ // ...

+ // Exit:

+ // EpilogPN = PHI [PN, NewExit], [VMap[I], EpilogLatch]

+ }

+ // Create PHI nodes at NewExit (from the unrolling loop Latch and PreHeader).

+ // Update corresponding PHI nodes in epilog loop.

+ for (BasicBlock *Succ : successors(Latch)) {

+ // Skip this as we already updated phis in exit blocks.

+ if (!L->contains(Succ))

+ continue;

+ for (Instruction &BBI : *Succ) {

+ PHINode *PN = dyn_cast<PHINode>(&BBI);

+ // Exit when we passed all PHI nodes.

+ if (!PN)

+ break;

+ // Add new PHI nodes to the loop exit block and update epilog

+ // PHIs with the new PHI values.

+ PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName() + ".unr",

+ NewExit->getFirstNonPHI());

+ // Adding a value to the new PHI node from the unrolling loop preheader.

+ NewPN->addIncoming(PN->getIncomingValueForBlock(NewPreHeader), PreHeader);

+ // Adding a value to the new PHI node from the unrolling loop latch.

+ NewPN->addIncoming(PN->getIncomingValueForBlock(Latch), Latch);

+ // Update the existing PHI node operand with the value from the new PHI

+ // node. Corresponding instruction in epilog loop should be PHI.

+ PHINode *VPN = cast<PHINode>(VMap[&BBI]);

+ VPN->setIncomingValue(VPN->getBasicBlockIndex(EpilogPreHeader), NewPN);

+ }

+ Instruction *InsertPt = NewExit->getTerminator();

+ IRBuilder<> B(InsertPt);

+ Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod");

+ assert(Exit && "Loop must have a single exit block only");

+ // Split the exit to maintain loop canonicalization guarantees

+ SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));

+ SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI,

+ PreserveLCSSA);

+ // Add the branch to the exit block (around the epilog loop)

+ B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit);

InsertPt->eraseFromParent();

}

/// Create a clone of the blocks in a loop and connect them together.

-/// If UnrollProlog is true, loop structure will not be cloned, otherwise a new

-/// loop will be created including all cloned blocks, and the iterator of it

-/// switches to count NewIter down to 0.

+/// If CreateRemainderLoop is false, loop structure will not be cloned,

+/// otherwise a new loop will be created including all cloned blocks, and the

+/// iterator of it switches to count NewIter down to 0.

+/// The cloned blocks should be inserted between InsertTop and InsertBot.

+/// If loop structure is cloned InsertTop should be new preheader, InsertBot

+/// new loop exit.

///

-static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,

+static void CloneLoopBlocks(Loop *L, Value *NewIter,

+ const bool CreateRemainderLoop,

+ const bool UseEpilogRemainder,

BasicBlock *InsertTop, BasicBlock *InsertBot,

+ BasicBlock *Preheader,

std::vector<BasicBlock *> &NewBlocks,

LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,

LoopInfo *LI) {

- BasicBlock *Preheader = L->getLoopPreheader();

+ StringRef suffix = UseEpilogRemainder ? "epil" : "prol";

BasicBlock *Header = L->getHeader();

BasicBlock *Latch = L->getLoopLatch();

Function *F = Header->getParent();

@@ -152,7 +281,7 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,

LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();

Loop *NewLoop = nullptr;

Loop *ParentLoop = L->getParentLoop();

- if (!UnrollProlog) {

+ if (CreateRemainderLoop) {

NewLoop = new Loop();

if (ParentLoop)

ParentLoop->addChildLoop(NewLoop);

@@ -163,7 +292,7 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,

// For each block in the original loop, create a new copy,

// and update the value map with the newly created values.

for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {

- BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".prol", F);

+ BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F);

NewBlocks.push_back(NewBB);

if (NewLoop)

@@ -176,19 +305,20 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,

// For the first block, add a CFG connection to this newly

// created block.

InsertTop->getTerminator()->setSuccessor(0, NewBB);

}

if (Latch == *BB) {

- // For the last block, if UnrollProlog is true, create a direct jump to

- // InsertBot. If not, create a loop back to cloned head.

+ // For the last block, if CreateRemainderLoop is false, create a direct

+ // jump to InsertBot. If not, create a loop back to cloned head.

VMap.erase((*BB)->getTerminator());

BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);

BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator());

IRBuilder<> Builder(LatchBR);

- if (UnrollProlog) {

+ if (!CreateRemainderLoop) {

Builder.CreateBr(InsertBot);

} else {

- PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2, "prol.iter",

+ PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2,

+ suffix + ".iter",

FirstLoopBB->getFirstNonPHI());

Value *IdxSub =

Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),

@@ -207,9 +337,15 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,

// cloned loop.

for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {

PHINode *NewPHI = cast<PHINode>(VMap[&*I]);

- if (UnrollProlog) {

- VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader);

- cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);

+ if (!CreateRemainderLoop) {

+ if (UseEpilogRemainder) {

+ unsigned idx = NewPHI->getBasicBlockIndex(Preheader);

+ NewPHI->setIncomingBlock(idx, InsertTop);

+ NewPHI->removeIncomingValue(Latch, false);

+ } else {

+ VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader);

+ cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);

+ }

} else {

unsigned idx = NewPHI->getBasicBlockIndex(Preheader);

NewPHI->setIncomingBlock(idx, InsertTop);

@@ -217,8 +353,8 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,

idx = NewPHI->getBasicBlockIndex(Latch);

Value *InVal = NewPHI->getIncomingValue(idx);

NewPHI->setIncomingBlock(idx, NewLatch);

- if (VMap[InVal])

- NewPHI->setIncomingValue(idx, VMap[InVal]);

+ if (Value *V = VMap.lookup(InVal))

+ NewPHI->setIncomingValue(idx, V);

}

if (NewLoop) {

@@ -254,11 +390,11 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,

}

-/// Insert code in the prolog code when unrolling a loop with a

+/// Insert code in the prolog/epilog code when unrolling a loop with a

/// run-time trip-count.

///

/// This method assumes that the loop unroll factor is total number

-/// of loop bodes in the loop after unrolling. (Some folks refer

+/// of loop bodies in the loop after unrolling. (Some folks refer

/// to the unroll factor as the number of *extra* copies added).

/// We assume also that the loop unroll factor is a power-of-two. So, after

/// unrolling the loop, the number of loop bodies executed is 2,

@@ -266,37 +402,56 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,

/// instruction in SimplifyCFG.cpp. Then, the backend decides how code for

/// the switch instruction is generated.

///

+/// ***Prolog case***

/// extraiters = tripcount % loopfactor

/// if (extraiters == 0) jump Loop:

-/// else jump Prol

+/// else jump Prol:

/// Prol: LoopBody;

/// extraiters -= 1 // Omitted if unroll factor is 2.

/// if (extraiters != 0) jump Prol: // Omitted if unroll factor is 2.

-/// if (tripcount < loopfactor) jump End

+/// if (tripcount < loopfactor) jump End:

/// Loop:

/// ...

/// End:

///

-bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,

- bool AllowExpensiveTripCount, LoopInfo *LI,

- ScalarEvolution *SE, DominatorTree *DT,

- bool PreserveLCSSA) {

+/// ***Epilog case***

+/// extraiters = tripcount % loopfactor

+/// if (tripcount < loopfactor) jump LoopExit:

+/// unroll_iters = tripcount - extraiters

+/// Loop: LoopBody; (executes unroll_iter times);

+/// unroll_iter -= 1

+/// if (unroll_iter != 0) jump Loop:

+/// LoopExit:

+/// if (extraiters == 0) jump EpilExit:

+/// Epil: LoopBody; (executes extraiters times)

+/// extraiters -= 1 // Omitted if unroll factor is 2.

+/// if (extraiters != 0) jump Epil: // Omitted if unroll factor is 2.

+/// EpilExit:

+bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,

+ bool AllowExpensiveTripCount,

+ bool UseEpilogRemainder,

+ LoopInfo *LI, ScalarEvolution *SE,

+ DominatorTree *DT, bool PreserveLCSSA) {

// for now, only unroll loops that contain a single exit

if (!L->getExitingBlock())

return false;

// Make sure the loop is in canonical form, and there is a single

// exit block only.

- if (!L->isLoopSimplifyForm() || !L->getUniqueExitBlock())

+ if (!L->isLoopSimplifyForm())

+ return false;

+ BasicBlock *Exit = L->getUniqueExitBlock(); // successor out of loop

+ if (!Exit)

return false;

- // Use Scalar Evolution to compute the trip count. This allows more

- // loops to be unrolled than relying on induction var simplification

+ // Use Scalar Evolution to compute the trip count. This allows more loops to

+ // be unrolled than relying on induction var simplification.

if (!SE)

return false;

- // Only unroll loops with a computable trip count and the trip count needs

- // to be an int value (allowing a pointer type is a TODO item)

+ // Only unroll loops with a computable trip count, and the trip count needs

+ // to be an int value (allowing a pointer type is a TODO item).

const SCEV *BECountSC = SE->getBackedgeTakenCount(L);

if (isa<SCEVCouldNotCompute>(BECountSC) ||

!BECountSC->getType()->isIntegerTy())

@@ -304,21 +459,19 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,

unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth();

- // Add 1 since the backedge count doesn't include the first loop iteration

+ // Add 1 since the backedge count doesn't include the first loop iteration.

const SCEV *TripCountSC =

SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));

if (isa<SCEVCouldNotCompute>(TripCountSC))

return false;

BasicBlock *Header = L->getHeader();

+ BasicBlock *PreHeader = L->getLoopPreheader();

+ BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());

const DataLayout &DL = Header->getModule()->getDataLayout();

SCEVExpander Expander(*SE, DL, "loop-unroll");

- if (!AllowExpensiveTripCount && Expander.isHighCostExpansion(TripCountSC, L))

- return false;

- // We only handle cases when the unroll factor is a power of 2.

- // Count is the loop unroll factor, the number of extra copies added + 1.

- if (!isPowerOf2_32(Count))

+ if (!AllowExpensiveTripCount &&

+ Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR))

return false;

// This constraint lets us deal with an overflowing trip count easily; see the

@@ -326,51 +479,115 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,

if (Log2_32(Count) > BEWidth)

return false;

- // If this loop is nested, then the loop unroller changes the code in

- // parent loop, so the Scalar Evolution pass needs to be run again

+ // If this loop is nested, then the loop unroller changes the code in the

+ // parent loop, so the Scalar Evolution pass needs to be run again.

if (Loop *ParentLoop = L->getParentLoop())

SE->forgetLoop(ParentLoop);

- BasicBlock *PH = L->getLoopPreheader();

BasicBlock *Latch = L->getLoopLatch();

- // It helps to splits the original preheader twice, one for the end of the

- // prolog code and one for a new loop preheader

- BasicBlock *PEnd = SplitEdge(PH, Header, DT, LI);

- BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), DT, LI);

- BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator());

+ // Loop structure is the following:

+ //

+ // PreHeader

+ // Header

+ // ...

+ // Latch

+ // Exit

+ BasicBlock *NewPreHeader;

+ BasicBlock *NewExit = nullptr;

+ BasicBlock *PrologExit = nullptr;

+ BasicBlock *EpilogPreHeader = nullptr;

+ BasicBlock *PrologPreHeader = nullptr;

+ if (UseEpilogRemainder) {

+ // If epilog remainder

+ // Split PreHeader to insert a branch around loop for unrolling.

+ NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI);

+ NewPreHeader->setName(PreHeader->getName() + ".new");

+ // Split Exit to create phi nodes from branch above.

+ SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));

+ NewExit = SplitBlockPredecessors(Exit, Preds, ".unr-lcssa",

+ DT, LI, PreserveLCSSA);

+ // Split NewExit to insert epilog remainder loop.

+ EpilogPreHeader = SplitBlock(NewExit, NewExit->getTerminator(), DT, LI);

+ EpilogPreHeader->setName(Header->getName() + ".epil.preheader");

+ } else {

+ // If prolog remainder

+ // Split the original preheader twice to insert prolog remainder loop

+ PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI);

+ PrologPreHeader->setName(Header->getName() + ".prol.preheader");

+ PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(),

+ DT, LI);

+ PrologExit->setName(Header->getName() + ".prol.loopexit");

+ // Split PrologExit to get NewPreHeader.

+ NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI);

+ NewPreHeader->setName(PreHeader->getName() + ".new");

+ }

+ // Loop structure should be the following:

+ // Epilog Prolog

+ //

+ // PreHeader PreHeader

+ // *NewPreHeader *PrologPreHeader

+ // Header *PrologExit

+ // ... *NewPreHeader

+ // Latch Header

+ // *NewExit ...

+ // *EpilogPreHeader Latch

+ // Exit Exit

+ // Calculate conditions for branch around loop for unrolling

+ // in epilog case and around prolog remainder loop in prolog case.

// Compute the number of extra iterations required, which is:

- // extra iterations = run-time trip count % (loop unroll factor + 1)

+ // extra iterations = run-time trip count % loop unroll factor

+ PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());

Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),

PreHeaderBR);

Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),

PreHeaderBR);

IRBuilder<> B(PreHeaderBR);

- Value *ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");

- // If ModVal is zero, we know that either

- // 1. there are no iteration to be run in the prologue loop

- // OR

- // 2. the addition computing TripCount overflowed

- //

- // If (2) is true, we know that TripCount really is (1 << BEWidth) and so the

- // number of iterations that remain to be run in the original loop is a

- // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we

- // explicitly check this above).

- Value *BranchVal = B.CreateIsNotNull(ModVal, "lcmp.mod");

- // Branch to either the extra iterations or the cloned/unrolled loop

- // We will fix up the true branch label when adding loop body copies

- B.CreateCondBr(BranchVal, PEnd, PEnd);

- assert(PreHeaderBR->isUnconditional() &&

- PreHeaderBR->getSuccessor(0) == PEnd &&

- "CFG edges in Preheader are not correct");

+ Value *ModVal;

+ // Calculate ModVal = (BECount + 1) % Count.

+ // Note that TripCount is BECount + 1.

+ if (isPowerOf2_32(Count)) {

+ // When Count is power of 2 we don't need BECount for epilog case, however we'll

+ // need it for a branch around unrolling loop for prolog case.

+ ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");

+ // 1. There are no iterations to be run in the prolog/epilog loop.

+ // OR

+ // 2. The addition computing TripCount overflowed.

+ //

+ // If (2) is true, we know that TripCount really is (1 << BEWidth) and so

+ // the number of iterations that remain to be run in the original loop is a

+ // multiple of Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we

+ // explicitly check this above).

+ } else {

+ // As (BECount + 1) can potentially unsigned overflow we count

+ // (BECount % Count) + 1 which is overflow safe as BECount % Count < Count.

+ Value *ModValTmp = B.CreateURem(BECount,

+ ConstantInt::get(BECount->getType(),

+ Count));

+ Value *ModValAdd = B.CreateAdd(ModValTmp,

+ ConstantInt::get(ModValTmp->getType(), 1));

+ // At that point (BECount % Count) + 1 could be equal to Count.

+ // To handle this case we need to take mod by Count one more time.

+ ModVal = B.CreateURem(ModValAdd,

+ ConstantInt::get(BECount->getType(), Count),

+ "xtraiter");

+ }

+ Value *BranchVal =

+ UseEpilogRemainder ? B.CreateICmpULT(BECount,

+ ConstantInt::get(BECount->getType(),

+ Count - 1)) :

+ B.CreateIsNotNull(ModVal, "lcmp.mod");

+ BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader;

+ BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit;

+ // Branch to either remainder (extra iterations) loop or unrolling loop.

+ B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop);

PreHeaderBR->eraseFromParent();

Function *F = Header->getParent();

// Get an ordered list of blocks in the loop to help with the ordering of the

- // cloned blocks in the prolog code

+ // cloned blocks in the prolog/epilog code

LoopBlocksDFS LoopBlocks(L);

LoopBlocks.perform(LI);

@@ -382,34 +599,80 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,

std::vector<BasicBlock *> NewBlocks;

ValueToValueMapTy VMap;

- bool UnrollPrologue = Count == 2;

+ // For unroll factor 2 remainder loop will have 1 iteration.

+ // Do not create 1 iteration loop.

+ bool CreateRemainderLoop = (Count != 2);

// Clone all the basic blocks in the loop. If Count is 2, we don't clone

// the loop, otherwise we create a cloned loop to execute the extra

// iterations. This function adds the appropriate CFG connections.

- CloneLoopBlocks(L, ModVal, UnrollPrologue, PH, PEnd, NewBlocks, LoopBlocks,

- VMap, LI);

- // Insert the cloned blocks into function just before the original loop

- F->getBasicBlockList().splice(PEnd->getIterator(), F->getBasicBlockList(),

- NewBlocks[0]->getIterator(), F->end());

- // Rewrite the cloned instruction operands to use the values

- // created when the clone is created.

- for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) {

- for (BasicBlock::iterator I = NewBlocks[i]->begin(),

- E = NewBlocks[i]->end();

- I != E; ++I) {

- RemapInstruction(&*I, VMap,

- RF_NoModuleLevelChanges | RF_IgnoreMissingEntries);

+ BasicBlock *InsertBot = UseEpilogRemainder ? Exit : PrologExit;

+ BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;

+ CloneLoopBlocks(L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop,

+ InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, LI);

+ // Insert the cloned blocks into the function.

+ F->getBasicBlockList().splice(InsertBot->getIterator(),

+ F->getBasicBlockList(),

+ NewBlocks[0]->getIterator(),

+ F->end());

+ // Loop structure should be the following:

+ // Epilog Prolog

+ //

+ // PreHeader PreHeader

+ // NewPreHeader PrologPreHeader

+ // Header PrologHeader

+ // ... ...

+ // Latch PrologLatch

+ // NewExit PrologExit

+ // EpilogPreHeader NewPreHeader

+ // EpilogHeader Header

+ // ... ...

+ // EpilogLatch Latch

+ // Exit Exit

+ // Rewrite the cloned instruction operands to use the values created when the

+ // clone is created.

+ for (BasicBlock *BB : NewBlocks) {

+ for (Instruction &I : *BB) {

+ RemapInstruction(&I, VMap,

+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);

}

- // Connect the prolog code to the original loop and update the

- // PHI functions.

- BasicBlock *LastLoopBB = cast<BasicBlock>(VMap[Latch]);

- ConnectProlog(L, BECount, Count, LastLoopBB, PEnd, PH, NewPH, VMap, DT, LI,

- PreserveLCSSA);

+ if (UseEpilogRemainder) {

+ // Connect the epilog code to the original loop and update the

+ // PHI functions.

+ ConnectEpilog(L, ModVal, NewExit, Exit, PreHeader,

+ EpilogPreHeader, NewPreHeader, VMap, DT, LI,

+ PreserveLCSSA);

+ // Update counter in loop for unrolling.

+ // It should be a multiple of Count.

+ IRBuilder<> B2(NewPreHeader->getTerminator());

+ Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter");

+ BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());

+ B2.SetInsertPoint(LatchBR);

+ PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter",

+ Header->getFirstNonPHI());

+ Value *IdxSub =

+ B2.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),

+ NewIdx->getName() + ".nsub");

+ Value *IdxCmp;

+ if (LatchBR->getSuccessor(0) == Header)

+ IdxCmp = B2.CreateIsNotNull(IdxSub, NewIdx->getName() + ".ncmp");

+ else

+ IdxCmp = B2.CreateIsNull(IdxSub, NewIdx->getName() + ".ncmp");

+ NewIdx->addIncoming(TestVal, NewPreHeader);

+ NewIdx->addIncoming(IdxSub, Latch);

+ LatchBR->setCondition(IdxCmp);

+ } else {

+ // Connect the prolog code to the original loop and update the

+ // PHI functions.

+ ConnectProlog(L, BECount, Count, PrologExit, PreHeader, NewPreHeader,

+ VMap, DT, LI, PreserveLCSSA);

+ }

NumRuntimeUnrolled++;

return true;

}