aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Scalar/LoopInterchange.cpp')
-rw-r--r--llvm/lib/Transforms/Scalar/LoopInterchange.cpp298
1 files changed, 150 insertions, 148 deletions
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 18daa4295224..0a7c62113c7f 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -44,6 +44,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include <cassert>
@@ -86,7 +87,8 @@ static void printDepMatrix(CharMatrix &DepMatrix) {
#endif
static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
- Loop *L, DependenceInfo *DI) {
+ Loop *L, DependenceInfo *DI,
+ ScalarEvolution *SE) {
using ValueVector = SmallVector<Value *, 16>;
ValueVector MemInstr;
@@ -125,6 +127,10 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
// Track Output, Flow, and Anti dependencies.
if (auto D = DI->depends(Src, Dst, true)) {
assert(D->isOrdered() && "Expected an output, flow or anti dep.");
+ // If the direction vector is negative, normalize it to
+ // make it non-negative.
+ if (D->normalize(SE))
+ LLVM_DEBUG(dbgs() << "Negative dependence vector normalized.\n");
LLVM_DEBUG(StringRef DepType =
D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";
dbgs() << "Found " << DepType
@@ -133,19 +139,7 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
unsigned Levels = D->getLevels();
char Direction;
for (unsigned II = 1; II <= Levels; ++II) {
- const SCEV *Distance = D->getDistance(II);
- const SCEVConstant *SCEVConst =
- dyn_cast_or_null<SCEVConstant>(Distance);
- if (SCEVConst) {
- const ConstantInt *CI = SCEVConst->getValue();
- if (CI->isNegative())
- Direction = '<';
- else if (CI->isZero())
- Direction = '=';
- else
- Direction = '>';
- Dep.push_back(Direction);
- } else if (D->isScalar(II)) {
+ if (D->isScalar(II)) {
Direction = 'S';
Dep.push_back(Direction);
} else {
@@ -188,80 +182,36 @@ static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx,
std::swap(DepMatrix[I][ToIndx], DepMatrix[I][FromIndx]);
}
-// Checks if outermost non '=','S'or'I' dependence in the dependence matrix is
-// '>'
-static bool isOuterMostDepPositive(CharMatrix &DepMatrix, unsigned Row,
- unsigned Column) {
- for (unsigned i = 0; i <= Column; ++i) {
- if (DepMatrix[Row][i] == '<')
- return false;
- if (DepMatrix[Row][i] == '>')
+// After interchanging, check if the direction vector is valid.
+// [Theorem] A permutation of the loops in a perfect nest is legal if and only
+// if the direction matrix, after the same permutation is applied to its
+// columns, has no ">" direction as the leftmost non-"=" direction in any row.
+static bool isLexicographicallyPositive(std::vector<char> &DV) {
+ for (unsigned Level = 0; Level < DV.size(); ++Level) {
+ unsigned char Direction = DV[Level];
+ if (Direction == '<')
return true;
- }
- // All dependencies were '=','S' or 'I'
- return false;
-}
-
-// Checks if no dependence exist in the dependency matrix in Row before Column.
-static bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row,
- unsigned Column) {
- for (unsigned i = 0; i < Column; ++i) {
- if (DepMatrix[Row][i] != '=' && DepMatrix[Row][i] != 'S' &&
- DepMatrix[Row][i] != 'I')
+ if (Direction == '>' || Direction == '*')
return false;
}
return true;
}
-static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row,
- unsigned OuterLoopId, char InnerDep,
- char OuterDep) {
- if (isOuterMostDepPositive(DepMatrix, Row, OuterLoopId))
- return false;
-
- if (InnerDep == OuterDep)
- return true;
-
- // It is legal to interchange if and only if after interchange no row has a
- // '>' direction as the leftmost non-'='.
-
- if (InnerDep == '=' || InnerDep == 'S' || InnerDep == 'I')
- return true;
-
- if (InnerDep == '<')
- return true;
-
- if (InnerDep == '>') {
- // If OuterLoopId represents outermost loop then interchanging will make the
- // 1st dependency as '>'
- if (OuterLoopId == 0)
- return false;
-
- // If all dependencies before OuterloopId are '=','S'or 'I'. Then
- // interchanging will result in this row having an outermost non '='
- // dependency of '>'
- if (!containsNoDependence(DepMatrix, Row, OuterLoopId))
- return true;
- }
-
- return false;
-}
-
// Checks if it is legal to interchange 2 loops.
-// [Theorem] A permutation of the loops in a perfect nest is legal if and only
-// if the direction matrix, after the same permutation is applied to its
-// columns, has no ">" direction as the leftmost non-"=" direction in any row.
static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
unsigned InnerLoopId,
unsigned OuterLoopId) {
unsigned NumRows = DepMatrix.size();
+ std::vector<char> Cur;
// For each row check if it is valid to interchange.
for (unsigned Row = 0; Row < NumRows; ++Row) {
- char InnerDep = DepMatrix[Row][InnerLoopId];
- char OuterDep = DepMatrix[Row][OuterLoopId];
- if (InnerDep == '*' || OuterDep == '*')
+ // Create temporary DepVector check its lexicographical order
+ // before and after swapping OuterLoop vs InnerLoop
+ Cur = DepMatrix[Row];
+ if (!isLexicographicallyPositive(Cur))
return false;
- if (!validDepInterchange(DepMatrix, Row, OuterLoopId, InnerDep, OuterDep))
+ std::swap(Cur[InnerLoopId], Cur[OuterLoopId]);
+ if (!isLexicographicallyPositive(Cur))
return false;
}
return true;
@@ -361,11 +311,18 @@ public:
bool isProfitable(const Loop *InnerLoop, const Loop *OuterLoop,
unsigned InnerLoopId, unsigned OuterLoopId,
CharMatrix &DepMatrix,
- const DenseMap<const Loop *, unsigned> &CostMap);
+ const DenseMap<const Loop *, unsigned> &CostMap,
+ std::unique_ptr<CacheCost> &CC);
private:
int getInstrOrderCost();
-
+ std::optional<bool> isProfitablePerLoopCacheAnalysis(
+ const DenseMap<const Loop *, unsigned> &CostMap,
+ std::unique_ptr<CacheCost> &CC);
+ std::optional<bool> isProfitablePerInstrOrderCost();
+ std::optional<bool> isProfitableForVectorization(unsigned InnerLoopId,
+ unsigned OuterLoopId,
+ CharMatrix &DepMatrix);
Loop *OuterLoop;
Loop *InnerLoop;
@@ -486,7 +443,7 @@ struct LoopInterchange {
CharMatrix DependencyMatrix;
Loop *OuterMostLoop = *(LoopList.begin());
if (!populateDependencyMatrix(DependencyMatrix, LoopNestDepth,
- OuterMostLoop, DI)) {
+ OuterMostLoop, DI, SE)) {
LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n");
return false;
}
@@ -562,7 +519,7 @@ struct LoopInterchange {
LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n");
LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
- DependencyMatrix, CostMap)) {
+ DependencyMatrix, CostMap, CC)) {
LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n");
return false;
}
@@ -579,11 +536,7 @@ struct LoopInterchange {
LLVM_DEBUG(dbgs() << "Loops interchanged.\n");
LoopsInterchanged++;
- assert(InnerLoop->isLCSSAForm(*DT) &&
- "Inner loop not left in LCSSA form after loop interchange!");
- assert(OuterLoop->isLCSSAForm(*DT) &&
- "Outer loop not left in LCSSA form after loop interchange!");
-
+ llvm::formLCSSARecursively(*OuterLoop, *DT, LI, SE);
return true;
}
};
@@ -858,18 +811,26 @@ bool LoopInterchangeLegality::currentLimitations() {
}
Inductions.clear();
- if (!findInductionAndReductions(InnerLoop, Inductions, nullptr)) {
- LLVM_DEBUG(
- dbgs() << "Only inner loops with induction or reduction PHI nodes "
- << "are supported currently.\n");
- ORE->emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIInner",
- InnerLoop->getStartLoc(),
- InnerLoop->getHeader())
- << "Only inner loops with induction or reduction PHI nodes can be"
- " interchange currently.";
- });
- return true;
+ // For multi-level loop nests, make sure that all phi nodes for inner loops
+ // at all levels can be recognized as a induction or reduction phi. Bail out
+ // if a phi node at a certain nesting level cannot be properly recognized.
+ Loop *CurLevelLoop = OuterLoop;
+ while (!CurLevelLoop->getSubLoops().empty()) {
+ // We already made sure that the loop nest is tightly nested.
+ CurLevelLoop = CurLevelLoop->getSubLoops().front();
+ if (!findInductionAndReductions(CurLevelLoop, Inductions, nullptr)) {
+ LLVM_DEBUG(
+ dbgs() << "Only inner loops with induction or reduction PHI nodes "
+ << "are supported currently.\n");
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIInner",
+ CurLevelLoop->getStartLoc(),
+ CurLevelLoop->getHeader())
+ << "Only inner loops with induction or reduction PHI nodes can be"
+ " interchange currently.";
+ });
+ return true;
+ }
}
// TODO: Triangular loops are not handled for now.
@@ -1137,31 +1098,10 @@ int LoopInterchangeProfitability::getInstrOrderCost() {
return GoodOrder - BadOrder;
}
-static bool isProfitableForVectorization(unsigned InnerLoopId,
- unsigned OuterLoopId,
- CharMatrix &DepMatrix) {
- // TODO: Improve this heuristic to catch more cases.
- // If the inner loop is loop independent or doesn't carry any dependency it is
- // profitable to move this to outer position.
- for (auto &Row : DepMatrix) {
- if (Row[InnerLoopId] != 'S' && Row[InnerLoopId] != 'I')
- return false;
- // TODO: We need to improve this heuristic.
- if (Row[OuterLoopId] != '=')
- return false;
- }
- // If outer loop has dependence and inner loop is loop independent then it is
- // profitable to interchange to enable parallelism.
- // If there are no dependences, interchanging will not improve anything.
- return !DepMatrix.empty();
-}
-
-bool LoopInterchangeProfitability::isProfitable(
- const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId,
- unsigned OuterLoopId, CharMatrix &DepMatrix,
- const DenseMap<const Loop *, unsigned> &CostMap) {
- // TODO: Remove the legacy cost model.
-
+std::optional<bool>
+LoopInterchangeProfitability::isProfitablePerLoopCacheAnalysis(
+ const DenseMap<const Loop *, unsigned> &CostMap,
+ std::unique_ptr<CacheCost> &CC) {
// This is the new cost model returned from loop cache analysis.
// A smaller index means the loop should be placed an outer loop, and vice
// versa.
@@ -1173,30 +1113,91 @@ bool LoopInterchangeProfitability::isProfitable(
LLVM_DEBUG(dbgs() << "InnerIndex = " << InnerIndex
<< ", OuterIndex = " << OuterIndex << "\n");
if (InnerIndex < OuterIndex)
- return true;
- } else {
- // Legacy cost model: this is rough cost estimation algorithm. It counts the
- // good and bad order of induction variables in the instruction and allows
- // reordering if number of bad orders is more than good.
- int Cost = getInstrOrderCost();
- LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n");
- if (Cost < -LoopInterchangeCostThreshold)
- return true;
+ return std::optional<bool>(true);
+ assert(InnerIndex != OuterIndex && "CostMap should assign unique "
+ "numbers to each loop");
+ if (CC->getLoopCost(*OuterLoop) == CC->getLoopCost(*InnerLoop))
+ return std::nullopt;
+ return std::optional<bool>(false);
}
+ return std::nullopt;
+}
- // It is not profitable as per current cache profitability model. But check if
- // we can move this loop outside to improve parallelism.
- if (isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix))
- return true;
+std::optional<bool>
+LoopInterchangeProfitability::isProfitablePerInstrOrderCost() {
+ // Legacy cost model: this is rough cost estimation algorithm. It counts the
+ // good and bad order of induction variables in the instruction and allows
+ // reordering if number of bad orders is more than good.
+ int Cost = getInstrOrderCost();
+ LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n");
+ if (Cost < 0 && Cost < LoopInterchangeCostThreshold)
+ return std::optional<bool>(true);
+
+ return std::nullopt;
+}
- ORE->emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable",
- InnerLoop->getStartLoc(),
- InnerLoop->getHeader())
- << "Interchanging loops is too costly and it does not improve "
- "parallelism.";
- });
- return false;
+std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization(
+ unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix) {
+ for (auto &Row : DepMatrix) {
+ // If the inner loop is loop independent or doesn't carry any dependency
+ // it is not profitable to move this to outer position, since we are
+ // likely able to do inner loop vectorization already.
+ if (Row[InnerLoopId] == 'I' || Row[InnerLoopId] == '=')
+ return std::optional<bool>(false);
+
+ // If the outer loop is not loop independent it is not profitable to move
+ // this to inner position, since doing so would not enable inner loop
+ // parallelism.
+ if (Row[OuterLoopId] != 'I' && Row[OuterLoopId] != '=')
+ return std::optional<bool>(false);
+ }
+ // If inner loop has dependence and outer loop is loop independent then it
+ // is/ profitable to interchange to enable inner loop parallelism.
+ // If there are no dependences, interchanging will not improve anything.
+ return std::optional<bool>(!DepMatrix.empty());
+}
+
+bool LoopInterchangeProfitability::isProfitable(
+ const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId,
+ unsigned OuterLoopId, CharMatrix &DepMatrix,
+ const DenseMap<const Loop *, unsigned> &CostMap,
+ std::unique_ptr<CacheCost> &CC) {
+ // isProfitable() is structured to avoid endless loop interchange.
+ // If loop cache analysis could decide the profitability then,
+ // profitability check will stop and return the analysis result.
+ // If cache analysis failed to analyze the loopnest (e.g.,
+ // due to delinearization issues) then only check whether it is
+ // profitable for InstrOrderCost. Likewise, if InstrOrderCost failed to
+ // analysis the profitability then only, isProfitableForVectorization
+ // will decide.
+ std::optional<bool> shouldInterchange =
+ isProfitablePerLoopCacheAnalysis(CostMap, CC);
+ if (!shouldInterchange.has_value()) {
+ shouldInterchange = isProfitablePerInstrOrderCost();
+ if (!shouldInterchange.has_value())
+ shouldInterchange =
+ isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix);
+ }
+ if (!shouldInterchange.has_value()) {
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable",
+ InnerLoop->getStartLoc(),
+ InnerLoop->getHeader())
+ << "Insufficient information to calculate the cost of loop for "
+ "interchange.";
+ });
+ return false;
+ } else if (!shouldInterchange.value()) {
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable",
+ InnerLoop->getStartLoc(),
+ InnerLoop->getHeader())
+ << "Interchanging loops is not considered to improve cache "
+ "locality nor vectorization.";
+ });
+ return false;
+ }
+ return true;
}
void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
@@ -1286,7 +1287,6 @@ void LoopInterchangeTransform::restructureLoops(
// Tell SE that we move the loops around.
SE->forgetLoop(NewOuter);
- SE->forgetLoop(NewInner);
}
bool LoopInterchangeTransform::transform() {
@@ -1360,9 +1360,11 @@ bool LoopInterchangeTransform::transform() {
for (Instruction *InnerIndexVar : InnerIndexVarList)
WorkList.insert(cast<Instruction>(InnerIndexVar));
MoveInstructions();
+ }
- // Splits the inner loops phi nodes out into a separate basic block.
- BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
+ // Ensure the inner loop phi nodes have a separate basic block.
+ BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
+ if (InnerLoopHeader->getFirstNonPHI() != InnerLoopHeader->getTerminator()) {
SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI);
LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n");
}
@@ -1394,11 +1396,10 @@ bool LoopInterchangeTransform::transform() {
/// \brief Move all instructions except the terminator from FromBB right before
/// InsertBefore
static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
- auto &ToList = InsertBefore->getParent()->getInstList();
- auto &FromList = FromBB->getInstList();
+ BasicBlock *ToBB = InsertBefore->getParent();
- ToList.splice(InsertBefore->getIterator(), FromList, FromList.begin(),
- FromBB->getTerminator()->getIterator());
+ ToBB->splice(InsertBefore->getIterator(), FromBB, FromBB->begin(),
+ FromBB->getTerminator()->getIterator());
}
/// Swap instructions between \p BB1 and \p BB2 but keep terminators intact.
@@ -1773,5 +1774,6 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
OptimizationRemarkEmitter ORE(&F);
if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, CC, &ORE).run(LN))
return PreservedAnalyses::all();
+ U.markLoopNestChanged(true);
return getLoopPassPreservedAnalyses();
}