diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2024-02-07 14:37:28 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2024-02-07 14:37:28 +0000 |
commit | 4fdf604ba667503ae582304cebdd3df426778a6b (patch) | |
tree | aae65a5d1b0a1b1acd9389fc36a0baf5fc1918c7 /llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | |
parent | 2d835ae8657273e3aa8b9ef3201fb8df5563af9d (diff) |
Vendor import of llvm-project branch release/18.x llvmorg-18.1.0-rc2-0-gc6c86965d967.vendor/llvm-project/llvmorg-18.1.0-rc2-0-gc6c86965d967
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/LoopVectorize.cpp')
-rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 62 |
1 files changed, 56 insertions, 6 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 6ca93e15719f..dd596c567cd4 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1957,6 +1957,8 @@ class GeneratedRTChecks { bool CostTooHigh = false; const bool AddBranchWeights; + Loop *OuterLoop = nullptr; + public: GeneratedRTChecks(ScalarEvolution &SE, DominatorTree *DT, LoopInfo *LI, TargetTransformInfo *TTI, const DataLayout &DL, @@ -2053,6 +2055,9 @@ public: DT->eraseNode(SCEVCheckBlock); LI->removeBlock(SCEVCheckBlock); } + + // Outer loop is used as part of the later cost calculations. + OuterLoop = L->getParentLoop(); } InstructionCost getCost() { @@ -2076,16 +2081,61 @@ public: LLVM_DEBUG(dbgs() << " " << C << " for " << I << "\n"); RTCheckCost += C; } - if (MemCheckBlock) + if (MemCheckBlock) { + InstructionCost MemCheckCost = 0; for (Instruction &I : *MemCheckBlock) { if (MemCheckBlock->getTerminator() == &I) continue; InstructionCost C = TTI->getInstructionCost(&I, TTI::TCK_RecipThroughput); LLVM_DEBUG(dbgs() << " " << C << " for " << I << "\n"); - RTCheckCost += C; + MemCheckCost += C; } + // If the runtime memory checks are being created inside an outer loop + // we should find out if these checks are outer loop invariant. If so, + // the checks will likely be hoisted out and so the effective cost will + // reduce according to the outer loop trip count. + if (OuterLoop) { + ScalarEvolution *SE = MemCheckExp.getSE(); + // TODO: If profitable, we could refine this further by analysing every + // individual memory check, since there could be a mixture of loop + // variant and invariant checks that mean the final condition is + // variant. + const SCEV *Cond = SE->getSCEV(MemRuntimeCheckCond); + if (SE->isLoopInvariant(Cond, OuterLoop)) { + // It seems reasonable to assume that we can reduce the effective + // cost of the checks even when we know nothing about the trip + // count. Assume that the outer loop executes at least twice. + unsigned BestTripCount = 2; + + // If exact trip count is known use that. + if (unsigned SmallTC = SE->getSmallConstantTripCount(OuterLoop)) + BestTripCount = SmallTC; + else if (LoopVectorizeWithBlockFrequency) { + // Else use profile data if available. + if (auto EstimatedTC = getLoopEstimatedTripCount(OuterLoop)) + BestTripCount = *EstimatedTC; + } + + InstructionCost NewMemCheckCost = MemCheckCost / BestTripCount; + + // Let's ensure the cost is always at least 1. + NewMemCheckCost = std::max(*NewMemCheckCost.getValue(), + (InstructionCost::CostType)1); + + LLVM_DEBUG(dbgs() + << "We expect runtime memory checks to be hoisted " + << "out of the outer loop. Cost reduced from " + << MemCheckCost << " to " << NewMemCheckCost << '\n'); + + MemCheckCost = NewMemCheckCost; + } + } + + RTCheckCost += MemCheckCost; + } + if (SCEVCheckBlock || MemCheckBlock) LLVM_DEBUG(dbgs() << "Total cost of runtime checks: " << RTCheckCost << "\n"); @@ -2144,8 +2194,8 @@ public: BranchInst::Create(LoopVectorPreHeader, SCEVCheckBlock); // Create new preheader for vector loop. - if (auto *PL = LI->getLoopFor(LoopVectorPreHeader)) - PL->addBasicBlockToLoop(SCEVCheckBlock, *LI); + if (OuterLoop) + OuterLoop->addBasicBlockToLoop(SCEVCheckBlock, *LI); SCEVCheckBlock->getTerminator()->eraseFromParent(); SCEVCheckBlock->moveBefore(LoopVectorPreHeader); @@ -2179,8 +2229,8 @@ public: DT->changeImmediateDominator(LoopVectorPreHeader, MemCheckBlock); MemCheckBlock->moveBefore(LoopVectorPreHeader); - if (auto *PL = LI->getLoopFor(LoopVectorPreHeader)) - PL->addBasicBlockToLoop(MemCheckBlock, *LI); + if (OuterLoop) + OuterLoop->addBasicBlockToLoop(MemCheckBlock, *LI); BranchInst &BI = *BranchInst::Create(Bypass, LoopVectorPreHeader, MemRuntimeCheckCond); |