aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp')
-rw-r--r--llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp139
1 files changed, 96 insertions, 43 deletions
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index b77843d7cd71..3cb4df12e9b0 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -51,9 +51,11 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -91,6 +93,7 @@
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -123,15 +126,19 @@ class LoopIdiomRecognize {
const DataLayout *DL;
OptimizationRemarkEmitter &ORE;
bool ApplyCodeSizeHeuristics;
+ std::unique_ptr<MemorySSAUpdater> MSSAU;
public:
explicit LoopIdiomRecognize(AliasAnalysis *AA, DominatorTree *DT,
LoopInfo *LI, ScalarEvolution *SE,
TargetLibraryInfo *TLI,
- const TargetTransformInfo *TTI,
+ const TargetTransformInfo *TTI, MemorySSA *MSSA,
const DataLayout *DL,
OptimizationRemarkEmitter &ORE)
- : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), ORE(ORE) {}
+ : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), ORE(ORE) {
+ if (MSSA)
+ MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
+ }
bool runOnLoop(Loop *L);
@@ -224,13 +231,17 @@ public:
&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
*L->getHeader()->getParent());
const DataLayout *DL = &L->getHeader()->getModule()->getDataLayout();
+ auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
+ MemorySSA *MSSA = nullptr;
+ if (MSSAAnalysis)
+ MSSA = &MSSAAnalysis->getMSSA();
// For the old PM, we can't use OptimizationRemarkEmitter as an analysis
// pass. Function analyses need to be preserved across loop transformations
// but ORE cannot be preserved (see comment before the pass definition).
OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
- LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, DL, ORE);
+ LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, MSSA, DL, ORE);
return LIR.runOnLoop(L);
}
@@ -239,6 +250,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
getLoopAnalysisUsage(AU);
}
};
@@ -252,23 +264,20 @@ PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM,
LPMUpdater &) {
const auto *DL = &L.getHeader()->getModule()->getDataLayout();
- const auto &FAM =
- AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
- Function *F = L.getHeader()->getParent();
-
- auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
- // FIXME: This should probably be optional rather than required.
- if (!ORE)
- report_fatal_error(
- "LoopIdiomRecognizePass: OptimizationRemarkEmitterAnalysis not cached "
- "at a higher level");
+ // For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
+ // pass. Function analyses need to be preserved across loop transformations
+ // but ORE cannot be preserved (see comment before the pass definition).
+ OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
- LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI, DL,
- *ORE);
+ LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI,
+ AR.MSSA, DL, ORE);
if (!LIR.runOnLoop(&L))
return PreservedAnalyses::all();
- return getLoopPassPreservedAnalyses();
+ auto PA = getLoopPassPreservedAnalyses();
+ if (AR.MSSA)
+ PA.preserve<MemorySSAAnalysis>();
+ return PA;
}
INITIALIZE_PASS_BEGIN(LoopIdiomRecognizeLegacyPass, "loop-idiom",
@@ -339,14 +348,14 @@ bool LoopIdiomRecognize::runOnCountableLoop() {
<< "] Countable Loop %" << CurLoop->getHeader()->getName()
<< "\n");
- bool MadeChange = false;
-
// The following transforms hoist stores/memsets into the loop pre-header.
- // Give up if the loop has instructions may throw.
+ // Give up if the loop has instructions that may throw.
SimpleLoopSafetyInfo SafetyInfo;
SafetyInfo.computeLoopSafetyInfo(CurLoop);
if (SafetyInfo.anyBlockMayThrow())
- return MadeChange;
+ return false;
+
+ bool MadeChange = false;
// Scan all the blocks in the loop that are not in subloops.
for (auto *BB : CurLoop->getBlocks()) {
@@ -968,11 +977,17 @@ bool LoopIdiomRecognize::processLoopStridedStore(
Value *PatternPtr = ConstantExpr::getBitCast(GV, Int8PtrTy);
NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes});
}
+ NewCall->setDebugLoc(TheStore->getDebugLoc());
+
+ if (MSSAU) {
+ MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
+ NewCall, nullptr, NewCall->getParent(), MemorySSA::BeforeTerminator);
+ MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
+ }
LLVM_DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n"
<< " from store to: " << *Ev << " at: " << *TheStore
<< "\n");
- NewCall->setDebugLoc(TheStore->getDebugLoc());
ORE.emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "ProcessLoopStridedStore",
@@ -984,12 +999,40 @@ bool LoopIdiomRecognize::processLoopStridedStore(
// Okay, the memset has been formed. Zap the original store and anything that
// feeds into it.
- for (auto *I : Stores)
+ for (auto *I : Stores) {
+ if (MSSAU)
+ MSSAU->removeMemoryAccess(I, true);
deleteDeadInstruction(I);
+ }
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
++NumMemSet;
return true;
}
+class ExpandedValuesCleaner {
+ SCEVExpander &Expander;
+ TargetLibraryInfo *TLI;
+ SmallVector<Value *, 4> ExpandedValues;
+ bool Commit = false;
+
+public:
+ ExpandedValuesCleaner(SCEVExpander &Expander, TargetLibraryInfo *TLI)
+ : Expander(Expander), TLI(TLI) {}
+
+ void add(Value *V) { ExpandedValues.push_back(V); }
+
+ void commit() { Commit = true; }
+
+ ~ExpandedValuesCleaner() {
+ if (!Commit) {
+ Expander.clear();
+ for (auto *V : ExpandedValues)
+ RecursivelyDeleteTriviallyDeadInstructions(V, TLI);
+ }
+ }
+};
+
/// If the stored value is a strided load in the same loop with the same stride
/// this may be transformable into a memcpy. This kicks in for stuff like
/// for (i) A[i] = B[i];
@@ -1020,6 +1063,8 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
IRBuilder<> Builder(Preheader->getTerminator());
SCEVExpander Expander(*SE, *DL, "loop-idiom");
+ ExpandedValuesCleaner EVC(Expander, TLI);
+
const SCEV *StrStart = StoreEv->getStart();
unsigned StrAS = SI->getPointerAddressSpace();
Type *IntIdxTy = Builder.getIntNTy(DL->getIndexSizeInBits(StrAS));
@@ -1036,16 +1081,13 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
// checking everything.
Value *StoreBasePtr = Expander.expandCodeFor(
StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator());
+ EVC.add(StoreBasePtr);
SmallPtrSet<Instruction *, 1> Stores;
Stores.insert(SI);
if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
- StoreSize, *AA, Stores)) {
- Expander.clear();
- // If we generated new code for the base pointer, clean up.
- RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
+ StoreSize, *AA, Stores))
return false;
- }
const SCEV *LdStart = LoadEv->getStart();
unsigned LdAS = LI->getPointerAddressSpace();
@@ -1058,15 +1100,11 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
// mutated by the loop.
Value *LoadBasePtr = Expander.expandCodeFor(
LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator());
+ EVC.add(LoadBasePtr);
if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
- StoreSize, *AA, Stores)) {
- Expander.clear();
- // If we generated new code for the base pointer, clean up.
- RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI);
- RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
+ StoreSize, *AA, Stores))
return false;
- }
if (avoidLIRForMultiBlockLoop())
return false;
@@ -1078,6 +1116,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
Value *NumBytes =
Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());
+ EVC.add(NumBytes);
CallInst *NewCall = nullptr;
// Check whether to generate an unordered atomic memcpy:
@@ -1089,8 +1128,9 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
else {
// We cannot allow unaligned ops for unordered load/store, so reject
// anything where the alignment isn't at least the element size.
- unsigned Align = std::min(SI->getAlignment(), LI->getAlignment());
- if (Align < StoreSize)
+ const Align StoreAlign = SI->getAlign();
+ const Align LoadAlign = LI->getAlign();
+ if (StoreAlign < StoreSize || LoadAlign < StoreSize)
return false;
// If the element.atomic memcpy is not lowered into explicit
@@ -1104,11 +1144,17 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
// Note that unordered atomic loads/stores are *required* by the spec to
// have an alignment but non-atomic loads/stores may not.
NewCall = Builder.CreateElementUnorderedAtomicMemCpy(
- StoreBasePtr, SI->getAlignment(), LoadBasePtr, LI->getAlignment(),
- NumBytes, StoreSize);
+ StoreBasePtr, StoreAlign, LoadBasePtr, LoadAlign, NumBytes,
+ StoreSize);
}
NewCall->setDebugLoc(SI->getDebugLoc());
+ if (MSSAU) {
+ MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
+ NewCall, nullptr, NewCall->getParent(), MemorySSA::BeforeTerminator);
+ MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
+ }
+
LLVM_DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n"
<< " from load ptr=" << *LoadEv << " at: " << *LI << "\n"
<< " from store ptr=" << *StoreEv << " at: " << *SI
@@ -1124,8 +1170,13 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
// Okay, the memcpy has been formed. Zap the original store and anything that
// feeds into it.
+ if (MSSAU)
+ MSSAU->removeMemoryAccess(SI, true);
deleteDeadInstruction(SI);
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
++NumMemCpy;
+ EVC.commit();
return true;
}
@@ -1502,18 +1553,20 @@ bool LoopIdiomRecognize::recognizeAndInsertFFS() {
// %inc = add nsw %i.0, 1
// br i1 %tobool
- const Value *Args[] =
- {InitX, ZeroCheck ? ConstantInt::getTrue(InitX->getContext())
- : ConstantInt::getFalse(InitX->getContext())};
+ const Value *Args[] = {
+ InitX, ZeroCheck ? ConstantInt::getTrue(InitX->getContext())
+ : ConstantInt::getFalse(InitX->getContext())};
// @llvm.dbg doesn't count as they have no semantic effect.
auto InstWithoutDebugIt = CurLoop->getHeader()->instructionsWithoutDebug();
uint32_t HeaderSize =
std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end());
+ IntrinsicCostAttributes Attrs(IntrinID, InitX->getType(), Args);
+ int Cost =
+ TTI->getIntrinsicInstrCost(Attrs, TargetTransformInfo::TCK_SizeAndLatency);
if (HeaderSize != IdiomCanonicalSize &&
- TTI->getIntrinsicCost(IntrinID, InitX->getType(), Args) >
- TargetTransformInfo::TCC_Basic)
+ Cost > TargetTransformInfo::TCC_Basic)
return false;
transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,