diff options
Diffstat (limited to 'llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp')
-rw-r--r-- | llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 139 |
1 files changed, 96 insertions, 43 deletions
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index b77843d7cd71..3cb4df12e9b0 100644 --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -51,9 +51,11 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/MemorySSAUpdater.h" +#include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -91,6 +93,7 @@ #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -123,15 +126,19 @@ class LoopIdiomRecognize { const DataLayout *DL; OptimizationRemarkEmitter &ORE; bool ApplyCodeSizeHeuristics; + std::unique_ptr<MemorySSAUpdater> MSSAU; public: explicit LoopIdiomRecognize(AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, TargetLibraryInfo *TLI, - const TargetTransformInfo *TTI, + const TargetTransformInfo *TTI, MemorySSA *MSSA, const DataLayout *DL, OptimizationRemarkEmitter &ORE) - : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), ORE(ORE) {} + : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), ORE(ORE) { + if (MSSA) + MSSAU = std::make_unique<MemorySSAUpdater>(MSSA); + } bool runOnLoop(Loop *L); @@ -224,13 +231,17 @@ public: &getAnalysis<TargetTransformInfoWrapperPass>().getTTI( *L->getHeader()->getParent()); const DataLayout *DL = &L->getHeader()->getModule()->getDataLayout(); + auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>(); + MemorySSA *MSSA = nullptr; + if (MSSAAnalysis) + MSSA = &MSSAAnalysis->getMSSA(); // For the old PM, we can't use OptimizationRemarkEmitter as an analysis // pass. Function analyses need to be preserved across loop transformations // but ORE cannot be preserved (see comment before the pass definition). OptimizationRemarkEmitter ORE(L->getHeader()->getParent()); - LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, DL, ORE); + LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, MSSA, DL, ORE); return LIR.runOnLoop(L); } @@ -239,6 +250,7 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<TargetLibraryInfoWrapperPass>(); AU.addRequired<TargetTransformInfoWrapperPass>(); + AU.addPreserved<MemorySSAWrapperPass>(); getLoopAnalysisUsage(AU); } }; @@ -252,23 +264,20 @@ PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM, LPMUpdater &) { const auto *DL = &L.getHeader()->getModule()->getDataLayout(); - const auto &FAM = - AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager(); - Function *F = L.getHeader()->getParent(); - - auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F); - // FIXME: This should probably be optional rather than required. - if (!ORE) - report_fatal_error( - "LoopIdiomRecognizePass: OptimizationRemarkEmitterAnalysis not cached " - "at a higher level"); + // For the new PM, we also can't use OptimizationRemarkEmitter as an analysis + // pass. Function analyses need to be preserved across loop transformations + // but ORE cannot be preserved (see comment before the pass definition). + OptimizationRemarkEmitter ORE(L.getHeader()->getParent()); - LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI, DL, - *ORE); + LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI, + AR.MSSA, DL, ORE); if (!LIR.runOnLoop(&L)) return PreservedAnalyses::all(); - return getLoopPassPreservedAnalyses(); + auto PA = getLoopPassPreservedAnalyses(); + if (AR.MSSA) + PA.preserve<MemorySSAAnalysis>(); + return PA; } INITIALIZE_PASS_BEGIN(LoopIdiomRecognizeLegacyPass, "loop-idiom", @@ -339,14 +348,14 @@ bool LoopIdiomRecognize::runOnCountableLoop() { << "] Countable Loop %" << CurLoop->getHeader()->getName() << "\n"); - bool MadeChange = false; - // The following transforms hoist stores/memsets into the loop pre-header. - // Give up if the loop has instructions may throw. + // Give up if the loop has instructions that may throw. SimpleLoopSafetyInfo SafetyInfo; SafetyInfo.computeLoopSafetyInfo(CurLoop); if (SafetyInfo.anyBlockMayThrow()) - return MadeChange; + return false; + + bool MadeChange = false; // Scan all the blocks in the loop that are not in subloops. for (auto *BB : CurLoop->getBlocks()) { @@ -968,11 +977,17 @@ bool LoopIdiomRecognize::processLoopStridedStore( Value *PatternPtr = ConstantExpr::getBitCast(GV, Int8PtrTy); NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes}); } + NewCall->setDebugLoc(TheStore->getDebugLoc()); + + if (MSSAU) { + MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB( + NewCall, nullptr, NewCall->getParent(), MemorySSA::BeforeTerminator); + MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true); + } LLVM_DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n" << " from store to: " << *Ev << " at: " << *TheStore << "\n"); - NewCall->setDebugLoc(TheStore->getDebugLoc()); ORE.emit([&]() { return OptimizationRemark(DEBUG_TYPE, "ProcessLoopStridedStore", @@ -984,12 +999,40 @@ bool LoopIdiomRecognize::processLoopStridedStore( // Okay, the memset has been formed. Zap the original store and anything that // feeds into it. - for (auto *I : Stores) + for (auto *I : Stores) { + if (MSSAU) + MSSAU->removeMemoryAccess(I, true); deleteDeadInstruction(I); + } + if (MSSAU && VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); ++NumMemSet; return true; } +class ExpandedValuesCleaner { + SCEVExpander &Expander; + TargetLibraryInfo *TLI; + SmallVector<Value *, 4> ExpandedValues; + bool Commit = false; + +public: + ExpandedValuesCleaner(SCEVExpander &Expander, TargetLibraryInfo *TLI) + : Expander(Expander), TLI(TLI) {} + + void add(Value *V) { ExpandedValues.push_back(V); } + + void commit() { Commit = true; } + + ~ExpandedValuesCleaner() { + if (!Commit) { + Expander.clear(); + for (auto *V : ExpandedValues) + RecursivelyDeleteTriviallyDeadInstructions(V, TLI); + } + } +}; + /// If the stored value is a strided load in the same loop with the same stride /// this may be transformable into a memcpy. This kicks in for stuff like /// for (i) A[i] = B[i]; @@ -1020,6 +1063,8 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, IRBuilder<> Builder(Preheader->getTerminator()); SCEVExpander Expander(*SE, *DL, "loop-idiom"); + ExpandedValuesCleaner EVC(Expander, TLI); + const SCEV *StrStart = StoreEv->getStart(); unsigned StrAS = SI->getPointerAddressSpace(); Type *IntIdxTy = Builder.getIntNTy(DL->getIndexSizeInBits(StrAS)); @@ -1036,16 +1081,13 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, // checking everything. Value *StoreBasePtr = Expander.expandCodeFor( StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator()); + EVC.add(StoreBasePtr); SmallPtrSet<Instruction *, 1> Stores; Stores.insert(SI); if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount, - StoreSize, *AA, Stores)) { - Expander.clear(); - // If we generated new code for the base pointer, clean up. - RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI); + StoreSize, *AA, Stores)) return false; - } const SCEV *LdStart = LoadEv->getStart(); unsigned LdAS = LI->getPointerAddressSpace(); @@ -1058,15 +1100,11 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, // mutated by the loop. Value *LoadBasePtr = Expander.expandCodeFor( LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator()); + EVC.add(LoadBasePtr); if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount, - StoreSize, *AA, Stores)) { - Expander.clear(); - // If we generated new code for the base pointer, clean up. - RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI); - RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI); + StoreSize, *AA, Stores)) return false; - } if (avoidLIRForMultiBlockLoop()) return false; @@ -1078,6 +1116,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator()); + EVC.add(NumBytes); CallInst *NewCall = nullptr; // Check whether to generate an unordered atomic memcpy: @@ -1089,8 +1128,9 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, else { // We cannot allow unaligned ops for unordered load/store, so reject // anything where the alignment isn't at least the element size. - unsigned Align = std::min(SI->getAlignment(), LI->getAlignment()); - if (Align < StoreSize) + const Align StoreAlign = SI->getAlign(); + const Align LoadAlign = LI->getAlign(); + if (StoreAlign < StoreSize || LoadAlign < StoreSize) return false; // If the element.atomic memcpy is not lowered into explicit @@ -1104,11 +1144,17 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, // Note that unordered atomic loads/stores are *required* by the spec to // have an alignment but non-atomic loads/stores may not. NewCall = Builder.CreateElementUnorderedAtomicMemCpy( - StoreBasePtr, SI->getAlignment(), LoadBasePtr, LI->getAlignment(), - NumBytes, StoreSize); + StoreBasePtr, StoreAlign, LoadBasePtr, LoadAlign, NumBytes, + StoreSize); } NewCall->setDebugLoc(SI->getDebugLoc()); + if (MSSAU) { + MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB( + NewCall, nullptr, NewCall->getParent(), MemorySSA::BeforeTerminator); + MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true); + } + LLVM_DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n" << " from load ptr=" << *LoadEv << " at: " << *LI << "\n" << " from store ptr=" << *StoreEv << " at: " << *SI @@ -1124,8 +1170,13 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, // Okay, the memcpy has been formed. Zap the original store and anything that // feeds into it. + if (MSSAU) + MSSAU->removeMemoryAccess(SI, true); deleteDeadInstruction(SI); + if (MSSAU && VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); ++NumMemCpy; + EVC.commit(); return true; } @@ -1502,18 +1553,20 @@ bool LoopIdiomRecognize::recognizeAndInsertFFS() { // %inc = add nsw %i.0, 1 // br i1 %tobool - const Value *Args[] = - {InitX, ZeroCheck ? ConstantInt::getTrue(InitX->getContext()) - : ConstantInt::getFalse(InitX->getContext())}; + const Value *Args[] = { + InitX, ZeroCheck ? ConstantInt::getTrue(InitX->getContext()) + : ConstantInt::getFalse(InitX->getContext())}; // @llvm.dbg doesn't count as they have no semantic effect. auto InstWithoutDebugIt = CurLoop->getHeader()->instructionsWithoutDebug(); uint32_t HeaderSize = std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end()); + IntrinsicCostAttributes Attrs(IntrinID, InitX->getType(), Args); + int Cost = + TTI->getIntrinsicInstrCost(Attrs, TargetTransformInfo::TCK_SizeAndLatency); if (HeaderSize != IdiomCanonicalSize && - TTI->getIntrinsicCost(IntrinID, InitX->getType(), Args) > - TargetTransformInfo::TCC_Basic) + Cost > TargetTransformInfo::TCC_Basic) return false; transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX, |