diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2017-12-20 14:16:56 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-12-20 14:16:56 +0000 |
commit | 2cab237b5dbfe1b3e9c7aa7a3c02d2b98fcf7462 (patch) | |
tree | 524fe828571f81358bba62fdb6d04c6e5e96a2a4 /contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp | |
parent | 6c7828a2807ea5e50c79ca42dbedf2b589ce63b2 (diff) | |
parent | 044eb2f6afba375a914ac9d8024f8f5142bb912e (diff) |
Merge llvm trunk r321017 to contrib/llvm.
Notes
Notes:
svn path=/projects/clang600-import/; revision=327023
Diffstat (limited to 'contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp')
-rw-r--r-- | contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 167 |
1 files changed, 105 insertions, 62 deletions
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 4a6a35c0ab1b..21551f0a0825 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -1,4 +1,4 @@ -//===-- LoopIdiomRecognize.cpp - Loop idiom recognition -------------------===// +//===- LoopIdiomRecognize.cpp - Loop idiom recognition --------------------===// // // The LLVM Compiler Infrastructure // @@ -38,32 +38,64 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/LoopIdiomRecognize.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/BasicAliasAnalysis.h" -#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <utility> +#include <vector> + using namespace llvm; #define DEBUG_TYPE "loop-idiom" @@ -80,7 +112,7 @@ static cl::opt<bool> UseLIRCodeSizeHeurs( namespace { class LoopIdiomRecognize { - Loop *CurLoop; + Loop *CurLoop = nullptr; AliasAnalysis *AA; DominatorTree *DT; LoopInfo *LI; @@ -96,20 +128,21 @@ public: TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, const DataLayout *DL) - : CurLoop(nullptr), AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), - DL(DL) {} + : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL) {} bool runOnLoop(Loop *L); private: - typedef SmallVector<StoreInst *, 8> StoreList; - typedef MapVector<Value *, StoreList> StoreListMap; + using StoreList = SmallVector<StoreInst *, 8>; + using StoreListMap = MapVector<Value *, StoreList>; + StoreListMap StoreRefsForMemset; StoreListMap StoreRefsForMemsetPattern; StoreList StoreRefsForMemcpy; bool HasMemset; bool HasMemsetPattern; bool HasMemcpy; + /// Return code for isLegalStore() enum LegalStoreKind { None = 0, @@ -164,6 +197,7 @@ private: class LoopIdiomRecognizeLegacyPass : public LoopPass { public: static char ID; + explicit LoopIdiomRecognizeLegacyPass() : LoopPass(ID) { initializeLoopIdiomRecognizeLegacyPassPass( *PassRegistry::getPassRegistry()); @@ -190,14 +224,16 @@ public: /// This transformation requires natural loop information & requires that /// loop preheaders be inserted into the CFG. - /// void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<TargetLibraryInfoWrapperPass>(); AU.addRequired<TargetTransformInfoWrapperPass>(); getLoopAnalysisUsage(AU); } }; -} // End anonymous namespace. + +} // end anonymous namespace + +char LoopIdiomRecognizeLegacyPass::ID = 0; PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, @@ -211,7 +247,6 @@ PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM, return getLoopPassPreservedAnalyses(); } -char LoopIdiomRecognizeLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(LoopIdiomRecognizeLegacyPass, "loop-idiom", "Recognize loop idioms", false, false) INITIALIZE_PASS_DEPENDENCY(LoopPass) @@ -299,13 +334,6 @@ bool LoopIdiomRecognize::runOnCountableLoop() { return MadeChange; } -static unsigned getStoreSizeInBytes(StoreInst *SI, const DataLayout *DL) { - uint64_t SizeInBits = DL->getTypeSizeInBits(SI->getValueOperand()->getType()); - assert(((SizeInBits & 7) || (SizeInBits >> 32) == 0) && - "Don't overflow unsigned."); - return (unsigned)SizeInBits >> 3; -} - static APInt getStoreStride(const SCEVAddRecExpr *StoreEv) { const SCEVConstant *ConstStride = cast<SCEVConstant>(StoreEv->getOperand(1)); return ConstStride->getAPInt(); @@ -354,7 +382,6 @@ static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) { LoopIdiomRecognize::LegalStoreKind LoopIdiomRecognize::isLegalStore(StoreInst *SI) { - // Don't touch volatile stores. if (SI->isVolatile()) return LegalStoreKind::None; @@ -424,7 +451,7 @@ LoopIdiomRecognize::isLegalStore(StoreInst *SI) { // Check to see if the stride matches the size of the store. If so, then we // know that every byte is touched in the loop. APInt Stride = getStoreStride(StoreEv); - unsigned StoreSize = getStoreSizeInBytes(SI, DL); + unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType()); if (StoreSize != Stride && StoreSize != -Stride) return LegalStoreKind::None; @@ -563,7 +590,7 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL, const SCEVAddRecExpr *FirstStoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(FirstStorePtr)); APInt FirstStride = getStoreStride(FirstStoreEv); - unsigned FirstStoreSize = getStoreSizeInBytes(SL[i], DL); + unsigned FirstStoreSize = DL->getTypeStoreSize(SL[i]->getValueOperand()->getType()); // See if we can optimize just this store in isolation. if (FirstStride == FirstStoreSize || -FirstStride == FirstStoreSize) { @@ -656,7 +683,7 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL, break; AdjacentStores.insert(I); - StoreSize += getStoreSizeInBytes(I, DL); + StoreSize += DL->getTypeStoreSize(I->getValueOperand()->getType()); // Move to the next value in the chain. I = ConsecutiveChain[I]; } @@ -761,7 +788,8 @@ mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L, ++BI) for (Instruction &I : **BI) if (IgnoredStores.count(&I) == 0 && - (AA.getModRefInfo(&I, StoreLoc) & Access)) + isModOrRefSet( + intersectModRef(AA.getModRefInfo(&I, StoreLoc), Access))) return true; return false; @@ -780,6 +808,41 @@ static const SCEV *getStartForNegStride(const SCEV *Start, const SCEV *BECount, return SE->getMinusSCEV(Start, Index); } +/// Compute the number of bytes as a SCEV from the backedge taken count. +/// +/// This also maps the SCEV into the provided type and tries to handle the +/// computation in a way that will fold cleanly. +static const SCEV *getNumBytes(const SCEV *BECount, Type *IntPtr, + unsigned StoreSize, Loop *CurLoop, + const DataLayout *DL, ScalarEvolution *SE) { + const SCEV *NumBytesS; + // The # stored bytes is (BECount+1)*Size. Expand the trip count out to + // pointer size if it isn't already. + // + // If we're going to need to zero extend the BE count, check if we can add + // one to it prior to zero extending without overflow. Provided this is safe, + // it allows better simplification of the +1. + if (DL->getTypeSizeInBits(BECount->getType()) < + DL->getTypeSizeInBits(IntPtr) && + SE->isLoopEntryGuardedByCond( + CurLoop, ICmpInst::ICMP_NE, BECount, + SE->getNegativeSCEV(SE->getOne(BECount->getType())))) { + NumBytesS = SE->getZeroExtendExpr( + SE->getAddExpr(BECount, SE->getOne(BECount->getType()), SCEV::FlagNUW), + IntPtr); + } else { + NumBytesS = SE->getAddExpr(SE->getTruncateOrZeroExtend(BECount, IntPtr), + SE->getOne(IntPtr), SCEV::FlagNUW); + } + + // And scale it based on the store size. + if (StoreSize != 1) { + NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize), + SCEV::FlagNUW); + } + return NumBytesS; +} + /// processLoopStridedStore - We see a strided store of some value. If we can /// transform this into a memset or memset_pattern in the loop preheader, do so. bool LoopIdiomRecognize::processLoopStridedStore( @@ -824,8 +887,8 @@ bool LoopIdiomRecognize::processLoopStridedStore( // base pointer and checking the region. Value *BasePtr = Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->getTerminator()); - if (mayLoopAccessLocation(BasePtr, MRI_ModRef, CurLoop, BECount, StoreSize, - *AA, Stores)) { + if (mayLoopAccessLocation(BasePtr, ModRefInfo::ModRef, CurLoop, BECount, + StoreSize, *AA, Stores)) { Expander.clear(); // If we generated new code for the base pointer, clean up. RecursivelyDeleteTriviallyDeadInstructions(BasePtr, TLI); @@ -837,16 +900,8 @@ bool LoopIdiomRecognize::processLoopStridedStore( // Okay, everything looks good, insert the memset. - // The # stored bytes is (BECount+1)*Size. Expand the trip count out to - // pointer size if it isn't already. - BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr); - const SCEV *NumBytesS = - SE->getAddExpr(BECount, SE->getOne(IntPtr), SCEV::FlagNUW); - if (StoreSize != 1) { - NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize), - SCEV::FlagNUW); - } + getNumBytes(BECount, IntPtr, StoreSize, CurLoop, DL, SE); // TODO: ideally we should still be able to generate memset if SCEV expander // is taught to generate the dependencies at the latest point. @@ -903,7 +958,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, Value *StorePtr = SI->getPointerOperand(); const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr)); APInt Stride = getStoreStride(StoreEv); - unsigned StoreSize = getStoreSizeInBytes(SI, DL); + unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType()); bool NegStride = StoreSize == -Stride; // The store must be feeding a non-volatile load. @@ -942,7 +997,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, SmallPtrSet<Instruction *, 1> Stores; Stores.insert(SI); - if (mayLoopAccessLocation(StoreBasePtr, MRI_ModRef, CurLoop, BECount, + if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount, StoreSize, *AA, Stores)) { Expander.clear(); // If we generated new code for the base pointer, clean up. @@ -962,8 +1017,8 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, Value *LoadBasePtr = Expander.expandCodeFor( LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator()); - if (mayLoopAccessLocation(LoadBasePtr, MRI_Mod, CurLoop, BECount, StoreSize, - *AA, Stores)) { + if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount, + StoreSize, *AA, Stores)) { Expander.clear(); // If we generated new code for the base pointer, clean up. RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI); @@ -976,16 +1031,8 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, // Okay, everything is safe, we can transform this! - // The # stored bytes is (BECount+1)*Size. Expand the trip count out to - // pointer size if it isn't already. - BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy); - const SCEV *NumBytesS = - SE->getAddExpr(BECount, SE->getOne(IntPtrTy), SCEV::FlagNUW); - - if (StoreSize != 1) - NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize), - SCEV::FlagNUW); + getNumBytes(BECount, IntPtrTy, StoreSize, CurLoop, DL, SE); Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator()); @@ -1010,16 +1057,12 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, if (StoreSize > TTI->getAtomicMemIntrinsicMaxElementSize()) return false; + // Create the call. + // Note that unordered atomic loads/stores are *required* by the spec to + // have an alignment but non-atomic loads/stores may not. NewCall = Builder.CreateElementUnorderedAtomicMemCpy( - StoreBasePtr, LoadBasePtr, NumBytes, StoreSize); - - // Propagate alignment info onto the pointer args. Note that unordered - // atomic loads/stores are *required* by the spec to have an alignment - // but non-atomic loads/stores may not. - NewCall->addParamAttr(0, Attribute::getWithAlignment(NewCall->getContext(), - SI->getAlignment())); - NewCall->addParamAttr(1, Attribute::getWithAlignment(NewCall->getContext(), - LI->getAlignment())); + StoreBasePtr, SI->getAlignment(), LoadBasePtr, LI->getAlignment(), + NumBytes, StoreSize); } NewCall->setDebugLoc(SI->getDebugLoc()); @@ -1273,9 +1316,9 @@ static bool detectCTLZIdiom(Loop *CurLoop, PHINode *&PhiX, // step 2: detect instructions corresponding to "x.next = x >> 1" if (!DefX || DefX->getOpcode() != Instruction::AShr) return false; - if (ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1))) - if (!Shft || !Shft->isOne()) - return false; + ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1)); + if (!Shft || !Shft->isOne()) + return false; VarX = DefX->getOperand(0); // step 3: Check the recurrence of variable X @@ -1469,7 +1512,7 @@ static CallInst *createCTLZIntrinsic(IRBuilder<> &IRBuilder, Value *Val, /// PhiX = PHI [InitX, DefX] /// CntInst = CntPhi + 1 /// DefX = PhiX >> 1 -// LOOP_BODY +/// LOOP_BODY /// Br: loop if (DefX != 0) /// Use(CntPhi) or Use(CntInst) /// |