aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2017-12-20 14:16:56 +0000
committerDimitry Andric <dim@FreeBSD.org>2017-12-20 14:16:56 +0000
commit2cab237b5dbfe1b3e9c7aa7a3c02d2b98fcf7462 (patch)
tree524fe828571f81358bba62fdb6d04c6e5e96a2a4 /contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
parent6c7828a2807ea5e50c79ca42dbedf2b589ce63b2 (diff)
parent044eb2f6afba375a914ac9d8024f8f5142bb912e (diff)
Merge llvm trunk r321017 to contrib/llvm.
Notes
Notes: svn path=/projects/clang600-import/; revision=327023
Diffstat (limited to 'contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp')
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp167
1 files changed, 105 insertions, 62 deletions
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 4a6a35c0ab1b..21551f0a0825 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -1,4 +1,4 @@
-//===-- LoopIdiomRecognize.cpp - Loop idiom recognition -------------------===//
+//===- LoopIdiomRecognize.cpp - Loop idiom recognition --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -38,32 +38,64 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/BasicAliasAnalysis.h"
-#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <utility>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "loop-idiom"
@@ -80,7 +112,7 @@ static cl::opt<bool> UseLIRCodeSizeHeurs(
namespace {
class LoopIdiomRecognize {
- Loop *CurLoop;
+ Loop *CurLoop = nullptr;
AliasAnalysis *AA;
DominatorTree *DT;
LoopInfo *LI;
@@ -96,20 +128,21 @@ public:
TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI,
const DataLayout *DL)
- : CurLoop(nullptr), AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI),
- DL(DL) {}
+ : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL) {}
bool runOnLoop(Loop *L);
private:
- typedef SmallVector<StoreInst *, 8> StoreList;
- typedef MapVector<Value *, StoreList> StoreListMap;
+ using StoreList = SmallVector<StoreInst *, 8>;
+ using StoreListMap = MapVector<Value *, StoreList>;
+
StoreListMap StoreRefsForMemset;
StoreListMap StoreRefsForMemsetPattern;
StoreList StoreRefsForMemcpy;
bool HasMemset;
bool HasMemsetPattern;
bool HasMemcpy;
+
/// Return code for isLegalStore()
enum LegalStoreKind {
None = 0,
@@ -164,6 +197,7 @@ private:
class LoopIdiomRecognizeLegacyPass : public LoopPass {
public:
static char ID;
+
explicit LoopIdiomRecognizeLegacyPass() : LoopPass(ID) {
initializeLoopIdiomRecognizeLegacyPassPass(
*PassRegistry::getPassRegistry());
@@ -190,14 +224,16 @@ public:
/// This transformation requires natural loop information & requires that
/// loop preheaders be inserted into the CFG.
- ///
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
getLoopAnalysisUsage(AU);
}
};
-} // End anonymous namespace.
+
+} // end anonymous namespace
+
+char LoopIdiomRecognizeLegacyPass::ID = 0;
PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
@@ -211,7 +247,6 @@ PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM,
return getLoopPassPreservedAnalyses();
}
-char LoopIdiomRecognizeLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(LoopIdiomRecognizeLegacyPass, "loop-idiom",
"Recognize loop idioms", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
@@ -299,13 +334,6 @@ bool LoopIdiomRecognize::runOnCountableLoop() {
return MadeChange;
}
-static unsigned getStoreSizeInBytes(StoreInst *SI, const DataLayout *DL) {
- uint64_t SizeInBits = DL->getTypeSizeInBits(SI->getValueOperand()->getType());
- assert(((SizeInBits & 7) || (SizeInBits >> 32) == 0) &&
- "Don't overflow unsigned.");
- return (unsigned)SizeInBits >> 3;
-}
-
static APInt getStoreStride(const SCEVAddRecExpr *StoreEv) {
const SCEVConstant *ConstStride = cast<SCEVConstant>(StoreEv->getOperand(1));
return ConstStride->getAPInt();
@@ -354,7 +382,6 @@ static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) {
LoopIdiomRecognize::LegalStoreKind
LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
-
// Don't touch volatile stores.
if (SI->isVolatile())
return LegalStoreKind::None;
@@ -424,7 +451,7 @@ LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
// Check to see if the stride matches the size of the store. If so, then we
// know that every byte is touched in the loop.
APInt Stride = getStoreStride(StoreEv);
- unsigned StoreSize = getStoreSizeInBytes(SI, DL);
+ unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
if (StoreSize != Stride && StoreSize != -Stride)
return LegalStoreKind::None;
@@ -563,7 +590,7 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
const SCEVAddRecExpr *FirstStoreEv =
cast<SCEVAddRecExpr>(SE->getSCEV(FirstStorePtr));
APInt FirstStride = getStoreStride(FirstStoreEv);
- unsigned FirstStoreSize = getStoreSizeInBytes(SL[i], DL);
+ unsigned FirstStoreSize = DL->getTypeStoreSize(SL[i]->getValueOperand()->getType());
// See if we can optimize just this store in isolation.
if (FirstStride == FirstStoreSize || -FirstStride == FirstStoreSize) {
@@ -656,7 +683,7 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
break;
AdjacentStores.insert(I);
- StoreSize += getStoreSizeInBytes(I, DL);
+ StoreSize += DL->getTypeStoreSize(I->getValueOperand()->getType());
// Move to the next value in the chain.
I = ConsecutiveChain[I];
}
@@ -761,7 +788,8 @@ mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
++BI)
for (Instruction &I : **BI)
if (IgnoredStores.count(&I) == 0 &&
- (AA.getModRefInfo(&I, StoreLoc) & Access))
+ isModOrRefSet(
+ intersectModRef(AA.getModRefInfo(&I, StoreLoc), Access)))
return true;
return false;
@@ -780,6 +808,41 @@ static const SCEV *getStartForNegStride(const SCEV *Start, const SCEV *BECount,
return SE->getMinusSCEV(Start, Index);
}
+/// Compute the number of bytes as a SCEV from the backedge taken count.
+///
+/// This also maps the SCEV into the provided type and tries to handle the
+/// computation in a way that will fold cleanly.
+static const SCEV *getNumBytes(const SCEV *BECount, Type *IntPtr,
+ unsigned StoreSize, Loop *CurLoop,
+ const DataLayout *DL, ScalarEvolution *SE) {
+ const SCEV *NumBytesS;
+ // The # stored bytes is (BECount+1)*Size. Expand the trip count out to
+ // pointer size if it isn't already.
+ //
+ // If we're going to need to zero extend the BE count, check if we can add
+ // one to it prior to zero extending without overflow. Provided this is safe,
+ // it allows better simplification of the +1.
+ if (DL->getTypeSizeInBits(BECount->getType()) <
+ DL->getTypeSizeInBits(IntPtr) &&
+ SE->isLoopEntryGuardedByCond(
+ CurLoop, ICmpInst::ICMP_NE, BECount,
+ SE->getNegativeSCEV(SE->getOne(BECount->getType())))) {
+ NumBytesS = SE->getZeroExtendExpr(
+ SE->getAddExpr(BECount, SE->getOne(BECount->getType()), SCEV::FlagNUW),
+ IntPtr);
+ } else {
+ NumBytesS = SE->getAddExpr(SE->getTruncateOrZeroExtend(BECount, IntPtr),
+ SE->getOne(IntPtr), SCEV::FlagNUW);
+ }
+
+ // And scale it based on the store size.
+ if (StoreSize != 1) {
+ NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
+ SCEV::FlagNUW);
+ }
+ return NumBytesS;
+}
+
/// processLoopStridedStore - We see a strided store of some value. If we can
/// transform this into a memset or memset_pattern in the loop preheader, do so.
bool LoopIdiomRecognize::processLoopStridedStore(
@@ -824,8 +887,8 @@ bool LoopIdiomRecognize::processLoopStridedStore(
// base pointer and checking the region.
Value *BasePtr =
Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->getTerminator());
- if (mayLoopAccessLocation(BasePtr, MRI_ModRef, CurLoop, BECount, StoreSize,
- *AA, Stores)) {
+ if (mayLoopAccessLocation(BasePtr, ModRefInfo::ModRef, CurLoop, BECount,
+ StoreSize, *AA, Stores)) {
Expander.clear();
// If we generated new code for the base pointer, clean up.
RecursivelyDeleteTriviallyDeadInstructions(BasePtr, TLI);
@@ -837,16 +900,8 @@ bool LoopIdiomRecognize::processLoopStridedStore(
// Okay, everything looks good, insert the memset.
- // The # stored bytes is (BECount+1)*Size. Expand the trip count out to
- // pointer size if it isn't already.
- BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
-
const SCEV *NumBytesS =
- SE->getAddExpr(BECount, SE->getOne(IntPtr), SCEV::FlagNUW);
- if (StoreSize != 1) {
- NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
- SCEV::FlagNUW);
- }
+ getNumBytes(BECount, IntPtr, StoreSize, CurLoop, DL, SE);
// TODO: ideally we should still be able to generate memset if SCEV expander
// is taught to generate the dependencies at the latest point.
@@ -903,7 +958,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
Value *StorePtr = SI->getPointerOperand();
const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
APInt Stride = getStoreStride(StoreEv);
- unsigned StoreSize = getStoreSizeInBytes(SI, DL);
+ unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
bool NegStride = StoreSize == -Stride;
// The store must be feeding a non-volatile load.
@@ -942,7 +997,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
SmallPtrSet<Instruction *, 1> Stores;
Stores.insert(SI);
- if (mayLoopAccessLocation(StoreBasePtr, MRI_ModRef, CurLoop, BECount,
+ if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
StoreSize, *AA, Stores)) {
Expander.clear();
// If we generated new code for the base pointer, clean up.
@@ -962,8 +1017,8 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
Value *LoadBasePtr = Expander.expandCodeFor(
LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator());
- if (mayLoopAccessLocation(LoadBasePtr, MRI_Mod, CurLoop, BECount, StoreSize,
- *AA, Stores)) {
+ if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
+ StoreSize, *AA, Stores)) {
Expander.clear();
// If we generated new code for the base pointer, clean up.
RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI);
@@ -976,16 +1031,8 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
// Okay, everything is safe, we can transform this!
- // The # stored bytes is (BECount+1)*Size. Expand the trip count out to
- // pointer size if it isn't already.
- BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy);
-
const SCEV *NumBytesS =
- SE->getAddExpr(BECount, SE->getOne(IntPtrTy), SCEV::FlagNUW);
-
- if (StoreSize != 1)
- NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize),
- SCEV::FlagNUW);
+ getNumBytes(BECount, IntPtrTy, StoreSize, CurLoop, DL, SE);
Value *NumBytes =
Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
@@ -1010,16 +1057,12 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
if (StoreSize > TTI->getAtomicMemIntrinsicMaxElementSize())
return false;
+ // Create the call.
+ // Note that unordered atomic loads/stores are *required* by the spec to
+ // have an alignment but non-atomic loads/stores may not.
NewCall = Builder.CreateElementUnorderedAtomicMemCpy(
- StoreBasePtr, LoadBasePtr, NumBytes, StoreSize);
-
- // Propagate alignment info onto the pointer args. Note that unordered
- // atomic loads/stores are *required* by the spec to have an alignment
- // but non-atomic loads/stores may not.
- NewCall->addParamAttr(0, Attribute::getWithAlignment(NewCall->getContext(),
- SI->getAlignment()));
- NewCall->addParamAttr(1, Attribute::getWithAlignment(NewCall->getContext(),
- LI->getAlignment()));
+ StoreBasePtr, SI->getAlignment(), LoadBasePtr, LI->getAlignment(),
+ NumBytes, StoreSize);
}
NewCall->setDebugLoc(SI->getDebugLoc());
@@ -1273,9 +1316,9 @@ static bool detectCTLZIdiom(Loop *CurLoop, PHINode *&PhiX,
// step 2: detect instructions corresponding to "x.next = x >> 1"
if (!DefX || DefX->getOpcode() != Instruction::AShr)
return false;
- if (ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1)))
- if (!Shft || !Shft->isOne())
- return false;
+ ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1));
+ if (!Shft || !Shft->isOne())
+ return false;
VarX = DefX->getOperand(0);
// step 3: Check the recurrence of variable X
@@ -1469,7 +1512,7 @@ static CallInst *createCTLZIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
/// PhiX = PHI [InitX, DefX]
/// CntInst = CntPhi + 1
/// DefX = PhiX >> 1
-// LOOP_BODY
+/// LOOP_BODY
/// Br: loop if (DefX != 0)
/// Use(CntPhi) or Use(CntInst)
///