Diffstat (limited to 'contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp | 486
1 file changed, 340 insertions, 146 deletions
diff --git a/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 2ab78d2b7ee2..79161db9b5e4 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -47,6 +47,7 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/PassManager.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Value.h"
 #include "llvm/IR/ValueHandle.h"
@@ -60,12 +61,12 @@
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
-#include <cstdlib>
 #include <iterator>
 #include <utility>
 #include <vector>
 
 using namespace llvm;
+using namespace llvm::PatternMatch;
 
 #define DEBUG_TYPE "loop-accesses"
 
@@ -172,7 +173,8 @@ RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
     : High(RtCheck.Pointers[Index].End), Low(RtCheck.Pointers[Index].Start),
       AddressSpace(RtCheck.Pointers[Index]
                        .PointerValue->getType()
-                       ->getPointerAddressSpace()) {
+                       ->getPointerAddressSpace()),
+      NeedsFreeze(RtCheck.Pointers[Index].NeedsFreeze) {
   Members.push_back(Index);
 }
 
@@ -189,21 +191,20 @@
 ///
 /// There is no conflict when the intervals are disjoint:
 /// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
-void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
+void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
+                                    Type *AccessTy, bool WritePtr,
                                     unsigned DepSetId, unsigned ASId,
-                                    const ValueToValueMap &Strides,
-                                    PredicatedScalarEvolution &PSE) {
-  // Get the stride replaced scev.
-  const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
+                                    PredicatedScalarEvolution &PSE,
+                                    bool NeedsFreeze) {
   ScalarEvolution *SE = PSE.getSE();
 
   const SCEV *ScStart;
   const SCEV *ScEnd;
 
-  if (SE->isLoopInvariant(Sc, Lp)) {
-    ScStart = ScEnd = Sc;
+  if (SE->isLoopInvariant(PtrExpr, Lp)) {
+    ScStart = ScEnd = PtrExpr;
   } else {
-    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
+    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrExpr);
     assert(AR && "Invalid addrec expression");
     const SCEV *Ex = PSE.getBackedgeTakenCount();
 
@@ -227,15 +228,100 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
   // Add the size of the pointed element to ScEnd.
   auto &DL = Lp->getHeader()->getModule()->getDataLayout();
   Type *IdxTy = DL.getIndexType(Ptr->getType());
-  const SCEV *EltSizeSCEV =
-      SE->getStoreSizeOfExpr(IdxTy, Ptr->getType()->getPointerElementType());
+  const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy);
   ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
 
-  Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc);
+  Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, PtrExpr,
+                        NeedsFreeze);
 }
 
-SmallVector<RuntimePointerCheck, 4>
-RuntimePointerChecking::generateChecks() const {
+void RuntimePointerChecking::tryToCreateDiffCheck(
+    const RuntimeCheckingPtrGroup &CGI, const RuntimeCheckingPtrGroup &CGJ) {
+  if (!CanUseDiffCheck)
+    return;
+
+  // If either group contains multiple different pointers, bail out.
+  // TODO: Support multiple pointers by using the minimum or maximum pointer,
+  // depending on src & sink.
+  if (CGI.Members.size() != 1 || CGJ.Members.size() != 1) {
+    CanUseDiffCheck = false;
+    return;
+  }
+
+  PointerInfo *Src = &Pointers[CGI.Members[0]];
+  PointerInfo *Sink = &Pointers[CGJ.Members[0]];
+
+  // If either pointer is read and written, multiple checks may be needed. Bail
+  // out.
+  if (!DC.getOrderForAccess(Src->PointerValue, !Src->IsWritePtr).empty() ||
+      !DC.getOrderForAccess(Sink->PointerValue, !Sink->IsWritePtr).empty()) {
+    CanUseDiffCheck = false;
+    return;
+  }
+
+  ArrayRef<unsigned> AccSrc =
+      DC.getOrderForAccess(Src->PointerValue, Src->IsWritePtr);
+  ArrayRef<unsigned> AccSink =
+      DC.getOrderForAccess(Sink->PointerValue, Sink->IsWritePtr);
+  // If either pointer is accessed multiple times, there may not be a clear
+  // src/sink relation. Bail out for now.
+  if (AccSrc.size() != 1 || AccSink.size() != 1) {
+    CanUseDiffCheck = false;
+    return;
+  }
+  // If the sink is accessed before src, swap src/sink.
+  if (AccSink[0] < AccSrc[0])
+    std::swap(Src, Sink);
+
+  auto *SrcAR = dyn_cast<SCEVAddRecExpr>(Src->Expr);
+  auto *SinkAR = dyn_cast<SCEVAddRecExpr>(Sink->Expr);
+  if (!SrcAR || !SinkAR) {
+    CanUseDiffCheck = false;
+    return;
+  }
+
+  const DataLayout &DL =
+      SinkAR->getLoop()->getHeader()->getModule()->getDataLayout();
+  SmallVector<Instruction *, 4> SrcInsts =
+      DC.getInstructionsForAccess(Src->PointerValue, Src->IsWritePtr);
+  SmallVector<Instruction *, 4> SinkInsts =
+      DC.getInstructionsForAccess(Sink->PointerValue, Sink->IsWritePtr);
+  Type *SrcTy = getLoadStoreType(SrcInsts[0]);
+  Type *DstTy = getLoadStoreType(SinkInsts[0]);
+  if (isa<ScalableVectorType>(SrcTy) || isa<ScalableVectorType>(DstTy))
+    return;
+  unsigned AllocSize =
+      std::max(DL.getTypeAllocSize(SrcTy), DL.getTypeAllocSize(DstTy));
+  IntegerType *IntTy =
+      IntegerType::get(Src->PointerValue->getContext(),
+                       DL.getPointerSizeInBits(CGI.AddressSpace));
+
+  // Only matching constant steps matching the AllocSize are supported at the
+  // moment. This simplifies the difference computation. Can be extended in the
+  // future.
+  auto *Step = dyn_cast<SCEVConstant>(SinkAR->getStepRecurrence(*SE));
+  if (!Step || Step != SrcAR->getStepRecurrence(*SE) ||
+      Step->getAPInt().abs() != AllocSize) {
+    CanUseDiffCheck = false;
+    return;
+  }
+
+  // When counting down, the dependence distance needs to be swapped.
+  if (Step->getValue()->isNegative())
+    std::swap(SinkAR, SrcAR);
+
+  const SCEV *SinkStartInt = SE->getPtrToIntExpr(SinkAR->getStart(), IntTy);
+  const SCEV *SrcStartInt = SE->getPtrToIntExpr(SrcAR->getStart(), IntTy);
+  if (isa<SCEVCouldNotCompute>(SinkStartInt) ||
+      isa<SCEVCouldNotCompute>(SrcStartInt)) {
+    CanUseDiffCheck = false;
+    return;
+  }
+  DiffChecks.emplace_back(SrcStartInt, SinkStartInt, AllocSize,
+                          Src->NeedsFreeze || Sink->NeedsFreeze);
+}
+
+SmallVector<RuntimePointerCheck, 4> RuntimePointerChecking::generateChecks() {
   SmallVector<RuntimePointerCheck, 4> Checks;
 
   for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
@@ -243,8 +329,10 @@ RuntimePointerChecking::generateChecks() const {
       const RuntimeCheckingPtrGroup &CGI = CheckingGroups[I];
       const RuntimeCheckingPtrGroup &CGJ = CheckingGroups[J];
 
-      if (needsChecking(CGI, CGJ))
+      if (needsChecking(CGI, CGJ)) {
+        tryToCreateDiffCheck(CGI, CGJ);
         Checks.push_back(std::make_pair(&CGI, &CGJ));
+      }
     }
   }
   return Checks;
@@ -285,11 +373,12 @@ bool RuntimeCheckingPtrGroup::addPointer(unsigned Index,
   return addPointer(
       Index, RtCheck.Pointers[Index].Start, RtCheck.Pointers[Index].End,
       RtCheck.Pointers[Index].PointerValue->getType()->getPointerAddressSpace(),
-      *RtCheck.SE);
+      RtCheck.Pointers[Index].NeedsFreeze, *RtCheck.SE);
 }
 
 bool RuntimeCheckingPtrGroup::addPointer(unsigned Index, const SCEV *Start,
                                          const SCEV *End, unsigned AS,
+                                         bool NeedsFreeze,
                                          ScalarEvolution &SE) {
   assert(AddressSpace == AS &&
          "all pointers in a checking group must be in the same address space");
@@ -314,6 +403,7 @@ bool RuntimeCheckingPtrGroup::addPointer(unsigned Index, const SCEV *Start,
     High = End;
 
   Members.push_back(Index);
+  this->NeedsFreeze |= NeedsFreeze;
   return true;
 }
 
@@ -371,9 +461,11 @@ void RuntimePointerChecking::groupChecks(
 
   unsigned TotalComparisons = 0;
 
-  DenseMap<Value *, unsigned> PositionMap;
-  for (unsigned Index = 0; Index < Pointers.size(); ++Index)
-    PositionMap[Pointers[Index].PointerValue] = Index;
+  DenseMap<Value *, SmallVector<unsigned>> PositionMap;
+  for (unsigned Index = 0; Index < Pointers.size(); ++Index) {
+    auto Iter = PositionMap.insert({Pointers[Index].PointerValue, {}});
+    Iter.first->second.push_back(Index);
+  }
 
   // We need to keep track of what pointers we've already seen so we
   // don't process them twice.
@@ -404,34 +496,35 @@ void RuntimePointerChecking::groupChecks(
       auto PointerI = PositionMap.find(MI->getPointer());
       assert(PointerI != PositionMap.end() &&
              "pointer in equivalence class not found in PositionMap");
-      unsigned Pointer = PointerI->second;
-      bool Merged = false;
-      // Mark this pointer as seen.
-      Seen.insert(Pointer);
-
-      // Go through all the existing sets and see if we can find one
-      // which can include this pointer.
-      for (RuntimeCheckingPtrGroup &Group : Groups) {
-        // Don't perform more than a certain amount of comparisons.
-        // This should limit the cost of grouping the pointers to something
-        // reasonable. If we do end up hitting this threshold, the algorithm
-        // will create separate groups for all remaining pointers.
-        if (TotalComparisons > MemoryCheckMergeThreshold)
-          break;
-
-        TotalComparisons++;
-
-        if (Group.addPointer(Pointer, *this)) {
-          Merged = true;
-          break;
+      for (unsigned Pointer : PointerI->second) {
+        bool Merged = false;
+        // Mark this pointer as seen.
+        Seen.insert(Pointer);
+
+        // Go through all the existing sets and see if we can find one
+        // which can include this pointer.
+        for (RuntimeCheckingPtrGroup &Group : Groups) {
+          // Don't perform more than a certain amount of comparisons.
+          // This should limit the cost of grouping the pointers to something
+          // reasonable. If we do end up hitting this threshold, the algorithm
+          // will create separate groups for all remaining pointers.
+          if (TotalComparisons > MemoryCheckMergeThreshold)
+            break;
+
+          TotalComparisons++;
+
+          if (Group.addPointer(Pointer, *this)) {
+            Merged = true;
+            break;
+          }
        }
-      }
 
-      if (!Merged)
-        // We couldn't add this pointer to any existing set or the threshold
-        // for the number of comparisons has been reached. Create a new group
-        // to hold the current pointer.
-        Groups.push_back(RuntimeCheckingPtrGroup(Pointer, *this));
+        if (!Merged)
+          // We couldn't add this pointer to any existing set or the threshold
+          // for the number of comparisons has been reached. Create a new group
+          // to hold the current pointer.
+          Groups.push_back(RuntimeCheckingPtrGroup(Pointer, *this));
+      }
     }
 
     // We've computed the grouped checks for this partition.
@@ -522,19 +615,19 @@ public:
       : TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA), PSE(PSE) {}
 
   /// Register a load and whether it is only read from.
-  void addLoad(MemoryLocation &Loc, bool IsReadOnly) {
+  void addLoad(MemoryLocation &Loc, Type *AccessTy, bool IsReadOnly) {
     Value *Ptr = const_cast<Value*>(Loc.Ptr);
     AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags);
-    Accesses.insert(MemAccessInfo(Ptr, false));
+    Accesses[MemAccessInfo(Ptr, false)].insert(AccessTy);
 
     if (IsReadOnly)
      ReadOnlyPtr.insert(Ptr);
  }
 
   /// Register a store.
-  void addStore(MemoryLocation &Loc) {
+  void addStore(MemoryLocation &Loc, Type *AccessTy) {
     Value *Ptr = const_cast<Value*>(Loc.Ptr);
     AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags);
-    Accesses.insert(MemAccessInfo(Ptr, true));
+    Accesses[MemAccessInfo(Ptr, true)].insert(AccessTy);
   }
 
   /// Check if we can emit a run-time no-alias check for \p Access.
@@ -545,12 +638,11 @@ public:
   /// we will attempt to use additional run-time checks in order to get
   /// the bounds of the pointer.
   bool createCheckForAccess(RuntimePointerChecking &RtCheck,
-                            MemAccessInfo Access,
+                            MemAccessInfo Access, Type *AccessTy,
                             const ValueToValueMap &Strides,
                             DenseMap<Value *, unsigned> &DepSetId, Loop *TheLoop,
                             unsigned &RunningDepId,
-                            unsigned ASId, bool ShouldCheckStride,
-                            bool Assume);
+                            unsigned ASId, bool ShouldCheckStride, bool Assume);
 
   /// Check whether we can check the pointers at runtime for
   /// non-intersection.
@@ -559,7 +651,7 @@ public:
   /// (i.e. the pointers have computable bounds).
   bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE,
                        Loop *TheLoop, const ValueToValueMap &Strides,
-                       bool ShouldCheckWrap = false);
+                       Value *&UncomputablePtr, bool ShouldCheckWrap = false);
 
   /// Goes over all memory accesses, checks whether a RT check is needed
   /// and builds sets of dependent accesses.
@@ -583,14 +675,15 @@ public:
   MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; }
 
 private:
-  typedef SetVector<MemAccessInfo> PtrAccessSet;
+  typedef MapVector<MemAccessInfo, SmallSetVector<Type *, 1>> PtrAccessMap;
 
   /// Go over all memory access and check whether runtime pointer checks
   /// are needed and build sets of dependency check candidates.
   void processMemAccesses();
 
-  /// Set of all accesses.
-  PtrAccessSet Accesses;
+  /// Map of all accesses. Values are the types used to access memory pointed to
+  /// by the pointer.
+  PtrAccessMap Accesses;
 
   /// The loop being checked.
   const Loop *TheLoop;
@@ -630,11 +723,8 @@ private:
 /// Check whether a pointer can participate in a runtime bounds check.
 /// If \p Assume, try harder to prove that we can compute the bounds of \p Ptr
 /// by adding run-time checks (overflow checks) if necessary.
-static bool hasComputableBounds(PredicatedScalarEvolution &PSE,
-                                const ValueToValueMap &Strides, Value *Ptr,
-                                Loop *L, bool Assume) {
-  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
-
+static bool hasComputableBounds(PredicatedScalarEvolution &PSE, Value *Ptr,
+                                const SCEV *PtrScev, Loop *L, bool Assume) {
   // The bounds for loop-invariant pointer is trivial.
   if (PSE.getSE()->isLoopInvariant(PtrScev, L))
     return true;
@@ -652,12 +742,12 @@ static bool hasComputableBounds(PredicatedScalarEvolution &PSE,
 
 /// Check whether a pointer address cannot wrap.
 static bool isNoWrap(PredicatedScalarEvolution &PSE,
-                     const ValueToValueMap &Strides, Value *Ptr, Loop *L) {
+                     const ValueToValueMap &Strides, Value *Ptr, Type *AccessTy,
+                     Loop *L) {
   const SCEV *PtrScev = PSE.getSCEV(Ptr);
   if (PSE.getSE()->isLoopInvariant(PtrScev, L))
     return true;
 
-  Type *AccessTy = Ptr->getType()->getPointerElementType();
   int64_t Stride = getPtrStride(PSE, AccessTy, Ptr, L, Strides);
   if (Stride == 1 || PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW))
     return true;
@@ -689,7 +779,7 @@ static void visitPointers(Value *StartPtr, const Loop &InnermostLoop,
 }
 
 bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
-                                          MemAccessInfo Access,
+                                          MemAccessInfo Access, Type *AccessTy,
                                           const ValueToValueMap &StridesMap,
                                           DenseMap<Value *, unsigned> &DepSetId,
                                           Loop *TheLoop, unsigned &RunningDepId,
@@ -697,42 +787,75 @@ bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
                                           bool Assume) {
   Value *Ptr = Access.getPointer();
 
-  if (!hasComputableBounds(PSE, StridesMap, Ptr, TheLoop, Assume))
-    return false;
+  ScalarEvolution &SE = *PSE.getSE();
+  SmallVector<std::pair<const SCEV *, bool>> TranslatedPtrs;
+  auto *SI = dyn_cast<SelectInst>(Ptr);
+  // Look through selects in the current loop.
+  if (SI && !TheLoop->isLoopInvariant(SI)) {
+    TranslatedPtrs = {
+        std::make_pair(SE.getSCEV(SI->getOperand(1)),
+                       !isGuaranteedNotToBeUndefOrPoison(SI->getOperand(1))),
+        std::make_pair(SE.getSCEV(SI->getOperand(2)),
+                       !isGuaranteedNotToBeUndefOrPoison(SI->getOperand(2)))};
+  } else
+    TranslatedPtrs = {
+        std::make_pair(replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr), false)};
 
-  // When we run after a failing dependency check we have to make sure
-  // we don't have wrapping pointers.
-  if (ShouldCheckWrap && !isNoWrap(PSE, StridesMap, Ptr, TheLoop)) {
-    auto *Expr = PSE.getSCEV(Ptr);
-    if (!Assume || !isa<SCEVAddRecExpr>(Expr))
+  for (auto &P : TranslatedPtrs) {
+    const SCEV *PtrExpr = P.first;
+    if (!hasComputableBounds(PSE, Ptr, PtrExpr, TheLoop, Assume))
       return false;
-    PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
+
+    // When we run after a failing dependency check we have to make sure
+    // we don't have wrapping pointers.
+    if (ShouldCheckWrap) {
+      // Skip wrap checking when translating pointers.
+      if (TranslatedPtrs.size() > 1)
+        return false;
+
+      if (!isNoWrap(PSE, StridesMap, Ptr, AccessTy, TheLoop)) {
+        auto *Expr = PSE.getSCEV(Ptr);
+        if (!Assume || !isa<SCEVAddRecExpr>(Expr))
+          return false;
+        PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
+      }
+    }
+    // If there's only one option for Ptr, look it up after bounds and wrap
+    // checking, because assumptions might have been added to PSE.
+    if (TranslatedPtrs.size() == 1)
+      TranslatedPtrs[0] = std::make_pair(
+          replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr), false);
   }
 
-  // The id of the dependence set.
-  unsigned DepId;
+  for (auto &P : TranslatedPtrs) {
+    const SCEV *PtrExpr = P.first;
 
-  if (isDependencyCheckNeeded()) {
-    Value *Leader = DepCands.getLeaderValue(Access).getPointer();
-    unsigned &LeaderId = DepSetId[Leader];
-    if (!LeaderId)
-      LeaderId = RunningDepId++;
-    DepId = LeaderId;
-  } else
-    // Each access has its own dependence set.
-    DepId = RunningDepId++;
+    // The id of the dependence set.
+    unsigned DepId;
 
-  bool IsWrite = Access.getInt();
-  RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE);
-  LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
+    if (isDependencyCheckNeeded()) {
+      Value *Leader = DepCands.getLeaderValue(Access).getPointer();
+      unsigned &LeaderId = DepSetId[Leader];
+      if (!LeaderId)
+        LeaderId = RunningDepId++;
+      DepId = LeaderId;
+    } else
+      // Each access has its own dependence set.
+      DepId = RunningDepId++;
+
+    bool IsWrite = Access.getInt();
+    RtCheck.insert(TheLoop, Ptr, PtrExpr, AccessTy, IsWrite, DepId, ASId, PSE,
+                   P.second);
+    LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
  }
 
   return true;
-  }
+}
 
 bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
                                      ScalarEvolution *SE, Loop *TheLoop,
                                      const ValueToValueMap &StridesMap,
-                                     bool ShouldCheckWrap) {
+                                     Value *&UncomputablePtr, bool ShouldCheckWrap) {
   // Find pointers with computable bounds. We are going to use this information
   // to place a runtime bound check.
   bool CanDoRT = true;
@@ -788,12 +911,15 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
     }
 
     for (auto &Access : AccessInfos) {
-      if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, TheLoop,
-                                RunningDepId, ASId, ShouldCheckWrap, false)) {
-        LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:"
-                          << *Access.getPointer() << '\n');
-        Retries.push_back(Access);
-        CanDoAliasSetRT = false;
+      for (auto &AccessTy : Accesses[Access]) {
+        if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
                                  DepSetId, TheLoop, RunningDepId, ASId,
+                                  ShouldCheckWrap, false)) {
+          LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:"
+                            << *Access.getPointer() << '\n');
+          Retries.push_back(Access);
+          CanDoAliasSetRT = false;
+        }
       }
     }
 
@@ -815,13 +941,17 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
       // We know that we need these checks, so we can now be more aggressive
      // and add further checks if required (overflow checks).
       CanDoAliasSetRT = true;
-      for (auto Access : Retries)
-        if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId,
-                                  TheLoop, RunningDepId, ASId,
-                                  ShouldCheckWrap, /*Assume=*/true)) {
-          CanDoAliasSetRT = false;
-          break;
+      for (auto Access : Retries) {
+        for (auto &AccessTy : Accesses[Access]) {
+          if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
+                                    DepSetId, TheLoop, RunningDepId, ASId,
+                                    ShouldCheckWrap, /*Assume=*/true)) {
+            CanDoAliasSetRT = false;
+            UncomputablePtr = Access.getPointer();
+            break;
+          }
         }
+      }
     }
 
     CanDoRT &= CanDoAliasSetRT;
@@ -886,9 +1016,12 @@ void AccessAnalysis::processMemAccesses() {
   LLVM_DEBUG(dbgs() << "LAA: Accesses(" << Accesses.size() << "):\n");
   LLVM_DEBUG({
     for (auto A : Accesses)
-      dbgs() << "\t" << *A.getPointer() << " (" <<
-                (A.getInt() ? "write" : (ReadOnlyPtr.count(A.getPointer()) ?
-                 "read-only" : "read")) << ")\n";
+      dbgs() << "\t" << *A.first.getPointer() << " ("
+             << (A.first.getInt()
+                     ? "write"
+                     : (ReadOnlyPtr.count(A.first.getPointer()) ? "read-only"
+                                                                : "read"))
+             << ")\n";
   });
 
   // The AliasSetTracker has nicely partitioned our pointers by metadata
@@ -907,13 +1040,13 @@ void AccessAnalysis::processMemAccesses() {
     UnderlyingObjToAccessMap ObjToLastAccess;
 
     // Set of access to check after all writes have been processed.
-    PtrAccessSet DeferredAccesses;
+    PtrAccessMap DeferredAccesses;
 
     // Iterate over each alias set twice, once to process read/write pointers,
     // and then to process read-only pointers.
     for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
       bool UseDeferred = SetIteration > 0;
-      PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;
+      PtrAccessMap &S = UseDeferred ? DeferredAccesses : Accesses;
 
       for (const auto &AV : AS) {
         Value *Ptr = AV.getValue();
@@ -921,10 +1054,10 @@ void AccessAnalysis::processMemAccesses() {
         // For a single memory access in AliasSetTracker, Accesses may contain
         // both read and write, and they both need to be handled for CheckDeps.
         for (const auto &AC : S) {
-          if (AC.getPointer() != Ptr)
+          if (AC.first.getPointer() != Ptr)
            continue;
 
-          bool IsWrite = AC.getInt();
+          bool IsWrite = AC.first.getInt();
 
           // If we're using the deferred access set, then it contains only
           // reads.
@@ -946,7 +1079,9 @@ void AccessAnalysis::processMemAccesses() {
           // consecutive as "read-only" pointers (so that we check
           // "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
           if (!UseDeferred && IsReadOnlyPtr) {
-            DeferredAccesses.insert(Access);
+            // We only use the pointer keys, the types vector values don't
+            // matter.
+            DeferredAccesses.insert({Access, {}});
             continue;
           }
@@ -1445,13 +1580,13 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
   const SCEV *CastedDist = &Dist;
   const SCEV *CastedProduct = Product;
-  uint64_t DistTypeSize = DL.getTypeAllocSize(Dist.getType());
-  uint64_t ProductTypeSize = DL.getTypeAllocSize(Product->getType());
+  uint64_t DistTypeSizeBits = DL.getTypeSizeInBits(Dist.getType());
+  uint64_t ProductTypeSizeBits = DL.getTypeSizeInBits(Product->getType());
 
   // The dependence distance can be positive/negative, so we sign extend Dist;
   // The multiplication of the absolute stride in bytes and the
   // backedgeTakenCount is non-negative, so we zero extend Product.
-  if (DistTypeSize > ProductTypeSize)
+  if (DistTypeSizeBits > ProductTypeSizeBits)
     CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType());
   else
     CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType());
@@ -1518,8 +1653,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
   Value *BPtr = B.getPointer();
   bool AIsWrite = A.getInt();
   bool BIsWrite = B.getInt();
-  Type *ATy = APtr->getType()->getPointerElementType();
-  Type *BTy = BPtr->getType()->getPointerElementType();
+  Type *ATy = getLoadStoreType(InstMap[AIdx]);
+  Type *BTy = getLoadStoreType(InstMap[BIdx]);
 
   // Two reads are independent.
   if (!AIsWrite && !BIsWrite)
@@ -1842,8 +1977,6 @@ bool LoopAccessInfo::canAnalyzeLoop() {
 void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
                                  const TargetLibraryInfo *TLI,
                                  DominatorTree *DT) {
-  typedef SmallPtrSet<Value*, 16> ValueSet;
-
   // Holds the Load and Store instructions.
   SmallVector<LoadInst *, 16> Loads;
   SmallVector<StoreInst *, 16> Stores;
@@ -1975,22 +2108,26 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
   // for read and once for write, it will only appear once (on the write
   // list). This is okay, since we are going to check for conflicts between
   // writes and between reads and writes, but not between reads and reads.
-  ValueSet Seen;
+  SmallSet<std::pair<Value *, Type *>, 16> Seen;
 
   // Record uniform store addresses to identify if we have multiple stores
   // to the same address.
-  ValueSet UniformStores;
+  SmallPtrSet<Value *, 16> UniformStores;
 
   for (StoreInst *ST : Stores) {
     Value *Ptr = ST->getPointerOperand();
 
-    if (isUniform(Ptr))
+    if (isUniform(Ptr)) {
+      // Record store instructions to loop invariant addresses
+      StoresToInvariantAddresses.push_back(ST);
       HasDependenceInvolvingLoopInvariantAddress |=
          !UniformStores.insert(Ptr).second;
+    }
 
     // If we did *not* see this pointer before, insert it to the read-write
     // list. At this phase it is only a 'write' list.
-    if (Seen.insert(Ptr).second) {
+    Type *AccessTy = getLoadStoreType(ST);
+    if (Seen.insert({Ptr, AccessTy}).second) {
      ++NumReadWrites;
 
       MemoryLocation Loc = MemoryLocation::get(ST);
@@ -2001,9 +2138,9 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
         Loc.AATags.TBAA = nullptr;
 
       visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
-                    [&Accesses, Loc](Value *Ptr) {
+                    [&Accesses, AccessTy, Loc](Value *Ptr) {
                       MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
-                      Accesses.addStore(NewLoc);
+                      Accesses.addStore(NewLoc, AccessTy);
                     });
     }
   }
@@ -2027,7 +2164,8 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
     // read a few words, modify, and write a few words, and some of the
     // words may be written to the same address.
     bool IsReadOnlyPtr = false;
-    if (Seen.insert(Ptr).second ||
+    Type *AccessTy = getLoadStoreType(LD);
+    if (Seen.insert({Ptr, AccessTy}).second ||
         !getPtrStride(*PSE, LD->getType(), Ptr, TheLoop, SymbolicStrides)) {
       ++NumReads;
       IsReadOnlyPtr = true;
@@ -2049,9 +2187,9 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
       Loc.AATags.TBAA = nullptr;
 
     visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
-                  [&Accesses, Loc, IsReadOnlyPtr](Value *Ptr) {
+                  [&Accesses, AccessTy, Loc, IsReadOnlyPtr](Value *Ptr) {
                    MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
-                    Accesses.addLoad(NewLoc, IsReadOnlyPtr);
+                    Accesses.addLoad(NewLoc, AccessTy, IsReadOnlyPtr);
                   });
   }
@@ -2069,10 +2207,14 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
 
   // Find pointers with computable bounds. We are going to use this information
   // to place a runtime bound check.
-  bool CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(),
-                                                  TheLoop, SymbolicStrides);
+  Value *UncomputablePtr = nullptr;
+  bool CanDoRTIfNeeded =
+      Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(), TheLoop,
+                               SymbolicStrides, UncomputablePtr, false);
   if (!CanDoRTIfNeeded) {
-    recordAnalysis("CantIdentifyArrayBounds") << "cannot identify array bounds";
+    auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr);
+    recordAnalysis("CantIdentifyArrayBounds", I)
+        << "cannot identify array bounds";
    LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
                       << "the array bounds.\n");
     CanVecMem = false;
@@ -2099,12 +2241,14 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
       PtrRtChecking->Need = true;
 
       auto *SE = PSE->getSE();
-      CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, SE, TheLoop,
-                                                 SymbolicStrides, true);
+      UncomputablePtr = nullptr;
+      CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(
+          *PtrRtChecking, SE, TheLoop, SymbolicStrides, UncomputablePtr, true);
 
       // Check that we found the bounds for the pointer.
       if (!CanDoRTIfNeeded) {
-        recordAnalysis("CantCheckMemDepsAtRunTime")
+        auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr);
+        recordAnalysis("CantCheckMemDepsAtRunTime", I)
            << "cannot check memory dependencies at runtime";
         LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
         CanVecMem = false;
@@ -2129,13 +2273,61 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
       dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
              << (PtrRtChecking->Need ? "" : " don't")
             << " need runtime memory checks.\n");
-  else {
-    recordAnalysis("UnsafeMemDep")
-        << "unsafe dependent memory operations in loop. Use "
-           "#pragma loop distribute(enable) to allow loop distribution "
-           "to attempt to isolate the offending operations into a separate "
-           "loop";
-    LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
+  else
+    emitUnsafeDependenceRemark();
+}
+
+void LoopAccessInfo::emitUnsafeDependenceRemark() {
+  auto Deps = getDepChecker().getDependences();
+  if (!Deps)
+    return;
+  auto Found = std::find_if(
+      Deps->begin(), Deps->end(), [](const MemoryDepChecker::Dependence &D) {
+        return MemoryDepChecker::Dependence::isSafeForVectorization(D.Type) !=
+               MemoryDepChecker::VectorizationSafetyStatus::Safe;
+      });
+  if (Found == Deps->end())
+    return;
+  MemoryDepChecker::Dependence Dep = *Found;
+
+  LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
+
+  // Emit remark for first unsafe dependence
+  OptimizationRemarkAnalysis &R =
+      recordAnalysis("UnsafeDep", Dep.getDestination(*this))
+      << "unsafe dependent memory operations in loop. Use "
+         "#pragma loop distribute(enable) to allow loop distribution "
+         "to attempt to isolate the offending operations into a separate "
+         "loop";
+
+  switch (Dep.Type) {
+  case MemoryDepChecker::Dependence::NoDep:
+  case MemoryDepChecker::Dependence::Forward:
+  case MemoryDepChecker::Dependence::BackwardVectorizable:
+    llvm_unreachable("Unexpected dependence");
+  case MemoryDepChecker::Dependence::Backward:
+    R << "\nBackward loop carried data dependence.";
+    break;
+  case MemoryDepChecker::Dependence::ForwardButPreventsForwarding:
+    R << "\nForward loop carried data dependence that prevents "
+         "store-to-load forwarding.";
+    break;
+  case MemoryDepChecker::Dependence::BackwardVectorizableButPreventsForwarding:
+    R << "\nBackward loop carried data dependence that prevents "
+         "store-to-load forwarding.";
+    break;
+  case MemoryDepChecker::Dependence::Unknown:
+    R << "\nUnknown data dependence.";
+    break;
+  }
+
+  if (Instruction *I = Dep.getSource(*this)) {
+    DebugLoc SourceLoc = I->getDebugLoc();
+    if (auto *DD = dyn_cast_or_null<Instruction>(getPointerOperand(I)))
+      SourceLoc = DD->getDebugLoc();
+    if (SourceLoc)
+      R << " Memory location is the same as accessed at "
+        << ore::NV("Location", SourceLoc);
   }
 }
 
@@ -2212,12 +2404,12 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
   // The Stride can be positive/negative, so we sign extend Stride;
   // The backedgeTakenCount is non-negative, so we zero extend BETakenCount.
   const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
-  uint64_t StrideTypeSize = DL.getTypeAllocSize(StrideExpr->getType());
-  uint64_t BETypeSize = DL.getTypeAllocSize(BETakenCount->getType());
+  uint64_t StrideTypeSizeBits = DL.getTypeSizeInBits(StrideExpr->getType());
+  uint64_t BETypeSizeBits = DL.getTypeSizeInBits(BETakenCount->getType());
   const SCEV *CastedStride = StrideExpr;
   const SCEV *CastedBECount = BETakenCount;
   ScalarEvolution *SE = PSE->getSE();
-  if (BETypeSize >= StrideTypeSize)
+  if (BETypeSizeBits >= StrideTypeSizeBits)
     CastedStride = SE->getNoopOrSignExtend(StrideExpr, BETakenCount->getType());
   else
     CastedBECount = SE->getZeroExtendExpr(BETakenCount, StrideExpr->getType());
@@ -2232,7 +2424,7 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
                          "at most once.\n");
     return;
   }
-  LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version.");
+  LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version.\n");
 
   SymbolicStrides[Ptr] = Stride;
   StrideSet.insert(Stride);
@@ -2242,10 +2434,12 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
                                const TargetLibraryInfo *TLI, AAResults *AA,
                                DominatorTree *DT, LoopInfo *LI)
     : PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
-      PtrRtChecking(std::make_unique<RuntimePointerChecking>(SE)),
+      PtrRtChecking(nullptr),
      DepChecker(std::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L) {
-  if (canAnalyzeLoop())
+  PtrRtChecking = std::make_unique<RuntimePointerChecking>(*DepChecker, SE);
+  if (canAnalyzeLoop()) {
     analyzeLoop(AA, LI, TLI, DT);
+  }
 }
 
 void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
@@ -2283,7 +2477,7 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
                    << "found in loop.\n";
 
   OS.indent(Depth) << "SCEV assumptions:\n";
-  PSE->getUnionPredicate().print(OS, Depth);
+  PSE->getPredicate().print(OS, Depth);
 
   OS << "\n";
 
@@ -2301,7 +2495,7 @@ const LoopAccessInfo &LoopAccessLegacyAnalysis::getInfo(Loop *L) {
   if (!LAI)
     LAI = std::make_unique<LoopAccessInfo>(L, SE, TLI, AA, DT, LI);
 
-  return *LAI.get();
+  return *LAI;
 }
 
 void LoopAccessLegacyAnalysis::print(raw_ostream &OS, const Module *M) const {
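
The tryToCreateDiffCheck path added above replaces the usual pair of interval-overlap comparisons with a single pointer subtraction and an unsigned compare, attempted only when each checking group has a single member, a single access, and both address expressions recur with the same constant step equal to the access size. The following standalone C++ sketch contrasts the two check shapes; the function names, the VF * AllocSize bound, and the wrap-around treatment of negative distances are illustrative assumptions about the emitted check, not LLVM API:

#include <cstdint>
#include <iostream>

// Classic runtime check: no conflict when the [Start, End) intervals of the
// two pointer groups are disjoint (two comparisons per pair of groups).
static bool noOverlap(uintptr_t Start1, uintptr_t End1,
                      uintptr_t Start2, uintptr_t End2) {
  return Start2 >= End1 || Start1 >= End2;
}

// Difference check: one subtraction and one unsigned comparison. With equal
// constant steps matching the access size, vectorizing VF lanes is assumed
// safe when the sink starts at least VF * AllocSize bytes after the source.
// A sink below the source wraps to a huge unsigned value and passes, which
// mirrors the "negative dependence distance is safe" rule.
static bool diffCheckSafe(uintptr_t SrcStart, uintptr_t SinkStart,
                          uint64_t AllocSize, uint64_t VF) {
  return SinkStart - SrcStart >= VF * AllocSize;
}

int main() {
  // Hypothetical loop: for (i = 0; i < N; ++i) B[i] = A[i] + 1; 4-byte ints.
  static int32_t Buf[64];
  uintptr_t A = reinterpret_cast<uintptr_t>(Buf);
  std::cout << noOverlap(A, A + 16, A + 32, A + 48) << '\n'; // 1: disjoint
  std::cout << diffCheckSafe(A, A + 32, 4, 4) << '\n'; // 1: 32 >= 16, safe
  std::cout << diffCheckSafe(A, A + 8, 4, 4) << '\n';  // 0: 8 < 16, fall back
}

The NeedsFreeze flag threaded through PointerInfo and RuntimeCheckingPtrGroup in the diff records that a pointer was obtained by looking through a select whose operand may be undef or poison; such values have to be frozen before they feed a runtime comparison so that the resulting branch is well defined.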