Diffstat (limited to 'lib/Analysis')
-rw-r--r--  lib/Analysis/AliasAnalysis.cpp             84
-rw-r--r--  lib/Analysis/AliasAnalysisEvaluator.cpp     8
-rw-r--r--  lib/Analysis/AliasSetTracker.cpp            2
-rw-r--r--  lib/Analysis/BranchProbabilityInfo.cpp      4
-rw-r--r--  lib/Analysis/CMakeLists.txt                 1
-rw-r--r--  lib/Analysis/DependenceAnalysis.cpp        70
-rw-r--r--  lib/Analysis/InstructionSimplify.cpp      149
-rw-r--r--  lib/Analysis/LoopAccessAnalysis.cpp       198
-rw-r--r--  lib/Analysis/MemoryDependenceAnalysis.cpp  19
-rw-r--r--  lib/Analysis/MemoryLocation.cpp            90
-rw-r--r--  lib/Analysis/PHITransAddr.cpp              25
-rw-r--r--  lib/Analysis/ScalarEvolutionExpander.cpp   40
-rw-r--r--  lib/Analysis/TargetTransformInfo.cpp       17
-rw-r--r--  lib/Analysis/ValueTracking.cpp             43
14 files changed, 517 insertions(+), 233 deletions(-)
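
Note on the recurring pattern in the hunks that follow: the per-instruction Location factory methods move off AliasAnalysis and onto the new MemoryLocation class (added below as lib/Analysis/MemoryLocation.cpp), which reads access sizes from the module's DataLayout instead of the analysis object. A minimal sketch of a call site after this change; the helper function is hypothetical, but the calls are the ones this patch introduces:

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Hypothetical helper: may two stores touch overlapping memory?
static bool storesMayAlias(AliasAnalysis &AA, const StoreInst *A,
                           const StoreInst *B) {
  // Previously spelled AA.getLocation(A); the factory now lives on
  // MemoryLocation and fetches the store size via DataLayout.
  return AA.alias(MemoryLocation::get(A), MemoryLocation::get(B)) !=
         AliasAnalysis::NoAlias;
}
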
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp index f54e234bbead..ce46d5300517 100644 --- a/lib/Analysis/AliasAnalysis.cpp +++ b/lib/Analysis/AliasAnalysis.cpp @@ -93,7 +93,7 @@ AliasAnalysis::getModRefInfo(Instruction *I, ImmutableCallSite Call) { // location this memory access defines. The best we can say // is that if the call references what this instruction // defines, it must be clobbered by this location. - const AliasAnalysis::Location DefLoc = AA->getLocation(I); + const AliasAnalysis::Location DefLoc = MemoryLocation::get(I); if (getModRefInfo(Call, DefLoc) != AliasAnalysis::NoModRef) return AliasAnalysis::ModRef; } @@ -267,78 +267,6 @@ AliasAnalysis::getModRefBehavior(const Function *F) { // AliasAnalysis non-virtual helper method implementation //===----------------------------------------------------------------------===// -AliasAnalysis::Location AliasAnalysis::getLocation(const LoadInst *LI) { - AAMDNodes AATags; - LI->getAAMetadata(AATags); - - return Location(LI->getPointerOperand(), - getTypeStoreSize(LI->getType()), AATags); -} - -AliasAnalysis::Location AliasAnalysis::getLocation(const StoreInst *SI) { - AAMDNodes AATags; - SI->getAAMetadata(AATags); - - return Location(SI->getPointerOperand(), - getTypeStoreSize(SI->getValueOperand()->getType()), AATags); -} - -AliasAnalysis::Location AliasAnalysis::getLocation(const VAArgInst *VI) { - AAMDNodes AATags; - VI->getAAMetadata(AATags); - - return Location(VI->getPointerOperand(), UnknownSize, AATags); -} - -AliasAnalysis::Location -AliasAnalysis::getLocation(const AtomicCmpXchgInst *CXI) { - AAMDNodes AATags; - CXI->getAAMetadata(AATags); - - return Location(CXI->getPointerOperand(), - getTypeStoreSize(CXI->getCompareOperand()->getType()), - AATags); -} - -AliasAnalysis::Location -AliasAnalysis::getLocation(const AtomicRMWInst *RMWI) { - AAMDNodes AATags; - RMWI->getAAMetadata(AATags); - - return Location(RMWI->getPointerOperand(), - getTypeStoreSize(RMWI->getValOperand()->getType()), AATags); -} - -AliasAnalysis::Location -AliasAnalysis::getLocationForSource(const MemTransferInst *MTI) { - uint64_t Size = UnknownSize; - if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength())) - Size = C->getValue().getZExtValue(); - - // memcpy/memmove can have AA tags. For memcpy, they apply - // to both the source and the destination. - AAMDNodes AATags; - MTI->getAAMetadata(AATags); - - return Location(MTI->getRawSource(), Size, AATags); -} - -AliasAnalysis::Location -AliasAnalysis::getLocationForDest(const MemIntrinsic *MTI) { - uint64_t Size = UnknownSize; - if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength())) - Size = C->getValue().getZExtValue(); - - // memcpy/memmove can have AA tags. For memcpy, they apply - // to both the source and the destination. - AAMDNodes AATags; - MTI->getAAMetadata(AATags); - - return Location(MTI->getRawDest(), Size, AATags); -} - - - AliasAnalysis::ModRefResult AliasAnalysis::getModRefInfo(const LoadInst *L, const Location &Loc) { // Be conservative in the face of volatile/atomic. @@ -347,7 +275,7 @@ AliasAnalysis::getModRefInfo(const LoadInst *L, const Location &Loc) { // If the load address doesn't alias the given address, it doesn't read // or write the specified memory. - if (Loc.Ptr && !alias(getLocation(L), Loc)) + if (Loc.Ptr && !alias(MemoryLocation::get(L), Loc)) return NoModRef; // Otherwise, a load just reads. 
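
The remaining getModRefInfo hunks in this file all follow the shape of the load case above; restated outside LLVM as a reading aid (an illustrative free function, not part of the patch):

// A read-only instruction reports NoModRef when its location provably
// cannot alias the queried one, and Ref otherwise; volatile/atomic
// accesses stay conservative.
static AliasAnalysis::ModRefResult
getModRefInfoForLoad(AliasAnalysis &AA, const LoadInst *L,
                     const AliasAnalysis::Location &Loc) {
  if (!L->isUnordered())
    return AliasAnalysis::ModRef;
  if (Loc.Ptr && !AA.alias(MemoryLocation::get(L), Loc))
    return AliasAnalysis::NoModRef;
  return AliasAnalysis::Ref;
}
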
@@ -363,7 +291,7 @@ AliasAnalysis::getModRefInfo(const StoreInst *S, const Location &Loc) { if (Loc.Ptr) { // If the store address cannot alias the pointer in question, then the // specified memory cannot be modified by the store. - if (!alias(getLocation(S), Loc)) + if (!alias(MemoryLocation::get(S), Loc)) return NoModRef; // If the pointer is a pointer to constant memory, then it could not have @@ -383,7 +311,7 @@ AliasAnalysis::getModRefInfo(const VAArgInst *V, const Location &Loc) { if (Loc.Ptr) { // If the va_arg address cannot alias the pointer in question, then the // specified memory cannot be accessed by the va_arg. - if (!alias(getLocation(V), Loc)) + if (!alias(MemoryLocation::get(V), Loc)) return NoModRef; // If the pointer is a pointer to constant memory, then it could not have @@ -403,7 +331,7 @@ AliasAnalysis::getModRefInfo(const AtomicCmpXchgInst *CX, const Location &Loc) { return ModRef; // If the cmpxchg address does not alias the location, it does not access it. - if (Loc.Ptr && !alias(getLocation(CX), Loc)) + if (Loc.Ptr && !alias(MemoryLocation::get(CX), Loc)) return NoModRef; return ModRef; @@ -416,7 +344,7 @@ AliasAnalysis::getModRefInfo(const AtomicRMWInst *RMW, const Location &Loc) { return ModRef; // If the atomicrmw address does not alias the location, it does not access it. - if (Loc.Ptr && !alias(getLocation(RMW), Loc)) + if (Loc.Ptr && !alias(MemoryLocation::get(RMW), Loc)) return NoModRef; return ModRef; diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp index 273eacc16e72..dd6a3a0715e1 100644 --- a/lib/Analysis/AliasAnalysisEvaluator.cpp +++ b/lib/Analysis/AliasAnalysisEvaluator.cpp @@ -219,8 +219,8 @@ bool AAEval::runOnFunction(Function &F) { I1 != E; ++I1) { for (SetVector<Value *>::iterator I2 = Stores.begin(), E2 = Stores.end(); I2 != E2; ++I2) { - switch (AA.alias(AA.getLocation(cast<LoadInst>(*I1)), - AA.getLocation(cast<StoreInst>(*I2)))) { + switch (AA.alias(MemoryLocation::get(cast<LoadInst>(*I1)), + MemoryLocation::get(cast<StoreInst>(*I2)))) { case AliasAnalysis::NoAlias: PrintLoadStoreResults("NoAlias", PrintNoAlias, *I1, *I2, F.getParent()); @@ -245,8 +245,8 @@ bool AAEval::runOnFunction(Function &F) { for (SetVector<Value *>::iterator I1 = Stores.begin(), E = Stores.end(); I1 != E; ++I1) { for (SetVector<Value *>::iterator I2 = Stores.begin(); I2 != I1; ++I2) { - switch (AA.alias(AA.getLocation(cast<StoreInst>(*I1)), - AA.getLocation(cast<StoreInst>(*I2)))) { + switch (AA.alias(MemoryLocation::get(cast<StoreInst>(*I1)), + MemoryLocation::get(cast<StoreInst>(*I2)))) { case AliasAnalysis::NoAlias: PrintLoadStoreResults("NoAlias", PrintNoAlias, *I1, *I2, F.getParent()); diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp index 50890c17f2eb..12c1c7d4af90 100644 --- a/lib/Analysis/AliasSetTracker.cpp +++ b/lib/Analysis/AliasSetTracker.cpp @@ -130,7 +130,7 @@ void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry, void AliasSet::addUnknownInst(Instruction *I, AliasAnalysis &AA) { if (UnknownInsts.empty()) addRef(); - UnknownInsts.push_back(I); + UnknownInsts.emplace_back(I); if (!I->mayWriteToMemory()) { AliasTy = MayAlias; diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp index 091943bfc7b3..430b41241edf 100644 --- a/lib/Analysis/BranchProbabilityInfo.cpp +++ b/lib/Analysis/BranchProbabilityInfo.cpp @@ -543,6 +543,10 @@ bool BranchProbabilityInfo::runOnFunction(Function &F) { return false; } +void 
BranchProbabilityInfo::releaseMemory() { + Weights.clear(); +} + void BranchProbabilityInfo::print(raw_ostream &OS, const Module *) const { OS << "---- Branch Probabilities ----\n"; // We print the probabilities from the last function the analysis ran over, diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index 32fe9b9495d1..b22ee7e24931 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -42,6 +42,7 @@ add_llvm_library(LLVMAnalysis MemDerefPrinter.cpp MemoryBuiltins.cpp MemoryDependenceAnalysis.cpp + MemoryLocation.cpp ModuleDebugInfoPrinter.cpp NoAliasAnalysis.cpp PHITransAddr.cpp diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp index 808a38b6346b..b16cdfef3375 100644 --- a/lib/Analysis/DependenceAnalysis.cpp +++ b/lib/Analysis/DependenceAnalysis.cpp @@ -779,23 +779,56 @@ void DependenceAnalysis::collectCommonLoops(const SCEV *Expression, } } -void DependenceAnalysis::unifySubscriptType(Subscript *Pair) { - const SCEV *Src = Pair->Src; - const SCEV *Dst = Pair->Dst; - IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); - IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); - if (SrcTy == nullptr || DstTy == nullptr) { - assert(SrcTy == DstTy && "This function only unify integer types and " - "expect Src and Dst share the same type " - "otherwise."); - return; +void DependenceAnalysis::unifySubscriptType(ArrayRef<Subscript *> Pairs) { + + unsigned widestWidthSeen = 0; + Type *widestType; + + // Go through each pair and find the widest bit to which we need + // to extend all of them. + for (unsigned i = 0; i < Pairs.size(); i++) { + const SCEV *Src = Pairs[i]->Src; + const SCEV *Dst = Pairs[i]->Dst; + IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); + IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); + if (SrcTy == nullptr || DstTy == nullptr) { + assert(SrcTy == DstTy && "This function only unify integer types and " + "expect Src and Dst share the same type " + "otherwise."); + continue; + } + if (SrcTy->getBitWidth() > widestWidthSeen) { + widestWidthSeen = SrcTy->getBitWidth(); + widestType = SrcTy; + } + if (DstTy->getBitWidth() > widestWidthSeen) { + widestWidthSeen = DstTy->getBitWidth(); + widestType = DstTy; + } } - if (SrcTy->getBitWidth() > DstTy->getBitWidth()) { - // Sign-extend Dst to typeof(Src) if typeof(Src) is wider than typeof(Dst). - Pair->Dst = SE->getSignExtendExpr(Dst, SrcTy); - } else if (SrcTy->getBitWidth() < DstTy->getBitWidth()) { - // Sign-extend Src to typeof(Dst) if typeof(Dst) is wider than typeof(Src). - Pair->Src = SE->getSignExtendExpr(Src, DstTy); + + + assert(widestWidthSeen > 0); + + // Now extend each pair to the widest seen. 
+ for (unsigned i = 0; i < Pairs.size(); i++) { + const SCEV *Src = Pairs[i]->Src; + const SCEV *Dst = Pairs[i]->Dst; + IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); + IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); + if (SrcTy == nullptr || DstTy == nullptr) { + assert(SrcTy == DstTy && "This function only unify integer types and " + "expect Src and Dst share the same type " + "otherwise."); + continue; + } + if (SrcTy->getBitWidth() < widestWidthSeen) + // Sign-extend Src to widestType + Pairs[i]->Src = SE->getSignExtendExpr(Src, widestType); + if (DstTy->getBitWidth() < widestWidthSeen) { + // Sign-extend Dst to widestType + Pairs[i]->Dst = SE->getSignExtendExpr(Dst, widestType); + } } } @@ -2937,7 +2970,7 @@ const SCEV *DependenceAnalysis::getUpperBound(BoundInfo *Bound) const { // return the coefficient (the step) // corresponding to the specified loop. // If there isn't one, return 0. -// For example, given a*i + b*j + c*k, zeroing the coefficient +// For example, given a*i + b*j + c*k, finding the coefficient // corresponding to the j loop would yield b. const SCEV *DependenceAnalysis::findCoefficient(const SCEV *Expr, const Loop *TargetLoop) const { @@ -3574,13 +3607,16 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst, SmallBitVector Sivs(Pairs); SmallBitVector Mivs(Pairs); SmallBitVector ConstrainedLevels(MaxLevels + 1); + SmallVector<Subscript *, 4> PairsInGroup; for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) { DEBUG(dbgs() << SJ << " "); if (Pair[SJ].Classification == Subscript::SIV) Sivs.set(SJ); else Mivs.set(SJ); + PairsInGroup.push_back(&Pair[SJ]); } + unifySubscriptType(PairsInGroup); DEBUG(dbgs() << "}\n"); while (Sivs.any()) { bool Changed = false; diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 097b99eb0a5e..ec56d888dc2f 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -3144,6 +3144,90 @@ Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, RecursionLimit); } +/// SimplifyWithOpReplaced - See if V simplifies when its operand Op is +/// replaced with RepOp. +static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, + const Query &Q, + unsigned MaxRecurse) { + // Trivial replacement. + if (V == Op) + return RepOp; + + auto *I = dyn_cast<Instruction>(V); + if (!I) + return nullptr; + + // If this is a binary operator, try to simplify it with the replaced op. + if (auto *B = dyn_cast<BinaryOperator>(I)) { + // Consider: + // %cmp = icmp eq i32 %x, 2147483647 + // %add = add nsw i32 %x, 1 + // %sel = select i1 %cmp, i32 -2147483648, i32 %add + // + // We can't replace %sel with %add unless we strip away the flags. + if (isa<OverflowingBinaryOperator>(B)) + if (B->hasNoSignedWrap() || B->hasNoUnsignedWrap()) + return nullptr; + if (isa<PossiblyExactOperator>(B)) + if (B->isExact()) + return nullptr; + + if (MaxRecurse) { + if (B->getOperand(0) == Op) + return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), Q, + MaxRecurse - 1); + if (B->getOperand(1) == Op) + return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, Q, + MaxRecurse - 1); + } + } + + // Same for CmpInsts. 
+ if (CmpInst *C = dyn_cast<CmpInst>(I)) { + if (MaxRecurse) { + if (C->getOperand(0) == Op) + return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), Q, + MaxRecurse - 1); + if (C->getOperand(1) == Op) + return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, Q, + MaxRecurse - 1); + } + } + + // TODO: We could hand off more cases to instsimplify here. + + // If all operands are constant after substituting Op for RepOp then we can + // constant fold the instruction. + if (Constant *CRepOp = dyn_cast<Constant>(RepOp)) { + // Build a list of all constant operands. + SmallVector<Constant *, 8> ConstOps; + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + if (I->getOperand(i) == Op) + ConstOps.push_back(CRepOp); + else if (Constant *COp = dyn_cast<Constant>(I->getOperand(i))) + ConstOps.push_back(COp); + else + break; + } + + // All operands were constants, fold it. + if (ConstOps.size() == I->getNumOperands()) { + if (CmpInst *C = dyn_cast<CmpInst>(I)) + return ConstantFoldCompareInstOperands(C->getPredicate(), ConstOps[0], + ConstOps[1], Q.DL, Q.TLI); + + if (LoadInst *LI = dyn_cast<LoadInst>(I)) + if (!LI->isVolatile()) + return ConstantFoldLoadFromConstPtr(ConstOps[0], Q.DL); + + return ConstantFoldInstOperands(I->getOpcode(), I->getType(), ConstOps, + Q.DL, Q.TLI); + } + } + + return nullptr; +} + /// SimplifySelectInst - Given operands for a SelectInst, see if we can fold /// the result. If not, this returns null. static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal, @@ -3172,29 +3256,28 @@ static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal, if (isa<UndefValue>(FalseVal)) // select C, X, undef -> X return TrueVal; - const auto *ICI = dyn_cast<ICmpInst>(CondVal); - unsigned BitWidth = TrueVal->getType()->getScalarSizeInBits(); - if (ICI && BitWidth) { + if (const auto *ICI = dyn_cast<ICmpInst>(CondVal)) { + unsigned BitWidth = Q.DL.getTypeSizeInBits(TrueVal->getType()); ICmpInst::Predicate Pred = ICI->getPredicate(); + Value *CmpLHS = ICI->getOperand(0); + Value *CmpRHS = ICI->getOperand(1); APInt MinSignedValue = APInt::getSignBit(BitWidth); Value *X; const APInt *Y; bool TrueWhenUnset; bool IsBitTest = false; if (ICmpInst::isEquality(Pred) && - match(ICI->getOperand(0), m_And(m_Value(X), m_APInt(Y))) && - match(ICI->getOperand(1), m_Zero())) { + match(CmpLHS, m_And(m_Value(X), m_APInt(Y))) && + match(CmpRHS, m_Zero())) { IsBitTest = true; TrueWhenUnset = Pred == ICmpInst::ICMP_EQ; - } else if (Pred == ICmpInst::ICMP_SLT && - match(ICI->getOperand(1), m_Zero())) { - X = ICI->getOperand(0); + } else if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, m_Zero())) { + X = CmpLHS; Y = &MinSignedValue; IsBitTest = true; TrueWhenUnset = false; - } else if (Pred == ICmpInst::ICMP_SGT && - match(ICI->getOperand(1), m_AllOnes())) { - X = ICI->getOperand(0); + } else if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, m_AllOnes())) { + X = CmpLHS; Y = &MinSignedValue; IsBitTest = true; TrueWhenUnset = true; @@ -3225,6 +3308,50 @@ static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal, return TrueWhenUnset ? TrueVal : FalseVal; } } + if (ICI->hasOneUse()) { + const APInt *C; + if (match(CmpRHS, m_APInt(C))) { + // X < MIN ? T : F --> F + if (Pred == ICmpInst::ICMP_SLT && C->isMinSignedValue()) + return FalseVal; + // X < MIN ? T : F --> F + if (Pred == ICmpInst::ICMP_ULT && C->isMinValue()) + return FalseVal; + // X > MAX ? T : F --> F + if (Pred == ICmpInst::ICMP_SGT && C->isMaxSignedValue()) + return FalseVal; + // X > MAX ? 
T : F --> F + if (Pred == ICmpInst::ICMP_UGT && C->isMaxValue()) + return FalseVal; + } + } + + // If we have an equality comparison then we know the value in one of the + // arms of the select. See if substituting this value into the arm and + // simplifying the result yields the same value as the other arm. + if (Pred == ICmpInst::ICMP_EQ) { + if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, MaxRecurse) == + TrueVal || + SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q, MaxRecurse) == + TrueVal) + return FalseVal; + if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q, MaxRecurse) == + FalseVal || + SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q, MaxRecurse) == + FalseVal) + return FalseVal; + } else if (Pred == ICmpInst::ICMP_NE) { + if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q, MaxRecurse) == + FalseVal || + SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q, MaxRecurse) == + FalseVal) + return TrueVal; + if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, MaxRecurse) == + TrueVal || + SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q, MaxRecurse) == + TrueVal) + return TrueVal; + } } return nullptr; diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp index b70de00db04b..c661c7b87dcb 100644 --- a/lib/Analysis/LoopAccessAnalysis.cpp +++ b/lib/Analysis/LoopAccessAnalysis.cpp @@ -177,15 +177,21 @@ void LoopAccessInfo::RuntimePointerCheck::print( } } -bool LoopAccessInfo::RuntimePointerCheck::needsAnyChecking( +unsigned LoopAccessInfo::RuntimePointerCheck::getNumberOfChecks( const SmallVectorImpl<int> *PtrPartition) const { unsigned NumPointers = Pointers.size(); + unsigned CheckCount = 0; for (unsigned I = 0; I < NumPointers; ++I) for (unsigned J = I + 1; J < NumPointers; ++J) if (needsChecking(I, J, PtrPartition)) - return true; - return false; + CheckCount++; + return CheckCount; +} + +bool LoopAccessInfo::RuntimePointerCheck::needsAnyChecking( + const SmallVectorImpl<int> *PtrPartition) const { + return getNumberOfChecks(PtrPartition) != 0; } namespace { @@ -220,10 +226,11 @@ public: } /// \brief Check whether we can check the pointers at runtime for - /// non-intersection. + /// non-intersection. Returns true when we have 0 pointers + /// (a check on 0 pointers for non-intersection will always return true). bool canCheckPtrAtRT(LoopAccessInfo::RuntimePointerCheck &RtCheck, - unsigned &NumComparisons, ScalarEvolution *SE, - Loop *TheLoop, const ValueToValueMap &Strides, + bool &NeedRTCheck, ScalarEvolution *SE, Loop *TheLoop, + const ValueToValueMap &Strides, bool ShouldCheckStride = false); /// \brief Goes over all memory accesses, checks whether a RT check is needed @@ -289,29 +296,23 @@ static bool hasComputableBounds(ScalarEvolution *SE, return AR->isAffine(); } -/// \brief Check the stride of the pointer and ensure that it does not wrap in -/// the address space. -static int isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp, - const ValueToValueMap &StridesMap); - bool AccessAnalysis::canCheckPtrAtRT( - LoopAccessInfo::RuntimePointerCheck &RtCheck, unsigned &NumComparisons, + LoopAccessInfo::RuntimePointerCheck &RtCheck, bool &NeedRTCheck, ScalarEvolution *SE, Loop *TheLoop, const ValueToValueMap &StridesMap, bool ShouldCheckStride) { // Find pointers with computable bounds. We are going to use this information // to place a runtime bound check. 
bool CanDoRT = true; + NeedRTCheck = false; + if (!IsRTCheckNeeded) return true; + bool IsDepCheckNeeded = isDependencyCheckNeeded(); - NumComparisons = 0; // We assign a consecutive id to access from different alias sets. // Accesses between different groups doesn't need to be checked. unsigned ASId = 1; for (auto &AS : AST) { - unsigned NumReadPtrChecks = 0; - unsigned NumWritePtrChecks = 0; - // We assign consecutive id to access from different dependence sets. // Accesses within the same set don't need a runtime check. unsigned RunningDepId = 1; @@ -322,11 +323,6 @@ bool AccessAnalysis::canCheckPtrAtRT( bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true)); MemAccessInfo Access(Ptr, IsWrite); - if (IsWrite) - ++NumWritePtrChecks; - else - ++NumReadPtrChecks; - if (hasComputableBounds(SE, StridesMap, Ptr) && // When we run after a failing dependency check we have to make sure // we don't have wrapping pointers. @@ -354,16 +350,15 @@ bool AccessAnalysis::canCheckPtrAtRT( } } - if (IsDepCheckNeeded && CanDoRT && RunningDepId == 2) - NumComparisons += 0; // Only one dependence set. - else { - NumComparisons += (NumWritePtrChecks * (NumReadPtrChecks + - NumWritePtrChecks - 1)); - } - ++ASId; } + // We need a runtime check if there are any accesses that need checking. + // However, some accesses cannot be checked (for example because we + // can't determine their bounds). In these cases we would need a check + // but wouldn't be able to add it. + NeedRTCheck = !CanDoRT || RtCheck.needsAnyChecking(nullptr); + // If the pointers that we would use for the bounds comparison have different // address spaces, assume the values aren't directly comparable, so we can't // use them for the runtime check. We also have to assume they could @@ -510,8 +505,8 @@ static bool isInBoundsGep(Value *Ptr) { } /// \brief Check whether the access through \p Ptr has a constant stride. -static int isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp, - const ValueToValueMap &StridesMap) { +int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp, + const ValueToValueMap &StridesMap) { const Type *Ty = Ptr->getType(); assert(Ty->isPointerTy() && "Unexpected non-ptr"); @@ -678,6 +673,42 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance, return false; } +/// \brief Check the dependence for two accesses with the same stride \p Stride. +/// \p Distance is the positive distance and \p TypeByteSize is type size in +/// bytes. +/// +/// \returns true if they are independent. +static bool areStridedAccessesIndependent(unsigned Distance, unsigned Stride, + unsigned TypeByteSize) { + assert(Stride > 1 && "The stride must be greater than 1"); + assert(TypeByteSize > 0 && "The type size in byte must be non-zero"); + assert(Distance > 0 && "The distance must be non-zero"); + + // Skip if the distance is not multiple of type byte size. + if (Distance % TypeByteSize) + return false; + + unsigned ScaledDist = Distance / TypeByteSize; + + // No dependence if the scaled distance is not multiple of the stride. + // E.g. + // for (i = 0; i < 1024 ; i += 4) + // A[i+2] = A[i] + 1; + // + // Two accesses in memory (scaled distance is 2, stride is 4): + // | A[0] | | | | A[4] | | | | + // | | | A[2] | | | | A[6] | | + // + // E.g. 
+ // for (i = 0; i < 1024 ; i += 3) + // A[i+4] = A[i] + 1; + // + // Two accesses in memory (scaled distance is 4, stride is 3): + // | A[0] | | | A[3] | | | A[6] | | | + // | | | | | A[4] | | | A[7] | | + return ScaledDist % Stride; +} + MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B, unsigned BIdx, @@ -778,34 +809,87 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, unsigned Distance = (unsigned) Val.getZExtValue(); + unsigned Stride = std::abs(StrideAPtr); + if (Stride > 1 && + areStridedAccessesIndependent(Distance, Stride, TypeByteSize)) + return Dependence::NoDep; + // Bail out early if passed-in parameters make vectorization not feasible. unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ? VectorizerParams::VectorizationFactor : 1); unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ? VectorizerParams::VectorizationInterleave : 1); + // The minimum number of iterations for a vectorized/unrolled version. + unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U); + + // It's not vectorizable if the distance is smaller than the minimum distance + // needed for a vectroized/unrolled version. Vectorizing one iteration in + // front needs TypeByteSize * Stride. Vectorizing the last iteration needs + // TypeByteSize (No need to plus the last gap distance). + // + // E.g. Assume one char is 1 byte in memory and one int is 4 bytes. + // foo(int *A) { + // int *B = (int *)((char *)A + 14); + // for (i = 0 ; i < 1024 ; i += 2) + // B[i] = A[i] + 1; + // } + // + // Two accesses in memory (stride is 2): + // | A[0] | | A[2] | | A[4] | | A[6] | | + // | B[0] | | B[2] | | B[4] | + // + // Distance needs for vectorizing iterations except the last iteration: + // 4 * 2 * (MinNumIter - 1). Distance needs for the last iteration: 4. + // So the minimum distance needed is: 4 * 2 * (MinNumIter - 1) + 4. + // + // If MinNumIter is 2, it is vectorizable as the minimum distance needed is + // 12, which is less than distance. + // + // If MinNumIter is 4 (Say if a user forces the vectorization factor to be 4), + // the minimum distance needed is 28, which is greater than distance. It is + // not safe to do vectorization. + unsigned MinDistanceNeeded = + TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize; + if (MinDistanceNeeded > Distance) { + DEBUG(dbgs() << "LAA: Failure because of positive distance " << Distance + << '\n'); + return Dependence::Backward; + } - // The distance must be bigger than the size needed for a vectorized version - // of the operation and the size of the vectorized operation must not be - // bigger than the currrent maximum size. - if (Distance < 2*TypeByteSize || - 2*TypeByteSize > MaxSafeDepDistBytes || - Distance < TypeByteSize * ForcedUnroll * ForcedFactor) { - DEBUG(dbgs() << "LAA: Failure because of Positive distance " - << Val.getSExtValue() << '\n'); + // Unsafe if the minimum distance needed is greater than max safe distance. + if (MinDistanceNeeded > MaxSafeDepDistBytes) { + DEBUG(dbgs() << "LAA: Failure because it needs at least " + << MinDistanceNeeded << " size in bytes"); return Dependence::Backward; } // Positive distance bigger than max vectorization factor. - MaxSafeDepDistBytes = Distance < MaxSafeDepDistBytes ? - Distance : MaxSafeDepDistBytes; + // FIXME: Should use max factor instead of max distance in bytes, which could + // not handle different types. + // E.g. 
Assume one char is 1 byte in memory and one int is 4 bytes. + // void foo (int *A, char *B) { + // for (unsigned i = 0; i < 1024; i++) { + // A[i+2] = A[i] + 1; + // B[i+2] = B[i] + 1; + // } + // } + // + // This case is currently unsafe according to the max safe distance. If we + // analyze the two accesses on array B, the max safe dependence distance + // is 2. Then we analyze the accesses on array A, the minimum distance needed + // is 8, which is less than 2 and forbidden vectorization, But actually + // both A and B could be vectorized by 2 iterations. + MaxSafeDepDistBytes = + Distance < MaxSafeDepDistBytes ? Distance : MaxSafeDepDistBytes; bool IsTrueDataDependence = (!AIsWrite && BIsWrite); if (IsTrueDataDependence && couldPreventStoreLoadForward(Distance, TypeByteSize)) return Dependence::BackwardVectorizableButPreventsForwarding; - DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue() << - " with max VF = " << MaxSafeDepDistBytes / TypeByteSize << '\n'); + DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue() + << " with max VF = " + << MaxSafeDepDistBytes / (TypeByteSize * Stride) << '\n'); return Dependence::BackwardVectorizable; } @@ -1066,7 +1150,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { if (Seen.insert(Ptr).second) { ++NumReadWrites; - AliasAnalysis::Location Loc = AA->getLocation(ST); + AliasAnalysis::Location Loc = MemoryLocation::get(ST); // The TBAA metadata could have a control dependency on the predication // condition, so we cannot rely on it when determining whether or not we // need runtime pointer checks. @@ -1102,7 +1186,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { IsReadOnlyPtr = true; } - AliasAnalysis::Location Loc = AA->getLocation(LD); + AliasAnalysis::Location Loc = MemoryLocation::get(LD); // The TBAA metadata could have a control dependency on the predication // condition, so we cannot rely on it when determining whether or not we // need runtime pointer checks. @@ -1123,22 +1207,17 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { // Build dependence sets and check whether we need a runtime pointer bounds // check. Accesses.buildDependenceSets(); - bool NeedRTCheck = Accesses.isRTCheckNeeded(); // Find pointers with computable bounds. We are going to use this information // to place a runtime bound check. - bool CanDoRT = false; - if (NeedRTCheck) - CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE, TheLoop, - Strides); - - DEBUG(dbgs() << "LAA: We need to do " << NumComparisons << - " pointer comparisons.\n"); + bool NeedRTCheck; + bool CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, + NeedRTCheck, SE, + TheLoop, Strides); - // If we only have one set of dependences to check pointers among we don't - // need a runtime check. - if (NumComparisons == 0 && NeedRTCheck) - NeedRTCheck = false; + DEBUG(dbgs() << "LAA: We need to do " + << PtrRtCheck.getNumberOfChecks(nullptr) + << " pointer comparisons.\n"); // Check that we found the bounds for the pointer. if (CanDoRT) @@ -1171,10 +1250,11 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { PtrRtCheck.reset(); PtrRtCheck.Need = true; - CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE, + CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NeedRTCheck, SE, TheLoop, Strides, true); + // Check that we found the bounds for the pointer. 
- if (!CanDoRT && NumComparisons > 0) { + if (NeedRTCheck && !CanDoRT) { emitAnalysis(LoopAccessReport() << "cannot check memory dependencies at runtime"); DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n"); @@ -1319,7 +1399,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI, AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI, const ValueToValueMap &Strides) - : DepChecker(SE, L), NumComparisons(0), TheLoop(L), SE(SE), DL(DL), + : DepChecker(SE, L), TheLoop(L), SE(SE), DL(DL), TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1U), CanVecMem(false), StoreToLoopInvariantAddress(false) { diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 3c1826a58e66..255bae61eb2f 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -124,11 +124,11 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst, AliasAnalysis *AA) { if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) { if (LI->isUnordered()) { - Loc = AA->getLocation(LI); + Loc = MemoryLocation::get(LI); return AliasAnalysis::Ref; } if (LI->getOrdering() == Monotonic) { - Loc = AA->getLocation(LI); + Loc = MemoryLocation::get(LI); return AliasAnalysis::ModRef; } Loc = AliasAnalysis::Location(); @@ -137,11 +137,11 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst, if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) { if (SI->isUnordered()) { - Loc = AA->getLocation(SI); + Loc = MemoryLocation::get(SI); return AliasAnalysis::Mod; } if (SI->getOrdering() == Monotonic) { - Loc = AA->getLocation(SI); + Loc = MemoryLocation::get(SI); return AliasAnalysis::ModRef; } Loc = AliasAnalysis::Location(); @@ -149,7 +149,7 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst, } if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) { - Loc = AA->getLocation(V); + Loc = MemoryLocation::get(V); return AliasAnalysis::ModRef; } @@ -486,7 +486,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, } } - AliasAnalysis::Location LoadLoc = AA->getLocation(LI); + AliasAnalysis::Location LoadLoc = MemoryLocation::get(LI); // If we found a pointer, check if it could be the same as our pointer. AliasAnalysis::AliasResult R = AA->alias(LoadLoc, MemLoc); @@ -575,7 +575,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, // Ok, this store might clobber the query pointer. Check to see if it is // a must alias: in this case, we want to return this as a def. - AliasAnalysis::Location StoreLoc = AA->getLocation(SI); + AliasAnalysis::Location StoreLoc = MemoryLocation::get(SI); // If we found a pointer, check if it could be the same as our pointer. AliasAnalysis::AliasResult R = AA->alias(StoreLoc, MemLoc); @@ -872,7 +872,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { void MemoryDependenceAnalysis:: getNonLocalPointerDependency(Instruction *QueryInst, SmallVectorImpl<NonLocalDepResult> &Result) { - const AliasAnalysis::Location Loc = AA->getLocation(QueryInst); + const AliasAnalysis::Location Loc = MemoryLocation::get(QueryInst); bool isLoad = isa<LoadInst>(QueryInst); BasicBlock *FromBB = QueryInst->getParent(); assert(FromBB); @@ -1278,8 +1278,7 @@ getNonLocalPointerDepFromBB(Instruction *QueryInst, // Get the PHI translated pointer in this predecessor. This can fail if // not translatable, in which case the getAddr() returns null. 
PHITransAddr &PredPointer = PredList.back().second; - PredPointer.PHITranslateValue(BB, Pred, nullptr); - + PredPointer.PHITranslateValue(BB, Pred, DT, /*MustDominate=*/false); Value *PredPtrVal = PredPointer.getAddr(); // Check to see if we have already visited this pred block with another diff --git a/lib/Analysis/MemoryLocation.cpp b/lib/Analysis/MemoryLocation.cpp new file mode 100644 index 000000000000..f87a017b9211 --- /dev/null +++ b/lib/Analysis/MemoryLocation.cpp @@ -0,0 +1,90 @@ +//===- MemoryLocation.cpp - Memory location descriptions -------------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +using namespace llvm; + +MemoryLocation MemoryLocation::get(const LoadInst *LI) { + AAMDNodes AATags; + LI->getAAMetadata(AATags); + const auto &DL = LI->getModule()->getDataLayout(); + + return MemoryLocation(LI->getPointerOperand(), + DL.getTypeStoreSize(LI->getType()), AATags); +} + +MemoryLocation MemoryLocation::get(const StoreInst *SI) { + AAMDNodes AATags; + SI->getAAMetadata(AATags); + const auto &DL = SI->getModule()->getDataLayout(); + + return MemoryLocation(SI->getPointerOperand(), + DL.getTypeStoreSize(SI->getValueOperand()->getType()), + AATags); +} + +MemoryLocation MemoryLocation::get(const VAArgInst *VI) { + AAMDNodes AATags; + VI->getAAMetadata(AATags); + + return MemoryLocation(VI->getPointerOperand(), UnknownSize, AATags); +} + +MemoryLocation MemoryLocation::get(const AtomicCmpXchgInst *CXI) { + AAMDNodes AATags; + CXI->getAAMetadata(AATags); + const auto &DL = CXI->getModule()->getDataLayout(); + + return MemoryLocation( + CXI->getPointerOperand(), + DL.getTypeStoreSize(CXI->getCompareOperand()->getType()), AATags); +} + +MemoryLocation MemoryLocation::get(const AtomicRMWInst *RMWI) { + AAMDNodes AATags; + RMWI->getAAMetadata(AATags); + const auto &DL = RMWI->getModule()->getDataLayout(); + + return MemoryLocation(RMWI->getPointerOperand(), + DL.getTypeStoreSize(RMWI->getValOperand()->getType()), + AATags); +} + +MemoryLocation MemoryLocation::getForSource(const MemTransferInst *MTI) { + uint64_t Size = UnknownSize; + if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength())) + Size = C->getValue().getZExtValue(); + + // memcpy/memmove can have AA tags. For memcpy, they apply + // to both the source and the destination. + AAMDNodes AATags; + MTI->getAAMetadata(AATags); + + return MemoryLocation(MTI->getRawSource(), Size, AATags); +} + +MemoryLocation MemoryLocation::getForDest(const MemIntrinsic *MTI) { + uint64_t Size = UnknownSize; + if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength())) + Size = C->getValue().getZExtValue(); + + // memcpy/memmove can have AA tags. For memcpy, they apply + // to both the source and the destination. 
+ AAMDNodes AATags; + MTI->getAAMetadata(AATags); + + return MemoryLocation(MTI->getRawDest(), Size, AATags); +} diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp index 177684fdc9a7..633d6aaad35e 100644 --- a/lib/Analysis/PHITransAddr.cpp +++ b/lib/Analysis/PHITransAddr.cpp @@ -150,7 +150,8 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, if (!Inst) return V; // Determine whether 'Inst' is an input to our PHI translatable expression. - bool isInput = std::count(InstInputs.begin(), InstInputs.end(), Inst); + bool isInput = + std::find(InstInputs.begin(), InstInputs.end(), Inst) != InstInputs.end(); // Handle inputs instructions if needed. if (isInput) { @@ -276,7 +277,8 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, isNSW = isNUW = false; // If the old 'LHS' was an input, add the new 'LHS' as an input. - if (std::count(InstInputs.begin(), InstInputs.end(), BOp)) { + if (std::find(InstInputs.begin(), InstInputs.end(), BOp) != + InstInputs.end()) { RemoveInstInputs(BOp, InstInputs); AddAsInput(LHS); } @@ -313,21 +315,26 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, /// PHITranslateValue - PHI translate the current address up the CFG from -/// CurBB to Pred, updating our state to reflect any needed changes. If the -/// dominator tree DT is non-null, the translated value must dominate +/// CurBB to Pred, updating our state to reflect any needed changes. If +/// 'MustDominate' is true, the translated value must dominate /// PredBB. This returns true on failure and sets Addr to null. bool PHITransAddr::PHITranslateValue(BasicBlock *CurBB, BasicBlock *PredBB, - const DominatorTree *DT) { + const DominatorTree *DT, + bool MustDominate) { + assert(DT || !MustDominate); assert(Verify() && "Invalid PHITransAddr!"); - Addr = PHITranslateSubExpr(Addr, CurBB, PredBB, DT); + if (DT && DT->isReachableFromEntry(PredBB)) + Addr = + PHITranslateSubExpr(Addr, CurBB, PredBB, MustDominate ? DT : nullptr); + else + Addr = nullptr; assert(Verify() && "Invalid PHITransAddr!"); - if (DT) { + if (MustDominate) // Make sure the value is live in the predecessor. if (Instruction *Inst = dyn_cast_or_null<Instruction>(Addr)) if (!DT->dominates(Inst->getParent(), PredBB)) Addr = nullptr; - } return Addr == nullptr; } @@ -370,7 +377,7 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB, // See if we have a version of this value already available and dominating // PredBB. If so, there is no need to insert a new instance of it. PHITransAddr Tmp(InVal, DL, AC); - if (!Tmp.PHITranslateValue(CurBB, PredBB, &DT)) + if (!Tmp.PHITranslateValue(CurBB, PredBB, &DT, /*MustDominate=*/true)) return Tmp.getAddr(); // If we don't have an available version of this value, it must be an diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index 0bd427bf74e9..f82235d0c26e 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -1712,7 +1712,7 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, // would confuse the logic below that expects proper IVs. 
if (Value *V = SimplifyInstruction(Phi, DL, SE.TLI, SE.DT, SE.AC)) { Phi->replaceAllUsesWith(V); - DeadInsts.push_back(Phi); + DeadInsts.emplace_back(Phi); ++NumElim; DEBUG_WITH_TYPE(DebugType, dbgs() << "INDVARS: Eliminated constant iv: " << *Phi << '\n'); @@ -1787,7 +1787,7 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, CreateTruncOrBitCast(OrigInc, IsomorphicInc->getType(), IVName); } IsomorphicInc->replaceAllUsesWith(NewInc); - DeadInsts.push_back(IsomorphicInc); + DeadInsts.emplace_back(IsomorphicInc); } } DEBUG_WITH_TYPE(DebugType, dbgs() @@ -1800,13 +1800,30 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName); } Phi->replaceAllUsesWith(NewIV); - DeadInsts.push_back(Phi); + DeadInsts.emplace_back(Phi); } return NumElim; } bool SCEVExpander::isHighCostExpansionHelper( const SCEV *S, Loop *L, SmallPtrSetImpl<const SCEV *> &Processed) { + + // Zero/One operand expressions + switch (S->getSCEVType()) { + case scUnknown: + case scConstant: + return false; + case scTruncate: + return isHighCostExpansionHelper(cast<SCEVTruncateExpr>(S)->getOperand(), L, + Processed); + case scZeroExtend: + return isHighCostExpansionHelper(cast<SCEVZeroExtendExpr>(S)->getOperand(), + L, Processed); + case scSignExtend: + return isHighCostExpansionHelper(cast<SCEVSignExtendExpr>(S)->getOperand(), + L, Processed); + } + if (!Processed.insert(S).second) return false; @@ -1849,23 +1866,22 @@ bool SCEVExpander::isHighCostExpansionHelper( } } - // Recurse past add expressions, which commonly occur in the + // HowManyLessThans uses a Max expression whenever the loop is not guarded by + // the exit condition. + if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S)) + return true; + + // Recurse past nary expressions, which commonly occur in the // BackedgeTakenCount. They may already exist in program code, and if not, // they are not too expensive rematerialize. - if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { - for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); + if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(S)) { + for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); I != E; ++I) { if (isHighCostExpansionHelper(*I, L, Processed)) return true; } - return false; } - // HowManyLessThans uses a Max expression whenever the loop is not guarded by - // the exit condition. - if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S)) - return true; - // If we haven't recognized an expensive SCEV pattern, assume it's an // expression produced by program code. 
return false; diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index e1744d1f2965..24cada3e5313 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -100,9 +100,10 @@ bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const { bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, - int64_t Scale) const { + int64_t Scale, + unsigned AddrSpace) const { return TTIImpl->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, - Scale); + Scale, AddrSpace); } bool TargetTransformInfo::isLegalMaskedStore(Type *DataType, @@ -118,9 +119,10 @@ bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType, int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, - int64_t Scale) const { + int64_t Scale, + unsigned AddrSpace) const { return TTIImpl->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, - Scale); + Scale, AddrSpace); } bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const { @@ -235,6 +237,13 @@ TargetTransformInfo::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, return TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace); } +unsigned TargetTransformInfo::getInterleavedMemoryOpCost( + unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, + unsigned Alignment, unsigned AddressSpace) const { + return TTIImpl->getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, + Alignment, AddressSpace); +} + unsigned TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys) const { diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index a55712c6296a..c4f046340fce 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -2967,38 +2967,25 @@ static bool isDereferenceablePointer(const Value *V, const DataLayout &DL, // For GEPs, determine if the indexing lands within the allocated object. if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { + Type *VTy = GEP->getType(); + Type *Ty = VTy->getPointerElementType(); + const Value *Base = GEP->getPointerOperand(); + // Conservatively require that the base pointer be fully dereferenceable. - if (!Visited.insert(GEP->getOperand(0)).second) + if (!Visited.insert(Base).second) return false; - if (!isDereferenceablePointer(GEP->getOperand(0), DL, CtxI, + if (!isDereferenceablePointer(Base, DL, CtxI, DT, TLI, Visited)) return false; - // Check the indices. - gep_type_iterator GTI = gep_type_begin(GEP); - for (User::const_op_iterator I = GEP->op_begin()+1, - E = GEP->op_end(); I != E; ++I) { - Value *Index = *I; - Type *Ty = *GTI++; - // Struct indices can't be out of bounds. - if (isa<StructType>(Ty)) - continue; - ConstantInt *CI = dyn_cast<ConstantInt>(Index); - if (!CI) - return false; - // Zero is always ok. - if (CI->isZero()) - continue; - // Check to see that it's within the bounds of an array. - ArrayType *ATy = dyn_cast<ArrayType>(Ty); - if (!ATy) - return false; - if (CI->getValue().getActiveBits() > 64) - return false; - if (CI->getZExtValue() >= ATy->getNumElements()) - return false; - } - // Indices check out; this is dereferenceable. - return true; + + APInt Offset(DL.getPointerTypeSizeInBits(VTy), 0); + if (!GEP->accumulateConstantOffset(DL, Offset)) + return false; + + // Check if the load is within the bounds of the underlying object. 
+ uint64_t LoadSize = DL.getTypeStoreSize(Ty); + Type *BaseType = Base->getType()->getPointerElementType(); + return (Offset + LoadSize).ule(DL.getTypeAllocSize(BaseType)); } // For gc.relocate, look through relocations
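
As a closing reading aid for the densest part of this change, here are the two new dependence-distance tests from MemoryDepChecker::isDependent (LoopAccessAnalysis.cpp above), restated as a self-contained sketch; the arithmetic mirrors the patch, and the driver values come from the patch's own worked comments:

#include <cassert>
#include <cstdio>

// Two accesses with the same element stride and a positive byte distance
// never overlap when the distance, scaled to elements, is not a multiple
// of the stride (areStridedAccessesIndependent in the patch).
static bool areStridedAccessesIndependent(unsigned Distance, unsigned Stride,
                                          unsigned TypeByteSize) {
  assert(Stride > 1 && TypeByteSize > 0 && Distance > 0);
  if (Distance % TypeByteSize) // not a whole number of elements: give up
    return false;
  return (Distance / TypeByteSize) % Stride != 0;
}

// Minimum byte distance needed to vectorize/unroll MinNumIter iterations:
// TypeByteSize * Stride per iteration except the last, which needs only
// TypeByteSize (no trailing gap).
static unsigned minDistanceNeeded(unsigned TypeByteSize, unsigned Stride,
                                  unsigned MinNumIter) {
  return TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
}

int main() {
  // for (i = 0; i < 1024; i += 4) A[i+2] = A[i] + 1; with 4-byte ints:
  // distance 8 bytes, scaled distance 2, stride 4 -> independent.
  std::printf("%d\n", areStridedAccessesIndependent(8, 4, 4)); // 1
  // B = (int *)((char *)A + 14), i += 2: with MinNumIter = 2 the bound is
  // 4 * 2 * 1 + 4 = 12 <= 14, so vectorizing two iterations is safe...
  std::printf("%u\n", minDistanceNeeded(4, 2, 2)); // 12
  // ...but a forced factor of 4 needs 4 * 2 * 3 + 4 = 28 > 14: unsafe.
  std::printf("%u\n", minDistanceNeeded(4, 2, 4)); // 28
  return 0;
}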