Diffstat (limited to 'lib/Analysis')
-rw-r--r--  lib/Analysis/AliasAnalysis.cpp             84
-rw-r--r--  lib/Analysis/AliasAnalysisEvaluator.cpp     8
-rw-r--r--  lib/Analysis/AliasSetTracker.cpp            2
-rw-r--r--  lib/Analysis/BranchProbabilityInfo.cpp      4
-rw-r--r--  lib/Analysis/CMakeLists.txt                 1
-rw-r--r--  lib/Analysis/DependenceAnalysis.cpp        70
-rw-r--r--  lib/Analysis/InstructionSimplify.cpp      149
-rw-r--r--  lib/Analysis/LoopAccessAnalysis.cpp       198
-rw-r--r--  lib/Analysis/MemoryDependenceAnalysis.cpp  19
-rw-r--r--  lib/Analysis/MemoryLocation.cpp            90
-rw-r--r--  lib/Analysis/PHITransAddr.cpp              25
-rw-r--r--  lib/Analysis/ScalarEvolutionExpander.cpp   40
-rw-r--r--  lib/Analysis/TargetTransformInfo.cpp       17
-rw-r--r--  lib/Analysis/ValueTracking.cpp             43
14 files changed, 517 insertions(+), 233 deletions(-)
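
Note on the recurring pattern in the hunks that follow: the per-instruction Location factory methods move off AliasAnalysis and onto the new MemoryLocation class (added below as lib/Analysis/MemoryLocation.cpp), which reads access sizes from the module's DataLayout instead of the analysis object. A minimal sketch of a call site after this change; the helper function is hypothetical, but the calls are the ones this patch introduces:

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Hypothetical helper: may two stores touch overlapping memory?
static bool storesMayAlias(AliasAnalysis &AA, const StoreInst *A,
                           const StoreInst *B) {
  // Previously spelled AA.getLocation(A); the factory now lives on
  // MemoryLocation and fetches the store size via DataLayout.
  return AA.alias(MemoryLocation::get(A), MemoryLocation::get(B)) !=
         AliasAnalysis::NoAlias;
}
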
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp index f54e234bbead..ce46d5300517 100644 --- a/lib/Analysis/AliasAnalysis.cpp +++ b/lib/Analysis/AliasAnalysis.cpp @@ -93,7 +93,7 @@ AliasAnalysis::getModRefInfo(Instruction *I, ImmutableCallSite Call) { // location this memory access defines. The best we can say // is that if the call references what this instruction // defines, it must be clobbered by this location. - const AliasAnalysis::Location DefLoc = AA->getLocation(I); + const AliasAnalysis::Location DefLoc = MemoryLocation::get(I); if (getModRefInfo(Call, DefLoc) != AliasAnalysis::NoModRef) return AliasAnalysis::ModRef; } @@ -267,78 +267,6 @@ AliasAnalysis::getModRefBehavior(const Function *F) { // AliasAnalysis non-virtual helper method implementation //===----------------------------------------------------------------------===// -AliasAnalysis::Location AliasAnalysis::getLocation(const LoadInst *LI) { - AAMDNodes AATags; - LI->getAAMetadata(AATags); - - return Location(LI->getPointerOperand(), - getTypeStoreSize(LI->getType()), AATags); -} - -AliasAnalysis::Location AliasAnalysis::getLocation(const StoreInst *SI) { - AAMDNodes AATags; - SI->getAAMetadata(AATags); - - return Location(SI->getPointerOperand(), - getTypeStoreSize(SI->getValueOperand()->getType()), AATags); -} - -AliasAnalysis::Location AliasAnalysis::getLocation(const VAArgInst *VI) { - AAMDNodes AATags; - VI->getAAMetadata(AATags); - - return Location(VI->getPointerOperand(), UnknownSize, AATags); -} - -AliasAnalysis::Location -AliasAnalysis::getLocation(const AtomicCmpXchgInst *CXI) { - AAMDNodes AATags; - CXI->getAAMetadata(AATags); - - return Location(CXI->getPointerOperand(), - getTypeStoreSize(CXI->getCompareOperand()->getType()), - AATags); -} - -AliasAnalysis::Location -AliasAnalysis::getLocation(const AtomicRMWInst *RMWI) { - AAMDNodes AATags; - RMWI->getAAMetadata(AATags); - - return Location(RMWI->getPointerOperand(), - getTypeStoreSize(RMWI->getValOperand()->getType()), AATags); -} - -AliasAnalysis::Location -AliasAnalysis::getLocationForSource(const MemTransferInst *MTI) { - uint64_t Size = UnknownSize; - if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength())) - Size = C->getValue().getZExtValue(); - - // memcpy/memmove can have AA tags. For memcpy, they apply - // to both the source and the destination. - AAMDNodes AATags; - MTI->getAAMetadata(AATags); - - return Location(MTI->getRawSource(), Size, AATags); -} - -AliasAnalysis::Location -AliasAnalysis::getLocationForDest(const MemIntrinsic *MTI) { - uint64_t Size = UnknownSize; - if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength())) - Size = C->getValue().getZExtValue(); - - // memcpy/memmove can have AA tags. For memcpy, they apply - // to both the source and the destination. - AAMDNodes AATags; - MTI->getAAMetadata(AATags); - - return Location(MTI->getRawDest(), Size, AATags); -} - - - AliasAnalysis::ModRefResult AliasAnalysis::getModRefInfo(const LoadInst *L, const Location &Loc) { // Be conservative in the face of volatile/atomic. @@ -347,7 +275,7 @@ AliasAnalysis::getModRefInfo(const LoadInst *L, const Location &Loc) { // If the load address doesn't alias the given address, it doesn't read // or write the specified memory. - if (Loc.Ptr && !alias(getLocation(L), Loc)) + if (Loc.Ptr && !alias(MemoryLocation::get(L), Loc)) return NoModRef; // Otherwise, a load just reads. 
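
The remaining getModRefInfo hunks in this file all follow the shape of the load case above; restated outside LLVM as a reading aid (an illustrative free function, not part of the patch):

// A read-only instruction reports NoModRef when its location provably
// cannot alias the queried one, and Ref otherwise; volatile/atomic
// accesses stay conservative.
static AliasAnalysis::ModRefResult
getModRefInfoForLoad(AliasAnalysis &AA, const LoadInst *L,
                     const AliasAnalysis::Location &Loc) {
  if (!L->isUnordered())
    return AliasAnalysis::ModRef;
  if (Loc.Ptr && !AA.alias(MemoryLocation::get(L), Loc))
    return AliasAnalysis::NoModRef;
  return AliasAnalysis::Ref;
}
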
@@ -363,7 +291,7 @@ AliasAnalysis::getModRefInfo(const StoreInst *S, const Location &Loc) { if (Loc.Ptr) { // If the store address cannot alias the pointer in question, then the // specified memory cannot be modified by the store. - if (!alias(getLocation(S), Loc)) + if (!alias(MemoryLocation::get(S), Loc)) return NoModRef; // If the pointer is a pointer to constant memory, then it could not have @@ -383,7 +311,7 @@ AliasAnalysis::getModRefInfo(const VAArgInst *V, const Location &Loc) { if (Loc.Ptr) { // If the va_arg address cannot alias the pointer in question, then the // specified memory cannot be accessed by the va_arg. - if (!alias(getLocation(V), Loc)) + if (!alias(MemoryLocation::get(V), Loc)) return NoModRef; // If the pointer is a pointer to constant memory, then it could not have @@ -403,7 +331,7 @@ AliasAnalysis::getModRefInfo(const AtomicCmpXchgInst *CX, const Location &Loc) { return ModRef; // If the cmpxchg address does not alias the location, it does not access it. - if (Loc.Ptr && !alias(getLocation(CX), Loc)) + if (Loc.Ptr && !alias(MemoryLocation::get(CX), Loc)) return NoModRef; return ModRef; @@ -416,7 +344,7 @@ AliasAnalysis::getModRefInfo(const AtomicRMWInst *RMW, const Location &Loc) { return ModRef; // If the atomicrmw address does not alias the location, it does not access it. - if (Loc.Ptr && !alias(getLocation(RMW), Loc)) + if (Loc.Ptr && !alias(MemoryLocation::get(RMW), Loc)) return NoModRef; return ModRef; diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp index 273eacc16e72..dd6a3a0715e1 100644 --- a/lib/Analysis/AliasAnalysisEvaluator.cpp +++ b/lib/Analysis/AliasAnalysisEvaluator.cpp @@ -219,8 +219,8 @@ bool AAEval::runOnFunction(Function &F) { I1 != E; ++I1) { for (SetVector<Value *>::iterator I2 = Stores.begin(), E2 = Stores.end(); I2 != E2; ++I2) { - switch (AA.alias(AA.getLocation(cast<LoadInst>(*I1)), - AA.getLocation(cast<StoreInst>(*I2)))) { + switch (AA.alias(MemoryLocation::get(cast<LoadInst>(*I1)), + MemoryLocation::get(cast<StoreInst>(*I2)))) { case AliasAnalysis::NoAlias: PrintLoadStoreResults("NoAlias", PrintNoAlias, *I1, *I2, F.getParent()); @@ -245,8 +245,8 @@ bool AAEval::runOnFunction(Function &F) { for (SetVector<Value *>::iterator I1 = Stores.begin(), E = Stores.end(); I1 != E; ++I1) { for (SetVector<Value *>::iterator I2 = Stores.begin(); I2 != I1; ++I2) { - switch (AA.alias(AA.getLocation(cast<StoreInst>(*I1)), - AA.getLocation(cast<StoreInst>(*I2)))) { + switch (AA.alias(MemoryLocation::get(cast<StoreInst>(*I1)), + MemoryLocation::get(cast<StoreInst>(*I2)))) { case AliasAnalysis::NoAlias: PrintLoadStoreResults("NoAlias", PrintNoAlias, *I1, *I2, F.getParent()); diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp index 50890c17f2eb..12c1c7d4af90 100644 --- a/lib/Analysis/AliasSetTracker.cpp +++ b/lib/Analysis/AliasSetTracker.cpp @@ -130,7 +130,7 @@ void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry, void AliasSet::addUnknownInst(Instruction *I, AliasAnalysis &AA) { if (UnknownInsts.empty()) addRef(); - UnknownInsts.push_back(I); + UnknownInsts.emplace_back(I); if (!I->mayWriteToMemory()) { AliasTy = MayAlias; diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp index 091943bfc7b3..430b41241edf 100644 --- a/lib/Analysis/BranchProbabilityInfo.cpp +++ b/lib/Analysis/BranchProbabilityInfo.cpp @@ -543,6 +543,10 @@ bool BranchProbabilityInfo::runOnFunction(Function &F) { return false; } +void 
BranchProbabilityInfo::releaseMemory() { + Weights.clear(); +} + void BranchProbabilityInfo::print(raw_ostream &OS, const Module *) const { OS << "---- Branch Probabilities ----\n"; // We print the probabilities from the last function the analysis ran over, diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index 32fe9b9495d1..b22ee7e24931 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -42,6 +42,7 @@ add_llvm_library(LLVMAnalysis MemDerefPrinter.cpp MemoryBuiltins.cpp MemoryDependenceAnalysis.cpp + MemoryLocation.cpp ModuleDebugInfoPrinter.cpp NoAliasAnalysis.cpp PHITransAddr.cpp diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp index 808a38b6346b..b16cdfef3375 100644 --- a/lib/Analysis/DependenceAnalysis.cpp +++ b/lib/Analysis/DependenceAnalysis.cpp @@ -779,23 +779,56 @@ void DependenceAnalysis::collectCommonLoops(const SCEV *Expression, } } -void DependenceAnalysis::unifySubscriptType(Subscript *Pair) { - const SCEV *Src = Pair->Src; - const SCEV *Dst = Pair->Dst; - IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); - IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); - if (SrcTy == nullptr || DstTy == nullptr) { - assert(SrcTy == DstTy && "This function only unify integer types and " - "expect Src and Dst share the same type " - "otherwise."); - return; +void DependenceAnalysis::unifySubscriptType(ArrayRef<Subscript *> Pairs) { + + unsigned widestWidthSeen = 0; + Type *widestType; + + // Go through each pair and find the widest bit to which we need + // to extend all of them. + for (unsigned i = 0; i < Pairs.size(); i++) { + const SCEV *Src = Pairs[i]->Src; + const SCEV *Dst = Pairs[i]->Dst; + IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); + IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); + if (SrcTy == nullptr || DstTy == nullptr) { + assert(SrcTy == DstTy && "This function only unify integer types and " + "expect Src and Dst share the same type " + "otherwise."); + continue; + } + if (SrcTy->getBitWidth() > widestWidthSeen) { + widestWidthSeen = SrcTy->getBitWidth(); + widestType = SrcTy; + } + if (DstTy->getBitWidth() > widestWidthSeen) { + widestWidthSeen = DstTy->getBitWidth(); + widestType = DstTy; + } } - if (SrcTy->getBitWidth() > DstTy->getBitWidth()) { - // Sign-extend Dst to typeof(Src) if typeof(Src) is wider than typeof(Dst). - Pair->Dst = SE->getSignExtendExpr(Dst, SrcTy); - } else if (SrcTy->getBitWidth() < DstTy->getBitWidth()) { - // Sign-extend Src to typeof(Dst) if typeof(Dst) is wider than typeof(Src). - Pair->Src = SE->getSignExtendExpr(Src, DstTy); + + + assert(widestWidthSeen > 0); + + // Now extend each pair to the widest seen. 
+ for (unsigned i = 0; i < Pairs.size(); i++) { + const SCEV *Src = Pairs[i]->Src; + const SCEV *Dst = Pairs[i]->Dst; + IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); + IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); + if (SrcTy == nullptr || DstTy == nullptr) { + assert(SrcTy == DstTy && "This function only unify integer types and " + "expect Src and Dst share the same type " + "otherwise."); + continue; + } + if (SrcTy->getBitWidth() < widestWidthSeen) + // Sign-extend Src to widestType + Pairs[i]->Src = SE->getSignExtendExpr(Src, widestType); + if (DstTy->getBitWidth() < widestWidthSeen) { + // Sign-extend Dst to widestType + Pairs[i]->Dst = SE->getSignExtendExpr(Dst, widestType); + } } } @@ -2937,7 +2970,7 @@ const SCEV *DependenceAnalysis::getUpperBound(BoundInfo *Bound) const { // return the coefficient (the step) // corresponding to the specified loop. // If there isn't one, return 0. -// For example, given a*i + b*j + c*k, zeroing the coefficient +// For example, given a*i + b*j + c*k, finding the coefficient // corresponding to the j loop would yield b. const SCEV *DependenceAnalysis::findCoefficient(const SCEV *Expr, const Loop *TargetLoop) const { @@ -3574,13 +3607,16 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst, SmallBitVector Sivs(Pairs); SmallBitVector Mivs(Pairs); SmallBitVector ConstrainedLevels(MaxLevels + 1); + SmallVector<Subscript *, 4> PairsInGroup; for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) { DEBUG(dbgs() << SJ << " "); if (Pair[SJ].Classification == Subscript::SIV) Sivs.set(SJ); else Mivs.set(SJ); + PairsInGroup.push_back(&Pair[SJ]); } + unifySubscriptType(PairsInGroup); DEBUG(dbgs() << "}\n"); while (Sivs.any()) { bool Changed = false; diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 097b99eb0a5e..ec56d888dc2f 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -3144,6 +3144,90 @@ Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, RecursionLimit); } +/// SimplifyWithOpReplaced - See if V simplifies when its operand Op is +/// replaced with RepOp. +static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, + const Query &Q, + unsigned MaxRecurse) { + // Trivial replacement. + if (V == Op) + return RepOp; + + auto *I = dyn_cast<Instruction>(V); + if (!I) + return nullptr; + + // If this is a binary operator, try to simplify it with the replaced op. + if (auto *B = dyn_cast<BinaryOperator>(I)) { + // Consider: + // %cmp = icmp eq i32 %x, 2147483647 + // %add = add nsw i32 %x, 1 + // %sel = select i1 %cmp, i32 -2147483648, i32 %add + // + // We can't replace %sel with %add unless we strip away the flags. + if (isa<OverflowingBinaryOperator>(B)) + if (B->hasNoSignedWrap() || B->hasNoUnsignedWrap()) + return nullptr; + if (isa<PossiblyExactOperator>(B)) + if (B->isExact()) + return nullptr; + + if (MaxRecurse) { + if (B->getOperand(0) == Op) + return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), Q, + MaxRecurse - 1); + if (B->getOperand(1) == Op) + return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, Q, + MaxRecurse - 1); + } + } + + // Same for CmpInsts. 
+ if (CmpInst *C = dyn_cast<CmpInst>(I)) { + if (MaxRecurse) { + if (C->getOperand(0) == Op) + return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), Q, + MaxRecurse - 1); + if (C->getOperand(1) == Op) + return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, Q, + MaxRecurse - 1); + } + } + + // TODO: We could hand off more cases to instsimplify here. + + // If all operands are constant after substituting Op for RepOp then we can + // constant fold the instruction. + if (Constant *CRepOp = dyn_cast<Constant>(RepOp)) { + // Build a list of all constant operands. + SmallVector<Constant *, 8> ConstOps; + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + if (I->getOperand(i) == Op) + ConstOps.push_back(CRepOp); + else if (Constant *COp = dyn_cast<Constant>(I->getOperand(i))) + ConstOps.push_back(COp); + else + break; + } + + // All operands were constants, fold it. + if (ConstOps.size() == I->getNumOperands()) { + if (CmpInst *C = dyn_cast<CmpInst>(I)) + return ConstantFoldCompareInstOperands(C->getPredicate(), ConstOps[0], + ConstOps[1], Q.DL, Q.TLI); + + if (LoadInst *LI = dyn_cast<LoadInst>(I)) + if (!LI->isVolatile()) + return ConstantFoldLoadFromConstPtr(ConstOps[0], Q.DL); + + return ConstantFoldInstOperands(I->getOpcode(), I->getType(), ConstOps, + Q.DL, Q.TLI); + } + } + + return nullptr; +} + /// SimplifySelectInst - Given operands for a SelectInst, see if we can fold /// the result. If not, this returns null. static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal, @@ -3172,29 +3256,28 @@ static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal, if (isa<UndefValue>(FalseVal)) // select C, X, undef -> X return TrueVal; - const auto *ICI = dyn_cast<ICmpInst>(CondVal); - unsigned BitWidth = TrueVal->getType()->getScalarSizeInBits(); - if (ICI && BitWidth) { + if (const auto *ICI = dyn_cast<ICmpInst>(CondVal)) { + unsigned BitWidth = Q.DL.getTypeSizeInBits(TrueVal->getType()); ICmpInst::Predicate Pred = ICI->getPredicate(); + Value *CmpLHS = ICI->getOperand(0); + Value *CmpRHS = ICI->getOperand(1); APInt MinSignedValue = APInt::getSignBit(BitWidth); Value *X; const APInt *Y; bool TrueWhenUnset; bool IsBitTest = false; if (ICmpInst::isEquality(Pred) && - match(ICI->getOperand(0), m_And(m_Value(X), m_APInt(Y))) && - match(ICI->getOperand(1), m_Zero())) { + match(CmpLHS, m_And(m_Value(X), m_APInt(Y))) && + match(CmpRHS, m_Zero())) { IsBitTest = true; TrueWhenUnset = Pred == ICmpInst::ICMP_EQ; - } else if (Pred == ICmpInst::ICMP_SLT && - match(ICI->getOperand(1), m_Zero())) { - X = ICI->getOperand(0); + } else if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, m_Zero())) { + X = CmpLHS; Y = &MinSignedValue; IsBitTest = true; TrueWhenUnset = false; - } else if (Pred == ICmpInst::ICMP_SGT && - match(ICI->getOperand(1), m_AllOnes())) { - X = ICI->getOperand(0); + } else if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, m_AllOnes())) { + X = CmpLHS; Y = &MinSignedValue; IsBitTest = true; TrueWhenUnset = true; @@ -3225,6 +3308,50 @@ static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal, return TrueWhenUnset ? TrueVal : FalseVal; } } + if (ICI->hasOneUse()) { + const APInt *C; + if (match(CmpRHS, m_APInt(C))) { + // X < MIN ? T : F --> F + if (Pred == ICmpInst::ICMP_SLT && C->isMinSignedValue()) + return FalseVal; + // X < MIN ? T : F --> F + if (Pred == ICmpInst::ICMP_ULT && C->isMinValue()) + return FalseVal; + // X > MAX ? T : F --> F + if (Pred == ICmpInst::ICMP_SGT && C->isMaxSignedValue()) + return FalseVal; + // X > MAX ? 
T : F --> F + if (Pred == ICmpInst::ICMP_UGT && C->isMaxValue()) + return FalseVal; + } + } + + // If we have an equality comparison then we know the value in one of the + // arms of the select. See if substituting this value into the arm and + // simplifying the result yields the same value as the other arm. + if (Pred == ICmpInst::ICMP_EQ) { + if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, MaxRecurse) == + TrueVal || + SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q, MaxRecurse) == + TrueVal) + return FalseVal; + if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q, MaxRecurse) == + FalseVal || + SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q, MaxRecurse) == + FalseVal) + return FalseVal; + } else if (Pred == ICmpInst::ICMP_NE) { + if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q, MaxRecurse) == + FalseVal || + SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q, MaxRecurse) == + FalseVal) + return TrueVal; + if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, MaxRecurse) == + TrueVal || + SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q, MaxRecurse) == + TrueVal) + return TrueVal; + } } return nullptr; diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp index b70de00db04b..c661c7b87dcb 100644 --- a/lib/Analysis/LoopAccessAnalysis.cpp +++ b/lib/Analysis/LoopAccessAnalysis.cpp @@ -177,15 +177,21 @@ void LoopAccessInfo::RuntimePointerCheck::print( } } -bool LoopAccessInfo::RuntimePointerCheck::needsAnyChecking( +unsigned LoopAccessInfo::RuntimePointerCheck::getNumberOfChecks( const SmallVectorImpl<int> *PtrPartition) const { unsigned NumPointers = Pointers.size(); + unsigned CheckCount = 0; for (unsigned I = 0; I < NumPointers; ++I) for (unsigned J = I + 1; J < NumPointers; ++J) if (needsChecking(I, J, PtrPartition)) - return true; - return false; + CheckCount++; + return CheckCount; +} + +bool LoopAccessInfo::RuntimePointerCheck::needsAnyChecking( + const SmallVectorImpl<int> *PtrPartition) const { + return getNumberOfChecks(PtrPartition) != 0; } namespace { @@ -220,10 +226,11 @@ public: } /// \brief Check whether we can check the pointers at runtime for - /// non-intersection. + /// non-intersection. Returns true when we have 0 pointers + /// (a check on 0 pointers for non-intersection will always return true). bool canCheckPtrAtRT(LoopAccessInfo::RuntimePointerCheck &RtCheck, - unsigned &NumComparisons, ScalarEvolution *SE, - Loop *TheLoop, const ValueToValueMap &Strides, + bool &NeedRTCheck, ScalarEvolution *SE, Loop *TheLoop, + const ValueToValueMap &Strides, bool ShouldCheckStride = false); /// \brief Goes over all memory accesses, checks whether a RT check is needed @@ -289,29 +296,23 @@ static bool hasComputableBounds(ScalarEvolution *SE, return AR->isAffine(); } -/// \brief Check the stride of the pointer and ensure that it does not wrap in -/// the address space. -static int isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp, - const ValueToValueMap &StridesMap); - bool AccessAnalysis::canCheckPtrAtRT( - LoopAccessInfo::RuntimePointerCheck &RtCheck, unsigned &NumComparisons, + LoopAccessInfo::RuntimePointerCheck &RtCheck, bool &NeedRTCheck, ScalarEvolution *SE, Loop *TheLoop, const ValueToValueMap &StridesMap, bool ShouldCheckStride) { // Find pointers with computable bounds. We are going to use this information // to place a runtime bound check. 
bool CanDoRT = true; + NeedRTCheck = false; + if (!IsRTCheckNeeded) return true; + bool IsDepCheckNeeded = isDependencyCheckNeeded(); - NumComparisons = 0; // We assign a consecutive id to access from different alias sets. // Accesses between different groups doesn't need to be checked. unsigned ASId = 1; for (auto &AS : AST) { - unsigned NumReadPtrChecks = 0; - unsigned NumWritePtrChecks = 0; - // We assign consecutive id to access from different dependence sets. // Accesses within the same set don't need a runtime check. unsigned RunningDepId = 1; @@ -322,11 +323,6 @@ bool AccessAnalysis::canCheckPtrAtRT( bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true)); MemAccessInfo Access(Ptr, IsWrite); - if (IsWrite) - ++NumWritePtrChecks; - else - ++NumReadPtrChecks; - if (hasComputableBounds(SE, StridesMap, Ptr) && // When we run after a failing dependency check we have to make sure // we don't have wrapping pointers. @@ -354,16 +350,15 @@ bool AccessAnalysis::canCheckPtrAtRT( } } - if (IsDepCheckNeeded && CanDoRT && RunningDepId == 2) - NumComparisons += 0; // Only one dependence set. - else { - NumComparisons += (NumWritePtrChecks * (NumReadPtrChecks + - NumWritePtrChecks - 1)); - } - ++ASId; } + // We need a runtime check if there are any accesses that need checking. + // However, some accesses cannot be checked (for example because we + // can't determine their bounds). In these cases we would need a check + // but wouldn't be able to add it. + NeedRTCheck = !CanDoRT || RtCheck.needsAnyChecking(nullptr); + // If the pointers that we would use for the bounds comparison have different // address spaces, assume the values aren't directly comparable, so we can't // use them for the runtime check. We also have to assume they could @@ -510,8 +505,8 @@ static bool isInBoundsGep(Value *Ptr) { } /// \brief Check whether the access through \p Ptr has a constant stride. -static int isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp, - const ValueToValueMap &StridesMap) { +int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp, + const ValueToValueMap &StridesMap) { const Type *Ty = Ptr->getType(); assert(Ty->isPointerTy() && "Unexpected non-ptr"); @@ -678,6 +673,42 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance, return false; } +/// \brief Check the dependence for two accesses with the same stride \p Stride. +/// \p Distance is the positive distance and \p TypeByteSize is type size in +/// bytes. +/// +/// \returns true if they are independent. +static bool areStridedAccessesIndependent(unsigned Distance, unsigned Stride, + unsigned TypeByteSize) { + assert(Stride > 1 && "The stride must be greater than 1"); + assert(TypeByteSize > 0 && "The type size in byte must be non-zero"); + assert(Distance > 0 && "The distance must be non-zero"); + + // Skip if the distance is not multiple of type byte size. + if (Distance % TypeByteSize) + return false; + + unsigned ScaledDist = Distance / TypeByteSize; + + // No dependence if the scaled distance is not multiple of the stride. + // E.g. + // for (i = 0; i < 1024 ; i += 4) + // A[i+2] = A[i] + 1; + // + // Two accesses in memory (scaled distance is 2, stride is 4): + // | A[0] | | | | A[4] | | | | + // | | | A[2] | | | | A[6] | | + // + // E.g. 
+ // for (i = 0; i < 1024 ; i += 3) + // A[i+4] = A[i] + 1; + // + // Two accesses in memory (scaled distance is 4, stride is 3): + // | A[0] | | | A[3] | | | A[6] | | | + // | | | | | A[4] | | | A[7] | | + return ScaledDist % Stride; +} + MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B, unsigned BIdx, @@ -778,34 +809,87 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, unsigned Distance = (unsigned) Val.getZExtValue(); + unsigned Stride = std::abs(StrideAPtr); + if (Stride > 1 && + areStridedAccessesIndependent(Distance, Stride, TypeByteSize)) + return Dependence::NoDep; + // Bail out early if passed-in parameters make vectorization not feasible. unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ? VectorizerParams::VectorizationFactor : 1); unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ? VectorizerParams::VectorizationInterleave : 1); + // The minimum number of iterations for a vectorized/unrolled version. + unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U); + + // It's not vectorizable if the distance is smaller than the minimum distance + // needed for a vectroized/unrolled version. Vectorizing one iteration in + // front needs TypeByteSize * Stride. Vectorizing the last iteration needs + // TypeByteSize (No need to plus the last gap distance). + // + // E.g. Assume one char is 1 byte in memory and one int is 4 bytes. + // foo(int *A) { + // int *B = (int *)((char *)A + 14); + // for (i = 0 ; i < 1024 ; i += 2) + // B[i] = A[i] + 1; + // } + // + // Two accesses in memory (stride is 2): + // | A[0] | | A[2] | | A[4] | | A[6] | | + // | B[0] | | B[2] | | B[4] | + // + // Distance needs for vectorizing iterations except the last iteration: + // 4 * 2 * (MinNumIter - 1). Distance needs for the last iteration: 4. + // So the minimum distance needed is: 4 * 2 * (MinNumIter - 1) + 4. + // + // If MinNumIter is 2, it is vectorizable as the minimum distance needed is + // 12, which is less than distance. + // + // If MinNumIter is 4 (Say if a user forces the vectorization factor to be 4), + // the minimum distance needed is 28, which is greater than distance. It is + // not safe to do vectorization. + unsigned MinDistanceNeeded = + TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize; + if (MinDistanceNeeded > Distance) { + DEBUG(dbgs() << "LAA: Failure because of positive distance " << Distance + << '\n'); + return Dependence::Backward; + } - // The distance must be bigger than the size needed for a vectorized version - // of the operation and the size of the vectorized operation must not be - // bigger than the currrent maximum size. - if (Distance < 2*TypeByteSize || - 2*TypeByteSize > MaxSafeDepDistBytes || - Distance < TypeByteSize * ForcedUnroll * ForcedFactor) { - DEBUG(dbgs() << "LAA: Failure because of Positive distance " - << Val.getSExtValue() << '\n'); + // Unsafe if the minimum distance needed is greater than max safe distance. + if (MinDistanceNeeded > MaxSafeDepDistBytes) { + DEBUG(dbgs() << "LAA: Failure because it needs at least " + << MinDistanceNeeded << " size in bytes"); return Dependence::Backward; } // Positive distance bigger than max vectorization factor. - MaxSafeDepDistBytes = Distance < MaxSafeDepDistBytes ? - Distance : MaxSafeDepDistBytes; + // FIXME: Should use max factor instead of max distance in bytes, which could + // not handle different types. + // E.g. 
Assume one char is 1 byte in memory and one int is 4 bytes. + // void foo (int *A, char *B) { + // for (unsigned i = 0; i < 1024; i++) { + // A[i+2] = A[i] + 1; + // B[i+2] = B[i] + 1; + // } + // } + // + // This case is currently unsafe according to the max safe distance. If we + // analyze the two accesses on array B, the max safe dependence distance + // is 2. Then we analyze the accesses on array A, the minimum distance needed + // is 8, which is less than 2 and forbidden vectorization, But actually + // both A and B could be vectorized by 2 iterations. + MaxSafeDepDistBytes = + Distance < MaxSafeDepDistBytes ? Distance : MaxSafeDepDistBytes; bool IsTrueDataDependence = (!AIsWrite && BIsWrite); if (IsTrueDataDependence && couldPreventStoreLoadForward(Distance, TypeByteSize)) return Dependence::BackwardVectorizableButPreventsForwarding; - DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue() << - " with max VF = " << MaxSafeDepDistBytes / TypeByteSize << '\n'); + DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue() + << " with max VF = " + << MaxSafeDepDistBytes / (TypeByteSize * Stride) << '\n'); return Dependence::BackwardVectorizable; } @@ -1066,7 +1150,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { if (Seen.insert(Ptr).second) { ++NumReadWrites; - AliasAnalysis::Location Loc = AA->getLocation(ST); + AliasAnalysis::Location Loc = MemoryLocation::get(ST); // The TBAA metadata could have a control dependency on the predication // condition, so we cannot rely on it when determining whether or not we // need runtime pointer checks. @@ -1102,7 +1186,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { IsReadOnlyPtr = true; } - AliasAnalysis::Location Loc = AA->getLocation(LD); + AliasAnalysis::Location Loc = MemoryLocation::get(LD); // The TBAA metadata could have a control dependency on the predication // condition, so we cannot rely on it when determining whether or not we // need runtime pointer checks. @@ -1123,22 +1207,17 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { // Build dependence sets and check whether we need a runtime pointer bounds // check. Accesses.buildDependenceSets(); - bool NeedRTCheck = Accesses.isRTCheckNeeded(); // Find pointers with computable bounds. We are going to use this information // to place a runtime bound check. - bool CanDoRT = false; - if (NeedRTCheck) - CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE, TheLoop, - Strides); - - DEBUG(dbgs() << "LAA: We need to do " << NumComparisons << - " pointer comparisons.\n"); + bool NeedRTCheck; + bool CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, + NeedRTCheck, SE, + TheLoop, Strides); - // If we only have one set of dependences to check pointers among we don't - // need a runtime check. - if (NumComparisons == 0 && NeedRTCheck) - NeedRTCheck = false; + DEBUG(dbgs() << "LAA: We need to do " + << PtrRtCheck.getNumberOfChecks(nullptr) + << " pointer comparisons.\n"); // Check that we found the bounds for the pointer. if (CanDoRT) @@ -1171,10 +1250,11 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { PtrRtCheck.reset(); PtrRtCheck.Need = true; - CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE, + CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NeedRTCheck, SE, TheLoop, Strides, true); + // Check that we found the bounds for the pointer. 
- if (!CanDoRT && NumComparisons > 0) { + if (NeedRTCheck && !CanDoRT) { emitAnalysis(LoopAccessReport() << "cannot check memory dependencies at runtime"); DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n"); @@ -1319,7 +1399,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI, AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI, const ValueToValueMap &Strides) - : DepChecker(SE, L), NumComparisons(0), TheLoop(L), SE(SE), DL(DL), + : DepChecker(SE, L), TheLoop(L), SE(SE), DL(DL), TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1U), CanVecMem(false), StoreToLoopInvariantAddress(false) { diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 3c1826a58e66..255bae61eb2f 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -124,11 +124,11 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst, AliasAnalysis *AA) { if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) { if (LI->isUnordered()) { - Loc = AA->getLocation(LI); + Loc = MemoryLocation::get(LI); return AliasAnalysis::Ref; } if (LI->getOrdering() == Monotonic) { - Loc = AA->getLocation(LI); + Loc = MemoryLocation::get(LI); return AliasAnalysis::ModRef; } Loc = AliasAnalysis::Location(); @@ -137,11 +137,11 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst, if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) { if (SI->isUnordered()) { - Loc = AA->getLocation(SI); + Loc = MemoryLocation::get(SI); return AliasAnalysis::Mod; } if (SI->getOrdering() == Monotonic) { - Loc = AA->getLocation(SI); + Loc = MemoryLocation::get(SI); return AliasAnalysis::ModRef; } Loc = AliasAnalysis::Location(); @@ -149,7 +149,7 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst, } if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) { - Loc = AA->getLocation(V); + Loc = MemoryLocation::get(V); return AliasAnalysis::ModRef; } @@ -486,7 +486,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, } } - AliasAnalysis::Location LoadLoc = AA->getLocation(LI); + AliasAnalysis::Location LoadLoc = MemoryLocation::get(LI); // If we found a pointer, check if it could be the same as our pointer. AliasAnalysis::AliasResult R = AA->alias(LoadLoc, MemLoc); @@ -575,7 +575,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, // Ok, this store might clobber the query pointer. Check to see if it is // a must alias: in this case, we want to return this as a def. - AliasAnalysis::Location StoreLoc = AA->getLocation(SI); + AliasAnalysis::Location StoreLoc = MemoryLocation::get(SI); // If we found a pointer, check if it could be the same as our pointer. AliasAnalysis::AliasResult R = AA->alias(StoreLoc, MemLoc); @@ -872,7 +872,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { void MemoryDependenceAnalysis:: getNonLocalPointerDependency(Instruction *QueryInst, SmallVectorImpl<NonLocalDepResult> &Result) { - const AliasAnalysis::Location Loc = AA->getLocation(QueryInst); + const AliasAnalysis::Location Loc = MemoryLocation::get(QueryInst); bool isLoad = isa<LoadInst>(QueryInst); BasicBlock *FromBB = QueryInst->getParent(); assert(FromBB); @@ -1278,8 +1278,7 @@ getNonLocalPointerDepFromBB(Instruction *QueryInst, // Get the PHI translated pointer in this predecessor. This can fail if // not translatable, in which case the getAddr() returns null. 
PHITransAddr &PredPointer = PredList.back().second; - PredPointer.PHITranslateValue(BB, Pred, nullptr); - + PredPointer.PHITranslateValue(BB, Pred, DT, /*MustDominate=*/false); Value *PredPtrVal = PredPointer.getAddr(); // Check to see if we have already visited this pred block with another diff --git a/lib/Analysis/MemoryLocation.cpp b/lib/Analysis/MemoryLocation.cpp new file mode 100644 index 000000000000..f87a017b9211 --- /dev/null +++ b/lib/Analysis/MemoryLocation.cpp @@ -0,0 +1,90 @@ +//===- MemoryLocation.cpp - Memory location descriptions -------------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +using namespace llvm; + +MemoryLocation MemoryLocation::get(const LoadInst *LI) { + AAMDNodes AATags; + LI->getAAMetadata(AATags); + const auto &DL = LI->getModule()->getDataLayout(); + + return MemoryLocation(LI->getPointerOperand(), + DL.getTypeStoreSize(LI->getType()), AATags); +} + +MemoryLocation MemoryLocation::get(const StoreInst *SI) { + AAMDNodes AATags; + SI->getAAMetadata(AATags); + const auto &DL = SI->getModule()->getDataLayout(); + + return MemoryLocation(SI->getPointerOperand(), + DL.getTypeStoreSize(SI->getValueOperand()->getType()), + AATags); +} + +MemoryLocation MemoryLocation::get(const VAArgInst *VI) { + AAMDNodes AATags; + VI->getAAMetadata(AATags); + + return MemoryLocation(VI->getPointerOperand(), UnknownSize, AATags); +} + +MemoryLocation MemoryLocation::get(const AtomicCmpXchgInst *CXI) { + AAMDNodes AATags; + CXI->getAAMetadata(AATags); + const auto &DL = CXI->getModule()->getDataLayout(); + + return MemoryLocation( + CXI->getPointerOperand(), + DL.getTypeStoreSize(CXI->getCompareOperand()->getType()), AATags); +} + +MemoryLocation MemoryLocation::get(const AtomicRMWInst *RMWI) { + AAMDNodes AATags; + RMWI->getAAMetadata(AATags); + const auto &DL = RMWI->getModule()->getDataLayout(); + + return MemoryLocation(RMWI->getPointerOperand(), + DL.getTypeStoreSize(RMWI->getValOperand()->getType()), + AATags); +} + +MemoryLocation MemoryLocation::getForSource(const MemTransferInst *MTI) { + uint64_t Size = UnknownSize; + if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength())) + Size = C->getValue().getZExtValue(); + + // memcpy/memmove can have AA tags. For memcpy, they apply + // to both the source and the destination. + AAMDNodes AATags; + MTI->getAAMetadata(AATags); + + return MemoryLocation(MTI->getRawSource(), Size, AATags); +} + +MemoryLocation MemoryLocation::getForDest(const MemIntrinsic *MTI) { + uint64_t Size = UnknownSize; + if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength())) + Size = C->getValue().getZExtValue(); + + // memcpy/memmove can have AA tags. For memcpy, they apply + // to both the source and the destination. 
+ AAMDNodes AATags; + MTI->getAAMetadata(AATags); + + return MemoryLocation(MTI->getRawDest(), Size, AATags); +} diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp index 177684fdc9a7..633d6aaad35e 100644 --- a/lib/Analysis/PHITransAddr.cpp +++ b/lib/Analysis/PHITransAddr.cpp @@ -150,7 +150,8 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, if (!Inst) return V; // Determine whether 'Inst' is an input to our PHI translatable expression. - bool isInput = std::count(InstInputs.begin(), InstInputs.end(), Inst); + bool isInput = + std::find(InstInputs.begin(), InstInputs.end(), Inst) != InstInputs.end(); // Handle inputs instructions if needed. if (isInput) { @@ -276,7 +277,8 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, isNSW = isNUW = false; // If the old 'LHS' was an input, add the new 'LHS' as an input. - if (std::count(InstInputs.begin(), InstInputs.end(), BOp)) { + if (std::find(InstInputs.begin(), InstInputs.end(), BOp) != + InstInputs.end()) { RemoveInstInputs(BOp, InstInputs); AddAsInput(LHS); } @@ -313,21 +315,26 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, /// PHITranslateValue - PHI translate the current address up the CFG from -/// CurBB to Pred, updating our state to reflect any needed changes. If the -/// dominator tree DT is non-null, the translated value must dominate +/// CurBB to Pred, updating our state to reflect any needed changes. If +/// 'MustDominate' is true, the translated value must dominate /// PredBB. This returns true on failure and sets Addr to null. bool PHITransAddr::PHITranslateValue(BasicBlock *CurBB, BasicBlock *PredBB, - const DominatorTree *DT) { + const DominatorTree *DT, + bool MustDominate) { + assert(DT || !MustDominate); assert(Verify() && "Invalid PHITransAddr!"); - Addr = PHITranslateSubExpr(Addr, CurBB, PredBB, DT); + if (DT && DT->isReachableFromEntry(PredBB)) + Addr = + PHITranslateSubExpr(Addr, CurBB, PredBB, MustDominate ? DT : nullptr); + else + Addr = nullptr; assert(Verify() && "Invalid PHITransAddr!"); - if (DT) { + if (MustDominate) // Make sure the value is live in the predecessor. if (Instruction *Inst = dyn_cast_or_null<Instruction>(Addr)) if (!DT->dominates(Inst->getParent(), PredBB)) Addr = nullptr; - } return Addr == nullptr; } @@ -370,7 +377,7 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB, // See if we have a version of this value already available and dominating // PredBB. If so, there is no need to insert a new instance of it. PHITransAddr Tmp(InVal, DL, AC); - if (!Tmp.PHITranslateValue(CurBB, PredBB, &DT)) + if (!Tmp.PHITranslateValue(CurBB, PredBB, &DT, /*MustDominate=*/true)) return Tmp.getAddr(); // If we don't have an available version of this value, it must be an diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index 0bd427bf74e9..f82235d0c26e 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -1712,7 +1712,7 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, // would confuse the logic below that expects proper IVs. 
if (Value *V = SimplifyInstruction(Phi, DL, SE.TLI, SE.DT, SE.AC)) { Phi->replaceAllUsesWith(V); - DeadInsts.push_back(Phi); + DeadInsts.emplace_back(Phi); ++NumElim; DEBUG_WITH_TYPE(DebugType, dbgs() << "INDVARS: Eliminated constant iv: " << *Phi << '\n'); @@ -1787,7 +1787,7 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, CreateTruncOrBitCast(OrigInc, IsomorphicInc->getType(), IVName); } IsomorphicInc->replaceAllUsesWith(NewInc); - DeadInsts.push_back(IsomorphicInc); + DeadInsts.emplace_back(IsomorphicInc); } } DEBUG_WITH_TYPE(DebugType, dbgs() @@ -1800,13 +1800,30 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName); } Phi->replaceAllUsesWith(NewIV); - DeadInsts.push_back(Phi); + DeadInsts.emplace_back(Phi); } return NumElim; } bool SCEVExpander::isHighCostExpansionHelper( const SCEV *S, Loop *L, SmallPtrSetImpl<const SCEV *> &Processed) { + + // Zero/One operand expressions + switch (S->getSCEVType()) { + case scUnknown: + case scConstant: + return false; + case scTruncate: + return isHighCostExpansionHelper(cast<SCEVTruncateExpr>(S)->getOperand(), L, + Processed); + case scZeroExtend: + return isHighCostExpansionHelper(cast<SCEVZeroExtendExpr>(S)->getOperand(), + L, Processed); + case scSignExtend: + return isHighCostExpansionHelper(cast<SCEVSignExtendExpr>(S)->getOperand(), + L, Processed); + } + if (!Processed.insert(S).second) return false; @@ -1849,23 +1866,22 @@ bool SCEVExpander::isHighCostExpansionHelper( } } - // Recurse past add expressions, which commonly occur in the + // HowManyLessThans uses a Max expression whenever the loop is not guarded by + // the exit condition. + if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S)) + return true; + + // Recurse past nary expressions, which commonly occur in the // BackedgeTakenCount. They may already exist in program code, and if not, // they are not too expensive rematerialize. - if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { - for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); + if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(S)) { + for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); I != E; ++I) { if (isHighCostExpansionHelper(*I, L, Processed)) return true; } - return false; } - // HowManyLessThans uses a Max expression whenever the loop is not guarded by - // the exit condition. - if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S)) - return true; - // If we haven't recognized an expensive SCEV pattern, assume it's an // expression produced by program code. 
return false; diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index e1744d1f2965..24cada3e5313 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -100,9 +100,10 @@ bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const { bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, - int64_t Scale) const { + int64_t Scale, + unsigned AddrSpace) const { return TTIImpl->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, - Scale); + Scale, AddrSpace); } bool TargetTransformInfo::isLegalMaskedStore(Type *DataType, @@ -118,9 +119,10 @@ bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType, int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, - int64_t Scale) const { + int64_t Scale, + unsigned AddrSpace) const { return TTIImpl->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, - Scale); + Scale, AddrSpace); } bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const { @@ -235,6 +237,13 @@ TargetTransformInfo::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, return TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace); } +unsigned TargetTransformInfo::getInterleavedMemoryOpCost( + unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, + unsigned Alignment, unsigned AddressSpace) const { + return TTIImpl->getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, + Alignment, AddressSpace); +} + unsigned TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys) const { diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index a55712c6296a..c4f046340fce 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -2967,38 +2967,25 @@ static bool isDereferenceablePointer(const Value *V, const DataLayout &DL, // For GEPs, determine if the indexing lands within the allocated object. if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { + Type *VTy = GEP->getType(); + Type *Ty = VTy->getPointerElementType(); + const Value *Base = GEP->getPointerOperand(); + // Conservatively require that the base pointer be fully dereferenceable. - if (!Visited.insert(GEP->getOperand(0)).second) + if (!Visited.insert(Base).second) return false; - if (!isDereferenceablePointer(GEP->getOperand(0), DL, CtxI, + if (!isDereferenceablePointer(Base, DL, CtxI, DT, TLI, Visited)) return false; - // Check the indices. - gep_type_iterator GTI = gep_type_begin(GEP); - for (User::const_op_iterator I = GEP->op_begin()+1, - E = GEP->op_end(); I != E; ++I) { - Value *Index = *I; - Type *Ty = *GTI++; - // Struct indices can't be out of bounds. - if (isa<StructType>(Ty)) - continue; - ConstantInt *CI = dyn_cast<ConstantInt>(Index); - if (!CI) - return false; - // Zero is always ok. - if (CI->isZero()) - continue; - // Check to see that it's within the bounds of an array. - ArrayType *ATy = dyn_cast<ArrayType>(Ty); - if (!ATy) - return false; - if (CI->getValue().getActiveBits() > 64) - return false; - if (CI->getZExtValue() >= ATy->getNumElements()) - return false; - } - // Indices check out; this is dereferenceable. - return true; + + APInt Offset(DL.getPointerTypeSizeInBits(VTy), 0); + if (!GEP->accumulateConstantOffset(DL, Offset)) + return false; + + // Check if the load is within the bounds of the underlying object. 
+ uint64_t LoadSize = DL.getTypeStoreSize(Ty); + Type *BaseType = Base->getType()->getPointerElementType(); + return (Offset + LoadSize).ule(DL.getTypeAllocSize(BaseType)); } // For gc.relocate, look through relocations
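
As a closing reading aid for the densest part of this change, here are the two new dependence-distance tests from MemoryDepChecker::isDependent (LoopAccessAnalysis.cpp above), restated as a self-contained sketch; the arithmetic mirrors the patch, and the driver values come from the patch's own worked comments:

#include <cassert>
#include <cstdio>

// Two accesses with the same element stride and a positive byte distance
// never overlap when the distance, scaled to elements, is not a multiple
// of the stride (areStridedAccessesIndependent in the patch).
static bool areStridedAccessesIndependent(unsigned Distance, unsigned Stride,
                                          unsigned TypeByteSize) {
  assert(Stride > 1 && TypeByteSize > 0 && Distance > 0);
  if (Distance % TypeByteSize) // not a whole number of elements: give up
    return false;
  return (Distance / TypeByteSize) % Stride != 0;
}

// Minimum byte distance needed to vectorize/unroll MinNumIter iterations:
// TypeByteSize * Stride per iteration except the last, which needs only
// TypeByteSize (no trailing gap).
static unsigned minDistanceNeeded(unsigned TypeByteSize, unsigned Stride,
                                  unsigned MinNumIter) {
  return TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
}

int main() {
  // for (i = 0; i < 1024; i += 4) A[i+2] = A[i] + 1; with 4-byte ints:
  // distance 8 bytes, scaled distance 2, stride 4 -> independent.
  std::printf("%d\n", areStridedAccessesIndependent(8, 4, 4)); // 1
  // B = (int *)((char *)A + 14), i += 2: with MinNumIter = 2 the bound is
  // 4 * 2 * 1 + 4 = 12 <= 14, so vectorizing two iterations is safe...
  std::printf("%u\n", minDistanceNeeded(4, 2, 2)); // 12
  // ...but a forced factor of 4 needs 4 * 2 * 3 + 4 = 28 > 14: unsafe.
  std::printf("%u\n", minDistanceNeeded(4, 2, 4)); // 28
  return 0;
}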