Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms/Scalar')
56 files changed, 3161 insertions, 2409 deletions
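The first two hunks below (ADCE and BDCE) stop unlinking dead instructions while the function is still being walked and instead defer dropAllReferences() to a separate pass over the collected worklist, erasing only afterwards. A minimal standalone sketch of that two-phase deletion pattern, using an invented Node type rather than LLVM's Instruction:

#include <algorithm>
#include <memory>
#include <vector>

// Toy IR node: raw pointers to operands, plus a liveness flag.
struct Node {
  std::vector<Node *> Operands;
  bool Dead = false;
};

void removeDead(std::vector<std::unique_ptr<Node>> &Nodes) {
  // Phase 1: collect the dead nodes.
  std::vector<Node *> Worklist;
  for (auto &N : Nodes)
    if (N->Dead)
      Worklist.push_back(N.get());

  // Phase 2: unlink every dead node (the dropAllReferences() analogue) before
  // any deletion, so no surviving pointer ever refers to freed memory, even
  // when dead nodes reference each other.
  for (Node *N : Worklist)
    N->Operands.clear();

  // Phase 3: actually delete.
  Nodes.erase(std::remove_if(Nodes.begin(), Nodes.end(),
                             [](const std::unique_ptr<Node> &N) {
                               return N->Dead;
                             }),
              Nodes.end());
}

Iterating the function in reverse, as the ADCE hunk now does, tends to visit users before their operands, which helps salvageDebugInfo see operands that have not yet been dropped.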
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ADCE.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ADCE.cpp index 6f3fdb88eda5..b693acceb3f6 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ADCE.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ADCE.cpp @@ -538,7 +538,7 @@ bool AggressiveDeadCodeElimination::removeDeadInstructions() { // that have no side effects and do not influence the control flow or return // value of the function, and may therefore be deleted safely. // NOTE: We reuse the Worklist vector here for memory efficiency. - for (Instruction &I : instructions(F)) { + for (Instruction &I : llvm::reverse(instructions(F))) { // Check if the instruction is alive. if (isLive(&I)) continue; @@ -554,9 +554,11 @@ bool AggressiveDeadCodeElimination::removeDeadInstructions() { // Prepare to delete. Worklist.push_back(&I); salvageDebugInfo(I); - I.dropAllReferences(); } + for (Instruction *&I : Worklist) + I->dropAllReferences(); + for (Instruction *&I : Worklist) { ++NumRemoved; I->eraseFromParent(); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/BDCE.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/BDCE.cpp index c06125788f37..6c2467db79f7 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/BDCE.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/BDCE.cpp @@ -53,7 +53,7 @@ static void clearAssumptionsOfUsers(Instruction *I, DemandedBits &DB) { // in the def-use chain needs to be changed. auto *J = dyn_cast<Instruction>(JU); if (J && J->getType()->isIntOrIntVectorTy() && - !DB.getDemandedBits(J).isAllOnesValue()) { + !DB.getDemandedBits(J).isAllOnes()) { Visited.insert(J); WorkList.push_back(J); } @@ -84,7 +84,7 @@ static void clearAssumptionsOfUsers(Instruction *I, DemandedBits &DB) { // that in the def-use chain needs to be changed. auto *K = dyn_cast<Instruction>(KU); if (K && Visited.insert(K).second && K->getType()->isIntOrIntVectorTy() && - !DB.getDemandedBits(K).isAllOnesValue()) + !DB.getDemandedBits(K).isAllOnes()) WorkList.push_back(K); } } @@ -103,12 +103,9 @@ static bool bitTrackingDCE(Function &F, DemandedBits &DB) { // Remove instructions that are dead, either because they were not reached // during analysis or have no demanded bits. if (DB.isInstructionDead(&I) || - (I.getType()->isIntOrIntVectorTy() && - DB.getDemandedBits(&I).isNullValue() && + (I.getType()->isIntOrIntVectorTy() && DB.getDemandedBits(&I).isZero() && wouldInstructionBeTriviallyDead(&I))) { - salvageDebugInfo(I); Worklist.push_back(&I); - I.dropAllReferences(); Changed = true; continue; } @@ -155,6 +152,11 @@ static bool bitTrackingDCE(Function &F, DemandedBits &DB) { } } + for (Instruction *&I : llvm::reverse(Worklist)) { + salvageDebugInfo(*I); + I->dropAllReferences(); + } + for (Instruction *&I : Worklist) { ++NumRemoved; I->eraseFromParent(); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp index 2eb94b721d96..95de59fa8262 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp @@ -467,7 +467,7 @@ static PredsWithCondsTy shouldSplitOnPredicatedArgument(CallBase &CB, BasicBlock *StopAt = CSDTNode ? 
CSDTNode->getIDom()->getBlock() : nullptr; SmallVector<std::pair<BasicBlock *, ConditionsTy>, 2> PredsCS; - for (auto *Pred : make_range(Preds.rbegin(), Preds.rend())) { + for (auto *Pred : llvm::reverse(Preds)) { ConditionsTy Conditions; // Record condition on edge BB(CS) <- Pred recordCondition(CB, Pred, CB.getParent(), Conditions); @@ -505,8 +505,7 @@ static bool doCallSiteSplitting(Function &F, TargetLibraryInfo &TLI, DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Lazy); bool Changed = false; - for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE;) { - BasicBlock &BB = *BI++; + for (BasicBlock &BB : llvm::make_early_inc_range(F)) { auto II = BB.getFirstNonPHIOrDbg()->getIterator(); auto IE = BB.getTerminator()->getIterator(); // Iterate until we reach the terminator instruction. tryToSplitCallSite diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp index 535f50d4f904..27f54f8026e1 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -762,7 +762,7 @@ void ConstantHoistingPass::emitBaseConstants(Instruction *Base, PointerType *Int8PtrTy = Type::getInt8PtrTy(*Ctx, cast<PointerType>(Ty)->getAddressSpace()); Base = new BitCastInst(Base, Int8PtrTy, "base_bitcast", InsertionPt); - Mat = GetElementPtrInst::Create(Int8PtrTy->getElementType(), Base, + Mat = GetElementPtrInst::Create(Type::getInt8Ty(*Ctx), Base, Offset, "mat_gep", InsertionPt); Mat = new BitCastInst(Mat, Ty, "mat_bitcast", InsertionPt); } else @@ -819,10 +819,9 @@ void ConstantHoistingPass::emitBaseConstants(Instruction *Base, // Aside from constant GEPs, only constant cast expressions are collected. assert(ConstExpr->isCast() && "ConstExpr should be a cast"); - Instruction *ConstExprInst = ConstExpr->getAsInstruction(); + Instruction *ConstExprInst = ConstExpr->getAsInstruction( + findMatInsertPt(ConstUser.Inst, ConstUser.OpndIdx)); ConstExprInst->setOperand(0, Mat); - ConstExprInst->insertBefore(findMatInsertPt(ConstUser.Inst, - ConstUser.OpndIdx)); // Use the same debug location as the instruction we are about to update. ConstExprInst->setDebugLoc(ConstUser.Inst->getDebugLoc()); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp index efd1c025d0cd..7f2d5d7d9987 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstraintSystem.h" #include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" @@ -268,6 +269,31 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT) { continue; WorkList.emplace_back(DT.getNode(&BB)); + // True as long as long as the current instruction is guaranteed to execute. + bool GuaranteedToExecute = true; + // Scan BB for assume calls. + // TODO: also use this scan to queue conditions to simplify, so we can + // interleave facts from assumes and conditions to simplify in a single + // basic block. And to skip another traversal of each basic block when + // simplifying. 
+ for (Instruction &I : BB) { + Value *Cond; + // For now, just handle assumes with a single compare as condition. + if (match(&I, m_Intrinsic<Intrinsic::assume>(m_Value(Cond))) && + isa<CmpInst>(Cond)) { + if (GuaranteedToExecute) { + // The assume is guaranteed to execute when BB is entered, hence Cond + // holds on entry to BB. + WorkList.emplace_back(DT.getNode(&BB), cast<CmpInst>(Cond), false); + } else { + // Otherwise the condition only holds in the successors. + for (BasicBlock *Succ : successors(&BB)) + WorkList.emplace_back(DT.getNode(Succ), cast<CmpInst>(Cond), false); + } + } + GuaranteedToExecute &= isGuaranteedToTransferExecutionToSuccessor(&I); + } + auto *Br = dyn_cast<BranchInst>(BB.getTerminator()); if (!Br || !Br->isConditional()) continue; @@ -395,8 +421,13 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT) { for (auto &E : reverse(DFSInStack)) dbgs() << " C " << *E.Condition << " " << E.IsNot << "\n"; }); - Cmp->replaceAllUsesWith( - ConstantInt::getTrue(F.getParent()->getContext())); + Cmp->replaceUsesWithIf( + ConstantInt::getTrue(F.getParent()->getContext()), [](Use &U) { + // Conditions in an assume trivially simplify to true. Skip uses + // in assume calls to not destroy the available information. + auto *II = dyn_cast<IntrinsicInst>(U.getUser()); + return !II || II->getIntrinsicID() != Intrinsic::assume; + }); NumCondsRemoved++; Changed = true; } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index 36cbd42a5fdd..ca9567dc7ac8 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -67,6 +67,7 @@ STATISTIC(NumUDivURemsNarrowed, STATISTIC(NumAShrs, "Number of ashr converted to lshr"); STATISTIC(NumSRems, "Number of srem converted to urem"); STATISTIC(NumSExt, "Number of sext converted to zext"); +STATISTIC(NumSICmps, "Number of signed icmp preds simplified to unsigned"); STATISTIC(NumAnd, "Number of ands removed"); STATISTIC(NumNW, "Number of no-wrap deductions"); STATISTIC(NumNSW, "Number of no-signed-wrap deductions"); @@ -295,11 +296,34 @@ static bool processMemAccess(Instruction *I, LazyValueInfo *LVI) { return true; } +static bool processICmp(ICmpInst *Cmp, LazyValueInfo *LVI) { + // Only for signed relational comparisons of scalar integers. + if (Cmp->getType()->isVectorTy() || + !Cmp->getOperand(0)->getType()->isIntegerTy()) + return false; + + if (!Cmp->isSigned()) + return false; + + ICmpInst::Predicate UnsignedPred = + ConstantRange::getEquivalentPredWithFlippedSignedness( + Cmp->getPredicate(), LVI->getConstantRange(Cmp->getOperand(0), Cmp), + LVI->getConstantRange(Cmp->getOperand(1), Cmp)); + + if (UnsignedPred == ICmpInst::Predicate::BAD_ICMP_PREDICATE) + return false; + + ++NumSICmps; + Cmp->setPredicate(UnsignedPred); + + return true; +} + /// See if LazyValueInfo's ability to exploit edge conditions or range /// information is sufficient to prove this comparison. Even for local /// conditions, this can sometimes prove conditions instcombine can't by /// exploiting range information. 
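The processICmp addition above leans on ConstantRange::getEquivalentPredWithFlippedSignedness to turn a signed comparison into an unsigned one. The basic idea, sketched here without LLVM's ConstantRange (the Range struct and function names are invented): when both operands are known to lie entirely on the same side of zero, signed and unsigned order agree, so the predicate can be flipped.

#include <cstdint>
#include <optional>

// Inclusive signed range [Lo, Hi] assumed known for each operand.
struct Range { int64_t Lo, Hi; };

enum class Pred { SLT, SLE, SGT, SGE, ULT, ULE, UGT, UGE };

// Returns the equivalent unsigned predicate if one exists, std::nullopt
// otherwise. Two values sharing a sign compare identically as signed and as
// unsigned integers in two's complement.
std::optional<Pred> flipToUnsigned(Pred P, Range A, Range B) {
  bool BothNonNeg = A.Lo >= 0 && B.Lo >= 0;
  bool BothNeg = A.Hi < 0 && B.Hi < 0;
  if (!BothNonNeg && !BothNeg)
    return std::nullopt;
  switch (P) {
  case Pred::SLT: return Pred::ULT;
  case Pred::SLE: return Pred::ULE;
  case Pred::SGT: return Pred::UGT;
  case Pred::SGE: return Pred::UGE;
  default:        return std::nullopt; // already unsigned, nothing to do
  }
}

The real helper is more general than this same-sign check, but the sketch captures why dropping the sign from the predicate is safe when the ranges permit it.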
-static bool processCmp(CmpInst *Cmp, LazyValueInfo *LVI) { +static bool constantFoldCmp(CmpInst *Cmp, LazyValueInfo *LVI) { Value *Op0 = Cmp->getOperand(0); auto *C = dyn_cast<Constant>(Cmp->getOperand(1)); if (!C) @@ -318,6 +342,17 @@ static bool processCmp(CmpInst *Cmp, LazyValueInfo *LVI) { return true; } +static bool processCmp(CmpInst *Cmp, LazyValueInfo *LVI) { + if (constantFoldCmp(Cmp, LVI)) + return true; + + if (auto *ICmp = dyn_cast<ICmpInst>(Cmp)) + if (processICmp(ICmp, LVI)) + return true; + + return false; +} + /// Simplify a switch instruction by removing cases which can never fire. If the /// uselessness of a case could be determined locally then constant propagation /// would already have figured it out. Instead, walk the predecessors and @@ -341,7 +376,13 @@ static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI, // ConstantFoldTerminator() as the underlying SwitchInst can be changed. SwitchInstProfUpdateWrapper SI(*I); - for (auto CI = SI->case_begin(), CE = SI->case_end(); CI != CE;) { + APInt Low = + APInt::getSignedMaxValue(Cond->getType()->getScalarSizeInBits()); + APInt High = + APInt::getSignedMinValue(Cond->getType()->getScalarSizeInBits()); + + SwitchInst::CaseIt CI = SI->case_begin(); + for (auto CE = SI->case_end(); CI != CE;) { ConstantInt *Case = CI->getCaseValue(); LazyValueInfo::Tristate State = LVI->getPredicateAt(CmpInst::ICMP_EQ, Cond, Case, I, @@ -374,9 +415,28 @@ static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI, break; } + // Get Lower/Upper bound from switch cases. + Low = APIntOps::smin(Case->getValue(), Low); + High = APIntOps::smax(Case->getValue(), High); + // Increment the case iterator since we didn't delete it. ++CI; } + + // Try to simplify default case as unreachable + if (CI == SI->case_end() && SI->getNumCases() != 0 && + !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg())) { + const ConstantRange SIRange = + LVI->getConstantRange(SI->getCondition(), SI); + + // If the numbered switch cases cover the entire range of the condition, + // then the default case is not reachable. + if (SIRange.getSignedMin() == Low && SIRange.getSignedMax() == High && + SI->getNumCases() == High - Low + 1) { + createUnreachableSwitchDefault(SI, &DTU); + Changed = true; + } + } } if (Changed) @@ -690,7 +750,7 @@ static bool narrowSDivOrSRem(BinaryOperator *Instr, LazyValueInfo *LVI) { // sdiv/srem is UB if divisor is -1 and divident is INT_MIN, so unless we can // prove that such a combination is impossible, we need to bump the bitwidth. - if (CRs[1]->contains(APInt::getAllOnesValue(OrigWidth)) && + if (CRs[1]->contains(APInt::getAllOnes(OrigWidth)) && CRs[0]->contains( APInt::getSignedMinValue(MinSignedBits).sextOrSelf(OrigWidth))) ++MinSignedBits; @@ -1023,49 +1083,48 @@ static bool runImpl(Function &F, LazyValueInfo *LVI, DominatorTree *DT, // blocks. 
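The processSwitch change above records the smallest and largest case value while walking the cases and then, if the condition's known range is exactly the contiguous run [Low, High], marks the default destination unreachable. The coverage test reduces to plain integer arithmetic; a sketch with invented names (createUnreachableSwitchDefault and LVI remain the pass's machinery):

#include <algorithm>
#include <cstdint>
#include <vector>

// Given the distinct signed case values of a switch and the known signed
// range [RangeMin, RangeMax] of its condition, decide whether the default
// can never be taken. Overflow at the extremes of int64_t is ignored here
// for brevity.
bool defaultIsDead(const std::vector<int64_t> &Cases, int64_t RangeMin,
                   int64_t RangeMax) {
  if (Cases.empty())
    return false;
  int64_t Low = Cases.front(), High = Cases.front();
  for (int64_t C : Cases) {
    Low = std::min(Low, C);
    High = std::max(High, C);
  }
  // Case values are distinct, so covering the contiguous range [Low, High]
  // requires exactly High - Low + 1 of them; if that run also equals the
  // condition's full range, no value can reach the default.
  return RangeMin == Low && RangeMax == High &&
         (int64_t)Cases.size() == High - Low + 1;
}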
for (BasicBlock *BB : depth_first(&F.getEntryBlock())) { bool BBChanged = false; - for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { - Instruction *II = &*BI++; - switch (II->getOpcode()) { + for (Instruction &II : llvm::make_early_inc_range(*BB)) { + switch (II.getOpcode()) { case Instruction::Select: - BBChanged |= processSelect(cast<SelectInst>(II), LVI); + BBChanged |= processSelect(cast<SelectInst>(&II), LVI); break; case Instruction::PHI: - BBChanged |= processPHI(cast<PHINode>(II), LVI, DT, SQ); + BBChanged |= processPHI(cast<PHINode>(&II), LVI, DT, SQ); break; case Instruction::ICmp: case Instruction::FCmp: - BBChanged |= processCmp(cast<CmpInst>(II), LVI); + BBChanged |= processCmp(cast<CmpInst>(&II), LVI); break; case Instruction::Load: case Instruction::Store: - BBChanged |= processMemAccess(II, LVI); + BBChanged |= processMemAccess(&II, LVI); break; case Instruction::Call: case Instruction::Invoke: - BBChanged |= processCallSite(cast<CallBase>(*II), LVI); + BBChanged |= processCallSite(cast<CallBase>(II), LVI); break; case Instruction::SRem: case Instruction::SDiv: - BBChanged |= processSDivOrSRem(cast<BinaryOperator>(II), LVI); + BBChanged |= processSDivOrSRem(cast<BinaryOperator>(&II), LVI); break; case Instruction::UDiv: case Instruction::URem: - BBChanged |= processUDivOrURem(cast<BinaryOperator>(II), LVI); + BBChanged |= processUDivOrURem(cast<BinaryOperator>(&II), LVI); break; case Instruction::AShr: - BBChanged |= processAShr(cast<BinaryOperator>(II), LVI); + BBChanged |= processAShr(cast<BinaryOperator>(&II), LVI); break; case Instruction::SExt: - BBChanged |= processSExt(cast<SExtInst>(II), LVI); + BBChanged |= processSExt(cast<SExtInst>(&II), LVI); break; case Instruction::Add: case Instruction::Sub: case Instruction::Mul: case Instruction::Shl: - BBChanged |= processBinOp(cast<BinaryOperator>(II), LVI); + BBChanged |= processBinOp(cast<BinaryOperator>(&II), LVI); break; case Instruction::And: - BBChanged |= processAnd(cast<BinaryOperator>(II), LVI); + BBChanged |= processAnd(cast<BinaryOperator>(&II), LVI); break; } } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp index 90679bcac4b7..8c4523206070 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp @@ -1,9 +1,8 @@ //===- DFAJumpThreading.cpp - Threads a switch statement inside a loop ----===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -84,8 +83,6 @@ #include "llvm/Transforms/Utils/ValueMapper.h" #include <algorithm> #include <deque> -#include <unordered_map> -#include <unordered_set> using namespace llvm; @@ -147,8 +144,7 @@ private: Stack.push_back(SIToUnfold); while (!Stack.empty()) { - SelectInstToUnfold SIToUnfold = Stack.back(); - Stack.pop_back(); + SelectInstToUnfold SIToUnfold = Stack.pop_back_val(); std::vector<SelectInstToUnfold> NewSIsToUnfold; std::vector<BasicBlock *> NewBBs; @@ -174,6 +170,7 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<DominatorTreeWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); AU.addRequired<TargetTransformInfoWrapperPass>(); AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); } @@ -350,7 +347,7 @@ struct ClonedBlock { typedef std::deque<BasicBlock *> PathType; typedef std::vector<PathType> PathsType; -typedef std::set<const BasicBlock *> VisitedBlocks; +typedef SmallPtrSet<const BasicBlock *, 8> VisitedBlocks; typedef std::vector<ClonedBlock> CloneList; // This data structure keeps track of all blocks that have been cloned. If two @@ -493,7 +490,7 @@ private: } bool isPredictableValue(Value *InpVal, SmallSet<Value *, 16> &SeenValues) { - if (SeenValues.find(InpVal) != SeenValues.end()) + if (SeenValues.contains(InpVal)) return true; if (isa<ConstantInt>(InpVal)) @@ -508,7 +505,7 @@ private: void addInstToQueue(Value *Val, std::deque<Instruction *> &Q, SmallSet<Value *, 16> &SeenValues) { - if (SeenValues.find(Val) != SeenValues.end()) + if (SeenValues.contains(Val)) return; if (Instruction *I = dyn_cast<Instruction>(Val)) Q.push_back(I); @@ -533,7 +530,7 @@ private: return false; if (isa<PHINode>(SIUse) && - SIBB->getSingleSuccessor() != dyn_cast<Instruction>(SIUse)->getParent()) + SIBB->getSingleSuccessor() != cast<Instruction>(SIUse)->getParent()) return false; // If select will not be sunk during unfolding, and it is in the same basic @@ -621,13 +618,9 @@ private: // Some blocks have multiple edges to the same successor, and this set // is used to prevent a duplicate path from being generated SmallSet<BasicBlock *, 4> Successors; - - for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI) { - BasicBlock *Succ = *SI; - - if (Successors.find(Succ) != Successors.end()) + for (BasicBlock *Succ : successors(BB)) { + if (!Successors.insert(Succ).second) continue; - Successors.insert(Succ); // Found a cycle through the SwitchBlock if (Succ == SwitchBlock) { @@ -636,7 +629,7 @@ private: } // We have encountered a cycle, do not get caught in it - if (Visited.find(Succ) != Visited.end()) + if (Visited.contains(Succ)) continue; PathsType SuccPaths = paths(Succ, Visited, PathDepth + 1); @@ -668,15 +661,14 @@ private: SmallSet<Value *, 16> SeenValues; while (!Stack.empty()) { - PHINode *CurPhi = Stack.back(); - Stack.pop_back(); + PHINode *CurPhi = Stack.pop_back_val(); Res[CurPhi->getParent()] = CurPhi; SeenValues.insert(CurPhi); for (Value *Incoming : CurPhi->incoming_values()) { if (Incoming == FirstDef || isa<ConstantInt>(Incoming) || - SeenValues.find(Incoming) != SeenValues.end()) { + SeenValues.contains(Incoming)) { continue; } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index d22b3f409585..a8ec8bb97970 100644 
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -13,10 +13,10 @@ // in between both MemoryDefs. A bit more concretely: // // For all MemoryDefs StartDef: -// 1. Get the next dominating clobbering MemoryDef (EarlierAccess) by walking +// 1. Get the next dominating clobbering MemoryDef (MaybeDeadAccess) by walking // upwards. -// 2. Check that there are no reads between EarlierAccess and the StartDef by -// checking all uses starting at EarlierAccess and walking until we see +// 2. Check that there are no reads between MaybeDeadAccess and the StartDef by +// checking all uses starting at MaybeDeadAccess and walking until we see // StartDef. // 3. For each found CurrentDef, check that: // 1. There are no barrier instructions between CurrentDef and StartDef (like @@ -56,6 +56,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" @@ -78,6 +79,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" +#include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/Local.h" #include <algorithm> #include <cassert> @@ -122,7 +124,7 @@ EnablePartialStoreMerging("enable-dse-partial-store-merging", static cl::opt<unsigned> MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden, cl::desc("The number of memory instructions to scan for " - "dead store elimination (default = 100)")); + "dead store elimination (default = 150)")); static cl::opt<unsigned> MemorySSAUpwardsStepLimit( "dse-memoryssa-walklimit", cl::init(90), cl::Hidden, cl::desc("The maximum number of steps while walking upwards to find " @@ -203,39 +205,6 @@ static bool hasAnalyzableMemoryWrite(Instruction *I, return false; } -/// Return a Location stored to by the specified instruction. If isRemovable -/// returns true, this function and getLocForRead completely describe the memory -/// operations for this instruction. -static MemoryLocation getLocForWrite(Instruction *Inst, - const TargetLibraryInfo &TLI) { - if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) - return MemoryLocation::get(SI); - - // memcpy/memmove/memset. - if (auto *MI = dyn_cast<AnyMemIntrinsic>(Inst)) - return MemoryLocation::getForDest(MI); - - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { - switch (II->getIntrinsicID()) { - default: - return MemoryLocation(); // Unhandled intrinsic. - case Intrinsic::init_trampoline: - return MemoryLocation::getAfter(II->getArgOperand(0)); - case Intrinsic::masked_store: - return MemoryLocation::getForArgument(II, 1, TLI); - case Intrinsic::lifetime_end: { - uint64_t Len = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(); - return MemoryLocation(II->getArgOperand(1), Len); - } - } - } - if (auto *CB = dyn_cast<CallBase>(Inst)) - // All the supported TLI functions so far happen to have dest as their - // first argument. - return MemoryLocation::getAfter(CB->getArgOperand(0)); - return MemoryLocation(); -} - /// If the value of this instruction and the memory it writes to is unused, may /// we delete this instruction? static bool isRemovable(Instruction *I) { @@ -333,147 +302,146 @@ enum OverwriteResult { } // end anonymous namespace /// Check if two instruction are masked stores that completely -/// overwrite one another. 
More specifically, \p Later has to -/// overwrite \p Earlier. -static OverwriteResult isMaskedStoreOverwrite(const Instruction *Later, - const Instruction *Earlier, +/// overwrite one another. More specifically, \p KillingI has to +/// overwrite \p DeadI. +static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI, + const Instruction *DeadI, BatchAAResults &AA) { - const auto *IIL = dyn_cast<IntrinsicInst>(Later); - const auto *IIE = dyn_cast<IntrinsicInst>(Earlier); - if (IIL == nullptr || IIE == nullptr) + const auto *KillingII = dyn_cast<IntrinsicInst>(KillingI); + const auto *DeadII = dyn_cast<IntrinsicInst>(DeadI); + if (KillingII == nullptr || DeadII == nullptr) return OW_Unknown; - if (IIL->getIntrinsicID() != Intrinsic::masked_store || - IIE->getIntrinsicID() != Intrinsic::masked_store) + if (KillingII->getIntrinsicID() != Intrinsic::masked_store || + DeadII->getIntrinsicID() != Intrinsic::masked_store) return OW_Unknown; // Pointers. - Value *LP = IIL->getArgOperand(1)->stripPointerCasts(); - Value *EP = IIE->getArgOperand(1)->stripPointerCasts(); - if (LP != EP && !AA.isMustAlias(LP, EP)) + Value *KillingPtr = KillingII->getArgOperand(1)->stripPointerCasts(); + Value *DeadPtr = DeadII->getArgOperand(1)->stripPointerCasts(); + if (KillingPtr != DeadPtr && !AA.isMustAlias(KillingPtr, DeadPtr)) return OW_Unknown; // Masks. - // TODO: check that Later's mask is a superset of the Earlier's mask. - if (IIL->getArgOperand(3) != IIE->getArgOperand(3)) + // TODO: check that KillingII's mask is a superset of the DeadII's mask. + if (KillingII->getArgOperand(3) != DeadII->getArgOperand(3)) return OW_Unknown; return OW_Complete; } -/// Return 'OW_Complete' if a store to the 'Later' location completely -/// overwrites a store to the 'Earlier' location, 'OW_End' if the end of the -/// 'Earlier' location is completely overwritten by 'Later', 'OW_Begin' if the -/// beginning of the 'Earlier' location is overwritten by 'Later'. -/// 'OW_PartialEarlierWithFullLater' means that an earlier (big) store was -/// overwritten by a latter (smaller) store which doesn't write outside the big +/// Return 'OW_Complete' if a store to the 'KillingLoc' location completely +/// overwrites a store to the 'DeadLoc' location, 'OW_End' if the end of the +/// 'DeadLoc' location is completely overwritten by 'KillingLoc', 'OW_Begin' +/// if the beginning of the 'DeadLoc' location is overwritten by 'KillingLoc'. +/// 'OW_PartialEarlierWithFullLater' means that a dead (big) store was +/// overwritten by a killing (smaller) store which doesn't write outside the big /// store's memory locations. Returns 'OW_Unknown' if nothing can be determined. -/// NOTE: This function must only be called if both \p Later and \p Earlier -/// write to the same underlying object with valid \p EarlierOff and \p -/// LaterOff. -static OverwriteResult isPartialOverwrite(const MemoryLocation &Later, - const MemoryLocation &Earlier, - int64_t EarlierOff, int64_t LaterOff, - Instruction *DepWrite, +/// NOTE: This function must only be called if both \p KillingLoc and \p +/// DeadLoc belong to the same underlying object with valid \p KillingOff and +/// \p DeadOff. 
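The renamed isPartialOverwrite below keeps, for each dead store, a map of killing intervals keyed by end offset with the start offset as the value, merging overlapping or adjacent intervals as new killing stores are recorded. That bookkeeping can be sketched independently of LLVM with a std::map standing in for OverlapIntervalsTy (the function name is invented):

#include <algorithm>
#include <cstdint>
#include <map>

// One entry per maximal interval, stored as End -> Start (half-open [Start, End)).
using Intervals = std::map<int64_t, int64_t>;

// Record that [Start, End) of the dead store has been overwritten; return true
// once the recorded intervals cover all of [DeadStart, DeadEnd).
bool addKillingInterval(Intervals &IM, int64_t Start, int64_t End,
                        int64_t DeadStart, int64_t DeadEnd) {
  // Fold every existing interval that touches [Start, End) into it; the first
  // candidate is the first interval whose end is not before Start.
  auto It = IM.lower_bound(Start);
  while (It != IM.end() && It->second <= End) {
    Start = std::min(Start, It->second);
    End = std::max(End, It->first);
    It = IM.erase(It);
  }
  IM[End] = Start;

  // Full coverage iff the lowest remaining interval spans the whole dead store.
  auto First = IM.begin();
  return First->second <= DeadStart && First->first >= DeadEnd;
}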
+static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc, + const MemoryLocation &DeadLoc, + int64_t KillingOff, int64_t DeadOff, + Instruction *DeadI, InstOverlapIntervalsTy &IOL) { - const uint64_t LaterSize = Later.Size.getValue(); - const uint64_t EarlierSize = Earlier.Size.getValue(); + const uint64_t KillingSize = KillingLoc.Size.getValue(); + const uint64_t DeadSize = DeadLoc.Size.getValue(); // We may now overlap, although the overlap is not complete. There might also // be other incomplete overlaps, and together, they might cover the complete - // earlier write. + // dead store. // Note: The correctness of this logic depends on the fact that this function // is not even called providing DepWrite when there are any intervening reads. if (EnablePartialOverwriteTracking && - LaterOff < int64_t(EarlierOff + EarlierSize) && - int64_t(LaterOff + LaterSize) >= EarlierOff) { + KillingOff < int64_t(DeadOff + DeadSize) && + int64_t(KillingOff + KillingSize) >= DeadOff) { // Insert our part of the overlap into the map. - auto &IM = IOL[DepWrite]; - LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: Earlier [" << EarlierOff - << ", " << int64_t(EarlierOff + EarlierSize) - << ") Later [" << LaterOff << ", " - << int64_t(LaterOff + LaterSize) << ")\n"); + auto &IM = IOL[DeadI]; + LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: DeadLoc [" << DeadOff << ", " + << int64_t(DeadOff + DeadSize) << ") KillingLoc [" + << KillingOff << ", " << int64_t(KillingOff + KillingSize) + << ")\n"); // Make sure that we only insert non-overlapping intervals and combine // adjacent intervals. The intervals are stored in the map with the ending // offset as the key (in the half-open sense) and the starting offset as // the value. - int64_t LaterIntStart = LaterOff, LaterIntEnd = LaterOff + LaterSize; + int64_t KillingIntStart = KillingOff; + int64_t KillingIntEnd = KillingOff + KillingSize; - // Find any intervals ending at, or after, LaterIntStart which start - // before LaterIntEnd. - auto ILI = IM.lower_bound(LaterIntStart); - if (ILI != IM.end() && ILI->second <= LaterIntEnd) { + // Find any intervals ending at, or after, KillingIntStart which start + // before KillingIntEnd. + auto ILI = IM.lower_bound(KillingIntStart); + if (ILI != IM.end() && ILI->second <= KillingIntEnd) { // This existing interval is overlapped with the current store somewhere - // in [LaterIntStart, LaterIntEnd]. Merge them by erasing the existing + // in [KillingIntStart, KillingIntEnd]. Merge them by erasing the existing // intervals and adjusting our start and end. - LaterIntStart = std::min(LaterIntStart, ILI->second); - LaterIntEnd = std::max(LaterIntEnd, ILI->first); + KillingIntStart = std::min(KillingIntStart, ILI->second); + KillingIntEnd = std::max(KillingIntEnd, ILI->first); ILI = IM.erase(ILI); // Continue erasing and adjusting our end in case other previous // intervals are also overlapped with the current store. 
// - // |--- ealier 1 ---| |--- ealier 2 ---| - // |------- later---------| + // |--- dead 1 ---| |--- dead 2 ---| + // |------- killing---------| // - while (ILI != IM.end() && ILI->second <= LaterIntEnd) { - assert(ILI->second > LaterIntStart && "Unexpected interval"); - LaterIntEnd = std::max(LaterIntEnd, ILI->first); + while (ILI != IM.end() && ILI->second <= KillingIntEnd) { + assert(ILI->second > KillingIntStart && "Unexpected interval"); + KillingIntEnd = std::max(KillingIntEnd, ILI->first); ILI = IM.erase(ILI); } } - IM[LaterIntEnd] = LaterIntStart; + IM[KillingIntEnd] = KillingIntStart; ILI = IM.begin(); - if (ILI->second <= EarlierOff && - ILI->first >= int64_t(EarlierOff + EarlierSize)) { - LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: Earlier [" - << EarlierOff << ", " - << int64_t(EarlierOff + EarlierSize) - << ") Composite Later [" << ILI->second << ", " + if (ILI->second <= DeadOff && ILI->first >= int64_t(DeadOff + DeadSize)) { + LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: DeadLoc [" + << DeadOff << ", " << int64_t(DeadOff + DeadSize) + << ") Composite KillingLoc [" << ILI->second << ", " << ILI->first << ")\n"); ++NumCompletePartials; return OW_Complete; } } - // Check for an earlier store which writes to all the memory locations that - // the later store writes to. - if (EnablePartialStoreMerging && LaterOff >= EarlierOff && - int64_t(EarlierOff + EarlierSize) > LaterOff && - uint64_t(LaterOff - EarlierOff) + LaterSize <= EarlierSize) { - LLVM_DEBUG(dbgs() << "DSE: Partial overwrite an earlier load [" - << EarlierOff << ", " - << int64_t(EarlierOff + EarlierSize) - << ") by a later store [" << LaterOff << ", " - << int64_t(LaterOff + LaterSize) << ")\n"); + // Check for a dead store which writes to all the memory locations that + // the killing store writes to. + if (EnablePartialStoreMerging && KillingOff >= DeadOff && + int64_t(DeadOff + DeadSize) > KillingOff && + uint64_t(KillingOff - DeadOff) + KillingSize <= DeadSize) { + LLVM_DEBUG(dbgs() << "DSE: Partial overwrite a dead load [" << DeadOff + << ", " << int64_t(DeadOff + DeadSize) + << ") by a killing store [" << KillingOff << ", " + << int64_t(KillingOff + KillingSize) << ")\n"); // TODO: Maybe come up with a better name? return OW_PartialEarlierWithFullLater; } - // Another interesting case is if the later store overwrites the end of the - // earlier store. + // Another interesting case is if the killing store overwrites the end of the + // dead store. // - // |--earlier--| - // |-- later --| + // |--dead--| + // |-- killing --| // - // In this case we may want to trim the size of earlier to avoid generating - // writes to addresses which will definitely be overwritten later + // In this case we may want to trim the size of dead store to avoid + // generating stores to addresses which will definitely be overwritten killing + // store. if (!EnablePartialOverwriteTracking && - (LaterOff > EarlierOff && LaterOff < int64_t(EarlierOff + EarlierSize) && - int64_t(LaterOff + LaterSize) >= int64_t(EarlierOff + EarlierSize))) + (KillingOff > DeadOff && KillingOff < int64_t(DeadOff + DeadSize) && + int64_t(KillingOff + KillingSize) >= int64_t(DeadOff + DeadSize))) return OW_End; - // Finally, we also need to check if the later store overwrites the beginning - // of the earlier store. + // Finally, we also need to check if the killing store overwrites the + // beginning of the dead store. 
// - // |--earlier--| - // |-- later --| + // |--dead--| + // |-- killing --| // // In this case we may want to move the destination address and trim the size - // of earlier to avoid generating writes to addresses which will definitely - // be overwritten later. + // of dead store to avoid generating stores to addresses which will definitely + // be overwritten killing store. if (!EnablePartialOverwriteTracking && - (LaterOff <= EarlierOff && int64_t(LaterOff + LaterSize) > EarlierOff)) { - assert(int64_t(LaterOff + LaterSize) < int64_t(EarlierOff + EarlierSize) && + (KillingOff <= DeadOff && int64_t(KillingOff + KillingSize) > DeadOff)) { + assert(int64_t(KillingOff + KillingSize) < int64_t(DeadOff + DeadSize) && "Expect to be handled as OW_Complete"); return OW_Begin; } @@ -505,7 +473,12 @@ memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI, BasicBlock::iterator SecondBBI(SecondI); BasicBlock *FirstBB = FirstI->getParent(); BasicBlock *SecondBB = SecondI->getParent(); - MemoryLocation MemLoc = MemoryLocation::get(SecondI); + MemoryLocation MemLoc; + if (auto *MemSet = dyn_cast<MemSetInst>(SecondI)) + MemLoc = MemoryLocation::getForDest(MemSet); + else + MemLoc = MemoryLocation::get(SecondI); + auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr); // Start checking the SecondBB. @@ -568,11 +541,11 @@ memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI, return true; } -static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierStart, - uint64_t &EarlierSize, int64_t LaterStart, - uint64_t LaterSize, bool IsOverwriteEnd) { - auto *EarlierIntrinsic = cast<AnyMemIntrinsic>(EarlierWrite); - Align PrefAlign = EarlierIntrinsic->getDestAlign().valueOrOne(); +static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart, + uint64_t &DeadSize, int64_t KillingStart, + uint64_t KillingSize, bool IsOverwriteEnd) { + auto *DeadIntrinsic = cast<AnyMemIntrinsic>(DeadI); + Align PrefAlign = DeadIntrinsic->getDestAlign().valueOrOne(); // We assume that memet/memcpy operates in chunks of the "largest" native // type size and aligned on the same value. That means optimal start and size @@ -593,19 +566,19 @@ static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierStart, // Compute start and size of the region to remove. Make sure 'PrefAlign' is // maintained on the remaining store. if (IsOverwriteEnd) { - // Calculate required adjustment for 'LaterStart'in order to keep remaining - // store size aligned on 'PerfAlign'. + // Calculate required adjustment for 'KillingStart' in order to keep + // remaining store size aligned on 'PerfAlign'. 
uint64_t Off = - offsetToAlignment(uint64_t(LaterStart - EarlierStart), PrefAlign); - ToRemoveStart = LaterStart + Off; - if (EarlierSize <= uint64_t(ToRemoveStart - EarlierStart)) + offsetToAlignment(uint64_t(KillingStart - DeadStart), PrefAlign); + ToRemoveStart = KillingStart + Off; + if (DeadSize <= uint64_t(ToRemoveStart - DeadStart)) return false; - ToRemoveSize = EarlierSize - uint64_t(ToRemoveStart - EarlierStart); + ToRemoveSize = DeadSize - uint64_t(ToRemoveStart - DeadStart); } else { - ToRemoveStart = EarlierStart; - assert(LaterSize >= uint64_t(EarlierStart - LaterStart) && + ToRemoveStart = DeadStart; + assert(KillingSize >= uint64_t(DeadStart - KillingStart) && "Not overlapping accesses?"); - ToRemoveSize = LaterSize - uint64_t(EarlierStart - LaterStart); + ToRemoveSize = KillingSize - uint64_t(DeadStart - KillingStart); // Calculate required adjustment for 'ToRemoveSize'in order to keep // start of the remaining store aligned on 'PerfAlign'. uint64_t Off = offsetToAlignment(ToRemoveSize, PrefAlign); @@ -619,10 +592,10 @@ static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierStart, } assert(ToRemoveSize > 0 && "Shouldn't reach here if nothing to remove"); - assert(EarlierSize > ToRemoveSize && "Can't remove more than original size"); + assert(DeadSize > ToRemoveSize && "Can't remove more than original size"); - uint64_t NewSize = EarlierSize - ToRemoveSize; - if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(EarlierWrite)) { + uint64_t NewSize = DeadSize - ToRemoveSize; + if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(DeadI)) { // When shortening an atomic memory intrinsic, the newly shortened // length must remain an integer multiple of the element size. const uint32_t ElementSize = AMI->getElementSizeInBytes(); @@ -631,65 +604,62 @@ static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierStart, } LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW " - << (IsOverwriteEnd ? "END" : "BEGIN") << ": " - << *EarlierWrite << "\n KILLER [" << ToRemoveStart << ", " + << (IsOverwriteEnd ? 
"END" : "BEGIN") << ": " << *DeadI + << "\n KILLER [" << ToRemoveStart << ", " << int64_t(ToRemoveStart + ToRemoveSize) << ")\n"); - Value *EarlierWriteLength = EarlierIntrinsic->getLength(); - Value *TrimmedLength = - ConstantInt::get(EarlierWriteLength->getType(), NewSize); - EarlierIntrinsic->setLength(TrimmedLength); - EarlierIntrinsic->setDestAlignment(PrefAlign); + Value *DeadWriteLength = DeadIntrinsic->getLength(); + Value *TrimmedLength = ConstantInt::get(DeadWriteLength->getType(), NewSize); + DeadIntrinsic->setLength(TrimmedLength); + DeadIntrinsic->setDestAlignment(PrefAlign); if (!IsOverwriteEnd) { - Value *OrigDest = EarlierIntrinsic->getRawDest(); + Value *OrigDest = DeadIntrinsic->getRawDest(); Type *Int8PtrTy = - Type::getInt8PtrTy(EarlierIntrinsic->getContext(), + Type::getInt8PtrTy(DeadIntrinsic->getContext(), OrigDest->getType()->getPointerAddressSpace()); Value *Dest = OrigDest; if (OrigDest->getType() != Int8PtrTy) - Dest = CastInst::CreatePointerCast(OrigDest, Int8PtrTy, "", EarlierWrite); + Dest = CastInst::CreatePointerCast(OrigDest, Int8PtrTy, "", DeadI); Value *Indices[1] = { - ConstantInt::get(EarlierWriteLength->getType(), ToRemoveSize)}; + ConstantInt::get(DeadWriteLength->getType(), ToRemoveSize)}; Instruction *NewDestGEP = GetElementPtrInst::CreateInBounds( - Type::getInt8Ty(EarlierIntrinsic->getContext()), - Dest, Indices, "", EarlierWrite); - NewDestGEP->setDebugLoc(EarlierIntrinsic->getDebugLoc()); + Type::getInt8Ty(DeadIntrinsic->getContext()), Dest, Indices, "", DeadI); + NewDestGEP->setDebugLoc(DeadIntrinsic->getDebugLoc()); if (NewDestGEP->getType() != OrigDest->getType()) NewDestGEP = CastInst::CreatePointerCast(NewDestGEP, OrigDest->getType(), - "", EarlierWrite); - EarlierIntrinsic->setDest(NewDestGEP); + "", DeadI); + DeadIntrinsic->setDest(NewDestGEP); } - // Finally update start and size of earlier access. + // Finally update start and size of dead access. if (!IsOverwriteEnd) - EarlierStart += ToRemoveSize; - EarlierSize = NewSize; + DeadStart += ToRemoveSize; + DeadSize = NewSize; return true; } -static bool tryToShortenEnd(Instruction *EarlierWrite, - OverlapIntervalsTy &IntervalMap, - int64_t &EarlierStart, uint64_t &EarlierSize) { - if (IntervalMap.empty() || !isShortenableAtTheEnd(EarlierWrite)) +static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap, + int64_t &DeadStart, uint64_t &DeadSize) { + if (IntervalMap.empty() || !isShortenableAtTheEnd(DeadI)) return false; OverlapIntervalsTy::iterator OII = --IntervalMap.end(); - int64_t LaterStart = OII->second; - uint64_t LaterSize = OII->first - LaterStart; + int64_t KillingStart = OII->second; + uint64_t KillingSize = OII->first - KillingStart; - assert(OII->first - LaterStart >= 0 && "Size expected to be positive"); + assert(OII->first - KillingStart >= 0 && "Size expected to be positive"); - if (LaterStart > EarlierStart && - // Note: "LaterStart - EarlierStart" is known to be positive due to + if (KillingStart > DeadStart && + // Note: "KillingStart - KillingStart" is known to be positive due to // preceding check. - (uint64_t)(LaterStart - EarlierStart) < EarlierSize && - // Note: "EarlierSize - (uint64_t)(LaterStart - EarlierStart)" is known to + (uint64_t)(KillingStart - DeadStart) < DeadSize && + // Note: "DeadSize - (uint64_t)(KillingStart - DeadStart)" is known to // be non negative due to preceding checks. 
- LaterSize >= EarlierSize - (uint64_t)(LaterStart - EarlierStart)) { - if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart, - LaterSize, true)) { + KillingSize >= DeadSize - (uint64_t)(KillingStart - DeadStart)) { + if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize, + true)) { IntervalMap.erase(OII); return true; } @@ -697,28 +667,28 @@ static bool tryToShortenEnd(Instruction *EarlierWrite, return false; } -static bool tryToShortenBegin(Instruction *EarlierWrite, +static bool tryToShortenBegin(Instruction *DeadI, OverlapIntervalsTy &IntervalMap, - int64_t &EarlierStart, uint64_t &EarlierSize) { - if (IntervalMap.empty() || !isShortenableAtTheBeginning(EarlierWrite)) + int64_t &DeadStart, uint64_t &DeadSize) { + if (IntervalMap.empty() || !isShortenableAtTheBeginning(DeadI)) return false; OverlapIntervalsTy::iterator OII = IntervalMap.begin(); - int64_t LaterStart = OII->second; - uint64_t LaterSize = OII->first - LaterStart; + int64_t KillingStart = OII->second; + uint64_t KillingSize = OII->first - KillingStart; - assert(OII->first - LaterStart >= 0 && "Size expected to be positive"); + assert(OII->first - KillingStart >= 0 && "Size expected to be positive"); - if (LaterStart <= EarlierStart && - // Note: "EarlierStart - LaterStart" is known to be non negative due to + if (KillingStart <= DeadStart && + // Note: "DeadStart - KillingStart" is known to be non negative due to // preceding check. - LaterSize > (uint64_t)(EarlierStart - LaterStart)) { - // Note: "LaterSize - (uint64_t)(EarlierStart - LaterStart)" is known to be - // positive due to preceding checks. - assert(LaterSize - (uint64_t)(EarlierStart - LaterStart) < EarlierSize && + KillingSize > (uint64_t)(DeadStart - KillingStart)) { + // Note: "KillingSize - (uint64_t)(DeadStart - DeadStart)" is known to + // be positive due to preceding checks. 
+ assert(KillingSize - (uint64_t)(DeadStart - KillingStart) < DeadSize && "Should have been handled as OW_Complete"); - if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart, - LaterSize, false)) { + if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize, + false)) { IntervalMap.erase(OII); return true; } @@ -726,71 +696,48 @@ static bool tryToShortenBegin(Instruction *EarlierWrite, return false; } -static bool removePartiallyOverlappedStores(const DataLayout &DL, - InstOverlapIntervalsTy &IOL, - const TargetLibraryInfo &TLI) { - bool Changed = false; - for (auto OI : IOL) { - Instruction *EarlierWrite = OI.first; - MemoryLocation Loc = getLocForWrite(EarlierWrite, TLI); - assert(isRemovable(EarlierWrite) && "Expect only removable instruction"); - - const Value *Ptr = Loc.Ptr->stripPointerCasts(); - int64_t EarlierStart = 0; - uint64_t EarlierSize = Loc.Size.getValue(); - GetPointerBaseWithConstantOffset(Ptr, EarlierStart, DL); - OverlapIntervalsTy &IntervalMap = OI.second; - Changed |= - tryToShortenEnd(EarlierWrite, IntervalMap, EarlierStart, EarlierSize); - if (IntervalMap.empty()) - continue; - Changed |= - tryToShortenBegin(EarlierWrite, IntervalMap, EarlierStart, EarlierSize); - } - return Changed; -} - -static Constant *tryToMergePartialOverlappingStores( - StoreInst *Earlier, StoreInst *Later, int64_t InstWriteOffset, - int64_t DepWriteOffset, const DataLayout &DL, BatchAAResults &AA, - DominatorTree *DT) { - - if (Earlier && isa<ConstantInt>(Earlier->getValueOperand()) && - DL.typeSizeEqualsStoreSize(Earlier->getValueOperand()->getType()) && - Later && isa<ConstantInt>(Later->getValueOperand()) && - DL.typeSizeEqualsStoreSize(Later->getValueOperand()->getType()) && - memoryIsNotModifiedBetween(Earlier, Later, AA, DL, DT)) { +static Constant * +tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI, + int64_t KillingOffset, int64_t DeadOffset, + const DataLayout &DL, BatchAAResults &AA, + DominatorTree *DT) { + + if (DeadI && isa<ConstantInt>(DeadI->getValueOperand()) && + DL.typeSizeEqualsStoreSize(DeadI->getValueOperand()->getType()) && + KillingI && isa<ConstantInt>(KillingI->getValueOperand()) && + DL.typeSizeEqualsStoreSize(KillingI->getValueOperand()->getType()) && + memoryIsNotModifiedBetween(DeadI, KillingI, AA, DL, DT)) { // If the store we find is: // a) partially overwritten by the store to 'Loc' - // b) the later store is fully contained in the earlier one and + // b) the killing store is fully contained in the dead one and // c) they both have a constant value // d) none of the two stores need padding - // Merge the two stores, replacing the earlier store's value with a + // Merge the two stores, replacing the dead store's value with a // merge of both values. 
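tryToMergePartialOverlappingStores above rewrites the dead store with a constant obtained by clearing the killed bits and OR-ing in the shifted killing value. The same arithmetic with 64-bit integers in place of APInt (names and the example values are illustrative only):

#include <cstdint>

// DeadVal is a DeadBits-wide constant stored at byte offset DeadOff; KillVal
// is a KillBits-wide constant stored at byte offset KillOff inside it.
// Returns the merged constant the dead store can be rewritten to use.
uint64_t mergeConstantStores(uint64_t DeadVal, unsigned DeadBits,
                             uint64_t KillVal, unsigned KillBits,
                             int64_t KillOff, int64_t DeadOff,
                             bool BigEndian) {
  unsigned BitOffset = unsigned(KillOff - DeadOff) * 8;
  unsigned Shift = BigEndian ? DeadBits - BitOffset - KillBits : BitOffset;
  uint64_t KillMask = KillBits == 64 ? ~0ULL : (1ULL << KillBits) - 1;
  uint64_t Mask = KillMask << Shift;
  // Clear the bits being replaced, then OR in the smaller store's value.
  return (DeadVal & ~Mask) | ((KillVal & KillMask) << Shift);
}

// Example (little-endian): a 32-bit store of 0x11223344 partially overwritten
// by a 16-bit store of 0xBEEF at byte offset 2 merges to 0xBEEF3344.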
// TODO: Deal with other constant types (vectors, etc), and probably // some mem intrinsics (if needed) - APInt EarlierValue = - cast<ConstantInt>(Earlier->getValueOperand())->getValue(); - APInt LaterValue = cast<ConstantInt>(Later->getValueOperand())->getValue(); - unsigned LaterBits = LaterValue.getBitWidth(); - assert(EarlierValue.getBitWidth() > LaterValue.getBitWidth()); - LaterValue = LaterValue.zext(EarlierValue.getBitWidth()); + APInt DeadValue = cast<ConstantInt>(DeadI->getValueOperand())->getValue(); + APInt KillingValue = + cast<ConstantInt>(KillingI->getValueOperand())->getValue(); + unsigned KillingBits = KillingValue.getBitWidth(); + assert(DeadValue.getBitWidth() > KillingValue.getBitWidth()); + KillingValue = KillingValue.zext(DeadValue.getBitWidth()); // Offset of the smaller store inside the larger store - unsigned BitOffsetDiff = (InstWriteOffset - DepWriteOffset) * 8; - unsigned LShiftAmount = DL.isBigEndian() ? EarlierValue.getBitWidth() - - BitOffsetDiff - LaterBits - : BitOffsetDiff; - APInt Mask = APInt::getBitsSet(EarlierValue.getBitWidth(), LShiftAmount, - LShiftAmount + LaterBits); + unsigned BitOffsetDiff = (KillingOffset - DeadOffset) * 8; + unsigned LShiftAmount = + DL.isBigEndian() ? DeadValue.getBitWidth() - BitOffsetDiff - KillingBits + : BitOffsetDiff; + APInt Mask = APInt::getBitsSet(DeadValue.getBitWidth(), LShiftAmount, + LShiftAmount + KillingBits); // Clear the bits we'll be replacing, then OR with the smaller // store, shifted appropriately. - APInt Merged = (EarlierValue & ~Mask) | (LaterValue << LShiftAmount); - LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Earlier: " << *Earlier - << "\n Later: " << *Later + APInt Merged = (DeadValue & ~Mask) | (KillingValue << LShiftAmount); + LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Dead: " << *DeadI + << "\n Killing: " << *KillingI << "\n Merged Value: " << Merged << '\n'); - return ConstantInt::get(Earlier->getValueOperand()->getType(), Merged); + return ConstantInt::get(DeadI->getValueOperand()->getType(), Merged); } return nullptr; } @@ -819,14 +766,17 @@ bool isNoopIntrinsic(Instruction *I) { } // Check if we can ignore \p D for DSE. -bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) { +bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller, + const TargetLibraryInfo &TLI) { Instruction *DI = D->getMemoryInst(); // Calls that only access inaccessible memory cannot read or write any memory // locations we consider for elimination. if (auto *CB = dyn_cast<CallBase>(DI)) - if (CB->onlyAccessesInaccessibleMemory()) + if (CB->onlyAccessesInaccessibleMemory()) { + if (isAllocLikeFn(DI, &TLI)) + return false; return true; - + } // We can eliminate stores to locations not visible to the caller across // throwing instructions. if (DI->mayThrow() && !DefVisibleToCaller) @@ -841,7 +791,7 @@ bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) { return true; // Skip intrinsics that do not really read or modify memory. - if (isNoopIntrinsic(D->getMemoryInst())) + if (isNoopIntrinsic(DI)) return true; return false; @@ -850,6 +800,7 @@ bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) { struct DSEState { Function &F; AliasAnalysis &AA; + EarliestEscapeInfo EI; /// The single BatchAA instance that is used to cache AA queries. It will /// not be invalidated over the whole run. This is safe, because: @@ -892,30 +843,29 @@ struct DSEState { /// basic block. DenseMap<BasicBlock *, InstOverlapIntervalsTy> IOLs; + // Class contains self-reference, make sure it's not copied/moved. 
+ DSEState(const DSEState &) = delete; + DSEState &operator=(const DSEState &) = delete; + DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT, PostDominatorTree &PDT, const TargetLibraryInfo &TLI, const LoopInfo &LI) - : F(F), AA(AA), BatchAA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI), - DL(F.getParent()->getDataLayout()), LI(LI) {} - - static DSEState get(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, - DominatorTree &DT, PostDominatorTree &PDT, - const TargetLibraryInfo &TLI, const LoopInfo &LI) { - DSEState State(F, AA, MSSA, DT, PDT, TLI, LI); + : F(F), AA(AA), EI(DT, LI), BatchAA(AA, &EI), MSSA(MSSA), DT(DT), + PDT(PDT), TLI(TLI), DL(F.getParent()->getDataLayout()), LI(LI) { // Collect blocks with throwing instructions not modeled in MemorySSA and // alloc-like objects. unsigned PO = 0; for (BasicBlock *BB : post_order(&F)) { - State.PostOrderNumbers[BB] = PO++; + PostOrderNumbers[BB] = PO++; for (Instruction &I : *BB) { MemoryAccess *MA = MSSA.getMemoryAccess(&I); if (I.mayThrow() && !MA) - State.ThrowingBlocks.insert(I.getParent()); + ThrowingBlocks.insert(I.getParent()); auto *MD = dyn_cast_or_null<MemoryDef>(MA); - if (MD && State.MemDefs.size() < MemorySSADefsPerBlockLimit && - (State.getLocForWriteEx(&I) || State.isMemTerminatorInst(&I))) - State.MemDefs.push_back(MD); + if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit && + (getLocForWriteEx(&I) || isMemTerminatorInst(&I))) + MemDefs.push_back(MD); } } @@ -925,131 +875,134 @@ struct DSEState { if (AI.hasPassPointeeByValueCopyAttr()) { // For byval, the caller doesn't know the address of the allocation. if (AI.hasByValAttr()) - State.InvisibleToCallerBeforeRet.insert({&AI, true}); - State.InvisibleToCallerAfterRet.insert({&AI, true}); + InvisibleToCallerBeforeRet.insert({&AI, true}); + InvisibleToCallerAfterRet.insert({&AI, true}); } // Collect whether there is any irreducible control flow in the function. - State.ContainsIrreducibleLoops = mayContainIrreducibleControl(F, &LI); - - return State; + ContainsIrreducibleLoops = mayContainIrreducibleControl(F, &LI); } - /// Return 'OW_Complete' if a store to the 'Later' location (by \p LaterI - /// instruction) completely overwrites a store to the 'Earlier' location. - /// (by \p EarlierI instruction). - /// Return OW_MaybePartial if \p Later does not completely overwrite - /// \p Earlier, but they both write to the same underlying object. In that - /// case, use isPartialOverwrite to check if \p Later partially overwrites - /// \p Earlier. Returns 'OW_Unknown' if nothing can be determined. - OverwriteResult - isOverwrite(const Instruction *LaterI, const Instruction *EarlierI, - const MemoryLocation &Later, const MemoryLocation &Earlier, - int64_t &EarlierOff, int64_t &LaterOff) { + /// Return 'OW_Complete' if a store to the 'KillingLoc' location (by \p + /// KillingI instruction) completely overwrites a store to the 'DeadLoc' + /// location (by \p DeadI instruction). + /// Return OW_MaybePartial if \p KillingI does not completely overwrite + /// \p DeadI, but they both write to the same underlying object. In that + /// case, use isPartialOverwrite to check if \p KillingI partially overwrites + /// \p DeadI. Returns 'OW_Unknown' if nothing can be determined. + OverwriteResult isOverwrite(const Instruction *KillingI, + const Instruction *DeadI, + const MemoryLocation &KillingLoc, + const MemoryLocation &DeadLoc, + int64_t &KillingOff, int64_t &DeadOff) { // AliasAnalysis does not always account for loops. 
Limit overwrite checks - // to dependencies for which we can guarantee they are independant of any + // to dependencies for which we can guarantee they are independent of any // loops they are in. - if (!isGuaranteedLoopIndependent(EarlierI, LaterI, Earlier)) + if (!isGuaranteedLoopIndependent(DeadI, KillingI, DeadLoc)) return OW_Unknown; // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll // get imprecise values here, though (except for unknown sizes). - if (!Later.Size.isPrecise() || !Earlier.Size.isPrecise()) { + if (!KillingLoc.Size.isPrecise() || !DeadLoc.Size.isPrecise()) { // In case no constant size is known, try to an IR values for the number // of bytes written and check if they match. - const auto *LaterMemI = dyn_cast<MemIntrinsic>(LaterI); - const auto *EarlierMemI = dyn_cast<MemIntrinsic>(EarlierI); - if (LaterMemI && EarlierMemI) { - const Value *LaterV = LaterMemI->getLength(); - const Value *EarlierV = EarlierMemI->getLength(); - if (LaterV == EarlierV && BatchAA.isMustAlias(Earlier, Later)) + const auto *KillingMemI = dyn_cast<MemIntrinsic>(KillingI); + const auto *DeadMemI = dyn_cast<MemIntrinsic>(DeadI); + if (KillingMemI && DeadMemI) { + const Value *KillingV = KillingMemI->getLength(); + const Value *DeadV = DeadMemI->getLength(); + if (KillingV == DeadV && BatchAA.isMustAlias(DeadLoc, KillingLoc)) return OW_Complete; } // Masked stores have imprecise locations, but we can reason about them // to some extent. - return isMaskedStoreOverwrite(LaterI, EarlierI, BatchAA); + return isMaskedStoreOverwrite(KillingI, DeadI, BatchAA); } - const uint64_t LaterSize = Later.Size.getValue(); - const uint64_t EarlierSize = Earlier.Size.getValue(); + const uint64_t KillingSize = KillingLoc.Size.getValue(); + const uint64_t DeadSize = DeadLoc.Size.getValue(); // Query the alias information - AliasResult AAR = BatchAA.alias(Later, Earlier); + AliasResult AAR = BatchAA.alias(KillingLoc, DeadLoc); // If the start pointers are the same, we just have to compare sizes to see if - // the later store was larger than the earlier store. + // the killing store was larger than the dead store. if (AAR == AliasResult::MustAlias) { - // Make sure that the Later size is >= the Earlier size. - if (LaterSize >= EarlierSize) + // Make sure that the KillingSize size is >= the DeadSize size. + if (KillingSize >= DeadSize) return OW_Complete; } // If we hit a partial alias we may have a full overwrite if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) { int32_t Off = AAR.getOffset(); - if (Off >= 0 && (uint64_t)Off + EarlierSize <= LaterSize) + if (Off >= 0 && (uint64_t)Off + DeadSize <= KillingSize) return OW_Complete; } - // Check to see if the later store is to the entire object (either a global, - // an alloca, or a byval/inalloca argument). If so, then it clearly + // Check to see if the killing store is to the entire object (either a + // global, an alloca, or a byval/inalloca argument). If so, then it clearly // overwrites any other store to the same object. 
- const Value *P1 = Earlier.Ptr->stripPointerCasts(); - const Value *P2 = Later.Ptr->stripPointerCasts(); - const Value *UO1 = getUnderlyingObject(P1), *UO2 = getUnderlyingObject(P2); + const Value *DeadPtr = DeadLoc.Ptr->stripPointerCasts(); + const Value *KillingPtr = KillingLoc.Ptr->stripPointerCasts(); + const Value *DeadUndObj = getUnderlyingObject(DeadPtr); + const Value *KillingUndObj = getUnderlyingObject(KillingPtr); // If we can't resolve the same pointers to the same object, then we can't // analyze them at all. - if (UO1 != UO2) + if (DeadUndObj != KillingUndObj) return OW_Unknown; - // If the "Later" store is to a recognizable object, get its size. - uint64_t ObjectSize = getPointerSize(UO2, DL, TLI, &F); - if (ObjectSize != MemoryLocation::UnknownSize) - if (ObjectSize == LaterSize && ObjectSize >= EarlierSize) + // If the KillingI store is to a recognizable object, get its size. + uint64_t KillingUndObjSize = getPointerSize(KillingUndObj, DL, TLI, &F); + if (KillingUndObjSize != MemoryLocation::UnknownSize) + if (KillingUndObjSize == KillingSize && KillingUndObjSize >= DeadSize) return OW_Complete; // Okay, we have stores to two completely different pointers. Try to // decompose the pointer into a "base + constant_offset" form. If the base // pointers are equal, then we can reason about the two stores. - EarlierOff = 0; - LaterOff = 0; - const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, DL); - const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, DL); - - // If the base pointers still differ, we have two completely different stores. - if (BP1 != BP2) + DeadOff = 0; + KillingOff = 0; + const Value *DeadBasePtr = + GetPointerBaseWithConstantOffset(DeadPtr, DeadOff, DL); + const Value *KillingBasePtr = + GetPointerBaseWithConstantOffset(KillingPtr, KillingOff, DL); + + // If the base pointers still differ, we have two completely different + // stores. + if (DeadBasePtr != KillingBasePtr) return OW_Unknown; - // The later access completely overlaps the earlier store if and only if - // both start and end of the earlier one is "inside" the later one: - // |<->|--earlier--|<->| - // |-------later-------| + // The killing access completely overlaps the dead store if and only if + // both start and end of the dead one is "inside" the killing one: + // |<->|--dead--|<->| + // |-----killing------| // Accesses may overlap if and only if start of one of them is "inside" // another one: - // |<->|--earlier--|<----->| - // |-------later-------| + // |<->|--dead--|<-------->| + // |-------killing--------| // OR - // |----- earlier -----| - // |<->|---later---|<----->| + // |-------dead-------| + // |<->|---killing---|<----->| // // We have to be careful here as *Off is signed while *.Size is unsigned. - // Check if the earlier access starts "not before" the later one. - if (EarlierOff >= LaterOff) { - // If the earlier access ends "not after" the later access then the earlier - // one is completely overwritten by the later one. - if (uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize) + // Check if the dead access starts "not before" the killing one. + if (DeadOff >= KillingOff) { + // If the dead access ends "not after" the killing access then the + // dead one is completely overwritten by the killing one. + if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize) return OW_Complete; - // If start of the earlier access is "before" end of the later access then - // accesses overlap. 
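Once isOverwrite above has reduced both locations to the same base pointer plus constant offsets, the remaining classification is pure interval arithmetic over offsets and sizes. A minimal version (OW_* names reused from the pass for readability):

#include <cstdint>

enum OverwriteResult { OW_Complete, OW_MaybePartial, OW_Unknown };

// Classify how a killing write [KillingOff, KillingOff + KillingSize) relates
// to a dead write [DeadOff, DeadOff + DeadSize) off the same base pointer.
OverwriteResult classifyOverwrite(int64_t KillingOff, uint64_t KillingSize,
                                  int64_t DeadOff, uint64_t DeadSize) {
  if (DeadOff >= KillingOff) {
    // The dead write starts inside the killing one; it is completely
    // overwritten if it also ends inside it, otherwise they merely overlap.
    if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize)
      return OW_Complete;
    if (uint64_t(DeadOff - KillingOff) < KillingSize)
      return OW_MaybePartial;
  } else if (uint64_t(KillingOff - DeadOff) < DeadSize) {
    // The killing write starts inside the dead one.
    return OW_MaybePartial;
  }
  return OW_Unknown;
}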
- else if ((uint64_t)(EarlierOff - LaterOff) < LaterSize) + // If start of the dead access is "before" end of the killing access + // then accesses overlap. + else if ((uint64_t)(DeadOff - KillingOff) < KillingSize) return OW_MaybePartial; } - // If start of the later access is "before" end of the earlier access then + // If start of the killing access is "before" end of the dead access then // accesses overlap. - else if ((uint64_t)(LaterOff - EarlierOff) < EarlierSize) { + else if ((uint64_t)(KillingOff - DeadOff) < DeadSize) { return OW_MaybePartial; } @@ -1106,8 +1059,13 @@ struct DSEState { LibFunc LF; if (TLI.getLibFunc(*CB, LF) && TLI.has(LF)) { switch (LF) { - case LibFunc_strcpy: case LibFunc_strncpy: + if (const auto *Len = dyn_cast<ConstantInt>(CB->getArgOperand(2))) + return MemoryLocation(CB->getArgOperand(0), + LocationSize::precise(Len->getZExtValue()), + CB->getAAMetadata()); + LLVM_FALLTHROUGH; + case LibFunc_strcpy: case LibFunc_strcat: case LibFunc_strncat: return {MemoryLocation::getAfter(CB->getArgOperand(0))}; @@ -1145,8 +1103,8 @@ struct DSEState { int64_t InstWriteOffset, DepWriteOffset; if (auto CC = getLocForWriteEx(UseInst)) - return isOverwrite(UseInst, DefInst, *CC, DefLoc, DepWriteOffset, - InstWriteOffset) == OW_Complete; + return isOverwrite(UseInst, DefInst, *CC, DefLoc, InstWriteOffset, + DepWriteOffset) == OW_Complete; return false; } @@ -1248,9 +1206,10 @@ struct DSEState { const Value *LocUO = getUnderlyingObject(Loc.Ptr); return BatchAA.isMustAlias(TermLoc.Ptr, LocUO); } - int64_t InstWriteOffset, DepWriteOffset; - return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, DepWriteOffset, - InstWriteOffset) == OW_Complete; + int64_t InstWriteOffset = 0; + int64_t DepWriteOffset = 0; + return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, InstWriteOffset, + DepWriteOffset) == OW_Complete; } // Returns true if \p Use may read from \p DefLoc. @@ -1270,10 +1229,6 @@ struct DSEState { if (CB->onlyAccessesInaccessibleMemory()) return false; - // NOTE: For calls, the number of stores removed could be slightly improved - // by using AA.callCapturesBefore(UseInst, DefLoc, &DT), but that showed to - // be expensive compared to the benefits in practice. For now, avoid more - // expensive analysis to limit compile-time. return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc)); } @@ -1329,15 +1284,15 @@ struct DSEState { return IsGuaranteedLoopInvariantBase(Ptr); } - // Find a MemoryDef writing to \p DefLoc and dominating \p StartAccess, with - // no read access between them or on any other path to a function exit block - // if \p DefLoc is not accessible after the function returns. If there is no - // such MemoryDef, return None. The returned value may not (completely) - // overwrite \p DefLoc. Currently we bail out when we encounter an aliasing - // MemoryUse (read). + // Find a MemoryDef writing to \p KillingLoc and dominating \p StartAccess, + // with no read access between them or on any other path to a function exit + // block if \p KillingLoc is not accessible after the function returns. If + // there is no such MemoryDef, return None. The returned value may not + // (completely) overwrite \p KillingLoc. Currently we bail out when we + // encounter an aliasing MemoryUse (read). 
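The offset arithmetic in the preceding hunks is easy to misread in diff form. A minimal standalone sketch of the same classification for the constant-offset case (the enum and function names here are illustrative, not the ones used in DeadStoreElimination.cpp): the killing store completely overwrites the dead one iff the dead byte range is contained in the killing range, and the two may partially overlap iff the start of one range falls inside the other.

#include <cstdint>

enum class Overwrite { Complete, MaybePartial, Unknown };

// DeadOff/KillingOff: signed offsets from a common base pointer.
// DeadSize/KillingSize: store sizes in bytes.
static Overwrite classifyOverwrite(int64_t DeadOff, uint64_t DeadSize,
                                   int64_t KillingOff, uint64_t KillingSize) {
  if (DeadOff >= KillingOff) {
    // Dead store starts at or after the start of the killing store.
    if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize)
      return Overwrite::Complete;     // dead range contained in killing range
    if (uint64_t(DeadOff - KillingOff) < KillingSize)
      return Overwrite::MaybePartial; // dead starts before the killing end
    return Overwrite::Unknown;
  }
  // Killing store starts after the start of the dead store.
  if (uint64_t(KillingOff - DeadOff) < DeadSize)
    return Overwrite::MaybePartial;   // killing starts before the dead end
  return Overwrite::Unknown;
}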
Optional<MemoryAccess *> getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess, - const MemoryLocation &DefLoc, const Value *DefUO, + const MemoryLocation &KillingLoc, const Value *KillingUndObj, unsigned &ScanLimit, unsigned &WalkerStepLimit, bool IsMemTerm, unsigned &PartialLimit) { if (ScanLimit == 0 || WalkerStepLimit == 0) { @@ -1389,19 +1344,20 @@ struct DSEState { MemoryDef *CurrentDef = cast<MemoryDef>(Current); Instruction *CurrentI = CurrentDef->getMemoryInst(); - if (canSkipDef(CurrentDef, !isInvisibleToCallerBeforeRet(DefUO))) + if (canSkipDef(CurrentDef, !isInvisibleToCallerBeforeRet(KillingUndObj), + TLI)) continue; // Before we try to remove anything, check for any extra throwing // instructions that block us from DSEing - if (mayThrowBetween(KillingI, CurrentI, DefUO)) { + if (mayThrowBetween(KillingI, CurrentI, KillingUndObj)) { LLVM_DEBUG(dbgs() << " ... skip, may throw!\n"); return None; } // Check for anything that looks like it will be a barrier to further // removal - if (isDSEBarrier(DefUO, CurrentI)) { + if (isDSEBarrier(KillingUndObj, CurrentI)) { LLVM_DEBUG(dbgs() << " ... skip, barrier\n"); return None; } @@ -1410,14 +1366,14 @@ struct DSEState { // clobber, bail out, as the path is not profitable. We skip this check // for intrinsic calls, because the code knows how to handle memcpy // intrinsics. - if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(DefLoc, CurrentI)) + if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(KillingLoc, CurrentI)) return None; // Quick check if there are direct uses that are read-clobbers. - if (any_of(Current->uses(), [this, &DefLoc, StartAccess](Use &U) { + if (any_of(Current->uses(), [this, &KillingLoc, StartAccess](Use &U) { if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U.getUser())) return !MSSA.dominates(StartAccess, UseOrDef) && - isReadClobber(DefLoc, UseOrDef->getMemoryInst()); + isReadClobber(KillingLoc, UseOrDef->getMemoryInst()); return false; })) { LLVM_DEBUG(dbgs() << " ... found a read clobber\n"); @@ -1450,9 +1406,10 @@ struct DSEState { if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI)) continue; } else { - int64_t InstWriteOffset, DepWriteOffset; - auto OR = isOverwrite(KillingI, CurrentI, DefLoc, *CurrentLoc, - DepWriteOffset, InstWriteOffset); + int64_t KillingOffset = 0; + int64_t DeadOffset = 0; + auto OR = isOverwrite(KillingI, CurrentI, KillingLoc, *CurrentLoc, + KillingOffset, DeadOffset); // If Current does not write to the same object as KillingDef, check // the next candidate. if (OR == OW_Unknown) @@ -1473,30 +1430,25 @@ struct DSEState { }; // Accesses to objects accessible after the function returns can only be - // eliminated if the access is killed along all paths to the exit. Collect + // eliminated if the access is dead along all paths to the exit. Collect // the blocks with killing (=completely overwriting MemoryDefs) and check if - // they cover all paths from EarlierAccess to any function exit. + // they cover all paths from MaybeDeadAccess to any function exit. 
SmallPtrSet<Instruction *, 16> KillingDefs; KillingDefs.insert(KillingDef->getMemoryInst()); - MemoryAccess *EarlierAccess = Current; - Instruction *EarlierMemInst = - cast<MemoryDef>(EarlierAccess)->getMemoryInst(); - LLVM_DEBUG(dbgs() << " Checking for reads of " << *EarlierAccess << " (" - << *EarlierMemInst << ")\n"); + MemoryAccess *MaybeDeadAccess = Current; + MemoryLocation MaybeDeadLoc = *CurrentLoc; + Instruction *MaybeDeadI = cast<MemoryDef>(MaybeDeadAccess)->getMemoryInst(); + LLVM_DEBUG(dbgs() << " Checking for reads of " << *MaybeDeadAccess << " (" + << *MaybeDeadI << ")\n"); SmallSetVector<MemoryAccess *, 32> WorkList; auto PushMemUses = [&WorkList](MemoryAccess *Acc) { for (Use &U : Acc->uses()) WorkList.insert(cast<MemoryAccess>(U.getUser())); }; - PushMemUses(EarlierAccess); - - // Optimistically collect all accesses for reads. If we do not find any - // read clobbers, add them to the cache. - SmallPtrSet<MemoryAccess *, 16> KnownNoReads; - if (!EarlierMemInst->mayReadFromMemory()) - KnownNoReads.insert(EarlierAccess); - // Check if EarlierDef may be read. + PushMemUses(MaybeDeadAccess); + + // Check if DeadDef may be read. for (unsigned I = 0; I < WorkList.size(); I++) { MemoryAccess *UseAccess = WorkList[I]; @@ -1508,7 +1460,6 @@ struct DSEState { } --ScanLimit; NumDomMemDefChecks++; - KnownNoReads.insert(UseAccess); if (isa<MemoryPhi>(UseAccess)) { if (any_of(KillingDefs, [this, UseAccess](Instruction *KI) { @@ -1535,7 +1486,7 @@ struct DSEState { // A memory terminator kills all preceeding MemoryDefs and all succeeding // MemoryAccesses. We do not have to check it's users. - if (isMemTerminator(*CurrentLoc, EarlierMemInst, UseInst)) { + if (isMemTerminator(MaybeDeadLoc, MaybeDeadI, UseInst)) { LLVM_DEBUG( dbgs() << " ... skipping, memterminator invalidates following accesses\n"); @@ -1548,14 +1499,14 @@ struct DSEState { continue; } - if (UseInst->mayThrow() && !isInvisibleToCallerBeforeRet(DefUO)) { + if (UseInst->mayThrow() && !isInvisibleToCallerBeforeRet(KillingUndObj)) { LLVM_DEBUG(dbgs() << " ... found throwing instruction\n"); return None; } // Uses which may read the original MemoryDef mean we cannot eliminate the // original MD. Stop walk. - if (isReadClobber(*CurrentLoc, UseInst)) { + if (isReadClobber(MaybeDeadLoc, UseInst)) { LLVM_DEBUG(dbgs() << " ... found read clobber\n"); return None; } @@ -1563,16 +1514,16 @@ struct DSEState { // If this worklist walks back to the original memory access (and the // pointer is not guarenteed loop invariant) then we cannot assume that a // store kills itself. - if (EarlierAccess == UseAccess && - !isGuaranteedLoopInvariant(CurrentLoc->Ptr)) { + if (MaybeDeadAccess == UseAccess && + !isGuaranteedLoopInvariant(MaybeDeadLoc.Ptr)) { LLVM_DEBUG(dbgs() << " ... found not loop invariant self access\n"); return None; } - // Otherwise, for the KillingDef and EarlierAccess we only have to check + // Otherwise, for the KillingDef and MaybeDeadAccess we only have to check // if it reads the memory location. // TODO: It would probably be better to check for self-reads before // calling the function. - if (KillingDef == UseAccess || EarlierAccess == UseAccess) { + if (KillingDef == UseAccess || MaybeDeadAccess == UseAccess) { LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n"); continue; } @@ -1581,18 +1532,18 @@ struct DSEState { // the original location. Otherwise we have to check uses of *all* // MemoryDefs we discover, including non-aliasing ones. 
Otherwise we might // miss cases like the following - // 1 = Def(LoE) ; <----- EarlierDef stores [0,1] + // 1 = Def(LoE) ; <----- DeadDef stores [0,1] // 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3] // Use(2) ; MayAlias 2 *and* 1, loads [0, 3]. // (The Use points to the *first* Def it may alias) // 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias, // stores [0,1] if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) { - if (isCompleteOverwrite(*CurrentLoc, EarlierMemInst, UseInst)) { + if (isCompleteOverwrite(MaybeDeadLoc, MaybeDeadI, UseInst)) { BasicBlock *MaybeKillingBlock = UseInst->getParent(); if (PostOrderNumbers.find(MaybeKillingBlock)->second < - PostOrderNumbers.find(EarlierAccess->getBlock())->second) { - if (!isInvisibleToCallerAfterRet(DefUO)) { + PostOrderNumbers.find(MaybeDeadAccess->getBlock())->second) { + if (!isInvisibleToCallerAfterRet(KillingUndObj)) { LLVM_DEBUG(dbgs() << " ... found killing def " << *UseInst << "\n"); KillingDefs.insert(UseInst); @@ -1608,9 +1559,9 @@ struct DSEState { } // For accesses to locations visible after the function returns, make sure - // that the location is killed (=overwritten) along all paths from - // EarlierAccess to the exit. - if (!isInvisibleToCallerAfterRet(DefUO)) { + // that the location is dead (=overwritten) along all paths from + // MaybeDeadAccess to the exit. + if (!isInvisibleToCallerAfterRet(KillingUndObj)) { SmallPtrSet<BasicBlock *, 16> KillingBlocks; for (Instruction *KD : KillingDefs) KillingBlocks.insert(KD->getParent()); @@ -1619,25 +1570,24 @@ struct DSEState { // Find the common post-dominator of all killing blocks. BasicBlock *CommonPred = *KillingBlocks.begin(); - for (auto I = std::next(KillingBlocks.begin()), E = KillingBlocks.end(); - I != E; I++) { + for (BasicBlock *BB : llvm::drop_begin(KillingBlocks)) { if (!CommonPred) break; - CommonPred = PDT.findNearestCommonDominator(CommonPred, *I); + CommonPred = PDT.findNearestCommonDominator(CommonPred, BB); } // If CommonPred is in the set of killing blocks, just check if it - // post-dominates EarlierAccess. + // post-dominates MaybeDeadAccess. if (KillingBlocks.count(CommonPred)) { - if (PDT.dominates(CommonPred, EarlierAccess->getBlock())) - return {EarlierAccess}; + if (PDT.dominates(CommonPred, MaybeDeadAccess->getBlock())) + return {MaybeDeadAccess}; return None; } - // If the common post-dominator does not post-dominate EarlierAccess, - // there is a path from EarlierAccess to an exit not going through a + // If the common post-dominator does not post-dominate MaybeDeadAccess, + // there is a path from MaybeDeadAccess to an exit not going through a // killing block. - if (PDT.dominates(CommonPred, EarlierAccess->getBlock())) { + if (PDT.dominates(CommonPred, MaybeDeadAccess->getBlock())) { SetVector<BasicBlock *> WorkList; // If CommonPred is null, there are multiple exits from the function. @@ -1650,16 +1600,16 @@ struct DSEState { NumCFGTries++; // Check if all paths starting from an exit node go through one of the - // killing blocks before reaching EarlierAccess. + // killing blocks before reaching MaybeDeadAccess. for (unsigned I = 0; I < WorkList.size(); I++) { NumCFGChecks++; BasicBlock *Current = WorkList[I]; if (KillingBlocks.count(Current)) continue; - if (Current == EarlierAccess->getBlock()) + if (Current == MaybeDeadAccess->getBlock()) return None; - // EarlierAccess is reachable from the entry, so we don't have to + // MaybeDeadAccess is reachable from the entry, so we don't have to // explore unreachable blocks further. 
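For locations still visible after the function returns, the walk above only treats MaybeDeadAccess as dead if every path from its block to a function exit goes through a block with a killing (completely overwriting) def. A simplified model of that check over a toy CFG, walking backwards from the exits and stopping at killing blocks (block ids and containers are invented for illustration; the real code walks LLVM's CFG and uses the post-dominator tree as a fast path):

#include <cstddef>
#include <map>
#include <set>
#include <vector>

// Toy CFG: block id -> list of predecessor ids.
using CFG = std::map<int, std::vector<int>>;

// True iff walking backwards from the exits, stopping at killing blocks,
// never reaches the block containing the candidate dead store.
static bool deadAlongAllExitPaths(const CFG &Preds,
                                  const std::vector<int> &Exits,
                                  const std::set<int> &KillingBlocks,
                                  int DeadBlock) {
  std::vector<int> Worklist(Exits);
  std::set<int> Seen(Exits.begin(), Exits.end());
  for (std::size_t I = 0; I < Worklist.size(); ++I) {
    int BB = Worklist[I];
    if (KillingBlocks.count(BB))
      continue;               // this path is covered by a killing store
    if (BB == DeadBlock)
      return false;           // an exit is reachable without passing a killer
    auto It = Preds.find(BB);
    if (It == Preds.end())
      continue;
    for (int Pred : It->second)
      if (Seen.insert(Pred).second)
        Worklist.push_back(Pred);
  }
  return true;
}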
if (!DT.isReachableFromEntry(Current)) continue; @@ -1671,14 +1621,14 @@ struct DSEState { return None; } NumCFGSuccess++; - return {EarlierAccess}; + return {MaybeDeadAccess}; } return None; } - // No aliasing MemoryUses of EarlierAccess found, EarlierAccess is + // No aliasing MemoryUses of MaybeDeadAccess found, MaybeDeadAccess is // potentially dead. - return {EarlierAccess}; + return {MaybeDeadAccess}; } // Delete dead memory defs @@ -1701,6 +1651,7 @@ struct DSEState { if (MemoryDef *MD = dyn_cast<MemoryDef>(MA)) { SkipStores.insert(MD); } + Updater.removeMemoryAccess(MA); } @@ -1715,47 +1666,49 @@ struct DSEState { NowDeadInsts.push_back(OpI); } + EI.removeInstruction(DeadInst); DeadInst->eraseFromParent(); } } - // Check for any extra throws between SI and NI that block DSE. This only - // checks extra maythrows (those that aren't MemoryDef's). MemoryDef that may - // throw are handled during the walk from one def to the next. - bool mayThrowBetween(Instruction *SI, Instruction *NI, - const Value *SILocUnd) { - // First see if we can ignore it by using the fact that SI is an + // Check for any extra throws between \p KillingI and \p DeadI that block + // DSE. This only checks extra maythrows (those that aren't MemoryDef's). + // MemoryDef that may throw are handled during the walk from one def to the + // next. + bool mayThrowBetween(Instruction *KillingI, Instruction *DeadI, + const Value *KillingUndObj) { + // First see if we can ignore it by using the fact that KillingI is an // alloca/alloca like object that is not visible to the caller during // execution of the function. - if (SILocUnd && isInvisibleToCallerBeforeRet(SILocUnd)) + if (KillingUndObj && isInvisibleToCallerBeforeRet(KillingUndObj)) return false; - if (SI->getParent() == NI->getParent()) - return ThrowingBlocks.count(SI->getParent()); + if (KillingI->getParent() == DeadI->getParent()) + return ThrowingBlocks.count(KillingI->getParent()); return !ThrowingBlocks.empty(); } - // Check if \p NI acts as a DSE barrier for \p SI. The following instructions - // act as barriers: - // * A memory instruction that may throw and \p SI accesses a non-stack + // Check if \p DeadI acts as a DSE barrier for \p KillingI. The following + // instructions act as barriers: + // * A memory instruction that may throw and \p KillingI accesses a non-stack // object. // * Atomic stores stronger that monotonic. - bool isDSEBarrier(const Value *SILocUnd, Instruction *NI) { - // If NI may throw it acts as a barrier, unless we are to an alloca/alloca - // like object that does not escape. - if (NI->mayThrow() && !isInvisibleToCallerBeforeRet(SILocUnd)) + bool isDSEBarrier(const Value *KillingUndObj, Instruction *DeadI) { + // If DeadI may throw it acts as a barrier, unless we are to an + // alloca/alloca like object that does not escape. + if (DeadI->mayThrow() && !isInvisibleToCallerBeforeRet(KillingUndObj)) return true; - // If NI is an atomic load/store stronger than monotonic, do not try to + // If DeadI is an atomic load/store stronger than monotonic, do not try to // eliminate/reorder it. 
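A source-level illustration of the barrier rule described just above (the corresponding ordering checks continue directly below): an atomic access stronger than monotonic between the dead and the killing store blocks the elimination. The example is illustrative only and not taken from the patch; whether the first store is actually removed also depends on the other checks in the walk.

#include <atomic>

void barrierExample(int *p, std::atomic<int> &flag) {
  *p = 1;                                    // candidate dead store
  flag.store(1, std::memory_order_release);  // stronger than monotonic: DSE barrier
  *p = 2;                                    // killing store
}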
- if (NI->isAtomic()) { - if (auto *LI = dyn_cast<LoadInst>(NI)) + if (DeadI->isAtomic()) { + if (auto *LI = dyn_cast<LoadInst>(DeadI)) return isStrongerThanMonotonic(LI->getOrdering()); - if (auto *SI = dyn_cast<StoreInst>(NI)) + if (auto *SI = dyn_cast<StoreInst>(DeadI)) return isStrongerThanMonotonic(SI->getOrdering()); - if (auto *ARMW = dyn_cast<AtomicRMWInst>(NI)) + if (auto *ARMW = dyn_cast<AtomicRMWInst>(DeadI)) return isStrongerThanMonotonic(ARMW->getOrdering()); - if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(NI)) + if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(DeadI)) return isStrongerThanMonotonic(CmpXchg->getSuccessOrdering()) || isStrongerThanMonotonic(CmpXchg->getFailureOrdering()); llvm_unreachable("other instructions should be skipped in MemorySSA"); @@ -1776,7 +1729,6 @@ struct DSEState { continue; Instruction *DefI = Def->getMemoryInst(); - SmallVector<const Value *, 4> Pointers; auto DefLoc = getLocForWriteEx(DefI); if (!DefLoc) continue; @@ -1787,7 +1739,7 @@ struct DSEState { // uncommon. If it turns out to be important, we can use // getUnderlyingObjects here instead. const Value *UO = getUnderlyingObject(DefLoc->Ptr); - if (!UO || !isInvisibleToCallerAfterRet(UO)) + if (!isInvisibleToCallerAfterRet(UO)) continue; if (isWriteAtEndOfFunction(Def)) { @@ -1804,8 +1756,7 @@ struct DSEState { /// \returns true if \p Def is a no-op store, either because it /// directly stores back a loaded value or stores zero to a calloced object. - bool storeIsNoop(MemoryDef *Def, const MemoryLocation &DefLoc, - const Value *DefUO) { + bool storeIsNoop(MemoryDef *Def, const Value *DefUO) { StoreInst *Store = dyn_cast<StoreInst>(Def->getMemoryInst()); MemSetInst *MemSet = dyn_cast<MemSetInst>(Def->getMemoryInst()); Constant *StoredConstant = nullptr; @@ -1816,13 +1767,78 @@ struct DSEState { if (StoredConstant && StoredConstant->isNullValue()) { auto *DefUOInst = dyn_cast<Instruction>(DefUO); - if (DefUOInst && isCallocLikeFn(DefUOInst, &TLI)) { - auto *UnderlyingDef = cast<MemoryDef>(MSSA.getMemoryAccess(DefUOInst)); - // If UnderlyingDef is the clobbering access of Def, no instructions - // between them can modify the memory location. - auto *ClobberDef = - MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def); - return UnderlyingDef == ClobberDef; + if (DefUOInst) { + if (isCallocLikeFn(DefUOInst, &TLI)) { + auto *UnderlyingDef = + cast<MemoryDef>(MSSA.getMemoryAccess(DefUOInst)); + // If UnderlyingDef is the clobbering access of Def, no instructions + // between them can modify the memory location. 
+ auto *ClobberDef = + MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def); + return UnderlyingDef == ClobberDef; + } + + if (MemSet) { + if (F.hasFnAttribute(Attribute::SanitizeMemory) || + F.hasFnAttribute(Attribute::SanitizeAddress) || + F.hasFnAttribute(Attribute::SanitizeHWAddress) || + F.getName() == "calloc") + return false; + auto *Malloc = const_cast<CallInst *>(dyn_cast<CallInst>(DefUOInst)); + if (!Malloc) + return false; + auto *InnerCallee = Malloc->getCalledFunction(); + if (!InnerCallee) + return false; + LibFunc Func; + if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) || + Func != LibFunc_malloc) + return false; + + auto shouldCreateCalloc = [](CallInst *Malloc, CallInst *Memset) { + // Check for br(icmp ptr, null), truebb, falsebb) pattern at the end + // of malloc block + auto *MallocBB = Malloc->getParent(), + *MemsetBB = Memset->getParent(); + if (MallocBB == MemsetBB) + return true; + auto *Ptr = Memset->getArgOperand(0); + auto *TI = MallocBB->getTerminator(); + ICmpInst::Predicate Pred; + BasicBlock *TrueBB, *FalseBB; + if (!match(TI, m_Br(m_ICmp(Pred, m_Specific(Ptr), m_Zero()), TrueBB, + FalseBB))) + return false; + if (Pred != ICmpInst::ICMP_EQ || MemsetBB != FalseBB) + return false; + return true; + }; + + if (Malloc->getOperand(0) == MemSet->getLength()) { + if (shouldCreateCalloc(Malloc, MemSet) && + DT.dominates(Malloc, MemSet) && + memoryIsNotModifiedBetween(Malloc, MemSet, BatchAA, DL, &DT)) { + IRBuilder<> IRB(Malloc); + const auto &DL = Malloc->getModule()->getDataLayout(); + if (auto *Calloc = + emitCalloc(ConstantInt::get(IRB.getIntPtrTy(DL), 1), + Malloc->getArgOperand(0), IRB, TLI)) { + MemorySSAUpdater Updater(&MSSA); + auto *LastDef = cast<MemoryDef>( + Updater.getMemorySSA()->getMemoryAccess(Malloc)); + auto *NewAccess = Updater.createMemoryAccessAfter( + cast<Instruction>(Calloc), LastDef, LastDef); + auto *NewAccessMD = cast<MemoryDef>(NewAccess); + Updater.insertDef(NewAccessMD, /*RenameUses=*/true); + Updater.removeMemoryAccess(Malloc); + Malloc->replaceAllUsesWith(Calloc); + Malloc->eraseFromParent(); + return true; + } + return false; + } + } + } } } @@ -1875,6 +1891,76 @@ struct DSEState { return false; } + + bool removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL) { + bool Changed = false; + for (auto OI : IOL) { + Instruction *DeadI = OI.first; + MemoryLocation Loc = *getLocForWriteEx(DeadI); + assert(isRemovable(DeadI) && "Expect only removable instruction"); + + const Value *Ptr = Loc.Ptr->stripPointerCasts(); + int64_t DeadStart = 0; + uint64_t DeadSize = Loc.Size.getValue(); + GetPointerBaseWithConstantOffset(Ptr, DeadStart, DL); + OverlapIntervalsTy &IntervalMap = OI.second; + Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize); + if (IntervalMap.empty()) + continue; + Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize); + } + return Changed; + } + + /// Eliminates writes to locations where the value that is being written + /// is already stored at the same location. 
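The new branch above folds a malloc whose result is immediately zeroed by this memset into a calloc, provided the memset length equals the allocation size, the malloc dominates the memset, memory is not modified in between, and no sanitizer is enabled. A rough source-level picture of the rewrite (illustrative only; the actual transformation runs on IR via emitCalloc and MemorySSA updates):

#include <cstdlib>
#include <cstring>

// Before: allocation followed by a full-size zeroing memset on the
// non-null branch, the shape shouldCreateCalloc looks for.
void *allocZeroedBefore(std::size_t n) {
  void *p = std::malloc(n);
  if (p)
    std::memset(p, 0, n);
  return p;
}

// After: the zero-initialization is folded into the allocation.
void *allocZeroedAfter(std::size_t n) {
  return std::calloc(1, n);
}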
+ bool eliminateRedundantStoresOfExistingValues() { + bool MadeChange = false; + LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs that write the " + "already existing value\n"); + for (auto *Def : MemDefs) { + if (SkipStores.contains(Def) || MSSA.isLiveOnEntryDef(Def) || + !isRemovable(Def->getMemoryInst())) + continue; + auto *UpperDef = dyn_cast<MemoryDef>(Def->getDefiningAccess()); + if (!UpperDef || MSSA.isLiveOnEntryDef(UpperDef)) + continue; + + Instruction *DefInst = Def->getMemoryInst(); + Instruction *UpperInst = UpperDef->getMemoryInst(); + auto IsRedundantStore = [this, DefInst, + UpperInst](MemoryLocation UpperLoc) { + if (DefInst->isIdenticalTo(UpperInst)) + return true; + if (auto *MemSetI = dyn_cast<MemSetInst>(UpperInst)) { + if (auto *SI = dyn_cast<StoreInst>(DefInst)) { + auto MaybeDefLoc = getLocForWriteEx(DefInst); + if (!MaybeDefLoc) + return false; + int64_t InstWriteOffset = 0; + int64_t DepWriteOffset = 0; + auto OR = isOverwrite(UpperInst, DefInst, UpperLoc, *MaybeDefLoc, + InstWriteOffset, DepWriteOffset); + Value *StoredByte = isBytewiseValue(SI->getValueOperand(), DL); + return StoredByte && StoredByte == MemSetI->getOperand(1) && + OR == OW_Complete; + } + } + return false; + }; + + auto MaybeUpperLoc = getLocForWriteEx(UpperInst); + if (!MaybeUpperLoc || !IsRedundantStore(*MaybeUpperLoc) || + isReadClobber(*MaybeUpperLoc, DefInst)) + continue; + LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *DefInst + << '\n'); + deleteDeadInstruction(DefInst); + NumRedundantStores++; + MadeChange = true; + } + return MadeChange; + } }; static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, @@ -1883,68 +1969,64 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, const LoopInfo &LI) { bool MadeChange = false; - DSEState State = DSEState::get(F, AA, MSSA, DT, PDT, TLI, LI); + DSEState State(F, AA, MSSA, DT, PDT, TLI, LI); // For each store: for (unsigned I = 0; I < State.MemDefs.size(); I++) { MemoryDef *KillingDef = State.MemDefs[I]; if (State.SkipStores.count(KillingDef)) continue; - Instruction *SI = KillingDef->getMemoryInst(); + Instruction *KillingI = KillingDef->getMemoryInst(); - Optional<MemoryLocation> MaybeSILoc; - if (State.isMemTerminatorInst(SI)) - MaybeSILoc = State.getLocForTerminator(SI).map( + Optional<MemoryLocation> MaybeKillingLoc; + if (State.isMemTerminatorInst(KillingI)) + MaybeKillingLoc = State.getLocForTerminator(KillingI).map( [](const std::pair<MemoryLocation, bool> &P) { return P.first; }); else - MaybeSILoc = State.getLocForWriteEx(SI); + MaybeKillingLoc = State.getLocForWriteEx(KillingI); - if (!MaybeSILoc) { + if (!MaybeKillingLoc) { LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for " - << *SI << "\n"); + << *KillingI << "\n"); continue; } - MemoryLocation SILoc = *MaybeSILoc; - assert(SILoc.Ptr && "SILoc should not be null"); - const Value *SILocUnd = getUnderlyingObject(SILoc.Ptr); - - MemoryAccess *Current = KillingDef; + MemoryLocation KillingLoc = *MaybeKillingLoc; + assert(KillingLoc.Ptr && "KillingLoc should not be null"); + const Value *KillingUndObj = getUnderlyingObject(KillingLoc.Ptr); LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by " - << *Current << " (" << *SI << ")\n"); + << *KillingDef << " (" << *KillingI << ")\n"); unsigned ScanLimit = MemorySSAScanLimit; unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit; unsigned PartialLimit = MemorySSAPartialStoreLimit; // Worklist of MemoryAccesses that may be killed by KillingDef. 
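eliminateRedundantStoresOfExistingValues, added above, removes a store whose defining MemoryDef already wrote the same value to the same location: either an identical instruction, or a store of a byte-splat value into memory fully covered by a memset of that byte. Two source-level shapes it targets (illustrative only; the matching is done on IR and MemorySSA, subject to the read-clobber check above):

#include <cstring>

void identicalStore(int *p) {
  *p = 42;
  *p = 42;       // identical to the defining store above: removable
}

void storeIntoMemset(char *buf) {
  std::memset(buf, 0, 64);
  buf[10] = 0;   // writes a byte the memset already wrote: removable
}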
SetVector<MemoryAccess *> ToCheck; - - if (SILocUnd) - ToCheck.insert(KillingDef->getDefiningAccess()); + ToCheck.insert(KillingDef->getDefiningAccess()); bool Shortend = false; - bool IsMemTerm = State.isMemTerminatorInst(SI); + bool IsMemTerm = State.isMemTerminatorInst(KillingI); // Check if MemoryAccesses in the worklist are killed by KillingDef. for (unsigned I = 0; I < ToCheck.size(); I++) { - Current = ToCheck[I]; + MemoryAccess *Current = ToCheck[I]; if (State.SkipStores.count(Current)) continue; - Optional<MemoryAccess *> Next = State.getDomMemoryDef( - KillingDef, Current, SILoc, SILocUnd, ScanLimit, WalkerStepLimit, - IsMemTerm, PartialLimit); + Optional<MemoryAccess *> MaybeDeadAccess = State.getDomMemoryDef( + KillingDef, Current, KillingLoc, KillingUndObj, ScanLimit, + WalkerStepLimit, IsMemTerm, PartialLimit); - if (!Next) { + if (!MaybeDeadAccess) { LLVM_DEBUG(dbgs() << " finished walk\n"); continue; } - MemoryAccess *EarlierAccess = *Next; - LLVM_DEBUG(dbgs() << " Checking if we can kill " << *EarlierAccess); - if (isa<MemoryPhi>(EarlierAccess)) { + MemoryAccess *DeadAccess = *MaybeDeadAccess; + LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DeadAccess); + if (isa<MemoryPhi>(DeadAccess)) { LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n"); - for (Value *V : cast<MemoryPhi>(EarlierAccess)->incoming_values()) { + for (Value *V : cast<MemoryPhi>(DeadAccess)->incoming_values()) { MemoryAccess *IncomingAccess = cast<MemoryAccess>(V); BasicBlock *IncomingBlock = IncomingAccess->getBlock(); - BasicBlock *PhiBlock = EarlierAccess->getBlock(); + BasicBlock *PhiBlock = DeadAccess->getBlock(); // We only consider incoming MemoryAccesses that come before the // MemoryPhi. Otherwise we could discover candidates that do not @@ -1955,72 +2037,73 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, } continue; } - auto *NextDef = cast<MemoryDef>(EarlierAccess); - Instruction *NI = NextDef->getMemoryInst(); - LLVM_DEBUG(dbgs() << " (" << *NI << ")\n"); - ToCheck.insert(NextDef->getDefiningAccess()); + auto *DeadDefAccess = cast<MemoryDef>(DeadAccess); + Instruction *DeadI = DeadDefAccess->getMemoryInst(); + LLVM_DEBUG(dbgs() << " (" << *DeadI << ")\n"); + ToCheck.insert(DeadDefAccess->getDefiningAccess()); NumGetDomMemoryDefPassed++; if (!DebugCounter::shouldExecute(MemorySSACounter)) continue; - MemoryLocation NILoc = *State.getLocForWriteEx(NI); + MemoryLocation DeadLoc = *State.getLocForWriteEx(DeadI); if (IsMemTerm) { - const Value *NIUnd = getUnderlyingObject(NILoc.Ptr); - if (SILocUnd != NIUnd) + const Value *DeadUndObj = getUnderlyingObject(DeadLoc.Ptr); + if (KillingUndObj != DeadUndObj) continue; - LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *NI - << "\n KILLER: " << *SI << '\n'); - State.deleteDeadInstruction(NI); + LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI + << "\n KILLER: " << *KillingI << '\n'); + State.deleteDeadInstruction(DeadI); ++NumFastStores; MadeChange = true; } else { - // Check if NI overwrites SI. - int64_t InstWriteOffset, DepWriteOffset; - OverwriteResult OR = State.isOverwrite(SI, NI, SILoc, NILoc, - DepWriteOffset, InstWriteOffset); + // Check if DeadI overwrites KillingI. 
+ int64_t KillingOffset = 0; + int64_t DeadOffset = 0; + OverwriteResult OR = State.isOverwrite( + KillingI, DeadI, KillingLoc, DeadLoc, KillingOffset, DeadOffset); if (OR == OW_MaybePartial) { auto Iter = State.IOLs.insert( std::make_pair<BasicBlock *, InstOverlapIntervalsTy>( - NI->getParent(), InstOverlapIntervalsTy())); + DeadI->getParent(), InstOverlapIntervalsTy())); auto &IOL = Iter.first->second; - OR = isPartialOverwrite(SILoc, NILoc, DepWriteOffset, InstWriteOffset, - NI, IOL); + OR = isPartialOverwrite(KillingLoc, DeadLoc, KillingOffset, + DeadOffset, DeadI, IOL); } if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) { - auto *Earlier = dyn_cast<StoreInst>(NI); - auto *Later = dyn_cast<StoreInst>(SI); + auto *DeadSI = dyn_cast<StoreInst>(DeadI); + auto *KillingSI = dyn_cast<StoreInst>(KillingI); // We are re-using tryToMergePartialOverlappingStores, which requires - // Earlier to domiante Later. + // DeadSI to dominate KillingSI. // TODO: implement tryToMergeParialOverlappingStores using MemorySSA. - if (Earlier && Later && DT.dominates(Earlier, Later)) { + if (DeadSI && KillingSI && DT.dominates(DeadSI, KillingSI)) { if (Constant *Merged = tryToMergePartialOverlappingStores( - Earlier, Later, InstWriteOffset, DepWriteOffset, State.DL, + KillingSI, DeadSI, KillingOffset, DeadOffset, State.DL, State.BatchAA, &DT)) { // Update stored value of earlier store to merged constant. - Earlier->setOperand(0, Merged); + DeadSI->setOperand(0, Merged); ++NumModifiedStores; MadeChange = true; Shortend = true; - // Remove later store and remove any outstanding overlap intervals - // for the updated store. - State.deleteDeadInstruction(Later); - auto I = State.IOLs.find(Earlier->getParent()); + // Remove killing store and remove any outstanding overlap + // intervals for the updated store. + State.deleteDeadInstruction(KillingSI); + auto I = State.IOLs.find(DeadSI->getParent()); if (I != State.IOLs.end()) - I->second.erase(Earlier); + I->second.erase(DeadSI); break; } } } if (OR == OW_Complete) { - LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *NI - << "\n KILLER: " << *SI << '\n'); - State.deleteDeadInstruction(NI); + LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI + << "\n KILLER: " << *KillingI << '\n'); + State.deleteDeadInstruction(DeadI); ++NumFastStores; MadeChange = true; } @@ -2028,10 +2111,11 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, } // Check if the store is a no-op.
- if (!Shortend && isRemovable(SI) && - State.storeIsNoop(KillingDef, SILoc, SILocUnd)) { - LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *SI << '\n'); - State.deleteDeadInstruction(SI); + if (!Shortend && isRemovable(KillingI) && + State.storeIsNoop(KillingDef, KillingUndObj)) { + LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *KillingI + << '\n'); + State.deleteDeadInstruction(KillingI); NumRedundantStores++; MadeChange = true; continue; @@ -2040,8 +2124,9 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, if (EnablePartialOverwriteTracking) for (auto &KV : State.IOLs) - MadeChange |= removePartiallyOverlappedStores(State.DL, KV.second, TLI); + MadeChange |= State.removePartiallyOverlappedStores(KV.second); + MadeChange |= State.eliminateRedundantStoresOfExistingValues(); MadeChange |= State.eliminateDeadWritesAtEndOfFunction(); return MadeChange; } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/EarlyCSE.cpp index 978c6a77b8dc..90f71f7729a7 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/EarlyCSE.cpp @@ -293,7 +293,7 @@ static unsigned getHashValueImpl(SimpleValue Val) { // TODO: Extend this to handle intrinsics with >2 operands where the 1st // 2 operands are commutative. auto *II = dyn_cast<IntrinsicInst>(Inst); - if (II && II->isCommutative() && II->getNumArgOperands() == 2) { + if (II && II->isCommutative() && II->arg_size() == 2) { Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); if (LHS > RHS) std::swap(LHS, RHS); @@ -363,7 +363,7 @@ static bool isEqualImpl(SimpleValue LHS, SimpleValue RHS) { auto *LII = dyn_cast<IntrinsicInst>(LHSI); auto *RII = dyn_cast<IntrinsicInst>(RHSI); if (LII && RII && LII->getIntrinsicID() == RII->getIntrinsicID() && - LII->isCommutative() && LII->getNumArgOperands() == 2) { + LII->isCommutative() && LII->arg_size() == 2) { return LII->getArgOperand(0) == RII->getArgOperand(1) && LII->getArgOperand(1) == RII->getArgOperand(0); } @@ -1265,6 +1265,12 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { continue; } + // Skip pseudoprobe intrinsics, for the same reason as assume intrinsics. + if (match(&Inst, m_Intrinsic<Intrinsic::pseudoprobe>())) { + LLVM_DEBUG(dbgs() << "EarlyCSE skipping pseudoprobe: " << Inst << '\n'); + continue; + } + // We can skip all invariant.start intrinsics since they only read memory, // and we can forward values across it. For invariant starts without // invariant ends, we can use the fact that the invariantness never ends to @@ -1642,6 +1648,16 @@ PreservedAnalyses EarlyCSEPass::run(Function &F, return PA; } +void EarlyCSEPass::printPipeline( + raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { + static_cast<PassInfoMixin<EarlyCSEPass> *>(this)->printPipeline( + OS, MapClassName2PassName); + OS << "<"; + if (UseMemorySSA) + OS << "memssa"; + OS << ">"; +} + namespace { /// A simple and fast domtree-based CSE pass. 
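The printPipeline overloads added here for EarlyCSE (and further down for GVNPass) describe the configured pass in the textual pipeline syntax, so the configured options can be printed back out and fed in again. Illustrative invocations matching the strings printed above (spellings assumed from those strings, not taken from this diff):

  opt -passes='early-cse<memssa>' -S in.ll
  opt -passes='gvn<no-pre;no-load-pre>' -S in.ll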
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Float2Int.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Float2Int.cpp index 8a5d4f568774..a98bb8358aef 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Float2Int.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Float2Int.cpp @@ -256,7 +256,7 @@ void Float2IntPass::walkForwards() { Op = [](ArrayRef<ConstantRange> Ops) { assert(Ops.size() == 1 && "FNeg is a unary operator!"); unsigned Size = Ops[0].getBitWidth(); - auto Zero = ConstantRange(APInt::getNullValue(Size)); + auto Zero = ConstantRange(APInt::getZero(Size)); return Zero.sub(Ops[0]); }; break; @@ -372,7 +372,7 @@ bool Float2IntPass::validateAndTransform() { // If it does, transformation would be illegal. // // Don't count the roots, as they terminate the graphs. - if (Roots.count(I) == 0) { + if (!Roots.contains(I)) { // Set the type of the conversion while we're here. if (!ConvertedToTy) ConvertedToTy = I->getType(); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp index 16368aec7c3f..00506fb86006 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp @@ -126,7 +126,7 @@ static cl::opt<uint32_t> MaxBBSpeculations( "into) when deducing if a value is fully available or not in GVN " "(default = 600)")); -struct llvm::GVN::Expression { +struct llvm::GVNPass::Expression { uint32_t opcode; bool commutative = false; Type *type = nullptr; @@ -155,17 +155,18 @@ struct llvm::GVN::Expression { namespace llvm { -template <> struct DenseMapInfo<GVN::Expression> { - static inline GVN::Expression getEmptyKey() { return ~0U; } - static inline GVN::Expression getTombstoneKey() { return ~1U; } +template <> struct DenseMapInfo<GVNPass::Expression> { + static inline GVNPass::Expression getEmptyKey() { return ~0U; } + static inline GVNPass::Expression getTombstoneKey() { return ~1U; } - static unsigned getHashValue(const GVN::Expression &e) { + static unsigned getHashValue(const GVNPass::Expression &e) { using llvm::hash_value; return static_cast<unsigned>(hash_value(e)); } - static bool isEqual(const GVN::Expression &LHS, const GVN::Expression &RHS) { + static bool isEqual(const GVNPass::Expression &LHS, + const GVNPass::Expression &RHS) { return LHS == RHS; } }; @@ -246,7 +247,7 @@ struct llvm::gvn::AvailableValue { /// Emit code at the specified insertion point to adjust the value defined /// here to the specified type. This handles various coercion cases. Value *MaterializeAdjustedValue(LoadInst *Load, Instruction *InsertPt, - GVN &gvn) const; + GVNPass &gvn) const; }; /// Represents an AvailableValue which can be rematerialized at the end of @@ -276,7 +277,7 @@ struct llvm::gvn::AvailableValueInBlock { /// Emit code at the end of this block to adjust the value defined here to /// the specified type. This handles various coercion cases. 
- Value *MaterializeAdjustedValue(LoadInst *Load, GVN &gvn) const { + Value *MaterializeAdjustedValue(LoadInst *Load, GVNPass &gvn) const { return AV.MaterializeAdjustedValue(Load, BB->getTerminator(), gvn); } }; @@ -285,7 +286,7 @@ struct llvm::gvn::AvailableValueInBlock { // ValueTable Internal Functions //===----------------------------------------------------------------------===// -GVN::Expression GVN::ValueTable::createExpr(Instruction *I) { +GVNPass::Expression GVNPass::ValueTable::createExpr(Instruction *I) { Expression e; e.type = I->getType(); e.opcode = I->getOpcode(); @@ -330,9 +331,8 @@ GVN::Expression GVN::ValueTable::createExpr(Instruction *I) { return e; } -GVN::Expression GVN::ValueTable::createCmpExpr(unsigned Opcode, - CmpInst::Predicate Predicate, - Value *LHS, Value *RHS) { +GVNPass::Expression GVNPass::ValueTable::createCmpExpr( + unsigned Opcode, CmpInst::Predicate Predicate, Value *LHS, Value *RHS) { assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) && "Not a comparison!"); Expression e; @@ -350,7 +350,8 @@ GVN::Expression GVN::ValueTable::createCmpExpr(unsigned Opcode, return e; } -GVN::Expression GVN::ValueTable::createExtractvalueExpr(ExtractValueInst *EI) { +GVNPass::Expression +GVNPass::ValueTable::createExtractvalueExpr(ExtractValueInst *EI) { assert(EI && "Not an ExtractValueInst?"); Expression e; e.type = EI->getType(); @@ -382,20 +383,21 @@ GVN::Expression GVN::ValueTable::createExtractvalueExpr(ExtractValueInst *EI) { // ValueTable External Functions //===----------------------------------------------------------------------===// -GVN::ValueTable::ValueTable() = default; -GVN::ValueTable::ValueTable(const ValueTable &) = default; -GVN::ValueTable::ValueTable(ValueTable &&) = default; -GVN::ValueTable::~ValueTable() = default; -GVN::ValueTable &GVN::ValueTable::operator=(const GVN::ValueTable &Arg) = default; +GVNPass::ValueTable::ValueTable() = default; +GVNPass::ValueTable::ValueTable(const ValueTable &) = default; +GVNPass::ValueTable::ValueTable(ValueTable &&) = default; +GVNPass::ValueTable::~ValueTable() = default; +GVNPass::ValueTable & +GVNPass::ValueTable::operator=(const GVNPass::ValueTable &Arg) = default; /// add - Insert a value into the table with a specified value number. -void GVN::ValueTable::add(Value *V, uint32_t num) { +void GVNPass::ValueTable::add(Value *V, uint32_t num) { valueNumbering.insert(std::make_pair(V, num)); if (PHINode *PN = dyn_cast<PHINode>(V)) NumberingPhi[num] = PN; } -uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) { +uint32_t GVNPass::ValueTable::lookupOrAddCall(CallInst *C) { if (AA->doesNotAccessMemory(C)) { Expression exp = createExpr(C); uint32_t e = assignExpNewValueNum(exp).first; @@ -421,13 +423,12 @@ uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) { // a normal load or store instruction. 
CallInst *local_cdep = dyn_cast<CallInst>(local_dep.getInst()); - if (!local_cdep || - local_cdep->getNumArgOperands() != C->getNumArgOperands()) { + if (!local_cdep || local_cdep->arg_size() != C->arg_size()) { valueNumbering[C] = nextValueNumber; return nextValueNumber++; } - for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) { + for (unsigned i = 0, e = C->arg_size(); i < e; ++i) { uint32_t c_vn = lookupOrAdd(C->getArgOperand(i)); uint32_t cd_vn = lookupOrAdd(local_cdep->getArgOperand(i)); if (c_vn != cd_vn) { @@ -477,11 +478,11 @@ uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) { return nextValueNumber++; } - if (cdep->getNumArgOperands() != C->getNumArgOperands()) { + if (cdep->arg_size() != C->arg_size()) { valueNumbering[C] = nextValueNumber; return nextValueNumber++; } - for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) { + for (unsigned i = 0, e = C->arg_size(); i < e; ++i) { uint32_t c_vn = lookupOrAdd(C->getArgOperand(i)); uint32_t cd_vn = lookupOrAdd(cdep->getArgOperand(i)); if (c_vn != cd_vn) { @@ -500,11 +501,13 @@ uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) { } /// Returns true if a value number exists for the specified value. -bool GVN::ValueTable::exists(Value *V) const { return valueNumbering.count(V) != 0; } +bool GVNPass::ValueTable::exists(Value *V) const { + return valueNumbering.count(V) != 0; +} /// lookup_or_add - Returns the value number for the specified value, assigning /// it a new number if it did not have one before. -uint32_t GVN::ValueTable::lookupOrAdd(Value *V) { +uint32_t GVNPass::ValueTable::lookupOrAdd(Value *V) { DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V); if (VI != valueNumbering.end()) return VI->second; @@ -581,7 +584,7 @@ uint32_t GVN::ValueTable::lookupOrAdd(Value *V) { /// Returns the value number of the specified value. Fails if /// the value has not yet been numbered. -uint32_t GVN::ValueTable::lookup(Value *V, bool Verify) const { +uint32_t GVNPass::ValueTable::lookup(Value *V, bool Verify) const { DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V); if (Verify) { assert(VI != valueNumbering.end() && "Value not numbered?"); @@ -594,15 +597,15 @@ uint32_t GVN::ValueTable::lookup(Value *V, bool Verify) const { /// assigning it a new number if it did not have one before. Useful when /// we deduced the result of a comparison, but don't immediately have an /// instruction realizing that comparison to hand. -uint32_t GVN::ValueTable::lookupOrAddCmp(unsigned Opcode, - CmpInst::Predicate Predicate, - Value *LHS, Value *RHS) { +uint32_t GVNPass::ValueTable::lookupOrAddCmp(unsigned Opcode, + CmpInst::Predicate Predicate, + Value *LHS, Value *RHS) { Expression exp = createCmpExpr(Opcode, Predicate, LHS, RHS); return assignExpNewValueNum(exp).first; } /// Remove all entries from the ValueTable. -void GVN::ValueTable::clear() { +void GVNPass::ValueTable::clear() { valueNumbering.clear(); expressionNumbering.clear(); NumberingPhi.clear(); @@ -614,7 +617,7 @@ void GVN::ValueTable::clear() { } /// Remove a value from the value numbering. -void GVN::ValueTable::erase(Value *V) { +void GVNPass::ValueTable::erase(Value *V) { uint32_t Num = valueNumbering.lookup(V); valueNumbering.erase(V); // If V is PHINode, V <--> value number is an one-to-one mapping. @@ -624,7 +627,7 @@ void GVN::ValueTable::erase(Value *V) { /// verifyRemoved - Verify that the value is removed from all internal data /// structures. 
-void GVN::ValueTable::verifyRemoved(const Value *V) const { +void GVNPass::ValueTable::verifyRemoved(const Value *V) const { for (DenseMap<Value*, uint32_t>::const_iterator I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) { assert(I->first != V && "Inst still occurs in value numbering map!"); @@ -635,28 +638,28 @@ void GVN::ValueTable::verifyRemoved(const Value *V) const { // GVN Pass //===----------------------------------------------------------------------===// -bool GVN::isPREEnabled() const { +bool GVNPass::isPREEnabled() const { return Options.AllowPRE.getValueOr(GVNEnablePRE); } -bool GVN::isLoadPREEnabled() const { +bool GVNPass::isLoadPREEnabled() const { return Options.AllowLoadPRE.getValueOr(GVNEnableLoadPRE); } -bool GVN::isLoadInLoopPREEnabled() const { +bool GVNPass::isLoadInLoopPREEnabled() const { return Options.AllowLoadInLoopPRE.getValueOr(GVNEnableLoadInLoopPRE); } -bool GVN::isLoadPRESplitBackedgeEnabled() const { +bool GVNPass::isLoadPRESplitBackedgeEnabled() const { return Options.AllowLoadPRESplitBackedge.getValueOr( GVNEnableSplitBackedgeInLoadPRE); } -bool GVN::isMemDepEnabled() const { +bool GVNPass::isMemDepEnabled() const { return Options.AllowMemDep.getValueOr(GVNEnableMemDep); } -PreservedAnalyses GVN::run(Function &F, FunctionAnalysisManager &AM) { +PreservedAnalyses GVNPass::run(Function &F, FunctionAnalysisManager &AM) { // FIXME: The order of evaluation of these 'getResult' calls is very // significant! Re-ordering these variables will cause GVN when run alone to // be less effective! We should fix memdep and basic-aa to not exhibit this @@ -684,8 +687,26 @@ PreservedAnalyses GVN::run(Function &F, FunctionAnalysisManager &AM) { return PA; } +void GVNPass::printPipeline( + raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { + static_cast<PassInfoMixin<GVNPass> *>(this)->printPipeline( + OS, MapClassName2PassName); + + OS << "<"; + if (Options.AllowPRE != None) + OS << (Options.AllowPRE.getValue() ? "" : "no-") << "pre;"; + if (Options.AllowLoadPRE != None) + OS << (Options.AllowLoadPRE.getValue() ? "" : "no-") << "load-pre;"; + if (Options.AllowLoadPRESplitBackedge != None) + OS << (Options.AllowLoadPRESplitBackedge.getValue() ? "" : "no-") + << "split-backedge-load-pre;"; + if (Options.AllowMemDep != None) + OS << (Options.AllowMemDep.getValue() ? "" : "no-") << "memdep"; + OS << ">"; +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void GVN::dump(DenseMap<uint32_t, Value*>& d) const { +LLVM_DUMP_METHOD void GVNPass::dump(DenseMap<uint32_t, Value *> &d) const { errs() << "{\n"; for (auto &I : d) { errs() << I.first << "\n"; @@ -835,7 +856,7 @@ static bool IsValueFullyAvailableInBlock( static Value * ConstructSSAForLoadSet(LoadInst *Load, SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock, - GVN &gvn) { + GVNPass &gvn) { // Check for the fully redundant, dominating load case. In this case, we can // just use the dominating value directly. 
if (ValuesPerBlock.size() == 1 && @@ -878,7 +899,7 @@ ConstructSSAForLoadSet(LoadInst *Load, Value *AvailableValue::MaterializeAdjustedValue(LoadInst *Load, Instruction *InsertPt, - GVN &gvn) const { + GVNPass &gvn) const { Value *Res; Type *LoadTy = Load->getType(); const DataLayout &DL = Load->getModule()->getDataLayout(); @@ -1002,8 +1023,8 @@ static void reportMayClobberedLoad(LoadInst *Load, MemDepResult DepInfo, ORE->emit(R); } -bool GVN::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo, - Value *Address, AvailableValue &Res) { +bool GVNPass::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo, + Value *Address, AvailableValue &Res) { assert((DepInfo.isDef() || DepInfo.isClobber()) && "expected a local dependence"); assert(Load->isUnordered() && "rules below are incorrect for ordered access"); @@ -1137,9 +1158,9 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo, return false; } -void GVN::AnalyzeLoadAvailability(LoadInst *Load, LoadDepVect &Deps, - AvailValInBlkVect &ValuesPerBlock, - UnavailBlkVect &UnavailableBlocks) { +void GVNPass::AnalyzeLoadAvailability(LoadInst *Load, LoadDepVect &Deps, + AvailValInBlkVect &ValuesPerBlock, + UnavailBlkVect &UnavailableBlocks) { // Filter out useless results (non-locals, etc). Keep track of the blocks // where we have a value available in repl, also keep track of whether we see // dependencies that produce an unknown value for the load (such as a call @@ -1182,7 +1203,7 @@ void GVN::AnalyzeLoadAvailability(LoadInst *Load, LoadDepVect &Deps, "post condition violation"); } -void GVN::eliminatePartiallyRedundantLoad( +void GVNPass::eliminatePartiallyRedundantLoad( LoadInst *Load, AvailValInBlkVect &ValuesPerBlock, MapVector<BasicBlock *, Value *> &AvailableLoads) { for (const auto &AvailableLoad : AvailableLoads) { @@ -1212,8 +1233,7 @@ void GVN::eliminatePartiallyRedundantLoad( } // Transfer the old load's AA tags to the new load. - AAMDNodes Tags; - Load->getAAMetadata(Tags); + AAMDNodes Tags = Load->getAAMetadata(); if (Tags) NewLoad->setAAMetadata(Tags); @@ -1257,8 +1277,8 @@ void GVN::eliminatePartiallyRedundantLoad( }); } -bool GVN::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock, - UnavailBlkVect &UnavailableBlocks) { +bool GVNPass::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock, + UnavailBlkVect &UnavailableBlocks) { // Okay, we have *some* definitions of the value. This means that the value // is available in some of our (transitive) predecessors. Lets think about // doing PRE of this load. This will involve inserting a new load into the @@ -1498,8 +1518,9 @@ bool GVN::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock, return true; } -bool GVN::performLoopLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock, - UnavailBlkVect &UnavailableBlocks) { +bool GVNPass::performLoopLoadPRE(LoadInst *Load, + AvailValInBlkVect &ValuesPerBlock, + UnavailBlkVect &UnavailableBlocks) { if (!LI) return false; @@ -1590,7 +1611,7 @@ static void reportLoadElim(LoadInst *Load, Value *AvailableValue, /// Attempt to eliminate a load whose dependencies are /// non-local by performing PHI construction. -bool GVN::processNonLocalLoad(LoadInst *Load) { +bool GVNPass::processNonLocalLoad(LoadInst *Load) { // non-local speculations are not allowed under asan. 
if (Load->getParent()->getParent()->hasFnAttribute( Attribute::SanitizeAddress) || @@ -1622,10 +1643,8 @@ bool GVN::processNonLocalLoad(LoadInst *Load) { // If this load follows a GEP, see if we can PRE the indices before analyzing. if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Load->getOperand(0))) { - for (GetElementPtrInst::op_iterator OI = GEP->idx_begin(), - OE = GEP->idx_end(); - OI != OE; ++OI) - if (Instruction *I = dyn_cast<Instruction>(OI->get())) + for (Use &U : GEP->indices()) + if (Instruction *I = dyn_cast<Instruction>(U.get())) Changed |= performScalarPRE(I); } @@ -1673,8 +1692,11 @@ bool GVN::processNonLocalLoad(LoadInst *Load) { if (!isLoadInLoopPREEnabled() && LI && LI->getLoopFor(Load->getParent())) return Changed; - return Changed || PerformLoadPRE(Load, ValuesPerBlock, UnavailableBlocks) || - performLoopLoadPRE(Load, ValuesPerBlock, UnavailableBlocks); + if (performLoopLoadPRE(Load, ValuesPerBlock, UnavailableBlocks) || + PerformLoadPRE(Load, ValuesPerBlock, UnavailableBlocks)) + return true; + + return Changed; } static bool impliesEquivalanceIfTrue(CmpInst* Cmp) { @@ -1738,7 +1760,7 @@ static bool hasUsersIn(Value *V, BasicBlock *BB) { return false; } -bool GVN::processAssumeIntrinsic(AssumeInst *IntrinsicI) { +bool GVNPass::processAssumeIntrinsic(AssumeInst *IntrinsicI) { Value *V = IntrinsicI->getArgOperand(0); if (ConstantInt *Cond = dyn_cast<ConstantInt>(V)) { @@ -1882,7 +1904,7 @@ static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) { /// Attempt to eliminate a load, first by eliminating it /// locally, and then attempting non-local elimination if that fails. -bool GVN::processLoad(LoadInst *L) { +bool GVNPass::processLoad(LoadInst *L) { if (!MD) return false; @@ -1936,7 +1958,7 @@ bool GVN::processLoad(LoadInst *L) { /// Return a pair the first field showing the value number of \p Exp and the /// second field showing whether it is a value number newly created. std::pair<uint32_t, bool> -GVN::ValueTable::assignExpNewValueNum(Expression &Exp) { +GVNPass::ValueTable::assignExpNewValueNum(Expression &Exp) { uint32_t &e = expressionNumbering[Exp]; bool CreateNewValNum = !e; if (CreateNewValNum) { @@ -1951,8 +1973,8 @@ GVN::ValueTable::assignExpNewValueNum(Expression &Exp) { /// Return whether all the values related with the same \p num are /// defined in \p BB. -bool GVN::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB, - GVN &Gvn) { +bool GVNPass::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB, + GVNPass &Gvn) { LeaderTableEntry *Vals = &Gvn.LeaderTable[Num]; while (Vals && Vals->BB == BB) Vals = Vals->Next; @@ -1960,9 +1982,9 @@ bool GVN::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB, } /// Wrap phiTranslateImpl to provide caching functionality. -uint32_t GVN::ValueTable::phiTranslate(const BasicBlock *Pred, - const BasicBlock *PhiBlock, uint32_t Num, - GVN &Gvn) { +uint32_t GVNPass::ValueTable::phiTranslate(const BasicBlock *Pred, + const BasicBlock *PhiBlock, + uint32_t Num, GVNPass &Gvn) { auto FindRes = PhiTranslateTable.find({Num, Pred}); if (FindRes != PhiTranslateTable.end()) return FindRes->second; @@ -1973,9 +1995,10 @@ uint32_t GVN::ValueTable::phiTranslate(const BasicBlock *Pred, // Return true if the value number \p Num and NewNum have equal value. // Return false if the result is unknown. 
-bool GVN::ValueTable::areCallValsEqual(uint32_t Num, uint32_t NewNum, - const BasicBlock *Pred, - const BasicBlock *PhiBlock, GVN &Gvn) { +bool GVNPass::ValueTable::areCallValsEqual(uint32_t Num, uint32_t NewNum, + const BasicBlock *Pred, + const BasicBlock *PhiBlock, + GVNPass &Gvn) { CallInst *Call = nullptr; LeaderTableEntry *Vals = &Gvn.LeaderTable[Num]; while (Vals) { @@ -2008,9 +2031,9 @@ bool GVN::ValueTable::areCallValsEqual(uint32_t Num, uint32_t NewNum, /// Translate value number \p Num using phis, so that it has the values of /// the phis in BB. -uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock *Pred, - const BasicBlock *PhiBlock, - uint32_t Num, GVN &Gvn) { +uint32_t GVNPass::ValueTable::phiTranslateImpl(const BasicBlock *Pred, + const BasicBlock *PhiBlock, + uint32_t Num, GVNPass &Gvn) { if (PHINode *PN = NumberingPhi[Num]) { for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) { if (PN->getParent() == PhiBlock && PN->getIncomingBlock(i) == Pred) @@ -2063,8 +2086,8 @@ uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock *Pred, /// Erase stale entry from phiTranslate cache so phiTranslate can be computed /// again. -void GVN::ValueTable::eraseTranslateCacheEntry(uint32_t Num, - const BasicBlock &CurrBlock) { +void GVNPass::ValueTable::eraseTranslateCacheEntry( + uint32_t Num, const BasicBlock &CurrBlock) { for (const BasicBlock *Pred : predecessors(&CurrBlock)) PhiTranslateTable.erase({Num, Pred}); } @@ -2074,7 +2097,7 @@ void GVN::ValueTable::eraseTranslateCacheEntry(uint32_t Num, // and then scan the list to find one whose block dominates the block in // question. This is fast because dominator tree queries consist of only // a few comparisons of DFS numbers. -Value *GVN::findLeader(const BasicBlock *BB, uint32_t num) { +Value *GVNPass::findLeader(const BasicBlock *BB, uint32_t num) { LeaderTableEntry Vals = LeaderTable[num]; if (!Vals.Val) return nullptr; @@ -2113,7 +2136,7 @@ static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E, return Pred != nullptr; } -void GVN::assignBlockRPONumber(Function &F) { +void GVNPass::assignBlockRPONumber(Function &F) { BlockRPONumber.clear(); uint32_t NextBlockNumber = 1; ReversePostOrderTraversal<Function *> RPOT(&F); @@ -2122,7 +2145,7 @@ void GVN::assignBlockRPONumber(Function &F) { InvalidBlockRPONumbers = false; } -bool GVN::replaceOperandsForInBlockEquality(Instruction *Instr) const { +bool GVNPass::replaceOperandsForInBlockEquality(Instruction *Instr) const { bool Changed = false; for (unsigned OpNum = 0; OpNum < Instr->getNumOperands(); ++OpNum) { Value *Operand = Instr->getOperand(OpNum); @@ -2142,8 +2165,9 @@ bool GVN::replaceOperandsForInBlockEquality(Instruction *Instr) const { /// 'RHS' everywhere in the scope. Returns whether a change was made. /// If DominatesByEdge is false, then it means that we will propagate the RHS /// value starting from the end of Root.Start. 
-bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root, - bool DominatesByEdge) { +bool GVNPass::propagateEquality(Value *LHS, Value *RHS, + const BasicBlockEdge &Root, + bool DominatesByEdge) { SmallVector<std::pair<Value*, Value*>, 4> Worklist; Worklist.push_back(std::make_pair(LHS, RHS)); bool Changed = false; @@ -2291,7 +2315,7 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root, /// When calculating availability, handle an instruction /// by inserting it into the appropriate sets -bool GVN::processInstruction(Instruction *I) { +bool GVNPass::processInstruction(Instruction *I) { // Ignore dbg info intrinsics. if (isa<DbgInfoIntrinsic>(I)) return false; @@ -2432,10 +2456,10 @@ bool GVN::processInstruction(Instruction *I) { } /// runOnFunction - This is the main transformation entry point for a function. -bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT, - const TargetLibraryInfo &RunTLI, AAResults &RunAA, - MemoryDependenceResults *RunMD, LoopInfo *LI, - OptimizationRemarkEmitter *RunORE, MemorySSA *MSSA) { +bool GVNPass::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT, + const TargetLibraryInfo &RunTLI, AAResults &RunAA, + MemoryDependenceResults *RunMD, LoopInfo *LI, + OptimizationRemarkEmitter *RunORE, MemorySSA *MSSA) { AC = &RunAC; DT = &RunDT; VN.setDomTree(DT); @@ -2457,10 +2481,8 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT, DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); // Merge unconditional branches, allowing PRE to catch more // optimization opportunities. - for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) { - BasicBlock *BB = &*FI++; - - bool removedBlock = MergeBlockIntoPredecessor(BB, &DTU, LI, MSSAU, MD); + for (BasicBlock &BB : llvm::make_early_inc_range(F)) { + bool removedBlock = MergeBlockIntoPredecessor(&BB, &DTU, LI, MSSAU, MD); if (removedBlock) ++NumGVNBlocks; @@ -2502,7 +2524,7 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT, return Changed; } -bool GVN::processBlock(BasicBlock *BB) { +bool GVNPass::processBlock(BasicBlock *BB) { // FIXME: Kill off InstrsToErase by doing erasing eagerly in a helper function // (and incrementing BI before processing an instruction). assert(InstrsToErase.empty() && @@ -2563,8 +2585,8 @@ bool GVN::processBlock(BasicBlock *BB) { } // Instantiate an expression in a predecessor that lacked it. -bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred, - BasicBlock *Curr, unsigned int ValNo) { +bool GVNPass::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred, + BasicBlock *Curr, unsigned int ValNo) { // Because we are going top-down through the block, all value numbers // will be available in the predecessor by the time we need them. Any // that weren't originally present will have been instantiated earlier @@ -2612,7 +2634,7 @@ bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred, return true; } -bool GVN::performScalarPRE(Instruction *CurInst) { +bool GVNPass::performScalarPRE(Instruction *CurInst) { if (isa<AllocaInst>(CurInst) || CurInst->isTerminator() || isa<PHINode>(CurInst) || CurInst->getType()->isVoidTy() || CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() || @@ -2797,7 +2819,7 @@ bool GVN::performScalarPRE(Instruction *CurInst) { /// Perform a purely local form of PRE that looks for diamond /// control flow patterns and attempts to perform simple PRE at the join point. 
-bool GVN::performPRE(Function &F) { +bool GVNPass::performPRE(Function &F) { bool Changed = false; for (BasicBlock *CurrentBlock : depth_first(&F.getEntryBlock())) { // Nothing to PRE in the entry block. @@ -2824,7 +2846,7 @@ bool GVN::performPRE(Function &F) { /// Split the critical edge connecting the given two blocks, and return /// the block inserted to the critical edge. -BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) { +BasicBlock *GVNPass::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) { // GVN does not require loop-simplify, do not try to preserve it if it is not // possible. BasicBlock *BB = SplitCriticalEdge( @@ -2840,7 +2862,7 @@ BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) { /// Split critical edges found during the previous /// iteration that may enable further optimization. -bool GVN::splitCriticalEdges() { +bool GVNPass::splitCriticalEdges() { if (toSplit.empty()) return false; @@ -2860,7 +2882,7 @@ bool GVN::splitCriticalEdges() { } /// Executes one iteration of GVN -bool GVN::iterateOnFunction(Function &F) { +bool GVNPass::iterateOnFunction(Function &F) { cleanupGlobalSets(); // Top-down walk of the dominator tree @@ -2876,7 +2898,7 @@ bool GVN::iterateOnFunction(Function &F) { return Changed; } -void GVN::cleanupGlobalSets() { +void GVNPass::cleanupGlobalSets() { VN.clear(); LeaderTable.clear(); BlockRPONumber.clear(); @@ -2887,7 +2909,7 @@ void GVN::cleanupGlobalSets() { /// Verify that the specified instruction does not occur in our /// internal data structures. -void GVN::verifyRemoved(const Instruction *Inst) const { +void GVNPass::verifyRemoved(const Instruction *Inst) const { VN.verifyRemoved(Inst); // Walk through the value number scope to make sure the instruction isn't @@ -2907,7 +2929,7 @@ void GVN::verifyRemoved(const Instruction *Inst) const { /// function is to add all these blocks to "DeadBlocks". For the dead blocks' /// live successors, update their phi nodes by replacing the operands /// corresponding to dead blocks with UndefVal. -void GVN::addDeadBlock(BasicBlock *BB) { +void GVNPass::addDeadBlock(BasicBlock *BB) { SmallVector<BasicBlock *, 4> NewDead; SmallSetVector<BasicBlock *, 4> DF; @@ -2995,7 +3017,7 @@ void GVN::addDeadBlock(BasicBlock *BB) { // dead blocks with "UndefVal" in an hope these PHIs will optimized away. // // Return true iff *NEW* dead code are found. -bool GVN::processFoldableCondBr(BranchInst *BI) { +bool GVNPass::processFoldableCondBr(BranchInst *BI) { if (!BI || BI->isUnconditional()) return false; @@ -3023,7 +3045,7 @@ bool GVN::processFoldableCondBr(BranchInst *BI) { // associated val-num. As it normally has far more live instructions than dead // instructions, it makes more sense just to "fabricate" a val-number for the // dead code than checking if instruction involved is dead or not. 
-void GVN::assignValNumForDeadCode() { +void GVNPass::assignValNumForDeadCode() { for (BasicBlock *BB : DeadBlocks) { for (Instruction &Inst : *BB) { unsigned ValNum = VN.lookupOrAdd(&Inst); @@ -3078,7 +3100,7 @@ public: } private: - GVN Impl; + GVNPass Impl; }; char GVNLegacyPass::ID = 0; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNHoist.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNHoist.cpp index 790d71992da4..fdc3afd9348a 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNHoist.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNHoist.cpp @@ -169,7 +169,7 @@ class InsnInfo { public: // Inserts I and its value number in VNtoScalars. - void insert(Instruction *I, GVN::ValueTable &VN) { + void insert(Instruction *I, GVNPass::ValueTable &VN) { // Scalar instruction. unsigned V = VN.lookupOrAdd(I); VNtoScalars[{V, InvalidVN}].push_back(I); @@ -184,7 +184,7 @@ class LoadInfo { public: // Insert Load and the value number of its memory address in VNtoLoads. - void insert(LoadInst *Load, GVN::ValueTable &VN) { + void insert(LoadInst *Load, GVNPass::ValueTable &VN) { if (Load->isSimple()) { unsigned V = VN.lookupOrAdd(Load->getPointerOperand()); VNtoLoads[{V, InvalidVN}].push_back(Load); @@ -201,7 +201,7 @@ class StoreInfo { public: // Insert the Store and a hash number of the store address and the stored // value in VNtoStores. - void insert(StoreInst *Store, GVN::ValueTable &VN) { + void insert(StoreInst *Store, GVNPass::ValueTable &VN) { if (!Store->isSimple()) return; // Hash the store address and the stored value. @@ -221,7 +221,7 @@ class CallInfo { public: // Insert Call and its value numbering in one of the VNtoCalls* containers. - void insert(CallInst *Call, GVN::ValueTable &VN) { + void insert(CallInst *Call, GVNPass::ValueTable &VN) { // A call that doesNotAccessMemory is handled as a Scalar, // onlyReadsMemory will be handled as a Load instruction, // all other calls will be handled as stores. @@ -274,7 +274,7 @@ public: unsigned int rank(const Value *V) const; private: - GVN::ValueTable VN; + GVNPass::ValueTable VN; DominatorTree *DT; PostDominatorTree *PDT; AliasAnalysis *AA; @@ -377,12 +377,12 @@ private: if (!Root) return; // Depth first walk on PDom tree to fill the CHIargs at each PDF. - RenameStackType RenameStack; for (auto Node : depth_first(Root)) { BasicBlock *BB = Node->getBlock(); if (!BB) continue; + RenameStackType RenameStack; // Collect all values in BB and push to stack. fillRenameStack(BB, ValueBBs, RenameStack); @@ -827,6 +827,8 @@ void GVNHoist::fillRenameStack(BasicBlock *BB, InValuesType &ValueBBs, auto it1 = ValueBBs.find(BB); if (it1 != ValueBBs.end()) { // Iterate in reverse order to keep lower ranked values on the top. 
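The GVNHoist hunk above narrows RenameStack from one container shared by the whole post-dominator walk to one constructed per visited block, so values pushed while renaming one block cannot linger into the next. The scoping difference, reduced to plain standard C++ with illustrative names:

    #include <string>
    #include <vector>

    // Loop-local state: the stack is built empty for each block and destroyed
    // at the end of the iteration, so nothing leaks across blocks.
    static void visitBlocksSketch(const std::vector<std::string> &Blocks) {
      for (const std::string &Block : Blocks) {
        std::vector<std::string> RenameStack; // fresh per block
        RenameStack.push_back(Block);         // stand-in for fillRenameStack()
        // ... consume RenameStack for this block only ...
      }
    }
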
+ LLVM_DEBUG(dbgs() << "\nVisiting: " << BB->getName() + << " for pushing instructions on stack";); for (std::pair<VNType, Instruction *> &VI : reverse(it1->second)) { // Get the value of instruction I LLVM_DEBUG(dbgs() << "\nPushing on stack: " << *VI.second); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GuardWidening.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GuardWidening.cpp index 61eb4ce0ed46..82b81003ef21 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GuardWidening.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GuardWidening.cpp @@ -46,6 +46,7 @@ #include "llvm/Analysis/GuardUtils.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/ConstantRange.h" @@ -105,8 +106,10 @@ static void setCondition(Instruction *I, Value *NewCond) { } // Eliminates the guard instruction properly. -static void eliminateGuard(Instruction *GuardInst) { +static void eliminateGuard(Instruction *GuardInst, MemorySSAUpdater *MSSAU) { GuardInst->eraseFromParent(); + if (MSSAU) + MSSAU->removeMemoryAccess(GuardInst); ++GuardsEliminated; } @@ -114,6 +117,7 @@ class GuardWideningImpl { DominatorTree &DT; PostDominatorTree *PDT; LoopInfo &LI; + MemorySSAUpdater *MSSAU; /// Together, these describe the region of interest. This might be all of /// the blocks within a function, or only a given loop's blocks and preheader. @@ -269,12 +273,12 @@ class GuardWideningImpl { } public: - explicit GuardWideningImpl(DominatorTree &DT, PostDominatorTree *PDT, - LoopInfo &LI, DomTreeNode *Root, + LoopInfo &LI, MemorySSAUpdater *MSSAU, + DomTreeNode *Root, std::function<bool(BasicBlock*)> BlockFilter) - : DT(DT), PDT(PDT), LI(LI), Root(Root), BlockFilter(BlockFilter) - {} + : DT(DT), PDT(PDT), LI(LI), MSSAU(MSSAU), Root(Root), + BlockFilter(BlockFilter) {} /// The entry point for this pass. bool run(); @@ -313,7 +317,7 @@ bool GuardWideningImpl::run() { if (!WidenedGuards.count(I)) { assert(isa<ConstantInt>(getCondition(I)) && "Should be!"); if (isSupportedGuardInstruction(I)) - eliminateGuard(I); + eliminateGuard(I, MSSAU); else { assert(isa<BranchInst>(I) && "Eliminated something other than guard or branch?"); @@ -514,27 +518,20 @@ bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1, ConstantRange CR1 = ConstantRange::makeExactICmpRegion(Pred1, RHS1->getValue()); - // SubsetIntersect is a subset of the actual mathematical intersection of - // CR0 and CR1, while SupersetIntersect is a superset of the actual - // mathematical intersection. If these two ConstantRanges are equal, then - // we know we were able to represent the actual mathematical intersection - // of CR0 and CR1, and can use the same to generate an icmp instruction. - // // Given what we're doing here and the semantics of guards, it would - // actually be correct to just use SubsetIntersect, but that may be too + // be correct to use a subset intersection, but that may be too // aggressive in cases we care about. 
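The hunk that follows drops the hand-rolled SubsetIntersect/SupersetIntersect comparison in favour of ConstantRange::exactIntersectWith, which yields a range only when the true intersection is exactly representable. A minimal sketch of recomputing a widened guard condition from two icmp regions; widenToSingleICmp is a hypothetical helper, while the ConstantRange calls are the ones used in the patch.

    #include "llvm/ADT/APInt.h"
    #include "llvm/ADT/Optional.h"
    #include "llvm/IR/ConstantRange.h"
    #include "llvm/IR/InstrTypes.h"

    // Try to express (LHS pred0 C0) && (LHS pred1 C1) as one icmp region.
    // C0 and C1 must have the same bit width. Returns false when the exact
    // intersection is not a single range or has no equivalent icmp.
    static bool widenToSingleICmp(llvm::CmpInst::Predicate P0, const llvm::APInt &C0,
                                  llvm::CmpInst::Predicate P1, const llvm::APInt &C1,
                                  llvm::CmpInst::Predicate &NewPred,
                                  llvm::APInt &NewRHS) {
      using llvm::ConstantRange;
      ConstantRange R0 = ConstantRange::makeExactICmpRegion(P0, C0);
      ConstantRange R1 = ConstantRange::makeExactICmpRegion(P1, C1);
      if (llvm::Optional<ConstantRange> Exact = R0.exactIntersectWith(R1))
        return Exact->getEquivalentICmp(NewPred, NewRHS);
      return false;
    }
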
- auto SubsetIntersect = CR0.inverse().unionWith(CR1.inverse()).inverse(); - auto SupersetIntersect = CR0.intersectWith(CR1); - - APInt NewRHSAP; - CmpInst::Predicate Pred; - if (SubsetIntersect == SupersetIntersect && - SubsetIntersect.getEquivalentICmp(Pred, NewRHSAP)) { - if (InsertPt) { - ConstantInt *NewRHS = ConstantInt::get(Cond0->getContext(), NewRHSAP); - Result = new ICmpInst(InsertPt, Pred, LHS, NewRHS, "wide.chk"); + if (Optional<ConstantRange> Intersect = CR0.exactIntersectWith(CR1)) { + APInt NewRHSAP; + CmpInst::Predicate Pred; + if (Intersect->getEquivalentICmp(Pred, NewRHSAP)) { + if (InsertPt) { + ConstantInt *NewRHS = + ConstantInt::get(Cond0->getContext(), NewRHSAP); + Result = new ICmpInst(InsertPt, Pred, LHS, NewRHS, "wide.chk"); + } + return true; } - return true; } } } @@ -766,12 +763,18 @@ PreservedAnalyses GuardWideningPass::run(Function &F, auto &DT = AM.getResult<DominatorTreeAnalysis>(F); auto &LI = AM.getResult<LoopAnalysis>(F); auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F); - if (!GuardWideningImpl(DT, &PDT, LI, DT.getRootNode(), - [](BasicBlock*) { return true; } ).run()) + auto *MSSAA = AM.getCachedResult<MemorySSAAnalysis>(F); + std::unique_ptr<MemorySSAUpdater> MSSAU; + if (MSSAA) + MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAA->getMSSA()); + if (!GuardWideningImpl(DT, &PDT, LI, MSSAU ? MSSAU.get() : nullptr, + DT.getRootNode(), [](BasicBlock *) { return true; }) + .run()) return PreservedAnalyses::all(); PreservedAnalyses PA; PA.preserveSet<CFGAnalyses>(); + PA.preserve<MemorySSAAnalysis>(); return PA; } @@ -784,11 +787,17 @@ PreservedAnalyses GuardWideningPass::run(Loop &L, LoopAnalysisManager &AM, auto BlockFilter = [&](BasicBlock *BB) { return BB == RootBB || L.contains(BB); }; - if (!GuardWideningImpl(AR.DT, nullptr, AR.LI, AR.DT.getNode(RootBB), - BlockFilter).run()) + std::unique_ptr<MemorySSAUpdater> MSSAU; + if (AR.MSSA) + MSSAU = std::make_unique<MemorySSAUpdater>(AR.MSSA); + if (!GuardWideningImpl(AR.DT, nullptr, AR.LI, MSSAU ? MSSAU.get() : nullptr, + AR.DT.getNode(RootBB), BlockFilter).run()) return PreservedAnalyses::all(); - return getLoopPassPreservedAnalyses(); + auto PA = getLoopPassPreservedAnalyses(); + if (AR.MSSA) + PA.preserve<MemorySSAAnalysis>(); + return PA; } namespace { @@ -805,8 +814,14 @@ struct GuardWideningLegacyPass : public FunctionPass { auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree(); - return GuardWideningImpl(DT, &PDT, LI, DT.getRootNode(), - [](BasicBlock*) { return true; } ).run(); + auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>(); + std::unique_ptr<MemorySSAUpdater> MSSAU; + if (MSSAWP) + MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAWP->getMSSA()); + return GuardWideningImpl(DT, &PDT, LI, MSSAU ? MSSAU.get() : nullptr, + DT.getRootNode(), + [](BasicBlock *) { return true; }) + .run(); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -814,6 +829,7 @@ struct GuardWideningLegacyPass : public FunctionPass { AU.addRequired<DominatorTreeWrapperPass>(); AU.addRequired<PostDominatorTreeWrapperPass>(); AU.addRequired<LoopInfoWrapperPass>(); + AU.addPreserved<MemorySSAWrapperPass>(); } }; @@ -833,13 +849,18 @@ struct LoopGuardWideningLegacyPass : public LoopPass { auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); auto *PDTWP = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>(); auto *PDT = PDTWP ? 
&PDTWP->getPostDomTree() : nullptr; + auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>(); + std::unique_ptr<MemorySSAUpdater> MSSAU; + if (MSSAWP) + MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAWP->getMSSA()); + BasicBlock *RootBB = L->getLoopPredecessor(); if (!RootBB) RootBB = L->getHeader(); auto BlockFilter = [&](BasicBlock *BB) { return BB == RootBB || L->contains(BB); }; - return GuardWideningImpl(DT, PDT, LI, + return GuardWideningImpl(DT, PDT, LI, MSSAU ? MSSAU.get() : nullptr, DT.getNode(RootBB), BlockFilter).run(); } @@ -847,6 +868,7 @@ struct LoopGuardWideningLegacyPass : public LoopPass { AU.setPreservesCFG(); getLoopAnalysisUsage(AU); AU.addPreserved<PostDominatorTreeWrapperPass>(); + AU.addPreserved<MemorySSAWrapperPass>(); } }; } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index 9ee2a2d0bf08..ae2fe2767074 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -89,6 +89,7 @@ #include <utility> using namespace llvm; +using namespace PatternMatch; #define DEBUG_TYPE "indvars" @@ -155,6 +156,10 @@ class IndVarSimplify { bool rewriteNonIntegerIVs(Loop *L); bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI); + /// Try to improve our exit conditions by converting condition from signed + /// to unsigned or rotating computation out of the loop. + /// (See inline comment about why this is duplicated from simplifyAndExtend) + bool canonicalizeExitCondition(Loop *L); /// Try to eliminate loop exits based on analyzeable exit counts bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter); /// Try to form loop invariant tests for loop exits by changing how many @@ -494,6 +499,7 @@ bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) { MadeAnyChanges = true; PN.setIncomingValue(IncomingValIdx, ExitVal->getIncomingValue(PreheaderIdx)); + SE->forgetValue(&PN); } } } @@ -541,18 +547,18 @@ static void visitIVCast(CastInst *Cast, WideIVInfo &WI, return; } - if (!WI.WidestNativeType) { + if (!WI.WidestNativeType || + Width > SE->getTypeSizeInBits(WI.WidestNativeType)) { WI.WidestNativeType = SE->getEffectiveSCEVType(Ty); WI.IsSigned = IsSigned; return; } - // We extend the IV to satisfy the sign of its first user, arbitrarily. - if (WI.IsSigned != IsSigned) - return; - - if (Width > SE->getTypeSizeInBits(WI.WidestNativeType)) - WI.WidestNativeType = SE->getEffectiveSCEVType(Ty); + // We extend the IV to satisfy the sign of its user(s), or 'signed' + // if there are multiple users with both sign- and zero extensions, + // in order not to introduce nondeterministic behaviour based on the + // unspecified order of a PHI nodes' users-iterator. + WI.IsSigned |= IsSigned; } //===----------------------------------------------------------------------===// @@ -1274,9 +1280,9 @@ bool IndVarSimplify::sinkUnusedInvariants(Loop *L) { // Skip debug info intrinsics. 
do { --I; - } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin()); + } while (I->isDebugOrPseudoInst() && I != Preheader->begin()); - if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin()) + if (I->isDebugOrPseudoInst() && I == Preheader->begin()) Done = true; } else { Done = true; @@ -1309,6 +1315,18 @@ static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken, replaceExitCond(BI, NewCond, DeadInsts); } +static void replaceLoopPHINodesWithPreheaderValues( + Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts) { + assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!"); + auto *LoopPreheader = L->getLoopPreheader(); + auto *LoopHeader = L->getHeader(); + for (auto &PN : LoopHeader->phis()) { + auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader); + PN.replaceAllUsesWith(PreheaderIncoming); + DeadInsts.emplace_back(&PN); + } +} + static void replaceWithInvariantCond( const Loop *L, BasicBlock *ExitingBB, ICmpInst::Predicate InvariantPred, const SCEV *InvariantLHS, const SCEV *InvariantRHS, SCEVExpander &Rewriter, @@ -1333,7 +1351,6 @@ static bool optimizeLoopExitWithUnknownExitCount( SmallVectorImpl<WeakTrackingVH> &DeadInsts) { ICmpInst::Predicate Pred; Value *LHS, *RHS; - using namespace PatternMatch; BasicBlock *TrueSucc, *FalseSucc; if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)), m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc)))) @@ -1394,6 +1411,140 @@ static bool optimizeLoopExitWithUnknownExitCount( return true; } +bool IndVarSimplify::canonicalizeExitCondition(Loop *L) { + // Note: This is duplicating a particular part on SimplifyIndVars reasoning. + // We need to duplicate it because given icmp zext(small-iv), C, IVUsers + // never reaches the icmp since the zext doesn't fold to an AddRec unless + // it already has flags. The alternative to this would be to extending the + // set of "interesting" IV users to include the icmp, but doing that + // regresses results in practice by querying SCEVs before trip counts which + // rely on them which results in SCEV caching sub-optimal answers. The + // concern about caching sub-optimal results is why we only query SCEVs of + // the loop invariant RHS here. + SmallVector<BasicBlock*, 16> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + bool Changed = false; + for (auto *ExitingBB : ExitingBlocks) { + auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); + if (!BI) + continue; + assert(BI->isConditional() && "exit branch must be conditional"); + + auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition()); + if (!ICmp || !ICmp->hasOneUse()) + continue; + + auto *LHS = ICmp->getOperand(0); + auto *RHS = ICmp->getOperand(1); + // For the range reasoning, avoid computing SCEVs in the loop to avoid + // poisoning cache with sub-optimal results. For the must-execute case, + // this is a neccessary precondition for correctness. 
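The range test used in the code below (and again for the rotate case further on) is the load-bearing fact: when the loop-invariant RHS always fits in the zero-extended image of the narrow type, both compare operands are known non-negative in the wide type, so the signed predicate can be replaced with its unsigned counterpart. Shown in isolation, with fitsInZExtOfNarrowType as a hypothetical helper and assuming OuterBits > InnerBits, as for a genuine zext:

    #include "llvm/IR/ConstantRange.h"

    // True if RHSRange (an unsigned range at OuterBits width) is contained in
    // the zero-extension of the narrow type's full range, i.e. [0, 2^InnerBits).
    static bool fitsInZExtOfNarrowType(unsigned InnerBits, unsigned OuterBits,
                                       const llvm::ConstantRange &RHSRange) {
      llvm::ConstantRange Full = llvm::ConstantRange::getFull(InnerBits);
      return Full.zeroExtend(OuterBits).contains(RHSRange);
    }
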
+ if (!L->isLoopInvariant(RHS)) { + if (!L->isLoopInvariant(LHS)) + continue; + // Same logic applies for the inverse case + std::swap(LHS, RHS); + } + + // Match (icmp signed-cond zext, RHS) + Value *LHSOp = nullptr; + if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned()) + continue; + + const DataLayout &DL = ExitingBB->getModule()->getDataLayout(); + const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType()); + const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType()); + auto FullCR = ConstantRange::getFull(InnerBitWidth); + FullCR = FullCR.zeroExtend(OuterBitWidth); + auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L)); + if (FullCR.contains(RHSCR)) { + // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus + // replace the signed condition with the unsigned version. + ICmp->setPredicate(ICmp->getUnsignedPredicate()); + Changed = true; + // Note: No SCEV invalidation needed. We've changed the predicate, but + // have not changed exit counts, or the values produced by the compare. + continue; + } + } + + // Now that we've canonicalized the condition to match the extend, + // see if we can rotate the extend out of the loop. + for (auto *ExitingBB : ExitingBlocks) { + auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); + if (!BI) + continue; + assert(BI->isConditional() && "exit branch must be conditional"); + + auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition()); + if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned()) + continue; + + bool Swapped = false; + auto *LHS = ICmp->getOperand(0); + auto *RHS = ICmp->getOperand(1); + if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS)) + // Nothing to rotate + continue; + if (L->isLoopInvariant(LHS)) { + // Same logic applies for the inverse case until we actually pick + // which operand of the compare to update. + Swapped = true; + std::swap(LHS, RHS); + } + assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS)); + + // Match (icmp unsigned-cond zext, RHS) + // TODO: Extend to handle corresponding sext/signed-cmp case + // TODO: Extend to other invertible functions + Value *LHSOp = nullptr; + if (!match(LHS, m_ZExt(m_Value(LHSOp)))) + continue; + + // In general, we only rotate if we can do so without increasing the number + // of instructions. The exception is when we have an zext(add-rec). The + // reason for allowing this exception is that we know we need to get rid + // of the zext for SCEV to be able to compute a trip count for said loops; + // we consider the new trip count valuable enough to increase instruction + // count by one. + if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp))) + continue; + + // Given a icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS + // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS) + // when zext is loop varying and RHS is loop invariant. This converts + // loop varying work to loop-invariant work. + auto doRotateTransform = [&]() { + assert(ICmp->isUnsigned() && "must have proven unsigned already"); + auto *NewRHS = + CastInst::Create(Instruction::Trunc, RHS, LHSOp->getType(), "", + L->getLoopPreheader()->getTerminator()); + ICmp->setOperand(Swapped ? 1 : 0, LHSOp); + ICmp->setOperand(Swapped ? 
0 : 1, NewRHS); + if (LHS->use_empty()) + DeadInsts.push_back(LHS); + }; + + + const DataLayout &DL = ExitingBB->getModule()->getDataLayout(); + const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType()); + const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType()); + auto FullCR = ConstantRange::getFull(InnerBitWidth); + FullCR = FullCR.zeroExtend(OuterBitWidth); + auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L)); + if (FullCR.contains(RHSCR)) { + doRotateTransform(); + Changed = true; + // Note, we are leaving SCEV in an unfortunately imprecise case here + // as rotation tends to reveal information about trip counts not + // previously visible. + continue; + } + } + + return Changed; +} + bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { SmallVector<BasicBlock*, 16> ExitingBlocks; L->getExitingBlocks(ExitingBlocks); @@ -1499,20 +1650,18 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { // If we know we'd exit on the first iteration, rewrite the exit to // reflect this. This does not imply the loop must exit through this // exit; there may be an earlier one taken on the first iteration. - // TODO: Given we know the backedge can't be taken, we should go ahead - // and break it. Or at least, kill all the header phis and simplify. + // We know that the backedge can't be taken, so we replace all + // the header PHIs with values coming from the preheader. if (ExitCount->isZero()) { foldExit(L, ExitingBB, true, DeadInsts); + replaceLoopPHINodesWithPreheaderValues(L, DeadInsts); Changed = true; continue; } - // If we end up with a pointer exit count, bail. Note that we can end up - // with a pointer exit count for one exiting block, and not for another in - // the same loop. - if (!ExitCount->getType()->isIntegerTy() || - !MaxExitCount->getType()->isIntegerTy()) - continue; + assert(ExitCount->getType()->isIntegerTy() && + MaxExitCount->getType()->isIntegerTy() && + "Exit counts must be integers"); Type *WiderType = SE->getWiderType(MaxExitCount->getType(), ExitCount->getType()); @@ -1569,14 +1718,11 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { // through *explicit* control flow. We have to eliminate the possibility of // implicit exits (see below) before we know it's truly exact. const SCEV *ExactBTC = SE->getBackedgeTakenCount(L); - if (isa<SCEVCouldNotCompute>(ExactBTC) || - !SE->isLoopInvariant(ExactBTC, L) || - !isSafeToExpand(ExactBTC, *SE)) + if (isa<SCEVCouldNotCompute>(ExactBTC) || !isSafeToExpand(ExactBTC, *SE)) return false; - // If we end up with a pointer exit count, bail. It may be unsized. - if (!ExactBTC->getType()->isIntegerTy()) - return false; + assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant"); + assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer"); auto BadExit = [&](BasicBlock *ExitingBB) { // If our exiting block exits multiple loops, we can only rewrite the @@ -1603,15 +1749,12 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { return true; const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); - if (isa<SCEVCouldNotCompute>(ExitCount) || - !SE->isLoopInvariant(ExitCount, L) || - !isSafeToExpand(ExitCount, *SE)) - return true; - - // If we end up with a pointer exit count, bail. It may be unsized. 
- if (!ExitCount->getType()->isIntegerTy()) + if (isa<SCEVCouldNotCompute>(ExitCount) || !isSafeToExpand(ExitCount, *SE)) return true; + assert(SE->isLoopInvariant(ExitCount, L) && + "Exit count must be loop invariant"); + assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer"); return false; }; @@ -1781,7 +1924,11 @@ bool IndVarSimplify::run(Loop *L) { } // Eliminate redundant IV cycles. - NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts); + NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI); + + // Try to convert exit conditions to unsigned and rotate computation + // out of the loop. Note: Handles invalidation internally if needed. + Changed |= canonicalizeExitCondition(L); // Try to eliminate loop exits based on analyzeable exit counts if (optimizeLoopExits(L, Rewriter)) { diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp index f7d631f5e785..883d4afff3bd 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp @@ -96,10 +96,13 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" @@ -115,6 +118,7 @@ #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" @@ -146,6 +150,14 @@ static const unsigned UninitializedAddressSpace = namespace { using ValueToAddrSpaceMapTy = DenseMap<const Value *, unsigned>; +// Different from ValueToAddrSpaceMapTy, where a new addrspace is inferred on +// the *def* of a value, PredicatedAddrSpaceMapTy is map where a new +// addrspace is inferred on the *use* of a pointer. This map is introduced to +// infer addrspace from the addrspace predicate assumption built from assume +// intrinsic. In that scenario, only specific uses (under valid assumption +// context) could be inferred with a new addrspace. +using PredicatedAddrSpaceMapTy = + DenseMap<std::pair<const Value *, const Value *>, unsigned>; using PostorderStackTy = llvm::SmallVector<PointerIntPair<Value *, 1, bool>, 4>; class InferAddressSpaces : public FunctionPass { @@ -160,6 +172,8 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); + AU.addPreserved<DominatorTreeWrapperPass>(); + AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<TargetTransformInfoWrapperPass>(); } @@ -167,6 +181,8 @@ public: }; class InferAddressSpacesImpl { + AssumptionCache &AC; + DominatorTree *DT = nullptr; const TargetTransformInfo *TTI = nullptr; const DataLayout *DL = nullptr; @@ -174,21 +190,24 @@ class InferAddressSpacesImpl { /// possible. unsigned FlatAddrSpace = 0; - // Returns the new address space of V if updated; otherwise, returns None. - Optional<unsigned> - updateAddressSpace(const Value &V, - const ValueToAddrSpaceMapTy &InferredAddrSpace) const; + // Try to update the address space of V. If V is updated, returns true and + // false otherwise. 
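As the new comment above stresses, the predicated map is keyed by a (user, pointer operand) pair rather than by the pointer alone, because an assume only justifies the narrower address space at uses it dominates. A hedged sketch of that keying; lookupPredicatedAS is an illustrative helper, and the map's element type mirrors the typedef introduced above.

    #include <utility>
    #include "llvm/ADT/DenseMap.h"
    #include "llvm/IR/Value.h"

    // Address spaces proven by an assume are recorded per use, not per def.
    using UseKeyedASMap =
        llvm::DenseMap<std::pair<const llvm::Value *, const llvm::Value *>, unsigned>;

    // Return the address space proven for Ptr at this particular user, or
    // DefaultAS (e.g. the flat address space) if nothing was proven here.
    static unsigned lookupPredicatedAS(const UseKeyedASMap &Map,
                                       const llvm::Value *UserInst,
                                       const llvm::Value *Ptr,
                                       unsigned DefaultAS) {
      auto It = Map.find(std::make_pair(UserInst, Ptr));
      return It == Map.end() ? DefaultAS : It->second;
    }
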
+ bool updateAddressSpace(const Value &V, + ValueToAddrSpaceMapTy &InferredAddrSpace, + PredicatedAddrSpaceMapTy &PredicatedAS) const; // Tries to infer the specific address space of each address expression in // Postorder. void inferAddressSpaces(ArrayRef<WeakTrackingVH> Postorder, - ValueToAddrSpaceMapTy *InferredAddrSpace) const; + ValueToAddrSpaceMapTy &InferredAddrSpace, + PredicatedAddrSpaceMapTy &PredicatedAS) const; bool isSafeToCastConstAddrSpace(Constant *C, unsigned NewAS) const; Value *cloneInstructionWithNewAddressSpace( Instruction *I, unsigned NewAddrSpace, const ValueToValueMapTy &ValueWithNewAddrSpace, + const PredicatedAddrSpaceMapTy &PredicatedAS, SmallVectorImpl<const Use *> *UndefUsesToFix) const; // Changes the flat address expressions in function F to point to specific @@ -196,7 +215,8 @@ class InferAddressSpacesImpl { // all flat expressions in the use-def graph of function F. bool rewriteWithNewAddressSpaces( const TargetTransformInfo &TTI, ArrayRef<WeakTrackingVH> Postorder, - const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const; + const ValueToAddrSpaceMapTy &InferredAddrSpace, + const PredicatedAddrSpaceMapTy &PredicatedAS, Function *F) const; void appendsFlatAddressExpressionToPostorderStack( Value *V, PostorderStackTy &PostorderStack, @@ -211,14 +231,18 @@ class InferAddressSpacesImpl { std::vector<WeakTrackingVH> collectFlatAddressExpressions(Function &F) const; Value *cloneValueWithNewAddressSpace( - Value *V, unsigned NewAddrSpace, - const ValueToValueMapTy &ValueWithNewAddrSpace, - SmallVectorImpl<const Use *> *UndefUsesToFix) const; + Value *V, unsigned NewAddrSpace, + const ValueToValueMapTy &ValueWithNewAddrSpace, + const PredicatedAddrSpaceMapTy &PredicatedAS, + SmallVectorImpl<const Use *> *UndefUsesToFix) const; unsigned joinAddressSpaces(unsigned AS1, unsigned AS2) const; + unsigned getPredicatedAddrSpace(const Value &V, Value *Opnd) const; + public: - InferAddressSpacesImpl(const TargetTransformInfo *TTI, unsigned FlatAddrSpace) - : TTI(TTI), FlatAddrSpace(FlatAddrSpace) {} + InferAddressSpacesImpl(AssumptionCache &AC, DominatorTree *DT, + const TargetTransformInfo *TTI, unsigned FlatAddrSpace) + : AC(AC), DT(DT), TTI(TTI), FlatAddrSpace(FlatAddrSpace) {} bool run(Function &F); }; @@ -232,8 +256,12 @@ void initializeInferAddressSpacesPass(PassRegistry &); } // end namespace llvm -INITIALIZE_PASS(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces", - false, false) +INITIALIZE_PASS_BEGIN(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces", + false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_END(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces", + false, false) // Check whether that's no-op pointer bicast using a pair of // `ptrtoint`/`inttoptr` due to the missing no-op pointer bitcast over @@ -505,6 +533,7 @@ InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const { static Value *operandWithNewAddressSpaceOrCreateUndef( const Use &OperandUse, unsigned NewAddrSpace, const ValueToValueMapTy &ValueWithNewAddrSpace, + const PredicatedAddrSpaceMapTy &PredicatedAS, SmallVectorImpl<const Use *> *UndefUsesToFix) { Value *Operand = OperandUse.get(); @@ -517,6 +546,18 @@ static Value *operandWithNewAddressSpaceOrCreateUndef( if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Operand)) return NewOperand; + Instruction *Inst = cast<Instruction>(OperandUse.getUser()); + auto I = PredicatedAS.find(std::make_pair(Inst, Operand)); 
+ if (I != PredicatedAS.end()) { + // Insert an addrspacecast on that operand before the user. + unsigned NewAS = I->second; + Type *NewPtrTy = PointerType::getWithSamePointeeType( + cast<PointerType>(Operand->getType()), NewAS); + auto *NewI = new AddrSpaceCastInst(Operand, NewPtrTy); + NewI->insertBefore(Inst); + return NewI; + } + UndefUsesToFix->push_back(&OperandUse); return UndefValue::get(NewPtrTy); } @@ -536,6 +577,7 @@ static Value *operandWithNewAddressSpaceOrCreateUndef( Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace( Instruction *I, unsigned NewAddrSpace, const ValueToValueMapTy &ValueWithNewAddrSpace, + const PredicatedAddrSpaceMapTy &PredicatedAS, SmallVectorImpl<const Use *> *UndefUsesToFix) const { Type *NewPtrType = PointerType::getWithSamePointeeType( cast<PointerType>(I->getType()), NewAddrSpace); @@ -557,7 +599,7 @@ Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace( assert(II->getIntrinsicID() == Intrinsic::ptrmask); Value *NewPtr = operandWithNewAddressSpaceOrCreateUndef( II->getArgOperandUse(0), NewAddrSpace, ValueWithNewAddrSpace, - UndefUsesToFix); + PredicatedAS, UndefUsesToFix); Value *Rewrite = TTI->rewriteIntrinsicWithAddressSpace(II, II->getArgOperand(0), NewPtr); if (Rewrite) { @@ -586,7 +628,8 @@ Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace( NewPointerOperands.push_back(nullptr); else NewPointerOperands.push_back(operandWithNewAddressSpaceOrCreateUndef( - OperandUse, NewAddrSpace, ValueWithNewAddrSpace, UndefUsesToFix)); + OperandUse, NewAddrSpace, ValueWithNewAddrSpace, PredicatedAS, + UndefUsesToFix)); } switch (I->getOpcode()) { @@ -708,9 +751,8 @@ static Value *cloneConstantExprWithNewAddressSpace( if (CE->getOpcode() == Instruction::GetElementPtr) { // Needs to specify the source type while constructing a getelementptr // constant expression. - return CE->getWithOperands( - NewOperands, TargetType, /*OnlyIfReduced=*/false, - NewOperands[0]->getType()->getPointerElementType()); + return CE->getWithOperands(NewOperands, TargetType, /*OnlyIfReduced=*/false, + cast<GEPOperator>(CE)->getSourceElementType()); } return CE->getWithOperands(NewOperands, TargetType); @@ -724,6 +766,7 @@ static Value *cloneConstantExprWithNewAddressSpace( Value *InferAddressSpacesImpl::cloneValueWithNewAddressSpace( Value *V, unsigned NewAddrSpace, const ValueToValueMapTy &ValueWithNewAddrSpace, + const PredicatedAddrSpaceMapTy &PredicatedAS, SmallVectorImpl<const Use *> *UndefUsesToFix) const { // All values in Postorder are flat address expressions. assert(V->getType()->getPointerAddressSpace() == FlatAddrSpace && @@ -731,7 +774,7 @@ Value *InferAddressSpacesImpl::cloneValueWithNewAddressSpace( if (Instruction *I = dyn_cast<Instruction>(V)) { Value *NewV = cloneInstructionWithNewAddressSpace( - I, NewAddrSpace, ValueWithNewAddrSpace, UndefUsesToFix); + I, NewAddrSpace, ValueWithNewAddrSpace, PredicatedAS, UndefUsesToFix); if (Instruction *NewI = dyn_cast_or_null<Instruction>(NewV)) { if (NewI->getParent() == nullptr) { NewI->insertBefore(I); @@ -779,46 +822,43 @@ bool InferAddressSpacesImpl::run(Function &F) { // Runs a data-flow analysis to refine the address spaces of every expression // in Postorder. ValueToAddrSpaceMapTy InferredAddrSpace; - inferAddressSpaces(Postorder, &InferredAddrSpace); + PredicatedAddrSpaceMapTy PredicatedAS; + inferAddressSpaces(Postorder, InferredAddrSpace, PredicatedAS); // Changes the address spaces of the flat address expressions who are inferred // to point to a specific address space. 
- return rewriteWithNewAddressSpaces(*TTI, Postorder, InferredAddrSpace, &F); + return rewriteWithNewAddressSpaces(*TTI, Postorder, InferredAddrSpace, + PredicatedAS, &F); } // Constants need to be tracked through RAUW to handle cases with nested // constant expressions, so wrap values in WeakTrackingVH. void InferAddressSpacesImpl::inferAddressSpaces( ArrayRef<WeakTrackingVH> Postorder, - ValueToAddrSpaceMapTy *InferredAddrSpace) const { + ValueToAddrSpaceMapTy &InferredAddrSpace, + PredicatedAddrSpaceMapTy &PredicatedAS) const { SetVector<Value *> Worklist(Postorder.begin(), Postorder.end()); // Initially, all expressions are in the uninitialized address space. for (Value *V : Postorder) - (*InferredAddrSpace)[V] = UninitializedAddressSpace; + InferredAddrSpace[V] = UninitializedAddressSpace; while (!Worklist.empty()) { Value *V = Worklist.pop_back_val(); - // Tries to update the address space of the stack top according to the + // Try to update the address space of the stack top according to the // address spaces of its operands. - LLVM_DEBUG(dbgs() << "Updating the address space of\n " << *V << '\n'); - Optional<unsigned> NewAS = updateAddressSpace(*V, *InferredAddrSpace); - if (!NewAS.hasValue()) + if (!updateAddressSpace(*V, InferredAddrSpace, PredicatedAS)) continue; - // If any updates are made, grabs its users to the worklist because - // their address spaces can also be possibly updated. - LLVM_DEBUG(dbgs() << " to " << NewAS.getValue() << '\n'); - (*InferredAddrSpace)[V] = NewAS.getValue(); for (Value *User : V->users()) { // Skip if User is already in the worklist. if (Worklist.count(User)) continue; - auto Pos = InferredAddrSpace->find(User); + auto Pos = InferredAddrSpace.find(User); // Our algorithm only updates the address spaces of flat address // expressions, which are those in InferredAddrSpace. - if (Pos == InferredAddrSpace->end()) + if (Pos == InferredAddrSpace.end()) continue; // Function updateAddressSpace moves the address space down a lattice @@ -832,10 +872,37 @@ void InferAddressSpacesImpl::inferAddressSpaces( } } -Optional<unsigned> InferAddressSpacesImpl::updateAddressSpace( - const Value &V, const ValueToAddrSpaceMapTy &InferredAddrSpace) const { +unsigned InferAddressSpacesImpl::getPredicatedAddrSpace(const Value &V, + Value *Opnd) const { + const Instruction *I = dyn_cast<Instruction>(&V); + if (!I) + return UninitializedAddressSpace; + + Opnd = Opnd->stripInBoundsOffsets(); + for (auto &AssumeVH : AC.assumptionsFor(Opnd)) { + if (!AssumeVH) + continue; + CallInst *CI = cast<CallInst>(AssumeVH); + if (!isValidAssumeForContext(CI, I, DT)) + continue; + + const Value *Ptr; + unsigned AS; + std::tie(Ptr, AS) = TTI->getPredicatedAddrSpace(CI->getArgOperand(0)); + if (Ptr) + return AS; + } + + return UninitializedAddressSpace; +} + +bool InferAddressSpacesImpl::updateAddressSpace( + const Value &V, ValueToAddrSpaceMapTy &InferredAddrSpace, + PredicatedAddrSpaceMapTy &PredicatedAS) const { assert(InferredAddrSpace.count(&V)); + LLVM_DEBUG(dbgs() << "Updating the address space of\n " << V << '\n'); + // The new inferred address space equals the join of the address spaces // of all its pointer operands. unsigned NewAS = UninitializedAddressSpace; @@ -861,7 +928,7 @@ Optional<unsigned> InferAddressSpacesImpl::updateAddressSpace( // address space is known. 
if ((C1 && Src0AS == UninitializedAddressSpace) || (C0 && Src1AS == UninitializedAddressSpace)) - return None; + return false; if (C0 && isSafeToCastConstAddrSpace(C0, Src1AS)) NewAS = Src1AS; @@ -878,10 +945,23 @@ Optional<unsigned> InferAddressSpacesImpl::updateAddressSpace( // Otherwise, infer the address space from its pointer operands. for (Value *PtrOperand : getPointerOperands(V, *DL, TTI)) { auto I = InferredAddrSpace.find(PtrOperand); - unsigned OperandAS = - I != InferredAddrSpace.end() - ? I->second - : PtrOperand->getType()->getPointerAddressSpace(); + unsigned OperandAS; + if (I == InferredAddrSpace.end()) { + OperandAS = PtrOperand->getType()->getPointerAddressSpace(); + if (OperandAS == FlatAddrSpace) { + // Check AC for assumption dominating V. + unsigned AS = getPredicatedAddrSpace(V, PtrOperand); + if (AS != UninitializedAddressSpace) { + LLVM_DEBUG(dbgs() + << " deduce operand AS from the predicate addrspace " + << AS << '\n'); + OperandAS = AS; + // Record this use with the predicated AS. + PredicatedAS[std::make_pair(&V, PtrOperand)] = OperandAS; + } + } + } else + OperandAS = I->second; // join(flat, *) = flat. So we can break if NewAS is already flat. NewAS = joinAddressSpaces(NewAS, OperandAS); @@ -894,8 +974,13 @@ Optional<unsigned> InferAddressSpacesImpl::updateAddressSpace( unsigned OldAS = InferredAddrSpace.lookup(&V); assert(OldAS != FlatAddrSpace); if (OldAS == NewAS) - return None; - return NewAS; + return false; + + // If any updates are made, grabs its users to the worklist because + // their address spaces can also be possibly updated. + LLVM_DEBUG(dbgs() << " to " << NewAS << '\n'); + InferredAddrSpace[&V] = NewAS; + return true; } /// \p returns true if \p U is the pointer operand of a memory instruction with @@ -1026,7 +1111,8 @@ static Value::use_iterator skipToNextUser(Value::use_iterator I, bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces( const TargetTransformInfo &TTI, ArrayRef<WeakTrackingVH> Postorder, - const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const { + const ValueToAddrSpaceMapTy &InferredAddrSpace, + const PredicatedAddrSpaceMapTy &PredicatedAS, Function *F) const { // For each address expression to be modified, creates a clone of it with its // pointer operands converted to the new address space. 
Since the pointer // operands are converted, the clone is naturally in the new address space by @@ -1042,8 +1128,9 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces( continue; if (V->getType()->getPointerAddressSpace() != NewAddrSpace) { - Value *New = cloneValueWithNewAddressSpace( - V, NewAddrSpace, ValueWithNewAddrSpace, &UndefUsesToFix); + Value *New = + cloneValueWithNewAddressSpace(V, NewAddrSpace, ValueWithNewAddrSpace, + PredicatedAS, &UndefUsesToFix); if (New) ValueWithNewAddrSpace[V] = New; } @@ -1155,8 +1242,9 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces( if (AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(CurUser)) { unsigned NewAS = NewV->getType()->getPointerAddressSpace(); if (ASC->getDestAddressSpace() == NewAS) { - if (ASC->getType()->getPointerElementType() != - NewV->getType()->getPointerElementType()) { + if (!cast<PointerType>(ASC->getType()) + ->hasSameElementTypeAs( + cast<PointerType>(NewV->getType()))) { NewV = CastInst::Create(Instruction::BitCast, NewV, ASC->getType(), "", ASC); } @@ -1199,7 +1287,10 @@ bool InferAddressSpaces::runOnFunction(Function &F) { if (skipFunction(F)) return false; + auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); + DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; return InferAddressSpacesImpl( + getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F), DT, &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F), FlatAddrSpace) .run(F); @@ -1217,11 +1308,14 @@ InferAddressSpacesPass::InferAddressSpacesPass(unsigned AddressSpace) PreservedAnalyses InferAddressSpacesPass::run(Function &F, FunctionAnalysisManager &AM) { bool Changed = - InferAddressSpacesImpl(&AM.getResult<TargetIRAnalysis>(F), FlatAddrSpace) + InferAddressSpacesImpl(AM.getResult<AssumptionAnalysis>(F), + AM.getCachedResult<DominatorTreeAnalysis>(F), + &AM.getResult<TargetIRAnalysis>(F), FlatAddrSpace) .run(F); if (Changed) { PreservedAnalyses PA; PA.preserveSet<CFGAnalyses>(); + PA.preserve<DominatorTreeAnalysis>(); return PA; } return PreservedAnalyses::all(); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp index 9dc3b0351346..fe9a7211967c 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -331,7 +331,7 @@ bool JumpThreading::runOnFunction(Function &F) { BFI.reset(new BlockFrequencyInfo(F, *BPI, LI)); } - bool Changed = Impl.runImpl(F, TLI, LVI, AA, &DTU, F.hasProfileData(), + bool Changed = Impl.runImpl(F, TLI, TTI, LVI, AA, &DTU, F.hasProfileData(), std::move(BFI), std::move(BPI)); if (PrintLVIAfterJumpThreading) { dbgs() << "LVI for function '" << F.getName() << "':\n"; @@ -360,7 +360,7 @@ PreservedAnalyses JumpThreadingPass::run(Function &F, BFI.reset(new BlockFrequencyInfo(F, *BPI, LI)); } - bool Changed = runImpl(F, &TLI, &LVI, &AA, &DTU, F.hasProfileData(), + bool Changed = runImpl(F, &TLI, &TTI, &LVI, &AA, &DTU, F.hasProfileData(), std::move(BFI), std::move(BPI)); if (PrintLVIAfterJumpThreading) { @@ -377,12 +377,14 @@ PreservedAnalyses JumpThreadingPass::run(Function &F, } bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_, - LazyValueInfo *LVI_, AliasAnalysis *AA_, - DomTreeUpdater *DTU_, bool HasProfileData_, + TargetTransformInfo *TTI_, LazyValueInfo *LVI_, + AliasAnalysis *AA_, DomTreeUpdater *DTU_, + bool HasProfileData_, std::unique_ptr<BlockFrequencyInfo> BFI_, 
std::unique_ptr<BranchProbabilityInfo> BPI_) { LLVM_DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n"); TLI = TLI_; + TTI = TTI_; LVI = LVI_; AA = AA_; DTU = DTU_; @@ -514,7 +516,8 @@ static void replaceFoldableUses(Instruction *Cond, Value *ToVal) { /// Return the cost of duplicating a piece of this block from first non-phi /// and before StopAt instruction to thread across it. Stop scanning the block /// when exceeding the threshold. If duplication is impossible, returns ~0U. -static unsigned getJumpThreadDuplicationCost(BasicBlock *BB, +static unsigned getJumpThreadDuplicationCost(const TargetTransformInfo *TTI, + BasicBlock *BB, Instruction *StopAt, unsigned Threshold) { assert(StopAt->getParent() == BB && "Not an instruction from proper BB?"); @@ -550,26 +553,21 @@ static unsigned getJumpThreadDuplicationCost(BasicBlock *BB, if (Size > Threshold) return Size; - // Debugger intrinsics don't incur code size. - if (isa<DbgInfoIntrinsic>(I)) continue; - - // Pseudo-probes don't incur code size. - if (isa<PseudoProbeInst>(I)) - continue; - - // If this is a pointer->pointer bitcast, it is free. - if (isa<BitCastInst>(I) && I->getType()->isPointerTy()) - continue; - - // Freeze instruction is free, too. - if (isa<FreezeInst>(I)) - continue; - // Bail out if this instruction gives back a token type, it is not possible // to duplicate it if it is used outside this BB. if (I->getType()->isTokenTy() && I->isUsedOutsideOfBlock(BB)) return ~0U; + // Blocks with NoDuplicate are modelled as having infinite cost, so they + // are never duplicated. + if (const CallInst *CI = dyn_cast<CallInst>(I)) + if (CI->cannotDuplicate() || CI->isConvergent()) + return ~0U; + + if (TTI->getUserCost(&*I, TargetTransformInfo::TCK_SizeAndLatency) + == TargetTransformInfo::TCC_Free) + continue; + // All other instructions count for at least one unit. ++Size; @@ -578,11 +576,7 @@ static unsigned getJumpThreadDuplicationCost(BasicBlock *BB, // as having cost of 2 total, and if they are a vector intrinsic, we model // them as having cost 1. if (const CallInst *CI = dyn_cast<CallInst>(I)) { - if (CI->cannotDuplicate() || CI->isConvergent()) - // Blocks with NoDuplicate are modelled as having infinite cost, so they - // are never duplicated. - return ~0U; - else if (!isa<IntrinsicInst>(CI)) + if (!isa<IntrinsicInst>(CI)) Size += 3; else if (!CI->getType()->isVectorTy()) Size += 1; @@ -1363,8 +1357,7 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) { // If all of the loads and stores that feed the value have the same AA tags, // then we can propagate them onto any newly inserted loads. - AAMDNodes AATags; - LoadI->getAAMetadata(AATags); + AAMDNodes AATags = LoadI->getAAMetadata(); SmallPtrSet<BasicBlock*, 8> PredsScanned; @@ -2235,10 +2228,10 @@ bool JumpThreadingPass::maybethreadThroughTwoBasicBlocks(BasicBlock *BB, } // Compute the cost of duplicating BB and PredBB. - unsigned BBCost = - getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold); + unsigned BBCost = getJumpThreadDuplicationCost( + TTI, BB, BB->getTerminator(), BBDupThreshold); unsigned PredBBCost = getJumpThreadDuplicationCost( - PredBB, PredBB->getTerminator(), BBDupThreshold); + TTI, PredBB, PredBB->getTerminator(), BBDupThreshold); // Give up if costs are too high. 
We need to check BBCost and PredBBCost // individually before checking their sum because getJumpThreadDuplicationCost @@ -2346,8 +2339,8 @@ bool JumpThreadingPass::tryThreadEdge( return false; } - unsigned JumpThreadCost = - getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold); + unsigned JumpThreadCost = getJumpThreadDuplicationCost( + TTI, BB, BB->getTerminator(), BBDupThreshold); if (JumpThreadCost > BBDupThreshold) { LLVM_DEBUG(dbgs() << " Not threading BB '" << BB->getName() << "' - Cost is too high: " << JumpThreadCost << "\n"); @@ -2615,8 +2608,8 @@ bool JumpThreadingPass::duplicateCondBranchOnPHIIntoPred( return false; } - unsigned DuplicationCost = - getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold); + unsigned DuplicationCost = getJumpThreadDuplicationCost( + TTI, BB, BB->getTerminator(), BBDupThreshold); if (DuplicationCost > BBDupThreshold) { LLVM_DEBUG(dbgs() << " Not duplicating BB '" << BB->getName() << "' - Cost is too high: " << DuplicationCost << "\n"); @@ -3032,7 +3025,8 @@ bool JumpThreadingPass::threadGuard(BasicBlock *BB, IntrinsicInst *Guard, ValueToValueMapTy UnguardedMapping, GuardedMapping; Instruction *AfterGuard = Guard->getNextNode(); - unsigned Cost = getJumpThreadDuplicationCost(BB, AfterGuard, BBDupThreshold); + unsigned Cost = + getJumpThreadDuplicationCost(TTI, BB, AfterGuard, BBDupThreshold); if (Cost > BBDupThreshold) return false; // Duplicate all instructions before the guard and the guard itself to the diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp index 30058df3ded5..bf714d167670 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp @@ -117,13 +117,6 @@ static cl::opt<uint32_t> MaxNumUsesTraversed( cl::desc("Max num uses visited for identifying load " "invariance in loop using invariant start (default = 8)")); -// Default value of zero implies we use the regular alias set tracker mechanism -// instead of the cross product using AA to identify aliasing of the memory -// location we are interested in. -static cl::opt<int> -LICMN2Theshold("licm-n2-threshold", cl::Hidden, cl::init(0), - cl::desc("How many instruction to cross product using AA")); - // Experimental option to allow imprecision in LICM in pathological cases, in // exchange for faster compile. This is to be removed if MemorySSA starts to // address the same issue. 
This flag applies only when LICM uses MemorySSA @@ -151,7 +144,8 @@ cl::opt<unsigned> llvm::SetLicmMssaNoAccForPromotionCap( static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI); static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, - TargetTransformInfo *TTI, bool &FreeInLoop); + TargetTransformInfo *TTI, bool &FreeInLoop, + bool LoopNestMode); static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU, ScalarEvolution *SE, @@ -180,7 +174,7 @@ static Instruction *cloneInstructionInExitBlock( const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU); static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, - AliasSetTracker *AST, MemorySSAUpdater *MSSAU); + MemorySSAUpdater *MSSAU); static void moveInstructionBefore(Instruction &I, Instruction &Dest, ICFLoopSafetyInfo &SafetyInfo, @@ -206,9 +200,6 @@ struct LoopInvariantCodeMotion { private: unsigned LicmMssaOptCap; unsigned LicmMssaNoAccForPromotionCap; - - std::unique_ptr<AliasSetTracker> - collectAliasInfoForLoop(Loop *L, LoopInfo *LI, AAResults *AA); }; struct LegacyLICMPass : public LoopPass { @@ -228,9 +219,7 @@ struct LegacyLICMPass : public LoopPass { << L->getHeader()->getNameOrAsOperand() << "\n"); auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>(); - MemorySSA *MSSA = EnableMSSALoopDependency - ? (&getAnalysis<MemorySSAWrapperPass>().getMSSA()) - : nullptr; + MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA(); bool hasProfileData = L->getHeader()->getParent()->hasProfileData(); BlockFrequencyInfo *BFI = hasProfileData ? &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() @@ -257,10 +246,8 @@ struct LegacyLICMPass : public LoopPass { AU.addPreserved<DominatorTreeWrapperPass>(); AU.addPreserved<LoopInfoWrapperPass>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); - if (EnableMSSALoopDependency) { - AU.addRequired<MemorySSAWrapperPass>(); - AU.addPreserved<MemorySSAWrapperPass>(); - } + AU.addRequired<MemorySSAWrapperPass>(); + AU.addPreserved<MemorySSAWrapperPass>(); AU.addRequired<TargetTransformInfoWrapperPass>(); getLoopAnalysisUsage(AU); LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); @@ -275,6 +262,9 @@ private: PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &) { + if (!AR.MSSA) + report_fatal_error("LICM requires MemorySSA (loop-mssa)"); + // For the new PM, we also can't use OptimizationRemarkEmitter as an analysis // pass. Function analyses need to be preserved across loop transformations // but ORE cannot be preserved (see comment before the pass definition). @@ -289,8 +279,7 @@ PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM, PA.preserve<DominatorTreeAnalysis>(); PA.preserve<LoopAnalysis>(); - if (AR.MSSA) - PA.preserve<MemorySSAAnalysis>(); + PA.preserve<MemorySSAAnalysis>(); return PA; } @@ -298,6 +287,9 @@ PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM, PreservedAnalyses LNICMPass::run(LoopNest &LN, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &) { + if (!AR.MSSA) + report_fatal_error("LNICM requires MemorySSA (loop-mssa)"); + // For the new PM, we also can't use OptimizationRemarkEmitter as an analysis // pass. Function analyses need to be preserved across loop transformations // but ORE cannot be preserved (see comment before the pass definition). 
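Both new-PM entry points above now issue report_fatal_error when MemorySSA is missing, so LICM has to be scheduled under the loop-mssa adaptor; a typical invocation would look roughly like opt -passes='loop-mssa(licm)' input.ll -S, though the exact pipeline spelling may vary between releases.
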
@@ -316,8 +308,7 @@ PreservedAnalyses LNICMPass::run(LoopNest &LN, LoopAnalysisManager &AM, PA.preserve<DominatorTreeAnalysis>(); PA.preserve<LoopAnalysis>(); - if (AR.MSSA) - PA.preserve<MemorySSAAnalysis>(); + PA.preserve<MemorySSAAnalysis>(); return PA; } @@ -386,10 +377,6 @@ bool LoopInvariantCodeMotion::runOnLoop( return false; } - std::unique_ptr<AliasSetTracker> CurAST; - std::unique_ptr<MemorySSAUpdater> MSSAU; - std::unique_ptr<SinkAndHoistLICMFlags> Flags; - // Don't sink stores from loops with coroutine suspend instructions. // LICM would sink instructions into the default destination of // the coroutine switch. The default destination of the switch is to @@ -406,17 +393,9 @@ bool LoopInvariantCodeMotion::runOnLoop( }); }); - if (!MSSA) { - LLVM_DEBUG(dbgs() << "LICM: Using Alias Set Tracker.\n"); - CurAST = collectAliasInfoForLoop(L, LI, AA); - Flags = std::make_unique<SinkAndHoistLICMFlags>( - LicmMssaOptCap, LicmMssaNoAccForPromotionCap, /*IsSink=*/true); - } else { - LLVM_DEBUG(dbgs() << "LICM: Using MemorySSA.\n"); - MSSAU = std::make_unique<MemorySSAUpdater>(MSSA); - Flags = std::make_unique<SinkAndHoistLICMFlags>( - LicmMssaOptCap, LicmMssaNoAccForPromotionCap, /*IsSink=*/true, L, MSSA); - } + MemorySSAUpdater MSSAU(MSSA); + SinkAndHoistLICMFlags Flags(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + /*IsSink=*/true, L, MSSA); // Get the preheader block to move instructions into... BasicBlock *Preheader = L->getLoopPreheader(); @@ -435,14 +414,16 @@ bool LoopInvariantCodeMotion::runOnLoop( // us to sink instructions in one pass, without iteration. After sinking // instructions, we perform another pass to hoist them out of the loop. if (L->hasDedicatedExits()) - Changed |= - sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, TTI, L, - CurAST.get(), MSSAU.get(), &SafetyInfo, *Flags.get(), ORE); - Flags->setIsSink(false); + Changed |= LoopNestMode + ? sinkRegionForLoopNest(DT->getNode(L->getHeader()), AA, LI, + DT, BFI, TLI, TTI, L, &MSSAU, + &SafetyInfo, Flags, ORE) + : sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, + TLI, TTI, L, &MSSAU, &SafetyInfo, Flags, ORE); + Flags.setIsSink(false); if (Preheader) Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, L, - CurAST.get(), MSSAU.get(), SE, &SafetyInfo, - *Flags.get(), ORE, LoopNestMode); + &MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode); // Now that all loop invariants have been removed from the loop, promote any // memory references to scalars that we can. @@ -452,7 +433,7 @@ bool LoopInvariantCodeMotion::runOnLoop( // preheader for SSA updater, so also avoid sinking when no preheader // is available. 
if (!DisablePromotion && Preheader && L->hasDedicatedExits() && - !Flags->tooManyMemoryAccesses() && !HasCoroSuspendInst) { + !Flags.tooManyMemoryAccesses() && !HasCoroSuspendInst) { // Figure out the loop exits and their insertion points SmallVector<BasicBlock *, 8> ExitBlocks; L->getUniqueExitBlocks(ExitBlocks); @@ -466,55 +447,29 @@ bool LoopInvariantCodeMotion::runOnLoop( SmallVector<Instruction *, 8> InsertPts; SmallVector<MemoryAccess *, 8> MSSAInsertPts; InsertPts.reserve(ExitBlocks.size()); - if (MSSAU) - MSSAInsertPts.reserve(ExitBlocks.size()); + MSSAInsertPts.reserve(ExitBlocks.size()); for (BasicBlock *ExitBlock : ExitBlocks) { InsertPts.push_back(&*ExitBlock->getFirstInsertionPt()); - if (MSSAU) - MSSAInsertPts.push_back(nullptr); + MSSAInsertPts.push_back(nullptr); } PredIteratorCache PIC; + // Promoting one set of accesses may make the pointers for another set + // loop invariant, so run this in a loop (with the MaybePromotable set + // decreasing in size over time). bool Promoted = false; - if (CurAST.get()) { - // Loop over all of the alias sets in the tracker object. - for (AliasSet &AS : *CurAST) { - // We can promote this alias set if it has a store, if it is a "Must" - // alias set, if the pointer is loop invariant, and if we are not - // eliminating any volatile loads or stores. - if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() || - !L->isLoopInvariant(AS.begin()->getValue())) - continue; - - assert( - !AS.empty() && - "Must alias set should have at least one pointer element in it!"); - - SmallSetVector<Value *, 8> PointerMustAliases; - for (const auto &ASI : AS) - PointerMustAliases.insert(ASI.getValue()); - - Promoted |= promoteLoopAccessesToScalars( - PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI, - DT, TLI, L, CurAST.get(), MSSAU.get(), &SafetyInfo, ORE); + bool LocalPromoted; + do { + LocalPromoted = false; + for (const SmallSetVector<Value *, 8> &PointerMustAliases : + collectPromotionCandidates(MSSA, AA, L)) { + LocalPromoted |= promoteLoopAccessesToScalars( + PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, + LI, DT, TLI, L, &MSSAU, &SafetyInfo, ORE); } - } else { - // Promoting one set of accesses may make the pointers for another set - // loop invariant, so run this in a loop (with the MaybePromotable set - // decreasing in size over time). 
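The comment kept above is the rationale for the do/while that replaces the AliasSetTracker walk: every successful promotion can make further pointers loop invariant, so candidate collection is repeated until a sweep makes no change. Reduced to its shape, with runToFixedPoint as a hypothetical driver:

    #include <functional>

    // Re-run one promotion sweep until it stops reporting progress; return
    // whether anything changed at all.
    static bool runToFixedPoint(const std::function<bool()> &Sweep) {
      bool AnyChange = false;
      bool Changed;
      do {
        Changed = Sweep(); // e.g. one pass over collectPromotionCandidates(...)
        AnyChange |= Changed;
      } while (Changed);
      return AnyChange;
    }
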
- bool LocalPromoted; - do { - LocalPromoted = false; - for (const SmallSetVector<Value *, 8> &PointerMustAliases : - collectPromotionCandidates(MSSA, AA, L)) { - LocalPromoted |= promoteLoopAccessesToScalars( - PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, - LI, DT, TLI, L, /*AST*/nullptr, MSSAU.get(), &SafetyInfo, ORE); - } - Promoted |= LocalPromoted; - } while (LocalPromoted); - } + Promoted |= LocalPromoted; + } while (LocalPromoted); // Once we have promoted values across the loop body we have to // recursively reform LCSSA as any nested loop may now have values defined @@ -536,8 +491,8 @@ bool LoopInvariantCodeMotion::runOnLoop( assert((L->isOutermost() || L->getParentLoop()->isLCSSAForm(*DT)) && "Parent loop not left in LCSSA form after LICM!"); - if (MSSAU.get() && VerifyMemorySSA) - MSSAU->getMemorySSA()->verifyMemorySSA(); + if (VerifyMemorySSA) + MSSA->verifyMemorySSA(); if (Changed && SE) SE->forgetLoopDispositions(L); @@ -552,17 +507,15 @@ bool LoopInvariantCodeMotion::runOnLoop( bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI, DominatorTree *DT, BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, - Loop *CurLoop, AliasSetTracker *CurAST, - MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo, + Loop *CurLoop, MemorySSAUpdater *MSSAU, + ICFLoopSafetyInfo *SafetyInfo, SinkAndHoistLICMFlags &Flags, - OptimizationRemarkEmitter *ORE) { + OptimizationRemarkEmitter *ORE, Loop *OutermostLoop) { // Verify inputs. assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && - CurLoop != nullptr && SafetyInfo != nullptr && + CurLoop != nullptr && MSSAU != nullptr && SafetyInfo != nullptr && "Unexpected input to sinkRegion."); - assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) && - "Either AliasSetTracker or MemorySSA should be initialized."); // We want to visit children before parents. We will enque all the parents // before their children in the worklist and process the worklist in reverse @@ -587,7 +540,7 @@ bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI, salvageKnowledge(&I); salvageDebugInfo(I); ++II; - eraseInstruction(I, *SafetyInfo, CurAST, MSSAU); + eraseInstruction(I, *SafetyInfo, MSSAU); Changed = true; continue; } @@ -598,26 +551,46 @@ bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI, // operands of the instruction are loop invariant. // bool FreeInLoop = false; + bool LoopNestMode = OutermostLoop != nullptr; if (!I.mayHaveSideEffects() && - isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) && - canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags, - ORE)) { + isNotUsedOrFreeInLoop(I, LoopNestMode ? 
OutermostLoop : CurLoop, + SafetyInfo, TTI, FreeInLoop, LoopNestMode) && + canSinkOrHoistInst(I, AA, DT, CurLoop, /*CurAST*/nullptr, MSSAU, true, + &Flags, ORE)) { if (sink(I, LI, DT, BFI, CurLoop, SafetyInfo, MSSAU, ORE)) { if (!FreeInLoop) { ++II; salvageDebugInfo(I); - eraseInstruction(I, *SafetyInfo, CurAST, MSSAU); + eraseInstruction(I, *SafetyInfo, MSSAU); } Changed = true; } } } } - if (MSSAU && VerifyMemorySSA) + if (VerifyMemorySSA) MSSAU->getMemorySSA()->verifyMemorySSA(); return Changed; } +bool llvm::sinkRegionForLoopNest( + DomTreeNode *N, AAResults *AA, LoopInfo *LI, DominatorTree *DT, + BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, + Loop *CurLoop, MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo, + SinkAndHoistLICMFlags &Flags, OptimizationRemarkEmitter *ORE) { + + bool Changed = false; + SmallPriorityWorklist<Loop *, 4> Worklist; + Worklist.insert(CurLoop); + appendLoopsToWorklist(*CurLoop, Worklist); + while (!Worklist.empty()) { + Loop *L = Worklist.pop_back_val(); + Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, + TTI, L, MSSAU, SafetyInfo, Flags, ORE, CurLoop); + } + return Changed; +} + namespace { // This is a helper class for hoistRegion to make it able to hoist control flow // in order to be able to hoist phis. The way this works is that we initially @@ -820,9 +793,8 @@ public: if (HoistTarget == InitialPreheader) { // Phis in the loop header now need to use the new preheader. InitialPreheader->replaceSuccessorsPhiUsesWith(HoistCommonSucc); - if (MSSAU) - MSSAU->wireOldPredecessorsToNewImmediatePredecessor( - HoistTarget->getSingleSuccessor(), HoistCommonSucc, {HoistTarget}); + MSSAU->wireOldPredecessorsToNewImmediatePredecessor( + HoistTarget->getSingleSuccessor(), HoistCommonSucc, {HoistTarget}); // The new preheader dominates the loop header. DomTreeNode *PreheaderNode = DT->getNode(HoistCommonSucc); DomTreeNode *HeaderNode = DT->getNode(CurLoop->getHeader()); @@ -884,16 +856,14 @@ static bool worthSinkOrHoistInst(Instruction &I, BasicBlock *DstBlock, bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI, DominatorTree *DT, BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, Loop *CurLoop, - AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU, - ScalarEvolution *SE, ICFLoopSafetyInfo *SafetyInfo, + MemorySSAUpdater *MSSAU, ScalarEvolution *SE, + ICFLoopSafetyInfo *SafetyInfo, SinkAndHoistLICMFlags &Flags, OptimizationRemarkEmitter *ORE, bool LoopNestMode) { // Verify inputs. assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && - CurLoop != nullptr && SafetyInfo != nullptr && + CurLoop != nullptr && MSSAU != nullptr && SafetyInfo != nullptr && "Unexpected input to hoistRegion."); - assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) && - "Either AliasSetTracker or MemorySSA should be initialized."); ControlFlowHoister CFH(LI, DT, CurLoop, MSSAU); @@ -913,8 +883,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI, if (!LoopNestMode && inSubLoop(BB, CurLoop, LI)) continue; - for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) { - Instruction &I = *II++; + for (Instruction &I : llvm::make_early_inc_range(*BB)) { // Try constant folding this instruction. If all the operands are // constants, it is technically hoistable, but it would be better to // just fold it. 
@@ -922,12 +891,10 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI, &I, I.getModule()->getDataLayout(), TLI)) { LLVM_DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n'); - if (CurAST) - CurAST->copyValue(&I, C); // FIXME MSSA: Such replacements may make accesses unoptimized (D51960). I.replaceAllUsesWith(C); if (isInstructionTriviallyDead(&I, TLI)) - eraseInstruction(I, *SafetyInfo, CurAST, MSSAU); + eraseInstruction(I, *SafetyInfo, MSSAU); Changed = true; continue; } @@ -940,8 +907,8 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI, // and we have accurately duplicated the control flow from the loop header // to that block. if (CurLoop->hasLoopInvariantOperands(&I) && - canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags, - ORE) && + canSinkOrHoistInst(I, AA, DT, CurLoop, /*CurAST*/ nullptr, MSSAU, + true, &Flags, ORE) && worthSinkOrHoistInst(I, CurLoop->getLoopPreheader(), ORE, BFI) && isSafeToExecuteUnconditionally( I, DT, TLI, CurLoop, SafetyInfo, ORE, @@ -970,7 +937,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI, SafetyInfo->insertInstructionTo(Product, I.getParent()); Product->insertAfter(&I); I.replaceAllUsesWith(Product); - eraseInstruction(I, *SafetyInfo, CurAST, MSSAU); + eraseInstruction(I, *SafetyInfo, MSSAU); hoist(*ReciprocalDivisor, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo, MSSAU, SE, ORE); @@ -1049,7 +1016,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI, } } } - if (MSSAU && VerifyMemorySSA) + if (VerifyMemorySSA) MSSAU->getMemorySSA()->verifyMemorySSA(); // Now that we've finished hoisting make sure that LI and DT are still @@ -1101,6 +1068,10 @@ static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT, return false; Addr = BC->getOperand(0); } + // If we've ended up at a global/constant, bail. We shouldn't be looking at + // uselists for non-local Values in a loop pass. + if (isa<Constant>(Addr)) + return false; unsigned UsesVisited = 0; // Traverse all uses of the load operand value, to see if invariant.start is @@ -1273,7 +1244,7 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, // writes to this memory in the loop, we can hoist or sink. if (AAResults::onlyAccessesArgPointees(Behavior)) { // TODO: expand to writeable arguments - for (Value *Op : CI->arg_operands()) + for (Value *Op : CI->args()) if (Op->getType()->isPointerTy()) { bool Invalidated; if (CurAST) @@ -1443,7 +1414,8 @@ static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop, /// (e.g., a GEP can be folded into a load as an addressing mode in the loop). 
static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, - TargetTransformInfo *TTI, bool &FreeInLoop) { + TargetTransformInfo *TTI, bool &FreeInLoop, + bool LoopNestMode) { const auto &BlockColors = SafetyInfo->getBlockColors(); bool IsFree = isFreeInLoop(I, CurLoop, TTI); for (const User *U : I.users()) { @@ -1460,6 +1432,15 @@ static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, if (!BlockColors.empty() && BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1) return false; + + if (LoopNestMode) { + while (isa<PHINode>(UI) && UI->hasOneUser() && + UI->getNumOperands() == 1) { + if (!CurLoop->contains(UI)) + break; + UI = cast<Instruction>(UI->user_back()); + } + } } if (CurLoop->contains(UI)) { @@ -1546,9 +1527,7 @@ static Instruction *cloneInstructionInExitBlock( } static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, - AliasSetTracker *AST, MemorySSAUpdater *MSSAU) { - if (AST) - AST->deleteValue(&I); + MemorySSAUpdater *MSSAU) { if (MSSAU) MSSAU->removeMemoryAccess(&I); SafetyInfo.removeInstruction(&I); @@ -1599,8 +1578,7 @@ static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) { // predecessor fairly simple. if (!SafetyInfo->getBlockColors().empty() && BB->getFirstNonPHI()->isEHPad()) return false; - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { - BasicBlock *BBPred = *PI; + for (BasicBlock *BBPred : predecessors(BB)) { if (isa<IndirectBrInst>(BBPred->getTerminator()) || isa<CallBrInst>(BBPred->getTerminator())) return false; @@ -1786,7 +1764,7 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, Instruction *New = sinkThroughTriviallyReplaceablePHI( PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU); PN->replaceAllUsesWith(New); - eraseInstruction(*PN, *SafetyInfo, nullptr, nullptr); + eraseInstruction(*PN, *SafetyInfo, nullptr); Changed = true; } return Changed; @@ -1875,11 +1853,10 @@ class LoopPromoter : public LoadAndStorePromoter { SmallVectorImpl<Instruction *> &LoopInsertPts; SmallVectorImpl<MemoryAccess *> &MSSAInsertPts; PredIteratorCache &PredCache; - AliasSetTracker *AST; MemorySSAUpdater *MSSAU; LoopInfo &LI; DebugLoc DL; - int Alignment; + Align Alignment; bool UnorderedAtomic; AAMDNodes AATags; ICFLoopSafetyInfo &SafetyInfo; @@ -1907,13 +1884,13 @@ public: SmallVectorImpl<BasicBlock *> &LEB, SmallVectorImpl<Instruction *> &LIP, SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC, - AliasSetTracker *ast, MemorySSAUpdater *MSSAU, LoopInfo &li, - DebugLoc dl, int alignment, bool UnorderedAtomic, - const AAMDNodes &AATags, ICFLoopSafetyInfo &SafetyInfo) + MemorySSAUpdater *MSSAU, LoopInfo &li, DebugLoc dl, + Align Alignment, bool UnorderedAtomic, const AAMDNodes &AATags, + ICFLoopSafetyInfo &SafetyInfo) : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA), LoopExitBlocks(LEB), LoopInsertPts(LIP), MSSAInsertPts(MSSAIP), - PredCache(PIC), AST(ast), MSSAU(MSSAU), LI(li), DL(std::move(dl)), - Alignment(alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags), + PredCache(PIC), MSSAU(MSSAU), LI(li), DL(std::move(dl)), + Alignment(Alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags), SafetyInfo(SafetyInfo) {} bool isInstInList(Instruction *I, @@ -1940,39 +1917,29 @@ public: StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos); if (UnorderedAtomic) NewSI->setOrdering(AtomicOrdering::Unordered); - NewSI->setAlignment(Align(Alignment)); + 
NewSI->setAlignment(Alignment); NewSI->setDebugLoc(DL); if (AATags) NewSI->setAAMetadata(AATags); - if (MSSAU) { - MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i]; - MemoryAccess *NewMemAcc; - if (!MSSAInsertPoint) { - NewMemAcc = MSSAU->createMemoryAccessInBB( - NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning); - } else { - NewMemAcc = - MSSAU->createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint); - } - MSSAInsertPts[i] = NewMemAcc; - MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true); - // FIXME: true for safety, false may still be correct. + MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i]; + MemoryAccess *NewMemAcc; + if (!MSSAInsertPoint) { + NewMemAcc = MSSAU->createMemoryAccessInBB( + NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning); + } else { + NewMemAcc = + MSSAU->createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint); } + MSSAInsertPts[i] = NewMemAcc; + MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true); + // FIXME: true for safety, false may still be correct. } } - void replaceLoadWithValue(LoadInst *LI, Value *V) const override { - // Update alias analysis. - if (AST) - AST->copyValue(LI, V); - } void instructionDeleted(Instruction *I) const override { SafetyInfo.removeInstruction(I); - if (AST) - AST->deleteValue(I); - if (MSSAU) - MSSAU->removeMemoryAccess(I); + MSSAU->removeMemoryAccess(I); } }; @@ -2023,8 +1990,8 @@ bool llvm::promoteLoopAccessesToScalars( SmallVectorImpl<Instruction *> &InsertPts, SmallVectorImpl<MemoryAccess *> &MSSAInsertPts, PredIteratorCache &PIC, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, - Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU, - ICFLoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) { + Loop *CurLoop, MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo, + OptimizationRemarkEmitter *ORE) { // Verify inputs. assert(LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && @@ -2189,9 +2156,9 @@ bool llvm::promoteLoopAccessesToScalars( // Merge the AA tags. if (LoopUses.empty()) { // On the first load/store, just take its AA tags. - UI->getAAMetadata(AATags); + AATags = UI->getAAMetadata(); } else if (AATags) { - UI->getAAMetadata(AATags, /* Merge = */ true); + AATags = AATags.merge(UI->getAAMetadata()); } LoopUses.push_back(UI); @@ -2256,9 +2223,8 @@ bool llvm::promoteLoopAccessesToScalars( SmallVector<PHINode *, 16> NewPHIs; SSAUpdater SSA(&NewPHIs); LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks, - InsertPts, MSSAInsertPts, PIC, CurAST, MSSAU, *LI, DL, - Alignment.value(), SawUnorderedAtomic, AATags, - *SafetyInfo); + InsertPts, MSSAInsertPts, PIC, MSSAU, *LI, DL, + Alignment, SawUnorderedAtomic, AATags, *SafetyInfo); // Set up the preheader to have a definition of the value. It is the live-out // value from the preheader that uses in the loop will use. 
@@ -2273,24 +2239,22 @@ bool llvm::promoteLoopAccessesToScalars( PreheaderLoad->setAAMetadata(AATags); SSA.AddAvailableValue(Preheader, PreheaderLoad); - if (MSSAU) { - MemoryAccess *PreheaderLoadMemoryAccess = MSSAU->createMemoryAccessInBB( - PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End); - MemoryUse *NewMemUse = cast<MemoryUse>(PreheaderLoadMemoryAccess); - MSSAU->insertUse(NewMemUse, /*RenameUses=*/true); - } + MemoryAccess *PreheaderLoadMemoryAccess = MSSAU->createMemoryAccessInBB( + PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End); + MemoryUse *NewMemUse = cast<MemoryUse>(PreheaderLoadMemoryAccess); + MSSAU->insertUse(NewMemUse, /*RenameUses=*/true); - if (MSSAU && VerifyMemorySSA) + if (VerifyMemorySSA) MSSAU->getMemorySSA()->verifyMemorySSA(); // Rewrite all the loads in the loop and remember all the definitions from // stores in the loop. Promoter.run(LoopUses); - if (MSSAU && VerifyMemorySSA) + if (VerifyMemorySSA) MSSAU->getMemorySSA()->verifyMemorySSA(); // If the SSAUpdater didn't use the load in the preheader, just zap it now. if (PreheaderLoad->use_empty()) - eraseInstruction(*PreheaderLoad, *SafetyInfo, CurAST, MSSAU); + eraseInstruction(*PreheaderLoad, *SafetyInfo, MSSAU); return true; } @@ -2356,71 +2320,10 @@ collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) { return Result; } -/// Returns an owning pointer to an alias set which incorporates aliasing info -/// from L and all subloops of L. -std::unique_ptr<AliasSetTracker> -LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI, - AAResults *AA) { - auto CurAST = std::make_unique<AliasSetTracker>(*AA); - - // Add everything from all the sub loops. - for (Loop *InnerL : L->getSubLoops()) - for (BasicBlock *BB : InnerL->blocks()) - CurAST->add(*BB); - - // And merge in this loop (without anything from inner loops). - for (BasicBlock *BB : L->blocks()) - if (LI->getLoopFor(BB) == L) - CurAST->add(*BB); - - return CurAST; -} - static bool pointerInvalidatedByLoop(MemoryLocation MemLoc, AliasSetTracker *CurAST, Loop *CurLoop, AAResults *AA) { - // First check to see if any of the basic blocks in CurLoop invalidate *V. - bool isInvalidatedAccordingToAST = CurAST->getAliasSetFor(MemLoc).isMod(); - - if (!isInvalidatedAccordingToAST || !LICMN2Theshold) - return isInvalidatedAccordingToAST; - - // Check with a diagnostic analysis if we can refine the information above. - // This is to identify the limitations of using the AST. - // The alias set mechanism used by LICM has a major weakness in that it - // combines all things which may alias into a single set *before* asking - // modref questions. As a result, a single readonly call within a loop will - // collapse all loads and stores into a single alias set and report - // invalidation if the loop contains any store. For example, readonly calls - // with deopt states have this form and create a general alias set with all - // loads and stores. In order to get any LICM in loops containing possible - // deopt states we need a more precise invalidation of checking the mod ref - // info of each instruction within the loop and LI. This has a complexity of - // O(N^2), so currently, it is used only as a diagnostic tool since the - // default value of LICMN2Threshold is zero. - - // Don't look at nested loops. 
- if (CurLoop->begin() != CurLoop->end()) - return true; - - int N = 0; - for (BasicBlock *BB : CurLoop->getBlocks()) - for (Instruction &I : *BB) { - if (N >= LICMN2Theshold) { - LLVM_DEBUG(dbgs() << "Alasing N2 threshold exhausted for " - << *(MemLoc.Ptr) << "\n"); - return true; - } - N++; - auto Res = AA->getModRefInfo(&I, MemLoc); - if (isModSet(Res)) { - LLVM_DEBUG(dbgs() << "Aliasing failed on " << I << " for " - << *(MemLoc.Ptr) << "\n"); - return true; - } - } - LLVM_DEBUG(dbgs() << "Aliasing okay for " << *(MemLoc.Ptr) << "\n"); - return false; + return CurAST->getAliasSetFor(MemLoc).isMod(); } bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU, diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp index 993b154dc9a8..d438d56e38ca 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/LoopBoundSplit.h" +#include "llvm/ADT/Sequence.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" @@ -39,10 +40,12 @@ struct ConditionInfo { ICmpInst::Predicate Pred; /// AddRec llvm value Value *AddRecValue; + /// Non PHI AddRec llvm value + Value *NonPHIAddRecValue; /// Bound llvm value Value *BoundValue; /// AddRec SCEV - const SCEV *AddRecSCEV; + const SCEVAddRecExpr *AddRecSCEV; /// Bound SCEV const SCEV *BoundSCEV; @@ -54,19 +57,31 @@ struct ConditionInfo { } // namespace static void analyzeICmp(ScalarEvolution &SE, ICmpInst *ICmp, - ConditionInfo &Cond) { + ConditionInfo &Cond, const Loop &L) { Cond.ICmp = ICmp; if (match(ICmp, m_ICmp(Cond.Pred, m_Value(Cond.AddRecValue), m_Value(Cond.BoundValue)))) { - Cond.AddRecSCEV = SE.getSCEV(Cond.AddRecValue); - Cond.BoundSCEV = SE.getSCEV(Cond.BoundValue); + const SCEV *AddRecSCEV = SE.getSCEV(Cond.AddRecValue); + const SCEV *BoundSCEV = SE.getSCEV(Cond.BoundValue); + const SCEVAddRecExpr *LHSAddRecSCEV = dyn_cast<SCEVAddRecExpr>(AddRecSCEV); + const SCEVAddRecExpr *RHSAddRecSCEV = dyn_cast<SCEVAddRecExpr>(BoundSCEV); // Locate AddRec in LHSSCEV and Bound in RHSSCEV. - if (isa<SCEVAddRecExpr>(Cond.BoundSCEV) && - !isa<SCEVAddRecExpr>(Cond.AddRecSCEV)) { + if (!LHSAddRecSCEV && RHSAddRecSCEV) { std::swap(Cond.AddRecValue, Cond.BoundValue); - std::swap(Cond.AddRecSCEV, Cond.BoundSCEV); + std::swap(AddRecSCEV, BoundSCEV); Cond.Pred = ICmpInst::getSwappedPredicate(Cond.Pred); } + + Cond.AddRecSCEV = dyn_cast<SCEVAddRecExpr>(AddRecSCEV); + Cond.BoundSCEV = BoundSCEV; + Cond.NonPHIAddRecValue = Cond.AddRecValue; + + // If the Cond.AddRecValue is PHI node, update Cond.NonPHIAddRecValue with + // value from backedge. + if (Cond.AddRecSCEV && isa<PHINode>(Cond.AddRecValue)) { + PHINode *PN = cast<PHINode>(Cond.AddRecValue); + Cond.NonPHIAddRecValue = PN->getIncomingValueForBlock(L.getLoopLatch()); + } } } @@ -118,21 +133,20 @@ static bool calculateUpperBound(const Loop &L, ScalarEvolution &SE, static bool hasProcessableCondition(const Loop &L, ScalarEvolution &SE, ICmpInst *ICmp, ConditionInfo &Cond, bool IsExitCond) { - analyzeICmp(SE, ICmp, Cond); + analyzeICmp(SE, ICmp, Cond, L); // The BoundSCEV should be evaluated at loop entry. 
if (!SE.isAvailableAtLoopEntry(Cond.BoundSCEV, &L)) return false; - const SCEVAddRecExpr *AddRecSCEV = dyn_cast<SCEVAddRecExpr>(Cond.AddRecSCEV); // Allowed AddRec as induction variable. - if (!AddRecSCEV) + if (!Cond.AddRecSCEV) return false; - if (!AddRecSCEV->isAffine()) + if (!Cond.AddRecSCEV->isAffine()) return false; - const SCEV *StepRecSCEV = AddRecSCEV->getStepRecurrence(SE); + const SCEV *StepRecSCEV = Cond.AddRecSCEV->getStepRecurrence(SE); // Allowed constant step. if (!isa<SCEVConstant>(StepRecSCEV)) return false; @@ -264,6 +278,14 @@ static BranchInst *findSplitCandidate(const Loop &L, ScalarEvolution &SE, SplitCandidateCond.BoundSCEV->getType()) continue; + // After transformation, we assume the split condition of the pre-loop is + // always true. In order to guarantee it, we need to check the start value + // of the split cond AddRec satisfies the split condition. + if (!SE.isLoopEntryGuardedByCond(&L, SplitCandidateCond.Pred, + SplitCandidateCond.AddRecSCEV->getStart(), + SplitCandidateCond.BoundSCEV)) + continue; + SplitCandidateCond.BI = BI; return BI; } @@ -341,13 +363,45 @@ static bool splitLoopBound(Loop &L, DominatorTree &DT, LoopInfo &LI, ".split", &LI, &DT, PostLoopBlocks); remapInstructionsInBlocks(PostLoopBlocks, VMap); - // Add conditional branch to check we can skip post-loop in its preheader. BasicBlock *PostLoopPreHeader = PostLoop->getLoopPreheader(); - IRBuilder<> Builder(PostLoopPreHeader); + IRBuilder<> Builder(&PostLoopPreHeader->front()); + + // Update phi nodes in header of post-loop. + bool isExitingLatch = + (L.getExitingBlock() == L.getLoopLatch()) ? true : false; + Value *ExitingCondLCSSAPhi = nullptr; + for (PHINode &PN : L.getHeader()->phis()) { + // Create LCSSA phi node in preheader of post-loop. + PHINode *LCSSAPhi = + Builder.CreatePHI(PN.getType(), 1, PN.getName() + ".lcssa"); + LCSSAPhi->setDebugLoc(PN.getDebugLoc()); + // If the exiting block is loop latch, the phi does not have the update at + // last iteration. In this case, update lcssa phi with value from backedge. + LCSSAPhi->addIncoming( + isExitingLatch ? PN.getIncomingValueForBlock(L.getLoopLatch()) : &PN, + L.getExitingBlock()); + + // Update the start value of phi node in post-loop with the LCSSA phi node. + PHINode *PostLoopPN = cast<PHINode>(VMap[&PN]); + PostLoopPN->setIncomingValueForBlock(PostLoopPreHeader, LCSSAPhi); + + // Find PHI with exiting condition from pre-loop. The PHI should be + // SCEVAddRecExpr and have same incoming value from backedge with + // ExitingCond. + if (!SE.isSCEVable(PN.getType())) + continue; + + const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&PN)); + if (PhiSCEV && ExitingCond.NonPHIAddRecValue == + PN.getIncomingValueForBlock(L.getLoopLatch())) + ExitingCondLCSSAPhi = LCSSAPhi; + } + + // Add conditional branch to check we can skip post-loop in its preheader. Instruction *OrigBI = PostLoopPreHeader->getTerminator(); ICmpInst::Predicate Pred = ICmpInst::ICMP_NE; Value *Cond = - Builder.CreateICmp(Pred, ExitingCond.AddRecValue, ExitingCond.BoundValue); + Builder.CreateICmp(Pred, ExitingCondLCSSAPhi, ExitingCond.BoundValue); Builder.CreateCondBr(Cond, PostLoop->getHeader(), PostLoop->getExitBlock()); OrigBI->eraseFromParent(); @@ -368,21 +422,6 @@ static bool splitLoopBound(Loop &L, DominatorTree &DT, LoopInfo &LI, // Replace exiting bound value of pre-loop NewBound. ExitingCond.ICmp->setOperand(1, NewBoundValue); - // Replace IV's start value of post-loop by NewBound. 
- for (PHINode &PN : L.getHeader()->phis()) { - // Find PHI with exiting condition from pre-loop. - if (SE.isSCEVable(PN.getType()) && isa<SCEVAddRecExpr>(SE.getSCEV(&PN))) { - for (Value *Op : PN.incoming_values()) { - if (Op == ExitingCond.AddRecValue) { - // Find cloned PHI for post-loop. - PHINode *PostLoopPN = cast<PHINode>(VMap[&PN]); - PostLoopPN->setIncomingValueForBlock(PostLoopPreHeader, - NewBoundValue); - } - } - } - } - // Replace SplitCandidateCond.BI's condition of pre-loop by True. LLVMContext &Context = PreHeader->getContext(); SplitCandidateCond.BI->setCondition(ConstantInt::getTrue(Context)); @@ -398,6 +437,30 @@ static bool splitLoopBound(Loop &L, DominatorTree &DT, LoopInfo &LI, else ExitingCond.BI->setSuccessor(1, PostLoopPreHeader); + // Update phi node in exit block of post-loop. + Builder.SetInsertPoint(&PostLoopPreHeader->front()); + for (PHINode &PN : PostLoop->getExitBlock()->phis()) { + for (auto i : seq<int>(0, PN.getNumOperands())) { + // Check incoming block is pre-loop's exiting block. + if (PN.getIncomingBlock(i) == L.getExitingBlock()) { + Value *IncomingValue = PN.getIncomingValue(i); + + // Create LCSSA phi node for incoming value. + PHINode *LCSSAPhi = + Builder.CreatePHI(PN.getType(), 1, PN.getName() + ".lcssa"); + LCSSAPhi->setDebugLoc(PN.getDebugLoc()); + LCSSAPhi->addIncoming(IncomingValue, PN.getIncomingBlock(i)); + + // Replace pre-loop's exiting block by post-loop's preheader. + PN.setIncomingBlock(i, PostLoopPreHeader); + // Replace incoming value by LCSSAPhi. + PN.setIncomingValue(i, LCSSAPhi); + // Add a new incoming value with post-loop's exiting block. + PN.addIncoming(VMap[IncomingValue], PostLoop->getExitingBlock()); + } + } + } + // Update dominator tree. DT.changeImmediateDominator(PostLoopPreHeader, L.getExitingBlock()); DT.changeImmediateDominator(PostLoop->getExitBlock(), PostLoopPreHeader); @@ -406,10 +469,7 @@ static bool splitLoopBound(Loop &L, DominatorTree &DT, LoopInfo &LI, SE.forgetLoop(&L); // Canonicalize loops. - // TODO: Try to update LCSSA information according to above change. - formLCSSA(L, DT, &LI, &SE); simplifyLoop(&L, &DT, &LI, &SE, nullptr, nullptr, true); - formLCSSA(*PostLoop, DT, &LI, &SE); simplifyLoop(PostLoop, &DT, &LI, &SE, nullptr, nullptr, true); // Add new post-loop to loop pass manager. 
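To make the LoopBoundSplit hunks easier to follow: the patch replaces the old formLCSSA calls with explicit LCSSA phi construction in the post-loop preheader and exit block, and the post-loop is now entered through a guard on the pre-loop's final induction value. A rough C++-level sketch of the overall shape follows (a hypothetical example, not the pass code; the pass works on IR, and the names n, m, f, g, before, after are purely illustrative):

  #include <algorithm>

  void f(int);
  void g(int);

  // Before: one loop with a branch whose condition ("i < m") compares the
  // affine IV against a loop-invariant bound.
  void before(int n, int m) {
    for (int i = 0; i < n; ++i) {
      if (i < m) f(i); else g(i);
    }
  }

  // After, conceptually: a pre-loop in which the split condition is folded
  // to true, followed by a guarded post-loop that resumes from the
  // pre-loop's final value of i (the role of the new LCSSA phi created in
  // the post-loop preheader).
  void after(int n, int m) {
    int i = 0;
    for (; i < std::min(m, n); ++i)  // pre-loop: "i < m" always holds here
      f(i);
    if (i != n)                      // guard emitted in the post-loop preheader
      for (; i < n; ++i)
        g(i);
  }

The new isLoopEntryGuardedByCond check in findSplitCandidate corresponds to requiring that the IV's start value (0 in this sketch) already satisfies the split condition at loop entry, which is what makes folding the pre-loop branch to true safe.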
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp index a5d7835bd094..77d76609c926 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp @@ -29,6 +29,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" #include "llvm/Transforms/Utils/ValueMapper.h" @@ -127,6 +128,8 @@ public: AU.addPreserved<DominatorTreeWrapperPass>(); AU.addRequired<LoopInfoWrapperPass>(); AU.addPreserved<LoopInfoWrapperPass>(); + AU.addRequiredID(LoopSimplifyID); + AU.addPreservedID(LoopSimplifyID); AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); AU.addRequired<ScalarEvolutionWrapperPass>(); AU.addPreserved<ScalarEvolutionWrapperPass>(); @@ -143,6 +146,7 @@ INITIALIZE_PASS_BEGIN(LoopDataPrefetchLegacyPass, "loop-data-prefetch", INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_END(LoopDataPrefetchLegacyPass, "loop-data-prefetch", diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDeletion.cpp index f7e8442fae81..5814e2f043d5 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDeletion.cpp @@ -36,6 +36,8 @@ using namespace llvm; #define DEBUG_TYPE "loop-delete" STATISTIC(NumDeleted, "Number of loops deleted"); +STATISTIC(NumBackedgesBroken, + "Number of loops for which we managed to break the backedge"); static cl::opt<bool> EnableSymbolicExecution( "loop-deletion-enable-symbolic-execution", cl::Hidden, cl::init(true), @@ -191,6 +193,20 @@ getValueOnFirstIteration(Value *V, DenseMap<Value *, Value *> &FirstIterValue, Value *RHS = getValueOnFirstIteration(BO->getOperand(1), FirstIterValue, SQ); FirstIterV = SimplifyBinOp(BO->getOpcode(), LHS, RHS, SQ); + } else if (auto *Cmp = dyn_cast<ICmpInst>(V)) { + Value *LHS = + getValueOnFirstIteration(Cmp->getOperand(0), FirstIterValue, SQ); + Value *RHS = + getValueOnFirstIteration(Cmp->getOperand(1), FirstIterValue, SQ); + FirstIterV = SimplifyICmpInst(Cmp->getPredicate(), LHS, RHS, SQ); + } else if (auto *Select = dyn_cast<SelectInst>(V)) { + Value *Cond = + getValueOnFirstIteration(Select->getCondition(), FirstIterValue, SQ); + if (auto *C = dyn_cast<ConstantInt>(Cond)) { + auto *Selected = C->isAllOnesValue() ? 
Select->getTrueValue() + : Select->getFalseValue(); + FirstIterV = getValueOnFirstIteration(Selected, FirstIterValue, SQ); + } } if (!FirstIterV) FirstIterV = V; @@ -314,22 +330,20 @@ static bool canProveExitOnFirstIteration(Loop *L, DominatorTree &DT, } using namespace PatternMatch; - ICmpInst::Predicate Pred; - Value *LHS, *RHS; + Value *Cond; BasicBlock *IfTrue, *IfFalse; auto *Term = BB->getTerminator(); - if (match(Term, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)), + if (match(Term, m_Br(m_Value(Cond), m_BasicBlock(IfTrue), m_BasicBlock(IfFalse)))) { - if (!LHS->getType()->isIntegerTy()) { + auto *ICmp = dyn_cast<ICmpInst>(Cond); + if (!ICmp || !ICmp->getType()->isIntegerTy()) { MarkAllSuccessorsLive(BB); continue; } // Can we prove constant true or false for this condition? - LHS = getValueOnFirstIteration(LHS, FirstIterValue, SQ); - RHS = getValueOnFirstIteration(RHS, FirstIterValue, SQ); - auto *KnownCondition = SimplifyICmpInst(Pred, LHS, RHS, SQ); - if (!KnownCondition) { + auto *KnownCondition = getValueOnFirstIteration(ICmp, FirstIterValue, SQ); + if (KnownCondition == ICmp) { // Failed to simplify. MarkAllSuccessorsLive(BB); continue; @@ -393,14 +407,25 @@ breakBackedgeIfNotTaken(Loop *L, DominatorTree &DT, ScalarEvolution &SE, if (!L->getLoopLatch()) return LoopDeletionResult::Unmodified; - auto *BTC = SE.getBackedgeTakenCount(L); - if (!isa<SCEVCouldNotCompute>(BTC) && SE.isKnownNonZero(BTC)) - return LoopDeletionResult::Unmodified; - if (!BTC->isZero() && !canProveExitOnFirstIteration(L, DT, LI)) - return LoopDeletionResult::Unmodified; + auto *BTC = SE.getSymbolicMaxBackedgeTakenCount(L); + if (BTC->isZero()) { + // SCEV knows this backedge isn't taken! + breakLoopBackedge(L, DT, SE, LI, MSSA); + ++NumBackedgesBroken; + return LoopDeletionResult::Deleted; + } - breakLoopBackedge(L, DT, SE, LI, MSSA); - return LoopDeletionResult::Deleted; + // If SCEV leaves open the possibility of a zero trip count, see if + // symbolically evaluating the first iteration lets us prove the backedge + // unreachable. + if (isa<SCEVCouldNotCompute>(BTC) || !SE.isKnownNonZero(BTC)) + if (canProveExitOnFirstIteration(L, DT, LI)) { + breakLoopBackedge(L, DT, SE, LI, MSSA); + ++NumBackedgesBroken; + return LoopDeletionResult::Deleted; + } + + return LoopDeletionResult::Unmodified; } /// Remove a loop if it is dead. 
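The LoopDeletion hunks above do two related things: breakBackedgeIfNotTaken now consults getSymbolicMaxBackedgeTakenCount, and getValueOnFirstIteration learns to fold icmp and select instructions. A small hypothetical C++-level sketch of the kind of backedge this combination could plausibly remove (work is an illustrative external placeholder, and the claim that SCEV cannot compute an exact trip count here is an assumption about how the select-based step is modelled):

  void work(int);  // external, side-effecting placeholder

  void runs_once() {
    int i = 0;
    do {
      work(i);
      int bump = (i > 100) ? 1 : 0;  // icmp + select: both fold when i == 0
      i += bump;                     // i stays 0 on the first iteration
    } while (i != 0);                // 0 != 0 -> exit taken on iteration one
  }

Evaluating the first iteration symbolically (header phi = 0 from the preheader, then icmp, select, and add all simplify) proves the exit is taken immediately, so the backedge is never taken and can be broken; the new NumBackedgesBroken statistic counts exactly this case.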
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDistribute.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDistribute.cpp index bac3dc0f3fb9..0f4c767c1e4c 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDistribute.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDistribute.cpp @@ -1057,8 +1057,8 @@ PreservedAnalyses LoopDistributePass::run(Function &F, auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager(); std::function<const LoopAccessInfo &(Loop &)> GetLAA = [&](Loop &L) -> const LoopAccessInfo & { - LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, - TLI, TTI, nullptr, nullptr}; + LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, + TLI, TTI, nullptr, nullptr, nullptr}; return LAM.getResult<LoopAccessAnalysis>(L, AR); }; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFlatten.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFlatten.cpp index f54289f85ef5..965d1575518e 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFlatten.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFlatten.cpp @@ -27,6 +27,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/LoopFlatten.h" + +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" @@ -49,11 +51,13 @@ #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" #include "llvm/Transforms/Utils/SimplifyIndVar.h" -#define DEBUG_TYPE "loop-flatten" - using namespace llvm; using namespace llvm::PatternMatch; +#define DEBUG_TYPE "loop-flatten" + +STATISTIC(NumFlattened, "Number of loops flattened"); + static cl::opt<unsigned> RepeatedInstructionThreshold( "loop-flatten-cost-threshold", cl::Hidden, cl::init(2), cl::desc("Limit on the cost of instructions that can be repeated due to " @@ -90,9 +94,33 @@ struct FlattenInfo { // Whether this holds the flatten info before or after widening. bool Widened = false; + // Holds the old/narrow induction phis, i.e. the Phis before IV widening has + // been applied. This bookkeeping is used so we can skip some checks on these + // phi nodes. + PHINode *NarrowInnerInductionPHI = nullptr; + PHINode *NarrowOuterInductionPHI = nullptr; + FlattenInfo(Loop *OL, Loop *IL) : OuterLoop(OL), InnerLoop(IL) {}; + + bool isNarrowInductionPhi(PHINode *Phi) { + // This can't be the narrow phi if we haven't widened the IV first. + if (!Widened) + return false; + return NarrowInnerInductionPHI == Phi || NarrowOuterInductionPHI == Phi; + } }; +static bool +setLoopComponents(Value *&TC, Value *&TripCount, BinaryOperator *&Increment, + SmallPtrSetImpl<Instruction *> &IterationInstructions) { + TripCount = TC; + IterationInstructions.insert(Increment); + LLVM_DEBUG(dbgs() << "Found Increment: "; Increment->dump()); + LLVM_DEBUG(dbgs() << "Found trip count: "; TripCount->dump()); + LLVM_DEBUG(dbgs() << "Successfully found all loop components\n"); + return true; +} + // Finds the induction variable, increment and trip count for a simple loop that // we can flatten. static bool findLoopComponents( @@ -164,36 +192,68 @@ static bool findLoopComponents( return false; } // The trip count is the RHS of the compare. 
If this doesn't match the trip - // count computed by SCEV then this is either because the trip count variable - // has been widened (then leave the trip count as it is), or because it is a - // constant and another transformation has changed the compare, e.g. - // icmp ult %inc, tripcount -> icmp ult %j, tripcount-1, then we don't flatten - // the loop (yet). - TripCount = Compare->getOperand(1); + // count computed by SCEV then this is because the trip count variable + // has been widened so the types don't match, or because it is a constant and + // another transformation has changed the compare (e.g. icmp ult %inc, + // tripcount -> icmp ult %j, tripcount-1), or both. + Value *RHS = Compare->getOperand(1); + const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); + if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) { + LLVM_DEBUG(dbgs() << "Backedge-taken count is not predictable\n"); + return false; + } + // The use of the Extend=false flag on getTripCountFromExitCount was added + // during a refactoring to preserve existing behavior. However, there's + // nothing obvious in the surrounding code when handles the overflow case. + // FIXME: audit code to establish whether there's a latent bug here. const SCEV *SCEVTripCount = - SE->getTripCountFromExitCount(SE->getBackedgeTakenCount(L)); - if (SE->getSCEV(TripCount) != SCEVTripCount) { - if (!IsWidened) { - LLVM_DEBUG(dbgs() << "Could not find valid trip count\n"); - return false; - } - auto TripCountInst = dyn_cast<Instruction>(TripCount); - if (!TripCountInst) { - LLVM_DEBUG(dbgs() << "Could not find valid extended trip count\n"); - return false; + SE->getTripCountFromExitCount(BackedgeTakenCount, false); + const SCEV *SCEVRHS = SE->getSCEV(RHS); + if (SCEVRHS == SCEVTripCount) + return setLoopComponents(RHS, TripCount, Increment, IterationInstructions); + ConstantInt *ConstantRHS = dyn_cast<ConstantInt>(RHS); + if (ConstantRHS) { + const SCEV *BackedgeTCExt = nullptr; + if (IsWidened) { + const SCEV *SCEVTripCountExt; + // Find the extended backedge taken count and extended trip count using + // SCEV. One of these should now match the RHS of the compare. + BackedgeTCExt = SE->getZeroExtendExpr(BackedgeTakenCount, RHS->getType()); + SCEVTripCountExt = SE->getTripCountFromExitCount(BackedgeTCExt, false); + if (SCEVRHS != BackedgeTCExt && SCEVRHS != SCEVTripCountExt) { + LLVM_DEBUG(dbgs() << "Could not find valid trip count\n"); + return false; + } } - if ((!isa<ZExtInst>(TripCountInst) && !isa<SExtInst>(TripCountInst)) || - SE->getSCEV(TripCountInst->getOperand(0)) != SCEVTripCount) { - LLVM_DEBUG(dbgs() << "Could not find valid extended trip count\n"); - return false; + // If the RHS of the compare is equal to the backedge taken count we need + // to add one to get the trip count. 
+ if (SCEVRHS == BackedgeTCExt || SCEVRHS == BackedgeTakenCount) { + ConstantInt *One = ConstantInt::get(ConstantRHS->getType(), 1); + Value *NewRHS = ConstantInt::get( + ConstantRHS->getContext(), ConstantRHS->getValue() + One->getValue()); + return setLoopComponents(NewRHS, TripCount, Increment, + IterationInstructions); } + return setLoopComponents(RHS, TripCount, Increment, IterationInstructions); } - IterationInstructions.insert(Increment); - LLVM_DEBUG(dbgs() << "Found increment: "; Increment->dump()); - LLVM_DEBUG(dbgs() << "Found trip count: "; TripCount->dump()); - - LLVM_DEBUG(dbgs() << "Successfully found all loop components\n"); - return true; + // If the RHS isn't a constant then check that the reason it doesn't match + // the SCEV trip count is because the RHS is a ZExt or SExt instruction + // (and take the trip count to be the RHS). + if (!IsWidened) { + LLVM_DEBUG(dbgs() << "Could not find valid trip count\n"); + return false; + } + auto *TripCountInst = dyn_cast<Instruction>(RHS); + if (!TripCountInst) { + LLVM_DEBUG(dbgs() << "Could not find valid trip count\n"); + return false; + } + if ((!isa<ZExtInst>(TripCountInst) && !isa<SExtInst>(TripCountInst)) || + SE->getSCEV(TripCountInst->getOperand(0)) != SCEVTripCount) { + LLVM_DEBUG(dbgs() << "Could not find valid extended trip count\n"); + return false; + } + return setLoopComponents(RHS, TripCount, Increment, IterationInstructions); } static bool checkPHIs(FlattenInfo &FI, const TargetTransformInfo *TTI) { @@ -221,6 +281,8 @@ static bool checkPHIs(FlattenInfo &FI, const TargetTransformInfo *TTI) { // them specially when doing the transformation. if (&InnerPHI == FI.InnerInductionPHI) continue; + if (FI.isNarrowInductionPhi(&InnerPHI)) + continue; // Each inner loop PHI node must have two incoming values/blocks - one // from the pre-header, and one from the latch. @@ -266,6 +328,8 @@ static bool checkPHIs(FlattenInfo &FI, const TargetTransformInfo *TTI) { } for (PHINode &OuterPHI : FI.OuterLoop->getHeader()->phis()) { + if (FI.isNarrowInductionPhi(&OuterPHI)) + continue; if (!SafeOuterPHIs.count(&OuterPHI)) { LLVM_DEBUG(dbgs() << "found unsafe PHI in outer loop: "; OuterPHI.dump()); return false; @@ -356,18 +420,25 @@ static bool checkIVUsers(FlattenInfo &FI) { if (U == FI.InnerIncrement) continue; - // After widening the IVs, a trunc instruction might have been introduced, so - // look through truncs. + // After widening the IVs, a trunc instruction might have been introduced, + // so look through truncs. if (isa<TruncInst>(U)) { if (!U->hasOneUse()) return false; U = *U->user_begin(); } + // If the use is in the compare (which is also the condition of the inner + // branch) then the compare has been altered by another transformation e.g + // icmp ult %inc, tripcount -> icmp ult %j, tripcount-1, where tripcount is + // a constant. Ignore this use as the compare gets removed later anyway. + if (U == FI.InnerBranch->getCondition()) + continue; + LLVM_DEBUG(dbgs() << "Found use of inner induction variable: "; U->dump()); - Value *MatchedMul; - Value *MatchedItCount; + Value *MatchedMul = nullptr; + Value *MatchedItCount = nullptr; bool IsAdd = match(U, m_c_Add(m_Specific(FI.InnerInductionPHI), m_Value(MatchedMul))) && match(MatchedMul, m_c_Mul(m_Specific(FI.OuterInductionPHI), @@ -375,11 +446,23 @@ static bool checkIVUsers(FlattenInfo &FI) { // Matches the same pattern as above, except it also looks for truncs // on the phi, which can be the result of widening the induction variables. 
- bool IsAddTrunc = match(U, m_c_Add(m_Trunc(m_Specific(FI.InnerInductionPHI)), - m_Value(MatchedMul))) && - match(MatchedMul, - m_c_Mul(m_Trunc(m_Specific(FI.OuterInductionPHI)), - m_Value(MatchedItCount))); + bool IsAddTrunc = + match(U, m_c_Add(m_Trunc(m_Specific(FI.InnerInductionPHI)), + m_Value(MatchedMul))) && + match(MatchedMul, m_c_Mul(m_Trunc(m_Specific(FI.OuterInductionPHI)), + m_Value(MatchedItCount))); + + if (!MatchedItCount) + return false; + // Look through extends if the IV has been widened. + if (FI.Widened && + (isa<SExtInst>(MatchedItCount) || isa<ZExtInst>(MatchedItCount))) { + assert(MatchedItCount->getType() == FI.InnerInductionPHI->getType() && + "Unexpected type mismatch in types after widening"); + MatchedItCount = isa<SExtInst>(MatchedItCount) + ? dyn_cast<SExtInst>(MatchedItCount)->getOperand(0) + : dyn_cast<ZExtInst>(MatchedItCount)->getOperand(0); + } if ((IsAdd || IsAddTrunc) && MatchedItCount == InnerTripCount) { LLVM_DEBUG(dbgs() << "Use is optimisable\n"); @@ -451,17 +534,27 @@ static OverflowResult checkOverflow(FlattenInfo &FI, DominatorTree *DT, for (Value *V : FI.LinearIVUses) { for (Value *U : V->users()) { if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) { - // The IV is used as the operand of a GEP, and the IV is at least as - // wide as the address space of the GEP. In this case, the GEP would - // wrap around the address space before the IV increment wraps, which - // would be UB. - if (GEP->isInBounds() && - V->getType()->getIntegerBitWidth() >= - DL.getPointerTypeSizeInBits(GEP->getType())) { - LLVM_DEBUG( - dbgs() << "use of linear IV would be UB if overflow occurred: "; - GEP->dump()); - return OverflowResult::NeverOverflows; + for (Value *GEPUser : U->users()) { + Instruction *GEPUserInst = dyn_cast<Instruction>(GEPUser); + if (!isa<LoadInst>(GEPUserInst) && + !(isa<StoreInst>(GEPUserInst) && + GEP == GEPUserInst->getOperand(1))) + continue; + if (!isGuaranteedToExecuteForEveryIteration(GEPUserInst, + FI.InnerLoop)) + continue; + // The IV is used as the operand of a GEP which dominates the loop + // latch, and the IV is at least as wide as the address space of the + // GEP. In this case, the GEP would wrap around the address space + // before the IV increment wraps, which would be UB. + if (GEP->isInBounds() && + V->getType()->getIntegerBitWidth() >= + DL.getPointerTypeSizeInBits(GEP->getType())) { + LLVM_DEBUG( + dbgs() << "use of linear IV would be UB if overflow occurred: "; + GEP->dump()); + return OverflowResult::NeverOverflows; + } } } } @@ -518,7 +611,7 @@ static bool CanFlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI, static bool DoFlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, - const TargetTransformInfo *TTI) { + const TargetTransformInfo *TTI, LPMUpdater *U) { Function *F = FI.OuterLoop->getHeader()->getParent(); LLVM_DEBUG(dbgs() << "Checks all passed, doing the transformation\n"); { @@ -574,7 +667,13 @@ static bool DoFlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI, // deleted, and any information that have about the outer loop invalidated. SE->forgetLoop(FI.OuterLoop); SE->forgetLoop(FI.InnerLoop); + if (U) + U->markLoopAsDeleted(*FI.InnerLoop, FI.InnerLoop->getName()); LI->erase(FI.InnerLoop); + + // Increment statistic value. 
+ NumFlattened++; + return true; } @@ -605,14 +704,11 @@ static bool CanWidenIV(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI, } SCEVExpander Rewriter(*SE, DL, "loopflatten"); - SmallVector<WideIVInfo, 2> WideIVs; SmallVector<WeakTrackingVH, 4> DeadInsts; - WideIVs.push_back( {FI.InnerInductionPHI, MaxLegalType, false }); - WideIVs.push_back( {FI.OuterInductionPHI, MaxLegalType, false }); unsigned ElimExt = 0; unsigned Widened = 0; - for (const auto &WideIV : WideIVs) { + auto CreateWideIV = [&] (WideIVInfo WideIV, bool &Deleted) -> bool { PHINode *WidePhi = createWideIV(WideIV, LI, SE, Rewriter, DT, DeadInsts, ElimExt, Widened, true /* HasGuards */, true /* UsePostIncrementRanges */); @@ -620,17 +716,35 @@ static bool CanWidenIV(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI, return false; LLVM_DEBUG(dbgs() << "Created wide phi: "; WidePhi->dump()); LLVM_DEBUG(dbgs() << "Deleting old phi: "; WideIV.NarrowIV->dump()); - RecursivelyDeleteDeadPHINode(WideIV.NarrowIV); - } - // After widening, rediscover all the loop components. + Deleted = RecursivelyDeleteDeadPHINode(WideIV.NarrowIV); + return true; + }; + + bool Deleted; + if (!CreateWideIV({FI.InnerInductionPHI, MaxLegalType, false }, Deleted)) + return false; + // Add the narrow phi to list, so that it will be adjusted later when the + // the transformation is performed. + if (!Deleted) + FI.InnerPHIsToTransform.insert(FI.InnerInductionPHI); + + if (!CreateWideIV({FI.OuterInductionPHI, MaxLegalType, false }, Deleted)) + return false; + assert(Widened && "Widened IV expected"); FI.Widened = true; + + // Save the old/narrow induction phis, which we need to ignore in CheckPHIs. + FI.NarrowInnerInductionPHI = FI.InnerInductionPHI; + FI.NarrowOuterInductionPHI = FI.OuterInductionPHI; + + // After widening, rediscover all the loop components. return CanFlattenLoopPair(FI, DT, LI, SE, AC, TTI); } static bool FlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, - const TargetTransformInfo *TTI) { + const TargetTransformInfo *TTI, LPMUpdater *U) { LLVM_DEBUG( dbgs() << "Loop flattening running on outer loop " << FI.OuterLoop->getHeader()->getName() << " and inner loop " @@ -641,12 +755,30 @@ static bool FlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI, return false; // Check if we can widen the induction variables to avoid overflow checks. - if (CanWidenIV(FI, DT, LI, SE, AC, TTI)) - return DoFlattenLoopPair(FI, DT, LI, SE, AC, TTI); - - // Check if the new iteration variable might overflow. In this case, we - // need to version the loop, and select the original version at runtime if - // the iteration space is too large. + bool CanFlatten = CanWidenIV(FI, DT, LI, SE, AC, TTI); + + // It can happen that after widening of the IV, flattening may not be + // possible/happening, e.g. when it is deemed unprofitable. So bail here if + // that is the case. + // TODO: IV widening without performing the actual flattening transformation + // is not ideal. While this codegen change should not matter much, it is an + // unnecessary change which is better to avoid. It's unlikely this happens + // often, because if it's unprofitibale after widening, it should be + // unprofitabe before widening as checked in the first round of checks. But + // 'RepeatedInstructionThreshold' is set to only 2, which can probably be + // relaxed. Because this is making a code change (the IV widening, but not + // the flattening), we return true here. 
+ if (FI.Widened && !CanFlatten) + return true; + + // If we have widened and can perform the transformation, do that here. + if (CanFlatten) + return DoFlattenLoopPair(FI, DT, LI, SE, AC, TTI, U); + + // Otherwise, if we haven't widened the IV, check if the new iteration + // variable might overflow. In this case, we need to version the loop, and + // select the original version at runtime if the iteration space is too + // large. // TODO: We currently don't version the loop. OverflowResult OR = checkOverflow(FI, DT, AC); if (OR == OverflowResult::AlwaysOverflowsHigh || @@ -659,18 +791,18 @@ static bool FlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI, } LLVM_DEBUG(dbgs() << "Multiply cannot overflow, modifying loop in-place\n"); - return DoFlattenLoopPair(FI, DT, LI, SE, AC, TTI); + return DoFlattenLoopPair(FI, DT, LI, SE, AC, TTI, U); } bool Flatten(LoopNest &LN, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, - AssumptionCache *AC, TargetTransformInfo *TTI) { + AssumptionCache *AC, TargetTransformInfo *TTI, LPMUpdater *U) { bool Changed = false; for (Loop *InnerLoop : LN.getLoops()) { auto *OuterLoop = InnerLoop->getParentLoop(); if (!OuterLoop) continue; FlattenInfo FI(OuterLoop, InnerLoop); - Changed |= FlattenLoopPair(FI, DT, LI, SE, AC, TTI); + Changed |= FlattenLoopPair(FI, DT, LI, SE, AC, TTI, U); } return Changed; } @@ -685,12 +817,12 @@ PreservedAnalyses LoopFlattenPass::run(LoopNest &LN, LoopAnalysisManager &LAM, // in simplified form, and also needs LCSSA. Running // this pass will simplify all loops that contain inner loops, // regardless of whether anything ends up being flattened. - Changed |= Flatten(LN, &AR.DT, &AR.LI, &AR.SE, &AR.AC, &AR.TTI); + Changed |= Flatten(LN, &AR.DT, &AR.LI, &AR.SE, &AR.AC, &AR.TTI, &U); if (!Changed) return PreservedAnalyses::all(); - return PreservedAnalyses::none(); + return getLoopPassPreservedAnalyses(); } namespace { @@ -735,7 +867,7 @@ bool LoopFlattenLegacyPass::runOnFunction(Function &F) { bool Changed = false; for (Loop *L : *LI) { auto LN = LoopNest::getLoopNest(*L, *SE); - Changed |= Flatten(*LN, DT, LI, SE, AC, TTI); + Changed |= Flatten(*LN, DT, LI, SE, AC, TTI, nullptr); } return Changed; } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index a153f393448c..42da86a9ecf5 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -217,15 +217,15 @@ private: bool processLoopMemCpy(MemCpyInst *MCI, const SCEV *BECount); bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount); - bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize, + bool processLoopStridedStore(Value *DestPtr, const SCEV *StoreSizeSCEV, MaybeAlign StoreAlignment, Value *StoredVal, Instruction *TheStore, SmallPtrSetImpl<Instruction *> &Stores, const SCEVAddRecExpr *Ev, const SCEV *BECount, - bool NegStride, bool IsLoopMemset = false); + bool IsNegStride, bool IsLoopMemset = false); bool processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount); bool processLoopStoreOfLoopLoad(Value *DestPtr, Value *SourcePtr, - unsigned StoreSize, MaybeAlign StoreAlign, + const SCEV *StoreSize, MaybeAlign StoreAlign, MaybeAlign LoadAlign, Instruction *TheStore, Instruction *TheLoad, const SCEVAddRecExpr *StoreEv, @@ -625,8 +625,8 @@ bool LoopIdiomRecognize::runOnLoopBlock( // We can only promote stores in this block if they are 
unconditionally // executed in the loop. For a block to be unconditionally executed, it has // to dominate all the exit blocks of the loop. Verify this now. - for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) - if (!DT->dominates(BB, ExitBlocks[i])) + for (BasicBlock *ExitBlock : ExitBlocks) + if (!DT->dominates(BB, ExitBlock)) return false; bool MadeChange = false; @@ -750,16 +750,13 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL, bool Changed = false; // For stores that start but don't end a link in the chain: - for (SetVector<StoreInst *>::iterator it = Heads.begin(), e = Heads.end(); - it != e; ++it) { - if (Tails.count(*it)) + for (StoreInst *I : Heads) { + if (Tails.count(I)) continue; // We found a store instr that starts a chain. Now follow the chain and try // to transform it. SmallPtrSet<Instruction *, 8> AdjacentStores; - StoreInst *I = *it; - StoreInst *HeadStore = I; unsigned StoreSize = 0; @@ -784,12 +781,14 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL, if (StoreSize != Stride && StoreSize != -Stride) continue; - bool NegStride = StoreSize == -Stride; + bool IsNegStride = StoreSize == -Stride; - if (processLoopStridedStore(StorePtr, StoreSize, + Type *IntIdxTy = DL->getIndexType(StorePtr->getType()); + const SCEV *StoreSizeSCEV = SE->getConstant(IntIdxTy, StoreSize); + if (processLoopStridedStore(StorePtr, StoreSizeSCEV, MaybeAlign(HeadStore->getAlignment()), StoredVal, HeadStore, AdjacentStores, StoreEv, - BECount, NegStride)) { + BECount, IsNegStride)) { TransformedStores.insert(AdjacentStores.begin(), AdjacentStores.end()); Changed = true; } @@ -857,15 +856,15 @@ bool LoopIdiomRecognize::processLoopMemCpy(MemCpyInst *MCI, // Check if the stride matches the size of the memcpy. If so, then we know // that every byte is touched in the loop. - const SCEVConstant *StoreStride = + const SCEVConstant *ConstStoreStride = dyn_cast<SCEVConstant>(StoreEv->getOperand(1)); - const SCEVConstant *LoadStride = + const SCEVConstant *ConstLoadStride = dyn_cast<SCEVConstant>(LoadEv->getOperand(1)); - if (!StoreStride || !LoadStride) + if (!ConstStoreStride || !ConstLoadStride) return false; - APInt StoreStrideValue = StoreStride->getAPInt(); - APInt LoadStrideValue = LoadStride->getAPInt(); + APInt StoreStrideValue = ConstStoreStride->getAPInt(); + APInt LoadStrideValue = ConstLoadStride->getAPInt(); // Huge stride value - give up if (StoreStrideValue.getBitWidth() > 64 || LoadStrideValue.getBitWidth() > 64) return false; @@ -875,7 +874,7 @@ bool LoopIdiomRecognize::processLoopMemCpy(MemCpyInst *MCI, return OptimizationRemarkMissed(DEBUG_TYPE, "SizeStrideUnequal", MCI) << ore::NV("Inst", "memcpy") << " in " << ore::NV("Function", MCI->getFunction()) - << " function will not be hoised: " + << " function will not be hoisted: " << ore::NV("Reason", "memcpy size is not equal to stride"); }); return false; @@ -887,16 +886,17 @@ bool LoopIdiomRecognize::processLoopMemCpy(MemCpyInst *MCI, if (StoreStrideInt != LoadStrideInt) return false; - return processLoopStoreOfLoopLoad(Dest, Source, (unsigned)SizeInBytes, - MCI->getDestAlign(), MCI->getSourceAlign(), - MCI, MCI, StoreEv, LoadEv, BECount); + return processLoopStoreOfLoopLoad( + Dest, Source, SE->getConstant(Dest->getType(), SizeInBytes), + MCI->getDestAlign(), MCI->getSourceAlign(), MCI, MCI, StoreEv, LoadEv, + BECount); } /// processLoopMemSet - See if this memset can be promoted to a large memset. 
bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) { - // We can only handle non-volatile memsets with a constant size. - if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength())) + // We can only handle non-volatile memsets. + if (MSI->isVolatile()) return false; // If we're not allowed to hack on memset, we fail. @@ -909,23 +909,72 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI, // loop, which indicates a strided store. If we have something else, it's a // random store we can't handle. const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Pointer)); - if (!Ev || Ev->getLoop() != CurLoop || !Ev->isAffine()) + if (!Ev || Ev->getLoop() != CurLoop) return false; - - // Reject memsets that are so large that they overflow an unsigned. - uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue(); - if ((SizeInBytes >> 32) != 0) + if (!Ev->isAffine()) { + LLVM_DEBUG(dbgs() << " Pointer is not affine, abort\n"); return false; + } - // Check to see if the stride matches the size of the memset. If so, then we - // know that every byte is touched in the loop. - const SCEVConstant *ConstStride = dyn_cast<SCEVConstant>(Ev->getOperand(1)); - if (!ConstStride) + const SCEV *PointerStrideSCEV = Ev->getOperand(1); + const SCEV *MemsetSizeSCEV = SE->getSCEV(MSI->getLength()); + if (!PointerStrideSCEV || !MemsetSizeSCEV) return false; - APInt Stride = ConstStride->getAPInt(); - if (SizeInBytes != Stride && SizeInBytes != -Stride) - return false; + bool IsNegStride = false; + const bool IsConstantSize = isa<ConstantInt>(MSI->getLength()); + + if (IsConstantSize) { + // Memset size is constant. + // Check if the pointer stride matches the memset size. If so, then + // we know that every byte is touched in the loop. + LLVM_DEBUG(dbgs() << " memset size is constant\n"); + uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue(); + const SCEVConstant *ConstStride = dyn_cast<SCEVConstant>(Ev->getOperand(1)); + if (!ConstStride) + return false; + + APInt Stride = ConstStride->getAPInt(); + if (SizeInBytes != Stride && SizeInBytes != -Stride) + return false; + + IsNegStride = SizeInBytes == -Stride; + } else { + // Memset size is non-constant. + // Check if the pointer stride matches the memset size. + // To be conservative, the pass would not promote pointers that aren't in + // address space zero. Also, the pass only handles memset length and stride + // that are invariant for the top level loop. + LLVM_DEBUG(dbgs() << " memset size is non-constant\n"); + if (Pointer->getType()->getPointerAddressSpace() != 0) { + LLVM_DEBUG(dbgs() << " pointer is not in address space zero, " + << "abort\n"); + return false; + } + if (!SE->isLoopInvariant(MemsetSizeSCEV, CurLoop)) { + LLVM_DEBUG(dbgs() << " memset size is not a loop-invariant, " + << "abort\n"); + return false; + } + + // Compare positive direction PointerStrideSCEV with MemsetSizeSCEV + IsNegStride = PointerStrideSCEV->isNonConstantNegative(); + const SCEV *PositiveStrideSCEV = + IsNegStride ? SE->getNegativeSCEV(PointerStrideSCEV) + : PointerStrideSCEV; + LLVM_DEBUG(dbgs() << " MemsetSizeSCEV: " << *MemsetSizeSCEV << "\n" + << " PositiveStrideSCEV: " << *PositiveStrideSCEV + << "\n"); + + if (PositiveStrideSCEV != MemsetSizeSCEV) { + // TODO: folding can be done to the SCEVs + // The folding is to fold expressions that is covered by the loop guard + // at loop entry. After the folding, compare again and proceed + // optimization if equal. 
+ LLVM_DEBUG(dbgs() << " SCEV don't match, abort\n"); + return false; + } + } // Verify that the memset value is loop invariant. If not, we can't promote // the memset. @@ -935,10 +984,10 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI, SmallPtrSet<Instruction *, 1> MSIs; MSIs.insert(MSI); - bool NegStride = SizeInBytes == -Stride; - return processLoopStridedStore( - Pointer, (unsigned)SizeInBytes, MaybeAlign(MSI->getDestAlignment()), - SplatValue, MSI, MSIs, Ev, BECount, NegStride, /*IsLoopMemset=*/true); + return processLoopStridedStore(Pointer, SE->getSCEV(MSI->getLength()), + MaybeAlign(MSI->getDestAlignment()), + SplatValue, MSI, MSIs, Ev, BECount, + IsNegStride, /*IsLoopMemset=*/true); } /// mayLoopAccessLocation - Return true if the specified loop might access the @@ -946,9 +995,9 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI, /// argument specifies what the verboten forms of access are (read or write). static bool mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L, - const SCEV *BECount, unsigned StoreSize, + const SCEV *BECount, const SCEV *StoreSizeSCEV, AliasAnalysis &AA, - SmallPtrSetImpl<Instruction *> &IgnoredStores) { + SmallPtrSetImpl<Instruction *> &IgnoredInsts) { // Get the location that may be stored across the loop. Since the access is // strided positively through memory, we say that the modified location starts // at the pointer and has infinite size. @@ -956,9 +1005,11 @@ mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L, // If the loop iterates a fixed number of times, we can refine the access size // to be exactly the size of the memset, which is (BECount+1)*StoreSize - if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount)) + const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount); + const SCEVConstant *ConstSize = dyn_cast<SCEVConstant>(StoreSizeSCEV); + if (BECst && ConstSize) AccessSize = LocationSize::precise((BECst->getValue()->getZExtValue() + 1) * - StoreSize); + ConstSize->getValue()->getZExtValue()); // TODO: For this to be really effective, we have to dive into the pointer // operand in the store. Store to &A[i] of 100 will always return may alias @@ -966,14 +1017,12 @@ mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L, // which will then no-alias a store to &A[100]. MemoryLocation StoreLoc(Ptr, AccessSize); - for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E; - ++BI) - for (Instruction &I : **BI) - if (IgnoredStores.count(&I) == 0 && + for (BasicBlock *B : L->blocks()) + for (Instruction &I : *B) + if (!IgnoredInsts.contains(&I) && isModOrRefSet( intersectModRef(AA.getModRefInfo(&I, StoreLoc), Access))) return true; - return false; } @@ -981,57 +1030,67 @@ mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L, // we're trying to memset. Therefore, we need to recompute the base pointer, // which is just Start - BECount*Size. 
static const SCEV *getStartForNegStride(const SCEV *Start, const SCEV *BECount, - Type *IntPtr, unsigned StoreSize, + Type *IntPtr, const SCEV *StoreSizeSCEV, ScalarEvolution *SE) { const SCEV *Index = SE->getTruncateOrZeroExtend(BECount, IntPtr); - if (StoreSize != 1) - Index = SE->getMulExpr(Index, SE->getConstant(IntPtr, StoreSize), + if (!StoreSizeSCEV->isOne()) { + // index = back edge count * store size + Index = SE->getMulExpr(Index, + SE->getTruncateOrZeroExtend(StoreSizeSCEV, IntPtr), SCEV::FlagNUW); + } + // base pointer = start - index * store size return SE->getMinusSCEV(Start, Index); } -/// Compute the number of bytes as a SCEV from the backedge taken count. -/// -/// This also maps the SCEV into the provided type and tries to handle the -/// computation in a way that will fold cleanly. -static const SCEV *getNumBytes(const SCEV *BECount, Type *IntPtr, - unsigned StoreSize, Loop *CurLoop, - const DataLayout *DL, ScalarEvolution *SE) { - const SCEV *NumBytesS; - // The # stored bytes is (BECount+1)*Size. Expand the trip count out to +/// Compute trip count from the backedge taken count. +static const SCEV *getTripCount(const SCEV *BECount, Type *IntPtr, + Loop *CurLoop, const DataLayout *DL, + ScalarEvolution *SE) { + const SCEV *TripCountS = nullptr; + // The # stored bytes is (BECount+1). Expand the trip count out to // pointer size if it isn't already. // // If we're going to need to zero extend the BE count, check if we can add // one to it prior to zero extending without overflow. Provided this is safe, // it allows better simplification of the +1. - if (DL->getTypeSizeInBits(BECount->getType()).getFixedSize() < - DL->getTypeSizeInBits(IntPtr).getFixedSize() && + if (DL->getTypeSizeInBits(BECount->getType()) < + DL->getTypeSizeInBits(IntPtr) && SE->isLoopEntryGuardedByCond( CurLoop, ICmpInst::ICMP_NE, BECount, SE->getNegativeSCEV(SE->getOne(BECount->getType())))) { - NumBytesS = SE->getZeroExtendExpr( + TripCountS = SE->getZeroExtendExpr( SE->getAddExpr(BECount, SE->getOne(BECount->getType()), SCEV::FlagNUW), IntPtr); } else { - NumBytesS = SE->getAddExpr(SE->getTruncateOrZeroExtend(BECount, IntPtr), - SE->getOne(IntPtr), SCEV::FlagNUW); + TripCountS = SE->getAddExpr(SE->getTruncateOrZeroExtend(BECount, IntPtr), + SE->getOne(IntPtr), SCEV::FlagNUW); } - // And scale it based on the store size. - if (StoreSize != 1) { - NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize), - SCEV::FlagNUW); - } - return NumBytesS; + return TripCountS; +} + +/// Compute the number of bytes as a SCEV from the backedge taken count. +/// +/// This also maps the SCEV into the provided type and tries to handle the +/// computation in a way that will fold cleanly. +static const SCEV *getNumBytes(const SCEV *BECount, Type *IntPtr, + const SCEV *StoreSizeSCEV, Loop *CurLoop, + const DataLayout *DL, ScalarEvolution *SE) { + const SCEV *TripCountSCEV = getTripCount(BECount, IntPtr, CurLoop, DL, SE); + + return SE->getMulExpr(TripCountSCEV, + SE->getTruncateOrZeroExtend(StoreSizeSCEV, IntPtr), + SCEV::FlagNUW); } /// processLoopStridedStore - We see a strided store of some value. If we can /// transform this into a memset or memset_pattern in the loop preheader, do so. 
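Before the processLoopStridedStore hunk below: the getTripCount and getNumBytes helpers just introduced split the byte-count computation into trip count = BECount + 1, then bytes = trip count * store size, with the store size now an arbitrary loop-invariant SCEV instead of a fixed unsigned. A standalone sketch of the arithmetic those helpers build symbolically (plain integers, no overflow handling; names are illustrative):

#include <cassert>
#include <cstdint>

static uint64_t tripCount(uint64_t backedgeTakenCount) {
  // The body executes once more than the back edge is taken.
  return backedgeTakenCount + 1;
}

static uint64_t numBytes(uint64_t backedgeTakenCount, uint64_t storeSize) {
  // Total bytes written by a strided store loop that touches every byte.
  return tripCount(backedgeTakenCount) * storeSize;
}

int main() {
  // Seven iterations of an 8-byte store become a 56-byte memset/memcpy.
  assert(numBytes(/*backedgeTakenCount=*/6, /*storeSize=*/8) == 56);
  return 0;
}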
bool LoopIdiomRecognize::processLoopStridedStore( - Value *DestPtr, unsigned StoreSize, MaybeAlign StoreAlignment, + Value *DestPtr, const SCEV *StoreSizeSCEV, MaybeAlign StoreAlignment, Value *StoredVal, Instruction *TheStore, SmallPtrSetImpl<Instruction *> &Stores, const SCEVAddRecExpr *Ev, - const SCEV *BECount, bool NegStride, bool IsLoopMemset) { + const SCEV *BECount, bool IsNegStride, bool IsLoopMemset) { Value *SplatValue = isBytewiseValue(StoredVal, *DL); Constant *PatternValue = nullptr; @@ -1056,8 +1115,8 @@ bool LoopIdiomRecognize::processLoopStridedStore( bool Changed = false; const SCEV *Start = Ev->getStart(); // Handle negative strided loops. - if (NegStride) - Start = getStartForNegStride(Start, BECount, IntIdxTy, StoreSize, SE); + if (IsNegStride) + Start = getStartForNegStride(Start, BECount, IntIdxTy, StoreSizeSCEV, SE); // TODO: ideally we should still be able to generate memset if SCEV expander // is taught to generate the dependencies at the latest point. @@ -1082,7 +1141,7 @@ bool LoopIdiomRecognize::processLoopStridedStore( Changed = true; if (mayLoopAccessLocation(BasePtr, ModRefInfo::ModRef, CurLoop, BECount, - StoreSize, *AA, Stores)) + StoreSizeSCEV, *AA, Stores)) return Changed; if (avoidLIRForMultiBlockLoop(/*IsMemset=*/true, IsLoopMemset)) @@ -1091,7 +1150,7 @@ bool LoopIdiomRecognize::processLoopStridedStore( // Okay, everything looks good, insert the memset. const SCEV *NumBytesS = - getNumBytes(BECount, IntIdxTy, StoreSize, CurLoop, DL, SE); + getNumBytes(BECount, IntIdxTy, StoreSizeSCEV, CurLoop, DL, SE); // TODO: ideally we should still be able to generate memset if SCEV expander // is taught to generate the dependencies at the latest point. @@ -1138,13 +1197,20 @@ bool LoopIdiomRecognize::processLoopStridedStore( << "\n"); ORE.emit([&]() { - return OptimizationRemark(DEBUG_TYPE, "ProcessLoopStridedStore", - NewCall->getDebugLoc(), Preheader) - << "Transformed loop-strided store in " - << ore::NV("Function", TheStore->getFunction()) - << " function into a call to " - << ore::NV("NewFunction", NewCall->getCalledFunction()) - << "() intrinsic"; + OptimizationRemark R(DEBUG_TYPE, "ProcessLoopStridedStore", + NewCall->getDebugLoc(), Preheader); + R << "Transformed loop-strided store in " + << ore::NV("Function", TheStore->getFunction()) + << " function into a call to " + << ore::NV("NewFunction", NewCall->getCalledFunction()) + << "() intrinsic"; + if (!Stores.empty()) + R << ore::setExtraArgs(); + for (auto *I : Stores) { + R << ore::NV("FromBlock", I->getParent()->getName()) + << ore::NV("ToBlock", Preheader->getName()); + } + return R; }); // Okay, the memset has been formed. Zap the original store and anything that @@ -1181,16 +1247,63 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, // random load we can't handle. 
Value *LoadPtr = LI->getPointerOperand(); const SCEVAddRecExpr *LoadEv = cast<SCEVAddRecExpr>(SE->getSCEV(LoadPtr)); - return processLoopStoreOfLoopLoad(StorePtr, LoadPtr, StoreSize, + + const SCEV *StoreSizeSCEV = SE->getConstant(StorePtr->getType(), StoreSize); + return processLoopStoreOfLoopLoad(StorePtr, LoadPtr, StoreSizeSCEV, SI->getAlign(), LI->getAlign(), SI, LI, StoreEv, LoadEv, BECount); } +class MemmoveVerifier { +public: + explicit MemmoveVerifier(const Value &LoadBasePtr, const Value &StoreBasePtr, + const DataLayout &DL) + : DL(DL), LoadOff(0), StoreOff(0), + BP1(llvm::GetPointerBaseWithConstantOffset( + LoadBasePtr.stripPointerCasts(), LoadOff, DL)), + BP2(llvm::GetPointerBaseWithConstantOffset( + StoreBasePtr.stripPointerCasts(), StoreOff, DL)), + IsSameObject(BP1 == BP2) {} + + bool loadAndStoreMayFormMemmove(unsigned StoreSize, bool IsNegStride, + const Instruction &TheLoad, + bool IsMemCpy) const { + if (IsMemCpy) { + // Ensure that LoadBasePtr is after StoreBasePtr or before StoreBasePtr + // for negative stride. + if ((!IsNegStride && LoadOff <= StoreOff) || + (IsNegStride && LoadOff >= StoreOff)) + return false; + } else { + // Ensure that LoadBasePtr is after StoreBasePtr or before StoreBasePtr + // for negative stride. LoadBasePtr shouldn't overlap with StoreBasePtr. + int64_t LoadSize = + DL.getTypeSizeInBits(TheLoad.getType()).getFixedSize() / 8; + if (BP1 != BP2 || LoadSize != int64_t(StoreSize)) + return false; + if ((!IsNegStride && LoadOff < StoreOff + int64_t(StoreSize)) || + (IsNegStride && LoadOff + LoadSize > StoreOff)) + return false; + } + return true; + } + +private: + const DataLayout &DL; + int64_t LoadOff; + int64_t StoreOff; + const Value *BP1; + const Value *BP2; + +public: + const bool IsSameObject; +}; + bool LoopIdiomRecognize::processLoopStoreOfLoopLoad( - Value *DestPtr, Value *SourcePtr, unsigned StoreSize, MaybeAlign StoreAlign, - MaybeAlign LoadAlign, Instruction *TheStore, Instruction *TheLoad, - const SCEVAddRecExpr *StoreEv, const SCEVAddRecExpr *LoadEv, - const SCEV *BECount) { + Value *DestPtr, Value *SourcePtr, const SCEV *StoreSizeSCEV, + MaybeAlign StoreAlign, MaybeAlign LoadAlign, Instruction *TheStore, + Instruction *TheLoad, const SCEVAddRecExpr *StoreEv, + const SCEVAddRecExpr *LoadEv, const SCEV *BECount) { // FIXME: until llvm.memcpy.inline supports dynamic sizes, we need to // conservatively bail here, since otherwise we may have to transform @@ -1213,11 +1326,18 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad( Type *IntIdxTy = Builder.getIntNTy(DL->getIndexSizeInBits(StrAS)); APInt Stride = getStoreStride(StoreEv); - bool NegStride = StoreSize == -Stride; + const SCEVConstant *ConstStoreSize = dyn_cast<SCEVConstant>(StoreSizeSCEV); + + // TODO: Deal with non-constant size; Currently expect constant store size + assert(ConstStoreSize && "store size is expected to be a constant"); + + int64_t StoreSize = ConstStoreSize->getValue()->getZExtValue(); + bool IsNegStride = StoreSize == -Stride; // Handle negative strided loops. - if (NegStride) - StrStart = getStartForNegStride(StrStart, BECount, IntIdxTy, StoreSize, SE); + if (IsNegStride) + StrStart = + getStartForNegStride(StrStart, BECount, IntIdxTy, StoreSizeSCEV, SE); // Okay, we have a strided store "p[i]" of a loaded value. We can turn // this into a memcpy in the loop preheader now if we want. However, this @@ -1237,24 +1357,24 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad( // the return value will read this comment, and leave them alone. 
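The MemmoveVerifier class added above packages the overlap rules for turning a loop copy into memmove: for a plain load/store pair the two base pointers must resolve to the same underlying object and the load width must match the store size, and in all cases the load must stay ahead of (or, for a negative stride, behind) the store so no iteration reads bytes the single library call would already have overwritten. A standalone sketch of the offset comparison for the load/store case, with the constant offsets already extracted (the helper and its parameters are mine, not the pass's API):

#include <cassert>
#include <cstdint>

// Offsets are relative to the common underlying object. Returns true when a
// strided copy loop may be lowered to one memmove call without an iteration
// reading bytes that the call has already overwritten.
static bool mayFormMemmove(int64_t loadOff, int64_t storeOff, int64_t elemSize,
                           bool isNegStride) {
  if (!isNegStride)
    return loadOff >= storeOff + elemSize; // forward loop: load stays ahead
  return loadOff + elemSize <= storeOff;   // backward loop: load stays behind
}

int main() {
  // p[i] = p[i + 1]: the load runs one element ahead of the store -> allowed.
  assert(mayFormMemmove(/*loadOff=*/4, /*storeOff=*/0, /*elemSize=*/4, false));
  // p[i] = p[i]: the regions coincide, so the check conservatively rejects.
  assert(!mayFormMemmove(0, 0, 4, false));
  // Backward loop p[i] = p[i - 1]: the load trails the store -> allowed.
  assert(mayFormMemmove(/*loadOff=*/0, /*storeOff=*/4, /*elemSize=*/4, true));
  return 0;
}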
Changed = true; - SmallPtrSet<Instruction *, 2> Stores; - Stores.insert(TheStore); + SmallPtrSet<Instruction *, 2> IgnoredInsts; + IgnoredInsts.insert(TheStore); bool IsMemCpy = isa<MemCpyInst>(TheStore); const StringRef InstRemark = IsMemCpy ? "memcpy" : "load and store"; - bool UseMemMove = + bool LoopAccessStore = mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount, - StoreSize, *AA, Stores); - if (UseMemMove) { + StoreSizeSCEV, *AA, IgnoredInsts); + if (LoopAccessStore) { // For memmove case it's not enough to guarantee that loop doesn't access // TheStore and TheLoad. Additionally we need to make sure that TheStore is // the only user of TheLoad. if (!TheLoad->hasOneUse()) return Changed; - Stores.insert(TheLoad); + IgnoredInsts.insert(TheLoad); if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, - BECount, StoreSize, *AA, Stores)) { + BECount, StoreSizeSCEV, *AA, IgnoredInsts)) { ORE.emit([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessStore", TheStore) @@ -1265,15 +1385,16 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad( }); return Changed; } - Stores.erase(TheLoad); + IgnoredInsts.erase(TheLoad); } const SCEV *LdStart = LoadEv->getStart(); unsigned LdAS = SourcePtr->getType()->getPointerAddressSpace(); // Handle negative strided loops. - if (NegStride) - LdStart = getStartForNegStride(LdStart, BECount, IntIdxTy, StoreSize, SE); + if (IsNegStride) + LdStart = + getStartForNegStride(LdStart, BECount, IntIdxTy, StoreSizeSCEV, SE); // For a memcpy, we have to make sure that the input array is not being // mutated by the loop. @@ -1283,42 +1404,40 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad( // If the store is a memcpy instruction, we must check if it will write to // the load memory locations. So remove it from the ignored stores. if (IsMemCpy) - Stores.erase(TheStore); + IgnoredInsts.erase(TheStore); + MemmoveVerifier Verifier(*LoadBasePtr, *StoreBasePtr, *DL); if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount, - StoreSize, *AA, Stores)) { - ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessLoad", TheLoad) - << ore::NV("Inst", InstRemark) << " in " - << ore::NV("Function", TheStore->getFunction()) - << " function will not be hoisted: " - << ore::NV("Reason", "The loop may access load location"); - }); - return Changed; - } - if (UseMemMove) { - // Ensure that LoadBasePtr is after StoreBasePtr or before StoreBasePtr for - // negative stride. LoadBasePtr shouldn't overlap with StoreBasePtr. - int64_t LoadOff = 0, StoreOff = 0; - const Value *BP1 = llvm::GetPointerBaseWithConstantOffset( - LoadBasePtr->stripPointerCasts(), LoadOff, *DL); - const Value *BP2 = llvm::GetPointerBaseWithConstantOffset( - StoreBasePtr->stripPointerCasts(), StoreOff, *DL); - int64_t LoadSize = - DL->getTypeSizeInBits(TheLoad->getType()).getFixedSize() / 8; - if (BP1 != BP2 || LoadSize != int64_t(StoreSize)) + StoreSizeSCEV, *AA, IgnoredInsts)) { + if (!IsMemCpy) { + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessLoad", + TheLoad) + << ore::NV("Inst", InstRemark) << " in " + << ore::NV("Function", TheStore->getFunction()) + << " function will not be hoisted: " + << ore::NV("Reason", "The loop may access load location"); + }); return Changed; - if ((!NegStride && LoadOff < StoreOff + int64_t(StoreSize)) || - (NegStride && LoadOff + LoadSize > StoreOff)) + } + // At this point loop may access load only for memcpy in same underlying + // object. 
If that's not the case bail out. + if (!Verifier.IsSameObject) return Changed; } + bool UseMemMove = IsMemCpy ? Verifier.IsSameObject : LoopAccessStore; + if (UseMemMove) + if (!Verifier.loadAndStoreMayFormMemmove(StoreSize, IsNegStride, *TheLoad, + IsMemCpy)) + return Changed; + if (avoidLIRForMultiBlockLoop()) return Changed; // Okay, everything is safe, we can transform this! const SCEV *NumBytesS = - getNumBytes(BECount, IntIdxTy, StoreSize, CurLoop, DL, SE); + getNumBytes(BECount, IntIdxTy, StoreSizeSCEV, CurLoop, DL, SE); Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator()); @@ -1380,11 +1499,14 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad( << ore::NV("NewFunction", NewCall->getCalledFunction()) << "() intrinsic from " << ore::NV("Inst", InstRemark) << " instruction in " << ore::NV("Function", TheStore->getFunction()) - << " function"; + << " function" + << ore::setExtraArgs() + << ore::NV("FromBlock", TheStore->getParent()->getName()) + << ore::NV("ToBlock", Preheader->getName()); }); - // Okay, the memcpy has been formed. Zap the original store and anything that - // feeds into it. + // Okay, a new call to memcpy/memmove has been formed. Zap the original store + // and anything that feeds into it. if (MSSAU) MSSAU->removeMemoryAccess(TheStore, true); deleteDeadInstruction(TheStore); @@ -1549,24 +1671,22 @@ static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB, // step 4: Find the instruction which count the population: cnt2 = cnt1 + 1 { CountInst = nullptr; - for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI()->getIterator(), - IterE = LoopEntry->end(); - Iter != IterE; Iter++) { - Instruction *Inst = &*Iter; - if (Inst->getOpcode() != Instruction::Add) + for (Instruction &Inst : llvm::make_range( + LoopEntry->getFirstNonPHI()->getIterator(), LoopEntry->end())) { + if (Inst.getOpcode() != Instruction::Add) continue; - ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1)); + ConstantInt *Inc = dyn_cast<ConstantInt>(Inst.getOperand(1)); if (!Inc || !Inc->isOne()) continue; - PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry); + PHINode *Phi = getRecurrenceVar(Inst.getOperand(0), &Inst, LoopEntry); if (!Phi) continue; // Check if the result of the instruction is live of the loop. bool LiveOutLoop = false; - for (User *U : Inst->users()) { + for (User *U : Inst.users()) { if ((cast<Instruction>(U))->getParent() != LoopEntry) { LiveOutLoop = true; break; @@ -1574,7 +1694,7 @@ static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB, } if (LiveOutLoop) { - CountInst = Inst; + CountInst = &Inst; CountPhi = Phi; break; } @@ -1675,22 +1795,20 @@ static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL, // plus "cnt0". Currently it is not optimized. 
// This step could be used to detect POPCNT instruction: // cnt.next = cnt + (x.next & 1) - for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI()->getIterator(), - IterE = LoopEntry->end(); - Iter != IterE; Iter++) { - Instruction *Inst = &*Iter; - if (Inst->getOpcode() != Instruction::Add) + for (Instruction &Inst : llvm::make_range( + LoopEntry->getFirstNonPHI()->getIterator(), LoopEntry->end())) { + if (Inst.getOpcode() != Instruction::Add) continue; - ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1)); + ConstantInt *Inc = dyn_cast<ConstantInt>(Inst.getOperand(1)); if (!Inc || (!Inc->isOne() && !Inc->isMinusOne())) continue; - PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry); + PHINode *Phi = getRecurrenceVar(Inst.getOperand(0), &Inst, LoopEntry); if (!Phi) continue; - CntInst = Inst; + CntInst = &Inst; CntPhi = Phi; break; } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp index 3153a8721193..b9e63a4bc06f 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp @@ -105,9 +105,7 @@ static bool simplifyLoopInst(Loop &L, DominatorTree &DT, LoopInfo &LI, if (!V || !LI.replacementPreservesLCSSAForm(&I, V)) continue; - for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); - UI != UE;) { - Use &U = *UI++; + for (Use &U : llvm::make_early_inc_range(I.uses())) { auto *UserI = cast<Instruction>(U.getUser()); U.set(V); @@ -195,15 +193,10 @@ public: const TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI( *L->getHeader()->getParent()); - MemorySSA *MSSA = nullptr; - Optional<MemorySSAUpdater> MSSAU; - if (EnableMSSALoopDependency) { - MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA(); - MSSAU = MemorySSAUpdater(MSSA); - } + MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA(); + MemorySSAUpdater MSSAU(MSSA); - return simplifyLoopInst(*L, DT, LI, AC, TLI, - MSSAU.hasValue() ? MSSAU.getPointer() : nullptr); + return simplifyLoopInst(*L, DT, LI, AC, TLI, &MSSAU); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -211,10 +204,8 @@ public: AU.addRequired<DominatorTreeWrapperPass>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); AU.setPreservesCFG(); - if (EnableMSSALoopDependency) { - AU.addRequired<MemorySSAWrapperPass>(); - AU.addPreserved<MemorySSAWrapperPass>(); - } + AU.addRequired<MemorySSAWrapperPass>(); + AU.addPreserved<MemorySSAWrapperPass>(); getLoopAnalysisUsage(AU); } }; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInterchange.cpp index 34545f35b3c3..9f605b4ac4ad 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -1710,16 +1710,12 @@ bool LoopInterchangeTransform::adjustLoopBranches() { auto &OuterInnerReductions = LIL.getOuterInnerReductions(); // Now update the reduction PHIs in the inner and outer loop headers. 
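For reference on the LoopIdiomRecognize hunks above: the loops in detectPopcountIdiom (and its shift-until-zero sibling) that were converted to range-based for are searching the loop body for the cnt = cnt + 1 recurrence paired with x = x & (x - 1). This is the source-level idiom the pass rewrites into a single population-count intrinsic; a small standalone version of it for illustration (the function name is mine):

#include <cassert>
#include <cstdint>

// The classic "clear the lowest set bit" loop. LoopIdiomRecognize matches the
// counter recurrence plus the x &= x - 1 update and replaces the whole loop
// with one ctpop call.
static unsigned popcountIdiom(uint64_t x) {
  unsigned cnt = 0;
  while (x) {
    x &= x - 1; // drop the lowest set bit
    ++cnt;
  }
  return cnt;
}

int main() {
  assert(popcountIdiom(0) == 0);
  assert(popcountIdiom(0b1011) == 3);
  assert(popcountIdiom(~uint64_t{0}) == 64);
  return 0;
}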
SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs; - for (PHINode &PHI : InnerLoopHeader->phis()) { - if (OuterInnerReductions.find(&PHI) == OuterInnerReductions.end()) - continue; - InnerLoopPHIs.push_back(cast<PHINode>(&PHI)); - } - for (PHINode &PHI : OuterLoopHeader->phis()) { - if (OuterInnerReductions.find(&PHI) == OuterInnerReductions.end()) - continue; - OuterLoopPHIs.push_back(cast<PHINode>(&PHI)); - } + for (PHINode &PHI : InnerLoopHeader->phis()) + if (OuterInnerReductions.contains(&PHI)) + InnerLoopPHIs.push_back(cast<PHINode>(&PHI)); + for (PHINode &PHI : OuterLoopHeader->phis()) + if (OuterInnerReductions.contains(&PHI)) + OuterLoopPHIs.push_back(cast<PHINode>(&PHI)); // Now move the remaining reduction PHIs from outer to inner loop header and // vice versa. The PHI nodes must be part of a reduction across the inner and @@ -1767,6 +1763,7 @@ bool LoopInterchangeTransform::adjustLoopLinks() { return Changed; } +namespace { /// Main LoopInterchange Pass. struct LoopInterchangeLegacyPass : public LoopPass { static char ID; @@ -1795,6 +1792,7 @@ struct LoopInterchangeLegacyPass : public LoopPass { return LoopInterchange(SE, LI, DI, DT, ORE).run(L); } }; +} // namespace char LoopInterchangeLegacyPass::ID = 0; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp index aaf586173e44..21d59936616b 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp @@ -34,7 +34,6 @@ #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" @@ -109,8 +108,8 @@ struct StoreToLoadForwardingCandidate { // Currently we only support accesses with unit stride. FIXME: we should be // able to handle non unit stirde as well as long as the stride is equal to // the dependence distance. - if (getPtrStride(PSE, LoadPtr, L) != 1 || - getPtrStride(PSE, StorePtr, L) != 1) + if (getPtrStride(PSE, LoadType, LoadPtr, L) != 1 || + getPtrStride(PSE, LoadType, StorePtr, L) != 1) return false; auto &DL = Load->getParent()->getModule()->getDataLayout(); @@ -718,15 +717,12 @@ PreservedAnalyses LoopLoadEliminationPass::run(Function &F, auto *PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); auto *BFI = (PSI && PSI->hasProfileSummary()) ? &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr; - MemorySSA *MSSA = EnableMSSALoopDependency - ? 
&AM.getResult<MemorySSAAnalysis>(F).getMSSA() - : nullptr; auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager(); bool Changed = eliminateLoadsAcrossLoops( F, LI, DT, BFI, PSI, &SE, &AC, [&](Loop &L) -> const LoopAccessInfo & { - LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, - TLI, TTI, nullptr, MSSA}; + LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, + TLI, TTI, nullptr, nullptr, nullptr}; return LAM.getResult<LoopAccessAnalysis>(L, AR); }); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPassManager.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPassManager.cpp index f4fce4871331..3df4cfe8e4c1 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPassManager.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPassManager.cpp @@ -10,6 +10,7 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" @@ -44,6 +45,18 @@ PassManager<Loop, LoopAnalysisManager, LoopStandardAnalysisResults &, return PA; } +void PassManager<Loop, LoopAnalysisManager, LoopStandardAnalysisResults &, + LPMUpdater &>::printPipeline(raw_ostream &OS, + function_ref<StringRef(StringRef)> + MapClassName2PassName) { + for (unsigned Idx = 0, Size = LoopPasses.size(); Idx != Size; ++Idx) { + auto *P = LoopPasses[Idx].get(); + P->printPipeline(OS, MapClassName2PassName); + if (Idx + 1 < Size) + OS << ","; + } +} + // Run both loop passes and loop-nest passes on top-level loop \p L. PreservedAnalyses LoopPassManager::runWithLoopNestPasses(Loop &L, LoopAnalysisManager &AM, @@ -112,12 +125,6 @@ LoopPassManager::runWithLoopNestPasses(Loop &L, LoopAnalysisManager &AM, // notify the updater, otherwise U.ParentL might gets outdated and triggers // assertion failures in addSiblingLoops and addChildLoops. U.setParentLoop(L.getParentLoop()); - - // FIXME: Historically, the pass managers all called the LLVM context's - // yield function here. We don't have a generic way to acquire the - // context and it isn't yet clear what the right pattern is for yielding - // in the new pass manager so it is currently omitted. - // ...getContext().yield(); } return PA; } @@ -161,17 +168,17 @@ LoopPassManager::runWithoutLoopNestPasses(Loop &L, LoopAnalysisManager &AM, // notify the updater, otherwise U.ParentL might gets outdated and triggers // assertion failures in addSiblingLoops and addChildLoops. U.setParentLoop(L.getParentLoop()); - - // FIXME: Historically, the pass managers all called the LLVM context's - // yield function here. We don't have a generic way to acquire the - // context and it isn't yet clear what the right pattern is for yielding - // in the new pass manager so it is currently omitted. - // ...getContext().yield(); } return PA; } } // namespace llvm +void FunctionToLoopPassAdaptor::printPipeline( + raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { + OS << (UseMemorySSA ? 
"loop-mssa(" : "loop("); + Pass->printPipeline(OS, MapClassName2PassName); + OS << ")"; +} PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F, FunctionAnalysisManager &AM) { // Before we even compute any loop analyses, first run a miniature function @@ -201,6 +208,10 @@ PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F, BlockFrequencyInfo *BFI = UseBlockFrequencyInfo && F.hasProfileData() ? (&AM.getResult<BlockFrequencyAnalysis>(F)) : nullptr; + BranchProbabilityInfo *BPI = + UseBranchProbabilityInfo && F.hasProfileData() + ? (&AM.getResult<BranchProbabilityAnalysis>(F)) + : nullptr; LoopStandardAnalysisResults LAR = {AM.getResult<AAManager>(F), AM.getResult<AssumptionAnalysis>(F), AM.getResult<DominatorTreeAnalysis>(F), @@ -209,6 +220,7 @@ PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F, AM.getResult<TargetLibraryAnalysis>(F), AM.getResult<TargetIRAnalysis>(F), BFI, + BPI, MSSA}; // Setup the loop analysis manager from its proxy. It is important that @@ -285,6 +297,10 @@ PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F, else PI.runAfterPass<Loop>(*Pass, *L, PassPA); + if (LAR.MSSA && !PassPA.getChecker<MemorySSAAnalysis>().preserved()) + report_fatal_error("Loop pass manager using MemorySSA contains a pass " + "that does not preserve MemorySSA"); + #ifndef NDEBUG // LoopAnalysisResults should always be valid. // Note that we don't LAR.SE.verify() because that can change observed SE @@ -325,6 +341,8 @@ PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F, PA.preserve<ScalarEvolutionAnalysis>(); if (UseBlockFrequencyInfo && F.hasProfileData()) PA.preserve<BlockFrequencyAnalysis>(); + if (UseBranchProbabilityInfo && F.hasProfileData()) + PA.preserve<BranchProbabilityAnalysis>(); if (UseMemorySSA) PA.preserve<MemorySSAAnalysis>(); return PA; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPredication.cpp index 4f97641e2027..aa7e79a589f2 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPredication.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPredication.cpp @@ -183,6 +183,8 @@ #include "llvm/Analysis/GuardUtils.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/IR/Function.h" @@ -254,7 +256,7 @@ class LoopPredication { DominatorTree *DT; ScalarEvolution *SE; LoopInfo *LI; - BranchProbabilityInfo *BPI; + MemorySSAUpdater *MSSAU; Loop *L; const DataLayout *DL; @@ -302,16 +304,15 @@ class LoopPredication { // If the loop always exits through another block in the loop, we should not // predicate based on the latch check. For example, the latch check can be a // very coarse grained check and there can be more fine grained exit checks - // within the loop. We identify such unprofitable loops through BPI. + // within the loop. 
bool isLoopProfitableToPredicate(); bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter); public: - LoopPredication(AliasAnalysis *AA, DominatorTree *DT, - ScalarEvolution *SE, LoopInfo *LI, - BranchProbabilityInfo *BPI) - : AA(AA), DT(DT), SE(SE), LI(LI), BPI(BPI) {}; + LoopPredication(AliasAnalysis *AA, DominatorTree *DT, ScalarEvolution *SE, + LoopInfo *LI, MemorySSAUpdater *MSSAU) + : AA(AA), DT(DT), SE(SE), LI(LI), MSSAU(MSSAU){}; bool runOnLoop(Loop *L); }; @@ -325,6 +326,7 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<BranchProbabilityInfoWrapperPass>(); getLoopAnalysisUsage(AU); + AU.addPreserved<MemorySSAWrapperPass>(); } bool runOnLoop(Loop *L, LPPassManager &LPM) override { @@ -333,10 +335,12 @@ public: auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - BranchProbabilityInfo &BPI = - getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI(); + auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>(); + std::unique_ptr<MemorySSAUpdater> MSSAU; + if (MSSAWP) + MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAWP->getMSSA()); auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); - LoopPredication LP(AA, DT, SE, LI, &BPI); + LoopPredication LP(AA, DT, SE, LI, MSSAU ? MSSAU.get() : nullptr); return LP.runOnLoop(L); } }; @@ -358,16 +362,18 @@ Pass *llvm::createLoopPredicationPass() { PreservedAnalyses LoopPredicationPass::run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U) { - Function *F = L.getHeader()->getParent(); - // For the new PM, we also can't use BranchProbabilityInfo as an analysis - // pass. Function analyses need to be preserved across loop transformations - // but BPI is not preserved, hence a newly built one is needed. - BranchProbabilityInfo BPI(*F, AR.LI, &AR.TLI, &AR.DT, nullptr); - LoopPredication LP(&AR.AA, &AR.DT, &AR.SE, &AR.LI, &BPI); + std::unique_ptr<MemorySSAUpdater> MSSAU; + if (AR.MSSA) + MSSAU = std::make_unique<MemorySSAUpdater>(AR.MSSA); + LoopPredication LP(&AR.AA, &AR.DT, &AR.SE, &AR.LI, + MSSAU ? 
MSSAU.get() : nullptr); if (!LP.runOnLoop(&L)) return PreservedAnalyses::all(); - return getLoopPassPreservedAnalyses(); + auto PA = getLoopPassPreservedAnalyses(); + if (AR.MSSA) + PA.preserve<MemorySSAAnalysis>(); + return PA; } Optional<LoopICmp> @@ -809,7 +815,7 @@ bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard, Value *AllChecks = Builder.CreateAnd(Checks); auto *OldCond = Guard->getOperand(0); Guard->setOperand(0, AllChecks); - RecursivelyDeleteTriviallyDeadInstructions(OldCond); + RecursivelyDeleteTriviallyDeadInstructions(OldCond, nullptr /* TLI */, MSSAU); LLVM_DEBUG(dbgs() << "Widened checks = " << NumWidened << "\n"); return true; @@ -835,7 +841,7 @@ bool LoopPredication::widenWidenableBranchGuardConditions( Value *AllChecks = Builder.CreateAnd(Checks); auto *OldCond = BI->getCondition(); BI->setCondition(AllChecks); - RecursivelyDeleteTriviallyDeadInstructions(OldCond); + RecursivelyDeleteTriviallyDeadInstructions(OldCond, nullptr /* TLI */, MSSAU); assert(isGuardAsWidenableBranch(BI) && "Stopped being a guard after transform?"); @@ -912,7 +918,7 @@ Optional<LoopICmp> LoopPredication::parseLoopLatchICmp() { bool LoopPredication::isLoopProfitableToPredicate() { - if (SkipProfitabilityChecks || !BPI) + if (SkipProfitabilityChecks) return true; SmallVector<std::pair<BasicBlock *, BasicBlock *>, 8> ExitEdges; @@ -934,8 +940,61 @@ bool LoopPredication::isLoopProfitableToPredicate() { "expected to be an exiting block with 2 succs!"); unsigned LatchBrExitIdx = LatchTerm->getSuccessor(0) == L->getHeader() ? 1 : 0; + // We compute branch probabilities without BPI. We do not rely on BPI since + // Loop predication is usually run in an LPM and BPI is only preserved + // lossily within loop pass managers, while BPI has an inherent notion of + // being complete for an entire function. + + // If the latch exits into a deoptimize or an unreachable block, do not + // predicate on that latch check. + auto *LatchExitBlock = LatchTerm->getSuccessor(LatchBrExitIdx); + if (isa<UnreachableInst>(LatchTerm) || + LatchExitBlock->getTerminatingDeoptimizeCall()) + return false; + + auto IsValidProfileData = [](MDNode *ProfileData, const Instruction *Term) { + if (!ProfileData || !ProfileData->getOperand(0)) + return false; + if (MDString *MDS = dyn_cast<MDString>(ProfileData->getOperand(0))) + if (!MDS->getString().equals("branch_weights")) + return false; + if (ProfileData->getNumOperands() != 1 + Term->getNumSuccessors()) + return false; + return true; + }; + MDNode *LatchProfileData = LatchTerm->getMetadata(LLVMContext::MD_prof); + // Latch terminator has no valid profile data, so nothing to check + // profitability on. 
+ if (!IsValidProfileData(LatchProfileData, LatchTerm)) + return true; + + auto ComputeBranchProbability = + [&](const BasicBlock *ExitingBlock, + const BasicBlock *ExitBlock) -> BranchProbability { + auto *Term = ExitingBlock->getTerminator(); + MDNode *ProfileData = Term->getMetadata(LLVMContext::MD_prof); + unsigned NumSucc = Term->getNumSuccessors(); + if (IsValidProfileData(ProfileData, Term)) { + uint64_t Numerator = 0, Denominator = 0, ProfVal = 0; + for (unsigned i = 0; i < NumSucc; i++) { + ConstantInt *CI = + mdconst::extract<ConstantInt>(ProfileData->getOperand(i + 1)); + ProfVal = CI->getValue().getZExtValue(); + if (Term->getSuccessor(i) == ExitBlock) + Numerator += ProfVal; + Denominator += ProfVal; + } + return BranchProbability::getBranchProbability(Numerator, Denominator); + } else { + assert(LatchBlock != ExitingBlock && + "Latch term should always have profile data!"); + // No profile data, so we choose the weight as 1/num_of_succ(Src) + return BranchProbability::getBranchProbability(1, NumSucc); + } + }; + BranchProbability LatchExitProbability = - BPI->getEdgeProbability(LatchBlock, LatchBrExitIdx); + ComputeBranchProbability(LatchBlock, LatchExitBlock); // Protect against degenerate inputs provided by the user. Providing a value // less than one, can invert the definition of profitable loop predication. @@ -948,18 +1007,18 @@ bool LoopPredication::isLoopProfitableToPredicate() { LLVM_DEBUG(dbgs() << "The value is set to 1.0\n"); ScaleFactor = 1.0; } - const auto LatchProbabilityThreshold = - LatchExitProbability * ScaleFactor; + const auto LatchProbabilityThreshold = LatchExitProbability * ScaleFactor; for (const auto &ExitEdge : ExitEdges) { BranchProbability ExitingBlockProbability = - BPI->getEdgeProbability(ExitEdge.first, ExitEdge.second); + ComputeBranchProbability(ExitEdge.first, ExitEdge.second); // Some exiting edge has higher probability than the latch exiting edge. // No longer profitable to predicate. if (ExitingBlockProbability > LatchProbabilityThreshold) return false; } - // Using BPI, we have concluded that the most probable way to exit from the + + // We have concluded that the most probable way to exit from the // loop is through the latch (or there's no profile information and all // exits are equally likely). return true; @@ -1071,28 +1130,26 @@ bool LoopPredication::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { // widen so that we gain ability to analyze it's exit count and perform this // transform. TODO: It'd be nice to know for sure the exit became // analyzeable after dropping widenability. 
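Backing up to the isLoopProfitableToPredicate change above: the pass no longer queries BranchProbabilityInfo and instead derives edge probabilities directly from branch_weights metadata, falling back to 1/num_successors when no valid weights exist. A standalone sketch of that computation, with a plain vector standing in for the metadata operands (names are illustrative):

#include <cassert>
#include <cstdint>
#include <vector>

// Probability of branching to the successor at `exitIdx`, computed from raw
// branch weights; with missing or malformed weights, assume all successors
// are equally likely (the fallback used in the patch).
static double exitProbability(const std::vector<uint64_t> &weights,
                              unsigned exitIdx, unsigned numSuccessors) {
  if (numSuccessors == 0)
    return 0.0;
  if (weights.size() != numSuccessors)
    return 1.0 / numSuccessors;
  uint64_t numerator = 0, denominator = 0;
  for (unsigned i = 0; i < numSuccessors; ++i) {
    if (i == exitIdx)
      numerator += weights[i];
    denominator += weights[i];
  }
  return denominator ? static_cast<double>(numerator) / denominator : 0.0;
}

int main() {
  // branch_weights {25, 75}: the exit edge (index 0) is taken 25% of the time.
  assert(exitProbability({25, 75}, /*exitIdx=*/0, /*numSuccessors=*/2) == 0.25);
  // No profile data: assume 1/2 for a two-way branch.
  assert(exitProbability({}, 0, 2) == 0.5);
  return 0;
}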
- { - bool Invalidate = false; + bool ChangedLoop = false; - for (auto *ExitingBB : ExitingBlocks) { - if (LI->getLoopFor(ExitingBB) != L) - continue; + for (auto *ExitingBB : ExitingBlocks) { + if (LI->getLoopFor(ExitingBB) != L) + continue; - auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); - if (!BI) - continue; + auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); + if (!BI) + continue; - Use *Cond, *WC; - BasicBlock *IfTrueBB, *IfFalseBB; - if (parseWidenableBranch(BI, Cond, WC, IfTrueBB, IfFalseBB) && - L->contains(IfTrueBB)) { - WC->set(ConstantInt::getTrue(IfTrueBB->getContext())); - Invalidate = true; - } + Use *Cond, *WC; + BasicBlock *IfTrueBB, *IfFalseBB; + if (parseWidenableBranch(BI, Cond, WC, IfTrueBB, IfFalseBB) && + L->contains(IfTrueBB)) { + WC->set(ConstantInt::getTrue(IfTrueBB->getContext())); + ChangedLoop = true; } - if (Invalidate) - SE->forgetLoop(L); } + if (ChangedLoop) + SE->forgetLoop(L); // The use of umin(all analyzeable exits) instead of latch is subtle, but // important for profitability. We may have a loop which hasn't been fully @@ -1104,18 +1161,24 @@ bool LoopPredication::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { if (isa<SCEVCouldNotCompute>(MinEC) || MinEC->getType()->isPointerTy() || !SE->isLoopInvariant(MinEC, L) || !isSafeToExpandAt(MinEC, WidenableBR, *SE)) - return false; + return ChangedLoop; // Subtlety: We need to avoid inserting additional uses of the WC. We know // that it can only have one transitive use at the moment, and thus moving // that use to just before the branch and inserting code before it and then // modifying the operand is legal. auto *IP = cast<Instruction>(WidenableBR->getCondition()); + // Here we unconditionally modify the IR, so after this point we should return + // only `true`! IP->moveBefore(WidenableBR); + if (MSSAU) + if (auto *MUD = MSSAU->getMemorySSA()->getMemoryAccess(IP)) + MSSAU->moveToPlace(MUD, WidenableBR->getParent(), + MemorySSA::BeforeTerminator); Rewriter.setInsertPoint(IP); IRBuilder<> B(IP); - bool Changed = false; + bool InvalidateLoop = false; Value *MinECV = nullptr; // lazily generated if needed for (BasicBlock *ExitingBB : ExitingBlocks) { // If our exiting block exits multiple loops, we can only rewrite the @@ -1172,16 +1235,18 @@ bool LoopPredication::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { Value *OldCond = BI->getCondition(); BI->setCondition(ConstantInt::get(OldCond->getType(), !ExitIfTrue)); - Changed = true; + InvalidateLoop = true; } - if (Changed) + if (InvalidateLoop) // We just mutated a bunch of loop exits changing there exit counts // widely. We need to force recomputation of the exit counts given these // changes. Note that all of the inserted exits are never taken, and // should be removed next time the CFG is modified. SE->forgetLoop(L); - return Changed; + + // Always return `true` since we have moved the WidenableBR's condition. 
+ return true; } bool LoopPredication::runOnLoop(Loop *Loop) { @@ -1242,5 +1307,8 @@ bool LoopPredication::runOnLoop(Loop *Loop) { for (auto *Guard : GuardsAsWidenableBranches) Changed |= widenWidenableBranchGuardConditions(Guard, Expander); Changed |= predicateLoopExits(L, Expander); + + if (MSSAU && VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); return Changed; } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopRotation.cpp index 6d5b19443c76..5ba137b1c85f 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopRotation.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopRotation.cpp @@ -99,8 +99,7 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<TargetTransformInfoWrapperPass>(); - if (EnableMSSALoopDependency) - AU.addPreserved<MemorySSAWrapperPass>(); + AU.addPreserved<MemorySSAWrapperPass>(); getLoopAnalysisUsage(AU); // Lazy BFI and BPI are marked as preserved here so LoopRotate @@ -121,13 +120,11 @@ public: auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); const SimplifyQuery SQ = getBestSimplifyQuery(*this, F); Optional<MemorySSAUpdater> MSSAU; - if (EnableMSSALoopDependency) { - // Not requiring MemorySSA and getting it only if available will split - // the loop pass pipeline when LoopRotate is being run first. - auto *MSSAA = getAnalysisIfAvailable<MemorySSAWrapperPass>(); - if (MSSAA) - MSSAU = MemorySSAUpdater(&MSSAA->getMSSA()); - } + // Not requiring MemorySSA and getting it only if available will split + // the loop pass pipeline when LoopRotate is being run first. + auto *MSSAA = getAnalysisIfAvailable<MemorySSAWrapperPass>(); + if (MSSAA) + MSSAU = MemorySSAUpdater(&MSSAA->getMSSA()); // Vectorization requires loop-rotation. Use default threshold for loops the // user explicitly marked for vectorization, even when header duplication is // disabled. diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp index cc6d11220807..a87843d658a9 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp @@ -733,13 +733,12 @@ public: DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); + auto *MSSAA = getAnalysisIfAvailable<MemorySSAWrapperPass>(); Optional<MemorySSAUpdater> MSSAU; - if (EnableMSSALoopDependency) { - MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA(); - MSSAU = MemorySSAUpdater(MSSA); - if (VerifyMemorySSA) - MSSA->verifyMemorySSA(); - } + if (MSSAA) + MSSAU = MemorySSAUpdater(&MSSAA->getMSSA()); + if (MSSAA && VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); bool DeleteCurrentLoop = false; bool Changed = simplifyLoopCFG( *L, DT, LI, SE, MSSAU.hasValue() ? 
MSSAU.getPointer() : nullptr, @@ -750,10 +749,7 @@ public: } void getAnalysisUsage(AnalysisUsage &AU) const override { - if (EnableMSSALoopDependency) { - AU.addRequired<MemorySSAWrapperPass>(); - AU.addPreserved<MemorySSAWrapperPass>(); - } + AU.addPreserved<MemorySSAWrapperPass>(); AU.addPreserved<DependenceAnalysisWrapperPass>(); getLoopAnalysisUsage(AU); } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSink.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSink.cpp index a01287f587d7..c9c9e60d0921 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSink.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSink.cpp @@ -323,15 +323,14 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI, // Traverse preheader's instructions in reverse order becaue if A depends // on B (A appears after B), A needs to be sinked first before B can be // sinked. - for (auto II = Preheader->rbegin(), E = Preheader->rend(); II != E;) { - Instruction *I = &*II++; + for (Instruction &I : llvm::make_early_inc_range(llvm::reverse(*Preheader))) { // No need to check for instruction's operands are loop invariant. - assert(L.hasLoopInvariantOperands(I) && + assert(L.hasLoopInvariantOperands(&I) && "Insts in a loop's preheader should have loop invariant operands!"); - if (!canSinkOrHoistInst(*I, &AA, &DT, &L, CurAST, MSSAU.get(), false, + if (!canSinkOrHoistInst(I, &AA, &DT, &L, CurAST, MSSAU.get(), false, LICMFlags.get())) continue; - if (sinkInstruction(L, *I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI, + if (sinkInstruction(L, I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI, MSSAU.get())) Changed = true; } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 404852f1dd4d..a9a2266e1196 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -136,6 +136,12 @@ using namespace llvm; /// worst cases before LSR burns too much compile time and stack space. static const unsigned MaxIVUsers = 200; +/// Limit the size of expression that SCEV-based salvaging will attempt to +/// translate into a DIExpression. +/// Choose a maximum size such that debuginfo is not excessively increased and +/// the salvaging is not too expensive for the compiler. +static const unsigned MaxSCEVSalvageExpressionSize = 64; + // Temporary flag to cleanup congruent phis after LSR phi expansion. // It's currently disabled until we can determine whether it's truly useful or // not. The flag should be removed after the v3.0 release. @@ -689,7 +695,7 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, const APInt &RA = RC->getAPInt(); // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do // some folding. - if (RA.isAllOnesValue()) { + if (RA.isAllOnes()) { if (LHS->getType()->isPointerTy()) return nullptr; return SE.getMulExpr(LHS, RC); @@ -2816,9 +2822,7 @@ static const SCEV *getExprBase(const SCEV *S) { // there's nothing more complex. // FIXME: not sure if we want to recognize negation. 
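Several hunks in this file set (ADCE, CallSiteSplitting, LoopInstSimplify, the LoopSink change above, and the getExprBase loop that resumes below) replace hand-rolled iterator loops with llvm::reverse and llvm::make_early_inc_range. The point of the early-increment pattern is that the iterator is advanced before the current element may be erased, so deletion cannot invalidate the traversal. A minimal illustration with std::list standing in for LLVM's intrusive lists (purely illustrative, not the LLVM ADT):

#include <cassert>
#include <list>

int main() {
  std::list<int> insts = {1, 2, 3, 4, 5, 6};

  // Early-increment traversal: grab the next position before possibly erasing
  // the current element (this is what make_early_inc_range automates).
  for (auto it = insts.begin(); it != insts.end();) {
    auto cur = it++;
    if (*cur % 2 == 0)
      insts.erase(cur); // safe: `it` already points past the erased node
  }

  assert(insts == std::list<int>({1, 3, 5}));
  return 0;
}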
const SCEVAddExpr *Add = cast<SCEVAddExpr>(S); - for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(Add->op_end()), - E(Add->op_begin()); I != E; ++I) { - const SCEV *SubExpr = *I; + for (const SCEV *SubExpr : reverse(Add->operands())) { if (SubExpr->getSCEVType() == scAddExpr) return getExprBase(SubExpr); @@ -3150,7 +3154,7 @@ void LSRInstance::CollectChains() { void LSRInstance::FinalizeChain(IVChain &Chain) { assert(!Chain.Incs.empty() && "empty IV chains are not allowed"); LLVM_DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n"); - + for (const IVInc &Inc : Chain) { LLVM_DEBUG(dbgs() << " Inc: " << *Inc.UserInst << "\n"); auto UseI = find(Inc.UserInst->operands(), Inc.IVOperand); @@ -3385,7 +3389,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { void LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) { // Mark uses whose expressions cannot be expanded. - if (!isSafeToExpand(S, SE)) + if (!isSafeToExpand(S, SE, /*CanonicalMode*/ false)) LU.RigidFormula = true; Formula F; @@ -3934,6 +3938,9 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, // Check each interesting stride. for (int64_t Factor : Factors) { + // Check that Factor can be represented by IntTy + if (!ConstantInt::isValueValidForType(IntTy, Factor)) + continue; // Check that the multiplication doesn't overflow. if (Base.BaseOffset == std::numeric_limits<int64_t>::min() && Factor == -1) continue; @@ -4082,6 +4089,14 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) { if (DstTy->isPointerTy()) return; + // It is invalid to extend a pointer type so exit early if ScaledReg or + // any of the BaseRegs are pointers. + if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy()) + return; + if (any_of(Base.BaseRegs, + [](const SCEV *S) { return S->getType()->isPointerTy(); })) + return; + for (Type *SrcTy : Types) { if (SrcTy != DstTy && TTI.isTruncateFree(SrcTy, DstTy)) { Formula F = Base; @@ -5689,23 +5704,6 @@ LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, } } -#ifndef NDEBUG - // All dominating loops must have preheaders, or SCEVExpander may not be able - // to materialize an AddRecExpr whose Start is an outer AddRecExpr. - // - // IVUsers analysis should only create users that are dominated by simple loop - // headers. Since this loop should dominate all of its users, its user list - // should be empty if this loop itself is not within a simple loop nest. 
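Among the LSR hunks above, GenerateICmpZeroScales now rejects scale factors that cannot be represented in the use's integer type, and GenerateTruncates bails out on pointer-typed registers. A standalone sketch of the kind of signed range check the first of these performs, with plain integers in place of ConstantInt::isValueValidForType (the helper is hypothetical):

#include <cassert>
#include <cstdint>

// True if the signed value `v` fits in a two's-complement integer that is
// `bits` wide (1 <= bits <= 64).
static bool fitsInSignedBits(int64_t v, unsigned bits) {
  if (bits >= 64)
    return true;
  const int64_t maxVal = (int64_t{1} << (bits - 1)) - 1;
  const int64_t minVal = -maxVal - 1;
  return v >= minVal && v <= maxVal;
}

int main() {
  assert(fitsInSignedBits(127, 8));                // i8 holds 127
  assert(!fitsInSignedBits(128, 8));               // ...but not 128
  assert(fitsInSignedBits(-128, 8));               // two's-complement minimum
  assert(!fitsInSignedBits(int64_t{1} << 40, 32)); // too wide for i32
  return 0;
}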
- for (DomTreeNode *Rung = DT.getNode(L->getLoopPreheader()); - Rung; Rung = Rung->getIDom()) { - BasicBlock *BB = Rung->getBlock(); - const Loop *DomLoop = LI.getLoopFor(BB); - if (DomLoop && DomLoop->getHeader() == BB) { - assert(DomLoop->getLoopPreheader() && "LSR needs a simplified loop nest"); - } - } -#endif // DEBUG - LLVM_DEBUG(dbgs() << "\nLSR on loop "; L->getHeader()->printAsOperand(dbgs(), /*PrintType=*/false); dbgs() << ":\n"); @@ -5870,6 +5868,7 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<MemorySSAWrapperPass>(); } +namespace { struct SCEVDbgValueBuilder { SCEVDbgValueBuilder() = default; SCEVDbgValueBuilder(const SCEVDbgValueBuilder &Base) { @@ -6117,14 +6116,15 @@ struct DVIRecoveryRec { Metadata *LocationOp; const llvm::SCEV *SCEV; }; +} // namespace -static bool RewriteDVIUsingIterCount(DVIRecoveryRec CachedDVI, +static void RewriteDVIUsingIterCount(DVIRecoveryRec CachedDVI, const SCEVDbgValueBuilder &IterationCount, ScalarEvolution &SE) { // LSR may add locations to previously single location-op DVIs which // are currently not supported. if (CachedDVI.DVI->getNumVariableLocationOps() != 1) - return false; + return; // SCEVs for SSA values are most frquently of the form // {start,+,stride}, but sometimes they are ({start,+,stride} + %a + ..). @@ -6132,48 +6132,70 @@ static bool RewriteDVIUsingIterCount(DVIRecoveryRec CachedDVI, // SCEVs have not been observed to result in debuginfo-lossy optimisations, // so its not expected this point will be reached. if (!isa<SCEVAddRecExpr>(CachedDVI.SCEV)) - return false; + return; LLVM_DEBUG(dbgs() << "scev-salvage: Value to salvage SCEV: " << *CachedDVI.SCEV << '\n'); const auto *Rec = cast<SCEVAddRecExpr>(CachedDVI.SCEV); if (!Rec->isAffine()) - return false; + return; + + if (CachedDVI.SCEV->getExpressionSize() > MaxSCEVSalvageExpressionSize) + return; // Initialise a new builder with the iteration count expression. In // combination with the value's SCEV this enables recovery. 
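The hunks that resume below add RewriteDVIUsingOffset for the easy salvage case: when the SCEV of the value a dbg.value described and the SCEV of the surviving induction variable differ only by a constant, the debug location is rewritten as the IV plus a DIExpression offset, keeping the full iteration-count rewrite as the fallback. A rough standalone sketch of that decision, with a toy affine pair standing in for a SCEVAddRecExpr (all names here are illustrative, not LLVM's API):

#include <cassert>
#include <cstdint>
#include <optional>

// Toy stand-in for an affine add-recurrence: value(i) = start + i * stride.
struct Affine {
  int64_t start;
  int64_t stride;
};

// Mirrors the idea behind computeConstantDifference: two affine expressions
// differ by a constant only when their strides match.
static std::optional<int64_t> constantDifference(Affine a, Affine b) {
  if (a.stride != b.stride)
    return std::nullopt;
  return a.start - b.start;
}

int main() {
  Affine iv{/*start=*/0, /*stride=*/4};
  Affine dyingValue{/*start=*/16, /*stride=*/4};

  // Same stride, starts 16 apart: rewrite the dbg.value as IV + 16.
  auto diff = constantDifference(dyingValue, iv);
  assert(diff && *diff == 16);

  // Different strides: no constant difference, so fall back to rebuilding the
  // value from the iteration count.
  assert(!constantDifference(Affine{0, 8}, iv));
  return 0;
}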
SCEVDbgValueBuilder RecoverValue(IterationCount); if (!RecoverValue.SCEVToValueExpr(*Rec, SE)) - return false; + return; LLVM_DEBUG(dbgs() << "scev-salvage: Updating: " << *CachedDVI.DVI << '\n'); RecoverValue.applyExprToDbgValue(*CachedDVI.DVI, CachedDVI.Expr); LLVM_DEBUG(dbgs() << "scev-salvage: to: " << *CachedDVI.DVI << '\n'); - return true; } -static bool +static void RewriteDVIUsingOffset(DVIRecoveryRec &DVIRec, llvm::PHINode &IV, + int64_t Offset) { + assert(!DVIRec.DVI->hasArgList() && "Expected single location-op dbg.value."); + DbgValueInst *DVI = DVIRec.DVI; + SmallVector<uint64_t, 8> Ops; + DIExpression::appendOffset(Ops, Offset); + DIExpression *Expr = DIExpression::prependOpcodes(DVIRec.Expr, Ops, true); + LLVM_DEBUG(dbgs() << "scev-salvage: Updating: " << *DVIRec.DVI << '\n'); + DVI->setExpression(Expr); + llvm::Value *ValIV = dyn_cast<llvm::Value>(&IV); + DVI->replaceVariableLocationOp( + 0u, llvm::MetadataAsValue::get(DVI->getContext(), + llvm::ValueAsMetadata::get(ValIV))); + LLVM_DEBUG(dbgs() << "scev-salvage: updated with offset to IV: " + << *DVIRec.DVI << '\n'); +} + +static void DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, SmallVector<DVIRecoveryRec, 2> &DVIToUpdate) { if (DVIToUpdate.empty()) - return false; + return; const llvm::SCEV *SCEVInductionVar = SE.getSCEV(LSRInductionVar); assert(SCEVInductionVar && "Anticipated a SCEV for the post-LSR induction variable"); - bool Changed = false; if (const SCEVAddRecExpr *IVAddRec = dyn_cast<SCEVAddRecExpr>(SCEVInductionVar)) { if (!IVAddRec->isAffine()) - return false; + return; + if (IVAddRec->getExpressionSize() > MaxSCEVSalvageExpressionSize) + return; + + // The iteration count is required to recover location values. SCEVDbgValueBuilder IterCountExpr; IterCountExpr.pushValue(LSRInductionVar); if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE)) - return false; + return; LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV: " << *SCEVInductionVar << '\n'); @@ -6196,14 +6218,26 @@ DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE, DVIRec.DVI->setExpression(DVIRec.Expr); } - Changed |= RewriteDVIUsingIterCount(DVIRec, IterCountExpr, SE); + LLVM_DEBUG(dbgs() << "scev-salvage: value to recover SCEV: " + << *DVIRec.SCEV << '\n'); + + // Create a simple expression if the IV and value to salvage SCEVs + // start values differ by only a constant value. + if (Optional<APInt> Offset = + SE.computeConstantDifference(DVIRec.SCEV, SCEVInductionVar)) { + if (Offset.getValue().getMinSignedBits() <= 64) + RewriteDVIUsingOffset(DVIRec, *LSRInductionVar, + Offset.getValue().getSExtValue()); + } else { + RewriteDVIUsingIterCount(DVIRec, IterCountExpr, SE); + } } } - return Changed; } /// Identify and cache salvageable DVI locations and expressions along with the -/// corresponding SCEV(s). Also ensure that the DVI is not deleted before +/// corresponding SCEV(s). Also ensure that the DVI is not deleted between +/// cacheing and salvaging. static void DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE, SmallVector<DVIRecoveryRec, 2> &SalvageableDVISCEVs, @@ -6214,6 +6248,9 @@ DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE, if (!DVI) continue; + if (DVI->isUndef()) + continue; + if (DVI->hasArgList()) continue; @@ -6221,6 +6258,16 @@ DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE, !SE.isSCEVable(DVI->getVariableLocationOp(0)->getType())) continue; + // SCEVUnknown wraps an llvm::Value, it does not have a start and stride. 
+ // Therefore no translation to DIExpression is performed. + const SCEV *S = SE.getSCEV(DVI->getVariableLocationOp(0)); + if (isa<SCEVUnknown>(S)) + continue; + + // Avoid wasting resources generating an expression containing undef. + if (SE.containsUndefs(S)) + continue; + SalvageableDVISCEVs.push_back( {DVI, DVI->getExpression(), DVI->getRawLocation(), SE.getSCEV(DVI->getVariableLocationOp(0))}); @@ -6234,33 +6281,32 @@ DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE, /// surviving subsequent transforms. static llvm::PHINode *GetInductionVariable(const Loop &L, ScalarEvolution &SE, const LSRInstance &LSR) { - // For now, just pick the first IV generated and inserted. Ideally pick an IV - // that is unlikely to be optimised away by subsequent transforms. + + auto IsSuitableIV = [&](PHINode *P) { + if (!SE.isSCEVable(P->getType())) + return false; + if (const SCEVAddRecExpr *Rec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(P))) + return Rec->isAffine() && !SE.containsUndefs(SE.getSCEV(P)); + return false; + }; + + // For now, just pick the first IV that was generated and inserted by + // ScalarEvolution. Ideally pick an IV that is unlikely to be optimised away + // by subsequent transforms. for (const WeakVH &IV : LSR.getScalarEvolutionIVs()) { if (!IV) continue; - assert(isa<PHINode>(&*IV) && "Expected PhI node."); - if (SE.isSCEVable((*IV).getType())) { - PHINode *Phi = dyn_cast<PHINode>(&*IV); - LLVM_DEBUG(dbgs() << "scev-salvage: IV : " << *IV - << "with SCEV: " << *SE.getSCEV(Phi) << "\n"); - return Phi; - } - } + // There should only be PHI node IVs. + PHINode *P = cast<PHINode>(&*IV); - for (PHINode &Phi : L.getHeader()->phis()) { - if (!SE.isSCEVable(Phi.getType())) - continue; - - const llvm::SCEV *PhiSCEV = SE.getSCEV(&Phi); - if (const llvm::SCEVAddRecExpr *Rec = dyn_cast<SCEVAddRecExpr>(PhiSCEV)) - if (!Rec->isAffine()) - continue; + if (IsSuitableIV(P)) + return P; + } - LLVM_DEBUG(dbgs() << "scev-salvage: Selected IV from loop header: " << Phi - << " with SCEV: " << *PhiSCEV << "\n"); - return Phi; + for (PHINode &P : L.getHeader()->phis()) { + if (IsSuitableIV(&P)) + return &P; } return nullptr; } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp index 71eb393fcdd7..1ecbb86724e1 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp @@ -286,8 +286,8 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, AssumptionCache &AC, DependenceInfo &DI, OptimizationRemarkEmitter &ORE, int OptLevel) { TargetTransformInfo::UnrollingPreferences UP = - gatherUnrollingPreferences(L, SE, TTI, nullptr, nullptr, OptLevel, None, - None, None, None, None, None); + gatherUnrollingPreferences(L, SE, TTI, nullptr, nullptr, ORE, OptLevel, + None, None, None, None, None, None); TargetTransformInfo::PeelingPreferences PP = gatherPeelingPreferences(L, SE, TTI, None, None); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp index 49501f324a49..67702520511b 100--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -184,7 +184,8 @@ static const unsigned NoThreshold = std::numeric_limits<unsigned>::max(); /// flags, TTI overrides and user specified parameters.
TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences( Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, - BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel, + BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, + OptimizationRemarkEmitter &ORE, int OptLevel, Optional<unsigned> UserThreshold, Optional<unsigned> UserCount, Optional<bool> UserAllowPartial, Optional<bool> UserRuntime, Optional<bool> UserUpperBound, Optional<unsigned> UserFullUnrollMaxCount) { @@ -214,7 +215,7 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences( UP.MaxIterationsCountToAnalyze = UnrollMaxIterationsCountToAnalyze; // Override with any target specific settings - TTI.getUnrollingPreferences(L, SE, UP); + TTI.getUnrollingPreferences(L, SE, UP, &ORE); // Apply size attributes bool OptForSize = L->getHeader()->getParent()->hasOptSize() || @@ -318,6 +319,16 @@ struct EstimatedUnrollCost { unsigned RolledDynamicCost; }; +struct PragmaInfo { + PragmaInfo(bool UUC, bool PFU, unsigned PC, bool PEU) + : UserUnrollCount(UUC), PragmaFullUnroll(PFU), PragmaCount(PC), + PragmaEnableUnroll(PEU) {} + const bool UserUnrollCount; + const bool PragmaFullUnroll; + const unsigned PragmaCount; + const bool PragmaEnableUnroll; +}; + } // end anonymous namespace /// Figure out if the loop is worth full unrolling. @@ -746,13 +757,132 @@ public: // Returns loop size estimation for unrolled loop, given the unrolling // configuration specified by UP. - uint64_t getUnrolledLoopSize(TargetTransformInfo::UnrollingPreferences &UP) { + uint64_t + getUnrolledLoopSize(const TargetTransformInfo::UnrollingPreferences &UP, + const unsigned CountOverwrite = 0) const { assert(LoopSize >= UP.BEInsns && "LoopSize should not be less than BEInsns!"); - return (uint64_t)(LoopSize - UP.BEInsns) * UP.Count + UP.BEInsns; + if (CountOverwrite) + return static_cast<uint64_t>(LoopSize - UP.BEInsns) * CountOverwrite + + UP.BEInsns; + else + return static_cast<uint64_t>(LoopSize - UP.BEInsns) * UP.Count + + UP.BEInsns; } }; +static Optional<unsigned> +shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo, + const unsigned TripMultiple, const unsigned TripCount, + const UnrollCostEstimator UCE, + const TargetTransformInfo::UnrollingPreferences &UP) { + + // Using unroll pragma + // 1st priority is unroll count set by "unroll-count" option. + + if (PInfo.UserUnrollCount) { + if (UP.AllowRemainder && + UCE.getUnrolledLoopSize(UP, (unsigned)UnrollCount) < UP.Threshold) + return (unsigned)UnrollCount; + } + + // 2nd priority is unroll count set by pragma. + if (PInfo.PragmaCount > 0) { + if ((UP.AllowRemainder || (TripMultiple % PInfo.PragmaCount == 0)) && + UCE.getUnrolledLoopSize(UP, PInfo.PragmaCount) < PragmaUnrollThreshold) + return PInfo.PragmaCount; + } + + if (PInfo.PragmaFullUnroll && TripCount != 0) { + if (UCE.getUnrolledLoopSize(UP, TripCount) < PragmaUnrollThreshold) + return TripCount; + } + // if didn't return until here, should continue to other priorties + return None; +} + +static Optional<unsigned> shouldFullUnroll( + Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, + ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues, + const unsigned FullUnrollTripCount, const UnrollCostEstimator UCE, + const TargetTransformInfo::UnrollingPreferences &UP) { + + if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) { + // When computing the unrolled size, note that BEInsns are not replicated + // like the rest of the loop body. 
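As background for the threshold check that follows in this hunk: getUnrolledLoopSize() uses a linear size model in which the backedge instructions (BEInsns) are counted once and the rest of the loop body is counted Count times. A minimal standalone sketch of that model, with illustrative numbers and no LLVM dependencies:

#include <cassert>
#include <cstdint>
#include <iostream>

// Body instructions are replicated Count times, backedge instructions only once.
static std::uint64_t unrolledLoopSize(unsigned LoopSize, unsigned BEInsns,
                                      unsigned Count) {
  assert(LoopSize >= BEInsns && "LoopSize should not be less than BEInsns");
  return static_cast<std::uint64_t>(LoopSize - BEInsns) * Count + BEInsns;
}

int main() {
  // A 12-instruction loop with 2 backedge instructions, unrolled by 4:
  // (12 - 2) * 4 + 2 = 42 estimated instructions.
  std::cout << unrolledLoopSize(12, 2, 4) << '\n';
}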
+ if (UCE.getUnrolledLoopSize(UP) < UP.Threshold) { + return FullUnrollTripCount; + + } else { + // The loop isn't that small, but we still can fully unroll it if that + // helps to remove a significant number of instructions. + // To check that, run additional analysis on the loop. + if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost( + L, FullUnrollTripCount, DT, SE, EphValues, TTI, + UP.Threshold * UP.MaxPercentThresholdBoost / 100, + UP.MaxIterationsCountToAnalyze)) { + unsigned Boost = + getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost); + if (Cost->UnrolledCost < UP.Threshold * Boost / 100) { + return FullUnrollTripCount; + } + } + } + } + return None; +} + +static Optional<unsigned> +shouldPartialUnroll(const unsigned LoopSize, const unsigned TripCount, + const UnrollCostEstimator UCE, + const TargetTransformInfo::UnrollingPreferences &UP) { + + unsigned count = UP.Count; + if (TripCount) { + if (!UP.Partial) { + LLVM_DEBUG(dbgs() << " will not try to unroll partially because " + << "-unroll-allow-partial not given\n"); + count = 0; + return count; + } + if (count == 0) + count = TripCount; + if (UP.PartialThreshold != NoThreshold) { + // Reduce unroll count to be modulo of TripCount for partial unrolling. + if (UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold) + count = (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) / + (LoopSize - UP.BEInsns); + if (count > UP.MaxCount) + count = UP.MaxCount; + while (count != 0 && TripCount % count != 0) + count--; + if (UP.AllowRemainder && count <= 1) { + // If there is no Count that is modulo of TripCount, set Count to + // largest power-of-two factor that satisfies the threshold limit. + // As we'll create fixup loop, do the type of unrolling only if + // remainder loop is allowed. + count = UP.DefaultUnrollRuntimeCount; + while (count != 0 && + UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold) + count >>= 1; + } + if (count < 2) { + count = 0; + } + } else { + count = TripCount; + } + if (count > UP.MaxCount) + count = UP.MaxCount; + + LLVM_DEBUG(dbgs() << " partially unrolling with count: " << count << "\n"); + + return count; + } + + // if didn't return until here, should continue to other priorties + return None; +} // Returns true if unroll count was set explicitly. // Calculates unroll count and writes it to UP.Count. // Unless IgnoreUser is true, will also use metadata and command-line options @@ -770,7 +900,18 @@ bool llvm::computeUnrollCount( TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound) { UnrollCostEstimator UCE(*L, LoopSize); + Optional<unsigned> UnrollFactor; + + const bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0; + const bool PragmaFullUnroll = hasUnrollFullPragma(L); + const unsigned PragmaCount = unrollCountPragmaValue(L); + const bool PragmaEnableUnroll = hasUnrollEnablePragma(L); + const bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll || + PragmaEnableUnroll || UserUnrollCount; + + PragmaInfo PInfo(UserUnrollCount, PragmaFullUnroll, PragmaCount, + PragmaEnableUnroll); // Use an explicit peel count that has been specified for testing. In this // case it's not permitted to also specify an explicit unroll count. if (PP.PeelCount) { @@ -782,47 +923,29 @@ bool llvm::computeUnrollCount( UP.Runtime = false; return true; } - // Check for explicit Count. // 1st priority is unroll count set by "unroll-count" option. 
- bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0; - if (UserUnrollCount) { - UP.Count = UnrollCount; - UP.AllowExpensiveTripCount = true; - UP.Force = true; - if (UP.AllowRemainder && UCE.getUnrolledLoopSize(UP) < UP.Threshold) - return true; - } - // 2nd priority is unroll count set by pragma. - unsigned PragmaCount = unrollCountPragmaValue(L); - if (PragmaCount > 0) { - UP.Count = PragmaCount; - UP.Runtime = true; - UP.AllowExpensiveTripCount = true; - UP.Force = true; - if ((UP.AllowRemainder || (TripMultiple % PragmaCount == 0)) && - UCE.getUnrolledLoopSize(UP) < PragmaUnrollThreshold) - return true; - } - bool PragmaFullUnroll = hasUnrollFullPragma(L); - if (PragmaFullUnroll && TripCount != 0) { - UP.Count = TripCount; - if (UCE.getUnrolledLoopSize(UP) < PragmaUnrollThreshold) - return false; - } + UnrollFactor = shouldPragmaUnroll(L, PInfo, TripMultiple, TripCount, UCE, UP); + + if (UnrollFactor) { + UP.Count = *UnrollFactor; - bool PragmaEnableUnroll = hasUnrollEnablePragma(L); - bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll || - PragmaEnableUnroll || UserUnrollCount; - - if (ExplicitUnroll && TripCount != 0) { - // If the loop has an unrolling pragma, we want to be more aggressive with - // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold - // value which is larger than the default limits. - UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold); - UP.PartialThreshold = - std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold); + if (UserUnrollCount || (PragmaCount > 0)) { + UP.AllowExpensiveTripCount = true; + UP.Force = true; + } + UP.Runtime |= (PragmaCount > 0); + return ExplicitUnroll; + } else { + if (ExplicitUnroll && TripCount != 0) { + // If the loop has an unrolling pragma, we want to be more aggressive with + // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold + // value which is larger than the default limits. + UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold); + UP.PartialThreshold = + std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold); + } } // 3rd priority is full unroll count. @@ -852,71 +975,55 @@ bool llvm::computeUnrollCount( unsigned FullUnrollTripCount = ExactTripCount ? ExactTripCount : FullUnrollMaxTripCount; UP.Count = FullUnrollTripCount; - if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) { - // When computing the unrolled size, note that BEInsns are not replicated - // like the rest of the loop body. - if (UCE.getUnrolledLoopSize(UP) < UP.Threshold) { - UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount); - return ExplicitUnroll; - } else { - // The loop isn't that small, but we still can fully unroll it if that - // helps to remove a significant number of instructions. - // To check that, run additional analysis on the loop. 
- if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost( - L, FullUnrollTripCount, DT, SE, EphValues, TTI, - UP.Threshold * UP.MaxPercentThresholdBoost / 100, - UP.MaxIterationsCountToAnalyze)) { - unsigned Boost = - getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost); - if (Cost->UnrolledCost < UP.Threshold * Boost / 100) { - UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount); - return ExplicitUnroll; - } - } - } + + UnrollFactor = + shouldFullUnroll(L, TTI, DT, SE, EphValues, FullUnrollTripCount, UCE, UP); + + // if shouldFullUnroll can do the unrolling, some side parameteres should be + // set + if (UnrollFactor) { + UP.Count = *UnrollFactor; + UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount); + TripCount = FullUnrollTripCount; + TripMultiple = UP.UpperBound ? 1 : TripMultiple; + return ExplicitUnroll; + } else { + UP.Count = FullUnrollTripCount; } // 4th priority is loop peeling. - computePeelCount(L, LoopSize, PP, TripCount, SE, UP.Threshold); + computePeelCount(L, LoopSize, PP, TripCount, DT, SE, UP.Threshold); if (PP.PeelCount) { UP.Runtime = false; UP.Count = 1; return ExplicitUnroll; } + // Before starting partial unrolling, set up.partial to true, + // if user explicitly asked for unrolling + if (TripCount) + UP.Partial |= ExplicitUnroll; + // 5th priority is partial unrolling. // Try partial unroll only when TripCount could be statically calculated. - if (TripCount) { - UP.Partial |= ExplicitUnroll; - if (!UP.Partial) { - LLVM_DEBUG(dbgs() << " will not try to unroll partially because " - << "-unroll-allow-partial not given\n"); - UP.Count = 0; - return false; - } - if (UP.Count == 0) - UP.Count = TripCount; + UnrollFactor = shouldPartialUnroll(LoopSize, TripCount, UCE, UP); + + if (UnrollFactor) { + UP.Count = *UnrollFactor; + + if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount && + UP.Count != TripCount) + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, + "FullUnrollAsDirectedTooLarge", + L->getStartLoc(), L->getHeader()) + << "Unable to fully unroll loop as directed by unroll pragma " + "because " + "unrolled size is too large."; + }); + if (UP.PartialThreshold != NoThreshold) { - // Reduce unroll count to be modulo of TripCount for partial unrolling. - if (UCE.getUnrolledLoopSize(UP) > UP.PartialThreshold) - UP.Count = - (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) / - (LoopSize - UP.BEInsns); - if (UP.Count > UP.MaxCount) - UP.Count = UP.MaxCount; - while (UP.Count != 0 && TripCount % UP.Count != 0) - UP.Count--; - if (UP.AllowRemainder && UP.Count <= 1) { - // If there is no Count that is modulo of TripCount, set Count to - // largest power-of-two factor that satisfies the threshold limit. - // As we'll create fixup loop, do the type of unrolling only if - // remainder loop is allowed. 
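The removed inline code continues below; both it and the new shouldPartialUnroll() follow the same count-selection strategy. A standalone sketch of that strategy (the MaxCount clamp and the missed-optimization remarks are omitted; all numbers are illustrative):

#include <algorithm>
#include <cstdint>
#include <iostream>

// Prefer an unroll count that evenly divides the trip count; if none fits
// under the partial threshold and a remainder (fixup) loop is allowed, fall
// back to the largest power of two that still fits.
static std::uint64_t unrolledSize(unsigned LoopSize, unsigned BEInsns,
                                  unsigned Count) {
  return static_cast<std::uint64_t>(LoopSize - BEInsns) * Count + BEInsns;
}

static unsigned pickPartialCount(unsigned TripCount, unsigned LoopSize,
                                 unsigned BEInsns, unsigned PartialThreshold,
                                 unsigned DefaultRuntimeCount,
                                 bool AllowRemainder) {
  unsigned Count = TripCount;
  if (unrolledSize(LoopSize, BEInsns, Count) > PartialThreshold)
    Count = (std::max(PartialThreshold, BEInsns + 1) - BEInsns) /
            (LoopSize - BEInsns);
  // Reduce the count until it divides the trip count.
  while (Count != 0 && TripCount % Count != 0)
    --Count;
  // No divisor fits: if a fixup loop is allowed, pick a power-of-two count.
  if (AllowRemainder && Count <= 1) {
    Count = DefaultRuntimeCount;
    while (Count != 0 &&
           unrolledSize(LoopSize, BEInsns, Count) > PartialThreshold)
      Count >>= 1;
  }
  return Count < 2 ? 0 : Count;
}

int main() {
  // Trip count 17 is prime, so no divisor of it fits under the threshold;
  // with a remainder loop allowed, the fallback picks a power of two (4 here).
  std::cout << pickPartialCount(/*TripCount=*/17, /*LoopSize=*/12,
                                /*BEInsns=*/2, /*PartialThreshold=*/60,
                                /*DefaultRuntimeCount=*/8,
                                /*AllowRemainder=*/true)
            << '\n';
}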
- UP.Count = UP.DefaultUnrollRuntimeCount; - while (UP.Count != 0 && - UCE.getUnrolledLoopSize(UP) > UP.PartialThreshold) - UP.Count >>= 1; - } - if (UP.Count < 2) { + if (UP.Count == 0) { if (PragmaEnableUnroll) ORE->emit([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, @@ -926,25 +1033,8 @@ bool llvm::computeUnrollCount( "pragma " "because unrolled size is too large."; }); - UP.Count = 0; } - } else { - UP.Count = TripCount; } - if (UP.Count > UP.MaxCount) - UP.Count = UP.MaxCount; - if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount && - UP.Count != TripCount) - ORE->emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, - "FullUnrollAsDirectedTooLarge", - L->getStartLoc(), L->getHeader()) - << "Unable to fully unroll loop as directed by unroll pragma " - "because " - "unrolled size is too large."; - }); - LLVM_DEBUG(dbgs() << " partially unrolling with count: " << UP.Count - << "\n"); return ExplicitUnroll; } assert(TripCount == 0 && @@ -981,8 +1071,6 @@ bool llvm::computeUnrollCount( UP.AllowExpensiveTripCount = true; } } - - // Reduce count based on the type of unrolling and the threshold values. UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount; if (!UP.Runtime) { LLVM_DEBUG( @@ -1017,7 +1105,7 @@ bool llvm::computeUnrollCount( using namespace ore; - if (PragmaCount > 0 && !UP.AllowRemainder) + if (unrollCountPragmaValue(L) > 0 && !UP.AllowRemainder) ORE->emit([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, "DifferentUnrollCountFromDirected", @@ -1079,7 +1167,7 @@ static LoopUnrollResult tryToUnrollLoop( bool NotDuplicatable; bool Convergent; TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences( - L, SE, TTI, BFI, PSI, OptLevel, ProvidedThreshold, ProvidedCount, + L, SE, TTI, BFI, PSI, ORE, OptLevel, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound, ProvidedFullUnrollMaxCount); TargetTransformInfo::PeelingPreferences PP = gatherPeelingPreferences( @@ -1529,3 +1617,25 @@ PreservedAnalyses LoopUnrollPass::run(Function &F, return getLoopPassPreservedAnalyses(); } + +void LoopUnrollPass::printPipeline( + raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { + static_cast<PassInfoMixin<LoopUnrollPass> *>(this)->printPipeline( + OS, MapClassName2PassName); + OS << "<"; + if (UnrollOpts.AllowPartial != None) + OS << (UnrollOpts.AllowPartial.getValue() ? "" : "no-") << "partial;"; + if (UnrollOpts.AllowPeeling != None) + OS << (UnrollOpts.AllowPeeling.getValue() ? "" : "no-") << "peeling;"; + if (UnrollOpts.AllowRuntime != None) + OS << (UnrollOpts.AllowRuntime.getValue() ? "" : "no-") << "runtime;"; + if (UnrollOpts.AllowUpperBound != None) + OS << (UnrollOpts.AllowUpperBound.getValue() ? "" : "no-") << "upperbound;"; + if (UnrollOpts.AllowProfileBasedPeeling != None) + OS << (UnrollOpts.AllowProfileBasedPeeling.getValue() ? 
"" : "no-") + << "profile-peeling;"; + if (UnrollOpts.FullUnrollMaxCount != None) + OS << "full-unroll-max=" << UnrollOpts.FullUnrollMaxCount << ";"; + OS << "O" << UnrollOpts.OptLevel; + OS << ">"; +} diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp index 9a854ff80246..76bb5497c2c2 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -232,10 +232,8 @@ namespace { AU.addPreserved<LazyBranchProbabilityInfoPass>(); AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<TargetTransformInfoWrapperPass>(); - if (EnableMSSALoopDependency) { - AU.addRequired<MemorySSAWrapperPass>(); - AU.addPreserved<MemorySSAWrapperPass>(); - } + AU.addRequired<MemorySSAWrapperPass>(); + AU.addPreserved<MemorySSAWrapperPass>(); if (HasBranchDivergence) AU.addRequired<LegacyDivergenceAnalysis>(); getLoopAnalysisUsage(AU); @@ -539,11 +537,8 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPMRef) { LPM = &LPMRef; DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); - if (EnableMSSALoopDependency) { - MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA(); - MSSAU = std::make_unique<MemorySSAUpdater>(MSSA); - assert(DT && "Cannot update MemorySSA without a valid DomTree."); - } + MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA(); + MSSAU = std::make_unique<MemorySSAUpdater>(MSSA); CurrentLoop = L; Function *F = CurrentLoop->getHeader()->getParent(); @@ -551,19 +546,19 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPMRef) { if (SanitizeMemory) SafetyInfo.computeLoopSafetyInfo(L); - if (MSSA && VerifyMemorySSA) + if (VerifyMemorySSA) MSSA->verifyMemorySSA(); bool Changed = false; do { assert(CurrentLoop->isLCSSAForm(*DT)); - if (MSSA && VerifyMemorySSA) + if (VerifyMemorySSA) MSSA->verifyMemorySSA(); RedoLoop = false; Changed |= processCurrentLoop(); } while (RedoLoop); - if (MSSA && VerifyMemorySSA) + if (VerifyMemorySSA) MSSA->verifyMemorySSA(); return Changed; @@ -1312,8 +1307,7 @@ void LoopUnswitch::splitExitEdges( for (unsigned I = 0, E = ExitBlocks.size(); I != E; ++I) { BasicBlock *ExitBlock = ExitBlocks[I]; - SmallVector<BasicBlock *, 4> Preds(pred_begin(ExitBlock), - pred_end(ExitBlock)); + SmallVector<BasicBlock *, 4> Preds(predecessors(ExitBlock)); // Although SplitBlockPredecessors doesn't preserve loop-simplify in // general, if we call it on all predecessors of all exits then it does. 
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp index bd3001988369..186065db327e 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp @@ -55,11 +55,17 @@ static bool replaceConditionalBranchesOnConstant(Instruction *II, Value *NewValue, DomTreeUpdater *DTU) { bool HasDeadBlocks = false; - SmallSetVector<Instruction *, 8> Worklist; + SmallSetVector<Instruction *, 8> UnsimplifiedUsers; replaceAndRecursivelySimplify(II, NewValue, nullptr, nullptr, nullptr, - &Worklist); - for (auto I : Worklist) { - BranchInst *BI = dyn_cast<BranchInst>(I); + &UnsimplifiedUsers); + // UnsimplifiedUsers can contain PHI nodes that may be removed when + // replacing the branch instructions, so use a value handle worklist + // to handle those possibly removed instructions. + SmallVector<WeakVH, 8> Worklist(UnsimplifiedUsers.begin(), + UnsimplifiedUsers.end()); + + for (auto &VH : Worklist) { + BranchInst *BI = dyn_cast_or_null<BranchInst>(VH); if (!BI) continue; if (BI->isUnconditional()) diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp index ead8082f3036..1c186e9a0488 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp @@ -357,11 +357,10 @@ static bool lowerExpectIntrinsic(Function &F) { // Remove llvm.expect intrinsics. Iterate backwards in order // to process select instructions before the intrinsic gets // removed. - for (auto BI = BB.rbegin(), BE = BB.rend(); BI != BE;) { - Instruction *Inst = &*BI++; - CallInst *CI = dyn_cast<CallInst>(Inst); + for (Instruction &Inst : llvm::make_early_inc_range(llvm::reverse(BB))) { + CallInst *CI = dyn_cast<CallInst>(&Inst); if (!CI) { - if (SelectInst *SI = dyn_cast<SelectInst>(Inst)) { + if (SelectInst *SI = dyn_cast<SelectInst>(&Inst)) { if (handleBrSelExpect(*SI)) ExpectIntrinsicsHandled++; } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp index 42c183a6408e..4e4097e13271 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp @@ -900,8 +900,7 @@ public: // UndefedInsts and then check that we in fact remove them. 
SmallSet<Instruction *, 16> UndefedInsts; for (auto *Inst : reverse(ToRemove)) { - for (auto I = Inst->use_begin(), E = Inst->use_end(); I != E;) { - Use &U = *I++; + for (Use &U : llvm::make_early_inc_range(Inst->uses())) { if (auto *Undefed = dyn_cast<Instruction>(U.getUser())) UndefedInsts.insert(Undefed); U.set(UndefValue::get(Inst->getType())); @@ -981,8 +980,9 @@ public: Value *EltPtr = createElementPtr(Ptr, EltTy, Builder); MatrixTy Result; for (unsigned I = 0, E = Shape.getNumVectors(); I < E; ++I) { - Value *GEP = computeVectorAddr(EltPtr, Builder.getInt64(I), Stride, - Shape.getStride(), EltTy, Builder); + Value *GEP = computeVectorAddr( + EltPtr, Builder.getIntN(Stride->getType()->getScalarSizeInBits(), I), + Stride, Shape.getStride(), EltTy, Builder); Value *Vector = Builder.CreateAlignedLoad( VecTy, GEP, getAlignForIndex(I, Stride, EltTy, MAlign), IsVolatile, "col.load"); @@ -1071,9 +1071,11 @@ public: auto VType = cast<VectorType>(Ty); Value *EltPtr = createElementPtr(Ptr, VType->getElementType(), Builder); for (auto Vec : enumerate(StoreVal.vectors())) { - Value *GEP = computeVectorAddr(EltPtr, Builder.getInt64(Vec.index()), - Stride, StoreVal.getStride(), - VType->getElementType(), Builder); + Value *GEP = computeVectorAddr( + EltPtr, + Builder.getIntN(Stride->getType()->getScalarSizeInBits(), + Vec.index()), + Stride, StoreVal.getStride(), VType->getElementType(), Builder); Builder.CreateAlignedStore(Vec.value(), GEP, getAlignForIndex(Vec.index(), Stride, VType->getElementType(), @@ -2261,6 +2263,16 @@ PreservedAnalyses LowerMatrixIntrinsicsPass::run(Function &F, return PreservedAnalyses::all(); } +void LowerMatrixIntrinsicsPass::printPipeline( + raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { + static_cast<PassInfoMixin<LowerMatrixIntrinsicsPass> *>(this)->printPipeline( + OS, MapClassName2PassName); + OS << "<"; + if (Minimal) + OS << "minimal"; + OS << ">"; +} + namespace { class LowerMatrixIntrinsicsLegacyPass : public FunctionPass { diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 9afbe0e9a2a5..67335a45fb58 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -22,7 +22,6 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/Loads.h" -#include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSAUpdater.h" @@ -67,9 +66,10 @@ using namespace llvm; #define DEBUG_TYPE "memcpyopt" -static cl::opt<bool> - EnableMemorySSA("enable-memcpyopt-memoryssa", cl::init(true), cl::Hidden, - cl::desc("Use MemorySSA-backed MemCpyOpt.")); +static cl::opt<bool> EnableMemCpyOptWithoutLibcalls( + "enable-memcpyopt-without-libcalls", cl::init(false), cl::Hidden, + cl::ZeroOrMore, + cl::desc("Enable memcpyopt even when libcalls are disabled")); STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted"); STATISTIC(NumMemSetInfer, "Number of memsets inferred"); @@ -282,13 +282,9 @@ private: AU.addPreserved<DominatorTreeWrapperPass>(); AU.addPreserved<GlobalsAAWrapperPass>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); - if (!EnableMemorySSA) - AU.addRequired<MemoryDependenceWrapperPass>(); - AU.addPreserved<MemoryDependenceWrapperPass>(); AU.addRequired<AAResultsWrapperPass>(); 
AU.addPreserved<AAResultsWrapperPass>(); - if (EnableMemorySSA) - AU.addRequired<MemorySSAWrapperPass>(); + AU.addRequired<MemorySSAWrapperPass>(); AU.addPreserved<MemorySSAWrapperPass>(); } }; @@ -304,7 +300,6 @@ INITIALIZE_PASS_BEGIN(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) @@ -329,10 +324,7 @@ static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start, } void MemCpyOptPass::eraseInstruction(Instruction *I) { - if (MSSAU) - MSSAU->removeMemoryAccess(I); - if (MD) - MD->removeInstruction(I); + MSSAU->removeMemoryAccess(I); I->eraseFromParent(); } @@ -394,14 +386,12 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst, // memsets. MemoryDef *LastMemDef = nullptr; for (++BI; !BI->isTerminator(); ++BI) { - if (MSSAU) { - auto *CurrentAcc = cast_or_null<MemoryUseOrDef>( - MSSAU->getMemorySSA()->getMemoryAccess(&*BI)); - if (CurrentAcc) { - MemInsertPoint = CurrentAcc; - if (auto *CurrentDef = dyn_cast<MemoryDef>(CurrentAcc)) - LastMemDef = CurrentDef; - } + auto *CurrentAcc = cast_or_null<MemoryUseOrDef>( + MSSAU->getMemorySSA()->getMemoryAccess(&*BI)); + if (CurrentAcc) { + MemInsertPoint = CurrentAcc; + if (auto *CurrentDef = dyn_cast<MemoryDef>(CurrentAcc)) + LastMemDef = CurrentDef; } // Calls that only access inaccessible memory do not block merging @@ -503,19 +493,17 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst, if (!Range.TheStores.empty()) AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc()); - if (MSSAU) { - assert(LastMemDef && MemInsertPoint && - "Both LastMemDef and MemInsertPoint need to be set"); - auto *NewDef = - cast<MemoryDef>(MemInsertPoint->getMemoryInst() == &*BI - ? MSSAU->createMemoryAccessBefore( - AMemSet, LastMemDef, MemInsertPoint) - : MSSAU->createMemoryAccessAfter( - AMemSet, LastMemDef, MemInsertPoint)); - MSSAU->insertDef(NewDef, /*RenameUses=*/true); - LastMemDef = NewDef; - MemInsertPoint = NewDef; - } + assert(LastMemDef && MemInsertPoint && + "Both LastMemDef and MemInsertPoint need to be set"); + auto *NewDef = + cast<MemoryDef>(MemInsertPoint->getMemoryInst() == &*BI + ? MSSAU->createMemoryAccessBefore( + AMemSet, LastMemDef, MemInsertPoint) + : MSSAU->createMemoryAccessAfter( + AMemSet, LastMemDef, MemInsertPoint)); + MSSAU->insertDef(NewDef, /*RenameUses=*/true); + LastMemDef = NewDef; + MemInsertPoint = NewDef; // Zap all the stores. for (Instruction *SI : Range.TheStores) @@ -624,17 +612,15 @@ bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) { // TODO: Simplify this once P will be determined by MSSA, in which case the // discrepancy can no longer occur. 
MemoryUseOrDef *MemInsertPoint = nullptr; - if (MSSAU) { - if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(P)) { - MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator()); - } else { - const Instruction *ConstP = P; - for (const Instruction &I : make_range(++ConstP->getReverseIterator(), - ++LI->getReverseIterator())) { - if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(&I)) { - MemInsertPoint = MA; - break; - } + if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(P)) { + MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator()); + } else { + const Instruction *ConstP = P; + for (const Instruction &I : make_range(++ConstP->getReverseIterator(), + ++LI->getReverseIterator())) { + if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(&I)) { + MemInsertPoint = MA; + break; } } } @@ -643,12 +629,10 @@ bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) { for (auto *I : llvm::reverse(ToLift)) { LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n"); I->moveBefore(P); - if (MSSAU) { - assert(MemInsertPoint && "Must have found insert point"); - if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(I)) { - MSSAU->moveAfter(MA, MemInsertPoint); - MemInsertPoint = MA; - } + assert(MemInsertPoint && "Must have found insert point"); + if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(I)) { + MSSAU->moveAfter(MA, MemInsertPoint); + MemInsertPoint = MA; } } @@ -682,7 +666,13 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { LI->getParent() == SI->getParent()) { auto *T = LI->getType(); - if (T->isAggregateType()) { + // Don't introduce calls to memcpy/memmove intrinsics out of thin air if + // the corresponding libcalls are not available. + // TODO: We should really distinguish between libcall availability and + // our ability to introduce intrinsics. + if (T->isAggregateType() && + (EnableMemCpyOptWithoutLibcalls || + (TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove)))) { MemoryLocation LoadLoc = MemoryLocation::get(LI); // We use alias analysis to check if an instruction may store to @@ -712,9 +702,10 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { if (P) { // If we load from memory that may alias the memory we store to, // memmove must be used to preserve semantic. If not, memcpy can - // be used. + // be used. Also, if we load from constant memory, memcpy can be used + // as the constant memory won't be modified. 
bool UseMemMove = false; - if (!AA->isNoAlias(MemoryLocation::get(SI), LoadLoc)) + if (isModSet(AA->getModRefInfo(SI, LoadLoc))) UseMemMove = true; uint64_t Size = DL.getTypeStoreSize(T); @@ -733,13 +724,10 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => " << *M << "\n"); - if (MSSAU) { - auto *LastDef = - cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI)); - auto *NewAccess = - MSSAU->createMemoryAccessAfter(M, LastDef, LastDef); - MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); - } + auto *LastDef = + cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI)); + auto *NewAccess = MSSAU->createMemoryAccessAfter(M, LastDef, LastDef); + MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); eraseInstruction(SI); eraseInstruction(LI); @@ -755,38 +743,21 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { // happen to be using a load-store pair to implement it, rather than // a memcpy. CallInst *C = nullptr; - if (EnableMemorySSA) { - if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>( - MSSA->getWalker()->getClobberingMemoryAccess(LI))) { - // The load most post-dom the call. Limit to the same block for now. - // TODO: Support non-local call-slot optimization? - if (LoadClobber->getBlock() == SI->getParent()) - C = dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst()); - } - } else { - MemDepResult ldep = MD->getDependency(LI); - if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst())) - C = dyn_cast<CallInst>(ldep.getInst()); + if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>( + MSSA->getWalker()->getClobberingMemoryAccess(LI))) { + // The load most post-dom the call. Limit to the same block for now. + // TODO: Support non-local call-slot optimization? + if (LoadClobber->getBlock() == SI->getParent()) + C = dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst()); } if (C) { // Check that nothing touches the dest of the "copy" between // the call and the store. MemoryLocation StoreLoc = MemoryLocation::get(SI); - if (EnableMemorySSA) { - if (accessedBetween(*AA, StoreLoc, MSSA->getMemoryAccess(C), - MSSA->getMemoryAccess(SI))) - C = nullptr; - } else { - for (BasicBlock::iterator I = --SI->getIterator(), - E = C->getIterator(); - I != E; --I) { - if (isModOrRefSet(AA->getModRefInfo(&*I, StoreLoc))) { - C = nullptr; - break; - } - } - } + if (accessedBetween(*AA, StoreLoc, MSSA->getMemoryAccess(C), + MSSA->getMemoryAccess(SI))) + C = nullptr; } if (C) { @@ -805,6 +776,13 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { } } + // The following code creates memset intrinsics out of thin air. Don't do + // this if the corresponding libfunc is not available. + // TODO: We should really distinguish between libcall availability and + // our ability to introduce intrinsics. + if (!(TLI->has(LibFunc_memset) || EnableMemCpyOptWithoutLibcalls)) + return false; + // There are two cases that are interesting for this code to handle: memcpy // and memset. Right now we only handle memset. 
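The memset case mentioned above applies to stores whose value is a single byte repeated across the whole store width; LLVM answers that question on IR with isBytewiseValue(). A standalone illustration of the property for plain integers:

#include <cstdint>
#include <iostream>
#include <optional>

// Returns the repeated byte if every byte of V's low `Bytes` bytes is equal,
// i.e. the stored value could instead be produced by a memset of that byte.
static std::optional<std::uint8_t> repeatedByte(std::uint64_t V,
                                                unsigned Bytes) {
  std::uint8_t B = V & 0xff;
  for (unsigned I = 1; I < Bytes; ++I)
    if (((V >> (8 * I)) & 0xff) != B)
      return std::nullopt;
  return B;
}

int main() {
  std::cout << repeatedByte(0x0000000000000000ULL, 8).has_value() << '\n'; // 1
  std::cout << repeatedByte(0xABABABABABABABABULL, 8).has_value() << '\n'; // 1
  std::cout << repeatedByte(0x12345678ULL, 4).has_value() << '\n';         // 0
}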
@@ -831,13 +809,12 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n"); - if (MSSAU) { - assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI))); - auto *LastDef = - cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI)); - auto *NewAccess = MSSAU->createMemoryAccessAfter(M, LastDef, LastDef); - MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); - } + // The newly inserted memset is immediately overwritten by the original + // store, so we do not need to rename uses. + auto *StoreDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI)); + auto *NewAccess = MSSAU->createMemoryAccessBefore( + M, StoreDef->getDefiningAccess(), StoreDef); + MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/false); eraseInstruction(SI); NumMemSetInfer++; @@ -1033,11 +1010,6 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, cast<AllocaInst>(cpyDest)->setAlignment(srcAlign); } - // Drop any cached information about the call, because we may have changed - // its dependence information by changing its parameter. - if (MD) - MD->removeInstruction(C); - // Update AA metadata // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be // handled here, but combineMetadata doesn't support them yet @@ -1086,28 +1058,19 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M, // // TODO: If the code between M and MDep is transparent to the destination "c", // then we could still perform the xform by moving M up to the first memcpy. - if (EnableMemorySSA) { - // TODO: It would be sufficient to check the MDep source up to the memcpy - // size of M, rather than MDep. - if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep), - MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M))) - return false; - } else { - // NOTE: This is conservative, it will stop on any read from the source loc, - // not just the defining memcpy. - MemDepResult SourceDep = - MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false, - M->getIterator(), M->getParent()); - if (!SourceDep.isClobber() || SourceDep.getInst() != MDep) - return false; - } + // TODO: It would be sufficient to check the MDep source up to the memcpy + // size of M, rather than MDep. + if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep), + MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M))) + return false; // If the dest of the second might alias the source of the first, then the - // source and dest might overlap. We still want to eliminate the intermediate - // value, but we have to generate a memmove instead of memcpy. + // source and dest might overlap. In addition, if the source of the first + // points to constant memory, they won't overlap by definition. Otherwise, we + // still want to eliminate the intermediate value, but we have to generate a + // memmove instead of memcpy. bool UseMemMove = false; - if (!AA->isNoAlias(MemoryLocation::getForDest(M), - MemoryLocation::getForSource(MDep))) + if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(MDep)))) UseMemMove = true; // If all checks passed, then we can transform M. 
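At the source level, the memcpy-memcpy forwarding performed by processMemCpyMemCpyDependence() (the rewrite itself appears in the next hunk) looks roughly like this; the names are illustrative:

#include <cstddef>
#include <cstdio>
#include <cstring>

// Before: the second copy reads the bytes the first one just wrote.
void before(char *dst, char *tmp, const char *src, std::size_t n) {
  std::memcpy(tmp, src, n); // MDep
  std::memcpy(dst, tmp, n); // M depends on MDep
}

// After forwarding: M reads from the original source, which often makes the
// first copy dead. A memmove is used instead if dst may overlap src; if src
// is constant memory, it cannot be modified, so memcpy is kept.
void after(char *dst, char *tmp, const char *src, std::size_t n) {
  std::memcpy(tmp, src, n); // now a candidate for dead store elimination
  std::memcpy(dst, src, n);
}

int main() {
  char src[8] = "pattern", tmp[8], dst[8];
  before(dst, tmp, src, sizeof(src));
  std::puts(dst);
  after(dst, tmp, src, sizeof(src));
  std::puts(dst);
}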
@@ -1134,12 +1097,10 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M, MDep->getRawSource(), MDep->getSourceAlign(), M->getLength(), M->isVolatile()); - if (MSSAU) { - assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M))); - auto *LastDef = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)); - auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef); - MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); - } + assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M))); + auto *LastDef = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)); + auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef); + MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); // Remove the instruction we're replacing. eraseInstruction(M); @@ -1169,30 +1130,16 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy, // Check that src and dst of the memcpy aren't the same. While memcpy // operands cannot partially overlap, exact equality is allowed. - if (!AA->isNoAlias(MemoryLocation(MemCpy->getSource(), - LocationSize::precise(1)), - MemoryLocation(MemCpy->getDest(), - LocationSize::precise(1)))) + if (isModSet(AA->getModRefInfo(MemCpy, MemoryLocation::getForSource(MemCpy)))) return false; - if (EnableMemorySSA) { - // We know that dst up to src_size is not written. We now need to make sure - // that dst up to dst_size is not accessed. (If we did not move the memset, - // checking for reads would be sufficient.) - if (accessedBetween(*AA, MemoryLocation::getForDest(MemSet), - MSSA->getMemoryAccess(MemSet), - MSSA->getMemoryAccess(MemCpy))) { - return false; - } - } else { - // We have already checked that dst up to src_size is not accessed. We - // need to make sure that there are no accesses up to dst_size either. - MemDepResult DstDepInfo = MD->getPointerDependencyFrom( - MemoryLocation::getForDest(MemSet), false, MemCpy->getIterator(), - MemCpy->getParent()); - if (DstDepInfo.getInst() != MemSet) - return false; - } + // We know that dst up to src_size is not written. We now need to make sure + // that dst up to dst_size is not accessed. (If we did not move the memset, + // checking for reads would be sufficient.) + if (accessedBetween(*AA, MemoryLocation::getForDest(MemSet), + MSSA->getMemoryAccess(MemSet), + MSSA->getMemoryAccess(MemCpy))) + return false; // Use the same i8* dest as the memcpy, killing the memset dest if different. Value *Dest = MemCpy->getRawDest(); @@ -1242,18 +1189,16 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy, SrcSize), MemSet->getOperand(1), MemsetLen, MaybeAlign(Align)); - if (MSSAU) { - assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) && - "MemCpy must be a MemoryDef"); - // The new memset is inserted after the memcpy, but it is known that its - // defining access is the memset about to be removed which immediately - // precedes the memcpy. - auto *LastDef = - cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)); - auto *NewAccess = MSSAU->createMemoryAccessBefore( - NewMemSet, LastDef->getDefiningAccess(), LastDef); - MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); - } + assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) && + "MemCpy must be a MemoryDef"); + // The new memset is inserted after the memcpy, but it is known that its + // defining access is the memset about to be removed which immediately + // precedes the memcpy. 
+ auto *LastDef = + cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)); + auto *NewAccess = MSSAU->createMemoryAccessBefore( + NewMemSet, LastDef->getDefiningAccess(), LastDef); + MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); eraseInstruction(MemSet); return true; @@ -1261,23 +1206,8 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy, /// Determine whether the instruction has undefined content for the given Size, /// either because it was freshly alloca'd or started its lifetime. -static bool hasUndefContents(Instruction *I, Value *Size) { - if (isa<AllocaInst>(I)) - return true; - - if (ConstantInt *CSize = dyn_cast<ConstantInt>(Size)) { - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) - if (II->getIntrinsicID() == Intrinsic::lifetime_start) - if (ConstantInt *LTSize = dyn_cast<ConstantInt>(II->getArgOperand(0))) - if (LTSize->getZExtValue() >= CSize->getZExtValue()) - return true; - } - - return false; -} - -static bool hasUndefContentsMSSA(MemorySSA *MSSA, AliasAnalysis *AA, Value *V, - MemoryDef *Def, Value *Size) { +static bool hasUndefContents(MemorySSA *MSSA, AliasAnalysis *AA, Value *V, + MemoryDef *Def, Value *Size) { if (MSSA->isLiveOnEntryDef(Def)) return isa<AllocaInst>(getUnderlyingObject(V)); @@ -1351,19 +1281,12 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy, // easily represent this location, we use the full 0..CopySize range. MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy); bool CanReduceSize = false; - if (EnableMemorySSA) { - MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet); - MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess( - MemSetAccess->getDefiningAccess(), MemCpyLoc); - if (auto *MD = dyn_cast<MemoryDef>(Clobber)) - if (hasUndefContentsMSSA(MSSA, AA, MemCpy->getSource(), MD, CopySize)) - CanReduceSize = true; - } else { - MemDepResult DepInfo = MD->getPointerDependencyFrom( - MemCpyLoc, true, MemSet->getIterator(), MemSet->getParent()); - if (DepInfo.isDef() && hasUndefContents(DepInfo.getInst(), CopySize)) + MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet); + MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess( + MemSetAccess->getDefiningAccess(), MemCpyLoc); + if (auto *MD = dyn_cast<MemoryDef>(Clobber)) + if (hasUndefContents(MSSA, AA, MemCpy->getSource(), MD, CopySize)) CanReduceSize = true; - } if (!CanReduceSize) return false; @@ -1375,12 +1298,10 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy, Instruction *NewM = Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1), CopySize, MaybeAlign(MemCpy->getDestAlignment())); - if (MSSAU) { - auto *LastDef = - cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)); - auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef); - MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); - } + auto *LastDef = + cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)); + auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef); + MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); return true; } @@ -1410,151 +1331,90 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) { Instruction *NewM = Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(), MaybeAlign(M->getDestAlignment()), false); - if (MSSAU) { - auto *LastDef = - cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)); - auto *NewAccess = - 
MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef); - MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); - } + auto *LastDef = + cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)); + auto *NewAccess = + MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef); + MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); eraseInstruction(M); ++NumCpyToSet; return true; } - if (EnableMemorySSA) { - MemoryUseOrDef *MA = MSSA->getMemoryAccess(M); - MemoryAccess *AnyClobber = MSSA->getWalker()->getClobberingMemoryAccess(MA); - MemoryLocation DestLoc = MemoryLocation::getForDest(M); - const MemoryAccess *DestClobber = - MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc); - - // Try to turn a partially redundant memset + memcpy into - // memcpy + smaller memset. We don't need the memcpy size for this. - // The memcpy most post-dom the memset, so limit this to the same basic - // block. A non-local generalization is likely not worthwhile. - if (auto *MD = dyn_cast<MemoryDef>(DestClobber)) - if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst())) - if (DestClobber->getBlock() == M->getParent()) - if (processMemSetMemCpyDependence(M, MDep)) - return true; - - MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess( - AnyClobber, MemoryLocation::getForSource(M)); - - // There are four possible optimizations we can do for memcpy: - // a) memcpy-memcpy xform which exposes redundance for DSE. - // b) call-memcpy xform for return slot optimization. - // c) memcpy from freshly alloca'd space or space that has just started - // its lifetime copies undefined data, and we can therefore eliminate - // the memcpy in favor of the data that was already at the destination. - // d) memcpy from a just-memset'd source can be turned into memset. - if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) { - if (Instruction *MI = MD->getMemoryInst()) { - if (ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength())) { - if (auto *C = dyn_cast<CallInst>(MI)) { - // The memcpy must post-dom the call. Limit to the same block for - // now. Additionally, we need to ensure that there are no accesses - // to dest between the call and the memcpy. Accesses to src will be - // checked by performCallSlotOptzn(). - // TODO: Support non-local call-slot optimization? - if (C->getParent() == M->getParent() && - !accessedBetween(*AA, DestLoc, MD, MA)) { - // FIXME: Can we pass in either of dest/src alignment here instead - // of conservatively taking the minimum? 
- Align Alignment = std::min(M->getDestAlign().valueOrOne(), - M->getSourceAlign().valueOrOne()); - if (performCallSlotOptzn( - M, M, M->getDest(), M->getSource(), - TypeSize::getFixed(CopySize->getZExtValue()), Alignment, - C)) { - LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n" - << " call: " << *C << "\n" - << " memcpy: " << *M << "\n"); - eraseInstruction(M); - ++NumMemCpyInstr; - return true; - } - } - } - } - if (auto *MDep = dyn_cast<MemCpyInst>(MI)) - return processMemCpyMemCpyDependence(M, MDep); - if (auto *MDep = dyn_cast<MemSetInst>(MI)) { - if (performMemCpyToMemSetOptzn(M, MDep)) { - LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n"); - eraseInstruction(M); - ++NumCpyToSet; - return true; - } - } - } - - if (hasUndefContentsMSSA(MSSA, AA, M->getSource(), MD, M->getLength())) { - LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n"); - eraseInstruction(M); - ++NumMemCpyInstr; - return true; - } - } - } else { - MemDepResult DepInfo = MD->getDependency(M); - - // Try to turn a partially redundant memset + memcpy into - // memcpy + smaller memset. We don't need the memcpy size for this. - if (DepInfo.isClobber()) - if (MemSetInst *MDep = dyn_cast<MemSetInst>(DepInfo.getInst())) + MemoryUseOrDef *MA = MSSA->getMemoryAccess(M); + MemoryAccess *AnyClobber = MSSA->getWalker()->getClobberingMemoryAccess(MA); + MemoryLocation DestLoc = MemoryLocation::getForDest(M); + const MemoryAccess *DestClobber = + MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc); + + // Try to turn a partially redundant memset + memcpy into + // memcpy + smaller memset. We don't need the memcpy size for this. + // The memcpy most post-dom the memset, so limit this to the same basic + // block. A non-local generalization is likely not worthwhile. + if (auto *MD = dyn_cast<MemoryDef>(DestClobber)) + if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst())) + if (DestClobber->getBlock() == M->getParent()) if (processMemSetMemCpyDependence(M, MDep)) return true; - // There are four possible optimizations we can do for memcpy: - // a) memcpy-memcpy xform which exposes redundance for DSE. - // b) call-memcpy xform for return slot optimization. - // c) memcpy from freshly alloca'd space or space that has just started - // its lifetime copies undefined data, and we can therefore eliminate - // the memcpy in favor of the data that was already at the destination. - // d) memcpy from a just-memset'd source can be turned into memset. - if (ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength())) { - if (DepInfo.isClobber()) { - if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) { - // FIXME: Can we pass in either of dest/src alignment here instead - // of conservatively taking the minimum? - Align Alignment = std::min(M->getDestAlign().valueOrOne(), - M->getSourceAlign().valueOrOne()); - if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(), - TypeSize::getFixed(CopySize->getZExtValue()), - Alignment, C)) { - eraseInstruction(M); - ++NumMemCpyInstr; - return true; + MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess( + AnyClobber, MemoryLocation::getForSource(M)); + + // There are four possible optimizations we can do for memcpy: + // a) memcpy-memcpy xform which exposes redundance for DSE. + // b) call-memcpy xform for return slot optimization. 
+ // c) memcpy from freshly alloca'd space or space that has just started + // its lifetime copies undefined data, and we can therefore eliminate + // the memcpy in favor of the data that was already at the destination. + // d) memcpy from a just-memset'd source can be turned into memset. + if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) { + if (Instruction *MI = MD->getMemoryInst()) { + if (ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength())) { + if (auto *C = dyn_cast<CallInst>(MI)) { + // The memcpy must post-dom the call. Limit to the same block for + // now. Additionally, we need to ensure that there are no accesses + // to dest between the call and the memcpy. Accesses to src will be + // checked by performCallSlotOptzn(). + // TODO: Support non-local call-slot optimization? + if (C->getParent() == M->getParent() && + !accessedBetween(*AA, DestLoc, MD, MA)) { + // FIXME: Can we pass in either of dest/src alignment here instead + // of conservatively taking the minimum? + Align Alignment = std::min(M->getDestAlign().valueOrOne(), + M->getSourceAlign().valueOrOne()); + if (performCallSlotOptzn( + M, M, M->getDest(), M->getSource(), + TypeSize::getFixed(CopySize->getZExtValue()), Alignment, + C)) { + LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n" + << " call: " << *C << "\n" + << " memcpy: " << *M << "\n"); + eraseInstruction(M); + ++NumMemCpyInstr; + return true; + } } } } - } - - MemoryLocation SrcLoc = MemoryLocation::getForSource(M); - MemDepResult SrcDepInfo = MD->getPointerDependencyFrom( - SrcLoc, true, M->getIterator(), M->getParent()); - - if (SrcDepInfo.isClobber()) { - if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst())) + if (auto *MDep = dyn_cast<MemCpyInst>(MI)) return processMemCpyMemCpyDependence(M, MDep); - } else if (SrcDepInfo.isDef()) { - if (hasUndefContents(SrcDepInfo.getInst(), M->getLength())) { - eraseInstruction(M); - ++NumMemCpyInstr; - return true; - } - } - - if (SrcDepInfo.isClobber()) - if (MemSetInst *MDep = dyn_cast<MemSetInst>(SrcDepInfo.getInst())) + if (auto *MDep = dyn_cast<MemSetInst>(MI)) { if (performMemCpyToMemSetOptzn(M, MDep)) { + LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n"); eraseInstruction(M); ++NumCpyToSet; return true; } + } + } + + if (hasUndefContents(MSSA, AA, M->getSource(), MD, M->getLength())) { + LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n"); + eraseInstruction(M); + ++NumMemCpyInstr; + return true; + } } return false; @@ -1563,12 +1423,8 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) { /// Transforms memmove calls to memcpy calls when the src/dst are guaranteed /// not to alias. bool MemCpyOptPass::processMemMove(MemMoveInst *M) { - if (!TLI->has(LibFunc_memmove)) - return false; - - // See if the pointers alias. - if (!AA->isNoAlias(MemoryLocation::getForDest(M), - MemoryLocation::getForSource(M))) + // See if the source could be modified by this memmove potentially. + if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(M)))) return false; LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M @@ -1584,11 +1440,6 @@ bool MemCpyOptPass::processMemMove(MemMoveInst *M) { // For MemorySSA nothing really changes (except that memcpy may imply stricter // aliasing guarantees). - // MemDep may have over conservative information about this instruction, just - // conservatively flush it from the cache. 
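processMemMove() now keys on whether the memmove could modify its own source; when it cannot, for instance because the source is constant memory, there is no overlap hazard and the call can be rewritten to the cheaper memcpy. A source-level illustration with made-up names:

#include <cstdio>
#include <cstring>

// The source buffer is constant memory: the memmove can never write to it,
// so source and destination cannot overlap in any harmful way.
static const char Greeting[16] = "hello";

void intoBuffer(char *dst) {
  std::memmove(dst, Greeting, sizeof(Greeting)); // candidate for memcpy
}

int main() {
  char buf[16];
  intoBuffer(buf);
  std::puts(buf);
}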
- if (MD) - MD->removeInstruction(M); - ++NumMoveToCpy; return true; } @@ -1601,22 +1452,14 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) { Type *ByValTy = CB.getParamByValType(ArgNo); TypeSize ByValSize = DL.getTypeAllocSize(ByValTy); MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize)); + MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB); + if (!CallAccess) + return false; MemCpyInst *MDep = nullptr; - if (EnableMemorySSA) { - MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB); - if (!CallAccess) - return false; - MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess( - CallAccess->getDefiningAccess(), Loc); - if (auto *MD = dyn_cast<MemoryDef>(Clobber)) - MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst()); - } else { - MemDepResult DepInfo = MD->getPointerDependencyFrom( - Loc, true, CB.getIterator(), CB.getParent()); - if (!DepInfo.isClobber()) - return false; - MDep = dyn_cast<MemCpyInst>(DepInfo.getInst()); - } + MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess( + CallAccess->getDefiningAccess(), Loc); + if (auto *MD = dyn_cast<MemoryDef>(Clobber)) + MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst()); // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by // a memcpy, see if we can byval from the source of the memcpy instead of the @@ -1655,19 +1498,9 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) { // *b = 42; // foo(*a) // It would be invalid to transform the second memcpy into foo(*b). - if (EnableMemorySSA) { - if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep), - MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(&CB))) - return false; - } else { - // NOTE: This is conservative, it will stop on any read from the source loc, - // not just the defining memcpy. - MemDepResult SourceDep = MD->getPointerDependencyFrom( - MemoryLocation::getForSource(MDep), false, - CB.getIterator(), MDep->getParent()); - if (!SourceDep.isClobber() || SourceDep.getInst() != MDep) - return false; - } + if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep), + MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(&CB))) + return false; Value *TmpCast = MDep->getSource(); if (MDep->getSource()->getType() != ByValArg->getType()) { @@ -1734,47 +1567,33 @@ bool MemCpyOptPass::iterateOnFunction(Function &F) { } PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) { - auto *MD = !EnableMemorySSA ? &AM.getResult<MemoryDependenceAnalysis>(F) - : AM.getCachedResult<MemoryDependenceAnalysis>(F); auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); auto *AA = &AM.getResult<AAManager>(F); auto *AC = &AM.getResult<AssumptionAnalysis>(F); auto *DT = &AM.getResult<DominatorTreeAnalysis>(F); - auto *MSSA = EnableMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F) - : AM.getCachedResult<MemorySSAAnalysis>(F); + auto *MSSA = &AM.getResult<MemorySSAAnalysis>(F); - bool MadeChange = - runImpl(F, MD, &TLI, AA, AC, DT, MSSA ? 
&MSSA->getMSSA() : nullptr); + bool MadeChange = runImpl(F, &TLI, AA, AC, DT, &MSSA->getMSSA()); if (!MadeChange) return PreservedAnalyses::all(); PreservedAnalyses PA; PA.preserveSet<CFGAnalyses>(); - if (MD) - PA.preserve<MemoryDependenceAnalysis>(); - if (MSSA) - PA.preserve<MemorySSAAnalysis>(); + PA.preserve<MemorySSAAnalysis>(); return PA; } -bool MemCpyOptPass::runImpl(Function &F, MemoryDependenceResults *MD_, - TargetLibraryInfo *TLI_, AliasAnalysis *AA_, - AssumptionCache *AC_, DominatorTree *DT_, - MemorySSA *MSSA_) { +bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_, + AliasAnalysis *AA_, AssumptionCache *AC_, + DominatorTree *DT_, MemorySSA *MSSA_) { bool MadeChange = false; - MD = MD_; TLI = TLI_; AA = AA_; AC = AC_; DT = DT_; MSSA = MSSA_; MemorySSAUpdater MSSAU_(MSSA_); - MSSAU = MSSA_ ? &MSSAU_ : nullptr; - // If we don't have at least memset and memcpy, there is little point of doing - // anything here. These are required by a freestanding implementation, so if - // even they are disabled, there is no point in trying hard. - if (!TLI->has(LibFunc_memset) || !TLI->has(LibFunc_memcpy)) - return false; + MSSAU = &MSSAU_; while (true) { if (!iterateOnFunction(F)) @@ -1782,10 +1601,9 @@ bool MemCpyOptPass::runImpl(Function &F, MemoryDependenceResults *MD_, MadeChange = true; } - if (MSSA_ && VerifyMemorySSA) + if (VerifyMemorySSA) MSSA_->verifyMemorySSA(); - MD = nullptr; return MadeChange; } @@ -1794,17 +1612,11 @@ bool MemCpyOptLegacyPass::runOnFunction(Function &F) { if (skipFunction(F)) return false; - auto *MDWP = !EnableMemorySSA - ? &getAnalysis<MemoryDependenceWrapperPass>() - : getAnalysisIfAvailable<MemoryDependenceWrapperPass>(); auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - auto *MSSAWP = EnableMemorySSA - ? &getAnalysis<MemorySSAWrapperPass>() - : getAnalysisIfAvailable<MemorySSAWrapperPass>(); + auto *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA(); - return Impl.runImpl(F, MDWP ? & MDWP->getMemDep() : nullptr, TLI, AA, AC, DT, - MSSAWP ? &MSSAWP->getMSSA() : nullptr); + return Impl.runImpl(F, TLI, AA, AC, DT, MSSA); } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergeICmps.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergeICmps.cpp index f13f24ad2027..aac0deea5be3 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergeICmps.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergeICmps.cpp @@ -66,15 +66,6 @@ namespace { #define DEBUG_TYPE "mergeicmps" -// Returns true if the instruction is a simple load or a simple store -static bool isSimpleLoadOrStore(const Instruction *I) { - if (const LoadInst *LI = dyn_cast<LoadInst>(I)) - return LI->isSimple(); - if (const StoreInst *SI = dyn_cast<StoreInst>(I)) - return SI->isSimple(); - return false; -} - // A BCE atom "Binary Compare Expression Atom" represents an integer load // that is a constant offset from a base value, e.g. `a` or `o.c` in the example // at the top. 
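The BCE atoms described above model exactly this kind of source pattern: a chain of equality tests on fields at constant offsets from two bases, which can be merged into one wide memcmp-style comparison when the compared byte ranges are contiguous. An illustrative C++ input (whether the merge actually fires also depends on the target's memcmp expansion support):

#include <iostream>

struct S {
  int a;    // offset 0, 4 bytes
  char b;   // offset 4
  char c;   // offset 5
  short d;  // offset 6, 2 bytes
};

bool chainedCompare(const S &lhs, const S &rhs) {
  // Four compare blocks over contiguous offsets 0..7; mergeicmps can turn
  // this chain into a single 8-byte comparison.
  return lhs.a == rhs.a && lhs.b == rhs.b && lhs.c == rhs.c && lhs.d == rhs.d;
}

int main() {
  S x{1, 'x', 'y', 7}, y{1, 'x', 'y', 7};
  std::cout << chainedCompare(x, y) << '\n'; // 1
}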
@@ -154,6 +145,10 @@ BCEAtom visitICmpLoadOperand(Value *const Val, BaseIdentifier &BaseId) { return {}; } Value *const Addr = LoadI->getOperand(0); + if (Addr->getType()->getPointerAddressSpace() != 0) { + LLVM_DEBUG(dbgs() << "from non-zero AddressSpace\n"); + return {}; + } auto *const GEP = dyn_cast<GetElementPtrInst>(Addr); if (!GEP) return {}; @@ -234,6 +229,8 @@ class BCECmpBlock { InstructionSet BlockInsts; // The block requires splitting. bool RequireSplit = false; + // Original order of this block in the chain. + unsigned OrigOrder = 0; private: BCECmp Cmp; @@ -244,14 +241,13 @@ bool BCECmpBlock::canSinkBCECmpInst(const Instruction *Inst, // If this instruction may clobber the loads and is in middle of the BCE cmp // block instructions, then bail for now. if (Inst->mayWriteToMemory()) { - // Bail if this is not a simple load or store - if (!isSimpleLoadOrStore(Inst)) - return false; - // Disallow stores that might alias the BCE operands - MemoryLocation LLoc = MemoryLocation::get(Cmp.Lhs.LoadI); - MemoryLocation RLoc = MemoryLocation::get(Cmp.Rhs.LoadI); - if (isModSet(AA.getModRefInfo(Inst, LLoc)) || - isModSet(AA.getModRefInfo(Inst, RLoc))) + auto MayClobber = [&](LoadInst *LI) { + // If a potentially clobbering instruction comes before the load, + // we can still safely sink the load. + return !Inst->comesBefore(LI) && + isModSet(AA.getModRefInfo(Inst, MemoryLocation::get(LI))); + }; + if (MayClobber(Cmp.Lhs.LoadI) || MayClobber(Cmp.Rhs.LoadI)) return false; } // Make sure this instruction does not use any of the BCE cmp block @@ -386,39 +382,83 @@ static inline void enqueueBlock(std::vector<BCECmpBlock> &Comparisons, << Comparison.Rhs().BaseId << " + " << Comparison.Rhs().Offset << "\n"); LLVM_DEBUG(dbgs() << "\n"); + Comparison.OrigOrder = Comparisons.size(); Comparisons.push_back(std::move(Comparison)); } // A chain of comparisons. class BCECmpChain { - public: - BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi, - AliasAnalysis &AA); - - int size() const { return Comparisons_.size(); } +public: + using ContiguousBlocks = std::vector<BCECmpBlock>; -#ifdef MERGEICMPS_DOT_ON - void dump() const; -#endif // MERGEICMPS_DOT_ON + BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi, + AliasAnalysis &AA); bool simplify(const TargetLibraryInfo &TLI, AliasAnalysis &AA, DomTreeUpdater &DTU); -private: - static bool IsContiguous(const BCECmpBlock &First, - const BCECmpBlock &Second) { - return First.Lhs().BaseId == Second.Lhs().BaseId && - First.Rhs().BaseId == Second.Rhs().BaseId && - First.Lhs().Offset + First.SizeBits() / 8 == Second.Lhs().Offset && - First.Rhs().Offset + First.SizeBits() / 8 == Second.Rhs().Offset; + bool atLeastOneMerged() const { + return any_of(MergedBlocks_, + [](const auto &Blocks) { return Blocks.size() > 1; }); } +private: PHINode &Phi_; - std::vector<BCECmpBlock> Comparisons_; + // The list of all blocks in the chain, grouped by contiguity. 
+ std::vector<ContiguousBlocks> MergedBlocks_; // The original entry block (before sorting); BasicBlock *EntryBlock_; }; +static bool areContiguous(const BCECmpBlock &First, const BCECmpBlock &Second) { + return First.Lhs().BaseId == Second.Lhs().BaseId && + First.Rhs().BaseId == Second.Rhs().BaseId && + First.Lhs().Offset + First.SizeBits() / 8 == Second.Lhs().Offset && + First.Rhs().Offset + First.SizeBits() / 8 == Second.Rhs().Offset; +} + +static unsigned getMinOrigOrder(const BCECmpChain::ContiguousBlocks &Blocks) { + unsigned MinOrigOrder = std::numeric_limits<unsigned>::max(); + for (const BCECmpBlock &Block : Blocks) + MinOrigOrder = std::min(MinOrigOrder, Block.OrigOrder); + return MinOrigOrder; +} + +/// Given a chain of comparison blocks, groups the blocks into contiguous +/// ranges that can be merged together into a single comparison. +static std::vector<BCECmpChain::ContiguousBlocks> +mergeBlocks(std::vector<BCECmpBlock> &&Blocks) { + std::vector<BCECmpChain::ContiguousBlocks> MergedBlocks; + + // Sort to detect continuous offsets. + llvm::sort(Blocks, + [](const BCECmpBlock &LhsBlock, const BCECmpBlock &RhsBlock) { + return std::tie(LhsBlock.Lhs(), LhsBlock.Rhs()) < + std::tie(RhsBlock.Lhs(), RhsBlock.Rhs()); + }); + + BCECmpChain::ContiguousBlocks *LastMergedBlock = nullptr; + for (BCECmpBlock &Block : Blocks) { + if (!LastMergedBlock || !areContiguous(LastMergedBlock->back(), Block)) { + MergedBlocks.emplace_back(); + LastMergedBlock = &MergedBlocks.back(); + } else { + LLVM_DEBUG(dbgs() << "Merging block " << Block.BB->getName() << " into " + << LastMergedBlock->back().BB->getName() << "\n"); + } + LastMergedBlock->push_back(std::move(Block)); + } + + // While we allow reordering for merging, do not reorder unmerged comparisons. + // Doing so may introduce branch on poison. + llvm::sort(MergedBlocks, [](const BCECmpChain::ContiguousBlocks &LhsBlocks, + const BCECmpChain::ContiguousBlocks &RhsBlocks) { + return getMinOrigOrder(LhsBlocks) < getMinOrigOrder(RhsBlocks); + }); + + return MergedBlocks; +} + BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi, AliasAnalysis &AA) : Phi_(Phi) { @@ -498,47 +538,9 @@ BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi, return; } EntryBlock_ = Comparisons[0].BB; - Comparisons_ = std::move(Comparisons); -#ifdef MERGEICMPS_DOT_ON - errs() << "BEFORE REORDERING:\n\n"; - dump(); -#endif // MERGEICMPS_DOT_ON - // Reorder blocks by LHS. We can do that without changing the - // semantics because we are only accessing dereferencable memory. 
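As a rough model of the new mergeBlocks grouping defined above (a sketch of the idea only, using simplified stand-in types rather than the pass's BCECmpBlock): blocks are sorted so contiguous ranges become adjacent, adjacent ranges are folded into groups, and the groups are then ordered by the smallest original position they contain, so comparisons that do not get merged keep their original evaluation order.

#include <algorithm>
#include <limits>
#include <tuple>
#include <vector>

// Simplified stand-in for a comparison block; the real pass tracks both the
// Lhs and Rhs sides, only one (Base, Offset, Size) triple is modelled here.
struct CmpRange {
  unsigned Base;      // identifier of the base pointer
  unsigned Offset;    // byte offset from the base
  unsigned Size;      // number of bytes compared
  unsigned OrigOrder; // position of the block in the original chain
};

static std::vector<std::vector<CmpRange>>
groupContiguous(std::vector<CmpRange> Blocks) {
  // Sort so that ranges over the same base appear in offset order.
  std::sort(Blocks.begin(), Blocks.end(),
            [](const CmpRange &L, const CmpRange &R) {
              return std::tie(L.Base, L.Offset) < std::tie(R.Base, R.Offset);
            });

  // Fold adjacent, byte-contiguous ranges into one group.
  std::vector<std::vector<CmpRange>> Groups;
  for (const CmpRange &B : Blocks) {
    bool Contiguous =
        !Groups.empty() && Groups.back().back().Base == B.Base &&
        Groups.back().back().Offset + Groups.back().back().Size == B.Offset;
    if (!Contiguous)
      Groups.emplace_back();
    Groups.back().push_back(B);
  }

  // Reordering is only justified for blocks that actually get merged; order
  // the groups by the smallest original position they contain so singleton
  // (unmerged) comparisons keep their original order.
  auto MinOrder = [](const std::vector<CmpRange> &G) {
    unsigned M = std::numeric_limits<unsigned>::max();
    for (const CmpRange &B : G)
      M = std::min(M, B.OrigOrder);
    return M;
  };
  std::sort(Groups.begin(), Groups.end(),
            [&](const std::vector<CmpRange> &L,
                const std::vector<CmpRange> &R) {
              return MinOrder(L) < MinOrder(R);
            });
  return Groups;
}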
- llvm::sort(Comparisons_, - [](const BCECmpBlock &LhsBlock, const BCECmpBlock &RhsBlock) { - return std::tie(LhsBlock.Lhs(), LhsBlock.Rhs()) < - std::tie(RhsBlock.Lhs(), RhsBlock.Rhs()); - }); -#ifdef MERGEICMPS_DOT_ON - errs() << "AFTER REORDERING:\n\n"; - dump(); -#endif // MERGEICMPS_DOT_ON + MergedBlocks_ = mergeBlocks(std::move(Comparisons)); } -#ifdef MERGEICMPS_DOT_ON -void BCECmpChain::dump() const { - errs() << "digraph dag {\n"; - errs() << " graph [bgcolor=transparent];\n"; - errs() << " node [color=black,style=filled,fillcolor=lightyellow];\n"; - errs() << " edge [color=black];\n"; - for (size_t I = 0; I < Comparisons_.size(); ++I) { - const auto &Comparison = Comparisons_[I]; - errs() << " \"" << I << "\" [label=\"%" - << Comparison.Lhs().Base()->getName() << " + " - << Comparison.Lhs().Offset << " == %" - << Comparison.Rhs().Base()->getName() << " + " - << Comparison.Rhs().Offset << " (" << (Comparison.SizeBits() / 8) - << " bytes)\"];\n"; - const Value *const Val = Phi_.getIncomingValueForBlock(Comparison.BB); - if (I > 0) errs() << " \"" << (I - 1) << "\" -> \"" << I << "\";\n"; - errs() << " \"" << I << "\" -> \"Phi\" [label=\"" << *Val << "\"];\n"; - } - errs() << " \"Phi\" [label=\"Phi\"];\n"; - errs() << "}\n\n"; -} -#endif // MERGEICMPS_DOT_ON - namespace { // A class to compute the name of a set of merged basic blocks. @@ -661,47 +663,18 @@ static BasicBlock *mergeComparisons(ArrayRef<BCECmpBlock> Comparisons, bool BCECmpChain::simplify(const TargetLibraryInfo &TLI, AliasAnalysis &AA, DomTreeUpdater &DTU) { - assert(Comparisons_.size() >= 2 && "simplifying trivial BCECmpChain"); - // First pass to check if there is at least one merge. If not, we don't do - // anything and we keep analysis passes intact. - const auto AtLeastOneMerged = [this]() { - for (size_t I = 1; I < Comparisons_.size(); ++I) { - if (IsContiguous(Comparisons_[I - 1], Comparisons_[I])) - return true; - } - return false; - }; - if (!AtLeastOneMerged()) - return false; - + assert(atLeastOneMerged() && "simplifying trivial BCECmpChain"); LLVM_DEBUG(dbgs() << "Simplifying comparison chain starting at block " << EntryBlock_->getName() << "\n"); // Effectively merge blocks. We go in the reverse direction from the phi block // so that the next block is always available to branch to. - const auto mergeRange = [this, &TLI, &AA, &DTU](int I, int Num, - BasicBlock *InsertBefore, - BasicBlock *Next) { - return mergeComparisons(makeArrayRef(Comparisons_).slice(I, Num), - InsertBefore, Next, Phi_, TLI, AA, DTU); - }; - int NumMerged = 1; + BasicBlock *InsertBefore = EntryBlock_; BasicBlock *NextCmpBlock = Phi_.getParent(); - for (int I = static_cast<int>(Comparisons_.size()) - 2; I >= 0; --I) { - if (IsContiguous(Comparisons_[I], Comparisons_[I + 1])) { - LLVM_DEBUG(dbgs() << "Merging block " << Comparisons_[I].BB->getName() - << " into " << Comparisons_[I + 1].BB->getName() - << "\n"); - ++NumMerged; - } else { - NextCmpBlock = mergeRange(I + 1, NumMerged, NextCmpBlock, NextCmpBlock); - NumMerged = 1; - } + for (const auto &Blocks : reverse(MergedBlocks_)) { + InsertBefore = NextCmpBlock = mergeComparisons( + Blocks, InsertBefore, NextCmpBlock, Phi_, TLI, AA, DTU); } - // Insert the entry block for the new chain before the old entry block. - // If the old entry block was the function entry, this ensures that the new - // entry can become the function entry. 
- NextCmpBlock = mergeRange(0, NumMerged, EntryBlock_, NextCmpBlock); // Replace the original cmp chain with the new cmp chain by pointing all // predecessors of EntryBlock_ to NextCmpBlock instead. This makes all cmp @@ -729,13 +702,16 @@ bool BCECmpChain::simplify(const TargetLibraryInfo &TLI, AliasAnalysis &AA, // Delete merged blocks. This also removes incoming values in phi. SmallVector<BasicBlock *, 16> DeadBlocks; - for (auto &Cmp : Comparisons_) { - LLVM_DEBUG(dbgs() << "Deleting merged block " << Cmp.BB->getName() << "\n"); - DeadBlocks.push_back(Cmp.BB); + for (const auto &Blocks : MergedBlocks_) { + for (const BCECmpBlock &Block : Blocks) { + LLVM_DEBUG(dbgs() << "Deleting merged block " << Block.BB->getName() + << "\n"); + DeadBlocks.push_back(Block.BB); + } } DeleteDeadBlocks(DeadBlocks, &DTU); - Comparisons_.clear(); + MergedBlocks_.clear(); return true; } @@ -835,8 +811,8 @@ bool processPhi(PHINode &Phi, const TargetLibraryInfo &TLI, AliasAnalysis &AA, if (Blocks.empty()) return false; BCECmpChain CmpChain(Blocks, Phi, AA); - if (CmpChain.size() < 2) { - LLVM_DEBUG(dbgs() << "skip: only one compare block\n"); + if (!CmpChain.atLeastOneMerged()) { + LLVM_DEBUG(dbgs() << "skip: nothing merged\n"); return false; } @@ -862,9 +838,9 @@ static bool runImpl(Function &F, const TargetLibraryInfo &TLI, bool MadeChange = false; - for (auto BBIt = ++F.begin(); BBIt != F.end(); ++BBIt) { + for (BasicBlock &BB : llvm::drop_begin(F)) { // A Phi operation is always first in a basic block. - if (auto *const Phi = dyn_cast<PHINode>(&*BBIt->begin())) + if (auto *const Phi = dyn_cast<PHINode>(&*BB.begin())) MadeChange |= processPhi(*Phi, TLI, AA, DTU); } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp index 033fc168a67f..734532a6670c 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp @@ -420,3 +420,12 @@ MergedLoadStoreMotionPass::run(Function &F, FunctionAnalysisManager &AM) { PA.preserveSet<CFGAnalyses>(); return PA; } + +void MergedLoadStoreMotionPass::printPipeline( + raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { + static_cast<PassInfoMixin<MergedLoadStoreMotionPass> *>(this)->printPipeline( + OS, MapClassName2PassName); + OS << "<"; + OS << (Options.SplitFooterBB ? 
"" : "no-") << "split-footer-bb"; + OS << ">"; +} diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/NaryReassociate.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/NaryReassociate.cpp index ded5caf53b5a..6dca30d9876e 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/NaryReassociate.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/NaryReassociate.cpp @@ -282,8 +282,12 @@ NaryReassociatePass::matchAndReassociateMinOrMax(Instruction *I, m_Value(LHS), m_Value(RHS)); if (match(I, MinMaxMatcher)) { OrigSCEV = SE->getSCEV(I); - return dyn_cast_or_null<Instruction>( - tryReassociateMinOrMax(I, MinMaxMatcher, LHS, RHS)); + if (auto *NewMinMax = dyn_cast_or_null<Instruction>( + tryReassociateMinOrMax(I, MinMaxMatcher, LHS, RHS))) + return NewMinMax; + if (auto *NewMinMax = dyn_cast_or_null<Instruction>( + tryReassociateMinOrMax(I, MinMaxMatcher, RHS, LHS))) + return NewMinMax; } return nullptr; } @@ -596,58 +600,60 @@ Value *NaryReassociatePass::tryReassociateMinOrMax(Instruction *I, Value *LHS, Value *RHS) { Value *A = nullptr, *B = nullptr; MaxMinT m_MaxMin(m_Value(A), m_Value(B)); - for (unsigned int i = 0; i < 2; ++i) { - if (!LHS->hasNUsesOrMore(3) && match(LHS, m_MaxMin)) { - const SCEV *AExpr = SE->getSCEV(A), *BExpr = SE->getSCEV(B); - const SCEV *RHSExpr = SE->getSCEV(RHS); - for (unsigned int j = 0; j < 2; ++j) { - if (j == 0) { - if (BExpr == RHSExpr) - continue; - // Transform 'I = (A op B) op RHS' to 'I = (A op RHS) op B' on the - // first iteration. - std::swap(BExpr, RHSExpr); - } else { - if (AExpr == RHSExpr) - continue; - // Transform 'I = (A op RHS) op B' 'I = (B op RHS) op A' on the second - // iteration. - std::swap(AExpr, RHSExpr); - } - - // The optimization is profitable only if LHS can be removed in the end. - // In other words LHS should be used (directly or indirectly) by I only. - if (llvm::any_of(LHS->users(), [&](auto *U) { - return U != I && !(U->hasOneUser() && *U->users().begin() == I); - })) - continue; - - SCEVExpander Expander(*SE, *DL, "nary-reassociate"); - SmallVector<const SCEV *, 2> Ops1{ BExpr, AExpr }; - const SCEVTypes SCEVType = convertToSCEVype(m_MaxMin); - const SCEV *R1Expr = SE->getMinMaxExpr(SCEVType, Ops1); - - Instruction *R1MinMax = findClosestMatchingDominator(R1Expr, I); - - if (!R1MinMax) - continue; - - LLVM_DEBUG(dbgs() << "NARY: Found common sub-expr: " << *R1MinMax - << "\n"); - - R1Expr = SE->getUnknown(R1MinMax); - SmallVector<const SCEV *, 2> Ops2{ RHSExpr, R1Expr }; - const SCEV *R2Expr = SE->getMinMaxExpr(SCEVType, Ops2); - - Value *NewMinMax = Expander.expandCodeFor(R2Expr, I->getType(), I); - NewMinMax->setName(Twine(I->getName()).concat(".nary")); - - LLVM_DEBUG(dbgs() << "NARY: Deleting: " << *I << "\n" - << "NARY: Inserting: " << *NewMinMax << "\n"); - return NewMinMax; - } - } - std::swap(LHS, RHS); + + if (LHS->hasNUsesOrMore(3) || + // The optimization is profitable only if LHS can be removed in the end. + // In other words LHS should be used (directly or indirectly) by I only. 
+ llvm::any_of(LHS->users(), + [&](auto *U) { + return U != I && + !(U->hasOneUser() && *U->users().begin() == I); + }) || + !match(LHS, m_MaxMin)) + return nullptr; + + auto tryCombination = [&](Value *A, const SCEV *AExpr, Value *B, + const SCEV *BExpr, Value *C, + const SCEV *CExpr) -> Value * { + SmallVector<const SCEV *, 2> Ops1{BExpr, AExpr}; + const SCEVTypes SCEVType = convertToSCEVype(m_MaxMin); + const SCEV *R1Expr = SE->getMinMaxExpr(SCEVType, Ops1); + + Instruction *R1MinMax = findClosestMatchingDominator(R1Expr, I); + + if (!R1MinMax) + return nullptr; + + LLVM_DEBUG(dbgs() << "NARY: Found common sub-expr: " << *R1MinMax << "\n"); + + SmallVector<const SCEV *, 2> Ops2{SE->getUnknown(C), + SE->getUnknown(R1MinMax)}; + const SCEV *R2Expr = SE->getMinMaxExpr(SCEVType, Ops2); + + SCEVExpander Expander(*SE, *DL, "nary-reassociate"); + Value *NewMinMax = Expander.expandCodeFor(R2Expr, I->getType(), I); + NewMinMax->setName(Twine(I->getName()).concat(".nary")); + + LLVM_DEBUG(dbgs() << "NARY: Deleting: " << *I << "\n" + << "NARY: Inserting: " << *NewMinMax << "\n"); + return NewMinMax; + }; + + const SCEV *AExpr = SE->getSCEV(A); + const SCEV *BExpr = SE->getSCEV(B); + const SCEV *RHSExpr = SE->getSCEV(RHS); + + if (BExpr != RHSExpr) { + // Try (A op RHS) op B + if (auto *NewMinMax = tryCombination(A, AExpr, RHS, RHSExpr, B, BExpr)) + return NewMinMax; + } + + if (AExpr != RHSExpr) { + // Try (RHS op B) op A + if (auto *NewMinMax = tryCombination(RHS, RHSExpr, B, BExpr, A, AExpr)) + return NewMinMax; } + return nullptr; } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/NewGVN.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/NewGVN.cpp index a137d13c6ea0..91215cd19e2b 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/NewGVN.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/NewGVN.cpp @@ -1194,9 +1194,10 @@ NewGVN::ExprResult NewGVN::createExpression(Instruction *I) const { SimplifyCastInst(CI->getOpcode(), E->getOperand(0), CI->getType(), SQ); if (auto Simplified = checkExprResults(E, I, V)) return Simplified; - } else if (isa<GetElementPtrInst>(I)) { - Value *V = SimplifyGEPInst( - E->getType(), ArrayRef<Value *>(E->op_begin(), E->op_end()), SQ); + } else if (auto *GEPI = dyn_cast<GetElementPtrInst>(I)) { + Value *V = SimplifyGEPInst(GEPI->getSourceElementType(), + ArrayRef<Value *>(E->op_begin(), E->op_end()), + GEPI->isInBounds(), SQ); if (auto Simplified = checkExprResults(E, I, V)) return Simplified; } else if (AllConstant) { @@ -1818,7 +1819,7 @@ NewGVN::ExprResult NewGVN::performSymbolicCmpEvaluation(Instruction *I) const { // See if we know something about the comparison itself, like it is the target // of an assume. auto *CmpPI = PredInfo->getPredicateInfoFor(I); - if (dyn_cast_or_null<PredicateAssume>(CmpPI)) + if (isa_and_nonnull<PredicateAssume>(CmpPI)) return ExprResult::some( createConstantExpression(ConstantInt::getTrue(CI->getType()))); @@ -3606,7 +3607,7 @@ void NewGVN::convertClassToDFSOrdered( // Skip uses in unreachable blocks, as we're going // to delete them. 
- if (ReachableBlocks.count(IBlock) == 0) + if (!ReachableBlocks.contains(IBlock)) continue; DomTreeNode *DomNode = DT->getNode(IBlock); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp index 7872c553b412..44027ccd92ca 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp @@ -82,7 +82,7 @@ static bool optimizeSQRT(CallInst *Call, Function *CalledFunc, // Add attribute "readnone" so that backend can use a native sqrt instruction // for this call. - Call->addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone); + Call->addFnAttr(Attribute::ReadNone); // Insert a FP compare instruction and use it as the CurrBB branch condition. Builder.SetInsertPoint(CurrBBTerm); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reassociate.cpp index 888edc4d69a8..b0fb8daaba8f 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reassociate.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reassociate.cpp @@ -140,7 +140,7 @@ XorOpnd::XorOpnd(Value *V) { // view the operand as "V | 0" SymbolicPart = V; - ConstPart = APInt::getNullValue(V->getType()->getScalarSizeInBits()); + ConstPart = APInt::getZero(V->getType()->getScalarSizeInBits()); isOr = true; } @@ -1279,10 +1279,10 @@ static Value *OptimizeAndOrXor(unsigned Opcode, /// be returned. static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd, const APInt &ConstOpnd) { - if (ConstOpnd.isNullValue()) + if (ConstOpnd.isZero()) return nullptr; - if (ConstOpnd.isAllOnesValue()) + if (ConstOpnd.isAllOnes()) return Opnd; Instruction *I = BinaryOperator::CreateAnd( @@ -1304,7 +1304,7 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, // = ((x | c1) ^ c1) ^ (c1 ^ c2) // = (x & ~c1) ^ (c1 ^ c2) // It is useful only when c1 == c2. - if (!Opnd1->isOrExpr() || Opnd1->getConstPart().isNullValue()) + if (!Opnd1->isOrExpr() || Opnd1->getConstPart().isZero()) return false; if (!Opnd1->getValue()->hasOneUse()) @@ -1361,7 +1361,7 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, APInt C3((~C1) ^ C2); // Do not increase code size! - if (!C3.isNullValue() && !C3.isAllOnesValue()) { + if (!C3.isZero() && !C3.isAllOnes()) { int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2; if (NewInstNum > DeadInstNum) return false; @@ -1377,7 +1377,7 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, APInt C3 = C1 ^ C2; // Do not increase code size - if (!C3.isNullValue() && !C3.isAllOnesValue()) { + if (!C3.isZero() && !C3.isAllOnes()) { int NewInstNum = ConstOpnd.getBoolValue() ? 
1 : 2; if (NewInstNum > DeadInstNum) return false; @@ -1468,8 +1468,7 @@ Value *ReassociatePass::OptimizeXor(Instruction *I, Value *CV; // Step 3.1: Try simplifying "CurrOpnd ^ ConstOpnd" - if (!ConstOpnd.isNullValue() && - CombineXorOpnd(I, CurrOpnd, ConstOpnd, CV)) { + if (!ConstOpnd.isZero() && CombineXorOpnd(I, CurrOpnd, ConstOpnd, CV)) { Changed = true; if (CV) *CurrOpnd = XorOpnd(CV); @@ -1510,7 +1509,7 @@ Value *ReassociatePass::OptimizeXor(Instruction *I, ValueEntry VE(getRank(O.getValue()), O.getValue()); Ops.push_back(VE); } - if (!ConstOpnd.isNullValue()) { + if (!ConstOpnd.isZero()) { Value *C = ConstantInt::get(Ty, ConstOpnd); ValueEntry VE(getRank(C), C); Ops.push_back(VE); @@ -1519,7 +1518,7 @@ Value *ReassociatePass::OptimizeXor(Instruction *I, if (Sz == 1) return Ops.back().Op; if (Sz == 0) { - assert(ConstOpnd.isNullValue()); + assert(ConstOpnd.isZero()); return ConstantInt::get(Ty, ConstOpnd); } } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index bc0fecc972fc..2d3490b2d29e 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -755,7 +755,7 @@ public: } bool operator==(const BDVState &Other) const { - return OriginalValue == OriginalValue && BaseValue == Other.BaseValue && + return OriginalValue == Other.OriginalValue && BaseValue == Other.BaseValue && Status == Other.Status; } @@ -910,7 +910,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) { #ifndef NDEBUG VerifyStates(); LLVM_DEBUG(dbgs() << "States after initialization:\n"); - for (auto Pair : States) { + for (const auto &Pair : States) { LLVM_DEBUG(dbgs() << " " << Pair.second << " for " << *Pair.first << "\n"); } #endif @@ -1002,7 +1002,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) { #ifndef NDEBUG VerifyStates(); LLVM_DEBUG(dbgs() << "States after meet iteration:\n"); - for (auto Pair : States) { + for (const auto &Pair : States) { LLVM_DEBUG(dbgs() << " " << Pair.second << " for " << *Pair.first << "\n"); } #endif @@ -1163,7 +1163,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) { // llvm::Value of the correct type (and still remain pure). // This will remove the need to add bitcasts. assert(Base->stripPointerCasts() == OldBase->stripPointerCasts() && - "Sanity -- findBaseOrBDV should be pure!"); + "findBaseOrBDV should be pure!"); #endif } Value *Base = BlockToValue[InBB]; @@ -1377,11 +1377,11 @@ static AttributeList legalizeCallAttributes(LLVMContext &Ctx, return AL; // Remove the readonly, readnone, and statepoint function attributes. - AttrBuilder FnAttrs = AL.getFnAttributes(); + AttrBuilder FnAttrs = AL.getFnAttrs(); for (auto Attr : FnAttrsToStrip) FnAttrs.removeAttribute(Attr); - for (Attribute A : AL.getFnAttributes()) { + for (Attribute A : AL.getFnAttrs()) { if (isStatepointDirectiveAttr(A)) FnAttrs.remove(A); } @@ -1533,9 +1533,8 @@ static StringRef getDeoptLowering(CallBase *Call) { // FIXME: Calls have a *really* confusing interface around attributes // with values. 
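The attribute queries rewritten just below follow the newer AttributeList accessors; a minimal sketch of the read-side pattern, where getStringFnAttr is an invented helper and only the hasFnAttr/getFnAttr/getValueAsString calls mirror the diff:

#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"

using namespace llvm;

// Invented helper, for illustration only: fetch a string-valued function
// attribute (e.g. "deopt-lowering") from the call site, falling back to the
// callee's definition when the call site does not carry it.
static StringRef getStringFnAttr(const CallBase &Call, StringRef Kind,
                                 StringRef Default) {
  const AttributeList &AL = Call.getAttributes();
  if (AL.hasFnAttr(Kind))
    return AL.getFnAttr(Kind).getValueAsString();
  if (const Function *F = Call.getCalledFunction())
    if (F->hasFnAttribute(Kind))
      return F->getFnAttribute(Kind).getValueAsString();
  return Default;
}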
const AttributeList &CSAS = Call->getAttributes(); - if (CSAS.hasAttribute(AttributeList::FunctionIndex, DeoptLowering)) - return CSAS.getAttribute(AttributeList::FunctionIndex, DeoptLowering) - .getValueAsString(); + if (CSAS.hasFnAttr(DeoptLowering)) + return CSAS.getFnAttr(DeoptLowering).getValueAsString(); Function *F = Call->getCalledFunction(); assert(F && F->hasFnAttribute(DeoptLowering)); return F->getFnAttribute(DeoptLowering).getValueAsString(); @@ -1801,7 +1800,7 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */ CallInst *GCResult = Builder.CreateGCResult(Token, Call->getType(), Name); GCResult->setAttributes( AttributeList::get(GCResult->getContext(), AttributeList::ReturnIndex, - Call->getAttributes().getRetAttributes())); + Call->getAttributes().getRetAttrs())); // We cannot RAUW or delete CS.getInstruction() because it could be in the // live set of some other safepoint, in which case that safepoint's @@ -1855,7 +1854,7 @@ makeStatepointExplicit(DominatorTree &DT, CallBase *Call, // It receives iterator to the statepoint gc relocates and emits a store to the // assigned location (via allocaMap) for the each one of them. It adds the // visited values into the visitedLiveValues set, which we will later use them -// for sanity checking. +// for validation checking. static void insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs, DenseMap<Value *, AllocaInst *> &AllocaMap, @@ -2454,7 +2453,7 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, SmallVectorImpl<CallBase *> &ToUpdate, DefiningValueMapTy &DVCache) { #ifndef NDEBUG - // sanity check the input + // Validate the input std::set<CallBase *> Uniqued; Uniqued.insert(ToUpdate.begin(), ToUpdate.end()); assert(Uniqued.size() == ToUpdate.size() && "no duplicates please!"); @@ -2620,9 +2619,9 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, // we just grab that. llvm::append_range(Live, Info.StatepointToken->gc_args()); #ifndef NDEBUG - // Do some basic sanity checks on our liveness results before performing - // relocation. Relocation can and will turn mistakes in liveness results - // into non-sensical code which is must harder to debug. + // Do some basic validation checking on our liveness results before + // performing relocation. Relocation can and will turn mistakes in liveness + // results into non-sensical code which is must harder to debug. 
// TODO: It would be nice to test consistency as well assert(DT.isReachableFromEntry(Info.StatepointToken->getParent()) && "statepoint must be reachable or liveness is meaningless"); @@ -2641,7 +2640,7 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, unique_unsorted(Live); #ifndef NDEBUG - // sanity check + // Validation check for (auto *Ptr : Live) assert(isHandledGCPointerType(Ptr->getType()) && "must be a gc pointer type"); @@ -2656,18 +2655,19 @@ template <typename AttrHolder> static void RemoveNonValidAttrAtIndex(LLVMContext &Ctx, AttrHolder &AH, unsigned Index) { AttrBuilder R; - if (AH.getDereferenceableBytes(Index)) + AttributeSet AS = AH.getAttributes().getAttributes(Index); + if (AS.getDereferenceableBytes()) R.addAttribute(Attribute::get(Ctx, Attribute::Dereferenceable, - AH.getDereferenceableBytes(Index))); - if (AH.getDereferenceableOrNullBytes(Index)) + AS.getDereferenceableBytes())); + if (AS.getDereferenceableOrNullBytes()) R.addAttribute(Attribute::get(Ctx, Attribute::DereferenceableOrNull, - AH.getDereferenceableOrNullBytes(Index))); + AS.getDereferenceableOrNullBytes())); for (auto Attr : ParamAttrsToStrip) - if (AH.getAttributes().hasAttribute(Index, Attr)) + if (AS.hasAttribute(Attr)) R.addAttribute(Attr); if (!R.empty()) - AH.setAttributes(AH.getAttributes().removeAttributes(Ctx, Index, R)); + AH.setAttributes(AH.getAttributes().removeAttributesAtIndex(Ctx, Index, R)); } static void stripNonValidAttributesFromPrototype(Function &F) { @@ -3016,7 +3016,7 @@ static SetVector<Value *> computeKillSet(BasicBlock *BB) { #ifndef NDEBUG /// Check that the items in 'Live' dominate 'TI'. This is used as a basic -/// sanity check for the liveness computation. +/// validation check for the liveness computation. static void checkBasicSSA(DominatorTree &DT, SetVector<Value *> &Live, Instruction *TI, bool TermOkay = false) { for (Value *V : Live) { @@ -3103,7 +3103,7 @@ static void computeLiveInValues(DominatorTree &DT, Function &F, } // while (!Worklist.empty()) #ifndef NDEBUG - // Sanity check our output against SSA properties. This helps catch any + // Verify our output against SSA properties. This helps catch any // missing kills during the above iteration. 
for (BasicBlock &BB : F) checkBasicSSA(DT, Data, BB); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SCCP.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SCCP.cpp index b09f896d0157..28e00c873361 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SCCP.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SCCP.cpp @@ -490,17 +490,17 @@ bool llvm::runIPSCCP( AttrBuilder AttributesToRemove; AttributesToRemove.addAttribute(Attribute::ArgMemOnly); AttributesToRemove.addAttribute(Attribute::InaccessibleMemOrArgMemOnly); - F.removeAttributes(AttributeList::FunctionIndex, AttributesToRemove); + F.removeFnAttrs(AttributesToRemove); for (User *U : F.users()) { auto *CB = dyn_cast<CallBase>(U); if (!CB || CB->getCalledFunction() != &F) continue; - CB->removeAttributes(AttributeList::FunctionIndex, - AttributesToRemove); + CB->removeFnAttrs(AttributesToRemove); } } + MadeChanges |= ReplacedPointerArg; } SmallPtrSet<Value *, 32> InsertedValues; @@ -540,14 +540,13 @@ bool llvm::runIPSCCP( DTU.deleteBB(DeadBB); for (BasicBlock &BB : F) { - for (BasicBlock::iterator BI = BB.begin(), E = BB.end(); BI != E;) { - Instruction *Inst = &*BI++; - if (Solver.getPredicateInfoFor(Inst)) { - if (auto *II = dyn_cast<IntrinsicInst>(Inst)) { + for (Instruction &Inst : llvm::make_early_inc_range(BB)) { + if (Solver.getPredicateInfoFor(&Inst)) { + if (auto *II = dyn_cast<IntrinsicInst>(&Inst)) { if (II->getIntrinsicID() == Intrinsic::ssa_copy) { Value *Op = II->getOperand(0); - Inst->replaceAllUsesWith(Op); - Inst->eraseFromParent(); + Inst.replaceAllUsesWith(Op); + Inst.eraseFromParent(); } } } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp index fe160d5415bd..31c8999c3724 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp @@ -122,7 +122,7 @@ namespace { class IRBuilderPrefixedInserter final : public IRBuilderDefaultInserter { std::string Prefix; - const Twine getNameWithPrefix(const Twine &Name) const { + Twine getNameWithPrefix(const Twine &Name) const { return Name.isTriviallyEmpty() ? Name : Prefix + Name; } @@ -1275,8 +1275,7 @@ static void speculatePHINodeLoads(PHINode &PN) { // Get the AA tags and alignment to use from one of the loads. It does not // matter which one we get and if any differ. - AAMDNodes AATags; - SomeLoad->getAAMetadata(AATags); + AAMDNodes AATags = SomeLoad->getAAMetadata(); Align Alignment = SomeLoad->getAlign(); // Rewrite all loads of the PN to use the new PHI. @@ -1330,14 +1329,21 @@ static void speculatePHINodeLoads(PHINode &PN) { /// %V = select i1 %cond, i32 %V1, i32 %V2 /// /// We can do this to a select if its only uses are loads and if the operand -/// to the select can be loaded unconditionally. +/// to the select can be loaded unconditionally. If found an intervening bitcast +/// with a single use of the load, allow the promotion. 
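At the source level, the speculation described in the comment above corresponds to the rewrite sketched below; the two functions are illustrative and assume both pointers are known to be unconditionally loadable, which is what the safety check that follows verifies for the select's operands.

// Illustration only: a simple load through a select of pointers ...
int loadThroughSelect(bool Cond, int *A, int *B) {
  int *P = Cond ? A : B; // select of the two addresses
  return *P;             // one conditional load
}

// ... can be speculated into two unconditional loads plus a select of the
// loaded values, provided both A and B can be loaded unconditionally.
int speculatedLoads(bool Cond, int *A, int *B) {
  int VA = *A;           // executes even when Cond is false
  int VB = *B;           // executes even when Cond is true
  return Cond ? VA : VB; // select of values instead of addresses
}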
static bool isSafeSelectToSpeculate(SelectInst &SI) { Value *TValue = SI.getTrueValue(); Value *FValue = SI.getFalseValue(); const DataLayout &DL = SI.getModule()->getDataLayout(); for (User *U : SI.users()) { - LoadInst *LI = dyn_cast<LoadInst>(U); + LoadInst *LI; + BitCastInst *BC = dyn_cast<BitCastInst>(U); + if (BC && BC->hasOneUse()) + LI = dyn_cast<LoadInst>(*BC->user_begin()); + else + LI = dyn_cast<LoadInst>(U); + if (!LI || !LI->isSimple()) return false; @@ -1363,13 +1369,27 @@ static void speculateSelectInstLoads(SelectInst &SI) { Value *FV = SI.getFalseValue(); // Replace the loads of the select with a select of two loads. while (!SI.use_empty()) { - LoadInst *LI = cast<LoadInst>(SI.user_back()); + LoadInst *LI; + BitCastInst *BC = dyn_cast<BitCastInst>(SI.user_back()); + if (BC) { + assert(BC->hasOneUse() && "Bitcast should have a single use."); + LI = cast<LoadInst>(BC->user_back()); + } else { + LI = cast<LoadInst>(SI.user_back()); + } + assert(LI->isSimple() && "We only speculate simple loads"); IRB.SetInsertPoint(LI); - LoadInst *TL = IRB.CreateLoad(LI->getType(), TV, + Value *NewTV = + BC ? IRB.CreateBitCast(TV, BC->getType(), TV->getName() + ".sroa.cast") + : TV; + Value *NewFV = + BC ? IRB.CreateBitCast(FV, BC->getType(), FV->getName() + ".sroa.cast") + : FV; + LoadInst *TL = IRB.CreateLoad(LI->getType(), NewTV, LI->getName() + ".sroa.speculate.load.true"); - LoadInst *FL = IRB.CreateLoad(LI->getType(), FV, + LoadInst *FL = IRB.CreateLoad(LI->getType(), NewFV, LI->getName() + ".sroa.speculate.load.false"); NumLoadsSpeculated += 2; @@ -1377,8 +1397,7 @@ static void speculateSelectInstLoads(SelectInst &SI) { TL->setAlignment(LI->getAlign()); FL->setAlignment(LI->getAlign()); - AAMDNodes Tags; - LI->getAAMetadata(Tags); + AAMDNodes Tags = LI->getAAMetadata(); if (Tags) { TL->setAAMetadata(Tags); FL->setAAMetadata(Tags); @@ -1390,6 +1409,8 @@ static void speculateSelectInstLoads(SelectInst &SI) { LLVM_DEBUG(dbgs() << " speculated to: " << *V << "\n"); LI->replaceAllUsesWith(V); LI->eraseFromParent(); + if (BC) + BC->eraseFromParent(); } SI.eraseFromParent(); } @@ -1462,76 +1483,6 @@ static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &DL, return buildGEP(IRB, BasePtr, Indices, NamePrefix); } -/// Recursively compute indices for a natural GEP. -/// -/// This is the recursive step for getNaturalGEPWithOffset that walks down the -/// element types adding appropriate indices for the GEP. -static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL, - Value *Ptr, Type *Ty, APInt &Offset, - Type *TargetTy, - SmallVectorImpl<Value *> &Indices, - const Twine &NamePrefix) { - if (Offset == 0) - return getNaturalGEPWithType(IRB, DL, Ptr, Ty, TargetTy, Indices, - NamePrefix); - - // We can't recurse through pointer types. - if (Ty->isPointerTy()) - return nullptr; - - // We try to analyze GEPs over vectors here, but note that these GEPs are - // extremely poorly defined currently. The long-term goal is to remove GEPing - // over a vector from the IR completely. - if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) { - unsigned ElementSizeInBits = - DL.getTypeSizeInBits(VecTy->getScalarType()).getFixedSize(); - if (ElementSizeInBits % 8 != 0) { - // GEPs over non-multiple of 8 size vector elements are invalid. 
- return nullptr; - } - APInt ElementSize(Offset.getBitWidth(), ElementSizeInBits / 8); - APInt NumSkippedElements = Offset.sdiv(ElementSize); - if (NumSkippedElements.ugt(cast<FixedVectorType>(VecTy)->getNumElements())) - return nullptr; - Offset -= NumSkippedElements * ElementSize; - Indices.push_back(IRB.getInt(NumSkippedElements)); - return getNaturalGEPRecursively(IRB, DL, Ptr, VecTy->getElementType(), - Offset, TargetTy, Indices, NamePrefix); - } - - if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) { - Type *ElementTy = ArrTy->getElementType(); - APInt ElementSize(Offset.getBitWidth(), - DL.getTypeAllocSize(ElementTy).getFixedSize()); - APInt NumSkippedElements = Offset.sdiv(ElementSize); - if (NumSkippedElements.ugt(ArrTy->getNumElements())) - return nullptr; - - Offset -= NumSkippedElements * ElementSize; - Indices.push_back(IRB.getInt(NumSkippedElements)); - return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy, - Indices, NamePrefix); - } - - StructType *STy = dyn_cast<StructType>(Ty); - if (!STy) - return nullptr; - - const StructLayout *SL = DL.getStructLayout(STy); - uint64_t StructOffset = Offset.getZExtValue(); - if (StructOffset >= SL->getSizeInBytes()) - return nullptr; - unsigned Index = SL->getElementContainingOffset(StructOffset); - Offset -= APInt(Offset.getBitWidth(), SL->getElementOffset(Index)); - Type *ElementTy = STy->getElementType(Index); - if (Offset.uge(DL.getTypeAllocSize(ElementTy).getFixedSize())) - return nullptr; // The offset points into alignment padding. - - Indices.push_back(IRB.getInt32(Index)); - return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy, - Indices, NamePrefix); -} - /// Get a natural GEP from a base pointer to a particular offset and /// resulting in a particular type. /// @@ -1556,18 +1507,15 @@ static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL, Type *ElementTy = Ty->getElementType(); if (!ElementTy->isSized()) return nullptr; // We can't GEP through an unsized element. - if (isa<ScalableVectorType>(ElementTy)) + + SmallVector<APInt> IntIndices = DL.getGEPIndicesForOffset(ElementTy, Offset); + if (Offset != 0) return nullptr; - APInt ElementSize(Offset.getBitWidth(), - DL.getTypeAllocSize(ElementTy).getFixedSize()); - if (ElementSize == 0) - return nullptr; // Zero-length arrays can't help us build a natural GEP. - APInt NumSkippedElements = Offset.sdiv(ElementSize); - - Offset -= NumSkippedElements * ElementSize; - Indices.push_back(IRB.getInt(NumSkippedElements)); - return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy, - Indices, NamePrefix); + + for (const APInt &Index : IntIndices) + Indices.push_back(IRB.getInt(Index)); + return getNaturalGEPWithType(IRB, DL, Ptr, ElementTy, TargetTy, Indices, + NamePrefix); } /// Compute an adjusted pointer from Ptr by Offset bytes where the @@ -1588,6 +1536,15 @@ static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL, static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr, APInt Offset, Type *PointerTy, const Twine &NamePrefix) { + // Create i8 GEP for opaque pointers. + if (Ptr->getType()->isOpaquePointerTy()) { + if (Offset != 0) + Ptr = IRB.CreateInBoundsGEP(IRB.getInt8Ty(), Ptr, IRB.getInt(Offset), + NamePrefix + "sroa_idx"); + return IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr, PointerTy, + NamePrefix + "sroa_cast"); + } + // Even though we don't look through PHI nodes, we could be called on an // instruction in an unreachable block, which may be on a cycle. 
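The hand-rolled recursion removed above is replaced by DataLayout::getGEPIndicesForOffset, which converts a byte offset into structured GEP indices and leaves any unrepresentable remainder in Offset. A small usage sketch, where buildByteOffsetGEP is an invented name and not part of the pass:

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IRBuilder.h"

using namespace llvm;

// Invented helper for illustration: try to address `Offset` bytes into the
// object `Ptr` points at (with pointee type `PointeeTy`) using a natural GEP.
// Returns nullptr when the offset does not land on an element/field boundary.
static Value *buildByteOffsetGEP(IRBuilder<> &IRB, const DataLayout &DL,
                                 Value *Ptr, Type *PointeeTy, APInt Offset) {
  Type *ReachedTy = PointeeTy;
  // getGEPIndicesForOffset updates ReachedTy to the type finally reached and
  // reduces Offset to whatever part could not be expressed as indices.
  SmallVector<APInt> Indices = DL.getGEPIndicesForOffset(ReachedTy, Offset);
  if (Offset != 0)
    return nullptr; // offset points into padding or mid-element

  SmallVector<Value *> IdxValues;
  for (const APInt &Idx : Indices)
    IdxValues.push_back(IRB.getInt(Idx));
  // The first returned index already covers whole-object strides, so the GEP
  // is built directly from the returned index list.
  return IRB.CreateInBoundsGEP(PointeeTy, Ptr, IdxValues);
}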
SmallPtrSet<Value *, 4> Visited; @@ -1851,13 +1808,13 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S, } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) { if (!II->isLifetimeStartOrEnd() && !II->isDroppable()) return false; - } else if (U->get()->getType()->getPointerElementType()->isStructTy()) { - // Disable vector promotion when there are loads or stores of an FCA. - return false; } else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) { if (LI->isVolatile()) return false; Type *LTy = LI->getType(); + // Disable vector promotion when there are loads or stores of an FCA. + if (LTy->isStructTy()) + return false; if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) { assert(LTy->isIntegerTy()); LTy = SplitIntTy; @@ -1868,6 +1825,9 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S, if (SI->isVolatile()) return false; Type *STy = SI->getValueOperand()->getType(); + // Disable vector promotion when there are loads or stores of an FCA. + if (STy->isStructTy()) + return false; if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) { assert(STy->isIntegerTy()); STy = SplitIntTy; @@ -2282,7 +2242,7 @@ class llvm::sroa::AllocaSliceRewriter const DataLayout &DL; AllocaSlices &AS; - SROA &Pass; + SROAPass &Pass; AllocaInst &OldAI, &NewAI; const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset; Type *NewAllocaTy; @@ -2330,7 +2290,7 @@ class llvm::sroa::AllocaSliceRewriter IRBuilderTy IRB; public: - AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &AS, SROA &Pass, + AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &AS, SROAPass &Pass, AllocaInst &OldAI, AllocaInst &NewAI, uint64_t NewAllocaBeginOffset, uint64_t NewAllocaEndOffset, bool IsIntegerPromotable, @@ -2510,8 +2470,7 @@ private: Value *OldOp = LI.getOperand(0); assert(OldOp == OldPtr); - AAMDNodes AATags; - LI.getAAMetadata(AATags); + AAMDNodes AATags = LI.getAAMetadata(); unsigned AS = LI.getPointerAddressSpace(); @@ -2675,9 +2634,7 @@ private: Value *OldOp = SI.getOperand(1); assert(OldOp == OldPtr); - AAMDNodes AATags; - SI.getAAMetadata(AATags); - + AAMDNodes AATags = SI.getAAMetadata(); Value *V = SI.getValueOperand(); // Strip all inbounds GEPs and pointer casts to try to dig out any root @@ -2743,7 +2700,9 @@ private: deleteIfTriviallyDead(OldOp); LLVM_DEBUG(dbgs() << " to: " << *NewSI << "\n"); - return NewSI->getPointerOperand() == &NewAI && !SI.isVolatile(); + return NewSI->getPointerOperand() == &NewAI && + NewSI->getValueOperand()->getType() == NewAllocaTy && + !SI.isVolatile(); } /// Compute an integer value from splatting an i8 across the given @@ -2784,8 +2743,7 @@ private: LLVM_DEBUG(dbgs() << " original: " << II << "\n"); assert(II.getRawDest() == OldPtr); - AAMDNodes AATags; - II.getAAMetadata(AATags); + AAMDNodes AATags = II.getAAMetadata(); // If the memset has a variable size, it cannot be split, just adjust the // pointer to the new alloca. @@ -2913,8 +2871,7 @@ private: LLVM_DEBUG(dbgs() << " original: " << II << "\n"); - AAMDNodes AATags; - II.getAAMetadata(AATags); + AAMDNodes AATags = II.getAAMetadata(); bool IsDest = &II.getRawDestUse() == OldUse; assert((IsDest && II.getRawDest() == OldPtr) || @@ -3421,9 +3378,7 @@ private: // We have an aggregate being loaded, split it apart. 
LLVM_DEBUG(dbgs() << " original: " << LI << "\n"); - AAMDNodes AATags; - LI.getAAMetadata(AATags); - LoadOpSplitter Splitter(&LI, *U, LI.getType(), AATags, + LoadOpSplitter Splitter(&LI, *U, LI.getType(), LI.getAAMetadata(), getAdjustedAlignment(&LI, 0), DL); Value *V = UndefValue::get(LI.getType()); Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca"); @@ -3474,9 +3429,7 @@ private: // We have an aggregate being stored, split it apart. LLVM_DEBUG(dbgs() << " original: " << SI << "\n"); - AAMDNodes AATags; - SI.getAAMetadata(AATags); - StoreOpSplitter Splitter(&SI, *U, V->getType(), AATags, + StoreOpSplitter Splitter(&SI, *U, V->getType(), SI.getAAMetadata(), getAdjustedAlignment(&SI, 0), DL); Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca"); Visited.erase(&SI); @@ -3802,7 +3755,7 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset, /// there all along. /// /// \returns true if any changes are made. -bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { +bool SROAPass::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { LLVM_DEBUG(dbgs() << "Pre-splitting loads and stores\n"); // Track the loads and stores which are candidates for pre-splitting here, in @@ -4282,8 +4235,8 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { /// appropriate new offsets. It also evaluates how successful the rewrite was /// at enabling promotion and if it was successful queues the alloca to be /// promoted. -AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, - Partition &P) { +AllocaInst *SROAPass::rewritePartition(AllocaInst &AI, AllocaSlices &AS, + Partition &P) { // Try to compute a friendly type for this partition of the alloca. This // won't always succeed, in which case we fall back to a legal integer type // or an i8 array of an appropriate size. @@ -4434,7 +4387,7 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, /// Walks the slices of an alloca and form partitions based on them, /// rewriting each of their uses. -bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) { +bool SROAPass::splitAlloca(AllocaInst &AI, AllocaSlices &AS) { if (AS.begin() == AS.end()) return false; @@ -4605,7 +4558,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) { } /// Clobber a use with undef, deleting the used value if it becomes dead. -void SROA::clobberUse(Use &U) { +void SROAPass::clobberUse(Use &U) { Value *OldV = U; // Replace the use with an undef value. U = UndefValue::get(OldV->getType()); @@ -4624,7 +4577,7 @@ void SROA::clobberUse(Use &U) { /// This analyzes the alloca to ensure we can reason about it, builds /// the slices of the alloca, and then hands it off to be split and /// rewritten as needed. -bool SROA::runOnAlloca(AllocaInst &AI) { +bool SROAPass::runOnAlloca(AllocaInst &AI) { LLVM_DEBUG(dbgs() << "SROA alloca: " << AI << "\n"); ++NumAllocasAnalyzed; @@ -4698,7 +4651,7 @@ bool SROA::runOnAlloca(AllocaInst &AI) { /// /// We also record the alloca instructions deleted here so that they aren't /// subsequently handed to mem2reg to promote. -bool SROA::deleteDeadInstructions( +bool SROAPass::deleteDeadInstructions( SmallPtrSetImpl<AllocaInst *> &DeletedAllocas) { bool Changed = false; while (!DeadInsts.empty()) { @@ -4737,7 +4690,7 @@ bool SROA::deleteDeadInstructions( /// This attempts to promote whatever allocas have been identified as viable in /// the PromotableAllocas list. If that list is empty, there is nothing to do. 
/// This function returns whether any promotion occurred. -bool SROA::promoteAllocas(Function &F) { +bool SROAPass::promoteAllocas(Function &F) { if (PromotableAllocas.empty()) return false; @@ -4749,8 +4702,8 @@ bool SROA::promoteAllocas(Function &F) { return true; } -PreservedAnalyses SROA::runImpl(Function &F, DominatorTree &RunDT, - AssumptionCache &RunAC) { +PreservedAnalyses SROAPass::runImpl(Function &F, DominatorTree &RunDT, + AssumptionCache &RunAC) { LLVM_DEBUG(dbgs() << "SROA function: " << F.getName() << "\n"); C = &F.getContext(); DT = &RunDT; @@ -4804,7 +4757,7 @@ PreservedAnalyses SROA::runImpl(Function &F, DominatorTree &RunDT, return PA; } -PreservedAnalyses SROA::run(Function &F, FunctionAnalysisManager &AM) { +PreservedAnalyses SROAPass::run(Function &F, FunctionAnalysisManager &AM) { return runImpl(F, AM.getResult<DominatorTreeAnalysis>(F), AM.getResult<AssumptionAnalysis>(F)); } @@ -4815,7 +4768,7 @@ PreservedAnalyses SROA::run(Function &F, FunctionAnalysisManager &AM) { /// SROA pass. class llvm::sroa::SROALegacyPass : public FunctionPass { /// The SROA implementation. - SROA Impl; + SROAPass Impl; public: static char ID; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp index ca288a533f46..1284bae820a4 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp @@ -873,13 +873,11 @@ static bool runImpl(Function &F, const TargetTransformInfo &TTI, auto &DL = F.getParent()->getDataLayout(); while (MadeChange) { MadeChange = false; - for (Function::iterator I = F.begin(); I != F.end();) { - BasicBlock *BB = &*I++; + for (BasicBlock &BB : llvm::make_early_inc_range(F)) { bool ModifiedDTOnIteration = false; - MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration, TTI, DL, + MadeChange |= optimizeBlock(BB, ModifiedDTOnIteration, TTI, DL, DTU.hasValue() ? DTU.getPointer() : nullptr); - // Restart BB iteration if the dominator tree of the Function was changed if (ModifiedDTOnIteration) break; @@ -933,7 +931,7 @@ static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT, if (II) { // The scalarization code below does not work for scalable vectors. if (isa<ScalableVectorType>(II->getType()) || - any_of(II->arg_operands(), + any_of(II->args(), [](Value *V) { return isa<ScalableVectorType>(V->getType()); })) return false; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalarizer.cpp index 8ef6b69673be..6b7419abe1d1 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalarizer.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalarizer.cpp @@ -66,6 +66,15 @@ static cl::opt<bool> namespace { +BasicBlock::iterator skipPastPhiNodesAndDbg(BasicBlock::iterator Itr) { + BasicBlock *BB = Itr->getParent(); + if (isa<PHINode>(Itr)) + Itr = BB->getFirstInsertionPt(); + if (Itr != BB->end()) + Itr = skipDebugIntrinsics(Itr); + return Itr; +} + // Used to store the scattered form of a vector. using ValueVector = SmallVector<Value *, 8>; @@ -371,10 +380,11 @@ Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V) { return Scatterer(Point->getParent(), Point->getIterator(), UndefValue::get(V->getType())); // Put the scattered form of an instruction directly after the - // instruction. + // instruction, skipping over PHI nodes and debug intrinsics. 
BasicBlock *BB = VOp->getParent(); - return Scatterer(BB, std::next(BasicBlock::iterator(VOp)), - V, &Scattered[V]); + return Scatterer( + BB, skipPastPhiNodesAndDbg(std::next(BasicBlock::iterator(VOp))), V, + &Scattered[V]); } // In the fallback case, just put the scattered before Point and // keep the result local to Point. @@ -530,7 +540,7 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) { return false; unsigned NumElems = cast<FixedVectorType>(VT)->getNumElements(); - unsigned NumArgs = CI.getNumArgOperands(); + unsigned NumArgs = CI.arg_size(); ValueVector ScalarOperands(NumArgs); SmallVector<Scatterer, 8> Scattered(NumArgs); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index f216956406b6..ffa2f9adb978 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -1164,8 +1164,11 @@ bool SeparateConstOffsetFromGEP::run(Function &F) { DL = &F.getParent()->getDataLayout(); bool Changed = false; for (BasicBlock &B : F) { - for (BasicBlock::iterator I = B.begin(), IE = B.end(); I != IE;) - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I++)) + if (!DT->isReachableFromEntry(&B)) + continue; + + for (Instruction &I : llvm::make_early_inc_range(B)) + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) Changed |= splitGEP(GEP); // No need to split GEP ConstantExprs because all its indices are constant // already. @@ -1258,10 +1261,8 @@ bool SeparateConstOffsetFromGEP::reuniteExts(Function &F) { DominatingSubs.clear(); for (const auto Node : depth_first(DT)) { BasicBlock *BB = Node->getBlock(); - for (auto I = BB->begin(); I != BB->end(); ) { - Instruction *Cur = &*I++; - Changed |= reuniteExts(Cur); - } + for (Instruction &I : llvm::make_early_inc_range(*BB)) + Changed |= reuniteExts(&I); } return Changed; } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index b1c105258027..a27da047bfd3 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -28,6 +28,7 @@ #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" @@ -49,7 +50,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GenericDomTree.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" @@ -81,6 +81,7 @@ static cl::opt<bool> EnableNonTrivialUnswitch( static cl::opt<int> UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden, + cl::ZeroOrMore, cl::desc("The cost threshold for unswitching a loop.")); static cl::opt<bool> EnableUnswitchCostMultiplier( @@ -108,6 +109,10 @@ static cl::opt<unsigned> cl::desc("Max number of memory uses to explore during " "partial unswitching analysis"), cl::init(100), cl::Hidden); +static cl::opt<bool> FreezeLoopUnswitchCond( + "freeze-loop-unswitch-cond", cl::init(false), cl::Hidden, + cl::desc("If enabled, the freeze instruction 
will be added to condition " + "of loop unswitch to prevent miscompilation.")); /// Collect all of the loop invariant input values transitively used by the /// homogeneous instruction graph from a given root. @@ -195,15 +200,15 @@ static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB, /// Copy a set of loop invariant values \p ToDuplicate and insert them at the /// end of \p BB and conditionally branch on the copied condition. We only /// branch on a single value. -static void buildPartialUnswitchConditionalBranch(BasicBlock &BB, - ArrayRef<Value *> Invariants, - bool Direction, - BasicBlock &UnswitchedSucc, - BasicBlock &NormalSucc) { +static void buildPartialUnswitchConditionalBranch( + BasicBlock &BB, ArrayRef<Value *> Invariants, bool Direction, + BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze) { IRBuilder<> IRB(&BB); Value *Cond = Direction ? IRB.CreateOr(Invariants) : IRB.CreateAnd(Invariants); + if (InsertFreeze) + Cond = IRB.CreateFreeze(Cond, Cond->getName() + ".fr"); IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc, Direction ? &NormalSucc : &UnswitchedSucc); } @@ -564,7 +569,7 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the" " condition!"); buildPartialUnswitchConditionalBranch(*OldPH, Invariants, ExitDirection, - *UnswitchedBB, *NewPH); + *UnswitchedBB, *NewPH, false); } // Update the dominator tree with the added edge. @@ -2123,6 +2128,13 @@ static void unswitchNontrivialInvariants( SE->forgetTopmostLoop(&L); } + bool InsertFreeze = false; + if (FreezeLoopUnswitchCond) { + ICFLoopSafetyInfo SafetyInfo; + SafetyInfo.computeLoopSafetyInfo(&L); + InsertFreeze = !SafetyInfo.isGuaranteedToExecute(TI, &DT, &L); + } + // If the edge from this terminator to a successor dominates that successor, // store a map from each block in its dominator subtree to it. This lets us // tell when cloning for a particular successor if a block is dominated by @@ -2197,6 +2209,11 @@ static void unswitchNontrivialInvariants( BasicBlock *ClonedPH = ClonedPHs.begin()->second; BI->setSuccessor(ClonedSucc, ClonedPH); BI->setSuccessor(1 - ClonedSucc, LoopPH); + if (InsertFreeze) { + auto Cond = BI->getCondition(); + if (!isGuaranteedNotToBeUndefOrPoison(Cond, &AC, BI, &DT)) + BI->setCondition(new FreezeInst(Cond, Cond->getName() + ".fr", BI)); + } DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH}); } else { assert(SI && "Must either be a branch or switch!"); @@ -2211,6 +2228,11 @@ static void unswitchNontrivialInvariants( else Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second); + if (InsertFreeze) { + auto Cond = SI->getCondition(); + if (!isGuaranteedNotToBeUndefOrPoison(Cond, &AC, SI, &DT)) + SI->setCondition(new FreezeInst(Cond, Cond->getName() + ".fr", SI)); + } // We need to use the set to populate domtree updates as even when there // are multiple cases pointing at the same successor we only want to // remove and insert one edge in the domtree. @@ -2291,7 +2313,7 @@ static void unswitchNontrivialInvariants( *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU); else buildPartialUnswitchConditionalBranch(*SplitBB, Invariants, Direction, - *ClonedPH, *LoopPH); + *ClonedPH, *LoopPH, InsertFreeze); DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH}); if (MSSAU) { @@ -2370,7 +2392,9 @@ static void unswitchNontrivialInvariants( ConstantInt *ContinueReplacement = Direction ? 
ConstantInt::getFalse(BI->getContext()) : ConstantInt::getTrue(BI->getContext()); - for (Value *Invariant : Invariants) + for (Value *Invariant : Invariants) { + assert(!isa<Constant>(Invariant) && + "Should not be replacing constant values!"); // Use make_early_inc_range here as set invalidates the iterator. for (Use &U : llvm::make_early_inc_range(Invariant->uses())) { Instruction *UserI = dyn_cast<Instruction>(U.getUser()); @@ -2385,6 +2409,7 @@ static void unswitchNontrivialInvariants( DT.dominates(ClonedPH, UserI->getParent())) U.set(UnswitchedReplacement); } + } } // We can change which blocks are exit blocks of all the cloned sibling @@ -2727,6 +2752,9 @@ static bool unswitchBestCondition( Cond = CondNext; BI->setCondition(Cond); + if (isa<Constant>(Cond)) + continue; + if (L.isLoopInvariant(BI->getCondition())) { UnswitchCandidates.push_back({BI, {BI->getCondition()}}); continue; @@ -3121,6 +3149,17 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM, return PA; } +void SimpleLoopUnswitchPass::printPipeline( + raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { + static_cast<PassInfoMixin<SimpleLoopUnswitchPass> *>(this)->printPipeline( + OS, MapClassName2PassName); + + OS << "<"; + OS << (NonTrivial ? "" : "no-") << "nontrivial;"; + OS << (Trivial ? "" : "no-") << "trivial"; + OS << ">"; +} + namespace { class SimpleLoopUnswitchLegacyPass : public LoopPass { @@ -3140,10 +3179,8 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<TargetTransformInfoWrapperPass>(); - if (EnableMSSALoopDependency) { - AU.addRequired<MemorySSAWrapperPass>(); - AU.addPreserved<MemorySSAWrapperPass>(); - } + AU.addRequired<MemorySSAWrapperPass>(); + AU.addPreserved<MemorySSAWrapperPass>(); getLoopAnalysisUsage(AU); } }; @@ -3164,12 +3201,8 @@ bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) { auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); - MemorySSA *MSSA = nullptr; - Optional<MemorySSAUpdater> MSSAU; - if (EnableMSSALoopDependency) { - MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA(); - MSSAU = MemorySSAUpdater(MSSA); - } + MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA(); + MemorySSAUpdater MSSAU(MSSA); auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>(); auto *SE = SEWP ? &SEWP->getSE() : nullptr; @@ -3197,15 +3230,13 @@ bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) { LPM.markLoopAsDeleted(L); }; - if (MSSA && VerifyMemorySSA) + if (VerifyMemorySSA) MSSA->verifyMemorySSA(); - bool Changed = - unswitchLoop(*L, DT, LI, AC, AA, TTI, true, NonTrivial, UnswitchCB, SE, - MSSAU.hasValue() ? 
@@ -3197,15 +3230,13 @@ bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
     LPM.markLoopAsDeleted(L);
   };
 
-  if (MSSA && VerifyMemorySSA)
+  if (VerifyMemorySSA)
     MSSA->verifyMemorySSA();
 
-  bool Changed =
-      unswitchLoop(*L, DT, LI, AC, AA, TTI, true, NonTrivial, UnswitchCB, SE,
-                   MSSAU.hasValue() ? MSSAU.getPointer() : nullptr,
-                   DestroyLoopCB);
+  bool Changed = unswitchLoop(*L, DT, LI, AC, AA, TTI, true, NonTrivial,
+                              UnswitchCB, SE, &MSSAU, DestroyLoopCB);
 
-  if (MSSA && VerifyMemorySSA)
+  if (VerifyMemorySSA)
     MSSA->verifyMemorySSA();
 
   // Historically this pass has had issues with the dominator tree so verify it
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 09d59b0e884a..86d3620c312e 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -224,7 +224,11 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
   SmallVector<WeakVH, 16> LoopHeaders(UniqueLoopHeaders.begin(),
                                       UniqueLoopHeaders.end());
 
+  unsigned IterCnt = 0;
+  (void)IterCnt;
   while (LocalChange) {
+    assert(IterCnt++ < 1000 &&
+           "Sanity: iterative simplification didn't converge!");
     LocalChange = false;
 
     // Loop over all of the basic blocks and remove them if they are unneeded.
@@ -319,6 +323,21 @@ SimplifyCFGPass::SimplifyCFGPass(const SimplifyCFGOptions &Opts)
   applyCommandLineOverridesToOptions(Options);
 }
 
+void SimplifyCFGPass::printPipeline(
+    raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+  static_cast<PassInfoMixin<SimplifyCFGPass> *>(this)->printPipeline(
+      OS, MapClassName2PassName);
+  OS << "<";
+  OS << "bonus-inst-threshold=" << Options.BonusInstThreshold << ";";
+  OS << (Options.ForwardSwitchCondToPhi ? "" : "no-") << "forward-switch-cond;";
+  OS << (Options.ConvertSwitchToLookupTable ? "" : "no-")
+     << "switch-to-lookup;";
+  OS << (Options.NeedCanonicalLoop ? "" : "no-") << "keep-loops;";
+  OS << (Options.HoistCommonInsts ? "" : "no-") << "hoist-common-insts;";
+  OS << (Options.SinkCommonInsts ? "" : "no-") << "sink-common-insts";
+  OS << ">";
+}
+
 PreservedAnalyses SimplifyCFGPass::run(Function &F,
                                        FunctionAnalysisManager &AM) {
   auto &TTI = AM.getResult<TargetIRAnalysis>(F);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
index dfa30418ea01..06169a7834f6 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
@@ -268,7 +268,7 @@ bool SpeculativeExecutionPass::considerHoistingFromTo(
     if (const auto *DVI = dyn_cast<DbgVariableIntrinsic>(U)) {
       return all_of(DVI->location_ops(), [&NotHoisted](Value *V) {
         if (const auto *I = dyn_cast_or_null<Instruction>(V)) {
-          if (NotHoisted.count(I) == 0)
+          if (!NotHoisted.contains(I))
             return true;
         }
         return false;
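The StraightLineStrengthReduce hunks below switch to the shorter APInt predicate spellings. A small self-contained illustration of how the old and new names line up, assuming a current llvm/ADT/APInt.h (this snippet is an aside, not part of the diff):

// Aside, not part of the diff: the renamed APInt predicates used below.
#include "llvm/ADT/APInt.h"
#include <cassert>
using namespace llvm;

int main() {
  assert(APInt::getAllOnes(8).isAllOnes()); // formerly isAllOnesValue()
  APInt MinusFour(8, -4, /*isSigned=*/true);
  assert(MinusFour.isNegatedPowerOf2());    // formerly (-IndexOffset).isPowerOf2()
  return 0;
}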
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index 20b8b982e14b..b47378808216 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -607,7 +607,7 @@ Value *StraightLineStrengthReduce::emitBump(const Candidate &Basis,
   if (IndexOffset == 1)
     return C.Stride;
   // Common case 2: if (i' - i) is -1, Bump = -S.
-  if (IndexOffset.isAllOnesValue())
+  if (IndexOffset.isAllOnes())
     return Builder.CreateNeg(C.Stride);
 
   // Otherwise, Bump = (i' - i) * sext/trunc(S).  Note that (i' - i) and S may
@@ -620,7 +620,7 @@ Value *StraightLineStrengthReduce::emitBump(const Candidate &Basis,
     ConstantInt *Exponent = ConstantInt::get(DeltaType, IndexOffset.logBase2());
     return Builder.CreateShl(ExtendedStride, Exponent);
   }
-  if ((-IndexOffset).isPowerOf2()) {
+  if (IndexOffset.isNegatedPowerOf2()) {
     // If (i - i') is a power of 2, Bump = -sext/trunc(S) << log(i' - i).
     ConstantInt *Exponent =
         ConstantInt::get(DeltaType, (-IndexOffset).logBase2());
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 846a9321f53e..3bcf92e28a21 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -262,7 +262,7 @@ static bool markTails(Function &F, OptimizationRemarkEmitter *ORE) {
       // Note that this runs whether we know an alloca has escaped or not. If
       // it has, then we can't trust Tracker.AllocaUsers to be accurate.
       bool SafeToTail = true;
-      for (auto &Arg : CI->arg_operands()) {
+      for (auto &Arg : CI->args()) {
        if (isa<Constant>(Arg.getUser()))
          continue;
        if (Argument *A = dyn_cast<Argument>(Arg.getUser()))
@@ -584,8 +584,8 @@ void TailRecursionEliminator::insertAccumulator(Instruction *AccRecInstr) {
 // call instruction into the newly created temporarily variable.
 void TailRecursionEliminator::copyByValueOperandIntoLocalTemp(CallInst *CI,
                                                               int OpndIdx) {
-  PointerType *ArgTy = cast<PointerType>(CI->getArgOperand(OpndIdx)->getType());
-  Type *AggTy = ArgTy->getElementType();
+  Type *AggTy = CI->getParamByValType(OpndIdx);
+  assert(AggTy);
   const DataLayout &DL = F.getParent()->getDataLayout();
 
   // Get alignment of byVal operand.
@@ -611,8 +611,8 @@ void TailRecursionEliminator::copyByValueOperandIntoLocalTemp(CallInst *CI,
 // into the corresponding function argument location.
 void TailRecursionEliminator::copyLocalTempOfByValueOperandIntoArguments(
     CallInst *CI, int OpndIdx) {
-  PointerType *ArgTy = cast<PointerType>(CI->getArgOperand(OpndIdx)->getType());
-  Type *AggTy = ArgTy->getElementType();
+  Type *AggTy = CI->getParamByValType(OpndIdx);
+  assert(AggTy);
   const DataLayout &DL = F.getParent()->getDataLayout();
 
   // Get alignment of byVal operand.
@@ -667,7 +667,7 @@ bool TailRecursionEliminator::eliminateCall(CallInst *CI) {
     createTailRecurseLoopHeader(CI);
 
   // Copy values of ByVal operands into local temporarily variables.
-  for (unsigned I = 0, E = CI->getNumArgOperands(); I != E; ++I) {
+  for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
     if (CI->isByValArgument(I))
       copyByValueOperandIntoLocalTemp(CI, I);
   }
@@ -675,7 +675,7 @@ bool TailRecursionEliminator::eliminateCall(CallInst *CI) {
   // Ok, now that we know we have a pseudo-entry block WITH all of the
   // required PHI nodes, add entries into the PHI node for the actual
   // parameters passed into the tail-recursive call.
-  for (unsigned I = 0, E = CI->getNumArgOperands(); I != E; ++I) {
+  for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
     if (CI->isByValArgument(I)) {
       copyLocalTempOfByValueOperandIntoArguments(CI, I);
       ArgumentPHIs[I]->addIncoming(F.getArg(I), BB);
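The TailRecursionElimination hunks above stop deriving the byval aggregate type from the pointer operand's pointee type and instead read it from the call site's byval attribute, which keeps working under opaque pointers. A hedged sketch of that query pattern; the helper name is illustrative, not from the patch:

// Sketch: query a byval argument's aggregate type (and size) from the call
// site's attributes instead of PointerType::getElementType().
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include <cassert>
using namespace llvm;

static Type *byValAggregateType(const CallInst *CI, unsigned OpndIdx,
                                const DataLayout &DL, uint64_t &AllocSize) {
  assert(CI->isByValArgument(OpndIdx) && "expected a byval argument");
  Type *AggTy = CI->getParamByValType(OpndIdx); // type carried by the attribute
  AllocSize = DL.getTypeAllocSize(AggTy);       // bytes the call must copy
  return AggTy;
}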