9 files changed, 56 insertions, 579 deletions
diff --git a/lib/Target/AArch64/AArch64.h b/lib/Target/AArch64/AArch64.h
index b44b13e36e15..3e0e3978b90b 100644
--- a/lib/Target/AArch64/AArch64.h
+++ b/lib/Target/AArch64/AArch64.h
@@ -41,7 +41,6 @@ FunctionPass *createAArch64LoadStoreOptimizationPass();
 FunctionPass *createAArch64VectorByElementOptPass();
 ModulePass *createAArch64PromoteConstantPass();
 FunctionPass *createAArch64ConditionOptimizerPass();
-FunctionPass *createAArch64AddressTypePromotionPass();
 FunctionPass *createAArch64A57FPLoadBalancing();
 FunctionPass *createAArch64A53Fix835769();
 
@@ -54,7 +53,6 @@ createAArch64InstructionSelector(const AArch64TargetMachine &,
 
 void initializeAArch64A53Fix835769Pass(PassRegistry&);
 void initializeAArch64A57FPLoadBalancingPass(PassRegistry&);
-void initializeAArch64AddressTypePromotionPass(PassRegistry&);
 void initializeAArch64AdvSIMDScalarPass(PassRegistry&);
 void initializeAArch64CollectLOHPass(PassRegistry&);
 void initializeAArch64ConditionalComparesPass(PassRegistry&);
diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td
index 519ca2894683..73f2b6a25f66 100644
--- a/lib/Target/AArch64/AArch64.td
+++ b/lib/Target/AArch64/AArch64.td
@@ -358,7 +358,6 @@ def ProcThunderXT83 : SubtargetFeature<"thunderxt83", "ARMProcFamily",
                                        FeatureNEON]>;
 
 def : ProcessorModel<"generic", NoSchedModel, [
-                     FeatureCRC,
                      FeatureFPARMv8,
                      FeatureNEON,
                      FeaturePerfMon,
diff --git a/lib/Target/AArch64/AArch64AddressTypePromotion.cpp b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
deleted file mode 100644
index e1b8ee6d03c3..000000000000
--- a/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
+++ /dev/null
@@ -1,493 +0,0 @@
-//===-- AArch64AddressTypePromotion.cpp --- Promote type for addr accesses -==//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass tries to promote the computations use to obtained a sign extended
-// value used into memory accesses.
-// E.g.
-// a = add nsw i32 b, 3
-// d = sext i32 a to i64
-// e = getelementptr ..., i64 d
-//
-// =>
-// f = sext i32 b to i64
-// a = add nsw i64 f, 3
-// e = getelementptr ..., i64 a
-//
-// This is legal to do if the computations are marked with either nsw or nuw
-// markers. Moreover, the current heuristic is simple: it does not create new
-// sext operations, i.e., it gives up when a sext would have forked (e.g., if a
-// = add i32 b, c, two sexts are required to promote the computation).
-//
-// FIXME: This pass may be useful for other targets too.
-// ===---------------------------------------------------------------------===//
-
-#include "AArch64.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Operator.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/Use.h"
-#include "llvm/IR/User.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cassert>
-
-using namespace llvm;
-
-#define DEBUG_TYPE "aarch64-type-promotion"
-
-static cl::opt<bool>
-EnableMerge("aarch64-type-promotion-merge", cl::Hidden,
-            cl::desc("Enable merging of redundant sexts when one is dominating"
-                     " the other."),
-            cl::init(true));
-
-#define AARCH64_TYPE_PROMO_NAME "AArch64 Address Type Promotion"
-
-//===----------------------------------------------------------------------===//
-//                       AArch64AddressTypePromotion
-//===----------------------------------------------------------------------===//
-
-namespace {
-
-class AArch64AddressTypePromotion : public FunctionPass {
-public:
-  static char ID;
-
-  AArch64AddressTypePromotion() : FunctionPass(ID) {
-    initializeAArch64AddressTypePromotionPass(*PassRegistry::getPassRegistry());
-  }
-
-  StringRef getPassName() const override { return AARCH64_TYPE_PROMO_NAME; }
-
-  /// Iterate over the functions and promote the computation of interesting
-  // sext instructions.
-  bool runOnFunction(Function &F) override;
-
-private:
-  /// The current function.
-  Function *Func = nullptr;
-
-  /// Filter out all sexts that does not have this type.
-  /// Currently initialized with Int64Ty.
-  Type *ConsideredSExtType = nullptr;
-
-  // This transformation requires dominator info.
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.setPreservesCFG();
-    AU.addRequired<DominatorTreeWrapperPass>();
-    AU.addPreserved<DominatorTreeWrapperPass>();
-    FunctionPass::getAnalysisUsage(AU);
-  }
-
-  typedef SmallPtrSet<Instruction *, 32> SetOfInstructions;
-  typedef SmallVector<Instruction *, 16> Instructions;
-  typedef DenseMap<Value *, Instructions> ValueToInsts;
-
-  /// Check if it is profitable to move a sext through this instruction.
-  /// Currently, we consider it is profitable if:
-  /// - Inst is used only once (no need to insert truncate).
-  /// - Inst has only one operand that will require a sext operation (we do
-  ///   do not create new sext operation).
-  bool shouldGetThrough(const Instruction *Inst);
-
-  /// Check if it is possible and legal to move a sext through this
-  /// instruction.
-  /// Current heuristic considers that we can get through:
-  /// - Arithmetic operation marked with the nsw or nuw flag.
-  /// - Other sext operation.
-  /// - Truncate operation if it was just dropping sign extended bits.
-  bool canGetThrough(const Instruction *Inst);
-
-  /// Move sext operations through safe to sext instructions.
-  bool propagateSignExtension(Instructions &SExtInsts);
-
-  /// Is this sext should be considered for code motion.
-  /// We look for sext with ConsideredSExtType and uses in at least one
-  // GetElementPtrInst.
-  bool shouldConsiderSExt(const Instruction *SExt) const;
-
-  /// Collect all interesting sext operations, i.e., the ones with the right
-  /// type and used in memory accesses.
-  /// More precisely, a sext instruction is considered as interesting if it
-  /// is used in a "complex" getelementptr or it exits at least another
-  /// sext instruction that sign extended the same initial value.
-  /// A getelementptr is considered as "complex" if it has more than 2
-  // operands.
-  void analyzeSExtension(Instructions &SExtInsts);
-
-  /// Merge redundant sign extension operations in common dominator.
-  void mergeSExts(ValueToInsts &ValToSExtendedUses,
-                  SetOfInstructions &ToRemove);
-};
-
-} // end anonymous namespace
-
-char AArch64AddressTypePromotion::ID = 0;
-
-INITIALIZE_PASS_BEGIN(AArch64AddressTypePromotion, "aarch64-type-promotion",
-                      AARCH64_TYPE_PROMO_NAME, false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(AArch64AddressTypePromotion, "aarch64-type-promotion",
-                    AARCH64_TYPE_PROMO_NAME, false, false)
-
-FunctionPass *llvm::createAArch64AddressTypePromotionPass() {
-  return new AArch64AddressTypePromotion();
-}
-
-bool AArch64AddressTypePromotion::canGetThrough(const Instruction *Inst) {
-  if (isa<SExtInst>(Inst))
-    return true;
-
-  const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
-  if (BinOp && isa<OverflowingBinaryOperator>(BinOp) &&
-      (BinOp->hasNoUnsignedWrap() || BinOp->hasNoSignedWrap()))
-    return true;
-
-  // sext(trunc(sext)) --> sext
-  if (isa<TruncInst>(Inst) && isa<SExtInst>(Inst->getOperand(0))) {
-    const Instruction *Opnd = cast<Instruction>(Inst->getOperand(0));
-    // Check that the truncate just drop sign extended bits.
-    if (Inst->getType()->getIntegerBitWidth() >=
-            Opnd->getOperand(0)->getType()->getIntegerBitWidth() &&
-        Inst->getOperand(0)->getType()->getIntegerBitWidth() <=
-            ConsideredSExtType->getIntegerBitWidth())
-      return true;
-  }
-
-  return false;
-}
-
-bool AArch64AddressTypePromotion::shouldGetThrough(const Instruction *Inst) {
-  // If the type of the sext is the same as the considered one, this sext
-  // will become useless.
-  // Otherwise, we will have to do something to preserve the original value,
-  // unless it is used once.
-  if (isa<SExtInst>(Inst) &&
-      (Inst->getType() == ConsideredSExtType || Inst->hasOneUse()))
-    return true;
-
-  // If the Inst is used more that once, we may need to insert truncate
-  // operations and we don't do that at the moment.
-  if (!Inst->hasOneUse())
-    return false;
-
-  // This truncate is used only once, thus if we can get thourgh, it will become
-  // useless.
-  if (isa<TruncInst>(Inst))
-    return true;
-
-  // If both operands are not constant, a new sext will be created here.
-  // Current heuristic is: each step should be profitable.
-  // Therefore we don't allow to increase the number of sext even if it may
-  // be profitable later on.
-  if (isa<BinaryOperator>(Inst) && isa<ConstantInt>(Inst->getOperand(1)))
-    return true;
-
-  return false;
-}
-
-static bool shouldSExtOperand(const Instruction *Inst, int OpIdx) {
-  return !(isa<SelectInst>(Inst) && OpIdx == 0);
-}
-
-bool
-AArch64AddressTypePromotion::shouldConsiderSExt(const Instruction *SExt) const {
-  if (SExt->getType() != ConsideredSExtType)
-    return false;
-
-  for (const User *U : SExt->users()) {
-    if (isa<GetElementPtrInst>(U))
-      return true;
-  }
-
-  return false;
-}
-
-// Input:
-// - SExtInsts contains all the sext instructions that are used directly in
-//   GetElementPtrInst, i.e., access to memory.
-// Algorithm:
-// - For each sext operation in SExtInsts:
-//   Let var be the operand of sext.
-//   while it is profitable (see shouldGetThrough), legal, and safe
-//   (see canGetThrough) to move sext through var's definition:
-//   * promote the type of var's definition.
-//   * fold var into sext uses.
-//   * move sext above var's definition.
-//   * update sext operand to use the operand of var that should be sign
-//     extended (by construction there is only one).
-//
-//   E.g.,
-//   a = ... i32 c, 3
-//   b = sext i32 a to i64 <- is it legal/safe/profitable to get through 'a'
-//   ...
-//   = b
-// => Yes, update the code
-//   b = sext i32 c to i64
-//   a = ... i64 b, 3
-//   ...
-//   = a
-// Iterate on 'c'.
-bool
-AArch64AddressTypePromotion::propagateSignExtension(Instructions &SExtInsts) {
-  DEBUG(dbgs() << "*** Propagate Sign Extension ***\n");
-
-  bool LocalChange = false;
-  SetOfInstructions ToRemove;
-  ValueToInsts ValToSExtendedUses;
-  while (!SExtInsts.empty()) {
-    // Get through simple chain.
-    Instruction *SExt = SExtInsts.pop_back_val();
-
-    DEBUG(dbgs() << "Consider:\n" << *SExt << '\n');
-
-    // If this SExt has already been merged continue.
-    if (SExt->use_empty() && ToRemove.count(SExt)) {
-      DEBUG(dbgs() << "No uses => marked as delete\n");
-      continue;
-    }
-
-    // Now try to get through the chain of definitions.
-    while (auto *Inst = dyn_cast<Instruction>(SExt->getOperand(0))) {
-      DEBUG(dbgs() << "Try to get through:\n" << *Inst << '\n');
-      if (!canGetThrough(Inst) || !shouldGetThrough(Inst)) {
-        // We cannot get through something that is not an Instruction
-        // or not safe to SExt.
-        DEBUG(dbgs() << "Cannot get through\n");
-        break;
-      }
-
-      LocalChange = true;
-      // If this is a sign extend, it becomes useless.
-      if (isa<SExtInst>(Inst) || isa<TruncInst>(Inst)) {
-        DEBUG(dbgs() << "SExt or trunc, mark it as to remove\n");
-        // We cannot use replaceAllUsesWith here because we may trigger some
-        // assertion on the type as all involved sext operation may have not
-        // been moved yet.
-        while (!Inst->use_empty()) {
-          Use &U = *Inst->use_begin();
-          Instruction *User = dyn_cast<Instruction>(U.getUser());
-          assert(User && "User of sext is not an Instruction!");
-          User->setOperand(U.getOperandNo(), SExt);
-        }
-        ToRemove.insert(Inst);
-        SExt->setOperand(0, Inst->getOperand(0));
-        SExt->moveBefore(Inst);
-        continue;
-      }
-
-      // Get through the Instruction:
-      // 1. Update its type.
-      // 2. Replace the uses of SExt by Inst.
-      // 3. Sign extend each operand that needs to be sign extended.
-
-      // Step #1.
-      Inst->mutateType(SExt->getType());
-      // Step #2.
-      SExt->replaceAllUsesWith(Inst);
-      // Step #3.
-      Instruction *SExtForOpnd = SExt;
-
-      DEBUG(dbgs() << "Propagate SExt to operands\n");
-      for (int OpIdx = 0, EndOpIdx = Inst->getNumOperands(); OpIdx != EndOpIdx;
-           ++OpIdx) {
-        DEBUG(dbgs() << "Operand:\n" << *(Inst->getOperand(OpIdx)) << '\n');
-        if (Inst->getOperand(OpIdx)->getType() == SExt->getType() ||
-            !shouldSExtOperand(Inst, OpIdx)) {
-          DEBUG(dbgs() << "No need to propagate\n");
-          continue;
-        }
-        // Check if we can statically sign extend the operand.
-        Value *Opnd = Inst->getOperand(OpIdx);
-        if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
-          DEBUG(dbgs() << "Statically sign extend\n");
-          Inst->setOperand(OpIdx, ConstantInt::getSigned(SExt->getType(),
-                                                         Cst->getSExtValue()));
-          continue;
-        }
-        // UndefValue are typed, so we have to statically sign extend them.
-        if (isa<UndefValue>(Opnd)) {
-          DEBUG(dbgs() << "Statically sign extend\n");
-          Inst->setOperand(OpIdx, UndefValue::get(SExt->getType()));
-          continue;
-        }
-
-        // Otherwise we have to explicity sign extend it.
-        assert(SExtForOpnd &&
-               "Only one operand should have been sign extended");
-
-        SExtForOpnd->setOperand(0, Opnd);
-
-        DEBUG(dbgs() << "Move before:\n" << *Inst << "\nSign extend\n");
-        // Move the sign extension before the insertion point.
-        SExtForOpnd->moveBefore(Inst);
-        Inst->setOperand(OpIdx, SExtForOpnd);
-        // If more sext are required, new instructions will have to be created.
-        SExtForOpnd = nullptr;
-      }
-      if (SExtForOpnd == SExt) {
-        DEBUG(dbgs() << "Sign extension is useless now\n");
-        ToRemove.insert(SExt);
-        break;
-      }
-    }
-
-    // If the use is already of the right type, connect its uses to its argument
-    // and delete it.
-    // This can happen for an Instruction all uses of which are sign extended.
-    if (!ToRemove.count(SExt) &&
-        SExt->getType() == SExt->getOperand(0)->getType()) {
-      DEBUG(dbgs() << "Sign extension is useless, attach its use to "
-                      "its argument\n");
-      SExt->replaceAllUsesWith(SExt->getOperand(0));
-      ToRemove.insert(SExt);
-    } else
-      ValToSExtendedUses[SExt->getOperand(0)].push_back(SExt);
-  }
-
-  if (EnableMerge)
-    mergeSExts(ValToSExtendedUses, ToRemove);
-
-  // Remove all instructions marked as ToRemove.
-  for (Instruction *I: ToRemove)
-    I->eraseFromParent();
-  return LocalChange;
-}
-
-void AArch64AddressTypePromotion::mergeSExts(ValueToInsts &ValToSExtendedUses,
-                                             SetOfInstructions &ToRemove) {
-  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-
-  for (auto &Entry : ValToSExtendedUses) {
-    Instructions &Insts = Entry.second;
-    Instructions CurPts;
-    for (Instruction *Inst : Insts) {
-      if (ToRemove.count(Inst))
-        continue;
-      bool inserted = false;
-      for (auto &Pt : CurPts) {
-        if (DT.dominates(Inst, Pt)) {
-          DEBUG(dbgs() << "Replace all uses of:\n" << *Pt << "\nwith:\n"
-                       << *Inst << '\n');
-          Pt->replaceAllUsesWith(Inst);
-          ToRemove.insert(Pt);
-          Pt = Inst;
-          inserted = true;
-          break;
-        }
-        if (!DT.dominates(Pt, Inst))
-          // Give up if we need to merge in a common dominator as the
-          // expermients show it is not profitable.
-          continue;
-
-        DEBUG(dbgs() << "Replace all uses of:\n" << *Inst << "\nwith:\n"
-                     << *Pt << '\n');
-        Inst->replaceAllUsesWith(Pt);
-        ToRemove.insert(Inst);
-        inserted = true;
-        break;
-      }
-      if (!inserted)
-        CurPts.push_back(Inst);
-    }
-  }
-}
-
-void AArch64AddressTypePromotion::analyzeSExtension(Instructions &SExtInsts) {
-  DEBUG(dbgs() << "*** Analyze Sign Extensions ***\n");
-
-  DenseMap<Value *, Instruction *> SeenChains;
-
-  for (auto &BB : *Func) {
-    for (auto &II : BB) {
-      Instruction *SExt = &II;
-
-      // Collect all sext operation per type.
-      if (!isa<SExtInst>(SExt) || !shouldConsiderSExt(SExt))
-        continue;
-
-      DEBUG(dbgs() << "Found:\n" << (*SExt) << '\n');
-
-      // Cases where we actually perform the optimization:
-      // 1. SExt is used in a getelementptr with more than 2 operand =>
-      //    likely we can merge some computation if they are done on 64 bits.
-      // 2. The beginning of the SExt chain is SExt several time. =>
-      //    code sharing is possible.
-
-      bool insert = false;
-      // #1.
-      for (const User *U : SExt->users()) {
-        const Instruction *Inst = dyn_cast<GetElementPtrInst>(U);
-        if (Inst && Inst->getNumOperands() > 2) {
-          DEBUG(dbgs() << "Interesting use in GetElementPtrInst\n" << *Inst
-                       << '\n');
-          insert = true;
-          break;
-        }
-      }
-
-      // #2.
-      // Check the head of the chain.
-      Instruction *Inst = SExt;
-      Value *Last;
-      do {
-        int OpdIdx = 0;
-        const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
-        if (BinOp && isa<ConstantInt>(BinOp->getOperand(0)))
-          OpdIdx = 1;
-        Last = Inst->getOperand(OpdIdx);
-        Inst = dyn_cast<Instruction>(Last);
-      } while (Inst && canGetThrough(Inst) && shouldGetThrough(Inst));
-
-      DEBUG(dbgs() << "Head of the chain:\n" << *Last << '\n');
-      DenseMap<Value *, Instruction *>::iterator AlreadySeen =
-          SeenChains.find(Last);
-      if (insert || AlreadySeen != SeenChains.end()) {
-        DEBUG(dbgs() << "Insert\n");
-        SExtInsts.push_back(SExt);
-        if (AlreadySeen != SeenChains.end() && AlreadySeen->second != nullptr) {
-          DEBUG(dbgs() << "Insert chain member\n");
-          SExtInsts.push_back(AlreadySeen->second);
-          SeenChains[Last] = nullptr;
-        }
-      } else {
-        DEBUG(dbgs() << "Record its chain membership\n");
-        SeenChains[Last] = SExt;
-      }
-    }
-  }
-}
-
-bool AArch64AddressTypePromotion::runOnFunction(Function &F) {
-  if (skipFunction(F))
-    return false;
-
-  if (F.isDeclaration())
-    return false;
-  Func = &F;
-  ConsideredSExtType = Type::getInt64Ty(Func->getContext());
-
-  DEBUG(dbgs() << "*** " << getPassName() << ": " << Func->getName() << '\n');
-
-  Instructions SExtInsts;
-  analyzeSExtension(SExtInsts);
-  return propagateSignExtension(SExtInsts);
-}
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index eb1bbcafe6e6..4b1bb27dce73 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -758,6 +758,9 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {
   setOperationAction(ISD::FP_TO_SINT, VT, Custom);
   setOperationAction(ISD::FP_TO_UINT, VT, Custom);
 
+  if (!VT.isFloatingPoint())
+    setOperationAction(ISD::ABS, VT, Legal);
+
   // [SU][MIN|MAX] are available for all NEON types apart from i64.
   if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
     for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
@@ -2482,6 +2485,9 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     EVT PtrVT = getPointerTy(DAG.getDataLayout());
     return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);
   }
+  case Intrinsic::aarch64_neon_abs:
+    return DAG.getNode(ISD::ABS, dl, Op.getValueType(),
+                       Op.getOperand(1));
   case Intrinsic::aarch64_neon_smax:
     return DAG.getNode(ISD::SMAX, dl, Op.getValueType(),
                        Op.getOperand(1), Op.getOperand(2));
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index ce401206e517..902b08844216 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -2734,60 +2734,36 @@ defm FMOV : FPMoveImmediate<"fmov">;
 defm UABDL   : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl",
                                           int_aarch64_neon_uabd>;
 // Match UABDL in log2-shuffle patterns.
+def : Pat<(abs (v8i16 (sub (zext (v8i8 V64:$opA)),
+                           (zext (v8i8 V64:$opB))))),
+          (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>;
 def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))),
                (v8i16 (add (sub (zext (v8i8 V64:$opA)),
                                 (zext (v8i8 V64:$opB))),
                            (AArch64vashr v8i16:$src, (i32 15))))),
           (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>;
+def : Pat<(abs (v8i16 (sub (zext (extract_high_v16i8 V128:$opA)),
+                           (zext (extract_high_v16i8 V128:$opB))))),
+          (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>;
 def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))),
                (v8i16 (add (sub (zext (extract_high_v16i8 V128:$opA)),
                                 (zext (extract_high_v16i8 V128:$opB))),
                            (AArch64vashr v8i16:$src, (i32 15))))),
           (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>;
-def : Pat<(xor (v4i32 (AArch64vashr v4i32:$src, (i32 31))),
-               (v4i32 (add (sub (zext (v4i16 V64:$opA)),
-                                (zext (v4i16 V64:$opB))),
-                           (AArch64vashr v4i32:$src, (i32 31))))),
+def : Pat<(abs (v4i32 (sub (zext (v4i16 V64:$opA)),
+                           (zext (v4i16 V64:$opB))))),
           (UABDLv4i16_v4i32 V64:$opA, V64:$opB)>;
-def : Pat<(xor (v4i32 (AArch64vashr v4i32:$src, (i32 31))),
-               (v4i32 (add (sub (zext (extract_high_v8i16 V128:$opA)),
-                                (zext (extract_high_v8i16 V128:$opB))),
-                          (AArch64vashr v4i32:$src, (i32 31))))),
+def : Pat<(abs (v4i32 (sub (zext (extract_high_v8i16 V128:$opA)),
+                           (zext (extract_high_v8i16 V128:$opB))))),
           (UABDLv8i16_v4i32 V128:$opA, V128:$opB)>;
-def : Pat<(xor (v2i64 (AArch64vashr v2i64:$src, (i32 63))),
-               (v2i64 (add (sub (zext (v2i32 V64:$opA)),
-                                (zext (v2i32 V64:$opB))),
-                           (AArch64vashr v2i64:$src, (i32 63))))),
+def : Pat<(abs (v2i64 (sub (zext (v2i32 V64:$opA)),
+                           (zext (v2i32 V64:$opB))))),
           (UABDLv2i32_v2i64 V64:$opA, V64:$opB)>;
-def : Pat<(xor (v2i64 (AArch64vashr v2i64:$src, (i32 63))),
-               (v2i64 (add (sub (zext (extract_high_v4i32 V128:$opA)),
-                                (zext (extract_high_v4i32 V128:$opB))),
-                          (AArch64vashr v2i64:$src, (i32 63))))),
+def : Pat<(abs (v2i64 (sub (zext (extract_high_v4i32 V128:$opA)),
+                           (zext (extract_high_v4i32 V128:$opB))))),
           (UABDLv4i32_v2i64 V128:$opA, V128:$opB)>;
 
-defm ABS    : SIMDTwoVectorBHSD<0, 0b01011, "abs", int_aarch64_neon_abs>;
-def : Pat<(xor (v8i8 (AArch64vashr V64:$src, (i32 7))),
-               (v8i8 (add V64:$src, (AArch64vashr V64:$src, (i32 7))))),
-          (ABSv8i8 V64:$src)>;
-def : Pat<(xor (v4i16 (AArch64vashr V64:$src, (i32 15))),
-               (v4i16 (add V64:$src, (AArch64vashr V64:$src, (i32 15))))),
-          (ABSv4i16 V64:$src)>;
-def : Pat<(xor (v2i32 (AArch64vashr V64:$src, (i32 31))),
-               (v2i32 (add V64:$src, (AArch64vashr V64:$src, (i32 31))))),
-          (ABSv2i32 V64:$src)>;
-def : Pat<(xor (v16i8 (AArch64vashr V128:$src, (i32 7))),
-               (v16i8 (add V128:$src, (AArch64vashr V128:$src, (i32 7))))),
-          (ABSv16i8 V128:$src)>;
-def : Pat<(xor (v8i16 (AArch64vashr V128:$src, (i32 15))),
-               (v8i16 (add V128:$src, (AArch64vashr V128:$src, (i32 15))))),
-          (ABSv8i16 V128:$src)>;
-def : Pat<(xor (v4i32 (AArch64vashr V128:$src, (i32 31))),
-               (v4i32 (add V128:$src, (AArch64vashr V128:$src, (i32 31))))),
-          (ABSv4i32 V128:$src)>;
-def : Pat<(xor (v2i64 (AArch64vashr V128:$src, (i32 63))),
-               (v2i64 (add V128:$src, (AArch64vashr V128:$src, (i32 63))))),
-          (ABSv2i64 V128:$src)>;
-
+defm ABS    : SIMDTwoVectorBHSD<0, 0b01011, "abs", abs>;
 defm CLS    : SIMDTwoVectorBHS<0, 0b00100, "cls", int_aarch64_neon_cls>;
 defm CLZ    : SIMDTwoVectorBHS<1, 0b00100, "clz", ctlz>;
 defm CMEQ   : SIMDCmpTwoVector<0, 0b01001, "cmeq", AArch64cmeqz>;
@@ -3359,7 +3335,7 @@ def : Pat<(i64 (int_aarch64_neon_sqsub (i64 FPR64:$Rd),
 // Advanced SIMD two scalar instructions.
 //===----------------------------------------------------------------------===//
 
-defm ABS    : SIMDTwoScalarD<    0, 0b01011, "abs", int_aarch64_neon_abs>;
+defm ABS    : SIMDTwoScalarD<    0, 0b01011, "abs", abs>;
 defm CMEQ   : SIMDCmpTwoScalarD< 0, 0b01001, "cmeq", AArch64cmeqz>;
 defm CMGE   : SIMDCmpTwoScalarD< 1, 0b01000, "cmge", AArch64cmgez>;
 defm CMGT   : SIMDCmpTwoScalarD< 0, 0b01000, "cmgt", AArch64cmgtz>;
diff --git a/lib/Target/AArch64/AArch64RegisterBankInfo.cpp b/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
index 6f9021c4a030..5f895903da6f 100644
--- a/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
+++ b/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
@@ -260,15 +260,15 @@ AArch64RegisterBankInfo::getInstrAlternativeMappings(
     if (MI.getNumOperands() != 3)
       break;
     InstructionMappings AltMappings;
-    InstructionMapping GPRMapping(
+    const InstructionMapping &GPRMapping = getInstructionMapping(
         /*ID*/ 1, /*Cost*/ 1, getValueMapping(PMI_FirstGPR, Size),
         /*NumOperands*/ 3);
-    InstructionMapping FPRMapping(
+    const InstructionMapping &FPRMapping = getInstructionMapping(
         /*ID*/ 2, /*Cost*/ 1, getValueMapping(PMI_FirstFPR, Size),
         /*NumOperands*/ 3);
 
-    AltMappings.emplace_back(std::move(GPRMapping));
-    AltMappings.emplace_back(std::move(FPRMapping));
+    AltMappings.push_back(&GPRMapping);
+    AltMappings.push_back(&FPRMapping);
     return AltMappings;
   }
   case TargetOpcode::G_BITCAST: {
@@ -282,29 +282,29 @@ AArch64RegisterBankInfo::getInstrAlternativeMappings(
       break;
 
     InstructionMappings AltMappings;
-    InstructionMapping GPRMapping(
+    const InstructionMapping &GPRMapping = getInstructionMapping(
         /*ID*/ 1, /*Cost*/ 1,
         getCopyMapping(AArch64::GPRRegBankID, AArch64::GPRRegBankID, Size),
         /*NumOperands*/ 2);
-    InstructionMapping FPRMapping(
+    const InstructionMapping &FPRMapping = getInstructionMapping(
         /*ID*/ 2, /*Cost*/ 1,
         getCopyMapping(AArch64::FPRRegBankID, AArch64::FPRRegBankID, Size),
         /*NumOperands*/ 2);
-    InstructionMapping GPRToFPRMapping(
+    const InstructionMapping &GPRToFPRMapping = getInstructionMapping(
         /*ID*/ 3,
         /*Cost*/ copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank, Size),
         getCopyMapping(AArch64::FPRRegBankID, AArch64::GPRRegBankID, Size),
         /*NumOperands*/ 2);
-    InstructionMapping FPRToGPRMapping(
+    const InstructionMapping &FPRToGPRMapping = getInstructionMapping(
         /*ID*/ 3,
         /*Cost*/ copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank, Size),
         getCopyMapping(AArch64::GPRRegBankID, AArch64::FPRRegBankID, Size),
         /*NumOperands*/ 2);
 
-    AltMappings.emplace_back(std::move(GPRMapping));
-    AltMappings.emplace_back(std::move(FPRMapping));
-    AltMappings.emplace_back(std::move(GPRToFPRMapping));
-    AltMappings.emplace_back(std::move(FPRToGPRMapping));
+    AltMappings.push_back(&GPRMapping);
+    AltMappings.push_back(&FPRMapping);
+    AltMappings.push_back(&GPRToFPRMapping);
+    AltMappings.push_back(&FPRToGPRMapping);
     return AltMappings;
   }
   case TargetOpcode::G_LOAD: {
@@ -318,21 +318,21 @@ AArch64RegisterBankInfo::getInstrAlternativeMappings(
       break;
 
     InstructionMappings AltMappings;
-    InstructionMapping GPRMapping(
+    const InstructionMapping &GPRMapping = getInstructionMapping(
         /*ID*/ 1, /*Cost*/ 1,
         getOperandsMapping({getValueMapping(PMI_FirstGPR, Size),
                             // Addresses are GPR 64-bit.
                             getValueMapping(PMI_FirstGPR, 64)}),
         /*NumOperands*/ 2);
-    InstructionMapping FPRMapping(
+    const InstructionMapping &FPRMapping = getInstructionMapping(
         /*ID*/ 2, /*Cost*/ 1,
         getOperandsMapping({getValueMapping(PMI_FirstFPR, Size),
                             // Addresses are GPR 64-bit.
                             getValueMapping(PMI_FirstGPR, 64)}),
         /*NumOperands*/ 2);
 
-    AltMappings.emplace_back(std::move(GPRMapping));
-    AltMappings.emplace_back(std::move(FPRMapping));
+    AltMappings.push_back(&GPRMapping);
+    AltMappings.push_back(&FPRMapping);
     return AltMappings;
   }
   default:
@@ -373,8 +373,9 @@ static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
   return false;
 }
 
-RegisterBankInfo::InstructionMapping
-AArch64RegisterBankInfo::getSameKindOfOperandsMapping(const MachineInstr &MI) {
+const RegisterBankInfo::InstructionMapping &
+AArch64RegisterBankInfo::getSameKindOfOperandsMapping(
+    const MachineInstr &MI) const {
   const unsigned Opc = MI.getOpcode();
   const MachineFunction &MF = *MI.getParent()->getParent();
   const MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -411,11 +412,11 @@ AArch64RegisterBankInfo::getSameKindOfOperandsMapping(const MachineInstr &MI) {
   }
 #endif // End NDEBUG.
 
-  return InstructionMapping{DefaultMappingID, 1, getValueMapping(RBIdx, Size),
-                            NumOperands};
+  return getInstructionMapping(DefaultMappingID, 1,
+                               getValueMapping(RBIdx, Size), NumOperands);
 }
 
-RegisterBankInfo::InstructionMapping
+const RegisterBankInfo::InstructionMapping &
 AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
   const unsigned Opc = MI.getOpcode();
   const MachineFunction &MF = *MI.getParent()->getParent();
@@ -424,7 +425,8 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
   // Try the default logic for non-generic instructions that are either copies
   // or already have some operands assigned to banks.
   if (!isPreISelGenericOpcode(Opc)) {
-    RegisterBankInfo::InstructionMapping Mapping = getInstrMappingImpl(MI);
+    const RegisterBankInfo::InstructionMapping &Mapping =
+        getInstrMappingImpl(MI);
     if (Mapping.isValid())
       return Mapping;
   }
@@ -462,15 +464,15 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
         DstIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
     const RegisterBank &SrcRB =
         SrcIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
-    return InstructionMapping{
+    return getInstructionMapping(
         DefaultMappingID, copyCost(DstRB, SrcRB, Size),
         getCopyMapping(DstRB.getID(), SrcRB.getID(), Size),
-        /*NumOperands*/ 2};
+        /*NumOperands*/ 2);
   }
   case TargetOpcode::G_SEQUENCE:
     // FIXME: support this, but the generic code is really not going to do
     // anything sane.
-    return InstructionMapping();
+    return getInvalidInstructionMapping();
   default:
     break;
   }
@@ -533,19 +535,17 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
   }
 
   // Finally construct the computed mapping.
-  RegisterBankInfo::InstructionMapping Mapping =
-      InstructionMapping{DefaultMappingID, Cost, nullptr, NumOperands};
   SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands);
   for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
     if (MI.getOperand(Idx).isReg() && MI.getOperand(Idx).getReg()) {
       auto Mapping = getValueMapping(OpRegBankIdx[Idx], OpSize[Idx]);
       if (!Mapping->isValid())
-        return InstructionMapping();
+        return getInvalidInstructionMapping();
 
       OpdsMapping[Idx] = Mapping;
     }
   }
 
-  Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
-  return Mapping;
+  return getInstructionMapping(DefaultMappingID, Cost,
+                               getOperandsMapping(OpdsMapping), NumOperands);
 }
diff --git a/lib/Target/AArch64/AArch64RegisterBankInfo.h b/lib/Target/AArch64/AArch64RegisterBankInfo.h
index 0a795a42c0b1..6d74a47095a9 100644
--- a/lib/Target/AArch64/AArch64RegisterBankInfo.h
+++ b/lib/Target/AArch64/AArch64RegisterBankInfo.h
@@ -98,8 +98,8 @@ class AArch64RegisterBankInfo final : public AArch64GenRegisterBankInfo {
   ///
   /// \return An InstructionMappings with a statically allocated
   /// OperandsMapping.
-  static InstructionMapping
-  getSameKindOfOperandsMapping(const MachineInstr &MI);
+  const InstructionMapping &
+  getSameKindOfOperandsMapping(const MachineInstr &MI) const;
 
 public:
   AArch64RegisterBankInfo(const TargetRegisterInfo &TRI);
@@ -113,7 +113,8 @@ public:
   InstructionMappings
   getInstrAlternativeMappings(const MachineInstr &MI) const override;
 
-  InstructionMapping getInstrMapping(const MachineInstr &MI) const override;
+  const InstructionMapping &
+  getInstrMapping(const MachineInstr &MI) const override;
 };
 } // End llvm namespace.
 #endif
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
index de7108d302dd..5a90fd1eb1ba 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -109,11 +109,6 @@ EnableA53Fix835769("aarch64-fix-cortex-a53-835769", cl::Hidden,
                 cl::init(false));
 
 static cl::opt<bool>
-    EnableAddressTypePromotion("aarch64-enable-type-promotion", cl::Hidden,
-                               cl::desc("Enable the type promotion pass"),
-                               cl::init(false));
-
-static cl::opt<bool>
     EnableGEPOpt("aarch64-enable-gep-opt", cl::Hidden,
                  cl::desc("Enable optimizations on complex GEPs"),
                  cl::init(false));
@@ -146,7 +141,6 @@ extern "C" void LLVMInitializeAArch64Target() {
   initializeGlobalISel(*PR);
   initializeAArch64A53Fix835769Pass(*PR);
   initializeAArch64A57FPLoadBalancingPass(*PR);
-  initializeAArch64AddressTypePromotionPass(*PR);
   initializeAArch64AdvSIMDScalarPass(*PR);
   initializeAArch64CollectLOHPass(*PR);
   initializeAArch64ConditionalComparesPass(*PR);
@@ -382,9 +376,6 @@ bool AArch64PassConfig::addPreISel() {
     addPass(createGlobalMergePass(TM, 4095, OnlyOptimizeForSize));
   }
 
-  if (TM->getOptLevel() != CodeGenOpt::None && EnableAddressTypePromotion)
-    addPass(createAArch64AddressTypePromotionPass());
-
   return false;
 }
 
diff --git a/lib/Target/AArch64/CMakeLists.txt b/lib/Target/AArch64/CMakeLists.txt
index 6d0930c358f1..f0f50f29be0f 100644
--- a/lib/Target/AArch64/CMakeLists.txt
+++ b/lib/Target/AArch64/CMakeLists.txt
@@ -39,7 +39,6 @@ endif()
 
 add_llvm_target(AArch64CodeGen
   AArch64A57FPLoadBalancing.cpp
-  AArch64AddressTypePromotion.cpp
   AArch64AdvSIMDScalarPass.cpp
   AArch64AsmPrinter.cpp
   AArch64CleanupLocalDynamicTLSPass.cpp