author     Dimitry Andric <dim@FreeBSD.org>            2019-08-21 18:13:02 +0000
committer  Dimitry Andric <dim@FreeBSD.org>            2019-08-21 18:13:02 +0000
commit     54db30ce18663e6c2991958f3b5d18362e8e93c4 (patch)
tree       4aa6442802570767398cc83ba484e97b1309bdc2 /contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
parent     35284c22e9c8348159b7ce032ea45f2cdeb65298 (diff)
parent     e6d1592492a3a379186bfb02bd0f4eda0669c0d5 (diff)
Merge llvm trunk r366426, resolve conflicts, and update FREEBSD-Xlist.
Notes:
svn path=/projects/clang900-import/; revision=351344
Diffstat (limited to 'contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp')
-rw-r--r--   contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp   449
1 file changed, 423 insertions(+), 26 deletions(-)
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index bc9bcab83a0a..ff3dfbfaca05 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -1,17 +1,18 @@
 //===-- PPCTargetTransformInfo.cpp - PPC specific TTI ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
 #include "PPCTargetTransformInfo.h"
+#include "llvm/Analysis/CodeMetrics.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/CodeGen/BasicTTIImpl.h"
 #include "llvm/CodeGen/CostTable.h"
 #include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetSchedule.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 using namespace llvm;
@@ -32,6 +33,13 @@ EnablePPCColdCC("ppc-enable-coldcc", cl::Hidden, cl::init(false),
                 cl::desc("Enable using coldcc calling conv for cold "
                          "internal functions"));
 
+// The latency of mtctr is only justified if there are more than 4
+// comparisons that will be removed as a result.
+static cl::opt<unsigned>
+SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden,
+  cl::desc("Loops with a constant trip count smaller than "
+           "this value will not use the count register."));
+
 //===----------------------------------------------------------------------===//
 //
 // PPC cost model.
@@ -205,6 +213,341 @@ unsigned PPCTTIImpl::getUserCost(const User *U,
   return BaseT::getUserCost(U, Operands);
 }
 
+bool PPCTTIImpl::mightUseCTR(BasicBlock *BB,
+                             TargetLibraryInfo *LibInfo) {
+  const PPCTargetMachine &TM = ST->getTargetMachine();
+
+  // Loop through the inline asm constraints and look for something that
+  // clobbers ctr.
+  auto asmClobbersCTR = [](InlineAsm *IA) {
+    InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints();
+    for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) {
+      InlineAsm::ConstraintInfo &C = CIV[i];
+      if (C.Type != InlineAsm::isInput)
+        for (unsigned j = 0, je = C.Codes.size(); j < je; ++j)
+          if (StringRef(C.Codes[j]).equals_lower("{ctr}"))
+            return true;
+    }
+    return false;
+  };
+
+  // Determining the address of a TLS variable results in a function call in
+  // certain TLS models.
+  std::function<bool(const Value*)> memAddrUsesCTR =
+    [&memAddrUsesCTR, &TM](const Value *MemAddr) -> bool {
+    const auto *GV = dyn_cast<GlobalValue>(MemAddr);
+    if (!GV) {
+      // Recurse to check for constants that refer to TLS global variables.
+      if (const auto *CV = dyn_cast<Constant>(MemAddr))
+        for (const auto &CO : CV->operands())
+          if (memAddrUsesCTR(CO))
+            return true;
+
+      return false;
+    }
+
+    if (!GV->isThreadLocal())
+      return false;
+    TLSModel::Model Model = TM.getTLSModel(GV);
+    return Model == TLSModel::GeneralDynamic ||
+      Model == TLSModel::LocalDynamic;
+  };
+
+  auto isLargeIntegerTy = [](bool Is32Bit, Type *Ty) {
+    if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
+      return ITy->getBitWidth() > (Is32Bit ? 32U : 64U);
+
+    return false;
+  };
+
+  for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
+       J != JE; ++J) {
+    if (CallInst *CI = dyn_cast<CallInst>(J)) {
+      // Inline ASM is okay, unless it clobbers the ctr register.
+      if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) {
+        if (asmClobbersCTR(IA))
+          return true;
+        continue;
+      }
+
+      if (Function *F = CI->getCalledFunction()) {
+        // Most intrinsics don't become function calls, but some might.
+        // sin, cos, exp and log are always calls.
+        unsigned Opcode = 0;
+        if (F->getIntrinsicID() != Intrinsic::not_intrinsic) {
+          switch (F->getIntrinsicID()) {
+          default: continue;
+          // If we have a call to ppc_is_decremented_ctr_nonzero, or ppc_mtctr
+          // we're definitely using CTR.
+          case Intrinsic::set_loop_iterations:
+          case Intrinsic::loop_decrement:
+            return true;
+
+// VisualStudio defines setjmp as _setjmp
+#if defined(_MSC_VER) && defined(setjmp) && \
+                       !defined(setjmp_undefined_for_msvc)
+# pragma push_macro("setjmp")
+# undef setjmp
+# define setjmp_undefined_for_msvc
+#endif
+
+          case Intrinsic::setjmp:
+
+#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
+ // let's return it to _setjmp state
+# pragma pop_macro("setjmp")
+# undef setjmp_undefined_for_msvc
+#endif
+
+          case Intrinsic::longjmp:
+
+          // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
+          // because, although it does clobber the counter register, the
+          // control can't then return to inside the loop unless there is also
+          // an eh_sjlj_setjmp.
+          case Intrinsic::eh_sjlj_setjmp:
+
+          case Intrinsic::memcpy:
+          case Intrinsic::memmove:
+          case Intrinsic::memset:
+          case Intrinsic::powi:
+          case Intrinsic::log:
+          case Intrinsic::log2:
+          case Intrinsic::log10:
+          case Intrinsic::exp:
+          case Intrinsic::exp2:
+          case Intrinsic::pow:
+          case Intrinsic::sin:
+          case Intrinsic::cos:
+            return true;
+          case Intrinsic::copysign:
+            if (CI->getArgOperand(0)->getType()->getScalarType()->
+                isPPC_FP128Ty())
+              return true;
+            else
+              continue; // ISD::FCOPYSIGN is never a library call.
+          case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
+          case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
+          case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
+          case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
+          case Intrinsic::rint: Opcode = ISD::FRINT; break;
+          case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
+          case Intrinsic::round: Opcode = ISD::FROUND; break;
+          case Intrinsic::minnum: Opcode = ISD::FMINNUM; break;
+          case Intrinsic::maxnum: Opcode = ISD::FMAXNUM; break;
+          case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO; break;
+          case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO; break;
+          }
+        }
+
+        // PowerPC does not use [US]DIVREM or other library calls for
+        // operations on regular types which are not otherwise library calls
+        // (i.e. soft float or atomics). If adapting for targets that do,
+        // additional care is required here.
+
+        LibFunc Func;
+        if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
+            LibInfo->getLibFunc(F->getName(), Func) &&
+            LibInfo->hasOptimizedCodeGen(Func)) {
+          // Non-read-only functions are never treated as intrinsics.
+          if (!CI->onlyReadsMemory())
+            return true;
+
+          // Conversion happens only for FP calls.
+          if (!CI->getArgOperand(0)->getType()->isFloatingPointTy())
+            return true;
+
+          switch (Func) {
+          default: return true;
+          case LibFunc_copysign:
+          case LibFunc_copysignf:
+            continue; // ISD::FCOPYSIGN is never a library call.
+          case LibFunc_copysignl:
+            return true;
+          case LibFunc_fabs:
+          case LibFunc_fabsf:
+          case LibFunc_fabsl:
+            continue; // ISD::FABS is never a library call.
+          case LibFunc_sqrt:
+          case LibFunc_sqrtf:
+          case LibFunc_sqrtl:
+            Opcode = ISD::FSQRT; break;
+          case LibFunc_floor:
+          case LibFunc_floorf:
+          case LibFunc_floorl:
+            Opcode = ISD::FFLOOR; break;
+          case LibFunc_nearbyint:
+          case LibFunc_nearbyintf:
+          case LibFunc_nearbyintl:
+            Opcode = ISD::FNEARBYINT; break;
+          case LibFunc_ceil:
+          case LibFunc_ceilf:
+          case LibFunc_ceill:
+            Opcode = ISD::FCEIL; break;
+          case LibFunc_rint:
+          case LibFunc_rintf:
+          case LibFunc_rintl:
+            Opcode = ISD::FRINT; break;
+          case LibFunc_round:
+          case LibFunc_roundf:
+          case LibFunc_roundl:
+            Opcode = ISD::FROUND; break;
+          case LibFunc_trunc:
+          case LibFunc_truncf:
+          case LibFunc_truncl:
+            Opcode = ISD::FTRUNC; break;
+          case LibFunc_fmin:
+          case LibFunc_fminf:
+          case LibFunc_fminl:
+            Opcode = ISD::FMINNUM; break;
+          case LibFunc_fmax:
+          case LibFunc_fmaxf:
+          case LibFunc_fmaxl:
+            Opcode = ISD::FMAXNUM; break;
+          }
+        }
+
+        if (Opcode) {
+          EVT EVTy =
+              TLI->getValueType(DL, CI->getArgOperand(0)->getType(), true);
+
+          if (EVTy == MVT::Other)
+            return true;
+
+          if (TLI->isOperationLegalOrCustom(Opcode, EVTy))
+            continue;
+          else if (EVTy.isVector() &&
+                   TLI->isOperationLegalOrCustom(Opcode, EVTy.getScalarType()))
+            continue;
+
+          return true;
+        }
+      }
+
+      return true;
+    } else if (isa<BinaryOperator>(J) &&
+               J->getType()->getScalarType()->isPPC_FP128Ty()) {
+      // Most operations on ppc_f128 values become calls.
+      return true;
+    } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
+               isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
+      CastInst *CI = cast<CastInst>(J);
+      if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
+          CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
+          isLargeIntegerTy(!TM.isPPC64(), CI->getSrcTy()->getScalarType()) ||
+          isLargeIntegerTy(!TM.isPPC64(), CI->getDestTy()->getScalarType()))
+        return true;
+    } else if (isLargeIntegerTy(!TM.isPPC64(),
+                                J->getType()->getScalarType()) &&
+               (J->getOpcode() == Instruction::UDiv ||
+                J->getOpcode() == Instruction::SDiv ||
+                J->getOpcode() == Instruction::URem ||
+                J->getOpcode() == Instruction::SRem)) {
+      return true;
+    } else if (!TM.isPPC64() &&
+               isLargeIntegerTy(false, J->getType()->getScalarType()) &&
+               (J->getOpcode() == Instruction::Shl ||
+                J->getOpcode() == Instruction::AShr ||
+                J->getOpcode() == Instruction::LShr)) {
+      // Only on PPC32, for 128-bit integers (specifically not 64-bit
+      // integers), these might be runtime calls.
+      return true;
+    } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
+      // On PowerPC, indirect jumps use the counter register.
+      return true;
+    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
+      if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries())
+        return true;
+    }
+
+    // FREM is always a call.
+    if (J->getOpcode() == Instruction::FRem)
+      return true;
+
+    if (ST->useSoftFloat()) {
+      switch(J->getOpcode()) {
+      case Instruction::FAdd:
+      case Instruction::FSub:
+      case Instruction::FMul:
+      case Instruction::FDiv:
+      case Instruction::FPTrunc:
+      case Instruction::FPExt:
+      case Instruction::FPToUI:
+      case Instruction::FPToSI:
+      case Instruction::UIToFP:
+      case Instruction::SIToFP:
+      case Instruction::FCmp:
+        return true;
+      }
+    }
+
+    for (Value *Operand : J->operands())
+      if (memAddrUsesCTR(Operand))
+        return true;
+  }
+
+  return false;
+}
+
+bool PPCTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
+                                          AssumptionCache &AC,
+                                          TargetLibraryInfo *LibInfo,
+                                          HardwareLoopInfo &HWLoopInfo) {
+  const PPCTargetMachine &TM = ST->getTargetMachine();
+  TargetSchedModel SchedModel;
+  SchedModel.init(ST);
+
+  // Do not convert small short loops to CTR loop.
+  unsigned ConstTripCount = SE.getSmallConstantTripCount(L);
+  if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) {
+    SmallPtrSet<const Value *, 32> EphValues;
+    CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
+    CodeMetrics Metrics;
+    for (BasicBlock *BB : L->blocks())
+      Metrics.analyzeBasicBlock(BB, *this, EphValues);
+    // 6 is an approximate latency for the mtctr instruction.
+    if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth()))
+      return false;
+  }
+
+  // We don't want to spill/restore the counter register, and so we don't
+  // want to use the counter register if the loop contains calls.
+  for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
+       I != IE; ++I)
+    if (mightUseCTR(*I, LibInfo))
+      return false;
+
+  SmallVector<BasicBlock*, 4> ExitingBlocks;
+  L->getExitingBlocks(ExitingBlocks);
+
+  // If there is an exit edge known to be frequently taken,
+  // we should not transform this loop.
+  for (auto &BB : ExitingBlocks) {
+    Instruction *TI = BB->getTerminator();
+    if (!TI) continue;
+
+    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+      uint64_t TrueWeight = 0, FalseWeight = 0;
+      if (!BI->isConditional() ||
+          !BI->extractProfMetadata(TrueWeight, FalseWeight))
+        continue;
+
+      // If the exit path is more frequent than the loop path,
+      // we return here without further analysis for this loop.
+      bool TrueIsExit = !L->contains(BI->getSuccessor(0));
+      if (( TrueIsExit && FalseWeight < TrueWeight) ||
+          (!TrueIsExit && FalseWeight > TrueWeight))
+        return false;
+    }
+  }
+
+  LLVMContext &C = L->getHeader()->getContext();
+  HWLoopInfo.CountType = TM.isPPC64() ?
+    Type::getInt64Ty(C) : Type::getInt32Ty(C);
+  HWLoopInfo.LoopDecrement = ConstantInt::get(HWLoopInfo.CountType, 1);
+  return true;
+}
+
 void PPCTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                          TTI::UnrollingPreferences &UP) {
   if (ST->getDarwinDirective() == PPC::DIR_A2) {
@@ -239,17 +582,12 @@ bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) {
   return LoopHasReductions;
 }
 
-const PPCTTIImpl::TTI::MemCmpExpansionOptions *
-PPCTTIImpl::enableMemCmpExpansion(bool IsZeroCmp) const {
-  static const auto Options = []() {
-    TTI::MemCmpExpansionOptions Options;
-    Options.LoadSizes.push_back(8);
-    Options.LoadSizes.push_back(4);
-    Options.LoadSizes.push_back(2);
-    Options.LoadSizes.push_back(1);
-    return Options;
-  }();
-  return &Options;
+PPCTTIImpl::TTI::MemCmpExpansionOptions
+PPCTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
+  TTI::MemCmpExpansionOptions Options;
+  Options.LoadSizes = {8, 4, 2, 1};
+  Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
+  return Options;
 }
 
 bool PPCTTIImpl::enableInterleavedAccessVectorization() {
@@ -324,6 +662,33 @@ unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) {
   return 2;
 }
 
+// Adjust the cost of vector instructions on targets which there is overlap
+// between the vector and scalar units, thereby reducing the overall throughput
+// of vector code wrt. scalar code.
+int PPCTTIImpl::vectorCostAdjustment(int Cost, unsigned Opcode, Type *Ty1,
+                                     Type *Ty2) {
+  if (!ST->vectorsUseTwoUnits() || !Ty1->isVectorTy())
+    return Cost;
+
+  std::pair<int, MVT> LT1 = TLI->getTypeLegalizationCost(DL, Ty1);
+  // If type legalization involves splitting the vector, we don't want to
+  // double the cost at every step - only the last step.
+  if (LT1.first != 1 || !LT1.second.isVector())
+    return Cost;
+
+  int ISD = TLI->InstructionOpcodeToISD(Opcode);
+  if (TLI->isOperationExpand(ISD, LT1.second))
+    return Cost;
+
+  if (Ty2) {
+    std::pair<int, MVT> LT2 = TLI->getTypeLegalizationCost(DL, Ty2);
+    if (LT2.first != 1 || !LT2.second.isVector())
+      return Cost;
+  }
+
+  return Cost * 2;
+}
+
 int PPCTTIImpl::getArithmeticInstrCost(
     unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
     TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
@@ -331,8 +696,9 @@ int PPCTTIImpl::getArithmeticInstrCost(
   assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
 
   // Fallback to the default implementation.
-  return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
-                                       Opd1PropInfo, Opd2PropInfo);
+  int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
+                                           Opd1PropInfo, Opd2PropInfo);
+  return vectorCostAdjustment(Cost, Opcode, Ty, nullptr);
 }
 
 int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
@@ -345,19 +711,22 @@ int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
   // instruction). We need one such shuffle instruction for each actual
   // register (this is not true for arbitrary shuffles, but is true for the
   // structured types of shuffles covered by TTI::ShuffleKind).
-  return LT.first;
+  return vectorCostAdjustment(LT.first, Instruction::ShuffleVector, Tp,
+                              nullptr);
 }
 
 int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                  const Instruction *I) {
   assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
 
-  return BaseT::getCastInstrCost(Opcode, Dst, Src);
+  int Cost = BaseT::getCastInstrCost(Opcode, Dst, Src);
+  return vectorCostAdjustment(Cost, Opcode, Dst, Src);
 }
 
 int PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                    const Instruction *I) {
-  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
+  int Cost = BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
+  return vectorCostAdjustment(Cost, Opcode, ValTy, nullptr);
}
 
 int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
@@ -366,18 +735,23 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
   int ISD = TLI->InstructionOpcodeToISD(Opcode);
   assert(ISD && "Invalid opcode");
 
+  int Cost = BaseT::getVectorInstrCost(Opcode, Val, Index);
+  Cost = vectorCostAdjustment(Cost, Opcode, Val, nullptr);
+
   if (ST->hasVSX() && Val->getScalarType()->isDoubleTy()) {
-    // Double-precision scalars are already located in index #0.
-    if (Index == 0)
+    // Double-precision scalars are already located in index #0 (or #1 if LE).
+    if (ISD == ISD::EXTRACT_VECTOR_ELT &&
+        Index == (ST->isLittleEndian() ? 1 : 0))
       return 0;
 
-    return BaseT::getVectorInstrCost(Opcode, Val, Index);
+    return Cost;
+
   } else if (ST->hasQPX() && Val->getScalarType()->isFloatingPointTy()) {
     // Floating point scalars are already located in index #0.
     if (Index == 0)
       return 0;
 
-    return BaseT::getVectorInstrCost(Opcode, Val, Index);
+    return Cost;
   }
 
   // Estimated cost of a load-hit-store delay. This was obtained
@@ -394,9 +768,9 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
   // these need to be estimated as very costly.
   if (ISD == ISD::EXTRACT_VECTOR_ELT ||
       ISD == ISD::INSERT_VECTOR_ELT)
-    return LHSPenalty + BaseT::getVectorInstrCost(Opcode, Val, Index);
+    return LHSPenalty + Cost;
 
-  return BaseT::getVectorInstrCost(Opcode, Val, Index);
+  return Cost;
 }
 
 int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
@@ -407,6 +781,7 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
          "Invalid Opcode");
 
   int Cost = BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+  Cost = vectorCostAdjustment(Cost, Opcode, Src, nullptr);
 
   bool IsAltivecType = ST->hasAltivec() &&
                        (LT.second == MVT::v16i8 || LT.second == MVT::v8i16 ||
@@ -500,3 +875,25 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
   return Cost;
 }
 
+bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
+                            LoopInfo *LI, DominatorTree *DT,
+                            AssumptionCache *AC, TargetLibraryInfo *LibInfo) {
+  // Process nested loops first.
+  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+    if (canSaveCmp(*I, BI, SE, LI, DT, AC, LibInfo))
+      return false; // Stop search.
+
+  HardwareLoopInfo HWLoopInfo(L);
+
+  if (!HWLoopInfo.canAnalyze(*LI))
+    return false;
+
+  if (!isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo))
+    return false;
+
+  if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT))
+    return false;
+
+  *BI = HWLoopInfo.ExitBranch;
+  return true;
+}
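
The most interesting new policy in this diff is the small-loop guard at the top of PPCTTIImpl::isHardwareLoopProfitable(): a loop with a known, small trip count is only converted into a CTR hardware loop when its body contains enough instructions to amortize the latency of mtctr. The standalone C++ sketch below restates that check outside of LLVM, assuming the fixed latency of 6 and the default -min-ctr-loop-threshold of 4 that appear in the diff; the function and parameter names (worthUsingCTR, tripCount, numLoopInsts, issueWidth) are illustrative, not part of the in-tree API.

#include <iostream>

// Hypothetical restatement of the small-CTR-loop rule from
// PPCTTIImpl::isHardwareLoopProfitable(): a loop with a known, small trip
// count is only worth converting to a hardware (CTR) loop when its body has
// enough instructions to hide the mtctr latency.
bool worthUsingCTR(unsigned tripCount,     // constant trip count, 0 if unknown
                   unsigned numLoopInsts,  // instructions in the loop body
                   unsigned issueWidth) {  // instructions issued per cycle
  const unsigned SmallCTRLoopThreshold = 4; // default of -min-ctr-loop-threshold
  const unsigned MtctrLatency = 6;          // approximate latency used in the diff

  // Unknown or large trip counts fall through to the other profitability checks.
  if (tripCount == 0 || tripCount >= SmallCTRLoopThreshold)
    return true;

  // For short constant-trip-count loops, require enough work per iteration.
  return numLoopInsts > MtctrLatency * issueWidth;
}

int main() {
  // A 2-iteration loop with 8 instructions on a 4-wide core: not worth it.
  std::cout << worthUsingCTR(2, 8, 4) << '\n';   // prints 0
  // The same short loop with 40 instructions per iteration: convert it.
  std::cout << worthUsingCTR(2, 40, 4) << '\n';  // prints 1
}

In the real pass, the instruction count comes from CodeMetrics over the loop blocks and the issue width from TargetSchedModel, and the rest of isHardwareLoopProfitable() (the mightUseCTR() scan for calls and CTR clobbers, plus the profile-weighted exit-edge check) still has to pass before the loop is converted.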