Diffstat (limited to 'lib/CodeGen/CodeGenPrepare.cpp')
 lib/CodeGen/CodeGenPrepare.cpp | 523 ++++++++++++++++++++++++++-----------
 1 file changed, 397 insertions(+), 126 deletions(-)
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index c35f8666fa3c..52b4bbea012b 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -1,9 +1,8 @@
//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -16,6 +15,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -32,6 +32,7 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -292,15 +293,16 @@ class TypePromotionTransaction;
/// Keep track of SExt promoted.
ValueToSExts ValToSExtendedUses;
- /// True if CFG is modified in any way.
- bool ModifiedDT;
-
/// True if optimizing for size.
bool OptSize;
/// DataLayout for the Function being processed.
const DataLayout *DL = nullptr;
+ /// Building the dominator tree can be expensive, so we only build it
+ /// lazily and update it when required.
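+  /// Any transform that changes the CFG must reset this (see the DT.reset()
+  /// calls below) so that a stale tree is never queried.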
+ std::unique_ptr<DominatorTree> DT;
+
public:
static char ID; // Pass identification, replacement for typeid
@@ -339,6 +341,13 @@ class TypePromotionTransaction;
}
}
+ // Get the DominatorTree, building if necessary.
+ DominatorTree &getDT(Function &F) {
+ if (!DT)
+ DT = llvm::make_unique<DominatorTree>(F);
+ return *DT;
+ }
+
bool eliminateFallThrough(Function &F);
bool eliminateMostlyEmptyBlocks(Function &F);
BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
@@ -355,11 +364,12 @@ class TypePromotionTransaction;
bool optimizeExt(Instruction *&I);
bool optimizeExtUses(Instruction *I);
bool optimizeLoadExt(LoadInst *Load);
+ bool optimizeShiftInst(BinaryOperator *BO);
bool optimizeSelectInst(SelectInst *SI);
bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
bool optimizeSwitchInst(SwitchInst *SI);
bool optimizeExtractElementInst(Instruction *Inst);
- bool dupRetToEnableTailCallOpts(BasicBlock *BB);
+ bool dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT);
bool placeDbgValues(Function &F);
bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
@@ -374,8 +384,15 @@ class TypePromotionTransaction;
bool AllowPromotionWithoutCommonHeader,
bool HasPromoted, TypePromotionTransaction &TPT,
SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
- bool splitBranchCondition(Function &F);
+ bool splitBranchCondition(Function &F, bool &ModifiedDT);
bool simplifyOffsetableRelocate(Instruction &I);
+
+ bool tryToSinkFreeOperands(Instruction *I);
+ bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, CmpInst *Cmp,
+ Intrinsic::ID IID);
+ bool optimizeCmp(CmpInst *Cmp, bool &ModifiedDT);
+ bool combineToUSubWithOverflow(CmpInst *Cmp, bool &ModifiedDT);
+ bool combineToUAddWithOverflow(CmpInst *Cmp, bool &ModifiedDT);
};
} // end anonymous namespace
@@ -401,7 +418,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
InsertedInsts.clear();
PromotedInsts.clear();
- ModifiedDT = false;
if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) {
TM = &TPC->getTM<TargetMachine>();
SubtargetInfo = TM->getSubtargetImpl(F);
@@ -413,7 +429,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
BPI.reset(new BranchProbabilityInfo(F, *LI));
BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
- OptSize = F.optForSize();
+ OptSize = F.hasOptSize();
ProfileSummaryInfo *PSI =
&getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
@@ -444,8 +460,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// unconditional branch.
EverMadeChange |= eliminateMostlyEmptyBlocks(F);
+ bool ModifiedDT = false;
if (!DisableBranchOpts)
- EverMadeChange |= splitBranchCondition(F);
+ EverMadeChange |= splitBranchCondition(F, ModifiedDT);
// Split some critical edges where one of the sources is an indirect branch,
// to help generate sane code for PHIs involving such edges.
@@ -454,6 +471,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
bool MadeChange = true;
while (MadeChange) {
MadeChange = false;
+ DT.reset();
for (Function::iterator I = F.begin(); I != F.end(); ) {
BasicBlock *BB = &*I++;
bool ModifiedDTOnIteration = false;
@@ -654,6 +672,16 @@ bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
BB->getSinglePredecessor()->getSingleSuccessor()))
return false;
+ // Skip merging if the block's successor is also a successor to any callbr
+ // that leads to this block.
+ // FIXME: Is this really needed? Is this a correctness issue?
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ if (auto *CBI = dyn_cast<CallBrInst>((*PI)->getTerminator()))
+ for (unsigned i = 0, e = CBI->getNumSuccessors(); i != e; ++i)
+ if (DestBB == CBI->getSuccessor(i))
+ return false;
+ }
+
// Try to skip merging if the unique predecessor of BB is terminated by a
// switch or indirect branch instruction, and BB is used as an incoming block
// of PHIs in DestBB. In such case, merging BB and DestBB would cause ISel to
@@ -1040,7 +1068,7 @@ bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) {
return MadeChange;
}
-/// SinkCast - Sink the specified cast instruction into its user blocks
+/// Sink the specified cast instruction into its user blocks.
static bool SinkCast(CastInst *CI) {
BasicBlock *DefBB = CI->getParent();
@@ -1114,8 +1142,8 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
// Sink only "cheap" (or nop) address-space casts. This is a weaker condition
// than sinking only nop casts, but is helpful on some platforms.
if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) {
- if (!TLI.isCheapAddrSpaceCast(ASC->getSrcAddressSpace(),
- ASC->getDestAddressSpace()))
+ if (!TLI.isFreeAddrSpaceCast(ASC->getSrcAddressSpace(),
+ ASC->getDestAddressSpace()))
return false;
}
@@ -1148,54 +1176,169 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
return SinkCast(CI);
}
-/// Try to combine CI into a call to the llvm.uadd.with.overflow intrinsic if
-/// possible.
-///
-/// Return true if any changes were made.
-static bool CombineUAddWithOverflow(CmpInst *CI) {
- Value *A, *B;
- Instruction *AddI;
- if (!match(CI,
- m_UAddWithOverflow(m_Value(A), m_Value(B), m_Instruction(AddI))))
+bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
+ CmpInst *Cmp,
+ Intrinsic::ID IID) {
+ if (BO->getParent() != Cmp->getParent()) {
+ // We used to use a dominator tree here to allow multi-block optimization.
+ // But that was problematic because:
+ // 1. It could cause a perf regression by hoisting the math op into the
+ // critical path.
+ // 2. It could cause a perf regression by creating a value that was live
+ // across multiple blocks and increasing register pressure.
+ // 3. Use of a dominator tree could cause large compile-time regression.
+ // This is because we recompute the DT on every change in the main CGP
+ // run-loop. The recomputing is probably unnecessary in many cases, so if
+ // that was fixed, using a DT here would be ok.
+ return false;
+ }
+
+ // We allow matching the canonical IR (add X, C) back to (usubo X, -C).
+ Value *Arg0 = BO->getOperand(0);
+ Value *Arg1 = BO->getOperand(1);
+ if (BO->getOpcode() == Instruction::Add &&
+ IID == Intrinsic::usub_with_overflow) {
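+    // For example, IR canonicalization turns (sub X, 5) into (add X, -5);
+    // negating the constant here recovers the 5 needed for usubo(X, 5).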
+ assert(isa<Constant>(Arg1) && "Unexpected input for usubo");
+ Arg1 = ConstantExpr::getNeg(cast<Constant>(Arg1));
+ }
+
+ // Insert at the first instruction of the pair.
+ Instruction *InsertPt = nullptr;
+ for (Instruction &Iter : *Cmp->getParent()) {
+ if (&Iter == BO || &Iter == Cmp) {
+ InsertPt = &Iter;
+ break;
+ }
+ }
+ assert(InsertPt != nullptr && "Parent block did not contain cmp or binop");
+
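+  // Schematically, for an i32 uadd the replacement looks like:
+  //   %mathov = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
+  //   %math = extractvalue { i32, i1 } %mathov, 0
+  //   %ov = extractvalue { i32, i1 } %mathov, 1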
+ IRBuilder<> Builder(InsertPt);
+ Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
+ Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
+ Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
+ BO->replaceAllUsesWith(Math);
+ Cmp->replaceAllUsesWith(OV);
+ BO->eraseFromParent();
+ Cmp->eraseFromParent();
+ return true;
+}
+
+/// Match special-case patterns that check for unsigned add overflow.
+static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp,
+ BinaryOperator *&Add) {
+ // Add = add A, 1; Cmp = icmp eq A,-1 (overflow if A is max val)
+ // Add = add A,-1; Cmp = icmp ne A, 0 (overflow if A is non-zero)
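+  // In both cases the compare on the pre-add value is exactly the overflow
+  // bit of the adjusted add, so we rewrite B to the matching add constant.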
+ Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
+
+ // We are not expecting non-canonical/degenerate code. Just bail out.
+ if (isa<Constant>(A))
+ return false;
+
+ ICmpInst::Predicate Pred = Cmp->getPredicate();
+ if (Pred == ICmpInst::ICMP_EQ && match(B, m_AllOnes()))
+ B = ConstantInt::get(B->getType(), 1);
+ else if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt()))
+ B = ConstantInt::get(B->getType(), -1);
+ else
return false;
- Type *Ty = AddI->getType();
- if (!isa<IntegerType>(Ty))
+ // Check the users of the variable operand of the compare looking for an add
+ // with the adjusted constant.
+ for (User *U : A->users()) {
+ if (match(U, m_Add(m_Specific(A), m_Specific(B)))) {
+ Add = cast<BinaryOperator>(U);
+ return true;
+ }
+ }
+ return false;
+}
+
+/// Try to combine the compare into a call to the llvm.uadd.with.overflow
+/// intrinsic. Return true if any changes were made.
+bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
+ bool &ModifiedDT) {
+ Value *A, *B;
+ BinaryOperator *Add;
+ if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add))))
+ if (!matchUAddWithOverflowConstantEdgeCases(Cmp, Add))
+ return false;
+
+ if (!TLI->shouldFormOverflowOp(ISD::UADDO,
+ TLI->getValueType(*DL, Add->getType())))
return false;
- // We don't want to move around uses of condition values this late, so we we
+ // We don't want to move around uses of condition values this late, so we
// check if it is legal to create the call to the intrinsic in the basic
- // block containing the icmp:
+ // block containing the icmp.
+ if (Add->getParent() != Cmp->getParent() && !Add->hasOneUse())
+ return false;
- if (AddI->getParent() != CI->getParent() && !AddI->hasOneUse())
+ if (!replaceMathCmpWithIntrinsic(Add, Cmp, Intrinsic::uadd_with_overflow))
return false;
-#ifndef NDEBUG
- // Someday m_UAddWithOverflow may get smarter, but this is a safe assumption
- // for now:
- if (AddI->hasOneUse())
- assert(*AddI->user_begin() == CI && "expected!");
-#endif
+ // Reset callers - do not crash by iterating over a dead instruction.
+ ModifiedDT = true;
+ return true;
+}
+
+bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
+ bool &ModifiedDT) {
+ // We are not expecting non-canonical/degenerate code. Just bail out.
+ Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
+ if (isa<Constant>(A) && isa<Constant>(B))
+ return false;
+
+ // Convert (A u> B) to (A u< B) to simplify pattern matching.
+ ICmpInst::Predicate Pred = Cmp->getPredicate();
+ if (Pred == ICmpInst::ICMP_UGT) {
+ std::swap(A, B);
+ Pred = ICmpInst::ICMP_ULT;
+ }
+ // Convert special-case: (A == 0) is the same as (A u< 1).
+ if (Pred == ICmpInst::ICMP_EQ && match(B, m_ZeroInt())) {
+ B = ConstantInt::get(B->getType(), 1);
+ Pred = ICmpInst::ICMP_ULT;
+ }
+ // Convert special-case: (A != 0) is the same as (0 u< A).
+ if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt())) {
+ std::swap(A, B);
+ Pred = ICmpInst::ICMP_ULT;
+ }
+ if (Pred != ICmpInst::ICMP_ULT)
+ return false;
+
+ // Walk the users of a variable operand of a compare looking for a subtract or
+ // add with that same operand. Also match the 2nd operand of the compare to
+ // the add/sub, but that may be a negated constant operand of an add.
+ Value *CmpVariableOperand = isa<Constant>(A) ? B : A;
+ BinaryOperator *Sub = nullptr;
+ for (User *U : CmpVariableOperand->users()) {
+ // A - B, A u< B --> usubo(A, B)
+ if (match(U, m_Sub(m_Specific(A), m_Specific(B)))) {
+ Sub = cast<BinaryOperator>(U);
+ break;
+ }
+
+ // A + (-C), A u< C (canonicalized form of (sub A, C))
+ const APInt *CmpC, *AddC;
+ if (match(U, m_Add(m_Specific(A), m_APInt(AddC))) &&
+ match(B, m_APInt(CmpC)) && *AddC == -(*CmpC)) {
+ Sub = cast<BinaryOperator>(U);
+ break;
+ }
+ }
+ if (!Sub)
+ return false;
+
+ if (!TLI->shouldFormOverflowOp(ISD::USUBO,
+ TLI->getValueType(*DL, Sub->getType())))
+ return false;
+
+ if (!replaceMathCmpWithIntrinsic(Sub, Cmp, Intrinsic::usub_with_overflow))
+ return false;
- Module *M = CI->getModule();
- Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
-
- auto *InsertPt = AddI->hasOneUse() ? CI : AddI;
-
- DebugLoc Loc = CI->getDebugLoc();
- auto *UAddWithOverflow =
- CallInst::Create(F, {A, B}, "uadd.overflow", InsertPt);
- UAddWithOverflow->setDebugLoc(Loc);
- auto *UAdd = ExtractValueInst::Create(UAddWithOverflow, 0, "uadd", InsertPt);
- UAdd->setDebugLoc(Loc);
- auto *Overflow =
- ExtractValueInst::Create(UAddWithOverflow, 1, "overflow", InsertPt);
- Overflow->setDebugLoc(Loc);
-
- CI->replaceAllUsesWith(Overflow);
- AddI->replaceAllUsesWith(UAdd);
- CI->eraseFromParent();
- AddI->eraseFromParent();
+ // Reset callers - do not crash by iterating over a dead instruction.
+ ModifiedDT = true;
return true;
}
@@ -1205,18 +1348,19 @@ static bool CombineUAddWithOverflow(CmpInst *CI) {
/// lose; some adjustment may be wanted there.
///
/// Return true if any changes are made.
-static bool SinkCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
- BasicBlock *DefBB = CI->getParent();
+static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
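+  // When the target has multiple condition registers, cmp results are cheap
+  // to keep live, so duplicating the cmp into user blocks rarely pays off.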
+ if (TLI.hasMultipleConditionRegisters())
+ return false;
// Avoid sinking soft-FP comparisons, since this can move them into a loop.
- if (TLI && TLI->useSoftFloat() && isa<FCmpInst>(CI))
+ if (TLI.useSoftFloat() && isa<FCmpInst>(Cmp))
return false;
// Only insert a cmp in each block once.
DenseMap<BasicBlock*, CmpInst*> InsertedCmps;
bool MadeChange = false;
- for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
+ for (Value::user_iterator UI = Cmp->user_begin(), E = Cmp->user_end();
UI != E; ) {
Use &TheUse = UI.getUse();
Instruction *User = cast<Instruction>(*UI);
@@ -1230,6 +1374,7 @@ static bool SinkCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
// Figure out which BB this cmp is used in.
BasicBlock *UserBB = User->getParent();
+ BasicBlock *DefBB = Cmp->getParent();
// If this user is in the same block as the cmp, don't change the cmp.
if (UserBB == DefBB) continue;
@@ -1241,10 +1386,11 @@ static bool SinkCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
assert(InsertPt != UserBB->end());
InsertedCmp =
- CmpInst::Create(CI->getOpcode(), CI->getPredicate(),
- CI->getOperand(0), CI->getOperand(1), "", &*InsertPt);
+ CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(),
+ Cmp->getOperand(0), Cmp->getOperand(1), "",
+ &*InsertPt);
// Propagate the debug info.
- InsertedCmp->setDebugLoc(CI->getDebugLoc());
+ InsertedCmp->setDebugLoc(Cmp->getDebugLoc());
}
// Replace a use of the cmp with a use of the new cmp.
@@ -1254,19 +1400,22 @@ static bool SinkCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
}
// If we removed all uses, nuke the cmp.
- if (CI->use_empty()) {
- CI->eraseFromParent();
+ if (Cmp->use_empty()) {
+ Cmp->eraseFromParent();
MadeChange = true;
}
return MadeChange;
}
-static bool OptimizeCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
- if (SinkCmpExpression(CI, TLI))
+bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, bool &ModifiedDT) {
+ if (sinkCmpExpression(Cmp, *TLI))
return true;
- if (CombineUAddWithOverflow(CI))
+ if (combineToUAddWithOverflow(Cmp, ModifiedDT))
+ return true;
+
+ if (combineToUSubWithOverflow(Cmp, ModifiedDT))
return true;
return false;
@@ -1301,7 +1450,7 @@ static bool sinkAndCmp0Expression(Instruction *AndI,
for (auto *U : AndI->users()) {
Instruction *User = cast<Instruction>(U);
- // Only sink for and mask feeding icmp with 0.
+ // Only sink 'and' feeding icmp with 0.
if (!isa<ICmpInst>(User))
return false;
@@ -1704,9 +1853,23 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
if (II) {
switch (II->getIntrinsicID()) {
default: break;
+ case Intrinsic::experimental_widenable_condition: {
+      // Give up on future widening opportunities so that we can fold away dead
+ // paths and merge blocks before going into block-local instruction
+ // selection.
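+      // llvm.experimental.widenable.condition is specified to lower to 'true'
+      // once no further widening will occur, so folding it here is sound.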
+ if (II->use_empty()) {
+ II->eraseFromParent();
+ return true;
+ }
+ Constant *RetVal = ConstantInt::getTrue(II->getContext());
+ resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
+ replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
+ });
+ return true;
+ }
case Intrinsic::objectsize: {
// Lower all uses of llvm.objectsize.*
- ConstantInt *RetVal =
+ Value *RetVal =
lowerObjectSizeCall(II, *DL, TLInfo, /*MustSucceed=*/true);
resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
@@ -1735,6 +1898,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
InsertedInsts.insert(ExtVal);
return true;
}
+
case Intrinsic::launder_invariant_group:
case Intrinsic::strip_invariant_group: {
Value *ArgVal = II->getArgOperand(0);
@@ -1818,7 +1982,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
/// %tmp2 = tail call i32 @f2()
/// ret i32 %tmp2
/// @endcode
-bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) {
+bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
+                                                bool &ModifiedDT) {
if (!TLI)
return false;
@@ -1846,10 +2010,8 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) {
// return is the first instruction in the block.
if (PN) {
BasicBlock::iterator BI = BB->begin();
- do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
- if (&*BI == BCI)
- // Also skip over the bitcast.
- ++BI;
+ // Skip over debug and the bitcast.
+ do { ++BI; } while (isa<DbgInfoIntrinsic>(BI) || &*BI == BCI);
if (&*BI != RetI)
return false;
} else {
@@ -1865,7 +2027,9 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) {
SmallVector<CallInst*, 4> TailCalls;
if (PN) {
for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
- CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I));
+ // Look through bitcasts.
+ Value *IncomingVal = PN->getIncomingValue(I)->stripPointerCasts();
+ CallInst *CI = dyn_cast<CallInst>(IncomingVal);
// Make sure the phi value is indeed produced by the tail call.
if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) &&
TLI->mayBeEmittedAsTailCall(CI) &&
@@ -1929,6 +2093,7 @@ struct ExtAddrMode : public TargetLowering::AddrMode {
Value *BaseReg = nullptr;
Value *ScaledReg = nullptr;
Value *OriginalValue = nullptr;
+ bool InBounds = true;
enum FieldName {
NoField = 0x00,
@@ -1940,6 +2105,7 @@ struct ExtAddrMode : public TargetLowering::AddrMode {
MultipleFields = 0xff
};
+
ExtAddrMode() = default;
void print(raw_ostream &OS) const;
@@ -1958,6 +2124,10 @@ struct ExtAddrMode : public TargetLowering::AddrMode {
ScaledReg->getType() != other.ScaledReg->getType())
return MultipleFields;
+ // Conservatively reject 'inbounds' mismatches.
+ if (InBounds != other.InBounds)
+ return MultipleFields;
+
// Check each field to see if it differs.
unsigned Result = NoField;
if (BaseReg != other.BaseReg)
@@ -2056,6 +2226,8 @@ static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
void ExtAddrMode::print(raw_ostream &OS) const {
bool NeedPlus = false;
OS << "[";
+ if (InBounds)
+ OS << "inbounds ";
if (BaseGV) {
OS << (NeedPlus ? " + " : "")
<< "GV:";
@@ -3126,6 +3298,8 @@ private:
PhiNodeSet &PhiNodesToMatch) {
SmallVector<PHIPair, 8> WorkList;
Matcher.insert({ PHI, Candidate });
+ SmallSet<PHINode *, 8> MatchedPHIs;
+ MatchedPHIs.insert(PHI);
WorkList.push_back({ PHI, Candidate });
SmallSet<PHIPair, 8> Visited;
while (!WorkList.empty()) {
@@ -3158,8 +3332,10 @@ private:
if (Matcher.count({ FirstPhi, SecondPhi }))
continue;
// So the values are different and do not match. So we need them to
- // match.
- Matcher.insert({ FirstPhi, SecondPhi });
+ // match. (But we register no more than one match per PHI node, so that
+ // we won't later try to replace them twice.)
+ if (!MatchedPHIs.insert(FirstPhi).second)
+ Matcher.insert({ FirstPhi, SecondPhi });
// But we must check it.
WorkList.push_back({ FirstPhi, SecondPhi });
}
@@ -3354,6 +3530,7 @@ bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
ConstantInt *CI = nullptr; Value *AddLHS = nullptr;
if (isa<Instruction>(ScaleReg) && // not a constant expr.
match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
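+    // Folding the add into the scale means the resulting address expression
+    // no longer carries any inbounds guarantee from an original GEP.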
+ TestAddrMode.InBounds = false;
TestAddrMode.ScaledReg = AddLHS;
TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
@@ -3928,6 +4105,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
+ AddrMode.InBounds = false;
if (matchAddr(AddrInst->getOperand(1), Depth+1) &&
matchAddr(AddrInst->getOperand(0), Depth+1))
return true;
@@ -3954,6 +4132,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
case Instruction::Mul:
case Instruction::Shl: {
// Can only handle X*C and X << C.
+ AddrMode.InBounds = false;
ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
if (!RHS || RHS->getBitWidth() > 64)
return false;
@@ -4005,8 +4184,11 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
if (ConstantOffset == 0 ||
TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) {
// Check to see if we can fold the base pointer in too.
- if (matchAddr(AddrInst->getOperand(0), Depth+1))
+ if (matchAddr(AddrInst->getOperand(0), Depth+1)) {
+ if (!cast<GEPOperator>(AddrInst)->isInBounds())
+ AddrMode.InBounds = false;
return true;
+ }
} else if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
ConstantOffset > 0) {
@@ -4020,15 +4202,11 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
(BaseI && !isa<CastInst>(BaseI) &&
!isa<GetElementPtrInst>(BaseI))) {
- // If the base is an instruction, make sure the GEP is not in the same
- // basic block as the base. If the base is an argument or global
- // value, make sure the GEP is not in the entry block. Otherwise,
- // instruction selection can undo the split. Also make sure the
- // parent block allows inserting non-PHI instructions before the
- // terminator.
+ // Make sure the parent block allows inserting non-PHI instructions
+ // before the terminator.
BasicBlock *Parent =
BaseI ? BaseI->getParent() : &GEP->getFunction()->getEntryBlock();
- if (GEP->getParent() != Parent && !Parent->getTerminator()->isEHPad())
+ if (!Parent->getTerminator()->isEHPad())
LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
}
}
@@ -4042,6 +4220,8 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
// See if the scale and offset amount is valid for this target.
AddrMode.BaseOffs += ConstantOffset;
+ if (!cast<GEPOperator>(AddrInst)->isInBounds())
+ AddrMode.InBounds = false;
// Match the base operand of the GEP.
if (!matchAddr(AddrInst->getOperand(0), Depth+1)) {
@@ -4268,7 +4448,7 @@ static bool FindAllMemoryUses(
if (!MightBeFoldableInst(I))
return true;
- const bool OptSize = I->getFunction()->optForSize();
+ const bool OptSize = I->getFunction()->hasOptSize();
// Loop over all the uses, recursively processing them.
for (Use &U : I->uses()) {
@@ -4556,8 +4736,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP);
GetElementPtrInst *GEP = LargeOffsetGEP.first;
- if (GEP && GEP->getParent() != MemoryInst->getParent() &&
- !NewGEPBases.count(GEP)) {
+ if (GEP && !NewGEPBases.count(GEP)) {
// If splitting the underlying data structure can reduce the offset of a
// GEP, collect the GEP. Skip the GEPs that are the new bases of
// previously split data structures.
@@ -4727,7 +4906,11 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// SDAG consecutive load/store merging.
if (ResultPtr->getType() != I8PtrTy)
ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
- ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
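+      // Preserve 'inbounds' only when every matched component of the address
+      // allowed it (AddrMode.InBounds tracks this during matching).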
+ ResultPtr =
+ AddrMode.InBounds
+ ? Builder.CreateInBoundsGEP(I8Ty, ResultPtr, ResultIndex,
+ "sunkaddr")
+ : Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
}
ResultIndex = V;
@@ -4738,7 +4921,11 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
} else {
if (ResultPtr->getType() != I8PtrTy)
ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
- SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
+ SunkAddr =
+ AddrMode.InBounds
+ ? Builder.CreateInBoundsGEP(I8Ty, ResultPtr, ResultIndex,
+ "sunkaddr")
+ : Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
}
if (SunkAddr->getType() != Addr->getType())
@@ -5037,7 +5224,6 @@ bool CodeGenPrepare::tryToPromoteExts(
/// Merging redundant sexts when one is dominating the other.
bool CodeGenPrepare::mergeSExts(Function &F) {
- DominatorTree DT(F);
bool Changed = false;
for (auto &Entry : ValToSExtendedUses) {
SExts &Insts = Entry.second;
@@ -5048,7 +5234,7 @@ bool CodeGenPrepare::mergeSExts(Function &F) {
continue;
bool inserted = false;
for (auto &Pt : CurPts) {
- if (DT.dominates(Inst, Pt)) {
+ if (getDT(F).dominates(Inst, Pt)) {
Pt->replaceAllUsesWith(Inst);
RemovedInsts.insert(Pt);
Pt->removeFromParent();
@@ -5057,7 +5243,7 @@ bool CodeGenPrepare::mergeSExts(Function &F) {
Changed = true;
break;
}
- if (!DT.dominates(Pt, Inst))
+ if (!getDT(F).dominates(Pt, Inst))
// Give up if we need to merge in a common dominator as the
// experiments show it is not profitable.
continue;
@@ -5715,7 +5901,7 @@ static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI,
static Value *getTrueOrFalseValue(
SelectInst *SI, bool isTrue,
const SmallPtrSet<const Instruction *, 2> &Selects) {
- Value *V;
+ Value *V = nullptr;
for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI);
DefSI = dyn_cast<SelectInst>(V)) {
@@ -5723,9 +5909,44 @@ static Value *getTrueOrFalseValue(
"The condition of DefSI does not match with SI");
V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
}
+
+ assert(V && "Failed to get select true/false value");
return V;
}
+bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) {
+ assert(Shift->isShift() && "Expected a shift");
+
+ // If this is (1) a vector shift, (2) shifts by scalars are cheaper than
+ // general vector shifts, and (3) the shift amount is a select-of-splatted
+ // values, hoist the shifts before the select:
+ // shift Op0, (select Cond, TVal, FVal) -->
+ // select Cond, (shift Op0, TVal), (shift Op0, FVal)
+ //
+ // This is inverting a generic IR transform when we know that the cost of a
+ // general vector shift is more than the cost of 2 shift-by-scalars.
+ // We can't do this effectively in SDAG because we may not be able to
+ // determine if the select operands are splats from within a basic block.
+ Type *Ty = Shift->getType();
+ if (!Ty->isVectorTy() || !TLI->isVectorShiftByScalarCheap(Ty))
+ return false;
+ Value *Cond, *TVal, *FVal;
+ if (!match(Shift->getOperand(1),
+ m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
+ return false;
+ if (!isSplatValue(TVal) || !isSplatValue(FVal))
+ return false;
+
+ IRBuilder<> Builder(Shift);
+ BinaryOperator::BinaryOps Opcode = Shift->getOpcode();
+ Value *NewTVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), TVal);
+ Value *NewFVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), FVal);
+ Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
+ Shift->replaceAllUsesWith(NewSel);
+ Shift->eraseFromParent();
+ return true;
+}
+
/// If we have a SelectInst that will likely profit from branch prediction,
/// turn it into a branch.
bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
@@ -5769,7 +5990,11 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
!isFormingBranchFromSelectProfitable(TTI, TLI, SI))
return false;
- ModifiedDT = true;
+ // The DominatorTree needs to be rebuilt by any consumers after this
+ // transformation. We simply reset here rather than setting the ModifiedDT
+ // flag to avoid restarting the function walk in runOnFunction for each
+ // select optimized.
+ DT.reset();
// Transform a sequence like this:
// start:
@@ -5943,6 +6168,7 @@ bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
InsertedShuffle =
new ShuffleVectorInst(SVI->getOperand(0), SVI->getOperand(1),
SVI->getOperand(2), "", &*InsertPt);
+ InsertedShuffle->setDebugLoc(SVI->getDebugLoc());
}
UI->replaceUsesOfWith(SVI, InsertedShuffle);
@@ -5958,6 +6184,48 @@ bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
return MadeChange;
}
+bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
+ // If the operands of I can be folded into a target instruction together with
+ // I, duplicate and sink them.
+ SmallVector<Use *, 4> OpsToSink;
+ if (!TLI || !TLI->shouldSinkOperands(I, OpsToSink))
+ return false;
+
+ // OpsToSink can contain multiple uses in a use chain (e.g.
+ // (%u1 with %u1 = shufflevector), (%u2 with %u2 = zext %u1)). The dominating
+ // uses must come first, which means they are sunk first, temporarily creating
+ // invalid IR. This will be fixed once their dominated users are sunk and
+ // updated.
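+  // Targets typically use this to keep free extends/shuffles (e.g. the
+  // operands of a widening vector multiply) in the same block as their user,
+  // so instruction selection can fold them.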
+ BasicBlock *TargetBB = I->getParent();
+ bool Changed = false;
+ SmallVector<Use *, 4> ToReplace;
+ for (Use *U : OpsToSink) {
+ auto *UI = cast<Instruction>(U->get());
+ if (UI->getParent() == TargetBB || isa<PHINode>(UI))
+ continue;
+ ToReplace.push_back(U);
+ }
+
+ SmallPtrSet<Instruction *, 4> MaybeDead;
+ for (Use *U : ToReplace) {
+ auto *UI = cast<Instruction>(U->get());
+ Instruction *NI = UI->clone();
+ MaybeDead.insert(UI);
+ LLVM_DEBUG(dbgs() << "Sinking " << *UI << " to user " << *I << "\n");
+ NI->insertBefore(I);
+ InsertedInsts.insert(NI);
+ U->set(NI);
+ Changed = true;
+ }
+
+ // Remove instructions that are dead after sinking.
+ for (auto *I : MaybeDead)
+ if (!I->hasNUsesOrMore(1))
+ I->eraseFromParent();
+
+ return Changed;
+}
+
bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
if (!TLI || !DL)
return false;
@@ -6412,14 +6680,17 @@ static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
const TargetLowering &TLI) {
// Handle simple but common cases only.
Type *StoreType = SI.getValueOperand()->getType();
- if (DL.getTypeStoreSizeInBits(StoreType) != DL.getTypeSizeInBits(StoreType) ||
+ if (!DL.typeSizeEqualsStoreSize(StoreType) ||
DL.getTypeSizeInBits(StoreType) == 0)
return false;
unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2;
Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize);
- if (DL.getTypeStoreSizeInBits(SplitStoreType) !=
- DL.getTypeSizeInBits(SplitStoreType))
+ if (!DL.typeSizeEqualsStoreSize(SplitStoreType))
+ return false;
+
+  // Don't split the store if it is volatile: splitting would turn one
+  // volatile access into two.
+ if (SI.isVolatile())
return false;
// Match the following patterns:
@@ -6658,11 +6929,13 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
if (InsertedInsts.count(I))
return false;
+  // TODO: Move this into the switch on opcode below.
if (PHINode *P = dyn_cast<PHINode>(I)) {
// It is possible for very late stage optimizations (such as SimplifyCFG)
// to introduce PHI nodes too late to be cleaned up. If we detect such a
// trivial PHI, go ahead and zap it here.
if (Value *V = SimplifyInstruction(P, {*DL, TLInfo})) {
+ LargeOffsetGEPMap.erase(P);
P->replaceAllUsesWith(V);
P->eraseFromParent();
++NumPHIsElim;
@@ -6700,9 +6973,9 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
return false;
}
- if (CmpInst *CI = dyn_cast<CmpInst>(I))
- if (!TLI || !TLI->hasMultipleConditionRegisters())
- return OptimizeCmpExpression(CI, TLI);
+ if (auto *Cmp = dyn_cast<CmpInst>(I))
+ if (TLI && optimizeCmp(Cmp, ModifiedDT))
+ return true;
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
LI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
@@ -6745,13 +7018,13 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
EnableAndCmpSinking && TLI)
return sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts);
+ // TODO: Move this into the switch on opcode - it handles shifts already.
if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
BinOp->getOpcode() == Instruction::LShr)) {
ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
if (TLI && CI && TLI->hasExtractBitsInsn())
- return OptimizeExtractBits(BinOp, CI, *TLI, *DL);
-
- return false;
+ if (OptimizeExtractBits(BinOp, CI, *TLI, *DL))
+ return true;
}
if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
@@ -6772,20 +7045,25 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
return false;
}
- if (CallInst *CI = dyn_cast<CallInst>(I))
- return optimizeCallInst(CI, ModifiedDT);
-
- if (SelectInst *SI = dyn_cast<SelectInst>(I))
- return optimizeSelectInst(SI);
-
- if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
- return optimizeShuffleVectorInst(SVI);
-
- if (auto *Switch = dyn_cast<SwitchInst>(I))
- return optimizeSwitchInst(Switch);
+ if (tryToSinkFreeOperands(I))
+ return true;
- if (isa<ExtractElementInst>(I))
- return optimizeExtractElementInst(I);
+ switch (I->getOpcode()) {
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ return optimizeShiftInst(cast<BinaryOperator>(I));
+ case Instruction::Call:
+ return optimizeCallInst(cast<CallInst>(I), ModifiedDT);
+ case Instruction::Select:
+ return optimizeSelectInst(cast<SelectInst>(I));
+ case Instruction::ShuffleVector:
+ return optimizeShuffleVectorInst(cast<ShuffleVectorInst>(I));
+ case Instruction::Switch:
+ return optimizeSwitchInst(cast<SwitchInst>(I));
+ case Instruction::ExtractElement:
+ return optimizeExtractElementInst(cast<ExtractElementInst>(I));
+ }
return false;
}
@@ -6833,7 +7111,7 @@ bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) {
}
}
}
- MadeChange |= dupRetToEnableTailCallOpts(&BB);
+ MadeChange |= dupRetToEnableTailCallOpts(&BB, ModifiedDT);
return MadeChange;
}
@@ -6909,7 +7187,7 @@ static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
///
/// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
///
-bool CodeGenPrepare::splitBranchCondition(Function &F) {
+bool CodeGenPrepare::splitBranchCondition(Function &F, bool &ModifiedDT) {
if (!TM || !TM->Options.EnableFastISel || !TLI || TLI->isJumpExpensive())
return false;
@@ -6983,11 +7261,7 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
std::swap(TBB, FBB);
// Replace the old BB with the new BB.
- for (PHINode &PN : TBB->phis()) {
- int i;
- while ((i = PN.getBasicBlockIndex(&BB)) >= 0)
- PN.setIncomingBlock(i, TmpBB);
- }
+ TBB->replacePhiUsesWith(&BB, TmpBB);
// Add another incoming edge from the new BB.
for (PHINode &PN : FBB->phis()) {
@@ -7066,10 +7340,7 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
}
}
- // Note: No point in getting fancy here, since the DT info is never
- // available to CodeGenPrepare.
ModifiedDT = true;
-
MadeChange = true;
LLVM_DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();