path: root/contrib/llvm/lib/Transforms/Scalar
Diffstat (limited to 'contrib/llvm/lib/Transforms/Scalar')
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/ADCE.cpp  14
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp  2002
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp  602
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp  19
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp  14
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/DCE.cpp  18
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp  124
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp  55
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp  9
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/GVN.cpp  313
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/GlobalMerge.cpp  310
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp  290
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp  93
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LICM.cpp  365
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoadCombine.cpp  268
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp  20
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp  133
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp  40
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp  95
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp  108
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp  854
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp  404
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp  106
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp  14
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp  163
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp  632
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp  7
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp  130
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp  20
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SCCP.cpp  105
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SROA.cpp  892
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SampleProfile.cpp  1098
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/Scalar.cpp  17
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp  395
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp  663
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp  776
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp  31
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/Sink.cpp  54
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp  89
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp  395
40 files changed, 6941 insertions(+), 4796 deletions(-)
diff --git a/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
index a3eb07a9f6d6..1a3a4aadce6a 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
@@ -14,20 +14,21 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "adce"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/InstIterator.h"
using namespace llvm;
+#define DEBUG_TYPE "adce"
+
STATISTIC(NumRemoved, "Number of instructions removed");
namespace {
@@ -37,9 +38,9 @@ namespace {
initializeADCEPass(*PassRegistry::getPassRegistry());
}
- virtual bool runOnFunction(Function& F);
+ bool runOnFunction(Function& F) override;
- virtual void getAnalysisUsage(AnalysisUsage& AU) const {
+ void getAnalysisUsage(AnalysisUsage& AU) const override {
AU.setPreservesCFG();
}
@@ -50,6 +51,9 @@ char ADCE::ID = 0;
INITIALIZE_PASS(ADCE, "adce", "Aggressive Dead Code Elimination", false, false)
bool ADCE::runOnFunction(Function& F) {
+ if (skipOptnoneFunction(F))
+ return false;
+
SmallPtrSet<Instruction*, 128> alive;
SmallVector<Instruction*, 128> worklist;
diff --git a/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
deleted file mode 100644
index 007e9b79e20a..000000000000
--- a/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ /dev/null
@@ -1,2002 +0,0 @@
-//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass munges the code in the input function to better prepare it for
-// SelectionDAG-based code generation. This works around limitations in it's
-// basic-block-at-a-time approach. It should eventually be removed.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "codegenprepare"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/ValueMap.h"
-#include "llvm/Analysis/DominatorInternals.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Assembly/Writer.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InlineAsm.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/Support/PatternMatch.h"
-#include "llvm/Support/ValueHandle.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/BuildLibCalls.h"
-#include "llvm/Transforms/Utils/BypassSlowDivision.h"
-#include "llvm/Transforms/Utils/Local.h"
-using namespace llvm;
-using namespace llvm::PatternMatch;
-
-STATISTIC(NumBlocksElim, "Number of blocks eliminated");
-STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
-STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
-STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
- "sunken Cmps");
-STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
- "of sunken Casts");
-STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
- "computations were sunk");
-STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
-STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
-STATISTIC(NumRetsDup, "Number of return instructions duplicated");
-STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
-STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
-
-static cl::opt<bool> DisableBranchOpts(
- "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
- cl::desc("Disable branch optimizations in CodeGenPrepare"));
-
-static cl::opt<bool> DisableSelectToBranch(
- "disable-cgp-select2branch", cl::Hidden, cl::init(false),
- cl::desc("Disable select to branch conversion."));
-
-namespace {
- class CodeGenPrepare : public FunctionPass {
- /// TLI - Keep a pointer of a TargetLowering to consult for determining
- /// transformation profitability.
- const TargetMachine *TM;
- const TargetLowering *TLI;
- const TargetLibraryInfo *TLInfo;
- DominatorTree *DT;
-
- /// CurInstIterator - As we scan instructions optimizing them, this is the
- /// next instruction to optimize. Xforms that can invalidate this should
- /// update it.
- BasicBlock::iterator CurInstIterator;
-
- /// Keeps track of non-local addresses that have been sunk into a block.
- /// This allows us to avoid inserting duplicate code for blocks with
- /// multiple load/stores of the same address.
- ValueMap<Value*, Value*> SunkAddrs;
-
- /// ModifiedDT - If CFG is modified in anyway, dominator tree may need to
- /// be updated.
- bool ModifiedDT;
-
- /// OptSize - True if optimizing for size.
- bool OptSize;
-
- public:
- static char ID; // Pass identification, replacement for typeid
- explicit CodeGenPrepare(const TargetMachine *TM = 0)
- : FunctionPass(ID), TM(TM), TLI(0) {
- initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
- }
- bool runOnFunction(Function &F);
-
- const char *getPassName() const { return "CodeGen Prepare"; }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addPreserved<DominatorTree>();
- AU.addRequired<TargetLibraryInfo>();
- }
-
- private:
- bool EliminateFallThrough(Function &F);
- bool EliminateMostlyEmptyBlocks(Function &F);
- bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
- void EliminateMostlyEmptyBlock(BasicBlock *BB);
- bool OptimizeBlock(BasicBlock &BB);
- bool OptimizeInst(Instruction *I);
- bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy);
- bool OptimizeInlineAsmInst(CallInst *CS);
- bool OptimizeCallInst(CallInst *CI);
- bool MoveExtToFormExtLoad(Instruction *I);
- bool OptimizeExtUses(Instruction *I);
- bool OptimizeSelectInst(SelectInst *SI);
- bool DupRetToEnableTailCallOpts(BasicBlock *BB);
- bool PlaceDbgValues(Function &F);
- };
-}
-
-char CodeGenPrepare::ID = 0;
-INITIALIZE_PASS_BEGIN(CodeGenPrepare, "codegenprepare",
- "Optimize for code generation", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
-INITIALIZE_PASS_END(CodeGenPrepare, "codegenprepare",
- "Optimize for code generation", false, false)
-
-FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) {
- return new CodeGenPrepare(TM);
-}
-
-bool CodeGenPrepare::runOnFunction(Function &F) {
- bool EverMadeChange = false;
-
- ModifiedDT = false;
- if (TM) TLI = TM->getTargetLowering();
- TLInfo = &getAnalysis<TargetLibraryInfo>();
- DT = getAnalysisIfAvailable<DominatorTree>();
- OptSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize);
-
- /// This optimization identifies DIV instructions that can be
- /// profitably bypassed and carried out with a shorter, faster divide.
- if (!OptSize && TLI && TLI->isSlowDivBypassed()) {
- const DenseMap<unsigned int, unsigned int> &BypassWidths =
- TLI->getBypassSlowDivWidths();
- for (Function::iterator I = F.begin(); I != F.end(); I++)
- EverMadeChange |= bypassSlowDivision(F, I, BypassWidths);
- }
-
- // Eliminate blocks that contain only PHI nodes and an
- // unconditional branch.
- EverMadeChange |= EliminateMostlyEmptyBlocks(F);
-
- // llvm.dbg.value is far away from the value then iSel may not be able
- // handle it properly. iSel will drop llvm.dbg.value if it can not
- // find a node corresponding to the value.
- EverMadeChange |= PlaceDbgValues(F);
-
- bool MadeChange = true;
- while (MadeChange) {
- MadeChange = false;
- for (Function::iterator I = F.begin(); I != F.end(); ) {
- BasicBlock *BB = I++;
- MadeChange |= OptimizeBlock(*BB);
- }
- EverMadeChange |= MadeChange;
- }
-
- SunkAddrs.clear();
-
- if (!DisableBranchOpts) {
- MadeChange = false;
- SmallPtrSet<BasicBlock*, 8> WorkList;
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
- MadeChange |= ConstantFoldTerminator(BB, true);
- if (!MadeChange) continue;
-
- for (SmallVectorImpl<BasicBlock*>::iterator
- II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
- if (pred_begin(*II) == pred_end(*II))
- WorkList.insert(*II);
- }
-
- // Delete the dead blocks and any of their dead successors.
- MadeChange |= !WorkList.empty();
- while (!WorkList.empty()) {
- BasicBlock *BB = *WorkList.begin();
- WorkList.erase(BB);
- SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
-
- DeleteDeadBlock(BB);
-
- for (SmallVectorImpl<BasicBlock*>::iterator
- II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
- if (pred_begin(*II) == pred_end(*II))
- WorkList.insert(*II);
- }
-
- // Merge pairs of basic blocks with unconditional branches, connected by
- // a single edge.
- if (EverMadeChange || MadeChange)
- MadeChange |= EliminateFallThrough(F);
-
- if (MadeChange)
- ModifiedDT = true;
- EverMadeChange |= MadeChange;
- }
-
- if (ModifiedDT && DT)
- DT->DT->recalculate(F);
-
- return EverMadeChange;
-}
-
-/// EliminateFallThrough - Merge basic blocks which are connected
-/// by a single edge, where one of the basic blocks has a single successor
-/// pointing to the other basic block, which has a single predecessor.
-bool CodeGenPrepare::EliminateFallThrough(Function &F) {
- bool Changed = false;
- // Scan all of the blocks in the function, except for the entry block.
- for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
- BasicBlock *BB = I++;
- // If the destination block has a single pred, then this is a trivial
- // edge, just collapse it.
- BasicBlock *SinglePred = BB->getSinglePredecessor();
-
- // Don't merge if BB's address is taken.
- if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue;
-
- BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
- if (Term && !Term->isConditional()) {
- Changed = true;
- DEBUG(dbgs() << "To merge:\n"<< *SinglePred << "\n\n\n");
- // Remember if SinglePred was the entry block of the function.
- // If so, we will need to move BB back to the entry position.
- bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
- MergeBasicBlockIntoOnlyPred(BB, this);
-
- if (isEntry && BB != &BB->getParent()->getEntryBlock())
- BB->moveBefore(&BB->getParent()->getEntryBlock());
-
- // We have erased a block. Update the iterator.
- I = BB;
- }
- }
- return Changed;
-}
-
-/// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes,
-/// debug info directives, and an unconditional branch. Passes before isel
-/// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for
-/// isel. Start by eliminating these blocks so we can split them the way we
-/// want them.
-bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
- bool MadeChange = false;
- // Note that this intentionally skips the entry block.
- for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
- BasicBlock *BB = I++;
-
- // If this block doesn't end with an uncond branch, ignore it.
- BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
- if (!BI || !BI->isUnconditional())
- continue;
-
- // If the instruction before the branch (skipping debug info) isn't a phi
- // node, then other stuff is happening here.
- BasicBlock::iterator BBI = BI;
- if (BBI != BB->begin()) {
- --BBI;
- while (isa<DbgInfoIntrinsic>(BBI)) {
- if (BBI == BB->begin())
- break;
- --BBI;
- }
- if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
- continue;
- }
-
- // Do not break infinite loops.
- BasicBlock *DestBB = BI->getSuccessor(0);
- if (DestBB == BB)
- continue;
-
- if (!CanMergeBlocks(BB, DestBB))
- continue;
-
- EliminateMostlyEmptyBlock(BB);
- MadeChange = true;
- }
- return MadeChange;
-}
-
-/// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a
-/// single uncond branch between them, and BB contains no other non-phi
-/// instructions.
-bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB,
- const BasicBlock *DestBB) const {
- // We only want to eliminate blocks whose phi nodes are used by phi nodes in
- // the successor. If there are more complex condition (e.g. preheaders),
- // don't mess around with them.
- BasicBlock::const_iterator BBI = BB->begin();
- while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
- for (Value::const_use_iterator UI = PN->use_begin(), E = PN->use_end();
- UI != E; ++UI) {
- const Instruction *User = cast<Instruction>(*UI);
- if (User->getParent() != DestBB || !isa<PHINode>(User))
- return false;
- // If User is inside DestBB block and it is a PHINode then check
- // incoming value. If incoming value is not from BB then this is
- // a complex condition (e.g. preheaders) we want to avoid here.
- if (User->getParent() == DestBB) {
- if (const PHINode *UPN = dyn_cast<PHINode>(User))
- for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
- Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
- if (Insn && Insn->getParent() == BB &&
- Insn->getParent() != UPN->getIncomingBlock(I))
- return false;
- }
- }
- }
- }
-
- // If BB and DestBB contain any common predecessors, then the phi nodes in BB
- // and DestBB may have conflicting incoming values for the block. If so, we
- // can't merge the block.
- const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
- if (!DestBBPN) return true; // no conflict.
-
- // Collect the preds of BB.
- SmallPtrSet<const BasicBlock*, 16> BBPreds;
- if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
- // It is faster to get preds from a PHI than with pred_iterator.
- for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
- BBPreds.insert(BBPN->getIncomingBlock(i));
- } else {
- BBPreds.insert(pred_begin(BB), pred_end(BB));
- }
-
- // Walk the preds of DestBB.
- for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
- BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
- if (BBPreds.count(Pred)) { // Common predecessor?
- BBI = DestBB->begin();
- while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
- const Value *V1 = PN->getIncomingValueForBlock(Pred);
- const Value *V2 = PN->getIncomingValueForBlock(BB);
-
- // If V2 is a phi node in BB, look up what the mapped value will be.
- if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
- if (V2PN->getParent() == BB)
- V2 = V2PN->getIncomingValueForBlock(Pred);
-
- // If there is a conflict, bail out.
- if (V1 != V2) return false;
- }
- }
- }
-
- return true;
-}
-
-
-/// EliminateMostlyEmptyBlock - Eliminate a basic block that have only phi's and
-/// an unconditional branch in it.
-void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
- BranchInst *BI = cast<BranchInst>(BB->getTerminator());
- BasicBlock *DestBB = BI->getSuccessor(0);
-
- DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB);
-
- // If the destination block has a single pred, then this is a trivial edge,
- // just collapse it.
- if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
- if (SinglePred != DestBB) {
- // Remember if SinglePred was the entry block of the function. If so, we
- // will need to move BB back to the entry position.
- bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
- MergeBasicBlockIntoOnlyPred(DestBB, this);
-
- if (isEntry && BB != &BB->getParent()->getEntryBlock())
- BB->moveBefore(&BB->getParent()->getEntryBlock());
-
- DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
- return;
- }
- }
-
- // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
- // to handle the new incoming edges it is about to have.
- PHINode *PN;
- for (BasicBlock::iterator BBI = DestBB->begin();
- (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
- // Remove the incoming value for BB, and remember it.
- Value *InVal = PN->removeIncomingValue(BB, false);
-
- // Two options: either the InVal is a phi node defined in BB or it is some
- // value that dominates BB.
- PHINode *InValPhi = dyn_cast<PHINode>(InVal);
- if (InValPhi && InValPhi->getParent() == BB) {
- // Add all of the input values of the input PHI as inputs of this phi.
- for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
- PN->addIncoming(InValPhi->getIncomingValue(i),
- InValPhi->getIncomingBlock(i));
- } else {
- // Otherwise, add one instance of the dominating value for each edge that
- // we will be adding.
- if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
- for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
- PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
- } else {
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
- PN->addIncoming(InVal, *PI);
- }
- }
- }
-
- // The PHIs are now updated, change everything that refers to BB to use
- // DestBB and remove BB.
- BB->replaceAllUsesWith(DestBB);
- if (DT && !ModifiedDT) {
- BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock();
- BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock();
- BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom);
- DT->changeImmediateDominator(DestBB, NewIDom);
- DT->eraseNode(BB);
- }
- BB->eraseFromParent();
- ++NumBlocksElim;
-
- DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
-}
-
-/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
-/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
-/// sink it into user blocks to reduce the number of virtual
-/// registers that must be created and coalesced.
-///
-/// Return true if any changes are made.
-///
-static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
- // If this is a noop copy,
- EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
- EVT DstVT = TLI.getValueType(CI->getType());
-
- // This is an fp<->int conversion?
- if (SrcVT.isInteger() != DstVT.isInteger())
- return false;
-
- // If this is an extension, it will be a zero or sign extension, which
- // isn't a noop.
- if (SrcVT.bitsLT(DstVT)) return false;
-
- // If these values will be promoted, find out what they will be promoted
- // to. This helps us consider truncates on PPC as noop copies when they
- // are.
- if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
- TargetLowering::TypePromoteInteger)
- SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
- if (TLI.getTypeAction(CI->getContext(), DstVT) ==
- TargetLowering::TypePromoteInteger)
- DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
-
- // If, after promotion, these are the same types, this is a noop copy.
- if (SrcVT != DstVT)
- return false;
-
- BasicBlock *DefBB = CI->getParent();
-
- /// InsertedCasts - Only insert a cast in each block once.
- DenseMap<BasicBlock*, CastInst*> InsertedCasts;
-
- bool MadeChange = false;
- for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
- UI != E; ) {
- Use &TheUse = UI.getUse();
- Instruction *User = cast<Instruction>(*UI);
-
- // Figure out which BB this cast is used in. For PHI's this is the
- // appropriate predecessor block.
- BasicBlock *UserBB = User->getParent();
- if (PHINode *PN = dyn_cast<PHINode>(User)) {
- UserBB = PN->getIncomingBlock(UI);
- }
-
- // Preincrement use iterator so we don't invalidate it.
- ++UI;
-
- // If this user is in the same block as the cast, don't change the cast.
- if (UserBB == DefBB) continue;
-
- // If we have already inserted a cast into this block, use it.
- CastInst *&InsertedCast = InsertedCasts[UserBB];
-
- if (!InsertedCast) {
- BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
- InsertedCast =
- CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
- InsertPt);
- MadeChange = true;
- }
-
- // Replace a use of the cast with a use of the new cast.
- TheUse = InsertedCast;
- ++NumCastUses;
- }
-
- // If we removed all uses, nuke the cast.
- if (CI->use_empty()) {
- CI->eraseFromParent();
- MadeChange = true;
- }
-
- return MadeChange;
-}
-
-/// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce
-/// the number of virtual registers that must be created and coalesced. This is
-/// a clear win except on targets with multiple condition code registers
-/// (PowerPC), where it might lose; some adjustment may be wanted there.
-///
-/// Return true if any changes are made.
-static bool OptimizeCmpExpression(CmpInst *CI) {
- BasicBlock *DefBB = CI->getParent();
-
- /// InsertedCmp - Only insert a cmp in each block once.
- DenseMap<BasicBlock*, CmpInst*> InsertedCmps;
-
- bool MadeChange = false;
- for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
- UI != E; ) {
- Use &TheUse = UI.getUse();
- Instruction *User = cast<Instruction>(*UI);
-
- // Preincrement use iterator so we don't invalidate it.
- ++UI;
-
- // Don't bother for PHI nodes.
- if (isa<PHINode>(User))
- continue;
-
- // Figure out which BB this cmp is used in.
- BasicBlock *UserBB = User->getParent();
-
- // If this user is in the same block as the cmp, don't change the cmp.
- if (UserBB == DefBB) continue;
-
- // If we have already inserted a cmp into this block, use it.
- CmpInst *&InsertedCmp = InsertedCmps[UserBB];
-
- if (!InsertedCmp) {
- BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
- InsertedCmp =
- CmpInst::Create(CI->getOpcode(),
- CI->getPredicate(), CI->getOperand(0),
- CI->getOperand(1), "", InsertPt);
- MadeChange = true;
- }
-
- // Replace a use of the cmp with a use of the new cmp.
- TheUse = InsertedCmp;
- ++NumCmpUses;
- }
-
- // If we removed all uses, nuke the cmp.
- if (CI->use_empty())
- CI->eraseFromParent();
-
- return MadeChange;
-}
-
-namespace {
-class CodeGenPrepareFortifiedLibCalls : public SimplifyFortifiedLibCalls {
-protected:
- void replaceCall(Value *With) {
- CI->replaceAllUsesWith(With);
- CI->eraseFromParent();
- }
- bool isFoldable(unsigned SizeCIOp, unsigned, bool) const {
- if (ConstantInt *SizeCI =
- dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp)))
- return SizeCI->isAllOnesValue();
- return false;
- }
-};
-} // end anonymous namespace
-
-bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
- BasicBlock *BB = CI->getParent();
-
- // Lower inline assembly if we can.
- // If we found an inline asm expession, and if the target knows how to
- // lower it to normal LLVM code, do so now.
- if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
- if (TLI->ExpandInlineAsm(CI)) {
- // Avoid invalidating the iterator.
- CurInstIterator = BB->begin();
- // Avoid processing instructions out of order, which could cause
- // reuse before a value is defined.
- SunkAddrs.clear();
- return true;
- }
- // Sink address computing for memory operands into the block.
- if (OptimizeInlineAsmInst(CI))
- return true;
- }
-
- // Lower all uses of llvm.objectsize.*
- IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
- if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
- bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
- Type *ReturnTy = CI->getType();
- Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
-
- // Substituting this can cause recursive simplifications, which can
- // invalidate our iterator. Use a WeakVH to hold onto it in case this
- // happens.
- WeakVH IterHandle(CurInstIterator);
-
- replaceAndRecursivelySimplify(CI, RetVal, TLI ? TLI->getDataLayout() : 0,
- TLInfo, ModifiedDT ? 0 : DT);
-
- // If the iterator instruction was recursively deleted, start over at the
- // start of the block.
- if (IterHandle != CurInstIterator) {
- CurInstIterator = BB->begin();
- SunkAddrs.clear();
- }
- return true;
- }
-
- if (II && TLI) {
- SmallVector<Value*, 2> PtrOps;
- Type *AccessTy;
- if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy))
- while (!PtrOps.empty())
- if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy))
- return true;
- }
-
- // From here on out we're working with named functions.
- if (CI->getCalledFunction() == 0) return false;
-
- // We'll need DataLayout from here on out.
- const DataLayout *TD = TLI ? TLI->getDataLayout() : 0;
- if (!TD) return false;
-
- // Lower all default uses of _chk calls. This is very similar
- // to what InstCombineCalls does, but here we are only lowering calls
- // that have the default "don't know" as the objectsize. Anything else
- // should be left alone.
- CodeGenPrepareFortifiedLibCalls Simplifier;
- return Simplifier.fold(CI, TD, TLInfo);
-}
-
-/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return
-/// instructions to the predecessor to enable tail call optimizations. The
-/// case it is currently looking for is:
-/// @code
-/// bb0:
-/// %tmp0 = tail call i32 @f0()
-/// br label %return
-/// bb1:
-/// %tmp1 = tail call i32 @f1()
-/// br label %return
-/// bb2:
-/// %tmp2 = tail call i32 @f2()
-/// br label %return
-/// return:
-/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
-/// ret i32 %retval
-/// @endcode
-///
-/// =>
-///
-/// @code
-/// bb0:
-/// %tmp0 = tail call i32 @f0()
-/// ret i32 %tmp0
-/// bb1:
-/// %tmp1 = tail call i32 @f1()
-/// ret i32 %tmp1
-/// bb2:
-/// %tmp2 = tail call i32 @f2()
-/// ret i32 %tmp2
-/// @endcode
-bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) {
- if (!TLI)
- return false;
-
- ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
- if (!RI)
- return false;
-
- PHINode *PN = 0;
- BitCastInst *BCI = 0;
- Value *V = RI->getReturnValue();
- if (V) {
- BCI = dyn_cast<BitCastInst>(V);
- if (BCI)
- V = BCI->getOperand(0);
-
- PN = dyn_cast<PHINode>(V);
- if (!PN)
- return false;
- }
-
- if (PN && PN->getParent() != BB)
- return false;
-
- // It's not safe to eliminate the sign / zero extension of the return value.
- // See llvm::isInTailCallPosition().
- const Function *F = BB->getParent();
- AttributeSet CallerAttrs = F->getAttributes();
- if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) ||
- CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
- return false;
-
- // Make sure there are no instructions between the PHI and return, or that the
- // return is the first instruction in the block.
- if (PN) {
- BasicBlock::iterator BI = BB->begin();
- do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
- if (&*BI == BCI)
- // Also skip over the bitcast.
- ++BI;
- if (&*BI != RI)
- return false;
- } else {
- BasicBlock::iterator BI = BB->begin();
- while (isa<DbgInfoIntrinsic>(BI)) ++BI;
- if (&*BI != RI)
- return false;
- }
-
- /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
- /// call.
- SmallVector<CallInst*, 4> TailCalls;
- if (PN) {
- for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
- CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I));
- // Make sure the phi value is indeed produced by the tail call.
- if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) &&
- TLI->mayBeEmittedAsTailCall(CI))
- TailCalls.push_back(CI);
- }
- } else {
- SmallPtrSet<BasicBlock*, 4> VisitedBBs;
- for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
- if (!VisitedBBs.insert(*PI))
- continue;
-
- BasicBlock::InstListType &InstList = (*PI)->getInstList();
- BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin();
- BasicBlock::InstListType::reverse_iterator RE = InstList.rend();
- do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI));
- if (RI == RE)
- continue;
-
- CallInst *CI = dyn_cast<CallInst>(&*RI);
- if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI))
- TailCalls.push_back(CI);
- }
- }
-
- bool Changed = false;
- for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) {
- CallInst *CI = TailCalls[i];
- CallSite CS(CI);
-
- // Conservatively require the attributes of the call to match those of the
- // return. Ignore noalias because it doesn't affect the call sequence.
- AttributeSet CalleeAttrs = CS.getAttributes();
- if (AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
- removeAttribute(Attribute::NoAlias) !=
- AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
- removeAttribute(Attribute::NoAlias))
- continue;
-
- // Make sure the call instruction is followed by an unconditional branch to
- // the return block.
- BasicBlock *CallBB = CI->getParent();
- BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator());
- if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
- continue;
-
- // Duplicate the return into CallBB.
- (void)FoldReturnIntoUncondBranch(RI, BB, CallBB);
- ModifiedDT = Changed = true;
- ++NumRetsDup;
- }
-
- // If we eliminated all predecessors of the block, delete the block now.
- if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
- BB->eraseFromParent();
-
- return Changed;
-}
-
-//===----------------------------------------------------------------------===//
-// Memory Optimization
-//===----------------------------------------------------------------------===//
-
-namespace {
-
-/// ExtAddrMode - This is an extended version of TargetLowering::AddrMode
-/// which holds actual Value*'s for register values.
-struct ExtAddrMode : public TargetLowering::AddrMode {
- Value *BaseReg;
- Value *ScaledReg;
- ExtAddrMode() : BaseReg(0), ScaledReg(0) {}
- void print(raw_ostream &OS) const;
- void dump() const;
-
- bool operator==(const ExtAddrMode& O) const {
- return (BaseReg == O.BaseReg) && (ScaledReg == O.ScaledReg) &&
- (BaseGV == O.BaseGV) && (BaseOffs == O.BaseOffs) &&
- (HasBaseReg == O.HasBaseReg) && (Scale == O.Scale);
- }
-};
-
-#ifndef NDEBUG
-static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
- AM.print(OS);
- return OS;
-}
-#endif
-
-void ExtAddrMode::print(raw_ostream &OS) const {
- bool NeedPlus = false;
- OS << "[";
- if (BaseGV) {
- OS << (NeedPlus ? " + " : "")
- << "GV:";
- WriteAsOperand(OS, BaseGV, /*PrintType=*/false);
- NeedPlus = true;
- }
-
- if (BaseOffs)
- OS << (NeedPlus ? " + " : "") << BaseOffs, NeedPlus = true;
-
- if (BaseReg) {
- OS << (NeedPlus ? " + " : "")
- << "Base:";
- WriteAsOperand(OS, BaseReg, /*PrintType=*/false);
- NeedPlus = true;
- }
- if (Scale) {
- OS << (NeedPlus ? " + " : "")
- << Scale << "*";
- WriteAsOperand(OS, ScaledReg, /*PrintType=*/false);
- }
-
- OS << ']';
-}
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void ExtAddrMode::dump() const {
- print(dbgs());
- dbgs() << '\n';
-}
-#endif
-
-
-/// \brief A helper class for matching addressing modes.
-///
-/// This encapsulates the logic for matching the target-legal addressing modes.
-class AddressingModeMatcher {
- SmallVectorImpl<Instruction*> &AddrModeInsts;
- const TargetLowering &TLI;
-
- /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
- /// the memory instruction that we're computing this address for.
- Type *AccessTy;
- Instruction *MemoryInst;
-
- /// AddrMode - This is the addressing mode that we're building up. This is
- /// part of the return value of this addressing mode matching stuff.
- ExtAddrMode &AddrMode;
-
- /// IgnoreProfitability - This is set to true when we should not do
- /// profitability checks. When true, IsProfitableToFoldIntoAddressingMode
- /// always returns true.
- bool IgnoreProfitability;
-
- AddressingModeMatcher(SmallVectorImpl<Instruction*> &AMI,
- const TargetLowering &T, Type *AT,
- Instruction *MI, ExtAddrMode &AM)
- : AddrModeInsts(AMI), TLI(T), AccessTy(AT), MemoryInst(MI), AddrMode(AM) {
- IgnoreProfitability = false;
- }
-public:
-
- /// Match - Find the maximal addressing mode that a load/store of V can fold,
- /// give an access type of AccessTy. This returns a list of involved
- /// instructions in AddrModeInsts.
- static ExtAddrMode Match(Value *V, Type *AccessTy,
- Instruction *MemoryInst,
- SmallVectorImpl<Instruction*> &AddrModeInsts,
- const TargetLowering &TLI) {
- ExtAddrMode Result;
-
- bool Success =
- AddressingModeMatcher(AddrModeInsts, TLI, AccessTy,
- MemoryInst, Result).MatchAddr(V, 0);
- (void)Success; assert(Success && "Couldn't select *anything*?");
- return Result;
- }
-private:
- bool MatchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
- bool MatchAddr(Value *V, unsigned Depth);
- bool MatchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth);
- bool IsProfitableToFoldIntoAddressingMode(Instruction *I,
- ExtAddrMode &AMBefore,
- ExtAddrMode &AMAfter);
- bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
-};
-
-/// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode.
-/// Return true and update AddrMode if this addr mode is legal for the target,
-/// false if not.
-bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
- unsigned Depth) {
- // If Scale is 1, then this is the same as adding ScaleReg to the addressing
- // mode. Just process that directly.
- if (Scale == 1)
- return MatchAddr(ScaleReg, Depth);
-
- // If the scale is 0, it takes nothing to add this.
- if (Scale == 0)
- return true;
-
- // If we already have a scale of this value, we can add to it, otherwise, we
- // need an available scale field.
- if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
- return false;
-
- ExtAddrMode TestAddrMode = AddrMode;
-
- // Add scale to turn X*4+X*3 -> X*7. This could also do things like
- // [A+B + A*7] -> [B+A*8].
- TestAddrMode.Scale += Scale;
- TestAddrMode.ScaledReg = ScaleReg;
-
- // If the new address isn't legal, bail out.
- if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy))
- return false;
-
- // It was legal, so commit it.
- AddrMode = TestAddrMode;
-
- // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
- // to see if ScaleReg is actually X+C. If so, we can turn this into adding
- // X*Scale + C*Scale to addr mode.
- ConstantInt *CI = 0; Value *AddLHS = 0;
- if (isa<Instruction>(ScaleReg) && // not a constant expr.
- match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
- TestAddrMode.ScaledReg = AddLHS;
- TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
-
- // If this addressing mode is legal, commit it and remember that we folded
- // this instruction.
- if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) {
- AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
- AddrMode = TestAddrMode;
- return true;
- }
- }
-
- // Otherwise, not (x+c)*scale, just return what we have.
- return true;
-}
-
-/// MightBeFoldableInst - This is a little filter, which returns true if an
-/// addressing computation involving I might be folded into a load/store
-/// accessing it. This doesn't need to be perfect, but needs to accept at least
-/// the set of instructions that MatchOperationAddr can.
-static bool MightBeFoldableInst(Instruction *I) {
- switch (I->getOpcode()) {
- case Instruction::BitCast:
- // Don't touch identity bitcasts.
- if (I->getType() == I->getOperand(0)->getType())
- return false;
- return I->getType()->isPointerTy() || I->getType()->isIntegerTy();
- case Instruction::PtrToInt:
- // PtrToInt is always a noop, as we know that the int type is pointer sized.
- return true;
- case Instruction::IntToPtr:
- // We know the input is intptr_t, so this is foldable.
- return true;
- case Instruction::Add:
- return true;
- case Instruction::Mul:
- case Instruction::Shl:
- // Can only handle X*C and X << C.
- return isa<ConstantInt>(I->getOperand(1));
- case Instruction::GetElementPtr:
- return true;
- default:
- return false;
- }
-}
-
-/// MatchOperationAddr - Given an instruction or constant expr, see if we can
-/// fold the operation into the addressing mode. If so, update the addressing
-/// mode and return true, otherwise return false without modifying AddrMode.
-bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
- unsigned Depth) {
- // Avoid exponential behavior on extremely deep expression trees.
- if (Depth >= 5) return false;
-
- switch (Opcode) {
- case Instruction::PtrToInt:
- // PtrToInt is always a noop, as we know that the int type is pointer sized.
- return MatchAddr(AddrInst->getOperand(0), Depth);
- case Instruction::IntToPtr:
- // This inttoptr is a no-op if the integer type is pointer sized.
- if (TLI.getValueType(AddrInst->getOperand(0)->getType()) ==
- TLI.getPointerTy(AddrInst->getType()->getPointerAddressSpace()))
- return MatchAddr(AddrInst->getOperand(0), Depth);
- return false;
- case Instruction::BitCast:
- // BitCast is always a noop, and we can handle it as long as it is
- // int->int or pointer->pointer (we don't want int<->fp or something).
- if ((AddrInst->getOperand(0)->getType()->isPointerTy() ||
- AddrInst->getOperand(0)->getType()->isIntegerTy()) &&
- // Don't touch identity bitcasts. These were probably put here by LSR,
- // and we don't want to mess around with them. Assume it knows what it
- // is doing.
- AddrInst->getOperand(0)->getType() != AddrInst->getType())
- return MatchAddr(AddrInst->getOperand(0), Depth);
- return false;
- case Instruction::Add: {
- // Check to see if we can merge in the RHS then the LHS. If so, we win.
- ExtAddrMode BackupAddrMode = AddrMode;
- unsigned OldSize = AddrModeInsts.size();
- if (MatchAddr(AddrInst->getOperand(1), Depth+1) &&
- MatchAddr(AddrInst->getOperand(0), Depth+1))
- return true;
-
- // Restore the old addr mode info.
- AddrMode = BackupAddrMode;
- AddrModeInsts.resize(OldSize);
-
- // Otherwise this was over-aggressive. Try merging in the LHS then the RHS.
- if (MatchAddr(AddrInst->getOperand(0), Depth+1) &&
- MatchAddr(AddrInst->getOperand(1), Depth+1))
- return true;
-
- // Otherwise we definitely can't merge the ADD in.
- AddrMode = BackupAddrMode;
- AddrModeInsts.resize(OldSize);
- break;
- }
- //case Instruction::Or:
- // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
- //break;
- case Instruction::Mul:
- case Instruction::Shl: {
- // Can only handle X*C and X << C.
- ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
- if (!RHS) return false;
- int64_t Scale = RHS->getSExtValue();
- if (Opcode == Instruction::Shl)
- Scale = 1LL << Scale;
-
- return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth);
- }
- case Instruction::GetElementPtr: {
- // Scan the GEP. We check it if it contains constant offsets and at most
- // one variable offset.
- int VariableOperand = -1;
- unsigned VariableScale = 0;
-
- int64_t ConstantOffset = 0;
- const DataLayout *TD = TLI.getDataLayout();
- gep_type_iterator GTI = gep_type_begin(AddrInst);
- for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
- if (StructType *STy = dyn_cast<StructType>(*GTI)) {
- const StructLayout *SL = TD->getStructLayout(STy);
- unsigned Idx =
- cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
- ConstantOffset += SL->getElementOffset(Idx);
- } else {
- uint64_t TypeSize = TD->getTypeAllocSize(GTI.getIndexedType());
- if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
- ConstantOffset += CI->getSExtValue()*TypeSize;
- } else if (TypeSize) { // Scales of zero don't do anything.
- // We only allow one variable index at the moment.
- if (VariableOperand != -1)
- return false;
-
- // Remember the variable index.
- VariableOperand = i;
- VariableScale = TypeSize;
- }
- }
- }
-
- // A common case is for the GEP to only do a constant offset. In this case,
- // just add it to the disp field and check validity.
- if (VariableOperand == -1) {
- AddrMode.BaseOffs += ConstantOffset;
- if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){
- // Check to see if we can fold the base pointer in too.
- if (MatchAddr(AddrInst->getOperand(0), Depth+1))
- return true;
- }
- AddrMode.BaseOffs -= ConstantOffset;
- return false;
- }
-
- // Save the valid addressing mode in case we can't match.
- ExtAddrMode BackupAddrMode = AddrMode;
- unsigned OldSize = AddrModeInsts.size();
-
- // See if the scale and offset amount is valid for this target.
- AddrMode.BaseOffs += ConstantOffset;
-
- // Match the base operand of the GEP.
- if (!MatchAddr(AddrInst->getOperand(0), Depth+1)) {
- // If it couldn't be matched, just stuff the value in a register.
- if (AddrMode.HasBaseReg) {
- AddrMode = BackupAddrMode;
- AddrModeInsts.resize(OldSize);
- return false;
- }
- AddrMode.HasBaseReg = true;
- AddrMode.BaseReg = AddrInst->getOperand(0);
- }
-
- // Match the remaining variable portion of the GEP.
- if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
- Depth)) {
- // If it couldn't be matched, try stuffing the base into a register
- // instead of matching it, and retrying the match of the scale.
- AddrMode = BackupAddrMode;
- AddrModeInsts.resize(OldSize);
- if (AddrMode.HasBaseReg)
- return false;
- AddrMode.HasBaseReg = true;
- AddrMode.BaseReg = AddrInst->getOperand(0);
- AddrMode.BaseOffs += ConstantOffset;
- if (!MatchScaledValue(AddrInst->getOperand(VariableOperand),
- VariableScale, Depth)) {
- // If even that didn't work, bail.
- AddrMode = BackupAddrMode;
- AddrModeInsts.resize(OldSize);
- return false;
- }
- }
-
- return true;
- }
- }
- return false;
-}
-
-/// MatchAddr - If we can, try to add the value of 'Addr' into the current
-/// addressing mode. If Addr can't be added to AddrMode this returns false and
-/// leaves AddrMode unmodified. This assumes that Addr is either a pointer type
-/// or intptr_t for the target.
-///
-bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
- // Fold in immediates if legal for the target.
- AddrMode.BaseOffs += CI->getSExtValue();
- if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
- return true;
- AddrMode.BaseOffs -= CI->getSExtValue();
- } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
- // If this is a global variable, try to fold it into the addressing mode.
- if (AddrMode.BaseGV == 0) {
- AddrMode.BaseGV = GV;
- if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
- return true;
- AddrMode.BaseGV = 0;
- }
- } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
- ExtAddrMode BackupAddrMode = AddrMode;
- unsigned OldSize = AddrModeInsts.size();
-
- // Check to see if it is possible to fold this operation.
- if (MatchOperationAddr(I, I->getOpcode(), Depth)) {
- // Okay, it's possible to fold this. Check to see if it is actually
- // *profitable* to do so. We use a simple cost model to avoid increasing
- // register pressure too much.
- if (I->hasOneUse() ||
- IsProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
- AddrModeInsts.push_back(I);
- return true;
- }
-
- // It isn't profitable to do this, roll back.
- //cerr << "NOT FOLDING: " << *I;
- AddrMode = BackupAddrMode;
- AddrModeInsts.resize(OldSize);
- }
- } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
- if (MatchOperationAddr(CE, CE->getOpcode(), Depth))
- return true;
- } else if (isa<ConstantPointerNull>(Addr)) {
- // Null pointer gets folded without affecting the addressing mode.
- return true;
- }
-
- // Worse case, the target should support [reg] addressing modes. :)
- if (!AddrMode.HasBaseReg) {
- AddrMode.HasBaseReg = true;
- AddrMode.BaseReg = Addr;
- // Still check for legality in case the target supports [imm] but not [i+r].
- if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
- return true;
- AddrMode.HasBaseReg = false;
- AddrMode.BaseReg = 0;
- }
-
- // If the base register is already taken, see if we can do [r+r].
- if (AddrMode.Scale == 0) {
- AddrMode.Scale = 1;
- AddrMode.ScaledReg = Addr;
- if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
- return true;
- AddrMode.Scale = 0;
- AddrMode.ScaledReg = 0;
- }
- // Couldn't match.
- return false;
-}
-
-/// IsOperandAMemoryOperand - Check to see if all uses of OpVal by the specified
-/// inline asm call are due to memory operands. If so, return true, otherwise
-/// return false.
-static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
- const TargetLowering &TLI) {
- TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(ImmutableCallSite(CI));
- for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
- TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
-
- // Compute the constraint code and ConstraintType to use.
- TLI.ComputeConstraintToUse(OpInfo, SDValue());
-
- // If this asm operand is our Value*, and if it isn't an indirect memory
- // operand, we can't fold it!
- if (OpInfo.CallOperandVal == OpVal &&
- (OpInfo.ConstraintType != TargetLowering::C_Memory ||
- !OpInfo.isIndirect))
- return false;
- }
-
- return true;
-}
-
-/// FindAllMemoryUses - Recursively walk all the uses of I until we find a
-/// memory use. If we find an obviously non-foldable instruction, return true.
-/// Add the ultimately found memory instructions to MemoryUses.
-static bool FindAllMemoryUses(Instruction *I,
- SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses,
- SmallPtrSet<Instruction*, 16> &ConsideredInsts,
- const TargetLowering &TLI) {
- // If we already considered this instruction, we're done.
- if (!ConsideredInsts.insert(I))
- return false;
-
- // If this is an obviously unfoldable instruction, bail out.
- if (!MightBeFoldableInst(I))
- return true;
-
- // Loop over all the uses, recursively processing them.
- for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
- UI != E; ++UI) {
- User *U = *UI;
-
- if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
- MemoryUses.push_back(std::make_pair(LI, UI.getOperandNo()));
- continue;
- }
-
- if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
- unsigned opNo = UI.getOperandNo();
- if (opNo == 0) return true; // Storing addr, not into addr.
- MemoryUses.push_back(std::make_pair(SI, opNo));
- continue;
- }
-
- if (CallInst *CI = dyn_cast<CallInst>(U)) {
- InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
- if (!IA) return true;
-
- // If this is a memory operand, we're cool, otherwise bail out.
- if (!IsOperandAMemoryOperand(CI, IA, I, TLI))
- return true;
- continue;
- }
-
- if (FindAllMemoryUses(cast<Instruction>(U), MemoryUses, ConsideredInsts,
- TLI))
- return true;
- }
-
- return false;
-}
-
-/// ValueAlreadyLiveAtInst - Retrn true if Val is already known to be live at
-/// the use site that we're folding it into. If so, there is no cost to
-/// include it in the addressing mode. KnownLive1 and KnownLive2 are two values
-/// that we know are live at the instruction already.
-bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
- Value *KnownLive2) {
- // If Val is either of the known-live values, we know it is live!
- if (Val == 0 || Val == KnownLive1 || Val == KnownLive2)
- return true;
-
- // All values other than instructions and arguments (e.g. constants) are live.
- if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;
-
- // If Val is a constant sized alloca in the entry block, it is live, this is
- // true because it is just a reference to the stack/frame pointer, which is
- // live for the whole function.
- if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
- if (AI->isStaticAlloca())
- return true;
-
- // Check to see if this value is already used in the memory instruction's
- // block. If so, it's already live into the block at the very least, so we
- // can reasonably fold it.
- return Val->isUsedInBasicBlock(MemoryInst->getParent());
-}
-
-/// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing
-/// mode of the machine to fold the specified instruction into a load or store
-/// that ultimately uses it. However, the specified instruction has multiple
-/// uses. Given this, it may actually increase register pressure to fold it
-/// into the load. For example, consider this code:
-///
-/// X = ...
-/// Y = X+1
-/// use(Y) -> nonload/store
-/// Z = Y+1
-/// load Z
-///
-/// In this case, Y has multiple uses, and can be folded into the load of Z
-/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
-/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
-/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
-/// number of computations either.
-///
-/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
-/// X was live across 'load Z' for other reasons, we actually *would* want to
-/// fold the addressing mode in the Z case. This would make Y die earlier.
-bool AddressingModeMatcher::
-IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
- ExtAddrMode &AMAfter) {
- if (IgnoreProfitability) return true;
-
- // AMBefore is the addressing mode before this instruction was folded into it,
- // and AMAfter is the addressing mode after the instruction was folded. Get
- // the set of registers referenced by AMAfter and subtract out those
- // referenced by AMBefore: this is the set of values which folding in this
- // address extends the lifetime of.
- //
- // Note that there are only two potential values being referenced here,
- // BaseReg and ScaleReg (global addresses are always available, as are any
- // folded immediates).
- Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
-
- // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
- // lifetime wasn't extended by adding this instruction.
- if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
- BaseReg = 0;
- if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
- ScaledReg = 0;
-
- // If folding this instruction (and it's subexprs) didn't extend any live
- // ranges, we're ok with it.
- if (BaseReg == 0 && ScaledReg == 0)
- return true;
-
- // If all uses of this instruction are ultimately load/store/inlineasm's,
- // check to see if their addressing modes will include this instruction. If
- // so, we can fold it into all uses, so it doesn't matter if it has multiple
- // uses.
- SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
- SmallPtrSet<Instruction*, 16> ConsideredInsts;
- if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI))
- return false; // Has a non-memory, non-foldable use!
-
- // Now that we know that all uses of this instruction are part of a chain of
- // computation involving only operations that could theoretically be folded
- // into a memory use, loop over each of these uses and see if they could
- // *actually* fold the instruction.
- SmallVector<Instruction*, 32> MatchedAddrModeInsts;
- for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
- Instruction *User = MemoryUses[i].first;
- unsigned OpNo = MemoryUses[i].second;
-
- // Get the access type of this use. If the use isn't a pointer, we don't
- // know what it accesses.
- Value *Address = User->getOperand(OpNo);
- if (!Address->getType()->isPointerTy())
- return false;
- Type *AddressAccessTy = Address->getType()->getPointerElementType();
-
- // Do a match against the root of this address, ignoring profitability. This
- // will tell us if the addressing mode for the memory operation will
- // *actually* cover the shared instruction.
- ExtAddrMode Result;
- AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy,
- MemoryInst, Result);
- Matcher.IgnoreProfitability = true;
- bool Success = Matcher.MatchAddr(Address, 0);
- (void)Success; assert(Success && "Couldn't select *anything*?");
-
- // If the match didn't cover I, then it won't be shared by it.
- if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(),
- I) == MatchedAddrModeInsts.end())
- return false;
-
- MatchedAddrModeInsts.clear();
- }
-
- return true;
-}
-
-} // end anonymous namespace
-
-/// IsNonLocalValue - Return true if the specified values are defined in a
-/// different basic block than BB.
-static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
- if (Instruction *I = dyn_cast<Instruction>(V))
- return I->getParent() != BB;
- return false;
-}
-
-/// OptimizeMemoryInst - Load and Store Instructions often have
-/// addressing modes that can do significant amounts of computation. As such,
-/// instruction selection will try to get the load or store to do as much
-/// computation as possible for the program. The problem is that isel can only
-/// see within a single block. As such, we sink as much legal addressing mode
-/// stuff into the block as possible.
-///
-/// This method is used to optimize both load/store and inline asms with memory
-/// operands.
-bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
- Type *AccessTy) {
- Value *Repl = Addr;
-
- // Try to collapse single-value PHI nodes. This is necessary to undo
- // unprofitable PRE transformations.
- SmallVector<Value*, 8> worklist;
- SmallPtrSet<Value*, 16> Visited;
- worklist.push_back(Addr);
-
- // Use a worklist to iteratively look through PHI nodes, and ensure that
- // the addressing mode obtained from the non-PHI roots of the graph
- // are equivalent.
- Value *Consensus = 0;
- unsigned NumUsesConsensus = 0;
- bool IsNumUsesConsensusValid = false;
- SmallVector<Instruction*, 16> AddrModeInsts;
- ExtAddrMode AddrMode;
- while (!worklist.empty()) {
- Value *V = worklist.back();
- worklist.pop_back();
-
- // Break use-def graph loops.
- if (!Visited.insert(V)) {
- Consensus = 0;
- break;
- }
-
- // For a PHI node, push all of its incoming values.
- if (PHINode *P = dyn_cast<PHINode>(V)) {
- for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i)
- worklist.push_back(P->getIncomingValue(i));
- continue;
- }
-
- // For non-PHIs, determine the addressing mode being computed.
- SmallVector<Instruction*, 16> NewAddrModeInsts;
- ExtAddrMode NewAddrMode =
- AddressingModeMatcher::Match(V, AccessTy, MemoryInst,
- NewAddrModeInsts, *TLI);
-
- // This check is broken into two cases with very similar code to avoid using
- // getNumUses() as much as possible. Some values have a lot of uses, so
- // calling getNumUses() unconditionally caused a significant compile-time
- // regression.
- if (!Consensus) {
- Consensus = V;
- AddrMode = NewAddrMode;
- AddrModeInsts = NewAddrModeInsts;
- continue;
- } else if (NewAddrMode == AddrMode) {
- if (!IsNumUsesConsensusValid) {
- NumUsesConsensus = Consensus->getNumUses();
- IsNumUsesConsensusValid = true;
- }
-
- // Ensure that the obtained addressing mode is equivalent to that obtained
- // for all other roots of the PHI traversal. Also, when choosing one
- // such root as representative, select the one with the most uses in order
- // to keep the cost modeling heuristics in AddressingModeMatcher
- // applicable.
- unsigned NumUses = V->getNumUses();
- if (NumUses > NumUsesConsensus) {
- Consensus = V;
- NumUsesConsensus = NumUses;
- AddrModeInsts = NewAddrModeInsts;
- }
- continue;
- }
-
- Consensus = 0;
- break;
- }
-
- // If the addressing mode couldn't be determined, or if multiple different
- // ones were determined, bail out now.
- if (!Consensus) return false;
-
- // Check to see if any of the instructions subsumed by this addr mode are
- // non-local to I's BB.
- bool AnyNonLocal = false;
- for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) {
- if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) {
- AnyNonLocal = true;
- break;
- }
- }
-
- // If all the instructions matched are already in this BB, don't do anything.
- if (!AnyNonLocal) {
- DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n");
- return false;
- }
-
- // Insert this computation right after this user. Since our caller is
- // scanning from the top of the BB to the bottom, any reuse of the expr is
- // guaranteed to happen later.
- IRBuilder<> Builder(MemoryInst);
-
- // We have now determined the addressing expression we want to use, and we
- // know we have to sink it into this block. Check to see if we have already
- // done this for some other load/store instr in this block. If so, reuse the
- // computation.
- Value *&SunkAddr = SunkAddrs[Addr];
- if (SunkAddr) {
- DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
- << *MemoryInst);
- if (SunkAddr->getType() != Addr->getType())
- SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
- } else {
- DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
- << *MemoryInst);
- Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType());
- Value *Result = 0;
-
- // Start with the base register. Do this first so that subsequent address
- // matching finds it last, which will prevent it from trying to match it
- // as the scaled value in case it happens to be a mul. That would be
- // problematic if we've sunk a different mul for the scale, because then
- // we'd end up sinking both muls.
- if (AddrMode.BaseReg) {
- Value *V = AddrMode.BaseReg;
- if (V->getType()->isPointerTy())
- V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
- if (V->getType() != IntPtrTy)
- V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
- Result = V;
- }
-
- // Add the scale value.
- if (AddrMode.Scale) {
- Value *V = AddrMode.ScaledReg;
- if (V->getType() == IntPtrTy) {
- // done.
- } else if (V->getType()->isPointerTy()) {
- V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
- } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
- cast<IntegerType>(V->getType())->getBitWidth()) {
- V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
- } else {
- V = Builder.CreateSExt(V, IntPtrTy, "sunkaddr");
- }
- if (AddrMode.Scale != 1)
- V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
- "sunkaddr");
- if (Result)
- Result = Builder.CreateAdd(Result, V, "sunkaddr");
- else
- Result = V;
- }
-
- // Add in the BaseGV if present.
- if (AddrMode.BaseGV) {
- Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr");
- if (Result)
- Result = Builder.CreateAdd(Result, V, "sunkaddr");
- else
- Result = V;
- }
-
- // Add in the Base Offset if present.
- if (AddrMode.BaseOffs) {
- Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
- if (Result)
- Result = Builder.CreateAdd(Result, V, "sunkaddr");
- else
- Result = V;
- }
-
- if (Result == 0)
- SunkAddr = Constant::getNullValue(Addr->getType());
- else
- SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
- }
-
- MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
-
- // If we have no uses, recursively delete the value and all dead instructions
- // using it.
- if (Repl->use_empty()) {
- // This can cause recursive deletion, which can invalidate our iterator.
- // Use a WeakVH to hold onto it in case this happens.
- WeakVH IterHandle(CurInstIterator);
- BasicBlock *BB = CurInstIterator->getParent();
-
- RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo);
-
- if (IterHandle != CurInstIterator) {
- // If the iterator instruction was recursively deleted, start over at the
- // start of the block.
- CurInstIterator = BB->begin();
- SunkAddrs.clear();
- }
- }
- ++NumMemoryInsts;
- return true;
-}
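
The address that OptimizeMemoryInst rebuilds above is simply BaseReg + Scale*ScaledReg + BaseGV + BaseOffs, composed in that fixed order. The stand-alone C++ sketch below shows the arithmetic the sunk IR performs; the AddrMode struct is a hypothetical stand-in for ExtAddrMode, and the pointer/integer casts as well as the SunkAddrs reuse cache are omitted.

#include <cstdint>

// Hypothetical stand-in for the matcher's ExtAddrMode result.
struct AddrMode {
  uint64_t BaseReg;   // base register value, 0 if absent
  uint64_t ScaledReg; // index register value, 0 if absent
  int64_t  Scale;     // scale factor, 0 if there is no scaled index
  uint64_t BaseGV;    // address of the global, 0 if absent
  int64_t  BaseOffs;  // constant displacement
};

// Composes the components in the same order as the emitted IR: base
// register first, then the scaled index, the global, and the offset.
uint64_t materializeAddress(const AddrMode &AM) {
  uint64_t Result = AM.BaseReg;
  if (AM.Scale)
    Result += static_cast<uint64_t>(AM.Scale) * AM.ScaledReg;
  Result += AM.BaseGV;
  Result += static_cast<uint64_t>(AM.BaseOffs);
  return Result;
}
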
-
-/// OptimizeInlineAsmInst - If there are any memory operands, use
- /// OptimizeMemoryInst to sink their address computation into the block when
-/// possible / profitable.
-bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
- bool MadeChange = false;
-
- TargetLowering::AsmOperandInfoVector
- TargetConstraints = TLI->ParseConstraints(CS);
- unsigned ArgNo = 0;
- for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
- TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
-
- // Compute the constraint code and ConstraintType to use.
- TLI->ComputeConstraintToUse(OpInfo, SDValue());
-
- if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
- OpInfo.isIndirect) {
- Value *OpVal = CS->getArgOperand(ArgNo++);
- MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType());
- } else if (OpInfo.Type == InlineAsm::isInput)
- ArgNo++;
- }
-
- return MadeChange;
-}
-
-/// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same
-/// basic block as the load, unless conditions are unfavorable. This allows
-/// SelectionDAG to fold the extend into the load.
-///
-bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) {
- // Look for a load being extended.
- LoadInst *LI = dyn_cast<LoadInst>(I->getOperand(0));
- if (!LI) return false;
-
- // If they're already in the same block, there's nothing to do.
- if (LI->getParent() == I->getParent())
- return false;
-
- // If the load has other users and the truncate is not free, this probably
- // isn't worthwhile.
- if (!LI->hasOneUse() &&
- TLI && (TLI->isTypeLegal(TLI->getValueType(LI->getType())) ||
- !TLI->isTypeLegal(TLI->getValueType(I->getType()))) &&
- !TLI->isTruncateFree(I->getType(), LI->getType()))
- return false;
-
- // Check whether the target supports casts folded into loads.
- unsigned LType;
- if (isa<ZExtInst>(I))
- LType = ISD::ZEXTLOAD;
- else {
- assert(isa<SExtInst>(I) && "Unexpected ext type!");
- LType = ISD::SEXTLOAD;
- }
- if (TLI && !TLI->isLoadExtLegal(LType, TLI->getValueType(LI->getType())))
- return false;
-
- // Move the extend into the same block as the load, so that SelectionDAG
- // can fold it.
- I->removeFromParent();
- I->insertAfter(LI);
- ++NumExtsMoved;
- return true;
-}
-
-bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
- BasicBlock *DefBB = I->getParent();
-
- // If the result of a {s|z}ext and its source are both live out, rewrite all
- // other uses of the source with the result of the extension.
- Value *Src = I->getOperand(0);
- if (Src->hasOneUse())
- return false;
-
- // Only do this xform if truncating is free.
- if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
- return false;
-
- // Only safe to perform the optimization if the source is also defined in
- // this block.
- if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
- return false;
-
- bool DefIsLiveOut = false;
- for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
- UI != E; ++UI) {
- Instruction *User = cast<Instruction>(*UI);
-
- // Figure out which BB this ext is used in.
- BasicBlock *UserBB = User->getParent();
- if (UserBB == DefBB) continue;
- DefIsLiveOut = true;
- break;
- }
- if (!DefIsLiveOut)
- return false;
-
- // Make sure none of the uses are PHI nodes.
- for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
- UI != E; ++UI) {
- Instruction *User = cast<Instruction>(*UI);
- BasicBlock *UserBB = User->getParent();
- if (UserBB == DefBB) continue;
- // Be conservative. We don't want this xform to end up introducing
- // reloads just before load / store instructions.
- if (isa<PHINode>(User) || isa<LoadInst>(User) || isa<StoreInst>(User))
- return false;
- }
-
- // InsertedTruncs - Insert at most one trunc in each block.
- DenseMap<BasicBlock*, Instruction*> InsertedTruncs;
-
- bool MadeChange = false;
- for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
- UI != E; ++UI) {
- Use &TheUse = UI.getUse();
- Instruction *User = cast<Instruction>(*UI);
-
- // Figure out which BB this ext is used in.
- BasicBlock *UserBB = User->getParent();
- if (UserBB == DefBB) continue;
-
- // Both src and def are live in this block. Rewrite the use.
- Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
-
- if (!InsertedTrunc) {
- BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
- InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
- }
-
- // Replace a use of the {s|z}ext source with a use of the result.
- TheUse = InsertedTrunc;
- ++NumExtUses;
- MadeChange = true;
- }
-
- return MadeChange;
-}
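
Roughly, OptimizeExtUses keeps only the extended value live across the block boundary and recovers the narrow source with a (free) truncate where it is still needed. A hand-written C++ analogue of the before/after shape, not taken from the pass or its tests:

#include <cstdint>

int64_t useAcrossBlocks(int32_t Src, bool EarlyExit) {
  int64_t Ext = static_cast<int64_t>(Src); // the sext; Src and Ext both live out
  if (EarlyExit)
    return Ext;
  // Before the transform the narrow Src would be used here, keeping two
  // values live across the branch.  After it, the use is rewritten as a
  // truncate of Ext, so only the extended value has to stay live.
  int32_t Narrow = static_cast<int32_t>(Ext); // the inserted trunc
  return Narrow + 1;
}
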
-
-/// isFormingBranchFromSelectProfitable - Returns true if a SelectInst should be
-/// turned into an explicit branch.
-static bool isFormingBranchFromSelectProfitable(SelectInst *SI) {
- // FIXME: This should use the same heuristics as IfConversion to determine
- // whether a select is better represented as a branch. This requires that
- // branch probability metadata is preserved for the select, which is not the
- // case currently.
-
- CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
-
- // If the branch is predicted right, an out of order CPU can avoid blocking on
- // the compare. Emit cmovs on compares with a memory operand as branches to
- // avoid stalls on the load from memory. If the compare has more than one use
- // there's probably another cmov or setcc around so it's not worth emitting a
- // branch.
- if (!Cmp)
- return false;
-
- Value *CmpOp0 = Cmp->getOperand(0);
- Value *CmpOp1 = Cmp->getOperand(1);
-
- // We check that the memory operand has one use to avoid uses of the loaded
- // value directly after the compare, making branches unprofitable.
- return Cmp->hasOneUse() &&
- ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) ||
- (isa<LoadInst>(CmpOp1) && CmpOp1->hasOneUse()));
-}
-
-
-/// If we have a SelectInst that will likely profit from branch prediction,
-/// turn it into a branch.
-bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) {
- bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
-
- // Can we convert the 'select' to CF?
- if (DisableSelectToBranch || OptSize || !TLI || VectorCond)
- return false;
-
- TargetLowering::SelectSupportKind SelectKind;
- if (VectorCond)
- SelectKind = TargetLowering::VectorMaskSelect;
- else if (SI->getType()->isVectorTy())
- SelectKind = TargetLowering::ScalarCondVectorVal;
- else
- SelectKind = TargetLowering::ScalarValSelect;
-
- // Do we have efficient codegen support for this kind of 'select'?
- if (TLI->isSelectSupported(SelectKind)) {
- // We have efficient codegen support for the select instruction.
- // Check if it is profitable to keep this 'select'.
- if (!TLI->isPredictableSelectExpensive() ||
- !isFormingBranchFromSelectProfitable(SI))
- return false;
- }
-
- ModifiedDT = true;
-
- // First, we split the block containing the select into 2 blocks.
- BasicBlock *StartBlock = SI->getParent();
- BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(SI));
- BasicBlock *NextBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
-
- // Create a new block serving as the landing pad for the branch.
- BasicBlock *SmallBlock = BasicBlock::Create(SI->getContext(), "select.mid",
- NextBlock->getParent(), NextBlock);
-
- // Move the unconditional branch from the block with the select in it into our
- // landing pad block.
- StartBlock->getTerminator()->eraseFromParent();
- BranchInst::Create(NextBlock, SmallBlock);
-
- // Insert the real conditional branch based on the original condition.
- BranchInst::Create(NextBlock, SmallBlock, SI->getCondition(), SI);
-
- // The select itself is replaced with a PHI Node.
- PHINode *PN = PHINode::Create(SI->getType(), 2, "", NextBlock->begin());
- PN->takeName(SI);
- PN->addIncoming(SI->getTrueValue(), StartBlock);
- PN->addIncoming(SI->getFalseValue(), SmallBlock);
- SI->replaceAllUsesWith(PN);
- SI->eraseFromParent();
-
- // Instruct OptimizeBlock to skip to the next block.
- CurInstIterator = StartBlock->end();
- ++NumSelectsExpanded;
- return true;
-}
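
Expressed at the source level, OptimizeSelectInst turns a conditional move back into real control flow so that a well-predicted branch can hide the latency of the compare. A minimal hand-written C++ illustration (the function names are invented for this sketch):

// Before: a select-style conditional move; both arms are available and the
// flag merely chooses the result.
int selectForm(bool Flag, int TrueVal, int FalseVal) {
  return Flag ? TrueVal : FalseVal;
}

// After: the same value produced through explicit control flow, mirroring
// the conditional branch plus phi node that the pass emits.
int branchForm(bool Flag, int TrueVal, int FalseVal) {
  int Result;
  if (Flag)
    Result = TrueVal;  // incoming value from the original block
  else
    Result = FalseVal; // incoming value from the new "select.mid" block
  return Result;       // the phi in "select.end"
}
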
-
-bool CodeGenPrepare::OptimizeInst(Instruction *I) {
- if (PHINode *P = dyn_cast<PHINode>(I)) {
- // It is possible for very late stage optimizations (such as SimplifyCFG)
- // to introduce PHI nodes too late to be cleaned up. If we detect such a
- // trivial PHI, go ahead and zap it here.
- if (Value *V = SimplifyInstruction(P, TLI ? TLI->getDataLayout() : 0,
- TLInfo, DT)) {
- P->replaceAllUsesWith(V);
- P->eraseFromParent();
- ++NumPHIsElim;
- return true;
- }
- return false;
- }
-
- if (CastInst *CI = dyn_cast<CastInst>(I)) {
- // If the source of the cast is a constant, then this should have
- // already been constant folded. The only reason NOT to constant fold
- // it is if something (e.g. LSR) was careful to place the constant
- // evaluation in a block other than the one that uses it (e.g. to hoist
- // the address of globals out of a loop). If this is the case, we don't
- // want to forward-subst the cast.
- if (isa<Constant>(CI->getOperand(0)))
- return false;
-
- if (TLI && OptimizeNoopCopyExpression(CI, *TLI))
- return true;
-
- if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
- bool MadeChange = MoveExtToFormExtLoad(I);
- return MadeChange | OptimizeExtUses(I);
- }
- return false;
- }
-
- if (CmpInst *CI = dyn_cast<CmpInst>(I))
- return OptimizeCmpExpression(CI);
-
- if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- if (TLI)
- return OptimizeMemoryInst(I, I->getOperand(0), LI->getType());
- return false;
- }
-
- if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- if (TLI)
- return OptimizeMemoryInst(I, SI->getOperand(1),
- SI->getOperand(0)->getType());
- return false;
- }
-
- if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
- if (GEPI->hasAllZeroIndices()) {
- /// The GEP operand must be a pointer, so must its result -> BitCast
- Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
- GEPI->getName(), GEPI);
- GEPI->replaceAllUsesWith(NC);
- GEPI->eraseFromParent();
- ++NumGEPsElim;
- OptimizeInst(NC);
- return true;
- }
- return false;
- }
-
- if (CallInst *CI = dyn_cast<CallInst>(I))
- return OptimizeCallInst(CI);
-
- if (SelectInst *SI = dyn_cast<SelectInst>(I))
- return OptimizeSelectInst(SI);
-
- return false;
-}
-
-// In this pass we look for GEP and cast instructions that are used
-// across basic blocks and rewrite them to improve basic-block-at-a-time
-// selection.
-bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
- SunkAddrs.clear();
- bool MadeChange = false;
-
- CurInstIterator = BB.begin();
- while (CurInstIterator != BB.end())
- MadeChange |= OptimizeInst(CurInstIterator++);
-
- MadeChange |= DupRetToEnableTailCallOpts(&BB);
-
- return MadeChange;
-}
-
- // If llvm.dbg.value is far away from the value, then ISel may not be able
- // to handle it properly. ISel will drop llvm.dbg.value if it cannot
- // find a node corresponding to the value.
-bool CodeGenPrepare::PlaceDbgValues(Function &F) {
- bool MadeChange = false;
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
- Instruction *PrevNonDbgInst = NULL;
- for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) {
- Instruction *Insn = BI; ++BI;
- DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
- if (!DVI) {
- PrevNonDbgInst = Insn;
- continue;
- }
-
- Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue());
- if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) {
- DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI);
- DVI->removeFromParent();
- if (isa<PHINode>(VI))
- DVI->insertBefore(VI->getParent()->getFirstInsertionPt());
- else
- DVI->insertAfter(VI);
- MadeChange = true;
- ++NumDbgValueMoved;
- }
- }
- }
- return MadeChange;
-}
diff --git a/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
new file mode 100644
index 000000000000..763d02b9fcd6
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -0,0 +1,602 @@
+//===- ConstantHoisting.cpp - Prepare code for expensive constants --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass identifies expensive constants to hoist and coalesces them to
+// better prepare the code for SelectionDAG-based code generation. This works
+// around the limitations of the basic-block-at-a-time approach.
+//
+// First it scans all instructions for integer constants and calculates their
+// cost. If the constant can be folded into the instruction (the cost is
+// TCC_Free) or the cost is just a simple operation (TCC_Basic), then we don't
+// consider it expensive and leave it alone. This is the default behavior and
+// the default implementation of getIntImmCost will always return TCC_Free.
+//
+// If the cost is more than TCC_Basic, then the integer constant can't be folded
+// into the instruction and it might be beneficial to hoist the constant.
+// Similar constants are coalesced to reduce register pressure and
+// materialization code.
+//
+// When a constant is hoisted, it is also hidden behind a bitcast to force it to
+// be live-out of the basic block. Otherwise the constant would be just
+// duplicated and each basic block would have its own copy in the SelectionDAG.
+// The SelectionDAG recognizes such constants as opaque and doesn't perform
+// certain transformations on them, which would create a new expensive constant.
+//
+// This optimization is only applied to integer constants in instructions and
+// simple (this means not nested) constant cast expressions. For example:
+// %0 = load i64* inttoptr (i64 big_constant to i64*)
+//===----------------------------------------------------------------------===//
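
As a rough source-level picture of the intent (a hand-written illustration, not taken from the pass's test suite), two nearby expensive constants end up as one materialized base plus a cheap add-immediate:

#include <cstdint>

// Before: two expensive 64-bit immediates, each materialized on its own.
uint64_t before(const uint64_t *P) {
  return P[0] * 0x1234567890ABCDEFULL + P[1] * 0x1234567890ABCDFFULL;
}

// After (conceptually): one base constant is materialized once (hidden
// behind a bitcast in the IR) and the second constant is rebased as
// Base + 0x10, which fits a cheap add-immediate.
uint64_t after(const uint64_t *P) {
  uint64_t Base = 0x1234567890ABCDEFULL;
  return P[0] * Base + P[1] * (Base + 0x10);
}
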
+
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include <tuple>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "consthoist"
+
+STATISTIC(NumConstantsHoisted, "Number of constants hoisted");
+STATISTIC(NumConstantsRebased, "Number of constants rebased");
+
+namespace {
+struct ConstantUser;
+struct RebasedConstantInfo;
+
+typedef SmallVector<ConstantUser, 8> ConstantUseListType;
+typedef SmallVector<RebasedConstantInfo, 4> RebasedConstantListType;
+
+/// \brief Keeps track of the user of a constant and the operand index where the
+/// constant is used.
+struct ConstantUser {
+ Instruction *Inst;
+ unsigned OpndIdx;
+
+ ConstantUser(Instruction *Inst, unsigned Idx) : Inst(Inst), OpndIdx(Idx) { }
+};
+
+/// \brief Keeps track of a constant candidate and its uses.
+struct ConstantCandidate {
+ ConstantUseListType Uses;
+ ConstantInt *ConstInt;
+ unsigned CumulativeCost;
+
+ ConstantCandidate(ConstantInt *ConstInt)
+ : ConstInt(ConstInt), CumulativeCost(0) { }
+
+ /// \brief Add the user to the use list and update the cost.
+ void addUser(Instruction *Inst, unsigned Idx, unsigned Cost) {
+ CumulativeCost += Cost;
+ Uses.push_back(ConstantUser(Inst, Idx));
+ }
+};
+
+/// \brief This represents a constant that has been rebased with respect to a
+/// base constant. The difference to the base constant is recorded in Offset.
+struct RebasedConstantInfo {
+ ConstantUseListType Uses;
+ Constant *Offset;
+
+ RebasedConstantInfo(ConstantUseListType &&Uses, Constant *Offset)
+ : Uses(Uses), Offset(Offset) { }
+};
+
+/// \brief A base constant and all its rebased constants.
+struct ConstantInfo {
+ ConstantInt *BaseConstant;
+ RebasedConstantListType RebasedConstants;
+};
+
+/// \brief The constant hoisting pass.
+class ConstantHoisting : public FunctionPass {
+ typedef DenseMap<ConstantInt *, unsigned> ConstCandMapType;
+ typedef std::vector<ConstantCandidate> ConstCandVecType;
+
+ const TargetTransformInfo *TTI;
+ DominatorTree *DT;
+ BasicBlock *Entry;
+
+ /// Keeps track of constant candidates found in the function.
+ ConstCandVecType ConstCandVec;
+
+ /// Keep track of cast instructions we already cloned.
+ SmallDenseMap<Instruction *, Instruction *> ClonedCastMap;
+
+ /// These are the final constants we decided to hoist.
+ SmallVector<ConstantInfo, 8> ConstantVec;
+public:
+ static char ID; // Pass identification, replacement for typeid
+ ConstantHoisting() : FunctionPass(ID), TTI(nullptr), DT(nullptr),
+ Entry(nullptr) {
+ initializeConstantHoistingPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &Fn) override;
+
+ const char *getPassName() const override { return "Constant Hoisting"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<TargetTransformInfo>();
+ }
+
+private:
+ /// \brief Initialize the pass.
+ void setup(Function &Fn) {
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ TTI = &getAnalysis<TargetTransformInfo>();
+ Entry = &Fn.getEntryBlock();
+ }
+
+ /// \brief Cleanup.
+ void cleanup() {
+ ConstantVec.clear();
+ ClonedCastMap.clear();
+ ConstCandVec.clear();
+
+ TTI = nullptr;
+ DT = nullptr;
+ Entry = nullptr;
+ }
+
+ Instruction *findMatInsertPt(Instruction *Inst, unsigned Idx = ~0U) const;
+ Instruction *findConstantInsertionPoint(const ConstantInfo &ConstInfo) const;
+ void collectConstantCandidates(ConstCandMapType &ConstCandMap,
+ Instruction *Inst, unsigned Idx,
+ ConstantInt *ConstInt);
+ void collectConstantCandidates(ConstCandMapType &ConstCandMap,
+ Instruction *Inst);
+ void collectConstantCandidates(Function &Fn);
+ void findAndMakeBaseConstant(ConstCandVecType::iterator S,
+ ConstCandVecType::iterator E);
+ void findBaseConstants();
+ void emitBaseConstants(Instruction *Base, Constant *Offset,
+ const ConstantUser &ConstUser);
+ bool emitBaseConstants();
+ void deleteDeadCastInst() const;
+ bool optimizeConstants(Function &Fn);
+};
+}
+
+char ConstantHoisting::ID = 0;
+INITIALIZE_PASS_BEGIN(ConstantHoisting, "consthoist", "Constant Hoisting",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_END(ConstantHoisting, "consthoist", "Constant Hoisting",
+ false, false)
+
+FunctionPass *llvm::createConstantHoistingPass() {
+ return new ConstantHoisting();
+}
+
+/// \brief Perform the constant hoisting optimization for the given function.
+bool ConstantHoisting::runOnFunction(Function &Fn) {
+ DEBUG(dbgs() << "********** Begin Constant Hoisting **********\n");
+ DEBUG(dbgs() << "********** Function: " << Fn.getName() << '\n');
+
+ setup(Fn);
+
+ bool MadeChange = optimizeConstants(Fn);
+
+ if (MadeChange) {
+ DEBUG(dbgs() << "********** Function after Constant Hoisting: "
+ << Fn.getName() << '\n');
+ DEBUG(dbgs() << Fn);
+ }
+ DEBUG(dbgs() << "********** End Constant Hoisting **********\n");
+
+ cleanup();
+
+ return MadeChange;
+}
+
+
+/// \brief Find the constant materialization insertion point.
+Instruction *ConstantHoisting::findMatInsertPt(Instruction *Inst,
+ unsigned Idx) const {
+ // If the operand is a cast instruction, then we have to materialize the
+ // constant before the cast instruction.
+ if (Idx != ~0U) {
+ Value *Opnd = Inst->getOperand(Idx);
+ if (auto CastInst = dyn_cast<Instruction>(Opnd))
+ if (CastInst->isCast())
+ return CastInst;
+ }
+
+ // The simple and common case. This also includes constant expressions.
+ if (!isa<PHINode>(Inst) && !isa<LandingPadInst>(Inst))
+ return Inst;
+
+ // We can't insert directly before a phi node or landing pad. Insert before
+ // the terminator of the incoming or dominating block.
+ assert(Entry != Inst->getParent() && "PHI or landing pad in entry block!");
+ if (Idx != ~0U && isa<PHINode>(Inst))
+ return cast<PHINode>(Inst)->getIncomingBlock(Idx)->getTerminator();
+
+ BasicBlock *IDom = DT->getNode(Inst->getParent())->getIDom()->getBlock();
+ return IDom->getTerminator();
+}
+
+/// \brief Find an insertion point that dominates all uses.
+Instruction *ConstantHoisting::
+findConstantInsertionPoint(const ConstantInfo &ConstInfo) const {
+ assert(!ConstInfo.RebasedConstants.empty() && "Invalid constant info entry.");
+ // Collect all basic blocks.
+ SmallPtrSet<BasicBlock *, 8> BBs;
+ for (auto const &RCI : ConstInfo.RebasedConstants)
+ for (auto const &U : RCI.Uses)
+ BBs.insert(findMatInsertPt(U.Inst, U.OpndIdx)->getParent());
+
+ if (BBs.count(Entry))
+ return &Entry->front();
+
+ while (BBs.size() >= 2) {
+ BasicBlock *BB, *BB1, *BB2;
+ BB1 = *BBs.begin();
+ BB2 = *std::next(BBs.begin());
+ BB = DT->findNearestCommonDominator(BB1, BB2);
+ if (BB == Entry)
+ return &Entry->front();
+ BBs.erase(BB1);
+ BBs.erase(BB2);
+ BBs.insert(BB);
+ }
+ assert((BBs.size() == 1) && "Expected only one element.");
+ Instruction &FirstInst = (*BBs.begin())->front();
+ return findMatInsertPt(&FirstInst);
+}
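
The loop above collapses the set of using blocks pairwise through the nearest common dominator until a single block remains. A self-contained sketch of that reduction over a toy tree (plain C++, with parent pointers standing in for the dominator tree; it assumes all nodes belong to one rooted tree):

#include <iterator>
#include <set>

struct Node {
  Node *Parent; // immediate dominator; nullptr only for the root
  int Depth;    // distance from the root
};

// Nearest common dominator of two nodes, found by walking up by depth.
Node *nearestCommonDom(Node *A, Node *B) {
  while (A != B) {
    if (A->Depth >= B->Depth)
      A = A->Parent;
    else
      B = B->Parent;
  }
  return A;
}

// Collapses a non-empty set of nodes to one node dominating all of them.
Node *commonInsertionPoint(std::set<Node *> Nodes) {
  while (Nodes.size() >= 2) {
    Node *A = *Nodes.begin();
    Node *B = *std::next(Nodes.begin());
    Nodes.erase(A);
    Nodes.erase(B);
    Nodes.insert(nearestCommonDom(A, B));
  }
  return *Nodes.begin();
}
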
+
+
+/// \brief Record constant integer ConstInt for instruction Inst at operand
+/// index Idx.
+///
+/// The operand at index Idx is not necessarily the constant integer itself. It
+/// could also be a cast instruction or a constant expression that uses the
+/// constant integer.
+void ConstantHoisting::collectConstantCandidates(ConstCandMapType &ConstCandMap,
+ Instruction *Inst,
+ unsigned Idx,
+ ConstantInt *ConstInt) {
+ unsigned Cost;
+ // Ask the target about the cost of materializing the constant for the given
+ // instruction and operand index.
+ if (auto IntrInst = dyn_cast<IntrinsicInst>(Inst))
+ Cost = TTI->getIntImmCost(IntrInst->getIntrinsicID(), Idx,
+ ConstInt->getValue(), ConstInt->getType());
+ else
+ Cost = TTI->getIntImmCost(Inst->getOpcode(), Idx, ConstInt->getValue(),
+ ConstInt->getType());
+
+ // Ignore cheap integer constants.
+ if (Cost > TargetTransformInfo::TCC_Basic) {
+ ConstCandMapType::iterator Itr;
+ bool Inserted;
+ std::tie(Itr, Inserted) = ConstCandMap.insert(std::make_pair(ConstInt, 0));
+ if (Inserted) {
+ ConstCandVec.push_back(ConstantCandidate(ConstInt));
+ Itr->second = ConstCandVec.size() - 1;
+ }
+ ConstCandVec[Itr->second].addUser(Inst, Idx, Cost);
+ DEBUG(if (isa<ConstantInt>(Inst->getOperand(Idx)))
+ dbgs() << "Collect constant " << *ConstInt << " from " << *Inst
+ << " with cost " << Cost << '\n';
+ else
+ dbgs() << "Collect constant " << *ConstInt << " indirectly from "
+ << *Inst << " via " << *Inst->getOperand(Idx) << " with cost "
+ << Cost << '\n';
+ );
+ }
+}
+
+/// \brief Scan the instruction for expensive integer constants and record them
+/// in the constant candidate vector.
+void ConstantHoisting::collectConstantCandidates(ConstCandMapType &ConstCandMap,
+ Instruction *Inst) {
+ // Skip all cast instructions. They are visited indirectly later on.
+ if (Inst->isCast())
+ return;
+
+ // Can't handle inline asm. Skip it.
+ if (auto Call = dyn_cast<CallInst>(Inst))
+ if (isa<InlineAsm>(Call->getCalledValue()))
+ return;
+
+ // Scan all operands.
+ for (unsigned Idx = 0, E = Inst->getNumOperands(); Idx != E; ++Idx) {
+ Value *Opnd = Inst->getOperand(Idx);
+
+ // Visit constant integers.
+ if (auto ConstInt = dyn_cast<ConstantInt>(Opnd)) {
+ collectConstantCandidates(ConstCandMap, Inst, Idx, ConstInt);
+ continue;
+ }
+
+ // Visit cast instructions that have constant integers.
+ if (auto CastInst = dyn_cast<Instruction>(Opnd)) {
+ // Only visit cast instructions, which have been skipped. All other
+ // instructions should have already been visited.
+ if (!CastInst->isCast())
+ continue;
+
+ if (auto *ConstInt = dyn_cast<ConstantInt>(CastInst->getOperand(0))) {
+ // Pretend the constant is directly used by the instruction and ignore
+ // the cast instruction.
+ collectConstantCandidates(ConstCandMap, Inst, Idx, ConstInt);
+ continue;
+ }
+ }
+
+ // Visit constant expressions that have constant integers.
+ if (auto ConstExpr = dyn_cast<ConstantExpr>(Opnd)) {
+ // Only visit constant cast expressions.
+ if (!ConstExpr->isCast())
+ continue;
+
+ if (auto ConstInt = dyn_cast<ConstantInt>(ConstExpr->getOperand(0))) {
+ // Pretend the constant is directly used by the instruction and ignore
+ // the constant expression.
+ collectConstantCandidates(ConstCandMap, Inst, Idx, ConstInt);
+ continue;
+ }
+ }
+ } // end of for all operands
+}
+
+/// \brief Collect all integer constants in the function that cannot be folded
+/// into an instruction itself.
+void ConstantHoisting::collectConstantCandidates(Function &Fn) {
+ ConstCandMapType ConstCandMap;
+ for (Function::iterator BB : Fn)
+ for (BasicBlock::iterator Inst : *BB)
+ collectConstantCandidates(ConstCandMap, Inst);
+}
+
+/// \brief Find the base constant within the given range and rebase all other
+/// constants with respect to the base constant.
+void ConstantHoisting::findAndMakeBaseConstant(ConstCandVecType::iterator S,
+ ConstCandVecType::iterator E) {
+ auto MaxCostItr = S;
+ unsigned NumUses = 0;
+ // Use the constant that has the maximum cost as base constant.
+ for (auto ConstCand = S; ConstCand != E; ++ConstCand) {
+ NumUses += ConstCand->Uses.size();
+ if (ConstCand->CumulativeCost > MaxCostItr->CumulativeCost)
+ MaxCostItr = ConstCand;
+ }
+
+ // Don't hoist constants that have only one use.
+ if (NumUses <= 1)
+ return;
+
+ ConstantInfo ConstInfo;
+ ConstInfo.BaseConstant = MaxCostItr->ConstInt;
+ Type *Ty = ConstInfo.BaseConstant->getType();
+
+ // Rebase the constants with respect to the base constant.
+ for (auto ConstCand = S; ConstCand != E; ++ConstCand) {
+ APInt Diff = ConstCand->ConstInt->getValue() -
+ ConstInfo.BaseConstant->getValue();
+ Constant *Offset = Diff == 0 ? nullptr : ConstantInt::get(Ty, Diff);
+ ConstInfo.RebasedConstants.push_back(
+ RebasedConstantInfo(std::move(ConstCand->Uses), Offset));
+ }
+ ConstantVec.push_back(ConstInfo);
+}
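
Stripped of the LLVM types, the selection logic amounts to: pick the candidate with the largest cumulative cost as the base, give up if the whole range has at most one use, and record every other candidate as base plus an offset. A stand-alone C++ sketch under those assumptions (the struct and function names are invented for the illustration):

#include <cstdint>
#include <vector>

struct Candidate { uint64_t Value; unsigned Cost; unsigned NumUses; };

// Picks the most expensive candidate as the base and expresses every
// candidate as Base + Offset; returns false if hoisting isn't worthwhile.
bool rebaseRange(const std::vector<Candidate> &Cands,
                 uint64_t &Base, std::vector<uint64_t> &Offsets) {
  if (Cands.empty())
    return false;
  unsigned TotalUses = 0;
  size_t MaxCostIdx = 0;
  for (size_t I = 0; I != Cands.size(); ++I) {
    TotalUses += Cands[I].NumUses;
    if (Cands[I].Cost > Cands[MaxCostIdx].Cost)
      MaxCostIdx = I;
  }
  if (TotalUses <= 1) // a single use is never worth hoisting
    return false;
  Base = Cands[MaxCostIdx].Value;
  for (const Candidate &C : Cands)
    Offsets.push_back(C.Value - Base); // wrapping subtraction, like APInt
  return true;
}
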
+
+/// \brief Finds and combines constant candidates that can be easily
+/// rematerialized with an add from a common base constant.
+void ConstantHoisting::findBaseConstants() {
+ // Sort the constants by value and type. This invalidates the mapping!
+ std::sort(ConstCandVec.begin(), ConstCandVec.end(),
+ [](const ConstantCandidate &LHS, const ConstantCandidate &RHS) {
+ if (LHS.ConstInt->getType() != RHS.ConstInt->getType())
+ return LHS.ConstInt->getType()->getBitWidth() <
+ RHS.ConstInt->getType()->getBitWidth();
+ return LHS.ConstInt->getValue().ult(RHS.ConstInt->getValue());
+ });
+
+ // Simple linear scan through the sorted constant candidate vector for viable
+ // merge candidates.
+ auto MinValItr = ConstCandVec.begin();
+ for (auto CC = std::next(ConstCandVec.begin()), E = ConstCandVec.end();
+ CC != E; ++CC) {
+ if (MinValItr->ConstInt->getType() == CC->ConstInt->getType()) {
+ // Check if the constant is in range of an add with immediate.
+ APInt Diff = CC->ConstInt->getValue() - MinValItr->ConstInt->getValue();
+ if ((Diff.getBitWidth() <= 64) &&
+ TTI->isLegalAddImmediate(Diff.getSExtValue()))
+ continue;
+ }
+    // We now either have a different constant type or the constant is no
+    // longer in range of an add with immediate.
+ findAndMakeBaseConstant(MinValItr, CC);
+ // Start a new base constant search.
+ MinValItr = CC;
+ }
+ // Finalize the last base constant search.
+ findAndMakeBaseConstant(MinValItr, ConstCandVec.end());
+}
+
+/// \brief Updates the operand at Idx in instruction Inst with the result of
+/// instruction Mat. If the instruction is a PHI node then special
+/// handling for duplicate values from the same incoming basic block is
+/// required.
+/// \return The update will always succeed, but the return value indicates if
+/// Mat was used for the update or not.
+static bool updateOperand(Instruction *Inst, unsigned Idx, Instruction *Mat) {
+ if (auto PHI = dyn_cast<PHINode>(Inst)) {
+ // Check if any previous operand of the PHI node has the same incoming basic
+ // block. This is a very odd case that happens when the incoming basic block
+ // has a switch statement. In this case use the same value as the previous
+ // operand(s), otherwise we will fail verification due to different values.
+ // The values are actually the same, but the variable names are different
+ // and the verifier doesn't like that.
+ BasicBlock *IncomingBB = PHI->getIncomingBlock(Idx);
+ for (unsigned i = 0; i < Idx; ++i) {
+ if (PHI->getIncomingBlock(i) == IncomingBB) {
+ Value *IncomingVal = PHI->getIncomingValue(i);
+ Inst->setOperand(Idx, IncomingVal);
+ return false;
+ }
+ }
+ }
+
+ Inst->setOperand(Idx, Mat);
+ return true;
+}
+
+/// \brief Emit materialization code for all rebased constants and update their
+/// users.
+void ConstantHoisting::emitBaseConstants(Instruction *Base, Constant *Offset,
+ const ConstantUser &ConstUser) {
+ Instruction *Mat = Base;
+ if (Offset) {
+ Instruction *InsertionPt = findMatInsertPt(ConstUser.Inst,
+ ConstUser.OpndIdx);
+ Mat = BinaryOperator::Create(Instruction::Add, Base, Offset,
+ "const_mat", InsertionPt);
+
+ DEBUG(dbgs() << "Materialize constant (" << *Base->getOperand(0)
+ << " + " << *Offset << ") in BB "
+ << Mat->getParent()->getName() << '\n' << *Mat << '\n');
+ Mat->setDebugLoc(ConstUser.Inst->getDebugLoc());
+ }
+ Value *Opnd = ConstUser.Inst->getOperand(ConstUser.OpndIdx);
+
+ // Visit constant integer.
+ if (isa<ConstantInt>(Opnd)) {
+ DEBUG(dbgs() << "Update: " << *ConstUser.Inst << '\n');
+ if (!updateOperand(ConstUser.Inst, ConstUser.OpndIdx, Mat) && Offset)
+ Mat->eraseFromParent();
+ DEBUG(dbgs() << "To : " << *ConstUser.Inst << '\n');
+ return;
+ }
+
+ // Visit cast instruction.
+ if (auto CastInst = dyn_cast<Instruction>(Opnd)) {
+    assert(CastInst->isCast() && "Expected a cast instruction!");
+    // Check if we have already visited this cast instruction to avoid
+    // unnecessary cloning.
+ Instruction *&ClonedCastInst = ClonedCastMap[CastInst];
+ if (!ClonedCastInst) {
+ ClonedCastInst = CastInst->clone();
+ ClonedCastInst->setOperand(0, Mat);
+ ClonedCastInst->insertAfter(CastInst);
+ // Use the same debug location as the original cast instruction.
+ ClonedCastInst->setDebugLoc(CastInst->getDebugLoc());
+ DEBUG(dbgs() << "Clone instruction: " << *CastInst << '\n'
+ << "To : " << *ClonedCastInst << '\n');
+ }
+
+ DEBUG(dbgs() << "Update: " << *ConstUser.Inst << '\n');
+ updateOperand(ConstUser.Inst, ConstUser.OpndIdx, ClonedCastInst);
+ DEBUG(dbgs() << "To : " << *ConstUser.Inst << '\n');
+ return;
+ }
+
+ // Visit constant expression.
+ if (auto ConstExpr = dyn_cast<ConstantExpr>(Opnd)) {
+ Instruction *ConstExprInst = ConstExpr->getAsInstruction();
+ ConstExprInst->setOperand(0, Mat);
+ ConstExprInst->insertBefore(findMatInsertPt(ConstUser.Inst,
+ ConstUser.OpndIdx));
+
+ // Use the same debug location as the instruction we are about to update.
+ ConstExprInst->setDebugLoc(ConstUser.Inst->getDebugLoc());
+
+ DEBUG(dbgs() << "Create instruction: " << *ConstExprInst << '\n'
+ << "From : " << *ConstExpr << '\n');
+ DEBUG(dbgs() << "Update: " << *ConstUser.Inst << '\n');
+ if (!updateOperand(ConstUser.Inst, ConstUser.OpndIdx, ConstExprInst)) {
+ ConstExprInst->eraseFromParent();
+ if (Offset)
+ Mat->eraseFromParent();
+ }
+ DEBUG(dbgs() << "To : " << *ConstUser.Inst << '\n');
+ return;
+ }
+}
+
+/// \brief Hoist and hide the base constant behind a bitcast and emit
+/// materialization code for derived constants.
+bool ConstantHoisting::emitBaseConstants() {
+ bool MadeChange = false;
+ for (auto const &ConstInfo : ConstantVec) {
+ // Hoist and hide the base constant behind a bitcast.
+ Instruction *IP = findConstantInsertionPoint(ConstInfo);
+ IntegerType *Ty = ConstInfo.BaseConstant->getType();
+ Instruction *Base =
+ new BitCastInst(ConstInfo.BaseConstant, Ty, "const", IP);
+ DEBUG(dbgs() << "Hoist constant (" << *ConstInfo.BaseConstant << ") to BB "
+ << IP->getParent()->getName() << '\n' << *Base << '\n');
+ NumConstantsHoisted++;
+
+ // Emit materialization code for all rebased constants.
+ for (auto const &RCI : ConstInfo.RebasedConstants) {
+ NumConstantsRebased++;
+ for (auto const &U : RCI.Uses)
+ emitBaseConstants(Base, RCI.Offset, U);
+ }
+
+ // Use the same debug location as the last user of the constant.
+ assert(!Base->use_empty() && "The use list is empty!?");
+ assert(isa<Instruction>(Base->user_back()) &&
+ "All uses should be instructions.");
+ Base->setDebugLoc(cast<Instruction>(Base->user_back())->getDebugLoc());
+
+ // Correct for base constant, which we counted above too.
+ NumConstantsRebased--;
+ MadeChange = true;
+ }
+ return MadeChange;
+}
+
+/// \brief Check all cast instructions we made a copy of and remove them if they
+/// have no more users.
+void ConstantHoisting::deleteDeadCastInst() const {
+ for (auto const &I : ClonedCastMap)
+ if (I.first->use_empty())
+ I.first->eraseFromParent();
+}
+
+/// \brief Optimize expensive integer constants in the given function.
+bool ConstantHoisting::optimizeConstants(Function &Fn) {
+ // Collect all constant candidates.
+ collectConstantCandidates(Fn);
+
+ // There are no constant candidates to worry about.
+ if (ConstCandVec.empty())
+ return false;
+
+ // Combine constants that can be easily materialized with an add from a common
+ // base constant.
+ findBaseConstants();
+
+ // There are no constants to emit.
+ if (ConstantVec.empty())
+ return false;
+
+ // Finally hoist the base constant and emit materialization code for dependent
+ // constants.
+ bool MadeChange = emitBaseConstants();
+
+ // Cleanup dead instructions.
+ deleteDeadCastInst();
+
+ return MadeChange;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp b/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp
index d5a96eceb993..dd51ce1bc28c 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp
@@ -18,19 +18,20 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "constprop"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Pass.h"
-#include "llvm/Support/InstIterator.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include <set>
using namespace llvm;
+#define DEBUG_TYPE "constprop"
+
STATISTIC(NumInstKilled, "Number of instructions killed");
namespace {
@@ -40,9 +41,9 @@ namespace {
initializeConstantPropagationPass(*PassRegistry::getPassRegistry());
}
- bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<TargetLibraryInfo>();
}
@@ -67,7 +68,8 @@ bool ConstantPropagation::runOnFunction(Function &F) {
WorkList.insert(&*i);
}
bool Changed = false;
- DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
while (!WorkList.empty()) {
@@ -75,12 +77,11 @@ bool ConstantPropagation::runOnFunction(Function &F) {
WorkList.erase(WorkList.begin()); // Get an element from the worklist...
if (!I->use_empty()) // Don't muck with dead instructions...
- if (Constant *C = ConstantFoldInstruction(I, TD, TLI)) {
+ if (Constant *C = ConstantFoldInstruction(I, DL, TLI)) {
// Add all of the users of this instruction to the worklist, they might
// be constant propagatable now...
- for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
- UI != UE; ++UI)
- WorkList.insert(cast<Instruction>(*UI));
+ for (User *U : I->users())
+ WorkList.insert(cast<Instruction>(U));
// Replace all of the uses of a variable with uses of the constant.
I->replaceAllUsesWith(C);
diff --git a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 995782e1bc6b..082946229b35 100644
--- a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -11,21 +11,22 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "correlated-value-propagation"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
+#define DEBUG_TYPE "correlated-value-propagation"
+
STATISTIC(NumPhis, "Number of phis propagated");
STATISTIC(NumSelects, "Number of selects propagated");
STATISTIC(NumMemAccess, "Number of memory access targets propagated");
@@ -48,9 +49,9 @@ namespace {
initializeCorrelatedValuePropagationPass(*PassRegistry::getPassRegistry());
}
- bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LazyValueInfo>();
}
};
@@ -138,7 +139,7 @@ bool CorrelatedValuePropagation::processPHI(PHINode *P) {
}
bool CorrelatedValuePropagation::processMemAccess(Instruction *I) {
- Value *Pointer = 0;
+ Value *Pointer = nullptr;
if (LoadInst *L = dyn_cast<LoadInst>(I))
Pointer = L->getPointerOperand();
else
@@ -281,6 +282,9 @@ bool CorrelatedValuePropagation::processSwitch(SwitchInst *SI) {
}
bool CorrelatedValuePropagation::runOnFunction(Function &F) {
+ if (skipOptnoneFunction(F))
+ return false;
+
LVI = &getAnalysis<LazyValueInfo>();
bool FnChanged = false;
diff --git a/contrib/llvm/lib/Transforms/Scalar/DCE.cpp b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
index e8a090af40c3..99fac751df8e 100644
--- a/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
@@ -16,16 +16,17 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "dce"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Pass.h"
-#include "llvm/Support/InstIterator.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
+#define DEBUG_TYPE "dce"
+
STATISTIC(DIEEliminated, "Number of insts removed by DIE pass");
STATISTIC(DCEEliminated, "Number of insts removed");
@@ -38,7 +39,9 @@ namespace {
DeadInstElimination() : BasicBlockPass(ID) {
initializeDeadInstEliminationPass(*PassRegistry::getPassRegistry());
}
- virtual bool runOnBasicBlock(BasicBlock &BB) {
+ bool runOnBasicBlock(BasicBlock &BB) override {
+ if (skipOptnoneFunction(BB))
+ return false;
TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
bool Changed = false;
for (BasicBlock::iterator DI = BB.begin(); DI != BB.end(); ) {
@@ -52,7 +55,7 @@ namespace {
return Changed;
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
}
};
@@ -77,9 +80,9 @@ namespace {
initializeDCEPass(*PassRegistry::getPassRegistry());
}
- virtual bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
}
};
@@ -89,6 +92,9 @@ char DCE::ID = 0;
INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false)
bool DCE::runOnFunction(Function &F) {
+ if (skipOptnoneFunction(F))
+ return false;
+
TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
// Start out with all of the instructions in the worklist...
diff --git a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 57432c7d71d8..3af8ee7546fb 100644
--- a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -15,19 +15,18 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "dse"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CaptureTracking.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
@@ -38,6 +37,8 @@
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
+#define DEBUG_TYPE "dse"
+
STATISTIC(NumFastStores, "Number of stores deleted");
STATISTIC(NumFastOther , "Number of other instrs removed");
@@ -49,14 +50,17 @@ namespace {
const TargetLibraryInfo *TLI;
static char ID; // Pass identification, replacement for typeid
- DSE() : FunctionPass(ID), AA(0), MD(0), DT(0) {
+ DSE() : FunctionPass(ID), AA(nullptr), MD(nullptr), DT(nullptr) {
initializeDSEPass(*PassRegistry::getPassRegistry());
}
- virtual bool runOnFunction(Function &F) {
+ bool runOnFunction(Function &F) override {
+ if (skipOptnoneFunction(F))
+ return false;
+
AA = &getAnalysis<AliasAnalysis>();
MD = &getAnalysis<MemoryDependenceAnalysis>();
- DT = &getAnalysis<DominatorTree>();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
TLI = AA->getTargetLibraryInfo();
bool Changed = false;
@@ -66,7 +70,7 @@ namespace {
if (DT->isReachableFromEntry(I))
Changed |= runOnBasicBlock(*I);
- AA = 0; MD = 0; DT = 0;
+ AA = nullptr; MD = nullptr; DT = nullptr;
return Changed;
}
@@ -76,13 +80,13 @@ namespace {
void RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
SmallSetVector<Value*, 16> &DeadStackObjects);
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
- AU.addRequired<DominatorTree>();
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<AliasAnalysis>();
AU.addRequired<MemoryDependenceAnalysis>();
AU.addPreserved<AliasAnalysis>();
- AU.addPreserved<DominatorTree>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<MemoryDependenceAnalysis>();
}
};
@@ -90,7 +94,7 @@ namespace {
char DSE::ID = 0;
INITIALIZE_PASS_BEGIN(DSE, "dse", "Dead Store Elimination", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(DSE, "dse", "Dead Store Elimination", false, false)
@@ -108,9 +112,9 @@ FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }
/// If ValueSet is non-null, remove any deleted instructions from it as well.
///
static void DeleteDeadInstruction(Instruction *I,
- MemoryDependenceAnalysis &MD,
- const TargetLibraryInfo *TLI,
- SmallSetVector<Value*, 16> *ValueSet = 0) {
+ MemoryDependenceAnalysis &MD,
+ const TargetLibraryInfo *TLI,
+ SmallSetVector<Value*, 16> *ValueSet = nullptr) {
SmallVector<Instruction*, 32> NowDeadInsts;
NowDeadInsts.push_back(I);
@@ -128,7 +132,7 @@ static void DeleteDeadInstruction(Instruction *I,
for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
Value *Op = DeadInst->getOperand(op);
- DeadInst->setOperand(op, 0);
+ DeadInst->setOperand(op, nullptr);
// If this operand just became dead, add it to the NowDeadInsts list.
if (!Op->use_empty()) continue;
@@ -190,6 +194,7 @@ static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo *TLI) {
/// describe the memory operations for this instruction.
static AliasAnalysis::Location
getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
+ const DataLayout *DL = AA.getDataLayout();
if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
return AA.getLocation(SI);
@@ -199,13 +204,13 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
// If we don't have target data around, an unknown size in Location means
// that we should use the size of the pointee type. This isn't valid for
// memset/memcpy, which writes more than an i8.
- if (Loc.Size == AliasAnalysis::UnknownSize && AA.getDataLayout() == 0)
+ if (Loc.Size == AliasAnalysis::UnknownSize && DL == nullptr)
return AliasAnalysis::Location();
return Loc;
}
IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst);
- if (II == 0) return AliasAnalysis::Location();
+ if (!II) return AliasAnalysis::Location();
switch (II->getIntrinsicID()) {
default: return AliasAnalysis::Location(); // Unhandled intrinsic.
@@ -213,7 +218,7 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
// If we don't have target data around, an unknown size in Location means
// that we should use the size of the pointee type. This isn't valid for
// init.trampoline, which writes more than an i8.
- if (AA.getDataLayout() == 0) return AliasAnalysis::Location();
+ if (!DL) return AliasAnalysis::Location();
// FIXME: We don't know the size of the trampoline, so we can't really
// handle it here.
@@ -341,6 +346,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
AliasAnalysis &AA,
int64_t &EarlierOff,
int64_t &LaterOff) {
+ const DataLayout *DL = AA.getDataLayout();
const Value *P1 = Earlier.Ptr->stripPointerCasts();
const Value *P2 = Later.Ptr->stripPointerCasts();
@@ -354,8 +360,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
// If we have no DataLayout information around, then the size of the store
// is inferrable from the pointee type. If they are the same type, then
// we know that the store is safe.
- if (AA.getDataLayout() == 0 &&
- Later.Ptr->getType() == Earlier.Ptr->getType())
+ if (DL == nullptr && Later.Ptr->getType() == Earlier.Ptr->getType())
return OverwriteComplete;
return OverwriteUnknown;
@@ -369,17 +374,14 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
// Otherwise, we have to have size information, and the later store has to be
// larger than the earlier one.
if (Later.Size == AliasAnalysis::UnknownSize ||
- Earlier.Size == AliasAnalysis::UnknownSize ||
- AA.getDataLayout() == 0)
+ Earlier.Size == AliasAnalysis::UnknownSize || DL == nullptr)
return OverwriteUnknown;
// Check to see if the later store is to the entire object (either a global,
- // an alloca, or a byval argument). If so, then it clearly overwrites any
- // other store to the same object.
- const DataLayout *TD = AA.getDataLayout();
-
- const Value *UO1 = GetUnderlyingObject(P1, TD),
- *UO2 = GetUnderlyingObject(P2, TD);
+ // an alloca, or a byval/inalloca argument). If so, then it clearly
+ // overwrites any other store to the same object.
+ const Value *UO1 = GetUnderlyingObject(P1, DL),
+ *UO2 = GetUnderlyingObject(P2, DL);
// If we can't resolve the same pointers to the same object, then we can't
// analyze them at all.
@@ -397,8 +399,8 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
// pointers are equal, then we can reason about the two stores.
EarlierOff = 0;
LaterOff = 0;
- const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, TD);
- const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, TD);
+ const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, DL);
+ const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, DL);
// If the base pointers still differ, we have two completely different stores.
if (BP1 != BP2)
@@ -460,7 +462,7 @@ static bool isPossibleSelfRead(Instruction *Inst,
// Self reads can only happen for instructions that read memory. Get the
// location read.
AliasAnalysis::Location InstReadLoc = getLocForRead(Inst, AA);
- if (InstReadLoc.Ptr == 0) return false; // Not a reading instruction.
+ if (!InstReadLoc.Ptr) return false; // Not a reading instruction.
// If the read and written loc obviously don't alias, it isn't a read.
if (AA.isNoAlias(InstReadLoc, InstStoreLoc)) return false;
@@ -527,7 +529,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
DeleteDeadInstruction(SI, *MD, TLI);
- if (NextInst == 0) // Next instruction deleted.
+ if (!NextInst) // Next instruction deleted.
BBI = BB.begin();
else if (BBI != BB.begin()) // Revisit this instruction if possible.
--BBI;
@@ -542,7 +544,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
AliasAnalysis::Location Loc = getLocForWrite(Inst, *AA);
// If we didn't get a useful location, fail.
- if (Loc.Ptr == 0)
+ if (!Loc.Ptr)
continue;
while (InstDep.isDef() || InstDep.isClobber()) {
@@ -556,7 +558,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
Instruction *DepWrite = InstDep.getInst();
AliasAnalysis::Location DepLoc = getLocForWrite(DepWrite, *AA);
// If we didn't get a useful location, or if it isn't a size, bail out.
- if (DepLoc.Ptr == 0)
+ if (!DepLoc.Ptr)
break;
// If we find a write that is a) removable (i.e., non-volatile), b) is
@@ -679,7 +681,7 @@ bool DSE::HandleFree(CallInst *F) {
if (!AA->isMustAlias(F->getArgOperand(0), DepPointer))
break;
- Instruction *Next = llvm::next(BasicBlock::iterator(Dependency));
+ Instruction *Next = std::next(BasicBlock::iterator(Dependency));
// DCE instructions only used to calculate that store
DeleteDeadInstruction(Dependency, *MD, TLI);
@@ -701,22 +703,6 @@ bool DSE::HandleFree(CallInst *F) {
return MadeChange;
}
-namespace {
- struct CouldRef {
- typedef Value *argument_type;
- const CallSite CS;
- AliasAnalysis *AA;
-
- bool operator()(Value *I) {
- // See if the call site touches the value.
- AliasAnalysis::ModRefResult A =
- AA->getModRefInfo(CS, I, getPointerSize(I, *AA));
-
- return A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref;
- }
- };
-}
-
/// handleEndBlock - Remove dead stores to stack-allocated locations in the
/// function end block. Ex:
/// %A = alloca i32
@@ -742,11 +728,11 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
DeadStackObjects.insert(I);
}
- // Treat byval arguments the same, stores to them are dead at the end of the
- // function.
+ // Treat byval or inalloca arguments the same, stores to them are dead at the
+ // end of the function.
for (Function::arg_iterator AI = BB.getParent()->arg_begin(),
AE = BB.getParent()->arg_end(); AI != AE; ++AI)
- if (AI->hasByValAttr())
+ if (AI->hasByValOrInAllocaAttr())
DeadStackObjects.insert(AI);
// Scan the basic block backwards
@@ -776,7 +762,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
for (SmallVectorImpl<Value *>::iterator I = Pointers.begin(),
E = Pointers.end(); I != E; ++I) {
dbgs() << **I;
- if (llvm::next(I) != E)
+ if (std::next(I) != E)
dbgs() << ", ";
}
dbgs() << '\n');
@@ -818,8 +804,13 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
// If the call might load from any of our allocas, then any store above
// the call is live.
- CouldRef Pred = { CS, AA };
- DeadStackObjects.remove_if(Pred);
+ DeadStackObjects.remove_if([&](Value *I) {
+ // See if the call site touches the value.
+ AliasAnalysis::ModRefResult A =
+ AA->getModRefInfo(CS, I, getPointerSize(I, *AA));
+
+ return A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref;
+ });
// If all of the allocas were clobbered by the call then we're not going
// to find anything else to process.
@@ -862,20 +853,6 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
return MadeChange;
}
-namespace {
- struct CouldAlias {
- typedef Value *argument_type;
- const AliasAnalysis::Location &LoadedLoc;
- AliasAnalysis *AA;
-
- bool operator()(Value *I) {
- // See if the loaded location could alias the stack location.
- AliasAnalysis::Location StackLoc(I, getPointerSize(I, *AA));
- return !AA->isNoAlias(StackLoc, LoadedLoc);
- }
- };
-}
-
/// RemoveAccessedObjects - Check to see if the specified location may alias any
/// of the stack objects in the DeadStackObjects set. If so, they become live
/// because the location is being loaded.
@@ -895,6 +872,9 @@ void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
}
// Remove objects that could alias LoadedLoc.
- CouldAlias Pred = { LoadedLoc, AA };
- DeadStackObjects.remove_if(Pred);
+ DeadStackObjects.remove_if([&](Value *I) {
+ // See if the loaded location could alias the stack location.
+ AliasAnalysis::Location StackLoc(I, getPointerSize(I, *AA));
+ return !AA->isNoAlias(StackLoc, LoadedLoc);
+ });
}
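The two deleted helper structs above (CouldRef and CouldAlias) were only ever used as predicates for DeadStackObjects.remove_if, so the change folds them into capturing lambdas at the call sites. A minimal sketch of the same pattern using standard containers rather than LLVM's SetVector; the FakeAA type and callMayRead are stand-ins invented for illustration:

    #include <algorithm>
    #include <vector>

    // Stand-in for AliasAnalysis: answers "might this call read Obj?".
    struct FakeAA {
      bool callMayRead(const void *Obj) const { return Obj != nullptr; }
    };

    // Same shape as the new DSE code: the capturing lambda replaces a functor
    // struct that previously had to carry the call site and AA as fields.
    void pruneObjectsReadByCall(std::vector<void *> &DeadStackObjects,
                                const FakeAA &AA) {
      DeadStackObjects.erase(
          std::remove_if(DeadStackObjects.begin(), DeadStackObjects.end(),
                         [&](void *Obj) { return AA.callMayRead(Obj); }),
          DeadStackObjects.end());
    }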
diff --git a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 5266894bc34c..735f5c194cb5 100644
--- a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -12,23 +12,24 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "early-cse"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/ScopedHashTable.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/RecyclingAllocator.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
-#include <deque>
+#include <vector>
using namespace llvm;
+#define DEBUG_TYPE "early-cse"
+
STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
STATISTIC(NumCSE, "Number of instructions CSE'd");
STATISTIC(NumCSELoad, "Number of load instructions CSE'd");
@@ -207,7 +208,7 @@ namespace {
return false;
CallInst *CI = dyn_cast<CallInst>(Inst);
- if (CI == 0 || !CI->onlyReadsMemory())
+ if (!CI || !CI->onlyReadsMemory())
return false;
return true;
}
@@ -262,7 +263,7 @@ namespace {
/// cases.
class EarlyCSE : public FunctionPass {
public:
- const DataLayout *TD;
+ const DataLayout *DL;
const TargetLibraryInfo *TLI;
DominatorTree *DT;
typedef RecyclingAllocator<BumpPtrAllocator,
@@ -303,7 +304,7 @@ public:
initializeEarlyCSEPass(*PassRegistry::getPassRegistry());
}
- bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
private:
@@ -376,8 +377,8 @@ private:
bool processNode(DomTreeNode *Node);
// This transformation requires dominator postdominator info
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<DominatorTree>();
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetLibraryInfo>();
AU.setPreservesCFG();
}
@@ -392,7 +393,7 @@ FunctionPass *llvm::createEarlyCSEPass() {
}
INITIALIZE_PASS_BEGIN(EarlyCSE, "early-cse", "Early CSE", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(EarlyCSE, "early-cse", "Early CSE", false, false)
@@ -405,14 +406,14 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// have invalidated the live-out memory values of our parent value. For now,
// just be conservative and invalidate memory if this block has multiple
// predecessors.
- if (BB->getSinglePredecessor() == 0)
+ if (!BB->getSinglePredecessor())
++CurrentGeneration;
/// LastStore - Keep track of the last non-volatile store that we saw... for
/// as long as there is no instruction that reads memory. If we see a store
/// to the same location, we delete the dead store. This zaps trivial dead
/// stores which can occur in bitfield code among other things.
- StoreInst *LastStore = 0;
+ StoreInst *LastStore = nullptr;
bool Changed = false;
@@ -432,7 +433,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// If the instruction can be simplified (e.g. X+0 = X) then replace it with
// its simpler value.
- if (Value *V = SimplifyInstruction(Inst, TD, TLI, DT)) {
+ if (Value *V = SimplifyInstruction(Inst, DL, TLI, DT)) {
DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << " to: " << *V << '\n');
Inst->replaceAllUsesWith(V);
Inst->eraseFromParent();
@@ -462,7 +463,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
// Ignore volatile loads.
if (!LI->isSimple()) {
- LastStore = 0;
+ LastStore = nullptr;
continue;
}
@@ -470,7 +471,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// generation, replace this instruction.
std::pair<Value*, unsigned> InVal =
AvailableLoads->lookup(Inst->getOperand(0));
- if (InVal.first != 0 && InVal.second == CurrentGeneration) {
+ if (InVal.first != nullptr && InVal.second == CurrentGeneration) {
DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst << " to: "
<< *InVal.first << '\n');
if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first);
@@ -483,20 +484,20 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// Otherwise, remember that we have this instruction.
AvailableLoads->insert(Inst->getOperand(0),
std::pair<Value*, unsigned>(Inst, CurrentGeneration));
- LastStore = 0;
+ LastStore = nullptr;
continue;
}
// If this instruction may read from memory, forget LastStore.
if (Inst->mayReadFromMemory())
- LastStore = 0;
+ LastStore = nullptr;
// If this is a read-only call, process it.
if (CallValue::canHandle(Inst)) {
// If we have an available version of this call, and if it is the right
// generation, replace this instruction.
std::pair<Value*, unsigned> InVal = AvailableCalls->lookup(Inst);
- if (InVal.first != 0 && InVal.second == CurrentGeneration) {
+ if (InVal.first != nullptr && InVal.second == CurrentGeneration) {
DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst << " to: "
<< *InVal.first << '\n');
if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first);
@@ -528,7 +529,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
LastStore->eraseFromParent();
Changed = true;
++NumDSE;
- LastStore = 0;
+ LastStore = nullptr;
continue;
}
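For context, the LastStore bookkeeping in this and the surrounding hunks implements a very local dead-store elimination: a simple store is deleted when the next memory operation in the block is another simple store to the same pointer. A rough, simplified sketch of that idea over a toy instruction list (the real pass also tracks memory generations and volatility; every name below is invented):

    #include <vector>

    struct ToyInst {
      bool IsSimpleStore = false;
      bool ReadsMemory = false;
      int Ptr = 0;            // stand-in for the store's pointer operand
      bool Deleted = false;
    };

    // Whenever two simple stores to the same pointer have no memory read
    // between them, the earlier one is trivially dead.
    void zapTrivialDeadStores(std::vector<ToyInst> &Block) {
      ToyInst *LastStore = nullptr;
      for (ToyInst &I : Block) {
        if (I.ReadsMemory)
          LastStore = nullptr;       // a read keeps the previous store alive
        if (!I.IsSimpleStore)
          continue;
        if (LastStore && LastStore->Ptr == I.Ptr)
          LastStore->Deleted = true; // overwritten before it was ever read
        LastStore = &I;
      }
    }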
@@ -552,11 +553,15 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
bool EarlyCSE::runOnFunction(Function &F) {
- std::deque<StackNode *> nodesToProcess;
+ if (skipOptnoneFunction(F))
+ return false;
+
+ std::vector<StackNode *> nodesToProcess;
- TD = getAnalysisIfAvailable<DataLayout>();
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
TLI = &getAnalysis<TargetLibraryInfo>();
- DT = &getAnalysis<DominatorTree>();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
// Tables that the pass uses when walking the domtree.
ScopedHTType AVTable;
@@ -570,7 +575,7 @@ bool EarlyCSE::runOnFunction(Function &F) {
bool Changed = false;
// Process the root node.
- nodesToProcess.push_front(
+ nodesToProcess.push_back(
new StackNode(AvailableValues, AvailableLoads, AvailableCalls,
CurrentGeneration, DT->getRootNode(),
DT->getRootNode()->begin(),
@@ -583,7 +588,7 @@ bool EarlyCSE::runOnFunction(Function &F) {
while (!nodesToProcess.empty()) {
// Grab the first item off the stack. Set the current generation, remove
// the node from the stack, and process it.
- StackNode *NodeToProcess = nodesToProcess.front();
+ StackNode *NodeToProcess = nodesToProcess.back();
// Initialize class members.
CurrentGeneration = NodeToProcess->currentGeneration();
@@ -597,7 +602,7 @@ bool EarlyCSE::runOnFunction(Function &F) {
} else if (NodeToProcess->childIter() != NodeToProcess->end()) {
// Push the next child onto the stack.
DomTreeNode *child = NodeToProcess->nextChild();
- nodesToProcess.push_front(
+ nodesToProcess.push_back(
new StackNode(AvailableValues,
AvailableLoads,
AvailableCalls,
@@ -607,7 +612,7 @@ bool EarlyCSE::runOnFunction(Function &F) {
// It has been processed, and there are no more children to process,
// so delete it and pop it off the stack.
delete NodeToProcess;
- nodesToProcess.pop_front();
+ nodesToProcess.pop_back();
}
} // while (!nodes...)
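The container change in this file swaps std::deque used as a front-stack (push_front/front/pop_front) for std::vector used as a back-stack (push_back/back/pop_back); the traversal order over the dominator tree is unchanged. A minimal sketch of an explicit-stack depth-first walk in the same style (simplified: the real code keeps each node on the stack until its child iterator is exhausted, and the node type here is invented):

    #include <vector>

    struct ToyNode {
      std::vector<ToyNode *> Children;
    };

    // Iterative preorder DFS with a std::vector as the work stack.
    template <typename Visitor>
    void depthFirstVisit(ToyNode *Root, Visitor Visit) {
      std::vector<ToyNode *> Stack;
      Stack.push_back(Root);           // was push_front on the deque
      while (!Stack.empty()) {
        ToyNode *N = Stack.back();     // was front()
        Stack.pop_back();              // was pop_front()
        Visit(N);
        for (ToyNode *C : N->Children)
          Stack.push_back(C);
      }
    }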
diff --git a/contrib/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp b/contrib/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
index e7de07f246db..0430c1898c8d 100644
--- a/contrib/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
@@ -11,14 +11,15 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "flattencfg"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/IR/CFG.h"
#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
+#define DEBUG_TYPE "flattencfg"
+
namespace {
struct FlattenCFGPass : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
@@ -26,9 +27,9 @@ public:
FlattenCFGPass() : FunctionPass(ID) {
initializeFlattenCFGPassPass(*PassRegistry::getPassRegistry());
}
- bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
- void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AliasAnalysis>();
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
index 6af269dfed32..106eba099ca0 100644
--- a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -15,35 +15,34 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "gvn"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Hashing.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Assembly/Writer.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/PatternMatch.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
@@ -51,6 +50,8 @@
using namespace llvm;
using namespace PatternMatch;
+#define DEBUG_TYPE "gvn"
+
STATISTIC(NumGVNInstr, "Number of instructions deleted");
STATISTIC(NumGVNLoad, "Number of loads deleted");
STATISTIC(NumGVNPRE, "Number of instructions PRE'd");
@@ -214,13 +215,13 @@ Expression ValueTable::create_cmp_expression(unsigned Opcode,
}
Expression ValueTable::create_extractvalue_expression(ExtractValueInst *EI) {
- assert(EI != 0 && "Not an ExtractValueInst?");
+ assert(EI && "Not an ExtractValueInst?");
Expression e;
e.type = EI->getType();
e.opcode = 0;
IntrinsicInst *I = dyn_cast<IntrinsicInst>(EI->getAggregateOperand());
- if (I != 0 && EI->getNumIndices() == 1 && *EI->idx_begin() == 0 ) {
+ if (I != nullptr && EI->getNumIndices() == 1 && *EI->idx_begin() == 0 ) {
// EI might be an extract from one of our recognised intrinsics. If it
// is we'll synthesize a semantically equivalent expression instead on
// an extract value expression.
@@ -328,7 +329,7 @@ uint32_t ValueTable::lookup_or_add_call(CallInst *C) {
const MemoryDependenceAnalysis::NonLocalDepInfo &deps =
MD->getNonLocalCallDependency(CallSite(C));
// FIXME: Move the checking logic to MemDep!
- CallInst* cdep = 0;
+ CallInst* cdep = nullptr;
// Check to see if we have a single dominating call instruction that is
// identical to C.
@@ -339,8 +340,8 @@ uint32_t ValueTable::lookup_or_add_call(CallInst *C) {
// We don't handle non-definitions. If we already have a call, reject
// instruction dependencies.
- if (!I->getResult().isDef() || cdep != 0) {
- cdep = 0;
+ if (!I->getResult().isDef() || cdep != nullptr) {
+ cdep = nullptr;
break;
}
@@ -351,7 +352,7 @@ uint32_t ValueTable::lookup_or_add_call(CallInst *C) {
continue;
}
- cdep = 0;
+ cdep = nullptr;
break;
}
@@ -552,7 +553,7 @@ namespace {
static AvailableValueInBlock getUndef(BasicBlock *BB) {
AvailableValueInBlock Res;
Res.BB = BB;
- Res.Val.setPointer(0);
+ Res.Val.setPointer(nullptr);
Res.Val.setInt(UndefVal);
Res.Offset = 0;
return Res;
@@ -587,7 +588,7 @@ namespace {
bool NoLoads;
MemoryDependenceAnalysis *MD;
DominatorTree *DT;
- const DataLayout *TD;
+ const DataLayout *DL;
const TargetLibraryInfo *TLI;
SetVector<BasicBlock *> DeadBlocks;
@@ -612,11 +613,11 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
explicit GVN(bool noloads = false)
- : FunctionPass(ID), NoLoads(noloads), MD(0) {
+ : FunctionPass(ID), NoLoads(noloads), MD(nullptr) {
initializeGVNPass(*PassRegistry::getPassRegistry());
}
- bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
/// markInstructionForDeletion - This removes the specified instruction from
/// our various maps and marks it for deletion.
@@ -625,7 +626,7 @@ namespace {
InstrsToErase.push_back(I);
}
- const DataLayout *getDataLayout() const { return TD; }
+ const DataLayout *getDataLayout() const { return DL; }
DominatorTree &getDominatorTree() const { return *DT; }
AliasAnalysis *getAliasAnalysis() const { return VN.getAliasAnalysis(); }
MemoryDependenceAnalysis &getMemDep() const { return *MD; }
@@ -650,7 +651,7 @@ namespace {
/// removeFromLeaderTable - Scan the list of values corresponding to a given
/// value number, and remove the given instruction if encountered.
void removeFromLeaderTable(uint32_t N, Instruction *I, BasicBlock *BB) {
- LeaderTableEntry* Prev = 0;
+ LeaderTableEntry* Prev = nullptr;
LeaderTableEntry* Curr = &LeaderTable[N];
while (Curr->Val != I || Curr->BB != BB) {
@@ -662,8 +663,8 @@ namespace {
Prev->Next = Curr->Next;
} else {
if (!Curr->Next) {
- Curr->Val = 0;
- Curr->BB = 0;
+ Curr->Val = nullptr;
+ Curr->BB = nullptr;
} else {
LeaderTableEntry* Next = Curr->Next;
Curr->Val = Next->Val;
@@ -677,14 +678,14 @@ namespace {
SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;
// This transformation requires dominator postdominator info
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<DominatorTree>();
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetLibraryInfo>();
if (!NoLoads)
AU.addRequired<MemoryDependenceAnalysis>();
AU.addRequired<AliasAnalysis>();
- AU.addPreserved<DominatorTree>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<AliasAnalysis>();
}
@@ -727,7 +728,7 @@ FunctionPass *llvm::createGVNPass(bool NoLoads) {
INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false)
@@ -818,8 +819,7 @@ SpeculationFailure:
// Mark as unavailable.
EntryVal = 0;
- for (succ_iterator I = succ_begin(Entry), E = succ_end(Entry); I != E; ++I)
- BBWorklist.push_back(*I);
+ BBWorklist.append(succ_begin(Entry), succ_end(Entry));
} while (!BBWorklist.empty());
return false;
@@ -830,7 +830,7 @@ SpeculationFailure:
/// CoerceAvailableValueToLoadType will succeed.
static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
Type *LoadTy,
- const DataLayout &TD) {
+ const DataLayout &DL) {
// If the loaded or stored value is a first class array or struct, don't try
// to transform them. We need to be able to bitcast to integer.
if (LoadTy->isStructTy() || LoadTy->isArrayTy() ||
@@ -839,8 +839,8 @@ static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
return false;
// The store has to be at least as big as the load.
- if (TD.getTypeSizeInBits(StoredVal->getType()) <
- TD.getTypeSizeInBits(LoadTy))
+ if (DL.getTypeSizeInBits(StoredVal->getType()) <
+ DL.getTypeSizeInBits(LoadTy))
return false;
return true;
@@ -855,15 +855,15 @@ static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
Type *LoadedTy,
Instruction *InsertPt,
- const DataLayout &TD) {
- if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD))
- return 0;
+ const DataLayout &DL) {
+ if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, DL))
+ return nullptr;
// If this is already the right type, just return it.
Type *StoredValTy = StoredVal->getType();
- uint64_t StoreSize = TD.getTypeSizeInBits(StoredValTy);
- uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy);
+ uint64_t StoreSize = DL.getTypeSizeInBits(StoredValTy);
+ uint64_t LoadSize = DL.getTypeSizeInBits(LoadedTy);
// If the store and reload are the same size, we can always reuse it.
if (StoreSize == LoadSize) {
@@ -874,13 +874,13 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
// Convert source pointers to integers, which can be bitcast.
if (StoredValTy->getScalarType()->isPointerTy()) {
- StoredValTy = TD.getIntPtrType(StoredValTy);
+ StoredValTy = DL.getIntPtrType(StoredValTy);
StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
}
Type *TypeToCastTo = LoadedTy;
if (TypeToCastTo->getScalarType()->isPointerTy())
- TypeToCastTo = TD.getIntPtrType(TypeToCastTo);
+ TypeToCastTo = DL.getIntPtrType(TypeToCastTo);
if (StoredValTy != TypeToCastTo)
StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt);
@@ -899,7 +899,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
// Convert source pointers to integers, which can be manipulated.
if (StoredValTy->getScalarType()->isPointerTy()) {
- StoredValTy = TD.getIntPtrType(StoredValTy);
+ StoredValTy = DL.getIntPtrType(StoredValTy);
StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
}
@@ -911,7 +911,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
// If this is a big-endian system, we need to shift the value down to the low
// bits so that a truncate will work.
- if (TD.isBigEndian()) {
+ if (DL.isBigEndian()) {
Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize);
StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt);
}
@@ -942,15 +942,15 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
Value *WritePtr,
uint64_t WriteSizeInBits,
- const DataLayout &TD) {
+ const DataLayout &DL) {
// If the loaded or stored value is a first class array or struct, don't try
// to transform them. We need to be able to bitcast to integer.
if (LoadTy->isStructTy() || LoadTy->isArrayTy())
return -1;
int64_t StoreOffset = 0, LoadOffset = 0;
- Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr,StoreOffset,&TD);
- Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, &TD);
+ Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr,StoreOffset,&DL);
+ Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, &DL);
if (StoreBase != LoadBase)
return -1;
@@ -972,7 +972,7 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
// If the load and store don't overlap at all, the store doesn't provide
// anything to the load. In this case, they really don't alias at all, AA
// must have gotten confused.
- uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy);
+ uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy);
if ((WriteSizeInBits & 7) | (LoadSize & 7))
return -1;
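The (WriteSizeInBits & 7) | (LoadSize & 7) test above is a branch-free way of rejecting accesses whose bit widths are not whole bytes: x & 7 is non-zero exactly when x is not a multiple of 8. A small stand-alone illustration of the same check (not code from the pass):

    #include <cassert>
    #include <cstdint>

    // Non-zero iff either size has a fractional-byte component, i.e. the same
    // condition as (WriteSizeInBits % 8 != 0 || LoadSizeInBits % 8 != 0).
    static bool hasSubByteSize(uint64_t WriteSizeInBits, uint64_t LoadSizeInBits) {
      return ((WriteSizeInBits & 7) | (LoadSizeInBits & 7)) != 0;
    }

    int main() {
      assert(!hasSubByteSize(32, 8));  // i32 store, i8 load: both whole bytes
      assert(hasSubByteSize(33, 8));   // 33 bits is not a whole number of bytes
      assert(hasSubByteSize(32, 1));   // an i1 load is rejected
      return 0;
    }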
@@ -1015,61 +1015,61 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
/// memdep query of a load that ends up being a clobbering store.
static int AnalyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
StoreInst *DepSI,
- const DataLayout &TD) {
+ const DataLayout &DL) {
// Cannot handle reading from store of first-class aggregate yet.
if (DepSI->getValueOperand()->getType()->isStructTy() ||
DepSI->getValueOperand()->getType()->isArrayTy())
return -1;
Value *StorePtr = DepSI->getPointerOperand();
- uint64_t StoreSize =TD.getTypeSizeInBits(DepSI->getValueOperand()->getType());
+ uint64_t StoreSize =DL.getTypeSizeInBits(DepSI->getValueOperand()->getType());
return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
- StorePtr, StoreSize, TD);
+ StorePtr, StoreSize, DL);
}
/// AnalyzeLoadFromClobberingLoad - This function is called when we have a
/// memdep query of a load that ends up being clobbered by another load. See if
/// the other load can feed into the second load.
static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr,
- LoadInst *DepLI, const DataLayout &TD){
+ LoadInst *DepLI, const DataLayout &DL){
// Cannot handle reading from store of first-class aggregate yet.
if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
return -1;
Value *DepPtr = DepLI->getPointerOperand();
- uint64_t DepSize = TD.getTypeSizeInBits(DepLI->getType());
- int R = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, TD);
+ uint64_t DepSize = DL.getTypeSizeInBits(DepLI->getType());
+ int R = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, DL);
if (R != -1) return R;
// If we have a load/load clobber and DepLI can be widened to cover this load,
// then we should widen it!
int64_t LoadOffs = 0;
const Value *LoadBase =
- GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, &TD);
- unsigned LoadSize = TD.getTypeStoreSize(LoadTy);
+ GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, &DL);
+ unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
unsigned Size = MemoryDependenceAnalysis::
- getLoadLoadClobberFullWidthSize(LoadBase, LoadOffs, LoadSize, DepLI, TD);
+ getLoadLoadClobberFullWidthSize(LoadBase, LoadOffs, LoadSize, DepLI, DL);
if (Size == 0) return -1;
- return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size*8, TD);
+ return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size*8, DL);
}
static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
MemIntrinsic *MI,
- const DataLayout &TD) {
+ const DataLayout &DL) {
// If the mem operation is a non-constant size, we can't handle it.
ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
- if (SizeCst == 0) return -1;
+ if (!SizeCst) return -1;
uint64_t MemSizeInBits = SizeCst->getZExtValue()*8;
// If this is memset, we just need to see if the offset is valid in the size
// of the memset.
if (MI->getIntrinsicID() == Intrinsic::memset)
return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
- MemSizeInBits, TD);
+ MemSizeInBits, DL);
// If we have a memcpy/memmove, the only case we can handle is if this is a
// copy from constant memory. In that case, we can read directly from the
@@ -1077,14 +1077,14 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
MemTransferInst *MTI = cast<MemTransferInst>(MI);
Constant *Src = dyn_cast<Constant>(MTI->getSource());
- if (Src == 0) return -1;
+ if (!Src) return -1;
- GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, &TD));
- if (GV == 0 || !GV->isConstant()) return -1;
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, &DL));
+ if (!GV || !GV->isConstant()) return -1;
// See if the access is within the bounds of the transfer.
int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
- MI->getDest(), MemSizeInBits, TD);
+ MI->getDest(), MemSizeInBits, DL);
if (Offset == -1)
return Offset;
@@ -1097,7 +1097,7 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
Src = ConstantExpr::getGetElementPtr(Src, OffsetCst);
Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
- if (ConstantFoldLoadFromConstPtr(Src, &TD))
+ if (ConstantFoldLoadFromConstPtr(Src, &DL))
return Offset;
return -1;
}
@@ -1110,11 +1110,11 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
/// before we give up.
static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
Type *LoadTy,
- Instruction *InsertPt, const DataLayout &TD){
+ Instruction *InsertPt, const DataLayout &DL){
LLVMContext &Ctx = SrcVal->getType()->getContext();
- uint64_t StoreSize = (TD.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
- uint64_t LoadSize = (TD.getTypeSizeInBits(LoadTy) + 7) / 8;
+ uint64_t StoreSize = (DL.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
+ uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy) + 7) / 8;
IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
@@ -1122,13 +1122,13 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
// to an integer type to start with.
if (SrcVal->getType()->getScalarType()->isPointerTy())
SrcVal = Builder.CreatePtrToInt(SrcVal,
- TD.getIntPtrType(SrcVal->getType()));
+ DL.getIntPtrType(SrcVal->getType()));
if (!SrcVal->getType()->isIntegerTy())
SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8));
// Shift the bits to the least significant depending on endianness.
unsigned ShiftAmt;
- if (TD.isLittleEndian())
+ if (DL.isLittleEndian())
ShiftAmt = Offset*8;
else
ShiftAmt = (StoreSize-LoadSize-Offset)*8;
@@ -1139,7 +1139,7 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
if (LoadSize != StoreSize)
SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8));
- return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
+ return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, DL);
}
/// GetLoadValueForLoad - This function is called when we have a
@@ -1150,11 +1150,11 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
Type *LoadTy, Instruction *InsertPt,
GVN &gvn) {
- const DataLayout &TD = *gvn.getDataLayout();
+ const DataLayout &DL = *gvn.getDataLayout();
// If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to
// widen SrcVal out to a larger load.
- unsigned SrcValSize = TD.getTypeStoreSize(SrcVal->getType());
- unsigned LoadSize = TD.getTypeStoreSize(LoadTy);
+ unsigned SrcValSize = DL.getTypeStoreSize(SrcVal->getType());
+ unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
if (Offset+LoadSize > SrcValSize) {
assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!");
assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load");
@@ -1186,7 +1186,7 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
// Replace uses of the original load with the wider load. On a big endian
// system, we need to shift down to get the relevant bits.
Value *RV = NewLoad;
- if (TD.isBigEndian())
+ if (DL.isBigEndian())
RV = Builder.CreateLShr(RV,
NewLoadSize*8-SrcVal->getType()->getPrimitiveSizeInBits());
RV = Builder.CreateTrunc(RV, SrcVal->getType());
@@ -1201,7 +1201,7 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
SrcVal = NewLoad;
}
- return GetStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, TD);
+ return GetStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, DL);
}
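GetStoreValueForLoad and GetLoadValueForLoad above recover the loaded bytes from a wider stored (or previously loaded) integer by shifting and truncating, and the shift amount depends on the target's endianness. A worked, self-contained example of that offset arithmetic; the values and helper name are chosen only for illustration:

    #include <cassert>
    #include <cstdint>

    // For a StoreSize-byte store and a LoadSize-byte load at byte Offset into
    // it, the wanted bits sit Offset*8 bits up on a little-endian target and
    // (StoreSize - LoadSize - Offset)*8 bits up on a big-endian one.
    static unsigned shiftAmountBits(unsigned StoreSize, unsigned LoadSize,
                                    unsigned Offset, bool LittleEndian) {
      return (LittleEndian ? Offset : StoreSize - LoadSize - Offset) * 8;
    }

    int main() {
      // Store of an i32 (4 bytes), load of an i8 at byte offset 1.
      assert(shiftAmountBits(4, 1, 1, /*LittleEndian=*/true) == 8);
      assert(shiftAmountBits(4, 1, 1, /*LittleEndian=*/false) == 16);

      // On a little-endian target, extracting that byte from the stored value
      // is then just a shift followed by a truncate:
      uint32_t Stored = 0xAABBCCDD;
      uint8_t Loaded = uint8_t(Stored >> 8); // byte at offset 1 == 0xCC
      assert(Loaded == 0xCC);
      return 0;
    }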
@@ -1209,9 +1209,9 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
/// memdep query of a load that ends up being a clobbering mem intrinsic.
static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
Type *LoadTy, Instruction *InsertPt,
- const DataLayout &TD){
+ const DataLayout &DL){
LLVMContext &Ctx = LoadTy->getContext();
- uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;
+ uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy)/8;
IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
@@ -1242,7 +1242,7 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
++NumBytesSet;
}
- return CoerceAvailableValueToLoadType(Val, LoadTy, InsertPt, TD);
+ return CoerceAvailableValueToLoadType(Val, LoadTy, InsertPt, DL);
}
// Otherwise, this is a memcpy/memmove from a constant global.
@@ -1258,7 +1258,7 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
Src = ConstantExpr::getGetElementPtr(Src, OffsetCst);
Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
- return ConstantFoldLoadFromConstPtr(Src, &TD);
+ return ConstantFoldLoadFromConstPtr(Src, &DL);
}
@@ -1324,10 +1324,10 @@ Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) c
if (isSimpleValue()) {
Res = getSimpleValue();
if (Res->getType() != LoadTy) {
- const DataLayout *TD = gvn.getDataLayout();
- assert(TD && "Need target data to handle type mismatch case");
+ const DataLayout *DL = gvn.getDataLayout();
+ assert(DL && "Need target data to handle type mismatch case");
Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
- *TD);
+ *DL);
DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
<< *getSimpleValue() << '\n'
@@ -1346,10 +1346,10 @@ Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) c
<< *Res << '\n' << "\n\n\n");
}
} else if (isMemIntrinValue()) {
- const DataLayout *TD = gvn.getDataLayout();
- assert(TD && "Need target data to handle type mismatch case");
+ const DataLayout *DL = gvn.getDataLayout();
+ assert(DL && "Need target data to handle type mismatch case");
Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
- LoadTy, BB->getTerminator(), *TD);
+ LoadTy, BB->getTerminator(), *DL);
DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
<< " " << *getMemIntrinValue() << '\n'
<< *Res << '\n' << "\n\n\n");
@@ -1402,9 +1402,9 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
// read by the load, we can extract the bits we need for the load from the
// stored value.
if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
- if (TD && Address) {
+ if (DL && Address) {
int Offset = AnalyzeLoadFromClobberingStore(LI->getType(), Address,
- DepSI, *TD);
+ DepSI, *DL);
if (Offset != -1) {
ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
DepSI->getValueOperand(),
@@ -1421,10 +1421,9 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInfo.getInst())) {
// If this is a clobber and L is the first instruction in its block, then
// we have the first instruction in the entry block.
- if (DepLI != LI && Address && TD) {
- int Offset = AnalyzeLoadFromClobberingLoad(LI->getType(),
- LI->getPointerOperand(),
- DepLI, *TD);
+ if (DepLI != LI && Address && DL) {
+ int Offset = AnalyzeLoadFromClobberingLoad(LI->getType(), Address,
+ DepLI, *DL);
if (Offset != -1) {
ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB,DepLI,
@@ -1437,9 +1436,9 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
// If the clobbering value is a memset/memcpy/memmove, see if we can
// forward a value on from it.
if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
- if (TD && Address) {
+ if (DL && Address) {
int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address,
- DepMI, *TD);
+ DepMI, *DL);
if (Offset != -1) {
ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI,
Offset));
@@ -1465,14 +1464,21 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
continue;
}
+ // Loading from calloc (which zero initializes memory) -> zero
+ if (isCallocLikeFn(DepInst, TLI)) {
+ ValuesPerBlock.push_back(AvailableValueInBlock::get(
+ DepBB, Constant::getNullValue(LI->getType())));
+ continue;
+ }
+
if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
// Reject loads and stores that are to the same address but are of
// different types if we have to.
if (S->getValueOperand()->getType() != LI->getType()) {
// If the stored value is larger or equal to the loaded value, we can
// reuse it.
- if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(),
- LI->getType(), *TD)) {
+ if (!DL || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(),
+ LI->getType(), *DL)) {
UnavailableBlocks.push_back(DepBB);
continue;
}
@@ -1488,7 +1494,7 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
if (LD->getType() != LI->getType()) {
// If the stored value is larger or equal to the loaded value, we can
// reuse it.
- if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){
+ if (!DL || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*DL)) {
UnavailableBlocks.push_back(DepBB);
continue;
}
@@ -1541,7 +1547,7 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
// Check to see how many predecessors have the loaded value fully
// available.
- DenseMap<BasicBlock*, Value*> PredLoads;
+ MapVector<BasicBlock *, Value *> PredLoads;
DenseMap<BasicBlock*, char> FullyAvailableBlocks;
for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
FullyAvailableBlocks[ValuesPerBlock[i].BB] = true;
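The switch from DenseMap to MapVector for PredLoads matters because the map is iterated later to PHI-translate addresses and insert the new loads; MapVector iterates in insertion order, so the emitted IR no longer depends on hash-table layout. A minimal stand-in for that behaviour built from standard containers (illustrative only, not LLVM's ADT):

    #include <cstddef>
    #include <unordered_map>
    #include <utility>
    #include <vector>

    // A tiny "map that remembers insertion order": lookups go through the
    // hash map, iteration goes through the vector, as in llvm::MapVector.
    template <typename K, typename V> class OrderedMap {
      std::unordered_map<K, std::size_t> Index;
      std::vector<std::pair<K, V>> Entries;

    public:
      V &operator[](const K &Key) {
        auto It = Index.find(Key);
        if (It != Index.end())
          return Entries[It->second].second;
        Index.emplace(Key, Entries.size());
        Entries.emplace_back(Key, V());
        return Entries.back().second;
      }
      auto begin() { return Entries.begin(); } // deterministic order
      auto end() { return Entries.end(); }
    };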
@@ -1555,7 +1561,6 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks, 0)) {
continue;
}
- PredLoads[Pred] = 0;
if (Pred->getTerminator()->getNumSuccessors() != 1) {
if (isa<IndirectBrInst>(Pred->getTerminator())) {
@@ -1572,11 +1577,14 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
}
CriticalEdgePred.push_back(Pred);
+ } else {
+ // Only add the predecessors that will not be split for now.
+ PredLoads[Pred] = nullptr;
}
}
// Decide whether PRE is profitable for this load.
- unsigned NumUnavailablePreds = PredLoads.size();
+ unsigned NumUnavailablePreds = PredLoads.size() + CriticalEdgePred.size();
assert(NumUnavailablePreds != 0 &&
"Fully available value should already be eliminated!");
@@ -1588,12 +1596,10 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
return false;
// Split critical edges, and update the unavailable predecessors accordingly.
- for (SmallVectorImpl<BasicBlock *>::iterator I = CriticalEdgePred.begin(),
- E = CriticalEdgePred.end(); I != E; I++) {
- BasicBlock *OrigPred = *I;
+ for (BasicBlock *OrigPred : CriticalEdgePred) {
BasicBlock *NewPred = splitCriticalEdges(OrigPred, LoadBB);
- PredLoads.erase(OrigPred);
- PredLoads[NewPred] = 0;
+ assert(!PredLoads.count(OrigPred) && "Split edges shouldn't be in map!");
+ PredLoads[NewPred] = nullptr;
DEBUG(dbgs() << "Split critical edge " << OrigPred->getName() << "->"
<< LoadBB->getName() << '\n');
}
@@ -1601,9 +1607,8 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
// Check if the load can safely be moved to all the unavailable predecessors.
bool CanDoPRE = true;
SmallVector<Instruction*, 8> NewInsts;
- for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
- E = PredLoads.end(); I != E; ++I) {
- BasicBlock *UnavailablePred = I->first;
+ for (auto &PredLoad : PredLoads) {
+ BasicBlock *UnavailablePred = PredLoad.first;
// Do PHI translation to get its value in the predecessor if necessary. The
// returned pointer (if non-null) is guaranteed to dominate UnavailablePred.
@@ -1611,21 +1616,21 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
// If all preds have a single successor, then we know it is safe to insert
// the load on the pred (?!?), so we can insert code to materialize the
// pointer if it is not available.
- PHITransAddr Address(LI->getPointerOperand(), TD);
- Value *LoadPtr = 0;
+ PHITransAddr Address(LI->getPointerOperand(), DL);
+ Value *LoadPtr = nullptr;
LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
*DT, NewInsts);
// If we couldn't find or insert a computation of this phi translated value,
// we fail PRE.
- if (LoadPtr == 0) {
+ if (!LoadPtr) {
DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
<< *LI->getPointerOperand() << "\n");
CanDoPRE = false;
break;
}
- I->second = LoadPtr;
+ PredLoad.second = LoadPtr;
}
if (!CanDoPRE) {
@@ -1634,8 +1639,8 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
if (MD) MD->removeInstruction(I);
I->eraseFromParent();
}
- // HINT:Don't revert the edge-splitting as following transformation may
- // also need to split these critial edges.
+ // HINT: Don't revert the edge-splitting as following transformation may
+ // also need to split these critical edges.
return !CriticalEdgePred.empty();
}
@@ -1656,10 +1661,9 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
VN.lookup_or_add(NewInsts[i]);
}
- for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
- E = PredLoads.end(); I != E; ++I) {
- BasicBlock *UnavailablePred = I->first;
- Value *LoadPtr = I->second;
+ for (const auto &PredLoad : PredLoads) {
+ BasicBlock *UnavailablePred = PredLoad.first;
+ Value *LoadPtr = PredLoad.second;
Instruction *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
LI->getAlignment(),
@@ -1712,7 +1716,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
!Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) {
DEBUG(
dbgs() << "GVN: non-local load ";
- WriteAsOperand(dbgs(), LI);
+ LI->printAsOperand(dbgs());
dbgs() << " has unknown dependencies\n";
);
return false;
@@ -1778,7 +1782,7 @@ static void patchReplacementInstruction(Instruction *I, Value *Repl) {
MDNode *ReplMD = Metadata[i].second;
switch(Kind) {
default:
- ReplInst->setMetadata(Kind, NULL); // Remove unknown metadata
+ ReplInst->setMetadata(Kind, nullptr); // Remove unknown metadata
break;
case LLVMContext::MD_dbg:
llvm_unreachable("getAllMetadataOtherThanDebugLoc returned a MD_dbg");
@@ -1789,11 +1793,15 @@ static void patchReplacementInstruction(Instruction *I, Value *Repl) {
ReplInst->setMetadata(Kind, MDNode::getMostGenericRange(IMD, ReplMD));
break;
case LLVMContext::MD_prof:
- llvm_unreachable("MD_prof in a non terminator instruction");
+ llvm_unreachable("MD_prof in a non-terminator instruction");
break;
case LLVMContext::MD_fpmath:
ReplInst->setMetadata(Kind, MDNode::getMostGenericFPMath(IMD, ReplMD));
break;
+ case LLVMContext::MD_invariant_load:
+ // Only set the !invariant.load if it is present in both instructions.
+ ReplInst->setMetadata(Kind, IMD);
+ break;
}
}
}
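The metadata patch-up above is deliberately conservative when one instruction takes over for another: unknown kinds are dropped, range and fpmath metadata are widened to their most generic form, and, with this change, !invariant.load survives only when both instructions carried it. A toy model of just that last rule (the struct and field are invented; the real code works on MDNode pointers):

    // Each instruction either has or lacks the !invariant.load marker.
    struct ToyInstMD {
      bool HasInvariantLoad = false;
    };

    // When Repl replaces Orig, it may only keep the marker if both had it;
    // otherwise the replacement could claim an invariant that did not hold
    // for the instruction it replaces.
    static void mergeInvariantLoad(ToyInstMD &Repl, const ToyInstMD &Orig) {
      Repl.HasInvariantLoad = Repl.HasInvariantLoad && Orig.HasInvariantLoad;
    }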
@@ -1823,7 +1831,7 @@ bool GVN::processLoad(LoadInst *L) {
// If we have a clobber and target data is around, see if this is a clobber
// that we can fix up through code synthesis.
- if (Dep.isClobber() && TD) {
+ if (Dep.isClobber() && DL) {
// Check to see if we have something like this:
// store i32 123, i32* %P
// %A = bitcast i32* %P to i8*
@@ -1834,14 +1842,14 @@ bool GVN::processLoad(LoadInst *L) {
// a common base + constant offset, and if the previous store (or memset)
// completely covers this load. This sort of thing can happen in bitfield
// access code.
- Value *AvailVal = 0;
+ Value *AvailVal = nullptr;
if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst())) {
int Offset = AnalyzeLoadFromClobberingStore(L->getType(),
L->getPointerOperand(),
- DepSI, *TD);
+ DepSI, *DL);
if (Offset != -1)
AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset,
- L->getType(), L, *TD);
+ L->getType(), L, *DL);
}
// Check to see if we have something like this:
@@ -1856,7 +1864,7 @@ bool GVN::processLoad(LoadInst *L) {
int Offset = AnalyzeLoadFromClobberingLoad(L->getType(),
L->getPointerOperand(),
- DepLI, *TD);
+ DepLI, *DL);
if (Offset != -1)
AvailVal = GetLoadValueForLoad(DepLI, Offset, L->getType(), L, *this);
}
@@ -1866,9 +1874,9 @@ bool GVN::processLoad(LoadInst *L) {
if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(),
L->getPointerOperand(),
- DepMI, *TD);
+ DepMI, *DL);
if (Offset != -1)
- AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L, *TD);
+ AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L, *DL);
}
if (AvailVal) {
@@ -1890,7 +1898,7 @@ bool GVN::processLoad(LoadInst *L) {
DEBUG(
// fast print dep, using operator<< on instruction is too slow.
dbgs() << "GVN: load ";
- WriteAsOperand(dbgs(), L);
+ L->printAsOperand(dbgs());
Instruction *I = Dep.getInst();
dbgs() << " is clobbered by " << *I << '\n';
);
@@ -1905,7 +1913,7 @@ bool GVN::processLoad(LoadInst *L) {
DEBUG(
// fast print dep, using operator<< on instruction is too slow.
dbgs() << "GVN: load ";
- WriteAsOperand(dbgs(), L);
+ L->printAsOperand(dbgs());
dbgs() << " has unknown dependence\n";
);
return false;
@@ -1919,10 +1927,10 @@ bool GVN::processLoad(LoadInst *L) {
// actually have the same type. See if we know how to reuse the stored
// value (depending on its type).
if (StoredVal->getType() != L->getType()) {
- if (TD) {
+ if (DL) {
StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(),
- L, *TD);
- if (StoredVal == 0)
+ L, *DL);
+ if (!StoredVal)
return false;
DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
@@ -1948,10 +1956,10 @@ bool GVN::processLoad(LoadInst *L) {
// the same type. See if we know how to reuse the previously loaded value
// (depending on its type).
if (DepLI->getType() != L->getType()) {
- if (TD) {
+ if (DL) {
AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(),
- L, *TD);
- if (AvailableVal == 0)
+ L, *DL);
+ if (!AvailableVal)
return false;
DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal
@@ -1991,6 +1999,15 @@ bool GVN::processLoad(LoadInst *L) {
}
}
+ // If this load follows a calloc (which zero initializes memory),
+ // then the loaded value is zero
+ if (isCallocLikeFn(DepInst, TLI)) {
+ L->replaceAllUsesWith(Constant::getNullValue(L->getType()));
+ markInstructionForDeletion(L);
+ ++NumGVNLoad;
+ return true;
+ }
+
return false;
}
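The new isCallocLikeFn cases teach GVN that a load whose dependency is a calloc-style allocation, with no intervening store, can be folded to the zero constant of the loaded type, because calloc guarantees zero-initialized memory. The library behaviour this relies on, shown as a plain C++ snippet rather than as IR:

    #include <cassert>
    #include <cstdlib>

    int main() {
      // calloc zero-initializes, so a load with no store in between must
      // observe 0; GVN performs the same reasoning on the IR and replaces
      // the load with Constant::getNullValue of the loaded type.
      int *P = static_cast<int *>(calloc(16, sizeof(int)));
      if (P) {
        assert(P[3] == 0);
        free(P);
      }
      return 0;
    }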
@@ -2001,9 +2018,9 @@ bool GVN::processLoad(LoadInst *L) {
// a few comparisons of DFS numbers.
Value *GVN::findLeader(const BasicBlock *BB, uint32_t num) {
LeaderTableEntry Vals = LeaderTable[num];
- if (!Vals.Val) return 0;
+ if (!Vals.Val) return nullptr;
- Value *Val = 0;
+ Value *Val = nullptr;
if (DT->dominates(Vals.BB, BB)) {
Val = Vals.Val;
if (isa<Constant>(Val)) return Val;
@@ -2030,7 +2047,7 @@ unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To,
unsigned Count = 0;
for (Value::use_iterator UI = From->use_begin(), UE = From->use_end();
UI != UE; ) {
- Use &U = (UI++).getUse();
+ Use &U = *UI++;
if (DT->dominates(Root, U)) {
U.set(To);
@@ -2054,7 +2071,7 @@ static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E,
const BasicBlock *Src = E.getStart();
assert((!Pred || Pred == Src) && "No edge between these basic blocks!");
(void)Src;
- return Pred != 0;
+ return Pred != nullptr;
}
/// propagateEquality - The given values are known to be equal in every block
@@ -2202,7 +2219,7 @@ bool GVN::processInstruction(Instruction *I) {
// to value numbering it. Value numbering often exposes redundancies, for
// example if it determines that %y is equal to %x then the instruction
// "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
- if (Value *V = SimplifyInstruction(I, TD, TLI, DT)) {
+ if (Value *V = SimplifyInstruction(I, DL, TLI, DT)) {
I->replaceAllUsesWith(V);
if (MD && V->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(V);
@@ -2298,7 +2315,7 @@ bool GVN::processInstruction(Instruction *I) {
// Perform fast-path value-number based elimination of values inherited from
// dominators.
Value *repl = findLeader(I->getParent(), Num);
- if (repl == 0) {
+ if (!repl) {
// Failure, just remember this instance for future use.
addToLeaderTable(Num, I, I->getParent());
return false;
@@ -2314,10 +2331,14 @@ bool GVN::processInstruction(Instruction *I) {
/// runOnFunction - This is the main transformation entry point for a function.
bool GVN::runOnFunction(Function& F) {
+ if (skipOptnoneFunction(F))
+ return false;
+
if (!NoLoads)
MD = &getAnalysis<MemoryDependenceAnalysis>();
- DT = &getAnalysis<DominatorTree>();
- TD = getAnalysisIfAvailable<DataLayout>();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
TLI = &getAnalysis<TargetLibraryInfo>();
VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
VN.setMemDep(MD);
@@ -2419,10 +2440,7 @@ bool GVN::processBlock(BasicBlock *BB) {
bool GVN::performPRE(Function &F) {
bool Changed = false;
SmallVector<std::pair<Value*, BasicBlock*>, 8> predMap;
- for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
- DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
- BasicBlock *CurrentBlock = *DI;
-
+ for (BasicBlock *CurrentBlock : depth_first(&F.getEntryBlock())) {
// Nothing to PRE in the entry block.
if (CurrentBlock == &F.getEntryBlock()) continue;
@@ -2462,7 +2480,7 @@ bool GVN::performPRE(Function &F) {
// more complicated to get right.
unsigned NumWith = 0;
unsigned NumWithout = 0;
- BasicBlock *PREPred = 0;
+ BasicBlock *PREPred = nullptr;
predMap.clear();
for (pred_iterator PI = pred_begin(CurrentBlock),
@@ -2480,8 +2498,8 @@ bool GVN::performPRE(Function &F) {
}
Value* predV = findLeader(P, ValNo);
- if (predV == 0) {
- predMap.push_back(std::make_pair(static_cast<Value *>(0), P));
+ if (!predV) {
+ predMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P));
PREPred = P;
++NumWithout;
} else if (predV == CurInst) {
@@ -2635,9 +2653,8 @@ bool GVN::iterateOnFunction(Function &F) {
//
std::vector<BasicBlock *> BBVect;
BBVect.reserve(256);
- for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
- DE = df_end(DT->getRootNode()); DI != DE; ++DI)
- BBVect.push_back(DI->getBlock());
+ for (DomTreeNode *x : depth_first(DT->getRootNode()))
+ BBVect.push_back(x->getBlock());
for (std::vector<BasicBlock *>::iterator I = BBVect.begin(), E = BBVect.end();
I != E; I++)
diff --git a/contrib/llvm/lib/Transforms/Scalar/GlobalMerge.cpp b/contrib/llvm/lib/Transforms/Scalar/GlobalMerge.cpp
deleted file mode 100644
index 954e5459810d..000000000000
--- a/contrib/llvm/lib/Transforms/Scalar/GlobalMerge.cpp
+++ /dev/null
@@ -1,310 +0,0 @@
-//===-- GlobalMerge.cpp - Internal globals merging -----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// This pass merges globals with internal linkage into one. This way all of
-// the globals that were merged into the single biggest one can be addressed
-// using offsets from the same base pointer (there is no need for a separate
-// base pointer for each of the globals). Such a transformation can
-// significantly reduce register pressure when many globals are involved.
-//
-// For example, consider the code which touches several global variables at
-// once:
-//
-// static int foo[N], bar[N], baz[N];
-//
-// for (i = 0; i < N; ++i) {
-// foo[i] = bar[i] * baz[i];
-// }
-//
-// On ARM the addresses of the 3 arrays must be kept in registers, so this
-// code has quite high register pressure (loop body):
-//
-// ldr r1, [r5], #4
-// ldr r2, [r6], #4
-// mul r1, r2, r1
-// str r1, [r0], #4
-//
-// The pass converts the code to something like:
-//
-// static struct {
-// int foo[N];
-// int bar[N];
-// int baz[N];
-// } merged;
-//
-// for (i = 0; i < N; ++i) {
-// merged.foo[i] = merged.bar[i] * merged.baz[i];
-// }
-//
-// and in ARM code this becomes:
-//
-// ldr r0, [r5, #40]
-// ldr r1, [r5, #80]
-// mul r0, r1, r0
-// str r0, [r5], #4
-//
-// Note that we saved 2 registers here almost "for free".
-// ===---------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "global-merge"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-using namespace llvm;
-
-static cl::opt<bool>
-EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden,
- cl::desc("Enable global merge pass on constants"),
- cl::init(false));
-
-STATISTIC(NumMerged , "Number of globals merged");
-namespace {
- class GlobalMerge : public FunctionPass {
- const TargetMachine *TM;
-
- bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
- Module &M, bool isConst, unsigned AddrSpace) const;
-
- /// \brief Check if the given variable has been identified as must keep
- /// \pre setMustKeepGlobalVariables must have been called on the Module that
- /// contains GV
- bool isMustKeepGlobalVariable(const GlobalVariable *GV) const {
- return MustKeepGlobalVariables.count(GV);
- }
-
-    /// Collect every variable marked as "used" or used in a landing pad
- /// instruction for this Module.
- void setMustKeepGlobalVariables(Module &M);
-
-    /// Collect every variable marked as "used"
- void collectUsedGlobalVariables(Module &M);
-
- /// Keep track of the GlobalVariable that must not be merged away
- SmallPtrSet<const GlobalVariable *, 16> MustKeepGlobalVariables;
-
- public:
- static char ID; // Pass identification, replacement for typeid.
- explicit GlobalMerge(const TargetMachine *TM = 0)
- : FunctionPass(ID), TM(TM) {
- initializeGlobalMergePass(*PassRegistry::getPassRegistry());
- }
-
- virtual bool doInitialization(Module &M);
- virtual bool runOnFunction(Function &F);
- virtual bool doFinalization(Module &M);
-
- const char *getPassName() const {
- return "Merge internal globals";
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- FunctionPass::getAnalysisUsage(AU);
- }
-
- struct GlobalCmp {
- const DataLayout *TD;
-
- GlobalCmp(const DataLayout *td) : TD(td) { }
-
- bool operator()(const GlobalVariable *GV1, const GlobalVariable *GV2) {
- Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType();
- Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType();
-
- return (TD->getTypeAllocSize(Ty1) < TD->getTypeAllocSize(Ty2));
- }
- };
- };
-} // end anonymous namespace
-
-char GlobalMerge::ID = 0;
-INITIALIZE_PASS(GlobalMerge, "global-merge",
- "Global Merge", false, false)
-
-
-bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
- Module &M, bool isConst, unsigned AddrSpace) const {
- const TargetLowering *TLI = TM->getTargetLowering();
- const DataLayout *TD = TLI->getDataLayout();
-
- // FIXME: Infer the maximum possible offset depending on the actual users
- // (these max offsets are different for the users inside Thumb or ARM
- // functions)
- unsigned MaxOffset = TLI->getMaximalGlobalOffset();
-
- // FIXME: Find better heuristics
- std::stable_sort(Globals.begin(), Globals.end(), GlobalCmp(TD));
-
- Type *Int32Ty = Type::getInt32Ty(M.getContext());
-
- for (size_t i = 0, e = Globals.size(); i != e; ) {
- size_t j = 0;
- uint64_t MergedSize = 0;
- std::vector<Type*> Tys;
- std::vector<Constant*> Inits;
- for (j = i; j != e; ++j) {
- Type *Ty = Globals[j]->getType()->getElementType();
- MergedSize += TD->getTypeAllocSize(Ty);
- if (MergedSize > MaxOffset) {
- break;
- }
- Tys.push_back(Ty);
- Inits.push_back(Globals[j]->getInitializer());
- }
-
- StructType *MergedTy = StructType::get(M.getContext(), Tys);
- Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);
- GlobalVariable *MergedGV = new GlobalVariable(M, MergedTy, isConst,
- GlobalValue::InternalLinkage,
- MergedInit, "_MergedGlobals",
- 0, GlobalVariable::NotThreadLocal,
- AddrSpace);
- for (size_t k = i; k < j; ++k) {
- Constant *Idx[2] = {
- ConstantInt::get(Int32Ty, 0),
- ConstantInt::get(Int32Ty, k-i)
- };
- Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx);
- Globals[k]->replaceAllUsesWith(GEP);
- Globals[k]->eraseFromParent();
- NumMerged++;
- }
- i = j;
- }
-
- return true;
-}
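doMerge above packs the size-sorted globals greedily: it keeps appending globals to the current merged struct until adding the next one would push the running size past MaxOffset, then starts a new group at that global. The same grouping logic in isolation, over plain sizes (numbers and names are made up; the real pass also filters out any single global that is already larger than MaxOffset):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    static std::vector<std::vector<uint64_t>>
    groupBySize(const std::vector<uint64_t> &Sizes, uint64_t MaxOffset) {
      std::vector<std::vector<uint64_t>> Groups;
      for (std::size_t i = 0, e = Sizes.size(); i != e;) {
        uint64_t MergedSize = 0;
        std::vector<uint64_t> Group;
        std::size_t j = i;
        for (; j != e; ++j) {
          MergedSize += Sizes[j];
          if (MergedSize > MaxOffset)
            break;                    // Sizes[j] starts the next group
          Group.push_back(Sizes[j]);
        }
        if (j == i) {                 // oversized single entry: take it alone
          Group.push_back(Sizes[j]);  // so the loop always makes progress
          ++j;
        }
        Groups.push_back(Group);
        i = j;
      }
      return Groups;
    }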
-
-void GlobalMerge::collectUsedGlobalVariables(Module &M) {
- // Extract global variables from llvm.used array
- const GlobalVariable *GV = M.getGlobalVariable("llvm.used");
- if (!GV || !GV->hasInitializer()) return;
-
- // Should be an array of 'i8*'.
- const ConstantArray *InitList = cast<ConstantArray>(GV->getInitializer());
-
- for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
- if (const GlobalVariable *G =
- dyn_cast<GlobalVariable>(InitList->getOperand(i)->stripPointerCasts()))
- MustKeepGlobalVariables.insert(G);
-}
-
-void GlobalMerge::setMustKeepGlobalVariables(Module &M) {
- collectUsedGlobalVariables(M);
-
- for (Module::iterator IFn = M.begin(), IEndFn = M.end(); IFn != IEndFn;
- ++IFn) {
- for (Function::iterator IBB = IFn->begin(), IEndBB = IFn->end();
- IBB != IEndBB; ++IBB) {
-      // Follow the invoke link to find the landing pad instruction
- const InvokeInst *II = dyn_cast<InvokeInst>(IBB->getTerminator());
- if (!II) continue;
-
- const LandingPadInst *LPInst = II->getUnwindDest()->getLandingPadInst();
- // Look for globals in the clauses of the landing pad instruction
- for (unsigned Idx = 0, NumClauses = LPInst->getNumClauses();
- Idx != NumClauses; ++Idx)
- if (const GlobalVariable *GV =
- dyn_cast<GlobalVariable>(LPInst->getClause(Idx)
- ->stripPointerCasts()))
- MustKeepGlobalVariables.insert(GV);
- }
- }
-}
-
-bool GlobalMerge::doInitialization(Module &M) {
- DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals,
- BSSGlobals;
- const TargetLowering *TLI = TM->getTargetLowering();
- const DataLayout *TD = TLI->getDataLayout();
- unsigned MaxOffset = TLI->getMaximalGlobalOffset();
- bool Changed = false;
- setMustKeepGlobalVariables(M);
-
- // Grab all non-const globals.
- for (Module::global_iterator I = M.global_begin(),
- E = M.global_end(); I != E; ++I) {
- // Merge is safe for "normal" internal globals only
- if (!I->hasLocalLinkage() || I->isThreadLocal() || I->hasSection())
- continue;
-
- PointerType *PT = dyn_cast<PointerType>(I->getType());
- assert(PT && "Global variable is not a pointer!");
-
- unsigned AddressSpace = PT->getAddressSpace();
-
- // Ignore fancy-aligned globals for now.
- unsigned Alignment = TD->getPreferredAlignment(I);
- Type *Ty = I->getType()->getElementType();
- if (Alignment > TD->getABITypeAlignment(Ty))
- continue;
-
- // Ignore all 'special' globals.
- if (I->getName().startswith("llvm.") ||
- I->getName().startswith(".llvm."))
- continue;
-
- // Ignore all "required" globals:
- if (isMustKeepGlobalVariable(I))
- continue;
-
- if (TD->getTypeAllocSize(Ty) < MaxOffset) {
- if (TargetLoweringObjectFile::getKindForGlobal(I, TLI->getTargetMachine())
- .isBSSLocal())
- BSSGlobals[AddressSpace].push_back(I);
- else if (I->isConstant())
- ConstGlobals[AddressSpace].push_back(I);
- else
- Globals[AddressSpace].push_back(I);
- }
- }
-
- for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator
- I = Globals.begin(), E = Globals.end(); I != E; ++I)
- if (I->second.size() > 1)
- Changed |= doMerge(I->second, M, false, I->first);
-
- for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator
- I = BSSGlobals.begin(), E = BSSGlobals.end(); I != E; ++I)
- if (I->second.size() > 1)
- Changed |= doMerge(I->second, M, false, I->first);
-
- if (EnableGlobalMergeOnConst)
- for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator
- I = ConstGlobals.begin(), E = ConstGlobals.end(); I != E; ++I)
- if (I->second.size() > 1)
- Changed |= doMerge(I->second, M, true, I->first);
-
- return Changed;
-}
-
-bool GlobalMerge::runOnFunction(Function &F) {
- return false;
-}
-
-bool GlobalMerge::doFinalization(Module &M) {
- MustKeepGlobalVariables.clear();
- return false;
-}
-
-Pass *llvm::createGlobalMergePass(const TargetMachine *TM) {
- return new GlobalMerge(TM);
-}
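For readers skimming the removed GlobalMerge code above: the pass packs several small internal globals into one merged variable and rewrites every use as a constant in-bounds GEP into it. Below is a minimal sketch of that rewrite step only, written against the LLVM 3.4-era C++ API used above; the helper name mergeGlobalsInto and the zero-initialized array (instead of a struct of the original initializers) are simplifications for illustration, not part of the patch.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Sketch: merge same-typed internal globals into one array and redirect uses.
static void mergeGlobalsInto(Module &M, ArrayRef<GlobalVariable *> Globals,
                             Type *EltTy, unsigned AddrSpace) {
  Type *Int32Ty = Type::getInt32Ty(M.getContext());
  ArrayType *MergedTy = ArrayType::get(EltTy, Globals.size());

  // One merged global, large enough to hold every input (simplified init).
  GlobalVariable *MergedGV = new GlobalVariable(
      M, MergedTy, /*isConstant=*/false, GlobalValue::InternalLinkage,
      Constant::getNullValue(MergedTy), "_MergedGlobals",
      /*InsertBefore=*/nullptr, GlobalVariable::NotThreadLocal, AddrSpace);

  // Rewrite each original global as element K of the merged array, mirroring
  // the doMerge() loop deleted above, then drop the original.
  for (unsigned K = 0, E = Globals.size(); K != E; ++K) {
    Constant *Idx[2] = {ConstantInt::get(Int32Ty, 0),
                        ConstantInt::get(Int32Ty, K)};
    Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx);
    Globals[K]->replaceAllUsesWith(GEP);
    Globals[K]->eraseFromParent();
  }
}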
diff --git a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 235aaaa6f801..e83a5c421b47 100644
--- a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -24,23 +24,22 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "indvars"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
-#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -50,6 +49,8 @@
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
using namespace llvm;
+#define DEBUG_TYPE "indvars"
+
STATISTIC(NumWidened , "Number of indvars widened");
STATISTIC(NumReplaced , "Number of exit values replaced");
STATISTIC(NumLFTR , "Number of loop exit tests replaced");
@@ -63,12 +64,15 @@ static cl::opt<bool> VerifyIndvars(
"verify-indvars", cl::Hidden,
cl::desc("Verify the ScalarEvolution result after running indvars"));
+static cl::opt<bool> ReduceLiveIVs("liv-reduce", cl::Hidden,
+ cl::desc("Reduce live induction variables."));
+
namespace {
class IndVarSimplify : public LoopPass {
LoopInfo *LI;
ScalarEvolution *SE;
DominatorTree *DT;
- DataLayout *TD;
+ const DataLayout *DL;
TargetLibraryInfo *TLI;
SmallVector<WeakVH, 16> DeadInsts;
@@ -76,15 +80,15 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
- IndVarSimplify() : LoopPass(ID), LI(0), SE(0), DT(0), TD(0),
- Changed(false) {
+ IndVarSimplify() : LoopPass(ID), LI(nullptr), SE(nullptr), DT(nullptr),
+ DL(nullptr), Changed(false) {
initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry());
}
- virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<DominatorTree>();
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfo>();
AU.addRequired<ScalarEvolution>();
AU.addRequiredID(LoopSimplifyID);
@@ -96,7 +100,7 @@ namespace {
}
private:
- virtual void releaseMemory() {
+ void releaseMemory() override {
DeadInsts.clear();
}
@@ -119,7 +123,7 @@ namespace {
char IndVarSimplify::ID = 0;
INITIALIZE_PASS_BEGIN(IndVarSimplify, "indvars",
"Induction Variable Simplification", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
@@ -193,7 +197,7 @@ static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
if (!PHI)
return User;
- Instruction *InsertPt = 0;
+ Instruction *InsertPt = nullptr;
for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
if (PHI->getIncomingValue(i) != Def)
continue;
@@ -254,34 +258,34 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
// an add or increment value can not be represented by an integer.
BinaryOperator *Incr =
dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
- if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return;
+ if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return;
// If this is not an add of the PHI with a constantfp, or if the constant fp
// is not an integer, bail out.
ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
int64_t IncValue;
- if (IncValueVal == 0 || Incr->getOperand(0) != PN ||
+ if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
!ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
return;
// Check Incr uses. One user is PN and the other user is an exit condition
// used by the conditional terminator.
- Value::use_iterator IncrUse = Incr->use_begin();
+ Value::user_iterator IncrUse = Incr->user_begin();
Instruction *U1 = cast<Instruction>(*IncrUse++);
- if (IncrUse == Incr->use_end()) return;
+ if (IncrUse == Incr->user_end()) return;
Instruction *U2 = cast<Instruction>(*IncrUse++);
- if (IncrUse != Incr->use_end()) return;
+ if (IncrUse != Incr->user_end()) return;
// Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
// only used by a branch, we can't transform it.
FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
if (!Compare)
Compare = dyn_cast<FCmpInst>(U2);
- if (Compare == 0 || !Compare->hasOneUse() ||
- !isa<BranchInst>(Compare->use_back()))
+ if (!Compare || !Compare->hasOneUse() ||
+ !isa<BranchInst>(Compare->user_back()))
return;
- BranchInst *TheBr = cast<BranchInst>(Compare->use_back());
+ BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
// We need to verify that the branch actually controls the iteration count
// of the loop. If not, the new IV can overflow and no one will notice.
@@ -298,7 +302,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
// transform it.
ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
int64_t ExitValue;
- if (ExitValueVal == 0 ||
+ if (ExitValueVal == nullptr ||
!ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
return;
@@ -494,6 +498,21 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
unsigned NumPreds = PN->getNumIncomingValues();
+ // We would like to be able to RAUW single-incoming value PHI nodes. We
+ // have to be certain this is safe even when this is an LCSSA PHI node.
+ // While the computed exit value is no longer varying in *this* loop, the
+ // exit block may be an exit block for an outer containing loop as well,
+ // the exit value may be varying in the outer loop, and thus it may still
+  // require an LCSSA PHI node. The safe case is when this is a
+  // single-predecessor PHI node (LCSSA) and the exit block containing it is
+  // part of the enclosing loop, or this is the outermost loop of the nest.
+ // In either case the exit value could (at most) be varying in the same
+ // loop body as the phi node itself. Thus if it is in turn used outside of
+ // an enclosing loop it will only be via a separate LCSSA node.
+ bool LCSSASafePhiForRAUW =
+ NumPreds == 1 &&
+ (!L->getParentLoop() || L->getParentLoop() == LI->getLoopFor(ExitBB));
+
// Iterate over all of the PHI nodes.
BasicBlock::iterator BBI = ExitBB->begin();
while ((PN = dyn_cast<PHINode>(BBI++))) {
@@ -545,8 +564,8 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
unsigned NumHardInternalUses = 0;
unsigned NumSoftExternalUses = 0;
unsigned NumUses = 0;
- for (Value::use_iterator IB=Inst->use_begin(), IE=Inst->use_end();
- IB!=IE && NumUses<=6 ; ++IB) {
+ for (auto IB = Inst->user_begin(), IE = Inst->user_end();
+ IB != IE && NumUses <= 6; ++IB) {
Instruction *UseInstr = cast<Instruction>(*IB);
unsigned Opc = UseInstr->getOpcode();
NumUses++;
@@ -558,9 +577,9 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
// Do not count the Phi as a use. LCSSA may have inserted
// plenty of trivial ones.
NumUses--;
- for (Value::use_iterator PB=UseInstr->use_begin(),
- PE=UseInstr->use_end();
- PB!=PE && NumUses<=6 ; ++PB, ++NumUses) {
+ for (auto PB = UseInstr->user_begin(),
+ PE = UseInstr->user_end();
+ PB != PE && NumUses <= 6; ++PB, ++NumUses) {
unsigned PhiOpc = cast<Instruction>(*PB)->getOpcode();
if (PhiOpc != Instruction::Call && PhiOpc != Instruction::Ret)
NumSoftExternalUses++;
@@ -594,17 +613,18 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
if (isInstructionTriviallyDead(Inst, TLI))
DeadInsts.push_back(Inst);
- if (NumPreds == 1) {
- // Completely replace a single-pred PHI. This is safe, because the
- // NewVal won't be variant in the loop, so we don't need an LCSSA phi
- // node anymore.
+ // If we determined that this PHI is safe to replace even if an LCSSA
+ // PHI, do so.
+ if (LCSSASafePhiForRAUW) {
PN->replaceAllUsesWith(ExitVal);
PN->eraseFromParent();
}
}
- if (NumPreds != 1) {
- // Clone the PHI and delete the original one. This lets IVUsers and
- // any other maps purge the original user from their records.
+
+ // If we were unable to completely replace the PHI node, clone the PHI
+ // and delete the original one. This lets IVUsers and any other maps
+ // purge the original user from their records.
+ if (!LCSSASafePhiForRAUW) {
PHINode *NewPN = cast<PHINode>(PN->clone());
NewPN->takeName(PN);
NewPN->insertBefore(PN);
@@ -632,36 +652,23 @@ namespace {
Type *WidestNativeType; // Widest integer type created [sz]ext
bool IsSigned; // Was an sext user seen before a zext?
- WideIVInfo() : NarrowIV(0), WidestNativeType(0), IsSigned(false) {}
- };
-
- class WideIVVisitor : public IVVisitor {
- ScalarEvolution *SE;
- const DataLayout *TD;
-
- public:
- WideIVInfo WI;
-
- WideIVVisitor(PHINode *NarrowIV, ScalarEvolution *SCEV,
- const DataLayout *TData) :
- SE(SCEV), TD(TData) { WI.NarrowIV = NarrowIV; }
-
- // Implement the interface used by simplifyUsersOfIV.
- virtual void visitCast(CastInst *Cast);
+ WideIVInfo() : NarrowIV(nullptr), WidestNativeType(nullptr),
+ IsSigned(false) {}
};
}
/// visitCast - Update information about the induction variable that is
/// extended by this sign or zero extend operation. This is used to determine
/// the final width of the IV before actually widening it.
-void WideIVVisitor::visitCast(CastInst *Cast) {
+static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
+ const DataLayout *DL) {
bool IsSigned = Cast->getOpcode() == Instruction::SExt;
if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
return;
Type *Ty = Cast->getType();
uint64_t Width = SE->getTypeSizeInBits(Ty);
- if (TD && !TD->isLegalInteger(Width))
+ if (DL && !DL->isLegalInteger(Width))
return;
if (!WI.WidestNativeType) {
@@ -688,7 +695,7 @@ struct NarrowIVDefUse {
Instruction *NarrowUse;
Instruction *WideDef;
- NarrowIVDefUse(): NarrowDef(0), NarrowUse(0), WideDef(0) {}
+ NarrowIVDefUse(): NarrowDef(nullptr), NarrowUse(nullptr), WideDef(nullptr) {}
NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD):
NarrowDef(ND), NarrowUse(NU), WideDef(WD) {}
@@ -731,9 +738,9 @@ public:
L(LI->getLoopFor(OrigPhi->getParent())),
SE(SEv),
DT(DTree),
- WidePhi(0),
- WideInc(0),
- WideIncExpr(0),
+ WidePhi(nullptr),
+ WideInc(nullptr),
+ WideIncExpr(nullptr),
DeadInsts(DI) {
assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
}
@@ -788,7 +795,7 @@ Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) {
unsigned Opcode = DU.NarrowUse->getOpcode();
switch (Opcode) {
default:
- return 0;
+ return nullptr;
case Instruction::Add:
case Instruction::Mul:
case Instruction::UDiv:
@@ -833,14 +840,14 @@ Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) {
const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
// Handle the common case of add<nsw/nuw>
if (DU.NarrowUse->getOpcode() != Instruction::Add)
- return 0;
+ return nullptr;
// One operand (NarrowDef) has already been extended to WideDef. Now determine
// if extending the other will lead to a recurrence.
unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
- const SCEV *ExtendOperExpr = 0;
+ const SCEV *ExtendOperExpr = nullptr;
const OverflowingBinaryOperator *OBO =
cast<OverflowingBinaryOperator>(DU.NarrowUse);
if (IsSigned && OBO->hasNoSignedWrap())
@@ -850,7 +857,7 @@ const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
ExtendOperExpr = SE->getZeroExtendExpr(
SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
else
- return 0;
+ return nullptr;
// When creating this AddExpr, don't apply the current operations NSW or NUW
// flags. This instruction may be guarded by control flow that the no-wrap
@@ -861,7 +868,7 @@ const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr));
if (!AddRec || AddRec->getLoop() != L)
- return 0;
+ return nullptr;
return AddRec;
}
@@ -872,14 +879,14 @@ const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
/// recurrence. Otherwise return NULL.
const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
if (!SE->isSCEVable(NarrowUse->getType()))
- return 0;
+ return nullptr;
const SCEV *NarrowExpr = SE->getSCEV(NarrowUse);
if (SE->getTypeSizeInBits(NarrowExpr->getType())
>= SE->getTypeSizeInBits(WideType)) {
// NarrowUse implicitly widens its operand. e.g. a gep with a narrow
// index. So don't follow this use.
- return 0;
+ return nullptr;
}
const SCEV *WideExpr = IsSigned ?
@@ -887,19 +894,47 @@ const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
SE->getZeroExtendExpr(NarrowExpr, WideType);
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
if (!AddRec || AddRec->getLoop() != L)
- return 0;
+ return nullptr;
return AddRec;
}
+/// This IV user cannot be widened. Replace this use of the original narrow IV
+/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
+static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT) {
+ DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef
+ << " for user " << *DU.NarrowUse << "\n");
+ IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT));
+ Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
+ DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
+}
+
/// WidenIVUse - Determine whether an individual user of the narrow IV can be
/// widened. If so, return the wide clone of the user.
Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
// Stop traversing the def-use chain at inner-loop phis or post-loop phis.
- if (isa<PHINode>(DU.NarrowUse) &&
- LI->getLoopFor(DU.NarrowUse->getParent()) != L)
- return 0;
-
+ if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
+ if (LI->getLoopFor(UsePhi->getParent()) != L) {
+ // For LCSSA phis, sink the truncate outside the loop.
+ // After SimplifyCFG most loop exit targets have a single predecessor.
+ // Otherwise fall back to a truncate within the loop.
+ if (UsePhi->getNumOperands() != 1)
+ truncateIVUse(DU, DT);
+ else {
+ PHINode *WidePhi =
+ PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
+ UsePhi);
+ WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
+ IRBuilder<> Builder(WidePhi->getParent()->getFirstInsertionPt());
+ Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
+ UsePhi->replaceAllUsesWith(Trunc);
+ DeadInsts.push_back(UsePhi);
+ DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi
+ << " to " << *WidePhi << "\n");
+ }
+ return nullptr;
+ }
+ }
// Our raison d'etre! Eliminate sign and zero extension.
if (IsSigned ? isa<SExtInst>(DU.NarrowUse) : isa<ZExtInst>(DU.NarrowUse)) {
Value *NewDef = DU.WideDef;
@@ -935,7 +970,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
// push the uses of WideDef here.
// No further widening is needed. The deceased [sz]ext had done it for us.
- return 0;
+ return nullptr;
}
// Does this user itself evaluate to a recurrence after widening?
@@ -947,10 +982,8 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
    // This user does not evaluate to a recurrence after widening, so don't
// follow it. Instead insert a Trunc to kill off the original use,
// eventually isolating the original narrow IV so it can be removed.
- IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT));
- Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
- DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
- return 0;
+ truncateIVUse(DU, DT);
+ return nullptr;
}
// Assume block terminators cannot evaluate to a recurrence. We can't to
// insert a Trunc after a terminator if there happens to be a critical edge.
@@ -959,14 +992,14 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
// Reuse the IV increment that SCEVExpander created as long as it dominates
// NarrowUse.
- Instruction *WideUse = 0;
+ Instruction *WideUse = nullptr;
if (WideAddRec == WideIncExpr
&& Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
WideUse = WideInc;
else {
WideUse = CloneIVUser(DU);
if (!WideUse)
- return 0;
+ return nullptr;
}
// Evaluation of WideAddRec ensured that the narrow expression could be
// extended outside the loop without overflow. This suggests that the wide use
@@ -977,7 +1010,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse
<< ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec << "\n");
DeadInsts.push_back(WideUse);
- return 0;
+ return nullptr;
}
// Returning WideUse pushes it on the worklist.
@@ -987,15 +1020,14 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
/// pushNarrowIVUsers - Add eligible users of NarrowDef to NarrowIVUsers.
///
void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
- for (Value::use_iterator UI = NarrowDef->use_begin(),
- UE = NarrowDef->use_end(); UI != UE; ++UI) {
- Instruction *NarrowUse = cast<Instruction>(*UI);
+ for (User *U : NarrowDef->users()) {
+ Instruction *NarrowUser = cast<Instruction>(U);
// Handle data flow merges and bizarre phi cycles.
- if (!Widened.insert(NarrowUse))
+ if (!Widened.insert(NarrowUser))
continue;
- NarrowIVUsers.push_back(NarrowIVDefUse(NarrowDef, NarrowUse, WideDef));
+ NarrowIVUsers.push_back(NarrowIVDefUse(NarrowDef, NarrowUser, WideDef));
}
}
@@ -1013,7 +1045,7 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
// Is this phi an induction variable?
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
if (!AddRec)
- return NULL;
+ return nullptr;
// Widen the induction variable expression.
const SCEV *WideIVExpr = IsSigned ?
@@ -1026,7 +1058,7 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
// Can the IV be extended outside the loop without overflow?
AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
if (!AddRec || AddRec->getLoop() != L)
- return NULL;
+ return nullptr;
// An AddRec must have loop-invariant operands. Since this AddRec is
// materialized by a loop header phi, the expression cannot have any post-loop
@@ -1080,9 +1112,36 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
}
//===----------------------------------------------------------------------===//
+// Live IV Reduction - Minimize IVs live across the loop.
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
// Simplification of IV users based on SCEV evaluation.
//===----------------------------------------------------------------------===//
+namespace {
+ class IndVarSimplifyVisitor : public IVVisitor {
+ ScalarEvolution *SE;
+ const DataLayout *DL;
+ PHINode *IVPhi;
+
+ public:
+ WideIVInfo WI;
+
+ IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
+ const DataLayout *DL, const DominatorTree *DTree):
+ SE(SCEV), DL(DL), IVPhi(IV) {
+ DT = DTree;
+ WI.NarrowIV = IVPhi;
+ if (ReduceLiveIVs)
+ setSplitOverflowIntrinsics();
+ }
+
+ // Implement the interface used by simplifyUsersOfIV.
+ void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, DL); }
+ };
+}
/// SimplifyAndExtend - Iteratively perform simplification on a worklist of IV
/// users. Each successive simplification may push more users which may
@@ -1114,12 +1173,12 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L,
PHINode *CurrIV = LoopPhis.pop_back_val();
// Information about sign/zero extensions of CurrIV.
- WideIVVisitor WIV(CurrIV, SE, TD);
+ IndVarSimplifyVisitor Visitor(CurrIV, SE, DL, DT);
- Changed |= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &WIV);
+ Changed |= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &Visitor);
- if (WIV.WI.WidestNativeType) {
- WideIVs.push_back(WIV.WI);
+ if (Visitor.WI.WidestNativeType) {
+ WideIVs.push_back(Visitor.WI);
}
} while(!LoopPhis.empty());
@@ -1225,7 +1284,7 @@ static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
Instruction *IncI = dyn_cast<Instruction>(IncV);
if (!IncI)
- return 0;
+ return nullptr;
switch (IncI->getOpcode()) {
case Instruction::Add:
@@ -1236,17 +1295,17 @@ static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
if (IncI->getNumOperands() == 2)
break;
default:
- return 0;
+ return nullptr;
}
PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
if (Phi && Phi->getParent() == L->getHeader()) {
if (isLoopInvariant(IncI->getOperand(1), L, DT))
return Phi;
- return 0;
+ return nullptr;
}
if (IncI->getOpcode() == Instruction::GetElementPtr)
- return 0;
+ return nullptr;
// Allow add/sub to be commuted.
Phi = dyn_cast<PHINode>(IncI->getOperand(1));
@@ -1254,7 +1313,7 @@ static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
if (isLoopInvariant(IncI->getOperand(0), L, DT))
return Phi;
}
- return 0;
+ return nullptr;
}
/// Return the compare guarding the loop latch, or NULL for unrecognized tests.
@@ -1264,7 +1323,7 @@ static ICmpInst *getLoopTest(Loop *L) {
BasicBlock *LatchBlock = L->getLoopLatch();
// Don't bother with LFTR if the loop is not properly simplified.
if (!LatchBlock)
- return 0;
+ return nullptr;
BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
assert(BI && "expected exit branch");
@@ -1359,15 +1418,11 @@ static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
Value *IncV = Phi->getIncomingValue(LatchIdx);
- for (Value::use_iterator UI = Phi->use_begin(), UE = Phi->use_end();
- UI != UE; ++UI) {
- if (*UI != Cond && *UI != IncV) return false;
- }
+ for (User *U : Phi->users())
+ if (U != Cond && U != IncV) return false;
- for (Value::use_iterator UI = IncV->use_begin(), UE = IncV->use_end();
- UI != UE; ++UI) {
- if (*UI != Cond && *UI != Phi) return false;
- }
+ for (User *U : IncV->users())
+ if (U != Cond && U != Phi) return false;
return true;
}
@@ -1386,15 +1441,15 @@ static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
/// could at least handle constant BECounts.
static PHINode *
FindLoopCounter(Loop *L, const SCEV *BECount,
- ScalarEvolution *SE, DominatorTree *DT, const DataLayout *TD) {
+ ScalarEvolution *SE, DominatorTree *DT, const DataLayout *DL) {
uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
Value *Cond =
cast<BranchInst>(L->getExitingBlock()->getTerminator())->getCondition();
// Loop over all of the PHI nodes, looking for a simple counter.
- PHINode *BestPhi = 0;
- const SCEV *BestInit = 0;
+ PHINode *BestPhi = nullptr;
+ const SCEV *BestInit = nullptr;
BasicBlock *LatchBlock = L->getLoopLatch();
assert(LatchBlock && "needsLFTR should guarantee a loop latch");
@@ -1415,7 +1470,7 @@ FindLoopCounter(Loop *L, const SCEV *BECount,
// AR may be wider than BECount. With eq/ne tests overflow is immaterial.
// AR may not be a narrower type, or we may never exit.
uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
- if (PhiWidth < BCWidth || (TD && !TD->isLegalInteger(PhiWidth)))
+ if (PhiWidth < BCWidth || (DL && !DL->isLegalInteger(PhiWidth)))
continue;
const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
@@ -1518,7 +1573,7 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
// IVInit integer and IVCount pointer would only occur if a canonical IV
// were generated on top of case #2, which is not expected.
- const SCEV *IVLimit = 0;
+ const SCEV *IVLimit = nullptr;
// For unit stride, IVCount = Start + BECount with 2's complement overflow.
// For non-zero Start, compute IVCount here.
if (AR->getStart()->isZero())
@@ -1697,13 +1752,12 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
// Determine if there is a use in or before the loop (direct or
// otherwise).
bool UsedInLoop = false;
- for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
- UI != UE; ++UI) {
- User *U = *UI;
- BasicBlock *UseBB = cast<Instruction>(U)->getParent();
- if (PHINode *P = dyn_cast<PHINode>(U)) {
+ for (Use &U : I->uses()) {
+ Instruction *User = cast<Instruction>(U.getUser());
+ BasicBlock *UseBB = User->getParent();
+ if (PHINode *P = dyn_cast<PHINode>(User)) {
unsigned i =
- PHINode::getIncomingValueNumForOperand(UI.getOperandNo());
+ PHINode::getIncomingValueNumForOperand(U.getOperandNo());
UseBB = P->getIncomingBlock(i);
}
if (UseBB == Preheader || L->contains(UseBB)) {
@@ -1743,6 +1797,9 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
//===----------------------------------------------------------------------===//
bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
+ if (skipOptnoneFunction(L))
+ return false;
+
// If LoopSimplify form is not available, stay out of trouble. Some notes:
// - LSR currently only supports LoopSimplify-form loops. Indvars'
// canonicalization can be a pessimization without LSR to "clean up"
@@ -1756,8 +1813,9 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
LI = &getAnalysis<LoopInfo>();
SE = &getAnalysis<ScalarEvolution>();
- DT = &getAnalysis<DominatorTree>();
- TD = getAnalysisIfAvailable<DataLayout>();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
DeadInsts.clear();
@@ -1799,13 +1857,13 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// If we have a trip count expression, rewrite the loop's exit condition
// using it. We can currently only handle loops with a single exit.
if (canExpandBackedgeTakenCount(L, SE) && needsLFTR(L, DT)) {
- PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT, TD);
+ PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT, DL);
if (IndVar) {
// Check preconditions for proper SCEVExpander operation. SCEV does not
// express SCEVExpander's dependencies, such as LoopSimplify. Instead any
// pass that uses the SCEVExpander must do it. This does not work well for
- // loop passes because SCEVExpander makes assumptions about all loops, while
- // LoopPassManager only forces the current loop to be simplified.
+ // loop passes because SCEVExpander makes assumptions about all loops,
+ // while LoopPassManager only forces the current loop to be simplified.
//
// FIXME: SCEV expansion has no way to bail out, so the caller must
// explicitly check any assumptions made by SCEV. Brittle.
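As a quick illustration of what the widening machinery refactored above (WideIVInfo, WidenIV, truncateIVUse) is for, hand-written rather than taken from the patch: on an LP64 target the 32-bit induction variable below is sign-extended on every array access, and indvars widens it to 64 bits so the per-iteration sext disappears.

// Illustration only: a loop whose narrow (i32) IV indvars would widen.
long sum(const long *A, int N) {
  long S = 0;
  for (int i = 0; i < N; ++i)  // i is a 32-bit induction variable
    S += A[i];                 // indexing needs sext i32 -> i64 each iteration
  return S;
}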
diff --git a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index b3ec2fc84c03..21f80385cf46 100644
--- a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "jump-threading"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
@@ -27,10 +26,10 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -38,6 +37,8 @@
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
+#define DEBUG_TYPE "jump-threading"
+
STATISTIC(NumThreads, "Number of jumps threaded");
STATISTIC(NumFolds, "Number of terminators folded");
STATISTIC(NumDupes, "Number of branch blocks duplicated to eliminate phi");
@@ -76,7 +77,7 @@ namespace {
/// revectored to the false side of the second if.
///
class JumpThreading : public FunctionPass {
- DataLayout *TD;
+ const DataLayout *DL;
TargetLibraryInfo *TLI;
LazyValueInfo *LVI;
#ifdef NDEBUG
@@ -105,9 +106,9 @@ namespace {
initializeJumpThreadingPass(*PassRegistry::getPassRegistry());
}
- bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LazyValueInfo>();
AU.addPreserved<LazyValueInfo>();
AU.addRequired<TargetLibraryInfo>();
@@ -148,11 +149,24 @@ FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); }
/// runOnFunction - Top level algorithm.
///
bool JumpThreading::runOnFunction(Function &F) {
+ if (skipOptnoneFunction(F))
+ return false;
+
DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
- TD = getAnalysisIfAvailable<DataLayout>();
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
TLI = &getAnalysis<TargetLibraryInfo>();
LVI = &getAnalysis<LazyValueInfo>();
+  // Remove unreachable blocks from the function, as they may cause an
+  // infinite loop. We do threading whenever we find something profitable.
+  // Jump threading a branch can create other opportunities. If these
+  // opportunities form a cycle, i.e. if any jump threading is undoing previous
+  // threading in the path, then we will loop forever. We take care of this
+  // issue by not jump threading for back edges. This works for normal cases
+  // but not for unreachable blocks, as they may have a cycle with no back edge.
+ removeUnreachableBlocks(F);
+
FindLoopHeaders(F);
bool Changed, EverChanged = false;
@@ -251,7 +265,7 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB,
// as having cost of 2 total, and if they are a vector intrinsic, we model
// them as having cost 1.
if (const CallInst *CI = dyn_cast<CallInst>(I)) {
- if (CI->hasFnAttr(Attribute::NoDuplicate))
+ if (CI->cannotDuplicate())
// Blocks with NoDuplicate are modelled as having infinite cost, so they
// are never duplicated.
return ~0U;
@@ -304,7 +318,7 @@ void JumpThreading::FindLoopHeaders(Function &F) {
/// Returns null if Val is null or not an appropriate constant.
static Constant *getKnownConstant(Value *Val, ConstantPreference Preference) {
if (!Val)
- return 0;
+ return nullptr;
// Undef is "known" enough.
if (UndefValue *U = dyn_cast<UndefValue>(Val))
@@ -348,7 +362,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
// If V is a non-instruction value, or an instruction in a different block,
// then it can't be derived from a PHI.
Instruction *I = dyn_cast<Instruction>(V);
- if (I == 0 || I->getParent() != BB) {
+ if (!I || I->getParent() != BB) {
// Okay, if this is a live-in value, see if it has a known value at the end
// of any of our predecessors.
@@ -490,8 +504,8 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
Value *LHS = PN->getIncomingValue(i);
Value *RHS = Cmp->getOperand(1)->DoPHITranslation(BB, PredBB);
- Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, TD);
- if (Res == 0) {
+ Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, DL);
+ if (!Res) {
if (!isa<Constant>(RHS))
continue;
@@ -577,7 +591,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
// Either operand will do, so be sure to pick the one that's a known
// constant.
// FIXME: Do this more cleverly if both values are known constants?
- KnownCond = (TrueVal != 0);
+ KnownCond = (TrueVal != nullptr);
}
// See if the select has a known constant value for this predecessor.
@@ -655,14 +669,9 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
if (LoopHeaders.erase(SinglePred))
LoopHeaders.insert(BB);
- // Remember if SinglePred was the entry block of the function. If so, we
- // will need to move BB back to the entry position.
- bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
LVI->eraseBlock(SinglePred);
MergeBasicBlockIntoOnlyPred(BB);
- if (isEntry && BB != &BB->getParent()->getEntryBlock())
- BB->moveBefore(&BB->getParent()->getEntryBlock());
return true;
}
}
@@ -692,7 +701,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
// Run constant folding to see if we can reduce the condition to a simple
// constant.
if (Instruction *I = dyn_cast<Instruction>(Condition)) {
- Value *SimpleVal = ConstantFoldInstruction(I, TD, TLI);
+ Value *SimpleVal = ConstantFoldInstruction(I, DL, TLI);
if (SimpleVal) {
I->replaceAllUsesWith(SimpleVal);
I->eraseFromParent();
@@ -733,7 +742,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
Instruction *CondInst = dyn_cast<Instruction>(Condition);
// All the rest of our checks depend on the condition being an instruction.
- if (CondInst == 0) {
+ if (!CondInst) {
// FIXME: Unify this with code below.
if (ProcessThreadableEdges(Condition, BB, Preference))
return true;
@@ -886,7 +895,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
SmallPtrSet<BasicBlock*, 8> PredsScanned;
typedef SmallVector<std::pair<BasicBlock*, Value*>, 8> AvailablePredsTy;
AvailablePredsTy AvailablePreds;
- BasicBlock *OneUnavailablePred = 0;
+ BasicBlock *OneUnavailablePred = nullptr;
// If we got here, the loaded value is transparent through to the start of the
// block. Check to see if it is available in any of the predecessor blocks.
@@ -900,16 +909,16 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// Scan the predecessor to see if the value is available in the pred.
BBIt = PredBB->end();
- MDNode *ThisTBAATag = 0;
+ MDNode *ThisTBAATag = nullptr;
Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt, 6,
- 0, &ThisTBAATag);
+ nullptr, &ThisTBAATag);
if (!PredAvailable) {
OneUnavailablePred = PredBB;
continue;
}
// If tbaa tags disagree or are not present, forget about them.
- if (TBAATag != ThisTBAATag) TBAATag = 0;
+ if (TBAATag != ThisTBAATag) TBAATag = nullptr;
// If so, this load is partially redundant. Remember this info so that we
// can create a PHI node.
@@ -925,7 +934,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// predecessor, we want to insert a merge block for those common predecessors.
// This ensures that we only have to insert one reload, thus not increasing
// code size.
- BasicBlock *UnavailablePred = 0;
+ BasicBlock *UnavailablePred = nullptr;
// If there is exactly one predecessor where the value is unavailable, the
// already computed 'OneUnavailablePred' block is it. If it ends in an
@@ -992,7 +1001,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
BasicBlock *P = *PI;
AvailablePredsTy::iterator I =
std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(),
- std::make_pair(P, (Value*)0));
+ std::make_pair(P, (Value*)nullptr));
assert(I != AvailablePreds.end() && I->first == P &&
"Didn't find entry for predecessor!");
@@ -1099,7 +1108,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
SmallPtrSet<BasicBlock*, 16> SeenPreds;
SmallVector<std::pair<BasicBlock*, BasicBlock*>, 16> PredToDestList;
- BasicBlock *OnlyDest = 0;
+ BasicBlock *OnlyDest = nullptr;
BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;
for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
@@ -1116,7 +1125,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
BasicBlock *DestBB;
if (isa<UndefValue>(Val))
- DestBB = 0;
+ DestBB = nullptr;
else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero());
else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
@@ -1167,7 +1176,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
// If the threadable edges are branching on an undefined value, we get to pick
// the destination that these predecessors should get to.
- if (MostPopularDest == 0)
+ if (!MostPopularDest)
MostPopularDest = BB->getTerminator()->
getSuccessor(GetBestDestForJumpOnUndef(BB));
@@ -1269,7 +1278,7 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
}
// Determine which value to split on, true, false, or undef if neither.
- ConstantInt *SplitVal = 0;
+ ConstantInt *SplitVal = nullptr;
if (NumTrue > NumFalse)
SplitVal = ConstantInt::getTrue(BB->getContext());
else if (NumTrue != 0 || NumFalse != 0)
@@ -1290,7 +1299,7 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
// help us. However, we can just replace the LHS or RHS with the constant.
if (BlocksToFoldInto.size() ==
cast<PHINode>(BB->front()).getNumIncomingValues()) {
- if (SplitVal == 0) {
+ if (!SplitVal) {
// If all preds provide undef, just nuke the xor, because it is undef too.
BO->replaceAllUsesWith(UndefValue::get(BO->getType()));
BO->eraseFromParent();
@@ -1431,16 +1440,15 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
// Scan all uses of this instruction to see if it is used outside of its
// block, and if so, record them in UsesToRename.
- for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
- ++UI) {
- Instruction *User = cast<Instruction>(*UI);
+ for (Use &U : I->uses()) {
+ Instruction *User = cast<Instruction>(U.getUser());
if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
- if (UserPN->getIncomingBlock(UI) == BB)
+ if (UserPN->getIncomingBlock(U) == BB)
continue;
} else if (User->getParent() == BB)
continue;
- UsesToRename.push_back(&UI.getUse());
+ UsesToRename.push_back(&U);
}
// If there are no uses outside the block, we're done with this instruction.
@@ -1475,7 +1483,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
// At this point, the IR is fully up to date and consistent. Do a quick scan
// over the new instructions and zap any that are constants or dead. This
// frequently happens because of phi translation.
- SimplifyInstructionsInBlock(NewBB, TD, TLI);
+ SimplifyInstructionsInBlock(NewBB, DL, TLI);
// Threaded an edge!
++NumThreads;
@@ -1528,7 +1536,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
// can just clone the bits from BB into the end of the new PredBB.
BranchInst *OldPredBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
- if (OldPredBranch == 0 || !OldPredBranch->isUnconditional()) {
+ if (!OldPredBranch || !OldPredBranch->isUnconditional()) {
PredBB = SplitEdge(PredBB, BB, this);
OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
}
@@ -1557,7 +1565,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
// If this instruction can be simplified after the operands are updated,
// just use the simplified value instead. This frequently happens due to
// phi translation.
- if (Value *IV = SimplifyInstruction(New, TD)) {
+ if (Value *IV = SimplifyInstruction(New, DL)) {
delete New;
ValueMapping[BI] = IV;
} else {
@@ -1585,16 +1593,15 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
// Scan all uses of this instruction to see if it is used outside of its
// block, and if so, record them in UsesToRename.
- for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
- ++UI) {
- Instruction *User = cast<Instruction>(*UI);
+ for (Use &U : I->uses()) {
+ Instruction *User = cast<Instruction>(U.getUser());
if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
- if (UserPN->getIncomingBlock(UI) == BB)
+ if (UserPN->getIncomingBlock(U) == BB)
continue;
} else if (User->getParent() == BB)
continue;
- UsesToRename.push_back(&UI.getUse());
+ UsesToRename.push_back(&U);
}
// If there are no uses outside the block, we're done with this instruction.
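For orientation, here is the kind of source pattern the jump-threading changes above operate on; this is a hand-written illustration, not code from the patch. After the first branch, the value of x is known on the taken path, so the second test is predictable there and the intermediate block can be duplicated into that predecessor and threaded directly to the correct successor.

// Illustration only: a threadable second branch.
int classify(int x, bool cond) {
  if (cond)
    x = 4;            // on this path, the test below is known false
  // ... code shared by both paths ...
  if (x < 3)          // threadable: the 'cond' path can jump straight to the
    return 1;         // else side without re-evaluating the comparison
  return 2;
}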
diff --git a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
index f94cd2a073ef..abcceb20050a 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -30,33 +30,37 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "licm"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
-#include "llvm/Support/CFG.h"
+#include "llvm/IR/PredIteratorCache.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include <algorithm>
using namespace llvm;
+#define DEBUG_TYPE "licm"
+
STATISTIC(NumSunk , "Number of instructions sunk out of loop");
STATISTIC(NumHoisted , "Number of instructions hoisted out of loop");
STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
@@ -74,26 +78,28 @@ namespace {
initializeLICMPass(*PassRegistry::getPassRegistry());
}
- virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override;
/// This transformation requires natural loop information & requires that
/// loop preheaders be inserted into the CFG...
///
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
- AU.addRequired<DominatorTree>();
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfo>();
AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addPreservedID(LCSSAID);
AU.addRequired<AliasAnalysis>();
AU.addPreserved<AliasAnalysis>();
- AU.addPreserved("scalar-evolution");
- AU.addPreservedID(LoopSimplifyID);
+ AU.addPreserved<ScalarEvolution>();
AU.addRequired<TargetLibraryInfo>();
}
using llvm::Pass::doFinalization;
- bool doFinalization() {
+ bool doFinalization() override {
assert(LoopToAliasSetMap.empty() && "Didn't free loop alias sets");
return false;
}
@@ -103,7 +109,7 @@ namespace {
LoopInfo *LI; // Current LoopInfo
DominatorTree *DT; // Dominator Tree for the current Loop.
- DataLayout *TD; // DataLayout for constant folding.
+ const DataLayout *DL; // DataLayout for constant folding.
TargetLibraryInfo *TLI; // TargetLibraryInfo for constant folding.
// State that is updated as we process loops.
@@ -117,11 +123,12 @@ namespace {
DenseMap<Loop*, AliasSetTracker*> LoopToAliasSetMap;
/// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
- void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L);
+ void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To,
+ Loop *L) override;
/// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias
/// set.
- void deleteAnalysisValue(Value *V, Loop *L);
+ void deleteAnalysisValue(Value *V, Loop *L) override;
/// SinkRegion - Walk the specified region of the CFG (defined by all blocks
/// dominated by the specified block, and that are in the current loop) in
@@ -183,15 +190,26 @@ namespace {
void PromoteAliasSet(AliasSet &AS,
SmallVectorImpl<BasicBlock*> &ExitBlocks,
- SmallVectorImpl<Instruction*> &InsertPts);
+ SmallVectorImpl<Instruction*> &InsertPts,
+ PredIteratorCache &PIC);
+
+ /// \brief Create a copy of the instruction in the exit block and patch up
+ /// SSA.
+ /// PN is a user of I in ExitBlock that can be used to get the number and
+ /// list of predecessors fast.
+ Instruction *CloneInstructionInExitBlock(Instruction &I,
+ BasicBlock &ExitBlock,
+ PHINode &PN);
};
}
char LICM::ID = 0;
INITIALIZE_PASS_BEGIN(LICM, "licm", "Loop Invariant Code Motion", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(LICM, "licm", "Loop Invariant Code Motion", false, false)
@@ -203,16 +221,22 @@ Pass *llvm::createLICMPass() { return new LICM(); }
/// times on one loop.
///
bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
+ if (skipOptnoneFunction(L))
+ return false;
+
Changed = false;
// Get our Loop and Alias Analysis information...
LI = &getAnalysis<LoopInfo>();
AA = &getAnalysis<AliasAnalysis>();
- DT = &getAnalysis<DominatorTree>();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- TD = getAnalysisIfAvailable<DataLayout>();
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
TLI = &getAnalysis<TargetLibraryInfo>();
+ assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
+
CurAST = new AliasSetTracker(*AA);
// Collect Alias info from subloops.
for (Loop::iterator LoopItr = L->begin(), LoopItrE = L->end();
@@ -272,19 +296,36 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
// Now that all loop invariants have been removed from the loop, promote any
// memory references to scalars that we can.
- if (!DisablePromotion && Preheader && L->hasDedicatedExits()) {
+ if (!DisablePromotion && (Preheader || L->hasDedicatedExits())) {
SmallVector<BasicBlock *, 8> ExitBlocks;
SmallVector<Instruction *, 8> InsertPts;
+ PredIteratorCache PIC;
// Loop over all of the alias sets in the tracker object.
for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end();
I != E; ++I)
- PromoteAliasSet(*I, ExitBlocks, InsertPts);
+ PromoteAliasSet(*I, ExitBlocks, InsertPts, PIC);
+
+ // Once we have promoted values across the loop body we have to recursively
+ // reform LCSSA as any nested loop may now have values defined within the
+ // loop used in the outer loop.
+ // FIXME: This is really heavy handed. It would be a bit better to use an
+ // SSAUpdater strategy during promotion that was LCSSA aware and reformed
+ // it as it went.
+ if (Changed)
+ formLCSSARecursively(*L, *DT, getAnalysisIfAvailable<ScalarEvolution>());
}
+  // Check that neither this loop nor its parent has had LCSSA broken. LICM is
+ // specifically moving instructions across the loop boundary and so it is
+ // especially in need of sanity checking here.
+ assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!");
+ assert((!L->getParentLoop() || L->getParentLoop()->isLCSSAForm(*DT)) &&
+ "Parent loop not left in LCSSA form after LICM!");
+
// Clear out loops state information for the next iteration
- CurLoop = 0;
- Preheader = 0;
+ CurLoop = nullptr;
+ Preheader = nullptr;
// If this loop is nested inside of another one, save the alias information
// for when we process the outer loop.
@@ -302,7 +343,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
/// iteration.
///
void LICM::SinkRegion(DomTreeNode *N) {
- assert(N != 0 && "Null dominator tree node?");
+ assert(N != nullptr && "Null dominator tree node?");
BasicBlock *BB = N->getBlock();
// If this subregion is not in the top level loop at all, exit.
@@ -349,7 +390,7 @@ void LICM::SinkRegion(DomTreeNode *N) {
/// before uses, allowing us to hoist a loop body in one pass without iteration.
///
void LICM::HoistRegion(DomTreeNode *N) {
- assert(N != 0 && "Null dominator tree node?");
+ assert(N != nullptr && "Null dominator tree node?");
BasicBlock *BB = N->getBlock();
// If this subregion is not in the top level loop at all, exit.
@@ -364,7 +405,7 @@ void LICM::HoistRegion(DomTreeNode *N) {
// Try constant folding this instruction. If all the operands are
// constants, it is technically hoistable, but it would be better to just
// fold it.
- if (Constant *C = ConstantFoldInstruction(&I, TD, TLI)) {
+ if (Constant *C = ConstantFoldInstruction(&I, DL, TLI)) {
DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n');
CurAST->copyValue(&I, C);
CurAST->deleteValue(&I);
@@ -450,26 +491,82 @@ bool LICM::canSinkOrHoistInst(Instruction &I) {
return isSafeToExecuteUnconditionally(I);
}
+/// \brief Returns true if a PHINode is trivially replaceable with an
+/// Instruction.
+///
+/// This is true when all incoming values are that instruction. This pattern
+/// occurs most often with LCSSA PHI nodes.
+static bool isTriviallyReplacablePHI(PHINode &PN, Instruction &I) {
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
+ if (PN.getIncomingValue(i) != &I)
+ return false;
+
+ return true;
+}
+
/// isNotUsedInLoop - Return true if the only users of this instruction are
/// outside of the loop. If this is true, we can sink the instruction to the
/// exit blocks of the loop.
///
bool LICM::isNotUsedInLoop(Instruction &I) {
- for (Value::use_iterator UI = I.use_begin(), E = I.use_end(); UI != E; ++UI) {
- Instruction *User = cast<Instruction>(*UI);
- if (PHINode *PN = dyn_cast<PHINode>(User)) {
- // PHI node uses occur in predecessor blocks!
+ for (User *U : I.users()) {
+ Instruction *UI = cast<Instruction>(U);
+ if (PHINode *PN = dyn_cast<PHINode>(UI)) {
+      // A PHI node where all of the incoming values are this instruction is
+      // special -- it can just be RAUW'ed with the instruction and thus
+      // doesn't require a use in the predecessor. This is a particularly
+      // important special case because it is the pattern found in LCSSA form.
+ if (isTriviallyReplacablePHI(*PN, I)) {
+ if (CurLoop->contains(PN))
+ return false;
+ else
+ continue;
+ }
+
+      // Otherwise, PHI node uses occur in the predecessor blocks of the
+      // incoming values. Check for such a use being inside the loop.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (PN->getIncomingValue(i) == &I)
if (CurLoop->contains(PN->getIncomingBlock(i)))
return false;
- } else if (CurLoop->contains(User)) {
- return false;
+
+ continue;
}
+
+ if (CurLoop->contains(UI))
+ return false;
}
return true;
}
+Instruction *LICM::CloneInstructionInExitBlock(Instruction &I,
+ BasicBlock &ExitBlock,
+ PHINode &PN) {
+ Instruction *New = I.clone();
+ ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New);
+ if (!I.getName().empty()) New->setName(I.getName() + ".le");
+
+ // Build LCSSA PHI nodes for any in-loop operands. Note that this is
+ // particularly cheap because we can rip off the PHI node that we're
+ // replacing for the number and blocks of the predecessors.
+ // OPT: If this shows up in a profile, we can instead finish sinking all
+ // invariant instructions, and then walk their operands to re-establish
+ // LCSSA. That will eliminate creating PHI nodes just to nuke them when
+ // sinking bottom-up.
+ for (User::op_iterator OI = New->op_begin(), OE = New->op_end(); OI != OE;
+ ++OI)
+ if (Instruction *OInst = dyn_cast<Instruction>(*OI))
+ if (Loop *OLoop = LI->getLoopFor(OInst->getParent()))
+ if (!OLoop->contains(&PN)) {
+ PHINode *OpPN =
+ PHINode::Create(OInst->getType(), PN.getNumIncomingValues(),
+ OInst->getName() + ".lcssa", ExitBlock.begin());
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
+ OpPN->addIncoming(OInst, PN.getIncomingBlock(i));
+ *OI = OpPN;
+ }
+ return New;
+}
/// sink - When an instruction is found to only be used outside of the loop,
/// this function moves it to the exit blocks and patches up SSA form as needed.
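A small source-level picture of the sinking case handled by the rewritten code below, given only as an illustration and not taken from the patch: the multiply's result is produced inside the loop but consumed only after it, so with the loop in LCSSA form its sole outside user is an exit-block PHI, which sink() replaces with a clone of the instruction placed in that exit block.

// Illustration only: a value computed in the loop but used only after it.
int sink_example(const int *A, int N, int Scale) {
  int T = 0;
  for (int i = 0; i < N; ++i)
    T = A[i] * Scale;   // only the final value is needed
  return T;             // reached through an LCSSA PHI in the exit block
}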
@@ -479,119 +576,45 @@ bool LICM::isNotUsedInLoop(Instruction &I) {
void LICM::sink(Instruction &I) {
DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
- SmallVector<BasicBlock*, 8> ExitBlocks;
- CurLoop->getUniqueExitBlocks(ExitBlocks);
-
if (isa<LoadInst>(I)) ++NumMovedLoads;
else if (isa<CallInst>(I)) ++NumMovedCalls;
++NumSunk;
Changed = true;
- // The case where there is only a single exit node of this loop is common
- // enough that we handle it as a special (more efficient) case. It is more
- // efficient to handle because there are no PHI nodes that need to be placed.
- if (ExitBlocks.size() == 1) {
- if (!DT->dominates(I.getParent(), ExitBlocks[0])) {
- // Instruction is not used, just delete it.
- CurAST->deleteValue(&I);
- // If I has users in unreachable blocks, eliminate.
- // If I is not void type then replaceAllUsesWith undef.
- // This allows ValueHandlers and custom metadata to adjust itself.
- if (!I.use_empty())
- I.replaceAllUsesWith(UndefValue::get(I.getType()));
- I.eraseFromParent();
- } else {
- // Move the instruction to the start of the exit block, after any PHI
- // nodes in it.
- I.moveBefore(ExitBlocks[0]->getFirstInsertionPt());
-
- // This instruction is no longer in the AST for the current loop, because
- // we just sunk it out of the loop. If we just sunk it into an outer
- // loop, we will rediscover the operation when we process it.
- CurAST->deleteValue(&I);
- }
- return;
- }
-
- if (ExitBlocks.empty()) {
- // The instruction is actually dead if there ARE NO exit blocks.
- CurAST->deleteValue(&I);
- // If I has users in unreachable blocks, eliminate.
- // If I is not void type then replaceAllUsesWith undef.
- // This allows ValueHandlers and custom metadata to adjust itself.
- if (!I.use_empty())
- I.replaceAllUsesWith(UndefValue::get(I.getType()));
- I.eraseFromParent();
- return;
- }
-
- // Otherwise, if we have multiple exits, use the SSAUpdater to do all of the
- // hard work of inserting PHI nodes as necessary.
- SmallVector<PHINode*, 8> NewPHIs;
- SSAUpdater SSA(&NewPHIs);
-
- if (!I.use_empty())
- SSA.Initialize(I.getType(), I.getName());
-
- // Insert a copy of the instruction in each exit block of the loop that is
- // dominated by the instruction. Each exit block is known to only be in the
- // ExitBlocks list once.
- BasicBlock *InstOrigBB = I.getParent();
- unsigned NumInserted = 0;
+#ifndef NDEBUG
+ SmallVector<BasicBlock *, 32> ExitBlocks;
+ CurLoop->getUniqueExitBlocks(ExitBlocks);
+ SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(), ExitBlocks.end());
+#endif
- for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
- BasicBlock *ExitBlock = ExitBlocks[i];
+ // Clones of this instruction. Don't create more than one per exit block!
+ SmallDenseMap<BasicBlock *, Instruction *, 32> SunkCopies;
- if (!DT->dominates(InstOrigBB, ExitBlock))
- continue;
+ // If this instruction is only used outside of the loop, then all users are
+ // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
+ // the instruction.
+ while (!I.use_empty()) {
+ // The user must be a PHI node.
+ PHINode *PN = cast<PHINode>(I.user_back());
- // Insert the code after the last PHI node.
- BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
+ BasicBlock *ExitBlock = PN->getParent();
+ assert(ExitBlockSet.count(ExitBlock) &&
+ "The LCSSA PHI is not in an exit block!");
- // If this is the first exit block processed, just move the original
- // instruction, otherwise clone the original instruction and insert
- // the copy.
Instruction *New;
- if (NumInserted++ == 0) {
- I.moveBefore(InsertPt);
- New = &I;
- } else {
- New = I.clone();
- if (!I.getName().empty())
- New->setName(I.getName()+".le");
- ExitBlock->getInstList().insert(InsertPt, New);
- }
-
- // Now that we have inserted the instruction, inform SSAUpdater.
- if (!I.use_empty())
- SSA.AddAvailableValue(ExitBlock, New);
- }
-
- // If the instruction doesn't dominate any exit blocks, it must be dead.
- if (NumInserted == 0) {
- CurAST->deleteValue(&I);
- if (!I.use_empty())
- I.replaceAllUsesWith(UndefValue::get(I.getType()));
- I.eraseFromParent();
- return;
- }
-
- // Next, rewrite uses of the instruction, inserting PHI nodes as needed.
- for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE; ) {
- // Grab the use before incrementing the iterator.
- Use &U = UI.getUse();
- // Increment the iterator before removing the use from the list.
- ++UI;
- SSA.RewriteUseAfterInsertions(U);
+ auto It = SunkCopies.find(ExitBlock);
+ if (It != SunkCopies.end())
+ New = It->second;
+ else
+ New = SunkCopies[ExitBlock] =
+ CloneInstructionInExitBlock(I, *ExitBlock, *PN);
+
+ PN->replaceAllUsesWith(New);
+ PN->eraseFromParent();
}
- // Update CurAST for NewPHIs if I had pointer type.
- if (I.getType()->isPointerTy())
- for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
- CurAST->copyValue(&I, NewPHIs[i]);
-
- // Finally, remove the instruction from CurAST. It is no longer in the loop.
CurAST->deleteValue(&I);
+ I.eraseFromParent();
}
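Because the loop is kept in LCSSA form, every use of a sunk instruction outside the loop is a PHI node in an exit block, so the new code only has to clone the instruction once per exit block and rewrite those PHIs. A source-level sketch of the situation this handles (hypothetical function, not taken from the patch):

    // 'tmp' is computed in the loop but only its final value is used after it,
    // so LICM sinks the computation into the exit block instead of hoisting it.
    int sink_example(int *a, int n) {
      int tmp = 0;
      for (int i = 0; i < n; ++i) {
        tmp = n * 7;   // operands are loop-invariant; no use inside the body
        a[i] = i;
      }
      return tmp;      // in LCSSA this use goes through an exit-block PHI
    }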
/// hoist - When an instruction is found to only use loop invariant operands
@@ -616,7 +639,7 @@ void LICM::hoist(Instruction &I) {
///
bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) {
// If it is not a trapping instruction, it is always safe to hoist.
-  if (isSafeToSpeculativelyExecute(&Inst))
+  if (isSafeToSpeculativelyExecute(&Inst, DL))
return true;
return isGuaranteedToExecute(Inst);
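The extra DataLayout argument lets isSafeToSpeculativelyExecute prove safety for more instructions; when it cannot, hoisting still requires that the instruction be guaranteed to execute. A hypothetical example of the distinction:

    // a/b is loop-invariant, but it sits behind a guard, so it is not
    // guaranteed to execute; hoisting it above the guard could introduce a
    // divide-by-zero trap that the original program never had.
    int hoist_example(int a, int b, int n) {
      int s = 0;
      for (int i = 0; i < n; ++i)
        if (b != 0)
          s += a / b;
      return s;
    }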
@@ -662,24 +685,42 @@ namespace {
SmallPtrSet<Value*, 4> &PointerMustAliases;
SmallVectorImpl<BasicBlock*> &LoopExitBlocks;
SmallVectorImpl<Instruction*> &LoopInsertPts;
+ PredIteratorCache &PredCache;
AliasSetTracker &AST;
+ LoopInfo &LI;
DebugLoc DL;
int Alignment;
MDNode *TBAATag;
+
+ Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (Loop *L = LI.getLoopFor(I->getParent()))
+ if (!L->contains(BB)) {
+ // We need to create an LCSSA PHI node for the incoming value and
+ // store that.
+ PHINode *PN = PHINode::Create(
+ I->getType(), PredCache.GetNumPreds(BB),
+ I->getName() + ".lcssa", BB->begin());
+ for (BasicBlock **PI = PredCache.GetPreds(BB); *PI; ++PI)
+ PN->addIncoming(I, *PI);
+ return PN;
+ }
+ return V;
+ }
+
public:
- LoopPromoter(Value *SP,
- const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
- SmallPtrSet<Value*, 4> &PMA,
- SmallVectorImpl<BasicBlock*> &LEB,
- SmallVectorImpl<Instruction*> &LIP,
- AliasSetTracker &ast, DebugLoc dl, int alignment,
+ LoopPromoter(Value *SP, const SmallVectorImpl<Instruction *> &Insts,
+ SSAUpdater &S, SmallPtrSet<Value *, 4> &PMA,
+ SmallVectorImpl<BasicBlock *> &LEB,
+ SmallVectorImpl<Instruction *> &LIP, PredIteratorCache &PIC,
+ AliasSetTracker &ast, LoopInfo &li, DebugLoc dl, int alignment,
MDNode *TBAATag)
- : LoadAndStorePromoter(Insts, S), SomePtr(SP),
- PointerMustAliases(PMA), LoopExitBlocks(LEB), LoopInsertPts(LIP),
- AST(ast), DL(dl), Alignment(alignment), TBAATag(TBAATag) {}
+ : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
+ LoopExitBlocks(LEB), LoopInsertPts(LIP), PredCache(PIC), AST(ast),
+ LI(li), DL(dl), Alignment(alignment), TBAATag(TBAATag) {}
- virtual bool isInstInList(Instruction *I,
- const SmallVectorImpl<Instruction*> &) const {
+ bool isInstInList(Instruction *I,
+ const SmallVectorImpl<Instruction*> &) const override {
Value *Ptr;
if (LoadInst *LI = dyn_cast<LoadInst>(I))
Ptr = LI->getOperand(0);
@@ -688,7 +729,7 @@ namespace {
return PointerMustAliases.count(Ptr);
}
- virtual void doExtraRewritesBeforeFinalDeletion() const {
+ void doExtraRewritesBeforeFinalDeletion() const override {
    // Insert stores in the loop exit blocks. Each exit block gets a
// store of the live-out values that feed them. Since we've already told
// the SSA updater about the defs in the loop and the preheader
@@ -696,19 +737,21 @@ namespace {
for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
BasicBlock *ExitBlock = LoopExitBlocks[i];
Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
+ LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock);
+ Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
Instruction *InsertPos = LoopInsertPts[i];
- StoreInst *NewSI = new StoreInst(LiveInValue, SomePtr, InsertPos);
+ StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
NewSI->setAlignment(Alignment);
NewSI->setDebugLoc(DL);
if (TBAATag) NewSI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
}
}
- virtual void replaceLoadWithValue(LoadInst *LI, Value *V) const {
+ void replaceLoadWithValue(LoadInst *LI, Value *V) const override {
// Update alias analysis.
AST.copyValue(LI, V);
}
- virtual void instructionDeleted(Instruction *I) const {
+ void instructionDeleted(Instruction *I) const override {
AST.deleteValue(I);
}
};
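For context, this is the source-level effect the promoter implements (a sketch with made-up names, not code from the patch): a must-alias pointer that is only loaded and stored inside the loop is kept in a register, and a store of the live-out value is placed at each exit. The new maybeInsertLCSSAPHI step keeps those exit stores in LCSSA form when the stored value or the pointer is defined in an inner loop.

    // Conceptual before/after of scalar promotion across a loop.
    void promote_example(int *p, int n) {
      // Before: each iteration loads and stores *p.
      // After promotion the loop body works on a register copy:
      int t = *p;                 // single load in the preheader
      for (int i = 0; i < n; ++i)
        t += i;
      *p = t;                     // single store in the loop exit block
    }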
@@ -721,7 +764,8 @@ namespace {
///
void LICM::PromoteAliasSet(AliasSet &AS,
SmallVectorImpl<BasicBlock*> &ExitBlocks,
- SmallVectorImpl<Instruction*> &InsertPts) {
+ SmallVectorImpl<Instruction*> &InsertPts,
+ PredIteratorCache &PIC) {
// We can promote this alias set if it has a store, if it is a "Must" alias
// set, if the pointer is loop invariant, and if we are not eliminating any
// volatile loads or stores.
@@ -754,7 +798,7 @@ void LICM::PromoteAliasSet(AliasSet &AS,
// We start with an alignment of one and try to find instructions that allow
// us to prove better alignment.
unsigned Alignment = 1;
- MDNode *TBAATag = 0;
+ MDNode *TBAATag = nullptr;
// Check that all of the pointers in the alias set have the same type. We
// cannot (yet) promote a memory location that is loaded and stored in
@@ -769,23 +813,22 @@ void LICM::PromoteAliasSet(AliasSet &AS,
if (SomePtr->getType() != ASIV->getType())
return;
- for (Value::use_iterator UI = ASIV->use_begin(), UE = ASIV->use_end();
- UI != UE; ++UI) {
+ for (User *U : ASIV->users()) {
// Ignore instructions that are outside the loop.
- Instruction *Use = dyn_cast<Instruction>(*UI);
- if (!Use || !CurLoop->contains(Use))
+ Instruction *UI = dyn_cast<Instruction>(U);
+ if (!UI || !CurLoop->contains(UI))
continue;
      // If there is a non-load/store instruction in the loop, we can't promote
// it.
- if (LoadInst *load = dyn_cast<LoadInst>(Use)) {
+ if (LoadInst *load = dyn_cast<LoadInst>(UI)) {
assert(!load->isVolatile() && "AST broken");
if (!load->isSimple())
return;
- } else if (StoreInst *store = dyn_cast<StoreInst>(Use)) {
+ } else if (StoreInst *store = dyn_cast<StoreInst>(UI)) {
// Stores *of* the pointer are not interesting, only stores *to* the
// pointer.
- if (Use->getOperand(1) != ASIV)
+ if (UI->getOperand(1) != ASIV)
continue;
assert(!store->isVolatile() && "AST broken");
if (!store->isSimple())
@@ -801,13 +844,13 @@ void LICM::PromoteAliasSet(AliasSet &AS,
// Larger is better, with the exception of 0 being the best alignment.
unsigned InstAlignment = store->getAlignment();
if ((InstAlignment > Alignment || InstAlignment == 0) && Alignment != 0)
- if (isGuaranteedToExecute(*Use)) {
+ if (isGuaranteedToExecute(*UI)) {
GuaranteedToExecute = true;
Alignment = InstAlignment;
}
if (!GuaranteedToExecute)
- GuaranteedToExecute = isGuaranteedToExecute(*Use);
+ GuaranteedToExecute = isGuaranteedToExecute(*UI);
} else
return; // Not a load or store.
@@ -815,13 +858,13 @@ void LICM::PromoteAliasSet(AliasSet &AS,
// Merge the TBAA tags.
if (LoopUses.empty()) {
// On the first load/store, just take its TBAA tag.
- TBAATag = Use->getMetadata(LLVMContext::MD_tbaa);
+ TBAATag = UI->getMetadata(LLVMContext::MD_tbaa);
} else if (TBAATag) {
TBAATag = MDNode::getMostGenericTBAA(TBAATag,
- Use->getMetadata(LLVMContext::MD_tbaa));
+ UI->getMetadata(LLVMContext::MD_tbaa));
}
-
- LoopUses.push_back(Use);
+
+ LoopUses.push_back(UI);
}
}
@@ -853,7 +896,7 @@ void LICM::PromoteAliasSet(AliasSet &AS,
SmallVector<PHINode*, 16> NewPHIs;
SSAUpdater SSA(&NewPHIs);
LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
- InsertPts, *CurAST, DL, Alignment, TBAATag);
+ InsertPts, PIC, *CurAST, *LI, DL, Alignment, TBAATag);
// Set up the preheader to have a definition of the value. It is the live-out
// value from the preheader that uses in the loop will use.
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoadCombine.cpp b/contrib/llvm/lib/Transforms/Scalar/LoadCombine.cpp
new file mode 100644
index 000000000000..846aa703c9c3
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LoadCombine.cpp
@@ -0,0 +1,268 @@
+//===- LoadCombine.cpp - Combine Adjacent Loads ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This transformation combines adjacent loads.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar.h"
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/TargetFolder.h"
+#include "llvm/Pass.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "load-combine"
+
+STATISTIC(NumLoadsAnalyzed, "Number of loads analyzed for combining");
+STATISTIC(NumLoadsCombined, "Number of loads combined");
+
+namespace {
+struct PointerOffsetPair {
+ Value *Pointer;
+ uint64_t Offset;
+};
+
+struct LoadPOPPair {
+ LoadPOPPair(LoadInst *L, PointerOffsetPair P, unsigned O)
+ : Load(L), POP(P), InsertOrder(O) {}
+ LoadPOPPair() {}
+ LoadInst *Load;
+ PointerOffsetPair POP;
+ /// \brief The new load needs to be created before the first load in IR order.
+ unsigned InsertOrder;
+};
+
+class LoadCombine : public BasicBlockPass {
+ LLVMContext *C;
+ const DataLayout *DL;
+
+public:
+ LoadCombine()
+ : BasicBlockPass(ID),
+ C(nullptr), DL(nullptr) {
+ initializeSROAPass(*PassRegistry::getPassRegistry());
+ }
+ bool doInitialization(Function &) override;
+ bool runOnBasicBlock(BasicBlock &BB) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ const char *getPassName() const override { return "LoadCombine"; }
+ static char ID;
+
+ typedef IRBuilder<true, TargetFolder> BuilderTy;
+
+private:
+ BuilderTy *Builder;
+
+ PointerOffsetPair getPointerOffsetPair(LoadInst &);
+ bool combineLoads(DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> &);
+ bool aggregateLoads(SmallVectorImpl<LoadPOPPair> &);
+ bool combineLoads(SmallVectorImpl<LoadPOPPair> &);
+};
+}
+
+bool LoadCombine::doInitialization(Function &F) {
+ DEBUG(dbgs() << "LoadCombine function: " << F.getName() << "\n");
+ C = &F.getContext();
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ if (!DLP) {
+ DEBUG(dbgs() << " Skipping LoadCombine -- no target data!\n");
+ return false;
+ }
+ DL = &DLP->getDataLayout();
+ return true;
+}
+
+PointerOffsetPair LoadCombine::getPointerOffsetPair(LoadInst &LI) {
+ PointerOffsetPair POP;
+ POP.Pointer = LI.getPointerOperand();
+ POP.Offset = 0;
+ while (isa<BitCastInst>(POP.Pointer) || isa<GetElementPtrInst>(POP.Pointer)) {
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(POP.Pointer)) {
+ unsigned BitWidth = DL->getPointerTypeSizeInBits(GEP->getType());
+ APInt Offset(BitWidth, 0);
+ if (GEP->accumulateConstantOffset(*DL, Offset))
+ POP.Offset += Offset.getZExtValue();
+ else
+ // Can't handle GEPs with variable indices.
+ return POP;
+ POP.Pointer = GEP->getPointerOperand();
+ } else if (auto *BC = dyn_cast<BitCastInst>(POP.Pointer))
+ POP.Pointer = BC->getOperand(0);
+ }
+ return POP;
+}
+
+bool LoadCombine::combineLoads(
+ DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> &LoadMap) {
+ bool Combined = false;
+ for (auto &Loads : LoadMap) {
+ if (Loads.second.size() < 2)
+ continue;
+ std::sort(Loads.second.begin(), Loads.second.end(),
+ [](const LoadPOPPair &A, const LoadPOPPair &B) {
+ return A.POP.Offset < B.POP.Offset;
+ });
+ if (aggregateLoads(Loads.second))
+ Combined = true;
+ }
+ return Combined;
+}
+
+/// \brief Try to aggregate loads from a sorted list of loads to be combined.
+///
+/// It is guaranteed that no writes occur between any of the loads. All loads
+/// have the same base pointer. There are at least two loads.
+bool LoadCombine::aggregateLoads(SmallVectorImpl<LoadPOPPair> &Loads) {
+ assert(Loads.size() >= 2 && "Insufficient loads!");
+ LoadInst *BaseLoad = nullptr;
+ SmallVector<LoadPOPPair, 8> AggregateLoads;
+ bool Combined = false;
+ uint64_t PrevOffset = -1ull;
+ uint64_t PrevSize = 0;
+ for (auto &L : Loads) {
+ if (PrevOffset == -1ull) {
+ BaseLoad = L.Load;
+ PrevOffset = L.POP.Offset;
+ PrevSize = DL->getTypeStoreSize(L.Load->getType());
+ AggregateLoads.push_back(L);
+ continue;
+ }
+ if (L.Load->getAlignment() > BaseLoad->getAlignment())
+ continue;
+ if (L.POP.Offset > PrevOffset + PrevSize) {
+ // No other load will be combinable
+ if (combineLoads(AggregateLoads))
+ Combined = true;
+ AggregateLoads.clear();
+ PrevOffset = -1;
+ continue;
+ }
+ if (L.POP.Offset != PrevOffset + PrevSize)
+      // This load overlaps bytes already covered by the previous load.
+ // FIXME: We may want to handle this case.
+ continue;
+ PrevOffset = L.POP.Offset;
+ PrevSize = DL->getTypeStoreSize(L.Load->getType());
+ AggregateLoads.push_back(L);
+ }
+ if (combineLoads(AggregateLoads))
+ Combined = true;
+ return Combined;
+}
+
+/// \brief Given a list of combinable loads, combine the maximum number of them.
+bool LoadCombine::combineLoads(SmallVectorImpl<LoadPOPPair> &Loads) {
+ // Remove loads from the end while the size is not a power of 2.
+ unsigned TotalSize = 0;
+ for (const auto &L : Loads)
+ TotalSize += L.Load->getType()->getPrimitiveSizeInBits();
+ while (TotalSize != 0 && !isPowerOf2_32(TotalSize))
+ TotalSize -= Loads.pop_back_val().Load->getType()->getPrimitiveSizeInBits();
+ if (Loads.size() < 2)
+ return false;
+
+ DEBUG({
+ dbgs() << "***** Combining Loads ******\n";
+ for (const auto &L : Loads) {
+ dbgs() << L.POP.Offset << ": " << *L.Load << "\n";
+ }
+ });
+
+ // Find first load. This is where we put the new load.
+ LoadPOPPair FirstLP;
+ FirstLP.InsertOrder = -1u;
+ for (const auto &L : Loads)
+ if (L.InsertOrder < FirstLP.InsertOrder)
+ FirstLP = L;
+
+ unsigned AddressSpace =
+ FirstLP.POP.Pointer->getType()->getPointerAddressSpace();
+
+ Builder->SetInsertPoint(FirstLP.Load);
+ Value *Ptr = Builder->CreateConstGEP1_64(
+ Builder->CreatePointerCast(Loads[0].POP.Pointer,
+ Builder->getInt8PtrTy(AddressSpace)),
+ Loads[0].POP.Offset);
+ LoadInst *NewLoad = new LoadInst(
+ Builder->CreatePointerCast(
+ Ptr, PointerType::get(IntegerType::get(Ptr->getContext(), TotalSize),
+ Ptr->getType()->getPointerAddressSpace())),
+ Twine(Loads[0].Load->getName()) + ".combined", false,
+ Loads[0].Load->getAlignment(), FirstLP.Load);
+
+ for (const auto &L : Loads) {
+ Builder->SetInsertPoint(L.Load);
+ Value *V = Builder->CreateExtractInteger(
+ *DL, NewLoad, cast<IntegerType>(L.Load->getType()),
+ L.POP.Offset - Loads[0].POP.Offset, "combine.extract");
+ L.Load->replaceAllUsesWith(V);
+ }
+
+ NumLoadsCombined = NumLoadsCombined + Loads.size();
+ return true;
+}
+
+bool LoadCombine::runOnBasicBlock(BasicBlock &BB) {
+ if (skipOptnoneFunction(BB) || !DL)
+ return false;
+
+ IRBuilder<true, TargetFolder>
+ TheBuilder(BB.getContext(), TargetFolder(DL));
+ Builder = &TheBuilder;
+
+ DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> LoadMap;
+
+ bool Combined = false;
+ unsigned Index = 0;
+ for (auto &I : BB) {
+ if (I.mayWriteToMemory() || I.mayThrow()) {
+ if (combineLoads(LoadMap))
+ Combined = true;
+ LoadMap.clear();
+ continue;
+ }
+ LoadInst *LI = dyn_cast<LoadInst>(&I);
+ if (!LI)
+ continue;
+ ++NumLoadsAnalyzed;
+ if (!LI->isSimple() || !LI->getType()->isIntegerTy())
+ continue;
+ auto POP = getPointerOffsetPair(*LI);
+ if (!POP.Pointer)
+ continue;
+ LoadMap[POP.Pointer].push_back(LoadPOPPair(LI, POP, Index++));
+ }
+ if (combineLoads(LoadMap))
+ Combined = true;
+ return Combined;
+}
+
+void LoadCombine::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+}
+
+char LoadCombine::ID = 0;
+
+BasicBlockPass *llvm::createLoadCombinePass() {
+ return new LoadCombine();
+}
+
+INITIALIZE_PASS(LoadCombine, "load-combine", "Combine Adjacent Loads", false,
+ false)
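The net effect of the pass, sketched at the source level (hypothetical functions; assumes a little-endian target and that nothing can write between the two loads): adjacent narrow loads become one wide load plus truncate/shift extractions, which is what the ".combined" load and the "combine.extract" values amount to.

    #include <cstdint>
    #include <cstring>

    // Before: two adjacent 32-bit loads from the same base pointer.
    uint32_t sum_split(const uint32_t *p) {
      return p[0] + p[1];
    }

    // After combining: one 64-bit load, then each original value is extracted.
    uint32_t sum_combined(const uint32_t *p) {
      uint64_t wide;
      std::memcpy(&wide, p, sizeof wide);              // the single wide load
      uint32_t lo = static_cast<uint32_t>(wide);       // bytes at offset 0
      uint32_t hi = static_cast<uint32_t>(wide >> 32); // bytes at offset 4
      return lo + hi;
    }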
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
index 9e39d2ee84f0..5ab686aa831a 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -14,15 +14,16 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loop-delete"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/IR/Dominators.h"
using namespace llvm;
+#define DEBUG_TYPE "loop-delete"
+
STATISTIC(NumDeleted, "Number of loops deleted");
namespace {
@@ -34,17 +35,17 @@ namespace {
}
// Possibly eliminate loop L if it is dead.
- bool runOnLoop(Loop *L, LPPassManager &LPM);
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<DominatorTree>();
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfo>();
AU.addRequired<ScalarEvolution>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
AU.addPreserved<ScalarEvolution>();
- AU.addPreserved<DominatorTree>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<LoopInfo>();
AU.addPreservedID(LoopSimplifyID);
AU.addPreservedID(LCSSAID);
@@ -61,7 +62,7 @@ namespace {
char LoopDeletion::ID = 0;
INITIALIZE_PASS_BEGIN(LoopDeletion, "loop-deletion",
"Delete dead loops", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
@@ -130,6 +131,9 @@ bool LoopDeletion::isLoopDead(Loop *L,
/// NOTE: This entire process relies pretty heavily on LoopSimplify and LCSSA
/// in order to make various safety checks work.
bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &LPM) {
+ if (skipOptnoneFunction(L))
+ return false;
+
// We can only remove the loop if there is a preheader that we can
// branch from after removing it.
BasicBlock *preheader = L->getLoopPreheader();
@@ -202,7 +206,7 @@ bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &LPM) {
// Update the dominator tree and remove the instructions and blocks that will
// be deleted from the reference counting scheme.
- DominatorTree &DT = getAnalysis<DominatorTree>();
+ DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
SmallVector<DomTreeNode*, 8> ChildNodes;
for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
LI != LE; ++LI) {
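As a reminder of what the pass targets (illustrative source, not from the patch), a loop with no side effects whose results are never used afterwards can be removed outright; the new skipOptnoneFunction check simply leaves such loops alone in optnone functions.

    // 't' is never read after the loop and the loop has no side effects, so
    // LoopDeletion can delete the whole loop.
    int dead_loop_example(int n) {
      int t = 0;
      for (int i = 0; i < n; ++i)
        t += i;
      return n;      // only 'n' is used afterwards
    }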
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 952b76b822cf..a12f5a7a0334 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -41,7 +41,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loop-idiom"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -51,6 +50,7 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
@@ -60,6 +60,8 @@
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
+#define DEBUG_TYPE "loop-idiom"
+
STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
@@ -78,9 +80,6 @@ namespace {
return dyn_cast<BranchInst>(BB->getTerminator());
}
- /// Return the condition of the branch terminating the given basic block.
- static Value *getBrCondtion(BasicBlock *);
-
/// Derive the precondition block (i.e the block that guards the loop
/// preheader) from the given preheader.
static BasicBlock *getPrecondBb(BasicBlock *PreHead);
@@ -108,22 +107,22 @@ namespace {
bool preliminaryScreen();
/// Check if the given conditional branch is based on the comparison
- /// beween a variable and zero, and if the variable is non-zero, the
- /// control yeilds to the loop entry. If the branch matches the behavior,
+ /// between a variable and zero, and if the variable is non-zero, the
+ /// control yields to the loop entry. If the branch matches the behavior,
    /// the variable involved in the comparison is returned. This function will
/// be called to see if the precondition and postcondition of the loop
/// are in desirable form.
- Value *matchCondition (BranchInst *Br, BasicBlock *NonZeroTarget) const;
+ Value *matchCondition(BranchInst *Br, BasicBlock *NonZeroTarget) const;
    /// Return true iff the idiom is detected in the loop, and 1) \p CntInst
- /// is set to the instruction counting the pupulation bit. 2) \p CntPhi
+ /// is set to the instruction counting the population bit. 2) \p CntPhi
/// is set to the corresponding phi node. 3) \p Var is set to the value
/// whose population bits are being counted.
bool detectIdiom
(Instruction *&CntInst, PHINode *&CntPhi, Value *&Var) const;
/// Insert ctpop intrinsic function and some obviously dead instructions.
- void transform (Instruction *CntInst, PHINode *CntPhi, Value *Var);
+ void transform(Instruction *CntInst, PHINode *CntPhi, Value *Var);
/// Create llvm.ctpop.* intrinsic function.
CallInst *createPopcntIntrinsic(IRBuilderTy &IRB, Value *Val, DebugLoc DL);
@@ -131,7 +130,7 @@ namespace {
class LoopIdiomRecognize : public LoopPass {
Loop *CurLoop;
- const DataLayout *TD;
+ const DataLayout *DL;
DominatorTree *DT;
ScalarEvolution *SE;
TargetLibraryInfo *TLI;
@@ -140,10 +139,10 @@ namespace {
static char ID;
explicit LoopIdiomRecognize() : LoopPass(ID) {
initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
- TD = 0; DT = 0; SE = 0; TLI = 0; TTI = 0;
+ DL = nullptr; DT = nullptr; SE = nullptr; TLI = nullptr; TTI = nullptr;
}
- bool runOnLoop(Loop *L, LPPassManager &LPM);
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override;
bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
SmallVectorImpl<BasicBlock*> &ExitBlocks);
@@ -163,7 +162,7 @@ namespace {
/// This transformation requires natural loop information & requires that
/// loop preheaders be inserted into the CFG.
///
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LoopInfo>();
AU.addPreserved<LoopInfo>();
AU.addRequiredID(LoopSimplifyID);
@@ -174,18 +173,23 @@ namespace {
AU.addPreserved<AliasAnalysis>();
AU.addRequired<ScalarEvolution>();
AU.addPreserved<ScalarEvolution>();
- AU.addPreserved<DominatorTree>();
- AU.addRequired<DominatorTree>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetLibraryInfo>();
AU.addRequired<TargetTransformInfo>();
}
const DataLayout *getDataLayout() {
- return TD ? TD : TD=getAnalysisIfAvailable<DataLayout>();
+ if (DL)
+ return DL;
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
+ return DL;
}
DominatorTree *getDominatorTree() {
- return DT ? DT : (DT=&getAnalysis<DominatorTree>());
+ return DT ? DT
+ : (DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree());
}
ScalarEvolution *getScalarEvolution() {
@@ -212,7 +216,7 @@ char LoopIdiomRecognize::ID = 0;
INITIALIZE_PASS_BEGIN(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
false, false)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
@@ -244,7 +248,7 @@ static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE,
for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
Value *Op = DeadInst->getOperand(op);
- DeadInst->setOperand(op, 0);
+ DeadInst->setOperand(op, nullptr);
// If this operand just became dead, add it to the NowDeadInsts list.
if (!Op->use_empty()) continue;
@@ -286,17 +290,12 @@ bool LIRUtil::isAlmostEmpty(BasicBlock *BB) {
return false;
}
-Value *LIRUtil::getBrCondtion(BasicBlock *BB) {
- BranchInst *Br = getBranch(BB);
- return Br ? Br->getCondition() : 0;
-}
-
BasicBlock *LIRUtil::getPrecondBb(BasicBlock *PreHead) {
if (BasicBlock *BB = PreHead->getSinglePredecessor()) {
BranchInst *Br = getBranch(BB);
- return Br && Br->isConditional() ? BB : 0;
+ return Br && Br->isConditional() ? BB : nullptr;
}
- return 0;
+ return nullptr;
}
//===----------------------------------------------------------------------===//
@@ -306,7 +305,7 @@ BasicBlock *LIRUtil::getPrecondBb(BasicBlock *PreHead) {
//===----------------------------------------------------------------------===//
NclPopcountRecognize::NclPopcountRecognize(LoopIdiomRecognize &TheLIR):
- LIR(TheLIR), CurLoop(TheLIR.getLoop()), PreCondBB(0) {
+ LIR(TheLIR), CurLoop(TheLIR.getLoop()), PreCondBB(nullptr) {
}
bool NclPopcountRecognize::preliminaryScreen() {
@@ -343,25 +342,25 @@ bool NclPopcountRecognize::preliminaryScreen() {
return true;
}
-Value *NclPopcountRecognize::matchCondition (BranchInst *Br,
- BasicBlock *LoopEntry) const {
+Value *NclPopcountRecognize::matchCondition(BranchInst *Br,
+ BasicBlock *LoopEntry) const {
if (!Br || !Br->isConditional())
- return 0;
+ return nullptr;
ICmpInst *Cond = dyn_cast<ICmpInst>(Br->getCondition());
if (!Cond)
- return 0;
+ return nullptr;
ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1));
if (!CmpZero || !CmpZero->isZero())
- return 0;
+ return nullptr;
ICmpInst::Predicate Pred = Cond->getPredicate();
if ((Pred == ICmpInst::ICMP_NE && Br->getSuccessor(0) == LoopEntry) ||
(Pred == ICmpInst::ICMP_EQ && Br->getSuccessor(1) == LoopEntry))
return Cond->getOperand(0);
- return 0;
+ return nullptr;
}
bool NclPopcountRecognize::detectIdiom(Instruction *&CntInst,
@@ -392,9 +391,9 @@ bool NclPopcountRecognize::detectIdiom(Instruction *&CntInst,
Value *VarX1, *VarX0;
PHINode *PhiX, *CountPhi;
- DefX2 = CountInst = 0;
- VarX1 = VarX0 = 0;
- PhiX = CountPhi = 0;
+ DefX2 = CountInst = nullptr;
+ VarX1 = VarX0 = nullptr;
+ PhiX = CountPhi = nullptr;
LoopEntry = *(CurLoop->block_begin());
// step 1: Check if the loop-back branch is in desirable form.
@@ -441,7 +440,7 @@ bool NclPopcountRecognize::detectIdiom(Instruction *&CntInst,
// step 4: Find the instruction which count the population: cnt2 = cnt1 + 1
{
- CountInst = NULL;
+ CountInst = nullptr;
for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI(),
IterE = LoopEntry->end(); Iter != IterE; Iter++) {
Instruction *Inst = Iter;
@@ -458,9 +457,8 @@ bool NclPopcountRecognize::detectIdiom(Instruction *&CntInst,
      // Check if the result of the instruction is live out of the loop.
bool LiveOutLoop = false;
- for (Value::use_iterator I = Inst->use_begin(), E = Inst->use_end();
- I != E; I++) {
- if ((cast<Instruction>(*I))->getParent() != LoopEntry) {
+ for (User *U : Inst->users()) {
+ if ((cast<Instruction>(U))->getParent() != LoopEntry) {
LiveOutLoop = true; break;
}
}
@@ -519,7 +517,7 @@ void NclPopcountRecognize::transform(Instruction *CntInst,
// TripCnt is exactly the number of iterations the loop has
TripCnt = NewCount;
- // If the popoulation counter's initial value is not zero, insert Add Inst.
+ // If the population counter's initial value is not zero, insert Add Inst.
Value *CntInitVal = CntPhi->getIncomingValueForBlock(PreHead);
ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
if (!InitConst || !InitConst->isZero()) {
@@ -596,11 +594,9 @@ void NclPopcountRecognize::transform(Instruction *CntInst,
// __builtin_ctpop().
{
SmallVector<Value *, 4> CntUses;
- for (Value::use_iterator I = CntInst->use_begin(), E = CntInst->use_end();
- I != E; I++) {
- if (cast<Instruction>(*I)->getParent() != Body)
- CntUses.push_back(*I);
- }
+ for (User *U : CntInst->users())
+ if (cast<Instruction>(U)->getParent() != Body)
+ CntUses.push_back(U);
for (unsigned Idx = 0; Idx < CntUses.size(); Idx++) {
(cast<Instruction>(CntUses[Idx]))->replaceUsesOfWith(CntInst, NewCount);
}
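The shape detectIdiom and transform look for, written out as source (an illustrative sketch): a branch that compares the variable against zero before entering the loop, a body that clears one set bit per iteration, and a counter PHI. The counter is rewritten in terms of the ctpop intrinsic.

    // Hypothetical example of the population-count idiom.
    int count_bits(unsigned x) {
      int cnt = 0;          // initial value of the counter PHI
      while (x != 0) {      // precondition and loop-back branches test x != 0
        x = x & (x - 1);    // clears the lowest set bit each iteration
        ++cnt;              // CntInst: the instruction counting the bits
      }
      return cnt;           // after the rewrite this is fed by llvm.ctpop
    }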
@@ -705,6 +701,9 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() {
}
bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
+ if (skipOptnoneFunction(L))
+ return false;
+
CurLoop = L;
// If the loop could not be converted to canonical form, it must have an
@@ -746,7 +745,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
// If processing the store invalidated our iterator, start over from the
// top of the block.
- if (InstPtr == 0)
+ if (!InstPtr)
I = BB->begin();
continue;
}
@@ -759,7 +758,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
// If processing the memset invalidated our iterator, start over from the
// top of the block.
- if (InstPtr == 0)
+ if (!InstPtr)
I = BB->begin();
continue;
}
@@ -777,7 +776,7 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
Value *StorePtr = SI->getPointerOperand();
// Reject stores that are so large that they overflow an unsigned.
- uint64_t SizeInBits = TD->getTypeSizeInBits(StoredVal->getType());
+ uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
return false;
@@ -786,7 +785,7 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
// random store we can't handle.
const SCEVAddRecExpr *StoreEv =
dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
- if (StoreEv == 0 || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
+ if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
return false;
// Check to see if the stride matches the size of the store. If so, then we
@@ -794,7 +793,7 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
unsigned StoreSize = (unsigned)SizeInBits >> 3;
const SCEVConstant *Stride = dyn_cast<SCEVConstant>(StoreEv->getOperand(1));
- if (Stride == 0 || StoreSize != Stride->getValue()->getValue()) {
+ if (!Stride || StoreSize != Stride->getValue()->getValue()) {
// TODO: Could also handle negative stride here someday, that will require
// the validity check in mayLoopAccessLocation to be updated though.
// Enable this to print exact negative strides.
@@ -843,7 +842,7 @@ processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) {
// loop, which indicates a strided store. If we have something else, it's a
// random store we can't handle.
const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Pointer));
- if (Ev == 0 || Ev->getLoop() != CurLoop || !Ev->isAffine())
+ if (!Ev || Ev->getLoop() != CurLoop || !Ev->isAffine())
return false;
// Reject memsets that are so large that they overflow an unsigned.
@@ -857,7 +856,7 @@ processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) {
// TODO: Could also handle negative stride here someday, that will require the
// validity check in mayLoopAccessLocation to be updated though.
- if (Stride == 0 || MSI->getLength() != Stride->getValue())
+ if (!Stride || MSI->getLength() != Stride->getValue())
return false;
return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
@@ -905,28 +904,28 @@ static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access,
///
/// Note that we don't ever attempt to use memset_pattern8 or 4, because these
/// just replicate their input array and then pass on to memset_pattern16.
-static Constant *getMemSetPatternValue(Value *V, const DataLayout &TD) {
+static Constant *getMemSetPatternValue(Value *V, const DataLayout &DL) {
// If the value isn't a constant, we can't promote it to being in a constant
// array. We could theoretically do a store to an alloca or something, but
// that doesn't seem worthwhile.
Constant *C = dyn_cast<Constant>(V);
- if (C == 0) return 0;
+ if (!C) return nullptr;
// Only handle simple values that are a power of two bytes in size.
- uint64_t Size = TD.getTypeSizeInBits(V->getType());
+ uint64_t Size = DL.getTypeSizeInBits(V->getType());
if (Size == 0 || (Size & 7) || (Size & (Size-1)))
- return 0;
+ return nullptr;
// Don't care enough about darwin/ppc to implement this.
- if (TD.isBigEndian())
- return 0;
+ if (DL.isBigEndian())
+ return nullptr;
// Convert to size in bytes.
Size /= 8;
// TODO: If CI is larger than 16-bytes, we can try slicing it in half to see
// if the top and bottom are the same (e.g. for vectors and large integers).
- if (Size > 16) return 0;
+ if (Size > 16) return nullptr;
// If the constant is exactly 16 bytes, just use it.
if (Size == 16) return C;
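For reference, the strided-store idiom that reaches processLoopStridedStore looks like the following in source form (hypothetical functions): when the stride equals the store size and the stored value is a byte-wise splat it becomes a plain memset, while small non-splat constants of power-of-two size go through getMemSetPatternValue and memset_pattern16 instead.

    #include <cstring>

    // Byte-wise splat value stored with unit stride: recognized as memset.
    void fill_zero(unsigned char *p, unsigned n) {
      for (unsigned i = 0; i < n; ++i)
        p[i] = 0;
    }

    // Conceptually rewritten to:
    void fill_zero_memset(unsigned char *p, unsigned n) {
      if (n != 0)
        std::memset(p, 0, n);
    }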
@@ -951,7 +950,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// are stored. A store of i32 0x01020304 can never be turned into a memset,
// but it can be turned into memset_pattern if the target supports it.
Value *SplatValue = isBytewiseValue(StoredVal);
- Constant *PatternValue = 0;
+ Constant *PatternValue = nullptr;
unsigned DestAS = DestPtr->getType()->getPointerAddressSpace();
@@ -962,13 +961,13 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// promote the memset.
CurLoop->isLoopInvariant(SplatValue)) {
// Keep and use SplatValue.
- PatternValue = 0;
+ PatternValue = nullptr;
} else if (DestAS == 0 &&
TLI->has(LibFunc::memset_pattern16) &&
- (PatternValue = getMemSetPatternValue(StoredVal, *TD))) {
+ (PatternValue = getMemSetPatternValue(StoredVal, *DL))) {
// Don't create memset_pattern16s with address spaces.
// It looks like we can use PatternValue!
- SplatValue = 0;
+ SplatValue = nullptr;
} else {
// Otherwise, this isn't an idiom we can transform. For example, we can't
// do anything with a 3-byte store.
@@ -1006,7 +1005,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
// pointer size if it isn't already.
- Type *IntPtr = Builder.getIntPtrTy(TD, DestAS);
+ Type *IntPtr = Builder.getIntPtrTy(DL, DestAS);
BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
@@ -1035,7 +1034,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
Int8PtrTy,
Int8PtrTy,
IntPtr,
- (void*)0);
+ (void*)nullptr);
// Otherwise we should form a memset_pattern16. PatternValue is known to be
  // a constant array of 16 bytes. Plop the value into a mergable global.
@@ -1120,7 +1119,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
// pointer size if it isn't already.
- Type *IntPtrTy = Builder.getIntPtrTy(TD, SI->getPointerAddressSpace());
+ Type *IntPtrTy = Builder.getIntPtrTy(DL, SI->getPointerAddressSpace());
BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy);
const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtrTy, 1),
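Likewise, processLoopStoreOfLoopLoad matches a load and a store that both advance by the store size each iteration, the source-level memcpy idiom (illustrative functions; the pass only performs the rewrite when alias analysis shows the two ranges cannot overlap):

    #include <cstring>

    void copy_loop(int *dst, const int *src, unsigned n) {
      for (unsigned i = 0; i < n; ++i)
        dst[i] = src[i];       // strided load feeding a strided store
    }

    // Conceptually rewritten to:
    void copy_memcpy(int *dst, const int *src, unsigned n) {
      if (n != 0)
        std::memcpy(dst, src, n * sizeof(int));
    }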
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
index a23860aad80e..ab1a9393c526 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -11,21 +11,22 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loop-instsimplify"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/Analysis/Dominators.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
+#define DEBUG_TYPE "loop-instsimplify"
+
STATISTIC(NumSimplified, "Number of redundant instructions simplified");
namespace {
@@ -36,9 +37,9 @@ namespace {
initializeLoopInstSimplifyPass(*PassRegistry::getPassRegistry());
}
- bool runOnLoop(Loop*, LPPassManager&);
+ bool runOnLoop(Loop*, LPPassManager&) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<LoopInfo>();
AU.addRequiredID(LoopSimplifyID);
@@ -54,7 +55,7 @@ char LoopInstSimplify::ID = 0;
INITIALIZE_PASS_BEGIN(LoopInstSimplify, "loop-instsimplify",
"Simplify instructions in loops", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_END(LoopInstSimplify, "loop-instsimplify",
@@ -65,9 +66,15 @@ Pass *llvm::createLoopInstSimplifyPass() {
}
bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
- DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
+ if (skipOptnoneFunction(L))
+ return false;
+
+ DominatorTreeWrapperPass *DTWP =
+ getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
LoopInfo *LI = &getAnalysis<LoopInfo>();
- const DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
SmallVector<BasicBlock*, 8> ExitBlocks;
@@ -109,19 +116,26 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// Don't bother simplifying unused instructions.
if (!I->use_empty()) {
- Value *V = SimplifyInstruction(I, TD, TLI, DT);
+ Value *V = SimplifyInstruction(I, DL, TLI, DT);
if (V && LI->replacementPreservesLCSSAForm(I, V)) {
// Mark all uses for resimplification next time round the loop.
- for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
- UI != UE; ++UI)
- Next->insert(cast<Instruction>(*UI));
+ for (User *U : I->users())
+ Next->insert(cast<Instruction>(U));
I->replaceAllUsesWith(V);
LocalChanged = true;
++NumSimplified;
}
}
- LocalChanged |= RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
+ bool res = RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
+ if (res) {
+        // RecursivelyDeleteTriviallyDeadInstructions can remove more than one
+        // instruction, so simply incrementing the iterator does not work.
+        // When instructions get deleted, re-iterate instead.
+ BI = BB->begin(); BE = BB->end();
+ LocalChanged |= res;
+ }
if (IsSubloopHeader && !isa<PHINode>(I))
break;
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
index 643bc78f6e58..b6fbb16166dd 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -11,11 +11,10 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loop-reroll"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/LoopPass.h"
@@ -24,6 +23,7 @@
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -35,6 +35,8 @@
using namespace llvm;
+#define DEBUG_TYPE "loop-reroll"
+
STATISTIC(NumRerolledLoops, "Number of rerolled loops");
static cl::opt<unsigned>
@@ -124,14 +126,14 @@ namespace {
initializeLoopRerollPass(*PassRegistry::getPassRegistry());
}
- bool runOnLoop(Loop *L, LPPassManager &LPM);
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AliasAnalysis>();
AU.addRequired<LoopInfo>();
AU.addPreserved<LoopInfo>();
- AU.addRequired<DominatorTree>();
- AU.addPreserved<DominatorTree>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<ScalarEvolution>();
AU.addRequired<TargetLibraryInfo>();
}
@@ -140,7 +142,7 @@ protected:
AliasAnalysis *AA;
LoopInfo *LI;
ScalarEvolution *SE;
- DataLayout *DL;
+ const DataLayout *DL;
TargetLibraryInfo *TLI;
DominatorTree *DT;
@@ -189,12 +191,12 @@ protected:
iterator begin() {
assert(Valid && "Using invalid reduction");
- return llvm::next(Instructions.begin());
+ return std::next(Instructions.begin());
}
const_iterator begin() const {
assert(Valid && "Using invalid reduction");
- return llvm::next(Instructions.begin());
+ return std::next(Instructions.begin());
}
iterator end() { return Instructions.end(); }
@@ -340,7 +342,7 @@ char LoopReroll::ID = 0;
INITIALIZE_PASS_BEGIN(LoopReroll, "loop-reroll", "Reroll loops", false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(LoopReroll, "loop-reroll", "Reroll loops", false, false)
@@ -353,12 +355,9 @@ Pass *llvm::createLoopRerollPass() {
// This operates like Instruction::isUsedOutsideOfBlock, but considers PHIs in
// non-loop blocks to be outside the loop.
static bool hasUsesOutsideLoop(Instruction *I, Loop *L) {
- for (Value::use_iterator UI = I->use_begin(),
- UIE = I->use_end(); UI != UIE; ++UI) {
- Instruction *User = cast<Instruction>(*UI);
- if (!L->contains(User))
+ for (User *U : I->users())
+ if (!L->contains(cast<Instruction>(U)))
return true;
- }
return false;
}
@@ -408,7 +407,7 @@ void LoopReroll::SimpleLoopReduction::add(Loop *L) {
Instruction *C = Instructions.front();
do {
- C = cast<Instruction>(*C->use_begin());
+ C = cast<Instruction>(*C->user_begin());
if (C->hasOneUse()) {
if (!C->isBinaryOp())
return;
@@ -423,17 +422,15 @@ void LoopReroll::SimpleLoopReduction::add(Loop *L) {
if (Instructions.size() < 2 ||
!C->isSameOperationAs(Instructions.back()) ||
- C->use_begin() == C->use_end())
+ C->use_empty())
return;
// C is now the (potential) last instruction in the reduction chain.
- for (Value::use_iterator UI = C->use_begin(), UIE = C->use_end();
- UI != UIE; ++UI) {
+ for (User *U : C->users())
// The only in-loop user can be the initial PHI.
- if (L->contains(cast<Instruction>(*UI)))
- if (cast<Instruction>(*UI ) != Instructions.front())
+ if (L->contains(cast<Instruction>(U)))
+ if (cast<Instruction>(U) != Instructions.front())
return;
- }
Instructions.push_back(C);
Valid = true;
@@ -483,12 +480,11 @@ void LoopReroll::collectInLoopUserSet(Loop *L,
continue;
if (!Final.count(I))
- for (Value::use_iterator UI = I->use_begin(),
- UIE = I->use_end(); UI != UIE; ++UI) {
- Instruction *User = cast<Instruction>(*UI);
+ for (Use &U : I->uses()) {
+ Instruction *User = cast<Instruction>(U.getUser());
if (PHINode *PN = dyn_cast<PHINode>(User)) {
// Ignore "wrap-around" uses to PHIs of this loop's header.
- if (PN->getIncomingBlock(UI) == L->getHeader())
+ if (PN->getIncomingBlock(U) == L->getHeader())
continue;
}
@@ -559,8 +555,8 @@ bool LoopReroll::findScaleFromMul(Instruction *RealIV, uint64_t &Scale,
if (RealIV->getNumUses() != 2)
return false;
const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(RealIV));
- Instruction *User1 = cast<Instruction>(*RealIV->use_begin()),
- *User2 = cast<Instruction>(*llvm::next(RealIV->use_begin()));
+ Instruction *User1 = cast<Instruction>(*RealIV->user_begin()),
+ *User2 = cast<Instruction>(*std::next(RealIV->user_begin()));
if (!SE->isSCEVable(User1->getType()) || !SE->isSCEVable(User2->getType()))
return false;
const SCEVAddRecExpr *User1SCEV =
@@ -616,26 +612,25 @@ bool LoopReroll::collectAllRoots(Loop *L, uint64_t Inc, uint64_t Scale,
SmallVector<SmallInstructionVector, 32> &Roots,
SmallInstructionSet &AllRoots,
SmallInstructionVector &LoopIncs) {
- for (Value::use_iterator UI = IV->use_begin(),
- UIE = IV->use_end(); UI != UIE; ++UI) {
- Instruction *User = cast<Instruction>(*UI);
- if (!SE->isSCEVable(User->getType()))
+ for (User *U : IV->users()) {
+ Instruction *UI = cast<Instruction>(U);
+ if (!SE->isSCEVable(UI->getType()))
continue;
- if (User->getType() != IV->getType())
+ if (UI->getType() != IV->getType())
continue;
- if (!L->contains(User))
+ if (!L->contains(UI))
continue;
- if (hasUsesOutsideLoop(User, L))
+ if (hasUsesOutsideLoop(UI, L))
continue;
if (const SCEVConstant *Diff = dyn_cast<SCEVConstant>(SE->getMinusSCEV(
- SE->getSCEV(User), SE->getSCEV(IV)))) {
+ SE->getSCEV(UI), SE->getSCEV(IV)))) {
uint64_t Idx = Diff->getValue()->getValue().getZExtValue();
if (Idx > 0 && Idx < Scale) {
- Roots[Idx-1].push_back(User);
- AllRoots.insert(User);
+ Roots[Idx-1].push_back(UI);
+ AllRoots.insert(UI);
} else if (Idx == Scale && Inc > 1) {
- LoopIncs.push_back(User);
+ LoopIncs.push_back(UI);
}
}
}
@@ -719,10 +714,8 @@ void LoopReroll::ReductionTracker::replaceSelected() {
// Replace users with the new end-of-chain value.
SmallInstructionVector Users;
- for (Value::use_iterator UI =
- PossibleReds[i].getReducedValue()->use_begin(),
- UIE = PossibleReds[i].getReducedValue()->use_end(); UI != UIE; ++UI)
- Users.push_back(cast<Instruction>(*UI));
+ for (User *U : PossibleReds[i].getReducedValue()->users())
+ Users.push_back(cast<Instruction>(U));
for (SmallInstructionVector::iterator J = Users.begin(),
JE = Users.end(); J != JE; ++J)
@@ -931,8 +924,10 @@ bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
// them, and this matching fails. As an exception, we allow the alias
// set tracker to handle regular (simple) load/store dependencies.
if (FutureSideEffects &&
- ((!isSimpleLoadStore(J1) && !isSafeToSpeculativelyExecute(J1)) ||
- (!isSimpleLoadStore(J2) && !isSafeToSpeculativelyExecute(J2)))) {
+ ((!isSimpleLoadStore(J1) &&
+ !isSafeToSpeculativelyExecute(J1, DL)) ||
+ (!isSimpleLoadStore(J2) &&
+ !isSafeToSpeculativelyExecute(J2, DL)))) {
DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
" vs. " << *J2 <<
" (side effects prevent reordering)\n");
@@ -953,7 +948,7 @@ bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
bool InReduction = Reductions.isPairInSame(J1, J2);
if (!(InReduction && J1->isAssociative())) {
- bool Swapped = false, SomeOpMatched = false;;
+ bool Swapped = false, SomeOpMatched = false;
for (unsigned j = 0; j < J1->getNumOperands() && !MatchFailed; ++j) {
Value *Op2 = J2->getOperand(j);
@@ -1133,12 +1128,16 @@ bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
}
bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) {
+ if (skipOptnoneFunction(L))
+ return false;
+
AA = &getAnalysis<AliasAnalysis>();
LI = &getAnalysis<LoopInfo>();
SE = &getAnalysis<ScalarEvolution>();
TLI = &getAnalysis<TargetLibraryInfo>();
- DL = getAnalysisIfAvailable<DataLayout>();
- DT = &getAnalysis<DominatorTree>();
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
BasicBlock *Header = L->getHeader();
DEBUG(dbgs() << "LRR: F[" << Header->getParent()->getName() <<
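What the pass is undoing, shown as source (a hypothetical sketch): a loop that was unrolled by hand is folded back into its single-iteration form so that the induction variable and trip count are explicit again for later passes.

    // Manually unrolled by a factor of two (n assumed to be even).
    void scale_unrolled(int *a, int n) {
      for (int i = 0; i < n; i += 2) {
        a[i]     *= 3;
        a[i + 1] *= 3;
      }
    }

    // After rerolling, conceptually:
    void scale_rerolled(int *a, int n) {
      for (int i = 0; i < n; ++i)
        a[i] *= 3;
    }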
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
index 14c5655f0838..2ce58314f8ef 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loop-rotate"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CodeMetrics.h"
@@ -20,9 +19,11 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -30,7 +31,11 @@
#include "llvm/Transforms/Utils/ValueMapper.h"
using namespace llvm;
-#define MAX_HEADER_SIZE 16
+#define DEBUG_TYPE "loop-rotate"
+
+static cl::opt<unsigned>
+DefaultRotationThreshold("rotation-max-header-size", cl::init(16), cl::Hidden,
+ cl::desc("The default maximum header size for automatic loop rotation"));
STATISTIC(NumRotated, "Number of loops rotated");
namespace {
@@ -38,13 +43,17 @@ namespace {
class LoopRotate : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
- LoopRotate() : LoopPass(ID) {
+ LoopRotate(int SpecifiedMaxHeaderSize = -1) : LoopPass(ID) {
initializeLoopRotatePass(*PassRegistry::getPassRegistry());
+ if (SpecifiedMaxHeaderSize == -1)
+ MaxHeaderSize = DefaultRotationThreshold;
+ else
+ MaxHeaderSize = unsigned(SpecifiedMaxHeaderSize);
}
// LCSSA form makes instruction renaming easier.
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addPreserved<DominatorTree>();
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfo>();
AU.addPreserved<LoopInfo>();
AU.addRequiredID(LoopSimplifyID);
@@ -55,11 +64,12 @@ namespace {
AU.addRequired<TargetTransformInfo>();
}
- bool runOnLoop(Loop *L, LPPassManager &LPM);
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override;
bool simplifyLoopLatch(Loop *L);
bool rotateLoop(Loop *L, bool SimplifiedLatch);
private:
+ unsigned MaxHeaderSize;
LoopInfo *LI;
const TargetTransformInfo *TTI;
};
@@ -73,11 +83,19 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_END(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
-Pass *llvm::createLoopRotatePass() { return new LoopRotate(); }
+Pass *llvm::createLoopRotatePass(int MaxHeaderSize) {
+ return new LoopRotate(MaxHeaderSize);
+}
/// Rotate Loop L as many times as possible. Return true if
/// the loop is rotated at least once.
bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) {
+ if (skipOptnoneFunction(L))
+ return false;
+
+ // Save the loop metadata.
+ MDNode *LoopMD = L->getLoopID();
+
LI = &getAnalysis<LoopInfo>();
TTI = &getAnalysis<TargetTransformInfo>();
@@ -92,6 +110,12 @@ bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) {
MadeChange = true;
SimplifiedLatch = false;
}
+
+ // Restore the loop metadata.
+ // NB! We presume LoopRotation DOESN'T ADD its own metadata.
+ if ((MadeChange || SimplifiedLatch) && LoopMD)
+ L->setLoopID(LoopMD);
+
return MadeChange;
}
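Rotation changes which block is the loop latch, and the loop ID metadata is read from the latch terminator, which is why runOnLoop above now saves the metadata before rotating and reattaches it afterwards. A source-level sketch of the transform itself (hypothetical names):

    // Before rotation: the exit test runs at the top of every iteration.
    void rotate_before(int *a, int n) {
      for (int i = 0; i < n; ++i)
        a[i] = 0;
    }

    // After rotation, conceptually: the copied test guards the loop and the
    // remaining test sits at the bottom, so the latch is also the exit.
    void rotate_after(int *a, int n) {
      if (0 < n) {
        int i = 0;
        do {
          a[i] = 0;
          ++i;
        } while (i < n);
      }
    }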
@@ -130,7 +154,7 @@ static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
for (Value::use_iterator UI = OrigHeaderVal->use_begin(),
UE = OrigHeaderVal->use_end(); UI != UE; ) {
// Grab the use before incrementing the iterator.
- Use &U = UI.getUse();
+ Use &U = *UI;
// Increment the iterator before removing the use from the list.
++UI;
@@ -251,8 +275,9 @@ bool LoopRotate::simplifyLoopLatch(Loop *L) {
// Nuke the Latch block.
assert(Latch->empty() && "unable to evacuate Latch");
LI->removeBlock(Latch);
- if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>())
- DT->eraseNode(Latch);
+ if (DominatorTreeWrapperPass *DTWP =
+ getAnalysisIfAvailable<DominatorTreeWrapperPass>())
+ DTWP->getDomTree().eraseNode(Latch);
Latch->eraseFromParent();
return true;
}
@@ -276,7 +301,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
BasicBlock *OrigLatch = L->getLoopLatch();
BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
- if (BI == 0 || BI->isUnconditional())
+ if (!BI || BI->isUnconditional())
return false;
// If the loop header is not one of the loop exiting blocks then
@@ -287,7 +312,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// If the loop latch already contains a branch that leaves the loop then the
// loop is already rotated.
- if (OrigLatch == 0)
+ if (!OrigLatch)
return false;
// Rotate if either the loop latch does *not* exit the loop, or if the loop
@@ -301,11 +326,11 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
CodeMetrics Metrics;
Metrics.analyzeBasicBlock(OrigHeader, *TTI);
if (Metrics.notDuplicatable) {
- DEBUG(dbgs() << "LoopRotation: NOT rotating - contains non duplicatable"
+ DEBUG(dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable"
<< " instructions: "; L->dump());
return false;
}
- if (Metrics.NumInsts > MAX_HEADER_SIZE)
+ if (Metrics.NumInsts > MaxHeaderSize)
return false;
}
@@ -314,7 +339,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// If the loop could not be converted to canonical form, it must have an
// indirectbr in it, just give up.
- if (OrigPreheader == 0)
+ if (!OrigPreheader)
return false;
// Anything ScalarEvolution may know about this loop or the PHI nodes
@@ -433,23 +458,25 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// The conditional branch can't be folded, handle the general case.
// Update DominatorTree to reflect the CFG change we just made. Then split
// edges as necessary to preserve LoopSimplify form.
- if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
+ if (DominatorTreeWrapperPass *DTWP =
+ getAnalysisIfAvailable<DominatorTreeWrapperPass>()) {
+ DominatorTree &DT = DTWP->getDomTree();
// Everything that was dominated by the old loop header is now dominated
// by the original loop preheader. Conceptually the header was merged
// into the preheader, even though we reuse the actual block as a new
// loop latch.
- DomTreeNode *OrigHeaderNode = DT->getNode(OrigHeader);
+ DomTreeNode *OrigHeaderNode = DT.getNode(OrigHeader);
SmallVector<DomTreeNode *, 8> HeaderChildren(OrigHeaderNode->begin(),
OrigHeaderNode->end());
- DomTreeNode *OrigPreheaderNode = DT->getNode(OrigPreheader);
+ DomTreeNode *OrigPreheaderNode = DT.getNode(OrigPreheader);
for (unsigned I = 0, E = HeaderChildren.size(); I != E; ++I)
- DT->changeImmediateDominator(HeaderChildren[I], OrigPreheaderNode);
+ DT.changeImmediateDominator(HeaderChildren[I], OrigPreheaderNode);
- assert(DT->getNode(Exit)->getIDom() == OrigPreheaderNode);
- assert(DT->getNode(NewHeader)->getIDom() == OrigPreheaderNode);
+ assert(DT.getNode(Exit)->getIDom() == OrigPreheaderNode);
+ assert(DT.getNode(NewHeader)->getIDom() == OrigPreheaderNode);
// Update OrigHeader to be dominated by the new header block.
- DT->changeImmediateDominator(OrigHeader, OrigLatch);
+ DT.changeImmediateDominator(OrigHeader, OrigLatch);
}
// Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and
@@ -459,9 +486,24 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
NewPH->setName(NewHeader->getName() + ".lr.ph");
// Preserve canonical loop form, which means that 'Exit' should have only
- // one predecessor.
- BasicBlock *ExitSplit = SplitCriticalEdge(L->getLoopLatch(), Exit, this);
- ExitSplit->moveBefore(Exit);
+ // one predecessor. Note that Exit could be an exit block for multiple
+ // nested loops, causing both of the edges to now be critical and need to
+ // be split.
+ SmallVector<BasicBlock *, 4> ExitPreds(pred_begin(Exit), pred_end(Exit));
+ bool SplitLatchEdge = false;
+ for (SmallVectorImpl<BasicBlock *>::iterator PI = ExitPreds.begin(),
+ PE = ExitPreds.end();
+ PI != PE; ++PI) {
+ // We only need to split loop exit edges.
+ Loop *PredLoop = LI->getLoopFor(*PI);
+ if (!PredLoop || PredLoop->contains(Exit))
+ continue;
+ SplitLatchEdge |= L->getLoopLatch() == *PI;
+ BasicBlock *ExitSplit = SplitCriticalEdge(*PI, Exit, this);
+ ExitSplit->moveBefore(Exit);
+ }
+ assert(SplitLatchEdge &&
+ "Despite splitting all preds, failed to split latch exit?");
} else {
// We can fold the conditional branch in the preheader, this makes things
// simpler. The first step is to remove the extra edge to the Exit block.
@@ -471,15 +513,17 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
PHBI->eraseFromParent();
// With our CFG finalized, update DomTree if it is available.
- if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
+ if (DominatorTreeWrapperPass *DTWP =
+ getAnalysisIfAvailable<DominatorTreeWrapperPass>()) {
+ DominatorTree &DT = DTWP->getDomTree();
// Update OrigHeader to be dominated by the new header block.
- DT->changeImmediateDominator(NewHeader, OrigPreheader);
- DT->changeImmediateDominator(OrigHeader, OrigLatch);
+ DT.changeImmediateDominator(NewHeader, OrigPreheader);
+ DT.changeImmediateDominator(OrigHeader, OrigLatch);
// Brute force incremental dominator tree update. Call
// findNearestCommonDominator on all CFG predecessors of each child of the
// original header.
- DomTreeNode *OrigHeaderNode = DT->getNode(OrigHeader);
+ DomTreeNode *OrigHeaderNode = DT.getNode(OrigHeader);
SmallVector<DomTreeNode *, 8> HeaderChildren(OrigHeaderNode->begin(),
OrigHeaderNode->end());
bool Changed;
@@ -492,11 +536,11 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
pred_iterator PI = pred_begin(BB);
BasicBlock *NearestDom = *PI;
for (pred_iterator PE = pred_end(BB); PI != PE; ++PI)
- NearestDom = DT->findNearestCommonDominator(NearestDom, *PI);
+ NearestDom = DT.findNearestCommonDominator(NearestDom, *PI);
// Remember if this changes the DomTree.
if (Node->getIDom()->getBlock() != NearestDom) {
- DT->changeImmediateDominator(BB, NearestDom);
+ DT.changeImmediateDominator(BB, NearestDom);
Changed = true;
}
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 6133962e42d7..914b56aa8167 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -53,31 +53,32 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loop-reduce"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Assembly/Writer.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
using namespace llvm;
+#define DEBUG_TYPE "loop-reduce"
+
/// MaxIVUsers is an arbitrary threshold that provides an early opportunity to
/// bail out. This threshold is far beyond the number of users that LSR can
/// conceivably solve, so it should not affect generated code, but catches the
@@ -237,7 +238,15 @@ struct Formula {
int64_t Scale;
/// BaseRegs - The list of "base" registers for this use. When this is
- /// non-empty,
+ /// non-empty, the canonical representation of a formula is:
+ /// 1. BaseRegs.size > 1 implies ScaledReg != NULL and
+ /// 2. ScaledReg != NULL implies Scale != 1 || !BaseRegs.empty().
+ /// #1 enforces that the scaled register is always used when at least two
+ /// registers are needed by the formula: e.g., reg1 + reg2 is reg1 + 1 * reg2.
+ /// #2 enforces that 1 * reg is reg.
+ /// This invariant can be temporarily broken while building a formula.
+ /// However, every formula inserted into the LSRInstance must be in canonical
+ /// form.
SmallVector<const SCEV *, 4> BaseRegs;
/// ScaledReg - The 'scaled' register for this use. This should be non-null
@@ -250,12 +259,18 @@ struct Formula {
int64_t UnfoldedOffset;
Formula()
- : BaseGV(0), BaseOffset(0), HasBaseReg(false), Scale(0), ScaledReg(0),
- UnfoldedOffset(0) {}
+ : BaseGV(nullptr), BaseOffset(0), HasBaseReg(false), Scale(0),
+ ScaledReg(nullptr), UnfoldedOffset(0) {}
void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
- unsigned getNumRegs() const;
+ bool isCanonical() const;
+
+ void Canonicalize();
+
+ bool Unscale();
+
+ size_t getNumRegs() const;
Type *getType() const;
void DeleteBaseReg(const SCEV *&S);
@@ -345,12 +360,58 @@ void Formula::InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
BaseRegs.push_back(Sum);
HasBaseReg = true;
}
+ Canonicalize();
+}
+
+/// \brief Check whether or not this formula satisfies the canonical
+/// representation.
+/// \see Formula::BaseRegs.
+bool Formula::isCanonical() const {
+ if (ScaledReg)
+ return Scale != 1 || !BaseRegs.empty();
+ return BaseRegs.size() <= 1;
+}
+
+/// \brief Helper method to morph a formula into its canonical representation.
+/// \see Formula::BaseRegs.
+/// Every formula having more than one base register must use the ScaledReg
+/// field. Otherwise, we would have to special-case everything in LSR
+/// to treat reg1 + reg2 + ... the same way as reg1 + 1*reg2 + ...
+/// On the other hand, 1*reg should be canonicalized into reg.
+void Formula::Canonicalize() {
+ if (isCanonical())
+ return;
+ // So far we have not needed this case. It is easy to implement, but it is
+ // useless to maintain dead code. Besides, it could hurt compile time.
+ assert(!BaseRegs.empty() && "1*reg => reg, should not be needed.");
+ // Keep the invariant sum in BaseRegs and one of the variant sums in ScaledReg.
+ ScaledReg = BaseRegs.back();
+ BaseRegs.pop_back();
+ Scale = 1;
+ size_t BaseRegsSize = BaseRegs.size();
+ size_t Try = 0;
+ // If ScaledReg is an invariant, try to find a variant expression.
+ while (Try < BaseRegsSize && !isa<SCEVAddRecExpr>(ScaledReg))
+ std::swap(ScaledReg, BaseRegs[Try++]);
+}
+
+/// \brief Get rid of the scale in the formula.
+/// In other words, this method morphs reg1 + 1*reg2 into reg1 + reg2.
+/// \return true if it was possible to get rid of the scale, false otherwise.
+/// \note After this operation the formula may not be in the canonical form.
+bool Formula::Unscale() {
+ if (Scale != 1)
+ return false;
+ Scale = 0;
+ BaseRegs.push_back(ScaledReg);
+ ScaledReg = nullptr;
+ return true;
}
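
The comments above state the canonical-form invariant in prose; the following self-contained sketch (toy types in plain C++, not the patch's Formula class) shows the same two rules and what canonicalization does to a reg1 + reg2 formula:

    // Illustrative only: a stripped-down stand-in for Formula's invariant.
    #include <cassert>
    #include <vector>

    struct ToyFormula {
      std::vector<int> BaseRegs; // stand-ins for the SCEV base registers
      int ScaledReg = 0;         // 0 plays the role of nullptr
      int Scale = 0;

      // Rule 1: more than one register => one of them sits in ScaledReg.
      // Rule 2: a lone 1*reg is not allowed; it should just be reg.
      bool isCanonical() const {
        if (ScaledReg)
          return Scale != 1 || !BaseRegs.empty();
        return BaseRegs.size() <= 1;
      }

      void canonicalize() {
        if (isCanonical())
          return;
        assert(!BaseRegs.empty() && "1*reg => reg, should not be needed.");
        ScaledReg = BaseRegs.back(); // promote one base reg to the scaled slot
        BaseRegs.pop_back();
        Scale = 1;
      }
    };

    int main() {
      ToyFormula F;
      F.BaseRegs = {1, 2}; // "reg1 + reg2" is not canonical ...
      F.canonicalize();    // ... it becomes "reg1 + 1*reg2"
      assert(F.isCanonical() && F.Scale == 1 && F.BaseRegs.size() == 1);
      return 0;
    }
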
/// getNumRegs - Return the total number of register operands used by this
/// formula. This does not include register uses implied by non-constant
/// addrec strides.
-unsigned Formula::getNumRegs() const {
+size_t Formula::getNumRegs() const {
return !!ScaledReg + BaseRegs.size();
}
@@ -360,7 +421,7 @@ Type *Formula::getType() const {
return !BaseRegs.empty() ? BaseRegs.front()->getType() :
ScaledReg ? ScaledReg->getType() :
BaseGV ? BaseGV->getType() :
- 0;
+ nullptr;
}
/// DeleteBaseReg - Delete the given base reg from the BaseRegs list.
@@ -394,7 +455,7 @@ void Formula::print(raw_ostream &OS) const {
bool First = true;
if (BaseGV) {
if (!First) OS << " + "; else First = false;
- WriteAsOperand(OS, BaseGV, /*PrintType=*/false);
+ BaseGV->printAsOperand(OS, /*PrintType=*/false);
}
if (BaseOffset != 0) {
if (!First) OS << " + "; else First = false;
@@ -422,7 +483,7 @@ void Formula::print(raw_ostream &OS) const {
OS << ')';
}
if (UnfoldedOffset != 0) {
- if (!First) OS << " + "; else First = false;
+ if (!First) OS << " + ";
OS << "imm(" << UnfoldedOffset << ')';
}
}
@@ -487,11 +548,11 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
// Check for a division of a constant by a constant.
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) {
if (!RC)
- return 0;
+ return nullptr;
const APInt &LA = C->getValue()->getValue();
const APInt &RA = RC->getValue()->getValue();
if (LA.srem(RA) != 0)
- return 0;
+ return nullptr;
return SE.getConstant(LA.sdiv(RA));
}
@@ -500,16 +561,16 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
if (IgnoreSignificantBits || isAddRecSExtable(AR, SE)) {
const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE,
IgnoreSignificantBits);
- if (!Step) return 0;
+ if (!Step) return nullptr;
const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,
IgnoreSignificantBits);
- if (!Start) return 0;
+ if (!Start) return nullptr;
// FlagNW is independent of the start value, step direction, and is
// preserved with smaller magnitude steps.
// FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
return SE.getAddRecExpr(Start, Step, AR->getLoop(), SCEV::FlagAnyWrap);
}
- return 0;
+ return nullptr;
}
// Distribute the sdiv over add operands, if the add doesn't overflow.
@@ -520,12 +581,12 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
I != E; ++I) {
const SCEV *Op = getExactSDiv(*I, RHS, SE,
IgnoreSignificantBits);
- if (!Op) return 0;
+ if (!Op) return nullptr;
Ops.push_back(Op);
}
return SE.getAddExpr(Ops);
}
- return 0;
+ return nullptr;
}
// Check for a multiply operand that we can pull RHS out of.
@@ -544,13 +605,13 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
}
Ops.push_back(S);
}
- return Found ? SE.getMulExpr(Ops) : 0;
+ return Found ? SE.getMulExpr(Ops) : nullptr;
}
- return 0;
+ return nullptr;
}
// Otherwise we don't know.
- return 0;
+ return nullptr;
}
/// ExtractImmediate - If S involves the addition of a constant integer value,
@@ -604,7 +665,7 @@ static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
SCEV::FlagAnyWrap);
return Result;
}
- return 0;
+ return nullptr;
}
/// isAddressUse - Returns true if the specified instruction is using the
@@ -723,13 +784,12 @@ static bool isHighCostExpansion(const SCEV *S,
// multiplication already generates this expression.
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Mul->getOperand(1))) {
Value *UVal = U->getValue();
- for (Value::use_iterator UI = UVal->use_begin(), UE = UVal->use_end();
- UI != UE; ++UI) {
+ for (User *UR : UVal->users()) {
// If U is a constant, it may be used by a ConstantExpr.
- Instruction *User = dyn_cast<Instruction>(*UI);
- if (User && User->getOpcode() == Instruction::Mul
- && SE.isSCEVable(User->getType())) {
- return SE.getSCEV(User) == Mul;
+ Instruction *UI = dyn_cast<Instruction>(UR);
+ if (UI && UI->getOpcode() == Instruction::Mul &&
+ SE.isSCEVable(UI->getType())) {
+ return SE.getSCEV(UI) == Mul;
}
}
}
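
The iterator-to-range change above recurs throughout this patch (ChainInstruction and CollectLoopInvariantFixupsAndFormulae below get the same treatment). A minimal sketch of the newer API, assuming headers of this LLVM vintage (countInstructionUsers is an illustrative name):

    // Sketch only: Value::users() yields the using Users directly, so the
    // explicit use_iterator bookkeeping of the removed loop disappears.
    #include "llvm/IR/Instruction.h"
    #include "llvm/IR/Value.h"
    using namespace llvm;

    static unsigned countInstructionUsers(const Value *V) {
      unsigned N = 0;
      for (const User *U : V->users()) // one entry per use of V
        if (isa<Instruction>(U))
          ++N;
      return N;
    }
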
@@ -756,12 +816,12 @@ DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
Value *V = DeadInsts.pop_back_val();
Instruction *I = dyn_cast_or_null<Instruction>(V);
- if (I == 0 || !isInstructionTriviallyDead(I))
+ if (!I || !isInstructionTriviallyDead(I))
continue;
for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
if (Instruction *U = dyn_cast<Instruction>(*OI)) {
- *OI = 0;
+ *OI = nullptr;
if (U->use_empty())
DeadInsts.push_back(U);
}
@@ -776,9 +836,18 @@ DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
namespace {
class LSRUse;
}
-// Check if it is legal to fold 2 base registers.
-static bool isLegal2RegAMUse(const TargetTransformInfo &TTI, const LSRUse &LU,
- const Formula &F);
+
+/// \brief Check if the addressing mode defined by \p F is completely
+/// folded in \p LU at isel time.
+/// This includes address-mode folding and special icmp tricks.
+/// This function returns true if \p LU can accommodate what \p F
+/// defines and up to 1 base + 1 scaled + offset.
+/// In other words, if \p F has several base registers, this function may
+/// still return true. Therefore, users still need to account for
+/// additional base registers and/or unfolded offsets to derive an
+/// accurate cost model.
+static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
+ const LSRUse &LU, const Formula &F);
// Get the cost of the scaling factor used in F for LU.
static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
const LSRUse &LU, const Formula &F);
@@ -804,7 +873,7 @@ public:
bool operator<(const Cost &Other) const;
- void Loose();
+ void Lose();
#ifndef NDEBUG
// Once any of the metrics loses, they must all remain losers.
@@ -829,7 +898,7 @@ public:
const SmallVectorImpl<int64_t> &Offsets,
ScalarEvolution &SE, DominatorTree &DT,
const LSRUse &LU,
- SmallPtrSet<const SCEV *, 16> *LoserRegs = 0);
+ SmallPtrSet<const SCEV *, 16> *LoserRegs = nullptr);
void print(raw_ostream &OS) const;
void dump() const;
@@ -864,7 +933,7 @@ void Cost::RateRegister(const SCEV *Reg,
return;
// Otherwise, do not consider this formula at all.
- Loose();
+ Lose();
return;
}
AddRecCost += 1; /// TODO: This should be a function of the stride.
@@ -903,7 +972,7 @@ void Cost::RatePrimaryRegister(const SCEV *Reg,
ScalarEvolution &SE, DominatorTree &DT,
SmallPtrSet<const SCEV *, 16> *LoserRegs) {
if (LoserRegs && LoserRegs->count(Reg)) {
- Loose();
+ Lose();
return;
}
if (Regs.insert(Reg)) {
@@ -922,10 +991,11 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
ScalarEvolution &SE, DominatorTree &DT,
const LSRUse &LU,
SmallPtrSet<const SCEV *, 16> *LoserRegs) {
+ assert(F.isCanonical() && "Cost is accurate only for canonical formula");
// Tally up the registers.
if (const SCEV *ScaledReg = F.ScaledReg) {
if (VisitedRegs.count(ScaledReg)) {
- Loose();
+ Lose();
return;
}
RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs);
@@ -936,7 +1006,7 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
E = F.BaseRegs.end(); I != E; ++I) {
const SCEV *BaseReg = *I;
if (VisitedRegs.count(BaseReg)) {
- Loose();
+ Lose();
return;
}
RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs);
@@ -945,11 +1015,13 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
}
// Determine how many (unfolded) adds we'll need inside the loop.
- size_t NumBaseParts = F.BaseRegs.size() + (F.UnfoldedOffset != 0);
+ size_t NumBaseParts = F.getNumRegs();
if (NumBaseParts > 1)
// Do not count the base and a possible second register if the target
// allows folding 2 registers.
- NumBaseAdds += NumBaseParts - (1 + isLegal2RegAMUse(TTI, LU, F));
+ NumBaseAdds +=
+ NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(TTI, LU, F)));
+ NumBaseAdds += (F.UnfoldedOffset != 0);
// Accumulate non-free scaling amounts.
ScaleCost += getScalingFactorCost(TTI, LU, F);
@@ -967,8 +1039,8 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
assert(isValid() && "invalid cost");
}
-/// Loose - Set this cost to a losing value.
-void Cost::Loose() {
+/// Lose - Set this cost to a losing value.
+void Cost::Lose() {
NumRegs = ~0u;
AddRecCost = ~0u;
NumIVMuls = ~0u;
@@ -980,21 +1052,11 @@ void Cost::Loose() {
/// operator< - Choose the lower cost.
bool Cost::operator<(const Cost &Other) const {
- if (NumRegs != Other.NumRegs)
- return NumRegs < Other.NumRegs;
- if (AddRecCost != Other.AddRecCost)
- return AddRecCost < Other.AddRecCost;
- if (NumIVMuls != Other.NumIVMuls)
- return NumIVMuls < Other.NumIVMuls;
- if (NumBaseAdds != Other.NumBaseAdds)
- return NumBaseAdds < Other.NumBaseAdds;
- if (ScaleCost != Other.ScaleCost)
- return ScaleCost < Other.ScaleCost;
- if (ImmCost != Other.ImmCost)
- return ImmCost < Other.ImmCost;
- if (SetupCost != Other.SetupCost)
- return SetupCost < Other.SetupCost;
- return false;
+ return std::tie(NumRegs, AddRecCost, NumIVMuls, NumBaseAdds, ScaleCost,
+ ImmCost, SetupCost) <
+ std::tie(Other.NumRegs, Other.AddRecCost, Other.NumIVMuls,
+ Other.NumBaseAdds, Other.ScaleCost, Other.ImmCost,
+ Other.SetupCost);
}
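
The rewritten comparison leans on std::tuple's lexicographic operator<. A standalone example (plain C++11, ToyCost is illustrative) of why the std::tie form is equivalent to the removed if-cascade:

    #include <cassert>
    #include <tuple>

    struct ToyCost {
      unsigned NumRegs, AddRecCost;
      // std::tie builds tuples of references; tuple::operator< compares them
      // member by member, exactly like the hand-written cascade it replaces.
      bool operator<(const ToyCost &O) const {
        return std::tie(NumRegs, AddRecCost) <
               std::tie(O.NumRegs, O.AddRecCost);
      }
    };

    int main() {
      ToyCost A = {2, 7}, B = {2, 9};
      assert(A < B && !(B < A)); // tie on NumRegs falls through to AddRecCost
      return 0;
    }
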
void Cost::print(raw_ostream &OS) const {
@@ -1058,7 +1120,8 @@ struct LSRFixup {
}
LSRFixup::LSRFixup()
- : UserInst(0), OperandValToReplace(0), LUIdx(~size_t(0)), Offset(0) {}
+ : UserInst(nullptr), OperandValToReplace(nullptr), LUIdx(~size_t(0)),
+ Offset(0) {}
/// isUseFullyOutsideLoop - Test whether this fixup always uses its
/// value outside of the given loop.
@@ -1080,19 +1143,19 @@ void LSRFixup::print(raw_ostream &OS) const {
// Store is common and interesting enough to be worth special-casing.
if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
OS << "store ";
- WriteAsOperand(OS, Store->getOperand(0), /*PrintType=*/false);
+ Store->getOperand(0)->printAsOperand(OS, /*PrintType=*/false);
} else if (UserInst->getType()->isVoidTy())
OS << UserInst->getOpcodeName();
else
- WriteAsOperand(OS, UserInst, /*PrintType=*/false);
+ UserInst->printAsOperand(OS, /*PrintType=*/false);
OS << ", OperandValToReplace=";
- WriteAsOperand(OS, OperandValToReplace, /*PrintType=*/false);
+ OperandValToReplace->printAsOperand(OS, /*PrintType=*/false);
for (PostIncLoopSet::const_iterator I = PostIncLoops.begin(),
E = PostIncLoops.end(); I != E; ++I) {
OS << ", PostIncLoop=";
- WriteAsOperand(OS, (*I)->getHeader(), /*PrintType=*/false);
+ (*I)->getHeader()->printAsOperand(OS, /*PrintType=*/false);
}
if (LUIdx != ~size_t(0))
@@ -1126,11 +1189,7 @@ struct UniquifierDenseMapInfo {
}
static unsigned getHashValue(const SmallVector<const SCEV *, 4> &V) {
- unsigned Result = 0;
- for (SmallVectorImpl<const SCEV *>::const_iterator I = V.begin(),
- E = V.end(); I != E; ++I)
- Result ^= DenseMapInfo<const SCEV *>::getHashValue(*I);
- return Result;
+ return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
}
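
The removed hash XOR-folded the SCEV pointers, which is order-insensitive and lets duplicated entries cancel out; hash_combine_range mixes each element and its position into the result. A minimal sketch assuming llvm/ADT/Hashing.h (hashKey is an illustrative name):

    #include "llvm/ADT/Hashing.h"
    #include <vector>

    // Order- and multiplicity-sensitive, unlike a plain XOR fold, for which
    // {A, B, A} would have hashed the same as {B}.
    static unsigned hashKey(const std::vector<const int *> &Key) {
      return static_cast<unsigned>(
          llvm::hash_combine_range(Key.begin(), Key.end()));
    }
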
static bool isEqual(const SmallVector<const SCEV *, 4> &LHS,
@@ -1158,6 +1217,8 @@ public:
// TODO: Add a generic icmp too?
};
+ typedef PointerIntPair<const SCEV *, 2, KindType> SCEVUseKindPair;
+
KindType Kind;
Type *AccessTy;
@@ -1196,7 +1257,7 @@ public:
MaxOffset(INT64_MIN),
AllFixupsOutsideLoop(true),
RigidFormula(false),
- WidestFixupType(0) {}
+ WidestFixupType(nullptr) {}
bool HasFormulaWithSameRegs(const Formula &F) const;
bool InsertFormula(const Formula &F);
@@ -1221,7 +1282,10 @@ bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
/// InsertFormula - If the given formula has not yet been inserted, add it to
/// the list, and return true. Return false otherwise.
+/// The formula must be in canonical form.
bool LSRUse::InsertFormula(const Formula &F) {
+ assert(F.isCanonical() && "Invalid canonical representation");
+
if (!Formulae.empty() && RigidFormula)
return false;
@@ -1247,6 +1311,8 @@ bool LSRUse::InsertFormula(const Formula &F) {
// Record registers now being used by this use.
Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
+ if (F.ScaledReg)
+ Regs.insert(F.ScaledReg);
return true;
}
@@ -1295,7 +1361,7 @@ void LSRUse::print(raw_ostream &OS) const {
for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
E = Offsets.end(); I != E; ++I) {
OS << *I;
- if (llvm::next(I) != E)
+ if (std::next(I) != E)
OS << ',';
}
OS << '}';
@@ -1313,12 +1379,10 @@ void LSRUse::dump() const {
}
#endif
-/// isLegalUse - Test whether the use described by AM is "legal", meaning it can
-/// be completely folded into the user instruction at isel time. This includes
-/// address-mode folding and special icmp tricks.
-static bool isLegalUse(const TargetTransformInfo &TTI, LSRUse::KindType Kind,
- Type *AccessTy, GlobalValue *BaseGV, int64_t BaseOffset,
- bool HasBaseReg, int64_t Scale) {
+static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
+ LSRUse::KindType Kind, Type *AccessTy,
+ GlobalValue *BaseGV, int64_t BaseOffset,
+ bool HasBaseReg, int64_t Scale) {
switch (Kind) {
case LSRUse::Address:
return TTI.isLegalAddressingMode(AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale);
@@ -1369,10 +1433,11 @@ static bool isLegalUse(const TargetTransformInfo &TTI, LSRUse::KindType Kind,
llvm_unreachable("Invalid LSRUse Kind!");
}
-static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
- int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
- GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg,
- int64_t Scale) {
+static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
+ int64_t MinOffset, int64_t MaxOffset,
+ LSRUse::KindType Kind, Type *AccessTy,
+ GlobalValue *BaseGV, int64_t BaseOffset,
+ bool HasBaseReg, int64_t Scale) {
// Check for overflow.
if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) !=
(MinOffset > 0))
@@ -1383,9 +1448,41 @@ static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
return false;
MaxOffset = (uint64_t)BaseOffset + MaxOffset;
- return isLegalUse(TTI, Kind, AccessTy, BaseGV, MinOffset, HasBaseReg,
- Scale) &&
- isLegalUse(TTI, Kind, AccessTy, BaseGV, MaxOffset, HasBaseReg, Scale);
+ return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MinOffset,
+ HasBaseReg, Scale) &&
+ isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MaxOffset,
+ HasBaseReg, Scale);
+}
+
+static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
+ int64_t MinOffset, int64_t MaxOffset,
+ LSRUse::KindType Kind, Type *AccessTy,
+ const Formula &F) {
+ // For the purpose of isAMCompletelyFolded, either having a canonical formula
+ // or a non-zero scale is correct.
+ // Problems may arise from non-canonical formulae having a scale == 0.
+ // Strictly speaking, it would be best to rely only on canonical formulae.
+ // However, when we generate the scaled formulae, we first check that the
+ // scaling factor is profitable before computing the actual ScaledReg, for
+ // compile time's sake.
+ assert((F.isCanonical() || F.Scale != 0));
+ return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
+ F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale);
+}
+
+/// isLegalUse - Test whether we know how to expand the current formula.
+static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
+ int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
+ GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg,
+ int64_t Scale) {
+ // We know how to expand completely foldable formulae.
+ return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
+ BaseOffset, HasBaseReg, Scale) ||
+ // Or formulae that use a base register produced by a sum of base
+ // registers.
+ (Scale == 1 &&
+ isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
+ BaseGV, BaseOffset, true, 0));
}
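
After the rename, "legal to expand" is deliberately broader than "completely folded": a formula whose scale is exactly 1 can still be expanded by treating the scaled register as one more base register, and RateFormula/getScalingFactorCost charge for that above. A tiny sketch of the decision, with hypothetical predicates standing in for the TargetTransformInfo queries:

    // Sketch only: the two ways a formula is accepted for expansion.
    static bool sketchIsLegalUse(bool FoldsAsWritten,      // AM folds with the real scale
                                 bool FoldsAsExtraBaseReg, // AM folds with scale 0, HasBaseReg
                                 long long Scale) {
      return FoldsAsWritten || (Scale == 1 && FoldsAsExtraBaseReg);
    }
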
static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
@@ -1395,36 +1492,23 @@ static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
F.BaseOffset, F.HasBaseReg, F.Scale);
}
-static bool isLegal2RegAMUse(const TargetTransformInfo &TTI, const LSRUse &LU,
- const Formula &F) {
- // If F is used as an Addressing Mode, it may fold one Base plus one
- // scaled register. If the scaled register is nil, do as if another
- // element of the base regs is a 1-scaled register.
- // This is possible if BaseRegs has at least 2 registers.
-
- // If this is not an address calculation, this is not an addressing mode
- // use.
- if (LU.Kind != LSRUse::Address)
- return false;
-
- // F is already scaled.
- if (F.Scale != 0)
- return false;
-
- // We need to keep one register for the base and one to scale.
- if (F.BaseRegs.size() < 2)
- return false;
-
- return isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
- F.BaseGV, F.BaseOffset, F.HasBaseReg, 1);
- }
+static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
+ const LSRUse &LU, const Formula &F) {
+ return isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
+ LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg,
+ F.Scale);
+}
static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
const LSRUse &LU, const Formula &F) {
if (!F.Scale)
return 0;
- assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
- LU.AccessTy, F) && "Illegal formula in use.");
+
+ // If the use is not completely folded in that instruction, we will have to
+ // pay an extra cost only for scale != 1.
+ if (!isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
+ LU.AccessTy, F))
+ return F.Scale != 1;
switch (LU.Kind) {
case LSRUse::Address: {
@@ -1443,12 +1527,10 @@ static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
}
case LSRUse::ICmpZero:
- // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg.
- // Therefore, return 0 in case F.Scale == -1.
- return F.Scale != -1;
-
case LSRUse::Basic:
case LSRUse::Special:
+ // The use is completely folded, i.e., everything is folded into the
+ // instruction.
return 0;
}
@@ -1473,7 +1555,8 @@ static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
HasBaseReg = true;
}
- return isLegalUse(TTI, Kind, AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale);
+ return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, BaseOffset,
+ HasBaseReg, Scale);
}
static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
@@ -1498,36 +1581,12 @@ static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
// base and a scale.
int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
- return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
- BaseOffset, HasBaseReg, Scale);
+ return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
+ BaseOffset, HasBaseReg, Scale);
}
namespace {
-/// UseMapDenseMapInfo - A DenseMapInfo implementation for holding
-/// DenseMaps and DenseSets of pairs of const SCEV* and LSRUse::Kind.
-struct UseMapDenseMapInfo {
- static std::pair<const SCEV *, LSRUse::KindType> getEmptyKey() {
- return std::make_pair(reinterpret_cast<const SCEV *>(-1), LSRUse::Basic);
- }
-
- static std::pair<const SCEV *, LSRUse::KindType> getTombstoneKey() {
- return std::make_pair(reinterpret_cast<const SCEV *>(-2), LSRUse::Basic);
- }
-
- static unsigned
- getHashValue(const std::pair<const SCEV *, LSRUse::KindType> &V) {
- unsigned Result = DenseMapInfo<const SCEV *>::getHashValue(V.first);
- Result ^= DenseMapInfo<unsigned>::getHashValue(unsigned(V.second));
- return Result;
- }
-
- static bool isEqual(const std::pair<const SCEV *, LSRUse::KindType> &LHS,
- const std::pair<const SCEV *, LSRUse::KindType> &RHS) {
- return LHS == RHS;
- }
-};
-
/// IVInc - An individual increment in a Chain of IV increments.
/// Relate an IV user to an expression that computes the IV it uses from the IV
/// used by the previous link in the Chain.
@@ -1552,7 +1611,7 @@ struct IVChain {
SmallVector<IVInc,1> Incs;
const SCEV *ExprBase;
- IVChain() : ExprBase(0) {}
+ IVChain() : ExprBase(nullptr) {}
IVChain(const IVInc &Head, const SCEV *Base)
: Incs(1, Head), ExprBase(Base) {}
@@ -1562,7 +1621,7 @@ struct IVChain {
// begin - return the first increment in the chain.
const_iterator begin() const {
assert(!Incs.empty());
- return llvm::next(Incs.begin());
+ return std::next(Incs.begin());
}
const_iterator end() const {
return Incs.end();
@@ -1656,9 +1715,7 @@ class LSRInstance {
}
// Support for sharing of LSRUses between LSRFixups.
- typedef DenseMap<std::pair<const SCEV *, LSRUse::KindType>,
- size_t,
- UseMapDenseMapInfo> UseMapTy;
+ typedef DenseMap<LSRUse::SCEVUseKindPair, size_t> UseMapTy;
UseMapTy UseMap;
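
Keying the use map on SCEVUseKindPair lets the hand-written UseMapDenseMapInfo above be dropped: PointerIntPair packs the kind into the pointer's spare low bits and already comes with a DenseMapInfo specialization. The same pattern with toy types, assuming the llvm/ADT headers (ToyKind and ToyKey are illustrative):

    #include "llvm/ADT/DenseMap.h"
    #include "llvm/ADT/PointerIntPair.h"
    using namespace llvm;

    enum ToyKind { KindA, KindB, KindC };                   // must fit in 2 bits
    typedef PointerIntPair<const int *, 2, ToyKind> ToyKey; // pointer + packed kind

    static void remember(DenseMap<ToyKey, size_t> &Map, const int *P) {
      Map[ToyKey(P, KindB)] = 42; // hashing/equality come from DenseMapInfo
    }
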
bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
@@ -1681,8 +1738,19 @@ class LSRInstance {
void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
unsigned Depth = 0);
+
+ void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
+ const Formula &Base, unsigned Depth,
+ size_t Idx, bool IsScaledReg = false);
void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
+ void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
+ const Formula &Base, size_t Idx,
+ bool IsScaledReg = false);
void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
+ void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx,
+ const Formula &Base,
+ const SmallVectorImpl<int64_t> &Worklist,
+ size_t Idx, bool IsScaledReg = false);
void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
@@ -1760,7 +1828,7 @@ void LSRInstance::OptimizeShadowIV() {
IVUsers::const_iterator CandidateUI = UI;
++UI;
Instruction *ShadowUse = CandidateUI->getUser();
- Type *DestTy = 0;
+ Type *DestTy = nullptr;
bool IsSigned = false;
  /* If shadow use is an int->float cast then insert a second IV
@@ -1822,7 +1890,7 @@ void LSRInstance::OptimizeShadowIV() {
continue;
/* Initialize new IV, double d = 0.0 in above example. */
- ConstantInt *C = 0;
+ ConstantInt *C = nullptr;
if (Incr->getOperand(0) == PH)
C = dyn_cast<ConstantInt>(Incr->getOperand(1));
else if (Incr->getOperand(1) == PH)
@@ -1944,7 +2012,7 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
// for ICMP_ULE here because the comparison would be with zero, which
// isn't interesting.
CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
- const SCEVNAryExpr *Max = 0;
+ const SCEVNAryExpr *Max = nullptr;
if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
Pred = ICmpInst::ICMP_SLE;
Max = S;
@@ -1987,7 +2055,7 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
// Check the right operand of the select, and remember it, as it will
// be used in the new comparison instruction.
- Value *NewRHS = 0;
+ Value *NewRHS = nullptr;
if (ICmpInst::isTrueWhenEqual(Pred)) {
// Look for n+1, and grab n.
if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1)))
@@ -2057,7 +2125,7 @@ LSRInstance::OptimizeLoopTermCond() {
continue;
// Search IVUsesByStride to find Cond's IVUse if there is one.
- IVStrideUse *CondUse = 0;
+ IVStrideUse *CondUse = nullptr;
ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
if (!FindIVUserForCond(Cond, CondUse))
continue;
@@ -2110,12 +2178,12 @@ LSRInstance::OptimizeLoopTermCond() {
// Check for possible scaled-address reuse.
Type *AccessTy = getAccessType(UI->getUser());
int64_t Scale = C->getSExtValue();
- if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0,
+ if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ nullptr,
/*BaseOffset=*/ 0,
/*HasBaseReg=*/ false, Scale))
goto decline_post_inc;
Scale = -Scale;
- if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0,
+ if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ nullptr,
/*BaseOffset=*/ 0,
/*HasBaseReg=*/ false, Scale))
goto decline_post_inc;
@@ -2185,23 +2253,25 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
// the uses will have all its uses outside the loop, for example.
if (LU.Kind != Kind)
return false;
+
+ // Check for a mismatched access type, and fall back conservatively as needed.
+ // TODO: Be less conservative when the type is similar and can use the same
+ // addressing modes.
+ if (Kind == LSRUse::Address && AccessTy != LU.AccessTy)
+ NewAccessTy = Type::getVoidTy(AccessTy->getContext());
+
// Conservatively assume HasBaseReg is true for now.
if (NewOffset < LU.MinOffset) {
- if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
+ if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
LU.MaxOffset - NewOffset, HasBaseReg))
return false;
NewMinOffset = NewOffset;
} else if (NewOffset > LU.MaxOffset) {
- if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
+ if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
NewOffset - LU.MinOffset, HasBaseReg))
return false;
NewMaxOffset = NewOffset;
}
- // Check for a mismatched access type, and fall back conservatively as needed.
- // TODO: Be less conservative when the type is similar and can use the same
- // addressing modes.
- if (Kind == LSRUse::Address && AccessTy != LU.AccessTy)
- NewAccessTy = Type::getVoidTy(AccessTy->getContext());
// Update the use.
LU.MinOffset = NewMinOffset;
@@ -2222,14 +2292,14 @@ LSRInstance::getUse(const SCEV *&Expr,
int64_t Offset = ExtractImmediate(Expr, SE);
// Basic uses can't accept any offset, for example.
- if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
+ if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ nullptr,
Offset, /*HasBaseReg=*/ true)) {
Expr = Copy;
Offset = 0;
}
std::pair<UseMapTy::iterator, bool> P =
- UseMap.insert(std::make_pair(std::make_pair(Expr, Kind), 0));
+ UseMap.insert(std::make_pair(LSRUse::SCEVUseKindPair(Expr, Kind), 0));
if (!P.second) {
// A use already existed with this base.
size_t LUIdx = P.first->second;
@@ -2306,7 +2376,7 @@ LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
}
// Nothing looked good.
- return 0;
+ return nullptr;
}
void LSRInstance::CollectInterestingTypesAndFactors() {
@@ -2338,7 +2408,7 @@ void LSRInstance::CollectInterestingTypesAndFactors() {
for (SmallSetVector<const SCEV *, 4>::const_iterator
I = Strides.begin(), E = Strides.end(); I != E; ++I)
for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =
- llvm::next(I); NewStrideIter != E; ++NewStrideIter) {
+ std::next(I); NewStrideIter != E; ++NewStrideIter) {
const SCEV *OldStride = *I;
const SCEV *NewStride = *NewStrideIter;
@@ -2424,7 +2494,7 @@ static const SCEV *getExprBase(const SCEV *S) {
  default: // including scUnknown.
return S;
case scConstant:
- return 0;
+ return nullptr;
case scTruncate:
return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand());
case scZeroExtend:
@@ -2515,7 +2585,7 @@ isProfitableChain(IVChain &Chain, SmallPtrSet<Instruction*, 4> &Users,
&& SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {
--cost;
}
- const SCEV *LastIncExpr = 0;
+ const SCEV *LastIncExpr = nullptr;
unsigned NumConstIncrements = 0;
unsigned NumVarIncrements = 0;
unsigned NumReusedIncrements = 0;
@@ -2574,7 +2644,7 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
// Visit all existing chains. Check if its IVOper can be computed as a
// profitable loop invariant increment from the last link in the Chain.
unsigned ChainIdx = 0, NChains = IVChainVec.size();
- const SCEV *LastIncExpr = 0;
+ const SCEV *LastIncExpr = nullptr;
for (; ChainIdx < NChains; ++ChainIdx) {
IVChain &Chain = IVChainVec[ChainIdx];
@@ -2646,9 +2716,8 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
  // they will eventually be used by the current chain, or can be computed
// from one of the chain increments. To be more precise we could
// transitively follow its user and only add leaf IV users to the set.
- for (Value::use_iterator UseIter = IVOper->use_begin(),
- UseEnd = IVOper->use_end(); UseIter != UseEnd; ++UseIter) {
- Instruction *OtherUse = dyn_cast<Instruction>(*UseIter);
+ for (User *U : IVOper->users()) {
+ Instruction *OtherUse = dyn_cast<Instruction>(U);
if (!OtherUse)
continue;
// Uses in the chain will no longer be uses if the chain is formed.
@@ -2738,7 +2807,7 @@ void LSRInstance::CollectChains() {
Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
if (UniqueOperands.insert(IVOpInst))
ChainInstruction(I, IVOpInst, ChainUsersVec);
- IVOpIter = findIVOperand(llvm::next(IVOpIter), IVOpEnd, L, SE);
+ IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
}
} // Continue walking down the instructions.
} // Continue walking down the domtree.
@@ -2795,7 +2864,7 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
int64_t IncOffset = IncConst->getValue()->getSExtValue();
if (!isAlwaysFoldable(TTI, LSRUse::Address,
- getAccessType(UserInst), /*BaseGV=*/ 0,
+ getAccessType(UserInst), /*BaseGV=*/ nullptr,
  IncOffset, /*HasBaseReg=*/ false))
return false;
@@ -2813,7 +2882,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
// findIVOperand returns IVOpEnd if it can no longer find a valid IV user.
User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(),
IVOpEnd, L, SE);
- Value *IVSrc = 0;
+ Value *IVSrc = nullptr;
while (IVOpIter != IVOpEnd) {
IVSrc = getWideOperand(*IVOpIter);
@@ -2829,7 +2898,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
|| SE.getSCEV(IVSrc) == Head.IncExpr) {
break;
}
- IVOpIter = findIVOperand(llvm::next(IVOpIter), IVOpEnd, L, SE);
+ IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
}
if (IVOpIter == IVOpEnd) {
// Gracefully give up on this chain.
@@ -2840,7 +2909,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n");
Type *IVTy = IVSrc->getType();
Type *IntTy = SE.getEffectiveSCEVType(IVTy);
- const SCEV *LeftOverExpr = 0;
+ const SCEV *LeftOverExpr = nullptr;
for (IVChain::const_iterator IncI = Chain.begin(),
IncE = Chain.end(); IncI != IncE; ++IncI) {
@@ -2871,7 +2940,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
TTI)) {
assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
IVSrc = IVOper;
- LeftOverExpr = 0;
+ LeftOverExpr = nullptr;
}
}
Type *OperTy = IncI->IVOperand->getType();
@@ -2926,7 +2995,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
LF.PostIncLoops = UI->getPostIncLoops();
LSRUse::KindType Kind = LSRUse::Basic;
- Type *AccessTy = 0;
+ Type *AccessTy = nullptr;
if (isAddressUse(LF.UserInst, LF.OperandValToReplace)) {
Kind = LSRUse::Address;
AccessTy = getAccessType(LF.UserInst);
@@ -2957,7 +3026,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
if (SE.isLoopInvariant(N, L) && isSafeToExpand(N, SE)) {
// S is normalized, so normalize N before folding it into S
// to keep the result normalized.
- N = TransformForPostIncUse(Normalize, N, CI, 0,
+ N = TransformForPostIncUse(Normalize, N, CI, nullptr,
LF.PostIncLoops, SE, DT);
Kind = LSRUse::ICmpZero;
S = SE.getMinusSCEV(N, S);
@@ -3032,6 +3101,9 @@ void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
/// InsertFormula - If the given formula has not yet been inserted, add it to
/// the list, and return true. Return false otherwise.
bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
+ // Do not insert a formula that we will not be able to expand.
+ assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) &&
+ "Formula is illegal");
if (!LU.InsertFormula(F))
return false;
@@ -3059,18 +3131,17 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
Worklist.push_back(D->getLHS());
Worklist.push_back(D->getRHS());
- } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
- if (!Inserted.insert(U)) continue;
- const Value *V = U->getValue();
+ } else if (const SCEVUnknown *US = dyn_cast<SCEVUnknown>(S)) {
+ if (!Inserted.insert(US)) continue;
+ const Value *V = US->getValue();
if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
// Look for instructions defined outside the loop.
if (L->contains(Inst)) continue;
} else if (isa<UndefValue>(V))
// Undef doesn't have a live range, so it doesn't matter.
continue;
- for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
- UI != UE; ++UI) {
- const Instruction *UserInst = dyn_cast<Instruction>(*UI);
+ for (const Use &U : V->uses()) {
+ const Instruction *UserInst = dyn_cast<Instruction>(U.getUser());
// Ignore non-instructions.
if (!UserInst)
continue;
@@ -3082,7 +3153,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
UserInst->getParent() :
cast<PHINode>(UserInst)->getIncomingBlock(
- PHINode::getIncomingValueNumForOperand(UI.getOperandNo()));
+ PHINode::getIncomingValueNumForOperand(U.getOperandNo()));
if (!DT.dominates(L->getHeader(), UseBB))
continue;
// Ignore uses which are part of other SCEV expressions, to avoid
@@ -3092,7 +3163,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
// If the user is a no-op, look through to its uses.
if (!isa<SCEVUnknown>(UserS))
continue;
- if (UserS == U) {
+ if (UserS == US) {
Worklist.push_back(
SE.getUnknown(const_cast<Instruction *>(UserInst)));
continue;
@@ -3100,7 +3171,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
}
// Ignore icmp instructions which are already being analyzed.
if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
- unsigned OtherIdx = !UI.getOperandNo();
+ unsigned OtherIdx = !U.getOperandNo();
Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx));
if (SE.hasComputableLoopEvolution(SE.getSCEV(OtherOp), L))
continue;
@@ -3108,8 +3179,8 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
LSRFixup &LF = getNewFixup();
LF.UserInst = const_cast<Instruction *>(UserInst);
- LF.OperandValToReplace = UI.getUse();
- std::pair<size_t, int64_t> P = getUse(S, LSRUse::Basic, 0);
+ LF.OperandValToReplace = U;
+ std::pair<size_t, int64_t> P = getUse(S, LSRUse::Basic, nullptr);
LF.LUIdx = P.first;
LF.Offset = P.second;
LSRUse &LU = Uses[LF.LUIdx];
@@ -3118,7 +3189,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
SE.getTypeSizeInBits(LU.WidestFixupType) <
SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
LU.WidestFixupType = LF.OperandValToReplace->getType();
- InsertSupplementalFormula(U, LU, LF.LUIdx);
+ InsertSupplementalFormula(US, LU, LF.LUIdx);
CountRegisters(LU.Formulae.back(), Uses.size() - 1);
break;
}
@@ -3148,7 +3219,7 @@ static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
if (Remainder)
Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
}
- return 0;
+ return nullptr;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
// Split a non-zero base out of an addrec.
if (AR->getStart()->isZero())
@@ -3160,7 +3231,7 @@ static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
// does not pertain to this loop.
if (Remainder && (AR->getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
- Remainder = 0;
+ Remainder = nullptr;
}
if (Remainder != AR->getStart()) {
if (!Remainder)
@@ -3182,90 +3253,110 @@ static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
CollectSubexprs(Mul->getOperand(1), C, Ops, L, SE, Depth+1);
if (Remainder)
Ops.push_back(SE.getMulExpr(C, Remainder));
- return 0;
+ return nullptr;
}
}
return S;
}
-/// GenerateReassociations - Split out subexpressions from adds and the bases of
-/// addrecs.
-void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
- Formula Base,
- unsigned Depth) {
- // Arbitrarily cap recursion to protect compile time.
- if (Depth >= 3) return;
+/// \brief Helper function for LSRInstance::GenerateReassociations.
+void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
+ const Formula &Base,
+ unsigned Depth, size_t Idx,
+ bool IsScaledReg) {
+ const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
+ SmallVector<const SCEV *, 8> AddOps;
+ const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE);
+ if (Remainder)
+ AddOps.push_back(Remainder);
+
+ if (AddOps.size() == 1)
+ return;
- for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
- const SCEV *BaseReg = Base.BaseRegs[i];
+ for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
+ JE = AddOps.end();
+ J != JE; ++J) {
- SmallVector<const SCEV *, 8> AddOps;
- const SCEV *Remainder = CollectSubexprs(BaseReg, 0, AddOps, L, SE);
- if (Remainder)
- AddOps.push_back(Remainder);
+ // Loop-variant "unknown" values are uninteresting; we won't be able to
+ // do anything meaningful with them.
+ if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L))
+ continue;
- if (AddOps.size() == 1) continue;
+ // Don't pull a constant into a register if the constant could be folded
+ // into an immediate field.
+ if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
+ LU.AccessTy, *J, Base.getNumRegs() > 1))
+ continue;
- for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
- JE = AddOps.end(); J != JE; ++J) {
+ // Collect all operands except *J.
+ SmallVector<const SCEV *, 8> InnerAddOps(
+ ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
+ InnerAddOps.append(std::next(J),
+ ((const SmallVector<const SCEV *, 8> &)AddOps).end());
+
+ // Don't leave just a constant behind in a register if the constant could
+ // be folded into an immediate field.
+ if (InnerAddOps.size() == 1 &&
+ isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
+ LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))
+ continue;
- // Loop-variant "unknown" values are uninteresting; we won't be able to
- // do anything meaningful with them.
- if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L))
- continue;
+ const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
+ if (InnerSum->isZero())
+ continue;
+ Formula F = Base;
- // Don't pull a constant into a register if the constant could be folded
- // into an immediate field.
- if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
- LU.AccessTy, *J, Base.getNumRegs() > 1))
- continue;
+ // Add the remaining pieces of the add back into the new formula.
+ const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
+ if (InnerSumSC && SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
+ TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
+ InnerSumSC->getValue()->getZExtValue())) {
+ F.UnfoldedOffset =
+ (uint64_t)F.UnfoldedOffset + InnerSumSC->getValue()->getZExtValue();
+ if (IsScaledReg)
+ F.ScaledReg = nullptr;
+ else
+ F.BaseRegs.erase(F.BaseRegs.begin() + Idx);
+ } else if (IsScaledReg)
+ F.ScaledReg = InnerSum;
+ else
+ F.BaseRegs[Idx] = InnerSum;
+
+ // Add J as its own register, or an unfolded immediate.
+ const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
+ if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
+ TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
+ SC->getValue()->getZExtValue()))
+ F.UnfoldedOffset =
+ (uint64_t)F.UnfoldedOffset + SC->getValue()->getZExtValue();
+ else
+ F.BaseRegs.push_back(*J);
+    // We may have changed the number of registers in the base regs; adjust the
+ // formula accordingly.
+ F.Canonicalize();
- // Collect all operands except *J.
- SmallVector<const SCEV *, 8> InnerAddOps
- (((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
- InnerAddOps.append
- (llvm::next(J), ((const SmallVector<const SCEV *, 8> &)AddOps).end());
-
- // Don't leave just a constant behind in a register if the constant could
- // be folded into an immediate field.
- if (InnerAddOps.size() == 1 &&
- isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
- LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))
- continue;
+ if (InsertFormula(LU, LUIdx, F))
+ // If that formula hadn't been seen before, recurse to find more like
+ // it.
+ GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth + 1);
+ }
+}
- const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
- if (InnerSum->isZero())
- continue;
- Formula F = Base;
+/// GenerateReassociations - Split out subexpressions from adds and the bases of
+/// addrecs.
+void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
+ Formula Base, unsigned Depth) {
+ assert(Base.isCanonical() && "Input must be in the canonical form");
+ // Arbitrarily cap recursion to protect compile time.
+ if (Depth >= 3)
+ return;
- // Add the remaining pieces of the add back into the new formula.
- const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
- if (InnerSumSC &&
- SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
- TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
- InnerSumSC->getValue()->getZExtValue())) {
- F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
- InnerSumSC->getValue()->getZExtValue();
- F.BaseRegs.erase(F.BaseRegs.begin() + i);
- } else
- F.BaseRegs[i] = InnerSum;
-
- // Add J as its own register, or an unfolded immediate.
- const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
- if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
- TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
- SC->getValue()->getZExtValue()))
- F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
- SC->getValue()->getZExtValue();
- else
- F.BaseRegs.push_back(*J);
+ for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
+ GenerateReassociationsImpl(LU, LUIdx, Base, Depth, i);
- if (InsertFormula(LU, LUIdx, F))
- // If that formula hadn't been seen before, recurse to find more like
- // it.
- GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth+1);
- }
- }
+ if (Base.Scale == 1)
+ GenerateReassociationsImpl(LU, LUIdx, Base, Depth,
+ /* Idx */ -1, /* IsScaledReg */ true);
}
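
GenerateSymbolicOffsets and GenerateConstantOffsets below are restructured the same way: the per-register body moves into an Impl helper that is also invoked once for the scaled register, with Idx passed as -1 and ignored. A toy sketch of that dispatch shape (illustrative types only, not the LSR code):

    #include <cstddef>
    #include <vector>

    struct ToyBase {
      std::vector<int> BaseRegs;
      int ScaledReg = 0;
      int Scale = 0;
    };

    static void visitOneReg(ToyBase &B, size_t Idx, bool IsScaledReg) {
      // Only the selected operand of the conditional is evaluated, so the
      // bogus Idx in the scaled-register case is never used.
      int &Reg = IsScaledReg ? B.ScaledReg : B.BaseRegs[Idx];
      (void)Reg; // ... shared transformation on Reg would go here ...
    }

    static void visitAllRegs(ToyBase &B) {
      for (size_t i = 0, e = B.BaseRegs.size(); i != e; ++i)
        visitOneReg(B, i, /*IsScaledReg=*/false);
      if (B.Scale == 1)
        visitOneReg(B, size_t(-1), /*IsScaledReg=*/true); // Idx unused here
    }
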
/// GenerateCombinations - Generate a formula consisting of all of the
@@ -3273,8 +3364,12 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
Formula Base) {
// This method is only interesting on a plurality of registers.
- if (Base.BaseRegs.size() <= 1) return;
+ if (Base.BaseRegs.size() + (Base.Scale == 1) <= 1)
+ return;
+ // Flatten the representation, i.e., reg1 + 1*reg2 => reg1 + reg2, before
+ // processing the formula.
+ Base.Unscale();
Formula F = Base;
F.BaseRegs.clear();
SmallVector<const SCEV *, 4> Ops;
@@ -3294,29 +3389,87 @@ void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
// rather than proceed with zero in a register.
if (!Sum->isZero()) {
F.BaseRegs.push_back(Sum);
+ F.Canonicalize();
(void)InsertFormula(LU, LUIdx, F);
}
}
}
+/// \brief Helper function for LSRInstance::GenerateSymbolicOffsets.
+void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
+ const Formula &Base, size_t Idx,
+ bool IsScaledReg) {
+ const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
+ GlobalValue *GV = ExtractSymbol(G, SE);
+ if (G->isZero() || !GV)
+ return;
+ Formula F = Base;
+ F.BaseGV = GV;
+ if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
+ return;
+ if (IsScaledReg)
+ F.ScaledReg = G;
+ else
+ F.BaseRegs[Idx] = G;
+ (void)InsertFormula(LU, LUIdx, F);
+}
+
/// GenerateSymbolicOffsets - Generate reuse formulae using symbolic offsets.
void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
Formula Base) {
// We can't add a symbolic offset if the address already contains one.
if (Base.BaseGV) return;
- for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
- const SCEV *G = Base.BaseRegs[i];
- GlobalValue *GV = ExtractSymbol(G, SE);
- if (G->isZero() || !GV)
- continue;
+ for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
+ GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, i);
+ if (Base.Scale == 1)
+ GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, /* Idx */ -1,
+ /* IsScaledReg */ true);
+}
+
+/// \brief Helper function for LSRInstance::GenerateConstantOffsets.
+void LSRInstance::GenerateConstantOffsetsImpl(
+ LSRUse &LU, unsigned LUIdx, const Formula &Base,
+ const SmallVectorImpl<int64_t> &Worklist, size_t Idx, bool IsScaledReg) {
+ const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
+ for (SmallVectorImpl<int64_t>::const_iterator I = Worklist.begin(),
+ E = Worklist.end();
+ I != E; ++I) {
Formula F = Base;
- F.BaseGV = GV;
- if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
- continue;
- F.BaseRegs[i] = G;
- (void)InsertFormula(LU, LUIdx, F);
+ F.BaseOffset = (uint64_t)Base.BaseOffset - *I;
+ if (isLegalUse(TTI, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind,
+ LU.AccessTy, F)) {
+ // Add the offset to the base register.
+ const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G);
+ // If it cancelled out, drop the base register, otherwise update it.
+ if (NewG->isZero()) {
+ if (IsScaledReg) {
+ F.Scale = 0;
+ F.ScaledReg = nullptr;
+ } else
+ F.DeleteBaseReg(F.BaseRegs[Idx]);
+ F.Canonicalize();
+ } else if (IsScaledReg)
+ F.ScaledReg = NewG;
+ else
+ F.BaseRegs[Idx] = NewG;
+
+ (void)InsertFormula(LU, LUIdx, F);
+ }
}
+
+ int64_t Imm = ExtractImmediate(G, SE);
+ if (G->isZero() || Imm == 0)
+ return;
+ Formula F = Base;
+ F.BaseOffset = (uint64_t)F.BaseOffset + Imm;
+ if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
+ return;
+ if (IsScaledReg)
+ F.ScaledReg = G;
+ else
+ F.BaseRegs[Idx] = G;
+ (void)InsertFormula(LU, LUIdx, F);
}
/// GenerateConstantOffsets - Generate reuse formulae using symbolic offsets.
@@ -3329,38 +3482,11 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
if (LU.MaxOffset != LU.MinOffset)
Worklist.push_back(LU.MaxOffset);
- for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
- const SCEV *G = Base.BaseRegs[i];
-
- for (SmallVectorImpl<int64_t>::const_iterator I = Worklist.begin(),
- E = Worklist.end(); I != E; ++I) {
- Formula F = Base;
- F.BaseOffset = (uint64_t)Base.BaseOffset - *I;
- if (isLegalUse(TTI, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind,
- LU.AccessTy, F)) {
- // Add the offset to the base register.
- const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G);
- // If it cancelled out, drop the base register, otherwise update it.
- if (NewG->isZero()) {
- std::swap(F.BaseRegs[i], F.BaseRegs.back());
- F.BaseRegs.pop_back();
- } else
- F.BaseRegs[i] = NewG;
-
- (void)InsertFormula(LU, LUIdx, F);
- }
- }
-
- int64_t Imm = ExtractImmediate(G, SE);
- if (G->isZero() || Imm == 0)
- continue;
- Formula F = Base;
- F.BaseOffset = (uint64_t)F.BaseOffset + Imm;
- if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
- continue;
- F.BaseRegs[i] = G;
- (void)InsertFormula(LU, LUIdx, F);
- }
+ for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
+ GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, i);
+ if (Base.Scale == 1)
+ GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, /* Idx */ -1,
+ /* IsScaledReg */ true);
}
/// GenerateICmpZeroScales - For ICmpZero, check to see if we can scale up
@@ -3460,7 +3586,11 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
if (!IntTy) return;
// If this Formula already has a scaled register, we can't add another one.
- if (Base.Scale != 0) return;
+ // Try to unscale the formula to generate a better scale.
+ if (Base.Scale != 0 && !Base.Unscale())
+ return;
+
+ assert(Base.Scale == 0 && "Unscale did not did its job!");
// Check each interesting stride.
for (SmallSetVector<int64_t, 8>::const_iterator
@@ -3501,6 +3631,11 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
Formula F = Base;
F.ScaledReg = Quotient;
F.DeleteBaseReg(F.BaseRegs[i]);
+ // The canonical representation of 1*reg is reg, which is already in
+      // Base. In that case, do not try to insert the formula; it will be
+ // rejected anyway.
+ if (F.Scale == 1 && F.BaseRegs.empty())
+ continue;
(void)InsertFormula(LU, LUIdx, F);
}
}
@@ -3626,8 +3761,9 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
// Conservatively examine offsets between this orig reg a few selected
// other orig regs.
ImmMapTy::const_iterator OtherImms[] = {
- Imms.begin(), prior(Imms.end()),
- Imms.lower_bound((Imms.begin()->first + prior(Imms.end())->first) / 2)
+ Imms.begin(), std::prev(Imms.end()),
+ Imms.lower_bound((Imms.begin()->first + std::prev(Imms.end())->first) /
+ 2)
};
for (size_t i = 0, e = array_lengthof(OtherImms); i != e; ++i) {
ImmMapTy::const_iterator M = OtherImms[i];
@@ -3664,7 +3800,12 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
// TODO: Use a more targeted data structure.
for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
- const Formula &F = LU.Formulae[L];
+ Formula F = LU.Formulae[L];
+ // FIXME: The code for the scaled and unscaled registers looks
+ // very similar but slightly different. Investigate if they
+ // could be merged. That way, we would not have to unscale the
+ // Formula.
+ F.Unscale();
// Use the immediate in the scaled register.
if (F.ScaledReg == OrigReg) {
int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale;
@@ -3690,6 +3831,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
continue;
// OK, looks good.
+ NewF.Canonicalize();
(void)InsertFormula(LU, LUIdx, NewF);
} else {
// Use the immediate in a base register.
@@ -3723,6 +3865,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
goto skip_formula;
// Ok, looks good.
+ NewF.Canonicalize();
(void)InsertFormula(LU, LUIdx, NewF);
break;
skip_formula:;
@@ -3976,7 +4119,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
E = LU.Formulae.end(); I != E; ++I) {
const Formula &F = *I;
- if (F.BaseOffset == 0 || F.Scale != 0)
+ if (F.BaseOffset == 0 || (F.Scale != 0 && F.Scale != 1))
continue;
LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);
@@ -4073,7 +4216,7 @@ void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
// Pick the register which is used by the most LSRUses, which is likely
// to be a good reuse register candidate.
- const SCEV *Best = 0;
+ const SCEV *Best = nullptr;
unsigned BestNum = 0;
for (RegUseTracker::const_iterator I = RegUses.begin(), E = RegUses.end();
I != E; ++I) {
@@ -4170,19 +4313,22 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
E = LU.Formulae.end(); I != E; ++I) {
const Formula &F = *I;
- // Ignore formulae which do not use any of the required registers.
- bool SatisfiedReqReg = true;
+ // Ignore formulae which may not be ideal in terms of register reuse of
+ // ReqRegs. The formula should use all required registers before
+ // introducing new ones.
+ int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size());
for (SmallSetVector<const SCEV *, 4>::const_iterator J = ReqRegs.begin(),
JE = ReqRegs.end(); J != JE; ++J) {
const SCEV *Reg = *J;
- if ((!F.ScaledReg || F.ScaledReg != Reg) &&
- std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) ==
+ if ((F.ScaledReg && F.ScaledReg == Reg) ||
+ std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) !=
F.BaseRegs.end()) {
- SatisfiedReqReg = false;
- break;
+ --NumReqRegsToFind;
+ if (NumReqRegsToFind == 0)
+ break;
}
}
- if (!SatisfiedReqReg) {
+ if (NumReqRegsToFind != 0) {
// If none of the formulae satisfied the required registers, then we could
// clear ReqRegs and try again. Currently, we simply give up in this case.
continue;
@@ -4222,7 +4368,7 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
SmallVector<const Formula *, 8> Workspace;
Cost SolutionCost;
- SolutionCost.Loose();
+ SolutionCost.Lose();
Cost CurCost;
SmallPtrSet<const SCEV *, 16> CurRegs;
DenseSet<const SCEV *> VisitedRegs;
@@ -4280,7 +4426,7 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
}
bool AllDominate = true;
- Instruction *BetterPos = 0;
+ Instruction *BetterPos = nullptr;
Instruction *Tentative = IDom->getTerminator();
for (SmallVectorImpl<Instruction *>::const_iterator I = Inputs.begin(),
E = Inputs.end(); I != E; ++I) {
@@ -4293,7 +4439,7 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
// instead of at the end, so that it can be used for other expansions.
if (IDom == Inst->getParent() &&
(!BetterPos || !DT.dominates(Inst, BetterPos)))
- BetterPos = llvm::next(BasicBlock::iterator(Inst));
+ BetterPos = std::next(BasicBlock::iterator(Inst));
}
if (!AllDominate)
break;
@@ -4419,11 +4565,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
LF.UserInst, LF.OperandValToReplace,
Loops, SE, DT);
- Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP)));
+ Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr, IP)));
}
// Expand the ScaledReg portion.
- Value *ICmpScaledV = 0;
+ Value *ICmpScaledV = nullptr;
if (F.Scale != 0) {
const SCEV *ScaledS = F.ScaledReg;
@@ -4434,25 +4580,34 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
Loops, SE, DT);
if (LU.Kind == LSRUse::ICmpZero) {
- // An interesting way of "folding" with an icmp is to use a negated
- // scale, which we'll implement by inserting it into the other operand
- // of the icmp.
- assert(F.Scale == -1 &&
- "The only scale supported by ICmpZero uses is -1!");
- ICmpScaledV = Rewriter.expandCodeFor(ScaledS, 0, IP);
+ // Expand ScaleReg as if it was part of the base regs.
+ if (F.Scale == 1)
+ Ops.push_back(
+ SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, IP)));
+ else {
+ // An interesting way of "folding" with an icmp is to use a negated
+ // scale, which we'll implement by inserting it into the other operand
+ // of the icmp.
+ assert(F.Scale == -1 &&
+ "The only scale supported by ICmpZero uses is -1!");
+ ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr, IP);
+ }
} else {
// Otherwise just expand the scaled register and an explicit scale,
// which is expected to be matched as part of the address.
// Flush the operand list to suppress SCEVExpander hoisting address modes.
- if (!Ops.empty() && LU.Kind == LSRUse::Address) {
+ // Unless the addressing mode will not be folded.
+ if (!Ops.empty() && LU.Kind == LSRUse::Address &&
+ isAMCompletelyFolded(TTI, LU, F)) {
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
- ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, 0, IP));
- ScaledS = SE.getMulExpr(ScaledS,
- SE.getConstant(ScaledS->getType(), F.Scale));
+ ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, IP));
+ if (F.Scale != 1)
+ ScaledS =
+ SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale));
Ops.push_back(ScaledS);
}
}
@@ -4530,7 +4685,9 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
}
CI->setOperand(1, ICmpScaledV);
} else {
- assert(F.Scale == 0 &&
+ // A scale of 1 means that the scale has been expanded as part of the
+ // base regs.
+ assert((F.Scale == 0 || F.Scale == 1) &&
"ICmp does not support folding a global value and "
"a scale at the same time!");
Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
@@ -4571,7 +4728,7 @@ void LSRInstance::RewriteForPHI(PHINode *PN,
Loop *PNLoop = LI.getLoopFor(Parent);
if (!PNLoop || Parent != PNLoop->getHeader()) {
// Split the critical edge.
- BasicBlock *NewBB = 0;
+ BasicBlock *NewBB = nullptr;
if (!Parent->isLandingPad()) {
NewBB = SplitCriticalEdge(BB, Parent, P,
/*MergeIdenticalEdges=*/true,
@@ -4600,7 +4757,7 @@ void LSRInstance::RewriteForPHI(PHINode *PN,
}
std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
- Inserted.insert(std::make_pair(BB, static_cast<Value *>(0)));
+ Inserted.insert(std::make_pair(BB, static_cast<Value *>(nullptr)));
if (!Pair.second)
PN->setIncomingValue(i, Pair.first->second);
else {
@@ -4707,9 +4864,10 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
LSRInstance::LSRInstance(Loop *L, Pass *P)
: IU(P->getAnalysis<IVUsers>()), SE(P->getAnalysis<ScalarEvolution>()),
- DT(P->getAnalysis<DominatorTree>()), LI(P->getAnalysis<LoopInfo>()),
+ DT(P->getAnalysis<DominatorTreeWrapperPass>().getDomTree()),
+ LI(P->getAnalysis<LoopInfo>()),
TTI(P->getAnalysis<TargetTransformInfo>()), L(L), Changed(false),
- IVIncInsertPos(0) {
+ IVIncInsertPos(nullptr) {
// If LoopSimplify form is not available, stay out of trouble.
if (!L->isLoopSimplifyForm())
return;
@@ -4746,7 +4904,7 @@ LSRInstance::LSRInstance(Loop *L, Pass *P)
#endif // DEBUG
DEBUG(dbgs() << "\nLSR on loop ";
- WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false);
+ L->getHeader()->printAsOperand(dbgs(), /*PrintType=*/false);
dbgs() << ":\n");
// First, perform some low-level loop optimizations.
@@ -4876,8 +5034,8 @@ public:
LoopStrengthReduce();
private:
- bool runOnLoop(Loop *L, LPPassManager &LPM);
- void getAnalysisUsage(AnalysisUsage &AU) const;
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
};
}
@@ -4886,7 +5044,7 @@ char LoopStrengthReduce::ID = 0;
INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
"Loop Strength Reduction", false, false)
INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(IVUsers)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
@@ -4911,8 +5069,8 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<LoopInfo>();
AU.addPreserved<LoopInfo>();
AU.addRequiredID(LoopSimplifyID);
- AU.addRequired<DominatorTree>();
- AU.addPreserved<DominatorTree>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<ScalarEvolution>();
AU.addPreserved<ScalarEvolution>();
// Requiring LoopSimplify a second time here prevents IVUsers from running
@@ -4924,6 +5082,9 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
}
bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
+ if (skipOptnoneFunction(L))
+ return false;
+
bool Changed = false;
// Run the main LSR transformation.
@@ -4937,10 +5098,9 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
#ifndef NDEBUG
Rewriter.setDebugType(DEBUG_TYPE);
#endif
- unsigned numFolded =
- Rewriter.replaceCongruentIVs(L, &getAnalysis<DominatorTree>(),
- DeadInsts,
- &getAnalysis<TargetTransformInfo>());
+ unsigned numFolded = Rewriter.replaceCongruentIVs(
+ L, &getAnalysis<DominatorTreeWrapperPass>().getDomTree(), DeadInsts,
+ &getAnalysis<TargetTransformInfo>());
if (numFolded) {
Changed = true;
DeleteTriviallyDeadInstructions(DeadInsts);
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 08ac38dec5dd..935f289f040f 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -12,14 +12,16 @@
// counts of loops easily.
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loop-unroll"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -28,13 +30,16 @@
using namespace llvm;
+#define DEBUG_TYPE "loop-unroll"
+
static cl::opt<unsigned>
UnrollThreshold("unroll-threshold", cl::init(150), cl::Hidden,
cl::desc("The cut-off point for automatic loop unrolling"));
static cl::opt<unsigned>
UnrollCount("unroll-count", cl::init(0), cl::Hidden,
- cl::desc("Use this unroll count for all loops, for testing purposes"));
+ cl::desc("Use this unroll count for all loops including those with "
+ "unroll_count pragma values, for testing purposes"));
static cl::opt<bool>
UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden,
@@ -45,6 +50,11 @@ static cl::opt<bool>
UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::init(false), cl::Hidden,
cl::desc("Unroll loops with run-time trip counts"));
+static cl::opt<unsigned>
+PragmaUnrollThreshold("pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden,
+ cl::desc("Unrolled size limit for loops with an unroll(enable) or "
+ "unroll_count pragma."));
+
namespace {
class LoopUnroll : public LoopPass {
public:
@@ -86,12 +96,12 @@ namespace {
bool UserAllowPartial; // CurrentAllowPartial is user-specified.
bool UserRuntime; // CurrentRuntime is user-specified.
- bool runOnLoop(Loop *L, LPPassManager &LPM);
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override;
/// This transformation requires natural loop information & requires that
/// loop preheaders be inserted into the CFG...
///
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LoopInfo>();
AU.addPreserved<LoopInfo>();
AU.addRequiredID(LoopSimplifyID);
@@ -105,7 +115,67 @@ namespace {
// If loop unroll does not preserve dom info then LCSSA pass on next
// loop will receive invalid dom info.
// For now, recreate dom info, if loop is unrolled.
- AU.addPreserved<DominatorTree>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ }
+
+ // Fill in the UnrollingPreferences parameter with values from the
+ // TargetTransformInfo.
+ void getUnrollingPreferences(Loop *L, const TargetTransformInfo &TTI,
+ TargetTransformInfo::UnrollingPreferences &UP) {
+ UP.Threshold = CurrentThreshold;
+ UP.OptSizeThreshold = OptSizeUnrollThreshold;
+ UP.PartialThreshold = CurrentThreshold;
+ UP.PartialOptSizeThreshold = OptSizeUnrollThreshold;
+ UP.Count = CurrentCount;
+ UP.MaxCount = UINT_MAX;
+ UP.Partial = CurrentAllowPartial;
+ UP.Runtime = CurrentRuntime;
+ TTI.getUnrollingPreferences(L, UP);
+ }
+
+ // Select and return an unroll count based on parameters from
+ // user, unroll preferences, unroll pragmas, or a heuristic.
+ // SetExplicitly is set to true if the unroll count is is set by
+ // the user or a pragma rather than selected heuristically.
+ unsigned
+ selectUnrollCount(const Loop *L, unsigned TripCount, bool HasEnablePragma,
+ unsigned PragmaCount,
+ const TargetTransformInfo::UnrollingPreferences &UP,
+ bool &SetExplicitly);
+
+
+ // Select threshold values used to limit unrolling based on a
+ // total unrolled size. Parameters Threshold and PartialThreshold
+ // are set to the maximum unrolled size for fully and partially
+ // unrolled loops respectively.
+ void selectThresholds(const Loop *L, bool HasPragma,
+ const TargetTransformInfo::UnrollingPreferences &UP,
+ unsigned &Threshold, unsigned &PartialThreshold) {
+ // Determine the current unrolling threshold. While this is
+ // normally set from UnrollThreshold, it is overridden to a
+ // smaller value if the current function is marked as
+ // optimize-for-size, and the unroll threshold was not user
+ // specified.
+ Threshold = UserThreshold ? CurrentThreshold : UP.Threshold;
+ PartialThreshold = UserThreshold ? CurrentThreshold : UP.PartialThreshold;
+ if (!UserThreshold &&
+ L->getHeader()->getParent()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize)) {
+ Threshold = UP.OptSizeThreshold;
+ PartialThreshold = UP.PartialOptSizeThreshold;
+ }
+ if (HasPragma) {
+ // If the loop has an unrolling pragma, we want to be more
+ // aggressive with unrolling limits. Set thresholds to at
+ // least the PragmaUnrollThreshold value, which is larger than the
+ // default limits.
+ if (Threshold != NoThreshold)
+ Threshold = std::max<unsigned>(Threshold, PragmaUnrollThreshold);
+ if (PartialThreshold != NoThreshold)
+ PartialThreshold =
+ std::max<unsigned>(PartialThreshold, PragmaUnrollThreshold);
+ }
}
};
}
@@ -124,6 +194,10 @@ Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial,
return new LoopUnroll(Threshold, Count, AllowPartial, Runtime);
}
+Pass *llvm::createSimpleLoopUnrollPass() {
+ return llvm::createLoopUnrollPass(-1, -1, 0, 0);
+}
+
/// ApproximateLoopSize - Approximate the size of the loop.
static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
bool &NotDuplicatable,
@@ -145,7 +219,144 @@ static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
return LoopSize;
}
+// Returns the value associated with the given metadata node name (for
+// example, "llvm.loop.unroll.count"). If no such named metadata node
+// exists, then nullptr is returned.
+static const ConstantInt *GetUnrollMetadataValue(const Loop *L,
+ StringRef Name) {
+ MDNode *LoopID = L->getLoopID();
+ if (!LoopID) return nullptr;
+
+ // First operand should refer to the loop id itself.
+ assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
+ assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
+
+ for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
+ const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
+ if (!MD) continue;
+
+ const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
+ if (!S) continue;
+
+ if (Name.equals(S->getString())) {
+ assert(MD->getNumOperands() == 2 &&
+ "Unroll hint metadata should have two operands.");
+ return cast<ConstantInt>(MD->getOperand(1));
+ }
+ }
+ return nullptr;
+}
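// Illustrative sketch (not part of this change): building the kind of hint
// node that GetUnrollMetadataValue looks up, using the same Value-based
// metadata API this patch uses in SetLoopAlreadyUnrolled below. The helper
// name addUnrollCountMetadata is hypothetical.
static void addUnrollCountMetadata(Loop *L, unsigned Count) {
  LLVMContext &Ctx = L->getHeader()->getContext();
  // Operand 0 names the hint, operand 1 carries its value.
  Value *CountOps[] = {MDString::get(Ctx, "llvm.loop.unroll.count"),
                       ConstantInt::get(Type::getInt32Ty(Ctx), Count)};
  MDNode *CountNode = MDNode::get(Ctx, CountOps);
  // A loop id lists itself as operand 0, followed by its hint nodes.
  Value *LoopIDOps[] = {nullptr, CountNode};
  MDNode *LoopID = MDNode::get(Ctx, LoopIDOps);
  LoopID->replaceOperandWith(0, LoopID);
  L->setLoopID(LoopID);
}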
+
+// Returns true if the loop has an unroll(enable) pragma.
+static bool HasUnrollEnablePragma(const Loop *L) {
+ const ConstantInt *EnableValue =
+ GetUnrollMetadataValue(L, "llvm.loop.unroll.enable");
+ return (EnableValue && EnableValue->getZExtValue());
+}
+
+// Returns true if the loop has an unroll(disable) pragma.
+static bool HasUnrollDisablePragma(const Loop *L) {
+ const ConstantInt *EnableValue =
+ GetUnrollMetadataValue(L, "llvm.loop.unroll.enable");
+ return (EnableValue && !EnableValue->getZExtValue());
+}
+
+// If loop has an unroll_count pragma return the (necessarily
+// positive) value from the pragma. Otherwise return 0.
+static unsigned UnrollCountPragmaValue(const Loop *L) {
+ const ConstantInt *CountValue =
+ GetUnrollMetadataValue(L, "llvm.loop.unroll.count");
+ if (CountValue) {
+ unsigned Count = CountValue->getZExtValue();
+ assert(Count >= 1 && "Unroll count must be positive.");
+ return Count;
+ }
+ return 0;
+}
+
+// Remove existing unroll metadata and add unroll disable metadata to
+// indicate the loop has already been unrolled. This prevents a loop
+// from being unrolled more than is directed by a pragma if the loop
+// unrolling pass is run more than once (which it generally is).
+static void SetLoopAlreadyUnrolled(Loop *L) {
+ MDNode *LoopID = L->getLoopID();
+ if (!LoopID) return;
+
+ // First remove any existing loop unrolling metadata.
+ SmallVector<Value *, 4> Vals;
+ // Reserve first location for self reference to the LoopID metadata node.
+ Vals.push_back(nullptr);
+ for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
+ bool IsUnrollMetadata = false;
+ MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
+ if (MD) {
+ const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
+ IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll.");
+ }
+ if (!IsUnrollMetadata) Vals.push_back(LoopID->getOperand(i));
+ }
+
+ // Add unroll(disable) metadata to disable future unrolling.
+ LLVMContext &Context = L->getHeader()->getContext();
+ SmallVector<Value *, 2> DisableOperands;
+ DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.enable"));
+ DisableOperands.push_back(ConstantInt::get(Type::getInt1Ty(Context), 0));
+ MDNode *DisableNode = MDNode::get(Context, DisableOperands);
+ Vals.push_back(DisableNode);
+
+ MDNode *NewLoopID = MDNode::get(Context, Vals);
+ // Set operand 0 to refer to the loop id itself.
+ NewLoopID->replaceOperandWith(0, NewLoopID);
+ L->setLoopID(NewLoopID);
+ LoopID->replaceAllUsesWith(NewLoopID);
+}
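// For illustration (assumed hint values): a loop id that previously carried a
// single ("llvm.loop.unroll.count", 4) hint node leaves this function carrying
// only ("llvm.loop.unroll.enable", false), so a later run of the pass sees
// unroll(disable) and leaves the loop alone.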
+
+unsigned LoopUnroll::selectUnrollCount(
+ const Loop *L, unsigned TripCount, bool HasEnablePragma,
+ unsigned PragmaCount, const TargetTransformInfo::UnrollingPreferences &UP,
+ bool &SetExplicitly) {
+ SetExplicitly = true;
+
+ // User-specified count (either as a command-line option or
+ // constructor parameter) has highest precedence.
+ unsigned Count = UserCount ? CurrentCount : 0;
+
+ // If there is no user-specified count, unroll pragmas have the next
+ // highest precedence.
+ if (Count == 0) {
+ if (PragmaCount) {
+ Count = PragmaCount;
+ } else if (HasEnablePragma) {
+ // unroll(enable) pragma without an unroll_count pragma
+ // indicates to unroll loop fully.
+ Count = TripCount;
+ }
+ }
+
+ if (Count == 0)
+ Count = UP.Count;
+
+ if (Count == 0) {
+ SetExplicitly = false;
+ if (TripCount == 0)
+ // Runtime trip count.
+ Count = UnrollRuntimeCount;
+ else
+ // Conservative heuristic: if we know the trip count, see if we can
+ // completely unroll (subject to the threshold, checked below); otherwise
+ // try to find greatest modulo of the trip count which is still under
+ // threshold value.
+ Count = TripCount;
+ }
+ if (TripCount && Count > TripCount)
+ return TripCount;
+ return Count;
+}
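// Worked example with assumed values: given -unroll-count=8 on the command
// line, an unroll_count(4) pragma on the loop, and TripCount == 16, the
// user-specified count wins, so Count == 8 and SetExplicitly is true. Without
// the command-line option, the pragma yields Count == 4; with neither, UP.Count
// is used if the target provides one, otherwise the heuristic picks TripCount
// (or UnrollRuntimeCount for runtime trip counts) and SetExplicitly is false.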
+
bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
+ if (skipOptnoneFunction(L))
+ return false;
+
LoopInfo *LI = &getAnalysis<LoopInfo>();
ScalarEvolution *SE = &getAnalysis<ScalarEvolution>();
const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
@@ -153,26 +364,16 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
BasicBlock *Header = L->getHeader();
DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
<< "] Loop %" << Header->getName() << "\n");
- (void)Header;
+
+ if (HasUnrollDisablePragma(L)) {
+ return false;
+ }
+ bool HasEnablePragma = HasUnrollEnablePragma(L);
+ unsigned PragmaCount = UnrollCountPragmaValue(L);
+ bool HasPragma = HasEnablePragma || PragmaCount > 0;
TargetTransformInfo::UnrollingPreferences UP;
- UP.Threshold = CurrentThreshold;
- UP.OptSizeThreshold = OptSizeUnrollThreshold;
- UP.Count = CurrentCount;
- UP.Partial = CurrentAllowPartial;
- UP.Runtime = CurrentRuntime;
- TTI.getUnrollingPreferences(L, UP);
-
- // Determine the current unrolling threshold. While this is normally set
- // from UnrollThreshold, it is overridden to a smaller value if the current
- // function is marked as optimize-for-size, and the unroll threshold was
- // not user specified.
- unsigned Threshold = UserThreshold ? CurrentThreshold : UP.Threshold;
- if (!UserThreshold &&
- Header->getParent()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize))
- Threshold = UP.OptSizeThreshold;
+ getUnrollingPreferences(L, TTI, UP);
// Find trip count and trip multiple if count is not available
unsigned TripCount = 0;
@@ -186,74 +387,121 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock);
}
- bool Runtime = UserRuntime ? CurrentRuntime : UP.Runtime;
+ // Select an initial unroll count. This may be reduced later based
+ // on size thresholds.
+ bool CountSetExplicitly;
+ unsigned Count = selectUnrollCount(L, TripCount, HasEnablePragma, PragmaCount,
+ UP, CountSetExplicitly);
+
+ unsigned NumInlineCandidates;
+ bool notDuplicatable;
+ unsigned LoopSize =
+ ApproximateLoopSize(L, NumInlineCandidates, notDuplicatable, TTI);
+ DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
+ uint64_t UnrolledSize = (uint64_t)LoopSize * Count;
+ if (notDuplicatable) {
+ DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable"
+ << " instructions.\n");
+ return false;
+ }
+ if (NumInlineCandidates != 0) {
+ DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
+ return false;
+ }
- // Use a default unroll-count if the user doesn't specify a value
- // and the trip count is a run-time value. The default is different
- // for run-time or compile-time trip count loops.
- unsigned Count = UserCount ? CurrentCount : UP.Count;
- if (Runtime && Count == 0 && TripCount == 0)
- Count = UnrollRuntimeCount;
+ unsigned Threshold, PartialThreshold;
+ selectThresholds(L, HasPragma, UP, Threshold, PartialThreshold);
- if (Count == 0) {
- // Conservative heuristic: if we know the trip count, see if we can
- // completely unroll (subject to the threshold, checked below); otherwise
- // try to find greatest modulo of the trip count which is still under
- // threshold value.
- if (TripCount == 0)
- return false;
- Count = TripCount;
+ // Given Count, TripCount and thresholds determine the type of
+ // unrolling which is to be performed.
+ enum { Full = 0, Partial = 1, Runtime = 2 };
+ int Unrolling;
+ if (TripCount && Count == TripCount) {
+ if (Threshold != NoThreshold && UnrolledSize > Threshold) {
+ DEBUG(dbgs() << " Too large to fully unroll with count: " << Count
+ << " because size: " << UnrolledSize << ">" << Threshold
+ << "\n");
+ Unrolling = Partial;
+ } else {
+ Unrolling = Full;
+ }
+ } else if (TripCount && Count < TripCount) {
+ Unrolling = Partial;
+ } else {
+ Unrolling = Runtime;
}
- // Enforce the threshold.
- if (Threshold != NoThreshold) {
- unsigned NumInlineCandidates;
- bool notDuplicatable;
- unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates,
- notDuplicatable, TTI);
- DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
- if (notDuplicatable) {
- DEBUG(dbgs() << " Not unrolling loop which contains non duplicatable"
- << " instructions.\n");
+ // Reduce count based on the type of unrolling and the threshold values.
+ unsigned OriginalCount = Count;
+ bool AllowRuntime = UserRuntime ? CurrentRuntime : UP.Runtime;
+ if (Unrolling == Partial) {
+ bool AllowPartial = UserAllowPartial ? CurrentAllowPartial : UP.Partial;
+ if (!AllowPartial && !CountSetExplicitly) {
+ DEBUG(dbgs() << " will not try to unroll partially because "
+ << "-unroll-allow-partial not given\n");
return false;
}
- if (NumInlineCandidates != 0) {
- DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
+ if (PartialThreshold != NoThreshold && UnrolledSize > PartialThreshold) {
+ // Reduce unroll count to be modulo of TripCount for partial unrolling.
+ Count = PartialThreshold / LoopSize;
+ while (Count != 0 && TripCount % Count != 0)
+ Count--;
+ }
+ } else if (Unrolling == Runtime) {
+ if (!AllowRuntime && !CountSetExplicitly) {
+ DEBUG(dbgs() << " will not try to unroll loop with runtime trip count "
+ << "-unroll-runtime not given\n");
return false;
}
- uint64_t Size = (uint64_t)LoopSize*Count;
- if (TripCount != 1 && Size > Threshold) {
- DEBUG(dbgs() << " Too large to fully unroll with count: " << Count
- << " because size: " << Size << ">" << Threshold << "\n");
- bool AllowPartial = UserAllowPartial ? CurrentAllowPartial : UP.Partial;
- if (!AllowPartial && !(Runtime && TripCount == 0)) {
- DEBUG(dbgs() << " will not try to unroll partially because "
- << "-unroll-allow-partial not given\n");
- return false;
- }
- if (TripCount) {
- // Reduce unroll count to be modulo of TripCount for partial unrolling
- Count = Threshold / LoopSize;
- while (Count != 0 && TripCount%Count != 0)
- Count--;
- }
- else if (Runtime) {
- // Reduce unroll count to be a lower power-of-two value
- while (Count != 0 && Size > Threshold) {
- Count >>= 1;
- Size = LoopSize*Count;
- }
- }
- if (Count < 2) {
- DEBUG(dbgs() << " could not unroll partially\n");
- return false;
+ // Reduce unroll count to be the largest power-of-two factor of
+ // the original count which satisfies the threshold limit.
+ while (Count != 0 && UnrolledSize > PartialThreshold) {
+ Count >>= 1;
+ UnrolledSize = LoopSize * Count;
+ }
+ if (Count > UP.MaxCount)
+ Count = UP.MaxCount;
+ DEBUG(dbgs() << " partially unrolling with count: " << Count << "\n");
+ }
+
+ if (HasPragma) {
+ // Mark loop as unrolled to prevent unrolling beyond that
+ // requested by the pragma.
+ SetLoopAlreadyUnrolled(L);
+
+ // Emit optimization remarks if we are unable to unroll the loop
+ // as directed by a pragma.
+ DebugLoc LoopLoc = L->getStartLoc();
+ Function *F = Header->getParent();
+ LLVMContext &Ctx = F->getContext();
+ if (HasEnablePragma && PragmaCount == 0) {
+ if (TripCount && Count != TripCount) {
+ emitOptimizationRemarkMissed(
+ Ctx, DEBUG_TYPE, *F, LoopLoc,
+ "Unable to fully unroll loop as directed by unroll(enable) pragma "
+ "because unrolled size is too large.");
+ } else if (!TripCount) {
+ emitOptimizationRemarkMissed(
+ Ctx, DEBUG_TYPE, *F, LoopLoc,
+ "Unable to fully unroll loop as directed by unroll(enable) pragma "
+ "because loop has a runtime trip count.");
}
- DEBUG(dbgs() << " partially unrolling with count: " << Count << "\n");
+ } else if (PragmaCount > 0 && Count != OriginalCount) {
+ emitOptimizationRemarkMissed(
+ Ctx, DEBUG_TYPE, *F, LoopLoc,
+ "Unable to unroll loop the number of times directed by "
+ "unroll_count pragma because unrolled size is too large.");
}
}
+ if (Unrolling != Full && Count < 2) {
+ // Partial unrolling by 1 is a nop. For full unrolling, a factor
+ // of 1 makes sense because loop control can be eliminated.
+ return false;
+ }
+
// Unroll the loop.
- if (!UnrollLoop(L, Count, TripCount, Runtime, TripMultiple, LI, &LPM))
+ if (!UnrollLoop(L, Count, TripCount, AllowRuntime, TripMultiple, LI, this, &LPM))
return false;
return true;
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
index c4ebfd5f413d..977c53a3bc63 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -26,13 +26,11 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loop-unswitch"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CodeMetrics.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
@@ -40,6 +38,7 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/CommandLine.h"
@@ -53,6 +52,8 @@
#include <set>
using namespace llvm;
+#define DEBUG_TYPE "loop-unswitch"
+
STATISTIC(NumBranches, "Number of branches unswitched");
STATISTIC(NumSwitches, "Number of switches unswitched");
STATISTIC(NumSelects , "Number of selects unswitched");
@@ -96,7 +97,7 @@ namespace {
public:
LUAnalysisCache() :
- CurLoopInstructions(0), CurrentLoopProperties(0),
+ CurLoopInstructions(nullptr), CurrentLoopProperties(nullptr),
MaxSize(Threshold)
{}
@@ -151,44 +152,35 @@ namespace {
static char ID; // Pass ID, replacement for typeid
explicit LoopUnswitch(bool Os = false) :
LoopPass(ID), OptimizeForSize(Os), redoLoop(false),
- currentLoop(0), DT(0), loopHeader(0),
- loopPreheader(0) {
+ currentLoop(nullptr), DT(nullptr), loopHeader(nullptr),
+ loopPreheader(nullptr) {
initializeLoopUnswitchPass(*PassRegistry::getPassRegistry());
}
- bool runOnLoop(Loop *L, LPPassManager &LPM);
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override;
bool processCurrentLoop();
/// This transformation requires natural loop information & requires that
/// loop preheaders be inserted into the CFG.
///
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequiredID(LoopSimplifyID);
AU.addPreservedID(LoopSimplifyID);
AU.addRequired<LoopInfo>();
AU.addPreserved<LoopInfo>();
AU.addRequiredID(LCSSAID);
AU.addPreservedID(LCSSAID);
- AU.addPreserved<DominatorTree>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<ScalarEvolution>();
AU.addRequired<TargetTransformInfo>();
}
private:
- virtual void releaseMemory() {
+ void releaseMemory() override {
BranchesInfo.forgetLoop(currentLoop);
}
- /// RemoveLoopFromWorklist - If the specified loop is on the loop worklist,
- /// remove it.
- void RemoveLoopFromWorklist(Loop *L) {
- std::vector<Loop*>::iterator I = std::find(LoopProcessWorklist.begin(),
- LoopProcessWorklist.end(), L);
- if (I != LoopProcessWorklist.end())
- LoopProcessWorklist.erase(I);
- }
-
void initLoopData() {
loopHeader = currentLoop->getHeader();
loopPreheader = currentLoop->getLoopPreheader();
@@ -212,9 +204,8 @@ namespace {
Instruction *InsertPt);
void SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L);
- void RemoveLoopFromHierarchy(Loop *L);
- bool IsTrivialUnswitchCondition(Value *Cond, Constant **Val = 0,
- BasicBlock **LoopExit = 0);
+ bool IsTrivialUnswitchCondition(Value *Cond, Constant **Val = nullptr,
+ BasicBlock **LoopExit = nullptr);
};
}
@@ -225,7 +216,7 @@ bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI) {
LoopPropsMapIt PropsIt;
bool Inserted;
- llvm::tie(PropsIt, Inserted) =
+ std::tie(PropsIt, Inserted) =
LoopsProperties.insert(std::make_pair(L, LoopProperties()));
LoopProperties &Props = PropsIt->second;
@@ -283,8 +274,8 @@ void LUAnalysisCache::forgetLoop(const Loop *L) {
LoopsProperties.erase(LIt);
}
- CurrentLoopProperties = 0;
- CurLoopInstructions = 0;
+ CurrentLoopProperties = nullptr;
+ CurLoopInstructions = nullptr;
}
// Mark case value as unswitched.
@@ -355,10 +346,10 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) {
// We can never unswitch on vector conditions.
if (Cond->getType()->isVectorTy())
- return 0;
+ return nullptr;
// Constants should be folded, not unswitched on!
- if (isa<Constant>(Cond)) return 0;
+ if (isa<Constant>(Cond)) return nullptr;
// TODO: Handle: br (VARIANT|INVARIANT).
@@ -378,13 +369,18 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) {
return RHS;
}
- return 0;
+ return nullptr;
}
bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
+ if (skipOptnoneFunction(L))
+ return false;
+
LI = &getAnalysis<LoopInfo>();
LPM = &LPM_Ref;
- DT = getAnalysisIfAvailable<DominatorTree>();
+ DominatorTreeWrapperPass *DTWP =
+ getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ DT = DTWP ? &DTWP->getDomTree() : nullptr;
currentLoop = L;
Function *F = currentLoop->getHeader()->getParent();
bool Changed = false;
@@ -397,7 +393,7 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
if (Changed) {
// FIXME: Reconstruct dom info, because it is not preserved properly.
if (DT)
- DT->runOnFunction(*F);
+ DT->recalculate(*F);
}
return Changed;
}
@@ -456,7 +452,7 @@ bool LoopUnswitch::processCurrentLoop() {
// Find a value to unswitch on:
// FIXME: this should chose the most expensive case!
// FIXME: scan for a case with a non-critical edge?
- Constant *UnswitchVal = 0;
+ Constant *UnswitchVal = nullptr;
// Do not process same value again and again.
// At this point we have some cases already unswitched and
@@ -513,7 +509,7 @@ static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB,
if (!L->contains(BB)) {
// Otherwise, this is a loop exit, this is fine so long as this is the
// first exit.
- if (ExitBB != 0) return false;
+ if (ExitBB) return false;
ExitBB = BB;
return true;
}
@@ -540,10 +536,10 @@ static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB,
static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) {
std::set<BasicBlock*> Visited;
Visited.insert(L->getHeader()); // Branches to header make infinite loops.
- BasicBlock *ExitBB = 0;
+ BasicBlock *ExitBB = nullptr;
if (isTrivialLoopExitBlockHelper(L, BB, ExitBB, Visited))
return ExitBB;
- return 0;
+ return nullptr;
}
/// IsTrivialUnswitchCondition - Check to see if this unswitch condition is
@@ -564,7 +560,7 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
TerminatorInst *HeaderTerm = Header->getTerminator();
LLVMContext &Context = Header->getContext();
- BasicBlock *LoopExitBB = 0;
+ BasicBlock *LoopExitBB = nullptr;
if (BranchInst *BI = dyn_cast<BranchInst>(HeaderTerm)) {
// If the header block doesn't end with a conditional branch on Cond, we
// can't handle it.
@@ -634,8 +630,8 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
/// unswitch the loop, reprocess the pieces, then return true.
bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
Function *F = loopHeader->getParent();
- Constant *CondVal = 0;
- BasicBlock *ExitBlock = 0;
+ Constant *CondVal = nullptr;
+ BasicBlock *ExitBlock = nullptr;
if (IsTrivialUnswitchCondition(LoopCond, &CondVal, &ExitBlock)) {
// If the condition is trivial, always unswitch. There is no code growth
@@ -934,9 +930,8 @@ static void ReplaceUsesOfWith(Instruction *I, Value *V,
Worklist.push_back(Use);
// Add users to the worklist which may be simplified now.
- for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
- UI != E; ++UI)
- Worklist.push_back(cast<Instruction>(*UI));
+ for (User *U : I->users())
+ Worklist.push_back(cast<Instruction>(U));
LPM->deleteSimpleAnalysisValue(I, L);
RemoveFromWorklist(I, Worklist);
I->replaceAllUsesWith(V);
@@ -944,17 +939,6 @@ static void ReplaceUsesOfWith(Instruction *I, Value *V,
++NumSimplify;
}
-/// RemoveLoopFromHierarchy - We have discovered that the specified loop has
-/// become unwrapped, either because the backedge was deleted, or because the
-/// edge into the header was removed. If the edge into the header from the
-/// latch block was removed, the loop is unwrapped but subloops are still alive,
-/// so they just reparent loops. If the loops are actually dead, they will be
-/// removed later.
-void LoopUnswitch::RemoveLoopFromHierarchy(Loop *L) {
- LPM->deleteLoopFromQueue(L);
- RemoveLoopFromWorklist(L);
-}
-
// RewriteLoopBodyWithConditionConstant - We know either that the value LIC has
// the value specified by Val in the specified loop, or we know it does NOT have
// that value. Rewrite any uses of LIC or of properties correlated to it.
@@ -986,12 +970,11 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
Replacement = ConstantInt::get(Type::getInt1Ty(Val->getContext()),
!cast<ConstantInt>(Val)->getZExtValue());
- for (Value::use_iterator UI = LIC->use_begin(), E = LIC->use_end();
- UI != E; ++UI) {
- Instruction *U = dyn_cast<Instruction>(*UI);
- if (!U || !L->contains(U))
+ for (User *U : LIC->users()) {
+ Instruction *UI = dyn_cast<Instruction>(U);
+ if (!UI || !L->contains(UI))
continue;
- Worklist.push_back(U);
+ Worklist.push_back(UI);
}
for (std::vector<Instruction*>::iterator UI = Worklist.begin(),
@@ -1005,20 +988,19 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
// Otherwise, we don't know the precise value of LIC, but we do know that it
// is certainly NOT "Val". As such, simplify any uses in the loop that we
// can. This case occurs when we unswitch switch statements.
- for (Value::use_iterator UI = LIC->use_begin(), E = LIC->use_end();
- UI != E; ++UI) {
- Instruction *U = dyn_cast<Instruction>(*UI);
- if (!U || !L->contains(U))
+ for (User *U : LIC->users()) {
+ Instruction *UI = dyn_cast<Instruction>(U);
+ if (!UI || !L->contains(UI))
continue;
- Worklist.push_back(U);
+ Worklist.push_back(UI);
// TODO: We could do other simplifications, for example, turning
// 'icmp eq LIC, Val' -> false.
// If we know that LIC is not Val, use this info to simplify code.
- SwitchInst *SI = dyn_cast<SwitchInst>(U);
- if (SI == 0 || !isa<ConstantInt>(Val)) continue;
+ SwitchInst *SI = dyn_cast<SwitchInst>(UI);
+ if (!SI || !isa<ConstantInt>(Val)) continue;
SwitchInst::CaseIt DeadCase = SI->findCaseValue(cast<ConstantInt>(Val));
// Default case is live for multiple values.
diff --git a/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp b/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
index 8ced4946c832..3314e1ed41ab 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loweratomic"
#include "llvm/Transforms/Scalar.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -20,6 +19,8 @@
#include "llvm/Pass.h"
using namespace llvm;
+#define DEBUG_TYPE "loweratomic"
+
static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) {
IRBuilder<> Builder(CXI->getParent(), CXI);
Value *Ptr = CXI->getPointerOperand();
@@ -31,7 +32,10 @@ static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) {
Value *Res = Builder.CreateSelect(Equal, Val, Orig);
Builder.CreateStore(Res, Ptr);
- CXI->replaceAllUsesWith(Orig);
+ Res = Builder.CreateInsertValue(UndefValue::get(CXI->getType()), Orig, 0);
+ Res = Builder.CreateInsertValue(Res, Equal, 1);
+
+ CXI->replaceAllUsesWith(Res);
CXI->eraseFromParent();
return true;
}
@@ -42,7 +46,7 @@ static bool LowerAtomicRMWInst(AtomicRMWInst *RMWI) {
Value *Val = RMWI->getValOperand();
LoadInst *Orig = Builder.CreateLoad(Ptr);
- Value *Res = NULL;
+ Value *Res = nullptr;
switch (RMWI->getOperation()) {
default: llvm_unreachable("Unexpected RMW operation");
@@ -111,7 +115,9 @@ namespace {
LowerAtomic() : BasicBlockPass(ID) {
initializeLowerAtomicPass(*PassRegistry::getPassRegistry());
}
- bool runOnBasicBlock(BasicBlock &BB) {
+ bool runOnBasicBlock(BasicBlock &BB) override {
+ if (skipOptnoneFunction(BB))
+ return false;
bool Changed = false;
for (BasicBlock::iterator DI = BB.begin(), DE = BB.end(); DI != DE; ) {
Instruction *Inst = DI++;
diff --git a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 9912d3dafed3..7c184a4ad2c3 100644
--- a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -12,27 +12,28 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "memcpyopt"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include <list>
using namespace llvm;
+#define DEBUG_TYPE "memcpyopt"
+
STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
STATISTIC(NumMemSetInfer, "Number of memsets inferred");
STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
@@ -49,7 +50,7 @@ static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx,
int64_t Offset = 0;
for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
ConstantInt *OpC = dyn_cast<ConstantInt>(GEP->getOperand(i));
- if (OpC == 0)
+ if (!OpC)
return VariableIdxFound = true;
if (OpC->isZero()) continue; // No offset.
@@ -75,6 +76,13 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
const DataLayout &TD) {
Ptr1 = Ptr1->stripPointerCasts();
Ptr2 = Ptr2->stripPointerCasts();
+
+ // Handle the trivial case first.
+ if (Ptr1 == Ptr2) {
+ Offset = 0;
+ return true;
+ }
+
GEPOperator *GEP1 = dyn_cast<GEPOperator>(Ptr1);
GEPOperator *GEP2 = dyn_cast<GEPOperator>(Ptr2);
@@ -82,12 +90,12 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
// If one pointer is a GEP and the other isn't, then see if the GEP is a
// constant offset from the base, as in "P" and "gep P, 1".
- if (GEP1 && GEP2 == 0 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) {
+ if (GEP1 && !GEP2 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) {
Offset = -GetOffsetFromIndex(GEP1, 1, VariableIdxFound, TD);
return !VariableIdxFound;
}
- if (GEP2 && GEP1 == 0 && GEP2->getOperand(0)->stripPointerCasts() == Ptr1) {
+ if (GEP2 && !GEP1 && GEP2->getOperand(0)->stripPointerCasts() == Ptr1) {
Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, TD);
return !VariableIdxFound;
}
@@ -195,9 +203,9 @@ class MemsetRanges {
/// because each element is relatively large and expensive to copy.
std::list<MemsetRange> Ranges;
typedef std::list<MemsetRange>::iterator range_iterator;
- const DataLayout &TD;
+ const DataLayout &DL;
public:
- MemsetRanges(const DataLayout &td) : TD(td) {}
+ MemsetRanges(const DataLayout &DL) : DL(DL) {}
typedef std::list<MemsetRange>::const_iterator const_iterator;
const_iterator begin() const { return Ranges.begin(); }
@@ -212,7 +220,7 @@ public:
}
void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
- int64_t StoreSize = TD.getTypeStoreSize(SI->getOperand(0)->getType());
+ int64_t StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
addRange(OffsetFromFirst, StoreSize,
SI->getPointerOperand(), SI->getAlignment(), SI);
@@ -305,23 +313,23 @@ namespace {
class MemCpyOpt : public FunctionPass {
MemoryDependenceAnalysis *MD;
TargetLibraryInfo *TLI;
- const DataLayout *TD;
+ const DataLayout *DL;
public:
static char ID; // Pass identification, replacement for typeid
MemCpyOpt() : FunctionPass(ID) {
initializeMemCpyOptPass(*PassRegistry::getPassRegistry());
- MD = 0;
- TLI = 0;
- TD = 0;
+ MD = nullptr;
+ TLI = nullptr;
+ DL = nullptr;
}
- bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
private:
// This transformation requires dominator postdominator info
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
- AU.addRequired<DominatorTree>();
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<MemoryDependenceAnalysis>();
AU.addRequired<AliasAnalysis>();
AU.addRequired<TargetLibraryInfo>();
@@ -353,7 +361,7 @@ FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOpt(); }
INITIALIZE_PASS_BEGIN(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
@@ -366,13 +374,13 @@ INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
/// attempts to merge them together into a memcpy/memset.
Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
Value *StartPtr, Value *ByteVal) {
- if (TD == 0) return 0;
+ if (!DL) return nullptr;
// Okay, so we now have a single store that can be splatable. Scan to find
// all subsequent stores of the same value to offset from the same pointer.
// Join these together into ranges, so we can decide whether contiguous blocks
// are stored.
- MemsetRanges Ranges(*TD);
+ MemsetRanges Ranges(*DL);
BasicBlock::iterator BI = StartInst;
for (++BI; !isa<TerminatorInst>(BI); ++BI) {
@@ -396,7 +404,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
// Check to see if this store is to a constant offset from the start ptr.
int64_t Offset;
if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(),
- Offset, *TD))
+ Offset, *DL))
break;
Ranges.addStore(Offset, NextStore);
@@ -409,7 +417,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
// Check to see if this store is to a constant offset from the start ptr.
int64_t Offset;
- if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, *TD))
+ if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, *DL))
break;
Ranges.addMemSet(Offset, MSI);
@@ -419,7 +427,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
// If we have no ranges, then we just had a single store with nothing that
// could be merged in. This is a very common case of course.
if (Ranges.empty())
- return 0;
+ return nullptr;
// If we had at least one store that could be merged in, add the starting
// store as well. We try to avoid this unless there is at least something
@@ -433,7 +441,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
// Now that we have full information about ranges, loop over the ranges and
// emit memset's for anything big enough to be worthwhile.
- Instruction *AMemSet = 0;
+ Instruction *AMemSet = nullptr;
for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
I != E; ++I) {
const MemsetRange &Range = *I;
@@ -441,7 +449,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
if (Range.TheStores.size() == 1) continue;
// If it is profitable to lower this range to memset, do so now.
- if (!Range.isProfitableToUseMemset(*TD))
+ if (!Range.isProfitableToUseMemset(*DL))
continue;
// Otherwise, we do want to transform this! Create a new memset.
@@ -453,7 +461,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
if (Alignment == 0) {
Type *EltType =
cast<PointerType>(StartPtr->getType())->getElementType();
- Alignment = TD->getABITypeAlignment(EltType);
+ Alignment = DL->getABITypeAlignment(EltType);
}
AMemSet =
@@ -484,7 +492,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (!SI->isSimple()) return false;
- if (TD == 0) return false;
+ if (!DL) return false;
// Detect cases where we're performing call slot forwarding, but
// happen to be using a load-store pair to implement it, rather than
@@ -493,7 +501,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (LI->isSimple() && LI->hasOneUse() &&
LI->getParent() == SI->getParent()) {
MemDepResult ldep = MD->getDependency(LI);
- CallInst *C = 0;
+ CallInst *C = nullptr;
if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst()))
C = dyn_cast<CallInst>(ldep.getInst());
@@ -505,7 +513,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
for (BasicBlock::iterator I = --BasicBlock::iterator(SI),
E = C; I != E; --I) {
if (AA.getModRefInfo(&*I, StoreLoc) != AliasAnalysis::NoModRef) {
- C = 0;
+ C = nullptr;
break;
}
}
@@ -514,15 +522,15 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (C) {
unsigned storeAlign = SI->getAlignment();
if (!storeAlign)
- storeAlign = TD->getABITypeAlignment(SI->getOperand(0)->getType());
+ storeAlign = DL->getABITypeAlignment(SI->getOperand(0)->getType());
unsigned loadAlign = LI->getAlignment();
if (!loadAlign)
- loadAlign = TD->getABITypeAlignment(LI->getType());
+ loadAlign = DL->getABITypeAlignment(LI->getType());
bool changed = performCallSlotOptzn(LI,
SI->getPointerOperand()->stripPointerCasts(),
LI->getPointerOperand()->stripPointerCasts(),
- TD->getTypeStoreSize(SI->getOperand(0)->getType()),
+ DL->getTypeStoreSize(SI->getOperand(0)->getType()),
std::min(storeAlign, loadAlign), C);
if (changed) {
MD->removeInstruction(SI);
@@ -596,13 +604,13 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
return false;
// Check that all of src is copied to dest.
- if (TD == 0) return false;
+ if (!DL) return false;
ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
if (!srcArraySize)
return false;
- uint64_t srcSize = TD->getTypeAllocSize(srcAlloca->getAllocatedType()) *
+ uint64_t srcSize = DL->getTypeAllocSize(srcAlloca->getAllocatedType()) *
srcArraySize->getZExtValue();
if (cpyLen < srcSize)
@@ -617,7 +625,7 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
if (!destArraySize)
return false;
- uint64_t destSize = TD->getTypeAllocSize(A->getAllocatedType()) *
+ uint64_t destSize = DL->getTypeAllocSize(A->getAllocatedType()) *
destArraySize->getZExtValue();
if (destSize < srcSize)
@@ -636,7 +644,7 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
return false;
}
- uint64_t destSize = TD->getTypeAllocSize(StructTy);
+ uint64_t destSize = DL->getTypeAllocSize(StructTy);
if (destSize < srcSize)
return false;
} else {
@@ -646,7 +654,7 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
// Check that dest points to memory that is at least as aligned as src.
unsigned srcAlign = srcAlloca->getAlignment();
if (!srcAlign)
- srcAlign = TD->getABITypeAlignment(srcAlloca->getAllocatedType());
+ srcAlign = DL->getABITypeAlignment(srcAlloca->getAllocatedType());
bool isDestSufficientlyAligned = srcAlign <= cpyAlign;
// If dest is not aligned enough and we can't increase its alignment then
// bail out.
@@ -657,30 +665,34 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
// guarantees that it holds only undefined values when passed in (so the final
// memcpy can be dropped), that it is not read or written between the call and
// the memcpy, and that writing beyond the end of it is undefined.
- SmallVector<User*, 8> srcUseList(srcAlloca->use_begin(),
- srcAlloca->use_end());
+ SmallVector<User*, 8> srcUseList(srcAlloca->user_begin(),
+ srcAlloca->user_end());
while (!srcUseList.empty()) {
- User *UI = srcUseList.pop_back_val();
+ User *U = srcUseList.pop_back_val();
- if (isa<BitCastInst>(UI)) {
- for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
- I != E; ++I)
- srcUseList.push_back(*I);
- } else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(UI)) {
+ if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) {
+ for (User *UU : U->users())
+ srcUseList.push_back(UU);
+ } else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(U)) {
if (G->hasAllZeroIndices())
- for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
- I != E; ++I)
- srcUseList.push_back(*I);
+ for (User *UU : U->users())
+ srcUseList.push_back(UU);
else
return false;
- } else if (UI != C && UI != cpy) {
+ } else if (U != C && U != cpy) {
return false;
}
}
+ // Check that src isn't captured by the called function since the
+ // transformation can cause aliasing issues in that case.
+ for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
+ if (CS.getArgument(i) == cpySrc && !CS.doesNotCapture(i))
+ return false;
+
// Since we're changing the parameter to the callsite, we need to make sure
// that what would be the new parameter dominates the callsite.
- DominatorTree &DT = getAnalysis<DominatorTree>();
+ DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
if (Instruction *cpyDestInst = dyn_cast<Instruction>(cpyDest))
if (!DT.dominates(cpyDestInst, C))
return false;
@@ -816,9 +828,8 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
/// circumstances). This allows later passes to remove the first memcpy
/// altogether.
bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
- // We can only optimize statically-sized memcpy's that are non-volatile.
- ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
- if (CopySize == 0 || M->isVolatile()) return false;
+ // We can only optimize non-volatile memcpy's.
+ if (M->isVolatile()) return false;
// If the source and destination of the memcpy are the same, then zap it.
if (M->getSource() == M->getDest()) {
@@ -832,7 +843,7 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
if (GV->isConstant() && GV->hasDefinitiveInitializer())
if (Value *ByteVal = isBytewiseValue(GV->getInitializer())) {
IRBuilder<> Builder(M);
- Builder.CreateMemSet(M->getRawDest(), ByteVal, CopySize,
+ Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(),
M->getAlignment(), false);
MD->removeInstruction(M);
M->eraseFromParent();
@@ -840,9 +851,16 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
return true;
}
- // The are two possible optimizations we can do for memcpy:
+ // The optimizations after this point require the memcpy size.
+ ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
+ if (!CopySize) return false;
+
+ // There are three possible optimizations we can do for memcpy:
// a) memcpy-memcpy xform which exposes redundance for DSE.
// b) call-memcpy xform for return slot optimization.
+ // c) memcpy from freshly alloca'd space or space that has just started its
+ // lifetime copies undefined data, and we can therefore eliminate the
+ // memcpy in favor of the data that was already at the destination.
MemDepResult DepInfo = MD->getDependency(M);
if (DepInfo.isClobber()) {
if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
@@ -862,6 +880,25 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
if (SrcDepInfo.isClobber()) {
if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
return processMemCpyMemCpyDependence(M, MDep, CopySize->getZExtValue());
+ } else if (SrcDepInfo.isDef()) {
+ Instruction *I = SrcDepInfo.getInst();
+ bool hasUndefContents = false;
+
+ if (isa<AllocaInst>(I)) {
+ hasUndefContents = true;
+ } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start)
+ if (ConstantInt *LTSize = dyn_cast<ConstantInt>(II->getArgOperand(0)))
+ if (LTSize->getZExtValue() >= CopySize->getZExtValue())
+ hasUndefContents = true;
+ }
+
+ if (hasUndefContents) {
+ MD->removeInstruction(M);
+ M->eraseFromParent();
+ ++NumMemCpyInstr;
+ return true;
+ }
}
return false;
@@ -899,12 +936,12 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
/// processByValArgument - This is called on every byval argument in call sites.
bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
- if (TD == 0) return false;
+ if (!DL) return false;
// Find out what feeds this byval argument.
Value *ByValArg = CS.getArgument(ArgNo);
Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType();
- uint64_t ByValSize = TD->getTypeAllocSize(ByValTy);
+ uint64_t ByValSize = DL->getTypeAllocSize(ByValTy);
MemDepResult DepInfo =
MD->getPointerDependencyFrom(AliasAnalysis::Location(ByValArg, ByValSize),
true, CS.getInstruction(),
@@ -916,13 +953,13 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
// a memcpy, see if we can byval from the source of the memcpy instead of the
// result.
MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst());
- if (MDep == 0 || MDep->isVolatile() ||
+ if (!MDep || MDep->isVolatile() ||
ByValArg->stripPointerCasts() != MDep->getDest())
return false;
// The length of the memcpy must be larger or equal to the size of the byval.
ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
- if (C1 == 0 || C1->getValue().getZExtValue() < ByValSize)
+ if (!C1 || C1->getValue().getZExtValue() < ByValSize)
return false;
// Get the alignment of the byval. If the call doesn't specify the alignment,
@@ -933,7 +970,7 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
// If it is greater than the memcpy, then we check to see if we can force the
// source of the memcpy to the alignment we need. If we fail, we bail out.
if (MDep->getAlignment() < ByValAlign &&
- getOrEnforceKnownAlignment(MDep->getSource(),ByValAlign, TD) < ByValAlign)
+ getOrEnforceKnownAlignment(MDep->getSource(),ByValAlign, DL) < ByValAlign)
return false;
// Verify that the copied-from memory doesn't change in between the memcpy and
@@ -1007,9 +1044,13 @@ bool MemCpyOpt::iterateOnFunction(Function &F) {
// function.
//
bool MemCpyOpt::runOnFunction(Function &F) {
+ if (skipOptnoneFunction(F))
+ return false;
+
bool MadeChange = false;
MD = &getAnalysis<MemoryDependenceAnalysis>();
- TD = getAnalysisIfAvailable<DataLayout>();
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
TLI = &getAnalysis<TargetLibraryInfo>();
// If we don't have at least memset and memcpy, there is little point of doing
@@ -1024,6 +1065,6 @@ bool MemCpyOpt::runOnFunction(Function &F) {
MadeChange = true;
}
- MD = 0;
+ MD = nullptr;
return MadeChange;
}
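A source-level sketch of the new case (c) added to processMemCpy above (hypothetical function, not part of the patch): when the source of a memcpy is a freshly created local whose contents are still undefined, the copy cannot transfer any meaningful data, so MemCpyOpt may now erase it outright.

    #include <cstring>

    void fillFromScratch(char *dst) {             // hypothetical example
      char scratch[32];                           // fresh stack object: contents undefined
      std::memcpy(dst, scratch, sizeof(scratch)); // copies undefined bytes
      // Once lowered to IR, the memcpy's source dependence is the alloca itself,
      // so the new rule in processMemCpy can remove the call entirely.
    }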
diff --git a/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
new file mode 100644
index 000000000000..a7e80240d9e7
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -0,0 +1,632 @@
+//===- MergedLoadStoreMotion.cpp - merge and hoist/sink load/stores -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//! \file
+//! \brief This pass performs merges of loads and stores on both sides of a
+// diamond (hammock). It hoists the loads and sinks the stores.
+//
+// The algorithm iteratively hoists two loads to the same address out of a
+// diamond (hammock) and merges them into a single load in the header.
+// Similarly, it sinks and merges two stores to the tail block (footer). The
+// algorithm iterates over the instructions of one side of the diamond and
+// attempts to find a matching load/store on the other side. It hoists / sinks
+// when it thinks it is safe to do so. This optimization helps with, e.g.,
+// hiding load latencies, triggering if-conversion, and reducing static code
+// size.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+// Example:
+// Diamond shaped code before merge:
+//
+// header:
+// br %cond, label %if.then, label %if.else
+// / \
+// / \
+// / \
+// if.then: if.else:
+// %lt = load %addr_l %le = load %addr_l
+// <use %lt> <use %le>
+// <...> <...>
+// store %st, %addr_s store %se, %addr_s
+// br label %if.end br label %if.end
+// \ /
+// \ /
+// \ /
+// if.end ("footer"):
+// <...>
+//
+// Diamond shaped code after merge:
+//
+// header:
+// %l = load %addr_l
+// br %cond, label %if.then, label %if.else
+// / \
+// / \
+// / \
+// if.then: if.else:
+// <use %l> <use %l>
+// <...> <...>
+// br label %if.end br label %if.end
+// \ /
+// \ /
+// \ /
+// if.end ("footer"):
+// %s.sink = phi [%st, if.then], [%se, if.else]
+// <...>
+// store %s.sink, %addr_s
+// <...>
+//
+//
+//===----------------------- TODO -----------------------------------------===//
+//
+// 1) Generalize to regions other than diamonds
+// 2) Be more aggressive merging memory operations
+// Note that both changes require register pressure control
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include <vector>
+using namespace llvm;
+
+#define DEBUG_TYPE "mldst-motion"
+
+//===----------------------------------------------------------------------===//
+// MergedLoadStoreMotion Pass
+//===----------------------------------------------------------------------===//
+static cl::opt<bool>
+EnableMLSM("mlsm", cl::desc("Enable motion of merged load and store"),
+ cl::init(true));
+
+namespace {
+class MergedLoadStoreMotion : public FunctionPass {
+ AliasAnalysis *AA;
+ MemoryDependenceAnalysis *MD;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit MergedLoadStoreMotion(void)
+ : FunctionPass(ID), MD(nullptr), MagicCompileTimeControl(250) {
+ initializeMergedLoadStoreMotionPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+
+private:
+  // This transformation requires dominator and postdominator info
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<MemoryDependenceAnalysis>();
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
+ }
+
+ // Helper routines
+
+ ///
+ /// \brief Remove instruction from parent and update memory dependence
+ /// analysis.
+ ///
+ void removeInstruction(Instruction *Inst);
+ BasicBlock *getDiamondTail(BasicBlock *BB);
+ bool isDiamondHead(BasicBlock *BB);
+ // Routines for hoisting loads
+ bool isLoadHoistBarrier(Instruction *Inst);
+ LoadInst *canHoistFromBlock(BasicBlock *BB, LoadInst *LI);
+ void hoistInstruction(BasicBlock *BB, Instruction *HoistCand,
+ Instruction *ElseInst);
+ bool isSafeToHoist(Instruction *I) const;
+ bool hoistLoad(BasicBlock *BB, LoadInst *HoistCand, LoadInst *ElseInst);
+ bool mergeLoads(BasicBlock *BB);
+ // Routines for sinking stores
+ StoreInst *canSinkFromBlock(BasicBlock *BB, StoreInst *SI);
+ PHINode *getPHIOperand(BasicBlock *BB, StoreInst *S0, StoreInst *S1);
+ bool isStoreSinkBarrier(Instruction *Inst);
+ bool sinkStore(BasicBlock *BB, StoreInst *SinkCand, StoreInst *ElseInst);
+ bool mergeStores(BasicBlock *BB);
+ // The mergeLoad/Store algorithms could have Size0 * Size1 complexity,
+ // where Size0 and Size1 are the #instructions on the two sides of
+ // the diamond. The constant chosen here is arbitrary. Compiler Time
+ // Control is enforced by the check Size0 * Size1 < MagicCompileTimeControl.
+ const int MagicCompileTimeControl;
+};
+
+char MergedLoadStoreMotion::ID = 0;
+}
+
+///
+/// \brief createMergedLoadStoreMotionPass - The public interface to this file.
+///
+FunctionPass *llvm::createMergedLoadStoreMotionPass() {
+ return new MergedLoadStoreMotion();
+}
+
+INITIALIZE_PASS_BEGIN(MergedLoadStoreMotion, "mldst-motion",
+ "MergedLoadStoreMotion", false, false)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MergedLoadStoreMotion, "mldst-motion",
+ "MergedLoadStoreMotion", false, false)
+
+///
+/// \brief Remove instruction from parent and update memory dependence analysis.
+///
+void MergedLoadStoreMotion::removeInstruction(Instruction *Inst) {
+ // Notify the memory dependence analysis.
+ if (MD) {
+ MD->removeInstruction(Inst);
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+ MD->invalidateCachedPointerInfo(LI->getPointerOperand());
+ if (Inst->getType()->getScalarType()->isPointerTy()) {
+ MD->invalidateCachedPointerInfo(Inst);
+ }
+ }
+ Inst->eraseFromParent();
+}
+
+///
+/// \brief Return tail block of a diamond.
+///
+BasicBlock *MergedLoadStoreMotion::getDiamondTail(BasicBlock *BB) {
+ assert(isDiamondHead(BB) && "Basic block is not head of a diamond");
+ BranchInst *BI = (BranchInst *)(BB->getTerminator());
+ BasicBlock *Succ0 = BI->getSuccessor(0);
+ BasicBlock *Tail = Succ0->getTerminator()->getSuccessor(0);
+ return Tail;
+}
+
+///
+/// \brief True when BB is the head of a diamond (hammock)
+///
+bool MergedLoadStoreMotion::isDiamondHead(BasicBlock *BB) {
+ if (!BB)
+ return false;
+ if (!isa<BranchInst>(BB->getTerminator()))
+ return false;
+ if (BB->getTerminator()->getNumSuccessors() != 2)
+ return false;
+
+ BranchInst *BI = (BranchInst *)(BB->getTerminator());
+ BasicBlock *Succ0 = BI->getSuccessor(0);
+ BasicBlock *Succ1 = BI->getSuccessor(1);
+
+ if (!Succ0->getSinglePredecessor() ||
+ Succ0->getTerminator()->getNumSuccessors() != 1)
+ return false;
+ if (!Succ1->getSinglePredecessor() ||
+ Succ1->getTerminator()->getNumSuccessors() != 1)
+ return false;
+
+ BasicBlock *Tail = Succ0->getTerminator()->getSuccessor(0);
+ // Ignore triangles.
+ if (Succ1->getTerminator()->getSuccessor(0) != Tail)
+ return false;
+ return true;
+}
+
+///
+/// \brief True when instruction is a hoist barrier for a load
+///
+/// Whenever an instruction could possibly modify the value
+/// being loaded or prevent the load from happening,
+/// it is considered a hoist barrier.
+///
+bool MergedLoadStoreMotion::isLoadHoistBarrier(Instruction *Inst) {
+ // FIXME: A call with no side effects should not be a barrier.
+ // Aren't all such calls covered by mayHaveSideEffects() below?
+ // Then this check can be removed.
+ if (isa<CallInst>(Inst))
+ return true;
+ if (isa<TerminatorInst>(Inst))
+ return true;
+ // FIXME: Conservatively let a store instruction block the load.
+ // Use alias analysis instead.
+ if (isa<StoreInst>(Inst))
+ return true;
+ // Note: mayHaveSideEffects covers all instructions that could
+ // trigger a change to state. Eg. in-flight stores have to be executed
+ // before ordered loads or fences, calls could invoke functions that store
+ // data to memory etc.
+ if (Inst->mayHaveSideEffects()) {
+ return true;
+ }
+ DEBUG(dbgs() << "No Hoist Barrier\n");
+ return false;
+}
+
+///
+/// \brief Decide if a load can be hoisted
+///
+/// When there is a load in \p BB to the same address as \p LI
+/// and it can be hoisted from \p BB, return that load.
+/// Otherwise return Null.
+///
+LoadInst *MergedLoadStoreMotion::canHoistFromBlock(BasicBlock *BB,
+ LoadInst *LI) {
+ LoadInst *I = nullptr;
+ assert(isa<LoadInst>(LI));
+ if (LI->isUsedOutsideOfBlock(LI->getParent()))
+ return nullptr;
+
+ for (BasicBlock::iterator BBI = BB->begin(), BBE = BB->end(); BBI != BBE;
+ ++BBI) {
+ Instruction *Inst = BBI;
+
+    // Only merge and hoist loads when their result is used only in BB
+ if (isLoadHoistBarrier(Inst))
+ break;
+ if (!isa<LoadInst>(Inst))
+ continue;
+ if (Inst->isUsedOutsideOfBlock(Inst->getParent()))
+ continue;
+
+ AliasAnalysis::Location LocLI = AA->getLocation(LI);
+ AliasAnalysis::Location LocInst = AA->getLocation((LoadInst *)Inst);
+ if (AA->isMustAlias(LocLI, LocInst) && LI->getType() == Inst->getType()) {
+ I = (LoadInst *)Inst;
+ break;
+ }
+ }
+ return I;
+}
+
+///
+/// \brief Merge two equivalent instructions \p HoistCand and \p ElseInst into
+/// \p BB
+///
+/// BB is the head of a diamond
+///
+void MergedLoadStoreMotion::hoistInstruction(BasicBlock *BB,
+ Instruction *HoistCand,
+ Instruction *ElseInst) {
+ DEBUG(dbgs() << " Hoist Instruction into BB \n"; BB->dump();
+ dbgs() << "Instruction Left\n"; HoistCand->dump(); dbgs() << "\n";
+ dbgs() << "Instruction Right\n"; ElseInst->dump(); dbgs() << "\n");
+ // Hoist the instruction.
+ assert(HoistCand->getParent() != BB);
+
+ // Intersect optional metadata.
+ HoistCand->intersectOptionalDataWith(ElseInst);
+ HoistCand->dropUnknownMetadata();
+
+ // Prepend point for instruction insert
+ Instruction *HoistPt = BB->getTerminator();
+
+ // Merged instruction
+ Instruction *HoistedInst = HoistCand->clone();
+
+ // Notify AA of the new value.
+ if (isa<LoadInst>(HoistCand))
+ AA->copyValue(HoistCand, HoistedInst);
+
+ // Hoist instruction.
+ HoistedInst->insertBefore(HoistPt);
+
+ HoistCand->replaceAllUsesWith(HoistedInst);
+ removeInstruction(HoistCand);
+ // Replace the else block instruction.
+ ElseInst->replaceAllUsesWith(HoistedInst);
+ removeInstruction(ElseInst);
+}
+
+///
+/// \brief Return true if no operand of \p I is defined in I's parent block
+///
+bool MergedLoadStoreMotion::isSafeToHoist(Instruction *I) const {
+ BasicBlock *Parent = I->getParent();
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ Instruction *Instr = dyn_cast<Instruction>(I->getOperand(i));
+ if (Instr && Instr->getParent() == Parent)
+ return false;
+ }
+ return true;
+}
+
+///
+/// \brief Merge two equivalent loads and GEPs and hoist into diamond head
+///
+bool MergedLoadStoreMotion::hoistLoad(BasicBlock *BB, LoadInst *L0,
+ LoadInst *L1) {
+ // Only one definition?
+ Instruction *A0 = dyn_cast<Instruction>(L0->getPointerOperand());
+ Instruction *A1 = dyn_cast<Instruction>(L1->getPointerOperand());
+ if (A0 && A1 && A0->isIdenticalTo(A1) && isSafeToHoist(A0) &&
+ A0->hasOneUse() && (A0->getParent() == L0->getParent()) &&
+ A1->hasOneUse() && (A1->getParent() == L1->getParent()) &&
+ isa<GetElementPtrInst>(A0)) {
+ DEBUG(dbgs() << "Hoist Instruction into BB \n"; BB->dump();
+ dbgs() << "Instruction Left\n"; L0->dump(); dbgs() << "\n";
+ dbgs() << "Instruction Right\n"; L1->dump(); dbgs() << "\n");
+ hoistInstruction(BB, A0, A1);
+ hoistInstruction(BB, L0, L1);
+ return true;
+ } else
+ return false;
+}
+
+///
+/// \brief Try to hoist two loads to same address into diamond header
+///
+/// Starting from a diamond head block, iterate over the instructions in one
+/// successor block and try to match a load in the second successor.
+///
+bool MergedLoadStoreMotion::mergeLoads(BasicBlock *BB) {
+ bool MergedLoads = false;
+ assert(isDiamondHead(BB));
+ BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
+ BasicBlock *Succ0 = BI->getSuccessor(0);
+ BasicBlock *Succ1 = BI->getSuccessor(1);
+ // #Instructions in Succ1 for Compile Time Control
+ int Size1 = Succ1->size();
+ int NLoads = 0;
+ for (BasicBlock::iterator BBI = Succ0->begin(), BBE = Succ0->end();
+ BBI != BBE;) {
+
+ Instruction *I = BBI;
+ ++BBI;
+ if (isLoadHoistBarrier(I))
+ break;
+
+    // Only hoist simple (non-atomic, non-volatile) loads.
+ if (!isa<LoadInst>(I))
+ continue;
+
+ LoadInst *L0 = (LoadInst *)I;
+ if (!L0->isSimple())
+ continue;
+
+ ++NLoads;
+ if (NLoads * Size1 >= MagicCompileTimeControl)
+ break;
+ if (LoadInst *L1 = canHoistFromBlock(Succ1, L0)) {
+ bool Res = hoistLoad(BB, L0, L1);
+ MergedLoads |= Res;
+ // Don't attempt to hoist above loads that had not been hoisted.
+ if (!Res)
+ break;
+ }
+ }
+ return MergedLoads;
+}
+
+///
+/// \brief True when instruction is sink barrier for a store
+///
+bool MergedLoadStoreMotion::isStoreSinkBarrier(Instruction *Inst) {
+ // FIXME: Conservatively let a load instruction block the store.
+ // Use alias analysis instead.
+ if (isa<LoadInst>(Inst))
+ return true;
+ if (isa<CallInst>(Inst))
+ return true;
+ if (isa<TerminatorInst>(Inst) && !isa<BranchInst>(Inst))
+ return true;
+ // Note: mayHaveSideEffects covers all instructions that could
+ // trigger a change to state. Eg. in-flight stores have to be executed
+ // before ordered loads or fences, calls could invoke functions that store
+ // data to memory etc.
+ if (!isa<StoreInst>(Inst) && Inst->mayHaveSideEffects()) {
+ return true;
+ }
+ DEBUG(dbgs() << "No Sink Barrier\n");
+ return false;
+}
+
+///
+/// \brief Check if \p BB contains a store to the same address as \p SI
+///
+/// \return The store in \p BB when it is safe to sink. Otherwise return Null.
+///
+StoreInst *MergedLoadStoreMotion::canSinkFromBlock(BasicBlock *BB,
+ StoreInst *SI) {
+ StoreInst *I = 0;
+ DEBUG(dbgs() << "can Sink? : "; SI->dump(); dbgs() << "\n");
+ for (BasicBlock::reverse_iterator RBI = BB->rbegin(), RBE = BB->rend();
+ RBI != RBE; ++RBI) {
+ Instruction *Inst = &*RBI;
+
+    // Stop searching when a store sink barrier is reached.
+ if (isStoreSinkBarrier(Inst))
+ break;
+ if (isa<StoreInst>(Inst)) {
+ AliasAnalysis::Location LocSI = AA->getLocation(SI);
+ AliasAnalysis::Location LocInst = AA->getLocation((StoreInst *)Inst);
+ if (AA->isMustAlias(LocSI, LocInst)) {
+ I = (StoreInst *)Inst;
+ break;
+ }
+ }
+ }
+ return I;
+}
+
+///
+/// \brief Create a PHI node in BB for the operands of S0 and S1
+///
+PHINode *MergedLoadStoreMotion::getPHIOperand(BasicBlock *BB, StoreInst *S0,
+ StoreInst *S1) {
+ // Create a phi if the values mismatch.
+ PHINode *NewPN = 0;
+ Value *Opd1 = S0->getValueOperand();
+ Value *Opd2 = S1->getValueOperand();
+ if (Opd1 != Opd2) {
+ NewPN = PHINode::Create(Opd1->getType(), 2, Opd2->getName() + ".sink",
+ BB->begin());
+ NewPN->addIncoming(Opd1, S0->getParent());
+ NewPN->addIncoming(Opd2, S1->getParent());
+ if (NewPN->getType()->getScalarType()->isPointerTy()) {
+ // Notify AA of the new value.
+ AA->copyValue(Opd1, NewPN);
+ AA->copyValue(Opd2, NewPN);
+ // AA needs to be informed when a PHI-use of the pointer value is added
+ for (unsigned I = 0, E = NewPN->getNumIncomingValues(); I != E; ++I) {
+ unsigned J = PHINode::getOperandNumForIncomingValue(I);
+ AA->addEscapingUse(NewPN->getOperandUse(J));
+ }
+ if (MD)
+ MD->invalidateCachedPointerInfo(NewPN);
+ }
+ }
+ return NewPN;
+}
+
+///
+/// \brief Merge two stores to same address and sink into \p BB
+///
+/// Also sinks GEP instruction computing the store address
+///
+bool MergedLoadStoreMotion::sinkStore(BasicBlock *BB, StoreInst *S0,
+ StoreInst *S1) {
+ // Only one definition?
+ Instruction *A0 = dyn_cast<Instruction>(S0->getPointerOperand());
+ Instruction *A1 = dyn_cast<Instruction>(S1->getPointerOperand());
+ if (A0 && A1 && A0->isIdenticalTo(A1) && A0->hasOneUse() &&
+ (A0->getParent() == S0->getParent()) && A1->hasOneUse() &&
+ (A1->getParent() == S1->getParent()) && isa<GetElementPtrInst>(A0)) {
+ DEBUG(dbgs() << "Sink Instruction into BB \n"; BB->dump();
+ dbgs() << "Instruction Left\n"; S0->dump(); dbgs() << "\n";
+ dbgs() << "Instruction Right\n"; S1->dump(); dbgs() << "\n");
+    // Sink the instruction.
+ BasicBlock::iterator InsertPt = BB->getFirstInsertionPt();
+ // Intersect optional metadata.
+ S0->intersectOptionalDataWith(S1);
+ S0->dropUnknownMetadata();
+
+ // Create the new store to be inserted at the join point.
+ StoreInst *SNew = (StoreInst *)(S0->clone());
+ Instruction *ANew = A0->clone();
+ AA->copyValue(S0, SNew);
+ SNew->insertBefore(InsertPt);
+ ANew->insertBefore(SNew);
+
+ assert(S0->getParent() == A0->getParent());
+ assert(S1->getParent() == A1->getParent());
+
+ PHINode *NewPN = getPHIOperand(BB, S0, S1);
+ // New PHI operand? Use it.
+ if (NewPN)
+ SNew->setOperand(0, NewPN);
+ removeInstruction(S0);
+ removeInstruction(S1);
+ A0->replaceAllUsesWith(ANew);
+ removeInstruction(A0);
+ A1->replaceAllUsesWith(ANew);
+ removeInstruction(A1);
+ return true;
+ }
+ return false;
+}
+
+///
+/// \brief True when two stores are equivalent and can sink into the footer
+///
+/// Starting from a diamond tail block, iterate over the instructions in one
+/// predecessor block and try to match a store in the second predecessor.
+///
+bool MergedLoadStoreMotion::mergeStores(BasicBlock *T) {
+
+ bool MergedStores = false;
+ assert(T && "Footer of a diamond cannot be empty");
+
+ pred_iterator PI = pred_begin(T), E = pred_end(T);
+ assert(PI != E);
+ BasicBlock *Pred0 = *PI;
+ ++PI;
+ BasicBlock *Pred1 = *PI;
+ ++PI;
+ // tail block of a diamond/hammock?
+ if (Pred0 == Pred1)
+ return false; // No.
+ if (PI != E)
+ return false; // No. More than 2 predecessors.
+
+  // #Instructions in Pred1 for Compile Time Control
+ int Size1 = Pred1->size();
+ int NStores = 0;
+
+ for (BasicBlock::reverse_iterator RBI = Pred0->rbegin(), RBE = Pred0->rend();
+ RBI != RBE;) {
+
+ Instruction *I = &*RBI;
+ ++RBI;
+ if (isStoreSinkBarrier(I))
+ break;
+    // Only sink simple (non-atomic, non-volatile) stores.
+ if (!isa<StoreInst>(I))
+ continue;
+ StoreInst *S0 = (StoreInst *)I;
+ if (!S0->isSimple())
+ continue;
+
+ ++NStores;
+ if (NStores * Size1 >= MagicCompileTimeControl)
+ break;
+ if (StoreInst *S1 = canSinkFromBlock(Pred1, S0)) {
+ bool Res = sinkStore(T, S0, S1);
+ MergedStores |= Res;
+      // Don't attempt to sink below stores that had to stick around.
+      // But after removing a store and some of its feeding
+      // instructions, search again from the beginning since the iterator
+      // is likely stale at this point.
+ if (!Res)
+ break;
+ else {
+ RBI = Pred0->rbegin();
+ RBE = Pred0->rend();
+ DEBUG(dbgs() << "Search again\n"; Instruction *I = &*RBI; I->dump());
+ }
+ }
+ }
+ return MergedStores;
+}
+///
+/// \brief Run the transformation for each function
+///
+bool MergedLoadStoreMotion::runOnFunction(Function &F) {
+ MD = &getAnalysis<MemoryDependenceAnalysis>();
+ AA = &getAnalysis<AliasAnalysis>();
+
+ bool Changed = false;
+ if (!EnableMLSM)
+ return false;
+ DEBUG(dbgs() << "Instruction Merger\n");
+
+  // Walk the function's basic blocks looking for diamond heads.
+ for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) {
+ BasicBlock *BB = FI++;
+
+ // Hoist equivalent loads and sink stores
+ // outside diamonds when possible
+ // Run outside core GVN
+ if (isDiamondHead(BB)) {
+ Changed |= mergeLoads(BB);
+ Changed |= mergeStores(getDiamondTail(BB));
+ }
+ }
+ return Changed;
+}
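As a rough illustration of the shape the new pass targets (hypothetical C++ source, not from the patch): both arms of the branch load the same address and store to the same address, so the load can be hoisted into the header and the two stores sunk into the footer behind a phi of the stored values, matching the diagrams in the file header above.

    int diamond(bool cond, const int *in, int *out) {  // hypothetical example
      int r;
      if (cond) {
        r = *in + 1;      // load in the "then" arm
        *out = r;         // store in the "then" arm
      } else {
        r = *in - 1;      // load in the "else" arm
        *out = r;         // store in the "else" arm
      }
      // After mldst-motion: one load of *in before the branch, one store of a
      // phi'd value after the join block.
      return r;
    }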
diff --git a/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
index 15cee44e13dd..7cce89e0627e 100644
--- a/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
@@ -13,7 +13,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "partially-inline-libcalls"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
@@ -25,6 +24,8 @@
using namespace llvm;
+#define DEBUG_TYPE "partially-inline-libcalls"
+
namespace {
class PartiallyInlineLibCalls : public FunctionPass {
public:
@@ -35,8 +36,8 @@ namespace {
initializePartiallyInlineLibCallsPass(*PassRegistry::getPassRegistry());
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const;
- virtual bool runOnFunction(Function &F);
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnFunction(Function &F) override;
private:
/// Optimize calls to sqrt.
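The same mechanical change recurs throughout this batch: DEBUG_TYPE is no longer defined before the includes but after them and after "using namespace llvm", so no header is compiled with the macro already defined. A minimal sketch of the new layout, with a hypothetical pass name:

    #include "llvm/Support/Debug.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    // DEBUG_TYPE now follows the includes, as in the hunks above and below.
    #define DEBUG_TYPE "hypothetical-pass"

    static void note() { DEBUG(dbgs() << "running\n"); }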
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
index 328a9c5755b3..ea2cf7cf9b5f 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -20,29 +20,29 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "reassociate"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
using namespace llvm;
+#define DEBUG_TYPE "reassociate"
+
STATISTIC(NumChanged, "Number of insts reassociated");
STATISTIC(NumAnnihil, "Number of expr tree annihilated");
STATISTIC(NumFactor , "Number of multiplies factored");
@@ -67,7 +67,7 @@ static void PrintOps(Instruction *I, const SmallVectorImpl<ValueEntry> &Ops) {
<< *Ops[0].Op->getType() << '\t';
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
dbgs() << "[ ";
- WriteAsOperand(dbgs(), Ops[i].Op, false, M);
+ Ops[i].Op->printAsOperand(dbgs(), false, M);
dbgs() << ", #" << Ops[i].Rank << "] ";
}
}
@@ -123,14 +123,14 @@ namespace {
public:
XorOpnd(Value *V);
- bool isInvalid() const { return SymbolicPart == 0; }
+ bool isInvalid() const { return SymbolicPart == nullptr; }
bool isOrExpr() const { return isOr; }
Value *getValue() const { return OrigVal; }
Value *getSymbolicPart() const { return SymbolicPart; }
unsigned getSymbolicRank() const { return SymbolicRank; }
const APInt &getConstPart() const { return ConstPart; }
- void Invalidate() { SymbolicPart = OrigVal = 0; }
+ void Invalidate() { SymbolicPart = OrigVal = nullptr; }
void setSymbolicRank(unsigned R) { SymbolicRank = R; }
// Sort the XorOpnd-Pointer in ascending order of symbolic-value-rank.
@@ -168,9 +168,9 @@ namespace {
initializeReassociatePass(*PassRegistry::getPassRegistry());
}
- bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
}
private:
@@ -237,7 +237,7 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
if (V->hasOneUse() && isa<Instruction>(V) &&
cast<Instruction>(V)->getOpcode() == Opcode)
return cast<BinaryOperator>(V);
- return 0;
+ return nullptr;
}
static bool isUnmovableInstruction(Instruction *I) {
@@ -285,7 +285,7 @@ void Reassociate::BuildRankMap(Function &F) {
unsigned Reassociate::getRank(Value *V) {
Instruction *I = dyn_cast<Instruction>(V);
- if (I == 0) {
+ if (!I) {
if (isa<Argument>(V)) return ValueRankMap[V]; // Function argument.
return 0; // Otherwise it's a global or constant, rank 0.
}
@@ -706,7 +706,7 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
// ExpressionChanged - Non-null if the rewritten expression differs from the
// original in some non-trivial way, requiring the clearing of optional flags.
// Flags are cleared from the operator in ExpressionChanged up to I inclusive.
- BinaryOperator *ExpressionChanged = 0;
+ BinaryOperator *ExpressionChanged = nullptr;
for (unsigned i = 0; ; ++i) {
// The last operation (which comes earliest in the IR) is special as both
// operands will come from Ops, rather than just one with the other being
@@ -821,7 +821,7 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
if (ExpressionChanged == I)
break;
ExpressionChanged->moveBefore(I);
- ExpressionChanged = cast<BinaryOperator>(*ExpressionChanged->use_begin());
+ ExpressionChanged = cast<BinaryOperator>(*ExpressionChanged->user_begin());
} while (1);
// Throw away any left over nodes from the original expression.
@@ -863,8 +863,7 @@ static Value *NegateValue(Value *V, Instruction *BI) {
// Okay, we need to materialize a negated version of V with an instruction.
// Scan the use lists of V to see if we have one already.
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
- User *U = *UI;
+ for (User *U : V->users()) {
if (!BinaryOperator::isNeg(U)) continue;
// We found one! Now we have to make sure that the definition dominates
@@ -914,8 +913,8 @@ static bool ShouldBreakUpSubtract(Instruction *Sub) {
isReassociableOp(Sub->getOperand(1), Instruction::Sub))
return true;
if (Sub->hasOneUse() &&
- (isReassociableOp(Sub->use_back(), Instruction::Add) ||
- isReassociableOp(Sub->use_back(), Instruction::Sub)))
+ (isReassociableOp(Sub->user_back(), Instruction::Add) ||
+ isReassociableOp(Sub->user_back(), Instruction::Sub)))
return true;
return false;
@@ -997,7 +996,7 @@ static Value *EmitAddTreeOfValues(Instruction *I,
/// remove Factor from the tree and return the new tree.
Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
BinaryOperator *BO = isReassociableOp(V, Instruction::Mul);
- if (!BO) return 0;
+ if (!BO) return nullptr;
SmallVector<RepeatedValue, 8> Tree;
MadeChange |= LinearizeExprTree(BO, Tree);
@@ -1031,7 +1030,7 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
if (!FoundFactor) {
// Make sure to restore the operands to the expression tree.
RewriteExprTree(BO, Factors);
- return 0;
+ return nullptr;
}
BasicBlock::iterator InsertPt = BO; ++InsertPt;
@@ -1116,7 +1115,7 @@ static Value *OptimizeAndOrXor(unsigned Opcode,
++NumAnnihil;
}
}
- return 0;
+ return nullptr;
}
 /// Helper function of CombineXorOpnd(). It creates a bitwise-and
@@ -1137,7 +1136,7 @@ static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd,
}
return Opnd;
}
- return 0;
+ return nullptr;
}
// Helper function of OptimizeXor(). It tries to simplify "Opnd1 ^ ConstOpnd"
@@ -1263,7 +1262,7 @@ Value *Reassociate::OptimizeXor(Instruction *I,
return V;
if (Ops.size() == 1)
- return 0;
+ return nullptr;
SmallVector<XorOpnd, 8> Opnds;
SmallVector<XorOpnd*, 8> OpndPtrs;
@@ -1293,10 +1292,10 @@ Value *Reassociate::OptimizeXor(Instruction *I,
// the same symbolic value cluster together. For instance, the input operand
// sequence ("x | 123", "y & 456", "x & 789") will be sorted into:
// ("x | 123", "x & 789", "y & 456").
- std::sort(OpndPtrs.begin(), OpndPtrs.end(), XorOpnd::PtrSortFunctor());
+ std::stable_sort(OpndPtrs.begin(), OpndPtrs.end(), XorOpnd::PtrSortFunctor());
// Step 3: Combine adjacent operands
- XorOpnd *PrevOpnd = 0;
+ XorOpnd *PrevOpnd = nullptr;
bool Changed = false;
for (unsigned i = 0, e = Opnds.size(); i < e; i++) {
XorOpnd *CurrOpnd = OpndPtrs[i];
@@ -1330,7 +1329,7 @@ Value *Reassociate::OptimizeXor(Instruction *I,
PrevOpnd = CurrOpnd;
} else {
CurrOpnd->Invalidate();
- PrevOpnd = 0;
+ PrevOpnd = nullptr;
}
Changed = true;
}
@@ -1360,7 +1359,7 @@ Value *Reassociate::OptimizeXor(Instruction *I,
}
}
- return 0;
+ return nullptr;
}
/// OptimizeAdd - Optimize a series of operands to an 'add' instruction. This
@@ -1369,11 +1368,10 @@ Value *Reassociate::OptimizeXor(Instruction *I,
Value *Reassociate::OptimizeAdd(Instruction *I,
SmallVectorImpl<ValueEntry> &Ops) {
// Scan the operand lists looking for X and -X pairs. If we find any, we
- // can simplify the expression. X+-X == 0. While we're at it, scan for any
+ // can simplify expressions like X+-X == 0 and X+~X ==-1. While we're at it,
+ // scan for any
// duplicates. We want to canonicalize Y+Y+Y+Z -> 3*Y+Z.
- //
- // TODO: We could handle "X + ~X" -> "-1" if we wanted, since "-X = ~X+1".
- //
+
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
Value *TheOp = Ops[i].Op;
// Check to see if we've seen this operand before. If so, we factor all
@@ -1413,19 +1411,28 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
continue;
}
- // Check for X and -X in the operand list.
- if (!BinaryOperator::isNeg(TheOp))
+ // Check for X and -X or X and ~X in the operand list.
+ if (!BinaryOperator::isNeg(TheOp) && !BinaryOperator::isNot(TheOp))
continue;
- Value *X = BinaryOperator::getNegArgument(TheOp);
+ Value *X = nullptr;
+ if (BinaryOperator::isNeg(TheOp))
+ X = BinaryOperator::getNegArgument(TheOp);
+ else if (BinaryOperator::isNot(TheOp))
+ X = BinaryOperator::getNotArgument(TheOp);
+
unsigned FoundX = FindInOperandList(Ops, i, X);
if (FoundX == i)
continue;
// Remove X and -X from the operand list.
- if (Ops.size() == 2)
+ if (Ops.size() == 2 && BinaryOperator::isNeg(TheOp))
return Constant::getNullValue(X->getType());
+ // Remove X and ~X from the operand list.
+ if (Ops.size() == 2 && BinaryOperator::isNot(TheOp))
+ return Constant::getAllOnesValue(X->getType());
+
Ops.erase(Ops.begin()+i);
if (i < FoundX)
--FoundX;
@@ -1435,6 +1442,13 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
++NumAnnihil;
--i; // Revisit element.
e -= 2; // Removed two elements.
+
+    // If we removed X and ~X, append -1 to the operand list.
+ if (BinaryOperator::isNot(TheOp)) {
+ Value *V = Constant::getAllOnesValue(X->getType());
+ Ops.insert(Ops.end(), ValueEntry(getRank(V), V));
+ e += 1;
+ }
}
// Scan the operand list, checking to see if there are any common factors
@@ -1447,7 +1461,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
// Keep track of each multiply we see, to avoid triggering on (X*4)+(X*4)
// where they are actually the same multiply.
unsigned MaxOcc = 0;
- Value *MaxOccVal = 0;
+ Value *MaxOccVal = nullptr;
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
BinaryOperator *BOp = isReassociableOp(Ops[i].Op, Instruction::Mul);
if (!BOp)
@@ -1545,20 +1559,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
Ops.insert(Ops.begin(), ValueEntry(getRank(V2), V2));
}
- return 0;
-}
-
-namespace {
- /// \brief Predicate tests whether a ValueEntry's op is in a map.
- struct IsValueInMap {
- const DenseMap<Value *, unsigned> &Map;
-
- IsValueInMap(const DenseMap<Value *, unsigned> &Map) : Map(Map) {}
-
- bool operator()(const ValueEntry &Entry) {
- return Map.find(Entry.Op) != Map.end();
- }
- };
+ return nullptr;
}
/// \brief Build up a vector of value/power pairs factoring a product.
@@ -1619,7 +1620,7 @@ bool Reassociate::collectMultiplyFactors(SmallVectorImpl<ValueEntry> &Ops,
  // below our minimum of '4'.
assert(FactorPowerSum >= 4);
- std::sort(Factors.begin(), Factors.end(), Factor::PowerDescendingSorter());
+ std::stable_sort(Factors.begin(), Factors.end(), Factor::PowerDescendingSorter());
return true;
}
@@ -1703,14 +1704,14 @@ Value *Reassociate::OptimizeMul(BinaryOperator *I,
// We can only optimize the multiplies when there is a chain of more than
// three, such that a balanced tree might require fewer total multiplies.
if (Ops.size() < 4)
- return 0;
+ return nullptr;
// Try to turn linear trees of multiplies without other uses of the
// intermediate stages into minimal multiply DAGs with perfect sub-expression
// re-use.
SmallVector<Factor, 4> Factors;
if (!collectMultiplyFactors(Ops, Factors))
- return 0; // All distinct factors, so nothing left for us to do.
+ return nullptr; // All distinct factors, so nothing left for us to do.
IRBuilder<> Builder(I);
Value *V = buildMinimalMultiplyDAG(Builder, Factors);
@@ -1719,14 +1720,14 @@ Value *Reassociate::OptimizeMul(BinaryOperator *I,
ValueEntry NewEntry = ValueEntry(getRank(V), V);
Ops.insert(std::lower_bound(Ops.begin(), Ops.end(), NewEntry), NewEntry);
- return 0;
+ return nullptr;
}
Value *Reassociate::OptimizeExpression(BinaryOperator *I,
SmallVectorImpl<ValueEntry> &Ops) {
// Now that we have the linearized expression tree, try to optimize it.
// Start by folding any constants that we found.
- Constant *Cst = 0;
+ Constant *Cst = nullptr;
unsigned Opcode = I->getOpcode();
while (!Ops.empty() && isa<Constant>(Ops.back().Op)) {
Constant *C = cast<Constant>(Ops.pop_back_val().Op);
@@ -1776,7 +1777,7 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
if (Ops.size() != NumOps)
return OptimizeExpression(I, Ops);
- return 0;
+ return nullptr;
}
/// EraseInst - Zap the given instruction, adding interesting operands to the
@@ -1795,9 +1796,9 @@ void Reassociate::EraseInst(Instruction *I) {
// If this is a node in an expression tree, climb to the expression root
// and add that since that's where optimization actually happens.
unsigned Opcode = Op->getOpcode();
- while (Op->hasOneUse() && Op->use_back()->getOpcode() == Opcode &&
+ while (Op->hasOneUse() && Op->user_back()->getOpcode() == Opcode &&
Visited.insert(Op))
- Op = Op->use_back();
+ Op = Op->user_back();
RedoInsts.insert(Op);
}
}
@@ -1815,8 +1816,8 @@ void Reassociate::OptimizeInst(Instruction *I) {
// is used by a reassociable multiply or add, turn into a multiply.
if (isReassociableOp(I->getOperand(0), Instruction::Mul) ||
(I->hasOneUse() &&
- (isReassociableOp(I->use_back(), Instruction::Mul) ||
- isReassociableOp(I->use_back(), Instruction::Add)))) {
+ (isReassociableOp(I->user_back(), Instruction::Mul) ||
+ isReassociableOp(I->user_back(), Instruction::Add)))) {
Instruction *NI = ConvertShiftToMul(I);
RedoInsts.insert(I);
MadeChange = true;
@@ -1869,7 +1870,7 @@ void Reassociate::OptimizeInst(Instruction *I) {
// and if this is not an inner node of a multiply tree.
if (isReassociableOp(I->getOperand(1), Instruction::Mul) &&
(!I->hasOneUse() ||
- !isReassociableOp(I->use_back(), Instruction::Mul))) {
+ !isReassociableOp(I->user_back(), Instruction::Mul))) {
Instruction *NI = LowerNegateToMultiply(I);
RedoInsts.insert(I);
MadeChange = true;
@@ -1885,13 +1886,13 @@ void Reassociate::OptimizeInst(Instruction *I) {
// If this is an interior node of a reassociable tree, ignore it until we
// get to the root of the tree, to avoid N^2 analysis.
unsigned Opcode = BO->getOpcode();
- if (BO->hasOneUse() && BO->use_back()->getOpcode() == Opcode)
+ if (BO->hasOneUse() && BO->user_back()->getOpcode() == Opcode)
return;
// If this is an add tree that is used by a sub instruction, ignore it
// until we process the subtract.
if (BO->hasOneUse() && BO->getOpcode() == Instruction::Add &&
- cast<Instruction>(BO->use_back())->getOpcode() == Instruction::Sub)
+ cast<Instruction>(BO->user_back())->getOpcode() == Instruction::Sub)
return;
ReassociateExpression(BO);
@@ -1943,7 +1944,7 @@ void Reassociate::ReassociateExpression(BinaryOperator *I) {
// In this case we reassociate to put the negation on the outside so that we
// can fold the negation into the add: (-X)*Y + Z -> Z-X*Y
if (I->getOpcode() == Instruction::Mul && I->hasOneUse() &&
- cast<Instruction>(I->use_back())->getOpcode() == Instruction::Add &&
+ cast<Instruction>(I->user_back())->getOpcode() == Instruction::Add &&
isa<ConstantInt>(Ops.back().Op) &&
cast<ConstantInt>(Ops.back().Op)->isAllOnesValue()) {
ValueEntry Tmp = Ops.pop_back_val();
@@ -1972,6 +1973,9 @@ void Reassociate::ReassociateExpression(BinaryOperator *I) {
}
bool Reassociate::runOnFunction(Function &F) {
+ if (skipOptnoneFunction(F))
+ return false;
+
// Calculate the rank map for F
BuildRankMap(F);
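A minimal sketch (hypothetical function) of the new X + ~X handling that the OptimizeAdd changes above introduce: because -X == ~X + 1, the sum X + ~X is always the all-ones value, and reassociation can now fold it without knowing anything about X.

    #include <cstdint>

    int32_t allOnes(int32_t x) {   // hypothetical example
      return x + ~x;               // expected to fold to -1 after the change above
    }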
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp b/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
index 07f540a30127..b6023e2ce789 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -16,20 +16,21 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "reg2mem"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
#include "llvm/Transforms/Utils/Local.h"
#include <list>
using namespace llvm;
+#define DEBUG_TYPE "reg2mem"
+
STATISTIC(NumRegsDemoted, "Number of registers demoted");
STATISTIC(NumPhisDemoted, "Number of phi-nodes demoted");
@@ -40,23 +41,22 @@ namespace {
initializeRegToMemPass(*PassRegistry::getPassRegistry());
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequiredID(BreakCriticalEdgesID);
AU.addPreservedID(BreakCriticalEdgesID);
}
- bool valueEscapes(const Instruction *Inst) const {
- const BasicBlock *BB = Inst->getParent();
- for (Value::const_use_iterator UI = Inst->use_begin(),E = Inst->use_end();
- UI != E; ++UI) {
- const Instruction *I = cast<Instruction>(*UI);
- if (I->getParent() != BB || isa<PHINode>(I))
+ bool valueEscapes(const Instruction *Inst) const {
+ const BasicBlock *BB = Inst->getParent();
+ for (const User *U : Inst->users()) {
+ const Instruction *UI = cast<Instruction>(U);
+ if (UI->getParent() != BB || isa<PHINode>(UI))
return true;
}
return false;
}
- virtual bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
};
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
index 43647207c2cc..90c3520c8323 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -17,7 +17,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "sccp"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
@@ -26,13 +25,13 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/InstVisitor.h"
#include "llvm/Pass.h"
-#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -42,6 +41,8 @@
#include <algorithm>
using namespace llvm;
+#define DEBUG_TYPE "sccp"
+
STATISTIC(NumInstRemoved, "Number of instructions removed");
STATISTIC(NumDeadBlocks , "Number of basic blocks unreachable");
@@ -81,7 +82,7 @@ class LatticeVal {
}
public:
- LatticeVal() : Val(0, undefined) {}
+ LatticeVal() : Val(nullptr, undefined) {}
bool isUndefined() const { return getLatticeValue() == undefined; }
bool isConstant() const {
@@ -133,7 +134,7 @@ public:
ConstantInt *getConstantInt() const {
if (isConstant())
return dyn_cast<ConstantInt>(getConstant());
- return 0;
+ return nullptr;
}
void markForcedConstant(Constant *V) {
@@ -153,7 +154,7 @@ namespace {
/// Constant Propagation.
///
class SCCPSolver : public InstVisitor<SCCPSolver> {
- const DataLayout *TD;
+ const DataLayout *DL;
const TargetLibraryInfo *TLI;
SmallPtrSet<BasicBlock*, 8> BBExecutable; // The BBs that are executable.
DenseMap<Value*, LatticeVal> ValueState; // The state each value is in.
@@ -205,8 +206,8 @@ class SCCPSolver : public InstVisitor<SCCPSolver> {
typedef std::pair<BasicBlock*, BasicBlock*> Edge;
DenseSet<Edge> KnownFeasibleEdges;
public:
- SCCPSolver(const DataLayout *td, const TargetLibraryInfo *tli)
- : TD(td), TLI(tli) {}
+ SCCPSolver(const DataLayout *DL, const TargetLibraryInfo *tli)
+ : DL(DL), TLI(tli) {}
/// MarkBlockExecutable - This method can be used by clients to mark all of
/// the blocks that are known to be intrinsically live in the processed unit.
@@ -403,7 +404,7 @@ private:
if (Constant *C = dyn_cast<Constant>(V)) {
Constant *Elt = C->getAggregateElement(i);
- if (Elt == 0)
+ if (!Elt)
LV.markOverdefined(); // Unknown sort of constant.
else if (isa<UndefValue>(Elt))
; // Undef values remain undefined.
@@ -491,10 +492,11 @@ private:
}
void visitCallSite (CallSite CS);
void visitResumeInst (TerminatorInst &I) { /*returns void*/ }
- void visitUnwindInst (TerminatorInst &I) { /*returns void*/ }
void visitUnreachableInst(TerminatorInst &I) { /*returns void*/ }
void visitFenceInst (FenceInst &I) { /*returns void*/ }
- void visitAtomicCmpXchgInst (AtomicCmpXchgInst &I) { markOverdefined(&I); }
+ void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
+ markAnythingOverdefined(&I);
+ }
void visitAtomicRMWInst (AtomicRMWInst &I) { markOverdefined(&I); }
void visitAllocaInst (Instruction &I) { markOverdefined(&I); }
void visitVAArgInst (Instruction &I) { markAnythingOverdefined(&I); }
@@ -523,7 +525,7 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
LatticeVal BCValue = getValueState(BI->getCondition());
ConstantInt *CI = BCValue.getConstantInt();
- if (CI == 0) {
+ if (!CI) {
// Overdefined condition variables, and branches on unfoldable constant
// conditions, mean the branch could go either way.
if (!BCValue.isUndefined())
@@ -550,7 +552,7 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
LatticeVal SCValue = getValueState(SI->getCondition());
ConstantInt *CI = SCValue.getConstantInt();
- if (CI == 0) { // Overdefined or undefined condition?
+ if (!CI) { // Overdefined or undefined condition?
// All destinations are executable!
if (!SCValue.isUndefined())
Succs.assign(TI.getNumSuccessors(), true);
@@ -595,7 +597,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
// Overdefined condition variables mean the branch could go either way,
// undef conditions mean that neither edge is feasible yet.
ConstantInt *CI = BCValue.getConstantInt();
- if (CI == 0)
+ if (!CI)
return !BCValue.isUndefined();
// Constant condition variables mean the branch can only go a single way.
@@ -613,7 +615,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
LatticeVal SCValue = getValueState(SI->getCondition());
ConstantInt *CI = SCValue.getConstantInt();
- if (CI == 0)
+ if (!CI)
return !SCValue.isUndefined();
return SI->findCaseValue(CI).getCaseSuccessor() == To;
@@ -627,7 +629,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
#ifndef NDEBUG
dbgs() << "Unknown terminator instruction: " << *TI << '\n';
#endif
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
// visit Implementations - Something changed in this instruction, either an
@@ -668,7 +670,7 @@ void SCCPSolver::visitPHINode(PHINode &PN) {
// constant. If they are constant and don't agree, the PHI is overdefined.
// If there are no executable operands, the PHI remains undefined.
//
- Constant *OperandVal = 0;
+ Constant *OperandVal = nullptr;
for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
LatticeVal IV = getValueState(PN.getIncomingValue(i));
if (IV.isUndefined()) continue; // Doesn't influence PHI node.
@@ -679,7 +681,7 @@ void SCCPSolver::visitPHINode(PHINode &PN) {
if (IV.isOverdefined()) // PHI node becomes overdefined!
return markOverdefined(&PN);
- if (OperandVal == 0) { // Grab the first value.
+ if (!OperandVal) { // Grab the first value.
OperandVal = IV.getConstant();
continue;
}
@@ -775,7 +777,7 @@ void SCCPSolver::visitExtractValueInst(ExtractValueInst &EVI) {
void SCCPSolver::visitInsertValueInst(InsertValueInst &IVI) {
StructType *STy = dyn_cast<StructType>(IVI.getType());
- if (STy == 0)
+ if (!STy)
return markOverdefined(&IVI);
// If this has more than one index, we can't handle it, drive all results to
@@ -863,7 +865,7 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) {
// If this is an AND or OR with 0 or -1, it doesn't matter that the other
// operand is overdefined.
if (I.getOpcode() == Instruction::And || I.getOpcode() == Instruction::Or) {
- LatticeVal *NonOverdefVal = 0;
+ LatticeVal *NonOverdefVal = nullptr;
if (!V1State.isOverdefined())
NonOverdefVal = &V1State;
else if (!V2State.isOverdefined())
@@ -1067,7 +1069,7 @@ void SCCPSolver::visitLoadInst(LoadInst &I) {
}
// Transform load from a constant into a constant if possible.
- if (Constant *C = ConstantFoldLoadFromConstPtr(Ptr, TD))
+ if (Constant *C = ConstantFoldLoadFromConstPtr(Ptr, DL))
return markConstant(IV, &I, C);
// Otherwise we cannot say for certain what value this load will produce.
@@ -1082,7 +1084,7 @@ void SCCPSolver::visitCallSite(CallSite CS) {
// The common case is that we aren't tracking the callee, either because we
// are not doing interprocedural analysis or the callee is indirect, or is
// external. Handle these cases first.
- if (F == 0 || F->isDeclaration()) {
+ if (!F || F->isDeclaration()) {
CallOverdefined:
// Void return and not tracking callee, just bail.
if (I->getType()->isVoidTy()) return;
@@ -1181,10 +1183,9 @@ void SCCPSolver::Solve() {
// since all of its users will have already been marked as overdefined
// Update all of the users of this instruction's value.
//
- for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
- UI != E; ++UI)
- if (Instruction *I = dyn_cast<Instruction>(*UI))
- OperandChangedState(I);
+ for (User *U : I->users())
+ if (Instruction *UI = dyn_cast<Instruction>(U))
+ OperandChangedState(UI);
}
// Process the instruction work list.
@@ -1201,10 +1202,9 @@ void SCCPSolver::Solve() {
// Update all of the users of this instruction's value.
//
if (I->getType()->isStructTy() || !getValueState(I).isOverdefined())
- for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
- UI != E; ++UI)
- if (Instruction *I = dyn_cast<Instruction>(*UI))
- OperandChangedState(I);
+ for (User *U : I->users())
+ if (Instruction *UI = dyn_cast<Instruction>(U))
+ OperandChangedState(UI);
}
// Process the basic block work list.
@@ -1499,7 +1499,7 @@ namespace {
/// Sparse Conditional Constant Propagator.
///
struct SCCP : public FunctionPass {
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetLibraryInfo>();
}
static char ID; // Pass identification, replacement for typeid
@@ -1510,7 +1510,7 @@ namespace {
// runOnFunction - Run the Sparse Conditional Constant Propagation
// algorithm, and return true if the function was modified.
//
- bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
};
} // end anonymous namespace
@@ -1553,10 +1553,14 @@ static void DeleteInstructionInBlock(BasicBlock *BB) {
// and return true if the function was modified.
//
bool SCCP::runOnFunction(Function &F) {
+ if (skipOptnoneFunction(F))
+ return false;
+
DEBUG(dbgs() << "SCCP on function '" << F.getName() << "'\n");
- const DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
+ const DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
- SCCPSolver Solver(TD, TLI);
+ SCCPSolver Solver(DL, TLI);
// Mark the first block of the function as being executable.
Solver.MarkBlockExecutable(F.begin());
@@ -1628,14 +1632,14 @@ namespace {
/// Constant Propagation.
///
struct IPSCCP : public ModulePass {
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetLibraryInfo>();
}
static char ID;
IPSCCP() : ModulePass(ID) {
initializeIPSCCPPass(*PassRegistry::getPassRegistry());
}
- bool runOnModule(Module &M);
+ bool runOnModule(Module &M) override;
};
} // end anonymous namespace
@@ -1658,21 +1662,20 @@ static bool AddressIsTaken(const GlobalValue *GV) {
// Delete any dead constantexpr klingons.
GV->removeDeadConstantUsers();
- for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end();
- UI != E; ++UI) {
- const User *U = *UI;
- if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ for (const Use &U : GV->uses()) {
+ const User *UR = U.getUser();
+ if (const StoreInst *SI = dyn_cast<StoreInst>(UR)) {
if (SI->getOperand(0) == GV || SI->isVolatile())
return true; // Storing addr of GV.
- } else if (isa<InvokeInst>(U) || isa<CallInst>(U)) {
+ } else if (isa<InvokeInst>(UR) || isa<CallInst>(UR)) {
// Make sure we are calling the function, not passing the address.
- ImmutableCallSite CS(cast<Instruction>(U));
- if (!CS.isCallee(UI))
+ ImmutableCallSite CS(cast<Instruction>(UR));
+ if (!CS.isCallee(&U))
return true;
- } else if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ } else if (const LoadInst *LI = dyn_cast<LoadInst>(UR)) {
if (LI->isVolatile())
return true;
- } else if (isa<BlockAddress>(U)) {
+ } else if (isa<BlockAddress>(UR)) {
// blockaddress doesn't take the address of the function, it takes addr
// of label.
} else {
@@ -1683,9 +1686,10 @@ static bool AddressIsTaken(const GlobalValue *GV) {
}
bool IPSCCP::runOnModule(Module &M) {
- const DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
- SCCPSolver Solver(TD, TLI);
+ SCCPSolver Solver(DL, TLI);
// AddressTakenFunctions - This set keeps track of the address-taken functions
// that are in the input. As IPSCCP runs through and simplifies code,
@@ -1834,8 +1838,9 @@ bool IPSCCP::runOnModule(Module &M) {
for (unsigned i = 0, e = BlocksToErase.size(); i != e; ++i) {
// If there are any PHI nodes in this successor, drop entries for BB now.
BasicBlock *DeadBB = BlocksToErase[i];
- for (Value::use_iterator UI = DeadBB->use_begin(), UE = DeadBB->use_end();
- UI != UE; ) {
+ for (Value::user_iterator UI = DeadBB->user_begin(),
+ UE = DeadBB->user_end();
+ UI != UE;) {
// Grab the user and then increment the iterator early, as the user
// will be deleted. Step past all adjacent uses from the same user.
Instruction *I = dyn_cast<Instruction>(*UI);
@@ -1925,7 +1930,7 @@ bool IPSCCP::runOnModule(Module &M) {
"Overdefined values should have been taken out of the map!");
DEBUG(dbgs() << "Found that GV '" << GV->getName() << "' is constant!\n");
while (!GV->use_empty()) {
- StoreInst *SI = cast<StoreInst>(GV->use_back());
+ StoreInst *SI = cast<StoreInst>(GV->user_back());
SI->eraseFromParent();
}
M.getGlobalList().erase(GV);
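MemCpyOpt, SCCP and IPSCCP in this change set all switch from fetching DataLayout directly to going through the new DataLayoutPass wrapper, keeping a null pointer when the module provides no layout. A minimal pass skeleton showing that idiom (hypothetical pass name; a sketch only, not part of the patch):

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Function.h"
    #include "llvm/Pass.h"
    using namespace llvm;

    namespace {
    struct LayoutUser : public FunctionPass {
      static char ID;
      LayoutUser() : FunctionPass(ID) {}
      bool runOnFunction(Function &F) override {
        // New idiom: ask for DataLayoutPass and tolerate its absence.
        DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
        const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
        // Any size or alignment query must be guarded: DL may be null.
        return DL != nullptr && false;
      }
    };
    char LayoutUser::ID = 0;
    }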
diff --git a/contrib/llvm/lib/Transforms/Scalar/SROA.cpp b/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
index 9f3fc83d129d..8c7f253290ba 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -23,40 +23,48 @@
///
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "sroa"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/PtrUseVisitor.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/DIBuilder.h"
-#include "llvm/DebugInfo.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Operator.h"
-#include "llvm/InstVisitor.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TimeValue.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
+
+#if __cplusplus >= 201103L && !defined(NDEBUG)
+// We only use this for a debug check in C++11
+#include <random>
+#endif
+
using namespace llvm;
+#define DEBUG_TYPE "sroa"
+
STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions per alloca");
@@ -73,6 +81,16 @@ STATISTIC(NumVectorized, "Number of vectorized aggregates");
static cl::opt<bool>
ForceSSAUpdater("force-ssa-updater", cl::init(false), cl::Hidden);
+/// Hidden option to enable randomly shuffling the slices to help uncover
+/// instability in their order.
+static cl::opt<bool> SROARandomShuffleSlices("sroa-random-shuffle-slices",
+ cl::init(false), cl::Hidden);
+
+/// Hidden option to experiment with completely strict handling of inbounds
+/// GEPs.
+static cl::opt<bool> SROAStrictInbounds("sroa-strict-inbounds",
+ cl::init(false), cl::Hidden);
+
namespace {
/// \brief A custom IRBuilder inserter which prefixes all names if they are
/// preserved.
@@ -142,8 +160,8 @@ public:
Use *getUse() const { return UseAndIsSplittable.getPointer(); }
- bool isDead() const { return getUse() == 0; }
- void kill() { UseAndIsSplittable.setPointer(0); }
+ bool isDead() const { return getUse() == nullptr; }
+ void kill() { UseAndIsSplittable.setPointer(nullptr); }
/// \brief Support for ordering ranges.
///
@@ -244,8 +262,8 @@ public:
void printUse(raw_ostream &OS, const_iterator I,
StringRef Indent = " ") const;
void print(raw_ostream &OS) const;
- void LLVM_ATTRIBUTE_NOINLINE LLVM_ATTRIBUTE_USED dump(const_iterator I) const;
- void LLVM_ATTRIBUTE_NOINLINE LLVM_ATTRIBUTE_USED dump() const;
+ void dump(const_iterator I) const;
+ void dump() const;
#endif
private:
@@ -303,7 +321,7 @@ static Value *foldSelectInst(SelectInst &SI) {
if (SI.getOperand(1) == SI.getOperand(2))
return SI.getOperand(1);
- return 0;
+ return nullptr;
}
/// \brief Builder for the alloca slices.
@@ -339,7 +357,7 @@ private:
bool IsSplittable = false) {
// Completely skip uses which have a zero size or start either before or
// past the end of the allocation.
- if (Size == 0 || Offset.isNegative() || Offset.uge(AllocSize)) {
+ if (Size == 0 || Offset.uge(AllocSize)) {
DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @" << Offset
<< " which has zero size or starts outside of the "
<< AllocSize << " byte alloca:\n"
@@ -380,6 +398,43 @@ private:
if (GEPI.use_empty())
return markAsDead(GEPI);
+ if (SROAStrictInbounds && GEPI.isInBounds()) {
+ // FIXME: This is a manually un-factored variant of the basic code inside
+ // of GEPs with checking of the inbounds invariant specified in the
+ // langref in a very strict sense. If we ever want to enable
+ // SROAStrictInbounds, this code should be factored cleanly into
+ // PtrUseVisitor, but it is easier to experiment with SROAStrictInbounds
+      // by writing out the code here where we have the underlying allocation
+ // size readily available.
+ APInt GEPOffset = Offset;
+ for (gep_type_iterator GTI = gep_type_begin(GEPI),
+ GTE = gep_type_end(GEPI);
+ GTI != GTE; ++GTI) {
+ ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
+ if (!OpC)
+ break;
+
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ unsigned ElementIdx = OpC->getZExtValue();
+ const StructLayout *SL = DL.getStructLayout(STy);
+ GEPOffset +=
+ APInt(Offset.getBitWidth(), SL->getElementOffset(ElementIdx));
+ } else {
+ // For array or vector indices, scale the index by the size of the type.
+ APInt Index = OpC->getValue().sextOrTrunc(Offset.getBitWidth());
+ GEPOffset += Index * APInt(Offset.getBitWidth(),
+ DL.getTypeAllocSize(GTI.getIndexedType()));
+ }
+
+ // If this index has computed an intermediate pointer which is not
+ // inbounds, then the result of the GEP is a poison value and we can
+ // delete it and all uses.
+ if (GEPOffset.ugt(AllocSize))
+ return markAsDead(GEPI);
+ }
+ }
+
return Base::visitGetElementPtrInst(GEPI);
}
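
The strict-inbounds check above folds every constant GEP index into a running byte offset: a struct index adds its field offset from the StructLayout, while an array or vector index is scaled by the element's allocation size, and the GEP is killed as poison as soon as an intermediate offset passes the allocation size. A purely illustrative, standalone C++ sketch of that arithmetic, using hypothetical layout numbers rather than a real DataLayout:

    #include <cstdint>
    #include <iostream>

    int main() {
      // Hypothetical alloca: [4 x { i32, i64 }] with the i64 at byte 8,
      // so each array element is 16 bytes and the alloca is 64 bytes.
      const uint64_t AllocSize = 64;
      const uint64_t ElementSize = 16;
      const uint64_t FieldOffset[2] = {0, 8};

      // GEP indices: array element 3, then struct field 1.
      uint64_t GEPOffset = 0;
      GEPOffset += 3 * ElementSize; // array index scales by the element size
      GEPOffset += FieldOffset[1];  // struct index adds its field offset

      // Mirrors the GEPOffset.ugt(AllocSize) test: an offset strictly past
      // the end of the allocation would make the GEP poison.
      std::cout << GEPOffset
                << (GEPOffset > AllocSize ? " (out of bounds)\n"
                                          : " (in bounds)\n");
      return 0;
    }
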
@@ -426,8 +481,7 @@ private:
// risk of overflow.
// FIXME: We should instead consider the pointer to have escaped if this
// function is being instrumented for addressing bugs or race conditions.
- if (Offset.isNegative() || Size > AllocSize ||
- Offset.ugt(AllocSize - Size)) {
+ if (Size > AllocSize || Offset.ugt(AllocSize - Size)) {
DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @" << Offset
<< " which extends past the end of the " << AllocSize
<< " byte alloca:\n"
@@ -446,7 +500,7 @@ private:
assert(II.getRawDest() == *U && "Pointer use is not the destination?");
ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
if ((Length && Length->getValue() == 0) ||
- (IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize)))
+ (IsOffsetKnown && Offset.uge(AllocSize)))
// Zero-length mem transfer intrinsics can be ignored entirely.
return markAsDead(II);
@@ -461,14 +515,30 @@ private:
void visitMemTransferInst(MemTransferInst &II) {
ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
- if ((Length && Length->getValue() == 0) ||
- (IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize)))
+ if (Length && Length->getValue() == 0)
// Zero-length mem transfer intrinsics can be ignored entirely.
return markAsDead(II);
+ // Because we can visit these intrinsics twice, also check to see if the
+ // first time marked this instruction as dead. If so, skip it.
+ if (VisitedDeadInsts.count(&II))
+ return;
+
if (!IsOffsetKnown)
return PI.setAborted(&II);
+ // This side of the transfer is completely out-of-bounds, and so we can
+ // nuke the entire transfer. However, we also need to nuke the other side
+ // if already added to our partitions.
+ // FIXME: Yet another place we really should bypass this when
+ // instrumenting for ASan.
+ if (Offset.uge(AllocSize)) {
+ SmallDenseMap<Instruction *, unsigned>::iterator MTPI = MemTransferSliceMap.find(&II);
+ if (MTPI != MemTransferSliceMap.end())
+ S.Slices[MTPI->second].kill();
+ return markAsDead(II);
+ }
+
uint64_t RawOffset = Offset.getLimitedValue();
uint64_t Size = Length ? Length->getLimitedValue()
: AllocSize - RawOffset;
@@ -487,7 +557,7 @@ private:
// they both point to the same alloca.
bool Inserted;
SmallDenseMap<Instruction *, unsigned>::iterator MTPI;
- llvm::tie(MTPI, Inserted) =
+ std::tie(MTPI, Inserted) =
MemTransferSliceMap.insert(std::make_pair(&II, S.Slices.size()));
unsigned PrevIdx = MTPI->second;
if (!Inserted) {
@@ -546,7 +616,7 @@ private:
Size = 0;
do {
Instruction *I, *UsedI;
- llvm::tie(UsedI, I) = Uses.pop_back_val();
+ std::tie(UsedI, I) = Uses.pop_back_val();
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
Size = std::max(Size, DL.getTypeStoreSize(LI->getType()));
@@ -568,13 +638,12 @@ private:
return I;
}
- for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE;
- ++UI)
- if (Visited.insert(cast<Instruction>(*UI)))
- Uses.push_back(std::make_pair(I, cast<Instruction>(*UI)));
+ for (User *U : I->users())
+ if (Visited.insert(cast<Instruction>(U)))
+ Uses.push_back(std::make_pair(I, cast<Instruction>(U)));
} while (!Uses.empty());
- return 0;
+ return nullptr;
}
void visitPHINode(PHINode &PN) {
@@ -597,8 +666,7 @@ private:
// themselves which should be replaced with undef.
// FIXME: This should instead be escaped in the event we're instrumenting
// for address sanitization.
- if ((Offset.isNegative() && (-Offset).uge(PHISize)) ||
- (!Offset.isNegative() && Offset.uge(AllocSize))) {
+ if (Offset.uge(AllocSize)) {
S.DeadOperands.push_back(U);
return;
}
@@ -638,8 +706,7 @@ private:
// themselves which should be replaced with undef.
// FIXME: This should instead be escaped in the event we're instrumenting
// for address sanitization.
- if ((Offset.isNegative() && Offset.uge(SelectSize)) ||
- (!Offset.isNegative() && Offset.uge(AllocSize))) {
+ if (Offset.uge(AllocSize)) {
S.DeadOperands.push_back(U);
return;
}
@@ -658,7 +725,7 @@ AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
AI(AI),
#endif
- PointerEscapingInstr(0) {
+ PointerEscapingInstr(nullptr) {
SliceBuilder PB(DL, AI, *this);
SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
if (PtrI.isEscaped() || PtrI.isAborted()) {
@@ -674,6 +741,13 @@ AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
std::mem_fun_ref(&Slice::isDead)),
Slices.end());
+#if __cplusplus >= 201103L && !defined(NDEBUG)
+ if (SROARandomShuffleSlices) {
+ std::mt19937 MT(static_cast<unsigned>(sys::TimeValue::now().msec()));
+ std::shuffle(Slices.begin(), Slices.end(), MT);
+ }
+#endif
+
// Sort the uses. This arranges for the offsets to be in ascending order,
// and the sizes to be in descending order.
std::sort(Slices.begin(), Slices.end());
@@ -712,8 +786,10 @@ void AllocaSlices::print(raw_ostream &OS) const {
print(OS, I);
}
-void AllocaSlices::dump(const_iterator I) const { print(dbgs(), I); }
-void AllocaSlices::dump() const { print(dbgs()); }
+LLVM_DUMP_METHOD void AllocaSlices::dump(const_iterator I) const {
+ print(dbgs(), I);
+}
+LLVM_DUMP_METHOD void AllocaSlices::dump() const { print(dbgs()); }
#endif // !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -741,12 +817,10 @@ public:
// Retain the debug information attached to the alloca for use when
// rewriting loads and stores.
if (MDNode *DebugNode = MDNode::getIfExists(AI.getContext(), &AI)) {
- for (Value::use_iterator UI = DebugNode->use_begin(),
- UE = DebugNode->use_end();
- UI != UE; ++UI)
- if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI))
+ for (User *U : DebugNode->users())
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
DDIs.push_back(DDI);
- else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(*UI))
+ else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(U))
DVIs.push_back(DVI);
}
@@ -760,8 +834,8 @@ public:
DVIs.pop_back_val()->eraseFromParent();
}
- virtual bool isInstInList(Instruction *I,
- const SmallVectorImpl<Instruction*> &Insts) const {
+ bool isInstInList(Instruction *I,
+ const SmallVectorImpl<Instruction*> &Insts) const override {
Value *Ptr;
if (LoadInst *LI = dyn_cast<LoadInst>(I))
Ptr = LI->getOperand(0);
@@ -788,7 +862,7 @@ public:
return false;
}
- virtual void updateDebugInfo(Instruction *Inst) const {
+ void updateDebugInfo(Instruction *Inst) const override {
for (SmallVectorImpl<DbgDeclareInst *>::const_iterator I = DDIs.begin(),
E = DDIs.end(); I != E; ++I) {
DbgDeclareInst *DDI = *I;
@@ -800,7 +874,7 @@ public:
for (SmallVectorImpl<DbgValueInst *>::const_iterator I = DVIs.begin(),
E = DVIs.end(); I != E; ++I) {
DbgValueInst *DVI = *I;
- Value *Arg = 0;
+ Value *Arg = nullptr;
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
// If an argument is zero extended then use argument directly. The ZExt
// may be zapped by an optimization pass in future.
@@ -896,13 +970,13 @@ class SROA : public FunctionPass {
public:
SROA(bool RequiresDomTree = true)
: FunctionPass(ID), RequiresDomTree(RequiresDomTree),
- C(0), DL(0), DT(0) {
+ C(nullptr), DL(nullptr), DT(nullptr) {
initializeSROAPass(*PassRegistry::getPassRegistry());
}
- bool runOnFunction(Function &F);
- void getAnalysisUsage(AnalysisUsage &AU) const;
+ bool runOnFunction(Function &F) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
- const char *getPassName() const { return "SROA"; }
+ const char *getPassName() const override { return "SROA"; }
static char ID;
private:
@@ -915,6 +989,7 @@ private:
ArrayRef<AllocaSlices::iterator> SplitUses);
bool splitAlloca(AllocaInst &AI, AllocaSlices &S);
bool runOnAlloca(AllocaInst &AI);
+ void clobberUse(Use &U);
void deleteDeadInstructions(SmallPtrSet<AllocaInst *, 4> &DeletedAllocas);
bool promoteAllocas(Function &F);
};
@@ -928,7 +1003,7 @@ FunctionPass *llvm::createSROAPass(bool RequiresDomTree) {
INITIALIZE_PASS_BEGIN(SROA, "sroa", "Scalar Replacement Of Aggregates",
false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(SROA, "sroa", "Scalar Replacement Of Aggregates",
false, false)
@@ -937,8 +1012,12 @@ INITIALIZE_PASS_END(SROA, "sroa", "Scalar Replacement Of Aggregates",
static Type *findCommonType(AllocaSlices::const_iterator B,
AllocaSlices::const_iterator E,
uint64_t EndOffset) {
- Type *Ty = 0;
- bool IgnoreNonIntegralTypes = false;
+ Type *Ty = nullptr;
+ bool TyIsCommon = true;
+ IntegerType *ITy = nullptr;
+
+ // Note that we need to look at *every* alloca slice's Use to ensure we
+ // always get consistent results regardless of the order of slices.
for (AllocaSlices::const_iterator I = B; I != E; ++I) {
Use *U = I->getUse();
if (isa<IntrinsicInst>(*U->getUser()))
@@ -946,42 +1025,37 @@ static Type *findCommonType(AllocaSlices::const_iterator B,
if (I->beginOffset() != B->beginOffset() || I->endOffset() != EndOffset)
continue;
- Type *UserTy = 0;
+ Type *UserTy = nullptr;
if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
UserTy = LI->getType();
} else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
UserTy = SI->getValueOperand()->getType();
- } else {
- IgnoreNonIntegralTypes = true; // Give up on anything but an iN type.
- continue;
}
- if (IntegerType *ITy = dyn_cast<IntegerType>(UserTy)) {
+ if (IntegerType *UserITy = dyn_cast_or_null<IntegerType>(UserTy)) {
// If the type is larger than the partition, skip it. We only encounter
// this for split integer operations where we want to use the type of the
// entity causing the split. Also skip if the type is not a byte width
// multiple.
- if (ITy->getBitWidth() % 8 != 0 ||
- ITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))
+ if (UserITy->getBitWidth() % 8 != 0 ||
+ UserITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))
continue;
- // If we have found an integer type use covering the alloca, use that
- // regardless of the other types, as integers are often used for
- // a "bucket of bits" type.
- //
- // NB: This *must* be the only return from inside the loop so that the
- // order of slices doesn't impact the computed type.
- return ITy;
- } else if (IgnoreNonIntegralTypes) {
- continue;
+ // Track the largest bitwidth integer type used in this way in case there
+ // is no common type.
+ if (!ITy || ITy->getBitWidth() < UserITy->getBitWidth())
+ ITy = UserITy;
}
- if (Ty && Ty != UserTy)
- IgnoreNonIntegralTypes = true; // Give up on anything but an iN type.
-
- Ty = UserTy;
+ // To avoid depending on the order of slices, Ty and TyIsCommon must not
+ // depend on types skipped above.
+ if (!UserTy || (Ty && Ty != UserTy))
+ TyIsCommon = false; // Give up on anything but an iN type.
+ else
+ Ty = UserTy;
}
- return Ty;
+
+ return TyIsCommon ? Ty : ITy;
}
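
The rewritten findCommonType deliberately avoids returning from inside the loop: it records the type shared by every load and store in Ty/TyIsCommon, separately remembers the widest integer type seen in ITy as a fallback, and only picks between them after visiting every slice, so the result cannot depend on slice order. A simplified, hypothetical stand-in for that selection logic (the real routine also skips intrinsic uses and integer types wider than the partition):

    #include <iostream>
    #include <string>
    #include <vector>

    // Hypothetical stand-in for one slice use: a type name plus its bit
    // width when it is an integer type (0 means "not an integer").
    struct UseTy { std::string Name; unsigned IntBits; };

    static std::string pickType(const std::vector<UseTy> &Uses) {
      std::string Ty;          // candidate common type
      bool TyIsCommon = true;  // stays true only if every use agrees
      unsigned WidestBits = 0; // widest integer type seen, as a fallback
      std::string WidestInt;

      for (const UseTy &U : Uses) {
        if (U.IntBits > WidestBits) {
          WidestBits = U.IntBits;
          WidestInt = U.Name;
        }
        if (Ty.empty())
          Ty = U.Name;
        else if (Ty != U.Name)
          TyIsCommon = false;
      }
      // Exactly one decision point, after the loop, keeps the answer
      // independent of the order in which the uses were visited.
      return TyIsCommon ? Ty : WidestInt;
    }

    int main() {
      std::cout << pickType({{"float", 0}, {"i32", 32}, {"i64", 64}}) << "\n"; // i64
      std::cout << pickType({{"float", 0}, {"float", 0}}) << "\n";             // float
    }
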
/// PHI instructions that use an alloca and are subsequently loaded can be
@@ -1003,7 +1077,7 @@ static Type *findCommonType(AllocaSlices::const_iterator B,
/// FIXME: This should be hoisted into a generic utility, likely in
/// Transforms/Util/Local.h
static bool isSafePHIToSpeculate(PHINode &PN,
- const DataLayout *DL = 0) {
+ const DataLayout *DL = nullptr) {
// For now, we can only do this promotion if the load is in the same block
// as the PHI, and if there are no stores between the phi and load.
// TODO: Allow recursive phi users.
@@ -1011,10 +1085,9 @@ static bool isSafePHIToSpeculate(PHINode &PN,
BasicBlock *BB = PN.getParent();
unsigned MaxAlign = 0;
bool HaveLoad = false;
- for (Value::use_iterator UI = PN.use_begin(), UE = PN.use_end(); UI != UE;
- ++UI) {
- LoadInst *LI = dyn_cast<LoadInst>(*UI);
- if (LI == 0 || !LI->isSimple())
+ for (User *U : PN.users()) {
+ LoadInst *LI = dyn_cast<LoadInst>(U);
+ if (!LI || !LI->isSimple())
return false;
// For now we only allow loads in the same block as the PHI. This is
@@ -1057,7 +1130,7 @@ static bool isSafePHIToSpeculate(PHINode &PN,
// If this pointer is always safe to load, or if we can prove that there
// is already a load in the block, then we can move the load to the pred
// block.
- if (InVal->isDereferenceablePointer() ||
+ if (InVal->isDereferenceablePointer(DL) ||
isSafeToLoadUnconditionally(InVal, TI, MaxAlign, DL))
continue;
@@ -1077,13 +1150,13 @@ static void speculatePHINodeLoads(PHINode &PN) {
// Get the TBAA tag and alignment to use from one of the loads. It doesn't
// matter which one we get and if any differ.
- LoadInst *SomeLoad = cast<LoadInst>(*PN.use_begin());
+ LoadInst *SomeLoad = cast<LoadInst>(PN.user_back());
MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa);
unsigned Align = SomeLoad->getAlignment();
// Rewrite all loads of the PN to use the new PHI.
while (!PN.use_empty()) {
- LoadInst *LI = cast<LoadInst>(*PN.use_begin());
+ LoadInst *LI = cast<LoadInst>(PN.user_back());
LI->replaceAllUsesWith(NewPN);
LI->eraseFromParent();
}
@@ -1121,16 +1194,16 @@ static void speculatePHINodeLoads(PHINode &PN) {
///
/// We can do this to a select if its only uses are loads and if the operand
/// to the select can be loaded unconditionally.
-static bool isSafeSelectToSpeculate(SelectInst &SI, const DataLayout *DL = 0) {
+static bool isSafeSelectToSpeculate(SelectInst &SI,
+ const DataLayout *DL = nullptr) {
Value *TValue = SI.getTrueValue();
Value *FValue = SI.getFalseValue();
- bool TDerefable = TValue->isDereferenceablePointer();
- bool FDerefable = FValue->isDereferenceablePointer();
+ bool TDerefable = TValue->isDereferenceablePointer(DL);
+ bool FDerefable = FValue->isDereferenceablePointer(DL);
- for (Value::use_iterator UI = SI.use_begin(), UE = SI.use_end(); UI != UE;
- ++UI) {
- LoadInst *LI = dyn_cast<LoadInst>(*UI);
- if (LI == 0 || !LI->isSimple())
+ for (User *U : SI.users()) {
+ LoadInst *LI = dyn_cast<LoadInst>(U);
+ if (!LI || !LI->isSimple())
return false;
// Both operands to the select need to be dereferencable, either
@@ -1155,7 +1228,7 @@ static void speculateSelectInstLoads(SelectInst &SI) {
Value *FV = SI.getFalseValue();
// Replace the loads of the select with a select of two loads.
while (!SI.use_empty()) {
- LoadInst *LI = cast<LoadInst>(*SI.use_begin());
+ LoadInst *LI = cast<LoadInst>(SI.user_back());
assert(LI->isSimple() && "We only speculate simple loads");
IRB.SetInsertPoint(LI);
@@ -1188,7 +1261,7 @@ static void speculateSelectInstLoads(SelectInst &SI) {
/// This will return the BasePtr if that is valid, or build a new GEP
/// instruction using the IRBuilder if GEP-ing is needed.
static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr,
- SmallVectorImpl<Value *> &Indices) {
+ SmallVectorImpl<Value *> &Indices, Twine NamePrefix) {
if (Indices.empty())
return BasePtr;
@@ -1197,7 +1270,7 @@ static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr,
if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero())
return BasePtr;
- return IRB.CreateInBoundsGEP(BasePtr, Indices, "idx");
+ return IRB.CreateInBoundsGEP(BasePtr, Indices, NamePrefix + "sroa_idx");
}
/// \brief Get a natural GEP off of the BasePtr walking through Ty toward
@@ -1211,9 +1284,13 @@ static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr,
/// indicated by Indices to have the correct offset.
static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &DL,
Value *BasePtr, Type *Ty, Type *TargetTy,
- SmallVectorImpl<Value *> &Indices) {
+ SmallVectorImpl<Value *> &Indices,
+ Twine NamePrefix) {
if (Ty == TargetTy)
- return buildGEP(IRB, BasePtr, Indices);
+ return buildGEP(IRB, BasePtr, Indices, NamePrefix);
+
+ // Pointer size to use for the indices.
+ unsigned PtrSize = DL.getPointerTypeSizeInBits(BasePtr->getType());
// See if we can descend into a struct and locate a field with the correct
// type.
@@ -1222,11 +1299,13 @@ static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &DL,
do {
if (ElementTy->isPointerTy())
break;
- if (SequentialType *SeqTy = dyn_cast<SequentialType>(ElementTy)) {
- ElementTy = SeqTy->getElementType();
- // Note that we use the default address space as this index is over an
- // array or a vector, not a pointer.
- Indices.push_back(IRB.getInt(APInt(DL.getPointerSizeInBits(0), 0)));
+
+ if (ArrayType *ArrayTy = dyn_cast<ArrayType>(ElementTy)) {
+ ElementTy = ArrayTy->getElementType();
+ Indices.push_back(IRB.getIntN(PtrSize, 0));
+ } else if (VectorType *VectorTy = dyn_cast<VectorType>(ElementTy)) {
+ ElementTy = VectorTy->getElementType();
+ Indices.push_back(IRB.getInt32(0));
} else if (StructType *STy = dyn_cast<StructType>(ElementTy)) {
if (STy->element_begin() == STy->element_end())
break; // Nothing left to descend into.
@@ -1240,7 +1319,7 @@ static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &DL,
if (ElementTy != TargetTy)
Indices.erase(Indices.end() - NumLayers, Indices.end());
- return buildGEP(IRB, BasePtr, Indices);
+ return buildGEP(IRB, BasePtr, Indices, NamePrefix);
}
/// \brief Recursively compute indices for a natural GEP.
@@ -1250,29 +1329,32 @@ static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &DL,
static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL,
Value *Ptr, Type *Ty, APInt &Offset,
Type *TargetTy,
- SmallVectorImpl<Value *> &Indices) {
+ SmallVectorImpl<Value *> &Indices,
+ Twine NamePrefix) {
if (Offset == 0)
- return getNaturalGEPWithType(IRB, DL, Ptr, Ty, TargetTy, Indices);
+ return getNaturalGEPWithType(IRB, DL, Ptr, Ty, TargetTy, Indices, NamePrefix);
// We can't recurse through pointer types.
if (Ty->isPointerTy())
- return 0;
+ return nullptr;
// We try to analyze GEPs over vectors here, but note that these GEPs are
// extremely poorly defined currently. The long-term goal is to remove GEPing
// over a vector from the IR completely.
if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) {
unsigned ElementSizeInBits = DL.getTypeSizeInBits(VecTy->getScalarType());
- if (ElementSizeInBits % 8)
- return 0; // GEPs over non-multiple of 8 size vector elements are invalid.
+ if (ElementSizeInBits % 8 != 0) {
+ // GEPs over non-multiple of 8 size vector elements are invalid.
+ return nullptr;
+ }
APInt ElementSize(Offset.getBitWidth(), ElementSizeInBits / 8);
APInt NumSkippedElements = Offset.sdiv(ElementSize);
if (NumSkippedElements.ugt(VecTy->getNumElements()))
- return 0;
+ return nullptr;
Offset -= NumSkippedElements * ElementSize;
Indices.push_back(IRB.getInt(NumSkippedElements));
return getNaturalGEPRecursively(IRB, DL, Ptr, VecTy->getElementType(),
- Offset, TargetTy, Indices);
+ Offset, TargetTy, Indices, NamePrefix);
}
if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
@@ -1280,31 +1362,31 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL,
APInt ElementSize(Offset.getBitWidth(), DL.getTypeAllocSize(ElementTy));
APInt NumSkippedElements = Offset.sdiv(ElementSize);
if (NumSkippedElements.ugt(ArrTy->getNumElements()))
- return 0;
+ return nullptr;
Offset -= NumSkippedElements * ElementSize;
Indices.push_back(IRB.getInt(NumSkippedElements));
return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy,
- Indices);
+ Indices, NamePrefix);
}
StructType *STy = dyn_cast<StructType>(Ty);
if (!STy)
- return 0;
+ return nullptr;
const StructLayout *SL = DL.getStructLayout(STy);
uint64_t StructOffset = Offset.getZExtValue();
if (StructOffset >= SL->getSizeInBytes())
- return 0;
+ return nullptr;
unsigned Index = SL->getElementContainingOffset(StructOffset);
Offset -= APInt(Offset.getBitWidth(), SL->getElementOffset(Index));
Type *ElementTy = STy->getElementType(Index);
if (Offset.uge(DL.getTypeAllocSize(ElementTy)))
- return 0; // The offset points into alignment padding.
+ return nullptr; // The offset points into alignment padding.
Indices.push_back(IRB.getInt32(Index));
return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy,
- Indices);
+ Indices, NamePrefix);
}
/// \brief Get a natural GEP from a base pointer to a particular offset and
@@ -1319,26 +1401,27 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL,
/// If no natural GEP can be constructed, this function returns null.
static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL,
Value *Ptr, APInt Offset, Type *TargetTy,
- SmallVectorImpl<Value *> &Indices) {
+ SmallVectorImpl<Value *> &Indices,
+ Twine NamePrefix) {
PointerType *Ty = cast<PointerType>(Ptr->getType());
// Don't consider any GEPs through an i8* as natural unless the TargetTy is
// an i8.
- if (Ty == IRB.getInt8PtrTy() && TargetTy->isIntegerTy(8))
- return 0;
+ if (Ty == IRB.getInt8PtrTy(Ty->getAddressSpace()) && TargetTy->isIntegerTy(8))
+ return nullptr;
Type *ElementTy = Ty->getElementType();
if (!ElementTy->isSized())
- return 0; // We can't GEP through an unsized element.
+ return nullptr; // We can't GEP through an unsized element.
APInt ElementSize(Offset.getBitWidth(), DL.getTypeAllocSize(ElementTy));
if (ElementSize == 0)
- return 0; // Zero-length arrays can't help us build a natural GEP.
+ return nullptr; // Zero-length arrays can't help us build a natural GEP.
APInt NumSkippedElements = Offset.sdiv(ElementSize);
Offset -= NumSkippedElements * ElementSize;
Indices.push_back(IRB.getInt(NumSkippedElements));
return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy,
- Indices);
+ Indices, NamePrefix);
}
/// \brief Compute an adjusted pointer from Ptr by Offset bytes where the
@@ -1356,8 +1439,9 @@ static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL,
/// properties. The algorithm tries to fold as many constant indices into
/// a single GEP as possible, thus making each GEP more independent of the
/// surrounding code.
-static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL,
- Value *Ptr, APInt Offset, Type *PointerTy) {
+static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
+ APInt Offset, Type *PointerTy,
+ Twine NamePrefix) {
// Even though we don't look through PHI nodes, we could be called on an
// instruction in an unreachable block, which may be on a cycle.
SmallPtrSet<Value *, 4> Visited;
@@ -1367,11 +1451,11 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL,
// We may end up computing an offset pointer that has the wrong type. If we
// never are able to compute one directly that has the correct type, we'll
// fall back to it, so keep it around here.
- Value *OffsetPtr = 0;
+ Value *OffsetPtr = nullptr;
// Remember any i8 pointer we come across to re-use if we need to do a raw
// byte offset.
- Value *Int8Ptr = 0;
+ Value *Int8Ptr = nullptr;
APInt Int8PtrOffset(Offset.getBitWidth(), 0);
Type *TargetTy = PointerTy->getPointerElementType();
@@ -1391,7 +1475,7 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL,
// See if we can perform a natural GEP here.
Indices.clear();
if (Value *P = getNaturalGEPWithOffset(IRB, DL, Ptr, Offset, TargetTy,
- Indices)) {
+ Indices, NamePrefix)) {
if (P->getType() == PointerTy) {
// Zap any offset pointer that we ended up computing in previous rounds.
if (OffsetPtr && OffsetPtr->use_empty())
@@ -1425,20 +1509,21 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL,
if (!OffsetPtr) {
if (!Int8Ptr) {
- Int8Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(),
- "raw_cast");
+ Int8Ptr = IRB.CreateBitCast(
+ Ptr, IRB.getInt8PtrTy(PointerTy->getPointerAddressSpace()),
+ NamePrefix + "sroa_raw_cast");
Int8PtrOffset = Offset;
}
OffsetPtr = Int8PtrOffset == 0 ? Int8Ptr :
IRB.CreateInBoundsGEP(Int8Ptr, IRB.getInt(Int8PtrOffset),
- "raw_idx");
+ NamePrefix + "sroa_raw_idx");
}
Ptr = OffsetPtr;
// On the off chance we were targeting i8*, guard the bitcast here.
if (Ptr->getType() != PointerTy)
- Ptr = IRB.CreateBitCast(Ptr, PointerTy, "cast");
+ Ptr = IRB.CreateBitCast(Ptr, PointerTy, NamePrefix + "sroa_cast");
return Ptr;
}
@@ -1931,16 +2016,22 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
// integer type will be stored here for easy access during rewriting.
IntegerType *IntTy;
- // The offset of the slice currently being rewritten.
+ // The original offset of the slice currently being rewritten relative to
+ // the original alloca.
uint64_t BeginOffset, EndOffset;
+ // The new offsets of the slice currently being rewritten relative to the
+ // original alloca.
+ uint64_t NewBeginOffset, NewEndOffset;
+
+ uint64_t SliceSize;
bool IsSplittable;
bool IsSplit;
Use *OldUse;
Instruction *OldPtr;
- // Output members carrying state about the result of visiting and rewriting
- // the slice of the alloca.
- bool IsUsedByRewrittenSpeculatableInstructions;
+ // Track post-rewrite users which are PHI nodes and Selects.
+ SmallPtrSetImpl<PHINode *> &PHIUsers;
+ SmallPtrSetImpl<SelectInst *> &SelectUsers;
// Utility IR builder, whose name prefix is setup for each visited use, and
// the insertion point is set to point to the user.
@@ -1949,22 +2040,25 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
public:
AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &S, SROA &Pass,
AllocaInst &OldAI, AllocaInst &NewAI,
- uint64_t NewBeginOffset, uint64_t NewEndOffset,
- bool IsVectorPromotable = false,
- bool IsIntegerPromotable = false)
+ uint64_t NewAllocaBeginOffset,
+ uint64_t NewAllocaEndOffset, bool IsVectorPromotable,
+ bool IsIntegerPromotable,
+ SmallPtrSetImpl<PHINode *> &PHIUsers,
+ SmallPtrSetImpl<SelectInst *> &SelectUsers)
: DL(DL), S(S), Pass(Pass), OldAI(OldAI), NewAI(NewAI),
- NewAllocaBeginOffset(NewBeginOffset), NewAllocaEndOffset(NewEndOffset),
+ NewAllocaBeginOffset(NewAllocaBeginOffset),
+ NewAllocaEndOffset(NewAllocaEndOffset),
NewAllocaTy(NewAI.getAllocatedType()),
- VecTy(IsVectorPromotable ? cast<VectorType>(NewAllocaTy) : 0),
- ElementTy(VecTy ? VecTy->getElementType() : 0),
+ VecTy(IsVectorPromotable ? cast<VectorType>(NewAllocaTy) : nullptr),
+ ElementTy(VecTy ? VecTy->getElementType() : nullptr),
ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy) / 8 : 0),
IntTy(IsIntegerPromotable
? Type::getIntNTy(
NewAI.getContext(),
DL.getTypeSizeInBits(NewAI.getAllocatedType()))
- : 0),
+ : nullptr),
BeginOffset(), EndOffset(), IsSplittable(), IsSplit(), OldUse(),
- OldPtr(), IsUsedByRewrittenSpeculatableInstructions(false),
+ OldPtr(), PHIUsers(PHIUsers), SelectUsers(SelectUsers),
IRB(NewAI.getContext(), ConstantFolder()) {
if (VecTy) {
assert((DL.getTypeSizeInBits(ElementTy) % 8) == 0 &&
@@ -1983,6 +2077,14 @@ public:
IsSplit =
BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset;
+ // Compute the intersecting offset range.
+ assert(BeginOffset < NewAllocaEndOffset);
+ assert(EndOffset > NewAllocaBeginOffset);
+ NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
+ NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
+
+ SliceSize = NewEndOffset - NewBeginOffset;
+
OldUse = I->getUse();
OldPtr = cast<Instruction>(OldUse->get());
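
With NewBeginOffset, NewEndOffset and SliceSize now computed once per visited slice, every rewrite helper works from the intersection of the slice's original range with the new partition instead of recomputing it. The arithmetic is just a clamp; a tiny standalone check with made-up offsets:

    #include <algorithm>
    #include <cstdint>
    #include <iostream>

    int main() {
      // Hypothetical split slice covering [4, 24) of the original alloca,
      // rewritten against a new partition covering [8, 16).
      uint64_t BeginOffset = 4, EndOffset = 24;
      uint64_t NewAllocaBeginOffset = 8, NewAllocaEndOffset = 16;

      // Same computation as in the visit() method above.
      uint64_t NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset); // 8
      uint64_t NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);       // 16
      uint64_t SliceSize = NewEndOffset - NewBeginOffset;                    // 8

      std::cout << "[" << NewBeginOffset << ", " << NewEndOffset << ") size "
                << SliceSize << "\n";
      return 0;
    }
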
@@ -1997,20 +2099,6 @@ public:
return CanSROA;
}
- /// \brief Query whether this slice is used by speculatable instructions after
- /// rewriting.
- ///
- /// These instructions (PHIs and Selects currently) require the alloca slice
- /// to run back through the rewriter. Thus, they are promotable, but not on
- /// this iteration. This is distinct from a slice which is unpromotable for
- /// some other reason, in which case we don't even want to perform the
- /// speculation. This can be querried at any time and reflects whether (at
- /// that point) a visit call has rewritten a speculatable instruction on the
- /// current slice.
- bool isUsedByRewrittenSpeculatableInstructions() const {
- return IsUsedByRewrittenSpeculatableInstructions;
- }
-
private:
// Make sure the other visit overloads are visible.
using Base::visit;
@@ -2021,30 +2109,53 @@ private:
llvm_unreachable("No rewrite rule for this instruction!");
}
- Value *getAdjustedAllocaPtr(IRBuilderTy &IRB, uint64_t Offset,
- Type *PointerTy) {
- assert(Offset >= NewAllocaBeginOffset);
- return getAdjustedPtr(IRB, DL, &NewAI, APInt(DL.getPointerSizeInBits(),
- Offset - NewAllocaBeginOffset),
- PointerTy);
+ Value *getNewAllocaSlicePtr(IRBuilderTy &IRB, Type *PointerTy) {
+ // Note that the offset computation can use BeginOffset or NewBeginOffset
+ // interchangeably for unsplit slices.
+ assert(IsSplit || BeginOffset == NewBeginOffset);
+ uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
+
+#ifndef NDEBUG
+ StringRef OldName = OldPtr->getName();
+ // Skip through the last '.sroa.' component of the name.
+ size_t LastSROAPrefix = OldName.rfind(".sroa.");
+ if (LastSROAPrefix != StringRef::npos) {
+ OldName = OldName.substr(LastSROAPrefix + strlen(".sroa."));
+ // Look for an SROA slice index.
+ size_t IndexEnd = OldName.find_first_not_of("0123456789");
+ if (IndexEnd != StringRef::npos && OldName[IndexEnd] == '.') {
+ // Strip the index and look for the offset.
+ OldName = OldName.substr(IndexEnd + 1);
+ size_t OffsetEnd = OldName.find_first_not_of("0123456789");
+ if (OffsetEnd != StringRef::npos && OldName[OffsetEnd] == '.')
+ // Strip the offset.
+ OldName = OldName.substr(OffsetEnd + 1);
+ }
+ }
+ // Strip any SROA suffixes as well.
+ OldName = OldName.substr(0, OldName.find(".sroa_"));
+#endif
+
+ return getAdjustedPtr(IRB, DL, &NewAI,
+ APInt(DL.getPointerSizeInBits(), Offset), PointerTy,
+#ifndef NDEBUG
+ Twine(OldName) + "."
+#else
+ Twine()
+#endif
+ );
}
- /// \brief Compute suitable alignment to access an offset into the new alloca.
- unsigned getOffsetAlign(uint64_t Offset) {
+ /// \brief Compute suitable alignment to access this slice of the *new* alloca.
+ ///
+ /// You can optionally pass a type to this routine and if that type's ABI
+ /// alignment is itself suitable, this will return zero.
+ unsigned getSliceAlign(Type *Ty = nullptr) {
unsigned NewAIAlign = NewAI.getAlignment();
if (!NewAIAlign)
NewAIAlign = DL.getABITypeAlignment(NewAI.getAllocatedType());
- return MinAlign(NewAIAlign, Offset);
- }
-
- /// \brief Compute suitable alignment to access a type at an offset of the
- /// new alloca.
- ///
- /// \returns zero if the type's ABI alignment is a suitable alignment,
- /// otherwise returns the maximal suitable alignment.
- unsigned getOffsetTypeAlign(Type *Ty, uint64_t Offset) {
- unsigned Align = getOffsetAlign(Offset);
- return Align == DL.getABITypeAlignment(Ty) ? 0 : Align;
+ unsigned Align = MinAlign(NewAIAlign, NewBeginOffset - NewAllocaBeginOffset);
+ return (Ty && Align == DL.getABITypeAlignment(Ty)) ? 0 : Align;
}
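
getSliceAlign clamps the new alloca's alignment by the byte offset of the slice within it: an access that begins Offset bytes into a NewAIAlign-aligned object can only be assumed aligned to the largest power of two dividing both, and returning zero when that matches the type's ABI alignment lets callers fall back to the default. A small standalone check of that arithmetic, re-implementing MinAlign as the lowest set bit of A | B:

    #include <cstdint>
    #include <iostream>

    // Lowest power of two dividing both A and B (the usual MinAlign trick).
    static uint64_t MinAlign(uint64_t A, uint64_t B) {
      return (A | B) & (1 + ~(A | B));
    }

    int main() {
      // Hypothetical: a new alloca aligned to 16 bytes, with a slice that
      // starts 12 bytes past the start of that alloca.
      uint64_t NewAIAlign = 16, SliceOffset = 12;
      std::cout << MinAlign(NewAIAlign, SliceOffset) << "\n"; // prints 4
      return 0;
    }
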
unsigned getIndex(uint64_t Offset) {
@@ -2062,8 +2173,7 @@ private:
Pass.DeadInsts.insert(I);
}
- Value *rewriteVectorizedLoadInst(uint64_t NewBeginOffset,
- uint64_t NewEndOffset) {
+ Value *rewriteVectorizedLoadInst() {
unsigned BeginIndex = getIndex(NewBeginOffset);
unsigned EndIndex = getIndex(NewEndOffset);
assert(EndIndex > BeginIndex && "Empty vector!");
@@ -2073,8 +2183,7 @@ private:
return extractVector(IRB, V, BeginIndex, EndIndex, "vec");
}
- Value *rewriteIntegerLoad(LoadInst &LI, uint64_t NewBeginOffset,
- uint64_t NewEndOffset) {
+ Value *rewriteIntegerLoad(LoadInst &LI) {
assert(IntTy && "We cannot insert an integer to the alloca");
assert(!LI.isVolatile());
Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
@@ -2093,32 +2202,23 @@ private:
Value *OldOp = LI.getOperand(0);
assert(OldOp == OldPtr);
- // Compute the intersecting offset range.
- assert(BeginOffset < NewAllocaEndOffset);
- assert(EndOffset > NewAllocaBeginOffset);
- uint64_t NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
- uint64_t NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
-
- uint64_t Size = NewEndOffset - NewBeginOffset;
-
- Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), Size * 8)
+ Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), SliceSize * 8)
: LI.getType();
bool IsPtrAdjusted = false;
Value *V;
if (VecTy) {
- V = rewriteVectorizedLoadInst(NewBeginOffset, NewEndOffset);
+ V = rewriteVectorizedLoadInst();
} else if (IntTy && LI.getType()->isIntegerTy()) {
- V = rewriteIntegerLoad(LI, NewBeginOffset, NewEndOffset);
+ V = rewriteIntegerLoad(LI);
} else if (NewBeginOffset == NewAllocaBeginOffset &&
canConvertValue(DL, NewAllocaTy, LI.getType())) {
V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- LI.isVolatile(), "load");
+ LI.isVolatile(), LI.getName());
} else {
Type *LTy = TargetTy->getPointerTo();
- V = IRB.CreateAlignedLoad(
- getAdjustedAllocaPtr(IRB, NewBeginOffset, LTy),
- getOffsetTypeAlign(TargetTy, NewBeginOffset - NewAllocaBeginOffset),
- LI.isVolatile(), "load");
+ V = IRB.CreateAlignedLoad(getNewAllocaSlicePtr(IRB, LTy),
+ getSliceAlign(TargetTy), LI.isVolatile(),
+ LI.getName());
IsPtrAdjusted = true;
}
V = convertValue(DL, IRB, V, TargetTy);
@@ -2127,13 +2227,13 @@ private:
assert(!LI.isVolatile());
assert(LI.getType()->isIntegerTy() &&
"Only integer type loads and stores are split");
- assert(Size < DL.getTypeStoreSize(LI.getType()) &&
+ assert(SliceSize < DL.getTypeStoreSize(LI.getType()) &&
"Split load isn't smaller than original load");
assert(LI.getType()->getIntegerBitWidth() ==
DL.getTypeStoreSizeInBits(LI.getType()) &&
"Non-byte-multiple bit width");
// Move the insertion point just past the load so that we can refer to it.
- IRB.SetInsertPoint(llvm::next(BasicBlock::iterator(&LI)));
+ IRB.SetInsertPoint(std::next(BasicBlock::iterator(&LI)));
// Create a placeholder value with the same type as LI to use as the
// basis for the new value. This allows us to replace the uses of LI with
// the computed value, and then replace the placeholder with LI, leaving
@@ -2155,9 +2255,7 @@ private:
return !LI.isVolatile() && !IsPtrAdjusted;
}
- bool rewriteVectorizedStoreInst(Value *V, StoreInst &SI, Value *OldOp,
- uint64_t NewBeginOffset,
- uint64_t NewEndOffset) {
+ bool rewriteVectorizedStoreInst(Value *V, StoreInst &SI, Value *OldOp) {
if (V->getType() != VecTy) {
unsigned BeginIndex = getIndex(NewBeginOffset);
unsigned EndIndex = getIndex(NewEndOffset);
@@ -2183,8 +2281,7 @@ private:
return true;
}
- bool rewriteIntegerStore(Value *V, StoreInst &SI,
- uint64_t NewBeginOffset, uint64_t NewEndOffset) {
+ bool rewriteIntegerStore(Value *V, StoreInst &SI) {
assert(IntTy && "We cannot extract an integer from the alloca");
assert(!SI.isVolatile());
if (DL.getTypeSizeInBits(V->getType()) != IntTy->getBitWidth()) {
@@ -2217,30 +2314,22 @@ private:
if (AllocaInst *AI = dyn_cast<AllocaInst>(V->stripInBoundsOffsets()))
Pass.PostPromotionWorklist.insert(AI);
- // Compute the intersecting offset range.
- assert(BeginOffset < NewAllocaEndOffset);
- assert(EndOffset > NewAllocaBeginOffset);
- uint64_t NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
- uint64_t NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
-
- uint64_t Size = NewEndOffset - NewBeginOffset;
- if (Size < DL.getTypeStoreSize(V->getType())) {
+ if (SliceSize < DL.getTypeStoreSize(V->getType())) {
assert(!SI.isVolatile());
assert(V->getType()->isIntegerTy() &&
"Only integer type loads and stores are split");
assert(V->getType()->getIntegerBitWidth() ==
DL.getTypeStoreSizeInBits(V->getType()) &&
"Non-byte-multiple bit width");
- IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8);
+ IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), SliceSize * 8);
V = extractInteger(DL, IRB, V, NarrowTy, NewBeginOffset,
"extract");
}
if (VecTy)
- return rewriteVectorizedStoreInst(V, SI, OldOp, NewBeginOffset,
- NewEndOffset);
+ return rewriteVectorizedStoreInst(V, SI, OldOp);
if (IntTy && V->getType()->isIntegerTy())
- return rewriteIntegerStore(V, SI, NewBeginOffset, NewEndOffset);
+ return rewriteIntegerStore(V, SI);
StoreInst *NewSI;
if (NewBeginOffset == NewAllocaBeginOffset &&
@@ -2250,12 +2339,9 @@ private:
NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
SI.isVolatile());
} else {
- Value *NewPtr = getAdjustedAllocaPtr(IRB, NewBeginOffset,
- V->getType()->getPointerTo());
- NewSI = IRB.CreateAlignedStore(
- V, NewPtr, getOffsetTypeAlign(
- V->getType(), NewBeginOffset - NewAllocaBeginOffset),
- SI.isVolatile());
+ Value *NewPtr = getNewAllocaSlicePtr(IRB, V->getType()->getPointerTo());
+ NewSI = IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(V->getType()),
+ SI.isVolatile());
}
(void)NewSI;
Pass.DeadInsts.insert(&SI);
@@ -2307,11 +2393,10 @@ private:
// pointer to the new alloca.
if (!isa<Constant>(II.getLength())) {
assert(!IsSplit);
- assert(BeginOffset >= NewAllocaBeginOffset);
- II.setDest(
- getAdjustedAllocaPtr(IRB, BeginOffset, II.getRawDest()->getType()));
+ assert(NewBeginOffset == BeginOffset);
+ II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->getType()));
Type *CstTy = II.getAlignmentCst()->getType();
- II.setAlignment(ConstantInt::get(CstTy, getOffsetAlign(BeginOffset)));
+ II.setAlignment(ConstantInt::get(CstTy, getSliceAlign()));
deleteIfTriviallyDead(OldPtr);
return false;
@@ -2323,13 +2408,6 @@ private:
Type *AllocaTy = NewAI.getAllocatedType();
Type *ScalarTy = AllocaTy->getScalarType();
- // Compute the intersecting offset range.
- assert(BeginOffset < NewAllocaEndOffset);
- assert(EndOffset > NewAllocaBeginOffset);
- uint64_t NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
- uint64_t NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
- uint64_t SliceOffset = NewBeginOffset - NewAllocaBeginOffset;
-
// If this doesn't map cleanly onto the alloca type, and that type isn't
// a single value type, just emit a memset.
if (!VecTy && !IntTy &&
@@ -2341,8 +2419,8 @@ private:
Type *SizeTy = II.getLength()->getType();
Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
CallInst *New = IRB.CreateMemSet(
- getAdjustedAllocaPtr(IRB, NewBeginOffset, II.getRawDest()->getType()),
- II.getValue(), Size, getOffsetAlign(SliceOffset), II.isVolatile());
+ getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size,
+ getSliceAlign(), II.isVolatile());
(void)New;
DEBUG(dbgs() << " to: " << *New << "\n");
return false;
@@ -2419,25 +2497,11 @@ private:
DEBUG(dbgs() << " original: " << II << "\n");
- // Compute the intersecting offset range.
- assert(BeginOffset < NewAllocaEndOffset);
- assert(EndOffset > NewAllocaBeginOffset);
- uint64_t NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
- uint64_t NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
-
- assert(II.getRawSource() == OldPtr || II.getRawDest() == OldPtr);
- bool IsDest = II.getRawDest() == OldPtr;
+ bool IsDest = &II.getRawDestUse() == OldUse;
+ assert((IsDest && II.getRawDest() == OldPtr) ||
+ (!IsDest && II.getRawSource() == OldPtr));
- // Compute the relative offset within the transfer.
- unsigned IntPtrWidth = DL.getPointerSizeInBits();
- APInt RelOffset(IntPtrWidth, NewBeginOffset - BeginOffset);
-
- unsigned Align = II.getAlignment();
- uint64_t SliceOffset = NewBeginOffset - NewAllocaBeginOffset;
- if (Align > 1)
- Align =
- MinAlign(RelOffset.zextOrTrunc(64).getZExtValue(),
- MinAlign(II.getAlignment(), getOffsetAlign(SliceOffset)));
+ unsigned SliceAlign = getSliceAlign();
// For unsplit intrinsics, we simply modify the source and destination
// pointers in place. This isn't just an optimization, it is a matter of
@@ -2447,19 +2511,20 @@ private:
 // memcpy, and so simply updating the pointers is necessary for us to
// update both source and dest of a single call.
if (!IsSplittable) {
- Value *OldOp = IsDest ? II.getRawDest() : II.getRawSource();
+ Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
if (IsDest)
- II.setDest(
- getAdjustedAllocaPtr(IRB, BeginOffset, II.getRawDest()->getType()));
+ II.setDest(AdjustedPtr);
else
- II.setSource(getAdjustedAllocaPtr(IRB, BeginOffset,
- II.getRawSource()->getType()));
+ II.setSource(AdjustedPtr);
- Type *CstTy = II.getAlignmentCst()->getType();
- II.setAlignment(ConstantInt::get(CstTy, Align));
+ if (II.getAlignment() > SliceAlign) {
+ Type *CstTy = II.getAlignmentCst()->getType();
+ II.setAlignment(
+ ConstantInt::get(CstTy, MinAlign(II.getAlignment(), SliceAlign)));
+ }
DEBUG(dbgs() << " to: " << II << "\n");
- deleteIfTriviallyDead(OldOp);
+ deleteIfTriviallyDead(OldPtr);
return false;
}
// For split transfer intrinsics we have an incredibly useful assurance:
@@ -2495,37 +2560,39 @@ private:
// alloca that should be re-examined after rewriting this instruction.
Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest();
if (AllocaInst *AI
- = dyn_cast<AllocaInst>(OtherPtr->stripInBoundsOffsets()))
+ = dyn_cast<AllocaInst>(OtherPtr->stripInBoundsOffsets())) {
+ assert(AI != &OldAI && AI != &NewAI &&
+ "Splittable transfers cannot reach the same alloca on both ends.");
Pass.Worklist.insert(AI);
+ }
- if (EmitMemCpy) {
- Type *OtherPtrTy = IsDest ? II.getRawSource()->getType()
- : II.getRawDest()->getType();
+ Type *OtherPtrTy = OtherPtr->getType();
+ unsigned OtherAS = OtherPtrTy->getPointerAddressSpace();
+ // Compute the relative offset for the other pointer within the transfer.
+ unsigned IntPtrWidth = DL.getPointerSizeInBits(OtherAS);
+ APInt OtherOffset(IntPtrWidth, NewBeginOffset - BeginOffset);
+ unsigned OtherAlign = MinAlign(II.getAlignment() ? II.getAlignment() : 1,
+ OtherOffset.zextOrTrunc(64).getZExtValue());
+
+ if (EmitMemCpy) {
// Compute the other pointer, folding as much as possible to produce
// a single, simple GEP in most cases.
- OtherPtr = getAdjustedPtr(IRB, DL, OtherPtr, RelOffset, OtherPtrTy);
+ OtherPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
+ OtherPtr->getName() + ".");
- Value *OurPtr = getAdjustedAllocaPtr(
- IRB, NewBeginOffset,
- IsDest ? II.getRawDest()->getType() : II.getRawSource()->getType());
+ Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
Type *SizeTy = II.getLength()->getType();
Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
- CallInst *New = IRB.CreateMemCpy(IsDest ? OurPtr : OtherPtr,
- IsDest ? OtherPtr : OurPtr,
- Size, Align, II.isVolatile());
+ CallInst *New = IRB.CreateMemCpy(
+ IsDest ? OurPtr : OtherPtr, IsDest ? OtherPtr : OurPtr, Size,
+ MinAlign(SliceAlign, OtherAlign), II.isVolatile());
(void)New;
DEBUG(dbgs() << " to: " << *New << "\n");
return false;
}
- // Note that we clamp the alignment to 1 here as a 0 alignment for a memcpy
- // is equivalent to 1, but that isn't true if we end up rewriting this as
- // a load or store.
- if (!Align)
- Align = 1;
-
bool IsWholeAlloca = NewBeginOffset == NewAllocaBeginOffset &&
NewEndOffset == NewAllocaEndOffset;
uint64_t Size = NewEndOffset - NewBeginOffset;
@@ -2533,24 +2600,32 @@ private:
unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0;
unsigned NumElements = EndIndex - BeginIndex;
IntegerType *SubIntTy
- = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : 0;
+ = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : nullptr;
- Type *OtherPtrTy = NewAI.getType();
+ // Reset the other pointer type to match the register type we're going to
+ // use, but using the address space of the original other pointer.
if (VecTy && !IsWholeAlloca) {
if (NumElements == 1)
OtherPtrTy = VecTy->getElementType();
else
OtherPtrTy = VectorType::get(VecTy->getElementType(), NumElements);
- OtherPtrTy = OtherPtrTy->getPointerTo();
+ OtherPtrTy = OtherPtrTy->getPointerTo(OtherAS);
} else if (IntTy && !IsWholeAlloca) {
- OtherPtrTy = SubIntTy->getPointerTo();
+ OtherPtrTy = SubIntTy->getPointerTo(OtherAS);
+ } else {
+ OtherPtrTy = NewAllocaTy->getPointerTo(OtherAS);
}
- Value *SrcPtr = getAdjustedPtr(IRB, DL, OtherPtr, RelOffset, OtherPtrTy);
+ Value *SrcPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
+ OtherPtr->getName() + ".");
+ unsigned SrcAlign = OtherAlign;
Value *DstPtr = &NewAI;
- if (!IsDest)
+ unsigned DstAlign = SliceAlign;
+ if (!IsDest) {
std::swap(SrcPtr, DstPtr);
+ std::swap(SrcAlign, DstAlign);
+ }
Value *Src;
if (VecTy && !IsWholeAlloca && !IsDest) {
@@ -2564,7 +2639,7 @@ private:
uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
Src = extractInteger(DL, IRB, Src, SubIntTy, Offset, "extract");
} else {
- Src = IRB.CreateAlignedLoad(SrcPtr, Align, II.isVolatile(),
+ Src = IRB.CreateAlignedLoad(SrcPtr, SrcAlign, II.isVolatile(),
"copyload");
}
@@ -2582,7 +2657,7 @@ private:
}
StoreInst *Store = cast<StoreInst>(
- IRB.CreateAlignedStore(Src, DstPtr, Align, II.isVolatile()));
+ IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));
(void)Store;
DEBUG(dbgs() << " to: " << *Store << "\n");
return !II.isVolatile();
@@ -2594,20 +2669,13 @@ private:
DEBUG(dbgs() << " original: " << II << "\n");
assert(II.getArgOperand(1) == OldPtr);
- // Compute the intersecting offset range.
- assert(BeginOffset < NewAllocaEndOffset);
- assert(EndOffset > NewAllocaBeginOffset);
- uint64_t NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
- uint64_t NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
-
// Record this instruction for deletion.
Pass.DeadInsts.insert(&II);
ConstantInt *Size
= ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()),
NewEndOffset - NewBeginOffset);
- Value *Ptr =
- getAdjustedAllocaPtr(IRB, NewBeginOffset, II.getArgOperand(1)->getType());
+ Value *Ptr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
Value *New;
if (II.getIntrinsicID() == Intrinsic::lifetime_start)
New = IRB.CreateLifetimeStart(Ptr, Size);
@@ -2628,28 +2696,22 @@ private:
// as local as possible to the PHI. To do that, we re-use the location of
// the old pointer, which necessarily must be in the right position to
// dominate the PHI.
- IRBuilderTy PtrBuilder(OldPtr);
- PtrBuilder.SetNamePrefix(Twine(NewAI.getName()) + "." + Twine(BeginOffset) +
- ".");
+ IRBuilderTy PtrBuilder(IRB);
+ PtrBuilder.SetInsertPoint(OldPtr);
+ PtrBuilder.SetCurrentDebugLocation(OldPtr->getDebugLoc());
- Value *NewPtr =
- getAdjustedAllocaPtr(PtrBuilder, BeginOffset, OldPtr->getType());
+ Value *NewPtr = getNewAllocaSlicePtr(PtrBuilder, OldPtr->getType());
// Replace the operands which were using the old pointer.
std::replace(PN.op_begin(), PN.op_end(), cast<Value>(OldPtr), NewPtr);
DEBUG(dbgs() << " to: " << PN << "\n");
deleteIfTriviallyDead(OldPtr);
- // Check whether we can speculate this PHI node, and if so remember that
- // fact and queue it up for another iteration after the speculation
- // occurs.
- if (isSafePHIToSpeculate(PN, &DL)) {
- Pass.SpeculatablePHIs.insert(&PN);
- IsUsedByRewrittenSpeculatableInstructions = true;
- return true;
- }
-
- return false; // PHIs can't be promoted on their own.
+ // PHIs can't be promoted on their own, but often can be speculated. We
+ // check the speculation outside of the rewriter so that we see the
+ // fully-rewritten alloca.
+ PHIUsers.insert(&PN);
+ return true;
}
bool visitSelectInst(SelectInst &SI) {
@@ -2659,7 +2721,7 @@ private:
assert(BeginOffset >= NewAllocaBeginOffset && "Selects are unsplittable");
assert(EndOffset <= NewAllocaEndOffset && "Selects are unsplittable");
- Value *NewPtr = getAdjustedAllocaPtr(IRB, BeginOffset, OldPtr->getType());
+ Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
// Replace the operands which were using the old pointer.
if (SI.getOperand(1) == OldPtr)
SI.setOperand(1, NewPtr);
@@ -2669,16 +2731,11 @@ private:
DEBUG(dbgs() << " to: " << SI << "\n");
deleteIfTriviallyDead(OldPtr);
- // Check whether we can speculate this select instruction, and if so
- // remember that fact and queue it up for another iteration after the
- // speculation occurs.
- if (isSafeSelectToSpeculate(SI, &DL)) {
- Pass.SpeculatableSelects.insert(&SI);
- IsUsedByRewrittenSpeculatableInstructions = true;
- return true;
- }
-
- return false; // Selects can't be promoted on their own.
+ // Selects can't be promoted on their own, but often can be speculated. We
+ // check the speculation outside of the rewriter so that we see the
+ // fully-rewritten alloca.
+ SelectUsers.insert(&SI);
+ return true;
}
};
@@ -2726,10 +2783,9 @@ private:
/// Enqueue all the users of the given instruction for further processing.
/// This uses a set to de-duplicate users.
void enqueueUsers(Instruction &I) {
- for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE;
- ++UI)
- if (Visited.insert(*UI))
- Queue.push_back(&UI.getUse());
+ for (Use &U : I.uses())
+ if (Visited.insert(U.getUser()))
+ Queue.push_back(&U);
}
// Conservative default is to not rewrite anything.
@@ -2942,22 +2998,22 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty,
return stripAggregateTypeWrapping(DL, Ty);
if (Offset > DL.getTypeAllocSize(Ty) ||
(DL.getTypeAllocSize(Ty) - Offset) < Size)
- return 0;
+ return nullptr;
if (SequentialType *SeqTy = dyn_cast<SequentialType>(Ty)) {
// We can't partition pointers...
if (SeqTy->isPointerTy())
- return 0;
+ return nullptr;
Type *ElementTy = SeqTy->getElementType();
uint64_t ElementSize = DL.getTypeAllocSize(ElementTy);
uint64_t NumSkippedElements = Offset / ElementSize;
if (ArrayType *ArrTy = dyn_cast<ArrayType>(SeqTy)) {
if (NumSkippedElements >= ArrTy->getNumElements())
- return 0;
+ return nullptr;
} else if (VectorType *VecTy = dyn_cast<VectorType>(SeqTy)) {
if (NumSkippedElements >= VecTy->getNumElements())
- return 0;
+ return nullptr;
}
Offset -= NumSkippedElements * ElementSize;
@@ -2965,7 +3021,7 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty,
if (Offset > 0 || Size < ElementSize) {
// Bail if the partition ends in a different array element.
if ((Offset + Size) > ElementSize)
- return 0;
+ return nullptr;
// Recurse through the element type trying to peel off offset bytes.
return getTypePartition(DL, ElementTy, Offset, Size);
}
@@ -2976,20 +3032,20 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty,
assert(Size > ElementSize);
uint64_t NumElements = Size / ElementSize;
if (NumElements * ElementSize != Size)
- return 0;
+ return nullptr;
return ArrayType::get(ElementTy, NumElements);
}
StructType *STy = dyn_cast<StructType>(Ty);
if (!STy)
- return 0;
+ return nullptr;
const StructLayout *SL = DL.getStructLayout(STy);
if (Offset >= SL->getSizeInBytes())
- return 0;
+ return nullptr;
uint64_t EndOffset = Offset + Size;
if (EndOffset > SL->getSizeInBytes())
- return 0;
+ return nullptr;
unsigned Index = SL->getElementContainingOffset(Offset);
Offset -= SL->getElementOffset(Index);
@@ -2997,12 +3053,12 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty,
Type *ElementTy = STy->getElementType(Index);
uint64_t ElementSize = DL.getTypeAllocSize(ElementTy);
if (Offset >= ElementSize)
- return 0; // The offset points into alignment padding.
+ return nullptr; // The offset points into alignment padding.
// See if any partition must be contained by the element.
if (Offset > 0 || Size < ElementSize) {
if ((Offset + Size) > ElementSize)
- return 0;
+ return nullptr;
return getTypePartition(DL, ElementTy, Offset, Size);
}
assert(Offset == 0);
@@ -3015,14 +3071,14 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty,
if (EndOffset < SL->getSizeInBytes()) {
unsigned EndIndex = SL->getElementContainingOffset(EndOffset);
if (Index == EndIndex)
- return 0; // Within a single element and its padding.
+ return nullptr; // Within a single element and its padding.
// Don't try to form "natural" types if the elements don't line up with the
// expected size.
// FIXME: We could potentially recurse down through the last element in the
// sub-struct to find a natural end point.
if (SL->getElementOffset(EndIndex) != EndOffset)
- return 0;
+ return nullptr;
assert(Index < EndIndex);
EE = STy->element_begin() + EndIndex;
@@ -3033,7 +3089,7 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty,
STy->isPacked());
const StructLayout *SubSL = DL.getStructLayout(SubTy);
if (Size != SubSL->getSizeInBytes())
- return 0; // The sub-struct doesn't have quite the size needed.
+ return nullptr; // The sub-struct doesn't have quite the size needed.
return SubTy;
}
@@ -3058,7 +3114,7 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &S,
// Try to compute a friendly type for this partition of the alloca. This
// won't always succeed, in which case we fall back to a legal integer type
// or an i8 array of an appropriate size.
- Type *SliceTy = 0;
+ Type *SliceTy = nullptr;
if (Type *CommonUseTy = findCommonType(B, E, EndOffset))
if (DL->getTypeAllocSize(CommonUseTy) >= SliceSize)
SliceTy = CommonUseTy;
@@ -3105,7 +3161,7 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &S,
// the alloca's alignment unconstrained.
if (Alignment <= DL->getABITypeAlignment(SliceTy))
Alignment = 0;
- NewAI = new AllocaInst(SliceTy, 0, Alignment,
+ NewAI = new AllocaInst(SliceTy, nullptr, Alignment,
AI.getName() + ".sroa." + Twine(B - S.begin()), &AI);
++NumNewAllocas;
}
@@ -3114,17 +3170,17 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &S,
<< "[" << BeginOffset << "," << EndOffset << ") to: " << *NewAI
<< "\n");
- // Track the high watermark on several worklists that are only relevant for
+ // Track the high watermark on the worklist as it is only relevant for
// promoted allocas. We will reset it to this point if the alloca is not in
// fact scheduled for promotion.
unsigned PPWOldSize = PostPromotionWorklist.size();
- unsigned SPOldSize = SpeculatablePHIs.size();
- unsigned SSOldSize = SpeculatableSelects.size();
unsigned NumUses = 0;
+ SmallPtrSet<PHINode *, 8> PHIUsers;
+ SmallPtrSet<SelectInst *, 8> SelectUsers;
AllocaSliceRewriter Rewriter(*DL, S, *this, AI, *NewAI, BeginOffset,
EndOffset, IsVectorPromotable,
- IsIntegerPromotable);
+ IsIntegerPromotable, PHIUsers, SelectUsers);
bool Promotable = true;
for (ArrayRef<AllocaSlices::iterator>::const_iterator SUI = SplitUses.begin(),
SUE = SplitUses.end();
@@ -3145,50 +3201,60 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &S,
MaxUsesPerAllocaPartition =
std::max<unsigned>(NumUses, MaxUsesPerAllocaPartition);
- if (Promotable && !Rewriter.isUsedByRewrittenSpeculatableInstructions()) {
- DEBUG(dbgs() << " and queuing for promotion\n");
- PromotableAllocas.push_back(NewAI);
- } else if (NewAI != &AI ||
- (Promotable &&
- Rewriter.isUsedByRewrittenSpeculatableInstructions())) {
+ // Now that we've processed all the slices in the new partition, check if any
+ // PHIs or Selects would block promotion.
+ for (SmallPtrSetImpl<PHINode *>::iterator I = PHIUsers.begin(),
+ E = PHIUsers.end();
+ I != E; ++I)
+ if (!isSafePHIToSpeculate(**I, DL)) {
+ Promotable = false;
+ PHIUsers.clear();
+ SelectUsers.clear();
+ break;
+ }
+ for (SmallPtrSetImpl<SelectInst *>::iterator I = SelectUsers.begin(),
+ E = SelectUsers.end();
+ I != E; ++I)
+ if (!isSafeSelectToSpeculate(**I, DL)) {
+ Promotable = false;
+ PHIUsers.clear();
+ SelectUsers.clear();
+ break;
+ }
+
+ if (Promotable) {
+ if (PHIUsers.empty() && SelectUsers.empty()) {
+ // Promote the alloca.
+ PromotableAllocas.push_back(NewAI);
+ } else {
+ // If we have either PHIs or Selects to speculate, add them to those
+ // worklists and re-queue the new alloca so that we promote it on the
+ // next iteration.
+ for (SmallPtrSetImpl<PHINode *>::iterator I = PHIUsers.begin(),
+ E = PHIUsers.end();
+ I != E; ++I)
+ SpeculatablePHIs.insert(*I);
+ for (SmallPtrSetImpl<SelectInst *>::iterator I = SelectUsers.begin(),
+ E = SelectUsers.end();
+ I != E; ++I)
+ SpeculatableSelects.insert(*I);
+ Worklist.insert(NewAI);
+ }
+ } else {
// If we can't promote the alloca, iterate on it to check for new
// refinements exposed by splitting the current alloca. Don't iterate on an
// alloca which didn't actually change and didn't get promoted.
- //
- // Alternatively, if we could promote the alloca but have speculatable
- // instructions then we will speculate them after finishing our processing
- // of the original alloca. Mark the new one for re-visiting in the next
- // iteration so the speculated operations can be rewritten.
- //
- // FIXME: We should actually track whether the rewriter changed anything.
- Worklist.insert(NewAI);
- }
-
- // Drop any post-promotion work items if promotion didn't happen.
- if (!Promotable) {
+ if (NewAI != &AI)
+ Worklist.insert(NewAI);
+
+ // Drop any post-promotion work items if promotion didn't happen.
while (PostPromotionWorklist.size() > PPWOldSize)
PostPromotionWorklist.pop_back();
- while (SpeculatablePHIs.size() > SPOldSize)
- SpeculatablePHIs.pop_back();
- while (SpeculatableSelects.size() > SSOldSize)
- SpeculatableSelects.pop_back();
}
return true;
}
-namespace {
-struct IsSliceEndLessOrEqualTo {
- uint64_t UpperBound;
-
- IsSliceEndLessOrEqualTo(uint64_t UpperBound) : UpperBound(UpperBound) {}
-
- bool operator()(const AllocaSlices::iterator &I) {
- return I->endOffset() <= UpperBound;
- }
-};
-}
-
static void
removeFinishedSplitUses(SmallVectorImpl<AllocaSlices::iterator> &SplitUses,
uint64_t &MaxSplitUseEndOffset, uint64_t Offset) {
@@ -3200,7 +3266,9 @@ removeFinishedSplitUses(SmallVectorImpl<AllocaSlices::iterator> &SplitUses,
size_t SplitUsesOldSize = SplitUses.size();
SplitUses.erase(std::remove_if(SplitUses.begin(), SplitUses.end(),
- IsSliceEndLessOrEqualTo(Offset)),
+ [Offset](const AllocaSlices::iterator &I) {
+ return I->endOffset() <= Offset;
+ }),
SplitUses.end());
if (SplitUsesOldSize == SplitUses.size())
return;
@@ -3227,7 +3295,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &S) {
uint64_t BeginOffset = S.begin()->beginOffset();
- for (AllocaSlices::iterator SI = S.begin(), SJ = llvm::next(SI), SE = S.end();
+ for (AllocaSlices::iterator SI = S.begin(), SJ = std::next(SI), SE = S.end();
SI != SE; SI = SJ) {
uint64_t MaxEndOffset = SI->endOffset();
@@ -3326,6 +3394,21 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &S) {
return Changed;
}
+/// \brief Clobber a use with undef, deleting the used value if it becomes dead.
+void SROA::clobberUse(Use &U) {
+ Value *OldV = U;
+ // Replace the use with an undef value.
+ U = UndefValue::get(OldV->getType());
+
+  // Check whether this made an instruction dead. We have to garbage collect
+ // all the dead instructions to ensure the uses of any alloca end up being
+ // minimal.
+ if (Instruction *OldI = dyn_cast<Instruction>(OldV))
+ if (isInstructionTriviallyDead(OldI)) {
+ DeadInsts.insert(OldI);
+ }
+}
+
/// \brief Analyze an alloca for SROA.
///
/// This analyzes the alloca to ensure we can reason about it, builds
@@ -3363,21 +3446,22 @@ bool SROA::runOnAlloca(AllocaInst &AI) {
for (AllocaSlices::dead_user_iterator DI = S.dead_user_begin(),
DE = S.dead_user_end();
DI != DE; ++DI) {
- Changed = true;
+ // Free up everything used by this instruction.
+ for (Use &DeadOp : (*DI)->operands())
+ clobberUse(DeadOp);
+
+ // Now replace the uses of this instruction.
(*DI)->replaceAllUsesWith(UndefValue::get((*DI)->getType()));
+
+ // And mark it for deletion.
DeadInsts.insert(*DI);
+ Changed = true;
}
for (AllocaSlices::dead_op_iterator DO = S.dead_op_begin(),
DE = S.dead_op_end();
DO != DE; ++DO) {
- Value *OldV = **DO;
- // Clobber the use with an undef value.
- **DO = UndefValue::get(OldV->getType());
- if (Instruction *OldI = dyn_cast<Instruction>(OldV))
- if (isInstructionTriviallyDead(OldI)) {
- Changed = true;
- DeadInsts.insert(OldI);
- }
+ clobberUse(**DO);
+ Changed = true;
}
// No slices to split. Leave the dead alloca for a later pass to clean up.
@@ -3413,10 +3497,10 @@ void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) {
I->replaceAllUsesWith(UndefValue::get(I->getType()));
- for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
- if (Instruction *U = dyn_cast<Instruction>(*OI)) {
+ for (Use &Operand : I->operands())
+ if (Instruction *U = dyn_cast<Instruction>(Operand)) {
// Zero out the operand and see if it becomes trivially dead.
- *OI = 0;
+ Operand = nullptr;
if (isInstructionTriviallyDead(U))
DeadInsts.insert(U);
}
@@ -3432,10 +3516,9 @@ void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) {
static void enqueueUsersInWorklist(Instruction &I,
SmallVectorImpl<Instruction *> &Worklist,
SmallPtrSet<Instruction *, 8> &Visited) {
- for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE;
- ++UI)
- if (Visited.insert(cast<Instruction>(*UI)))
- Worklist.push_back(cast<Instruction>(*UI));
+ for (User *U : I.users())
+ if (Visited.insert(cast<Instruction>(U)))
+ Worklist.push_back(cast<Instruction>(U));
}
/// \brief Promote the allocas, using the best available technique.
@@ -3521,32 +3604,24 @@ bool SROA::promoteAllocas(Function &F) {
return true;
}
-namespace {
- /// \brief A predicate to test whether an alloca belongs to a set.
- class IsAllocaInSet {
- typedef SmallPtrSet<AllocaInst *, 4> SetType;
- const SetType &Set;
-
- public:
- typedef AllocaInst *argument_type;
-
- IsAllocaInSet(const SetType &Set) : Set(Set) {}
- bool operator()(AllocaInst *AI) const { return Set.count(AI); }
- };
-}
-
bool SROA::runOnFunction(Function &F) {
+ if (skipOptnoneFunction(F))
+ return false;
+
DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
C = &F.getContext();
- DL = getAnalysisIfAvailable<DataLayout>();
- if (!DL) {
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ if (!DLP) {
DEBUG(dbgs() << " Skipping SROA -- no target data!\n");
return false;
}
- DT = getAnalysisIfAvailable<DominatorTree>();
+ DL = &DLP->getDataLayout();
+ DominatorTreeWrapperPass *DTWP =
+ getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ DT = DTWP ? &DTWP->getDomTree() : nullptr;
BasicBlock &EntryBB = F.getEntryBlock();
- for (BasicBlock::iterator I = EntryBB.begin(), E = llvm::prior(EntryBB.end());
+ for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end());
I != E; ++I)
if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
Worklist.insert(AI);
@@ -3564,11 +3639,14 @@ bool SROA::runOnFunction(Function &F) {
// Remove the deleted allocas from various lists so that we don't try to
// continue processing them.
if (!DeletedAllocas.empty()) {
- Worklist.remove_if(IsAllocaInSet(DeletedAllocas));
- PostPromotionWorklist.remove_if(IsAllocaInSet(DeletedAllocas));
+ auto IsInSet = [&](AllocaInst *AI) {
+ return DeletedAllocas.count(AI);
+ };
+ Worklist.remove_if(IsInSet);
+ PostPromotionWorklist.remove_if(IsInSet);
PromotableAllocas.erase(std::remove_if(PromotableAllocas.begin(),
PromotableAllocas.end(),
- IsAllocaInSet(DeletedAllocas)),
+ IsInSet),
PromotableAllocas.end());
DeletedAllocas.clear();
}
@@ -3585,6 +3663,6 @@ bool SROA::runOnFunction(Function &F) {
void SROA::getAnalysisUsage(AnalysisUsage &AU) const {
if (RequiresDomTree)
- AU.addRequired<DominatorTree>();
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.setPreservesCFG();
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/SampleProfile.cpp b/contrib/llvm/lib/Transforms/Scalar/SampleProfile.cpp
index 9bcd702a9137..73c97ffeef4f 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SampleProfile.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SampleProfile.cpp
@@ -22,38 +22,198 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "sample-profile"
-
+#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/DebugInfo/DIContext.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/PostDominators.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Metadata.h"
#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
+#include <cctype>
using namespace llvm;
+#define DEBUG_TYPE "sample-profile"
+
// Command line option to specify the file to read samples from. This is
// mainly used for debugging.
static cl::opt<std::string> SampleProfileFile(
"sample-profile-file", cl::init(""), cl::value_desc("filename"),
cl::desc("Profile file loaded by -sample-profile"), cl::Hidden);
+static cl::opt<unsigned> SampleProfileMaxPropagateIterations(
+ "sample-profile-max-propagate-iterations", cl::init(100),
+ cl::desc("Maximum number of iterations to go through when propagating "
+ "sample block/edge weights through the CFG."));
+
+namespace {
+/// \brief Represents the relative location of an instruction.
+///
+/// Instruction locations are specified by the line offset from the
+/// beginning of the function (marked by the line where the function
+/// header is) and the discriminator value within that line.
+///
+/// The discriminator value is useful to distinguish instructions
+/// that are on the same line but belong to different basic blocks
+/// (e.g., the two post-increment instructions in "if (p) x++; else y++;").
+struct InstructionLocation {
+ InstructionLocation(int L, unsigned D) : LineOffset(L), Discriminator(D) {}
+ int LineOffset;
+ unsigned Discriminator;
+};
+}
+
+namespace llvm {
+template <> struct DenseMapInfo<InstructionLocation> {
+ typedef DenseMapInfo<int> OffsetInfo;
+ typedef DenseMapInfo<unsigned> DiscriminatorInfo;
+ static inline InstructionLocation getEmptyKey() {
+ return InstructionLocation(OffsetInfo::getEmptyKey(),
+ DiscriminatorInfo::getEmptyKey());
+ }
+ static inline InstructionLocation getTombstoneKey() {
+ return InstructionLocation(OffsetInfo::getTombstoneKey(),
+ DiscriminatorInfo::getTombstoneKey());
+ }
+ static inline unsigned getHashValue(InstructionLocation Val) {
+ return DenseMapInfo<std::pair<int, unsigned>>::getHashValue(
+ std::pair<int, unsigned>(Val.LineOffset, Val.Discriminator));
+ }
+ static inline bool isEqual(InstructionLocation LHS, InstructionLocation RHS) {
+ return LHS.LineOffset == RHS.LineOffset &&
+ LHS.Discriminator == RHS.Discriminator;
+ }
+};
+}
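
A minimal sketch of how this key type is meant to be used (the values are hypothetical; the BodySampleMap typedef defined just below is exactly this map type):

  DenseMap<InstructionLocation, unsigned> BodySamples;
  // Record 100 samples for the instruction at line offset 5, discriminator 2.
  BodySamples[InstructionLocation(5, 2)] += 100;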
namespace {
+typedef DenseMap<InstructionLocation, unsigned> BodySampleMap;
+typedef DenseMap<BasicBlock *, unsigned> BlockWeightMap;
+typedef DenseMap<BasicBlock *, BasicBlock *> EquivalenceClassMap;
+typedef std::pair<BasicBlock *, BasicBlock *> Edge;
+typedef DenseMap<Edge, unsigned> EdgeWeightMap;
+typedef DenseMap<BasicBlock *, SmallVector<BasicBlock *, 8>> BlockEdgeMap;
+
+/// \brief Representation of the runtime profile for a function.
+///
+/// This data structure contains the runtime profile for a given
+/// function. It contains the total number of samples collected
+/// in the function and a map of samples collected in every statement.
+class SampleFunctionProfile {
+public:
+ SampleFunctionProfile()
+ : TotalSamples(0), TotalHeadSamples(0), HeaderLineno(0), DT(nullptr),
+ PDT(nullptr), LI(nullptr), Ctx(nullptr) {}
+
+ unsigned getFunctionLoc(Function &F);
+ bool emitAnnotations(Function &F, DominatorTree *DomTree,
+ PostDominatorTree *PostDomTree, LoopInfo *Loops);
+ unsigned getInstWeight(Instruction &I);
+ unsigned getBlockWeight(BasicBlock *B);
+ void addTotalSamples(unsigned Num) { TotalSamples += Num; }
+ void addHeadSamples(unsigned Num) { TotalHeadSamples += Num; }
+ void addBodySamples(int LineOffset, unsigned Discriminator, unsigned Num) {
+ assert(LineOffset >= 0);
+ BodySamples[InstructionLocation(LineOffset, Discriminator)] += Num;
+ }
+ void print(raw_ostream &OS);
+ void printEdgeWeight(raw_ostream &OS, Edge E);
+ void printBlockWeight(raw_ostream &OS, BasicBlock *BB);
+ void printBlockEquivalence(raw_ostream &OS, BasicBlock *BB);
+ bool computeBlockWeights(Function &F);
+ void findEquivalenceClasses(Function &F);
+ void findEquivalencesFor(BasicBlock *BB1,
+ SmallVector<BasicBlock *, 8> Descendants,
+ DominatorTreeBase<BasicBlock> *DomTree);
+ void propagateWeights(Function &F);
+ unsigned visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
+ void buildEdges(Function &F);
+ bool propagateThroughEdges(Function &F);
+ bool empty() { return BodySamples.empty(); }
+
+protected:
+ /// \brief Total number of samples collected inside this function.
+ ///
+ /// Samples are cumulative, they include all the samples collected
+ /// inside this function and all its inlined callees.
+ unsigned TotalSamples;
+
+ /// \brief Total number of samples collected at the head of the function.
+ /// FIXME: Use head samples to estimate a cold/hot attribute for the function.
+ unsigned TotalHeadSamples;
+
+ /// \brief Line number for the function header. Used to compute relative
+ /// line numbers from the absolute line LOCs found in instruction locations.
+ /// The relative line numbers are needed to address the samples from the
+ /// profile file.
+ unsigned HeaderLineno;
+
+ /// \brief Map line offsets to collected samples.
+ ///
+ /// Each entry in this map contains the number of samples
+ /// collected at the corresponding line offset. All line locations
+ /// are an offset from the start of the function.
+ BodySampleMap BodySamples;
+
+ /// \brief Map basic blocks to their computed weights.
+ ///
+ /// The weight of a basic block is defined to be the maximum
+ /// of all the instruction weights in that block.
+ BlockWeightMap BlockWeights;
+
+ /// \brief Map edges to their computed weights.
+ ///
+ /// Edge weights are computed by propagating basic block weights in
+ /// SampleProfile::propagateWeights.
+ EdgeWeightMap EdgeWeights;
+
+ /// \brief Set of visited blocks during propagation.
+ SmallPtrSet<BasicBlock *, 128> VisitedBlocks;
+
+ /// \brief Set of visited edges during propagation.
+ SmallSet<Edge, 128> VisitedEdges;
+
+ /// \brief Equivalence classes for block weights.
+ ///
+ /// Two blocks BB1 and BB2 are in the same equivalence class if they
+ /// dominate and post-dominate each other, and they are in the same loop
+ /// nest. When this happens, the two blocks are guaranteed to execute
+ /// the same number of times.
+ EquivalenceClassMap EquivalenceClass;
+
+ /// \brief Dominance, post-dominance and loop information.
+ DominatorTree *DT;
+ PostDominatorTree *PDT;
+ LoopInfo *LI;
+
+ /// \brief Predecessors for each basic block in the CFG.
+ BlockEdgeMap Predecessors;
+
+ /// \brief Successors for each basic block in the CFG.
+ BlockEdgeMap Successors;
+
+ /// \brief LLVM context holding the debug data we need.
+ LLVMContext *Ctx;
+};
+
/// \brief Sample-based profile reader.
///
/// Each profile contains sample counts for all the functions
@@ -77,61 +237,33 @@ namespace {
/// 2. The samples collected at each line in F. To provide some
/// protection against source code shuffling, line numbers should
/// be relative to the start of the function.
-class SampleProfile {
+class SampleModuleProfile {
public:
- SampleProfile(StringRef F) : Profiles(0), Filename(F) {}
+ SampleModuleProfile(const Module &M, StringRef F)
+ : Profiles(0), Filename(F), M(M) {}
void dump();
- void loadText();
+ bool loadText();
void loadNative() { llvm_unreachable("not implemented"); }
- bool emitAnnotations(Function &F);
void printFunctionProfile(raw_ostream &OS, StringRef FName);
void dumpFunctionProfile(StringRef FName);
+ SampleFunctionProfile &getProfile(const Function &F) {
+ return Profiles[F.getName()];
+ }
-protected:
- typedef DenseMap<uint32_t, uint32_t> BodySampleMap;
- typedef DenseMap<BasicBlock *, uint32_t> BlockWeightMap;
-
- /// \brief Representation of the runtime profile for a function.
- ///
- /// This data structure contains the runtime profile for a given
- /// function. It contains the total number of samples collected
- /// in the function and a map of samples collected in every statement.
- struct FunctionProfile {
- /// \brief Total number of samples collected inside this function.
- ///
- /// Samples are cumulative, they include all the samples collected
- /// inside this function and all its inlined callees.
- unsigned TotalSamples;
-
- // \brief Total number of samples collected at the head of the function.
- unsigned TotalHeadSamples;
-
- /// \brief Map line offsets to collected samples.
- ///
- /// Each entry in this map contains the number of samples
- /// collected at the corresponding line offset. All line locations
- /// are an offset from the start of the function.
- BodySampleMap BodySamples;
-
- /// \brief Map basic blocks to their computed weights.
- ///
- /// The weight of a basic block is defined to be the maximum
- /// of all the instruction weights in that block.
- BlockWeightMap BlockWeights;
- };
-
- uint32_t getInstWeight(Instruction &I, unsigned FirstLineno,
- BodySampleMap &BodySamples);
- uint32_t computeBlockWeight(BasicBlock *B, unsigned FirstLineno,
- BodySampleMap &BodySamples);
+ /// \brief Report a parse error message.
+ void reportParseError(int64_t LineNumber, Twine Msg) const {
+ DiagnosticInfoSampleProfile Diag(Filename.data(), LineNumber, Msg);
+ M.getContext().diagnose(Diag);
+ }
+protected:
/// \brief Map every function to its associated profile.
///
/// The profile of every function executed at runtime is collected
- /// in the structure FunctionProfile. This maps function objects
+ /// in the structure SampleFunctionProfile. This maps function objects
/// to their corresponding profiles.
- StringMap<FunctionProfile> Profiles;
+ StringMap<SampleFunctionProfile> Profiles;
/// \brief Path name to the file holding the profile data.
///
@@ -140,63 +272,10 @@ protected:
/// version of the profile format to be used in constructing test
/// cases and debugging.
StringRef Filename;
-};
-/// \brief Loader class for text-based profiles.
-///
-/// This class defines a simple interface to read text files containing
-/// profiles. It keeps track of line number information and location of
-/// the file pointer. Users of this class are responsible for actually
-/// parsing the lines returned by the readLine function.
-///
-/// TODO - This does not really belong here. It is a generic text file
-/// reader. It should be moved to the Support library and made more general.
-class ExternalProfileTextLoader {
-public:
- ExternalProfileTextLoader(StringRef F) : Filename(F) {
- error_code EC;
- EC = MemoryBuffer::getFile(Filename, Buffer);
- if (EC)
- report_fatal_error("Could not open profile file " + Filename + ": " +
- EC.message());
- FP = Buffer->getBufferStart();
- Lineno = 0;
- }
-
- /// \brief Read a line from the mapped file.
- StringRef readLine() {
- size_t Length = 0;
- const char *start = FP;
- while (FP != Buffer->getBufferEnd() && *FP != '\n') {
- Length++;
- FP++;
- }
- if (FP != Buffer->getBufferEnd())
- FP++;
- Lineno++;
- return StringRef(start, Length);
- }
-
- /// \brief Return true, if we've reached EOF.
- bool atEOF() const { return FP == Buffer->getBufferEnd(); }
-
- /// \brief Report a parse error message and stop compilation.
- void reportParseError(Twine Msg) const {
- report_fatal_error(Filename + ":" + Twine(Lineno) + ": " + Msg + "\n");
- }
-
-private:
- /// \brief Memory buffer holding the text file.
- OwningPtr<MemoryBuffer> Buffer;
-
- /// \brief Current position into the memory buffer.
- const char *FP;
-
- /// \brief Current line number.
- int64_t Lineno;
-
- /// \brief Path name where to the profile file.
- StringRef Filename;
+ /// \brief Module being compiled. Used mainly to access the current
+ /// LLVM context for diagnostics.
+ const Module &M;
};
/// \brief Sample profile pass.
@@ -210,148 +289,242 @@ public:
static char ID;
SampleProfileLoader(StringRef Name = SampleProfileFile)
- : FunctionPass(ID), Profiler(0), Filename(Name) {
+ : FunctionPass(ID), Profiler(), Filename(Name), ProfileIsValid(false) {
initializeSampleProfileLoaderPass(*PassRegistry::getPassRegistry());
}
- virtual bool doInitialization(Module &M);
+ bool doInitialization(Module &M) override;
void dump() { Profiler->dump(); }
- virtual const char *getPassName() const { return "Sample profile pass"; }
+ const char *getPassName() const override { return "Sample profile pass"; }
- virtual bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
+ AU.addRequired<LoopInfo>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<PostDominatorTree>();
}
protected:
/// \brief Profile reader object.
- OwningPtr<SampleProfile> Profiler;
+ std::unique_ptr<SampleModuleProfile> Profiler;
/// \brief Name of the profile file to load.
StringRef Filename;
+
+ /// \brief Flag indicating whether the profile input loaded successfully.
+ bool ProfileIsValid;
};
}
-/// \brief Print the function profile for \p FName on stream \p OS.
+/// \brief Print this function profile on stream \p OS.
///
/// \param OS Stream to emit the output to.
-/// \param FName Name of the function to print.
-void SampleProfile::printFunctionProfile(raw_ostream &OS, StringRef FName) {
- FunctionProfile FProfile = Profiles[FName];
- OS << "Function: " << FName << ", " << FProfile.TotalSamples << ", "
- << FProfile.TotalHeadSamples << ", " << FProfile.BodySamples.size()
+void SampleFunctionProfile::print(raw_ostream &OS) {
+ OS << TotalSamples << ", " << TotalHeadSamples << ", " << BodySamples.size()
<< " sampled lines\n";
- for (BodySampleMap::const_iterator SI = FProfile.BodySamples.begin(),
- SE = FProfile.BodySamples.end();
+ for (BodySampleMap::const_iterator SI = BodySamples.begin(),
+ SE = BodySamples.end();
SI != SE; ++SI)
- OS << "\tline offset: " << SI->first
+ OS << "\tline offset: " << SI->first.LineOffset
+ << ", discriminator: " << SI->first.Discriminator
<< ", number of samples: " << SI->second << "\n";
OS << "\n";
}
+/// \brief Print the weight of edge \p E on stream \p OS.
+///
+/// \param OS Stream to emit the output to.
+/// \param E Edge to print.
+void SampleFunctionProfile::printEdgeWeight(raw_ostream &OS, Edge E) {
+ OS << "weight[" << E.first->getName() << "->" << E.second->getName()
+ << "]: " << EdgeWeights[E] << "\n";
+}
+
+/// \brief Print the equivalence class of block \p BB on stream \p OS.
+///
+/// \param OS Stream to emit the output to.
+/// \param BB Block to print.
+void SampleFunctionProfile::printBlockEquivalence(raw_ostream &OS,
+ BasicBlock *BB) {
+ BasicBlock *Equiv = EquivalenceClass[BB];
+ OS << "equivalence[" << BB->getName()
+ << "]: " << ((Equiv) ? EquivalenceClass[BB]->getName() : "NONE") << "\n";
+}
+
+/// \brief Print the weight of block \p BB on stream \p OS.
+///
+/// \param OS Stream to emit the output to.
+/// \param BB Block to print.
+void SampleFunctionProfile::printBlockWeight(raw_ostream &OS, BasicBlock *BB) {
+ OS << "weight[" << BB->getName() << "]: " << BlockWeights[BB] << "\n";
+}
+
+/// \brief Print the function profile for \p FName on stream \p OS.
+///
+/// \param OS Stream to emit the output to.
+/// \param FName Name of the function to print.
+void SampleModuleProfile::printFunctionProfile(raw_ostream &OS,
+ StringRef FName) {
+ OS << "Function: " << FName << ":\n";
+ Profiles[FName].print(OS);
+}
+
/// \brief Dump the function profile for \p FName.
///
/// \param FName Name of the function to print.
-void SampleProfile::dumpFunctionProfile(StringRef FName) {
+void SampleModuleProfile::dumpFunctionProfile(StringRef FName) {
printFunctionProfile(dbgs(), FName);
}
/// \brief Dump all the function profiles found.
-void SampleProfile::dump() {
- for (StringMap<FunctionProfile>::const_iterator I = Profiles.begin(),
- E = Profiles.end();
+void SampleModuleProfile::dump() {
+ for (StringMap<SampleFunctionProfile>::const_iterator I = Profiles.begin(),
+ E = Profiles.end();
I != E; ++I)
dumpFunctionProfile(I->getKey());
}
/// \brief Load samples from a text file.
///
-/// The file is divided in two segments:
-///
-/// Symbol table (represented with the string "symbol table")
-/// Number of symbols in the table
-/// symbol 1
-/// symbol 2
-/// ...
-/// symbol N
+/// The file contains a list of samples for every function executed at
+/// runtime. Each function profile has the following format:
///
-/// Function body profiles
-/// function1:total_samples:total_head_samples:number_of_locations
-/// location_offset_1: number_of_samples
-/// location_offset_2: number_of_samples
+/// function1:total_samples:total_head_samples
+/// offset1[.discriminator]: number_of_samples [fn1:num fn2:num ... ]
+/// offset2[.discriminator]: number_of_samples [fn3:num fn4:num ... ]
/// ...
-/// location_offset_N: number_of_samples
+/// offsetN[.discriminator]: number_of_samples [fn5:num fn6:num ... ]
///
/// Function names must be mangled in order for the profile loader to
-/// match them in the current translation unit.
+/// match them in the current translation unit. The two numbers in the
+/// function header specify how many total samples were accumulated in
+/// the function (first number), and the total number of samples accumulated
+/// at the prologue of the function (second number). This head sample
+/// count provides an indicator of how frequently the function is invoked.
+///
+/// Each sampled line may contain several items. Some are optional
+/// (marked below):
+///
+/// a- Source line offset. This number represents the line number
+/// in the function where the sample was collected. The line number
+/// is always relative to the line where symbol of the function
+/// is defined. So, if the function has its header at line 280,
+/// the offset 13 is at line 293 in the file.
+///
+/// b- [OPTIONAL] Discriminator. This is used if the sampled program
+/// was compiled with DWARF discriminator support
+/// (http://wiki.dwarfstd.org/index.php?title=Path_Discriminators)
+///
+/// c- Number of samples. This is the number of samples collected by
+/// the profiler at this source location.
+///
+/// d- [OPTIONAL] Potential call targets and samples. If present, this
+/// line contains a call instruction. This models both direct and
+/// indirect calls. Each called target is listed together with the
+/// number of samples. For example,
+///
+/// 130: 7 foo:3 bar:2 baz:7
+///
+/// The above means that at relative line offset 130 there is a
+///       call instruction that calls one of foo(), bar() and baz(), with
+///       baz() being the most frequent call target.
+///
+/// FIXME: This is currently unhandled, but it has a lot of
+/// potential for aiding the inliner.
+///
///
/// Since this is a flat profile, a function that shows up more than
/// once gets all its samples aggregated across all its instances.
-/// TODO - flat profiles are too imprecise to provide good optimization
-/// opportunities. Convert them to context-sensitive profile.
+///
+/// FIXME: flat profiles are too imprecise to provide good optimization
+/// opportunities. Convert them to context-sensitive profile.
///
/// This textual representation is useful to generate unit tests and
/// for debugging purposes, but it should not be used to generate
/// profiles for large programs, as the representation is extremely
/// inefficient.
-void SampleProfile::loadText() {
- ExternalProfileTextLoader Loader(Filename);
-
- // Read the symbol table.
- StringRef Line = Loader.readLine();
- if (Line != "symbol table")
- Loader.reportParseError("Expected 'symbol table', found " + Line);
- int NumSymbols;
- Line = Loader.readLine();
- if (Line.getAsInteger(10, NumSymbols))
- Loader.reportParseError("Expected a number, found " + Line);
- for (int I = 0; I < NumSymbols; I++) {
- StringRef FName = Loader.readLine();
- FunctionProfile &FProfile = Profiles[FName];
- FProfile.BodySamples.clear();
- FProfile.TotalSamples = 0;
- FProfile.TotalHeadSamples = 0;
+///
+/// \returns true if the file was loaded successfully, false otherwise.
+bool SampleModuleProfile::loadText() {
+ ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
+ MemoryBuffer::getFile(Filename);
+ if (std::error_code EC = BufferOrErr.getError()) {
+ std::string Msg(EC.message());
+ M.getContext().diagnose(DiagnosticInfoSampleProfile(Filename.data(), Msg));
+ return false;
}
+ std::unique_ptr<MemoryBuffer> Buffer = std::move(BufferOrErr.get());
+ line_iterator LineIt(*Buffer, '#');
// Read the profile of each function. Since each function may be
// mentioned more than once, and we are collecting flat profiles,
// accumulate samples as we parse them.
- Regex HeadRE("^([^:]+):([0-9]+):([0-9]+):([0-9]+)$");
- Regex LineSample("^([0-9]+): ([0-9]+)$");
- while (!Loader.atEOF()) {
- SmallVector<StringRef, 4> Matches;
- Line = Loader.readLine();
- if (!HeadRE.match(Line, &Matches))
- Loader.reportParseError("Expected 'mangled_name:NUM:NUM:NUM', found " +
- Line);
- assert(Matches.size() == 5);
+ Regex HeadRE("^([^0-9].*):([0-9]+):([0-9]+)$");
+ Regex LineSample("^([0-9]+)\\.?([0-9]+)?: ([0-9]+)(.*)$");
+ while (!LineIt.is_at_eof()) {
+ // Read the header of each function.
+ //
+ // Note that for function identifiers we are actually expecting
+ // mangled names, but we may not always get them. This happens when
+ // the compiler decides not to emit the function (e.g., it was inlined
+ // and removed). In this case, the binary will not have the linkage
+ // name for the function, so the profiler will emit the function's
+ // unmangled name, which may contain characters like ':' and '>' in its
+ // name (member functions, templates, etc).
+ //
+ // The only requirement we place on the identifier, then, is that it
+ // should not begin with a number.
+ SmallVector<StringRef, 3> Matches;
+ if (!HeadRE.match(*LineIt, &Matches)) {
+ reportParseError(LineIt.line_number(),
+ "Expected 'mangled_name:NUM:NUM', found " + *LineIt);
+ return false;
+ }
+ assert(Matches.size() == 4);
StringRef FName = Matches[1];
- unsigned NumSamples, NumHeadSamples, NumSampledLines;
+ unsigned NumSamples, NumHeadSamples;
Matches[2].getAsInteger(10, NumSamples);
Matches[3].getAsInteger(10, NumHeadSamples);
- Matches[4].getAsInteger(10, NumSampledLines);
- FunctionProfile &FProfile = Profiles[FName];
- FProfile.TotalSamples += NumSamples;
- FProfile.TotalHeadSamples += NumHeadSamples;
- BodySampleMap &SampleMap = FProfile.BodySamples;
- unsigned I;
- for (I = 0; I < NumSampledLines && !Loader.atEOF(); I++) {
- Line = Loader.readLine();
- if (!LineSample.match(Line, &Matches))
- Loader.reportParseError("Expected 'NUM: NUM', found " + Line);
- assert(Matches.size() == 3);
- unsigned LineOffset, NumSamples;
+ Profiles[FName] = SampleFunctionProfile();
+ SampleFunctionProfile &FProfile = Profiles[FName];
+ FProfile.addTotalSamples(NumSamples);
+ FProfile.addHeadSamples(NumHeadSamples);
+ ++LineIt;
+
+ // Now read the body. The body of the function ends when we reach
+ // EOF or when we see the start of the next function.
+ while (!LineIt.is_at_eof() && isdigit((*LineIt)[0])) {
+ if (!LineSample.match(*LineIt, &Matches)) {
+ reportParseError(
+ LineIt.line_number(),
+ "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " + *LineIt);
+ return false;
+ }
+ assert(Matches.size() == 5);
+ unsigned LineOffset, NumSamples, Discriminator = 0;
Matches[1].getAsInteger(10, LineOffset);
- Matches[2].getAsInteger(10, NumSamples);
- SampleMap[LineOffset] += NumSamples;
- }
+ if (Matches[2] != "")
+ Matches[2].getAsInteger(10, Discriminator);
+ Matches[3].getAsInteger(10, NumSamples);
- if (I < NumSampledLines)
- Loader.reportParseError("Unexpected end of file");
+ // FIXME: Handle called targets (in Matches[4]).
+
+ // When dealing with instruction weights, we use the value
+ // zero to indicate the absence of a sample. If we read an
+ // actual zero from the profile file, return it as 1 to
+      // avoid confusion later on.
+ if (NumSamples == 0)
+ NumSamples = 1;
+ FProfile.addBodySamples(LineOffset, Discriminator, NumSamples);
+ ++LineIt;
+ }
}
+
+ return true;
}
/// \brief Get the weight for an instruction.
@@ -359,46 +532,49 @@ void SampleProfile::loadText() {
/// The "weight" of an instruction \p Inst is the number of samples
/// collected on that instruction at runtime. To retrieve it, we
/// need to compute the line number of \p Inst relative to the start of its
-/// function. We use \p FirstLineno to compute the offset. We then
-/// look up the samples collected for \p Inst using \p BodySamples.
+/// function. We use HeaderLineno to compute the offset. We then
+/// look up the samples collected for \p Inst using BodySamples.
///
/// \param Inst Instruction to query.
-/// \param FirstLineno Line number of the first instruction in the function.
-/// \param BodySamples Map of relative source line locations to samples.
///
/// \returns The profiled weight of I.
-uint32_t SampleProfile::getInstWeight(Instruction &Inst, unsigned FirstLineno,
- BodySampleMap &BodySamples) {
- unsigned LOffset = Inst.getDebugLoc().getLine() - FirstLineno + 1;
- return BodySamples.lookup(LOffset);
+unsigned SampleFunctionProfile::getInstWeight(Instruction &Inst) {
+ DebugLoc DLoc = Inst.getDebugLoc();
+ unsigned Lineno = DLoc.getLine();
+ if (Lineno < HeaderLineno)
+ return 0;
+
+ DILocation DIL(DLoc.getAsMDNode(*Ctx));
+ int LOffset = Lineno - HeaderLineno;
+ unsigned Discriminator = DIL.getDiscriminator();
+ unsigned Weight =
+ BodySamples.lookup(InstructionLocation(LOffset, Discriminator));
+ DEBUG(dbgs() << " " << Lineno << "." << Discriminator << ":" << Inst
+ << " (line offset: " << LOffset << "." << Discriminator
+ << " - weight: " << Weight << ")\n");
+ return Weight;
}
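
As a concrete (hypothetical) instance of the lookup above: if HeaderLineno is 280 and an instruction's debug location reports line 293 with discriminator 1, then LOffset = 293 - 280 = 13 and the returned weight is BodySamples.lookup(InstructionLocation(13, 1)), i.e. whatever count the profile recorded for the line "13.1: N".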
/// \brief Compute the weight of a basic block.
///
/// The weight of basic block \p B is the maximum weight of all the
-/// instructions in B.
+/// instructions in B. The weight of \p B is computed and cached in
+/// the BlockWeights map.
///
/// \param B The basic block to query.
-/// \param FirstLineno The line number for the first line in the
-/// function holding B.
-/// \param BodySamples The map containing all the samples collected in that
-/// function.
///
/// \returns The computed weight of B.
-uint32_t SampleProfile::computeBlockWeight(BasicBlock *B, unsigned FirstLineno,
- BodySampleMap &BodySamples) {
+unsigned SampleFunctionProfile::getBlockWeight(BasicBlock *B) {
// If we've computed B's weight before, return it.
- Function *F = B->getParent();
- FunctionProfile &FProfile = Profiles[F->getName()];
std::pair<BlockWeightMap::iterator, bool> Entry =
- FProfile.BlockWeights.insert(std::make_pair(B, 0));
+ BlockWeights.insert(std::make_pair(B, 0));
if (!Entry.second)
return Entry.first->second;
// Otherwise, compute and cache B's weight.
- uint32_t Weight = 0;
+ unsigned Weight = 0;
for (BasicBlock::iterator I = B->begin(), E = B->end(); I != E; ++I) {
- uint32_t InstWeight = getInstWeight(*I, FirstLineno, BodySamples);
+ unsigned InstWeight = getInstWeight(*I);
if (InstWeight > Weight)
Weight = InstWeight;
}
@@ -406,31 +582,344 @@ uint32_t SampleProfile::computeBlockWeight(BasicBlock *B, unsigned FirstLineno,
return Weight;
}
-/// \brief Generate branch weight metadata for all branches in \p F.
+/// \brief Compute and store the weights of every basic block.
+///
+/// This populates the BlockWeights map by computing
+/// the weights of every basic block in the CFG.
+///
+/// \param F The function to query.
+bool SampleFunctionProfile::computeBlockWeights(Function &F) {
+ bool Changed = false;
+ DEBUG(dbgs() << "Block weights\n");
+ for (Function::iterator B = F.begin(), E = F.end(); B != E; ++B) {
+ unsigned Weight = getBlockWeight(B);
+ Changed |= (Weight > 0);
+ DEBUG(printBlockWeight(dbgs(), B));
+ }
+
+ return Changed;
+}
+
+/// \brief Find equivalence classes for the given block.
///
-/// For every branch instruction B in \p F, we compute the weight of the
-/// target block for each of the edges out of B. This is the weight
-/// that we associate with that branch.
+/// This finds all the blocks that are guaranteed to execute the same
+/// number of times as \p BB1. To do this, it traverses all the
+/// descendants of \p BB1 in the dominator or post-dominator tree.
///
-/// TODO - This weight assignment will most likely be wrong if the
-/// target branch has more than two predecessors. This needs to be done
-/// using some form of flow propagation.
+/// A block BB2 will be in the same equivalence class as \p BB1 if
+/// the following holds:
///
-/// Once all the branch weights are computed, we emit the MD_prof
-/// metadata on B using the computed values.
+/// 1- \p BB1 is a descendant of BB2 in the opposite tree. So, if BB2
+/// is a descendant of \p BB1 in the dominator tree, then BB2 should
+/// dominate BB1 in the post-dominator tree.
+///
+/// 2- Both BB2 and \p BB1 must be in the same loop.
+///
+/// For every block BB2 that meets those two requirements, we set BB2's
+/// equivalence class to \p BB1.
+///
+/// \param BB1 Block to check.
+/// \param Descendants Descendants of \p BB1 in either the dom or pdom tree.
+/// \param DomTree Opposite dominator tree. If \p Descendants is filled
+/// with blocks from \p BB1's dominator tree, then
+/// this is the post-dominator tree, and vice versa.
+void SampleFunctionProfile::findEquivalencesFor(
+ BasicBlock *BB1, SmallVector<BasicBlock *, 8> Descendants,
+ DominatorTreeBase<BasicBlock> *DomTree) {
+ for (SmallVectorImpl<BasicBlock *>::iterator I = Descendants.begin(),
+ E = Descendants.end();
+ I != E; ++I) {
+ BasicBlock *BB2 = *I;
+ bool IsDomParent = DomTree->dominates(BB2, BB1);
+ bool IsInSameLoop = LI->getLoopFor(BB1) == LI->getLoopFor(BB2);
+ if (BB1 != BB2 && VisitedBlocks.insert(BB2) && IsDomParent &&
+ IsInSameLoop) {
+ EquivalenceClass[BB2] = BB1;
+
+ // If BB2 is heavier than BB1, make BB2 have the same weight
+ // as BB1.
+ //
+ // Note that we don't worry about the opposite situation here
+ // (when BB2 is lighter than BB1). We will deal with this
+ // during the propagation phase. Right now, we just want to
+ // make sure that BB1 has the largest weight of all the
+ // members of its equivalence set.
+ unsigned &BB1Weight = BlockWeights[BB1];
+ unsigned &BB2Weight = BlockWeights[BB2];
+ BB1Weight = std::max(BB1Weight, BB2Weight);
+ }
+ }
+}
+
+/// \brief Find equivalence classes.
+///
+/// Since samples may be missing from blocks, we can fill in the gaps by setting
+/// the weights of all the blocks in the same equivalence class to the same
+/// weight. To compute the concept of equivalence, we use dominance and loop
+/// information. Two blocks B1 and B2 are in the same equivalence class if B1
+/// dominates B2, B2 post-dominates B1 and both are in the same loop.
///
/// \param F The function to query.
-bool SampleProfile::emitAnnotations(Function &F) {
+void SampleFunctionProfile::findEquivalenceClasses(Function &F) {
+ SmallVector<BasicBlock *, 8> DominatedBBs;
+ DEBUG(dbgs() << "\nBlock equivalence classes\n");
+ // Find equivalence sets based on dominance and post-dominance information.
+ for (Function::iterator B = F.begin(), E = F.end(); B != E; ++B) {
+ BasicBlock *BB1 = B;
+
+ // Compute BB1's equivalence class once.
+ if (EquivalenceClass.count(BB1)) {
+ DEBUG(printBlockEquivalence(dbgs(), BB1));
+ continue;
+ }
+
+ // By default, blocks are in their own equivalence class.
+ EquivalenceClass[BB1] = BB1;
+
+ // Traverse all the blocks dominated by BB1. We are looking for
+ // every basic block BB2 such that:
+ //
+ // 1- BB1 dominates BB2.
+ // 2- BB2 post-dominates BB1.
+ // 3- BB1 and BB2 are in the same loop nest.
+ //
+ // If all those conditions hold, it means that BB2 is executed
+ // as many times as BB1, so they are placed in the same equivalence
+ // class by making BB2's equivalence class be BB1.
+ DominatedBBs.clear();
+ DT->getDescendants(BB1, DominatedBBs);
+ findEquivalencesFor(BB1, DominatedBBs, PDT->DT);
+
+ // Repeat the same logic for all the blocks post-dominated by BB1.
+ // We are looking for every basic block BB2 such that:
+ //
+ // 1- BB1 post-dominates BB2.
+ // 2- BB2 dominates BB1.
+ // 3- BB1 and BB2 are in the same loop nest.
+ //
+ // If all those conditions hold, BB2's equivalence class is BB1.
+ DominatedBBs.clear();
+ PDT->getDescendants(BB1, DominatedBBs);
+ findEquivalencesFor(BB1, DominatedBBs, DT);
+
+ DEBUG(printBlockEquivalence(dbgs(), BB1));
+ }
+
+ // Assign weights to equivalence classes.
+ //
+ // All the basic blocks in the same equivalence class will execute
+ // the same number of times. Since we know that the head block in
+ // each equivalence class has the largest weight, assign that weight
+ // to all the blocks in that equivalence class.
+ DEBUG(dbgs() << "\nAssign the same weight to all blocks in the same class\n");
+ for (Function::iterator B = F.begin(), E = F.end(); B != E; ++B) {
+ BasicBlock *BB = B;
+ BasicBlock *EquivBB = EquivalenceClass[BB];
+ if (BB != EquivBB)
+ BlockWeights[BB] = BlockWeights[EquivBB];
+ DEBUG(printBlockWeight(dbgs(), BB));
+ }
+}
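
A hedged illustration of the classification above: in a diamond CFG Entry -> {Then, Else} -> Exit with no loops, Entry dominates Exit and Exit post-dominates Entry, so Exit is folded into Entry's equivalence class and both end up with max(weight(Entry), weight(Exit)); Then and Else keep their own classes because neither post-dominates Entry.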
+
+/// \brief Visit the given edge to decide if it has a valid weight.
+///
+/// If \p E has not been visited before, we copy it to \p UnknownEdge
+/// and increment the count of unknown edges.
+///
+/// \param E Edge to visit.
+/// \param NumUnknownEdges Current number of unknown edges.
+/// \param UnknownEdge Set if E has not been visited before.
+///
+/// \returns E's weight, if known. Otherwise, return 0.
+unsigned SampleFunctionProfile::visitEdge(Edge E, unsigned *NumUnknownEdges,
+ Edge *UnknownEdge) {
+ if (!VisitedEdges.count(E)) {
+ (*NumUnknownEdges)++;
+ *UnknownEdge = E;
+ return 0;
+ }
+
+ return EdgeWeights[E];
+}
+
+/// \brief Propagate weights through incoming/outgoing edges.
+///
+/// If the weight of a basic block is known, and there is only one edge
+/// with an unknown weight, we can calculate the weight of that edge.
+///
+/// Similarly, if all the edges have a known count, we can calculate the
+/// count of the basic block, if needed.
+///
+/// \param F Function to process.
+///
+/// \returns True if new weights were assigned to edges or blocks.
+bool SampleFunctionProfile::propagateThroughEdges(Function &F) {
bool Changed = false;
- FunctionProfile &FProfile = Profiles[F.getName()];
- unsigned FirstLineno = inst_begin(F)->getDebugLoc().getLine();
- MDBuilder MDB(F.getContext());
+ DEBUG(dbgs() << "\nPropagation through edges\n");
+ for (Function::iterator BI = F.begin(), EI = F.end(); BI != EI; ++BI) {
+ BasicBlock *BB = BI;
+
+ // Visit all the predecessor and successor edges to determine
+ // which ones have a weight assigned already. Note that it doesn't
+ // matter that we only keep track of a single unknown edge. The
+ // only case we are interested in handling is when only a single
+ // edge is unknown (see setEdgeOrBlockWeight).
+ for (unsigned i = 0; i < 2; i++) {
+ unsigned TotalWeight = 0;
+ unsigned NumUnknownEdges = 0;
+ Edge UnknownEdge, SelfReferentialEdge;
+
+ if (i == 0) {
+ // First, visit all predecessor edges.
+ for (size_t I = 0; I < Predecessors[BB].size(); I++) {
+ Edge E = std::make_pair(Predecessors[BB][I], BB);
+ TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
+ if (E.first == E.second)
+ SelfReferentialEdge = E;
+ }
+ } else {
+ // On the second round, visit all successor edges.
+ for (size_t I = 0; I < Successors[BB].size(); I++) {
+ Edge E = std::make_pair(BB, Successors[BB][I]);
+ TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
+ }
+ }
+
+ // After visiting all the edges, there are three cases that we
+ // can handle immediately:
+ //
+ // - All the edge weights are known (i.e., NumUnknownEdges == 0).
+ // In this case, we simply check that the sum of all the edges
+ // is the same as BB's weight. If not, we change BB's weight
+ // to match. Additionally, if BB had not been visited before,
+ // we mark it visited.
+ //
+ // - Only one edge is unknown and BB has already been visited.
+ // In this case, we can compute the weight of the edge by
+ // subtracting the total block weight from all the known
+      //     edge weights. If the known edges weigh more than BB, then the
+      //     weight of the last remaining edge is set to zero.
+ //
+ // - There exists a self-referential edge and the weight of BB is
+ // known. In this case, this edge can be based on BB's weight.
+ // We add up all the other known edges and set the weight on
+ // the self-referential edge as we did in the previous case.
+ //
+ // In any other case, we must continue iterating. Eventually,
+ // all edges will get a weight, or iteration will stop when
+ // it reaches SampleProfileMaxPropagateIterations.
+ if (NumUnknownEdges <= 1) {
+ unsigned &BBWeight = BlockWeights[BB];
+ if (NumUnknownEdges == 0) {
+ // If we already know the weight of all edges, the weight of the
+ // basic block can be computed. It should be no larger than the sum
+ // of all edge weights.
+ if (TotalWeight > BBWeight) {
+ BBWeight = TotalWeight;
+ Changed = true;
+ DEBUG(dbgs() << "All edge weights for " << BB->getName()
+ << " known. Set weight for block: ";
+ printBlockWeight(dbgs(), BB););
+ }
+ if (VisitedBlocks.insert(BB))
+ Changed = true;
+ } else if (NumUnknownEdges == 1 && VisitedBlocks.count(BB)) {
+ // If there is a single unknown edge and the block has been
+ // visited, then we can compute E's weight.
+ if (BBWeight >= TotalWeight)
+ EdgeWeights[UnknownEdge] = BBWeight - TotalWeight;
+ else
+ EdgeWeights[UnknownEdge] = 0;
+ VisitedEdges.insert(UnknownEdge);
+ Changed = true;
+ DEBUG(dbgs() << "Set weight for edge: ";
+ printEdgeWeight(dbgs(), UnknownEdge));
+ }
+ } else if (SelfReferentialEdge.first && VisitedBlocks.count(BB)) {
+ unsigned &BBWeight = BlockWeights[BB];
+ // We have a self-referential edge and the weight of BB is known.
+ if (BBWeight >= TotalWeight)
+ EdgeWeights[SelfReferentialEdge] = BBWeight - TotalWeight;
+ else
+ EdgeWeights[SelfReferentialEdge] = 0;
+ VisitedEdges.insert(SelfReferentialEdge);
+ Changed = true;
+ DEBUG(dbgs() << "Set self-referential edge weight to: ";
+ printEdgeWeight(dbgs(), SelfReferentialEdge));
+ }
+ }
+ }
+
+ return Changed;
+}
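
A small numeric sketch of the single-unknown-edge case handled above (all values made up): suppose BB has already been visited with BlockWeights[BB] = 100 and has two incoming edges, one with known weight 60 and one unknown; the unknown edge is then assigned 100 - 60 = 40. Had the known edges summed to 120 instead, the unknown edge would be clamped to 0.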
+
+/// \brief Build in/out edge lists for each basic block in the CFG.
+///
+/// We are interested in unique edges. If a block B1 has multiple
+/// edges to another block B2, we only add a single B1->B2 edge.
+void SampleFunctionProfile::buildEdges(Function &F) {
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ BasicBlock *B1 = I;
+
+ // Add predecessors for B1.
+ SmallPtrSet<BasicBlock *, 16> Visited;
+ if (!Predecessors[B1].empty())
+ llvm_unreachable("Found a stale predecessors list in a basic block.");
+ for (pred_iterator PI = pred_begin(B1), PE = pred_end(B1); PI != PE; ++PI) {
+ BasicBlock *B2 = *PI;
+ if (Visited.insert(B2))
+ Predecessors[B1].push_back(B2);
+ }
+
+ // Add successors for B1.
+ Visited.clear();
+ if (!Successors[B1].empty())
+ llvm_unreachable("Found a stale successors list in a basic block.");
+ for (succ_iterator SI = succ_begin(B1), SE = succ_end(B1); SI != SE; ++SI) {
+ BasicBlock *B2 = *SI;
+ if (Visited.insert(B2))
+ Successors[B1].push_back(B2);
+ }
+ }
+}
- // Clear the block weights cache.
- FProfile.BlockWeights.clear();
+/// \brief Propagate weights into edges
+///
+/// The following rules are applied to every block B in the CFG:
+///
+/// - If B has a single predecessor/successor, then the weight
+/// of that edge is the weight of the block.
+///
+/// - If all incoming or outgoing edges are known except one, and the
+/// weight of the block is already known, the weight of the unknown
+/// edge will be the weight of the block minus the sum of all the known
+/// edges. If the sum of all the known edges is larger than B's weight,
+/// we set the unknown edge weight to zero.
+///
+/// - If there is a self-referential edge, and the weight of the block is
+/// known, the weight for that edge is set to the weight of the block
+/// minus the weight of the other incoming edges to that block (if
+/// known).
+void SampleFunctionProfile::propagateWeights(Function &F) {
+ bool Changed = true;
+ unsigned i = 0;
+
+ // Before propagation starts, build, for each block, a list of
+ // unique predecessors and successors. This is necessary to handle
+ // identical edges in multiway branches. Since we visit all blocks and all
+ // edges of the CFG, it is cleaner to build these lists once at the start
+ // of the pass.
+ buildEdges(F);
+
+ // Propagate until we converge or we go past the iteration limit.
+ while (Changed && i++ < SampleProfileMaxPropagateIterations) {
+ Changed = propagateThroughEdges(F);
+ }
- // When we find a branch instruction: For each edge E out of the branch,
- // the weight of E is the weight of the target block.
+ // Generate MD_prof metadata for every branch instruction using the
+ // edge weights computed during propagation.
+ DEBUG(dbgs() << "\nPropagation complete. Setting branch weights\n");
+ MDBuilder MDB(F.getContext());
for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
BasicBlock *B = I;
TerminatorInst *TI = B->getTerminator();
@@ -439,34 +928,155 @@ bool SampleProfile::emitAnnotations(Function &F) {
if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI))
continue;
- SmallVector<uint32_t, 4> Weights;
- unsigned NSuccs = TI->getNumSuccessors();
- for (unsigned I = 0; I < NSuccs; ++I) {
+ DEBUG(dbgs() << "\nGetting weights for branch at line "
+ << TI->getDebugLoc().getLine() << ".\n");
+ SmallVector<unsigned, 4> Weights;
+ bool AllWeightsZero = true;
+ for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
BasicBlock *Succ = TI->getSuccessor(I);
- uint32_t Weight =
- computeBlockWeight(Succ, FirstLineno, FProfile.BodySamples);
+ Edge E = std::make_pair(B, Succ);
+ unsigned Weight = EdgeWeights[E];
+ DEBUG(dbgs() << "\t"; printEdgeWeight(dbgs(), E));
Weights.push_back(Weight);
+ if (Weight != 0)
+ AllWeightsZero = false;
}
- TI->setMetadata(llvm::LLVMContext::MD_prof,
- MDB.createBranchWeights(Weights));
- Changed = true;
+ // Only set weights if there is at least one non-zero weight.
+ // In any other case, let the analyzer set weights.
+ if (!AllWeightsZero) {
+ DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
+ TI->setMetadata(llvm::LLVMContext::MD_prof,
+ MDB.createBranchWeights(Weights));
+ } else {
+ DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
+ }
}
+}
- return Changed;
+/// \brief Get the line number for the function header.
+///
+/// This looks up function \p F in the current compilation unit and
+/// retrieves the line number where the function is defined. This is
+/// line 0 for all the samples read from the profile file. Every line
+/// number is relative to this line.
+///
+/// \param F Function object to query.
+///
+/// \returns the line number where \p F is defined. If it returns 0,
+/// it means that there is no debug information available for \p F.
+unsigned SampleFunctionProfile::getFunctionLoc(Function &F) {
+ NamedMDNode *CUNodes = F.getParent()->getNamedMetadata("llvm.dbg.cu");
+ if (CUNodes) {
+ for (unsigned I = 0, E1 = CUNodes->getNumOperands(); I != E1; ++I) {
+ DICompileUnit CU(CUNodes->getOperand(I));
+ DIArray Subprograms = CU.getSubprograms();
+ for (unsigned J = 0, E2 = Subprograms.getNumElements(); J != E2; ++J) {
+ DISubprogram Subprogram(Subprograms.getElement(J));
+ if (Subprogram.describes(&F))
+ return Subprogram.getLineNumber();
+ }
+ }
+ }
+
+ F.getContext().diagnose(DiagnosticInfoSampleProfile(
+ "No debug information found in function " + F.getName()));
+ return 0;
}
-char SampleProfileLoader::ID = 0;
-INITIALIZE_PASS(SampleProfileLoader, "sample-profile", "Sample Profile loader",
- false, false)
+/// \brief Generate branch weight metadata for all branches in \p F.
+///
+/// Branch weights are computed out of instruction samples using a
+/// propagation heuristic. Propagation proceeds in 3 phases:
+///
+/// 1- Assignment of block weights. All the basic blocks in the function
+///    are initially assigned the same weight as their most frequently
+/// executed instruction.
+///
+/// 2- Creation of equivalence classes. Since samples may be missing from
+/// blocks, we can fill in the gaps by setting the weights of all the
+/// blocks in the same equivalence class to the same weight. To compute
+/// the concept of equivalence, we use dominance and loop information.
+/// Two blocks B1 and B2 are in the same equivalence class if B1
+/// dominates B2, B2 post-dominates B1 and both are in the same loop.
+///
+/// 3- Propagation of block weights into edges. This uses a simple
+/// propagation heuristic. The following rules are applied to every
+/// block B in the CFG:
+///
+/// - If B has a single predecessor/successor, then the weight
+/// of that edge is the weight of the block.
+///
+/// - If all the edges are known except one, and the weight of the
+/// block is already known, the weight of the unknown edge will
+/// be the weight of the block minus the sum of all the known
+/// edges. If the sum of all the known edges is larger than B's weight,
+/// we set the unknown edge weight to zero.
+///
+/// - If there is a self-referential edge, and the weight of the block is
+/// known, the weight for that edge is set to the weight of the block
+/// minus the weight of the other incoming edges to that block (if
+/// known).
+///
+/// Since this propagation is not guaranteed to converge for every CFG, we
+/// only allow it to proceed for a limited number of iterations (controlled
+/// by -sample-profile-max-propagate-iterations).
+///
+/// FIXME: Try to replace this propagation heuristic with a scheme
+/// that is guaranteed to converge. A work-list approach similar to
+/// the standard value propagation algorithm used by SSA-CCP might
+/// work here.
+///
+/// Once all the branch weights are computed, we emit the MD_prof
+/// metadata on B using the computed values for each of its branches.
+///
+/// \param F The function to query.
+///
+/// \returns true if \p F was modified. Returns false, otherwise.
+bool SampleFunctionProfile::emitAnnotations(Function &F, DominatorTree *DomTree,
+ PostDominatorTree *PostDomTree,
+ LoopInfo *Loops) {
+ bool Changed = false;
-bool SampleProfileLoader::runOnFunction(Function &F) {
- return Profiler->emitAnnotations(F);
+ // Initialize invariants used during computation and propagation.
+ HeaderLineno = getFunctionLoc(F);
+ if (HeaderLineno == 0)
+ return false;
+
+ DEBUG(dbgs() << "Line number for the first instruction in " << F.getName()
+ << ": " << HeaderLineno << "\n");
+ DT = DomTree;
+ PDT = PostDomTree;
+ LI = Loops;
+ Ctx = &F.getParent()->getContext();
+
+ // Compute basic block weights.
+ Changed |= computeBlockWeights(F);
+
+ if (Changed) {
+ // Find equivalence classes.
+ findEquivalenceClasses(F);
+
+ // Propagate weights to all edges.
+ propagateWeights(F);
+ }
+
+ return Changed;
}
+char SampleProfileLoader::ID = 0;
+INITIALIZE_PASS_BEGIN(SampleProfileLoader, "sample-profile",
+ "Sample Profile loader", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(AddDiscriminators)
+INITIALIZE_PASS_END(SampleProfileLoader, "sample-profile",
+ "Sample Profile loader", false, false)
+
bool SampleProfileLoader::doInitialization(Module &M) {
- Profiler.reset(new SampleProfile(Filename));
- Profiler->loadText();
+ Profiler.reset(new SampleModuleProfile(M, Filename));
+ ProfileIsValid = Profiler->loadText();
return true;
}
@@ -477,3 +1087,15 @@ FunctionPass *llvm::createSampleProfileLoaderPass() {
FunctionPass *llvm::createSampleProfileLoaderPass(StringRef Name) {
return new SampleProfileLoader(Name);
}
+
+bool SampleProfileLoader::runOnFunction(Function &F) {
+ if (!ProfileIsValid)
+ return false;
+ DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ PostDominatorTree *PDT = &getAnalysis<PostDominatorTree>();
+ LoopInfo *LI = &getAnalysis<LoopInfo>();
+ SampleFunctionProfile &FunctionProfile = Profiler->getProfile(F);
+ if (!FunctionProfile.empty())
+ return FunctionProfile.emitAnnotations(F, DT, PDT, LI);
+ return false;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
index 857597e47462..de724d419a48 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -17,8 +17,8 @@
#include "llvm-c/Initialization.h"
#include "llvm-c/Transforms/Scalar.h"
#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/Verifier.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
#include "llvm/PassManager.h"
@@ -29,11 +29,12 @@ using namespace llvm;
void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeADCEPass(Registry);
initializeSampleProfileLoaderPass(Registry);
- initializeCodeGenPreparePass(Registry);
+ initializeConstantHoistingPass(Registry);
initializeConstantPropagationPass(Registry);
initializeCorrelatedValuePropagationPass(Registry);
initializeDCEPass(Registry);
initializeDeadInstEliminationPass(Registry);
+ initializeScalarizerPass(Registry);
initializeDSEPass(Registry);
initializeGVNPass(Registry);
initializeEarlyCSEPass(Registry);
@@ -51,6 +52,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeLowerAtomicPass(Registry);
initializeLowerExpectIntrinsicPass(Registry);
initializeMemCpyOptPass(Registry);
+ initializeMergedLoadStoreMotionPass(Registry);
initializePartiallyInlineLibCallsPass(Registry);
initializeReassociatePass(Registry);
initializeRegToMemPass(Registry);
@@ -63,6 +65,8 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeStructurizeCFGPass(Registry);
initializeSinkingPass(Registry);
initializeTailCallElimPass(Registry);
+ initializeSeparateConstOffsetFromGEPPass(Registry);
+ initializeLoadCombinePass(Registry);
}
void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) {
@@ -81,10 +85,18 @@ void LLVMAddDeadStoreEliminationPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createDeadStoreEliminationPass());
}
+void LLVMAddScalarizerPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createScalarizerPass());
+}
+
void LLVMAddGVNPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createGVNPass());
}
+void LLVMAddMergedLoadStoreMotionPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createMergedLoadStoreMotionPass());
+}
+
void LLVMAddIndVarSimplifyPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createIndVarSimplifyPass());
}
@@ -176,6 +188,7 @@ void LLVMAddDemoteMemoryToRegisterPass(LLVMPassManagerRef PM) {
void LLVMAddVerifierPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createVerifierPass());
+ // FIXME: should this also add createDebugInfoVerifierPass()?
}
void LLVMAddCorrelatedValuePropagationPass(LLVMPassManagerRef PM) {
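
The new LLVMAddScalarizerPass and LLVMAddMergedLoadStoreMotionPass bindings follow the same unwrap-and-add pattern as the existing C wrappers. A short usage sketch from the C API side; the module handle MRef is assumed to come from the caller and is not part of this diff:

    #include "llvm-c/Core.h"
    #include "llvm-c/Transforms/Scalar.h"

    /* Sketch only: schedule the newly exposed passes through the C API. */
    void runNewScalarPasses(LLVMModuleRef MRef) {
      LLVMPassManagerRef PM = LLVMCreatePassManager();
      LLVMAddScalarizerPass(PM);              /* exposed by this change */
      LLVMAddMergedLoadStoreMotionPass(PM);   /* exposed by this change */
      LLVMAddGVNPass(PM);
      LLVMRunPassManager(PM, MRef);
      LLVMDisposePassManager(PM);
    }
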
diff --git a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 57b290e14b13..e2a24a7fd4a7 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -19,20 +19,21 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "scalarrepl"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/DIBuilder.h"
-#include "llvm/DebugInfo.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
@@ -41,10 +42,8 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
-#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -52,6 +51,8 @@
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
+#define DEBUG_TYPE "scalarrepl"
+
STATISTIC(NumReplaced, "Number of allocas broken up");
STATISTIC(NumPromoted, "Number of allocas promoted");
STATISTIC(NumAdjusted, "Number of scalar allocas adjusted to allow promotion");
@@ -80,14 +81,14 @@ namespace {
ScalarLoadThreshold = SLT;
}
- bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
bool performScalarRepl(Function &F);
bool performPromotion(Function &F);
private:
bool HasDomTree;
- DataLayout *TD;
+ const DataLayout *DL;
/// DeadInsts - Keep track of instructions we have made dead, so that
/// we can remove them after we are done working.
@@ -195,8 +196,8 @@ namespace {
// getAnalysisUsage - This pass does not require any passes, but we know it
// will not alter the CFG, so say so.
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<DominatorTree>();
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.setPreservesCFG();
}
};
@@ -212,7 +213,7 @@ namespace {
// getAnalysisUsage - This pass does not require any passes, but we know it
// will not alter the CFG, so say so.
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
}
};
@@ -224,7 +225,7 @@ char SROA_SSAUp::ID = 0;
INITIALIZE_PASS_BEGIN(SROA_DT, "scalarrepl",
"Scalar Replacement of Aggregates (DT)", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(SROA_DT, "scalarrepl",
"Scalar Replacement of Aggregates (DT)", false, false)
@@ -258,7 +259,7 @@ namespace {
class ConvertToScalarInfo {
/// AllocaSize - The size of the alloca being considered in bytes.
unsigned AllocaSize;
- const DataLayout &TD;
+ const DataLayout &DL;
unsigned ScalarLoadThreshold;
/// IsNotTrivial - This is set to true if there is some access to the object
@@ -301,10 +302,10 @@ class ConvertToScalarInfo {
bool HadDynamicAccess;
public:
- explicit ConvertToScalarInfo(unsigned Size, const DataLayout &td,
+ explicit ConvertToScalarInfo(unsigned Size, const DataLayout &DL,
unsigned SLT)
- : AllocaSize(Size), TD(td), ScalarLoadThreshold(SLT), IsNotTrivial(false),
- ScalarKind(Unknown), VectorTy(0), HadNonMemTransferAccess(false),
+ : AllocaSize(Size), DL(DL), ScalarLoadThreshold(SLT), IsNotTrivial(false),
+ ScalarKind(Unknown), VectorTy(nullptr), HadNonMemTransferAccess(false),
HadDynamicAccess(false) { }
AllocaInst *TryConvert(AllocaInst *AI);
@@ -332,8 +333,8 @@ private:
AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
// If we can't convert this scalar, or if mem2reg can trivially do it, bail
// out.
- if (!CanConvertToScalar(AI, 0, 0) || !IsNotTrivial)
- return 0;
+ if (!CanConvertToScalar(AI, 0, nullptr) || !IsNotTrivial)
+ return nullptr;
// If an alloca has only memset / memcpy uses, it may still have an Unknown
// ScalarKind. Treat it as an Integer below.
@@ -361,23 +362,24 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
// Do not convert to scalar integer if the alloca size exceeds the
// scalar load threshold.
if (BitWidth > ScalarLoadThreshold)
- return 0;
+ return nullptr;
if ((ScalarKind == ImplicitVector || ScalarKind == Integer) &&
- !HadNonMemTransferAccess && !TD.fitsInLegalInteger(BitWidth))
- return 0;
+ !HadNonMemTransferAccess && !DL.fitsInLegalInteger(BitWidth))
+ return nullptr;
// Dynamic accesses on integers aren't yet supported. They need us to shift
// by a dynamic amount which could be difficult to work out as we might not
// know whether to use a left or right shift.
if (ScalarKind == Integer && HadDynamicAccess)
- return 0;
+ return nullptr;
DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");
// Create and insert the integer alloca.
NewTy = IntegerType::get(AI->getContext(), BitWidth);
}
- AllocaInst *NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin());
- ConvertUsesToScalar(AI, NewAI, 0, 0);
+ AllocaInst *NewAI = new AllocaInst(NewTy, nullptr, "",
+ AI->getParent()->begin());
+ ConvertUsesToScalar(AI, NewAI, 0, nullptr);
return NewAI;
}
@@ -466,10 +468,10 @@ bool ConvertToScalarInfo::MergeInVectorType(VectorType *VInTy,
/// SawVec flag.
bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset,
Value* NonConstantIdx) {
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
- Instruction *User = cast<Instruction>(*UI);
+ for (User *U : V->users()) {
+ Instruction *UI = cast<Instruction>(U);
- if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(UI)) {
// Don't break volatile loads.
if (!LI->isSimple())
return false;
@@ -481,7 +483,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset,
continue;
}
- if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
// Storing the pointer, not into the value?
if (SI->getOperand(0) == V || !SI->isSimple()) return false;
// Don't touch MMX operations.
@@ -492,7 +494,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset,
continue;
}
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(UI)) {
if (!onlyUsedByLifetimeMarkers(BCI))
IsNotTrivial = true; // Can't be mem2reg'd.
if (!CanConvertToScalar(BCI, Offset, NonConstantIdx))
@@ -500,7 +502,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset,
continue;
}
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(UI)) {
// If this is a GEP with variable indices, we can't handle it.
PointerType* PtrTy = dyn_cast<PointerType>(GEP->getPointerOperandType());
if (!PtrTy)
@@ -508,7 +510,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset,
// Compute the offset that this GEP adds to the pointer.
SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
- Value *GEPNonConstantIdx = 0;
+ Value *GEPNonConstantIdx = nullptr;
if (!GEP->hasAllConstantIndices()) {
if (!isa<VectorType>(PtrTy->getElementType()))
return false;
@@ -520,7 +522,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset,
HadDynamicAccess = true;
} else
GEPNonConstantIdx = NonConstantIdx;
- uint64_t GEPOffset = TD.getIndexedOffset(PtrTy,
+ uint64_t GEPOffset = DL.getIndexedOffset(PtrTy,
Indices);
// See if all uses can be converted.
if (!CanConvertToScalar(GEP, Offset+GEPOffset, GEPNonConstantIdx))
@@ -532,7 +534,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset,
// If this is a constant sized memset of a constant value (e.g. 0) we can
// handle it.
- if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(UI)) {
// Store to dynamic index.
if (NonConstantIdx)
return false;
@@ -559,12 +561,12 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset,
// If this is a memcpy or memmove into or out of the whole allocation, we
// can handle it like a load or store of the scalar type.
- if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(UI)) {
// Store to dynamic index.
if (NonConstantIdx)
return false;
ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength());
- if (Len == 0 || Len->getZExtValue() != AllocaSize || Offset != 0)
+ if (!Len || Len->getZExtValue() != AllocaSize || Offset != 0)
return false;
IsNotTrivial = true; // Can't be mem2reg'd.
@@ -572,7 +574,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset,
}
// If this is a lifetime intrinsic, we can handle it.
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(UI)) {
if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
II->getIntrinsicID() == Intrinsic::lifetime_end) {
continue;
@@ -597,7 +599,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
uint64_t Offset,
Value* NonConstantIdx) {
while (!Ptr->use_empty()) {
- Instruction *User = cast<Instruction>(Ptr->use_back());
+ Instruction *User = cast<Instruction>(Ptr->user_back());
if (BitCastInst *CI = dyn_cast<BitCastInst>(User)) {
ConvertUsesToScalar(CI, NewAI, Offset, NonConstantIdx);
@@ -608,14 +610,14 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
// Compute the offset that this GEP adds to the pointer.
SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
- Value* GEPNonConstantIdx = 0;
+ Value* GEPNonConstantIdx = nullptr;
if (!GEP->hasAllConstantIndices()) {
assert(!NonConstantIdx &&
"Dynamic GEP reading from dynamic GEP unsupported");
GEPNonConstantIdx = Indices.pop_back_val();
} else
GEPNonConstantIdx = NonConstantIdx;
- uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
+ uint64_t GEPOffset = DL.getIndexedOffset(GEP->getPointerOperandType(),
Indices);
ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8, GEPNonConstantIdx);
GEP->eraseFromParent();
@@ -671,7 +673,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
Value *New = ConvertScalar_InsertValue(
ConstantInt::get(User->getContext(), APVal),
- Old, Offset, 0, Builder);
+ Old, Offset, nullptr, Builder);
Builder.CreateStore(New, NewAI);
// If the load we just inserted is now dead, then the memset overwrote
@@ -692,9 +694,9 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
// If the source and destination are both to the same alloca, then this is
// a noop copy-to-self, just delete it. Otherwise, emit a load and store
// as appropriate.
- AllocaInst *OrigAI = cast<AllocaInst>(GetUnderlyingObject(Ptr, &TD, 0));
+ AllocaInst *OrigAI = cast<AllocaInst>(GetUnderlyingObject(Ptr, &DL, 0));
- if (GetUnderlyingObject(MTI->getSource(), &TD, 0) != OrigAI) {
+ if (GetUnderlyingObject(MTI->getSource(), &DL, 0) != OrigAI) {
// Dest must be OrigAI, change this to be a load from the original
// pointer (bitcasted), then a store to our new alloca.
assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
@@ -710,7 +712,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval");
SrcVal->setAlignment(MTI->getAlignment());
Builder.CreateStore(SrcVal, NewAI);
- } else if (GetUnderlyingObject(MTI->getDest(), &TD, 0) != OrigAI) {
+ } else if (GetUnderlyingObject(MTI->getDest(), &DL, 0) != OrigAI) {
// Src must be OrigAI, change this to be a load from NewAI then a store
// through the original dest pointer (bitcasted).
assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
@@ -770,15 +772,15 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
// If the result alloca is a vector type, this is either an element
// access or a bitcast to another vector type of the same size.
if (VectorType *VTy = dyn_cast<VectorType>(FromType)) {
- unsigned FromTypeSize = TD.getTypeAllocSize(FromType);
- unsigned ToTypeSize = TD.getTypeAllocSize(ToType);
+ unsigned FromTypeSize = DL.getTypeAllocSize(FromType);
+ unsigned ToTypeSize = DL.getTypeAllocSize(ToType);
if (FromTypeSize == ToTypeSize)
return Builder.CreateBitCast(FromVal, ToType);
// Otherwise it must be an element access.
unsigned Elt = 0;
if (Offset) {
- unsigned EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType());
+ unsigned EltSize = DL.getTypeAllocSizeInBits(VTy->getElementType());
Elt = Offset/EltSize;
assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
}
@@ -804,12 +806,12 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
if (StructType *ST = dyn_cast<StructType>(ToType)) {
assert(!NonConstantIdx &&
"Dynamic indexing into struct types not supported");
- const StructLayout &Layout = *TD.getStructLayout(ST);
+ const StructLayout &Layout = *DL.getStructLayout(ST);
Value *Res = UndefValue::get(ST);
for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i),
Offset+Layout.getElementOffsetInBits(i),
- 0, Builder);
+ nullptr, Builder);
Res = Builder.CreateInsertValue(Res, Elt, i);
}
return Res;
@@ -818,11 +820,12 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
if (ArrayType *AT = dyn_cast<ArrayType>(ToType)) {
assert(!NonConstantIdx &&
"Dynamic indexing into array types not supported");
- uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
+ uint64_t EltSize = DL.getTypeAllocSizeInBits(AT->getElementType());
Value *Res = UndefValue::get(AT);
for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(),
- Offset+i*EltSize, 0, Builder);
+ Offset+i*EltSize, nullptr,
+ Builder);
Res = Builder.CreateInsertValue(Res, Elt, i);
}
return Res;
@@ -834,12 +837,12 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
// If this is a big-endian system and the load is narrower than the
// full alloca type, we need to do a shift to get the right bits.
int ShAmt = 0;
- if (TD.isBigEndian()) {
+ if (DL.isBigEndian()) {
// On big-endian machines, the lowest bit is stored at the bit offset
// from the pointer given by getTypeStoreSizeInBits. This matters for
// integers with a bitwidth that is not a multiple of 8.
- ShAmt = TD.getTypeStoreSizeInBits(NTy) -
- TD.getTypeStoreSizeInBits(ToType) - Offset;
+ ShAmt = DL.getTypeStoreSizeInBits(NTy) -
+ DL.getTypeStoreSizeInBits(ToType) - Offset;
} else {
ShAmt = Offset;
}
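
The shift computation is easiest to follow with concrete numbers. The standalone sketch below (values assumed, not taken from this change) mirrors the formula for a big-endian target where an i32 scalar alloca is read back as an i8 at offset 0:

    #include <cassert>
    #include <cstdint>

    // Mirrors: ShAmt = DL.getTypeStoreSizeInBits(NTy) -
    //                  DL.getTypeStoreSizeInBits(ToType) - Offset;
    static int bigEndianShiftAmount(unsigned AllocaStoreBits,
                                    unsigned LoadStoreBits, uint64_t Offset) {
      return static_cast<int>(AllocaStoreBits - LoadStoreBits - Offset);
    }

    int main() {
      // The low-addressed byte of an i32 on a big-endian machine is its most
      // significant byte, so the value is shifted right by 24 bits before the
      // final truncation to i8; on little-endian the shift is just the offset.
      assert(bigEndianShiftAmount(32, 8, 0) == 24);
      return 0;
    }
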
@@ -855,7 +858,7 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
ConstantInt::get(FromVal->getType(), -ShAmt));
// Finally, unconditionally truncate the integer to the right width.
- unsigned LIBitWidth = TD.getTypeSizeInBits(ToType);
+ unsigned LIBitWidth = DL.getTypeSizeInBits(ToType);
if (LIBitWidth < NTy->getBitWidth())
FromVal =
Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(),
@@ -902,8 +905,8 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
LLVMContext &Context = Old->getContext();
if (VectorType *VTy = dyn_cast<VectorType>(AllocaType)) {
- uint64_t VecSize = TD.getTypeAllocSizeInBits(VTy);
- uint64_t ValSize = TD.getTypeAllocSizeInBits(SV->getType());
+ uint64_t VecSize = DL.getTypeAllocSizeInBits(VTy);
+ uint64_t ValSize = DL.getTypeAllocSizeInBits(SV->getType());
// Changing the whole vector with memset or with an access of a different
// vector type?
@@ -914,7 +917,7 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
Type *EltTy = VTy->getElementType();
if (SV->getType() != EltTy)
SV = Builder.CreateBitCast(SV, EltTy);
- uint64_t EltSize = TD.getTypeAllocSizeInBits(EltTy);
+ uint64_t EltSize = DL.getTypeAllocSizeInBits(EltTy);
unsigned Elt = Offset/EltSize;
Value *Idx;
if (NonConstantIdx) {
@@ -933,12 +936,12 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
if (StructType *ST = dyn_cast<StructType>(SV->getType())) {
assert(!NonConstantIdx &&
"Dynamic indexing into struct types not supported");
- const StructLayout &Layout = *TD.getStructLayout(ST);
+ const StructLayout &Layout = *DL.getStructLayout(ST);
for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
Value *Elt = Builder.CreateExtractValue(SV, i);
Old = ConvertScalar_InsertValue(Elt, Old,
Offset+Layout.getElementOffsetInBits(i),
- 0, Builder);
+ nullptr, Builder);
}
return Old;
}
@@ -946,24 +949,25 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
if (ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) {
assert(!NonConstantIdx &&
"Dynamic indexing into array types not supported");
- uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
+ uint64_t EltSize = DL.getTypeAllocSizeInBits(AT->getElementType());
for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
Value *Elt = Builder.CreateExtractValue(SV, i);
- Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, 0, Builder);
+ Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, nullptr,
+ Builder);
}
return Old;
}
// If SV is a float, convert it to the appropriate integer type.
// If it is a pointer, do the same.
- unsigned SrcWidth = TD.getTypeSizeInBits(SV->getType());
- unsigned DestWidth = TD.getTypeSizeInBits(AllocaType);
- unsigned SrcStoreWidth = TD.getTypeStoreSizeInBits(SV->getType());
- unsigned DestStoreWidth = TD.getTypeStoreSizeInBits(AllocaType);
+ unsigned SrcWidth = DL.getTypeSizeInBits(SV->getType());
+ unsigned DestWidth = DL.getTypeSizeInBits(AllocaType);
+ unsigned SrcStoreWidth = DL.getTypeStoreSizeInBits(SV->getType());
+ unsigned DestStoreWidth = DL.getTypeStoreSizeInBits(AllocaType);
if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy())
SV = Builder.CreateBitCast(SV, IntegerType::get(SV->getContext(),SrcWidth));
else if (SV->getType()->isPointerTy())
- SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getType()));
+ SV = Builder.CreatePtrToInt(SV, DL.getIntPtrType(SV->getType()));
// Zero extend or truncate the value if needed.
if (SV->getType() != AllocaType) {
@@ -982,7 +986,7 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
// If this is a big-endian system and the store is narrower than the
// full alloca type, we need to do a shift to get the right bits.
int ShAmt = 0;
- if (TD.isBigEndian()) {
+ if (DL.isBigEndian()) {
// On big-endian machines, the lowest bit is stored at the bit offset
// from the pointer given by getTypeStoreSizeInBits. This matters for
// integers with a bitwidth that is not a multiple of 8.
@@ -1020,7 +1024,11 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
bool SROA::runOnFunction(Function &F) {
- TD = getAnalysisIfAvailable<DataLayout>();
+ if (skipOptnoneFunction(F))
+ return false;
+
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
bool Changed = performPromotion(F);
@@ -1028,7 +1036,7 @@ bool SROA::runOnFunction(Function &F) {
// theoretically needs to. It should be refactored in order to support
// target-independent IR. Until this is done, just skip the actual
// scalar-replacement portion of this pass.
- if (!TD) return Changed;
+ if (!DL) return Changed;
while (1) {
bool LocalChange = performScalarRepl(F);
@@ -1050,17 +1058,16 @@ class AllocaPromoter : public LoadAndStorePromoter {
public:
AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
DIBuilder *DB)
- : LoadAndStorePromoter(Insts, S), AI(0), DIB(DB) {}
+ : LoadAndStorePromoter(Insts, S), AI(nullptr), DIB(DB) {}
void run(AllocaInst *AI, const SmallVectorImpl<Instruction*> &Insts) {
// Remember which alloca we're promoting (for isInstInList).
this->AI = AI;
if (MDNode *DebugNode = MDNode::getIfExists(AI->getContext(), AI)) {
- for (Value::use_iterator UI = DebugNode->use_begin(),
- E = DebugNode->use_end(); UI != E; ++UI)
- if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI))
+ for (User *U : DebugNode->users())
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
DDIs.push_back(DDI);
- else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(*UI))
+ else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(U))
DVIs.push_back(DVI);
}
@@ -1078,14 +1085,14 @@ public:
}
}
- virtual bool isInstInList(Instruction *I,
- const SmallVectorImpl<Instruction*> &Insts) const {
+ bool isInstInList(Instruction *I,
+ const SmallVectorImpl<Instruction*> &Insts) const override {
if (LoadInst *LI = dyn_cast<LoadInst>(I))
return LI->getOperand(0) == AI;
return cast<StoreInst>(I)->getPointerOperand() == AI;
}
- virtual void updateDebugInfo(Instruction *Inst) const {
+ void updateDebugInfo(Instruction *Inst) const override {
for (SmallVectorImpl<DbgDeclareInst *>::const_iterator I = DDIs.begin(),
E = DDIs.end(); I != E; ++I) {
DbgDeclareInst *DDI = *I;
@@ -1097,7 +1104,7 @@ public:
for (SmallVectorImpl<DbgValueInst *>::const_iterator I = DVIs.begin(),
E = DVIs.end(); I != E; ++I) {
DbgValueInst *DVI = *I;
- Value *Arg = NULL;
+ Value *Arg = nullptr;
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
// If an argument is zero extended then use argument directly. The ZExt
// may be zapped by an optimization pass in future.
@@ -1134,22 +1141,21 @@ public:
///
/// We can do this to a select if its only uses are loads and if the operand to
/// the select can be loaded unconditionally.
-static bool isSafeSelectToSpeculate(SelectInst *SI, const DataLayout *TD) {
- bool TDerefable = SI->getTrueValue()->isDereferenceablePointer();
- bool FDerefable = SI->getFalseValue()->isDereferenceablePointer();
+static bool isSafeSelectToSpeculate(SelectInst *SI, const DataLayout *DL) {
+ bool TDerefable = SI->getTrueValue()->isDereferenceablePointer(DL);
+ bool FDerefable = SI->getFalseValue()->isDereferenceablePointer(DL);
- for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end();
- UI != UE; ++UI) {
- LoadInst *LI = dyn_cast<LoadInst>(*UI);
- if (LI == 0 || !LI->isSimple()) return false;
+ for (User *U : SI->users()) {
+ LoadInst *LI = dyn_cast<LoadInst>(U);
+ if (!LI || !LI->isSimple()) return false;
// Both operands to the select need to be dereferenceable, either absolutely
// (e.g. allocas) or at this point because we can see other accesses to it.
if (!TDerefable && !isSafeToLoadUnconditionally(SI->getTrueValue(), LI,
- LI->getAlignment(), TD))
+ LI->getAlignment(), DL))
return false;
if (!FDerefable && !isSafeToLoadUnconditionally(SI->getFalseValue(), LI,
- LI->getAlignment(), TD))
+ LI->getAlignment(), DL))
return false;
}
@@ -1172,17 +1178,16 @@ static bool isSafeSelectToSpeculate(SelectInst *SI, const DataLayout *TD) {
///
/// We can do this to a PHI if its only uses are loads and if the operands of
/// the PHI can be loaded unconditionally.
-static bool isSafePHIToSpeculate(PHINode *PN, const DataLayout *TD) {
+static bool isSafePHIToSpeculate(PHINode *PN, const DataLayout *DL) {
// For now, we can only do this promotion if the load is in the same block as
// the PHI, and if there are no stores between the phi and load.
// TODO: Allow recursive phi users.
// TODO: Allow stores.
BasicBlock *BB = PN->getParent();
unsigned MaxAlign = 0;
- for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end();
- UI != UE; ++UI) {
- LoadInst *LI = dyn_cast<LoadInst>(*UI);
- if (LI == 0 || !LI->isSimple()) return false;
+ for (User *U : PN->users()) {
+ LoadInst *LI = dyn_cast<LoadInst>(U);
+ if (!LI || !LI->isSimple()) return false;
// For now we only allow loads in the same block as the PHI. This is a
// common case that happens when instcombine merges two loads through a PHI.
@@ -1221,8 +1226,8 @@ static bool isSafePHIToSpeculate(PHINode *PN, const DataLayout *TD) {
// If this pointer is always safe to load, or if we can prove that there is
// already a load in the block, then we can move the load to the pred block.
- if (InVal->isDereferenceablePointer() ||
- isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign, TD))
+ if (InVal->isDereferenceablePointer(DL) ||
+ isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign, DL))
continue;
return false;
@@ -1236,13 +1241,10 @@ static bool isSafePHIToSpeculate(PHINode *PN, const DataLayout *TD) {
/// direct (non-volatile) loads and stores to it. If the alloca is close but
/// not quite there, this will transform the code to allow promotion. As such,
/// it is a non-pure predicate.
-static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *TD) {
+static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *DL) {
SetVector<Instruction*, SmallVector<Instruction*, 4>,
SmallPtrSet<Instruction*, 4> > InstsToRewrite;
-
- for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end();
- UI != UE; ++UI) {
- User *U = *UI;
+ for (User *U : AI->users()) {
if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
if (!LI->isSimple())
return false;
@@ -1265,12 +1267,12 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *TD) {
// This is very rare and we just scrambled the use list of AI, start
// over completely.
- return tryToMakeAllocaBePromotable(AI, TD);
+ return tryToMakeAllocaBePromotable(AI, DL);
}
// If it is safe to turn "load (select c, AI, ptr)" into a select of two
// loads, then we can transform this by rewriting the select.
- if (!isSafeSelectToSpeculate(SI, TD))
+ if (!isSafeSelectToSpeculate(SI, DL))
return false;
InstsToRewrite.insert(SI);
@@ -1285,7 +1287,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *TD) {
// If it is safe to turn "load (phi [AI, ptr, ...])" into a PHI of loads
// in the pred blocks, then we can transform this by rewriting the PHI.
- if (!isSafePHIToSpeculate(PN, TD))
+ if (!isSafePHIToSpeculate(PN, DL))
return false;
InstsToRewrite.insert(PN);
@@ -1312,12 +1314,9 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *TD) {
for (unsigned i = 0, e = InstsToRewrite.size(); i != e; ++i) {
if (BitCastInst *BCI = dyn_cast<BitCastInst>(InstsToRewrite[i])) {
// This could only be a bitcast used by nothing but lifetime intrinsics.
- for (BitCastInst::use_iterator I = BCI->use_begin(), E = BCI->use_end();
- I != E;) {
- Use &U = I.getUse();
- ++I;
- cast<Instruction>(U.getUser())->eraseFromParent();
- }
+ for (BitCastInst::user_iterator I = BCI->user_begin(), E = BCI->user_end();
+ I != E;)
+ cast<Instruction>(*I++)->eraseFromParent();
BCI->eraseFromParent();
continue;
}
@@ -1326,7 +1325,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *TD) {
// Selects in InstsToRewrite only have load uses. Rewrite each as two
// loads with a new select.
while (!SI->use_empty()) {
- LoadInst *LI = cast<LoadInst>(SI->use_back());
+ LoadInst *LI = cast<LoadInst>(SI->user_back());
IRBuilder<> Builder(LI);
LoadInst *TrueLoad =
@@ -1367,13 +1366,13 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *TD) {
// Get the TBAA tag and alignment to use from one of the loads. It doesn't
// matter which one we get and if any differ, it doesn't matter.
- LoadInst *SomeLoad = cast<LoadInst>(PN->use_back());
+ LoadInst *SomeLoad = cast<LoadInst>(PN->user_back());
MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa);
unsigned Align = SomeLoad->getAlignment();
// Rewrite all loads of the PN to use the new PHI.
while (!PN->use_empty()) {
- LoadInst *LI = cast<LoadInst>(PN->use_back());
+ LoadInst *LI = cast<LoadInst>(PN->user_back());
LI->replaceAllUsesWith(NewPN);
LI->eraseFromParent();
}
@@ -1385,7 +1384,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *TD) {
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
BasicBlock *Pred = PN->getIncomingBlock(i);
LoadInst *&Load = InsertedLoads[Pred];
- if (Load == 0) {
+ if (!Load) {
Load = new LoadInst(PN->getIncomingValue(i),
PN->getName() + "." + Pred->getName(),
Pred->getTerminator());
@@ -1405,9 +1404,9 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *TD) {
bool SROA::performPromotion(Function &F) {
std::vector<AllocaInst*> Allocas;
- DominatorTree *DT = 0;
+ DominatorTree *DT = nullptr;
if (HasDomTree)
- DT = &getAnalysis<DominatorTree>();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function
DIBuilder DIB(*F.getParent());
@@ -1420,7 +1419,7 @@ bool SROA::performPromotion(Function &F) {
// the entry node
for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca?
- if (tryToMakeAllocaBePromotable(AI, TD))
+ if (tryToMakeAllocaBePromotable(AI, DL))
Allocas.push_back(AI);
if (Allocas.empty()) break;
@@ -1433,9 +1432,8 @@ bool SROA::performPromotion(Function &F) {
AllocaInst *AI = Allocas[i];
// Build list of instructions to promote.
- for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
- UI != E; ++UI)
- Insts.push_back(cast<Instruction>(*UI));
+ for (User *U : AI->users())
+ Insts.push_back(cast<Instruction>(U));
AllocaPromoter(Insts, SSA, &DIB).run(AI, Insts);
Insts.clear();
}
@@ -1496,7 +1494,7 @@ bool SROA::performScalarRepl(Function &F) {
// transform the allocation instruction if it is an array allocation
// (allocations OF arrays are ok though), and an allocation of a scalar
// value cannot be decomposed at all.
- uint64_t AllocaSize = TD->getTypeAllocSize(AI->getAllocatedType());
+ uint64_t AllocaSize = DL->getTypeAllocSize(AI->getAllocatedType());
// Do not promote [0 x %struct].
if (AllocaSize == 0) continue;
@@ -1520,7 +1518,7 @@ bool SROA::performScalarRepl(Function &F) {
// that we can't just check based on the type: the alloca may be of an i32
// but that has pointer arithmetic to set byte 3 of it or something.
if (AllocaInst *NewAI = ConvertToScalarInfo(
- (unsigned)AllocaSize, *TD, ScalarLoadThreshold).TryConvert(AI)) {
+ (unsigned)AllocaSize, *DL, ScalarLoadThreshold).TryConvert(AI)) {
NewAI->takeName(AI);
AI->eraseFromParent();
++NumConverted;
@@ -1543,7 +1541,7 @@ void SROA::DoScalarReplacement(AllocaInst *AI,
if (StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
ElementAllocas.reserve(ST->getNumContainedTypes());
for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) {
- AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0,
+ AllocaInst *NA = new AllocaInst(ST->getContainedType(i), nullptr,
AI->getAlignment(),
AI->getName() + "." + Twine(i), AI);
ElementAllocas.push_back(NA);
@@ -1554,7 +1552,7 @@ void SROA::DoScalarReplacement(AllocaInst *AI,
ElementAllocas.reserve(AT->getNumElements());
Type *ElTy = AT->getElementType();
for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
- AllocaInst *NA = new AllocaInst(ElTy, 0, AI->getAlignment(),
+ AllocaInst *NA = new AllocaInst(ElTy, nullptr, AI->getAlignment(),
AI->getName() + "." + Twine(i), AI);
ElementAllocas.push_back(NA);
WorkList.push_back(NA); // Add to worklist for recursive processing
@@ -1583,7 +1581,7 @@ void SROA::DeleteDeadInstructions() {
// Zero out the operand and see if it becomes trivially dead.
// (But, don't add allocas to the dead instruction list -- they are
// already on the worklist and will be deleted separately.)
- *OI = 0;
+ *OI = nullptr;
if (isInstructionTriviallyDead(U) && !isa<AllocaInst>(U))
DeadInsts.push_back(U);
}
@@ -1598,8 +1596,8 @@ void SROA::DeleteDeadInstructions() {
/// referenced by this instruction.
void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
AllocaInfo &Info) {
- for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
- Instruction *User = cast<Instruction>(*UI);
+ for (Use &U : I->uses()) {
+ Instruction *User = cast<Instruction>(U.getUser());
if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
isSafeForScalarRepl(BC, Offset, Info);
@@ -1610,19 +1608,17 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
isSafeForScalarRepl(GEPI, GEPOffset, Info);
} else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
- if (Length == 0)
- return MarkUnsafe(Info, User);
- if (Length->isNegative())
+ if (!Length || Length->isNegative())
return MarkUnsafe(Info, User);
- isSafeMemAccess(Offset, Length->getZExtValue(), 0,
- UI.getOperandNo() == 0, Info, MI,
+ isSafeMemAccess(Offset, Length->getZExtValue(), nullptr,
+ U.getOperandNo() == 0, Info, MI,
true /*AllowWholeAccess*/);
} else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
if (!LI->isSimple())
return MarkUnsafe(Info, User);
Type *LIType = LI->getType();
- isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType),
+ isSafeMemAccess(Offset, DL->getTypeAllocSize(LIType),
LIType, false, Info, LI, true /*AllowWholeAccess*/);
Info.hasALoadOrStore = true;
@@ -1632,7 +1628,7 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
return MarkUnsafe(Info, User);
Type *SIType = SI->getOperand(0)->getType();
- isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType),
+ isSafeMemAccess(Offset, DL->getTypeAllocSize(SIType),
SIType, true, Info, SI, true /*AllowWholeAccess*/);
Info.hasALoadOrStore = true;
} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
@@ -1665,39 +1661,39 @@ void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
if (!Info.CheckedPHIs.insert(PN))
return;
- for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
- Instruction *User = cast<Instruction>(*UI);
+ for (User *U : I->users()) {
+ Instruction *UI = cast<Instruction>(U);
- if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(UI)) {
isSafePHISelectUseForScalarRepl(BC, Offset, Info);
- } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
+ } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(UI)) {
// Only allow "bitcast" GEPs for simplicity. We could generalize this,
// but would have to prove that we're staying inside of an element being
// promoted.
if (!GEPI->hasAllZeroIndices())
- return MarkUnsafe(Info, User);
+ return MarkUnsafe(Info, UI);
isSafePHISelectUseForScalarRepl(GEPI, Offset, Info);
- } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(UI)) {
if (!LI->isSimple())
- return MarkUnsafe(Info, User);
+ return MarkUnsafe(Info, UI);
Type *LIType = LI->getType();
- isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType),
+ isSafeMemAccess(Offset, DL->getTypeAllocSize(LIType),
LIType, false, Info, LI, false /*AllowWholeAccess*/);
Info.hasALoadOrStore = true;
- } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
// Store is ok if storing INTO the pointer, not storing the pointer
if (!SI->isSimple() || SI->getOperand(0) == I)
- return MarkUnsafe(Info, User);
+ return MarkUnsafe(Info, UI);
Type *SIType = SI->getOperand(0)->getType();
- isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType),
+ isSafeMemAccess(Offset, DL->getTypeAllocSize(SIType),
SIType, true, Info, SI, false /*AllowWholeAccess*/);
Info.hasALoadOrStore = true;
- } else if (isa<PHINode>(User) || isa<SelectInst>(User)) {
- isSafePHISelectUseForScalarRepl(User, Offset, Info);
+ } else if (isa<PHINode>(UI) || isa<SelectInst>(UI)) {
+ isSafePHISelectUseForScalarRepl(UI, Offset, Info);
} else {
- return MarkUnsafe(Info, User);
+ return MarkUnsafe(Info, UI);
}
if (Info.isUnsafe) return;
}
@@ -1731,12 +1727,12 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI,
// Compute the offset due to this GEP and check if the alloca has a
// component element at that offset.
SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
- // If this GEP is non constant then the last operand must have been a
+ // If this GEP is non-constant then the last operand must have been a
// dynamic index into a vector. Pop this now as it has no impact on the
// constant part of the offset.
if (NonConstant)
Indices.pop_back();
- Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), Indices);
+ Offset += DL->getIndexedOffset(GEPI->getPointerOperandType(), Indices);
if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset,
NonConstantIdxSize))
MarkUnsafe(Info, GEPI);
@@ -1750,12 +1746,12 @@ static bool isHomogeneousAggregate(Type *T, unsigned &NumElts,
Type *&EltTy) {
if (ArrayType *AT = dyn_cast<ArrayType>(T)) {
NumElts = AT->getNumElements();
- EltTy = (NumElts == 0 ? 0 : AT->getElementType());
+ EltTy = (NumElts == 0 ? nullptr : AT->getElementType());
return true;
}
if (StructType *ST = dyn_cast<StructType>(T)) {
NumElts = ST->getNumContainedTypes();
- EltTy = (NumElts == 0 ? 0 : ST->getContainedType(0));
+ EltTy = (NumElts == 0 ? nullptr : ST->getContainedType(0));
for (unsigned n = 1; n < NumElts; ++n) {
if (ST->getContainedType(n) != EltTy)
return false;
@@ -1795,7 +1791,7 @@ void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
bool AllowWholeAccess) {
// Check if this is a load/store of the entire alloca.
if (Offset == 0 && AllowWholeAccess &&
- MemSize == TD->getTypeAllocSize(Info.AI->getAllocatedType())) {
+ MemSize == DL->getTypeAllocSize(Info.AI->getAllocatedType())) {
// This can be safe for MemIntrinsics (where MemOpType is 0) and integer
// loads/stores (which are essentially the same as the MemIntrinsics with
// regard to copying padding between elements). But, if an alloca is
@@ -1832,20 +1828,20 @@ bool SROA::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size) {
Type *EltTy;
uint64_t EltSize;
if (StructType *ST = dyn_cast<StructType>(T)) {
- const StructLayout *Layout = TD->getStructLayout(ST);
+ const StructLayout *Layout = DL->getStructLayout(ST);
unsigned EltIdx = Layout->getElementContainingOffset(Offset);
EltTy = ST->getContainedType(EltIdx);
- EltSize = TD->getTypeAllocSize(EltTy);
+ EltSize = DL->getTypeAllocSize(EltTy);
Offset -= Layout->getElementOffset(EltIdx);
} else if (ArrayType *AT = dyn_cast<ArrayType>(T)) {
EltTy = AT->getElementType();
- EltSize = TD->getTypeAllocSize(EltTy);
+ EltSize = DL->getTypeAllocSize(EltTy);
if (Offset >= AT->getNumElements() * EltSize)
return false;
Offset %= EltSize;
} else if (VectorType *VT = dyn_cast<VectorType>(T)) {
EltTy = VT->getElementType();
- EltSize = TD->getTypeAllocSize(EltTy);
+ EltSize = DL->getTypeAllocSize(EltTy);
if (Offset >= VT->getNumElements() * EltSize)
return false;
Offset %= EltSize;
@@ -1867,8 +1863,8 @@ bool SROA::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size) {
void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
SmallVectorImpl<AllocaInst *> &NewElts) {
for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E;) {
- Use &TheUse = UI.getUse();
- Instruction *User = cast<Instruction>(*UI++);
+ Use &TheUse = *UI++;
+ Instruction *User = cast<Instruction>(TheUse.getUser());
if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
RewriteBitCast(BC, AI, Offset, NewElts);
@@ -1884,7 +1880,7 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
uint64_t MemSize = Length->getZExtValue();
if (Offset == 0 &&
- MemSize == TD->getTypeAllocSize(AI->getAllocatedType()))
+ MemSize == DL->getTypeAllocSize(AI->getAllocatedType()))
RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts);
// Otherwise the intrinsic can only touch a single element and the
// address operand will be updated, so nothing else needs to be done.
@@ -1920,8 +1916,8 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
LI->replaceAllUsesWith(Insert);
DeadInsts.push_back(LI);
} else if (LIType->isIntegerTy() &&
- TD->getTypeAllocSize(LIType) ==
- TD->getTypeAllocSize(AI->getAllocatedType())) {
+ DL->getTypeAllocSize(LIType) ==
+ DL->getTypeAllocSize(AI->getAllocatedType())) {
// If this is a load of the entire alloca to an integer, rewrite it.
RewriteLoadUserOfWholeAlloca(LI, AI, NewElts);
}
@@ -1947,8 +1943,8 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
}
DeadInsts.push_back(SI);
} else if (SIType->isIntegerTy() &&
- TD->getTypeAllocSize(SIType) ==
- TD->getTypeAllocSize(AI->getAllocatedType())) {
+ DL->getTypeAllocSize(SIType) ==
+ DL->getTypeAllocSize(AI->getAllocatedType())) {
// If this is a store of the entire alloca from an integer, rewrite it.
RewriteStoreUserOfWholeAlloca(SI, AI, NewElts);
}
@@ -2010,7 +2006,7 @@ uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset,
Type *&IdxTy) {
uint64_t Idx = 0;
if (StructType *ST = dyn_cast<StructType>(T)) {
- const StructLayout *Layout = TD->getStructLayout(ST);
+ const StructLayout *Layout = DL->getStructLayout(ST);
Idx = Layout->getElementContainingOffset(Offset);
T = ST->getContainedType(Idx);
Offset -= Layout->getElementOffset(Idx);
@@ -2018,7 +2014,7 @@ uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset,
return Idx;
} else if (ArrayType *AT = dyn_cast<ArrayType>(T)) {
T = AT->getElementType();
- uint64_t EltSize = TD->getTypeAllocSize(T);
+ uint64_t EltSize = DL->getTypeAllocSize(T);
Idx = Offset / EltSize;
Offset -= Idx * EltSize;
IdxTy = Type::getInt64Ty(T->getContext());
@@ -2026,7 +2022,7 @@ uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset,
}
VectorType *VT = cast<VectorType>(T);
T = VT->getElementType();
- uint64_t EltSize = TD->getTypeAllocSize(T);
+ uint64_t EltSize = DL->getTypeAllocSize(T);
Idx = Offset / EltSize;
Offset -= Idx * EltSize;
IdxTy = Type::getInt64Ty(T->getContext());
@@ -2044,10 +2040,10 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
// In this case, it must be the last GEP operand which is dynamic so keep that
// aside until we've found the constant GEP offset then add it back in at the
// end.
- Value* NonConstantIdx = 0;
+ Value* NonConstantIdx = nullptr;
if (!GEPI->hasAllConstantIndices())
NonConstantIdx = Indices.pop_back_val();
- Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), Indices);
+ Offset += DL->getIndexedOffset(GEPI->getPointerOperandType(), Indices);
RewriteForScalarRepl(GEPI, AI, Offset, NewElts);
@@ -2114,11 +2110,12 @@ void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
if (NewOffset) {
// Splice the first element and index 'NewOffset' bytes in. SROA will
// split the alloca again later.
- Value *V = Builder.CreateBitCast(NewElts[Idx], Builder.getInt8PtrTy());
+ unsigned AS = AI->getType()->getAddressSpace();
+ Value *V = Builder.CreateBitCast(NewElts[Idx], Builder.getInt8PtrTy(AS));
V = Builder.CreateGEP(V, Builder.getInt64(NewOffset));
IdxTy = NewElts[Idx]->getAllocatedType();
- uint64_t EltSize = TD->getTypeAllocSize(IdxTy) - NewOffset;
+ uint64_t EltSize = DL->getTypeAllocSize(IdxTy) - NewOffset;
if (EltSize > Size) {
EltSize = Size;
Size = 0;
@@ -2134,7 +2131,7 @@ void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
for (; Idx != NewElts.size() && Size; ++Idx) {
IdxTy = NewElts[Idx]->getAllocatedType();
- uint64_t EltSize = TD->getTypeAllocSize(IdxTy);
+ uint64_t EltSize = DL->getTypeAllocSize(IdxTy);
if (EltSize > Size) {
EltSize = Size;
Size = 0;
@@ -2161,7 +2158,7 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
// appropriate type. The "Other" pointer is the pointer that goes to memory
// that doesn't have anything to do with the alloca that we are promoting. For
// memset, this Value* stays null.
- Value *OtherPtr = 0;
+ Value *OtherPtr = nullptr;
unsigned MemAlignment = MI->getAlignment();
if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { // memmove/memcopy
if (Inst == MTI->getRawDest())
@@ -2213,7 +2210,7 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
// If this is a memcpy/memmove, emit a GEP of the other element address.
- Value *OtherElt = 0;
+ Value *OtherElt = nullptr;
unsigned OtherEltAlign = MemAlignment;
if (OtherPtr) {
@@ -2226,10 +2223,10 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType());
Type *OtherTy = OtherPtrTy->getElementType();
if (StructType *ST = dyn_cast<StructType>(OtherTy)) {
- EltOffset = TD->getStructLayout(ST)->getElementOffset(i);
+ EltOffset = DL->getStructLayout(ST)->getElementOffset(i);
} else {
Type *EltTy = cast<SequentialType>(OtherTy)->getElementType();
- EltOffset = TD->getTypeAllocSize(EltTy)*i;
+ EltOffset = DL->getTypeAllocSize(EltTy)*i;
}
// The alignment of the other pointer is the guaranteed alignment of the
@@ -2270,7 +2267,7 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
Type *ValTy = EltTy->getScalarType();
// Construct an integer with the right value.
- unsigned EltSize = TD->getTypeSizeInBits(ValTy);
+ unsigned EltSize = DL->getTypeSizeInBits(ValTy);
APInt OneVal(EltSize, CI->getZExtValue());
APInt TotalVal(OneVal);
// Set each byte.
@@ -2300,7 +2297,7 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
// this element.
}
- unsigned EltSize = TD->getTypeAllocSize(EltTy);
+ unsigned EltSize = DL->getTypeAllocSize(EltTy);
if (!EltSize)
continue;
@@ -2334,12 +2331,12 @@ SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
// and store the element value to the individual alloca.
Value *SrcVal = SI->getOperand(0);
Type *AllocaEltTy = AI->getAllocatedType();
- uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
+ uint64_t AllocaSizeBits = DL->getTypeAllocSizeInBits(AllocaEltTy);
IRBuilder<> Builder(SI);
// Handle tail padding by extending the operand
- if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
+ if (DL->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
SrcVal = Builder.CreateZExt(SrcVal,
IntegerType::get(SI->getContext(), AllocaSizeBits));
@@ -2349,15 +2346,15 @@ SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
// There are two forms here: AI could be an array or struct. Both cases
// have different ways to compute the element offset.
if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
- const StructLayout *Layout = TD->getStructLayout(EltSTy);
+ const StructLayout *Layout = DL->getStructLayout(EltSTy);
for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
// Get the number of bits to shift SrcVal to get the value.
Type *FieldTy = EltSTy->getElementType(i);
uint64_t Shift = Layout->getElementOffsetInBits(i);
- if (TD->isBigEndian())
- Shift = AllocaSizeBits-Shift-TD->getTypeAllocSizeInBits(FieldTy);
+ if (DL->isBigEndian())
+ Shift = AllocaSizeBits-Shift-DL->getTypeAllocSizeInBits(FieldTy);
Value *EltVal = SrcVal;
if (Shift) {
@@ -2366,7 +2363,7 @@ SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
}
// Truncate down to an integer of the right size.
- uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);
+ uint64_t FieldSizeBits = DL->getTypeSizeInBits(FieldTy);
// Ignore zero sized fields like {}, they obviously contain no data.
if (FieldSizeBits == 0) continue;
@@ -2391,12 +2388,12 @@ SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
} else {
ArrayType *ATy = cast<ArrayType>(AllocaEltTy);
Type *ArrayEltTy = ATy->getElementType();
- uint64_t ElementOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
- uint64_t ElementSizeBits = TD->getTypeSizeInBits(ArrayEltTy);
+ uint64_t ElementOffset = DL->getTypeAllocSizeInBits(ArrayEltTy);
+ uint64_t ElementSizeBits = DL->getTypeSizeInBits(ArrayEltTy);
uint64_t Shift;
- if (TD->isBigEndian())
+ if (DL->isBigEndian())
Shift = AllocaSizeBits-ElementOffset;
else
Shift = 0;
@@ -2430,7 +2427,7 @@ SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
}
new StoreInst(EltVal, DestField, SI);
- if (TD->isBigEndian())
+ if (DL->isBigEndian())
Shift -= ElementOffset;
else
Shift += ElementOffset;
@@ -2448,20 +2445,20 @@ SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
// Extract each element out of the NewElts according to its structure offset
// and form the result value.
Type *AllocaEltTy = AI->getAllocatedType();
- uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
+ uint64_t AllocaSizeBits = DL->getTypeAllocSizeInBits(AllocaEltTy);
DEBUG(dbgs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI
<< '\n');
// There are two forms here: AI could be an array or struct. Both cases
// have different ways to compute the element offset.
- const StructLayout *Layout = 0;
+ const StructLayout *Layout = nullptr;
uint64_t ArrayEltBitOffset = 0;
if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
- Layout = TD->getStructLayout(EltSTy);
+ Layout = DL->getStructLayout(EltSTy);
} else {
Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType();
- ArrayEltBitOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
+ ArrayEltBitOffset = DL->getTypeAllocSizeInBits(ArrayEltTy);
}
Value *ResultVal =
@@ -2473,7 +2470,7 @@ SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
Value *SrcField = NewElts[i];
Type *FieldTy =
cast<PointerType>(SrcField->getType())->getElementType();
- uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);
+ uint64_t FieldSizeBits = DL->getTypeSizeInBits(FieldTy);
// Ignore zero sized fields like {}, they obviously contain no data.
if (FieldSizeBits == 0) continue;
@@ -2504,7 +2501,7 @@ SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
else // Array case.
Shift = i*ArrayEltBitOffset;
- if (TD->isBigEndian())
+ if (DL->isBigEndian())
Shift = AllocaSizeBits-Shift-FieldIntTy->getBitWidth();
if (Shift) {
@@ -2521,7 +2518,7 @@ SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
}
// Handle tail padding by truncating the result
- if (TD->getTypeSizeInBits(LI->getType()) != AllocaSizeBits)
+ if (DL->getTypeSizeInBits(LI->getType()) != AllocaSizeBits)
ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI);
LI->replaceAllUsesWith(ResultVal);
@@ -2531,15 +2528,15 @@ SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
/// HasPadding - Return true if the specified type has any structure or
/// alignment padding in between the elements that would be split apart
/// by SROA; return false otherwise.
-static bool HasPadding(Type *Ty, const DataLayout &TD) {
+static bool HasPadding(Type *Ty, const DataLayout &DL) {
if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
Ty = ATy->getElementType();
- return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty);
+ return DL.getTypeSizeInBits(Ty) != DL.getTypeAllocSizeInBits(Ty);
}
// SROA currently handles only Arrays and Structs.
StructType *STy = cast<StructType>(Ty);
- const StructLayout *SL = TD.getStructLayout(STy);
+ const StructLayout *SL = DL.getStructLayout(STy);
unsigned PrevFieldBitOffset = 0;
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
unsigned FieldBitOffset = SL->getElementOffsetInBits(i);
@@ -2548,7 +2545,7 @@ static bool HasPadding(Type *Ty, const DataLayout &TD) {
// previous one.
if (i) {
unsigned PrevFieldEnd =
- PrevFieldBitOffset+TD.getTypeSizeInBits(STy->getElementType(i-1));
+ PrevFieldBitOffset+DL.getTypeSizeInBits(STy->getElementType(i-1));
if (PrevFieldEnd < FieldBitOffset)
return true;
}
@@ -2557,7 +2554,7 @@ static bool HasPadding(Type *Ty, const DataLayout &TD) {
// Check for tail padding.
if (unsigned EltCount = STy->getNumElements()) {
unsigned PrevFieldEnd = PrevFieldBitOffset +
- TD.getTypeSizeInBits(STy->getElementType(EltCount-1));
+ DL.getTypeSizeInBits(STy->getElementType(EltCount-1));
if (PrevFieldEnd < SL->getSizeInBits())
return true;
}
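
A concrete case shows what HasPadding() guards against: on a typical layout, {i8, i32} carries 24 bits of inter-field padding that a memcpy copies verbatim, so SROA must not split such an alloca when it is both a memcpy source and destination. A small sketch with an assumed data layout string (not taken from this change):

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"
    using namespace llvm;

    // Sketch only: detect the padding between the fields of {i8, i32}.
    bool exampleHasPadding() {
      LLVMContext Ctx;
      DataLayout DL("e-i32:32:32");                     // assumed layout string
      Type *Fields[] = {Type::getInt8Ty(Ctx), Type::getInt32Ty(Ctx)};
      StructType *STy = StructType::get(Ctx, Fields);
      const StructLayout *SL = DL.getStructLayout(STy);
      // Field 0 ends at bit 8; field 1 starts at bit 32 here, so bits 8..31
      // are padding, which is exactly the check HasPadding() performs above.
      uint64_t PrevFieldEnd = DL.getTypeSizeInBits(Fields[0]);   // 8
      return PrevFieldEnd < SL->getElementOffsetInBits(1);
    }
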
@@ -2584,7 +2581,7 @@ bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
// types, but may actually be used. In these cases, we refuse to promote the
// struct.
if (Info.isMemCpySrc && Info.isMemCpyDst &&
- HasPadding(AI->getAllocatedType(), *TD))
+ HasPadding(AI->getAllocatedType(), *DL))
return false;
// If the alloca never has an access to just *part* of it, but is accessed
diff --git a/contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp
new file mode 100644
index 000000000000..7a73f113b1d9
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -0,0 +1,663 @@
+//===--- Scalarizer.cpp - Scalarize vector operations ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass converts vector operations into scalar operations, in order
+// to expose optimization opportunities on the individual scalar operations.
+// It is mainly intended for targets that do not have vector units, but it
+// may also be useful for revectorizing code to different vector widths.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "scalarizer"
+
+namespace {
+// Used to store the scattered form of a vector.
+typedef SmallVector<Value *, 8> ValueVector;
+
+// Used to map a vector Value to its scattered form. We use std::map
+// because we want iterators to persist across insertion and because the
+// values are relatively large.
+typedef std::map<Value *, ValueVector> ScatterMap;
+
+// Lists Instructions that have been replaced with scalar implementations,
+// along with a pointer to their scattered forms.
+typedef SmallVector<std::pair<Instruction *, ValueVector *>, 16> GatherList;
+
+// Provides a very limited vector-like interface for lazily accessing one
+// component of a scattered vector or vector pointer.
+class Scatterer {
+public:
+ Scatterer() {}
+
+ // Scatter V into Size components. If new instructions are needed,
+ // insert them before BBI in BB. If CachePtr is nonnull, use it to cache
+ // the results.
+ Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
+ ValueVector *cachePtr = nullptr);
+
+ // Return component I, creating a new Value for it if necessary.
+ Value *operator[](unsigned I);
+
+ // Return the number of components.
+ unsigned size() const { return Size; }
+
+private:
+ BasicBlock *BB;
+ BasicBlock::iterator BBI;
+ Value *V;
+ ValueVector *CachePtr;
+ PointerType *PtrTy;
+ ValueVector Tmp;
+ unsigned Size;
+};
+
+// FCmpSplitter(FCI)(Builder, X, Y, Name) uses Builder to create an FCmp
+// called Name that compares X and Y in the same way as FCI.
+struct FCmpSplitter {
+ FCmpSplitter(FCmpInst &fci) : FCI(fci) {}
+ Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
+ const Twine &Name) const {
+ return Builder.CreateFCmp(FCI.getPredicate(), Op0, Op1, Name);
+ }
+ FCmpInst &FCI;
+};
+
+// ICmpSplitter(ICI)(Builder, X, Y, Name) uses Builder to create an ICmp
+// called Name that compares X and Y in the same way as ICI.
+struct ICmpSplitter {
+ ICmpSplitter(ICmpInst &ici) : ICI(ici) {}
+ Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
+ const Twine &Name) const {
+ return Builder.CreateICmp(ICI.getPredicate(), Op0, Op1, Name);
+ }
+ ICmpInst &ICI;
+};
+
+// BinarySplitter(BO)(Builder, X, Y, Name) uses Builder to create
+// a binary operator like BO called Name with operands X and Y.
+struct BinarySplitter {
+ BinarySplitter(BinaryOperator &bo) : BO(bo) {}
+ Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
+ const Twine &Name) const {
+ return Builder.CreateBinOp(BO.getOpcode(), Op0, Op1, Name);
+ }
+ BinaryOperator &BO;
+};
+
+// Information about a load or store that we're scalarizing.
+struct VectorLayout {
+ VectorLayout() : VecTy(nullptr), ElemTy(nullptr), VecAlign(0), ElemSize(0) {}
+
+ // Return the alignment of element I.
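+  // For instance (illustrative, assuming a <4 x float> vector with
+  // VecAlign == 16 and ElemSize == 4), the element alignments come out
+  // as 16, 4, 8 and 4.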
+ uint64_t getElemAlign(unsigned I) {
+ return MinAlign(VecAlign, I * ElemSize);
+ }
+
+ // The type of the vector.
+ VectorType *VecTy;
+
+ // The type of each element.
+ Type *ElemTy;
+
+ // The alignment of the vector.
+ uint64_t VecAlign;
+
+ // The size of each element.
+ uint64_t ElemSize;
+};
+
+class Scalarizer : public FunctionPass,
+ public InstVisitor<Scalarizer, bool> {
+public:
+ static char ID;
+
+ Scalarizer() :
+ FunctionPass(ID) {
+ initializeScalarizerPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool doInitialization(Module &M) override;
+ bool runOnFunction(Function &F) override;
+
+ // InstVisitor methods. They return true if the instruction was scalarized,
+ // false if nothing changed.
+ bool visitInstruction(Instruction &) { return false; }
+ bool visitSelectInst(SelectInst &SI);
+ bool visitICmpInst(ICmpInst &);
+ bool visitFCmpInst(FCmpInst &);
+ bool visitBinaryOperator(BinaryOperator &);
+ bool visitGetElementPtrInst(GetElementPtrInst &);
+ bool visitCastInst(CastInst &);
+ bool visitBitCastInst(BitCastInst &);
+ bool visitShuffleVectorInst(ShuffleVectorInst &);
+ bool visitPHINode(PHINode &);
+ bool visitLoadInst(LoadInst &);
+ bool visitStoreInst(StoreInst &);
+
+private:
+ Scatterer scatter(Instruction *, Value *);
+ void gather(Instruction *, const ValueVector &);
+ bool canTransferMetadata(unsigned Kind);
+ void transferMetadata(Instruction *, const ValueVector &);
+ bool getVectorLayout(Type *, unsigned, VectorLayout &);
+ bool finish();
+
+ template<typename T> bool splitBinary(Instruction &, const T &);
+
+ ScatterMap Scattered;
+ GatherList Gathered;
+ unsigned ParallelLoopAccessMDKind;
+ const DataLayout *DL;
+};
+
+char Scalarizer::ID = 0;
+} // end anonymous namespace
+
+// This is disabled by default because having separate loads and stores makes
+// it more likely that the -combiner-alias-analysis limits will be reached.
+static cl::opt<bool> ScalarizeLoadStore
+ ("scalarize-load-store", cl::Hidden, cl::init(false),
+   cl::desc("Allow the scalarizer pass to scalarize loads and stores"));
+
+INITIALIZE_PASS(Scalarizer, "scalarizer", "Scalarize vector operations",
+ false, false)
+
+Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
+ ValueVector *cachePtr)
+ : BB(bb), BBI(bbi), V(v), CachePtr(cachePtr) {
+ Type *Ty = V->getType();
+ PtrTy = dyn_cast<PointerType>(Ty);
+ if (PtrTy)
+ Ty = PtrTy->getElementType();
+ Size = Ty->getVectorNumElements();
+ if (!CachePtr)
+ Tmp.resize(Size, nullptr);
+ else if (CachePtr->empty())
+ CachePtr->resize(Size, nullptr);
+ else
+ assert(Size == CachePtr->size() && "Inconsistent vector sizes");
+}
+
+// Return component I, creating a new Value for it if necessary.
+Value *Scatterer::operator[](unsigned I) {
+ ValueVector &CV = (CachePtr ? *CachePtr : Tmp);
+ // Try to reuse a previous value.
+ if (CV[I])
+ return CV[I];
+ IRBuilder<> Builder(BB, BBI);
+ if (PtrTy) {
+ if (!CV[0]) {
+ Type *Ty =
+ PointerType::get(PtrTy->getElementType()->getVectorElementType(),
+ PtrTy->getAddressSpace());
+ CV[0] = Builder.CreateBitCast(V, Ty, V->getName() + ".i0");
+ }
+ if (I != 0)
+ CV[I] = Builder.CreateConstGEP1_32(CV[0], I,
+ V->getName() + ".i" + Twine(I));
+ } else {
+ // Search through a chain of InsertElementInsts looking for element I.
+ // Record other elements in the cache. The new V is still suitable
+ // for all uncached indices.
+ for (;;) {
+ InsertElementInst *Insert = dyn_cast<InsertElementInst>(V);
+ if (!Insert)
+ break;
+ ConstantInt *Idx = dyn_cast<ConstantInt>(Insert->getOperand(2));
+ if (!Idx)
+ break;
+ unsigned J = Idx->getZExtValue();
+ CV[J] = Insert->getOperand(1);
+ V = Insert->getOperand(0);
+ if (I == J)
+ return CV[J];
+ }
+ CV[I] = Builder.CreateExtractElement(V, Builder.getInt32(I),
+ V->getName() + ".i" + Twine(I));
+ }
+ return CV[I];
+}
+
+bool Scalarizer::doInitialization(Module &M) {
+ ParallelLoopAccessMDKind =
+ M.getContext().getMDKindID("llvm.mem.parallel_loop_access");
+ return false;
+}
+
+bool Scalarizer::runOnFunction(Function &F) {
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
+ for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
+ BasicBlock *BB = BBI;
+ for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) {
+ Instruction *I = II;
+ bool Done = visit(I);
+ ++II;
+ if (Done && I->getType()->isVoidTy())
+ I->eraseFromParent();
+ }
+ }
+ return finish();
+}
+
+// Return a scattered form of V that can be accessed by Point. V must be a
+// vector or a pointer to a vector.
+Scatterer Scalarizer::scatter(Instruction *Point, Value *V) {
+ if (Argument *VArg = dyn_cast<Argument>(V)) {
+ // Put the scattered form of arguments in the entry block,
+ // so that it can be used everywhere.
+ Function *F = VArg->getParent();
+ BasicBlock *BB = &F->getEntryBlock();
+ return Scatterer(BB, BB->begin(), V, &Scattered[V]);
+ }
+ if (Instruction *VOp = dyn_cast<Instruction>(V)) {
+ // Put the scattered form of an instruction directly after the
+ // instruction.
+ BasicBlock *BB = VOp->getParent();
+ return Scatterer(BB, std::next(BasicBlock::iterator(VOp)),
+ V, &Scattered[V]);
+ }
+  // In the fallback case, just put the scattered form before Point and
+ // keep the result local to Point.
+ return Scatterer(Point->getParent(), Point, V);
+}
+
+// Replace Op with the gathered form of the components in CV. Defer the
+// deletion of Op and creation of the gathered form to the end of the pass,
+// so that we can avoid creating the gathered form if all uses of Op are
+// replaced with uses of CV.
+void Scalarizer::gather(Instruction *Op, const ValueVector &CV) {
+ // Since we're not deleting Op yet, stub out its operands, so that it
+ // doesn't make anything live unnecessarily.
+ for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I)
+ Op->setOperand(I, UndefValue::get(Op->getOperand(I)->getType()));
+
+ transferMetadata(Op, CV);
+
+ // If we already have a scattered form of Op (created from ExtractElements
+  // of Op itself), replace those components with the new form.
+ ValueVector &SV = Scattered[Op];
+ if (!SV.empty()) {
+ for (unsigned I = 0, E = SV.size(); I != E; ++I) {
+ Instruction *Old = cast<Instruction>(SV[I]);
+ CV[I]->takeName(Old);
+ Old->replaceAllUsesWith(CV[I]);
+ Old->eraseFromParent();
+ }
+ }
+ SV = CV;
+ Gathered.push_back(GatherList::value_type(Op, &SV));
+}
+
+// Return true if it is safe to transfer the given metadata tag from
+// vector to scalar instructions.
+bool Scalarizer::canTransferMetadata(unsigned Tag) {
+ return (Tag == LLVMContext::MD_tbaa
+ || Tag == LLVMContext::MD_fpmath
+ || Tag == LLVMContext::MD_tbaa_struct
+ || Tag == LLVMContext::MD_invariant_load
+ || Tag == ParallelLoopAccessMDKind);
+}
+
+// Transfer metadata from Op to the instructions in CV if it is known
+// to be safe to do so.
+void Scalarizer::transferMetadata(Instruction *Op, const ValueVector &CV) {
+ SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
+ Op->getAllMetadataOtherThanDebugLoc(MDs);
+ for (unsigned I = 0, E = CV.size(); I != E; ++I) {
+ if (Instruction *New = dyn_cast<Instruction>(CV[I])) {
+ for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator
+ MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI)
+ if (canTransferMetadata(MI->first))
+ New->setMetadata(MI->first, MI->second);
+ New->setDebugLoc(Op->getDebugLoc());
+ }
+ }
+}
+
+// Try to fill in Layout from Ty, returning true on success. Alignment is
+// the alignment of the vector, or 0 if the ABI default should be used.
+bool Scalarizer::getVectorLayout(Type *Ty, unsigned Alignment,
+ VectorLayout &Layout) {
+ if (!DL)
+ return false;
+
+ // Make sure we're dealing with a vector.
+ Layout.VecTy = dyn_cast<VectorType>(Ty);
+ if (!Layout.VecTy)
+ return false;
+
+ // Check that we're dealing with full-byte elements.
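+  // (For example -- illustrative -- an <8 x i1> vector is rejected here,
+  // because an i1 has a 1-bit size but an 8-bit store size.)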
+ Layout.ElemTy = Layout.VecTy->getElementType();
+ if (DL->getTypeSizeInBits(Layout.ElemTy) !=
+ DL->getTypeStoreSizeInBits(Layout.ElemTy))
+ return false;
+
+ if (Alignment)
+ Layout.VecAlign = Alignment;
+ else
+ Layout.VecAlign = DL->getABITypeAlignment(Layout.VecTy);
+ Layout.ElemSize = DL->getTypeStoreSize(Layout.ElemTy);
+ return true;
+}
+
+// Scalarize two-operand instruction I, using Split(Builder, X, Y, Name)
+// to create an instruction like I with operands X and Y and name Name.
+template<typename Splitter>
+bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) {
+ VectorType *VT = dyn_cast<VectorType>(I.getType());
+ if (!VT)
+ return false;
+
+ unsigned NumElems = VT->getNumElements();
+ IRBuilder<> Builder(I.getParent(), &I);
+ Scatterer Op0 = scatter(&I, I.getOperand(0));
+ Scatterer Op1 = scatter(&I, I.getOperand(1));
+ assert(Op0.size() == NumElems && "Mismatched binary operation");
+ assert(Op1.size() == NumElems && "Mismatched binary operation");
+ ValueVector Res;
+ Res.resize(NumElems);
+ for (unsigned Elem = 0; Elem < NumElems; ++Elem)
+ Res[Elem] = Split(Builder, Op0[Elem], Op1[Elem],
+ I.getName() + ".i" + Twine(Elem));
+ gather(&I, Res);
+ return true;
+}
+
+bool Scalarizer::visitSelectInst(SelectInst &SI) {
+ VectorType *VT = dyn_cast<VectorType>(SI.getType());
+ if (!VT)
+ return false;
+
+ unsigned NumElems = VT->getNumElements();
+ IRBuilder<> Builder(SI.getParent(), &SI);
+ Scatterer Op1 = scatter(&SI, SI.getOperand(1));
+ Scatterer Op2 = scatter(&SI, SI.getOperand(2));
+ assert(Op1.size() == NumElems && "Mismatched select");
+ assert(Op2.size() == NumElems && "Mismatched select");
+ ValueVector Res;
+ Res.resize(NumElems);
+
+ if (SI.getOperand(0)->getType()->isVectorTy()) {
+ Scatterer Op0 = scatter(&SI, SI.getOperand(0));
+ assert(Op0.size() == NumElems && "Mismatched select");
+ for (unsigned I = 0; I < NumElems; ++I)
+ Res[I] = Builder.CreateSelect(Op0[I], Op1[I], Op2[I],
+ SI.getName() + ".i" + Twine(I));
+ } else {
+ Value *Op0 = SI.getOperand(0);
+ for (unsigned I = 0; I < NumElems; ++I)
+ Res[I] = Builder.CreateSelect(Op0, Op1[I], Op2[I],
+ SI.getName() + ".i" + Twine(I));
+ }
+ gather(&SI, Res);
+ return true;
+}
+
+bool Scalarizer::visitICmpInst(ICmpInst &ICI) {
+ return splitBinary(ICI, ICmpSplitter(ICI));
+}
+
+bool Scalarizer::visitFCmpInst(FCmpInst &FCI) {
+ return splitBinary(FCI, FCmpSplitter(FCI));
+}
+
+bool Scalarizer::visitBinaryOperator(BinaryOperator &BO) {
+ return splitBinary(BO, BinarySplitter(BO));
+}
+
+bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
+ VectorType *VT = dyn_cast<VectorType>(GEPI.getType());
+ if (!VT)
+ return false;
+
+ IRBuilder<> Builder(GEPI.getParent(), &GEPI);
+ unsigned NumElems = VT->getNumElements();
+ unsigned NumIndices = GEPI.getNumIndices();
+
+ Scatterer Base = scatter(&GEPI, GEPI.getOperand(0));
+
+ SmallVector<Scatterer, 8> Ops;
+ Ops.resize(NumIndices);
+ for (unsigned I = 0; I < NumIndices; ++I)
+ Ops[I] = scatter(&GEPI, GEPI.getOperand(I + 1));
+
+ ValueVector Res;
+ Res.resize(NumElems);
+ for (unsigned I = 0; I < NumElems; ++I) {
+ SmallVector<Value *, 8> Indices;
+ Indices.resize(NumIndices);
+ for (unsigned J = 0; J < NumIndices; ++J)
+ Indices[J] = Ops[J][I];
+ Res[I] = Builder.CreateGEP(Base[I], Indices,
+ GEPI.getName() + ".i" + Twine(I));
+ if (GEPI.isInBounds())
+ if (GetElementPtrInst *NewGEPI = dyn_cast<GetElementPtrInst>(Res[I]))
+ NewGEPI->setIsInBounds();
+ }
+ gather(&GEPI, Res);
+ return true;
+}
+
+bool Scalarizer::visitCastInst(CastInst &CI) {
+ VectorType *VT = dyn_cast<VectorType>(CI.getDestTy());
+ if (!VT)
+ return false;
+
+ unsigned NumElems = VT->getNumElements();
+ IRBuilder<> Builder(CI.getParent(), &CI);
+ Scatterer Op0 = scatter(&CI, CI.getOperand(0));
+ assert(Op0.size() == NumElems && "Mismatched cast");
+ ValueVector Res;
+ Res.resize(NumElems);
+ for (unsigned I = 0; I < NumElems; ++I)
+ Res[I] = Builder.CreateCast(CI.getOpcode(), Op0[I], VT->getElementType(),
+ CI.getName() + ".i" + Twine(I));
+ gather(&CI, Res);
+ return true;
+}
+
+bool Scalarizer::visitBitCastInst(BitCastInst &BCI) {
+ VectorType *DstVT = dyn_cast<VectorType>(BCI.getDestTy());
+ VectorType *SrcVT = dyn_cast<VectorType>(BCI.getSrcTy());
+ if (!DstVT || !SrcVT)
+ return false;
+
+ unsigned DstNumElems = DstVT->getNumElements();
+ unsigned SrcNumElems = SrcVT->getNumElements();
+ IRBuilder<> Builder(BCI.getParent(), &BCI);
+ Scatterer Op0 = scatter(&BCI, BCI.getOperand(0));
+ ValueVector Res;
+ Res.resize(DstNumElems);
+
+ if (DstNumElems == SrcNumElems) {
+ for (unsigned I = 0; I < DstNumElems; ++I)
+ Res[I] = Builder.CreateBitCast(Op0[I], DstVT->getElementType(),
+ BCI.getName() + ".i" + Twine(I));
+ } else if (DstNumElems > SrcNumElems) {
+ // <M x t1> -> <N*M x t2>. Convert each t1 to <N x t2> and copy the
+ // individual elements to the destination.
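+    // For example (illustrative): a <2 x i64> -> <4 x i32> bitcast converts
+    // each i64 element to a <2 x i32> and copies its two i32 components into
+    // the result.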
+ unsigned FanOut = DstNumElems / SrcNumElems;
+ Type *MidTy = VectorType::get(DstVT->getElementType(), FanOut);
+ unsigned ResI = 0;
+ for (unsigned Op0I = 0; Op0I < SrcNumElems; ++Op0I) {
+ Value *V = Op0[Op0I];
+ Instruction *VI;
+ // Look through any existing bitcasts before converting to <N x t2>.
+ // In the best case, the resulting conversion might be a no-op.
+ while ((VI = dyn_cast<Instruction>(V)) &&
+ VI->getOpcode() == Instruction::BitCast)
+ V = VI->getOperand(0);
+ V = Builder.CreateBitCast(V, MidTy, V->getName() + ".cast");
+ Scatterer Mid = scatter(&BCI, V);
+ for (unsigned MidI = 0; MidI < FanOut; ++MidI)
+ Res[ResI++] = Mid[MidI];
+ }
+ } else {
+ // <N*M x t1> -> <M x t2>. Convert each group of <N x t1> into a t2.
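+    // For example (illustrative): a <4 x i16> -> <2 x i32> bitcast collects
+    // each pair of i16 elements into a <2 x i16> and bitcasts that to an i32.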
+ unsigned FanIn = SrcNumElems / DstNumElems;
+ Type *MidTy = VectorType::get(SrcVT->getElementType(), FanIn);
+ unsigned Op0I = 0;
+ for (unsigned ResI = 0; ResI < DstNumElems; ++ResI) {
+ Value *V = UndefValue::get(MidTy);
+ for (unsigned MidI = 0; MidI < FanIn; ++MidI)
+ V = Builder.CreateInsertElement(V, Op0[Op0I++], Builder.getInt32(MidI),
+ BCI.getName() + ".i" + Twine(ResI)
+ + ".upto" + Twine(MidI));
+ Res[ResI] = Builder.CreateBitCast(V, DstVT->getElementType(),
+ BCI.getName() + ".i" + Twine(ResI));
+ }
+ }
+ gather(&BCI, Res);
+ return true;
+}
+
+bool Scalarizer::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
+ VectorType *VT = dyn_cast<VectorType>(SVI.getType());
+ if (!VT)
+ return false;
+
+ unsigned NumElems = VT->getNumElements();
+ Scatterer Op0 = scatter(&SVI, SVI.getOperand(0));
+ Scatterer Op1 = scatter(&SVI, SVI.getOperand(1));
+ ValueVector Res;
+ Res.resize(NumElems);
+
+ for (unsigned I = 0; I < NumElems; ++I) {
+ int Selector = SVI.getMaskValue(I);
+ if (Selector < 0)
+ Res[I] = UndefValue::get(VT->getElementType());
+ else if (unsigned(Selector) < Op0.size())
+ Res[I] = Op0[Selector];
+ else
+ Res[I] = Op1[Selector - Op0.size()];
+ }
+ gather(&SVI, Res);
+ return true;
+}
+
+bool Scalarizer::visitPHINode(PHINode &PHI) {
+ VectorType *VT = dyn_cast<VectorType>(PHI.getType());
+ if (!VT)
+ return false;
+
+ unsigned NumElems = VT->getNumElements();
+ IRBuilder<> Builder(PHI.getParent(), &PHI);
+ ValueVector Res;
+ Res.resize(NumElems);
+
+ unsigned NumOps = PHI.getNumOperands();
+ for (unsigned I = 0; I < NumElems; ++I)
+ Res[I] = Builder.CreatePHI(VT->getElementType(), NumOps,
+ PHI.getName() + ".i" + Twine(I));
+
+ for (unsigned I = 0; I < NumOps; ++I) {
+ Scatterer Op = scatter(&PHI, PHI.getIncomingValue(I));
+ BasicBlock *IncomingBlock = PHI.getIncomingBlock(I);
+ for (unsigned J = 0; J < NumElems; ++J)
+ cast<PHINode>(Res[J])->addIncoming(Op[J], IncomingBlock);
+ }
+ gather(&PHI, Res);
+ return true;
+}
+
+bool Scalarizer::visitLoadInst(LoadInst &LI) {
+ if (!ScalarizeLoadStore)
+ return false;
+ if (!LI.isSimple())
+ return false;
+
+ VectorLayout Layout;
+ if (!getVectorLayout(LI.getType(), LI.getAlignment(), Layout))
+ return false;
+
+ unsigned NumElems = Layout.VecTy->getNumElements();
+ IRBuilder<> Builder(LI.getParent(), &LI);
+ Scatterer Ptr = scatter(&LI, LI.getPointerOperand());
+ ValueVector Res;
+ Res.resize(NumElems);
+
+ for (unsigned I = 0; I < NumElems; ++I)
+ Res[I] = Builder.CreateAlignedLoad(Ptr[I], Layout.getElemAlign(I),
+ LI.getName() + ".i" + Twine(I));
+ gather(&LI, Res);
+ return true;
+}
+
+bool Scalarizer::visitStoreInst(StoreInst &SI) {
+ if (!ScalarizeLoadStore)
+ return false;
+ if (!SI.isSimple())
+ return false;
+
+ VectorLayout Layout;
+ Value *FullValue = SI.getValueOperand();
+ if (!getVectorLayout(FullValue->getType(), SI.getAlignment(), Layout))
+ return false;
+
+ unsigned NumElems = Layout.VecTy->getNumElements();
+ IRBuilder<> Builder(SI.getParent(), &SI);
+ Scatterer Ptr = scatter(&SI, SI.getPointerOperand());
+ Scatterer Val = scatter(&SI, FullValue);
+
+ ValueVector Stores;
+ Stores.resize(NumElems);
+ for (unsigned I = 0; I < NumElems; ++I) {
+ unsigned Align = Layout.getElemAlign(I);
+ Stores[I] = Builder.CreateAlignedStore(Val[I], Ptr[I], Align);
+ }
+ transferMetadata(&SI, Stores);
+ return true;
+}
+
+// Delete the instructions that we scalarized. If a full vector result
+// is still needed, recreate it using InsertElements.
+bool Scalarizer::finish() {
+ if (Gathered.empty())
+ return false;
+ for (GatherList::iterator GMI = Gathered.begin(), GME = Gathered.end();
+ GMI != GME; ++GMI) {
+ Instruction *Op = GMI->first;
+ ValueVector &CV = *GMI->second;
+ if (!Op->use_empty()) {
+ // The value is still needed, so recreate it using a series of
+ // InsertElements.
+ Type *Ty = Op->getType();
+ Value *Res = UndefValue::get(Ty);
+ BasicBlock *BB = Op->getParent();
+ unsigned Count = Ty->getVectorNumElements();
+ IRBuilder<> Builder(BB, Op);
+ if (isa<PHINode>(Op))
+ Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
+ for (unsigned I = 0; I < Count; ++I)
+ Res = Builder.CreateInsertElement(Res, CV[I], Builder.getInt32(I),
+ Op->getName() + ".upto" + Twine(I));
+ Res->takeName(Op);
+ Op->replaceAllUsesWith(Res);
+ }
+ Op->eraseFromParent();
+ }
+ Gathered.clear();
+ Scattered.clear();
+ return true;
+}
+
+FunctionPass *llvm::createScalarizerPass() {
+ return new Scalarizer();
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/contrib/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
new file mode 100644
index 000000000000..6557ce4575dd
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -0,0 +1,776 @@
+//===-- SeparateConstOffsetFromGEP.cpp - ------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Loop unrolling may create many similar GEPs for array accesses.
+// e.g., a 2-level loop
+//
+// float a[32][32]; // global variable
+//
+// for (int i = 0; i < 2; ++i) {
+// for (int j = 0; j < 2; ++j) {
+// ...
+// ... = a[x + i][y + j];
+// ...
+// }
+// }
+//
+// will probably be unrolled to:
+//
+// gep %a, 0, %x, %y; load
+// gep %a, 0, %x, %y + 1; load
+// gep %a, 0, %x + 1, %y; load
+// gep %a, 0, %x + 1, %y + 1; load
+//
+// LLVM's GVN does not use partial redundancy elimination yet, and is thus
+// unable to reuse (gep %a, 0, %x, %y). As a result, this misoptimization incurs
+// significant slowdown in targets with limited addressing modes. For instance,
+// because the PTX target does not support the reg+reg addressing mode, the
+// NVPTX backend emits PTX code that literally computes the pointer address of
+// each GEP, wasting tons of registers. It emits the following PTX for the
+// first load and similar PTX for other loads.
+//
+// mov.u32 %r1, %x;
+// mov.u32 %r2, %y;
+// mul.wide.u32 %rl2, %r1, 128;
+// mov.u64 %rl3, a;
+// add.s64 %rl4, %rl3, %rl2;
+// mul.wide.u32 %rl5, %r2, 4;
+// add.s64 %rl6, %rl4, %rl5;
+// ld.global.f32 %f1, [%rl6];
+//
+// To reduce the register pressure, the optimization implemented in this file
+// merges the common part of a group of GEPs, so we can compute each pointer
+// address by adding a simple offset to the common part, saving many registers.
+//
+// It works by splitting each GEP into a variadic base and a constant offset.
+// The variadic base can be computed once and reused by multiple GEPs, and the
+// constant offsets can be nicely folded into the reg+immediate addressing mode
+// (supported by most targets) without using any extra register.
+//
+// For instance, we transform the four GEPs and four loads in the above example
+// into:
+//
+// base = gep a, 0, x, y
+// load base
+// load base + 1 * sizeof(float)
+// load base + 32 * sizeof(float)
+// load base + 33 * sizeof(float)
+//
+// Given the transformed IR, a backend that supports the reg+immediate
+// addressing mode can easily fold the pointer arithmetics into the loads. For
+// example, the NVPTX backend can easily fold the pointer arithmetics into the
+// ld.global.f32 instructions, and the resultant PTX uses much fewer registers.
+//
+// mov.u32 %r1, %tid.x;
+// mov.u32 %r2, %tid.y;
+// mul.wide.u32 %rl2, %r1, 128;
+// mov.u64 %rl3, a;
+// add.s64 %rl4, %rl3, %rl2;
+// mul.wide.u32 %rl5, %r2, 4;
+// add.s64 %rl6, %rl4, %rl5;
+// ld.global.f32 %f1, [%rl6]; // so far the same as unoptimized PTX
+// ld.global.f32 %f2, [%rl6+4]; // much better
+// ld.global.f32 %f3, [%rl6+128]; // much better
+// ld.global.f32 %f4, [%rl6+132]; // much better
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+
+static cl::opt<bool> DisableSeparateConstOffsetFromGEP(
+ "disable-separate-const-offset-from-gep", cl::init(false),
+ cl::desc("Do not separate the constant offset from a GEP instruction"),
+ cl::Hidden);
+
+namespace {
+
+/// \brief A helper class for separating a constant offset from a GEP index.
+///
+/// In real programs, a GEP index may be more complicated than a simple addition
+/// of something and a constant integer which can be trivially split. For
+/// example, to split ((a << 3) | 5) + b, we need to search deeper for the
+/// constant offset, so that we can separate the index to (a << 3) + b and 5.
+///
+/// Therefore, this class looks into the expression that computes a given GEP
+/// index, and tries to find a constant integer that can be hoisted to the
+/// outermost level of the expression as an addition. Not every constant in an
+/// expression can jump out. e.g., we cannot transform (b * (a + 5)) to (b * a +
+/// 5); nor can we transform (3 * (a + 5)) to (3 * a + 5); however, in this case,
+/// -instcombine probably already optimized (3 * (a + 5)) to (3 * a + 15).
+class ConstantOffsetExtractor {
+ public:
+ /// Extracts a constant offset from the given GEP index. It outputs the
+ /// numeric value of the extracted constant offset (0 if failed), and a
+ /// new index representing the remainder (equal to the original index minus
+ /// the constant offset).
+ /// \p Idx The given GEP index
+ /// \p NewIdx The new index to replace (output)
+ /// \p DL The datalayout of the module
+ /// \p GEP The given GEP
+ static int64_t Extract(Value *Idx, Value *&NewIdx, const DataLayout *DL,
+ GetElementPtrInst *GEP);
+ /// Looks for a constant offset without extracting it. The meaning of the
+ /// arguments and the return value are the same as Extract.
+ static int64_t Find(Value *Idx, const DataLayout *DL, GetElementPtrInst *GEP);
+
+ private:
+ ConstantOffsetExtractor(const DataLayout *Layout, Instruction *InsertionPt)
+ : DL(Layout), IP(InsertionPt) {}
+ /// Searches the expression that computes V for a non-zero constant C s.t.
+ /// V can be reassociated into the form V' + C. If the searching is
+  /// successful, returns C and updates UserChain as a def-use chain from C to V;
+ /// otherwise, UserChain is empty.
+ ///
+ /// \p V The given expression
+ /// \p SignExtended Whether V will be sign-extended in the computation of the
+ /// GEP index
+ /// \p ZeroExtended Whether V will be zero-extended in the computation of the
+ /// GEP index
+ /// \p NonNegative Whether V is guaranteed to be non-negative. For example,
+ /// an index of an inbounds GEP is guaranteed to be
+  ///                 non-negative. Leveraging this, we can better split
+ /// inbounds GEPs.
+ APInt find(Value *V, bool SignExtended, bool ZeroExtended, bool NonNegative);
+ /// A helper function to look into both operands of a binary operator.
+ APInt findInEitherOperand(BinaryOperator *BO, bool SignExtended,
+ bool ZeroExtended);
+ /// After finding the constant offset C from the GEP index I, we build a new
+ /// index I' s.t. I' + C = I. This function builds and returns the new
+ /// index I' according to UserChain produced by function "find".
+ ///
+ /// The building conceptually takes two steps:
+ /// 1) iteratively distribute s/zext towards the leaves of the expression tree
+ /// that computes I
+ /// 2) reassociate the expression tree to the form I' + C.
+ ///
+ /// For example, to extract the 5 from sext(a + (b + 5)), we first distribute
+ /// sext to a, b and 5 so that we have
+ /// sext(a) + (sext(b) + 5).
+ /// Then, we reassociate it to
+ /// (sext(a) + sext(b)) + 5.
+ /// Given this form, we know I' is sext(a) + sext(b).
+ Value *rebuildWithoutConstOffset();
+ /// After the first step of rebuilding the GEP index without the constant
+ /// offset, distribute s/zext to the operands of all operators in UserChain.
+  /// e.g., zext(sext(a + (b + 5))) (assuming no overflow) =>
+ /// zext(sext(a)) + (zext(sext(b)) + zext(sext(5))).
+ ///
+ /// The function also updates UserChain to point to new subexpressions after
+ /// distributing s/zext. e.g., the old UserChain of the above example is
+ /// 5 -> b + 5 -> a + (b + 5) -> sext(...) -> zext(sext(...)),
+ /// and the new UserChain is
+ /// zext(sext(5)) -> zext(sext(b)) + zext(sext(5)) ->
+  ///   zext(sext(a)) + (zext(sext(b)) + zext(sext(5)))
+ ///
+ /// \p ChainIndex The index to UserChain. ChainIndex is initially
+ /// UserChain.size() - 1, and is decremented during
+ /// the recursion.
+ Value *distributeExtsAndCloneChain(unsigned ChainIndex);
+ /// Reassociates the GEP index to the form I' + C and returns I'.
+ Value *removeConstOffset(unsigned ChainIndex);
+ /// A helper function to apply ExtInsts, a list of s/zext, to value V.
+ /// e.g., if ExtInsts = [sext i32 to i64, zext i16 to i32], this function
+ /// returns "sext i32 (zext i16 V to i32) to i64".
+ Value *applyExts(Value *V);
+
+  /// Returns true if LHS and RHS have no bits in common, i.e., LHS & RHS == 0.
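+  /// (For example -- illustrative -- a value known to be a multiple of 8 and
+  /// a value known to be less than 8 have no bits in common.)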
+ bool NoCommonBits(Value *LHS, Value *RHS) const;
+ /// Computes which bits are known to be one or zero.
+ /// \p KnownOne Mask of all bits that are known to be one.
+ /// \p KnownZero Mask of all bits that are known to be zero.
+ void ComputeKnownBits(Value *V, APInt &KnownOne, APInt &KnownZero) const;
+ /// A helper function that returns whether we can trace into the operands
+ /// of binary operator BO for a constant offset.
+ ///
+ /// \p SignExtended Whether BO is surrounded by sext
+ /// \p ZeroExtended Whether BO is surrounded by zext
+ /// \p NonNegative Whether BO is known to be non-negative, e.g., an in-bound
+ /// array index.
+ bool CanTraceInto(bool SignExtended, bool ZeroExtended, BinaryOperator *BO,
+ bool NonNegative);
+
+ /// The path from the constant offset to the old GEP index. e.g., if the GEP
+ /// index is "a * b + (c + 5)". After running function find, UserChain[0] will
+ /// be the constant 5, UserChain[1] will be the subexpression "c + 5", and
+ /// UserChain[2] will be the entire expression "a * b + (c + 5)".
+ ///
+ /// This path helps to rebuild the new GEP index.
+ SmallVector<User *, 8> UserChain;
+ /// A data structure used in rebuildWithoutConstOffset. Contains all
+ /// sext/zext instructions along UserChain.
+ SmallVector<CastInst *, 16> ExtInsts;
+ /// The data layout of the module. Used in ComputeKnownBits.
+ const DataLayout *DL;
+ Instruction *IP; /// Insertion position of cloned instructions.
+};
+
+/// \brief A pass that tries to split every GEP in the function into a variadic
+/// base and a constant offset. It is a FunctionPass because searching for the
+/// constant offset may inspect other basic blocks.
+class SeparateConstOffsetFromGEP : public FunctionPass {
+ public:
+ static char ID;
+ SeparateConstOffsetFromGEP() : FunctionPass(ID) {
+ initializeSeparateConstOffsetFromGEPPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DataLayoutPass>();
+ AU.addRequired<TargetTransformInfo>();
+ }
+
+ bool doInitialization(Module &M) override {
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ if (DLP == nullptr)
+ report_fatal_error("data layout missing");
+ DL = &DLP->getDataLayout();
+ return false;
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ private:
+ /// Tries to split the given GEP into a variadic base and a constant offset,
+ /// and returns true if the splitting succeeds.
+ bool splitGEP(GetElementPtrInst *GEP);
+ /// Finds the constant offset within each index, and accumulates them. This
+ /// function only inspects the GEP without changing it. The output
+ /// NeedsExtraction indicates whether we can extract a non-zero constant
+ /// offset from any index.
+ int64_t accumulateByteOffset(GetElementPtrInst *GEP, bool &NeedsExtraction);
+ /// Canonicalize array indices to pointer-size integers. This helps to
+ /// simplify the logic of splitting a GEP. For example, if a + b is a
+ /// pointer-size integer, we have
+ /// gep base, a + b = gep (gep base, a), b
+ /// However, this equality may not hold if the size of a + b is smaller than
+ /// the pointer size, because LLVM conceptually sign-extends GEP indices to
+ /// pointer size before computing the address
+ /// (http://llvm.org/docs/LangRef.html#id181).
+ ///
+ /// This canonicalization is very likely already done in clang and
+ /// instcombine. Therefore, the program will probably remain the same.
+ ///
+ /// Returns true if the module changes.
+ ///
+ /// Verified in @i32_add in split-gep.ll
+ bool canonicalizeArrayIndicesToPointerSize(GetElementPtrInst *GEP);
+
+ const DataLayout *DL;
+};
+} // anonymous namespace
+
+char SeparateConstOffsetFromGEP::ID = 0;
+INITIALIZE_PASS_BEGIN(
+ SeparateConstOffsetFromGEP, "separate-const-offset-from-gep",
+ "Split GEPs to a variadic base and a constant offset for better CSE", false,
+ false)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(DataLayoutPass)
+INITIALIZE_PASS_END(
+ SeparateConstOffsetFromGEP, "separate-const-offset-from-gep",
+ "Split GEPs to a variadic base and a constant offset for better CSE", false,
+ false)
+
+FunctionPass *llvm::createSeparateConstOffsetFromGEPPass() {
+ return new SeparateConstOffsetFromGEP();
+}
+
+bool ConstantOffsetExtractor::CanTraceInto(bool SignExtended,
+ bool ZeroExtended,
+ BinaryOperator *BO,
+ bool NonNegative) {
+ // We only consider ADD, SUB and OR, because a non-zero constant found in
+ // expressions composed of these operations can be easily hoisted as a
+ // constant offset by reassociation.
+ if (BO->getOpcode() != Instruction::Add &&
+ BO->getOpcode() != Instruction::Sub &&
+ BO->getOpcode() != Instruction::Or) {
+ return false;
+ }
+
+ Value *LHS = BO->getOperand(0), *RHS = BO->getOperand(1);
+ // Do not trace into "or" unless it is equivalent to "add". If LHS and RHS
+ // don't have common bits, (LHS | RHS) is equivalent to (LHS + RHS).
+ if (BO->getOpcode() == Instruction::Or && !NoCommonBits(LHS, RHS))
+ return false;
+
+ // In addition, tracing into BO requires that its surrounding s/zext (if
+ // any) is distributable to both operands.
+ //
+ // Suppose BO = A op B.
+ // SignExtended | ZeroExtended | Distributable?
+ // --------------+--------------+----------------------------------
+ // 0 | 0 | true because no s/zext exists
+ // 0 | 1 | zext(BO) == zext(A) op zext(B)
+ // 1 | 0 | sext(BO) == sext(A) op sext(B)
+ // 1 | 1 | zext(sext(BO)) ==
+ // | | zext(sext(A)) op zext(sext(B))
+ if (BO->getOpcode() == Instruction::Add && !ZeroExtended && NonNegative) {
+ // If a + b >= 0 and (a >= 0 or b >= 0), then
+ // sext(a + b) = sext(a) + sext(b)
+ // even if the addition is not marked nsw.
+ //
+    // Leveraging this invariant, we can trace into an sext'ed inbounds GEP
+ // index if the constant offset is non-negative.
+ //
+ // Verified in @sext_add in split-gep.ll.
+ if (ConstantInt *ConstLHS = dyn_cast<ConstantInt>(LHS)) {
+ if (!ConstLHS->isNegative())
+ return true;
+ }
+ if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(RHS)) {
+ if (!ConstRHS->isNegative())
+ return true;
+ }
+ }
+
+ // sext (add/sub nsw A, B) == add/sub nsw (sext A), (sext B)
+ // zext (add/sub nuw A, B) == add/sub nuw (zext A), (zext B)
+ if (BO->getOpcode() == Instruction::Add ||
+ BO->getOpcode() == Instruction::Sub) {
+ if (SignExtended && !BO->hasNoSignedWrap())
+ return false;
+ if (ZeroExtended && !BO->hasNoUnsignedWrap())
+ return false;
+ }
+
+ return true;
+}
+
+APInt ConstantOffsetExtractor::findInEitherOperand(BinaryOperator *BO,
+ bool SignExtended,
+ bool ZeroExtended) {
+ // BO being non-negative does not shed light on whether its operands are
+ // non-negative. Clear the NonNegative flag here.
+ APInt ConstantOffset = find(BO->getOperand(0), SignExtended, ZeroExtended,
+ /* NonNegative */ false);
+ // If we found a constant offset in the left operand, stop and return that.
+ // This shortcut might cause us to miss opportunities of combining the
+ // constant offsets in both operands, e.g., (a + 4) + (b + 5) => (a + b) + 9.
+ // However, such cases are probably already handled by -instcombine,
+ // given this pass runs after the standard optimizations.
+ if (ConstantOffset != 0) return ConstantOffset;
+ ConstantOffset = find(BO->getOperand(1), SignExtended, ZeroExtended,
+ /* NonNegative */ false);
+ // If U is a sub operator, negate the constant offset found in the right
+ // operand.
+ if (BO->getOpcode() == Instruction::Sub)
+ ConstantOffset = -ConstantOffset;
+ return ConstantOffset;
+}
+
+APInt ConstantOffsetExtractor::find(Value *V, bool SignExtended,
+ bool ZeroExtended, bool NonNegative) {
+ // TODO(jingyue): We could trace into integer/pointer casts, such as
+ // inttoptr, ptrtoint, bitcast, and addrspacecast. We choose to handle only
+ // integers because it gives good enough results for our benchmarks.
+ unsigned BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
+
+ // We cannot do much with Values that are not a User, such as an Argument.
+ User *U = dyn_cast<User>(V);
+ if (U == nullptr) return APInt(BitWidth, 0);
+
+ APInt ConstantOffset(BitWidth, 0);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ // Hooray, we found it!
+ ConstantOffset = CI->getValue();
+ } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V)) {
+ // Trace into subexpressions for more hoisting opportunities.
+ if (CanTraceInto(SignExtended, ZeroExtended, BO, NonNegative)) {
+ ConstantOffset = findInEitherOperand(BO, SignExtended, ZeroExtended);
+ }
+ } else if (isa<SExtInst>(V)) {
+ ConstantOffset = find(U->getOperand(0), /* SignExtended */ true,
+ ZeroExtended, NonNegative).sext(BitWidth);
+ } else if (isa<ZExtInst>(V)) {
+ // As an optimization, we can clear the SignExtended flag because
+ // sext(zext(a)) = zext(a). Verified in @sext_zext in split-gep.ll.
+ //
+ // Clear the NonNegative flag, because zext(a) >= 0 does not imply a >= 0.
+ ConstantOffset =
+ find(U->getOperand(0), /* SignExtended */ false,
+ /* ZeroExtended */ true, /* NonNegative */ false).zext(BitWidth);
+ }
+
+ // If we found a non-zero constant offset, add it to the path for
+ // rebuildWithoutConstOffset. Zero is a valid constant offset, but doesn't
+ // help this optimization.
+ if (ConstantOffset != 0)
+ UserChain.push_back(U);
+ return ConstantOffset;
+}
+
+Value *ConstantOffsetExtractor::applyExts(Value *V) {
+ Value *Current = V;
+ // ExtInsts is built in the use-def order. Therefore, we apply them to V
+ // in the reversed order.
+ for (auto I = ExtInsts.rbegin(), E = ExtInsts.rend(); I != E; ++I) {
+ if (Constant *C = dyn_cast<Constant>(Current)) {
+ // If Current is a constant, apply s/zext using ConstantExpr::getCast.
+ // ConstantExpr::getCast emits a ConstantInt if C is a ConstantInt.
+ Current = ConstantExpr::getCast((*I)->getOpcode(), C, (*I)->getType());
+ } else {
+ Instruction *Ext = (*I)->clone();
+ Ext->setOperand(0, Current);
+ Ext->insertBefore(IP);
+ Current = Ext;
+ }
+ }
+ return Current;
+}
+
+Value *ConstantOffsetExtractor::rebuildWithoutConstOffset() {
+ distributeExtsAndCloneChain(UserChain.size() - 1);
+ // Remove all nullptrs (used to be s/zext) from UserChain.
+ unsigned NewSize = 0;
+ for (auto I = UserChain.begin(), E = UserChain.end(); I != E; ++I) {
+ if (*I != nullptr) {
+ UserChain[NewSize] = *I;
+ NewSize++;
+ }
+ }
+ UserChain.resize(NewSize);
+ return removeConstOffset(UserChain.size() - 1);
+}
+
+Value *
+ConstantOffsetExtractor::distributeExtsAndCloneChain(unsigned ChainIndex) {
+ User *U = UserChain[ChainIndex];
+ if (ChainIndex == 0) {
+ assert(isa<ConstantInt>(U));
+ // If U is a ConstantInt, applyExts will return a ConstantInt as well.
+ return UserChain[ChainIndex] = cast<ConstantInt>(applyExts(U));
+ }
+
+ if (CastInst *Cast = dyn_cast<CastInst>(U)) {
+ assert((isa<SExtInst>(Cast) || isa<ZExtInst>(Cast)) &&
+ "We only traced into two types of CastInst: sext and zext");
+ ExtInsts.push_back(Cast);
+ UserChain[ChainIndex] = nullptr;
+ return distributeExtsAndCloneChain(ChainIndex - 1);
+ }
+
+  // Function find only traces into BinaryOperator and CastInst.
+ BinaryOperator *BO = cast<BinaryOperator>(U);
+ // OpNo = which operand of BO is UserChain[ChainIndex - 1]
+ unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
+ Value *TheOther = applyExts(BO->getOperand(1 - OpNo));
+ Value *NextInChain = distributeExtsAndCloneChain(ChainIndex - 1);
+
+ BinaryOperator *NewBO = nullptr;
+ if (OpNo == 0) {
+ NewBO = BinaryOperator::Create(BO->getOpcode(), NextInChain, TheOther,
+ BO->getName(), IP);
+ } else {
+ NewBO = BinaryOperator::Create(BO->getOpcode(), TheOther, NextInChain,
+ BO->getName(), IP);
+ }
+ return UserChain[ChainIndex] = NewBO;
+}
+
+Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) {
+ if (ChainIndex == 0) {
+ assert(isa<ConstantInt>(UserChain[ChainIndex]));
+ return ConstantInt::getNullValue(UserChain[ChainIndex]->getType());
+ }
+
+ BinaryOperator *BO = cast<BinaryOperator>(UserChain[ChainIndex]);
+ unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
+ assert(BO->getOperand(OpNo) == UserChain[ChainIndex - 1]);
+ Value *NextInChain = removeConstOffset(ChainIndex - 1);
+ Value *TheOther = BO->getOperand(1 - OpNo);
+
+ // If NextInChain is 0 and not the LHS of a sub, we can simplify the
+ // sub-expression to be just TheOther.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(NextInChain)) {
+ if (CI->isZero() && !(BO->getOpcode() == Instruction::Sub && OpNo == 0))
+ return TheOther;
+ }
+
+ if (BO->getOpcode() == Instruction::Or) {
+ // Rebuild "or" as "add", because "or" may be invalid for the new
+    // expression.
+ //
+ // For instance, given
+ // a | (b + 5) where a and b + 5 have no common bits,
+ // we can extract 5 as the constant offset.
+ //
+ // However, reusing the "or" in the new index would give us
+ // (a | b) + 5
+ // which does not equal a | (b + 5).
+ //
+ // Replacing the "or" with "add" is fine, because
+ // a | (b + 5) = a + (b + 5) = (a + b) + 5
+ return BinaryOperator::CreateAdd(BO->getOperand(0), BO->getOperand(1),
+ BO->getName(), IP);
+ }
+
+ // We can reuse BO in this case, because the new expression shares the same
+ // instruction type and BO is used at most once.
+ assert(BO->getNumUses() <= 1 &&
+ "distributeExtsAndCloneChain clones each BinaryOperator in "
+ "UserChain, so no one should be used more than "
+ "once");
+ BO->setOperand(OpNo, NextInChain);
+ BO->setHasNoSignedWrap(false);
+ BO->setHasNoUnsignedWrap(false);
+ // Make sure it appears after all instructions we've inserted so far.
+ BO->moveBefore(IP);
+ return BO;
+}
+
+int64_t ConstantOffsetExtractor::Extract(Value *Idx, Value *&NewIdx,
+ const DataLayout *DL,
+ GetElementPtrInst *GEP) {
+ ConstantOffsetExtractor Extractor(DL, GEP);
+ // Find a non-zero constant offset first.
+ APInt ConstantOffset =
+ Extractor.find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
+ GEP->isInBounds());
+ if (ConstantOffset != 0) {
+ // Separates the constant offset from the GEP index.
+ NewIdx = Extractor.rebuildWithoutConstOffset();
+ }
+ return ConstantOffset.getSExtValue();
+}
+
+int64_t ConstantOffsetExtractor::Find(Value *Idx, const DataLayout *DL,
+ GetElementPtrInst *GEP) {
+  // If Idx is an index of an inbounds GEP, Idx is guaranteed to be non-negative.
+ return ConstantOffsetExtractor(DL, GEP)
+ .find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
+ GEP->isInBounds())
+ .getSExtValue();
+}
+
+void ConstantOffsetExtractor::ComputeKnownBits(Value *V, APInt &KnownOne,
+ APInt &KnownZero) const {
+ IntegerType *IT = cast<IntegerType>(V->getType());
+ KnownOne = APInt(IT->getBitWidth(), 0);
+ KnownZero = APInt(IT->getBitWidth(), 0);
+ llvm::computeKnownBits(V, KnownZero, KnownOne, DL, 0);
+}
+
+bool ConstantOffsetExtractor::NoCommonBits(Value *LHS, Value *RHS) const {
+ assert(LHS->getType() == RHS->getType() &&
+ "LHS and RHS should have the same type");
+ APInt LHSKnownOne, LHSKnownZero, RHSKnownOne, RHSKnownZero;
+ ComputeKnownBits(LHS, LHSKnownOne, LHSKnownZero);
+ ComputeKnownBits(RHS, RHSKnownOne, RHSKnownZero);
+ return (LHSKnownZero | RHSKnownZero).isAllOnesValue();
+}
+
+bool SeparateConstOffsetFromGEP::canonicalizeArrayIndicesToPointerSize(
+ GetElementPtrInst *GEP) {
+ bool Changed = false;
+ Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
+ gep_type_iterator GTI = gep_type_begin(*GEP);
+ for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end();
+ I != E; ++I, ++GTI) {
+ // Skip struct member indices which must be i32.
+ if (isa<SequentialType>(*GTI)) {
+ if ((*I)->getType() != IntPtrTy) {
+ *I = CastInst::CreateIntegerCast(*I, IntPtrTy, true, "idxprom", GEP);
+ Changed = true;
+ }
+ }
+ }
+ return Changed;
+}
+
+int64_t
+SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
+ bool &NeedsExtraction) {
+ NeedsExtraction = false;
+ int64_t AccumulativeByteOffset = 0;
+ gep_type_iterator GTI = gep_type_begin(*GEP);
+ for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
+ if (isa<SequentialType>(*GTI)) {
+ // Tries to extract a constant offset from this GEP index.
+ int64_t ConstantOffset =
+ ConstantOffsetExtractor::Find(GEP->getOperand(I), DL, GEP);
+ if (ConstantOffset != 0) {
+ NeedsExtraction = true;
+ // A GEP may have multiple indices. We accumulate the extracted
+ // constant offset to a byte offset, and later offset the remainder of
+ // the original GEP with this byte offset.
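+        // For instance (an illustrative sketch): splitting the indices of
+        //   gep [32 x float]* %p, i64 (%i + 1), i64 (%j + 2)
+        // extracts the constants 1 and 2 and accumulates
+        // 1 * 128 + 2 * 4 = 136 bytes here.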
+ AccumulativeByteOffset +=
+ ConstantOffset * DL->getTypeAllocSize(GTI.getIndexedType());
+ }
+ }
+ }
+ return AccumulativeByteOffset;
+}
+
+bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
+ // Skip vector GEPs.
+ if (GEP->getType()->isVectorTy())
+ return false;
+
+ // The backend can already nicely handle the case where all indices are
+ // constant.
+ if (GEP->hasAllConstantIndices())
+ return false;
+
+ bool Changed = canonicalizeArrayIndicesToPointerSize(GEP);
+
+ bool NeedsExtraction;
+ int64_t AccumulativeByteOffset = accumulateByteOffset(GEP, NeedsExtraction);
+
+ if (!NeedsExtraction)
+ return Changed;
+ // Before really splitting the GEP, check whether the backend supports the
+  // addressing mode we are about to produce. If not, this splitting probably
+ // won't be beneficial.
+ TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
+ if (!TTI.isLegalAddressingMode(GEP->getType()->getElementType(),
+ /*BaseGV=*/nullptr, AccumulativeByteOffset,
+ /*HasBaseReg=*/true, /*Scale=*/0)) {
+ return Changed;
+ }
+
+ // Remove the constant offset in each GEP index. The resultant GEP computes
+ // the variadic base.
+ gep_type_iterator GTI = gep_type_begin(*GEP);
+ for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
+ if (isa<SequentialType>(*GTI)) {
+ Value *NewIdx = nullptr;
+ // Tries to extract a constant offset from this GEP index.
+ int64_t ConstantOffset =
+ ConstantOffsetExtractor::Extract(GEP->getOperand(I), NewIdx, DL, GEP);
+ if (ConstantOffset != 0) {
+ assert(NewIdx != nullptr &&
+ "ConstantOffset != 0 implies NewIdx is set");
+ GEP->setOperand(I, NewIdx);
+ }
+ }
+ }
+ // Clear the inbounds attribute because the new index may be off-bound.
+ // e.g.,
+ //
+ // b = add i64 a, 5
+ // addr = gep inbounds float* p, i64 b
+ //
+ // is transformed to:
+ //
+ // addr2 = gep float* p, i64 a
+ // addr = gep float* addr2, i64 5
+ //
+ // If a is -4, although the old index b is in bounds, the new index a is
+ // off-bound. http://llvm.org/docs/LangRef.html#id181 says "if the
+ // inbounds keyword is not present, the offsets are added to the base
+ // address with silently-wrapping two's complement arithmetic".
+  // Therefore, the final code will be semantically equivalent.
+ //
+ // TODO(jingyue): do some range analysis to keep as many inbounds as
+ // possible. GEPs with inbounds are more friendly to alias analysis.
+ GEP->setIsInBounds(false);
+
+ // Offsets the base with the accumulative byte offset.
+ //
+ // %gep ; the base
+ // ... %gep ...
+ //
+ // => add the offset
+ //
+ // %gep2 ; clone of %gep
+ // %new.gep = gep %gep2, <offset / sizeof(*%gep)>
+ // %gep ; will be removed
+ // ... %gep ...
+ //
+ // => replace all uses of %gep with %new.gep and remove %gep
+ //
+ // %gep2 ; clone of %gep
+ // %new.gep = gep %gep2, <offset / sizeof(*%gep)>
+ // ... %new.gep ...
+ //
+ // If AccumulativeByteOffset is not a multiple of sizeof(*%gep), we emit an
+ // uglygep (http://llvm.org/docs/GetElementPtr.html#what-s-an-uglygep):
+ // bitcast %gep2 to i8*, add the offset, and bitcast the result back to the
+ // type of %gep.
+ //
+ // %gep2 ; clone of %gep
+ // %0 = bitcast %gep2 to i8*
+ // %uglygep = gep %0, <offset>
+ // %new.gep = bitcast %uglygep to <type of %gep>
+ // ... %new.gep ...
+ Instruction *NewGEP = GEP->clone();
+ NewGEP->insertBefore(GEP);
+
+ uint64_t ElementTypeSizeOfGEP =
+ DL->getTypeAllocSize(GEP->getType()->getElementType());
+ Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
+ if (AccumulativeByteOffset % ElementTypeSizeOfGEP == 0) {
+    // Very likely. As long as %gep is naturally aligned, the byte offset we
+ // extracted should be a multiple of sizeof(*%gep).
+ // Per ANSI C standard, signed / unsigned = unsigned. Therefore, we
+ // cast ElementTypeSizeOfGEP to signed.
+ int64_t Index =
+ AccumulativeByteOffset / static_cast<int64_t>(ElementTypeSizeOfGEP);
+ NewGEP = GetElementPtrInst::Create(
+ NewGEP, ConstantInt::get(IntPtrTy, Index, true), GEP->getName(), GEP);
+ } else {
+ // Unlikely but possible. For example,
+ // #pragma pack(1)
+ // struct S {
+ // int a[3];
+ // int64 b[8];
+ // };
+ // #pragma pack()
+ //
+ // Suppose the gep before extraction is &s[i + 1].b[j + 3]. After
+ // extraction, it becomes &s[i].b[j] and AccumulativeByteOffset is
+ // sizeof(S) + 3 * sizeof(int64) = 100, which is not a multiple of
+ // sizeof(int64).
+ //
+ // Emit an uglygep in this case.
+ Type *I8PtrTy = Type::getInt8PtrTy(GEP->getContext(),
+ GEP->getPointerAddressSpace());
+ NewGEP = new BitCastInst(NewGEP, I8PtrTy, "", GEP);
+ NewGEP = GetElementPtrInst::Create(
+ NewGEP, ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true),
+ "uglygep", GEP);
+ if (GEP->getType() != I8PtrTy)
+ NewGEP = new BitCastInst(NewGEP, GEP->getType(), GEP->getName(), GEP);
+ }
+
+ GEP->replaceAllUsesWith(NewGEP);
+ GEP->eraseFromParent();
+
+ return true;
+}
+
+bool SeparateConstOffsetFromGEP::runOnFunction(Function &F) {
+ if (DisableSeparateConstOffsetFromGEP)
+ return false;
+
+ bool Changed = false;
+ for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B) {
+ for (BasicBlock::iterator I = B->begin(), IE = B->end(); I != IE; ) {
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I++)) {
+ Changed |= splitGEP(GEP);
+ }
+      // No need to split GEP ConstantExprs because all their indices are constant
+ // already.
+ }
+ }
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 8371f6d35279..5d5606ba47b0 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -21,23 +21,24 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "simplifycfg"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Attributes.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
+#define DEBUG_TYPE "simplifycfg"
+
STATISTIC(NumSimpl, "Number of blocks simplified");
namespace {
@@ -46,9 +47,9 @@ struct CFGSimplifyPass : public FunctionPass {
CFGSimplifyPass() : FunctionPass(ID) {
initializeCFGSimplifyPassPass(*PassRegistry::getPassRegistry());
}
- virtual bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetTransformInfo>();
}
};
@@ -71,7 +72,7 @@ FunctionPass *llvm::createCFGSimplificationPass() {
static bool mergeEmptyReturnBlocks(Function &F) {
bool Changed = false;
- BasicBlock *RetBlock = 0;
+ BasicBlock *RetBlock = nullptr;
// Scan all the blocks in the function, looking for empty return blocks.
for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; ) {
@@ -79,7 +80,7 @@ static bool mergeEmptyReturnBlocks(Function &F) {
// Only look at return blocks.
ReturnInst *Ret = dyn_cast<ReturnInst>(BB.getTerminator());
- if (Ret == 0) continue;
+ if (!Ret) continue;
// Only look at the block if it is empty or the only other thing in it is a
// single PHI node that is the operand to the return.
@@ -98,7 +99,7 @@ static bool mergeEmptyReturnBlocks(Function &F) {
}
// If this is the first returning block, remember it and keep going.
- if (RetBlock == 0) {
+ if (!RetBlock) {
RetBlock = &BB;
continue;
}
@@ -119,7 +120,7 @@ static bool mergeEmptyReturnBlocks(Function &F) {
// If the canonical return block has no PHI node, create one now.
PHINode *RetBlockPHI = dyn_cast<PHINode>(RetBlock->begin());
- if (RetBlockPHI == 0) {
+ if (!RetBlockPHI) {
Value *InVal = cast<ReturnInst>(RetBlock->getTerminator())->getOperand(0);
pred_iterator PB = pred_begin(RetBlock), PE = pred_end(RetBlock);
RetBlockPHI = PHINode::Create(Ret->getOperand(0)->getType(),
@@ -145,7 +146,7 @@ static bool mergeEmptyReturnBlocks(Function &F) {
/// iterativelySimplifyCFG - Call SimplifyCFG on all the blocks in the function,
/// iterating until no more changes are made.
static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
- const DataLayout *TD) {
+ const DataLayout *DL) {
bool Changed = false;
bool LocalChange = true;
while (LocalChange) {
@@ -154,7 +155,7 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
// Loop over all of the basic blocks and remove them if they are unneeded...
//
for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) {
- if (SimplifyCFG(BBIt++, TTI, TD)) {
+ if (SimplifyCFG(BBIt++, TTI, DL)) {
LocalChange = true;
++NumSimpl;
}
@@ -168,11 +169,15 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
// simplify the CFG.
//
bool CFGSimplifyPass::runOnFunction(Function &F) {
+ if (skipOptnoneFunction(F))
+ return false;
+
const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
- const DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
bool EverChanged = removeUnreachableBlocks(F);
EverChanged |= mergeEmptyReturnBlocks(F);
- EverChanged |= iterativelySimplifyCFG(F, TTI, TD);
+ EverChanged |= iterativelySimplifyCFG(F, TTI, DL);
// If neither pass changed anything, we're done.
if (!EverChanged) return false;
@@ -186,7 +191,7 @@ bool CFGSimplifyPass::runOnFunction(Function &F) {
return true;
do {
- EverChanged = iterativelySimplifyCFG(F, TTI, TD);
+ EverChanged = iterativelySimplifyCFG(F, TTI, DL);
EverChanged |= removeUnreachableBlocks(F);
} while (EverChanged);
diff --git a/contrib/llvm/lib/Transforms/Scalar/Sink.cpp b/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
index d4595bb373e6..7348c45c5d37 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
@@ -12,20 +12,21 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "sink"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+#define DEBUG_TYPE "sink"
+
STATISTIC(NumSunk, "Number of instructions sunk");
STATISTIC(NumSinkIter, "Number of sinking iterations");
@@ -34,6 +35,7 @@ namespace {
DominatorTree *DT;
LoopInfo *LI;
AliasAnalysis *AA;
+ const DataLayout *DL;
public:
static char ID; // Pass identification
@@ -41,15 +43,15 @@ namespace {
initializeSinkingPass(*PassRegistry::getPassRegistry());
}
- virtual bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
FunctionPass::getAnalysisUsage(AU);
AU.addRequired<AliasAnalysis>();
- AU.addRequired<DominatorTree>();
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfo>();
- AU.addPreserved<DominatorTree>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<LoopInfo>();
}
private:
@@ -63,7 +65,7 @@ namespace {
char Sinking::ID = 0;
INITIALIZE_PASS_BEGIN(Sinking, "sink", "Code sinking", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(Sinking, "sink", "Code sinking", false, false)
@@ -77,15 +79,14 @@ bool Sinking::AllUsesDominatedByBlock(Instruction *Inst,
// This may leave a referencing dbg_value in the original block, before
// the definition of the vreg. Dwarf generator handles this although the
// user might not get the right info at runtime.
- for (Value::use_iterator I = Inst->use_begin(),
- E = Inst->use_end(); I != E; ++I) {
+ for (Use &U : Inst->uses()) {
// Determine the block of the use.
- Instruction *UseInst = cast<Instruction>(*I);
+ Instruction *UseInst = cast<Instruction>(U.getUser());
BasicBlock *UseBlock = UseInst->getParent();
if (PHINode *PN = dyn_cast<PHINode>(UseInst)) {
// PHI nodes use the operand in the predecessor block, not the block with
// the PHI.
- unsigned Num = PHINode::getIncomingValueNumForOperand(I.getOperandNo());
+ unsigned Num = PHINode::getIncomingValueNumForOperand(U.getOperandNo());
UseBlock = PN->getIncomingBlock(Num);
}
// Check that it dominates.
@@ -96,9 +97,11 @@ bool Sinking::AllUsesDominatedByBlock(Instruction *Inst,
}
bool Sinking::runOnFunction(Function &F) {
- DT = &getAnalysis<DominatorTree>();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LI = &getAnalysis<LoopInfo>();
AA = &getAnalysis<AliasAnalysis>();
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
bool MadeChange, EverMadeChange = false;
@@ -194,7 +197,7 @@ bool Sinking::IsAcceptableTarget(Instruction *Inst,
if (SuccToSinkTo->getUniquePredecessor() != Inst->getParent()) {
// We cannot sink a load across a critical edge - there may be stores in
// other code paths.
- if (!isSafeToSpeculativelyExecute(Inst))
+ if (!isSafeToSpeculativelyExecute(Inst, DL))
return false;
// We don't want to sink across a critical edge if we don't dominate the
@@ -205,7 +208,7 @@ bool Sinking::IsAcceptableTarget(Instruction *Inst,
// Don't sink instructions into a loop.
Loop *succ = LI->getLoopFor(SuccToSinkTo);
Loop *cur = LI->getLoopFor(Inst->getParent());
- if (succ != 0 && succ != cur)
+ if (succ != nullptr && succ != cur)
return false;
}
@@ -218,6 +221,13 @@ bool Sinking::IsAcceptableTarget(Instruction *Inst,
/// instruction out of its current block into a successor.
bool Sinking::SinkInstruction(Instruction *Inst,
SmallPtrSet<Instruction *, 8> &Stores) {
+
+ // Don't sink static alloca instructions. CodeGen assumes allocas outside the
+ // entry block are dynamically sized stack objects.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst))
+ if (AI->isStaticAlloca())
+ return false;
+
// Check if it's safe to move the instruction.
if (!isSafeToMove(Inst, AA, Stores))
return false;
@@ -232,14 +242,14 @@ bool Sinking::SinkInstruction(Instruction *Inst,
// SuccToSinkTo - This is the successor to sink this instruction to, once we
// decide.
- BasicBlock *SuccToSinkTo = 0;
+ BasicBlock *SuccToSinkTo = nullptr;
// Instructions can only be sunk if all their uses are in blocks
// dominated by one of the successors.
// Look at all the postdominators and see if we can sink it in one.
DomTreeNode *DTN = DT->getNode(Inst->getParent());
for (DomTreeNode::iterator I = DTN->begin(), E = DTN->end();
- I != E && SuccToSinkTo == 0; ++I) {
+ I != E && SuccToSinkTo == nullptr; ++I) {
BasicBlock *Candidate = (*I)->getBlock();
if ((*I)->getIDom()->getBlock() == Inst->getParent() &&
IsAcceptableTarget(Inst, Candidate))
@@ -249,19 +259,19 @@ bool Sinking::SinkInstruction(Instruction *Inst,
// If no suitable postdominator was found, look at all the successors and
// decide which one we should sink to, if any.
for (succ_iterator I = succ_begin(Inst->getParent()),
- E = succ_end(Inst->getParent()); I != E && SuccToSinkTo == 0; ++I) {
+ E = succ_end(Inst->getParent()); I != E && !SuccToSinkTo; ++I) {
if (IsAcceptableTarget(Inst, *I))
SuccToSinkTo = *I;
}
// If we couldn't find a block to sink to, ignore this instruction.
- if (SuccToSinkTo == 0)
+ if (!SuccToSinkTo)
return false;
DEBUG(dbgs() << "Sink" << *Inst << " (";
- WriteAsOperand(dbgs(), Inst->getParent(), false);
+ Inst->getParent()->printAsOperand(dbgs(), false);
dbgs() << " -> ";
- WriteAsOperand(dbgs(), SuccToSinkTo, false);
+ SuccToSinkTo->printAsOperand(dbgs(), false);
dbgs() << ")\n");
// Move the instruction.
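Two details in the Sink.cpp changes above are worth restating outside the hunks: static allocas are now refused as sink candidates because CodeGen treats allocas outside the entry block as dynamically sized stack objects, and the dominance check now walks Use objects so that PHI users are tested against the incoming block rather than the PHI's own block. A self-contained sketch of the latter (the helper name allUsesDominatedBy is hypothetical; the body follows the updated AllUsesDominatedByBlock):

#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static bool allUsesDominatedBy(Instruction *Inst, BasicBlock *BB,
                               DominatorTree *DT) {
  for (Use &U : Inst->uses()) {
    Instruction *UseInst = cast<Instruction>(U.getUser());
    BasicBlock *UseBlock = UseInst->getParent();
    if (PHINode *PN = dyn_cast<PHINode>(UseInst)) {
      // A PHI consumes the value in the predecessor, not in its own block.
      unsigned Num = PHINode::getIncomingValueNumForOperand(U.getOperandNo());
      UseBlock = PN->getIncomingBlock(Num);
    }
    if (!DT->dominates(BB, UseBlock))
      return false;
  }
  return true;
}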
diff --git a/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index 5045ff8fdfda..b9673ed655e0 100644
--- a/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -7,7 +7,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "structurizecfg"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SCCIterator.h"
@@ -15,12 +14,14 @@
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Analysis/RegionPass.h"
#include "llvm/IR/Module.h"
-#include "llvm/Support/PatternMatch.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
using namespace llvm::PatternMatch;
+#define DEBUG_TYPE "structurizecfg"
+
namespace {
// Definition of the complex types used in this pass.
@@ -64,14 +65,14 @@ public:
/// \brief Start a new query
NearestCommonDominator(DominatorTree *DomTree) {
DT = DomTree;
- Result = 0;
+ Result = nullptr;
}
/// \brief Add BB to the resulting dominator
void addBlock(BasicBlock *BB, bool Remember = true) {
DomTreeNode *Node = DT->getNode(BB);
- if (Result == 0) {
+ if (!Result) {
unsigned Numbering = 0;
for (;Node;Node = Node->getIDom())
IndexMap[Node] = ++Numbering;
@@ -235,18 +236,18 @@ public:
}
using Pass::doInitialization;
- virtual bool doInitialization(Region *R, RGPassManager &RGM);
+ bool doInitialization(Region *R, RGPassManager &RGM) override;
- virtual bool runOnRegion(Region *R, RGPassManager &RGM);
+ bool runOnRegion(Region *R, RGPassManager &RGM) override;
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "Structurize control flow";
}
- void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequiredID(LowerSwitchID);
- AU.addRequired<DominatorTree>();
- AU.addPreserved<DominatorTree>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
RegionPass::getAnalysisUsage(AU);
}
};
@@ -258,8 +259,8 @@ char StructurizeCFG::ID = 0;
INITIALIZE_PASS_BEGIN(StructurizeCFG, "structurizecfg", "Structurize the CFG",
false, false)
INITIALIZE_PASS_DEPENDENCY(LowerSwitch)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
-INITIALIZE_PASS_DEPENDENCY(RegionInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
INITIALIZE_PASS_END(StructurizeCFG, "structurizecfg", "Structurize the CFG",
false, false)
@@ -277,10 +278,9 @@ bool StructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) {
/// \brief Build up the general order of nodes
void StructurizeCFG::orderNodes() {
- scc_iterator<Region *> I = scc_begin(ParentRegion),
- E = scc_end(ParentRegion);
- for (Order.clear(); I != E; ++I) {
- std::vector<RegionNode *> &Nodes = *I;
+ scc_iterator<Region *> I = scc_begin(ParentRegion);
+ for (Order.clear(); !I.isAtEnd(); ++I) {
+ const std::vector<RegionNode *> &Nodes = *I;
Order.append(Nodes.begin(), Nodes.end());
}
}
@@ -326,16 +326,10 @@ Value *StructurizeCFG::invert(Value *Condition) {
if (Instruction *Inst = dyn_cast<Instruction>(Condition)) {
// Third: Check all the users for an invert
BasicBlock *Parent = Inst->getParent();
- for (Value::use_iterator I = Condition->use_begin(),
- E = Condition->use_end(); I != E; ++I) {
-
- Instruction *User = dyn_cast<Instruction>(*I);
- if (!User || User->getParent() != Parent)
- continue;
-
- if (match(*I, m_Not(m_Specific(Condition))))
- return *I;
- }
+ for (User *U : Condition->users())
+ if (Instruction *I = dyn_cast<Instruction>(U))
+ if (I->getParent() == Parent && match(I, m_Not(m_Specific(Condition))))
+ return I;
// Last option: Create a new instruction
return BinaryOperator::CreateNot(Condition, "", Parent->getTerminator());
@@ -412,11 +406,11 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {
} else {
// It's an exit from a sub region
- while(R->getParent() != ParentRegion)
+ while (R->getParent() != ParentRegion)
R = R->getParent();
// Edge from inside a subregion to its entry, ignore it
- if (R == N)
+ if (*R == *N)
continue;
BasicBlock *Entry = R->getEntry();
@@ -460,10 +454,7 @@ void StructurizeCFG::insertConditions(bool Loops) {
Value *Default = Loops ? BoolTrue : BoolFalse;
SSAUpdater PhiInserter;
- for (BranchVector::iterator I = Conds.begin(),
- E = Conds.end(); I != E; ++I) {
-
- BranchInst *Term = *I;
+ for (BranchInst *Term : Conds) {
assert(Term->isConditional());
BasicBlock *Parent = Term->getParent();
@@ -479,7 +470,7 @@ void StructurizeCFG::insertConditions(bool Loops) {
NearestCommonDominator Dominator(DT);
Dominator.addBlock(Parent, false);
- Value *ParentValue = 0;
+ Value *ParentValue = nullptr;
for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end();
PI != PE; ++PI) {
@@ -598,7 +589,7 @@ void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit,
if (Node->isSubRegion()) {
Region *SubRegion = Node->getNodeAs<Region>();
BasicBlock *OldExit = SubRegion->getExit();
- BasicBlock *Dominator = 0;
+ BasicBlock *Dominator = nullptr;
// Find all the edges from the sub region to the exit
for (pred_iterator I = pred_begin(OldExit), E = pred_end(OldExit);
@@ -685,7 +676,8 @@ BasicBlock *StructurizeCFG::needPostfix(BasicBlock *Flow,
/// \brief Set the previous node
void StructurizeCFG::setPrevNode(BasicBlock *BB) {
- PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB) : 0;
+ PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB)
+ : nullptr;
}
/// \brief Does BB dominate all the predicates of Node ?
@@ -706,7 +698,7 @@ bool StructurizeCFG::isPredictableTrue(RegionNode *Node) {
bool Dominated = false;
  // Region entry is always true
- if (PrevNode == 0)
+ if (!PrevNode)
return true;
for (BBPredicates::iterator I = Preds.begin(), E = Preds.end();
@@ -813,11 +805,11 @@ void StructurizeCFG::createFlow() {
Conditions.clear();
LoopConds.clear();
- PrevNode = 0;
+ PrevNode = nullptr;
Visited.clear();
while (!Order.empty()) {
- handleLoops(EntryDominatesExit, 0);
+ handleLoops(EntryDominatesExit, nullptr);
}
if (PrevNode)
@@ -830,25 +822,19 @@ void StructurizeCFG::createFlow() {
/// no longer dominate all their uses. Not sure if this is really necessary
void StructurizeCFG::rebuildSSA() {
SSAUpdater Updater;
- for (Region::block_iterator I = ParentRegion->block_begin(),
- E = ParentRegion->block_end();
- I != E; ++I) {
-
- BasicBlock *BB = *I;
+ for (const auto &BB : ParentRegion->blocks())
for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
II != IE; ++II) {
bool Initialized = false;
- for (Use *I = &II->use_begin().getUse(), *Next; I; I = Next) {
-
- Next = I->getNext();
-
- Instruction *User = cast<Instruction>(I->getUser());
+ for (auto I = II->use_begin(), E = II->use_end(); I != E;) {
+ Use &U = *I++;
+ Instruction *User = cast<Instruction>(U.getUser());
if (User->getParent() == BB) {
continue;
} else if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
- if (UserPN->getIncomingBlock(*I) == BB)
+ if (UserPN->getIncomingBlock(U) == BB)
continue;
}
@@ -862,10 +848,9 @@ void StructurizeCFG::rebuildSSA() {
Updater.AddAvailableValue(BB, II);
Initialized = true;
}
- Updater.RewriteUseAfterInsertions(*I);
+ Updater.RewriteUseAfterInsertions(U);
}
}
- }
}
/// \brief Run the transformation for each region found
@@ -876,7 +861,7 @@ bool StructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) {
Func = R->getEntry()->getParent();
ParentRegion = R;
- DT = &getAnalysis<DominatorTree>();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
orderNodes();
collectInfos();
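The invert() hunk above replaces a hand-rolled use_iterator loop with a scan over Condition->users() driven by PatternMatch, reusing an existing negation in the same block before creating a new one. A compact sketch of that idiom (findOrCreateNot is a hypothetical free function; the loop body matches the patched code):

#include "llvm/IR/Instructions.h"
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

static Value *findOrCreateNot(Value *Condition, BasicBlock *Parent) {
  // Reuse an existing 'xor %cond, true' in this block if one is available.
  for (User *U : Condition->users())
    if (Instruction *I = dyn_cast<Instruction>(U))
      if (I->getParent() == Parent && match(I, m_Not(m_Specific(Condition))))
        return I;
  // Otherwise materialise a fresh negation in front of the terminator.
  return BinaryOperator::CreateNot(Condition, "", Parent->getTerminator());
}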
diff --git a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 9fb8ddc3d2c1..b7580255150c 100644
--- a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -50,32 +50,35 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "tailcallelim"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
+#define DEBUG_TYPE "tailcallelim"
+
STATISTIC(NumEliminated, "Number of tail calls removed");
STATISTIC(NumRetDuped, "Number of return duplicated");
STATISTIC(NumAccumAdded, "Number of accumulators introduced");
@@ -89,11 +92,14 @@ namespace {
initializeTailCallElimPass(*PassRegistry::getPassRegistry());
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
- virtual bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
private:
+ bool runTRE(Function &F);
+ bool markTails(Function &F, bool &AllCallsAreTailCalls);
+
CallInst *FindTRECandidate(Instruction *I,
bool CannotTailCallElimCallsMarkedTail);
bool EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
@@ -131,52 +137,253 @@ void TailCallElim::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetTransformInfo>();
}
-/// CanTRE - Scan the specified basic block for alloca instructions.
-/// If it contains any that are variable-sized or not in the entry block,
-/// returns false.
-static bool CanTRE(AllocaInst *AI) {
- // Because of PR962, we don't TRE allocas outside the entry block.
-
- // If this alloca is in the body of the function, or if it is a variable
- // sized allocation, we cannot tail call eliminate calls marked 'tail'
- // with this mechanism.
- BasicBlock *BB = AI->getParent();
- return BB == &BB->getParent()->getEntryBlock() &&
- isa<ConstantInt>(AI->getArraySize());
+/// \brief Scan the specified function for alloca instructions.
+/// If it contains any dynamic allocas, returns false.
+static bool CanTRE(Function &F) {
+ // Because of PR962, we don't TRE dynamic allocas.
+ for (auto &BB : F) {
+ for (auto &I : BB) {
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
+ if (!AI->isStaticAlloca())
+ return false;
+ }
+ }
+ }
+
+ return true;
}
-namespace {
-struct AllocaCaptureTracker : public CaptureTracker {
- AllocaCaptureTracker() : Captured(false) {}
+bool TailCallElim::runOnFunction(Function &F) {
+ if (skipOptnoneFunction(F))
+ return false;
- void tooManyUses() LLVM_OVERRIDE { Captured = true; }
+ bool AllCallsAreTailCalls = false;
+ bool Modified = markTails(F, AllCallsAreTailCalls);
+ if (AllCallsAreTailCalls)
+ Modified |= runTRE(F);
+ return Modified;
+}
- bool shouldExplore(Use *U) LLVM_OVERRIDE {
- Value *V = U->getUser();
- if (isa<CallInst>(V) || isa<InvokeInst>(V))
- UsesAlloca.insert(V);
- return true;
+namespace {
+struct AllocaDerivedValueTracker {
+ // Start at a root value and walk its use-def chain to mark calls that use the
+ // value or a derived value in AllocaUsers, and places where it may escape in
+ // EscapePoints.
+ void walk(Value *Root) {
+ SmallVector<Use *, 32> Worklist;
+ SmallPtrSet<Use *, 32> Visited;
+
+ auto AddUsesToWorklist = [&](Value *V) {
+ for (auto &U : V->uses()) {
+ if (!Visited.insert(&U))
+ continue;
+ Worklist.push_back(&U);
+ }
+ };
+
+ AddUsesToWorklist(Root);
+
+ while (!Worklist.empty()) {
+ Use *U = Worklist.pop_back_val();
+ Instruction *I = cast<Instruction>(U->getUser());
+
+ switch (I->getOpcode()) {
+ case Instruction::Call:
+ case Instruction::Invoke: {
+ CallSite CS(I);
+ bool IsNocapture = !CS.isCallee(U) &&
+ CS.doesNotCapture(CS.getArgumentNo(U));
+ callUsesLocalStack(CS, IsNocapture);
+ if (IsNocapture) {
+ // If the alloca-derived argument is passed in as nocapture, then it
+ // can't propagate to the call's return. That would be capturing.
+ continue;
+ }
+ break;
+ }
+ case Instruction::Load: {
+ // The result of a load is not alloca-derived (unless an alloca has
+ // otherwise escaped, but this is a local analysis).
+ continue;
+ }
+ case Instruction::Store: {
+ if (U->getOperandNo() == 0)
+ EscapePoints.insert(I);
+ continue; // Stores have no users to analyze.
+ }
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ case Instruction::PHI:
+ case Instruction::Select:
+ case Instruction::AddrSpaceCast:
+ break;
+ default:
+ EscapePoints.insert(I);
+ break;
+ }
+
+ AddUsesToWorklist(I);
+ }
}
- bool captured(Use *U) LLVM_OVERRIDE {
- if (isa<ReturnInst>(U->getUser()))
- return false;
- Captured = true;
- return true;
+ void callUsesLocalStack(CallSite CS, bool IsNocapture) {
+ // Add it to the list of alloca users.
+ AllocaUsers.insert(CS.getInstruction());
+
+ // If it's nocapture then it can't capture this alloca.
+ if (IsNocapture)
+ return;
+
+ // If it can write to memory, it can leak the alloca value.
+ if (!CS.onlyReadsMemory())
+ EscapePoints.insert(CS.getInstruction());
}
- bool Captured;
- SmallPtrSet<const Value *, 16> UsesAlloca;
+ SmallPtrSet<Instruction *, 32> AllocaUsers;
+ SmallPtrSet<Instruction *, 32> EscapePoints;
};
-} // end anonymous namespace
+}
-bool TailCallElim::runOnFunction(Function &F) {
+bool TailCallElim::markTails(Function &F, bool &AllCallsAreTailCalls) {
+ if (F.callsFunctionThatReturnsTwice())
+ return false;
+ AllCallsAreTailCalls = true;
+
+ // The local stack holds all alloca instructions and all byval arguments.
+ AllocaDerivedValueTracker Tracker;
+ for (Argument &Arg : F.args()) {
+ if (Arg.hasByValAttr())
+ Tracker.walk(&Arg);
+ }
+ for (auto &BB : F) {
+ for (auto &I : BB)
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(&I))
+ Tracker.walk(AI);
+ }
+
+ bool Modified = false;
+
+ // Track whether a block is reachable after an alloca has escaped. Blocks that
+ // contain the escaping instruction will be marked as being visited without an
+ // escaped alloca, since that is how the block began.
+ enum VisitType {
+ UNVISITED,
+ UNESCAPED,
+ ESCAPED
+ };
+ DenseMap<BasicBlock *, VisitType> Visited;
+
+ // We propagate the fact that an alloca has escaped from block to successor.
+ // Visit the blocks that are propagating the escapedness first. To do this, we
+ // maintain two worklists.
+ SmallVector<BasicBlock *, 32> WorklistUnescaped, WorklistEscaped;
+
+ // We may enter a block and visit it thinking that no alloca has escaped yet,
+ // then see an escape point and go back around a loop edge and come back to
+ // the same block twice. Because of this, we defer setting tail on calls when
+ // we first encounter them in a block. Every entry in this list does not
+ // statically use an alloca via use-def chain analysis, but may find an alloca
+ // through other means if the block turns out to be reachable after an escape
+ // point.
+ SmallVector<CallInst *, 32> DeferredTails;
+
+ BasicBlock *BB = &F.getEntryBlock();
+ VisitType Escaped = UNESCAPED;
+ do {
+ for (auto &I : *BB) {
+ if (Tracker.EscapePoints.count(&I))
+ Escaped = ESCAPED;
+
+ CallInst *CI = dyn_cast<CallInst>(&I);
+ if (!CI || CI->isTailCall())
+ continue;
+
+ if (CI->doesNotAccessMemory()) {
+ // A call to a readnone function whose arguments are all things computed
+ // outside this function can be marked tail. Even if you stored the
+ // alloca address into a global, a readnone function can't load the
+ // global anyhow.
+ //
+ // Note that this runs whether we know an alloca has escaped or not. If
+ // it has, then we can't trust Tracker.AllocaUsers to be accurate.
+ bool SafeToTail = true;
+ for (auto &Arg : CI->arg_operands()) {
+ if (isa<Constant>(Arg.getUser()))
+ continue;
+ if (Argument *A = dyn_cast<Argument>(Arg.getUser()))
+ if (!A->hasByValAttr())
+ continue;
+ SafeToTail = false;
+ break;
+ }
+ if (SafeToTail) {
+ emitOptimizationRemark(
+ F.getContext(), "tailcallelim", F, CI->getDebugLoc(),
+ "marked this readnone call a tail call candidate");
+ CI->setTailCall();
+ Modified = true;
+ continue;
+ }
+ }
+
+ if (Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) {
+ DeferredTails.push_back(CI);
+ } else {
+ AllCallsAreTailCalls = false;
+ }
+ }
+
+ for (auto *SuccBB : make_range(succ_begin(BB), succ_end(BB))) {
+ auto &State = Visited[SuccBB];
+ if (State < Escaped) {
+ State = Escaped;
+ if (State == ESCAPED)
+ WorklistEscaped.push_back(SuccBB);
+ else
+ WorklistUnescaped.push_back(SuccBB);
+ }
+ }
+
+ if (!WorklistEscaped.empty()) {
+ BB = WorklistEscaped.pop_back_val();
+ Escaped = ESCAPED;
+ } else {
+ BB = nullptr;
+ while (!WorklistUnescaped.empty()) {
+ auto *NextBB = WorklistUnescaped.pop_back_val();
+ if (Visited[NextBB] == UNESCAPED) {
+ BB = NextBB;
+ Escaped = UNESCAPED;
+ break;
+ }
+ }
+ }
+ } while (BB);
+
+ for (CallInst *CI : DeferredTails) {
+ if (Visited[CI->getParent()] != ESCAPED) {
+ // If the escape point was part way through the block, calls after the
+ // escape point wouldn't have been put into DeferredTails.
+ emitOptimizationRemark(F.getContext(), "tailcallelim", F,
+ CI->getDebugLoc(),
+ "marked this call a tail call candidate");
+ CI->setTailCall();
+ Modified = true;
+ } else {
+ AllCallsAreTailCalls = false;
+ }
+ }
+
+ return Modified;
+}
+
+bool TailCallElim::runTRE(Function &F) {
// If this function is a varargs function, we won't be able to PHI the args
// right, so don't even try to convert it...
if (F.getFunctionType()->isVarArg()) return false;
TTI = &getAnalysis<TargetTransformInfo>();
- BasicBlock *OldEntry = 0;
+ BasicBlock *OldEntry = nullptr;
bool TailCallsAreMarkedTail = false;
SmallVector<PHINode*, 8> ArgumentPHIs;
bool MadeChange = false;
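The markTails() body added in the hunk above propagates an escape state from block to successor and re-queues a successor only when its recorded state strictly increases, so each block is processed at most twice. A minimal sketch of that monotone-state step in isolation (propagate and its parameter names are hypothetical; the comparison and the two worklists mirror the patched code):

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/BasicBlock.h"
using namespace llvm;

enum VisitType { UNVISITED, UNESCAPED, ESCAPED };

static void propagate(BasicBlock *Succ, VisitType Incoming,
                      DenseMap<BasicBlock *, VisitType> &Visited,
                      SmallVectorImpl<BasicBlock *> &WorklistEscaped,
                      SmallVectorImpl<BasicBlock *> &WorklistUnescaped) {
  VisitType &State = Visited[Succ];   // defaults to UNVISITED on first lookup
  if (State < Incoming) {             // the lattice only moves upward
    State = Incoming;
    (Incoming == ESCAPED ? WorklistEscaped : WorklistUnescaped).push_back(Succ);
  }
}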
@@ -185,39 +392,23 @@ bool TailCallElim::runOnFunction(Function &F) {
// marked with the 'tail' attribute, because doing so would cause the stack
// size to increase (real TRE would deallocate variable sized allocas, TRE
// doesn't).
- bool CanTRETailMarkedCall = true;
-
- // Find calls that can be marked tail.
- AllocaCaptureTracker ACT;
- for (Function::iterator BB = F.begin(), EE = F.end(); BB != EE; ++BB) {
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
- if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
- CanTRETailMarkedCall &= CanTRE(AI);
- PointerMayBeCaptured(AI, &ACT);
- // If any allocas are captured, exit.
- if (ACT.Captured)
- return false;
- }
- }
- }
+ bool CanTRETailMarkedCall = CanTRE(F);
- // Second pass, change any tail recursive calls to loops.
+ // Change any tail recursive calls to loops.
//
// FIXME: The code generator produces really bad code when an 'escaping
// alloca' is changed from being a static alloca to being a dynamic alloca.
// Until this is resolved, disable this transformation if that would ever
// happen. This bug is PR962.
- if (ACT.UsesAlloca.empty()) {
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
- bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
- ArgumentPHIs, !CanTRETailMarkedCall);
- if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
- Change = FoldReturnAndProcessPred(BB, Ret, OldEntry,
- TailCallsAreMarkedTail, ArgumentPHIs,
- !CanTRETailMarkedCall);
- MadeChange |= Change;
- }
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
+ ArgumentPHIs, !CanTRETailMarkedCall);
+ if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
+ Change = FoldReturnAndProcessPred(BB, Ret, OldEntry,
+ TailCallsAreMarkedTail, ArgumentPHIs,
+ !CanTRETailMarkedCall);
+ MadeChange |= Change;
}
}
@@ -226,34 +417,13 @@ bool TailCallElim::runOnFunction(Function &F) {
// with themselves. Check to see if we did and clean up our mess if so. This
// occurs when a function passes an argument straight through to its tail
// call.
- if (!ArgumentPHIs.empty()) {
- for (unsigned i = 0, e = ArgumentPHIs.size(); i != e; ++i) {
- PHINode *PN = ArgumentPHIs[i];
-
- // If the PHI Node is a dynamic constant, replace it with the value it is.
- if (Value *PNV = SimplifyInstruction(PN)) {
- PN->replaceAllUsesWith(PNV);
- PN->eraseFromParent();
- }
- }
- }
+ for (unsigned i = 0, e = ArgumentPHIs.size(); i != e; ++i) {
+ PHINode *PN = ArgumentPHIs[i];
- // At this point, we know that the function does not have any captured
- // allocas. If additionally the function does not call setjmp, mark all calls
- // in the function that do not access stack memory with the tail keyword. This
- // implies ensuring that there does not exist any path from a call that takes
- // in an alloca but does not capture it and the call which we wish to mark
- // with "tail".
- if (!F.callsFunctionThatReturnsTwice()) {
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
- if (CallInst *CI = dyn_cast<CallInst>(I)) {
- if (!ACT.UsesAlloca.count(CI)) {
- CI->setTailCall();
- MadeChange = true;
- }
- }
- }
+ // If the PHI Node is a dynamic constant, replace it with the value it is.
+ if (Value *PNV = SimplifyInstruction(PN)) {
+ PN->replaceAllUsesWith(PNV);
+ PN->eraseFromParent();
}
}
@@ -340,11 +510,11 @@ static bool isDynamicConstant(Value *V, CallInst *CI, ReturnInst *RI) {
//
static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) {
Function *F = CI->getParent()->getParent();
- Value *ReturnedValue = 0;
+ Value *ReturnedValue = nullptr;
for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI) {
ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator());
- if (RI == 0 || RI == IgnoreRI) continue;
+ if (RI == nullptr || RI == IgnoreRI) continue;
// We can only perform this transformation if the value returned is
// evaluatable at the start of the initial invocation of the function,
@@ -352,10 +522,10 @@ static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) {
//
Value *RetOp = RI->getOperand(0);
if (!isDynamicConstant(RetOp, CI, RI))
- return 0;
+ return nullptr;
if (ReturnedValue && RetOp != ReturnedValue)
- return 0; // Cannot transform if differing values are returned.
+ return nullptr; // Cannot transform if differing values are returned.
ReturnedValue = RetOp;
}
return ReturnedValue;
@@ -367,23 +537,23 @@ static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) {
///
Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I,
CallInst *CI) {
- if (!I->isAssociative() || !I->isCommutative()) return 0;
+ if (!I->isAssociative() || !I->isCommutative()) return nullptr;
assert(I->getNumOperands() == 2 &&
"Associative/commutative operations should have 2 args!");
// Exactly one operand should be the result of the call instruction.
if ((I->getOperand(0) == CI && I->getOperand(1) == CI) ||
(I->getOperand(0) != CI && I->getOperand(1) != CI))
- return 0;
+ return nullptr;
// The only user of this instruction we allow is a single return instruction.
- if (!I->hasOneUse() || !isa<ReturnInst>(I->use_back()))
- return 0;
+ if (!I->hasOneUse() || !isa<ReturnInst>(I->user_back()))
+ return nullptr;
// Ok, now we have to check all of the other return instructions in this
// function. If they return non-constants or differing values, then we cannot
// transform the function safely.
- return getCommonReturnValue(cast<ReturnInst>(I->use_back()), CI);
+ return getCommonReturnValue(cast<ReturnInst>(I->user_back()), CI);
}
static Instruction *FirstNonDbg(BasicBlock::iterator I) {
@@ -399,11 +569,11 @@ TailCallElim::FindTRECandidate(Instruction *TI,
Function *F = BB->getParent();
if (&BB->front() == TI) // Make sure there is something before the terminator.
- return 0;
+ return nullptr;
// Scan backwards from the return, checking to see if there is a tail call in
// this block. If so, set CI to it.
- CallInst *CI = 0;
+ CallInst *CI = nullptr;
BasicBlock::iterator BBI = TI;
while (true) {
CI = dyn_cast<CallInst>(BBI);
@@ -411,14 +581,14 @@ TailCallElim::FindTRECandidate(Instruction *TI,
break;
if (BBI == BB->begin())
- return 0; // Didn't find a potential tail call.
+ return nullptr; // Didn't find a potential tail call.
--BBI;
}
// If this call is marked as a tail call, and if there are dynamic allocas in
// the function, we cannot perform this optimization.
if (CI->isTailCall() && CannotTailCallElimCallsMarkedTail)
- return 0;
+ return nullptr;
// As a special case, detect code like this:
// double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call
@@ -426,7 +596,7 @@ TailCallElim::FindTRECandidate(Instruction *TI,
// lower the call to fabs into inline code.
if (BB == &F->getEntryBlock() &&
FirstNonDbg(BB->front()) == CI &&
- FirstNonDbg(llvm::next(BB->begin())) == TI &&
+ FirstNonDbg(std::next(BB->begin())) == TI &&
CI->getCalledFunction() &&
!TTI->isLoweredToCall(CI->getCalledFunction())) {
// A single-block function with just a call and a return. Check that
@@ -438,7 +608,7 @@ TailCallElim::FindTRECandidate(Instruction *TI,
for (; I != E && FI != FE; ++I, ++FI)
if (*I != &*FI) break;
if (I == E && FI == FE)
- return 0;
+ return nullptr;
}
return CI;
@@ -459,8 +629,8 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
// which is different to the constant returned by other return instructions
// (which is recorded in AccumulatorRecursionEliminationInitVal). This is a
// special case of accumulator recursion, the operation being "return C".
- Value *AccumulatorRecursionEliminationInitVal = 0;
- Instruction *AccumulatorRecursionInstr = 0;
+ Value *AccumulatorRecursionEliminationInitVal = nullptr;
+ Instruction *AccumulatorRecursionInstr = nullptr;
// Ok, we found a potential tail call. We can currently only transform the
// tail call if all of the instructions between the call and the return are
@@ -490,8 +660,8 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
// accumulator recursion variable eliminated.
if (Ret->getNumOperands() == 1 && Ret->getReturnValue() != CI &&
!isa<UndefValue>(Ret->getReturnValue()) &&
- AccumulatorRecursionEliminationInitVal == 0 &&
- !getCommonReturnValue(0, CI)) {
+ AccumulatorRecursionEliminationInitVal == nullptr &&
+ !getCommonReturnValue(nullptr, CI)) {
// One case remains that we are able to handle: the current return
// instruction returns a constant, and all other return instructions
// return a different constant.
@@ -507,9 +677,12 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
BasicBlock *BB = Ret->getParent();
Function *F = BB->getParent();
+ emitOptimizationRemark(F->getContext(), "tailcallelim", *F, CI->getDebugLoc(),
+ "transforming tail recursion to loop");
+
// OK! We can transform this tail call. If this is the first one found,
// create the new entry block, allowing us to branch back to the old entry.
- if (OldEntry == 0) {
+ if (!OldEntry) {
OldEntry = &F->getEntryBlock();
BasicBlock *NewEntry = BasicBlock::Create(F->getContext(), "", F, OldEntry);
NewEntry->takeName(OldEntry);
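At the source level, the transformation that runTRE() and EliminateRecursiveTailCall() perform corresponds to rewriting a self-recursive call into a branch back to the entry block, with call arguments carried by PHI nodes and, in the accumulator case handled by CanTransformAccumulatorRecursion(), an extra accumulator value. A C++-level illustration of that shape (not LLVM code, purely for intuition):

unsigned factRec(unsigned n) {
  // Accumulator recursion: the multiply happens after the recursive call,
  // which is exactly the case CanTransformAccumulatorRecursion() handles.
  return n == 0 ? 1 : n * factRec(n - 1);
}

unsigned factLoop(unsigned n) {
  unsigned Acc = 1;        // plays the role of AccumulatorRecursionEliminationInitVal
  while (n != 0) {         // branch back to the (new) entry block
    Acc *= n;              // accumulator updated where the call result was used
    --n;                   // call arguments become loop-carried values (PHIs)
  }
  return Acc;
}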