Diffstat (limited to 'contrib/llvm/lib/Transforms/Utils')
52 files changed, 34521 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp b/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp new file mode 100644 index 000000000000..df9d5da9e26e --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp @@ -0,0 +1,150 @@ +//===-- ASanStackFrameLayout.cpp - helper for AddressSanitizer ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Definition of ComputeASanStackFrameLayout (see ASanStackFrameLayout.h). +// +//===----------------------------------------------------------------------===// +#include "llvm/Transforms/Utils/ASanStackFrameLayout.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/ScopedPrinter.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> + +namespace llvm { + +// We sort the stack variables by alignment (largest first) to minimize +// unnecessary large gaps due to alignment. +// It is tempting to also sort variables by size so that larger variables +// have larger redzones at both ends. But reordering will make report analysis +// harder, especially when temporary unnamed variables are present. +// So, until we can provide more information (type, line number, etc) +// for the stack variables we avoid reordering them too much. +static inline bool CompareVars(const ASanStackVariableDescription &a, + const ASanStackVariableDescription &b) { + return a.Alignment > b.Alignment; +} + +// We also force minimal alignment for all vars to kMinAlignment so that vars +// with e.g. alignment 1 and alignment 16 do not get reordered by CompareVars. +static const size_t kMinAlignment = 16; + +// The larger the variable Size the larger is the redzone. +// The resulting frame size is a multiple of Alignment. +static size_t VarAndRedzoneSize(size_t Size, size_t Alignment) { + size_t Res = 0; + if (Size <= 4) Res = 16; + else if (Size <= 16) Res = 32; + else if (Size <= 128) Res = Size + 32; + else if (Size <= 512) Res = Size + 64; + else if (Size <= 4096) Res = Size + 128; + else Res = Size + 256; + return alignTo(Res, Alignment); +} + +ASanStackFrameLayout +ComputeASanStackFrameLayout(SmallVectorImpl<ASanStackVariableDescription> &Vars, + size_t Granularity, size_t MinHeaderSize) { + assert(Granularity >= 8 && Granularity <= 64 && + (Granularity & (Granularity - 1)) == 0); + assert(MinHeaderSize >= 16 && (MinHeaderSize & (MinHeaderSize - 1)) == 0 && + MinHeaderSize >= Granularity); + const size_t NumVars = Vars.size(); + assert(NumVars > 0); + for (size_t i = 0; i < NumVars; i++) + Vars[i].Alignment = std::max(Vars[i].Alignment, kMinAlignment); + + std::stable_sort(Vars.begin(), Vars.end(), CompareVars); + + ASanStackFrameLayout Layout; + Layout.Granularity = Granularity; + Layout.FrameAlignment = std::max(Granularity, Vars[0].Alignment); + size_t Offset = std::max(std::max(MinHeaderSize, Granularity), + Vars[0].Alignment); + assert((Offset % Granularity) == 0); + for (size_t i = 0; i < NumVars; i++) { + bool IsLast = i == NumVars - 1; + size_t Alignment = std::max(Granularity, Vars[i].Alignment); + (void)Alignment; // Used only in asserts. 
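  // Worked example (illustrative, not from the original source): with
  // Granularity = 64 and a 40-byte variable whose successor needs no extra
  // alignment, VarAndRedzoneSize(40, 64) = alignTo(40 + 32, 64) = 128 bytes,
  // i.e. the 40-byte object followed by an 88-byte right redzone, which keeps
  // the next variable's offset a multiple of the shadow granularity.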
+ size_t Size = Vars[i].Size; + assert((Alignment & (Alignment - 1)) == 0); + assert(Layout.FrameAlignment >= Alignment); + assert((Offset % Alignment) == 0); + assert(Size > 0); + size_t NextAlignment = IsLast ? Granularity + : std::max(Granularity, Vars[i + 1].Alignment); + size_t SizeWithRedzone = VarAndRedzoneSize(Size, NextAlignment); + Vars[i].Offset = Offset; + Offset += SizeWithRedzone; + } + if (Offset % MinHeaderSize) { + Offset += MinHeaderSize - (Offset % MinHeaderSize); + } + Layout.FrameSize = Offset; + assert((Layout.FrameSize % MinHeaderSize) == 0); + return Layout; +} + +SmallString<64> ComputeASanStackFrameDescription( + const SmallVectorImpl<ASanStackVariableDescription> &Vars) { + SmallString<2048> StackDescriptionStorage; + raw_svector_ostream StackDescription(StackDescriptionStorage); + StackDescription << Vars.size(); + + for (const auto &Var : Vars) { + std::string Name = Var.Name; + if (Var.Line) { + Name += ":"; + Name += to_string(Var.Line); + } + StackDescription << " " << Var.Offset << " " << Var.Size << " " + << Name.size() << " " << Name; + } + return StackDescription.str(); +} + +SmallVector<uint8_t, 64> +GetShadowBytes(const SmallVectorImpl<ASanStackVariableDescription> &Vars, + const ASanStackFrameLayout &Layout) { + assert(Vars.size() > 0); + SmallVector<uint8_t, 64> SB; + SB.clear(); + const size_t Granularity = Layout.Granularity; + SB.resize(Vars[0].Offset / Granularity, kAsanStackLeftRedzoneMagic); + for (const auto &Var : Vars) { + SB.resize(Var.Offset / Granularity, kAsanStackMidRedzoneMagic); + + SB.resize(SB.size() + Var.Size / Granularity, 0); + if (Var.Size % Granularity) + SB.push_back(Var.Size % Granularity); + } + SB.resize(Layout.FrameSize / Granularity, kAsanStackRightRedzoneMagic); + return SB; +} + +SmallVector<uint8_t, 64> GetShadowBytesAfterScope( + const SmallVectorImpl<ASanStackVariableDescription> &Vars, + const ASanStackFrameLayout &Layout) { + SmallVector<uint8_t, 64> SB = GetShadowBytes(Vars, Layout); + const size_t Granularity = Layout.Granularity; + + for (const auto &Var : Vars) { + assert(Var.LifetimeSize <= Var.Size); + const size_t LifetimeShadowSize = + (Var.LifetimeSize + Granularity - 1) / Granularity; + const size_t Offset = Var.Offset / Granularity; + std::fill(SB.begin() + Offset, SB.begin() + Offset + LifetimeShadowSize, + kAsanStackUseAfterScopeMagic); + } + + return SB; +} + +} // llvm namespace diff --git a/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp b/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp new file mode 100644 index 000000000000..2e95926c0b3f --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp @@ -0,0 +1,238 @@ +//===- AddDiscriminators.cpp - Insert DWARF path discriminators -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file adds DWARF discriminators to the IR. Path discriminators are +// used to decide what CFG path was taken inside sub-graphs whose instructions +// share the same line and column number information. +// +// The main user of this is the sample profiler. Instruction samples are +// mapped to line number information. Since a single line may be spread +// out over several basic blocks, discriminators add more precise location +// for the samples. 
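// (Illustrative aside: a discriminator is an extra integer carried by the
// !dbg location, so two instructions that both map to "file.c:101" can still
// be told apart as discriminator 0 vs. discriminator 1 when runtime samples
// are attributed back to the source.)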
+// +// For example, +// +// 1 #define ASSERT(P) +// 2 if (!(P)) +// 3 abort() +// ... +// 100 while (true) { +// 101 ASSERT (sum < 0); +// 102 ... +// 130 } +// +// when converted to IR, this snippet looks something like: +// +// while.body: ; preds = %entry, %if.end +// %0 = load i32* %sum, align 4, !dbg !15 +// %cmp = icmp slt i32 %0, 0, !dbg !15 +// br i1 %cmp, label %if.end, label %if.then, !dbg !15 +// +// if.then: ; preds = %while.body +// call void @abort(), !dbg !15 +// br label %if.end, !dbg !15 +// +// Notice that all the instructions in blocks 'while.body' and 'if.then' +// have exactly the same debug information. When this program is sampled +// at runtime, the profiler will assume that all these instructions are +// equally frequent. This, in turn, will consider the edge while.body->if.then +// to be frequently taken (which is incorrect). +// +// By adding a discriminator value to the instructions in block 'if.then', +// we can distinguish instructions at line 101 with discriminator 0 from +// the instructions at line 101 with discriminator 1. +// +// For more details about DWARF discriminators, please visit +// http://wiki.dwarfstd.org/index.php?title=Path_Discriminators +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/AddDiscriminators.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" + +using namespace llvm; + +#define DEBUG_TYPE "add-discriminators" + +namespace { +// The legacy pass of AddDiscriminators. +struct AddDiscriminatorsLegacyPass : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + AddDiscriminatorsLegacyPass() : FunctionPass(ID) { + initializeAddDiscriminatorsLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; +}; + +} // end anonymous namespace + +char AddDiscriminatorsLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(AddDiscriminatorsLegacyPass, "add-discriminators", + "Add DWARF path discriminators", false, false) +INITIALIZE_PASS_END(AddDiscriminatorsLegacyPass, "add-discriminators", + "Add DWARF path discriminators", false, false) + +// Command line option to disable discriminator generation even in the +// presence of debug information. This is only needed when debugging +// debug info generation issues. +static cl::opt<bool> NoDiscriminators( + "no-discriminators", cl::init(false), + cl::desc("Disable generation of discriminator information.")); + +// Create the legacy AddDiscriminatorsPass. +FunctionPass *llvm::createAddDiscriminatorsPass() { + return new AddDiscriminatorsLegacyPass(); +} + +/// \brief Assign DWARF discriminators. +/// +/// To assign discriminators, we examine the boundaries of every +/// basic block and its successors. Suppose there is a basic block B1 +/// with successor B2. The last instruction I1 in B1 and the first +/// instruction I2 in B2 are located at the same file and line number. 
+/// This situation is illustrated in the following code snippet: +/// +/// if (i < 10) x = i; +/// +/// entry: +/// br i1 %cmp, label %if.then, label %if.end, !dbg !10 +/// if.then: +/// %1 = load i32* %i.addr, align 4, !dbg !10 +/// store i32 %1, i32* %x, align 4, !dbg !10 +/// br label %if.end, !dbg !10 +/// if.end: +/// ret void, !dbg !12 +/// +/// Notice how the branch instruction in block 'entry' and all the +/// instructions in block 'if.then' have the exact same debug location +/// information (!dbg !10). +/// +/// To distinguish instructions in block 'entry' from instructions in +/// block 'if.then', we generate a new lexical block for all the +/// instruction in block 'if.then' that share the same file and line +/// location with the last instruction of block 'entry'. +/// +/// This new lexical block will have the same location information as +/// the previous one, but with a new DWARF discriminator value. +/// +/// One of the main uses of this discriminator value is in runtime +/// sample profilers. It allows the profiler to distinguish instructions +/// at location !dbg !10 that execute on different basic blocks. This is +/// important because while the predicate 'if (x < 10)' may have been +/// executed millions of times, the assignment 'x = i' may have only +/// executed a handful of times (meaning that the entry->if.then edge is +/// seldom taken). +/// +/// If we did not have discriminator information, the profiler would +/// assign the same weight to both blocks 'entry' and 'if.then', which +/// in turn will make it conclude that the entry->if.then edge is very +/// hot. +/// +/// To decide where to create new discriminator values, this function +/// traverses the CFG and examines instruction at basic block boundaries. +/// If the last instruction I1 of a block B1 is at the same file and line +/// location as instruction I2 of successor B2, then it creates a new +/// lexical block for I2 and all the instruction in B2 that share the same +/// file and line location as I2. This new lexical block will have a +/// different discriminator number than I1. +static bool addDiscriminators(Function &F) { + // If the function has debug information, but the user has disabled + // discriminators, do nothing. + // Simlarly, if the function has no debug info, do nothing. + if (NoDiscriminators || !F.getSubprogram()) + return false; + + bool Changed = false; + + typedef std::pair<StringRef, unsigned> Location; + typedef DenseSet<const BasicBlock *> BBSet; + typedef DenseMap<Location, BBSet> LocationBBMap; + typedef DenseMap<Location, unsigned> LocationDiscriminatorMap; + typedef DenseSet<Location> LocationSet; + + LocationBBMap LBM; + LocationDiscriminatorMap LDM; + + // Traverse all instructions in the function. If the source line location + // of the instruction appears in other basic block, assign a new + // discriminator for this instruction. + for (BasicBlock &B : F) { + for (auto &I : B.getInstList()) { + if (isa<IntrinsicInst>(&I)) + continue; + const DILocation *DIL = I.getDebugLoc(); + if (!DIL) + continue; + Location L = std::make_pair(DIL->getFilename(), DIL->getLine()); + auto &BBMap = LBM[L]; + auto R = BBMap.insert(&B); + if (BBMap.size() == 1) + continue; + // If we could insert more than one block with the same line+file, a + // discriminator is needed to distinguish both instructions. + // Only the lowest 7 bits are used to represent a discriminator to fit + // it in 1 byte ULEB128 representation. + unsigned Discriminator = (R.second ? 
++LDM[L] : LDM[L]) & 0x7f; + I.setDebugLoc(DIL->cloneWithDiscriminator(Discriminator)); + DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":" + << DIL->getColumn() << ":" << Discriminator << " " << I + << "\n"); + Changed = true; + } + } + + // Traverse all instructions and assign new discriminators to call + // instructions with the same lineno that are in the same basic block. + // Sample base profile needs to distinguish different function calls within + // a same source line for correct profile annotation. + for (BasicBlock &B : F) { + LocationSet CallLocations; + for (auto &I : B.getInstList()) { + CallInst *Current = dyn_cast<CallInst>(&I); + if (!Current || isa<IntrinsicInst>(&I)) + continue; + + DILocation *CurrentDIL = Current->getDebugLoc(); + if (!CurrentDIL) + continue; + Location L = + std::make_pair(CurrentDIL->getFilename(), CurrentDIL->getLine()); + if (!CallLocations.insert(L).second) { + Current->setDebugLoc( + CurrentDIL->cloneWithDiscriminator((++LDM[L]) & 0x7f)); + Changed = true; + } + } + } + return Changed; +} + +bool AddDiscriminatorsLegacyPass::runOnFunction(Function &F) { + return addDiscriminators(F); +} +PreservedAnalyses AddDiscriminatorsPass::run(Function &F, + FunctionAnalysisManager &AM) { + if (!addDiscriminators(F)) + return PreservedAnalyses::all(); + + // FIXME: should be all() + return PreservedAnalyses::none(); +} diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp new file mode 100644 index 000000000000..b90349d3cdad --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -0,0 +1,768 @@ +//===-- BasicBlockUtils.cpp - BasicBlock Utilities -------------------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This family of functions perform manipulations on basic blocks, and +// instructions contained within basic blocks. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" +#include <algorithm> +using namespace llvm; + +void llvm::DeleteDeadBlock(BasicBlock *BB) { + assert((pred_begin(BB) == pred_end(BB) || + // Can delete self loop. + BB->getSinglePredecessor() == BB) && "Block is not dead!"); + TerminatorInst *BBTerm = BB->getTerminator(); + + // Loop through all of our successors and make sure they know that one + // of their predecessors is going away. + for (BasicBlock *Succ : BBTerm->successors()) + Succ->removePredecessor(BB); + + // Zap all the instructions in the block. + while (!BB->empty()) { + Instruction &I = BB->back(); + // If this instruction is used, replace uses with an arbitrary value. + // Because control flow can't get here, we don't care what we replace the + // value with. 
Note that since this block is unreachable, and all values + // contained within it must dominate their uses, that all uses will + // eventually be removed (they are themselves dead). + if (!I.use_empty()) + I.replaceAllUsesWith(UndefValue::get(I.getType())); + BB->getInstList().pop_back(); + } + + // Zap the block! + BB->eraseFromParent(); +} + +void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, + MemoryDependenceResults *MemDep) { + if (!isa<PHINode>(BB->begin())) return; + + while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) { + if (PN->getIncomingValue(0) != PN) + PN->replaceAllUsesWith(PN->getIncomingValue(0)); + else + PN->replaceAllUsesWith(UndefValue::get(PN->getType())); + + if (MemDep) + MemDep->removeInstruction(PN); // Memdep updates AA itself. + + PN->eraseFromParent(); + } +} + +bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) { + // Recursively deleting a PHI may cause multiple PHIs to be deleted + // or RAUW'd undef, so use an array of WeakVH for the PHIs to delete. + SmallVector<WeakVH, 8> PHIs; + for (BasicBlock::iterator I = BB->begin(); + PHINode *PN = dyn_cast<PHINode>(I); ++I) + PHIs.push_back(PN); + + bool Changed = false; + for (unsigned i = 0, e = PHIs.size(); i != e; ++i) + if (PHINode *PN = dyn_cast_or_null<PHINode>(PHIs[i].operator Value*())) + Changed |= RecursivelyDeleteDeadPHINode(PN, TLI); + + return Changed; +} + +bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT, + LoopInfo *LI, + MemoryDependenceResults *MemDep) { + // Don't merge away blocks who have their address taken. + if (BB->hasAddressTaken()) return false; + + // Can't merge if there are multiple predecessors, or no predecessors. + BasicBlock *PredBB = BB->getUniquePredecessor(); + if (!PredBB) return false; + + // Don't break self-loops. + if (PredBB == BB) return false; + // Don't break unwinding instructions. + if (PredBB->getTerminator()->isExceptional()) + return false; + + succ_iterator SI(succ_begin(PredBB)), SE(succ_end(PredBB)); + BasicBlock *OnlySucc = BB; + for (; SI != SE; ++SI) + if (*SI != OnlySucc) { + OnlySucc = nullptr; // There are multiple distinct successors! + break; + } + + // Can't merge if there are multiple successors. + if (!OnlySucc) return false; + + // Can't merge if there is PHI loop. + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) { + if (PHINode *PN = dyn_cast<PHINode>(BI)) { + for (Value *IncValue : PN->incoming_values()) + if (IncValue == PN) + return false; + } else + break; + } + + // Begin by getting rid of unneeded PHIs. + if (isa<PHINode>(BB->front())) + FoldSingleEntryPHINodes(BB, MemDep); + + // Delete the unconditional branch from the predecessor... + PredBB->getInstList().pop_back(); + + // Make all PHI nodes that referred to BB now refer to Pred as their + // source... + BB->replaceAllUsesWith(PredBB); + + // Move all definitions in the successor to the predecessor... + PredBB->getInstList().splice(PredBB->end(), BB->getInstList()); + + // Inherit predecessors name if it exists. + if (!PredBB->hasName()) + PredBB->takeName(BB); + + // Finally, erase the old block and update dominator info. 
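  // Illustrative note (exposition only): PredBB was BB's unique predecessor,
  // so every block whose immediate dominator was BB is now immediately
  // dominated by PredBB; the code below re-parents BB's dominator-tree
  // children onto PredBB's node before erasing BB's node.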
+ if (DT) + if (DomTreeNode *DTN = DT->getNode(BB)) { + DomTreeNode *PredDTN = DT->getNode(PredBB); + SmallVector<DomTreeNode *, 8> Children(DTN->begin(), DTN->end()); + for (DomTreeNode *DI : Children) + DT->changeImmediateDominator(DI, PredDTN); + + DT->eraseNode(BB); + } + + if (LI) + LI->removeBlock(BB); + + if (MemDep) + MemDep->invalidateCachedPredecessors(); + + BB->eraseFromParent(); + return true; +} + +void llvm::ReplaceInstWithValue(BasicBlock::InstListType &BIL, + BasicBlock::iterator &BI, Value *V) { + Instruction &I = *BI; + // Replaces all of the uses of the instruction with uses of the value + I.replaceAllUsesWith(V); + + // Make sure to propagate a name if there is one already. + if (I.hasName() && !V->hasName()) + V->takeName(&I); + + // Delete the unnecessary instruction now... + BI = BIL.erase(BI); +} + +void llvm::ReplaceInstWithInst(BasicBlock::InstListType &BIL, + BasicBlock::iterator &BI, Instruction *I) { + assert(I->getParent() == nullptr && + "ReplaceInstWithInst: Instruction already inserted into basic block!"); + + // Copy debug location to newly added instruction, if it wasn't already set + // by the caller. + if (!I->getDebugLoc()) + I->setDebugLoc(BI->getDebugLoc()); + + // Insert the new instruction into the basic block... + BasicBlock::iterator New = BIL.insert(BI, I); + + // Replace all uses of the old instruction, and delete it. + ReplaceInstWithValue(BIL, BI, I); + + // Move BI back to point to the newly inserted instruction + BI = New; +} + +void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) { + BasicBlock::iterator BI(From); + ReplaceInstWithInst(From->getParent()->getInstList(), BI, To); +} + +BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, DominatorTree *DT, + LoopInfo *LI) { + unsigned SuccNum = GetSuccessorNumber(BB, Succ); + + // If this is a critical edge, let SplitCriticalEdge do it. + TerminatorInst *LatchTerm = BB->getTerminator(); + if (SplitCriticalEdge(LatchTerm, SuccNum, CriticalEdgeSplittingOptions(DT, LI) + .setPreserveLCSSA())) + return LatchTerm->getSuccessor(SuccNum); + + // If the edge isn't critical, then BB has a single successor or Succ has a + // single pred. Split the block. + if (BasicBlock *SP = Succ->getSinglePredecessor()) { + // If the successor only has a single pred, split the top of the successor + // block. + assert(SP == BB && "CFG broken"); + SP = nullptr; + return SplitBlock(Succ, &Succ->front(), DT, LI); + } + + // Otherwise, if BB has a single successor, split it at the bottom of the + // block. + assert(BB->getTerminator()->getNumSuccessors() == 1 && + "Should have a single succ!"); + return SplitBlock(BB, BB->getTerminator(), DT, LI); +} + +unsigned +llvm::SplitAllCriticalEdges(Function &F, + const CriticalEdgeSplittingOptions &Options) { + unsigned NumBroken = 0; + for (BasicBlock &BB : F) { + TerminatorInst *TI = BB.getTerminator(); + if (TI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(TI)) + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + if (SplitCriticalEdge(TI, i, Options)) + ++NumBroken; + } + return NumBroken; +} + +BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, + DominatorTree *DT, LoopInfo *LI) { + BasicBlock::iterator SplitIt = SplitPt->getIterator(); + while (isa<PHINode>(SplitIt) || SplitIt->isEHPad()) + ++SplitIt; + BasicBlock *New = Old->splitBasicBlock(SplitIt, Old->getName()+".split"); + + // The new block lives in whichever loop the old one did. 
This preserves + // LCSSA as well, because we force the split point to be after any PHI nodes. + if (LI) + if (Loop *L = LI->getLoopFor(Old)) + L->addBasicBlockToLoop(New, *LI); + + if (DT) + // Old dominates New. New node dominates all other nodes dominated by Old. + if (DomTreeNode *OldNode = DT->getNode(Old)) { + std::vector<DomTreeNode *> Children(OldNode->begin(), OldNode->end()); + + DomTreeNode *NewNode = DT->addNewBlock(New, Old); + for (DomTreeNode *I : Children) + DT->changeImmediateDominator(I, NewNode); + } + + return New; +} + +/// Update DominatorTree, LoopInfo, and LCCSA analysis information. +static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB, + ArrayRef<BasicBlock *> Preds, + DominatorTree *DT, LoopInfo *LI, + bool PreserveLCSSA, bool &HasLoopExit) { + // Update dominator tree if available. + if (DT) + DT->splitBlock(NewBB); + + // The rest of the logic is only relevant for updating the loop structures. + if (!LI) + return; + + Loop *L = LI->getLoopFor(OldBB); + + // If we need to preserve loop analyses, collect some information about how + // this split will affect loops. + bool IsLoopEntry = !!L; + bool SplitMakesNewLoopHeader = false; + for (BasicBlock *Pred : Preds) { + // If we need to preserve LCSSA, determine if any of the preds is a loop + // exit. + if (PreserveLCSSA) + if (Loop *PL = LI->getLoopFor(Pred)) + if (!PL->contains(OldBB)) + HasLoopExit = true; + + // If we need to preserve LoopInfo, note whether any of the preds crosses + // an interesting loop boundary. + if (!L) + continue; + if (L->contains(Pred)) + IsLoopEntry = false; + else + SplitMakesNewLoopHeader = true; + } + + // Unless we have a loop for OldBB, nothing else to do here. + if (!L) + return; + + if (IsLoopEntry) { + // Add the new block to the nearest enclosing loop (and not an adjacent + // loop). To find this, examine each of the predecessors and determine which + // loops enclose them, and select the most-nested loop which contains the + // loop containing the block being split. + Loop *InnermostPredLoop = nullptr; + for (BasicBlock *Pred : Preds) { + if (Loop *PredLoop = LI->getLoopFor(Pred)) { + // Seek a loop which actually contains the block being split (to avoid + // adjacent loops). + while (PredLoop && !PredLoop->contains(OldBB)) + PredLoop = PredLoop->getParentLoop(); + + // Select the most-nested of these loops which contains the block. + if (PredLoop && PredLoop->contains(OldBB) && + (!InnermostPredLoop || + InnermostPredLoop->getLoopDepth() < PredLoop->getLoopDepth())) + InnermostPredLoop = PredLoop; + } + } + + if (InnermostPredLoop) + InnermostPredLoop->addBasicBlockToLoop(NewBB, *LI); + } else { + L->addBasicBlockToLoop(NewBB, *LI); + if (SplitMakesNewLoopHeader) + L->moveToHeader(NewBB); + } +} + +/// Update the PHI nodes in OrigBB to include the values coming from NewBB. +/// This also updates AliasAnalysis, if available. +static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB, + ArrayRef<BasicBlock *> Preds, BranchInst *BI, + bool HasLoopExit) { + // Otherwise, create a new PHI node in NewBB for each PHI node in OrigBB. + SmallPtrSet<BasicBlock *, 16> PredSet(Preds.begin(), Preds.end()); + for (BasicBlock::iterator I = OrigBB->begin(); isa<PHINode>(I); ) { + PHINode *PN = cast<PHINode>(I++); + + // Check to see if all of the values coming in are the same. If so, we + // don't need to create a new PHI node, unless it's needed for LCSSA. 
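    // Illustrative example (not part of the original comment): given
    //   %p = phi i32 [ %v, %pred1 ], [ %v, %pred2 ], [ %x, %other ]
    // with Preds = {%pred1, %pred2}, both rerouted edges carry the same value,
    // so no PHI is created in NewBB; the two entries are simply removed from
    // %p and replaced by a single [ %v, %NewBB ] entry.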
+ Value *InVal = nullptr; + if (!HasLoopExit) { + InVal = PN->getIncomingValueForBlock(Preds[0]); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + if (!PredSet.count(PN->getIncomingBlock(i))) + continue; + if (!InVal) + InVal = PN->getIncomingValue(i); + else if (InVal != PN->getIncomingValue(i)) { + InVal = nullptr; + break; + } + } + } + + if (InVal) { + // If all incoming values for the new PHI would be the same, just don't + // make a new PHI. Instead, just remove the incoming values from the old + // PHI. + + // NOTE! This loop walks backwards for a reason! First off, this minimizes + // the cost of removal if we end up removing a large number of values, and + // second off, this ensures that the indices for the incoming values + // aren't invalidated when we remove one. + for (int64_t i = PN->getNumIncomingValues() - 1; i >= 0; --i) + if (PredSet.count(PN->getIncomingBlock(i))) + PN->removeIncomingValue(i, false); + + // Add an incoming value to the PHI node in the loop for the preheader + // edge. + PN->addIncoming(InVal, NewBB); + continue; + } + + // If the values coming into the block are not the same, we need a new + // PHI. + // Create the new PHI node, insert it into NewBB at the end of the block + PHINode *NewPHI = + PHINode::Create(PN->getType(), Preds.size(), PN->getName() + ".ph", BI); + + // NOTE! This loop walks backwards for a reason! First off, this minimizes + // the cost of removal if we end up removing a large number of values, and + // second off, this ensures that the indices for the incoming values aren't + // invalidated when we remove one. + for (int64_t i = PN->getNumIncomingValues() - 1; i >= 0; --i) { + BasicBlock *IncomingBB = PN->getIncomingBlock(i); + if (PredSet.count(IncomingBB)) { + Value *V = PN->removeIncomingValue(i, false); + NewPHI->addIncoming(V, IncomingBB); + } + } + + PN->addIncoming(NewPHI, NewBB); + } +} + +BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, + ArrayRef<BasicBlock *> Preds, + const char *Suffix, DominatorTree *DT, + LoopInfo *LI, bool PreserveLCSSA) { + // Do not attempt to split that which cannot be split. + if (!BB->canSplitPredecessors()) + return nullptr; + + // For the landingpads we need to act a bit differently. + // Delegate this work to the SplitLandingPadPredecessors. + if (BB->isLandingPad()) { + SmallVector<BasicBlock*, 2> NewBBs; + std::string NewName = std::string(Suffix) + ".split-lp"; + + SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(), NewBBs, DT, + LI, PreserveLCSSA); + return NewBBs[0]; + } + + // Create new basic block, insert right before the original block. + BasicBlock *NewBB = BasicBlock::Create( + BB->getContext(), BB->getName() + Suffix, BB->getParent(), BB); + + // The new block unconditionally branches to the old block. + BranchInst *BI = BranchInst::Create(BB, NewBB); + BI->setDebugLoc(BB->getFirstNonPHI()->getDebugLoc()); + + // Move the edges from Preds to point to NewBB instead of BB. + for (unsigned i = 0, e = Preds.size(); i != e; ++i) { + // This is slightly more strict than necessary; the minimum requirement + // is that there be no more than one indirectbr branching to BB. And + // all BlockAddress uses would need to be updated. + assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) && + "Cannot split an edge from an IndirectBrInst"); + Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB); + } + + // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI + // node becomes an incoming value for BB's phi node. 
However, if the Preds + // list is empty, we need to insert dummy entries into the PHI nodes in BB to + // account for the newly created predecessor. + if (Preds.size() == 0) { + // Insert dummy values as the incoming value. + for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I) + cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB); + return NewBB; + } + + // Update DominatorTree, LoopInfo, and LCCSA analysis information. + bool HasLoopExit = false; + UpdateAnalysisInformation(BB, NewBB, Preds, DT, LI, PreserveLCSSA, + HasLoopExit); + + // Update the PHI nodes in BB with the values coming from NewBB. + UpdatePHINodes(BB, NewBB, Preds, BI, HasLoopExit); + return NewBB; +} + +void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, + ArrayRef<BasicBlock *> Preds, + const char *Suffix1, const char *Suffix2, + SmallVectorImpl<BasicBlock *> &NewBBs, + DominatorTree *DT, LoopInfo *LI, + bool PreserveLCSSA) { + assert(OrigBB->isLandingPad() && "Trying to split a non-landing pad!"); + + // Create a new basic block for OrigBB's predecessors listed in Preds. Insert + // it right before the original block. + BasicBlock *NewBB1 = BasicBlock::Create(OrigBB->getContext(), + OrigBB->getName() + Suffix1, + OrigBB->getParent(), OrigBB); + NewBBs.push_back(NewBB1); + + // The new block unconditionally branches to the old block. + BranchInst *BI1 = BranchInst::Create(OrigBB, NewBB1); + BI1->setDebugLoc(OrigBB->getFirstNonPHI()->getDebugLoc()); + + // Move the edges from Preds to point to NewBB1 instead of OrigBB. + for (unsigned i = 0, e = Preds.size(); i != e; ++i) { + // This is slightly more strict than necessary; the minimum requirement + // is that there be no more than one indirectbr branching to BB. And + // all BlockAddress uses would need to be updated. + assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) && + "Cannot split an edge from an IndirectBrInst"); + Preds[i]->getTerminator()->replaceUsesOfWith(OrigBB, NewBB1); + } + + bool HasLoopExit = false; + UpdateAnalysisInformation(OrigBB, NewBB1, Preds, DT, LI, PreserveLCSSA, + HasLoopExit); + + // Update the PHI nodes in OrigBB with the values coming from NewBB1. + UpdatePHINodes(OrigBB, NewBB1, Preds, BI1, HasLoopExit); + + // Move the remaining edges from OrigBB to point to NewBB2. + SmallVector<BasicBlock*, 8> NewBB2Preds; + for (pred_iterator i = pred_begin(OrigBB), e = pred_end(OrigBB); + i != e; ) { + BasicBlock *Pred = *i++; + if (Pred == NewBB1) continue; + assert(!isa<IndirectBrInst>(Pred->getTerminator()) && + "Cannot split an edge from an IndirectBrInst"); + NewBB2Preds.push_back(Pred); + e = pred_end(OrigBB); + } + + BasicBlock *NewBB2 = nullptr; + if (!NewBB2Preds.empty()) { + // Create another basic block for the rest of OrigBB's predecessors. + NewBB2 = BasicBlock::Create(OrigBB->getContext(), + OrigBB->getName() + Suffix2, + OrigBB->getParent(), OrigBB); + NewBBs.push_back(NewBB2); + + // The new block unconditionally branches to the old block. + BranchInst *BI2 = BranchInst::Create(OrigBB, NewBB2); + BI2->setDebugLoc(OrigBB->getFirstNonPHI()->getDebugLoc()); + + // Move the remaining edges from OrigBB to point to NewBB2. + for (BasicBlock *NewBB2Pred : NewBB2Preds) + NewBB2Pred->getTerminator()->replaceUsesOfWith(OrigBB, NewBB2); + + // Update DominatorTree, LoopInfo, and LCCSA analysis information. + HasLoopExit = false; + UpdateAnalysisInformation(OrigBB, NewBB2, NewBB2Preds, DT, LI, + PreserveLCSSA, HasLoopExit); + + // Update the PHI nodes in OrigBB with the values coming from NewBB2. 
+ UpdatePHINodes(OrigBB, NewBB2, NewBB2Preds, BI2, HasLoopExit); + } + + LandingPadInst *LPad = OrigBB->getLandingPadInst(); + Instruction *Clone1 = LPad->clone(); + Clone1->setName(Twine("lpad") + Suffix1); + NewBB1->getInstList().insert(NewBB1->getFirstInsertionPt(), Clone1); + + if (NewBB2) { + Instruction *Clone2 = LPad->clone(); + Clone2->setName(Twine("lpad") + Suffix2); + NewBB2->getInstList().insert(NewBB2->getFirstInsertionPt(), Clone2); + + // Create a PHI node for the two cloned landingpad instructions only + // if the original landingpad instruction has some uses. + if (!LPad->use_empty()) { + assert(!LPad->getType()->isTokenTy() && + "Split cannot be applied if LPad is token type. Otherwise an " + "invalid PHINode of token type would be created."); + PHINode *PN = PHINode::Create(LPad->getType(), 2, "lpad.phi", LPad); + PN->addIncoming(Clone1, NewBB1); + PN->addIncoming(Clone2, NewBB2); + LPad->replaceAllUsesWith(PN); + } + LPad->eraseFromParent(); + } else { + // There is no second clone. Just replace the landing pad with the first + // clone. + LPad->replaceAllUsesWith(Clone1); + LPad->eraseFromParent(); + } +} + +ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, + BasicBlock *Pred) { + Instruction *UncondBranch = Pred->getTerminator(); + // Clone the return and add it to the end of the predecessor. + Instruction *NewRet = RI->clone(); + Pred->getInstList().push_back(NewRet); + + // If the return instruction returns a value, and if the value was a + // PHI node in "BB", propagate the right value into the return. + for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end(); + i != e; ++i) { + Value *V = *i; + Instruction *NewBC = nullptr; + if (BitCastInst *BCI = dyn_cast<BitCastInst>(V)) { + // Return value might be bitcasted. Clone and insert it before the + // return instruction. + V = BCI->getOperand(0); + NewBC = BCI->clone(); + Pred->getInstList().insert(NewRet->getIterator(), NewBC); + *i = NewBC; + } + if (PHINode *PN = dyn_cast<PHINode>(V)) { + if (PN->getParent() == BB) { + if (NewBC) + NewBC->setOperand(0, PN->getIncomingValueForBlock(Pred)); + else + *i = PN->getIncomingValueForBlock(Pred); + } + } + } + + // Update any PHI nodes in the returning block to realize that we no + // longer branch to them. 
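  // Illustrative example (exposition only): if BB ended in "ret i32 %r" with
  //   %r = phi i32 [ %a, %Pred ], [ %b, %Other ]
  // the clone placed in Pred has already been rewritten above to "ret i32 %a",
  // and removePredecessor() below drops the [ %a, %Pred ] entry from %r since
  // Pred no longer branches to BB.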
+ BB->removePredecessor(Pred); + UncondBranch->eraseFromParent(); + return cast<ReturnInst>(NewRet); +} + +TerminatorInst * +llvm::SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore, + bool Unreachable, MDNode *BranchWeights, + DominatorTree *DT, LoopInfo *LI) { + BasicBlock *Head = SplitBefore->getParent(); + BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator()); + TerminatorInst *HeadOldTerm = Head->getTerminator(); + LLVMContext &C = Head->getContext(); + BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail); + TerminatorInst *CheckTerm; + if (Unreachable) + CheckTerm = new UnreachableInst(C, ThenBlock); + else + CheckTerm = BranchInst::Create(Tail, ThenBlock); + CheckTerm->setDebugLoc(SplitBefore->getDebugLoc()); + BranchInst *HeadNewTerm = + BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cond); + HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights); + ReplaceInstWithInst(HeadOldTerm, HeadNewTerm); + + if (DT) { + if (DomTreeNode *OldNode = DT->getNode(Head)) { + std::vector<DomTreeNode *> Children(OldNode->begin(), OldNode->end()); + + DomTreeNode *NewNode = DT->addNewBlock(Tail, Head); + for (DomTreeNode *Child : Children) + DT->changeImmediateDominator(Child, NewNode); + + // Head dominates ThenBlock. + DT->addNewBlock(ThenBlock, Head); + } + } + + if (LI) { + Loop *L = LI->getLoopFor(Head); + L->addBasicBlockToLoop(ThenBlock, *LI); + L->addBasicBlockToLoop(Tail, *LI); + } + + return CheckTerm; +} + +void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore, + TerminatorInst **ThenTerm, + TerminatorInst **ElseTerm, + MDNode *BranchWeights) { + BasicBlock *Head = SplitBefore->getParent(); + BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator()); + TerminatorInst *HeadOldTerm = Head->getTerminator(); + LLVMContext &C = Head->getContext(); + BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail); + BasicBlock *ElseBlock = BasicBlock::Create(C, "", Head->getParent(), Tail); + *ThenTerm = BranchInst::Create(Tail, ThenBlock); + (*ThenTerm)->setDebugLoc(SplitBefore->getDebugLoc()); + *ElseTerm = BranchInst::Create(Tail, ElseBlock); + (*ElseTerm)->setDebugLoc(SplitBefore->getDebugLoc()); + BranchInst *HeadNewTerm = + BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/ElseBlock, Cond); + HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights); + ReplaceInstWithInst(HeadOldTerm, HeadNewTerm); +} + + +Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, + BasicBlock *&IfFalse) { + PHINode *SomePHI = dyn_cast<PHINode>(BB->begin()); + BasicBlock *Pred1 = nullptr; + BasicBlock *Pred2 = nullptr; + + if (SomePHI) { + if (SomePHI->getNumIncomingValues() != 2) + return nullptr; + Pred1 = SomePHI->getIncomingBlock(0); + Pred2 = SomePHI->getIncomingBlock(1); + } else { + pred_iterator PI = pred_begin(BB), PE = pred_end(BB); + if (PI == PE) // No predecessor + return nullptr; + Pred1 = *PI++; + if (PI == PE) // Only one predecessor + return nullptr; + Pred2 = *PI++; + if (PI != PE) // More than two predecessors + return nullptr; + } + + // We can only handle branches. Other control flow will be lowered to + // branches if possible anyway. + BranchInst *Pred1Br = dyn_cast<BranchInst>(Pred1->getTerminator()); + BranchInst *Pred2Br = dyn_cast<BranchInst>(Pred2->getTerminator()); + if (!Pred1Br || !Pred2Br) + return nullptr; + + // Eliminate code duplication by ensuring that Pred1Br is conditional if + // either are. 
+ if (Pred2Br->isConditional()) { + // If both branches are conditional, we don't have an "if statement". In + // reality, we could transform this case, but since the condition will be + // required anyway, we stand no chance of eliminating it, so the xform is + // probably not profitable. + if (Pred1Br->isConditional()) + return nullptr; + + std::swap(Pred1, Pred2); + std::swap(Pred1Br, Pred2Br); + } + + if (Pred1Br->isConditional()) { + // The only thing we have to watch out for here is to make sure that Pred2 + // doesn't have incoming edges from other blocks. If it does, the condition + // doesn't dominate BB. + if (!Pred2->getSinglePredecessor()) + return nullptr; + + // If we found a conditional branch predecessor, make sure that it branches + // to BB and Pred2Br. If it doesn't, this isn't an "if statement". + if (Pred1Br->getSuccessor(0) == BB && + Pred1Br->getSuccessor(1) == Pred2) { + IfTrue = Pred1; + IfFalse = Pred2; + } else if (Pred1Br->getSuccessor(0) == Pred2 && + Pred1Br->getSuccessor(1) == BB) { + IfTrue = Pred2; + IfFalse = Pred1; + } else { + // We know that one arm of the conditional goes to BB, so the other must + // go somewhere unrelated, and this must not be an "if statement". + return nullptr; + } + + return Pred1Br->getCondition(); + } + + // Ok, if we got here, both predecessors end with an unconditional branch to + // BB. Don't panic! If both blocks only have a single (identical) + // predecessor, and THAT is a conditional branch, then we're all ok! + BasicBlock *CommonPred = Pred1->getSinglePredecessor(); + if (CommonPred == nullptr || CommonPred != Pred2->getSinglePredecessor()) + return nullptr; + + // Otherwise, if this is a conditional branch, then we can use it! + BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator()); + if (!BI) return nullptr; + + assert(BI->isConditional() && "Two successors but not conditional?"); + if (BI->getSuccessor(0) == Pred1) { + IfTrue = Pred1; + IfFalse = Pred2; + } else { + IfTrue = Pred2; + IfFalse = Pred1; + } + return BI->getCondition(); +} diff --git a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp new file mode 100644 index 000000000000..175cbd2ce0df --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -0,0 +1,328 @@ +//===- BreakCriticalEdges.cpp - Critical Edge Elimination Pass ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// BreakCriticalEdges pass - Break all of the critical edges in the CFG by +// inserting a dummy basic block. This pass may be "required" by passes that +// cannot deal with critical edges. For this usage, the structure type is +// forward declared. This pass obviously invalidates the CFG, but can update +// dominator trees. 
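// (Illustrative aside: an edge B1->B2 is "critical" when B1 has more than one
// successor and B2 has more than one predecessor. Splitting it inserts a new
// block containing only an unconditional branch to B2, which gives later
// passes a safe place to put code that must run on that edge and no other.)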
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/BreakCriticalEdges.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +using namespace llvm; + +#define DEBUG_TYPE "break-crit-edges" + +STATISTIC(NumBroken, "Number of blocks inserted"); + +namespace { + struct BreakCriticalEdges : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + BreakCriticalEdges() : FunctionPass(ID) { + initializeBreakCriticalEdgesPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { + auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); + auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; + auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); + auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; + unsigned N = + SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions(DT, LI)); + NumBroken += N; + return N > 0; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addPreserved<DominatorTreeWrapperPass>(); + AU.addPreserved<LoopInfoWrapperPass>(); + + // No loop canonicalization guarantees are broken by this pass. + AU.addPreservedID(LoopSimplifyID); + } + }; +} + +char BreakCriticalEdges::ID = 0; +INITIALIZE_PASS(BreakCriticalEdges, "break-crit-edges", + "Break critical edges in CFG", false, false) + +// Publicly exposed interface to pass... +char &llvm::BreakCriticalEdgesID = BreakCriticalEdges::ID; +FunctionPass *llvm::createBreakCriticalEdgesPass() { + return new BreakCriticalEdges(); +} + +PreservedAnalyses BreakCriticalEdgesPass::run(Function &F, + FunctionAnalysisManager &AM) { + auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F); + auto *LI = AM.getCachedResult<LoopAnalysis>(F); + unsigned N = SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions(DT, LI)); + NumBroken += N; + if (N == 0) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserve<DominatorTreeAnalysis>(); + PA.preserve<LoopAnalysis>(); + return PA; +} + +//===----------------------------------------------------------------------===// +// Implementation of the external critical edge manipulation functions +//===----------------------------------------------------------------------===// + +/// When a loop exit edge is split, LCSSA form may require new PHIs in the new +/// exit block. This function inserts the new PHIs, as needed. Preds is a list +/// of preds inside the loop, SplitBB is the new loop exit block, and DestBB is +/// the old loop exit, now the successor of SplitBB. +static void createPHIsForSplitLoopExit(ArrayRef<BasicBlock *> Preds, + BasicBlock *SplitBB, + BasicBlock *DestBB) { + // SplitBB shouldn't have anything non-trivial in it yet. + assert((SplitBB->getFirstNonPHI() == SplitBB->getTerminator() || + SplitBB->isLandingPad()) && "SplitBB has non-PHI nodes!"); + + // For each PHI in the destination block. 
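  // Illustrative example (exposition only): if the old exit block DestBB has
  //   %lcssa = phi i32 [ %v, %exiting ]
  // and the exiting edge now goes through SplitBB, a new PHI
  //   %split = phi i32 [ %v, %pred ], ...   ; one entry per block in Preds
  // is created in SplitBB and DestBB's PHI is rewritten to take %split, so the
  // value leaving the loop is still merged by a PHI in the nearest exit block.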
+ for (BasicBlock::iterator I = DestBB->begin(); + PHINode *PN = dyn_cast<PHINode>(I); ++I) { + unsigned Idx = PN->getBasicBlockIndex(SplitBB); + Value *V = PN->getIncomingValue(Idx); + + // If the input is a PHI which already satisfies LCSSA, don't create + // a new one. + if (const PHINode *VP = dyn_cast<PHINode>(V)) + if (VP->getParent() == SplitBB) + continue; + + // Otherwise a new PHI is needed. Create one and populate it. + PHINode *NewPN = PHINode::Create( + PN->getType(), Preds.size(), "split", + SplitBB->isLandingPad() ? &SplitBB->front() : SplitBB->getTerminator()); + for (unsigned i = 0, e = Preds.size(); i != e; ++i) + NewPN->addIncoming(V, Preds[i]); + + // Update the original PHI. + PN->setIncomingValue(Idx, NewPN); + } +} + +BasicBlock * +llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, + const CriticalEdgeSplittingOptions &Options) { + if (!isCriticalEdge(TI, SuccNum, Options.MergeIdenticalEdges)) + return nullptr; + + assert(!isa<IndirectBrInst>(TI) && + "Cannot split critical edge from IndirectBrInst"); + + BasicBlock *TIBB = TI->getParent(); + BasicBlock *DestBB = TI->getSuccessor(SuccNum); + + // Splitting the critical edge to a pad block is non-trivial. Don't do + // it in this generic function. + if (DestBB->isEHPad()) return nullptr; + + // Create a new basic block, linking it into the CFG. + BasicBlock *NewBB = BasicBlock::Create(TI->getContext(), + TIBB->getName() + "." + DestBB->getName() + "_crit_edge"); + // Create our unconditional branch. + BranchInst *NewBI = BranchInst::Create(DestBB, NewBB); + NewBI->setDebugLoc(TI->getDebugLoc()); + + // Branch to the new block, breaking the edge. + TI->setSuccessor(SuccNum, NewBB); + + // Insert the block into the function... right after the block TI lives in. + Function &F = *TIBB->getParent(); + Function::iterator FBBI = TIBB->getIterator(); + F.getBasicBlockList().insert(++FBBI, NewBB); + + // If there are any PHI nodes in DestBB, we need to update them so that they + // merge incoming values from NewBB instead of from TIBB. + { + unsigned BBIdx = 0; + for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) { + // We no longer enter through TIBB, now we come in through NewBB. + // Revector exactly one entry in the PHI node that used to come from + // TIBB to come from NewBB. + PHINode *PN = cast<PHINode>(I); + + // Reuse the previous value of BBIdx if it lines up. In cases where we + // have multiple phi nodes with *lots* of predecessors, this is a speed + // win because we don't have to scan the PHI looking for TIBB. This + // happens because the BB list of PHI nodes are usually in the same + // order. + if (PN->getIncomingBlock(BBIdx) != TIBB) + BBIdx = PN->getBasicBlockIndex(TIBB); + PN->setIncomingBlock(BBIdx, NewBB); + } + } + + // If there are any other edges from TIBB to DestBB, update those to go + // through the split block, making those edges non-critical as well (and + // reducing the number of phi entries in the DestBB if relevant). + if (Options.MergeIdenticalEdges) { + for (unsigned i = SuccNum+1, e = TI->getNumSuccessors(); i != e; ++i) { + if (TI->getSuccessor(i) != DestBB) continue; + + // Remove an entry for TIBB from DestBB phi nodes. + DestBB->removePredecessor(TIBB, Options.DontDeleteUselessPHIs); + + // We found another edge to DestBB, go to NewBB instead. + TI->setSuccessor(i, NewBB); + } + } + + // If we have nothing to update, just return. + auto *DT = Options.DT; + auto *LI = Options.LI; + if (!DT && !LI) + return NewBB; + + // Now update analysis information. 
Since the only predecessor of NewBB is + // the TIBB, TIBB clearly dominates NewBB. TIBB usually doesn't dominate + // anything, as there are other successors of DestBB. However, if all other + // predecessors of DestBB are already dominated by DestBB (e.g. DestBB is a + // loop header) then NewBB dominates DestBB. + SmallVector<BasicBlock*, 8> OtherPreds; + + // If there is a PHI in the block, loop over predecessors with it, which is + // faster than iterating pred_begin/end. + if (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) { + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingBlock(i) != NewBB) + OtherPreds.push_back(PN->getIncomingBlock(i)); + } else { + for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB); + I != E; ++I) { + BasicBlock *P = *I; + if (P != NewBB) + OtherPreds.push_back(P); + } + } + + bool NewBBDominatesDestBB = true; + + // Should we update DominatorTree information? + if (DT) { + DomTreeNode *TINode = DT->getNode(TIBB); + + // The new block is not the immediate dominator for any other nodes, but + // TINode is the immediate dominator for the new node. + // + if (TINode) { // Don't break unreachable code! + DomTreeNode *NewBBNode = DT->addNewBlock(NewBB, TIBB); + DomTreeNode *DestBBNode = nullptr; + + // If NewBBDominatesDestBB hasn't been computed yet, do so with DT. + if (!OtherPreds.empty()) { + DestBBNode = DT->getNode(DestBB); + while (!OtherPreds.empty() && NewBBDominatesDestBB) { + if (DomTreeNode *OPNode = DT->getNode(OtherPreds.back())) + NewBBDominatesDestBB = DT->dominates(DestBBNode, OPNode); + OtherPreds.pop_back(); + } + OtherPreds.clear(); + } + + // If NewBBDominatesDestBB, then NewBB dominates DestBB, otherwise it + // doesn't dominate anything. + if (NewBBDominatesDestBB) { + if (!DestBBNode) DestBBNode = DT->getNode(DestBB); + DT->changeImmediateDominator(DestBBNode, NewBBNode); + } + } + } + + // Update LoopInfo if it is around. + if (LI) { + if (Loop *TIL = LI->getLoopFor(TIBB)) { + // If one or the other blocks were not in a loop, the new block is not + // either, and thus LI doesn't need to be updated. + if (Loop *DestLoop = LI->getLoopFor(DestBB)) { + if (TIL == DestLoop) { + // Both in the same loop, the NewBB joins loop. + DestLoop->addBasicBlockToLoop(NewBB, *LI); + } else if (TIL->contains(DestLoop)) { + // Edge from an outer loop to an inner loop. Add to the outer loop. + TIL->addBasicBlockToLoop(NewBB, *LI); + } else if (DestLoop->contains(TIL)) { + // Edge from an inner loop to an outer loop. Add to the outer loop. + DestLoop->addBasicBlockToLoop(NewBB, *LI); + } else { + // Edge from two loops with no containment relation. Because these + // are natural loops, we know that the destination block must be the + // header of its loop (adding a branch into a loop elsewhere would + // create an irreducible loop). + assert(DestLoop->getHeader() == DestBB && + "Should not create irreducible loops!"); + if (Loop *P = DestLoop->getParentLoop()) + P->addBasicBlockToLoop(NewBB, *LI); + } + } + + // If TIBB is in a loop and DestBB is outside of that loop, we may need + // to update LoopSimplify form and LCSSA form. + if (!TIL->contains(DestBB)) { + assert(!TIL->contains(NewBB) && + "Split point for loop exit is contained in loop!"); + + // Update LCSSA form in the newly created exit block. 
+ if (Options.PreserveLCSSA) { + createPHIsForSplitLoopExit(TIBB, NewBB, DestBB); + } + + // The only that we can break LoopSimplify form by splitting a critical + // edge is if after the split there exists some edge from TIL to DestBB + // *and* the only edge into DestBB from outside of TIL is that of + // NewBB. If the first isn't true, then LoopSimplify still holds, NewBB + // is the new exit block and it has no non-loop predecessors. If the + // second isn't true, then DestBB was not in LoopSimplify form prior to + // the split as it had a non-loop predecessor. In both of these cases, + // the predecessor must be directly in TIL, not in a subloop, or again + // LoopSimplify doesn't hold. + SmallVector<BasicBlock *, 4> LoopPreds; + for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB); I != E; + ++I) { + BasicBlock *P = *I; + if (P == NewBB) + continue; // The new block is known. + if (LI->getLoopFor(P) != TIL) { + // No need to re-simplify, it wasn't to start with. + LoopPreds.clear(); + break; + } + LoopPreds.push_back(P); + } + if (!LoopPreds.empty()) { + assert(!DestBB->isEHPad() && "We don't split edges to EH pads!"); + BasicBlock *NewExitBB = SplitBlockPredecessors( + DestBB, LoopPreds, "split", DT, LI, Options.PreserveLCSSA); + if (Options.PreserveLCSSA) + createPHIsForSplitLoopExit(LoopPreds, NewExitBB, DestBB); + } + } + } + } + + return NewBB; +} diff --git a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp new file mode 100644 index 000000000000..e61b04fbdd57 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -0,0 +1,1008 @@ +//===- BuildLibCalls.cpp - Utility builder for libcalls -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements some functions that will create standard C libcalls. 
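// (Illustrative usage sketch, based only on the declarations in this file: a
// pass that recognizes a declaration of, say, strlen can call
//   inferLibFuncAttributes(F, TLI);
// to mark it nounwind/readonly and its pointer argument nocapture, letting
// later passes reason about calls to it without seeing a definition.)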
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/BuildLibCalls.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" + +using namespace llvm; + +#define DEBUG_TYPE "build-libcalls" + +//- Infer Attributes ---------------------------------------------------------// + +STATISTIC(NumReadNone, "Number of functions inferred as readnone"); +STATISTIC(NumReadOnly, "Number of functions inferred as readonly"); +STATISTIC(NumArgMemOnly, "Number of functions inferred as argmemonly"); +STATISTIC(NumNoUnwind, "Number of functions inferred as nounwind"); +STATISTIC(NumNoCapture, "Number of arguments inferred as nocapture"); +STATISTIC(NumReadOnlyArg, "Number of arguments inferred as readonly"); +STATISTIC(NumNoAlias, "Number of function returns inferred as noalias"); +STATISTIC(NumNonNull, "Number of function returns inferred as nonnull returns"); + +static bool setDoesNotAccessMemory(Function &F) { + if (F.doesNotAccessMemory()) + return false; + F.setDoesNotAccessMemory(); + ++NumReadNone; + return true; +} + +static bool setOnlyReadsMemory(Function &F) { + if (F.onlyReadsMemory()) + return false; + F.setOnlyReadsMemory(); + ++NumReadOnly; + return true; +} + +static bool setOnlyAccessesArgMemory(Function &F) { + if (F.onlyAccessesArgMemory()) + return false; + F.setOnlyAccessesArgMemory (); + ++NumArgMemOnly; + return true; +} + +static bool setDoesNotThrow(Function &F) { + if (F.doesNotThrow()) + return false; + F.setDoesNotThrow(); + ++NumNoUnwind; + return true; +} + +static bool setDoesNotCapture(Function &F, unsigned n) { + if (F.doesNotCapture(n)) + return false; + F.setDoesNotCapture(n); + ++NumNoCapture; + return true; +} + +static bool setOnlyReadsMemory(Function &F, unsigned n) { + if (F.onlyReadsMemory(n)) + return false; + F.setOnlyReadsMemory(n); + ++NumReadOnlyArg; + return true; +} + +static bool setDoesNotAlias(Function &F, unsigned n) { + if (F.doesNotAlias(n)) + return false; + F.setDoesNotAlias(n); + ++NumNoAlias; + return true; +} + +static bool setNonNull(Function &F, unsigned n) { + assert((n != AttributeSet::ReturnIndex || + F.getReturnType()->isPointerTy()) && + "nonnull applies only to pointers"); + if (F.getAttributes().hasAttribute(n, Attribute::NonNull)) + return false; + F.addAttribute(n, Attribute::NonNull); + ++NumNonNull; + return true; +} + +bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { + LibFunc::Func TheLibFunc; + if (!(TLI.getLibFunc(F, TheLibFunc) && TLI.has(TheLibFunc))) + return false; + + bool Changed = false; + switch (TheLibFunc) { + case LibFunc::strlen: + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::strchr: + case LibFunc::strrchr: + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotThrow(F); + return Changed; + case LibFunc::strtol: + case LibFunc::strtod: + case LibFunc::strtof: + case LibFunc::strtoul: + case LibFunc::strtoll: + case LibFunc::strtold: + case LibFunc::strtoull: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::strcpy: + case 
LibFunc::stpcpy: + case LibFunc::strcat: + case LibFunc::strncat: + case LibFunc::strncpy: + case LibFunc::stpncpy: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::strxfrm: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::strcmp: // 0,1 + case LibFunc::strspn: // 0,1 + case LibFunc::strncmp: // 0,1 + case LibFunc::strcspn: // 0,1 + case LibFunc::strcoll: // 0,1 + case LibFunc::strcasecmp: // 0,1 + case LibFunc::strncasecmp: // + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::strstr: + case LibFunc::strpbrk: + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::strtok: + case LibFunc::strtok_r: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::scanf: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::setbuf: + case LibFunc::setvbuf: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::strdup: + case LibFunc::strndup: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::stat: + case LibFunc::statvfs: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::sscanf: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::sprintf: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::snprintf: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 3); + Changed |= setOnlyReadsMemory(F, 3); + return Changed; + case LibFunc::setitimer: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + Changed |= setDoesNotCapture(F, 3); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::system: + // May throw; "system" is a valid pthread cancellation point. 
+ Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::malloc: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + return Changed; + case LibFunc::memcmp: + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::memchr: + case LibFunc::memrchr: + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotThrow(F); + return Changed; + case LibFunc::modf: + case LibFunc::modff: + case LibFunc::modfl: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::memcpy: + case LibFunc::mempcpy: + case LibFunc::memccpy: + case LibFunc::memmove: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::memcpy_chk: + Changed |= setDoesNotThrow(F); + return Changed; + case LibFunc::memalign: + Changed |= setDoesNotAlias(F, 0); + return Changed; + case LibFunc::mkdir: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::mktime: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::realloc: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::read: + // May throw; "read" is a valid pthread cancellation point. + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::rewind: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::rmdir: + case LibFunc::remove: + case LibFunc::realpath: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::rename: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::readlink: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::write: + // May throw; "write" is a valid pthread cancellation point. 
+ Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::bcopy: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::bcmp: + Changed |= setDoesNotThrow(F); + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::bzero: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::calloc: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + return Changed; + case LibFunc::chmod: + case LibFunc::chown: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::ctermid: + case LibFunc::clearerr: + case LibFunc::closedir: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::atoi: + case LibFunc::atol: + case LibFunc::atof: + case LibFunc::atoll: + Changed |= setDoesNotThrow(F); + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::access: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::fopen: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::fdopen: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::feof: + case LibFunc::free: + case LibFunc::fseek: + case LibFunc::ftell: + case LibFunc::fgetc: + case LibFunc::fseeko: + case LibFunc::ftello: + case LibFunc::fileno: + case LibFunc::fflush: + case LibFunc::fclose: + case LibFunc::fsetpos: + case LibFunc::flockfile: + case LibFunc::funlockfile: + case LibFunc::ftrylockfile: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::ferror: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F); + return Changed; + case LibFunc::fputc: + case LibFunc::fstat: + case LibFunc::frexp: + case LibFunc::frexpf: + case LibFunc::frexpl: + case LibFunc::fstatvfs: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::fgets: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 3); + return Changed; + case LibFunc::fread: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 4); + return Changed; + case LibFunc::fwrite: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 4); + // FIXME: readonly #1? 
+ return Changed; + case LibFunc::fputs: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::fscanf: + case LibFunc::fprintf: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::fgetpos: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::getc: + case LibFunc::getlogin_r: + case LibFunc::getc_unlocked: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::getenv: + Changed |= setDoesNotThrow(F); + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::gets: + case LibFunc::getchar: + Changed |= setDoesNotThrow(F); + return Changed; + case LibFunc::getitimer: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::getpwnam: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::ungetc: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::uname: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::unlink: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::unsetenv: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::utime: + case LibFunc::utimes: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::putc: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::puts: + case LibFunc::printf: + case LibFunc::perror: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::pread: + // May throw; "pread" is a valid pthread cancellation point. + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::pwrite: + // May throw; "pwrite" is a valid pthread cancellation point. 
+ Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::putchar: + Changed |= setDoesNotThrow(F); + return Changed; + case LibFunc::popen: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::pclose: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::vscanf: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::vsscanf: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::vfscanf: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::valloc: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + return Changed; + case LibFunc::vprintf: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::vfprintf: + case LibFunc::vsprintf: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::vsnprintf: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 3); + Changed |= setOnlyReadsMemory(F, 3); + return Changed; + case LibFunc::open: + // May throw; "open" is a valid pthread cancellation point. + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::opendir: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::tmpfile: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + return Changed; + case LibFunc::times: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::htonl: + case LibFunc::htons: + case LibFunc::ntohl: + case LibFunc::ntohs: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAccessMemory(F); + return Changed; + case LibFunc::lstat: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::lchown: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::qsort: + // May throw; places call through function pointer. 
+ Changed |= setDoesNotCapture(F, 4); + return Changed; + case LibFunc::dunder_strdup: + case LibFunc::dunder_strndup: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::dunder_strtok_r: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::under_IO_getc: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::under_IO_putc: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::dunder_isoc99_scanf: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::stat64: + case LibFunc::lstat64: + case LibFunc::statvfs64: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::dunder_isoc99_sscanf: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::fopen64: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::fseeko64: + case LibFunc::ftello64: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::tmpfile64: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + return Changed; + case LibFunc::fstat64: + case LibFunc::fstatvfs64: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::open64: + // May throw; "open" is a valid pthread cancellation point. + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::gettimeofday: + // Currently some platforms have the restrict keyword on the arguments to + // gettimeofday. To be conservative, do not add noalias to gettimeofday's + // arguments. 
+ Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::Znwj: // new(unsigned int) + case LibFunc::Znwm: // new(unsigned long) + case LibFunc::Znaj: // new[](unsigned int) + case LibFunc::Znam: // new[](unsigned long) + case LibFunc::msvc_new_int: // new(unsigned int) + case LibFunc::msvc_new_longlong: // new(unsigned long long) + case LibFunc::msvc_new_array_int: // new[](unsigned int) + case LibFunc::msvc_new_array_longlong: // new[](unsigned long long) + // Operator new always returns a nonnull noalias pointer + Changed |= setNonNull(F, AttributeSet::ReturnIndex); + Changed |= setDoesNotAlias(F, AttributeSet::ReturnIndex); + return Changed; + //TODO: add LibFunc entries for: + //case LibFunc::memset_pattern4: + //case LibFunc::memset_pattern8: + case LibFunc::memset_pattern16: + Changed |= setOnlyAccessesArgMemory(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + // int __nvvm_reflect(const char *) + case LibFunc::nvvm_reflect: + Changed |= setDoesNotAccessMemory(F); + Changed |= setDoesNotThrow(F); + return Changed; + + default: + // FIXME: It'd be really nice to cover all the library functions we're + // aware of here. + return false; + } +} + +//- Emit LibCalls ------------------------------------------------------------// + +Value *llvm::castToCStr(Value *V, IRBuilder<> &B) { + unsigned AS = V->getType()->getPointerAddressSpace(); + return B.CreateBitCast(V, B.getInt8PtrTy(AS), "cstr"); +} + +Value *llvm::emitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::strlen)) + return nullptr; + + Module *M = B.GetInsertBlock()->getModule(); + LLVMContext &Context = B.GetInsertBlock()->getContext(); + Constant *StrLen = M->getOrInsertFunction("strlen", DL.getIntPtrType(Context), + B.getInt8PtrTy(), nullptr); + inferLibFuncAttributes(*M->getFunction("strlen"), *TLI); + CallInst *CI = B.CreateCall(StrLen, castToCStr(Ptr, B), "strlen"); + if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + +Value *llvm::emitStrChr(Value *Ptr, char C, IRBuilder<> &B, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::strchr)) + return nullptr; + + Module *M = B.GetInsertBlock()->getModule(); + Type *I8Ptr = B.getInt8PtrTy(); + Type *I32Ty = B.getInt32Ty(); + Constant *StrChr = + M->getOrInsertFunction("strchr", I8Ptr, I8Ptr, I32Ty, nullptr); + inferLibFuncAttributes(*M->getFunction("strchr"), *TLI); + CallInst *CI = B.CreateCall( + StrChr, {castToCStr(Ptr, B), ConstantInt::get(I32Ty, C)}, "strchr"); + if (const Function *F = dyn_cast<Function>(StrChr->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + return CI; +} + +Value *llvm::emitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B, + const DataLayout &DL, const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::strncmp)) + return nullptr; + + Module *M = B.GetInsertBlock()->getModule(); + LLVMContext &Context = B.GetInsertBlock()->getContext(); + Value *StrNCmp = M->getOrInsertFunction("strncmp", B.getInt32Ty(), + B.getInt8PtrTy(), B.getInt8PtrTy(), + DL.getIntPtrType(Context), nullptr); + inferLibFuncAttributes(*M->getFunction("strncmp"), *TLI); + CallInst *CI = B.CreateCall( + StrNCmp, {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, "strncmp"); + + if (const Function *F = 
dyn_cast<Function>(StrNCmp->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + +Value *llvm::emitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, + const TargetLibraryInfo *TLI, StringRef Name) { + if (!TLI->has(LibFunc::strcpy)) + return nullptr; + + Module *M = B.GetInsertBlock()->getModule(); + Type *I8Ptr = B.getInt8PtrTy(); + Value *StrCpy = M->getOrInsertFunction(Name, I8Ptr, I8Ptr, I8Ptr, nullptr); + inferLibFuncAttributes(*M->getFunction(Name), *TLI); + CallInst *CI = + B.CreateCall(StrCpy, {castToCStr(Dst, B), castToCStr(Src, B)}, Name); + if (const Function *F = dyn_cast<Function>(StrCpy->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + return CI; +} + +Value *llvm::emitStrNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B, + const TargetLibraryInfo *TLI, StringRef Name) { + if (!TLI->has(LibFunc::strncpy)) + return nullptr; + + Module *M = B.GetInsertBlock()->getModule(); + Type *I8Ptr = B.getInt8PtrTy(); + Value *StrNCpy = M->getOrInsertFunction(Name, I8Ptr, I8Ptr, I8Ptr, + Len->getType(), nullptr); + inferLibFuncAttributes(*M->getFunction(Name), *TLI); + CallInst *CI = B.CreateCall( + StrNCpy, {castToCStr(Dst, B), castToCStr(Src, B), Len}, "strncpy"); + if (const Function *F = dyn_cast<Function>(StrNCpy->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + return CI; +} + +Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, + IRBuilder<> &B, const DataLayout &DL, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::memcpy_chk)) + return nullptr; + + Module *M = B.GetInsertBlock()->getModule(); + AttributeSet AS; + AS = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); + LLVMContext &Context = B.GetInsertBlock()->getContext(); + Value *MemCpy = M->getOrInsertFunction( + "__memcpy_chk", AttributeSet::get(M->getContext(), AS), B.getInt8PtrTy(), + B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context), + DL.getIntPtrType(Context), nullptr); + Dst = castToCStr(Dst, B); + Src = castToCStr(Src, B); + CallInst *CI = B.CreateCall(MemCpy, {Dst, Src, Len, ObjSize}); + if (const Function *F = dyn_cast<Function>(MemCpy->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + return CI; +} + +Value *llvm::emitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B, + const DataLayout &DL, const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::memchr)) + return nullptr; + + Module *M = B.GetInsertBlock()->getModule(); + LLVMContext &Context = B.GetInsertBlock()->getContext(); + Value *MemChr = M->getOrInsertFunction("memchr", B.getInt8PtrTy(), + B.getInt8PtrTy(), B.getInt32Ty(), + DL.getIntPtrType(Context), nullptr); + inferLibFuncAttributes(*M->getFunction("memchr"), *TLI); + CallInst *CI = B.CreateCall(MemChr, {castToCStr(Ptr, B), Val, Len}, "memchr"); + + if (const Function *F = dyn_cast<Function>(MemChr->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + +Value *llvm::emitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B, + const DataLayout &DL, const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::memcmp)) + return nullptr; + + Module *M = B.GetInsertBlock()->getModule(); + LLVMContext &Context = B.GetInsertBlock()->getContext(); + Value *MemCmp = M->getOrInsertFunction("memcmp", B.getInt32Ty(), + B.getInt8PtrTy(), B.getInt8PtrTy(), + DL.getIntPtrType(Context), nullptr); + inferLibFuncAttributes(*M->getFunction("memcmp"), *TLI); + CallInst *CI = B.CreateCall( + MemCmp, 
{castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, "memcmp"); + + if (const Function *F = dyn_cast<Function>(MemCmp->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + +/// Append a suffix to the function name according to the type of 'Op'. +static void appendTypeSuffix(Value *Op, StringRef &Name, + SmallString<20> &NameBuffer) { + if (!Op->getType()->isDoubleTy()) { + NameBuffer += Name; + + if (Op->getType()->isFloatTy()) + NameBuffer += 'f'; + else + NameBuffer += 'l'; + + Name = NameBuffer; + } +} + +Value *llvm::emitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B, + const AttributeSet &Attrs) { + SmallString<20> NameBuffer; + appendTypeSuffix(Op, Name, NameBuffer); + + Module *M = B.GetInsertBlock()->getModule(); + Value *Callee = M->getOrInsertFunction(Name, Op->getType(), + Op->getType(), nullptr); + CallInst *CI = B.CreateCall(Callee, Op, Name); + CI->setAttributes(Attrs); + if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + +Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name, + IRBuilder<> &B, const AttributeSet &Attrs) { + SmallString<20> NameBuffer; + appendTypeSuffix(Op1, Name, NameBuffer); + + Module *M = B.GetInsertBlock()->getModule(); + Value *Callee = M->getOrInsertFunction(Name, Op1->getType(), Op1->getType(), + Op2->getType(), nullptr); + CallInst *CI = B.CreateCall(Callee, {Op1, Op2}, Name); + CI->setAttributes(Attrs); + if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + +Value *llvm::emitPutChar(Value *Char, IRBuilder<> &B, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::putchar)) + return nullptr; + + Module *M = B.GetInsertBlock()->getModule(); + Value *PutChar = M->getOrInsertFunction("putchar", B.getInt32Ty(), + B.getInt32Ty(), nullptr); + CallInst *CI = B.CreateCall(PutChar, + B.CreateIntCast(Char, + B.getInt32Ty(), + /*isSigned*/true, + "chari"), + "putchar"); + + if (const Function *F = dyn_cast<Function>(PutChar->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + return CI; +} + +Value *llvm::emitPutS(Value *Str, IRBuilder<> &B, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::puts)) + return nullptr; + + Module *M = B.GetInsertBlock()->getModule(); + Value *PutS = + M->getOrInsertFunction("puts", B.getInt32Ty(), B.getInt8PtrTy(), nullptr); + inferLibFuncAttributes(*M->getFunction("puts"), *TLI); + CallInst *CI = B.CreateCall(PutS, castToCStr(Str, B), "puts"); + if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + return CI; +} + +Value *llvm::emitFPutC(Value *Char, Value *File, IRBuilder<> &B, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::fputc)) + return nullptr; + + Module *M = B.GetInsertBlock()->getModule(); + Constant *F = M->getOrInsertFunction("fputc", B.getInt32Ty(), B.getInt32Ty(), + File->getType(), nullptr); + if (File->getType()->isPointerTy()) + inferLibFuncAttributes(*M->getFunction("fputc"), *TLI); + Char = B.CreateIntCast(Char, B.getInt32Ty(), /*isSigned*/true, + "chari"); + CallInst *CI = B.CreateCall(F, {Char, File}, "fputc"); + + if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) + CI->setCallingConv(Fn->getCallingConv()); + return CI; +} + +Value *llvm::emitFPutS(Value *Str, Value *File, IRBuilder<> &B, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::fputs)) + return 
nullptr; + + Module *M = B.GetInsertBlock()->getModule(); + StringRef FPutsName = TLI->getName(LibFunc::fputs); + Constant *F = M->getOrInsertFunction( + FPutsName, B.getInt32Ty(), B.getInt8PtrTy(), File->getType(), nullptr); + if (File->getType()->isPointerTy()) + inferLibFuncAttributes(*M->getFunction(FPutsName), *TLI); + CallInst *CI = B.CreateCall(F, {castToCStr(Str, B), File}, "fputs"); + + if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) + CI->setCallingConv(Fn->getCallingConv()); + return CI; +} + +Value *llvm::emitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B, + const DataLayout &DL, const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::fwrite)) + return nullptr; + + Module *M = B.GetInsertBlock()->getModule(); + LLVMContext &Context = B.GetInsertBlock()->getContext(); + StringRef FWriteName = TLI->getName(LibFunc::fwrite); + Constant *F = M->getOrInsertFunction( + FWriteName, DL.getIntPtrType(Context), B.getInt8PtrTy(), + DL.getIntPtrType(Context), DL.getIntPtrType(Context), File->getType(), + nullptr); + if (File->getType()->isPointerTy()) + inferLibFuncAttributes(*M->getFunction(FWriteName), *TLI); + CallInst *CI = + B.CreateCall(F, {castToCStr(Ptr, B), Size, + ConstantInt::get(DL.getIntPtrType(Context), 1), File}); + + if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) + CI->setCallingConv(Fn->getCallingConv()); + return CI; +} diff --git a/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp b/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp new file mode 100644 index 000000000000..bc2cef26edcb --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -0,0 +1,272 @@ +//===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains an optimization for div and rem on architectures that +// execute short instructions significantly faster than longer instructions. +// For example, on Intel Atom 32-bit divides are slow enough that during +// runtime it is profitable to check the value of the operands, and if they are +// positive and less than 256 use an unsigned 8-bit divide. 
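Sketched in C++-level terms, and assuming a 32-bit unsigned divide bypassed to 8 bits, the control flow this pass emits for each division is roughly the following; the real transform works on IR, also produces the remainder, and caches the resulting PHI nodes per basic block.

#include <cstdint>

// Shape of the code insertFastDiv() builds around an original 32-bit udiv.
uint32_t bypassedUDiv(uint32_t Dividend, uint32_t Divisor) {
  // The operands are OR'ed together and tested against the inverted bypass
  // mask, mirroring the CreateOr / CreateAnd / CreateICmpEQ sequence below.
  if (((Dividend | Divisor) & ~uint32_t(0xFF)) == 0) {
    // Fast path: both operands fit in 8 bits, so a narrow divide suffices.
    return static_cast<uint8_t>(Dividend) / static_cast<uint8_t>(Divisor);
  }
  // Slow path: the original full-width divide.
  return Dividend / Divisor;
}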
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/BypassSlowDivision.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Transforms/Utils/Local.h" + +using namespace llvm; + +#define DEBUG_TYPE "bypass-slow-division" + +namespace { + struct DivOpInfo { + bool SignedOp; + Value *Dividend; + Value *Divisor; + + DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor) + : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {} + }; + + struct DivPhiNodes { + PHINode *Quotient; + PHINode *Remainder; + + DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder) + : Quotient(InQuotient), Remainder(InRemainder) {} + }; +} + +namespace llvm { + template<> + struct DenseMapInfo<DivOpInfo> { + static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) { + return Val1.SignedOp == Val2.SignedOp && + Val1.Dividend == Val2.Dividend && + Val1.Divisor == Val2.Divisor; + } + + static DivOpInfo getEmptyKey() { + return DivOpInfo(false, nullptr, nullptr); + } + + static DivOpInfo getTombstoneKey() { + return DivOpInfo(true, nullptr, nullptr); + } + + static unsigned getHashValue(const DivOpInfo &Val) { + return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^ + reinterpret_cast<uintptr_t>(Val.Divisor)) ^ + (unsigned)Val.SignedOp; + } + }; + + typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy; +} + +// insertFastDiv - Substitutes the div/rem instruction with code that checks the +// value of the operands and uses a shorter-faster div/rem instruction when +// possible and the longer-slower div/rem instruction otherwise. +static bool insertFastDiv(Instruction *I, IntegerType *BypassType, + bool UseDivOp, bool UseSignedOp, + DivCacheTy &PerBBDivCache) { + Function *F = I->getParent()->getParent(); + // Get instruction operands + Value *Dividend = I->getOperand(0); + Value *Divisor = I->getOperand(1); + + if (isa<ConstantInt>(Divisor)) { + // Division by a constant should have been solved and replaced earlier + // in the pipeline. + return false; + } + + // If the numerator is a constant, bail if it doesn't fit into BypassType.
+ if (ConstantInt *ConstDividend = dyn_cast<ConstantInt>(Dividend)) + if (ConstDividend->getValue().getActiveBits() > BypassType->getBitWidth()) + return false; + + // Basic Block is split before divide + BasicBlock *MainBB = &*I->getParent(); + BasicBlock *SuccessorBB = MainBB->splitBasicBlock(I); + + // Add new basic block for slow divide operation + BasicBlock *SlowBB = + BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB); + SlowBB->moveBefore(SuccessorBB); + IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin()); + Value *SlowQuotientV; + Value *SlowRemainderV; + if (UseSignedOp) { + SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor); + SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor); + } else { + SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor); + SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor); + } + SlowBuilder.CreateBr(SuccessorBB); + + // Add new basic block for fast divide operation + BasicBlock *FastBB = + BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB); + FastBB->moveBefore(SlowBB); + IRBuilder<> FastBuilder(FastBB, FastBB->begin()); + Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor, + BypassType); + Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend, + BypassType); + + // udiv/urem because optimization only handles positive numbers + Value *ShortQuotientV = FastBuilder.CreateUDiv(ShortDividendV, ShortDivisorV); + Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV, + ShortDivisorV); + Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt, + ShortQuotientV, + Dividend->getType()); + Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt, + ShortRemainderV, + Dividend->getType()); + FastBuilder.CreateBr(SuccessorBB); + + // Phi nodes for result of div and rem + IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin()); + PHINode *QuoPhi = SuccessorBuilder.CreatePHI(I->getType(), 2); + QuoPhi->addIncoming(SlowQuotientV, SlowBB); + QuoPhi->addIncoming(FastQuotientV, FastBB); + PHINode *RemPhi = SuccessorBuilder.CreatePHI(I->getType(), 2); + RemPhi->addIncoming(SlowRemainderV, SlowBB); + RemPhi->addIncoming(FastRemainderV, FastBB); + + // Replace I with appropriate phi node + if (UseDivOp) + I->replaceAllUsesWith(QuoPhi); + else + I->replaceAllUsesWith(RemPhi); + I->eraseFromParent(); + + // Combine operands into a single value with OR for value testing below + MainBB->getInstList().back().eraseFromParent(); + IRBuilder<> MainBuilder(MainBB, MainBB->end()); + + // We should have bailed out above if the divisor is a constant, but the + // dividend may still be a constant. Set OrV to our non-constant operands + // OR'ed together. 
+ assert(!isa<ConstantInt>(Divisor)); + + Value *OrV; + if (!isa<ConstantInt>(Dividend)) + OrV = MainBuilder.CreateOr(Dividend, Divisor); + else + OrV = Divisor; + + // BitMask is inverted to check if the operands are + // larger than the bypass type + uint64_t BitMask = ~BypassType->getBitMask(); + Value *AndV = MainBuilder.CreateAnd(OrV, BitMask); + + // Compare operand values and branch + Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0); + Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV); + MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB); + + // Cache phi nodes to be used later in place of other instances + // of div or rem with the same sign, dividend, and divisor + DivOpInfo Key(UseSignedOp, Dividend, Divisor); + DivPhiNodes Value(QuoPhi, RemPhi); + PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value)); + return true; +} + +// reuseOrInsertFastDiv - Reuses previously computed dividend or remainder from +// the current BB if operands and operation are identical. Otherwise calls +// insertFastDiv to perform the optimization and caches the resulting dividend +// and remainder. +static bool reuseOrInsertFastDiv(Instruction *I, IntegerType *BypassType, + bool UseDivOp, bool UseSignedOp, + DivCacheTy &PerBBDivCache) { + // Get instruction operands + DivOpInfo Key(UseSignedOp, I->getOperand(0), I->getOperand(1)); + DivCacheTy::iterator CacheI = PerBBDivCache.find(Key); + + if (CacheI == PerBBDivCache.end()) { + // If previous instance does not exist, insert fast div + return insertFastDiv(I, BypassType, UseDivOp, UseSignedOp, PerBBDivCache); + } + + // Replace operation value with previously generated phi node + DivPhiNodes &Value = CacheI->second; + if (UseDivOp) { + // Replace all uses of div instruction with quotient phi node + I->replaceAllUsesWith(Value.Quotient); + } else { + // Replace all uses of rem instruction with remainder phi node + I->replaceAllUsesWith(Value.Remainder); + } + + // Remove redundant operation + I->eraseFromParent(); + return true; +} + +// bypassSlowDivision - This optimization identifies DIV instructions in a BB +// that can be profitably bypassed and carried out with a shorter, faster +// divide. +bool llvm::bypassSlowDivision( + BasicBlock *BB, const DenseMap<unsigned int, unsigned int> &BypassWidths) { + DivCacheTy DivCache; + + bool MadeChange = false; + Instruction* Next = &*BB->begin(); + while (Next != nullptr) { + // We may add instructions immediately after I, but we want to skip over + // them. 
+ Instruction* I = Next; + Next = Next->getNextNode(); + + // Get instruction details + unsigned Opcode = I->getOpcode(); + bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv; + bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem; + bool UseSignedOp = Opcode == Instruction::SDiv || + Opcode == Instruction::SRem; + + // Only optimize div or rem ops + if (!UseDivOp && !UseRemOp) + continue; + + // Skip division on vector types, only optimize integer instructions + if (!I->getType()->isIntegerTy()) + continue; + + // Get bitwidth of div/rem instruction + IntegerType *T = cast<IntegerType>(I->getType()); + unsigned int bitwidth = T->getBitWidth(); + + // Continue if bitwidth is not bypassed + DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth); + if (BI == BypassWidths.end()) + continue; + + // Get type for div/rem instruction with bypass bitwidth + IntegerType *BT = IntegerType::get(I->getContext(), BI->second); + + MadeChange |= reuseOrInsertFastDiv(I, BT, UseDivOp, UseSignedOp, DivCache); + } + + // Above we eagerly create divs and rems, as pairs, so that we can efficiently + // create divrem machine instructions. Now erase any unused divs / rems so we + // don't leave extra instructions sitting around. + for (auto &KV : DivCache) + for (Instruction *Phi : {KV.second.Quotient, KV.second.Remainder}) + RecursivelyDeleteTriviallyDeadInstructions(Phi); + + return MadeChange; +} diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp new file mode 100644 index 000000000000..4d33e22fecfb --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -0,0 +1,749 @@ +//===- CloneFunction.cpp - Clone a function into another function ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the CloneFunctionInto interface, which is used as the +// low-level function cloner. This is used by the CloneFunction and function +// inliner to do the dirty work of copying the body of a function around. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/ValueMapper.h" +#include <map> +using namespace llvm; + +/// See comments in Cloning.h. 
+BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, + ValueToValueMapTy &VMap, + const Twine &NameSuffix, Function *F, + ClonedCodeInfo *CodeInfo) { + BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F); + if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix); + + bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false; + + // Loop over all instructions, and copy them over. + for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); + II != IE; ++II) { + Instruction *NewInst = II->clone(); + if (II->hasName()) + NewInst->setName(II->getName()+NameSuffix); + NewBB->getInstList().push_back(NewInst); + VMap[&*II] = NewInst; // Add instruction map to value. + + hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II)); + if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) { + if (isa<ConstantInt>(AI->getArraySize())) + hasStaticAllocas = true; + else + hasDynamicAllocas = true; + } + } + + if (CodeInfo) { + CodeInfo->ContainsCalls |= hasCalls; + CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas; + CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas && + BB != &BB->getParent()->getEntryBlock(); + } + return NewBB; +} + +// Clone OldFunc into NewFunc, transforming the old arguments into references to +// VMap values. +// +void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, + ValueToValueMapTy &VMap, + bool ModuleLevelChanges, + SmallVectorImpl<ReturnInst*> &Returns, + const char *NameSuffix, ClonedCodeInfo *CodeInfo, + ValueMapTypeRemapper *TypeMapper, + ValueMaterializer *Materializer) { + assert(NameSuffix && "NameSuffix cannot be null!"); + +#ifndef NDEBUG + for (const Argument &I : OldFunc->args()) + assert(VMap.count(&I) && "No mapping from source argument specified!"); +#endif + + // Copy all attributes other than those stored in the AttributeSet. We need + // to remap the parameter indices of the AttributeSet. + AttributeSet NewAttrs = NewFunc->getAttributes(); + NewFunc->copyAttributesFrom(OldFunc); + NewFunc->setAttributes(NewAttrs); + + // Fix up the personality function that got copied over. + if (OldFunc->hasPersonalityFn()) + NewFunc->setPersonalityFn( + MapValue(OldFunc->getPersonalityFn(), VMap, + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, + TypeMapper, Materializer)); + + AttributeSet OldAttrs = OldFunc->getAttributes(); + // Clone any argument attributes that are present in the VMap. + for (const Argument &OldArg : OldFunc->args()) + if (Argument *NewArg = dyn_cast<Argument>(VMap[&OldArg])) { + AttributeSet attrs = + OldAttrs.getParamAttributes(OldArg.getArgNo() + 1); + if (attrs.getNumSlots() > 0) + NewArg->addAttr(attrs); + } + + NewFunc->setAttributes( + NewFunc->getAttributes() + .addAttributes(NewFunc->getContext(), AttributeSet::ReturnIndex, + OldAttrs.getRetAttributes()) + .addAttributes(NewFunc->getContext(), AttributeSet::FunctionIndex, + OldAttrs.getFnAttributes())); + + SmallVector<std::pair<unsigned, MDNode *>, 1> MDs; + OldFunc->getAllMetadata(MDs); + for (auto MD : MDs) + NewFunc->addMetadata( + MD.first, + *MapMetadata(MD.second, VMap, + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, + TypeMapper, Materializer)); + + // Loop over all of the basic blocks in the function, cloning them as + // appropriate. Note that we save BE this way in order to handle cloning of + // recursive functions into themselves. 
+ // + for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end(); + BI != BE; ++BI) { + const BasicBlock &BB = *BI; + + // Create a new basic block and copy instructions into it! + BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo); + + // Add basic block mapping. + VMap[&BB] = CBB; + + // It is only legal to clone a function if a block address within that + // function is never referenced outside of the function. Given that, we + // want to map block addresses from the old function to block addresses in + // the clone. (This is different from the generic ValueMapper + // implementation, which generates an invalid blockaddress when + // cloning a function.) + if (BB.hasAddressTaken()) { + Constant *OldBBAddr = BlockAddress::get(const_cast<Function*>(OldFunc), + const_cast<BasicBlock*>(&BB)); + VMap[OldBBAddr] = BlockAddress::get(NewFunc, CBB); + } + + // Note return instructions for the caller. + if (ReturnInst *RI = dyn_cast<ReturnInst>(CBB->getTerminator())) + Returns.push_back(RI); + } + + // Loop over all of the instructions in the function, fixing up operand + // references as we go. This uses VMap to do all the hard work. + for (Function::iterator BB = + cast<BasicBlock>(VMap[&OldFunc->front()])->getIterator(), + BE = NewFunc->end(); + BB != BE; ++BB) + // Loop over all instructions, fixing each one as we find it... + for (Instruction &II : *BB) + RemapInstruction(&II, VMap, + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, + TypeMapper, Materializer); +} + +/// Return a copy of the specified function and add it to that function's +/// module. Also, any references specified in the VMap are changed to refer to +/// their mapped value instead of the original one. If any of the arguments to +/// the function are in the VMap, the arguments are deleted from the resultant +/// function. The VMap is updated to include mappings from all of the +/// instructions and basicblocks in the function from their old to new values. +/// +Function *llvm::CloneFunction(Function *F, ValueToValueMapTy &VMap, + ClonedCodeInfo *CodeInfo) { + std::vector<Type*> ArgTypes; + + // The user might be deleting arguments to the function by specifying them in + // the VMap. If so, we need to not add the arguments to the arg ty vector + // + for (const Argument &I : F->args()) + if (VMap.count(&I) == 0) // Haven't mapped the argument to anything yet? + ArgTypes.push_back(I.getType()); + + // Create a new function type... + FunctionType *FTy = FunctionType::get(F->getFunctionType()->getReturnType(), + ArgTypes, F->getFunctionType()->isVarArg()); + + // Create the new function... + Function *NewF = + Function::Create(FTy, F->getLinkage(), F->getName(), F->getParent()); + + // Loop over the arguments, copying the names of the mapped arguments over... + Function::arg_iterator DestI = NewF->arg_begin(); + for (const Argument & I : F->args()) + if (VMap.count(&I) == 0) { // Is this argument preserved? + DestI->setName(I.getName()); // Copy the name over... + VMap[&I] = &*DestI++; // Add mapping to VMap + } + + SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned. + CloneFunctionInto(NewF, F, VMap, /*ModuleLevelChanges=*/false, Returns, "", + CodeInfo); + + return NewF; +} + + + +namespace { + /// This is a private class used to implement CloneAndPruneFunctionInto. 
+ struct PruningFunctionCloner { + Function *NewFunc; + const Function *OldFunc; + ValueToValueMapTy &VMap; + bool ModuleLevelChanges; + const char *NameSuffix; + ClonedCodeInfo *CodeInfo; + + public: + PruningFunctionCloner(Function *newFunc, const Function *oldFunc, + ValueToValueMapTy &valueMap, bool moduleLevelChanges, + const char *nameSuffix, ClonedCodeInfo *codeInfo) + : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap), + ModuleLevelChanges(moduleLevelChanges), NameSuffix(nameSuffix), + CodeInfo(codeInfo) {} + + /// The specified block is found to be reachable, clone it and + /// anything that it can reach. + void CloneBlock(const BasicBlock *BB, + BasicBlock::const_iterator StartingInst, + std::vector<const BasicBlock*> &ToClone); + }; +} + +/// The specified block is found to be reachable, clone it and +/// anything that it can reach. +void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, + BasicBlock::const_iterator StartingInst, + std::vector<const BasicBlock*> &ToClone){ + WeakVH &BBEntry = VMap[BB]; + + // Have we already cloned this block? + if (BBEntry) return; + + // Nope, clone it now. + BasicBlock *NewBB; + BBEntry = NewBB = BasicBlock::Create(BB->getContext()); + if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix); + + // It is only legal to clone a function if a block address within that + // function is never referenced outside of the function. Given that, we + // want to map block addresses from the old function to block addresses in + // the clone. (This is different from the generic ValueMapper + // implementation, which generates an invalid blockaddress when + // cloning a function.) + // + // Note that we don't need to fix the mapping for unreachable blocks; + // the default mapping there is safe. + if (BB->hasAddressTaken()) { + Constant *OldBBAddr = BlockAddress::get(const_cast<Function*>(OldFunc), + const_cast<BasicBlock*>(BB)); + VMap[OldBBAddr] = BlockAddress::get(NewFunc, NewBB); + } + + bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false; + + // Loop over all instructions, and copy them over, DCE'ing as we go. This + // loop doesn't include the terminator. + for (BasicBlock::const_iterator II = StartingInst, IE = --BB->end(); + II != IE; ++II) { + + Instruction *NewInst = II->clone(); + + // Eagerly remap operands to the newly cloned instruction, except for PHI + // nodes for which we defer processing until we update the CFG. + if (!isa<PHINode>(NewInst)) { + RemapInstruction(NewInst, VMap, + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges); + + // If we can simplify this instruction to some other value, simply add + // a mapping to that value rather than inserting a new instruction into + // the basic block. + if (Value *V = + SimplifyInstruction(NewInst, BB->getModule()->getDataLayout())) { + // On the off-chance that this simplifies to an instruction in the old + // function, map it back into the new function. + if (Value *MappedV = VMap.lookup(V)) + V = MappedV; + + if (!NewInst->mayHaveSideEffects()) { + VMap[&*II] = V; + delete NewInst; + continue; + } + } + } + + if (II->hasName()) + NewInst->setName(II->getName()+NameSuffix); + VMap[&*II] = NewInst; // Add instruction map to value. 
+ NewBB->getInstList().push_back(NewInst); + hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II)); + + if (CodeInfo) + if (auto CS = ImmutableCallSite(&*II)) + if (CS.hasOperandBundles()) + CodeInfo->OperandBundleCallSites.push_back(NewInst); + + if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) { + if (isa<ConstantInt>(AI->getArraySize())) + hasStaticAllocas = true; + else + hasDynamicAllocas = true; + } + } + + // Finally, clone over the terminator. + const TerminatorInst *OldTI = BB->getTerminator(); + bool TerminatorDone = false; + if (const BranchInst *BI = dyn_cast<BranchInst>(OldTI)) { + if (BI->isConditional()) { + // If the condition was a known constant in the callee... + ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition()); + // Or is a known constant in the caller... + if (!Cond) { + Value *V = VMap.lookup(BI->getCondition()); + Cond = dyn_cast_or_null<ConstantInt>(V); + } + + // Constant fold to uncond branch! + if (Cond) { + BasicBlock *Dest = BI->getSuccessor(!Cond->getZExtValue()); + VMap[OldTI] = BranchInst::Create(Dest, NewBB); + ToClone.push_back(Dest); + TerminatorDone = true; + } + } + } else if (const SwitchInst *SI = dyn_cast<SwitchInst>(OldTI)) { + // If switching on a value known constant in the caller. + ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition()); + if (!Cond) { // Or known constant after constant prop in the callee... + Value *V = VMap.lookup(SI->getCondition()); + Cond = dyn_cast_or_null<ConstantInt>(V); + } + if (Cond) { // Constant fold to uncond branch! + SwitchInst::ConstCaseIt Case = SI->findCaseValue(Cond); + BasicBlock *Dest = const_cast<BasicBlock*>(Case.getCaseSuccessor()); + VMap[OldTI] = BranchInst::Create(Dest, NewBB); + ToClone.push_back(Dest); + TerminatorDone = true; + } + } + + if (!TerminatorDone) { + Instruction *NewInst = OldTI->clone(); + if (OldTI->hasName()) + NewInst->setName(OldTI->getName()+NameSuffix); + NewBB->getInstList().push_back(NewInst); + VMap[OldTI] = NewInst; // Add instruction map to value. + + if (CodeInfo) + if (auto CS = ImmutableCallSite(OldTI)) + if (CS.hasOperandBundles()) + CodeInfo->OperandBundleCallSites.push_back(NewInst); + + // Recursively clone any reachable successor blocks. + const TerminatorInst *TI = BB->getTerminator(); + for (const BasicBlock *Succ : TI->successors()) + ToClone.push_back(Succ); + } + + if (CodeInfo) { + CodeInfo->ContainsCalls |= hasCalls; + CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas; + CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas && + BB != &BB->getParent()->front(); + } +} + +/// This works like CloneAndPruneFunctionInto, except that it does not clone the +/// entire function. Instead it starts at an instruction provided by the caller +/// and copies (and prunes) only the code reachable from that instruction. +void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, + const Instruction *StartingInst, + ValueToValueMapTy &VMap, + bool ModuleLevelChanges, + SmallVectorImpl<ReturnInst *> &Returns, + const char *NameSuffix, + ClonedCodeInfo *CodeInfo) { + assert(NameSuffix && "NameSuffix cannot be null!"); + + ValueMapTypeRemapper *TypeMapper = nullptr; + ValueMaterializer *Materializer = nullptr; + +#ifndef NDEBUG + // If the cloning starts at the beginning of the function, verify that + // the function arguments are mapped. 
+ if (!StartingInst) + for (const Argument &II : OldFunc->args()) + assert(VMap.count(&II) && "No mapping from source argument specified!"); +#endif + + PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges, + NameSuffix, CodeInfo); + const BasicBlock *StartingBB; + if (StartingInst) + StartingBB = StartingInst->getParent(); + else { + StartingBB = &OldFunc->getEntryBlock(); + StartingInst = &StartingBB->front(); + } + + // Clone the entry block, and anything recursively reachable from it. + std::vector<const BasicBlock*> CloneWorklist; + PFC.CloneBlock(StartingBB, StartingInst->getIterator(), CloneWorklist); + while (!CloneWorklist.empty()) { + const BasicBlock *BB = CloneWorklist.back(); + CloneWorklist.pop_back(); + PFC.CloneBlock(BB, BB->begin(), CloneWorklist); + } + + // Loop over all of the basic blocks in the old function. If the block was + // reachable, we have cloned it and the old block is now in the value map: + // insert it into the new function in the right order. If not, ignore it. + // + // Defer PHI resolution until rest of function is resolved. + SmallVector<const PHINode*, 16> PHIToResolve; + for (const BasicBlock &BI : *OldFunc) { + Value *V = VMap.lookup(&BI); + BasicBlock *NewBB = cast_or_null<BasicBlock>(V); + if (!NewBB) continue; // Dead block. + + // Add the new block to the new function. + NewFunc->getBasicBlockList().push_back(NewBB); + + // Handle PHI nodes specially, as we have to remove references to dead + // blocks. + for (BasicBlock::const_iterator I = BI.begin(), E = BI.end(); I != E; ++I) { + // PHI nodes may have been remapped to non-PHI nodes by the caller or + // during the cloning process. + if (const PHINode *PN = dyn_cast<PHINode>(I)) { + if (isa<PHINode>(VMap[PN])) + PHIToResolve.push_back(PN); + else + break; + } else { + break; + } + } + + // Finally, remap the terminator instructions, as those can't be remapped + // until all BBs are mapped. + RemapInstruction(NewBB->getTerminator(), VMap, + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, + TypeMapper, Materializer); + } + + // Defer PHI resolution until rest of function is resolved, PHI resolution + // requires the CFG to be up-to-date. + for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) { + const PHINode *OPN = PHIToResolve[phino]; + unsigned NumPreds = OPN->getNumIncomingValues(); + const BasicBlock *OldBB = OPN->getParent(); + BasicBlock *NewBB = cast<BasicBlock>(VMap[OldBB]); + + // Map operands for blocks that are live and remove operands for blocks + // that are dead. + for (; phino != PHIToResolve.size() && + PHIToResolve[phino]->getParent() == OldBB; ++phino) { + OPN = PHIToResolve[phino]; + PHINode *PN = cast<PHINode>(VMap[OPN]); + for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) { + Value *V = VMap.lookup(PN->getIncomingBlock(pred)); + if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(V)) { + Value *InVal = MapValue(PN->getIncomingValue(pred), + VMap, + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges); + assert(InVal && "Unknown input value?"); + PN->setIncomingValue(pred, InVal); + PN->setIncomingBlock(pred, MappedBlock); + } else { + PN->removeIncomingValue(pred, false); + --pred; // Revisit the next entry. + --e; + } + } + } + + // The loop above has removed PHI entries for those blocks that are dead + // and has updated others. However, if a block is live (i.e. copied over) + // but its terminator has been changed to not go to this block, then our + // phi nodes will have invalid entries. 
Update the PHI nodes in this + // case. + PHINode *PN = cast<PHINode>(NewBB->begin()); + NumPreds = std::distance(pred_begin(NewBB), pred_end(NewBB)); + if (NumPreds != PN->getNumIncomingValues()) { + assert(NumPreds < PN->getNumIncomingValues()); + // Count how many times each predecessor comes to this block. + std::map<BasicBlock*, unsigned> PredCount; + for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB); + PI != E; ++PI) + --PredCount[*PI]; + + // Figure out how many entries to remove from each PHI. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + ++PredCount[PN->getIncomingBlock(i)]; + + // At this point, the excess predecessor entries are positive in the + // map. Loop over all of the PHIs and remove excess predecessor + // entries. + BasicBlock::iterator I = NewBB->begin(); + for (; (PN = dyn_cast<PHINode>(I)); ++I) { + for (const auto &PCI : PredCount) { + BasicBlock *Pred = PCI.first; + for (unsigned NumToRemove = PCI.second; NumToRemove; --NumToRemove) + PN->removeIncomingValue(Pred, false); + } + } + } + + // If the loops above have made these phi nodes have 0 or 1 operand, + // replace them with undef or the input value. We must do this for + // correctness, because 0-operand phis are not valid. + PN = cast<PHINode>(NewBB->begin()); + if (PN->getNumIncomingValues() == 0) { + BasicBlock::iterator I = NewBB->begin(); + BasicBlock::const_iterator OldI = OldBB->begin(); + while ((PN = dyn_cast<PHINode>(I++))) { + Value *NV = UndefValue::get(PN->getType()); + PN->replaceAllUsesWith(NV); + assert(VMap[&*OldI] == PN && "VMap mismatch"); + VMap[&*OldI] = NV; + PN->eraseFromParent(); + ++OldI; + } + } + } + + // Make a second pass over the PHINodes now that all of them have been + // remapped into the new function, simplifying the PHINode and performing any + // recursive simplifications exposed. This will transparently update the + // WeakVH in the VMap. Notably, we rely on that so that if we coalesce + // two PHINodes, the iteration over the old PHIs remains valid, and the + // mapping will just map us to the new node (which may not even be a PHI + // node). + const DataLayout &DL = NewFunc->getParent()->getDataLayout(); + SmallSetVector<const Value *, 8> Worklist; + for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx) + if (isa<PHINode>(VMap[PHIToResolve[Idx]])) + Worklist.insert(PHIToResolve[Idx]); + + // Note that we must test the size on each iteration, the worklist can grow. + for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) { + const Value *OrigV = Worklist[Idx]; + auto *I = dyn_cast_or_null<Instruction>(VMap.lookup(OrigV)); + if (!I) + continue; + + // Skip over non-intrinsic callsites, we don't want to remove any nodes from + // the CGSCC. + CallSite CS = CallSite(I); + if (CS && CS.getCalledFunction() && !CS.getCalledFunction()->isIntrinsic()) + continue; + + // See if this instruction simplifies. + Value *SimpleV = SimplifyInstruction(I, DL); + if (!SimpleV) + continue; + + // Stash away all the uses of the old instruction so we can check them for + // recursive simplifications after a RAUW. This is cheaper than checking all + // uses of To on the recursive step in most cases. + for (const User *U : OrigV->users()) + Worklist.insert(cast<Instruction>(U)); + + // Replace the instruction with its simplified value. + I->replaceAllUsesWith(SimpleV); + + // If the original instruction had no side effects, remove it. 
+ if (isInstructionTriviallyDead(I)) + I->eraseFromParent(); + else + VMap[OrigV] = I; + } + + // Now that the inlined function body has been fully constructed, go through + // and zap unconditional fall-through branches. This happens all the time when + // specializing code: code specialization turns conditional branches into + // uncond branches, and this code folds them. + Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB])->getIterator(); + Function::iterator I = Begin; + while (I != NewFunc->end()) { + // Check if this block has become dead during inlining or other + // simplifications. Note that the first block will appear dead, as it has + // not yet been wired up properly. + if (I != Begin && (pred_begin(&*I) == pred_end(&*I) || + I->getSinglePredecessor() == &*I)) { + BasicBlock *DeadBB = &*I++; + DeleteDeadBlock(DeadBB); + continue; + } + + // We need to simplify conditional branches and switches with a constant + // operand. We try to prune these out when cloning, but if the + // simplification required looking through PHI nodes, those are only + // available after forming the full basic block. That may leave some here, + // and we still want to prune the dead code as early as possible. + ConstantFoldTerminator(&*I); + + BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator()); + if (!BI || BI->isConditional()) { ++I; continue; } + + BasicBlock *Dest = BI->getSuccessor(0); + if (!Dest->getSinglePredecessor()) { + ++I; continue; + } + + // We shouldn't be able to get single-entry PHI nodes here, as instsimplify + // above should have zapped all of them.. + assert(!isa<PHINode>(Dest->begin())); + + // We know all single-entry PHI nodes in the inlined function have been + // removed, so we just need to splice the blocks. + BI->eraseFromParent(); + + // Make all PHI nodes that referred to Dest now refer to I as their source. + Dest->replaceAllUsesWith(&*I); + + // Move all the instructions in the succ to the pred. + I->getInstList().splice(I->end(), Dest->getInstList()); + + // Remove the dest block. + Dest->eraseFromParent(); + + // Do not increment I, iteratively merge all things this block branches to. + } + + // Make a final pass over the basic blocks from the old function to gather + // any return instructions which survived folding. We have to do this here + // because we can iteratively remove and merge returns above. + for (Function::iterator I = cast<BasicBlock>(VMap[StartingBB])->getIterator(), + E = NewFunc->end(); + I != E; ++I) + if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator())) + Returns.push_back(RI); +} + + +/// This works exactly like CloneFunctionInto, +/// except that it does some simple constant prop and DCE on the fly. The +/// effect of this is to copy significantly less code in cases where (for +/// example) a function call with constant arguments is inlined, and those +/// constant arguments cause a significant amount of code in the callee to be +/// dead. Since this doesn't produce an exact copy of the input, it can't be +/// used for things like CloneFunction or CloneModule. 
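+// A minimal usage sketch (editor's illustration only; NewF, Callee and the
+// argument values are assumed, not taken from this file). Callers seed VMap
+// with argument mappings first so constant arguments can be folded away while
+// cloning:
+//   ValueToValueMapTy VMap;
+//   SmallVector<ReturnInst *, 8> Returns;
+//   for (const Argument &A : Callee->args())
+//     VMap[&A] = /* matching caller value or constant */;
+//   CloneAndPruneFunctionInto(NewF, Callee, VMap, /*ModuleLevelChanges=*/false,
+//                             Returns, ".i", /*CodeInfo=*/nullptr,
+//                             /*TheCall=*/nullptr);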
+void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, + ValueToValueMapTy &VMap, + bool ModuleLevelChanges, + SmallVectorImpl<ReturnInst*> &Returns, + const char *NameSuffix, + ClonedCodeInfo *CodeInfo, + Instruction *TheCall) { + CloneAndPruneIntoFromInst(NewFunc, OldFunc, &OldFunc->front().front(), VMap, + ModuleLevelChanges, Returns, NameSuffix, CodeInfo); +} + +/// \brief Remaps instructions in \p Blocks using the mapping in \p VMap. +void llvm::remapInstructionsInBlocks( + const SmallVectorImpl<BasicBlock *> &Blocks, ValueToValueMapTy &VMap) { + // Rewrite the code to refer to itself. + for (auto *BB : Blocks) + for (auto &Inst : *BB) + RemapInstruction(&Inst, VMap, + RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); +} + +/// \brief Clones a loop \p OrigLoop. Returns the loop and the blocks in \p +/// Blocks. +/// +/// Updates LoopInfo and DominatorTree assuming the loop is dominated by block +/// \p LoopDomBB. Insert the new blocks before block specified in \p Before. +Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB, + Loop *OrigLoop, ValueToValueMapTy &VMap, + const Twine &NameSuffix, LoopInfo *LI, + DominatorTree *DT, + SmallVectorImpl<BasicBlock *> &Blocks) { + assert(OrigLoop->getSubLoops().empty() && + "Loop to be cloned cannot have inner loop"); + Function *F = OrigLoop->getHeader()->getParent(); + Loop *ParentLoop = OrigLoop->getParentLoop(); + + Loop *NewLoop = new Loop(); + if (ParentLoop) + ParentLoop->addChildLoop(NewLoop); + else + LI->addTopLevelLoop(NewLoop); + + BasicBlock *OrigPH = OrigLoop->getLoopPreheader(); + assert(OrigPH && "No preheader"); + BasicBlock *NewPH = CloneBasicBlock(OrigPH, VMap, NameSuffix, F); + // To rename the loop PHIs. + VMap[OrigPH] = NewPH; + Blocks.push_back(NewPH); + + // Update LoopInfo. + if (ParentLoop) + ParentLoop->addBasicBlockToLoop(NewPH, *LI); + + // Update DominatorTree. + DT->addNewBlock(NewPH, LoopDomBB); + + for (BasicBlock *BB : OrigLoop->getBlocks()) { + BasicBlock *NewBB = CloneBasicBlock(BB, VMap, NameSuffix, F); + VMap[BB] = NewBB; + + // Update LoopInfo. + NewLoop->addBasicBlockToLoop(NewBB, *LI); + + // Add DominatorTree node. After seeing all blocks, update to correct IDom. + DT->addNewBlock(NewBB, NewPH); + + Blocks.push_back(NewBB); + } + + for (BasicBlock *BB : OrigLoop->getBlocks()) { + // Update DominatorTree. + BasicBlock *IDomBB = DT->getNode(BB)->getIDom()->getBlock(); + DT->changeImmediateDominator(cast<BasicBlock>(VMap[BB]), + cast<BasicBlock>(VMap[IDomBB])); + } + + // Move them physically from the end of the block list. + F->getBasicBlockList().splice(Before->getIterator(), F->getBasicBlockList(), + NewPH); + F->getBasicBlockList().splice(Before->getIterator(), F->getBasicBlockList(), + NewLoop->getHeader()->getIterator(), F->end()); + + return NewLoop; +} diff --git a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp new file mode 100644 index 000000000000..7ebeb615d248 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp @@ -0,0 +1,187 @@ +//===- CloneModule.cpp - Clone an entire module ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the CloneModule interface which makes a copy of an +// entire module. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Module.h" +#include "llvm/Transforms/Utils/ValueMapper.h" +#include "llvm-c/Core.h" +using namespace llvm; + +/// This is not as easy as it might seem because we have to worry about making +/// copies of global variables and functions, and making their (initializers and +/// references, respectively) refer to the right globals. +/// +std::unique_ptr<Module> llvm::CloneModule(const Module *M) { + // Create the value map that maps things from the old module over to the new + // module. + ValueToValueMapTy VMap; + return CloneModule(M, VMap); +} + +std::unique_ptr<Module> llvm::CloneModule(const Module *M, + ValueToValueMapTy &VMap) { + return CloneModule(M, VMap, [](const GlobalValue *GV) { return true; }); +} + +std::unique_ptr<Module> llvm::CloneModule( + const Module *M, ValueToValueMapTy &VMap, + function_ref<bool(const GlobalValue *)> ShouldCloneDefinition) { + // First off, we need to create the new module. + std::unique_ptr<Module> New = + llvm::make_unique<Module>(M->getModuleIdentifier(), M->getContext()); + New->setDataLayout(M->getDataLayout()); + New->setTargetTriple(M->getTargetTriple()); + New->setModuleInlineAsm(M->getModuleInlineAsm()); + + // Loop over all of the global variables, making corresponding globals in the + // new module. Here we add them to the VMap and to the new Module. We + // don't worry about attributes or initializers, they will come later. + // + for (Module::const_global_iterator I = M->global_begin(), E = M->global_end(); + I != E; ++I) { + GlobalVariable *GV = new GlobalVariable(*New, + I->getValueType(), + I->isConstant(), I->getLinkage(), + (Constant*) nullptr, I->getName(), + (GlobalVariable*) nullptr, + I->getThreadLocalMode(), + I->getType()->getAddressSpace()); + GV->copyAttributesFrom(&*I); + VMap[&*I] = GV; + } + + // Loop over the functions in the module, making external functions as before + for (const Function &I : *M) { + Function *NF = Function::Create(cast<FunctionType>(I.getValueType()), + I.getLinkage(), I.getName(), New.get()); + NF->copyAttributesFrom(&I); + VMap[&I] = NF; + } + + // Loop over the aliases in the module + for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); + I != E; ++I) { + if (!ShouldCloneDefinition(&*I)) { + // An alias cannot act as an external reference, so we need to create + // either a function or a global variable depending on the value type. + // FIXME: Once pointee types are gone we can probably pick one or the + // other. + GlobalValue *GV; + if (I->getValueType()->isFunctionTy()) + GV = Function::Create(cast<FunctionType>(I->getValueType()), + GlobalValue::ExternalLinkage, I->getName(), + New.get()); + else + GV = new GlobalVariable( + *New, I->getValueType(), false, GlobalValue::ExternalLinkage, + (Constant *)nullptr, I->getName(), (GlobalVariable *)nullptr, + I->getThreadLocalMode(), I->getType()->getAddressSpace()); + VMap[&*I] = GV; + // We do not copy attributes (mainly because copying between different + // kinds of globals is forbidden), but this is generally not required for + // correctness. 
+ continue; + } + auto *GA = GlobalAlias::create(I->getValueType(), + I->getType()->getPointerAddressSpace(), + I->getLinkage(), I->getName(), New.get()); + GA->copyAttributesFrom(&*I); + VMap[&*I] = GA; + } + + // Now that all of the things that global variable initializer can refer to + // have been created, loop through and copy the global variable referrers + // over... We also set the attributes on the global now. + // + for (Module::const_global_iterator I = M->global_begin(), E = M->global_end(); + I != E; ++I) { + if (I->isDeclaration()) + continue; + + GlobalVariable *GV = cast<GlobalVariable>(VMap[&*I]); + if (!ShouldCloneDefinition(&*I)) { + // Skip after setting the correct linkage for an external reference. + GV->setLinkage(GlobalValue::ExternalLinkage); + continue; + } + if (I->hasInitializer()) + GV->setInitializer(MapValue(I->getInitializer(), VMap)); + + SmallVector<std::pair<unsigned, MDNode *>, 1> MDs; + I->getAllMetadata(MDs); + for (auto MD : MDs) + GV->addMetadata(MD.first, *MapMetadata(MD.second, VMap)); + } + + // Similarly, copy over function bodies now... + // + for (const Function &I : *M) { + if (I.isDeclaration()) + continue; + + Function *F = cast<Function>(VMap[&I]); + if (!ShouldCloneDefinition(&I)) { + // Skip after setting the correct linkage for an external reference. + F->setLinkage(GlobalValue::ExternalLinkage); + // Personality function is not valid on a declaration. + F->setPersonalityFn(nullptr); + continue; + } + + Function::arg_iterator DestI = F->arg_begin(); + for (Function::const_arg_iterator J = I.arg_begin(); J != I.arg_end(); + ++J) { + DestI->setName(J->getName()); + VMap[&*J] = &*DestI++; + } + + SmallVector<ReturnInst *, 8> Returns; // Ignore returns cloned. + CloneFunctionInto(F, &I, VMap, /*ModuleLevelChanges=*/true, Returns); + + if (I.hasPersonalityFn()) + F->setPersonalityFn(MapValue(I.getPersonalityFn(), VMap)); + } + + // And aliases + for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); + I != E; ++I) { + // We already dealt with undefined aliases above. + if (!ShouldCloneDefinition(&*I)) + continue; + GlobalAlias *GA = cast<GlobalAlias>(VMap[&*I]); + if (const Constant *C = I->getAliasee()) + GA->setAliasee(MapValue(C, VMap)); + } + + // And named metadata.... + for (Module::const_named_metadata_iterator I = M->named_metadata_begin(), + E = M->named_metadata_end(); I != E; ++I) { + const NamedMDNode &NMD = *I; + NamedMDNode *NewNMD = New->getOrInsertNamedMetadata(NMD.getName()); + for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i) + NewNMD->addOperand(MapMetadata(NMD.getOperand(i), VMap)); + } + + return New; +} + +extern "C" { + +LLVMModuleRef LLVMCloneModule(LLVMModuleRef M) { + return wrap(CloneModule(unwrap(M)).release()); +} + +} diff --git a/contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp b/contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp new file mode 100644 index 000000000000..60ae3745c835 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp @@ -0,0 +1,108 @@ +//===- CmpInstAnalysis.cpp - Utils to help fold compares ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file holds routines to help analyse compare instructions +// and fold them into constants or other compare instructions +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/CmpInstAnalysis.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" + +using namespace llvm; + +unsigned llvm::getICmpCode(const ICmpInst *ICI, bool InvertPred) { + ICmpInst::Predicate Pred = InvertPred ? ICI->getInversePredicate() + : ICI->getPredicate(); + switch (Pred) { + // False -> 0 + case ICmpInst::ICMP_UGT: return 1; // 001 + case ICmpInst::ICMP_SGT: return 1; // 001 + case ICmpInst::ICMP_EQ: return 2; // 010 + case ICmpInst::ICMP_UGE: return 3; // 011 + case ICmpInst::ICMP_SGE: return 3; // 011 + case ICmpInst::ICMP_ULT: return 4; // 100 + case ICmpInst::ICMP_SLT: return 4; // 100 + case ICmpInst::ICMP_NE: return 5; // 101 + case ICmpInst::ICMP_ULE: return 6; // 110 + case ICmpInst::ICMP_SLE: return 6; // 110 + // True -> 7 + default: + llvm_unreachable("Invalid ICmp predicate!"); + } +} + +Value *llvm::getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS, + CmpInst::Predicate &NewICmpPred) { + switch (Code) { + default: llvm_unreachable("Illegal ICmp code!"); + case 0: // False. + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); + case 1: NewICmpPred = Sign ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break; + case 2: NewICmpPred = ICmpInst::ICMP_EQ; break; + case 3: NewICmpPred = Sign ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break; + case 4: NewICmpPred = Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break; + case 5: NewICmpPred = ICmpInst::ICMP_NE; break; + case 6: NewICmpPred = Sign ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break; + case 7: // True. + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1); + } + return nullptr; +} + +bool llvm::PredicatesFoldable(ICmpInst::Predicate p1, ICmpInst::Predicate p2) { + return (CmpInst::isSigned(p1) == CmpInst::isSigned(p2)) || + (CmpInst::isSigned(p1) && ICmpInst::isEquality(p2)) || + (CmpInst::isSigned(p2) && ICmpInst::isEquality(p1)); +} + +bool llvm::decomposeBitTestICmp(const ICmpInst *I, CmpInst::Predicate &Pred, + Value *&X, Value *&Y, Value *&Z) { + ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1)); + if (!C) + return false; + + switch (I->getPredicate()) { + default: + return false; + case ICmpInst::ICMP_SLT: + // X < 0 is equivalent to (X & SignBit) != 0. + if (!C->isZero()) + return false; + Y = ConstantInt::get(I->getContext(), APInt::getSignBit(C->getBitWidth())); + Pred = ICmpInst::ICMP_NE; + break; + case ICmpInst::ICMP_SGT: + // X > -1 is equivalent to (X & SignBit) == 0. + if (!C->isAllOnesValue()) + return false; + Y = ConstantInt::get(I->getContext(), APInt::getSignBit(C->getBitWidth())); + Pred = ICmpInst::ICMP_EQ; + break; + case ICmpInst::ICMP_ULT: + // X <u 2^n is equivalent to (X & ~(2^n-1)) == 0. + if (!C->getValue().isPowerOf2()) + return false; + Y = ConstantInt::get(I->getContext(), -C->getValue()); + Pred = ICmpInst::ICMP_EQ; + break; + case ICmpInst::ICMP_UGT: + // X >u 2^n-1 is equivalent to (X & ~(2^n-1)) != 0. 
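+ // For example, with C == 7 (C+1 == 8 is a power of two), X >u 7 becomes
+ // (X & ~7) != 0, i.e. a test that some bit above the low three is set.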
+ if (!(C->getValue() + 1).isPowerOf2()) + return false; + Y = ConstantInt::get(I->getContext(), ~C->getValue()); + Pred = ICmpInst::ICMP_NE; + break; + } + + X = I->getOperand(0); + Z = ConstantInt::getNullValue(C->getType()); + return true; +} diff --git a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp new file mode 100644 index 000000000000..c514c9c9cd4a --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -0,0 +1,875 @@ +//===- CodeExtractor.cpp - Pull code region into a new function -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the interface to tear out a code region, such as an +// individual loop or a parallel section, into a new function, replacing it with +// a call to the new function. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/CodeExtractor.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BlockFrequencyInfoImpl.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/RegionInfo.h" +#include "llvm/Analysis/RegionIterator.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Pass.h" +#include "llvm/Support/BlockFrequency.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include <algorithm> +#include <set> +using namespace llvm; + +#define DEBUG_TYPE "code-extractor" + +// Provide a command-line option to aggregate function arguments into a struct +// for functions produced by the code extractor. This is useful when converting +// extracted functions to pthread-based code, as only one argument (void*) can +// be passed in to pthread_create(). +static cl::opt<bool> +AggregateArgsOpt("aggregate-extracted-args", cl::Hidden, + cl::desc("Aggregate arguments to code-extracted functions")); + +/// \brief Test whether a block is valid for extraction. +bool CodeExtractor::isBlockValidForExtraction(const BasicBlock &BB) { + // Landing pads must be in the function where they were inserted for cleanup. + if (BB.isEHPad()) + return false; + + // Don't hoist code containing allocas, invokes, or vastarts. + for (BasicBlock::const_iterator I = BB.begin(), E = BB.end(); I != E; ++I) { + if (isa<AllocaInst>(I) || isa<InvokeInst>(I)) + return false; + if (const CallInst *CI = dyn_cast<CallInst>(I)) + if (const Function *F = CI->getCalledFunction()) + if (F->getIntrinsicID() == Intrinsic::vastart) + return false; + } + + return true; +} + +/// \brief Build a set of blocks to extract if the input blocks are viable. 
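+/// A block set is viable only when every block passes
+/// CodeExtractor::isBlockValidForExtraction above (no EH pads, allocas,
+/// invokes, or va_start calls); if any block fails, the returned set is
+/// cleared and nothing is extracted.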
+template <typename IteratorT> +static SetVector<BasicBlock *> buildExtractionBlockSet(IteratorT BBBegin, + IteratorT BBEnd) { + SetVector<BasicBlock *> Result; + + assert(BBBegin != BBEnd); + + // Loop over the blocks, adding them to our set-vector, and aborting with an + // empty set if we encounter invalid blocks. + do { + if (!Result.insert(*BBBegin)) + llvm_unreachable("Repeated basic blocks in extraction input"); + + if (!CodeExtractor::isBlockValidForExtraction(**BBBegin)) { + Result.clear(); + return Result; + } + } while (++BBBegin != BBEnd); + +#ifndef NDEBUG + for (SetVector<BasicBlock *>::iterator I = std::next(Result.begin()), + E = Result.end(); + I != E; ++I) + for (pred_iterator PI = pred_begin(*I), PE = pred_end(*I); + PI != PE; ++PI) + assert(Result.count(*PI) && + "No blocks in this region may have entries from outside the region" + " except for the first block!"); +#endif + + return Result; +} + +/// \brief Helper to call buildExtractionBlockSet with an ArrayRef. +static SetVector<BasicBlock *> +buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs) { + return buildExtractionBlockSet(BBs.begin(), BBs.end()); +} + +/// \brief Helper to call buildExtractionBlockSet with a RegionNode. +static SetVector<BasicBlock *> +buildExtractionBlockSet(const RegionNode &RN) { + if (!RN.isSubRegion()) + // Just a single BasicBlock. + return buildExtractionBlockSet(RN.getNodeAs<BasicBlock>()); + + const Region &R = *RN.getNodeAs<Region>(); + + return buildExtractionBlockSet(R.block_begin(), R.block_end()); +} + +CodeExtractor::CodeExtractor(BasicBlock *BB, bool AggregateArgs, + BlockFrequencyInfo *BFI, + BranchProbabilityInfo *BPI) + : DT(nullptr), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), + BPI(BPI), Blocks(buildExtractionBlockSet(BB)), NumExitBlocks(~0U) {} + +CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT, + bool AggregateArgs, BlockFrequencyInfo *BFI, + BranchProbabilityInfo *BPI) + : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), + BPI(BPI), Blocks(buildExtractionBlockSet(BBs)), NumExitBlocks(~0U) {} + +CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs, + BlockFrequencyInfo *BFI, + BranchProbabilityInfo *BPI) + : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), + BPI(BPI), Blocks(buildExtractionBlockSet(L.getBlocks())), + NumExitBlocks(~0U) {} + +CodeExtractor::CodeExtractor(DominatorTree &DT, const RegionNode &RN, + bool AggregateArgs, BlockFrequencyInfo *BFI, + BranchProbabilityInfo *BPI) + : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), + BPI(BPI), Blocks(buildExtractionBlockSet(RN)), NumExitBlocks(~0U) {} + +/// definedInRegion - Return true if the specified value is defined in the +/// extracted region. +static bool definedInRegion(const SetVector<BasicBlock *> &Blocks, Value *V) { + if (Instruction *I = dyn_cast<Instruction>(V)) + if (Blocks.count(I->getParent())) + return true; + return false; +} + +/// definedInCaller - Return true if the specified value is defined in the +/// function being code extracted, but not in the region being extracted. +/// These values must be passed in as live-ins to the function. 
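+/// For example, a bound computed before an extracted loop is defined in the
+/// caller, lands in the Inputs set below, and becomes an argument of the
+/// extracted function.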
+static bool definedInCaller(const SetVector<BasicBlock *> &Blocks, Value *V) { + if (isa<Argument>(V)) return true; + if (Instruction *I = dyn_cast<Instruction>(V)) + if (!Blocks.count(I->getParent())) + return true; + return false; +} + +void CodeExtractor::findInputsOutputs(ValueSet &Inputs, + ValueSet &Outputs) const { + for (BasicBlock *BB : Blocks) { + // If a used value is defined outside the region, it's an input. If an + // instruction is used outside the region, it's an output. + for (Instruction &II : *BB) { + for (User::op_iterator OI = II.op_begin(), OE = II.op_end(); OI != OE; + ++OI) + if (definedInCaller(Blocks, *OI)) + Inputs.insert(*OI); + + for (User *U : II.users()) + if (!definedInRegion(Blocks, U)) { + Outputs.insert(&II); + break; + } + } + } +} + +/// severSplitPHINodes - If a PHI node has multiple inputs from outside of the +/// region, we need to split the entry block of the region so that the PHI node +/// is easier to deal with. +void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { + unsigned NumPredsFromRegion = 0; + unsigned NumPredsOutsideRegion = 0; + + if (Header != &Header->getParent()->getEntryBlock()) { + PHINode *PN = dyn_cast<PHINode>(Header->begin()); + if (!PN) return; // No PHI nodes. + + // If the header node contains any PHI nodes, check to see if there is more + // than one entry from outside the region. If so, we need to sever the + // header block into two. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (Blocks.count(PN->getIncomingBlock(i))) + ++NumPredsFromRegion; + else + ++NumPredsOutsideRegion; + + // If there is one (or fewer) predecessor from outside the region, we don't + // need to do anything special. + if (NumPredsOutsideRegion <= 1) return; + } + + // Otherwise, we need to split the header block into two pieces: one + // containing PHI nodes merging values from outside of the region, and a + // second that contains all of the code for the block and merges back any + // incoming values from inside of the region. + BasicBlock::iterator AfterPHIs = Header->getFirstNonPHI()->getIterator(); + BasicBlock *NewBB = Header->splitBasicBlock(AfterPHIs, + Header->getName()+".ce"); + + // We only want to code extract the second block now, and it becomes the new + // header of the region. + BasicBlock *OldPred = Header; + Blocks.remove(OldPred); + Blocks.insert(NewBB); + Header = NewBB; + + // Okay, update dominator sets. The blocks that dominate the new one are the + // blocks that dominate TIBB plus the new block itself. + if (DT) + DT->splitBlock(NewBB); + + // Okay, now we need to adjust the PHI nodes and any branches from within the + // region to go to the new header block instead of the old header block. + if (NumPredsFromRegion) { + PHINode *PN = cast<PHINode>(OldPred->begin()); + // Loop over all of the predecessors of OldPred that are in the region, + // changing them to branch to NewBB instead. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (Blocks.count(PN->getIncomingBlock(i))) { + TerminatorInst *TI = PN->getIncomingBlock(i)->getTerminator(); + TI->replaceUsesOfWith(OldPred, NewBB); + } + + // Okay, everything within the region is now branching to the right block, we + // just have to update the PHI nodes now, inserting PHI nodes into NewBB. + for (AfterPHIs = OldPred->begin(); isa<PHINode>(AfterPHIs); ++AfterPHIs) { + PHINode *PN = cast<PHINode>(AfterPHIs); + // Create a new PHI node in the new region, which has an incoming value + // from OldPred of PN. 
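+ // The new PHI reserves one incoming slot for the OldPred edge plus one per
+ // in-region predecessor; the loop below moves those incoming values from PN
+ // into NewPN.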
+ PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion, + PN->getName() + ".ce", &NewBB->front()); + NewPN->addIncoming(PN, OldPred); + + // Loop over all of the incoming value in PN, moving them to NewPN if they + // are from the extracted region. + for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) { + if (Blocks.count(PN->getIncomingBlock(i))) { + NewPN->addIncoming(PN->getIncomingValue(i), PN->getIncomingBlock(i)); + PN->removeIncomingValue(i); + --i; + } + } + } + } +} + +void CodeExtractor::splitReturnBlocks() { + for (BasicBlock *Block : Blocks) + if (ReturnInst *RI = dyn_cast<ReturnInst>(Block->getTerminator())) { + BasicBlock *New = + Block->splitBasicBlock(RI->getIterator(), Block->getName() + ".ret"); + if (DT) { + // Old dominates New. New node dominates all other nodes dominated + // by Old. + DomTreeNode *OldNode = DT->getNode(Block); + SmallVector<DomTreeNode *, 8> Children(OldNode->begin(), + OldNode->end()); + + DomTreeNode *NewNode = DT->addNewBlock(New, Block); + + for (DomTreeNode *I : Children) + DT->changeImmediateDominator(I, NewNode); + } + } +} + +/// constructFunction - make a function based on inputs and outputs, as follows: +/// f(in0, ..., inN, out0, ..., outN) +/// +Function *CodeExtractor::constructFunction(const ValueSet &inputs, + const ValueSet &outputs, + BasicBlock *header, + BasicBlock *newRootNode, + BasicBlock *newHeader, + Function *oldFunction, + Module *M) { + DEBUG(dbgs() << "inputs: " << inputs.size() << "\n"); + DEBUG(dbgs() << "outputs: " << outputs.size() << "\n"); + + // This function returns unsigned, outputs will go back by reference. + switch (NumExitBlocks) { + case 0: + case 1: RetTy = Type::getVoidTy(header->getContext()); break; + case 2: RetTy = Type::getInt1Ty(header->getContext()); break; + default: RetTy = Type::getInt16Ty(header->getContext()); break; + } + + std::vector<Type*> paramTy; + + // Add the types of the input values to the function's argument list + for (Value *value : inputs) { + DEBUG(dbgs() << "value used in func: " << *value << "\n"); + paramTy.push_back(value->getType()); + } + + // Add the types of the output values to the function's argument list. + for (Value *output : outputs) { + DEBUG(dbgs() << "instr used in func: " << *output << "\n"); + if (AggregateArgs) + paramTy.push_back(output->getType()); + else + paramTy.push_back(PointerType::getUnqual(output->getType())); + } + + DEBUG({ + dbgs() << "Function type: " << *RetTy << " f("; + for (Type *i : paramTy) + dbgs() << *i << ", "; + dbgs() << ")\n"; + }); + + StructType *StructTy; + if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { + StructTy = StructType::get(M->getContext(), paramTy); + paramTy.clear(); + paramTy.push_back(PointerType::getUnqual(StructTy)); + } + FunctionType *funcType = + FunctionType::get(RetTy, paramTy, false); + + // Create the new function + Function *newFunction = Function::Create(funcType, + GlobalValue::InternalLinkage, + oldFunction->getName() + "_" + + header->getName(), M); + // If the old function is no-throw, so is the new one. + if (oldFunction->doesNotThrow()) + newFunction->setDoesNotThrow(); + + // Inherit the uwtable attribute if we need to. + if (oldFunction->hasUWTable()) + newFunction->setHasUWTable(); + + // Inherit all of the target dependent attributes. + // (e.g. If the extracted region contains a call to an x86.sse + // instruction we need to make sure that the extracted region has the + // "target-features" attribute allowing it to be lowered. 
+ // FIXME: This should be changed to check to see if a specific + // attribute can not be inherited. + AttributeSet OldFnAttrs = oldFunction->getAttributes().getFnAttributes(); + AttrBuilder AB(OldFnAttrs, AttributeSet::FunctionIndex); + for (auto Attr : AB.td_attrs()) + newFunction->addFnAttr(Attr.first, Attr.second); + + newFunction->getBasicBlockList().push_back(newRootNode); + + // Create an iterator to name all of the arguments we inserted. + Function::arg_iterator AI = newFunction->arg_begin(); + + // Rewrite all users of the inputs in the extracted region to use the + // arguments (or appropriate addressing into struct) instead. + for (unsigned i = 0, e = inputs.size(); i != e; ++i) { + Value *RewriteVal; + if (AggregateArgs) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext())); + Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i); + TerminatorInst *TI = newFunction->begin()->getTerminator(); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI); + RewriteVal = new LoadInst(GEP, "loadgep_" + inputs[i]->getName(), TI); + } else + RewriteVal = &*AI++; + + std::vector<User*> Users(inputs[i]->user_begin(), inputs[i]->user_end()); + for (User *use : Users) + if (Instruction *inst = dyn_cast<Instruction>(use)) + if (Blocks.count(inst->getParent())) + inst->replaceUsesOfWith(inputs[i], RewriteVal); + } + + // Set names for input and output arguments. + if (!AggregateArgs) { + AI = newFunction->arg_begin(); + for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++AI) + AI->setName(inputs[i]->getName()); + for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++AI) + AI->setName(outputs[i]->getName()+".out"); + } + + // Rewrite branches to basic blocks outside of the loop to new dummy blocks + // within the new function. This must be done before we lose track of which + // blocks were originally in the code region. + std::vector<User*> Users(header->user_begin(), header->user_end()); + for (unsigned i = 0, e = Users.size(); i != e; ++i) + // The BasicBlock which contains the branch is not in the region + // modify the branch target to a new block + if (TerminatorInst *TI = dyn_cast<TerminatorInst>(Users[i])) + if (!Blocks.count(TI->getParent()) && + TI->getParent()->getParent() == oldFunction) + TI->replaceUsesOfWith(header, newHeader); + + return newFunction; +} + +/// FindPhiPredForUseInBlock - Given a value and a basic block, find a PHI +/// that uses the value within the basic block, and return the predecessor +/// block associated with that use, or return 0 if none is found. +static BasicBlock* FindPhiPredForUseInBlock(Value* Used, BasicBlock* BB) { + for (Use &U : Used->uses()) { + PHINode *P = dyn_cast<PHINode>(U.getUser()); + if (P && P->getParent() == BB) + return P->getIncomingBlock(U); + } + + return nullptr; +} + +/// emitCallAndSwitchStatement - This method sets up the caller side by adding +/// the call instruction, splitting any PHI nodes in the header block as +/// necessary. 
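+/// After this runs, the caller's replacement block looks roughly like the
+/// following (sketch only; the exact shape depends on AggregateArgs and
+/// NumExitBlocks, and the value names are the suffixes used below):
+///   %output.loc = alloca ...                   ; one per non-aggregate output
+///   %targetBlock = call i16 @newFunction(...)  ; named only with >1 exit block
+///   %output.reload = load ..., %output.loc
+///   switch i16 %targetBlock, label %exit.0 [ i16 1, label %exit.1 ... ]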
+void CodeExtractor:: +emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, + ValueSet &inputs, ValueSet &outputs) { + // Emit a call to the new function, passing in: *pointer to struct (if + // aggregating parameters), or plan inputs and allocated memory for outputs + std::vector<Value*> params, StructValues, ReloadOutputs, Reloads; + + LLVMContext &Context = newFunction->getContext(); + + // Add inputs as params, or to be filled into the struct + for (Value *input : inputs) + if (AggregateArgs) + StructValues.push_back(input); + else + params.push_back(input); + + // Create allocas for the outputs + for (Value *output : outputs) { + if (AggregateArgs) { + StructValues.push_back(output); + } else { + AllocaInst *alloca = + new AllocaInst(output->getType(), nullptr, output->getName() + ".loc", + &codeReplacer->getParent()->front().front()); + ReloadOutputs.push_back(alloca); + params.push_back(alloca); + } + } + + StructType *StructArgTy = nullptr; + AllocaInst *Struct = nullptr; + if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { + std::vector<Type*> ArgTypes; + for (ValueSet::iterator v = StructValues.begin(), + ve = StructValues.end(); v != ve; ++v) + ArgTypes.push_back((*v)->getType()); + + // Allocate a struct at the beginning of this function + StructArgTy = StructType::get(newFunction->getContext(), ArgTypes); + Struct = new AllocaInst(StructArgTy, nullptr, "structArg", + &codeReplacer->getParent()->front().front()); + params.push_back(Struct); + + for (unsigned i = 0, e = inputs.size(); i != e; ++i) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName()); + codeReplacer->getInstList().push_back(GEP); + StoreInst *SI = new StoreInst(StructValues[i], GEP); + codeReplacer->getInstList().push_back(SI); + } + } + + // Emit the call to the function + CallInst *call = CallInst::Create(newFunction, params, + NumExitBlocks > 1 ? "targetBlock" : ""); + codeReplacer->getInstList().push_back(call); + + Function::arg_iterator OutputArgBegin = newFunction->arg_begin(); + unsigned FirstOut = inputs.size(); + if (!AggregateArgs) + std::advance(OutputArgBegin, inputs.size()); + + // Reload the outputs passed in by reference + for (unsigned i = 0, e = outputs.size(); i != e; ++i) { + Value *Output = nullptr; + if (AggregateArgs) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); + codeReplacer->getInstList().push_back(GEP); + Output = GEP; + } else { + Output = ReloadOutputs[i]; + } + LoadInst *load = new LoadInst(Output, outputs[i]->getName()+".reload"); + Reloads.push_back(load); + codeReplacer->getInstList().push_back(load); + std::vector<User*> Users(outputs[i]->user_begin(), outputs[i]->user_end()); + for (unsigned u = 0, e = Users.size(); u != e; ++u) { + Instruction *inst = cast<Instruction>(Users[u]); + if (!Blocks.count(inst->getParent())) + inst->replaceUsesOfWith(outputs[i], load); + } + } + + // Now we can emit a switch statement using the call as a value. 
+ SwitchInst *TheSwitch = + SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)), + codeReplacer, 0, codeReplacer); + + // Since there may be multiple exits from the original region, make the new + // function return an unsigned, switch on that number. This loop iterates + // over all of the blocks in the extracted region, updating any terminator + // instructions in the to-be-extracted region that branch to blocks that are + // not in the region to be extracted. + std::map<BasicBlock*, BasicBlock*> ExitBlockMap; + + unsigned switchVal = 0; + for (BasicBlock *Block : Blocks) { + TerminatorInst *TI = Block->getTerminator(); + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + if (!Blocks.count(TI->getSuccessor(i))) { + BasicBlock *OldTarget = TI->getSuccessor(i); + // add a new basic block which returns the appropriate value + BasicBlock *&NewTarget = ExitBlockMap[OldTarget]; + if (!NewTarget) { + // If we don't already have an exit stub for this non-extracted + // destination, create one now! + NewTarget = BasicBlock::Create(Context, + OldTarget->getName() + ".exitStub", + newFunction); + unsigned SuccNum = switchVal++; + + Value *brVal = nullptr; + switch (NumExitBlocks) { + case 0: + case 1: break; // No value needed. + case 2: // Conditional branch, return a bool + brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum); + break; + default: + brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum); + break; + } + + ReturnInst *NTRet = ReturnInst::Create(Context, brVal, NewTarget); + + // Update the switch instruction. + TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context), + SuccNum), + OldTarget); + + // Restore values just before we exit + Function::arg_iterator OAI = OutputArgBegin; + for (unsigned out = 0, e = outputs.size(); out != e; ++out) { + // For an invoke, the normal destination is the only one that is + // dominated by the result of the invocation + BasicBlock *DefBlock = cast<Instruction>(outputs[out])->getParent(); + + bool DominatesDef = true; + + BasicBlock *NormalDest = nullptr; + if (auto *Invoke = dyn_cast<InvokeInst>(outputs[out])) + NormalDest = Invoke->getNormalDest(); + + if (NormalDest) { + DefBlock = NormalDest; + + // Make sure we are looking at the original successor block, not + // at a newly inserted exit block, which won't be in the dominator + // info. + for (const auto &I : ExitBlockMap) + if (DefBlock == I.second) { + DefBlock = I.first; + break; + } + + // In the extract block case, if the block we are extracting ends + // with an invoke instruction, make sure that we don't emit a + // store of the invoke value for the unwind block. + if (!DT && DefBlock != OldTarget) + DominatesDef = false; + } + + if (DT) { + DominatesDef = DT->dominates(DefBlock, OldTarget); + + // If the output value is used by a phi in the target block, + // then we need to test for dominance of the phi's predecessor + // instead. Unfortunately, this a little complicated since we + // have already rewritten uses of the value to uses of the reload. 
+ BasicBlock* pred = FindPhiPredForUseInBlock(Reloads[out], + OldTarget); + if (pred && DT && DT->dominates(DefBlock, pred)) + DominatesDef = true; + } + + if (DominatesDef) { + if (AggregateArgs) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), + FirstOut+out); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructArgTy, &*OAI, Idx, "gep_" + outputs[out]->getName(), + NTRet); + new StoreInst(outputs[out], GEP, NTRet); + } else { + new StoreInst(outputs[out], &*OAI, NTRet); + } + } + // Advance output iterator even if we don't emit a store + if (!AggregateArgs) ++OAI; + } + } + + // rewrite the original branch instruction with this new target + TI->setSuccessor(i, NewTarget); + } + } + + // Now that we've done the deed, simplify the switch instruction. + Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType(); + switch (NumExitBlocks) { + case 0: + // There are no successors (the block containing the switch itself), which + // means that previously this was the last part of the function, and hence + // this should be rewritten as a `ret' + + // Check if the function should return a value + if (OldFnRetTy->isVoidTy()) { + ReturnInst::Create(Context, nullptr, TheSwitch); // Return void + } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) { + // return what we have + ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch); + } else { + // Otherwise we must have code extracted an unwind or something, just + // return whatever we want. + ReturnInst::Create(Context, + Constant::getNullValue(OldFnRetTy), TheSwitch); + } + + TheSwitch->eraseFromParent(); + break; + case 1: + // Only a single destination, change the switch into an unconditional + // branch. + BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch); + TheSwitch->eraseFromParent(); + break; + case 2: + BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2), + call, TheSwitch); + TheSwitch->eraseFromParent(); + break; + default: + // Otherwise, make the default destination of the switch instruction be one + // of the other successors. + TheSwitch->setCondition(call); + TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks)); + // Remove redundant case + TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1)); + break; + } +} + +void CodeExtractor::moveCodeToFunction(Function *newFunction) { + Function *oldFunc = (*Blocks.begin())->getParent(); + Function::BasicBlockListType &oldBlocks = oldFunc->getBasicBlockList(); + Function::BasicBlockListType &newBlocks = newFunction->getBasicBlockList(); + + for (BasicBlock *Block : Blocks) { + // Delete the basic block from the old function, and the list of blocks + oldBlocks.remove(Block); + + // Insert this basic block into the new function + newBlocks.push_back(Block); + } +} + +void CodeExtractor::calculateNewCallTerminatorWeights( + BasicBlock *CodeReplacer, + DenseMap<BasicBlock *, BlockFrequency> &ExitWeights, + BranchProbabilityInfo *BPI) { + typedef BlockFrequencyInfoImplBase::Distribution Distribution; + typedef BlockFrequencyInfoImplBase::BlockNode BlockNode; + + // Update the branch weights for the exit block. + TerminatorInst *TI = CodeReplacer->getTerminator(); + SmallVector<unsigned, 8> BranchWeights(TI->getNumSuccessors(), 0); + + // Block Frequency distribution with dummy node. + Distribution BranchDist; + + // Add each of the frequencies of the successors. 
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i < e; ++i) { + BlockNode ExitNode(i); + uint64_t ExitFreq = ExitWeights[TI->getSuccessor(i)].getFrequency(); + if (ExitFreq != 0) + BranchDist.addExit(ExitNode, ExitFreq); + else + BPI->setEdgeProbability(CodeReplacer, i, BranchProbability::getZero()); + } + + // Check for no total weight. + if (BranchDist.Total == 0) + return; + + // Normalize the distribution so that they can fit in unsigned. + BranchDist.normalize(); + + // Create normalized branch weights and set the metadata. + for (unsigned I = 0, E = BranchDist.Weights.size(); I < E; ++I) { + const auto &Weight = BranchDist.Weights[I]; + + // Get the weight and update the current BFI. + BranchWeights[Weight.TargetNode.Index] = Weight.Amount; + BranchProbability BP(Weight.Amount, BranchDist.Total); + BPI->setEdgeProbability(CodeReplacer, Weight.TargetNode.Index, BP); + } + TI->setMetadata( + LLVMContext::MD_prof, + MDBuilder(TI->getContext()).createBranchWeights(BranchWeights)); +} + +Function *CodeExtractor::extractCodeRegion() { + if (!isEligible()) + return nullptr; + + ValueSet inputs, outputs; + + // Assumption: this is a single-entry code region, and the header is the first + // block in the region. + BasicBlock *header = *Blocks.begin(); + + // Calculate the entry frequency of the new function before we change the root + // block. + BlockFrequency EntryFreq; + if (BFI) { + assert(BPI && "Both BPI and BFI are required to preserve profile info"); + for (BasicBlock *Pred : predecessors(header)) { + if (Blocks.count(Pred)) + continue; + EntryFreq += + BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, header); + } + } + + // If we have to split PHI nodes or the entry block, do so now. + severSplitPHINodes(header); + + // If we have any return instructions in the region, split those blocks so + // that the return is not in the region. + splitReturnBlocks(); + + Function *oldFunction = header->getParent(); + + // This takes place of the original loop + BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(), + "codeRepl", oldFunction, + header); + + // The new function needs a root node because other nodes can branch to the + // head of the region, but the entry node of a function cannot have preds. + BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(), + "newFuncRoot"); + newFuncRoot->getInstList().push_back(BranchInst::Create(header)); + + // Find inputs to, outputs from the code region. + findInputsOutputs(inputs, outputs); + + // Calculate the exit blocks for the extracted region and the total exit + // weights for each of those blocks. + DenseMap<BasicBlock *, BlockFrequency> ExitWeights; + SmallPtrSet<BasicBlock *, 1> ExitBlocks; + for (BasicBlock *Block : Blocks) { + for (succ_iterator SI = succ_begin(Block), SE = succ_end(Block); SI != SE; + ++SI) { + if (!Blocks.count(*SI)) { + // Update the branch weight for this successor. + if (BFI) { + BlockFrequency &BF = ExitWeights[*SI]; + BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, *SI); + } + ExitBlocks.insert(*SI); + } + } + } + NumExitBlocks = ExitBlocks.size(); + + // Construct new function based on inputs/outputs & add allocas for all defs. + Function *newFunction = constructFunction(inputs, outputs, header, + newFuncRoot, + codeReplacer, oldFunction, + oldFunction->getParent()); + + // Update the entry count of the function. 
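+ // EntryFreq was computed from the header's original predecessors before
+ // severSplitPHINodes changed the region, so the new function's entry count
+ // reflects how often the extracted region was entered.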
+ if (BFI) { + Optional<uint64_t> EntryCount = + BFI->getProfileCountFromFreq(EntryFreq.getFrequency()); + if (EntryCount.hasValue()) + newFunction->setEntryCount(EntryCount.getValue()); + BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency()); + } + + emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs); + + moveCodeToFunction(newFunction); + + // Update the branch weights for the exit block. + if (BFI && NumExitBlocks > 1) + calculateNewCallTerminatorWeights(codeReplacer, ExitWeights, BPI); + + // Loop over all of the PHI nodes in the header block, and change any + // references to the old incoming edge to be the new incoming edge. + for (BasicBlock::iterator I = header->begin(); isa<PHINode>(I); ++I) { + PHINode *PN = cast<PHINode>(I); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (!Blocks.count(PN->getIncomingBlock(i))) + PN->setIncomingBlock(i, newFuncRoot); + } + + // Look at all successors of the codeReplacer block. If any of these blocks + // had PHI nodes in them, we need to update the "from" block to be the code + // replacer, not the original block in the extracted region. + std::vector<BasicBlock*> Succs(succ_begin(codeReplacer), + succ_end(codeReplacer)); + for (unsigned i = 0, e = Succs.size(); i != e; ++i) + for (BasicBlock::iterator I = Succs[i]->begin(); isa<PHINode>(I); ++I) { + PHINode *PN = cast<PHINode>(I); + std::set<BasicBlock*> ProcessedPreds; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (Blocks.count(PN->getIncomingBlock(i))) { + if (ProcessedPreds.insert(PN->getIncomingBlock(i)).second) + PN->setIncomingBlock(i, codeReplacer); + else { + // There were multiple entries in the PHI for this block, now there + // is only one, so remove the duplicated entries. + PN->removeIncomingValue(i, false); + --i; --e; + } + } + } + + //cerr << "NEW FUNCTION: " << *newFunction; + // verifyFunction(*newFunction); + + // cerr << "OLD FUNCTION: " << *oldFunction; + // verifyFunction(*oldFunction); + + DEBUG(if (verifyFunction(*newFunction)) + report_fatal_error("verifyFunction failed!")); + return newFunction; +} diff --git a/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp b/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp new file mode 100644 index 000000000000..6642a97a29c2 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp @@ -0,0 +1,165 @@ +//===- CtorUtils.cpp - Helpers for working with global_ctors ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines functions that are used to process llvm.global_ctors. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/CtorUtils.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "ctor_utils" + +namespace llvm { + +namespace { +/// Given a specified llvm.global_ctors list, remove the listed elements. +void removeGlobalCtors(GlobalVariable *GCL, const BitVector &CtorsToRemove) { + // Filter out the initializer elements to remove. 
+ ConstantArray *OldCA = cast<ConstantArray>(GCL->getInitializer()); + SmallVector<Constant *, 10> CAList; + for (unsigned I = 0, E = OldCA->getNumOperands(); I < E; ++I) + if (!CtorsToRemove.test(I)) + CAList.push_back(OldCA->getOperand(I)); + + // Create the new array initializer. + ArrayType *ATy = + ArrayType::get(OldCA->getType()->getElementType(), CAList.size()); + Constant *CA = ConstantArray::get(ATy, CAList); + + // If we didn't change the number of elements, don't create a new GV. + if (CA->getType() == OldCA->getType()) { + GCL->setInitializer(CA); + return; + } + + // Create the new global and insert it next to the existing list. + GlobalVariable *NGV = + new GlobalVariable(CA->getType(), GCL->isConstant(), GCL->getLinkage(), + CA, "", GCL->getThreadLocalMode()); + GCL->getParent()->getGlobalList().insert(GCL->getIterator(), NGV); + NGV->takeName(GCL); + + // Nuke the old list, replacing any uses with the new one. + if (!GCL->use_empty()) { + Constant *V = NGV; + if (V->getType() != GCL->getType()) + V = ConstantExpr::getBitCast(V, GCL->getType()); + GCL->replaceAllUsesWith(V); + } + GCL->eraseFromParent(); +} + +/// Given a llvm.global_ctors list that we can understand, +/// return a list of the functions and null terminator as a vector. +std::vector<Function *> parseGlobalCtors(GlobalVariable *GV) { + if (GV->getInitializer()->isNullValue()) + return std::vector<Function *>(); + ConstantArray *CA = cast<ConstantArray>(GV->getInitializer()); + std::vector<Function *> Result; + Result.reserve(CA->getNumOperands()); + for (auto &V : CA->operands()) { + ConstantStruct *CS = cast<ConstantStruct>(V); + Result.push_back(dyn_cast<Function>(CS->getOperand(1))); + } + return Result; +} + +/// Find the llvm.global_ctors list, verifying that all initializers have an +/// init priority of 65535. +GlobalVariable *findGlobalCtors(Module &M) { + GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors"); + if (!GV) + return nullptr; + + // Verify that the initializer is simple enough for us to handle. We are + // only allowed to optimize the initializer if it is unique. + if (!GV->hasUniqueInitializer()) + return nullptr; + + if (isa<ConstantAggregateZero>(GV->getInitializer())) + return GV; + ConstantArray *CA = cast<ConstantArray>(GV->getInitializer()); + + for (auto &V : CA->operands()) { + if (isa<ConstantAggregateZero>(V)) + continue; + ConstantStruct *CS = cast<ConstantStruct>(V); + if (isa<ConstantPointerNull>(CS->getOperand(1))) + continue; + + // Must have a function or null ptr. + if (!isa<Function>(CS->getOperand(1))) + return nullptr; + + // Init priority must be standard. + ConstantInt *CI = cast<ConstantInt>(CS->getOperand(0)); + if (CI->getZExtValue() != 65535) + return nullptr; + } + + return GV; +} +} // namespace + +/// Call "ShouldRemove" for every entry in M's global_ctor list and remove the +/// entries for which it returns true. Return true if anything changed. +bool optimizeGlobalCtorsList(Module &M, + function_ref<bool(Function *)> ShouldRemove) { + GlobalVariable *GlobalCtors = findGlobalCtors(M); + if (!GlobalCtors) + return false; + + std::vector<Function *> Ctors = parseGlobalCtors(GlobalCtors); + if (Ctors.empty()) + return false; + + bool MadeChange = false; + + // Loop over global ctors, optimizing them when we can. 
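+ // Each llvm.global_ctors entry pairs an init priority with a constructor;
+ // findGlobalCtors above has already checked that every priority is 65535 and
+ // that each constructor slot holds either a Function or a null pointer.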
+ unsigned NumCtors = Ctors.size(); + BitVector CtorsToRemove(NumCtors); + for (unsigned i = 0; i != Ctors.size() && NumCtors > 0; ++i) { + Function *F = Ctors[i]; + // Found a null terminator in the middle of the list, prune off the rest of + // the list. + if (!F) + continue; + + DEBUG(dbgs() << "Optimizing Global Constructor: " << *F << "\n"); + + // We cannot simplify external ctor functions. + if (F->empty()) + continue; + + // If we can evaluate the ctor at compile time, do. + if (ShouldRemove(F)) { + Ctors[i] = nullptr; + CtorsToRemove.set(i); + NumCtors--; + MadeChange = true; + continue; + } + } + + if (!MadeChange) + return false; + + removeGlobalCtors(GlobalCtors, CtorsToRemove); + return true; +} + +} // End llvm namespace diff --git a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp new file mode 100644 index 000000000000..75a1dde57c4c --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp @@ -0,0 +1,146 @@ +//===- DemoteRegToStack.cpp - Move a virtual register to the stack --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Type.h" +#include "llvm/Transforms/Utils/Local.h" +using namespace llvm; + +/// DemoteRegToStack - This function takes a virtual register computed by an +/// Instruction and replaces it with a slot in the stack frame, allocated via +/// alloca. This allows the CFG to be changed around without fear of +/// invalidating the SSA information for the value. It returns the pointer to +/// the alloca inserted to create a stack slot for I. +AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, + Instruction *AllocaPoint) { + if (I.use_empty()) { + I.eraseFromParent(); + return nullptr; + } + + // Create a stack slot to hold the value. + AllocaInst *Slot; + if (AllocaPoint) { + Slot = new AllocaInst(I.getType(), nullptr, + I.getName()+".reg2mem", AllocaPoint); + } else { + Function *F = I.getParent()->getParent(); + Slot = new AllocaInst(I.getType(), nullptr, I.getName() + ".reg2mem", + &F->getEntryBlock().front()); + } + + // We cannot demote invoke instructions to the stack if their normal edge + // is critical. Therefore, split the critical edge and create a basic block + // into which the store can be inserted. + if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) { + if (!II->getNormalDest()->getSinglePredecessor()) { + unsigned SuccNum = GetSuccessorNumber(II->getParent(), II->getNormalDest()); + assert(isCriticalEdge(II, SuccNum) && "Expected a critical edge!"); + BasicBlock *BB = SplitCriticalEdge(II, SuccNum); + assert(BB && "Unable to split critical edge."); + (void)BB; + } + } + + // Change all of the users of the instruction to read from the stack slot. + while (!I.use_empty()) { + Instruction *U = cast<Instruction>(I.user_back()); + if (PHINode *PN = dyn_cast<PHINode>(U)) { + // If this is a PHI node, we can't insert a load of the value before the + // use. Instead insert the load in the predecessor block corresponding + // to the incoming value. 
+ // + // Note that if there are multiple edges from a basic block to this PHI + // node that we cannot have multiple loads. The problem is that the + // resulting PHI node will have multiple values (from each load) coming in + // from the same block, which is illegal SSA form. For this reason, we + // keep track of and reuse loads we insert. + DenseMap<BasicBlock*, Value*> Loads; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingValue(i) == &I) { + Value *&V = Loads[PN->getIncomingBlock(i)]; + if (!V) { + // Insert the load into the predecessor block + V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads, + PN->getIncomingBlock(i)->getTerminator()); + } + PN->setIncomingValue(i, V); + } + + } else { + // If this is a normal instruction, just insert a load. + Value *V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads, U); + U->replaceUsesOfWith(&I, V); + } + } + + // Insert stores of the computed value into the stack slot. We have to be + // careful if I is an invoke instruction, because we can't insert the store + // AFTER the terminator instruction. + BasicBlock::iterator InsertPt; + if (!isa<TerminatorInst>(I)) { + InsertPt = ++I.getIterator(); + for (; isa<PHINode>(InsertPt) || InsertPt->isEHPad(); ++InsertPt) + /* empty */; // Don't insert before PHI nodes or landingpad instrs. + } else { + InvokeInst &II = cast<InvokeInst>(I); + InsertPt = II.getNormalDest()->getFirstInsertionPt(); + } + + new StoreInst(&I, Slot, &*InsertPt); + return Slot; +} + +/// DemotePHIToStack - This function takes a virtual register computed by a PHI +/// node and replaces it with a slot in the stack frame allocated via alloca. +/// The PHI node is deleted. It returns the pointer to the alloca inserted. +AllocaInst *llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) { + if (P->use_empty()) { + P->eraseFromParent(); + return nullptr; + } + + // Create a stack slot to hold the value. + AllocaInst *Slot; + if (AllocaPoint) { + Slot = new AllocaInst(P->getType(), nullptr, + P->getName()+".reg2mem", AllocaPoint); + } else { + Function *F = P->getParent()->getParent(); + Slot = new AllocaInst(P->getType(), nullptr, P->getName() + ".reg2mem", + &F->getEntryBlock().front()); + } + + // Iterate over each operand inserting a store in each predecessor. + for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) { + if (InvokeInst *II = dyn_cast<InvokeInst>(P->getIncomingValue(i))) { + assert(II->getParent() != P->getIncomingBlock(i) && + "Invoke edge not supported yet"); (void)II; + } + new StoreInst(P->getIncomingValue(i), Slot, + P->getIncomingBlock(i)->getTerminator()); + } + + // Insert a load in place of the PHI and replace all uses. + BasicBlock::iterator InsertPt = P->getIterator(); + + for (; isa<PHINode>(InsertPt) || InsertPt->isEHPad(); ++InsertPt) + /* empty */; // Don't insert before PHI nodes or landingpad instrs. + + Value *V = new LoadInst(Slot, P->getName() + ".reload", &*InsertPt); + P->replaceAllUsesWith(V); + + // Delete PHI. 
+ P->eraseFromParent(); + return Slot; +} diff --git a/contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp b/contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp new file mode 100644 index 000000000000..8c2386554da5 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp @@ -0,0 +1,96 @@ +//===- EscapeEnumerator.cpp -----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Defines a helper class that enumerates all possible exits from a function, +// including exception handling. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/EscapeEnumerator.h" +#include "llvm/Analysis/EHPersonalities.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Module.h" +#include "llvm/Transforms/Utils/Local.h" +using namespace llvm; + +static Constant *getDefaultPersonalityFn(Module *M) { + LLVMContext &C = M->getContext(); + Triple T(M->getTargetTriple()); + EHPersonality Pers = getDefaultEHPersonality(T); + return M->getOrInsertFunction(getEHPersonalityName(Pers), + FunctionType::get(Type::getInt32Ty(C), true)); +} + +IRBuilder<> *EscapeEnumerator::Next() { + if (Done) + return nullptr; + + // Find all 'return', 'resume', and 'unwind' instructions. + while (StateBB != StateE) { + BasicBlock *CurBB = &*StateBB++; + + // Branches and invokes do not escape, only unwind, resume, and return + // do. + TerminatorInst *TI = CurBB->getTerminator(); + if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI)) + continue; + + Builder.SetInsertPoint(TI); + return &Builder; + } + + Done = true; + + if (!HandleExceptions) + return nullptr; + + if (F.doesNotThrow()) + return nullptr; + + // Find all 'call' instructions that may throw. + SmallVector<Instruction *, 16> Calls; + for (BasicBlock &BB : F) + for (Instruction &II : BB) + if (CallInst *CI = dyn_cast<CallInst>(&II)) + if (!CI->doesNotThrow()) + Calls.push_back(CI); + + if (Calls.empty()) + return nullptr; + + // Create a cleanup block. + LLVMContext &C = F.getContext(); + BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F); + Type *ExnTy = + StructType::get(Type::getInt8PtrTy(C), Type::getInt32Ty(C), nullptr); + if (!F.hasPersonalityFn()) { + Constant *PersFn = getDefaultPersonalityFn(F.getParent()); + F.setPersonalityFn(PersFn); + } + + if (isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) { + report_fatal_error("Funclet EH not supported"); + } + + LandingPadInst *LPad = + LandingPadInst::Create(ExnTy, 1, "cleanup.lpad", CleanupBB); + LPad->setCleanup(true); + ResumeInst *RI = ResumeInst::Create(LPad, CleanupBB); + + // Transform the 'call' instructions into 'invoke's branching to the + // cleanup block. Go in reverse order to make prettier BB names. 
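+ // (Editor's note: the following comment lines are an illustrative sketch
+ // added for this write-up, not part of the upstream source; the %cont and
+ // %cleanup label names are invented for the example.)
+ //   before:  %r = call i32 @f()                      ; may throw
+ //   after:   %r = invoke i32 @f()
+ //                     to label %cont unwind label %cleanup
+ // changeToInvokeAndSplitBasicBlock splits the block after the call and
+ // moves the trailing instructions into the new normal-destination block.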
+ SmallVector<Value *, 16> Args; + for (unsigned I = Calls.size(); I != 0;) { + CallInst *CI = cast<CallInst>(Calls[--I]); + changeToInvokeAndSplitBasicBlock(CI, CleanupBB); + } + + Builder.SetInsertPoint(RI); + return &Builder; +} diff --git a/contrib/llvm/lib/Transforms/Utils/Evaluator.cpp b/contrib/llvm/lib/Transforms/Utils/Evaluator.cpp new file mode 100644 index 000000000000..4adf1754253d --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/Evaluator.cpp @@ -0,0 +1,596 @@ +//===- Evaluator.cpp - LLVM IR evaluator ----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Function evaluator for LLVM IR. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/Evaluator.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DiagnosticPrinter.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Operator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "evaluator" + +using namespace llvm; + +static inline bool +isSimpleEnoughValueToCommit(Constant *C, + SmallPtrSetImpl<Constant *> &SimpleConstants, + const DataLayout &DL); + +/// Return true if the specified constant can be handled by the code generator. +/// We don't want to generate something like: +/// void *X = &X/42; +/// because the code generator doesn't have a relocation that can handle that. +/// +/// This function should be called if C was not found (but just got inserted) +/// in SimpleConstants to avoid having to rescan the same constants all the +/// time. +static bool +isSimpleEnoughValueToCommitHelper(Constant *C, + SmallPtrSetImpl<Constant *> &SimpleConstants, + const DataLayout &DL) { + // Simple global addresses are supported, do not allow dllimport or + // thread-local globals. + if (auto *GV = dyn_cast<GlobalValue>(C)) + return !GV->hasDLLImportStorageClass() && !GV->isThreadLocal(); + + // Simple integer, undef, constant aggregate zero, etc are all supported. + if (C->getNumOperands() == 0 || isa<BlockAddress>(C)) + return true; + + // Aggregate values are safe if all their elements are. + if (isa<ConstantAggregate>(C)) { + for (Value *Op : C->operands()) + if (!isSimpleEnoughValueToCommit(cast<Constant>(Op), SimpleConstants, DL)) + return false; + return true; + } + + // We don't know exactly what relocations are allowed in constant expressions, + // so we allow &global+constantoffset, which is safe and uniformly supported + // across targets. + ConstantExpr *CE = cast<ConstantExpr>(C); + switch (CE->getOpcode()) { + case Instruction::BitCast: + // Bitcast is fine if the casted value is fine. + return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL); + + case Instruction::IntToPtr: + case Instruction::PtrToInt: + // int <=> ptr is fine if the int type is the same size as the + // pointer type. + if (DL.getTypeSizeInBits(CE->getType()) != + DL.getTypeSizeInBits(CE->getOperand(0)->getType())) + return false; + return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL); + + // GEP is fine if it is simple + constant offset. 
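+ // (Editor's note: illustrative example, not part of the upstream source;
+ // the global @buf is invented for the example.)
+ // A constant expression such as
+ //   getelementptr inbounds ([16 x i32], [16 x i32]* @buf, i32 0, i32 3)
+ // has only ConstantInt indices, so it is accepted below as long as @buf
+ // itself is committable (not dllimport and not thread-local).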
+ case Instruction::GetElementPtr: + for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i) + if (!isa<ConstantInt>(CE->getOperand(i))) + return false; + return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL); + + case Instruction::Add: + // We allow simple+cst. + if (!isa<ConstantInt>(CE->getOperand(1))) + return false; + return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL); + } + return false; +} + +static inline bool +isSimpleEnoughValueToCommit(Constant *C, + SmallPtrSetImpl<Constant *> &SimpleConstants, + const DataLayout &DL) { + // If we already checked this constant, we win. + if (!SimpleConstants.insert(C).second) + return true; + // Check the constant. + return isSimpleEnoughValueToCommitHelper(C, SimpleConstants, DL); +} + +/// Return true if this constant is simple enough for us to understand. In +/// particular, if it is a cast to anything other than from one pointer type to +/// another pointer type, we punt. We basically just support direct accesses to +/// globals and GEP's of globals. This should be kept up to date with +/// CommitValueTo. +static bool isSimpleEnoughPointerToCommit(Constant *C) { + // Conservatively, avoid aggregate types. This is because we don't + // want to worry about them partially overlapping other stores. + if (!cast<PointerType>(C->getType())->getElementType()->isSingleValueType()) + return false; + + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) + // Do not allow weak/*_odr/linkonce linkage or external globals. + return GV->hasUniqueInitializer(); + + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { + // Handle a constantexpr gep. + if (CE->getOpcode() == Instruction::GetElementPtr && + isa<GlobalVariable>(CE->getOperand(0)) && + cast<GEPOperator>(CE)->isInBounds()) { + GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); + // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or + // external globals. + if (!GV->hasUniqueInitializer()) + return false; + + // The first index must be zero. + ConstantInt *CI = dyn_cast<ConstantInt>(*std::next(CE->op_begin())); + if (!CI || !CI->isZero()) return false; + + // The remaining indices must be compile-time known integers within the + // notional bounds of the corresponding static array types. + if (!CE->isGEPWithNoNotionalOverIndexing()) + return false; + + return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); + + // A constantexpr bitcast from a pointer to another pointer is a no-op, + // and we know how to evaluate it by moving the bitcast from the pointer + // operand to the value operand. + } else if (CE->getOpcode() == Instruction::BitCast && + isa<GlobalVariable>(CE->getOperand(0))) { + // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or + // external globals. + return cast<GlobalVariable>(CE->getOperand(0))->hasUniqueInitializer(); + } + } + + return false; +} + +/// Return the value that would be computed by a load from P after the stores +/// reflected by 'memory' have been performed. If we can't decide, return null. +Constant *Evaluator::ComputeLoadResult(Constant *P) { + // If this memory location has been recently stored, use the stored value: it + // is the most up-to-date. + DenseMap<Constant*, Constant*>::const_iterator I = MutatedMemory.find(P); + if (I != MutatedMemory.end()) return I->second; + + // Access it. 
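+ // (Editor's note: illustrative example, not part of the upstream source;
+ // the global @g is invented for the example.)
+ // For a definitively initialized global such as @g = global i32 5, a
+ // simulated load of @g yields i32 5 from the initializer, unless an
+ // earlier simulated store recorded a newer value in MutatedMemory above.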
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) { + if (GV->hasDefinitiveInitializer()) + return GV->getInitializer(); + return nullptr; + } + + // Handle a constantexpr getelementptr. + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P)) + if (CE->getOpcode() == Instruction::GetElementPtr && + isa<GlobalVariable>(CE->getOperand(0))) { + GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); + if (GV->hasDefinitiveInitializer()) + return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); + } + + return nullptr; // don't know how to evaluate. +} + +/// Evaluate all instructions in block BB, returning true if successful, false +/// if we can't evaluate it. NewBB returns the next BB that control flows into, +/// or null upon return. +bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, + BasicBlock *&NextBB) { + // This is the main evaluation loop. + while (1) { + Constant *InstResult = nullptr; + + DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n"); + + if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) { + if (!SI->isSimple()) { + DEBUG(dbgs() << "Store is not simple! Can not evaluate.\n"); + return false; // no volatile/atomic accesses. + } + Constant *Ptr = getVal(SI->getOperand(1)); + if (auto *FoldedPtr = ConstantFoldConstant(Ptr, DL, TLI)) { + DEBUG(dbgs() << "Folding constant ptr expression: " << *Ptr); + Ptr = FoldedPtr; + DEBUG(dbgs() << "; To: " << *Ptr << "\n"); + } + if (!isSimpleEnoughPointerToCommit(Ptr)) { + // If this is too complex for us to commit, reject it. + DEBUG(dbgs() << "Pointer is too complex for us to evaluate store."); + return false; + } + + Constant *Val = getVal(SI->getOperand(0)); + + // If this might be too difficult for the backend to handle (e.g. the addr + // of one global variable divided by another) then we can't commit it. + if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, DL)) { + DEBUG(dbgs() << "Store value is too complex to evaluate store. " << *Val + << "\n"); + return false; + } + + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) { + if (CE->getOpcode() == Instruction::BitCast) { + DEBUG(dbgs() << "Attempting to resolve bitcast on constant ptr.\n"); + // If we're evaluating a store through a bitcast, then we need + // to pull the bitcast off the pointer type and push it onto the + // stored value. + Ptr = CE->getOperand(0); + + Type *NewTy = cast<PointerType>(Ptr->getType())->getElementType(); + + // In order to push the bitcast onto the stored value, a bitcast + // from NewTy to Val's type must be legal. If it's not, we can try + // introspecting NewTy to find a legal conversion. + while (!Val->getType()->canLosslesslyBitCastTo(NewTy)) { + // If NewTy is a struct, we can convert the pointer to the struct + // into a pointer to its first member. + // FIXME: This could be extended to support arrays as well. + if (StructType *STy = dyn_cast<StructType>(NewTy)) { + NewTy = STy->getTypeAtIndex(0U); + + IntegerType *IdxTy = IntegerType::get(NewTy->getContext(), 32); + Constant *IdxZero = ConstantInt::get(IdxTy, 0, false); + Constant * const IdxList[] = {IdxZero, IdxZero}; + + Ptr = ConstantExpr::getGetElementPtr(nullptr, Ptr, IdxList); + if (auto *FoldedPtr = ConstantFoldConstant(Ptr, DL, TLI)) + Ptr = FoldedPtr; + + // If we can't improve the situation by introspecting NewTy, + // we have to give up. 
+ } else { + DEBUG(dbgs() << "Failed to bitcast constant ptr, can not " + "evaluate.\n"); + return false; + } + } + + // If we found compatible types, go ahead and push the bitcast + // onto the stored value. + Val = ConstantExpr::getBitCast(Val, NewTy); + + DEBUG(dbgs() << "Evaluated bitcast: " << *Val << "\n"); + } + } + + MutatedMemory[Ptr] = Val; + } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) { + InstResult = ConstantExpr::get(BO->getOpcode(), + getVal(BO->getOperand(0)), + getVal(BO->getOperand(1))); + DEBUG(dbgs() << "Found a BinaryOperator! Simplifying: " << *InstResult + << "\n"); + } else if (CmpInst *CI = dyn_cast<CmpInst>(CurInst)) { + InstResult = ConstantExpr::getCompare(CI->getPredicate(), + getVal(CI->getOperand(0)), + getVal(CI->getOperand(1))); + DEBUG(dbgs() << "Found a CmpInst! Simplifying: " << *InstResult + << "\n"); + } else if (CastInst *CI = dyn_cast<CastInst>(CurInst)) { + InstResult = ConstantExpr::getCast(CI->getOpcode(), + getVal(CI->getOperand(0)), + CI->getType()); + DEBUG(dbgs() << "Found a Cast! Simplifying: " << *InstResult + << "\n"); + } else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) { + InstResult = ConstantExpr::getSelect(getVal(SI->getOperand(0)), + getVal(SI->getOperand(1)), + getVal(SI->getOperand(2))); + DEBUG(dbgs() << "Found a Select! Simplifying: " << *InstResult + << "\n"); + } else if (auto *EVI = dyn_cast<ExtractValueInst>(CurInst)) { + InstResult = ConstantExpr::getExtractValue( + getVal(EVI->getAggregateOperand()), EVI->getIndices()); + DEBUG(dbgs() << "Found an ExtractValueInst! Simplifying: " << *InstResult + << "\n"); + } else if (auto *IVI = dyn_cast<InsertValueInst>(CurInst)) { + InstResult = ConstantExpr::getInsertValue( + getVal(IVI->getAggregateOperand()), + getVal(IVI->getInsertedValueOperand()), IVI->getIndices()); + DEBUG(dbgs() << "Found an InsertValueInst! Simplifying: " << *InstResult + << "\n"); + } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) { + Constant *P = getVal(GEP->getOperand(0)); + SmallVector<Constant*, 8> GEPOps; + for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); + i != e; ++i) + GEPOps.push_back(getVal(*i)); + InstResult = + ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), P, GEPOps, + cast<GEPOperator>(GEP)->isInBounds()); + DEBUG(dbgs() << "Found a GEP! Simplifying: " << *InstResult + << "\n"); + } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) { + + if (!LI->isSimple()) { + DEBUG(dbgs() << "Found a Load! Not a simple load, can not evaluate.\n"); + return false; // no volatile/atomic accesses. + } + + Constant *Ptr = getVal(LI->getOperand(0)); + if (auto *FoldedPtr = ConstantFoldConstant(Ptr, DL, TLI)) { + Ptr = FoldedPtr; + DEBUG(dbgs() << "Found a constant pointer expression, constant " + "folding: " << *Ptr << "\n"); + } + InstResult = ComputeLoadResult(Ptr); + if (!InstResult) { + DEBUG(dbgs() << "Failed to compute load result. Can not evaluate load." + "\n"); + return false; // Could not evaluate load. + } + + DEBUG(dbgs() << "Evaluated load: " << *InstResult << "\n"); + } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) { + if (AI->isArrayAllocation()) { + DEBUG(dbgs() << "Found an array alloca. Can not evaluate.\n"); + return false; // Cannot handle array allocs. 
+ } + Type *Ty = AI->getAllocatedType(); + AllocaTmps.push_back( + make_unique<GlobalVariable>(Ty, false, GlobalValue::InternalLinkage, + UndefValue::get(Ty), AI->getName())); + InstResult = AllocaTmps.back().get(); + DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n"); + } else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) { + CallSite CS(&*CurInst); + + // Debug info can safely be ignored here. + if (isa<DbgInfoIntrinsic>(CS.getInstruction())) { + DEBUG(dbgs() << "Ignoring debug info.\n"); + ++CurInst; + continue; + } + + // Cannot handle inline asm. + if (isa<InlineAsm>(CS.getCalledValue())) { + DEBUG(dbgs() << "Found inline asm, can not evaluate.\n"); + return false; + } + + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) { + if (MemSetInst *MSI = dyn_cast<MemSetInst>(II)) { + if (MSI->isVolatile()) { + DEBUG(dbgs() << "Can not optimize a volatile memset " << + "intrinsic.\n"); + return false; + } + Constant *Ptr = getVal(MSI->getDest()); + Constant *Val = getVal(MSI->getValue()); + Constant *DestVal = ComputeLoadResult(getVal(Ptr)); + if (Val->isNullValue() && DestVal && DestVal->isNullValue()) { + // This memset is a no-op. + DEBUG(dbgs() << "Ignoring no-op memset.\n"); + ++CurInst; + continue; + } + } + + if (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end) { + DEBUG(dbgs() << "Ignoring lifetime intrinsic.\n"); + ++CurInst; + continue; + } + + if (II->getIntrinsicID() == Intrinsic::invariant_start) { + // We don't insert an entry into Values, as it doesn't have a + // meaningful return value. + if (!II->use_empty()) { + DEBUG(dbgs() << "Found unused invariant_start. Can't evaluate.\n"); + return false; + } + ConstantInt *Size = cast<ConstantInt>(II->getArgOperand(0)); + Value *PtrArg = getVal(II->getArgOperand(1)); + Value *Ptr = PtrArg->stripPointerCasts(); + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) { + Type *ElemTy = GV->getValueType(); + if (!Size->isAllOnesValue() && + Size->getValue().getLimitedValue() >= + DL.getTypeStoreSize(ElemTy)) { + Invariants.insert(GV); + DEBUG(dbgs() << "Found a global var that is an invariant: " << *GV + << "\n"); + } else { + DEBUG(dbgs() << "Found a global var, but can not treat it as an " + "invariant.\n"); + } + } + // Continue even if we do nothing. + ++CurInst; + continue; + } else if (II->getIntrinsicID() == Intrinsic::assume) { + DEBUG(dbgs() << "Skipping assume intrinsic.\n"); + ++CurInst; + continue; + } + + DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n"); + return false; + } + + // Resolve function pointers. + Function *Callee = dyn_cast<Function>(getVal(CS.getCalledValue())); + if (!Callee || Callee->isInterposable()) { + DEBUG(dbgs() << "Can not resolve function pointer.\n"); + return false; // Cannot resolve. + } + + SmallVector<Constant*, 8> Formals; + for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) + Formals.push_back(getVal(*i)); + + if (Callee->isDeclaration()) { + // If this is a function we can constant fold, do it. + if (Constant *C = ConstantFoldCall(Callee, Formals, TLI)) { + InstResult = C; + DEBUG(dbgs() << "Constant folded function call. 
Result: " << + *InstResult << "\n"); + } else { + DEBUG(dbgs() << "Can not constant fold function call.\n"); + return false; + } + } else { + if (Callee->getFunctionType()->isVarArg()) { + DEBUG(dbgs() << "Can not constant fold vararg function call.\n"); + return false; + } + + Constant *RetVal = nullptr; + // Execute the call, if successful, use the return value. + ValueStack.emplace_back(); + if (!EvaluateFunction(Callee, RetVal, Formals)) { + DEBUG(dbgs() << "Failed to evaluate function.\n"); + return false; + } + ValueStack.pop_back(); + InstResult = RetVal; + + if (InstResult) { + DEBUG(dbgs() << "Successfully evaluated function. Result: " + << *InstResult << "\n\n"); + } else { + DEBUG(dbgs() << "Successfully evaluated function. Result: 0\n\n"); + } + } + } else if (isa<TerminatorInst>(CurInst)) { + DEBUG(dbgs() << "Found a terminator instruction.\n"); + + if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) { + if (BI->isUnconditional()) { + NextBB = BI->getSuccessor(0); + } else { + ConstantInt *Cond = + dyn_cast<ConstantInt>(getVal(BI->getCondition())); + if (!Cond) return false; // Cannot determine. + + NextBB = BI->getSuccessor(!Cond->getZExtValue()); + } + } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) { + ConstantInt *Val = + dyn_cast<ConstantInt>(getVal(SI->getCondition())); + if (!Val) return false; // Cannot determine. + NextBB = SI->findCaseValue(Val).getCaseSuccessor(); + } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(CurInst)) { + Value *Val = getVal(IBI->getAddress())->stripPointerCasts(); + if (BlockAddress *BA = dyn_cast<BlockAddress>(Val)) + NextBB = BA->getBasicBlock(); + else + return false; // Cannot determine. + } else if (isa<ReturnInst>(CurInst)) { + NextBB = nullptr; + } else { + // invoke, unwind, resume, unreachable. + DEBUG(dbgs() << "Can not handle terminator."); + return false; // Cannot handle this terminator. + } + + // We succeeded at evaluating this block! + DEBUG(dbgs() << "Successfully evaluated block.\n"); + return true; + } else { + // Did not know how to evaluate this! + DEBUG(dbgs() << "Failed to evaluate block due to unhandled instruction." + "\n"); + return false; + } + + if (!CurInst->use_empty()) { + if (auto *FoldedInstResult = ConstantFoldConstant(InstResult, DL, TLI)) + InstResult = FoldedInstResult; + + setVal(&*CurInst, InstResult); + } + + // If we just processed an invoke, we finished evaluating the block. + if (InvokeInst *II = dyn_cast<InvokeInst>(CurInst)) { + NextBB = II->getNormalDest(); + DEBUG(dbgs() << "Found an invoke instruction. Finished Block.\n\n"); + return true; + } + + // Advance program counter. + ++CurInst; + } +} + +/// Evaluate a call to function F, returning true if successful, false if we +/// can't evaluate it. ActualArgs contains the formal arguments for the +/// function. +bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal, + const SmallVectorImpl<Constant*> &ActualArgs) { + // Check to see if this function is already executing (recursion). If so, + // bail out. TODO: we might want to accept limited recursion. + if (is_contained(CallStack, F)) + return false; + + CallStack.push_back(F); + + // Initialize arguments to the incoming values specified. + unsigned ArgNo = 0; + for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; + ++AI, ++ArgNo) + setVal(&*AI, ActualArgs[ArgNo]); + + // ExecutedBlocks - We only handle non-looping, non-recursive code. As such, + // we can only evaluate any one basic block at most once. 
This set keeps
+ // track of what we have executed so we can detect recursive cases etc.
+ SmallPtrSet<BasicBlock*, 32> ExecutedBlocks;
+
+ // CurBB - The current basic block we're evaluating.
+ BasicBlock *CurBB = &F->front();
+
+ BasicBlock::iterator CurInst = CurBB->begin();
+
+ while (1) {
+ BasicBlock *NextBB = nullptr; // Initialized to avoid compiler warnings.
+ DEBUG(dbgs() << "Trying to evaluate BB: " << *CurBB << "\n");
+
+ if (!EvaluateBlock(CurInst, NextBB))
+ return false;
+
+ if (!NextBB) {
+ // Successfully running until there's no next block means that we found
+ // the return. Fill in the return value and pop the call stack.
+ ReturnInst *RI = cast<ReturnInst>(CurBB->getTerminator());
+ if (RI->getNumOperands())
+ RetVal = getVal(RI->getOperand(0));
+ CallStack.pop_back();
+ return true;
+ }
+
+ // Okay, we succeeded in evaluating this control flow. See if we have
+ // executed the new block before. If so, we have a looping function,
+ // which we cannot evaluate in reasonable time.
+ if (!ExecutedBlocks.insert(NextBB).second)
+ return false; // looped!
+
+ // Okay, we have never been in this block before. Check to see if there
+ // are any PHI nodes. If so, evaluate them with information about where
+ // we came from.
+ PHINode *PN = nullptr;
+ for (CurInst = NextBB->begin();
+ (PN = dyn_cast<PHINode>(CurInst)); ++CurInst)
+ setVal(PN, getVal(PN->getIncomingValueForBlock(CurBB)));
+
+ // Advance to the next block.
+ CurBB = NextBB;
+ }
+}
+
diff --git a/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp b/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp new file mode 100644 index 000000000000..7b96fbb11a14 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp @@ -0,0 +1,482 @@
+//===- FlattenCFG.cpp - Code to perform CFG flattening ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Reduce conditional branches in CFG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "flattencfg"
+
+namespace {
+class FlattenCFGOpt {
+ AliasAnalysis *AA;
+ /// \brief Use parallel-and or parallel-or to generate conditions for
+ /// conditional branches.
+ bool FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder);
+ /// \brief If \param BB is the merge block of an if-region, attempt to merge
+ /// the if-region with an adjacent if-region upstream if two if-regions
+ /// contain identical instructions.
+ bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder);
+ /// \brief Compare a pair of blocks: \p Block1 and \p Block2, which
+ /// are from two if-regions whose entry blocks are \p Head1 and \p
+ /// Head2. \returns true if \p Block1 and \p Block2 contain identical
+ /// instructions, and have no memory reference alias with \p Head2.
+ /// This is used as a legality check for merging if-regions.
+ bool CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2,
+ BasicBlock *Block1, BasicBlock *Block2);
+
+public:
+ FlattenCFGOpt(AliasAnalysis *AA) : AA(AA) {}
+ bool run(BasicBlock *BB);
+};
+}
+
+/// If \param [in] BB has more than one predecessor that is a conditional
+/// branch, attempt to use parallel and/or for the branch condition. \returns
+/// true on success.
+///
+/// Before:
+/// ......
+/// %cmp10 = fcmp une float %tmp1, %tmp2
+/// br i1 %cmp10, label %if.then, label %lor.rhs
+///
+/// lor.rhs:
+/// ......
+/// %cmp11 = fcmp une float %tmp3, %tmp4
+/// br i1 %cmp11, label %if.then, label %if.end
+///
+/// if.end: // the merge block
+/// ......
+///
+/// if.then: // has two predecessors, both of them contain conditional branches.
+/// ......
+/// br label %if.end;
+///
+/// After:
+/// ......
+/// %cmp10 = fcmp une float %tmp1, %tmp2
+/// ......
+/// %cmp11 = fcmp une float %tmp3, %tmp4
+/// %cmp12 = or i1 %cmp10, %cmp11 // parallel-or mode.
+/// br i1 %cmp12, label %if.then, label %if.end
+///
+/// if.end:
+/// ......
+///
+/// if.then:
+/// ......
+/// br label %if.end;
+///
+/// Current implementation handles two cases.
+/// Case 1: \param BB is on the else-path.
+///
+/// BB1
+/// / |
+/// BB2 |
+/// / \ |
+/// BB3 \ | where BB1, BB2 contain conditional branches.
+/// \ | / BB3 contains unconditional branch.
+/// \ | / BB4 corresponds to \param BB which is also the merge.
+/// BB => BB4
+///
+///
+/// Corresponding source code:
+///
+/// if (a == b && c == d)
+/// statement; // BB3
+///
+/// Case 2: \param BB is on the then-path.
+///
+/// BB1
+/// / |
+/// | BB2
+/// \ / | where BB1, BB2 contain conditional branches.
+/// BB => BB3 | BB3 contains an unconditional branch and corresponds
+/// \ / to \param BB. BB4 is the merge.
+/// BB4
+///
+/// Corresponding source code:
+///
+/// if (a == b || c == d)
+/// statement; // BB3
+///
+/// In both cases, \param BB is the common successor of conditional branches.
+/// In Case 1, \param BB (BB4) has an unconditional branch (BB3) as
+/// its predecessor. In Case 2, \param BB (BB3) only has conditional branches
+/// as its predecessors.
+///
+bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) {
+ PHINode *PHI = dyn_cast<PHINode>(BB->begin());
+ if (PHI)
+ return false; // For simplicity, avoid cases containing PHI nodes.
+
+ BasicBlock *LastCondBlock = nullptr;
+ BasicBlock *FirstCondBlock = nullptr;
+ BasicBlock *UnCondBlock = nullptr;
+ int Idx = -1;
+
+ // Check predecessors of \param BB.
+ SmallPtrSet<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
+ for (SmallPtrSetIterator<BasicBlock *> PI = Preds.begin(), PE = Preds.end();
+ PI != PE; ++PI) {
+ BasicBlock *Pred = *PI;
+ BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator());
+
+ // All predecessors should terminate with a branch.
+ if (!PBI)
+ return false;
+
+ BasicBlock *PP = Pred->getSinglePredecessor();
+
+ if (PBI->isUnconditional()) {
+ // Case 1: Pred (BB3) is an unconditional block, it should
+ // have a single predecessor (BB2) that is also a predecessor
+ // of \param BB (BB4) and should not have its address taken.
+ // There should exist only one such unconditional
+ // branch among the predecessors.
+ if (UnCondBlock || !PP || (Preds.count(PP) == 0) ||
+ Pred->hasAddressTaken())
+ return false;
+
+ UnCondBlock = Pred;
+ continue;
+ }
+
+ // Only conditional branches are allowed beyond this point.
+ assert(PBI->isConditional());
+
+ // Condition's unique use should be the branch instruction.
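+ // (Editor's note: illustrative remark, not part of the upstream source.)
+ // If the compare feeding the branch had another user as well, for example
+ // a select on the same %cmp value, the hasOneUse() check below would fail
+ // and the region would be left alone.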
+ Value *PC = PBI->getCondition(); + if (!PC || !PC->hasOneUse()) + return false; + + if (PP && Preds.count(PP)) { + // These are internal condition blocks to be merged from, e.g., + // BB2 in both cases. + // Should not be address-taken. + if (Pred->hasAddressTaken()) + return false; + + // Instructions in the internal condition blocks should be safe + // to hoist up. + for (BasicBlock::iterator BI = Pred->begin(), BE = PBI->getIterator(); + BI != BE;) { + Instruction *CI = &*BI++; + if (isa<PHINode>(CI) || !isSafeToSpeculativelyExecute(CI)) + return false; + } + } else { + // This is the condition block to be merged into, e.g. BB1 in + // both cases. + if (FirstCondBlock) + return false; + FirstCondBlock = Pred; + } + + // Find whether BB is uniformly on the true (or false) path + // for all of its predecessors. + BasicBlock *PS1 = PBI->getSuccessor(0); + BasicBlock *PS2 = PBI->getSuccessor(1); + BasicBlock *PS = (PS1 == BB) ? PS2 : PS1; + int CIdx = (PS1 == BB) ? 0 : 1; + + if (Idx == -1) + Idx = CIdx; + else if (CIdx != Idx) + return false; + + // PS is the successor which is not BB. Check successors to identify + // the last conditional branch. + if (Preds.count(PS) == 0) { + // Case 2. + LastCondBlock = Pred; + } else { + // Case 1 + BranchInst *BPS = dyn_cast<BranchInst>(PS->getTerminator()); + if (BPS && BPS->isUnconditional()) { + // Case 1: PS(BB3) should be an unconditional branch. + LastCondBlock = Pred; + } + } + } + + if (!FirstCondBlock || !LastCondBlock || (FirstCondBlock == LastCondBlock)) + return false; + + TerminatorInst *TBB = LastCondBlock->getTerminator(); + BasicBlock *PS1 = TBB->getSuccessor(0); + BasicBlock *PS2 = TBB->getSuccessor(1); + BranchInst *PBI1 = dyn_cast<BranchInst>(PS1->getTerminator()); + BranchInst *PBI2 = dyn_cast<BranchInst>(PS2->getTerminator()); + + // If PS1 does not jump into PS2, but PS2 jumps into PS1, + // attempt branch inversion. + if (!PBI1 || !PBI1->isUnconditional() || + (PS1->getTerminator()->getSuccessor(0) != PS2)) { + // Check whether PS2 jumps into PS1. + if (!PBI2 || !PBI2->isUnconditional() || + (PS2->getTerminator()->getSuccessor(0) != PS1)) + return false; + + // Do branch inversion. + BasicBlock *CurrBlock = LastCondBlock; + bool EverChanged = false; + for (;CurrBlock != FirstCondBlock; + CurrBlock = CurrBlock->getSinglePredecessor()) { + BranchInst *BI = dyn_cast<BranchInst>(CurrBlock->getTerminator()); + CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition()); + if (!CI) + continue; + + CmpInst::Predicate Predicate = CI->getPredicate(); + // Canonicalize icmp_ne -> icmp_eq, fcmp_one -> fcmp_oeq + if ((Predicate == CmpInst::ICMP_NE) || (Predicate == CmpInst::FCMP_ONE)) { + CI->setPredicate(ICmpInst::getInversePredicate(Predicate)); + BI->swapSuccessors(); + EverChanged = true; + } + } + return EverChanged; + } + + // PS1 must have a conditional branch. + if (!PBI1 || !PBI1->isUnconditional()) + return false; + + // PS2 should not contain PHI node. + PHI = dyn_cast<PHINode>(PS2->begin()); + if (PHI) + return false; + + // Do the transformation. + BasicBlock *CB; + BranchInst *PBI = dyn_cast<BranchInst>(FirstCondBlock->getTerminator()); + bool Iteration = true; + IRBuilder<>::InsertPointGuard Guard(Builder); + Value *PC = PBI->getCondition(); + + do { + CB = PBI->getSuccessor(1 - Idx); + // Delete the conditional branch. 
+ FirstCondBlock->getInstList().pop_back(); + FirstCondBlock->getInstList() + .splice(FirstCondBlock->end(), CB->getInstList()); + PBI = cast<BranchInst>(FirstCondBlock->getTerminator()); + Value *CC = PBI->getCondition(); + // Merge conditions. + Builder.SetInsertPoint(PBI); + Value *NC; + if (Idx == 0) + // Case 2, use parallel or. + NC = Builder.CreateOr(PC, CC); + else + // Case 1, use parallel and. + NC = Builder.CreateAnd(PC, CC); + + PBI->replaceUsesOfWith(CC, NC); + PC = NC; + if (CB == LastCondBlock) + Iteration = false; + // Remove internal conditional branches. + CB->dropAllReferences(); + // make CB unreachable and let downstream to delete the block. + new UnreachableInst(CB->getContext(), CB); + } while (Iteration); + + DEBUG(dbgs() << "Use parallel and/or in:\n" << *FirstCondBlock); + return true; +} + +/// Compare blocks from two if-regions, where \param Head1 is the entry of the +/// 1st if-region. \param Head2 is the entry of the 2nd if-region. \param +/// Block1 is a block in the 1st if-region to compare. \param Block2 is a block +// in the 2nd if-region to compare. \returns true if \param Block1 and \param +/// Block2 have identical instructions and do not have memory reference alias +/// with \param Head2. +/// +bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2, + BasicBlock *Block1, + BasicBlock *Block2) { + TerminatorInst *PTI2 = Head2->getTerminator(); + Instruction *PBI2 = &Head2->front(); + + bool eq1 = (Block1 == Head1); + bool eq2 = (Block2 == Head2); + if (eq1 || eq2) { + // An empty then-path or else-path. + return (eq1 == eq2); + } + + // Check whether instructions in Block1 and Block2 are identical + // and do not alias with instructions in Head2. + BasicBlock::iterator iter1 = Block1->begin(); + BasicBlock::iterator end1 = Block1->getTerminator()->getIterator(); + BasicBlock::iterator iter2 = Block2->begin(); + BasicBlock::iterator end2 = Block2->getTerminator()->getIterator(); + + while (1) { + if (iter1 == end1) { + if (iter2 != end2) + return false; + break; + } + + if (!iter1->isIdenticalTo(&*iter2)) + return false; + + // Illegal to remove instructions with side effects except + // non-volatile stores. + if (iter1->mayHaveSideEffects()) { + Instruction *CurI = &*iter1; + StoreInst *SI = dyn_cast<StoreInst>(CurI); + if (!SI || SI->isVolatile()) + return false; + } + + // For simplicity and speed, data dependency check can be + // avoided if read from memory doesn't exist. + if (iter1->mayReadFromMemory()) + return false; + + if (iter1->mayWriteToMemory()) { + for (BasicBlock::iterator BI(PBI2), BE(PTI2); BI != BE; ++BI) { + if (BI->mayReadFromMemory() || BI->mayWriteToMemory()) { + // Check alias with Head2. + if (!AA || AA->alias(&*iter1, &*BI)) + return false; + } + } + } + ++iter1; + ++iter2; + } + + return true; +} + +/// Check whether \param BB is the merge block of a if-region. If yes, check +/// whether there exists an adjacent if-region upstream, the two if-regions +/// contain identical instructions and can be legally merged. \returns true if +/// the two if-regions are merged. 
+/// +/// From: +/// if (a) +/// statement; +/// if (b) +/// statement; +/// +/// To: +/// if (a || b) +/// statement; +/// +bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) { + BasicBlock *IfTrue2, *IfFalse2; + Value *IfCond2 = GetIfCondition(BB, IfTrue2, IfFalse2); + Instruction *CInst2 = dyn_cast_or_null<Instruction>(IfCond2); + if (!CInst2) + return false; + + BasicBlock *SecondEntryBlock = CInst2->getParent(); + if (SecondEntryBlock->hasAddressTaken()) + return false; + + BasicBlock *IfTrue1, *IfFalse1; + Value *IfCond1 = GetIfCondition(SecondEntryBlock, IfTrue1, IfFalse1); + Instruction *CInst1 = dyn_cast_or_null<Instruction>(IfCond1); + if (!CInst1) + return false; + + BasicBlock *FirstEntryBlock = CInst1->getParent(); + + // Either then-path or else-path should be empty. + if ((IfTrue1 != FirstEntryBlock) && (IfFalse1 != FirstEntryBlock)) + return false; + if ((IfTrue2 != SecondEntryBlock) && (IfFalse2 != SecondEntryBlock)) + return false; + + TerminatorInst *PTI2 = SecondEntryBlock->getTerminator(); + Instruction *PBI2 = &SecondEntryBlock->front(); + + if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfTrue1, + IfTrue2)) + return false; + + if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfFalse1, + IfFalse2)) + return false; + + // Check whether \param SecondEntryBlock has side-effect and is safe to + // speculate. + for (BasicBlock::iterator BI(PBI2), BE(PTI2); BI != BE; ++BI) { + Instruction *CI = &*BI; + if (isa<PHINode>(CI) || CI->mayHaveSideEffects() || + !isSafeToSpeculativelyExecute(CI)) + return false; + } + + // Merge \param SecondEntryBlock into \param FirstEntryBlock. + FirstEntryBlock->getInstList().pop_back(); + FirstEntryBlock->getInstList() + .splice(FirstEntryBlock->end(), SecondEntryBlock->getInstList()); + BranchInst *PBI = dyn_cast<BranchInst>(FirstEntryBlock->getTerminator()); + Value *CC = PBI->getCondition(); + BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); + BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + Builder.SetInsertPoint(PBI); + Value *NC = Builder.CreateOr(CInst1, CC); + PBI->replaceUsesOfWith(CC, NC); + Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt); + + // Remove IfTrue1 + if (IfTrue1 != FirstEntryBlock) { + IfTrue1->dropAllReferences(); + IfTrue1->eraseFromParent(); + } + + // Remove IfFalse1 + if (IfFalse1 != FirstEntryBlock) { + IfFalse1->dropAllReferences(); + IfFalse1->eraseFromParent(); + } + + // Remove \param SecondEntryBlock + SecondEntryBlock->dropAllReferences(); + SecondEntryBlock->eraseFromParent(); + DEBUG(dbgs() << "If conditions merged into:\n" << *FirstEntryBlock); + return true; +} + +bool FlattenCFGOpt::run(BasicBlock *BB) { + assert(BB && BB->getParent() && "Block not embedded in function!"); + assert(BB->getTerminator() && "Degenerate basic block encountered!"); + + IRBuilder<> Builder(BB); + + if (FlattenParallelAndOr(BB, Builder) || MergeIfRegion(BB, Builder)) + return true; + return false; +} + +/// FlattenCFG - This function is used to flatten a CFG. For +/// example, it uses parallel-and and parallel-or mode to collapse +// if-conditions and merge if-regions with identical statements. 
+/// +bool llvm::FlattenCFG(BasicBlock *BB, AliasAnalysis *AA) { + return FlattenCFGOpt(AA).run(BB); +} diff --git a/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp new file mode 100644 index 000000000000..81a7c4ceffab --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp @@ -0,0 +1,919 @@ +//===- FunctionComparator.h - Function Comparator -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the FunctionComparator and GlobalNumberState classes +// which are used by the MergeFunctions pass for comparing functions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/FunctionComparator.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "functioncomparator" + +int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const { + if (L < R) return -1; + if (L > R) return 1; + return 0; +} + +int FunctionComparator::cmpOrderings(AtomicOrdering L, AtomicOrdering R) const { + if ((int)L < (int)R) return -1; + if ((int)L > (int)R) return 1; + return 0; +} + +int FunctionComparator::cmpAPInts(const APInt &L, const APInt &R) const { + if (int Res = cmpNumbers(L.getBitWidth(), R.getBitWidth())) + return Res; + if (L.ugt(R)) return 1; + if (R.ugt(L)) return -1; + return 0; +} + +int FunctionComparator::cmpAPFloats(const APFloat &L, const APFloat &R) const { + // Floats are ordered first by semantics (i.e. float, double, half, etc.), + // then by value interpreted as a bitstring (aka APInt). + const fltSemantics &SL = L.getSemantics(), &SR = R.getSemantics(); + if (int Res = cmpNumbers(APFloat::semanticsPrecision(SL), + APFloat::semanticsPrecision(SR))) + return Res; + if (int Res = cmpNumbers(APFloat::semanticsMaxExponent(SL), + APFloat::semanticsMaxExponent(SR))) + return Res; + if (int Res = cmpNumbers(APFloat::semanticsMinExponent(SL), + APFloat::semanticsMinExponent(SR))) + return Res; + if (int Res = cmpNumbers(APFloat::semanticsSizeInBits(SL), + APFloat::semanticsSizeInBits(SR))) + return Res; + return cmpAPInts(L.bitcastToAPInt(), R.bitcastToAPInt()); +} + +int FunctionComparator::cmpMem(StringRef L, StringRef R) const { + // Prevent heavy comparison, compare sizes first. + if (int Res = cmpNumbers(L.size(), R.size())) + return Res; + + // Compare strings lexicographically only when it is necessary: only when + // strings are equal in size. 
+ return L.compare(R); +} + +int FunctionComparator::cmpAttrs(const AttributeSet L, + const AttributeSet R) const { + if (int Res = cmpNumbers(L.getNumSlots(), R.getNumSlots())) + return Res; + + for (unsigned i = 0, e = L.getNumSlots(); i != e; ++i) { + AttributeSet::iterator LI = L.begin(i), LE = L.end(i), RI = R.begin(i), + RE = R.end(i); + for (; LI != LE && RI != RE; ++LI, ++RI) { + Attribute LA = *LI; + Attribute RA = *RI; + if (LA < RA) + return -1; + if (RA < LA) + return 1; + } + if (LI != LE) + return 1; + if (RI != RE) + return -1; + } + return 0; +} + +int FunctionComparator::cmpRangeMetadata(const MDNode *L, + const MDNode *R) const { + if (L == R) + return 0; + if (!L) + return -1; + if (!R) + return 1; + // Range metadata is a sequence of numbers. Make sure they are the same + // sequence. + // TODO: Note that as this is metadata, it is possible to drop and/or merge + // this data when considering functions to merge. Thus this comparison would + // return 0 (i.e. equivalent), but merging would become more complicated + // because the ranges would need to be unioned. It is not likely that + // functions differ ONLY in this metadata if they are actually the same + // function semantically. + if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands())) + return Res; + for (size_t I = 0; I < L->getNumOperands(); ++I) { + ConstantInt *LLow = mdconst::extract<ConstantInt>(L->getOperand(I)); + ConstantInt *RLow = mdconst::extract<ConstantInt>(R->getOperand(I)); + if (int Res = cmpAPInts(LLow->getValue(), RLow->getValue())) + return Res; + } + return 0; +} + +int FunctionComparator::cmpOperandBundlesSchema(const Instruction *L, + const Instruction *R) const { + ImmutableCallSite LCS(L); + ImmutableCallSite RCS(R); + + assert(LCS && RCS && "Must be calls or invokes!"); + assert(LCS.isCall() == RCS.isCall() && "Can't compare otherwise!"); + + if (int Res = + cmpNumbers(LCS.getNumOperandBundles(), RCS.getNumOperandBundles())) + return Res; + + for (unsigned i = 0, e = LCS.getNumOperandBundles(); i != e; ++i) { + auto OBL = LCS.getOperandBundleAt(i); + auto OBR = RCS.getOperandBundleAt(i); + + if (int Res = OBL.getTagName().compare(OBR.getTagName())) + return Res; + + if (int Res = cmpNumbers(OBL.Inputs.size(), OBR.Inputs.size())) + return Res; + } + + return 0; +} + +/// Constants comparison: +/// 1. Check whether type of L constant could be losslessly bitcasted to R +/// type. +/// 2. Compare constant contents. +/// For more details see declaration comments. +int FunctionComparator::cmpConstants(const Constant *L, + const Constant *R) const { + + Type *TyL = L->getType(); + Type *TyR = R->getType(); + + // Check whether types are bitcastable. This part is just re-factored + // Type::canLosslesslyBitCastTo method, but instead of returning true/false, + // we also pack into result which type is "less" for us. + int TypesRes = cmpTypes(TyL, TyR); + if (TypesRes != 0) { + // Types are different, but check whether we can bitcast them. + if (!TyL->isFirstClassType()) { + if (TyR->isFirstClassType()) + return -1; + // Neither TyL nor TyR are values of first class type. Return the result + // of comparing the types + return TypesRes; + } + if (!TyR->isFirstClassType()) { + if (TyL->isFirstClassType()) + return 1; + return TypesRes; + } + + // Vector -> Vector conversions are always lossless if the two vector types + // have the same size, otherwise not. 
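+ // (Editor's note: illustrative example, not part of the upstream source.)
+ // For instance, <4 x i32> and <2 x i64> are both 128 bits wide and are
+ // treated as bitcast-compatible here, whereas <4 x i32> and <2 x i32>
+ // differ in width and are simply ordered by cmpNumbers on the widths.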
+ unsigned TyLWidth = 0; + unsigned TyRWidth = 0; + + if (auto *VecTyL = dyn_cast<VectorType>(TyL)) + TyLWidth = VecTyL->getBitWidth(); + if (auto *VecTyR = dyn_cast<VectorType>(TyR)) + TyRWidth = VecTyR->getBitWidth(); + + if (TyLWidth != TyRWidth) + return cmpNumbers(TyLWidth, TyRWidth); + + // Zero bit-width means neither TyL nor TyR are vectors. + if (!TyLWidth) { + PointerType *PTyL = dyn_cast<PointerType>(TyL); + PointerType *PTyR = dyn_cast<PointerType>(TyR); + if (PTyL && PTyR) { + unsigned AddrSpaceL = PTyL->getAddressSpace(); + unsigned AddrSpaceR = PTyR->getAddressSpace(); + if (int Res = cmpNumbers(AddrSpaceL, AddrSpaceR)) + return Res; + } + if (PTyL) + return 1; + if (PTyR) + return -1; + + // TyL and TyR aren't vectors, nor pointers. We don't know how to + // bitcast them. + return TypesRes; + } + } + + // OK, types are bitcastable, now check constant contents. + + if (L->isNullValue() && R->isNullValue()) + return TypesRes; + if (L->isNullValue() && !R->isNullValue()) + return 1; + if (!L->isNullValue() && R->isNullValue()) + return -1; + + auto GlobalValueL = const_cast<GlobalValue*>(dyn_cast<GlobalValue>(L)); + auto GlobalValueR = const_cast<GlobalValue*>(dyn_cast<GlobalValue>(R)); + if (GlobalValueL && GlobalValueR) { + return cmpGlobalValues(GlobalValueL, GlobalValueR); + } + + if (int Res = cmpNumbers(L->getValueID(), R->getValueID())) + return Res; + + if (const auto *SeqL = dyn_cast<ConstantDataSequential>(L)) { + const auto *SeqR = cast<ConstantDataSequential>(R); + // This handles ConstantDataArray and ConstantDataVector. Note that we + // compare the two raw data arrays, which might differ depending on the host + // endianness. This isn't a problem though, because the endiness of a module + // will affect the order of the constants, but this order is the same + // for a given input module and host platform. 
+ return cmpMem(SeqL->getRawDataValues(), SeqR->getRawDataValues()); + } + + switch (L->getValueID()) { + case Value::UndefValueVal: + case Value::ConstantTokenNoneVal: + return TypesRes; + case Value::ConstantIntVal: { + const APInt &LInt = cast<ConstantInt>(L)->getValue(); + const APInt &RInt = cast<ConstantInt>(R)->getValue(); + return cmpAPInts(LInt, RInt); + } + case Value::ConstantFPVal: { + const APFloat &LAPF = cast<ConstantFP>(L)->getValueAPF(); + const APFloat &RAPF = cast<ConstantFP>(R)->getValueAPF(); + return cmpAPFloats(LAPF, RAPF); + } + case Value::ConstantArrayVal: { + const ConstantArray *LA = cast<ConstantArray>(L); + const ConstantArray *RA = cast<ConstantArray>(R); + uint64_t NumElementsL = cast<ArrayType>(TyL)->getNumElements(); + uint64_t NumElementsR = cast<ArrayType>(TyR)->getNumElements(); + if (int Res = cmpNumbers(NumElementsL, NumElementsR)) + return Res; + for (uint64_t i = 0; i < NumElementsL; ++i) { + if (int Res = cmpConstants(cast<Constant>(LA->getOperand(i)), + cast<Constant>(RA->getOperand(i)))) + return Res; + } + return 0; + } + case Value::ConstantStructVal: { + const ConstantStruct *LS = cast<ConstantStruct>(L); + const ConstantStruct *RS = cast<ConstantStruct>(R); + unsigned NumElementsL = cast<StructType>(TyL)->getNumElements(); + unsigned NumElementsR = cast<StructType>(TyR)->getNumElements(); + if (int Res = cmpNumbers(NumElementsL, NumElementsR)) + return Res; + for (unsigned i = 0; i != NumElementsL; ++i) { + if (int Res = cmpConstants(cast<Constant>(LS->getOperand(i)), + cast<Constant>(RS->getOperand(i)))) + return Res; + } + return 0; + } + case Value::ConstantVectorVal: { + const ConstantVector *LV = cast<ConstantVector>(L); + const ConstantVector *RV = cast<ConstantVector>(R); + unsigned NumElementsL = cast<VectorType>(TyL)->getNumElements(); + unsigned NumElementsR = cast<VectorType>(TyR)->getNumElements(); + if (int Res = cmpNumbers(NumElementsL, NumElementsR)) + return Res; + for (uint64_t i = 0; i < NumElementsL; ++i) { + if (int Res = cmpConstants(cast<Constant>(LV->getOperand(i)), + cast<Constant>(RV->getOperand(i)))) + return Res; + } + return 0; + } + case Value::ConstantExprVal: { + const ConstantExpr *LE = cast<ConstantExpr>(L); + const ConstantExpr *RE = cast<ConstantExpr>(R); + unsigned NumOperandsL = LE->getNumOperands(); + unsigned NumOperandsR = RE->getNumOperands(); + if (int Res = cmpNumbers(NumOperandsL, NumOperandsR)) + return Res; + for (unsigned i = 0; i < NumOperandsL; ++i) { + if (int Res = cmpConstants(cast<Constant>(LE->getOperand(i)), + cast<Constant>(RE->getOperand(i)))) + return Res; + } + return 0; + } + case Value::BlockAddressVal: { + const BlockAddress *LBA = cast<BlockAddress>(L); + const BlockAddress *RBA = cast<BlockAddress>(R); + if (int Res = cmpValues(LBA->getFunction(), RBA->getFunction())) + return Res; + if (LBA->getFunction() == RBA->getFunction()) { + // They are BBs in the same function. Order by which comes first in the + // BB order of the function. This order is deterministic. + Function* F = LBA->getFunction(); + BasicBlock *LBB = LBA->getBasicBlock(); + BasicBlock *RBB = RBA->getBasicBlock(); + if (LBB == RBB) + return 0; + for(BasicBlock &BB : F->getBasicBlockList()) { + if (&BB == LBB) { + assert(&BB != RBB); + return -1; + } + if (&BB == RBB) + return 1; + } + llvm_unreachable("Basic Block Address does not point to a basic block in " + "its function."); + return -1; + } else { + // cmpValues said the functions are the same. 
So because they aren't + // literally the same pointer, they must respectively be the left and + // right functions. + assert(LBA->getFunction() == FnL && RBA->getFunction() == FnR); + // cmpValues will tell us if these are equivalent BasicBlocks, in the + // context of their respective functions. + return cmpValues(LBA->getBasicBlock(), RBA->getBasicBlock()); + } + } + default: // Unknown constant, abort. + DEBUG(dbgs() << "Looking at valueID " << L->getValueID() << "\n"); + llvm_unreachable("Constant ValueID not recognized."); + return -1; + } +} + +int FunctionComparator::cmpGlobalValues(GlobalValue *L, GlobalValue *R) const { + uint64_t LNumber = GlobalNumbers->getNumber(L); + uint64_t RNumber = GlobalNumbers->getNumber(R); + return cmpNumbers(LNumber, RNumber); +} + +/// cmpType - compares two types, +/// defines total ordering among the types set. +/// See method declaration comments for more details. +int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const { + PointerType *PTyL = dyn_cast<PointerType>(TyL); + PointerType *PTyR = dyn_cast<PointerType>(TyR); + + const DataLayout &DL = FnL->getParent()->getDataLayout(); + if (PTyL && PTyL->getAddressSpace() == 0) + TyL = DL.getIntPtrType(TyL); + if (PTyR && PTyR->getAddressSpace() == 0) + TyR = DL.getIntPtrType(TyR); + + if (TyL == TyR) + return 0; + + if (int Res = cmpNumbers(TyL->getTypeID(), TyR->getTypeID())) + return Res; + + switch (TyL->getTypeID()) { + default: + llvm_unreachable("Unknown type!"); + // Fall through in Release mode. + LLVM_FALLTHROUGH; + case Type::IntegerTyID: + return cmpNumbers(cast<IntegerType>(TyL)->getBitWidth(), + cast<IntegerType>(TyR)->getBitWidth()); + // TyL == TyR would have returned true earlier, because types are uniqued. + case Type::VoidTyID: + case Type::FloatTyID: + case Type::DoubleTyID: + case Type::X86_FP80TyID: + case Type::FP128TyID: + case Type::PPC_FP128TyID: + case Type::LabelTyID: + case Type::MetadataTyID: + case Type::TokenTyID: + return 0; + + case Type::PointerTyID: { + assert(PTyL && PTyR && "Both types must be pointers here."); + return cmpNumbers(PTyL->getAddressSpace(), PTyR->getAddressSpace()); + } + + case Type::StructTyID: { + StructType *STyL = cast<StructType>(TyL); + StructType *STyR = cast<StructType>(TyR); + if (STyL->getNumElements() != STyR->getNumElements()) + return cmpNumbers(STyL->getNumElements(), STyR->getNumElements()); + + if (STyL->isPacked() != STyR->isPacked()) + return cmpNumbers(STyL->isPacked(), STyR->isPacked()); + + for (unsigned i = 0, e = STyL->getNumElements(); i != e; ++i) { + if (int Res = cmpTypes(STyL->getElementType(i), STyR->getElementType(i))) + return Res; + } + return 0; + } + + case Type::FunctionTyID: { + FunctionType *FTyL = cast<FunctionType>(TyL); + FunctionType *FTyR = cast<FunctionType>(TyR); + if (FTyL->getNumParams() != FTyR->getNumParams()) + return cmpNumbers(FTyL->getNumParams(), FTyR->getNumParams()); + + if (FTyL->isVarArg() != FTyR->isVarArg()) + return cmpNumbers(FTyL->isVarArg(), FTyR->isVarArg()); + + if (int Res = cmpTypes(FTyL->getReturnType(), FTyR->getReturnType())) + return Res; + + for (unsigned i = 0, e = FTyL->getNumParams(); i != e; ++i) { + if (int Res = cmpTypes(FTyL->getParamType(i), FTyR->getParamType(i))) + return Res; + } + return 0; + } + + case Type::ArrayTyID: + case Type::VectorTyID: { + auto *STyL = cast<SequentialType>(TyL); + auto *STyR = cast<SequentialType>(TyR); + if (STyL->getNumElements() != STyR->getNumElements()) + return cmpNumbers(STyL->getNumElements(), STyR->getNumElements()); + return 
cmpTypes(STyL->getElementType(), STyR->getElementType()); + } + } +} + +// Determine whether the two operations are the same except that pointer-to-A +// and pointer-to-B are equivalent. This should be kept in sync with +// Instruction::isSameOperationAs. +// Read method declaration comments for more details. +int FunctionComparator::cmpOperations(const Instruction *L, + const Instruction *R, + bool &needToCmpOperands) const { + needToCmpOperands = true; + if (int Res = cmpValues(L, R)) + return Res; + + // Differences from Instruction::isSameOperationAs: + // * replace type comparison with calls to cmpTypes. + // * we test for I->getRawSubclassOptionalData (nuw/nsw/tail) at the top. + // * because of the above, we don't test for the tail bit on calls later on. + if (int Res = cmpNumbers(L->getOpcode(), R->getOpcode())) + return Res; + + if (const GetElementPtrInst *GEPL = dyn_cast<GetElementPtrInst>(L)) { + needToCmpOperands = false; + const GetElementPtrInst *GEPR = cast<GetElementPtrInst>(R); + if (int Res = + cmpValues(GEPL->getPointerOperand(), GEPR->getPointerOperand())) + return Res; + return cmpGEPs(GEPL, GEPR); + } + + if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands())) + return Res; + + if (int Res = cmpTypes(L->getType(), R->getType())) + return Res; + + if (int Res = cmpNumbers(L->getRawSubclassOptionalData(), + R->getRawSubclassOptionalData())) + return Res; + + // We have two instructions of identical opcode and #operands. Check to see + // if all operands are the same type + for (unsigned i = 0, e = L->getNumOperands(); i != e; ++i) { + if (int Res = + cmpTypes(L->getOperand(i)->getType(), R->getOperand(i)->getType())) + return Res; + } + + // Check special state that is a part of some instructions. + if (const AllocaInst *AI = dyn_cast<AllocaInst>(L)) { + if (int Res = cmpTypes(AI->getAllocatedType(), + cast<AllocaInst>(R)->getAllocatedType())) + return Res; + return cmpNumbers(AI->getAlignment(), cast<AllocaInst>(R)->getAlignment()); + } + if (const LoadInst *LI = dyn_cast<LoadInst>(L)) { + if (int Res = cmpNumbers(LI->isVolatile(), cast<LoadInst>(R)->isVolatile())) + return Res; + if (int Res = + cmpNumbers(LI->getAlignment(), cast<LoadInst>(R)->getAlignment())) + return Res; + if (int Res = + cmpOrderings(LI->getOrdering(), cast<LoadInst>(R)->getOrdering())) + return Res; + if (int Res = + cmpNumbers(LI->getSynchScope(), cast<LoadInst>(R)->getSynchScope())) + return Res; + return cmpRangeMetadata(LI->getMetadata(LLVMContext::MD_range), + cast<LoadInst>(R)->getMetadata(LLVMContext::MD_range)); + } + if (const StoreInst *SI = dyn_cast<StoreInst>(L)) { + if (int Res = + cmpNumbers(SI->isVolatile(), cast<StoreInst>(R)->isVolatile())) + return Res; + if (int Res = + cmpNumbers(SI->getAlignment(), cast<StoreInst>(R)->getAlignment())) + return Res; + if (int Res = + cmpOrderings(SI->getOrdering(), cast<StoreInst>(R)->getOrdering())) + return Res; + return cmpNumbers(SI->getSynchScope(), cast<StoreInst>(R)->getSynchScope()); + } + if (const CmpInst *CI = dyn_cast<CmpInst>(L)) + return cmpNumbers(CI->getPredicate(), cast<CmpInst>(R)->getPredicate()); + if (const CallInst *CI = dyn_cast<CallInst>(L)) { + if (int Res = cmpNumbers(CI->getCallingConv(), + cast<CallInst>(R)->getCallingConv())) + return Res; + if (int Res = + cmpAttrs(CI->getAttributes(), cast<CallInst>(R)->getAttributes())) + return Res; + if (int Res = cmpOperandBundlesSchema(CI, R)) + return Res; + return cmpRangeMetadata( + CI->getMetadata(LLVMContext::MD_range), + 
cast<CallInst>(R)->getMetadata(LLVMContext::MD_range)); + } + if (const InvokeInst *II = dyn_cast<InvokeInst>(L)) { + if (int Res = cmpNumbers(II->getCallingConv(), + cast<InvokeInst>(R)->getCallingConv())) + return Res; + if (int Res = + cmpAttrs(II->getAttributes(), cast<InvokeInst>(R)->getAttributes())) + return Res; + if (int Res = cmpOperandBundlesSchema(II, R)) + return Res; + return cmpRangeMetadata( + II->getMetadata(LLVMContext::MD_range), + cast<InvokeInst>(R)->getMetadata(LLVMContext::MD_range)); + } + if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(L)) { + ArrayRef<unsigned> LIndices = IVI->getIndices(); + ArrayRef<unsigned> RIndices = cast<InsertValueInst>(R)->getIndices(); + if (int Res = cmpNumbers(LIndices.size(), RIndices.size())) + return Res; + for (size_t i = 0, e = LIndices.size(); i != e; ++i) { + if (int Res = cmpNumbers(LIndices[i], RIndices[i])) + return Res; + } + return 0; + } + if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(L)) { + ArrayRef<unsigned> LIndices = EVI->getIndices(); + ArrayRef<unsigned> RIndices = cast<ExtractValueInst>(R)->getIndices(); + if (int Res = cmpNumbers(LIndices.size(), RIndices.size())) + return Res; + for (size_t i = 0, e = LIndices.size(); i != e; ++i) { + if (int Res = cmpNumbers(LIndices[i], RIndices[i])) + return Res; + } + } + if (const FenceInst *FI = dyn_cast<FenceInst>(L)) { + if (int Res = + cmpOrderings(FI->getOrdering(), cast<FenceInst>(R)->getOrdering())) + return Res; + return cmpNumbers(FI->getSynchScope(), cast<FenceInst>(R)->getSynchScope()); + } + if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(L)) { + if (int Res = cmpNumbers(CXI->isVolatile(), + cast<AtomicCmpXchgInst>(R)->isVolatile())) + return Res; + if (int Res = cmpNumbers(CXI->isWeak(), + cast<AtomicCmpXchgInst>(R)->isWeak())) + return Res; + if (int Res = + cmpOrderings(CXI->getSuccessOrdering(), + cast<AtomicCmpXchgInst>(R)->getSuccessOrdering())) + return Res; + if (int Res = + cmpOrderings(CXI->getFailureOrdering(), + cast<AtomicCmpXchgInst>(R)->getFailureOrdering())) + return Res; + return cmpNumbers(CXI->getSynchScope(), + cast<AtomicCmpXchgInst>(R)->getSynchScope()); + } + if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(L)) { + if (int Res = cmpNumbers(RMWI->getOperation(), + cast<AtomicRMWInst>(R)->getOperation())) + return Res; + if (int Res = cmpNumbers(RMWI->isVolatile(), + cast<AtomicRMWInst>(R)->isVolatile())) + return Res; + if (int Res = cmpOrderings(RMWI->getOrdering(), + cast<AtomicRMWInst>(R)->getOrdering())) + return Res; + return cmpNumbers(RMWI->getSynchScope(), + cast<AtomicRMWInst>(R)->getSynchScope()); + } + if (const PHINode *PNL = dyn_cast<PHINode>(L)) { + const PHINode *PNR = cast<PHINode>(R); + // Ensure that in addition to the incoming values being identical + // (checked by the caller of this function), the incoming blocks + // are also identical. + for (unsigned i = 0, e = PNL->getNumIncomingValues(); i != e; ++i) { + if (int Res = + cmpValues(PNL->getIncomingBlock(i), PNR->getIncomingBlock(i))) + return Res; + } + } + return 0; +} + +// Determine whether two GEP operations perform the same underlying arithmetic. +// Read method declaration comments for more details. 
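+// For illustration: because the comparison below first tries to fold both
+// GEPs to a constant byte offset, two GEPs such as
+//   getelementptr i8, i8* %p, i64 4
+//   getelementptr i32, i32* %q, i64 1
+// compare equal here (given matching address spaces), since both reduce to
+// an offset of 4 bytes.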
+int FunctionComparator::cmpGEPs(const GEPOperator *GEPL, + const GEPOperator *GEPR) const { + + unsigned int ASL = GEPL->getPointerAddressSpace(); + unsigned int ASR = GEPR->getPointerAddressSpace(); + + if (int Res = cmpNumbers(ASL, ASR)) + return Res; + + // When we have target data, we can reduce the GEP down to the value in bytes + // added to the address. + const DataLayout &DL = FnL->getParent()->getDataLayout(); + unsigned BitWidth = DL.getPointerSizeInBits(ASL); + APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0); + if (GEPL->accumulateConstantOffset(DL, OffsetL) && + GEPR->accumulateConstantOffset(DL, OffsetR)) + return cmpAPInts(OffsetL, OffsetR); + if (int Res = cmpTypes(GEPL->getSourceElementType(), + GEPR->getSourceElementType())) + return Res; + + if (int Res = cmpNumbers(GEPL->getNumOperands(), GEPR->getNumOperands())) + return Res; + + for (unsigned i = 0, e = GEPL->getNumOperands(); i != e; ++i) { + if (int Res = cmpValues(GEPL->getOperand(i), GEPR->getOperand(i))) + return Res; + } + + return 0; +} + +int FunctionComparator::cmpInlineAsm(const InlineAsm *L, + const InlineAsm *R) const { + // InlineAsm's are uniqued. If they are the same pointer, obviously they are + // the same, otherwise compare the fields. + if (L == R) + return 0; + if (int Res = cmpTypes(L->getFunctionType(), R->getFunctionType())) + return Res; + if (int Res = cmpMem(L->getAsmString(), R->getAsmString())) + return Res; + if (int Res = cmpMem(L->getConstraintString(), R->getConstraintString())) + return Res; + if (int Res = cmpNumbers(L->hasSideEffects(), R->hasSideEffects())) + return Res; + if (int Res = cmpNumbers(L->isAlignStack(), R->isAlignStack())) + return Res; + if (int Res = cmpNumbers(L->getDialect(), R->getDialect())) + return Res; + llvm_unreachable("InlineAsm blocks were not uniqued."); + return 0; +} + +/// Compare two values used by the two functions under pair-wise comparison. If +/// this is the first time the values are seen, they're added to the mapping so +/// that we will detect mismatches on next use. +/// See comments in declaration for more details. +int FunctionComparator::cmpValues(const Value *L, const Value *R) const { + // Catch self-reference case. + if (L == FnL) { + if (R == FnR) + return 0; + return -1; + } + if (R == FnR) { + if (L == FnL) + return 0; + return 1; + } + + const Constant *ConstL = dyn_cast<Constant>(L); + const Constant *ConstR = dyn_cast<Constant>(R); + if (ConstL && ConstR) { + if (L == R) + return 0; + return cmpConstants(ConstL, ConstR); + } + + if (ConstL) + return 1; + if (ConstR) + return -1; + + const InlineAsm *InlineAsmL = dyn_cast<InlineAsm>(L); + const InlineAsm *InlineAsmR = dyn_cast<InlineAsm>(R); + + if (InlineAsmL && InlineAsmR) + return cmpInlineAsm(InlineAsmL, InlineAsmR); + if (InlineAsmL) + return 1; + if (InlineAsmR) + return -1; + + auto LeftSN = sn_mapL.insert(std::make_pair(L, sn_mapL.size())), + RightSN = sn_mapR.insert(std::make_pair(R, sn_mapR.size())); + + return cmpNumbers(LeftSN.first->second, RightSN.first->second); +} + +// Test whether two basic blocks have equivalent behaviour. 
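+// Instructions are compared pairwise in order; if every compared pair is
+// equal but one block still has instructions left over, the block with
+// instructions remaining orders after the exhausted one (see the returns at
+// the end of this function).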
+int FunctionComparator::cmpBasicBlocks(const BasicBlock *BBL, + const BasicBlock *BBR) const { + BasicBlock::const_iterator InstL = BBL->begin(), InstLE = BBL->end(); + BasicBlock::const_iterator InstR = BBR->begin(), InstRE = BBR->end(); + + do { + bool needToCmpOperands = true; + if (int Res = cmpOperations(&*InstL, &*InstR, needToCmpOperands)) + return Res; + if (needToCmpOperands) { + assert(InstL->getNumOperands() == InstR->getNumOperands()); + + for (unsigned i = 0, e = InstL->getNumOperands(); i != e; ++i) { + Value *OpL = InstL->getOperand(i); + Value *OpR = InstR->getOperand(i); + if (int Res = cmpValues(OpL, OpR)) + return Res; + // cmpValues should ensure this is true. + assert(cmpTypes(OpL->getType(), OpR->getType()) == 0); + } + } + + ++InstL; + ++InstR; + } while (InstL != InstLE && InstR != InstRE); + + if (InstL != InstLE && InstR == InstRE) + return 1; + if (InstL == InstLE && InstR != InstRE) + return -1; + return 0; +} + +int FunctionComparator::compareSignature() const { + if (int Res = cmpAttrs(FnL->getAttributes(), FnR->getAttributes())) + return Res; + + if (int Res = cmpNumbers(FnL->hasGC(), FnR->hasGC())) + return Res; + + if (FnL->hasGC()) { + if (int Res = cmpMem(FnL->getGC(), FnR->getGC())) + return Res; + } + + if (int Res = cmpNumbers(FnL->hasSection(), FnR->hasSection())) + return Res; + + if (FnL->hasSection()) { + if (int Res = cmpMem(FnL->getSection(), FnR->getSection())) + return Res; + } + + if (int Res = cmpNumbers(FnL->isVarArg(), FnR->isVarArg())) + return Res; + + // TODO: if it's internal and only used in direct calls, we could handle this + // case too. + if (int Res = cmpNumbers(FnL->getCallingConv(), FnR->getCallingConv())) + return Res; + + if (int Res = cmpTypes(FnL->getFunctionType(), FnR->getFunctionType())) + return Res; + + assert(FnL->arg_size() == FnR->arg_size() && + "Identically typed functions have different numbers of args!"); + + // Visit the arguments so that they get enumerated in the order they're + // passed in. + for (Function::const_arg_iterator ArgLI = FnL->arg_begin(), + ArgRI = FnR->arg_begin(), + ArgLE = FnL->arg_end(); + ArgLI != ArgLE; ++ArgLI, ++ArgRI) { + if (cmpValues(&*ArgLI, &*ArgRI) != 0) + llvm_unreachable("Arguments repeat!"); + } + return 0; +} + +// Test whether the two functions have equivalent behaviour. +int FunctionComparator::compare() { + beginCompare(); + + if (int Res = compareSignature()) + return Res; + + // We do a CFG-ordered walk since the actual ordering of the blocks in the + // linked list is immaterial. Our walk starts at the entry block for both + // functions, then takes each block from each terminator in order. As an + // artifact, this also means that unreachable blocks are ignored. + SmallVector<const BasicBlock *, 8> FnLBBs, FnRBBs; + SmallPtrSet<const BasicBlock *, 32> VisitedBBs; // in terms of F1. 
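+  // Tracking visited blocks for FnL alone is sufficient: blocks are popped in
+  // matched pairs, so the corresponding FnR block is implicitly visited at the
+  // same time.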
+ + FnLBBs.push_back(&FnL->getEntryBlock()); + FnRBBs.push_back(&FnR->getEntryBlock()); + + VisitedBBs.insert(FnLBBs[0]); + while (!FnLBBs.empty()) { + const BasicBlock *BBL = FnLBBs.pop_back_val(); + const BasicBlock *BBR = FnRBBs.pop_back_val(); + + if (int Res = cmpValues(BBL, BBR)) + return Res; + + if (int Res = cmpBasicBlocks(BBL, BBR)) + return Res; + + const TerminatorInst *TermL = BBL->getTerminator(); + const TerminatorInst *TermR = BBR->getTerminator(); + + assert(TermL->getNumSuccessors() == TermR->getNumSuccessors()); + for (unsigned i = 0, e = TermL->getNumSuccessors(); i != e; ++i) { + if (!VisitedBBs.insert(TermL->getSuccessor(i)).second) + continue; + + FnLBBs.push_back(TermL->getSuccessor(i)); + FnRBBs.push_back(TermR->getSuccessor(i)); + } + } + return 0; +} + +namespace { + +// Accumulate the hash of a sequence of 64-bit integers. This is similar to a +// hash of a sequence of 64bit ints, but the entire input does not need to be +// available at once. This interface is necessary for functionHash because it +// needs to accumulate the hash as the structure of the function is traversed +// without saving these values to an intermediate buffer. This form of hashing +// is not often needed, as usually the object to hash is just read from a +// buffer. +class HashAccumulator64 { + uint64_t Hash; +public: + // Initialize to random constant, so the state isn't zero. + HashAccumulator64() { Hash = 0x6acaa36bef8325c5ULL; } + void add(uint64_t V) { + Hash = llvm::hashing::detail::hash_16_bytes(Hash, V); + } + // No finishing is required, because the entire hash value is used. + uint64_t getHash() { return Hash; } +}; +} // end anonymous namespace + +// A function hash is calculated by considering only the number of arguments and +// whether a function is varargs, the order of basic blocks (given by the +// successors of each basic block in depth first order), and the order of +// opcodes of each instruction within each of these basic blocks. This mirrors +// the strategy compare() uses to compare functions by walking the BBs in depth +// first order and comparing each instruction in sequence. Because this hash +// does not look at the operands, it is insensitive to things such as the +// target of calls and the constants used in the function, which makes it useful +// when possibly merging functions which are the same modulo constants and call +// targets. +FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) { + HashAccumulator64 H; + H.add(F.isVarArg()); + H.add(F.arg_size()); + + SmallVector<const BasicBlock *, 8> BBs; + SmallSet<const BasicBlock *, 16> VisitedBBs; + + // Walk the blocks in the same order as FunctionComparator::cmpBasicBlocks(), + // accumulating the hash of the function "structure." 
(BB and opcode sequence) + BBs.push_back(&F.getEntryBlock()); + VisitedBBs.insert(BBs[0]); + while (!BBs.empty()) { + const BasicBlock *BB = BBs.pop_back_val(); + // This random value acts as a block header, as otherwise the partition of + // opcodes into BBs wouldn't affect the hash, only the order of the opcodes + H.add(45798); + for (auto &Inst : *BB) { + H.add(Inst.getOpcode()); + } + const TerminatorInst *Term = BB->getTerminator(); + for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { + if (!VisitedBBs.insert(Term->getSuccessor(i)).second) + continue; + BBs.push_back(Term->getSuccessor(i)); + } + } + return H.getHash(); +} + + diff --git a/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp b/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp new file mode 100644 index 000000000000..9844190ef84a --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp @@ -0,0 +1,262 @@ +//===- lib/Transforms/Utils/FunctionImportUtils.cpp - Importing utilities -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the FunctionImportGlobalProcessing class, used +// to perform the necessary global value handling for function importing. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ModuleSummaryAnalysis.h" +#include "llvm/Transforms/Utils/FunctionImportUtils.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +using namespace llvm; + +/// Checks if we should import SGV as a definition, otherwise import as a +/// declaration. +bool FunctionImportGlobalProcessing::doImportAsDefinition( + const GlobalValue *SGV, DenseSet<const GlobalValue *> *GlobalsToImport) { + + // For alias, we tie the definition to the base object. Extract it and recurse + if (auto *GA = dyn_cast<GlobalAlias>(SGV)) { + if (GA->hasWeakAnyLinkage()) + return false; + const GlobalObject *GO = GA->getBaseObject(); + if (!GO->hasLinkOnceODRLinkage()) + return false; + return FunctionImportGlobalProcessing::doImportAsDefinition( + GO, GlobalsToImport); + } + // Only import the globals requested for importing. + if (GlobalsToImport->count(SGV)) + return true; + // Otherwise no. + return false; +} + +bool FunctionImportGlobalProcessing::doImportAsDefinition( + const GlobalValue *SGV) { + if (!isPerformingImport()) + return false; + return FunctionImportGlobalProcessing::doImportAsDefinition(SGV, + GlobalsToImport); +} + +bool FunctionImportGlobalProcessing::shouldPromoteLocalToGlobal( + const GlobalValue *SGV) { + assert(SGV->hasLocalLinkage()); + // Both the imported references and the original local variable must + // be promoted. + if (!isPerformingImport() && !isModuleExporting()) + return false; + + if (isPerformingImport()) { + assert((!GlobalsToImport->count(SGV) || !isNonRenamableLocal(*SGV)) && + "Attempting to promote non-renamable local"); + // We don't know for sure yet if we are importing this value (as either + // a reference or a def), since we are simply walking all values in the + // module. But by necessity if we end up importing it and it is local, + // it must be promoted, so unconditionally promote all values in the + // importing module. + return true; + } + + // When exporting, consult the index. 
We can have more than one local + // with the same GUID, in the case of same-named locals in different but + // same-named source files that were compiled in their respective directories + // (so the source file name and resulting GUID is the same). Find the one + // in this module. + auto Summary = ImportIndex.findSummaryInModule( + SGV->getGUID(), SGV->getParent()->getModuleIdentifier()); + assert(Summary && "Missing summary for global value when exporting"); + auto Linkage = Summary->linkage(); + if (!GlobalValue::isLocalLinkage(Linkage)) { + assert(!isNonRenamableLocal(*SGV) && + "Attempting to promote non-renamable local"); + return true; + } + + return false; +} + +#ifndef NDEBUG +bool FunctionImportGlobalProcessing::isNonRenamableLocal( + const GlobalValue &GV) const { + if (!GV.hasLocalLinkage()) + return false; + // This needs to stay in sync with the logic in buildModuleSummaryIndex. + if (GV.hasSection()) + return true; + if (Used.count(const_cast<GlobalValue *>(&GV))) + return true; + return false; +} +#endif + +std::string FunctionImportGlobalProcessing::getName(const GlobalValue *SGV, + bool DoPromote) { + // For locals that must be promoted to global scope, ensure that + // the promoted name uniquely identifies the copy in the original module, + // using the ID assigned during combined index creation. When importing, + // we rename all locals (not just those that are promoted) in order to + // avoid naming conflicts between locals imported from different modules. + if (SGV->hasLocalLinkage() && (DoPromote || isPerformingImport())) + return ModuleSummaryIndex::getGlobalNameForLocal( + SGV->getName(), + ImportIndex.getModuleHash(SGV->getParent()->getModuleIdentifier())); + return SGV->getName(); +} + +GlobalValue::LinkageTypes +FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV, + bool DoPromote) { + // Any local variable that is referenced by an exported function needs + // to be promoted to global scope. Since we don't currently know which + // functions reference which local variables/functions, we must treat + // all as potentially exported if this module is exporting anything. + if (isModuleExporting()) { + if (SGV->hasLocalLinkage() && DoPromote) + return GlobalValue::ExternalLinkage; + return SGV->getLinkage(); + } + + // Otherwise, if we aren't importing, no linkage change is needed. + if (!isPerformingImport()) + return SGV->getLinkage(); + + switch (SGV->getLinkage()) { + case GlobalValue::ExternalLinkage: + // External defnitions are converted to available_externally + // definitions upon import, so that they are available for inlining + // and/or optimization, but are turned into declarations later + // during the EliminateAvailableExternally pass. + if (doImportAsDefinition(SGV) && !dyn_cast<GlobalAlias>(SGV)) + return GlobalValue::AvailableExternallyLinkage; + // An imported external declaration stays external. + return SGV->getLinkage(); + + case GlobalValue::AvailableExternallyLinkage: + // An imported available_externally definition converts + // to external if imported as a declaration. + if (!doImportAsDefinition(SGV)) + return GlobalValue::ExternalLinkage; + // An imported available_externally declaration stays that way. + return SGV->getLinkage(); + + case GlobalValue::LinkOnceAnyLinkage: + case GlobalValue::LinkOnceODRLinkage: + // These both stay the same when importing the definition. + // The ThinLTO pass will eventually force-import their definitions. 
+ return SGV->getLinkage(); + + case GlobalValue::WeakAnyLinkage: + // Can't import weak_any definitions correctly, or we might change the + // program semantics, since the linker will pick the first weak_any + // definition and importing would change the order they are seen by the + // linker. The module linking caller needs to enforce this. + assert(!doImportAsDefinition(SGV)); + // If imported as a declaration, it becomes external_weak. + return SGV->getLinkage(); + + case GlobalValue::WeakODRLinkage: + // For weak_odr linkage, there is a guarantee that all copies will be + // equivalent, so the issue described above for weak_any does not exist, + // and the definition can be imported. It can be treated similarly + // to an imported externally visible global value. + if (doImportAsDefinition(SGV) && !dyn_cast<GlobalAlias>(SGV)) + return GlobalValue::AvailableExternallyLinkage; + else + return GlobalValue::ExternalLinkage; + + case GlobalValue::AppendingLinkage: + // It would be incorrect to import an appending linkage variable, + // since it would cause global constructors/destructors to be + // executed multiple times. This should have already been handled + // by linkIfNeeded, and we will assert in shouldLinkFromSource + // if we try to import, so we simply return AppendingLinkage. + return GlobalValue::AppendingLinkage; + + case GlobalValue::InternalLinkage: + case GlobalValue::PrivateLinkage: + // If we are promoting the local to global scope, it is handled + // similarly to a normal externally visible global. + if (DoPromote) { + if (doImportAsDefinition(SGV) && !dyn_cast<GlobalAlias>(SGV)) + return GlobalValue::AvailableExternallyLinkage; + else + return GlobalValue::ExternalLinkage; + } + // A non-promoted imported local definition stays local. + // The ThinLTO pass will eventually force-import their definitions. + return SGV->getLinkage(); + + case GlobalValue::ExternalWeakLinkage: + // External weak doesn't apply to definitions, must be a declaration. + assert(!doImportAsDefinition(SGV)); + // Linkage stays external_weak. + return SGV->getLinkage(); + + case GlobalValue::CommonLinkage: + // Linkage stays common on definitions. + // The ThinLTO pass will eventually force-import their definitions. + return SGV->getLinkage(); + } + + llvm_unreachable("unknown linkage type"); +} + +void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) { + bool DoPromote = false; + if (GV.hasLocalLinkage() && + ((DoPromote = shouldPromoteLocalToGlobal(&GV)) || isPerformingImport())) { + // Once we change the name or linkage it is difficult to determine + // again whether we should promote since shouldPromoteLocalToGlobal needs + // to locate the summary (based on GUID from name and linkage). Therefore, + // use DoPromote result saved above. + GV.setName(getName(&GV, DoPromote)); + GV.setLinkage(getLinkage(&GV, DoPromote)); + if (!GV.hasLocalLinkage()) + GV.setVisibility(GlobalValue::HiddenVisibility); + } else + GV.setLinkage(getLinkage(&GV, /* DoPromote */ false)); + + // Remove functions imported as available externally defs from comdats, + // as this is a declaration for the linker, and will be dropped eventually. + // It is illegal for comdats to contain declarations. 
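+  // (isDeclarationForLinker() also returns true for available_externally
+  // definitions, which is how functions imported that way are caught below.)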
+ auto *GO = dyn_cast_or_null<GlobalObject>(&GV); + if (GO && GO->isDeclarationForLinker() && GO->hasComdat()) { + // The IRMover should not have placed any imported declarations in + // a comdat, so the only declaration that should be in a comdat + // at this point would be a definition imported as available_externally. + assert(GO->hasAvailableExternallyLinkage() && + "Expected comdat on definition (possibly available external)"); + GO->setComdat(nullptr); + } +} + +void FunctionImportGlobalProcessing::processGlobalsForThinLTO() { + for (GlobalVariable &GV : M.globals()) + processGlobalForThinLTO(GV); + for (Function &SF : M) + processGlobalForThinLTO(SF); + for (GlobalAlias &GA : M.aliases()) + processGlobalForThinLTO(GA); +} + +bool FunctionImportGlobalProcessing::run() { + processGlobalsForThinLTO(); + return false; +} + +bool llvm::renameModuleForThinLTO( + Module &M, const ModuleSummaryIndex &Index, + DenseSet<const GlobalValue *> *GlobalsToImport) { + FunctionImportGlobalProcessing ThinLTOProcessing(M, Index, GlobalsToImport); + return ThinLTOProcessing.run(); +} diff --git a/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp new file mode 100644 index 000000000000..74ebcda8355c --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp @@ -0,0 +1,187 @@ +//===-- GlobalStatus.cpp - Compute status info for globals -----------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Transforms/Utils/GlobalStatus.h" + +using namespace llvm; + +/// Return the stronger of the two ordering. If the two orderings are acquire +/// and release, then return AcquireRelease. +/// +static AtomicOrdering strongerOrdering(AtomicOrdering X, AtomicOrdering Y) { + if ((X == AtomicOrdering::Acquire && Y == AtomicOrdering::Release) || + (Y == AtomicOrdering::Acquire && X == AtomicOrdering::Release)) + return AtomicOrdering::AcquireRelease; + return (AtomicOrdering)std::max((unsigned)X, (unsigned)Y); +} + +/// It is safe to destroy a constant iff it is only used by constants itself. +/// Note that constants cannot be cyclic, so this test is pretty easy to +/// implement recursively. +/// +bool llvm::isSafeToDestroyConstant(const Constant *C) { + if (isa<GlobalValue>(C)) + return false; + + if (isa<ConstantData>(C)) + return false; + + for (const User *U : C->users()) + if (const Constant *CU = dyn_cast<Constant>(U)) { + if (!isSafeToDestroyConstant(CU)) + return false; + } else + return false; + return true; +} + +static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, + SmallPtrSetImpl<const PHINode *> &PhiUsers) { + if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) + if (GV->isExternallyInitialized()) + GS.StoredType = GlobalStatus::StoredOnce; + + for (const Use &U : V->uses()) { + const User *UR = U.getUser(); + if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(UR)) { + GS.HasNonInstructionUser = true; + + // If the result of the constantexpr isn't pointer type, then we won't + // know to expect it in various places. Just reject early. 
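+      // (For example, a ptrtoint ConstantExpr over the global produces an
+      // integer, so it is rejected here.)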
+ if (!isa<PointerType>(CE->getType())) + return true; + + if (analyzeGlobalAux(CE, GS, PhiUsers)) + return true; + } else if (const Instruction *I = dyn_cast<Instruction>(UR)) { + if (!GS.HasMultipleAccessingFunctions) { + const Function *F = I->getParent()->getParent(); + if (!GS.AccessingFunction) + GS.AccessingFunction = F; + else if (GS.AccessingFunction != F) + GS.HasMultipleAccessingFunctions = true; + } + if (const LoadInst *LI = dyn_cast<LoadInst>(I)) { + GS.IsLoaded = true; + // Don't hack on volatile loads. + if (LI->isVolatile()) + return true; + GS.Ordering = strongerOrdering(GS.Ordering, LI->getOrdering()); + } else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) { + // Don't allow a store OF the address, only stores TO the address. + if (SI->getOperand(0) == V) + return true; + + // Don't hack on volatile stores. + if (SI->isVolatile()) + return true; + + GS.Ordering = strongerOrdering(GS.Ordering, SI->getOrdering()); + + // If this is a direct store to the global (i.e., the global is a scalar + // value, not an aggregate), keep more specific information about + // stores. + if (GS.StoredType != GlobalStatus::Stored) { + if (const GlobalVariable *GV = + dyn_cast<GlobalVariable>(SI->getOperand(1))) { + Value *StoredVal = SI->getOperand(0); + + if (Constant *C = dyn_cast<Constant>(StoredVal)) { + if (C->isThreadDependent()) { + // The stored value changes between threads; don't track it. + return true; + } + } + + if (GV->hasInitializer() && StoredVal == GV->getInitializer()) { + if (GS.StoredType < GlobalStatus::InitializerStored) + GS.StoredType = GlobalStatus::InitializerStored; + } else if (isa<LoadInst>(StoredVal) && + cast<LoadInst>(StoredVal)->getOperand(0) == GV) { + if (GS.StoredType < GlobalStatus::InitializerStored) + GS.StoredType = GlobalStatus::InitializerStored; + } else if (GS.StoredType < GlobalStatus::StoredOnce) { + GS.StoredType = GlobalStatus::StoredOnce; + GS.StoredOnceValue = StoredVal; + } else if (GS.StoredType == GlobalStatus::StoredOnce && + GS.StoredOnceValue == StoredVal) { + // noop. + } else { + GS.StoredType = GlobalStatus::Stored; + } + } else { + GS.StoredType = GlobalStatus::Stored; + } + } + } else if (isa<BitCastInst>(I)) { + if (analyzeGlobalAux(I, GS, PhiUsers)) + return true; + } else if (isa<GetElementPtrInst>(I)) { + if (analyzeGlobalAux(I, GS, PhiUsers)) + return true; + } else if (isa<SelectInst>(I)) { + if (analyzeGlobalAux(I, GS, PhiUsers)) + return true; + } else if (const PHINode *PN = dyn_cast<PHINode>(I)) { + // PHI nodes we can check just like select or GEP instructions, but we + // have to be careful about infinite recursion. + if (PhiUsers.insert(PN).second) // Not already visited. + if (analyzeGlobalAux(I, GS, PhiUsers)) + return true; + } else if (isa<CmpInst>(I)) { + GS.IsCompared = true; + } else if (const MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) { + if (MTI->isVolatile()) + return true; + if (MTI->getArgOperand(0) == V) + GS.StoredType = GlobalStatus::Stored; + if (MTI->getArgOperand(1) == V) + GS.IsLoaded = true; + } else if (const MemSetInst *MSI = dyn_cast<MemSetInst>(I)) { + assert(MSI->getArgOperand(0) == V && "Memset only takes one pointer!"); + if (MSI->isVolatile()) + return true; + GS.StoredType = GlobalStatus::Stored; + } else if (auto C = ImmutableCallSite(I)) { + if (!C.isCallee(&U)) + return true; + GS.IsLoaded = true; + } else { + return true; // Any other non-load instruction might take address! 
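+        // (For example, a ptrtoint, an atomicrmw on the global, or a return
+        // of the pointer itself all land here and make the analysis give up.)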
+ } + } else if (const Constant *C = dyn_cast<Constant>(UR)) { + GS.HasNonInstructionUser = true; + // We might have a dead and dangling constant hanging off of here. + if (!isSafeToDestroyConstant(C)) + return true; + } else { + GS.HasNonInstructionUser = true; + // Otherwise must be some other user. + return true; + } + } + + return false; +} + +bool GlobalStatus::analyzeGlobal(const Value *V, GlobalStatus &GS) { + SmallPtrSet<const PHINode *, 16> PhiUsers; + return analyzeGlobalAux(V, GS, PhiUsers); +} + +GlobalStatus::GlobalStatus() + : IsCompared(false), IsLoaded(false), StoredType(NotStored), + StoredOnceValue(nullptr), AccessingFunction(nullptr), + HasMultipleAccessingFunctions(false), HasNonInstructionUser(false), + Ordering(AtomicOrdering::NotAtomic) {} diff --git a/contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp b/contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp new file mode 100644 index 000000000000..ed018bb73107 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp @@ -0,0 +1,203 @@ +//===-- ImportedFunctionsInliningStats.cpp ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// Generating inliner statistics for imported functions, mostly useful for +// ThinLTO. +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <iomanip> +#include <sstream> +using namespace llvm; + +ImportedFunctionsInliningStatistics::InlineGraphNode & +ImportedFunctionsInliningStatistics::createInlineGraphNode(const Function &F) { + + auto &ValueLookup = NodesMap[F.getName()]; + if (!ValueLookup) { + ValueLookup = llvm::make_unique<InlineGraphNode>(); + ValueLookup->Imported = F.getMetadata("thinlto_src_module") != nullptr; + } + return *ValueLookup; +} + +void ImportedFunctionsInliningStatistics::recordInline(const Function &Caller, + const Function &Callee) { + + InlineGraphNode &CallerNode = createInlineGraphNode(Caller); + InlineGraphNode &CalleeNode = createInlineGraphNode(Callee); + CalleeNode.NumberOfInlines++; + + if (!CallerNode.Imported && !CalleeNode.Imported) { + // Direct inline from not imported callee to not imported caller, so we + // don't have to add this to graph. It might be very helpful if you wanna + // get the inliner statistics in compile step where there are no imported + // functions. In this case the graph would be empty. + CalleeNode.NumberOfRealInlines++; + return; + } + + CallerNode.InlinedCallees.push_back(&CalleeNode); + if (!CallerNode.Imported) { + // We could avoid second lookup, but it would make the code ultra ugly. + auto It = NodesMap.find(Caller.getName()); + assert(It != NodesMap.end() && "The node should be already there."); + // Save Caller as a starting node for traversal. The string has to be one + // from map because Caller can disappear (and function name with it). 
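+    // (It->first() returns a StringRef backed by the map's own key storage,
+    // so the saved name stays valid even after the Function is erased.)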
+ NonImportedCallers.push_back(It->first()); + } +} + +void ImportedFunctionsInliningStatistics::setModuleInfo(const Module &M) { + ModuleName = M.getName(); + for (const auto &F : M.functions()) { + AllFunctions++; + ImportedFunctions += int(F.getMetadata("thinlto_src_module") != nullptr); + } +} +static std::string getStatString(const char *Msg, int32_t Fraction, int32_t All, + const char *PercentageOfMsg, + bool LineEnd = true) { + double Result = 0; + if (All != 0) + Result = 100 * static_cast<double>(Fraction) / All; + + std::stringstream Str; + Str << std::setprecision(4) << Msg << ": " << Fraction << " [" << Result + << "% of " << PercentageOfMsg << "]"; + if (LineEnd) + Str << "\n"; + return Str.str(); +} + +void ImportedFunctionsInliningStatistics::dump(const bool Verbose) { + calculateRealInlines(); + NonImportedCallers.clear(); + + int32_t InlinedImportedFunctionsCount = 0; + int32_t InlinedNotImportedFunctionsCount = 0; + + int32_t InlinedImportedFunctionsToImportingModuleCount = 0; + int32_t InlinedNotImportedFunctionsToImportingModuleCount = 0; + + const auto SortedNodes = getSortedNodes(); + std::string Out; + Out.reserve(5000); + raw_string_ostream Ostream(Out); + + Ostream << "------- Dumping inliner stats for [" << ModuleName + << "] -------\n"; + + if (Verbose) + Ostream << "-- List of inlined functions:\n"; + + for (const auto &Node : SortedNodes) { + assert(Node->second->NumberOfInlines >= Node->second->NumberOfRealInlines); + if (Node->second->NumberOfInlines == 0) + continue; + + if (Node->second->Imported) { + InlinedImportedFunctionsCount++; + InlinedImportedFunctionsToImportingModuleCount += + int(Node->second->NumberOfRealInlines > 0); + } else { + InlinedNotImportedFunctionsCount++; + InlinedNotImportedFunctionsToImportingModuleCount += + int(Node->second->NumberOfRealInlines > 0); + } + + if (Verbose) + Ostream << "Inlined " + << (Node->second->Imported ? "imported " : "not imported ") + << "function [" << Node->first() << "]" + << ": #inlines = " << Node->second->NumberOfInlines + << ", #inlines_to_importing_module = " + << Node->second->NumberOfRealInlines << "\n"; + } + + auto InlinedFunctionsCount = + InlinedImportedFunctionsCount + InlinedNotImportedFunctionsCount; + auto NotImportedFuncCount = AllFunctions - ImportedFunctions; + auto ImportedNotInlinedIntoModule = + ImportedFunctions - InlinedImportedFunctionsToImportingModuleCount; + + Ostream << "-- Summary:\n" + << "All functions: " << AllFunctions + << ", imported functions: " << ImportedFunctions << "\n" + << getStatString("inlined functions", InlinedFunctionsCount, + AllFunctions, "all functions") + << getStatString("imported functions inlined anywhere", + InlinedImportedFunctionsCount, ImportedFunctions, + "imported functions") + << getStatString("imported functions inlined into importing module", + InlinedImportedFunctionsToImportingModuleCount, + ImportedFunctions, "imported functions", + /*LineEnd=*/false) + << getStatString(", remaining", ImportedNotInlinedIntoModule, + ImportedFunctions, "imported functions") + << getStatString("non-imported functions inlined anywhere", + InlinedNotImportedFunctionsCount, + NotImportedFuncCount, "non-imported functions") + << getStatString( + "non-imported functions inlined into importing module", + InlinedNotImportedFunctionsToImportingModuleCount, + NotImportedFuncCount, "non-imported functions"); + Ostream.flush(); + dbgs() << Out; +} + +void ImportedFunctionsInliningStatistics::calculateRealInlines() { + // Removing duplicated Callers. 
+ std::sort(NonImportedCallers.begin(), NonImportedCallers.end()); + NonImportedCallers.erase( + std::unique(NonImportedCallers.begin(), NonImportedCallers.end()), + NonImportedCallers.end()); + + for (const auto &Name : NonImportedCallers) { + auto &Node = *NodesMap[Name]; + if (!Node.Visited) + dfs(Node); + } +} + +void ImportedFunctionsInliningStatistics::dfs(InlineGraphNode &GraphNode) { + assert(!GraphNode.Visited); + GraphNode.Visited = true; + for (auto *const InlinedFunctionNode : GraphNode.InlinedCallees) { + InlinedFunctionNode->NumberOfRealInlines++; + if (!InlinedFunctionNode->Visited) + dfs(*InlinedFunctionNode); + } +} + +ImportedFunctionsInliningStatistics::SortedNodesTy +ImportedFunctionsInliningStatistics::getSortedNodes() { + SortedNodesTy SortedNodes; + SortedNodes.reserve(NodesMap.size()); + for (const NodesMapTy::value_type& Node : NodesMap) + SortedNodes.push_back(&Node); + + std::sort( + SortedNodes.begin(), SortedNodes.end(), + [&](const SortedNodesTy::value_type &Lhs, + const SortedNodesTy::value_type &Rhs) { + if (Lhs->second->NumberOfInlines != Rhs->second->NumberOfInlines) + return Lhs->second->NumberOfInlines > Rhs->second->NumberOfInlines; + if (Lhs->second->NumberOfRealInlines != Rhs->second->NumberOfRealInlines) + return Lhs->second->NumberOfRealInlines > + Rhs->second->NumberOfRealInlines; + return Lhs->first() < Rhs->first(); + }); + return SortedNodes; +} diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp new file mode 100644 index 000000000000..a40079ca8e76 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -0,0 +1,2216 @@ +//===- InlineFunction.cpp - Code to perform function inlining -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements inlining of a function into a call site, resolving +// parameters and the return value as appropriate. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/EHPersonalities.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DIBuilder.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Support/CommandLine.h" +#include <algorithm> + +using namespace llvm; + +static cl::opt<bool> +EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true), + cl::Hidden, + cl::desc("Convert noalias attributes to metadata during inlining.")); + +static cl::opt<bool> +PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining", + cl::init(true), cl::Hidden, + cl::desc("Convert align attributes to assumptions during inlining.")); + +bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI, + AAResults *CalleeAAR, bool InsertLifetime) { + return InlineFunction(CallSite(CI), IFI, CalleeAAR, InsertLifetime); +} +bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI, + AAResults *CalleeAAR, bool InsertLifetime) { + return InlineFunction(CallSite(II), IFI, CalleeAAR, InsertLifetime); +} + +namespace { + /// A class for recording information about inlining a landing pad. + class LandingPadInliningInfo { + BasicBlock *OuterResumeDest; ///< Destination of the invoke's unwind. + BasicBlock *InnerResumeDest; ///< Destination for the callee's resume. + LandingPadInst *CallerLPad; ///< LandingPadInst associated with the invoke. + PHINode *InnerEHValuesPHI; ///< PHI for EH values from landingpad insts. + SmallVector<Value*, 8> UnwindDestPHIValues; + + public: + LandingPadInliningInfo(InvokeInst *II) + : OuterResumeDest(II->getUnwindDest()), InnerResumeDest(nullptr), + CallerLPad(nullptr), InnerEHValuesPHI(nullptr) { + // If there are PHI nodes in the unwind destination block, we need to keep + // track of which values came into them from the invoke before removing + // the edge from this block. + llvm::BasicBlock *InvokeBB = II->getParent(); + BasicBlock::iterator I = OuterResumeDest->begin(); + for (; isa<PHINode>(I); ++I) { + // Save the value to use for this edge. + PHINode *PHI = cast<PHINode>(I); + UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB)); + } + + CallerLPad = cast<LandingPadInst>(I); + } + + /// The outer unwind destination is the target of + /// unwind edges introduced for calls within the inlined function. + BasicBlock *getOuterResumeDest() const { + return OuterResumeDest; + } + + BasicBlock *getInnerResumeDest(); + + LandingPadInst *getLandingPadInst() const { return CallerLPad; } + + /// Forward the 'resume' instruction to the caller's landing pad block. 
+ /// When the landing pad block has only one predecessor, this is + /// a simple branch. When there is more than one predecessor, we need to + /// split the landing pad block after the landingpad instruction and jump + /// to there. + void forwardResume(ResumeInst *RI, + SmallPtrSetImpl<LandingPadInst*> &InlinedLPads); + + /// Add incoming-PHI values to the unwind destination block for the given + /// basic block, using the values for the original invoke's source block. + void addIncomingPHIValuesFor(BasicBlock *BB) const { + addIncomingPHIValuesForInto(BB, OuterResumeDest); + } + + void addIncomingPHIValuesForInto(BasicBlock *src, BasicBlock *dest) const { + BasicBlock::iterator I = dest->begin(); + for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) { + PHINode *phi = cast<PHINode>(I); + phi->addIncoming(UnwindDestPHIValues[i], src); + } + } + }; +} // anonymous namespace + +/// Get or create a target for the branch from ResumeInsts. +BasicBlock *LandingPadInliningInfo::getInnerResumeDest() { + if (InnerResumeDest) return InnerResumeDest; + + // Split the landing pad. + BasicBlock::iterator SplitPoint = ++CallerLPad->getIterator(); + InnerResumeDest = + OuterResumeDest->splitBasicBlock(SplitPoint, + OuterResumeDest->getName() + ".body"); + + // The number of incoming edges we expect to the inner landing pad. + const unsigned PHICapacity = 2; + + // Create corresponding new PHIs for all the PHIs in the outer landing pad. + Instruction *InsertPoint = &InnerResumeDest->front(); + BasicBlock::iterator I = OuterResumeDest->begin(); + for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) { + PHINode *OuterPHI = cast<PHINode>(I); + PHINode *InnerPHI = PHINode::Create(OuterPHI->getType(), PHICapacity, + OuterPHI->getName() + ".lpad-body", + InsertPoint); + OuterPHI->replaceAllUsesWith(InnerPHI); + InnerPHI->addIncoming(OuterPHI, OuterResumeDest); + } + + // Create a PHI for the exception values. + InnerEHValuesPHI = PHINode::Create(CallerLPad->getType(), PHICapacity, + "eh.lpad-body", InsertPoint); + CallerLPad->replaceAllUsesWith(InnerEHValuesPHI); + InnerEHValuesPHI->addIncoming(CallerLPad, OuterResumeDest); + + // All done. + return InnerResumeDest; +} + +/// Forward the 'resume' instruction to the caller's landing pad block. +/// When the landing pad block has only one predecessor, this is a simple +/// branch. When there is more than one predecessor, we need to split the +/// landing pad block after the landingpad instruction and jump to there. +void LandingPadInliningInfo::forwardResume( + ResumeInst *RI, SmallPtrSetImpl<LandingPadInst *> &InlinedLPads) { + BasicBlock *Dest = getInnerResumeDest(); + BasicBlock *Src = RI->getParent(); + + BranchInst::Create(Dest, Src); + + // Update the PHIs in the destination. They were inserted in an order which + // makes this work. + addIncomingPHIValuesForInto(Src, Dest); + + InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src); + RI->eraseFromParent(); +} + +/// Helper for getUnwindDestToken/getUnwindDestTokenHelper. +static Value *getParentPad(Value *EHPad) { + if (auto *FPI = dyn_cast<FuncletPadInst>(EHPad)) + return FPI->getParentPad(); + return cast<CatchSwitchInst>(EHPad)->getParentPad(); +} + +typedef DenseMap<Instruction *, Value *> UnwindDestMemoTy; + +/// Helper for getUnwindDestToken that does the descendant-ward part of +/// the search. 
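+/// As a simple illustration: for a cleanuppad whose cleanupret unwinds to the
+/// caller, this returns ConstantTokenNone; if the cleanupret names an unwind
+/// destination block, the first non-PHI instruction of that block (its EH pad)
+/// is returned instead.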
+static Value *getUnwindDestTokenHelper(Instruction *EHPad, + UnwindDestMemoTy &MemoMap) { + SmallVector<Instruction *, 8> Worklist(1, EHPad); + + while (!Worklist.empty()) { + Instruction *CurrentPad = Worklist.pop_back_val(); + // We only put pads on the worklist that aren't in the MemoMap. When + // we find an unwind dest for a pad we may update its ancestors, but + // the queue only ever contains uncles/great-uncles/etc. of CurrentPad, + // so they should never get updated while queued on the worklist. + assert(!MemoMap.count(CurrentPad)); + Value *UnwindDestToken = nullptr; + if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(CurrentPad)) { + if (CatchSwitch->hasUnwindDest()) { + UnwindDestToken = CatchSwitch->getUnwindDest()->getFirstNonPHI(); + } else { + // Catchswitch doesn't have a 'nounwind' variant, and one might be + // annotated as "unwinds to caller" when really it's nounwind (see + // e.g. SimplifyCFGOpt::SimplifyUnreachable), so we can't infer the + // parent's unwind dest from this. We can check its catchpads' + // descendants, since they might include a cleanuppad with an + // "unwinds to caller" cleanupret, which can be trusted. + for (auto HI = CatchSwitch->handler_begin(), + HE = CatchSwitch->handler_end(); + HI != HE && !UnwindDestToken; ++HI) { + BasicBlock *HandlerBlock = *HI; + auto *CatchPad = cast<CatchPadInst>(HandlerBlock->getFirstNonPHI()); + for (User *Child : CatchPad->users()) { + // Intentionally ignore invokes here -- since the catchswitch is + // marked "unwind to caller", it would be a verifier error if it + // contained an invoke which unwinds out of it, so any invoke we'd + // encounter must unwind to some child of the catch. + if (!isa<CleanupPadInst>(Child) && !isa<CatchSwitchInst>(Child)) + continue; + + Instruction *ChildPad = cast<Instruction>(Child); + auto Memo = MemoMap.find(ChildPad); + if (Memo == MemoMap.end()) { + // Haven't figured out this child pad yet; queue it. + Worklist.push_back(ChildPad); + continue; + } + // We've already checked this child, but might have found that + // it offers no proof either way. + Value *ChildUnwindDestToken = Memo->second; + if (!ChildUnwindDestToken) + continue; + // We already know the child's unwind dest, which can either + // be ConstantTokenNone to indicate unwind to caller, or can + // be another child of the catchpad. Only the former indicates + // the unwind dest of the catchswitch. + if (isa<ConstantTokenNone>(ChildUnwindDestToken)) { + UnwindDestToken = ChildUnwindDestToken; + break; + } + assert(getParentPad(ChildUnwindDestToken) == CatchPad); + } + } + } + } else { + auto *CleanupPad = cast<CleanupPadInst>(CurrentPad); + for (User *U : CleanupPad->users()) { + if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(U)) { + if (BasicBlock *RetUnwindDest = CleanupRet->getUnwindDest()) + UnwindDestToken = RetUnwindDest->getFirstNonPHI(); + else + UnwindDestToken = ConstantTokenNone::get(CleanupPad->getContext()); + break; + } + Value *ChildUnwindDestToken; + if (auto *Invoke = dyn_cast<InvokeInst>(U)) { + ChildUnwindDestToken = Invoke->getUnwindDest()->getFirstNonPHI(); + } else if (isa<CleanupPadInst>(U) || isa<CatchSwitchInst>(U)) { + Instruction *ChildPad = cast<Instruction>(U); + auto Memo = MemoMap.find(ChildPad); + if (Memo == MemoMap.end()) { + // Haven't resolved this child yet; queue it and keep searching. + Worklist.push_back(ChildPad); + continue; + } + // We've checked this child, but still need to ignore it if it + // had no proof either way. 
+ ChildUnwindDestToken = Memo->second; + if (!ChildUnwindDestToken) + continue; + } else { + // Not a relevant user of the cleanuppad + continue; + } + // In a well-formed program, the child/invoke must either unwind to + // an(other) child of the cleanup, or exit the cleanup. In the + // first case, continue searching. + if (isa<Instruction>(ChildUnwindDestToken) && + getParentPad(ChildUnwindDestToken) == CleanupPad) + continue; + UnwindDestToken = ChildUnwindDestToken; + break; + } + } + // If we haven't found an unwind dest for CurrentPad, we may have queued its + // children, so move on to the next in the worklist. + if (!UnwindDestToken) + continue; + + // Now we know that CurrentPad unwinds to UnwindDestToken. It also exits + // any ancestors of CurrentPad up to but not including UnwindDestToken's + // parent pad. Record this in the memo map, and check to see if the + // original EHPad being queried is one of the ones exited. + Value *UnwindParent; + if (auto *UnwindPad = dyn_cast<Instruction>(UnwindDestToken)) + UnwindParent = getParentPad(UnwindPad); + else + UnwindParent = nullptr; + bool ExitedOriginalPad = false; + for (Instruction *ExitedPad = CurrentPad; + ExitedPad && ExitedPad != UnwindParent; + ExitedPad = dyn_cast<Instruction>(getParentPad(ExitedPad))) { + // Skip over catchpads since they just follow their catchswitches. + if (isa<CatchPadInst>(ExitedPad)) + continue; + MemoMap[ExitedPad] = UnwindDestToken; + ExitedOriginalPad |= (ExitedPad == EHPad); + } + + if (ExitedOriginalPad) + return UnwindDestToken; + + // Continue the search. + } + + // No definitive information is contained within this funclet. + return nullptr; +} + +/// Given an EH pad, find where it unwinds. If it unwinds to an EH pad, +/// return that pad instruction. If it unwinds to caller, return +/// ConstantTokenNone. If it does not have a definitive unwind destination, +/// return nullptr. +/// +/// This routine gets invoked for calls in funclets in inlinees when inlining +/// an invoke. Since many funclets don't have calls inside them, it's queried +/// on-demand rather than building a map of pads to unwind dests up front. +/// Determining a funclet's unwind dest may require recursively searching its +/// descendants, and also ancestors and cousins if the descendants don't provide +/// an answer. Since most funclets will have their unwind dest immediately +/// available as the unwind dest of a catchswitch or cleanupret, this routine +/// searches top-down from the given pad and then up. To avoid worst-case +/// quadratic run-time given that approach, it uses a memo map to avoid +/// re-processing funclet trees. The callers that rewrite the IR as they go +/// take advantage of this, for correctness, by checking/forcing rewritten +/// pads' entries to match the original callee view. +static Value *getUnwindDestToken(Instruction *EHPad, + UnwindDestMemoTy &MemoMap) { + // Catchpads unwind to the same place as their catchswitch; + // redirct any queries on catchpads so the code below can + // deal with just catchswitches and cleanuppads. + if (auto *CPI = dyn_cast<CatchPadInst>(EHPad)) + EHPad = CPI->getCatchSwitch(); + + // Check if we've already determined the unwind dest for this pad. + auto Memo = MemoMap.find(EHPad); + if (Memo != MemoMap.end()) + return Memo->second; + + // Search EHPad and, if necessary, its descendants. 
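+  // The helper memoizes an answer for EHPad exactly when it finds one, which
+  // is what the assert below checks.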
+ Value *UnwindDestToken = getUnwindDestTokenHelper(EHPad, MemoMap); + assert((UnwindDestToken == nullptr) != (MemoMap.count(EHPad) != 0)); + if (UnwindDestToken) + return UnwindDestToken; + + // No information is available for this EHPad from itself or any of its + // descendants. An unwind all the way out to a pad in the caller would + // need also to agree with the unwind dest of the parent funclet, so + // search up the chain to try to find a funclet with information. Put + // null entries in the memo map to avoid re-processing as we go up. + MemoMap[EHPad] = nullptr; +#ifndef NDEBUG + SmallPtrSet<Instruction *, 4> TempMemos; + TempMemos.insert(EHPad); +#endif + Instruction *LastUselessPad = EHPad; + Value *AncestorToken; + for (AncestorToken = getParentPad(EHPad); + auto *AncestorPad = dyn_cast<Instruction>(AncestorToken); + AncestorToken = getParentPad(AncestorToken)) { + // Skip over catchpads since they just follow their catchswitches. + if (isa<CatchPadInst>(AncestorPad)) + continue; + // If the MemoMap had an entry mapping AncestorPad to nullptr, since we + // haven't yet called getUnwindDestTokenHelper for AncestorPad in this + // call to getUnwindDestToken, that would mean that AncestorPad had no + // information in itself, its descendants, or its ancestors. If that + // were the case, then we should also have recorded the lack of information + // for the descendant that we're coming from. So assert that we don't + // find a null entry in the MemoMap for AncestorPad. + assert(!MemoMap.count(AncestorPad) || MemoMap[AncestorPad]); + auto AncestorMemo = MemoMap.find(AncestorPad); + if (AncestorMemo == MemoMap.end()) { + UnwindDestToken = getUnwindDestTokenHelper(AncestorPad, MemoMap); + } else { + UnwindDestToken = AncestorMemo->second; + } + if (UnwindDestToken) + break; + LastUselessPad = AncestorPad; + MemoMap[LastUselessPad] = nullptr; +#ifndef NDEBUG + TempMemos.insert(LastUselessPad); +#endif + } + + // We know that getUnwindDestTokenHelper was called on LastUselessPad and + // returned nullptr (and likewise for EHPad and any of its ancestors up to + // LastUselessPad), so LastUselessPad has no information from below. Since + // getUnwindDestTokenHelper must investigate all downward paths through + // no-information nodes to prove that a node has no information like this, + // and since any time it finds information it records it in the MemoMap for + // not just the immediately-containing funclet but also any ancestors also + // exited, it must be the case that, walking downward from LastUselessPad, + // visiting just those nodes which have not been mapped to an unwind dest + // by getUnwindDestTokenHelper (the nullptr TempMemos notwithstanding, since + // they are just used to keep getUnwindDestTokenHelper from repeating work), + // any node visited must have been exhaustively searched with no information + // for it found. + SmallVector<Instruction *, 8> Worklist(1, LastUselessPad); + while (!Worklist.empty()) { + Instruction *UselessPad = Worklist.pop_back_val(); + auto Memo = MemoMap.find(UselessPad); + if (Memo != MemoMap.end() && Memo->second) { + // Here the name 'UselessPad' is a bit of a misnomer, because we've found + // that it is a funclet that does have information about unwinding to + // a particular destination; its parent was a useless pad. + // Since its parent has no information, the unwind edge must not escape + // the parent, and must target a sibling of this pad. This local unwind + // gives us no information about EHPad. 
Leave it and the subtree rooted
+      // at it alone.
+      assert(getParentPad(Memo->second) == getParentPad(UselessPad));
+      continue;
+    }
+    // We know we don't have information for UselessPad. If it has an entry in
+    // the MemoMap (mapping it to nullptr), it must be one of the TempMemos
+    // added on this invocation of getUnwindDestToken; if a previous invocation
+    // recorded nullptr, it would have had to prove that the ancestors of
+    // UselessPad, which include LastUselessPad, had no information, and that
+    // in turn would have required proving that the descendants of
+    // LastUselessPad, which include EHPad, have no information about
+    // LastUselessPad, which would imply that EHPad was mapped to nullptr in
+    // the MemoMap on that invocation, which isn't the case if we got here.
+    assert(!MemoMap.count(UselessPad) || TempMemos.count(UselessPad));
+    // Assert as we enumerate users that 'UselessPad' doesn't have any unwind
+    // information that we'd be contradicting by making a map entry for it
+    // (which is something that getUnwindDestTokenHelper must have proved for
+    // us to get here). Just assert on its direct users here; the checks in
+    // this downward walk at its descendants will verify that they don't have
+    // any unwind edges that exit 'UselessPad' either (i.e. they either have no
+    // unwind edges or unwind to a sibling).
+    MemoMap[UselessPad] = UnwindDestToken;
+    if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(UselessPad)) {
+      assert(CatchSwitch->getUnwindDest() == nullptr && "Expected useless pad");
+      for (BasicBlock *HandlerBlock : CatchSwitch->handlers()) {
+        auto *CatchPad = HandlerBlock->getFirstNonPHI();
+        for (User *U : CatchPad->users()) {
+          assert(
+              (!isa<InvokeInst>(U) ||
+               (getParentPad(
+                    cast<InvokeInst>(U)->getUnwindDest()->getFirstNonPHI()) ==
+                CatchPad)) &&
+              "Expected useless pad");
+          if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U))
+            Worklist.push_back(cast<Instruction>(U));
+        }
+      }
+    } else {
+      assert(isa<CleanupPadInst>(UselessPad));
+      for (User *U : UselessPad->users()) {
+        assert(!isa<CleanupReturnInst>(U) && "Expected useless pad");
+        assert((!isa<InvokeInst>(U) ||
+                (getParentPad(
+                     cast<InvokeInst>(U)->getUnwindDest()->getFirstNonPHI()) ==
+                 UselessPad)) &&
+               "Expected useless pad");
+        if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U))
+          Worklist.push_back(cast<Instruction>(U));
+      }
+    }
+  }
+
+  return UnwindDestToken;
+}
+
+/// When we inline a basic block into an invoke,
+/// we have to turn all of the calls that can throw into invokes.
+/// This function analyzes BB to see if there are any calls, and if so,
+/// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI
+/// nodes in that block with the values specified in InvokeDestPHIValues.
+static BasicBlock *HandleCallsInBlockInlinedThroughInvoke(
+    BasicBlock *BB, BasicBlock *UnwindEdge,
+    UnwindDestMemoTy *FuncletUnwindMap = nullptr) {
+  for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
+    Instruction *I = &*BBI++;
+
+    // We only need to check for function calls: inlined invoke
+    // instructions require no special handling.
+    CallInst *CI = dyn_cast<CallInst>(I);
+
+    if (!CI || CI->doesNotThrow() || isa<InlineAsm>(CI->getCalledValue()))
+      continue;
+
+    // We do not need to (and in fact, cannot) convert possibly throwing calls
+    // to @llvm.experimental_deoptimize (resp. @llvm.experimental.guard) into
+    // invokes.
The caller's "segment" of the deoptimization continuation + // attached to the newly inlined @llvm.experimental_deoptimize + // (resp. @llvm.experimental.guard) call should contain the exception + // handling logic, if any. + if (auto *F = CI->getCalledFunction()) + if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize || + F->getIntrinsicID() == Intrinsic::experimental_guard) + continue; + + if (auto FuncletBundle = CI->getOperandBundle(LLVMContext::OB_funclet)) { + // This call is nested inside a funclet. If that funclet has an unwind + // destination within the inlinee, then unwinding out of this call would + // be UB. Rewriting this call to an invoke which targets the inlined + // invoke's unwind dest would give the call's parent funclet multiple + // unwind destinations, which is something that subsequent EH table + // generation can't handle and that the veirifer rejects. So when we + // see such a call, leave it as a call. + auto *FuncletPad = cast<Instruction>(FuncletBundle->Inputs[0]); + Value *UnwindDestToken = + getUnwindDestToken(FuncletPad, *FuncletUnwindMap); + if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken)) + continue; +#ifndef NDEBUG + Instruction *MemoKey; + if (auto *CatchPad = dyn_cast<CatchPadInst>(FuncletPad)) + MemoKey = CatchPad->getCatchSwitch(); + else + MemoKey = FuncletPad; + assert(FuncletUnwindMap->count(MemoKey) && + (*FuncletUnwindMap)[MemoKey] == UnwindDestToken && + "must get memoized to avoid confusing later searches"); +#endif // NDEBUG + } + + changeToInvokeAndSplitBasicBlock(CI, UnwindEdge); + return BB; + } + return nullptr; +} + +/// If we inlined an invoke site, we need to convert calls +/// in the body of the inlined function into invokes. +/// +/// II is the invoke instruction being inlined. FirstNewBlock is the first +/// block of the inlined code (the last block is the end of the function), +/// and InlineCodeInfo is information about the code that got inlined. +static void HandleInlinedLandingPad(InvokeInst *II, BasicBlock *FirstNewBlock, + ClonedCodeInfo &InlinedCodeInfo) { + BasicBlock *InvokeDest = II->getUnwindDest(); + + Function *Caller = FirstNewBlock->getParent(); + + // The inlined code is currently at the end of the function, scan from the + // start of the inlined code to its end, checking for stuff we need to + // rewrite. + LandingPadInliningInfo Invoke(II); + + // Get all of the inlined landing pad instructions. + SmallPtrSet<LandingPadInst*, 16> InlinedLPads; + for (Function::iterator I = FirstNewBlock->getIterator(), E = Caller->end(); + I != E; ++I) + if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) + InlinedLPads.insert(II->getLandingPadInst()); + + // Append the clauses from the outer landing pad instruction into the inlined + // landing pad instructions. + LandingPadInst *OuterLPad = Invoke.getLandingPadInst(); + for (LandingPadInst *InlinedLPad : InlinedLPads) { + unsigned OuterNum = OuterLPad->getNumClauses(); + InlinedLPad->reserveClauses(OuterNum); + for (unsigned OuterIdx = 0; OuterIdx != OuterNum; ++OuterIdx) + InlinedLPad->addClause(OuterLPad->getClause(OuterIdx)); + if (OuterLPad->isCleanup()) + InlinedLPad->setCleanup(true); + } + + for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end(); + BB != E; ++BB) { + if (InlinedCodeInfo.ContainsCalls) + if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke( + &*BB, Invoke.getOuterResumeDest())) + // Update any PHI nodes in the exceptional block to indicate that there + // is now a new entry in them. 
+ Invoke.addIncomingPHIValuesFor(NewBB); + + // Forward any resumes that are remaining here. + if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) + Invoke.forwardResume(RI, InlinedLPads); + } + + // Now that everything is happy, we have one final detail. The PHI nodes in + // the exception destination block still have entries due to the original + // invoke instruction. Eliminate these entries (which might even delete the + // PHI node) now. + InvokeDest->removePredecessor(II->getParent()); +} + +/// If we inlined an invoke site, we need to convert calls +/// in the body of the inlined function into invokes. +/// +/// II is the invoke instruction being inlined. FirstNewBlock is the first +/// block of the inlined code (the last block is the end of the function), +/// and InlineCodeInfo is information about the code that got inlined. +static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock, + ClonedCodeInfo &InlinedCodeInfo) { + BasicBlock *UnwindDest = II->getUnwindDest(); + Function *Caller = FirstNewBlock->getParent(); + + assert(UnwindDest->getFirstNonPHI()->isEHPad() && "unexpected BasicBlock!"); + + // If there are PHI nodes in the unwind destination block, we need to keep + // track of which values came into them from the invoke before removing the + // edge from this block. + SmallVector<Value *, 8> UnwindDestPHIValues; + llvm::BasicBlock *InvokeBB = II->getParent(); + for (Instruction &I : *UnwindDest) { + // Save the value to use for this edge. + PHINode *PHI = dyn_cast<PHINode>(&I); + if (!PHI) + break; + UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB)); + } + + // Add incoming-PHI values to the unwind destination block for the given basic + // block, using the values for the original invoke's source block. + auto UpdatePHINodes = [&](BasicBlock *Src) { + BasicBlock::iterator I = UnwindDest->begin(); + for (Value *V : UnwindDestPHIValues) { + PHINode *PHI = cast<PHINode>(I); + PHI->addIncoming(V, Src); + ++I; + } + }; + + // This connects all the instructions which 'unwind to caller' to the invoke + // destination. + UnwindDestMemoTy FuncletUnwindMap; + for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end(); + BB != E; ++BB) { + if (auto *CRI = dyn_cast<CleanupReturnInst>(BB->getTerminator())) { + if (CRI->unwindsToCaller()) { + auto *CleanupPad = CRI->getCleanupPad(); + CleanupReturnInst::Create(CleanupPad, UnwindDest, CRI); + CRI->eraseFromParent(); + UpdatePHINodes(&*BB); + // Finding a cleanupret with an unwind destination would confuse + // subsequent calls to getUnwindDestToken, so map the cleanuppad + // to short-circuit any such calls and recognize this as an "unwind + // to caller" cleanup. + assert(!FuncletUnwindMap.count(CleanupPad) || + isa<ConstantTokenNone>(FuncletUnwindMap[CleanupPad])); + FuncletUnwindMap[CleanupPad] = + ConstantTokenNone::get(Caller->getContext()); + } + } + + Instruction *I = BB->getFirstNonPHI(); + if (!I->isEHPad()) + continue; + + Instruction *Replacement = nullptr; + if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) { + if (CatchSwitch->unwindsToCaller()) { + Value *UnwindDestToken; + if (auto *ParentPad = + dyn_cast<Instruction>(CatchSwitch->getParentPad())) { + // This catchswitch is nested inside another funclet. If that + // funclet has an unwind destination within the inlinee, then + // unwinding out of this catchswitch would be UB. 
Rewriting this + // catchswitch to unwind to the inlined invoke's unwind dest would + // give the parent funclet multiple unwind destinations, which is + // something that subsequent EH table generation can't handle and + // that the veirifer rejects. So when we see such a call, leave it + // as "unwind to caller". + UnwindDestToken = getUnwindDestToken(ParentPad, FuncletUnwindMap); + if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken)) + continue; + } else { + // This catchswitch has no parent to inherit constraints from, and + // none of its descendants can have an unwind edge that exits it and + // targets another funclet in the inlinee. It may or may not have a + // descendant that definitively has an unwind to caller. In either + // case, we'll have to assume that any unwinds out of it may need to + // be routed to the caller, so treat it as though it has a definitive + // unwind to caller. + UnwindDestToken = ConstantTokenNone::get(Caller->getContext()); + } + auto *NewCatchSwitch = CatchSwitchInst::Create( + CatchSwitch->getParentPad(), UnwindDest, + CatchSwitch->getNumHandlers(), CatchSwitch->getName(), + CatchSwitch); + for (BasicBlock *PadBB : CatchSwitch->handlers()) + NewCatchSwitch->addHandler(PadBB); + // Propagate info for the old catchswitch over to the new one in + // the unwind map. This also serves to short-circuit any subsequent + // checks for the unwind dest of this catchswitch, which would get + // confused if they found the outer handler in the callee. + FuncletUnwindMap[NewCatchSwitch] = UnwindDestToken; + Replacement = NewCatchSwitch; + } + } else if (!isa<FuncletPadInst>(I)) { + llvm_unreachable("unexpected EHPad!"); + } + + if (Replacement) { + Replacement->takeName(I); + I->replaceAllUsesWith(Replacement); + I->eraseFromParent(); + UpdatePHINodes(&*BB); + } + } + + if (InlinedCodeInfo.ContainsCalls) + for (Function::iterator BB = FirstNewBlock->getIterator(), + E = Caller->end(); + BB != E; ++BB) + if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke( + &*BB, UnwindDest, &FuncletUnwindMap)) + // Update any PHI nodes in the exceptional block to indicate that there + // is now a new entry in them. + UpdatePHINodes(NewBB); + + // Now that everything is happy, we have one final detail. The PHI nodes in + // the exception destination block still have entries due to the original + // invoke instruction. Eliminate these entries (which might even delete the + // PHI node) now. + UnwindDest->removePredecessor(InvokeBB); +} + +/// When inlining a call site that has !llvm.mem.parallel_loop_access metadata, +/// that metadata should be propagated to all memory-accessing cloned +/// instructions. 
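+///
+/// For example (illustrative): if the call site carries
+///   !llvm.mem.parallel_loop_access !0
+/// then every cloned instruction that may read or write memory ends up
+/// carrying !0 as well (concatenated with any parallel-loop metadata the
+/// instruction already had), so the enclosing loop is still recognized as
+/// parallel after inlining.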
+static void PropagateParallelLoopAccessMetadata(CallSite CS, + ValueToValueMapTy &VMap) { + MDNode *M = + CS.getInstruction()->getMetadata(LLVMContext::MD_mem_parallel_loop_access); + if (!M) + return; + + for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end(); + VMI != VMIE; ++VMI) { + if (!VMI->second) + continue; + + Instruction *NI = dyn_cast<Instruction>(VMI->second); + if (!NI) + continue; + + if (MDNode *PM = NI->getMetadata(LLVMContext::MD_mem_parallel_loop_access)) { + M = MDNode::concatenate(PM, M); + NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M); + } else if (NI->mayReadOrWriteMemory()) { + NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M); + } + } +} + +/// When inlining a function that contains noalias scope metadata, +/// this metadata needs to be cloned so that the inlined blocks +/// have different "unique scopes" at every call site. Were this not done, then +/// aliasing scopes from a function inlined into a caller multiple times could +/// not be differentiated (and this would lead to miscompiles because the +/// non-aliasing property communicated by the metadata could have +/// call-site-specific control dependencies). +static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) { + const Function *CalledFunc = CS.getCalledFunction(); + SetVector<const MDNode *> MD; + + // Note: We could only clone the metadata if it is already used in the + // caller. I'm omitting that check here because it might confuse + // inter-procedural alias analysis passes. We can revisit this if it becomes + // an efficiency or overhead problem. + + for (const BasicBlock &I : *CalledFunc) + for (const Instruction &J : I) { + if (const MDNode *M = J.getMetadata(LLVMContext::MD_alias_scope)) + MD.insert(M); + if (const MDNode *M = J.getMetadata(LLVMContext::MD_noalias)) + MD.insert(M); + } + + if (MD.empty()) + return; + + // Walk the existing metadata, adding the complete (perhaps cyclic) chain to + // the set. + SmallVector<const Metadata *, 16> Queue(MD.begin(), MD.end()); + while (!Queue.empty()) { + const MDNode *M = cast<MDNode>(Queue.pop_back_val()); + for (unsigned i = 0, ie = M->getNumOperands(); i != ie; ++i) + if (const MDNode *M1 = dyn_cast<MDNode>(M->getOperand(i))) + if (MD.insert(M1)) + Queue.push_back(M1); + } + + // Now we have a complete set of all metadata in the chains used to specify + // the noalias scopes and the lists of those scopes. + SmallVector<TempMDTuple, 16> DummyNodes; + DenseMap<const MDNode *, TrackingMDNodeRef> MDMap; + for (const MDNode *I : MD) { + DummyNodes.push_back(MDTuple::getTemporary(CalledFunc->getContext(), None)); + MDMap[I].reset(DummyNodes.back().get()); + } + + // Create new metadata nodes to replace the dummy nodes, replacing old + // metadata references with either a dummy node or an already-created new + // node. + for (const MDNode *I : MD) { + SmallVector<Metadata *, 4> NewOps; + for (unsigned i = 0, ie = I->getNumOperands(); i != ie; ++i) { + const Metadata *V = I->getOperand(i); + if (const MDNode *M = dyn_cast<MDNode>(V)) + NewOps.push_back(MDMap[M]); + else + NewOps.push_back(const_cast<Metadata *>(V)); + } + + MDNode *NewM = MDNode::get(CalledFunc->getContext(), NewOps); + MDTuple *TempM = cast<MDTuple>(MDMap[I]); + assert(TempM->isTemporary() && "Expected temporary node"); + + TempM->replaceAllUsesWith(NewM); + } + + // Now replace the metadata in the new inlined instructions with the + // repacements from the map. 
+ for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end(); + VMI != VMIE; ++VMI) { + if (!VMI->second) + continue; + + Instruction *NI = dyn_cast<Instruction>(VMI->second); + if (!NI) + continue; + + if (MDNode *M = NI->getMetadata(LLVMContext::MD_alias_scope)) { + MDNode *NewMD = MDMap[M]; + // If the call site also had alias scope metadata (a list of scopes to + // which instructions inside it might belong), propagate those scopes to + // the inlined instructions. + if (MDNode *CSM = + CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope)) + NewMD = MDNode::concatenate(NewMD, CSM); + NI->setMetadata(LLVMContext::MD_alias_scope, NewMD); + } else if (NI->mayReadOrWriteMemory()) { + if (MDNode *M = + CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope)) + NI->setMetadata(LLVMContext::MD_alias_scope, M); + } + + if (MDNode *M = NI->getMetadata(LLVMContext::MD_noalias)) { + MDNode *NewMD = MDMap[M]; + // If the call site also had noalias metadata (a list of scopes with + // which instructions inside it don't alias), propagate those scopes to + // the inlined instructions. + if (MDNode *CSM = + CS.getInstruction()->getMetadata(LLVMContext::MD_noalias)) + NewMD = MDNode::concatenate(NewMD, CSM); + NI->setMetadata(LLVMContext::MD_noalias, NewMD); + } else if (NI->mayReadOrWriteMemory()) { + if (MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_noalias)) + NI->setMetadata(LLVMContext::MD_noalias, M); + } + } +} + +/// If the inlined function has noalias arguments, +/// then add new alias scopes for each noalias argument, tag the mapped noalias +/// parameters with noalias metadata specifying the new scope, and tag all +/// non-derived loads, stores and memory intrinsics with the new alias scopes. +static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap, + const DataLayout &DL, AAResults *CalleeAAR) { + if (!EnableNoAliasConversion) + return; + + const Function *CalledFunc = CS.getCalledFunction(); + SmallVector<const Argument *, 4> NoAliasArgs; + + for (const Argument &Arg : CalledFunc->args()) + if (Arg.hasNoAliasAttr() && !Arg.use_empty()) + NoAliasArgs.push_back(&Arg); + + if (NoAliasArgs.empty()) + return; + + // To do a good job, if a noalias variable is captured, we need to know if + // the capture point dominates the particular use we're considering. + DominatorTree DT; + DT.recalculate(const_cast<Function&>(*CalledFunc)); + + // noalias indicates that pointer values based on the argument do not alias + // pointer values which are not based on it. So we add a new "scope" for each + // noalias function argument. Accesses using pointers based on that argument + // become part of that alias scope, accesses using pointers not based on that + // argument are tagged as noalias with that scope. + + DenseMap<const Argument *, MDNode *> NewScopes; + MDBuilder MDB(CalledFunc->getContext()); + + // Create a new scope domain for this function. + MDNode *NewDomain = + MDB.createAnonymousAliasScopeDomain(CalledFunc->getName()); + for (unsigned i = 0, e = NoAliasArgs.size(); i != e; ++i) { + const Argument *A = NoAliasArgs[i]; + + std::string Name = CalledFunc->getName(); + if (A->hasName()) { + Name += ": %"; + Name += A->getName(); + } else { + Name += ": argument "; + Name += utostr(i); + } + + // Note: We always create a new anonymous root here. This is true regardless + // of the linkage of the callee because the aliasing "scope" is not just a + // property of the callee, but also all control dependencies in the caller. 
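+ // The resulting metadata is roughly of this shape (node names illustrative):
+ //   !0 = distinct !{!0, !"callee"}             ; the new anonymous domain
+ //   !1 = distinct !{!1, !0, !"callee: %arg"}   ; one scope per noalias arg
+ // and the cloned accesses later receive !alias.scope / !noalias lists such
+ // as !{!1}.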
+ MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name); + NewScopes.insert(std::make_pair(A, NewScope)); + } + + // Iterate over all new instructions in the map; for all memory-access + // instructions, add the alias scope metadata. + for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end(); + VMI != VMIE; ++VMI) { + if (const Instruction *I = dyn_cast<Instruction>(VMI->first)) { + if (!VMI->second) + continue; + + Instruction *NI = dyn_cast<Instruction>(VMI->second); + if (!NI) + continue; + + bool IsArgMemOnlyCall = false, IsFuncCall = false; + SmallVector<const Value *, 2> PtrArgs; + + if (const LoadInst *LI = dyn_cast<LoadInst>(I)) + PtrArgs.push_back(LI->getPointerOperand()); + else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) + PtrArgs.push_back(SI->getPointerOperand()); + else if (const VAArgInst *VAAI = dyn_cast<VAArgInst>(I)) + PtrArgs.push_back(VAAI->getPointerOperand()); + else if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I)) + PtrArgs.push_back(CXI->getPointerOperand()); + else if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I)) + PtrArgs.push_back(RMWI->getPointerOperand()); + else if (ImmutableCallSite ICS = ImmutableCallSite(I)) { + // If we know that the call does not access memory, then we'll still + // know that about the inlined clone of this call site, and we don't + // need to add metadata. + if (ICS.doesNotAccessMemory()) + continue; + + IsFuncCall = true; + if (CalleeAAR) { + FunctionModRefBehavior MRB = CalleeAAR->getModRefBehavior(ICS); + if (MRB == FMRB_OnlyAccessesArgumentPointees || + MRB == FMRB_OnlyReadsArgumentPointees) + IsArgMemOnlyCall = true; + } + + for (Value *Arg : ICS.args()) { + // We need to check the underlying objects of all arguments, not just + // the pointer arguments, because we might be passing pointers as + // integers, etc. + // However, if we know that the call only accesses pointer arguments, + // then we only need to check the pointer arguments. + if (IsArgMemOnlyCall && !Arg->getType()->isPointerTy()) + continue; + + PtrArgs.push_back(Arg); + } + } + + // If we found no pointers, then this instruction is not suitable for + // pairing with an instruction to receive aliasing metadata. + // However, if this is a call, this we might just alias with none of the + // noalias arguments. + if (PtrArgs.empty() && !IsFuncCall) + continue; + + // It is possible that there is only one underlying object, but you + // need to go through several PHIs to see it, and thus could be + // repeated in the Objects list. + SmallPtrSet<const Value *, 4> ObjSet; + SmallVector<Metadata *, 4> Scopes, NoAliases; + + SmallSetVector<const Argument *, 4> NAPtrArgs; + for (const Value *V : PtrArgs) { + SmallVector<Value *, 4> Objects; + GetUnderlyingObjects(const_cast<Value*>(V), + Objects, DL, /* LI = */ nullptr); + + for (Value *O : Objects) + ObjSet.insert(O); + } + + // Figure out if we're derived from anything that is not a noalias + // argument. + bool CanDeriveViaCapture = false, UsesAliasingPtr = false; + for (const Value *V : ObjSet) { + // Is this value a constant that cannot be derived from any pointer + // value (we need to exclude constant expressions, for example, that + // are formed from arithmetic on global symbols). 
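+ // (Illustrative: a constant expression built from a global's address is
+ // still pointer-derived, so it is deliberately not on this whitelist.)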
+ bool IsNonPtrConst = isa<ConstantInt>(V) || isa<ConstantFP>(V) || + isa<ConstantPointerNull>(V) || + isa<ConstantDataVector>(V) || isa<UndefValue>(V); + if (IsNonPtrConst) + continue; + + // If this is anything other than a noalias argument, then we cannot + // completely describe the aliasing properties using alias.scope + // metadata (and, thus, won't add any). + if (const Argument *A = dyn_cast<Argument>(V)) { + if (!A->hasNoAliasAttr()) + UsesAliasingPtr = true; + } else { + UsesAliasingPtr = true; + } + + // If this is not some identified function-local object (which cannot + // directly alias a noalias argument), or some other argument (which, + // by definition, also cannot alias a noalias argument), then we could + // alias a noalias argument that has been captured). + if (!isa<Argument>(V) && + !isIdentifiedFunctionLocal(const_cast<Value*>(V))) + CanDeriveViaCapture = true; + } + + // A function call can always get captured noalias pointers (via other + // parameters, globals, etc.). + if (IsFuncCall && !IsArgMemOnlyCall) + CanDeriveViaCapture = true; + + // First, we want to figure out all of the sets with which we definitely + // don't alias. Iterate over all noalias set, and add those for which: + // 1. The noalias argument is not in the set of objects from which we + // definitely derive. + // 2. The noalias argument has not yet been captured. + // An arbitrary function that might load pointers could see captured + // noalias arguments via other noalias arguments or globals, and so we + // must always check for prior capture. + for (const Argument *A : NoAliasArgs) { + if (!ObjSet.count(A) && (!CanDeriveViaCapture || + // It might be tempting to skip the + // PointerMayBeCapturedBefore check if + // A->hasNoCaptureAttr() is true, but this is + // incorrect because nocapture only guarantees + // that no copies outlive the function, not + // that the value cannot be locally captured. + !PointerMayBeCapturedBefore(A, + /* ReturnCaptures */ false, + /* StoreCaptures */ false, I, &DT))) + NoAliases.push_back(NewScopes[A]); + } + + if (!NoAliases.empty()) + NI->setMetadata(LLVMContext::MD_noalias, + MDNode::concatenate( + NI->getMetadata(LLVMContext::MD_noalias), + MDNode::get(CalledFunc->getContext(), NoAliases))); + + // Next, we want to figure out all of the sets to which we might belong. + // We might belong to a set if the noalias argument is in the set of + // underlying objects. If there is some non-noalias argument in our list + // of underlying objects, then we cannot add a scope because the fact + // that some access does not alias with any set of our noalias arguments + // cannot itself guarantee that it does not alias with this access + // (because there is some pointer of unknown origin involved and the + // other access might also depend on this pointer). We also cannot add + // scopes to arbitrary functions unless we know they don't access any + // non-parameter pointer-values. + bool CanAddScopes = !UsesAliasingPtr; + if (CanAddScopes && IsFuncCall) + CanAddScopes = IsArgMemOnlyCall; + + if (CanAddScopes) + for (const Argument *A : NoAliasArgs) { + if (ObjSet.count(A)) + Scopes.push_back(NewScopes[A]); + } + + if (!Scopes.empty()) + NI->setMetadata( + LLVMContext::MD_alias_scope, + MDNode::concatenate(NI->getMetadata(LLVMContext::MD_alias_scope), + MDNode::get(CalledFunc->getContext(), Scopes))); + } + } +} + +/// If the inlined function has non-byval align arguments, then +/// add @llvm.assume-based alignment assumptions to preserve this information. 
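+///
+/// Roughly (illustrative), a callee parameter declared "i8* align 32 %p"
+/// yields at the call site:
+///   %ptrint    = ptrtoint i8* %arg to i64
+///   %maskedptr = and i64 %ptrint, 31
+///   %maskcond  = icmp eq i64 %maskedptr, 0
+///   call void @llvm.assume(i1 %maskcond)
+/// unless the required alignment is already provable in the caller.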
+static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) { + if (!PreserveAlignmentAssumptions || !IFI.GetAssumptionCache) + return; + + AssumptionCache *AC = &(*IFI.GetAssumptionCache)(*CS.getCaller()); + auto &DL = CS.getCaller()->getParent()->getDataLayout(); + + // To avoid inserting redundant assumptions, we should check for assumptions + // already in the caller. To do this, we might need a DT of the caller. + DominatorTree DT; + bool DTCalculated = false; + + Function *CalledFunc = CS.getCalledFunction(); + for (Function::arg_iterator I = CalledFunc->arg_begin(), + E = CalledFunc->arg_end(); + I != E; ++I) { + unsigned Align = I->getType()->isPointerTy() ? I->getParamAlignment() : 0; + if (Align && !I->hasByValOrInAllocaAttr() && !I->hasNUses(0)) { + if (!DTCalculated) { + DT.recalculate(const_cast<Function&>(*CS.getInstruction()->getParent() + ->getParent())); + DTCalculated = true; + } + + // If we can already prove the asserted alignment in the context of the + // caller, then don't bother inserting the assumption. + Value *Arg = CS.getArgument(I->getArgNo()); + if (getKnownAlignment(Arg, DL, CS.getInstruction(), AC, &DT) >= Align) + continue; + + CallInst *NewAssumption = IRBuilder<>(CS.getInstruction()) + .CreateAlignmentAssumption(DL, Arg, Align); + AC->registerAssumption(NewAssumption); + } + } +} + +/// Once we have cloned code over from a callee into the caller, +/// update the specified callgraph to reflect the changes we made. +/// Note that it's possible that not all code was copied over, so only +/// some edges of the callgraph may remain. +static void UpdateCallGraphAfterInlining(CallSite CS, + Function::iterator FirstNewBlock, + ValueToValueMapTy &VMap, + InlineFunctionInfo &IFI) { + CallGraph &CG = *IFI.CG; + const Function *Caller = CS.getInstruction()->getParent()->getParent(); + const Function *Callee = CS.getCalledFunction(); + CallGraphNode *CalleeNode = CG[Callee]; + CallGraphNode *CallerNode = CG[Caller]; + + // Since we inlined some uninlined call sites in the callee into the caller, + // add edges from the caller to all of the callees of the callee. + CallGraphNode::iterator I = CalleeNode->begin(), E = CalleeNode->end(); + + // Consider the case where CalleeNode == CallerNode. + CallGraphNode::CalledFunctionsVector CallCache; + if (CalleeNode == CallerNode) { + CallCache.assign(I, E); + I = CallCache.begin(); + E = CallCache.end(); + } + + for (; I != E; ++I) { + const Value *OrigCall = I->first; + + ValueToValueMapTy::iterator VMI = VMap.find(OrigCall); + // Only copy the edge if the call was inlined! + if (VMI == VMap.end() || VMI->second == nullptr) + continue; + + // If the call was inlined, but then constant folded, there is no edge to + // add. Check for this case. + Instruction *NewCall = dyn_cast<Instruction>(VMI->second); + if (!NewCall) + continue; + + // We do not treat intrinsic calls like real function calls because we + // expect them to become inline code; do not add an edge for an intrinsic. + CallSite CS = CallSite(NewCall); + if (CS && CS.getCalledFunction() && CS.getCalledFunction()->isIntrinsic()) + continue; + + // Remember that this call site got inlined for the client of + // InlineFunction. + IFI.InlinedCalls.push_back(NewCall); + + // It's possible that inlining the callsite will cause it to go from an + // indirect to a direct call by resolving a function pointer. If this + // happens, set the callee of the new call site to a more precise + // destination. 
This can also happen if the call graph node of the caller + // was just unnecessarily imprecise. + if (!I->second->getFunction()) + if (Function *F = CallSite(NewCall).getCalledFunction()) { + // Indirect call site resolved to direct call. + CallerNode->addCalledFunction(CallSite(NewCall), CG[F]); + + continue; + } + + CallerNode->addCalledFunction(CallSite(NewCall), I->second); + } + + // Update the call graph by deleting the edge from Callee to Caller. We must + // do this after the loop above in case Caller and Callee are the same. + CallerNode->removeCallEdgeFor(CS); +} + +static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M, + BasicBlock *InsertBlock, + InlineFunctionInfo &IFI) { + Type *AggTy = cast<PointerType>(Src->getType())->getElementType(); + IRBuilder<> Builder(InsertBlock, InsertBlock->begin()); + + Value *Size = Builder.getInt64(M->getDataLayout().getTypeStoreSize(AggTy)); + + // Always generate a memcpy of alignment 1 here because we don't know + // the alignment of the src pointer. Other optimizations can infer + // better alignment. + Builder.CreateMemCpy(Dst, Src, Size, /*Align=*/1); +} + +/// When inlining a call site that has a byval argument, +/// we have to make the implicit memcpy explicit by adding it. +static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, + const Function *CalledFunc, + InlineFunctionInfo &IFI, + unsigned ByValAlignment) { + PointerType *ArgTy = cast<PointerType>(Arg->getType()); + Type *AggTy = ArgTy->getElementType(); + + Function *Caller = TheCall->getParent()->getParent(); + + // If the called function is readonly, then it could not mutate the caller's + // copy of the byval'd memory. In this case, it is safe to elide the copy and + // temporary. + if (CalledFunc->onlyReadsMemory()) { + // If the byval argument has a specified alignment that is greater than the + // passed in pointer, then we either have to round up the input pointer or + // give up on this transformation. + if (ByValAlignment <= 1) // 0 = unspecified, 1 = no particular alignment. + return Arg; + + AssumptionCache *AC = + IFI.GetAssumptionCache ? &(*IFI.GetAssumptionCache)(*Caller) : nullptr; + const DataLayout &DL = Caller->getParent()->getDataLayout(); + + // If the pointer is already known to be sufficiently aligned, or if we can + // round it up to a larger alignment, then we don't need a temporary. + if (getOrEnforceKnownAlignment(Arg, ByValAlignment, DL, TheCall, AC) >= + ByValAlignment) + return Arg; + + // Otherwise, we have to make a memcpy to get a safe alignment. This is bad + // for code quality, but rarely happens and is required for correctness. + } + + // Create the alloca. If we have DataLayout, use nice alignment. + unsigned Align = + Caller->getParent()->getDataLayout().getPrefTypeAlignment(AggTy); + + // If the byval had an alignment specified, we *must* use at least that + // alignment, as it is required by the byval argument (and uses of the + // pointer inside the callee). + Align = std::max(Align, ByValAlignment); + + Value *NewAlloca = new AllocaInst(AggTy, nullptr, Align, Arg->getName(), + &*Caller->begin()->begin()); + IFI.StaticAllocas.push_back(cast<AllocaInst>(NewAlloca)); + + // Uses of the argument in the function should use our new alloca + // instead. + return NewAlloca; +} + +// Check whether this Value is used by a lifetime intrinsic. 
+static bool isUsedByLifetimeMarker(Value *V) { + for (User *U : V->users()) { + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) { + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + return true; + } + } + } + return false; +} + +// Check whether the given alloca already has +// lifetime.start or lifetime.end intrinsics. +static bool hasLifetimeMarkers(AllocaInst *AI) { + Type *Ty = AI->getType(); + Type *Int8PtrTy = Type::getInt8PtrTy(Ty->getContext(), + Ty->getPointerAddressSpace()); + if (Ty == Int8PtrTy) + return isUsedByLifetimeMarker(AI); + + // Do a scan to find all the casts to i8*. + for (User *U : AI->users()) { + if (U->getType() != Int8PtrTy) continue; + if (U->stripPointerCasts() != AI) continue; + if (isUsedByLifetimeMarker(U)) + return true; + } + return false; +} + +/// Rebuild the entire inlined-at chain for this instruction so that the top of +/// the chain now is inlined-at the new call site. +static DebugLoc +updateInlinedAtInfo(const DebugLoc &DL, DILocation *InlinedAtNode, + LLVMContext &Ctx, + DenseMap<const DILocation *, DILocation *> &IANodes) { + SmallVector<DILocation *, 3> InlinedAtLocations; + DILocation *Last = InlinedAtNode; + DILocation *CurInlinedAt = DL; + + // Gather all the inlined-at nodes + while (DILocation *IA = CurInlinedAt->getInlinedAt()) { + // Skip any we've already built nodes for + if (DILocation *Found = IANodes[IA]) { + Last = Found; + break; + } + + InlinedAtLocations.push_back(IA); + CurInlinedAt = IA; + } + + // Starting from the top, rebuild the nodes to point to the new inlined-at + // location (then rebuilding the rest of the chain behind it) and update the + // map of already-constructed inlined-at nodes. + for (const DILocation *MD : reverse(InlinedAtLocations)) { + Last = IANodes[MD] = DILocation::getDistinct( + Ctx, MD->getLine(), MD->getColumn(), MD->getScope(), Last); + } + + // And finally create the normal location for this instruction, referring to + // the new inlined-at chain. + return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(), Last); +} + +/// Return the result of AI->isStaticAlloca() if AI were moved to the entry +/// block. Allocas used in inalloca calls and allocas of dynamic array size +/// cannot be static. +static bool allocaWouldBeStaticInEntry(const AllocaInst *AI ) { + return isa<Constant>(AI->getArraySize()) && !AI->isUsedWithInAlloca(); +} + +/// Update inlined instructions' line numbers to +/// to encode location where these instructions are inlined. +static void fixupLineNumbers(Function *Fn, Function::iterator FI, + Instruction *TheCall, bool CalleeHasDebugInfo) { + const DebugLoc &TheCallDL = TheCall->getDebugLoc(); + if (!TheCallDL) + return; + + auto &Ctx = Fn->getContext(); + DILocation *InlinedAtNode = TheCallDL; + + // Create a unique call site, not to be confused with any other call from the + // same location. + InlinedAtNode = DILocation::getDistinct( + Ctx, InlinedAtNode->getLine(), InlinedAtNode->getColumn(), + InlinedAtNode->getScope(), InlinedAtNode->getInlinedAt()); + + // Cache the inlined-at nodes as they're built so they are reused, without + // this every instruction's inlined-at chain would become distinct from each + // other. 
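+ // E.g. (illustrative) a callee location "a.c:3 inlined-at b.c:7" becomes
+ // "a.c:3 inlined-at (b.c:7 inlined-at <this call site>)", preserving the
+ // full inline stack.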
+ DenseMap<const DILocation *, DILocation *> IANodes; + + for (; FI != Fn->end(); ++FI) { + for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); + BI != BE; ++BI) { + if (DebugLoc DL = BI->getDebugLoc()) { + BI->setDebugLoc( + updateInlinedAtInfo(DL, InlinedAtNode, BI->getContext(), IANodes)); + continue; + } + + if (CalleeHasDebugInfo) + continue; + + // If the inlined instruction has no line number, make it look as if it + // originates from the call location. This is important for + // ((__always_inline__, __nodebug__)) functions which must use caller + // location for all instructions in their function body. + + // Don't update static allocas, as they may get moved later. + if (auto *AI = dyn_cast<AllocaInst>(BI)) + if (allocaWouldBeStaticInEntry(AI)) + continue; + + BI->setDebugLoc(TheCallDL); + } + } +} + +/// This function inlines the called function into the basic block of the +/// caller. This returns false if it is not possible to inline this call. +/// The program is still in a well defined state if this occurs though. +/// +/// Note that this only does one level of inlining. For example, if the +/// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now +/// exists in the instruction stream. Similarly this will inline a recursive +/// function by one level. +bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, + AAResults *CalleeAAR, bool InsertLifetime) { + Instruction *TheCall = CS.getInstruction(); + assert(TheCall->getParent() && TheCall->getParent()->getParent() && + "Instruction not in function!"); + + // If IFI has any state in it, zap it before we fill it in. + IFI.reset(); + + const Function *CalledFunc = CS.getCalledFunction(); + if (!CalledFunc || // Can't inline external function or indirect + CalledFunc->isDeclaration() || // call, or call to a vararg function! + CalledFunc->getFunctionType()->isVarArg()) return false; + + // The inliner does not know how to inline through calls with operand bundles + // in general ... + if (CS.hasOperandBundles()) { + for (int i = 0, e = CS.getNumOperandBundles(); i != e; ++i) { + uint32_t Tag = CS.getOperandBundleAt(i).getTagID(); + // ... but it knows how to inline through "deopt" operand bundles ... + if (Tag == LLVMContext::OB_deopt) + continue; + // ... and "funclet" operand bundles. + if (Tag == LLVMContext::OB_funclet) + continue; + + return false; + } + } + + // If the call to the callee cannot throw, set the 'nounwind' flag on any + // calls that we inline. + bool MarkNoUnwind = CS.doesNotThrow(); + + BasicBlock *OrigBB = TheCall->getParent(); + Function *Caller = OrigBB->getParent(); + + // GC poses two hazards to inlining, which only occur when the callee has GC: + // 1. If the caller has no GC, then the callee's GC must be propagated to the + // caller. + // 2. If the caller has a differing GC, it is invalid to inline. + if (CalledFunc->hasGC()) { + if (!Caller->hasGC()) + Caller->setGC(CalledFunc->getGC()); + else if (CalledFunc->getGC() != Caller->getGC()) + return false; + } + + // Get the personality function from the callee if it contains a landing pad. + Constant *CalledPersonality = + CalledFunc->hasPersonalityFn() + ? CalledFunc->getPersonalityFn()->stripPointerCasts() + : nullptr; + + // Find the personality function used by the landing pads of the caller. If it + // exists, then check to see that it matches the personality function used in + // the callee. + Constant *CallerPersonality = + Caller->hasPersonalityFn() + ? 
Caller->getPersonalityFn()->stripPointerCasts() + : nullptr; + if (CalledPersonality) { + if (!CallerPersonality) + Caller->setPersonalityFn(CalledPersonality); + // If the personality functions match, then we can perform the + // inlining. Otherwise, we can't inline. + // TODO: This isn't 100% true. Some personality functions are proper + // supersets of others and can be used in place of the other. + else if (CalledPersonality != CallerPersonality) + return false; + } + + // We need to figure out which funclet the callsite was in so that we may + // properly nest the callee. + Instruction *CallSiteEHPad = nullptr; + if (CallerPersonality) { + EHPersonality Personality = classifyEHPersonality(CallerPersonality); + if (isFuncletEHPersonality(Personality)) { + Optional<OperandBundleUse> ParentFunclet = + CS.getOperandBundle(LLVMContext::OB_funclet); + if (ParentFunclet) + CallSiteEHPad = cast<FuncletPadInst>(ParentFunclet->Inputs.front()); + + // OK, the inlining site is legal. What about the target function? + + if (CallSiteEHPad) { + if (Personality == EHPersonality::MSVC_CXX) { + // The MSVC personality cannot tolerate catches getting inlined into + // cleanup funclets. + if (isa<CleanupPadInst>(CallSiteEHPad)) { + // Ok, the call site is within a cleanuppad. Let's check the callee + // for catchpads. + for (const BasicBlock &CalledBB : *CalledFunc) { + if (isa<CatchSwitchInst>(CalledBB.getFirstNonPHI())) + return false; + } + } + } else if (isAsynchronousEHPersonality(Personality)) { + // SEH is even less tolerant, there may not be any sort of exceptional + // funclet in the callee. + for (const BasicBlock &CalledBB : *CalledFunc) { + if (CalledBB.isEHPad()) + return false; + } + } + } + } + } + + // Determine if we are dealing with a call in an EHPad which does not unwind + // to caller. + bool EHPadForCallUnwindsLocally = false; + if (CallSiteEHPad && CS.isCall()) { + UnwindDestMemoTy FuncletUnwindMap; + Value *CallSiteUnwindDestToken = + getUnwindDestToken(CallSiteEHPad, FuncletUnwindMap); + + EHPadForCallUnwindsLocally = + CallSiteUnwindDestToken && + !isa<ConstantTokenNone>(CallSiteUnwindDestToken); + } + + // Get an iterator to the last basic block in the function, which will have + // the new function inlined after it. + Function::iterator LastBlock = --Caller->end(); + + // Make sure to capture all of the return instructions from the cloned + // function. + SmallVector<ReturnInst*, 8> Returns; + ClonedCodeInfo InlinedFunctionInfo; + Function::iterator FirstNewBlock; + + { // Scope to destroy VMap after cloning. + ValueToValueMapTy VMap; + // Keep a list of pair (dst, src) to emit byval initializations. + SmallVector<std::pair<Value*, Value*>, 4> ByValInit; + + auto &DL = Caller->getParent()->getDataLayout(); + + assert(CalledFunc->arg_size() == CS.arg_size() && + "No varargs calls can be inlined!"); + + // Calculate the vector of arguments to pass into the function cloner, which + // matches up the formal to the actual argument values. + CallSite::arg_iterator AI = CS.arg_begin(); + unsigned ArgNo = 0; + for (Function::const_arg_iterator I = CalledFunc->arg_begin(), + E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) { + Value *ActualArg = *AI; + + // When byval arguments actually inlined, we need to make the copy implied + // by them explicit. However, we don't do this if the callee is readonly + // or readnone, because the copy would be unneeded: the callee doesn't + // modify the struct. 
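+ // Roughly (illustrative), a byval aggregate %s passed at the call site gets
+ // a caller-local copy:
+ //   %s.copy = alloca %struct.S, align <A>
+ //   call void @llvm.memcpy.*(<%s.copy as i8*>, <%s as i8*>, <size>, ...)
+ // and the inlined body then uses %s.copy in place of %s.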
+ if (CS.isByValArgument(ArgNo)) { + ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI, + CalledFunc->getParamAlignment(ArgNo+1)); + if (ActualArg != *AI) + ByValInit.push_back(std::make_pair(ActualArg, (Value*) *AI)); + } + + VMap[&*I] = ActualArg; + } + + // Add alignment assumptions if necessary. We do this before the inlined + // instructions are actually cloned into the caller so that we can easily + // check what will be known at the start of the inlined code. + AddAlignmentAssumptions(CS, IFI); + + // We want the inliner to prune the code as it copies. We would LOVE to + // have no dead or constant instructions leftover after inlining occurs + // (which can happen, e.g., because an argument was constant), but we'll be + // happy with whatever the cloner can do. + CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, + /*ModuleLevelChanges=*/false, Returns, ".i", + &InlinedFunctionInfo, TheCall); + + // Remember the first block that is newly cloned over. + FirstNewBlock = LastBlock; ++FirstNewBlock; + + // Inject byval arguments initialization. + for (std::pair<Value*, Value*> &Init : ByValInit) + HandleByValArgumentInit(Init.first, Init.second, Caller->getParent(), + &*FirstNewBlock, IFI); + + Optional<OperandBundleUse> ParentDeopt = + CS.getOperandBundle(LLVMContext::OB_deopt); + if (ParentDeopt) { + SmallVector<OperandBundleDef, 2> OpDefs; + + for (auto &VH : InlinedFunctionInfo.OperandBundleCallSites) { + Instruction *I = dyn_cast_or_null<Instruction>(VH); + if (!I) continue; // instruction was DCE'd or RAUW'ed to undef + + OpDefs.clear(); + + CallSite ICS(I); + OpDefs.reserve(ICS.getNumOperandBundles()); + + for (unsigned i = 0, e = ICS.getNumOperandBundles(); i < e; ++i) { + auto ChildOB = ICS.getOperandBundleAt(i); + if (ChildOB.getTagID() != LLVMContext::OB_deopt) { + // If the inlined call has other operand bundles, let them be + OpDefs.emplace_back(ChildOB); + continue; + } + + // It may be useful to separate this logic (of handling operand + // bundles) out to a separate "policy" component if this gets crowded. + // Prepend the parent's deoptimization continuation to the newly + // inlined call's deoptimization continuation. + std::vector<Value *> MergedDeoptArgs; + MergedDeoptArgs.reserve(ParentDeopt->Inputs.size() + + ChildOB.Inputs.size()); + + MergedDeoptArgs.insert(MergedDeoptArgs.end(), + ParentDeopt->Inputs.begin(), + ParentDeopt->Inputs.end()); + MergedDeoptArgs.insert(MergedDeoptArgs.end(), ChildOB.Inputs.begin(), + ChildOB.Inputs.end()); + + OpDefs.emplace_back("deopt", std::move(MergedDeoptArgs)); + } + + Instruction *NewI = nullptr; + if (isa<CallInst>(I)) + NewI = CallInst::Create(cast<CallInst>(I), OpDefs, I); + else + NewI = InvokeInst::Create(cast<InvokeInst>(I), OpDefs, I); + + // Note: the RAUW does the appropriate fixup in VMap, so we need to do + // this even if the call returns void. + I->replaceAllUsesWith(NewI); + + VH = nullptr; + I->eraseFromParent(); + } + } + + // Update the callgraph if requested. + if (IFI.CG) + UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI); + + // For 'nodebug' functions, the associated DISubprogram is always null. + // Conservatively avoid propagating the callsite debug location to + // instructions inlined from a function whose DISubprogram is not null. + fixupLineNumbers(Caller, FirstNewBlock, TheCall, + CalledFunc->getSubprogram() != nullptr); + + // Clone existing noalias metadata if necessary. + CloneAliasScopeMetadata(CS, VMap); + + // Add noalias metadata if necessary. 
+ AddAliasScopeMetadata(CS, VMap, DL, CalleeAAR); + + // Propagate llvm.mem.parallel_loop_access if necessary. + PropagateParallelLoopAccessMetadata(CS, VMap); + + // Register any cloned assumptions. + if (IFI.GetAssumptionCache) + for (BasicBlock &NewBlock : + make_range(FirstNewBlock->getIterator(), Caller->end())) + for (Instruction &I : NewBlock) { + if (auto *II = dyn_cast<IntrinsicInst>(&I)) + if (II->getIntrinsicID() == Intrinsic::assume) + (*IFI.GetAssumptionCache)(*Caller).registerAssumption(II); + } + } + + // If there are any alloca instructions in the block that used to be the entry + // block for the callee, move them to the entry block of the caller. First + // calculate which instruction they should be inserted before. We insert the + // instructions at the end of the current alloca list. + { + BasicBlock::iterator InsertPoint = Caller->begin()->begin(); + for (BasicBlock::iterator I = FirstNewBlock->begin(), + E = FirstNewBlock->end(); I != E; ) { + AllocaInst *AI = dyn_cast<AllocaInst>(I++); + if (!AI) continue; + + // If the alloca is now dead, remove it. This often occurs due to code + // specialization. + if (AI->use_empty()) { + AI->eraseFromParent(); + continue; + } + + if (!allocaWouldBeStaticInEntry(AI)) + continue; + + // Keep track of the static allocas that we inline into the caller. + IFI.StaticAllocas.push_back(AI); + + // Scan for the block of allocas that we can move over, and move them + // all at once. + while (isa<AllocaInst>(I) && + allocaWouldBeStaticInEntry(cast<AllocaInst>(I))) { + IFI.StaticAllocas.push_back(cast<AllocaInst>(I)); + ++I; + } + + // Transfer all of the allocas over in a block. Using splice means + // that the instructions aren't removed from the symbol table, then + // reinserted. + Caller->getEntryBlock().getInstList().splice( + InsertPoint, FirstNewBlock->getInstList(), AI->getIterator(), I); + } + // Move any dbg.declares describing the allocas into the entry basic block. + DIBuilder DIB(*Caller->getParent()); + for (auto &AI : IFI.StaticAllocas) + replaceDbgDeclareForAlloca(AI, AI, DIB, /*Deref=*/false); + } + + bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false; + if (InlinedFunctionInfo.ContainsCalls) { + CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None; + if (CallInst *CI = dyn_cast<CallInst>(TheCall)) + CallSiteTailKind = CI->getTailCallKind(); + + for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; + ++BB) { + for (Instruction &I : *BB) { + CallInst *CI = dyn_cast<CallInst>(&I); + if (!CI) + continue; + + if (Function *F = CI->getCalledFunction()) + InlinedDeoptimizeCalls |= + F->getIntrinsicID() == Intrinsic::experimental_deoptimize; + + // We need to reduce the strength of any inlined tail calls. For + // musttail, we have to avoid introducing potential unbounded stack + // growth. For example, if functions 'f' and 'g' are mutually recursive + // with musttail, we can inline 'g' into 'f' so long as we preserve + // musttail on the cloned call to 'f'. If either the inlined call site + // or the cloned call site is *not* musttail, the program already has + // one frame of stack growth, so it's safe to remove musttail. 
Here is + // a table of example transformations: + // + // f -> musttail g -> musttail f ==> f -> musttail f + // f -> musttail g -> tail f ==> f -> tail f + // f -> g -> musttail f ==> f -> f + // f -> g -> tail f ==> f -> f + CallInst::TailCallKind ChildTCK = CI->getTailCallKind(); + ChildTCK = std::min(CallSiteTailKind, ChildTCK); + CI->setTailCallKind(ChildTCK); + InlinedMustTailCalls |= CI->isMustTailCall(); + + // Calls inlined through a 'nounwind' call site should be marked + // 'nounwind'. + if (MarkNoUnwind) + CI->setDoesNotThrow(); + } + } + } + + // Leave lifetime markers for the static alloca's, scoping them to the + // function we just inlined. + if (InsertLifetime && !IFI.StaticAllocas.empty()) { + IRBuilder<> builder(&FirstNewBlock->front()); + for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) { + AllocaInst *AI = IFI.StaticAllocas[ai]; + // Don't mark swifterror allocas. They can't have bitcast uses. + if (AI->isSwiftError()) + continue; + + // If the alloca is already scoped to something smaller than the whole + // function then there's no need to add redundant, less accurate markers. + if (hasLifetimeMarkers(AI)) + continue; + + // Try to determine the size of the allocation. + ConstantInt *AllocaSize = nullptr; + if (ConstantInt *AIArraySize = + dyn_cast<ConstantInt>(AI->getArraySize())) { + auto &DL = Caller->getParent()->getDataLayout(); + Type *AllocaType = AI->getAllocatedType(); + uint64_t AllocaTypeSize = DL.getTypeAllocSize(AllocaType); + uint64_t AllocaArraySize = AIArraySize->getLimitedValue(); + + // Don't add markers for zero-sized allocas. + if (AllocaArraySize == 0) + continue; + + // Check that array size doesn't saturate uint64_t and doesn't + // overflow when it's multiplied by type size. + if (AllocaArraySize != ~0ULL && + UINT64_MAX / AllocaArraySize >= AllocaTypeSize) { + AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()), + AllocaArraySize * AllocaTypeSize); + } + } + + builder.CreateLifetimeStart(AI, AllocaSize); + for (ReturnInst *RI : Returns) { + // Don't insert llvm.lifetime.end calls between a musttail or deoptimize + // call and a return. The return kills all local allocas. + if (InlinedMustTailCalls && + RI->getParent()->getTerminatingMustTailCall()) + continue; + if (InlinedDeoptimizeCalls && + RI->getParent()->getTerminatingDeoptimizeCall()) + continue; + IRBuilder<>(RI).CreateLifetimeEnd(AI, AllocaSize); + } + } + } + + // If the inlined code contained dynamic alloca instructions, wrap the inlined + // code with llvm.stacksave/llvm.stackrestore intrinsics. + if (InlinedFunctionInfo.ContainsDynamicAllocas) { + Module *M = Caller->getParent(); + // Get the two intrinsics we care about. + Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave); + Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore); + + // Insert the llvm.stacksave. + CallInst *SavedPtr = IRBuilder<>(&*FirstNewBlock, FirstNewBlock->begin()) + .CreateCall(StackSave, {}, "savedstack"); + + // Insert a call to llvm.stackrestore before any return instructions in the + // inlined function. + for (ReturnInst *RI : Returns) { + // Don't insert llvm.stackrestore calls between a musttail or deoptimize + // call and a return. The return will restore the stack pointer. 
+ if (InlinedMustTailCalls && RI->getParent()->getTerminatingMustTailCall()) + continue; + if (InlinedDeoptimizeCalls && RI->getParent()->getTerminatingDeoptimizeCall()) + continue; + IRBuilder<>(RI).CreateCall(StackRestore, SavedPtr); + } + } + + // If we are inlining for an invoke instruction, we must make sure to rewrite + // any call instructions into invoke instructions. This is sensitive to which + // funclet pads were top-level in the inlinee, so must be done before + // rewriting the "parent pad" links. + if (auto *II = dyn_cast<InvokeInst>(TheCall)) { + BasicBlock *UnwindDest = II->getUnwindDest(); + Instruction *FirstNonPHI = UnwindDest->getFirstNonPHI(); + if (isa<LandingPadInst>(FirstNonPHI)) { + HandleInlinedLandingPad(II, &*FirstNewBlock, InlinedFunctionInfo); + } else { + HandleInlinedEHPad(II, &*FirstNewBlock, InlinedFunctionInfo); + } + } + + // Update the lexical scopes of the new funclets and callsites. + // Anything that had 'none' as its parent is now nested inside the callsite's + // EHPad. + + if (CallSiteEHPad) { + for (Function::iterator BB = FirstNewBlock->getIterator(), + E = Caller->end(); + BB != E; ++BB) { + // Add bundle operands to any top-level call sites. + SmallVector<OperandBundleDef, 1> OpBundles; + for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;) { + Instruction *I = &*BBI++; + CallSite CS(I); + if (!CS) + continue; + + // Skip call sites which are nounwind intrinsics. + auto *CalledFn = + dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts()); + if (CalledFn && CalledFn->isIntrinsic() && CS.doesNotThrow()) + continue; + + // Skip call sites which already have a "funclet" bundle. + if (CS.getOperandBundle(LLVMContext::OB_funclet)) + continue; + + CS.getOperandBundlesAsDefs(OpBundles); + OpBundles.emplace_back("funclet", CallSiteEHPad); + + Instruction *NewInst; + if (CS.isCall()) + NewInst = CallInst::Create(cast<CallInst>(I), OpBundles, I); + else + NewInst = InvokeInst::Create(cast<InvokeInst>(I), OpBundles, I); + NewInst->takeName(I); + I->replaceAllUsesWith(NewInst); + I->eraseFromParent(); + + OpBundles.clear(); + } + + // It is problematic if the inlinee has a cleanupret which unwinds to + // caller and we inline it into a call site which doesn't unwind but into + // an EH pad that does. Such an edge must be dynamically unreachable. + // As such, we replace the cleanupret with unreachable. + if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(BB->getTerminator())) + if (CleanupRet->unwindsToCaller() && EHPadForCallUnwindsLocally) + changeToUnreachable(CleanupRet, /*UseLLVMTrap=*/false); + + Instruction *I = BB->getFirstNonPHI(); + if (!I->isEHPad()) + continue; + + if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) { + if (isa<ConstantTokenNone>(CatchSwitch->getParentPad())) + CatchSwitch->setParentPad(CallSiteEHPad); + } else { + auto *FPI = cast<FuncletPadInst>(I); + if (isa<ConstantTokenNone>(FPI->getParentPad())) + FPI->setParentPad(CallSiteEHPad); + } + } + } + + if (InlinedDeoptimizeCalls) { + // We need to at least remove the deoptimizing returns from the Return set, + // so that the control flow from those returns does not get merged into the + // caller (but terminate it instead). If the caller's return type does not + // match the callee's return type, we also need to change the return type of + // the intrinsic. 
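+ // E.g. (illustrative) a deoptimizing tail in the callee such as
+ //   %v = call i32 @llvm.experimental.deoptimize.i32(...) [ "deopt"(...) ]
+ //   ret i32 %v
+ // must, after inlining into a caller returning i64, call
+ // @llvm.experimental.deoptimize.i64 instead and return its result.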
+ if (Caller->getReturnType() == TheCall->getType()) { + auto NewEnd = remove_if(Returns, [](ReturnInst *RI) { + return RI->getParent()->getTerminatingDeoptimizeCall() != nullptr; + }); + Returns.erase(NewEnd, Returns.end()); + } else { + SmallVector<ReturnInst *, 8> NormalReturns; + Function *NewDeoptIntrinsic = Intrinsic::getDeclaration( + Caller->getParent(), Intrinsic::experimental_deoptimize, + {Caller->getReturnType()}); + + for (ReturnInst *RI : Returns) { + CallInst *DeoptCall = RI->getParent()->getTerminatingDeoptimizeCall(); + if (!DeoptCall) { + NormalReturns.push_back(RI); + continue; + } + + // The calling convention on the deoptimize call itself may be bogus, + // since the code we're inlining may have undefined behavior (and may + // never actually execute at runtime); but all + // @llvm.experimental.deoptimize declarations have to have the same + // calling convention in a well-formed module. + auto CallingConv = DeoptCall->getCalledFunction()->getCallingConv(); + NewDeoptIntrinsic->setCallingConv(CallingConv); + auto *CurBB = RI->getParent(); + RI->eraseFromParent(); + + SmallVector<Value *, 4> CallArgs(DeoptCall->arg_begin(), + DeoptCall->arg_end()); + + SmallVector<OperandBundleDef, 1> OpBundles; + DeoptCall->getOperandBundlesAsDefs(OpBundles); + DeoptCall->eraseFromParent(); + assert(!OpBundles.empty() && + "Expected at least the deopt operand bundle"); + + IRBuilder<> Builder(CurBB); + CallInst *NewDeoptCall = + Builder.CreateCall(NewDeoptIntrinsic, CallArgs, OpBundles); + NewDeoptCall->setCallingConv(CallingConv); + if (NewDeoptCall->getType()->isVoidTy()) + Builder.CreateRetVoid(); + else + Builder.CreateRet(NewDeoptCall); + } + + // Leave behind the normal returns so we can merge control flow. + std::swap(Returns, NormalReturns); + } + } + + // Handle any inlined musttail call sites. In order for a new call site to be + // musttail, the source of the clone and the inlined call site must have been + // musttail. Therefore it's safe to return without merging control into the + // phi below. + if (InlinedMustTailCalls) { + // Check if we need to bitcast the result of any musttail calls. + Type *NewRetTy = Caller->getReturnType(); + bool NeedBitCast = !TheCall->use_empty() && TheCall->getType() != NewRetTy; + + // Handle the returns preceded by musttail calls separately. + SmallVector<ReturnInst *, 8> NormalReturns; + for (ReturnInst *RI : Returns) { + CallInst *ReturnedMustTail = + RI->getParent()->getTerminatingMustTailCall(); + if (!ReturnedMustTail) { + NormalReturns.push_back(RI); + continue; + } + if (!NeedBitCast) + continue; + + // Delete the old return and any preceding bitcast. + BasicBlock *CurBB = RI->getParent(); + auto *OldCast = dyn_cast_or_null<BitCastInst>(RI->getReturnValue()); + RI->eraseFromParent(); + if (OldCast) + OldCast->eraseFromParent(); + + // Insert a new bitcast and return with the right type. + IRBuilder<> Builder(CurBB); + Builder.CreateRet(Builder.CreateBitCast(ReturnedMustTail, NewRetTy)); + } + + // Leave behind the normal returns so we can merge control flow. + std::swap(Returns, NormalReturns); + } + + // Now that all of the transforms on the inlined code have taken place but + // before we splice the inlined code into the CFG and lose track of which + // blocks were actually inlined, collect the call sites. We only do this if + // call graph updates weren't requested, as those provide value handle based + // tracking of inlined call sites instead. 
+ if (InlinedFunctionInfo.ContainsCalls && !IFI.CG) { + // Otherwise just collect the raw call sites that were inlined. + for (BasicBlock &NewBB : + make_range(FirstNewBlock->getIterator(), Caller->end())) + for (Instruction &I : NewBB) + if (auto CS = CallSite(&I)) + IFI.InlinedCallSites.push_back(CS); + } + + // If we cloned in _exactly one_ basic block, and if that block ends in a + // return instruction, we splice the body of the inlined callee directly into + // the calling basic block. + if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) { + // Move all of the instructions right before the call. + OrigBB->getInstList().splice(TheCall->getIterator(), + FirstNewBlock->getInstList(), + FirstNewBlock->begin(), FirstNewBlock->end()); + // Remove the cloned basic block. + Caller->getBasicBlockList().pop_back(); + + // If the call site was an invoke instruction, add a branch to the normal + // destination. + if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { + BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall); + NewBr->setDebugLoc(Returns[0]->getDebugLoc()); + } + + // If the return instruction returned a value, replace uses of the call with + // uses of the returned value. + if (!TheCall->use_empty()) { + ReturnInst *R = Returns[0]; + if (TheCall == R->getReturnValue()) + TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); + else + TheCall->replaceAllUsesWith(R->getReturnValue()); + } + // Since we are now done with the Call/Invoke, we can delete it. + TheCall->eraseFromParent(); + + // Since we are now done with the return instruction, delete it also. + Returns[0]->eraseFromParent(); + + // We are now done with the inlining. + return true; + } + + // Otherwise, we have the normal case, of more than one block to inline or + // multiple return sites. + + // We want to clone the entire callee function into the hole between the + // "starter" and "ender" blocks. How we accomplish this depends on whether + // this is an invoke instruction or a call instruction. + BasicBlock *AfterCallBB; + BranchInst *CreatedBranchToNormalDest = nullptr; + if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { + + // Add an unconditional branch to make this look like the CallInst case... + CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), TheCall); + + // Split the basic block. This guarantees that no PHI nodes will have to be + // updated due to new incoming edges, and make the invoke case more + // symmetric to the call case. + AfterCallBB = + OrigBB->splitBasicBlock(CreatedBranchToNormalDest->getIterator(), + CalledFunc->getName() + ".exit"); + + } else { // It's a call + // If this is a call instruction, we need to split the basic block that + // the call lives in. + // + AfterCallBB = OrigBB->splitBasicBlock(TheCall->getIterator(), + CalledFunc->getName() + ".exit"); + } + + // Change the branch that used to go to AfterCallBB to branch to the first + // basic block of the inlined function. + // + TerminatorInst *Br = OrigBB->getTerminator(); + assert(Br && Br->getOpcode() == Instruction::Br && + "splitBasicBlock broken!"); + Br->setOperand(0, &*FirstNewBlock); + + // Now that the function is correct, make it a little bit nicer. In + // particular, move the basic blocks inserted from the end of the function + // into the space made by splitting the source basic block. 
+ Caller->getBasicBlockList().splice(AfterCallBB->getIterator(), + Caller->getBasicBlockList(), FirstNewBlock, + Caller->end()); + + // Handle all of the return instructions that we just cloned in, and eliminate + // any users of the original call/invoke instruction. + Type *RTy = CalledFunc->getReturnType(); + + PHINode *PHI = nullptr; + if (Returns.size() > 1) { + // The PHI node should go at the front of the new basic block to merge all + // possible incoming values. + if (!TheCall->use_empty()) { + PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(), + &AfterCallBB->front()); + // Anything that used the result of the function call should now use the + // PHI node as their operand. + TheCall->replaceAllUsesWith(PHI); + } + + // Loop over all of the return instructions adding entries to the PHI node + // as appropriate. + if (PHI) { + for (unsigned i = 0, e = Returns.size(); i != e; ++i) { + ReturnInst *RI = Returns[i]; + assert(RI->getReturnValue()->getType() == PHI->getType() && + "Ret value not consistent in function!"); + PHI->addIncoming(RI->getReturnValue(), RI->getParent()); + } + } + + // Add a branch to the merge points and remove return instructions. + DebugLoc Loc; + for (unsigned i = 0, e = Returns.size(); i != e; ++i) { + ReturnInst *RI = Returns[i]; + BranchInst* BI = BranchInst::Create(AfterCallBB, RI); + Loc = RI->getDebugLoc(); + BI->setDebugLoc(Loc); + RI->eraseFromParent(); + } + // We need to set the debug location to *somewhere* inside the + // inlined function. The line number may be nonsensical, but the + // instruction will at least be associated with the right + // function. + if (CreatedBranchToNormalDest) + CreatedBranchToNormalDest->setDebugLoc(Loc); + } else if (!Returns.empty()) { + // Otherwise, if there is exactly one return value, just replace anything + // using the return value of the call with the computed value. + if (!TheCall->use_empty()) { + if (TheCall == Returns[0]->getReturnValue()) + TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); + else + TheCall->replaceAllUsesWith(Returns[0]->getReturnValue()); + } + + // Update PHI nodes that use the ReturnBB to use the AfterCallBB. + BasicBlock *ReturnBB = Returns[0]->getParent(); + ReturnBB->replaceAllUsesWith(AfterCallBB); + + // Splice the code from the return block into the block that it will return + // to, which contains the code that was after the call. + AfterCallBB->getInstList().splice(AfterCallBB->begin(), + ReturnBB->getInstList()); + + if (CreatedBranchToNormalDest) + CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc()); + + // Delete the return instruction now and empty ReturnBB now. + Returns[0]->eraseFromParent(); + ReturnBB->eraseFromParent(); + } else if (!TheCall->use_empty()) { + // No returns, but something is using the return value of the call. Just + // nuke the result. + TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); + } + + // Since we are now done with the Call/Invoke, we can delete it. + TheCall->eraseFromParent(); + + // If we inlined any musttail calls and the original return is now + // unreachable, delete it. It can only contain a bitcast and ret. + if (InlinedMustTailCalls && pred_begin(AfterCallBB) == pred_end(AfterCallBB)) + AfterCallBB->eraseFromParent(); + + // We should always be able to fold the entry block of the function into the + // single predecessor of the block... 
+ assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!"); + BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0); + + // Splice the code entry block into calling block, right before the + // unconditional branch. + CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes + OrigBB->getInstList().splice(Br->getIterator(), CalleeEntry->getInstList()); + + // Remove the unconditional branch. + OrigBB->getInstList().erase(Br); + + // Now we can remove the CalleeEntry block, which is now empty. + Caller->getBasicBlockList().erase(CalleeEntry); + + // If we inserted a phi node, check to see if it has a single value (e.g. all + // the entries are the same or undef). If so, remove the PHI so it doesn't + // block other optimizations. + if (PHI) { + AssumptionCache *AC = + IFI.GetAssumptionCache ? &(*IFI.GetAssumptionCache)(*Caller) : nullptr; + auto &DL = Caller->getParent()->getDataLayout(); + if (Value *V = SimplifyInstruction(PHI, DL, nullptr, nullptr, AC)) { + PHI->replaceAllUsesWith(V); + PHI->eraseFromParent(); + } + } + + return true; +} diff --git a/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp b/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp new file mode 100644 index 000000000000..8a1973d1db05 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp @@ -0,0 +1,64 @@ +//===- InstructionNamer.cpp - Give anonymous instructions names -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is a little utility pass that gives instructions names, this is mostly +// useful when diffing the effect of an optimization because deleting an +// unnamed instruction can change all other instruction numbering, making the +// diff very noisy. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Type.h" +#include "llvm/Pass.h" +using namespace llvm; + +namespace { + struct InstNamer : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + InstNamer() : FunctionPass(ID) { + initializeInstNamerPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &Info) const override { + Info.setPreservesAll(); + } + + bool runOnFunction(Function &F) override { + for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); + AI != AE; ++AI) + if (!AI->hasName() && !AI->getType()->isVoidTy()) + AI->setName("arg"); + + for (BasicBlock &BB : F) { + if (!BB.hasName()) + BB.setName("bb"); + + for (Instruction &I : BB) + if (!I.hasName() && !I.getType()->isVoidTy()) + I.setName("tmp"); + } + return true; + } + }; + + char InstNamer::ID = 0; +} + +INITIALIZE_PASS(InstNamer, "instnamer", + "Assign names to anonymous instructions", false, false) +char &llvm::InstructionNamerID = InstNamer::ID; +//===----------------------------------------------------------------------===// +// +// InstructionNamer - Give any unnamed non-void instructions "tmp" names. 
+// +FunctionPass *llvm::createInstructionNamerPass() { + return new InstNamer(); +} diff --git a/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp b/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp new file mode 100644 index 000000000000..5a90dcb033b2 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp @@ -0,0 +1,674 @@ +//===-- IntegerDivision.cpp - Expand integer division ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains an implementation of 32bit and 64bit scalar integer +// division for targets that don't have native support. It's largely derived +// from compiler-rt's implementations of __udivsi3 and __udivmoddi4, +// but hand-tuned for targets that prefer less control flow. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/IntegerDivision.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include <utility> + +using namespace llvm; + +#define DEBUG_TYPE "integer-division" + +/// Generate code to compute the remainder of two signed integers. Returns the +/// remainder, which will have the sign of the dividend. Builder's insert point +/// should be pointing where the caller wants code generated, e.g. at the srem +/// instruction. This will generate a urem in the process, and Builder's insert +/// point will be pointing at the uren (if present, i.e. not folded), ready to +/// be expanded if the user wishes +static Value *generateSignedRemainderCode(Value *Dividend, Value *Divisor, + IRBuilder<> &Builder) { + unsigned BitWidth = Dividend->getType()->getIntegerBitWidth(); + ConstantInt *Shift; + + if (BitWidth == 64) { + Shift = Builder.getInt64(63); + } else { + assert(BitWidth == 32 && "Unexpected bit width"); + Shift = Builder.getInt32(31); + } + + // Following instructions are generated for both i32 (shift 31) and + // i64 (shift 63). + + // ; %dividend_sgn = ashr i32 %dividend, 31 + // ; %divisor_sgn = ashr i32 %divisor, 31 + // ; %dvd_xor = xor i32 %dividend, %dividend_sgn + // ; %dvs_xor = xor i32 %divisor, %divisor_sgn + // ; %u_dividend = sub i32 %dvd_xor, %dividend_sgn + // ; %u_divisor = sub i32 %dvs_xor, %divisor_sgn + // ; %urem = urem i32 %dividend, %divisor + // ; %xored = xor i32 %urem, %dividend_sgn + // ; %srem = sub i32 %xored, %dividend_sgn + Value *DividendSign = Builder.CreateAShr(Dividend, Shift); + Value *DivisorSign = Builder.CreateAShr(Divisor, Shift); + Value *DvdXor = Builder.CreateXor(Dividend, DividendSign); + Value *DvsXor = Builder.CreateXor(Divisor, DivisorSign); + Value *UDividend = Builder.CreateSub(DvdXor, DividendSign); + Value *UDivisor = Builder.CreateSub(DvsXor, DivisorSign); + Value *URem = Builder.CreateURem(UDividend, UDivisor); + Value *Xored = Builder.CreateXor(URem, DividendSign); + Value *SRem = Builder.CreateSub(Xored, DividendSign); + + if (Instruction *URemInst = dyn_cast<Instruction>(URem)) + Builder.SetInsertPoint(URemInst); + + return SRem; +} + + +/// Generate code to compute the remainder of two unsigned integers. Returns the +/// remainder. Builder's insert point should be pointing where the caller wants +/// code generated, e.g. at the urem instruction. 
This will generate a udiv in +/// the process, and Builder's insert point will be pointing at the udiv (if +/// present, i.e. not folded), ready to be expanded if the user wishes +static Value *generatedUnsignedRemainderCode(Value *Dividend, Value *Divisor, + IRBuilder<> &Builder) { + // Remainder = Dividend - Quotient*Divisor + + // Following instructions are generated for both i32 and i64 + + // ; %quotient = udiv i32 %dividend, %divisor + // ; %product = mul i32 %divisor, %quotient + // ; %remainder = sub i32 %dividend, %product + Value *Quotient = Builder.CreateUDiv(Dividend, Divisor); + Value *Product = Builder.CreateMul(Divisor, Quotient); + Value *Remainder = Builder.CreateSub(Dividend, Product); + + if (Instruction *UDiv = dyn_cast<Instruction>(Quotient)) + Builder.SetInsertPoint(UDiv); + + return Remainder; +} + +/// Generate code to divide two signed integers. Returns the quotient, rounded +/// towards 0. Builder's insert point should be pointing where the caller wants +/// code generated, e.g. at the sdiv instruction. This will generate a udiv in +/// the process, and Builder's insert point will be pointing at the udiv (if +/// present, i.e. not folded), ready to be expanded if the user wishes. +static Value *generateSignedDivisionCode(Value *Dividend, Value *Divisor, + IRBuilder<> &Builder) { + // Implementation taken from compiler-rt's __divsi3 and __divdi3 + + unsigned BitWidth = Dividend->getType()->getIntegerBitWidth(); + ConstantInt *Shift; + + if (BitWidth == 64) { + Shift = Builder.getInt64(63); + } else { + assert(BitWidth == 32 && "Unexpected bit width"); + Shift = Builder.getInt32(31); + } + + // Following instructions are generated for both i32 (shift 31) and + // i64 (shift 63). + + // ; %tmp = ashr i32 %dividend, 31 + // ; %tmp1 = ashr i32 %divisor, 31 + // ; %tmp2 = xor i32 %tmp, %dividend + // ; %u_dvnd = sub nsw i32 %tmp2, %tmp + // ; %tmp3 = xor i32 %tmp1, %divisor + // ; %u_dvsr = sub nsw i32 %tmp3, %tmp1 + // ; %q_sgn = xor i32 %tmp1, %tmp + // ; %q_mag = udiv i32 %u_dvnd, %u_dvsr + // ; %tmp4 = xor i32 %q_mag, %q_sgn + // ; %q = sub i32 %tmp4, %q_sgn + Value *Tmp = Builder.CreateAShr(Dividend, Shift); + Value *Tmp1 = Builder.CreateAShr(Divisor, Shift); + Value *Tmp2 = Builder.CreateXor(Tmp, Dividend); + Value *U_Dvnd = Builder.CreateSub(Tmp2, Tmp); + Value *Tmp3 = Builder.CreateXor(Tmp1, Divisor); + Value *U_Dvsr = Builder.CreateSub(Tmp3, Tmp1); + Value *Q_Sgn = Builder.CreateXor(Tmp1, Tmp); + Value *Q_Mag = Builder.CreateUDiv(U_Dvnd, U_Dvsr); + Value *Tmp4 = Builder.CreateXor(Q_Mag, Q_Sgn); + Value *Q = Builder.CreateSub(Tmp4, Q_Sgn); + + if (Instruction *UDiv = dyn_cast<Instruction>(Q_Mag)) + Builder.SetInsertPoint(UDiv); + + return Q; +} + +/// Generates code to divide two unsigned scalar 32-bit or 64-bit integers. +/// Returns the quotient, rounded towards 0. Builder's insert point should +/// point where the caller wants code generated, e.g. at the udiv instruction. +static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, + IRBuilder<> &Builder) { + // The basic algorithm can be found in the compiler-rt project's + // implementation of __udivsi3.c. Here, we do a lower-level IR based approach + // that's been hand-tuned to lessen the amount of control flow involved. 
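// [Editor's note, illustrative only; not part of the imported source] A
// minimal plain-C++ model of the shift/subtract ("restoring") division that
// the IR emitted below implements for the 32-bit case. Names and structure
// here are made up for the sketch; the real expansion additionally uses ctlz
// to skip the dividend's leading zero bits and does the per-iteration compare
// branch-free, but the arithmetic is the same. Callers reach this expansion
// through expandDivision()/expandRemainder(), defined later in this file.
#include <cstdint>

static uint32_t udiv32Model(uint32_t Dividend, uint32_t Divisor) {
  if (Divisor == 0 || Dividend == 0)
    return 0;                          // mirrors the "special cases" block below
  uint32_t Quotient = 0, Remainder = 0;
  for (int Bit = 31; Bit >= 0; --Bit) {
    // Shift the next dividend bit into the running remainder.
    Remainder = (Remainder << 1) | ((Dividend >> Bit) & 1);
    if (Remainder >= Divisor) {        // done without a branch in the emitted IR
      Remainder -= Divisor;
      Quotient |= 1u << Bit;
    }
  }
  return Quotient;
}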
+ + // Some helper values + IntegerType *DivTy = cast<IntegerType>(Dividend->getType()); + unsigned BitWidth = DivTy->getBitWidth(); + + ConstantInt *Zero; + ConstantInt *One; + ConstantInt *NegOne; + ConstantInt *MSB; + + if (BitWidth == 64) { + Zero = Builder.getInt64(0); + One = Builder.getInt64(1); + NegOne = ConstantInt::getSigned(DivTy, -1); + MSB = Builder.getInt64(63); + } else { + assert(BitWidth == 32 && "Unexpected bit width"); + Zero = Builder.getInt32(0); + One = Builder.getInt32(1); + NegOne = ConstantInt::getSigned(DivTy, -1); + MSB = Builder.getInt32(31); + } + + ConstantInt *True = Builder.getTrue(); + + BasicBlock *IBB = Builder.GetInsertBlock(); + Function *F = IBB->getParent(); + Function *CTLZ = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, + DivTy); + + // Our CFG is going to look like: + // +---------------------+ + // | special-cases | + // | ... | + // +---------------------+ + // | | + // | +----------+ + // | | bb1 | + // | | ... | + // | +----------+ + // | | | + // | | +------------+ + // | | | preheader | + // | | | ... | + // | | +------------+ + // | | | + // | | | +---+ + // | | | | | + // | | +------------+ | + // | | | do-while | | + // | | | ... | | + // | | +------------+ | + // | | | | | + // | +-----------+ +---+ + // | | loop-exit | + // | | ... | + // | +-----------+ + // | | + // +-------+ + // | ... | + // | end | + // +-------+ + BasicBlock *SpecialCases = Builder.GetInsertBlock(); + SpecialCases->setName(Twine(SpecialCases->getName(), "_udiv-special-cases")); + BasicBlock *End = SpecialCases->splitBasicBlock(Builder.GetInsertPoint(), + "udiv-end"); + BasicBlock *LoopExit = BasicBlock::Create(Builder.getContext(), + "udiv-loop-exit", F, End); + BasicBlock *DoWhile = BasicBlock::Create(Builder.getContext(), + "udiv-do-while", F, End); + BasicBlock *Preheader = BasicBlock::Create(Builder.getContext(), + "udiv-preheader", F, End); + BasicBlock *BB1 = BasicBlock::Create(Builder.getContext(), + "udiv-bb1", F, End); + + // We'll be overwriting the terminator to insert our extra blocks + SpecialCases->getTerminator()->eraseFromParent(); + + // Same instructions are generated for both i32 (msb 31) and i64 (msb 63). + + // First off, check for special cases: dividend or divisor is zero, divisor + // is greater than dividend, and divisor is 1. 
+ // ; special-cases: + // ; %ret0_1 = icmp eq i32 %divisor, 0 + // ; %ret0_2 = icmp eq i32 %dividend, 0 + // ; %ret0_3 = or i1 %ret0_1, %ret0_2 + // ; %tmp0 = tail call i32 @llvm.ctlz.i32(i32 %divisor, i1 true) + // ; %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %dividend, i1 true) + // ; %sr = sub nsw i32 %tmp0, %tmp1 + // ; %ret0_4 = icmp ugt i32 %sr, 31 + // ; %ret0 = or i1 %ret0_3, %ret0_4 + // ; %retDividend = icmp eq i32 %sr, 31 + // ; %retVal = select i1 %ret0, i32 0, i32 %dividend + // ; %earlyRet = or i1 %ret0, %retDividend + // ; br i1 %earlyRet, label %end, label %bb1 + Builder.SetInsertPoint(SpecialCases); + Value *Ret0_1 = Builder.CreateICmpEQ(Divisor, Zero); + Value *Ret0_2 = Builder.CreateICmpEQ(Dividend, Zero); + Value *Ret0_3 = Builder.CreateOr(Ret0_1, Ret0_2); + Value *Tmp0 = Builder.CreateCall(CTLZ, {Divisor, True}); + Value *Tmp1 = Builder.CreateCall(CTLZ, {Dividend, True}); + Value *SR = Builder.CreateSub(Tmp0, Tmp1); + Value *Ret0_4 = Builder.CreateICmpUGT(SR, MSB); + Value *Ret0 = Builder.CreateOr(Ret0_3, Ret0_4); + Value *RetDividend = Builder.CreateICmpEQ(SR, MSB); + Value *RetVal = Builder.CreateSelect(Ret0, Zero, Dividend); + Value *EarlyRet = Builder.CreateOr(Ret0, RetDividend); + Builder.CreateCondBr(EarlyRet, End, BB1); + + // ; bb1: ; preds = %special-cases + // ; %sr_1 = add i32 %sr, 1 + // ; %tmp2 = sub i32 31, %sr + // ; %q = shl i32 %dividend, %tmp2 + // ; %skipLoop = icmp eq i32 %sr_1, 0 + // ; br i1 %skipLoop, label %loop-exit, label %preheader + Builder.SetInsertPoint(BB1); + Value *SR_1 = Builder.CreateAdd(SR, One); + Value *Tmp2 = Builder.CreateSub(MSB, SR); + Value *Q = Builder.CreateShl(Dividend, Tmp2); + Value *SkipLoop = Builder.CreateICmpEQ(SR_1, Zero); + Builder.CreateCondBr(SkipLoop, LoopExit, Preheader); + + // ; preheader: ; preds = %bb1 + // ; %tmp3 = lshr i32 %dividend, %sr_1 + // ; %tmp4 = add i32 %divisor, -1 + // ; br label %do-while + Builder.SetInsertPoint(Preheader); + Value *Tmp3 = Builder.CreateLShr(Dividend, SR_1); + Value *Tmp4 = Builder.CreateAdd(Divisor, NegOne); + Builder.CreateBr(DoWhile); + + // ; do-while: ; preds = %do-while, %preheader + // ; %carry_1 = phi i32 [ 0, %preheader ], [ %carry, %do-while ] + // ; %sr_3 = phi i32 [ %sr_1, %preheader ], [ %sr_2, %do-while ] + // ; %r_1 = phi i32 [ %tmp3, %preheader ], [ %r, %do-while ] + // ; %q_2 = phi i32 [ %q, %preheader ], [ %q_1, %do-while ] + // ; %tmp5 = shl i32 %r_1, 1 + // ; %tmp6 = lshr i32 %q_2, 31 + // ; %tmp7 = or i32 %tmp5, %tmp6 + // ; %tmp8 = shl i32 %q_2, 1 + // ; %q_1 = or i32 %carry_1, %tmp8 + // ; %tmp9 = sub i32 %tmp4, %tmp7 + // ; %tmp10 = ashr i32 %tmp9, 31 + // ; %carry = and i32 %tmp10, 1 + // ; %tmp11 = and i32 %tmp10, %divisor + // ; %r = sub i32 %tmp7, %tmp11 + // ; %sr_2 = add i32 %sr_3, -1 + // ; %tmp12 = icmp eq i32 %sr_2, 0 + // ; br i1 %tmp12, label %loop-exit, label %do-while + Builder.SetInsertPoint(DoWhile); + PHINode *Carry_1 = Builder.CreatePHI(DivTy, 2); + PHINode *SR_3 = Builder.CreatePHI(DivTy, 2); + PHINode *R_1 = Builder.CreatePHI(DivTy, 2); + PHINode *Q_2 = Builder.CreatePHI(DivTy, 2); + Value *Tmp5 = Builder.CreateShl(R_1, One); + Value *Tmp6 = Builder.CreateLShr(Q_2, MSB); + Value *Tmp7 = Builder.CreateOr(Tmp5, Tmp6); + Value *Tmp8 = Builder.CreateShl(Q_2, One); + Value *Q_1 = Builder.CreateOr(Carry_1, Tmp8); + Value *Tmp9 = Builder.CreateSub(Tmp4, Tmp7); + Value *Tmp10 = Builder.CreateAShr(Tmp9, MSB); + Value *Carry = Builder.CreateAnd(Tmp10, One); + Value *Tmp11 = Builder.CreateAnd(Tmp10, Divisor); + Value *R = Builder.CreateSub(Tmp7, 
Tmp11); + Value *SR_2 = Builder.CreateAdd(SR_3, NegOne); + Value *Tmp12 = Builder.CreateICmpEQ(SR_2, Zero); + Builder.CreateCondBr(Tmp12, LoopExit, DoWhile); + + // ; loop-exit: ; preds = %do-while, %bb1 + // ; %carry_2 = phi i32 [ 0, %bb1 ], [ %carry, %do-while ] + // ; %q_3 = phi i32 [ %q, %bb1 ], [ %q_1, %do-while ] + // ; %tmp13 = shl i32 %q_3, 1 + // ; %q_4 = or i32 %carry_2, %tmp13 + // ; br label %end + Builder.SetInsertPoint(LoopExit); + PHINode *Carry_2 = Builder.CreatePHI(DivTy, 2); + PHINode *Q_3 = Builder.CreatePHI(DivTy, 2); + Value *Tmp13 = Builder.CreateShl(Q_3, One); + Value *Q_4 = Builder.CreateOr(Carry_2, Tmp13); + Builder.CreateBr(End); + + // ; end: ; preds = %loop-exit, %special-cases + // ; %q_5 = phi i32 [ %q_4, %loop-exit ], [ %retVal, %special-cases ] + // ; ret i32 %q_5 + Builder.SetInsertPoint(End, End->begin()); + PHINode *Q_5 = Builder.CreatePHI(DivTy, 2); + + // Populate the Phis, since all values have now been created. Our Phis were: + // ; %carry_1 = phi i32 [ 0, %preheader ], [ %carry, %do-while ] + Carry_1->addIncoming(Zero, Preheader); + Carry_1->addIncoming(Carry, DoWhile); + // ; %sr_3 = phi i32 [ %sr_1, %preheader ], [ %sr_2, %do-while ] + SR_3->addIncoming(SR_1, Preheader); + SR_3->addIncoming(SR_2, DoWhile); + // ; %r_1 = phi i32 [ %tmp3, %preheader ], [ %r, %do-while ] + R_1->addIncoming(Tmp3, Preheader); + R_1->addIncoming(R, DoWhile); + // ; %q_2 = phi i32 [ %q, %preheader ], [ %q_1, %do-while ] + Q_2->addIncoming(Q, Preheader); + Q_2->addIncoming(Q_1, DoWhile); + // ; %carry_2 = phi i32 [ 0, %bb1 ], [ %carry, %do-while ] + Carry_2->addIncoming(Zero, BB1); + Carry_2->addIncoming(Carry, DoWhile); + // ; %q_3 = phi i32 [ %q, %bb1 ], [ %q_1, %do-while ] + Q_3->addIncoming(Q, BB1); + Q_3->addIncoming(Q_1, DoWhile); + // ; %q_5 = phi i32 [ %q_4, %loop-exit ], [ %retVal, %special-cases ] + Q_5->addIncoming(Q_4, LoopExit); + Q_5->addIncoming(RetVal, SpecialCases); + + return Q_5; +} + +/// Generate code to calculate the remainder of two integers, replacing Rem with +/// the generated code. This currently generates code using the udiv expansion, +/// but future work includes generating more specialized code, e.g. when more +/// information about the operands are known. Implements both 32bit and 64bit +/// scalar division. +/// +/// @brief Replace Rem with generated code. +bool llvm::expandRemainder(BinaryOperator *Rem) { + assert((Rem->getOpcode() == Instruction::SRem || + Rem->getOpcode() == Instruction::URem) && + "Trying to expand remainder from a non-remainder function"); + + IRBuilder<> Builder(Rem); + + assert(!Rem->getType()->isVectorTy() && "Div over vectors not supported"); + assert((Rem->getType()->getIntegerBitWidth() == 32 || + Rem->getType()->getIntegerBitWidth() == 64) && + "Div of bitwidth other than 32 or 64 not supported"); + + // First prepare the sign if it's a signed remainder + if (Rem->getOpcode() == Instruction::SRem) { + Value *Remainder = generateSignedRemainderCode(Rem->getOperand(0), + Rem->getOperand(1), Builder); + + // Check whether this is the insert point while Rem is still valid. + bool IsInsertPoint = Rem->getIterator() == Builder.GetInsertPoint(); + Rem->replaceAllUsesWith(Remainder); + Rem->dropAllReferences(); + Rem->eraseFromParent(); + + // If we didn't actually generate an urem instruction, we're done + // This happens for example if the input were constant. 
In this case the + // Builder insertion point was unchanged + if (IsInsertPoint) + return true; + + BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint()); + Rem = BO; + } + + Value *Remainder = generatedUnsignedRemainderCode(Rem->getOperand(0), + Rem->getOperand(1), + Builder); + + Rem->replaceAllUsesWith(Remainder); + Rem->dropAllReferences(); + Rem->eraseFromParent(); + + // Expand the udiv + if (BinaryOperator *UDiv = dyn_cast<BinaryOperator>(Builder.GetInsertPoint())) { + assert(UDiv->getOpcode() == Instruction::UDiv && "Non-udiv in expansion?"); + expandDivision(UDiv); + } + + return true; +} + + +/// Generate code to divide two integers, replacing Div with the generated +/// code. This currently generates code similarly to compiler-rt's +/// implementations, but future work includes generating more specialized code +/// when more information about the operands are known. Implements both +/// 32bit and 64bit scalar division. +/// +/// @brief Replace Div with generated code. +bool llvm::expandDivision(BinaryOperator *Div) { + assert((Div->getOpcode() == Instruction::SDiv || + Div->getOpcode() == Instruction::UDiv) && + "Trying to expand division from a non-division function"); + + IRBuilder<> Builder(Div); + + assert(!Div->getType()->isVectorTy() && "Div over vectors not supported"); + assert((Div->getType()->getIntegerBitWidth() == 32 || + Div->getType()->getIntegerBitWidth() == 64) && + "Div of bitwidth other than 32 or 64 not supported"); + + // First prepare the sign if it's a signed division + if (Div->getOpcode() == Instruction::SDiv) { + // Lower the code to unsigned division, and reset Div to point to the udiv. + Value *Quotient = generateSignedDivisionCode(Div->getOperand(0), + Div->getOperand(1), Builder); + + // Check whether this is the insert point while Div is still valid. + bool IsInsertPoint = Div->getIterator() == Builder.GetInsertPoint(); + Div->replaceAllUsesWith(Quotient); + Div->dropAllReferences(); + Div->eraseFromParent(); + + // If we didn't actually generate an udiv instruction, we're done + // This happens for example if the input were constant. In this case the + // Builder insertion point was unchanged + if (IsInsertPoint) + return true; + + BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint()); + Div = BO; + } + + // Insert the unsigned division code + Value *Quotient = generateUnsignedDivisionCode(Div->getOperand(0), + Div->getOperand(1), + Builder); + Div->replaceAllUsesWith(Quotient); + Div->dropAllReferences(); + Div->eraseFromParent(); + + return true; +} + +/// Generate code to compute the remainder of two integers of bitwidth up to +/// 32 bits. Uses the above routines and extends the inputs/truncates the +/// outputs to operate in 32 bits; that is, these routines are good for targets +/// that have no or very little suppport for smaller than 32 bit integer +/// arithmetic. +/// +/// @brief Replace Rem with emulation code. 
+bool llvm::expandRemainderUpTo32Bits(BinaryOperator *Rem) { + assert((Rem->getOpcode() == Instruction::SRem || + Rem->getOpcode() == Instruction::URem) && + "Trying to expand remainder from a non-remainder function"); + + Type *RemTy = Rem->getType(); + assert(!RemTy->isVectorTy() && "Div over vectors not supported"); + + unsigned RemTyBitWidth = RemTy->getIntegerBitWidth(); + + assert(RemTyBitWidth <= 32 && + "Div of bitwidth greater than 32 not supported"); + + if (RemTyBitWidth == 32) + return expandRemainder(Rem); + + // If bitwidth smaller than 32 extend inputs, extend output and proceed + // with 32 bit division. + IRBuilder<> Builder(Rem); + + Value *ExtDividend; + Value *ExtDivisor; + Value *ExtRem; + Value *Trunc; + Type *Int32Ty = Builder.getInt32Ty(); + + if (Rem->getOpcode() == Instruction::SRem) { + ExtDividend = Builder.CreateSExt(Rem->getOperand(0), Int32Ty); + ExtDivisor = Builder.CreateSExt(Rem->getOperand(1), Int32Ty); + ExtRem = Builder.CreateSRem(ExtDividend, ExtDivisor); + } else { + ExtDividend = Builder.CreateZExt(Rem->getOperand(0), Int32Ty); + ExtDivisor = Builder.CreateZExt(Rem->getOperand(1), Int32Ty); + ExtRem = Builder.CreateURem(ExtDividend, ExtDivisor); + } + Trunc = Builder.CreateTrunc(ExtRem, RemTy); + + Rem->replaceAllUsesWith(Trunc); + Rem->dropAllReferences(); + Rem->eraseFromParent(); + + return expandRemainder(cast<BinaryOperator>(ExtRem)); +} + +/// Generate code to compute the remainder of two integers of bitwidth up to +/// 64 bits. Uses the above routines and extends the inputs/truncates the +/// outputs to operate in 64 bits. +/// +/// @brief Replace Rem with emulation code. +bool llvm::expandRemainderUpTo64Bits(BinaryOperator *Rem) { + assert((Rem->getOpcode() == Instruction::SRem || + Rem->getOpcode() == Instruction::URem) && + "Trying to expand remainder from a non-remainder function"); + + Type *RemTy = Rem->getType(); + assert(!RemTy->isVectorTy() && "Div over vectors not supported"); + + unsigned RemTyBitWidth = RemTy->getIntegerBitWidth(); + + assert(RemTyBitWidth <= 64 && "Div of bitwidth greater than 64 not supported"); + + if (RemTyBitWidth == 64) + return expandRemainder(Rem); + + // If bitwidth smaller than 64 extend inputs, extend output and proceed + // with 64 bit division. + IRBuilder<> Builder(Rem); + + Value *ExtDividend; + Value *ExtDivisor; + Value *ExtRem; + Value *Trunc; + Type *Int64Ty = Builder.getInt64Ty(); + + if (Rem->getOpcode() == Instruction::SRem) { + ExtDividend = Builder.CreateSExt(Rem->getOperand(0), Int64Ty); + ExtDivisor = Builder.CreateSExt(Rem->getOperand(1), Int64Ty); + ExtRem = Builder.CreateSRem(ExtDividend, ExtDivisor); + } else { + ExtDividend = Builder.CreateZExt(Rem->getOperand(0), Int64Ty); + ExtDivisor = Builder.CreateZExt(Rem->getOperand(1), Int64Ty); + ExtRem = Builder.CreateURem(ExtDividend, ExtDivisor); + } + Trunc = Builder.CreateTrunc(ExtRem, RemTy); + + Rem->replaceAllUsesWith(Trunc); + Rem->dropAllReferences(); + Rem->eraseFromParent(); + + return expandRemainder(cast<BinaryOperator>(ExtRem)); +} + +/// Generate code to divide two integers of bitwidth up to 32 bits. Uses the +/// above routines and extends the inputs/truncates the outputs to operate +/// in 32 bits; that is, these routines are good for targets that have no +/// or very little support for smaller than 32 bit integer arithmetic. +/// +/// @brief Replace Div with emulation code. 
+bool llvm::expandDivisionUpTo32Bits(BinaryOperator *Div) { + assert((Div->getOpcode() == Instruction::SDiv || + Div->getOpcode() == Instruction::UDiv) && + "Trying to expand division from a non-division function"); + + Type *DivTy = Div->getType(); + assert(!DivTy->isVectorTy() && "Div over vectors not supported"); + + unsigned DivTyBitWidth = DivTy->getIntegerBitWidth(); + + assert(DivTyBitWidth <= 32 && "Div of bitwidth greater than 32 not supported"); + + if (DivTyBitWidth == 32) + return expandDivision(Div); + + // If bitwidth smaller than 32 extend inputs, extend output and proceed + // with 32 bit division. + IRBuilder<> Builder(Div); + + Value *ExtDividend; + Value *ExtDivisor; + Value *ExtDiv; + Value *Trunc; + Type *Int32Ty = Builder.getInt32Ty(); + + if (Div->getOpcode() == Instruction::SDiv) { + ExtDividend = Builder.CreateSExt(Div->getOperand(0), Int32Ty); + ExtDivisor = Builder.CreateSExt(Div->getOperand(1), Int32Ty); + ExtDiv = Builder.CreateSDiv(ExtDividend, ExtDivisor); + } else { + ExtDividend = Builder.CreateZExt(Div->getOperand(0), Int32Ty); + ExtDivisor = Builder.CreateZExt(Div->getOperand(1), Int32Ty); + ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor); + } + Trunc = Builder.CreateTrunc(ExtDiv, DivTy); + + Div->replaceAllUsesWith(Trunc); + Div->dropAllReferences(); + Div->eraseFromParent(); + + return expandDivision(cast<BinaryOperator>(ExtDiv)); +} + +/// Generate code to divide two integers of bitwidth up to 64 bits. Uses the +/// above routines and extends the inputs/truncates the outputs to operate +/// in 64 bits. +/// +/// @brief Replace Div with emulation code. +bool llvm::expandDivisionUpTo64Bits(BinaryOperator *Div) { + assert((Div->getOpcode() == Instruction::SDiv || + Div->getOpcode() == Instruction::UDiv) && + "Trying to expand division from a non-division function"); + + Type *DivTy = Div->getType(); + assert(!DivTy->isVectorTy() && "Div over vectors not supported"); + + unsigned DivTyBitWidth = DivTy->getIntegerBitWidth(); + + assert(DivTyBitWidth <= 64 && + "Div of bitwidth greater than 64 not supported"); + + if (DivTyBitWidth == 64) + return expandDivision(Div); + + // If bitwidth smaller than 64 extend inputs, extend output and proceed + // with 64 bit division. + IRBuilder<> Builder(Div); + + Value *ExtDividend; + Value *ExtDivisor; + Value *ExtDiv; + Value *Trunc; + Type *Int64Ty = Builder.getInt64Ty(); + + if (Div->getOpcode() == Instruction::SDiv) { + ExtDividend = Builder.CreateSExt(Div->getOperand(0), Int64Ty); + ExtDivisor = Builder.CreateSExt(Div->getOperand(1), Int64Ty); + ExtDiv = Builder.CreateSDiv(ExtDividend, ExtDivisor); + } else { + ExtDividend = Builder.CreateZExt(Div->getOperand(0), Int64Ty); + ExtDivisor = Builder.CreateZExt(Div->getOperand(1), Int64Ty); + ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor); + } + Trunc = Builder.CreateTrunc(ExtDiv, DivTy); + + Div->replaceAllUsesWith(Trunc); + Div->dropAllReferences(); + Div->eraseFromParent(); + + return expandDivision(cast<BinaryOperator>(ExtDiv)); +} diff --git a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp new file mode 100644 index 000000000000..68c6b74d5e5b --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp @@ -0,0 +1,405 @@ +//===-- LCSSA.cpp - Convert loops into loop-closed SSA form ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This pass transforms loops by placing phi nodes at the end of the loops for +// all values that are live across the loop boundary. For example, it turns +// the left into the right code: +// +// for (...) for (...) +// if (c) if (c) +// X1 = ... X1 = ... +// else else +// X2 = ... X2 = ... +// X3 = phi(X1, X2) X3 = phi(X1, X2) +// ... = X3 + 4 X4 = phi(X3) +// ... = X4 + 4 +// +// This is still valid LLVM; the extra phi nodes are purely redundant, and will +// be trivially eliminated by InstCombine. The major benefit of this +// transformation is that it makes many other loop optimizations, such as +// LoopUnswitching, simpler. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/LCSSA.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/PredIteratorCache.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" +using namespace llvm; + +#define DEBUG_TYPE "lcssa" + +STATISTIC(NumLCSSA, "Number of live out of a loop variables"); + +#ifdef EXPENSIVE_CHECKS +static bool VerifyLoopLCSSA = true; +#else +static bool VerifyLoopLCSSA = false; +#endif +static cl::opt<bool,true> +VerifyLoopLCSSAFlag("verify-loop-lcssa", cl::location(VerifyLoopLCSSA), + cl::desc("Verify loop lcssa form (time consuming)")); + +/// Return true if the specified block is in the list. +static bool isExitBlock(BasicBlock *BB, + const SmallVectorImpl<BasicBlock *> &ExitBlocks) { + return is_contained(ExitBlocks, BB); +} + +/// For every instruction from the worklist, check to see if it has any uses +/// that are outside the current loop. If so, insert LCSSA PHI nodes and +/// rewrite the uses. +bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, + DominatorTree &DT, LoopInfo &LI) { + SmallVector<Use *, 16> UsesToRewrite; + SmallSetVector<PHINode *, 16> PHIsToRemove; + PredIteratorCache PredCache; + bool Changed = false; + + // Cache the Loop ExitBlocks across this loop. We expect to get a lot of + // instructions within the same loops, computing the exit blocks is + // expensive, and we're not mutating the loop structure. + SmallDenseMap<Loop*, SmallVector<BasicBlock *,1>> LoopExitBlocks; + + while (!Worklist.empty()) { + UsesToRewrite.clear(); + + Instruction *I = Worklist.pop_back_val(); + BasicBlock *InstBB = I->getParent(); + Loop *L = LI.getLoopFor(InstBB); + if (!LoopExitBlocks.count(L)) + L->getExitBlocks(LoopExitBlocks[L]); + assert(LoopExitBlocks.count(L)); + const SmallVectorImpl<BasicBlock *> &ExitBlocks = LoopExitBlocks[L]; + + if (ExitBlocks.empty()) + continue; + + // Tokens cannot be used in PHI nodes, so we skip over them. + // We can run into tokens which are live out of a loop with catchswitch + // instructions in Windows EH if the catchswitch has one catchpad which + // is inside the loop and another which is not. 
+ if (I->getType()->isTokenTy()) + continue; + + for (Use &U : I->uses()) { + Instruction *User = cast<Instruction>(U.getUser()); + BasicBlock *UserBB = User->getParent(); + if (PHINode *PN = dyn_cast<PHINode>(User)) + UserBB = PN->getIncomingBlock(U); + + if (InstBB != UserBB && !L->contains(UserBB)) + UsesToRewrite.push_back(&U); + } + + // If there are no uses outside the loop, exit with no change. + if (UsesToRewrite.empty()) + continue; + + ++NumLCSSA; // We are applying the transformation + + // Invoke instructions are special in that their result value is not + // available along their unwind edge. The code below tests to see whether + // DomBB dominates the value, so adjust DomBB to the normal destination + // block, which is effectively where the value is first usable. + BasicBlock *DomBB = InstBB; + if (InvokeInst *Inv = dyn_cast<InvokeInst>(I)) + DomBB = Inv->getNormalDest(); + + DomTreeNode *DomNode = DT.getNode(DomBB); + + SmallVector<PHINode *, 16> AddedPHIs; + SmallVector<PHINode *, 8> PostProcessPHIs; + + SmallVector<PHINode *, 4> InsertedPHIs; + SSAUpdater SSAUpdate(&InsertedPHIs); + SSAUpdate.Initialize(I->getType(), I->getName()); + + // Insert the LCSSA phi's into all of the exit blocks dominated by the + // value, and add them to the Phi's map. + for (BasicBlock *ExitBB : ExitBlocks) { + if (!DT.dominates(DomNode, DT.getNode(ExitBB))) + continue; + + // If we already inserted something for this BB, don't reprocess it. + if (SSAUpdate.HasValueForBlock(ExitBB)) + continue; + + PHINode *PN = PHINode::Create(I->getType(), PredCache.size(ExitBB), + I->getName() + ".lcssa", &ExitBB->front()); + + // Add inputs from inside the loop for this PHI. + for (BasicBlock *Pred : PredCache.get(ExitBB)) { + PN->addIncoming(I, Pred); + + // If the exit block has a predecessor not within the loop, arrange for + // the incoming value use corresponding to that predecessor to be + // rewritten in terms of a different LCSSA PHI. + if (!L->contains(Pred)) + UsesToRewrite.push_back( + &PN->getOperandUse(PN->getOperandNumForIncomingValue( + PN->getNumIncomingValues() - 1))); + } + + AddedPHIs.push_back(PN); + + // Remember that this phi makes the value alive in this block. + SSAUpdate.AddAvailableValue(ExitBB, PN); + + // LoopSimplify might fail to simplify some loops (e.g. when indirect + // branches are involved). In such situations, it might happen that an + // exit for Loop L1 is the header of a disjoint Loop L2. Thus, when we + // create PHIs in such an exit block, we are also inserting PHIs into L2's + // header. This could break LCSSA form for L2 because these inserted PHIs + // can also have uses outside of L2. Remember all PHIs in such situation + // as to revisit than later on. FIXME: Remove this if indirectbr support + // into LoopSimplify gets improved. + if (auto *OtherLoop = LI.getLoopFor(ExitBB)) + if (!L->contains(OtherLoop)) + PostProcessPHIs.push_back(PN); + } + + // Rewrite all uses outside the loop in terms of the new PHIs we just + // inserted. + for (Use *UseToRewrite : UsesToRewrite) { + // If this use is in an exit block, rewrite to use the newly inserted PHI. + // This is required for correctness because SSAUpdate doesn't handle uses + // in the same block. It assumes the PHI we inserted is at the end of the + // block. 
+ Instruction *User = cast<Instruction>(UseToRewrite->getUser()); + BasicBlock *UserBB = User->getParent(); + if (PHINode *PN = dyn_cast<PHINode>(User)) + UserBB = PN->getIncomingBlock(*UseToRewrite); + + if (isa<PHINode>(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) { + // Tell the VHs that the uses changed. This updates SCEV's caches. + if (UseToRewrite->get()->hasValueHandle()) + ValueHandleBase::ValueIsRAUWd(*UseToRewrite, &UserBB->front()); + UseToRewrite->set(&UserBB->front()); + continue; + } + + // Otherwise, do full PHI insertion. + SSAUpdate.RewriteUse(*UseToRewrite); + } + + // SSAUpdater might have inserted phi-nodes inside other loops. We'll need + // to post-process them to keep LCSSA form. + for (PHINode *InsertedPN : InsertedPHIs) { + if (auto *OtherLoop = LI.getLoopFor(InsertedPN->getParent())) + if (!L->contains(OtherLoop)) + PostProcessPHIs.push_back(InsertedPN); + } + + // Post process PHI instructions that were inserted into another disjoint + // loop and update their exits properly. + for (auto *PostProcessPN : PostProcessPHIs) { + if (PostProcessPN->use_empty()) + continue; + + // Reprocess each PHI instruction. + Worklist.push_back(PostProcessPN); + } + + // Keep track of PHI nodes that we want to remove because they did not have + // any uses rewritten. + for (PHINode *PN : AddedPHIs) + if (PN->use_empty()) + PHIsToRemove.insert(PN); + + Changed = true; + } + // Remove PHI nodes that did not have any uses rewritten. + for (PHINode *PN : PHIsToRemove) { + assert (PN->use_empty() && "Trying to remove a phi with uses."); + PN->eraseFromParent(); + } + return Changed; +} + +/// Return true if the specified block dominates at least +/// one of the blocks in the specified list. +static bool +blockDominatesAnExit(BasicBlock *BB, + DominatorTree &DT, + const SmallVectorImpl<BasicBlock *> &ExitBlocks) { + DomTreeNode *DomNode = DT.getNode(BB); + return any_of(ExitBlocks, [&](BasicBlock *EB) { + return DT.dominates(DomNode, DT.getNode(EB)); + }); +} + +bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI, + ScalarEvolution *SE) { + bool Changed = false; + + // Get the set of exiting blocks. + SmallVector<BasicBlock *, 8> ExitBlocks; + L.getExitBlocks(ExitBlocks); + + if (ExitBlocks.empty()) + return false; + + SmallVector<Instruction *, 8> Worklist; + + // Look at all the instructions in the loop, checking to see if they have uses + // outside the loop. If so, put them into the worklist to rewrite those uses. + for (BasicBlock *BB : L.blocks()) { + // For large loops, avoid use-scanning by using dominance information: In + // particular, if a block does not dominate any of the loop exits, then none + // of the values defined in the block could be used outside the loop. + if (!blockDominatesAnExit(BB, DT, ExitBlocks)) + continue; + + for (Instruction &I : *BB) { + // Reject two common cases fast: instructions with no uses (like stores) + // and instructions with one use that is in the same block as this. + if (I.use_empty() || + (I.hasOneUse() && I.user_back()->getParent() == BB && + !isa<PHINode>(I.user_back()))) + continue; + + Worklist.push_back(&I); + } + } + Changed = formLCSSAForInstructions(Worklist, DT, *LI); + + // If we modified the code, remove any caches about the loop from SCEV to + // avoid dangling entries. + // FIXME: This is a big hammer, can we clear the cache more selectively? + if (SE && Changed) + SE->forgetLoop(&L); + + assert(L.isLCSSAForm(DT)); + + return Changed; +} + +/// Process a loop nest depth first. 
+bool llvm::formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI, + ScalarEvolution *SE) { + bool Changed = false; + + // Recurse depth-first through inner loops. + for (Loop *SubLoop : L.getSubLoops()) + Changed |= formLCSSARecursively(*SubLoop, DT, LI, SE); + + Changed |= formLCSSA(L, DT, LI, SE); + return Changed; +} + +/// Process all loops in the function, inner-most out. +static bool formLCSSAOnAllLoops(LoopInfo *LI, DominatorTree &DT, + ScalarEvolution *SE) { + bool Changed = false; + for (auto &L : *LI) + Changed |= formLCSSARecursively(*L, DT, LI, SE); + return Changed; +} + +namespace { +struct LCSSAWrapperPass : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + LCSSAWrapperPass() : FunctionPass(ID) { + initializeLCSSAWrapperPassPass(*PassRegistry::getPassRegistry()); + } + + // Cached analysis information for the current function. + DominatorTree *DT; + LoopInfo *LI; + ScalarEvolution *SE; + + bool runOnFunction(Function &F) override; + void verifyAnalysis() const override { + // This check is very expensive. On the loop intensive compiles it may cause + // up to 10x slowdown. Currently it's disabled by default. LPPassManager + // always does limited form of the LCSSA verification. Similar reasoning + // was used for the LoopInfo verifier. + if (VerifyLoopLCSSA) { + assert(all_of(*LI, + [&](Loop *L) { + return L->isRecursivelyLCSSAForm(*DT, *LI); + }) && + "LCSSA form is broken!"); + } + }; + + /// This transformation requires natural loop information & requires that + /// loop preheaders be inserted into the CFG. It maintains both of these, + /// as well as the CFG. It also requires dominator information. + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<LoopInfoWrapperPass>(); + AU.addPreservedID(LoopSimplifyID); + AU.addPreserved<AAResultsWrapperPass>(); + AU.addPreserved<BasicAAWrapperPass>(); + AU.addPreserved<GlobalsAAWrapperPass>(); + AU.addPreserved<ScalarEvolutionWrapperPass>(); + AU.addPreserved<SCEVAAWrapperPass>(); + + // This is needed to perform LCSSA verification inside LPPassManager + AU.addRequired<LCSSAVerificationPass>(); + AU.addPreserved<LCSSAVerificationPass>(); + } +}; +} + +char LCSSAWrapperPass::ID = 0; +INITIALIZE_PASS_BEGIN(LCSSAWrapperPass, "lcssa", "Loop-Closed SSA Form Pass", + false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LCSSAVerificationPass) +INITIALIZE_PASS_END(LCSSAWrapperPass, "lcssa", "Loop-Closed SSA Form Pass", + false, false) + +Pass *llvm::createLCSSAPass() { return new LCSSAWrapperPass(); } +char &llvm::LCSSAID = LCSSAWrapperPass::ID; + +/// Transform \p F into loop-closed SSA form. +bool LCSSAWrapperPass::runOnFunction(Function &F) { + LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>(); + SE = SEWP ? &SEWP->getSE() : nullptr; + + return formLCSSAOnAllLoops(LI, *DT, SE); +} + +PreservedAnalyses LCSSAPass::run(Function &F, FunctionAnalysisManager &AM) { + auto &LI = AM.getResult<LoopAnalysis>(F); + auto &DT = AM.getResult<DominatorTreeAnalysis>(F); + auto *SE = AM.getCachedResult<ScalarEvolutionAnalysis>(F); + if (!formLCSSAOnAllLoops(&LI, DT, SE)) + return PreservedAnalyses::all(); + + // FIXME: This should also 'preserve the CFG'. 
+ PreservedAnalyses PA; + PA.preserve<BasicAA>(); + PA.preserve<GlobalsAA>(); + PA.preserve<SCEVAA>(); + PA.preserve<ScalarEvolutionAnalysis>(); + return PA; +} diff --git a/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp new file mode 100644 index 000000000000..d97cd7582eaa --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp @@ -0,0 +1,571 @@ +//===-- LibCallsShrinkWrap.cpp ----------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass shrink-wraps a call to function if the result is not used. +// The call can set errno but is otherwise side effect free. For example: +// sqrt(val); +// is transformed to +// if (val < 0) +// sqrt(val); +// Even if the result of library call is not being used, the compiler cannot +// safely delete the call because the function can set errno on error +// conditions. +// Note in many functions, the error condition solely depends on the incoming +// parameter. In this optimization, we can generate the condition can lead to +// the errno to shrink-wrap the call. Since the chances of hitting the error +// condition is low, the runtime call is effectively eliminated. +// +// These partially dead calls are usually results of C++ abstraction penalty +// exposed by inlining. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/LibCallsShrinkWrap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstVisitor.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +using namespace llvm; + +#define DEBUG_TYPE "libcalls-shrinkwrap" + +STATISTIC(NumWrappedOneCond, "Number of One-Condition Wrappers Inserted"); +STATISTIC(NumWrappedTwoCond, "Number of Two-Condition Wrappers Inserted"); + +static cl::opt<bool> LibCallsShrinkWrapDoDomainError( + "libcalls-shrinkwrap-domain-error", cl::init(true), cl::Hidden, + cl::desc("Perform shrink-wrap on lib calls with domain errors")); +static cl::opt<bool> LibCallsShrinkWrapDoRangeError( + "libcalls-shrinkwrap-range-error", cl::init(true), cl::Hidden, + cl::desc("Perform shrink-wrap on lib calls with range errors")); +static cl::opt<bool> LibCallsShrinkWrapDoPoleError( + "libcalls-shrinkwrap-pole-error", cl::init(true), cl::Hidden, + cl::desc("Perform shrink-wrap on lib calls with pole errors")); + +namespace { +class LibCallsShrinkWrapLegacyPass : public FunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + explicit LibCallsShrinkWrapLegacyPass() : FunctionPass(ID) { + initializeLibCallsShrinkWrapLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnFunction(Function &F) override; +}; +} + +char LibCallsShrinkWrapLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(LibCallsShrinkWrapLegacyPass, "libcalls-shrinkwrap", + "Conditionally eliminate dead library calls", 
false, + false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(LibCallsShrinkWrapLegacyPass, "libcalls-shrinkwrap", + "Conditionally eliminate dead library calls", false, false) + +namespace { +class LibCallsShrinkWrap : public InstVisitor<LibCallsShrinkWrap> { +public: + LibCallsShrinkWrap(const TargetLibraryInfo &TLI) : TLI(TLI), Changed(false){}; + bool isChanged() const { return Changed; } + void visitCallInst(CallInst &CI) { checkCandidate(CI); } + void perform() { + for (auto &CI : WorkList) { + DEBUG(dbgs() << "CDCE calls: " << CI->getCalledFunction()->getName() + << "\n"); + if (perform(CI)) { + Changed = true; + DEBUG(dbgs() << "Transformed\n"); + } + } + } + +private: + bool perform(CallInst *CI); + void checkCandidate(CallInst &CI); + void shrinkWrapCI(CallInst *CI, Value *Cond); + bool performCallDomainErrorOnly(CallInst *CI, const LibFunc::Func &Func); + bool performCallErrors(CallInst *CI, const LibFunc::Func &Func); + bool performCallRangeErrorOnly(CallInst *CI, const LibFunc::Func &Func); + Value *generateOneRangeCond(CallInst *CI, const LibFunc::Func &Func); + Value *generateTwoRangeCond(CallInst *CI, const LibFunc::Func &Func); + Value *generateCondForPow(CallInst *CI, const LibFunc::Func &Func); + + // Create an OR of two conditions. + Value *createOrCond(CallInst *CI, CmpInst::Predicate Cmp, float Val, + CmpInst::Predicate Cmp2, float Val2) { + IRBuilder<> BBBuilder(CI); + Value *Arg = CI->getArgOperand(0); + auto Cond2 = createCond(BBBuilder, Arg, Cmp2, Val2); + auto Cond1 = createCond(BBBuilder, Arg, Cmp, Val); + return BBBuilder.CreateOr(Cond1, Cond2); + } + + // Create a single condition using IRBuilder. + Value *createCond(IRBuilder<> &BBBuilder, Value *Arg, CmpInst::Predicate Cmp, + float Val) { + Constant *V = ConstantFP::get(BBBuilder.getContext(), APFloat(Val)); + if (!Arg->getType()->isFloatTy()) + V = ConstantExpr::getFPExtend(V, Arg->getType()); + return BBBuilder.CreateFCmp(Cmp, Arg, V); + } + + // Create a single condition. + Value *createCond(CallInst *CI, CmpInst::Predicate Cmp, float Val) { + IRBuilder<> BBBuilder(CI); + Value *Arg = CI->getArgOperand(0); + return createCond(BBBuilder, Arg, Cmp, Val); + } + + const TargetLibraryInfo &TLI; + SmallVector<CallInst *, 16> WorkList; + bool Changed; +}; +} // end anonymous namespace + +// Perform the transformation to calls with errno set by domain error. 
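// [Editor's note, illustrative only; not part of the imported source] At the
// source level, the rewrite performed by the functions below corresponds to
// guarding an otherwise-dead libcall with exactly the condition under which it
// can set errno; acos (domain error when x < -1 || x > 1) is a typical case.
#include <cmath>

void beforeShrinkWrap(double X) {
  std::acos(X);                  // result unused, call kept only for errno
}

void afterShrinkWrap(double X) {
  if (X < -1.0 || X > 1.0)       // the generated "Cond"; the common case skips the call
    std::acos(X);
}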
+bool LibCallsShrinkWrap::performCallDomainErrorOnly(CallInst *CI, + const LibFunc::Func &Func) { + Value *Cond = nullptr; + + switch (Func) { + case LibFunc::acos: // DomainError: (x < -1 || x > 1) + case LibFunc::acosf: // Same as acos + case LibFunc::acosl: // Same as acos + case LibFunc::asin: // DomainError: (x < -1 || x > 1) + case LibFunc::asinf: // Same as asin + case LibFunc::asinl: // Same as asin + { + ++NumWrappedTwoCond; + Cond = createOrCond(CI, CmpInst::FCMP_OLT, -1.0f, CmpInst::FCMP_OGT, 1.0f); + break; + } + case LibFunc::cos: // DomainError: (x == +inf || x == -inf) + case LibFunc::cosf: // Same as cos + case LibFunc::cosl: // Same as cos + case LibFunc::sin: // DomainError: (x == +inf || x == -inf) + case LibFunc::sinf: // Same as sin + case LibFunc::sinl: // Same as sin + { + ++NumWrappedTwoCond; + Cond = createOrCond(CI, CmpInst::FCMP_OEQ, INFINITY, CmpInst::FCMP_OEQ, + -INFINITY); + break; + } + case LibFunc::acosh: // DomainError: (x < 1) + case LibFunc::acoshf: // Same as acosh + case LibFunc::acoshl: // Same as acosh + { + ++NumWrappedOneCond; + Cond = createCond(CI, CmpInst::FCMP_OLT, 1.0f); + break; + } + case LibFunc::sqrt: // DomainError: (x < 0) + case LibFunc::sqrtf: // Same as sqrt + case LibFunc::sqrtl: // Same as sqrt + { + ++NumWrappedOneCond; + Cond = createCond(CI, CmpInst::FCMP_OLT, 0.0f); + break; + } + default: + return false; + } + shrinkWrapCI(CI, Cond); + return true; +} + +// Perform the transformation to calls with errno set by range error. +bool LibCallsShrinkWrap::performCallRangeErrorOnly(CallInst *CI, + const LibFunc::Func &Func) { + Value *Cond = nullptr; + + switch (Func) { + case LibFunc::cosh: + case LibFunc::coshf: + case LibFunc::coshl: + case LibFunc::exp: + case LibFunc::expf: + case LibFunc::expl: + case LibFunc::exp10: + case LibFunc::exp10f: + case LibFunc::exp10l: + case LibFunc::exp2: + case LibFunc::exp2f: + case LibFunc::exp2l: + case LibFunc::sinh: + case LibFunc::sinhf: + case LibFunc::sinhl: { + Cond = generateTwoRangeCond(CI, Func); + break; + } + case LibFunc::expm1: // RangeError: (709, inf) + case LibFunc::expm1f: // RangeError: (88, inf) + case LibFunc::expm1l: // RangeError: (11356, inf) + { + Cond = generateOneRangeCond(CI, Func); + break; + } + default: + return false; + } + shrinkWrapCI(CI, Cond); + return true; +} + +// Perform the transformation to calls with errno set by combination of errors. 
+bool LibCallsShrinkWrap::performCallErrors(CallInst *CI, + const LibFunc::Func &Func) { + Value *Cond = nullptr; + + switch (Func) { + case LibFunc::atanh: // DomainError: (x < -1 || x > 1) + // PoleError: (x == -1 || x == 1) + // Overall Cond: (x <= -1 || x >= 1) + case LibFunc::atanhf: // Same as atanh + case LibFunc::atanhl: // Same as atanh + { + if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError) + return false; + ++NumWrappedTwoCond; + Cond = createOrCond(CI, CmpInst::FCMP_OLE, -1.0f, CmpInst::FCMP_OGE, 1.0f); + break; + } + case LibFunc::log: // DomainError: (x < 0) + // PoleError: (x == 0) + // Overall Cond: (x <= 0) + case LibFunc::logf: // Same as log + case LibFunc::logl: // Same as log + case LibFunc::log10: // Same as log + case LibFunc::log10f: // Same as log + case LibFunc::log10l: // Same as log + case LibFunc::log2: // Same as log + case LibFunc::log2f: // Same as log + case LibFunc::log2l: // Same as log + case LibFunc::logb: // Same as log + case LibFunc::logbf: // Same as log + case LibFunc::logbl: // Same as log + { + if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError) + return false; + ++NumWrappedOneCond; + Cond = createCond(CI, CmpInst::FCMP_OLE, 0.0f); + break; + } + case LibFunc::log1p: // DomainError: (x < -1) + // PoleError: (x == -1) + // Overall Cond: (x <= -1) + case LibFunc::log1pf: // Same as log1p + case LibFunc::log1pl: // Same as log1p + { + if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError) + return false; + ++NumWrappedOneCond; + Cond = createCond(CI, CmpInst::FCMP_OLE, -1.0f); + break; + } + case LibFunc::pow: // DomainError: x < 0 and y is noninteger + // PoleError: x == 0 and y < 0 + // RangeError: overflow or underflow + case LibFunc::powf: + case LibFunc::powl: { + if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError || + !LibCallsShrinkWrapDoRangeError) + return false; + Cond = generateCondForPow(CI, Func); + if (Cond == nullptr) + return false; + break; + } + default: + return false; + } + assert(Cond && "performCallErrors should not see an empty condition"); + shrinkWrapCI(CI, Cond); + return true; +} + +// Checks if CI is a candidate for shrinkwrapping and put it into work list if +// true. +void LibCallsShrinkWrap::checkCandidate(CallInst &CI) { + if (CI.isNoBuiltin()) + return; + // A possible improvement is to handle the calls with the return value being + // used. If there is API for fast libcall implementation without setting + // errno, we can use the same framework to direct/wrap the call to the fast + // API in the error free path, and leave the original call in the slow path. + if (!CI.use_empty()) + return; + + LibFunc::Func Func; + Function *Callee = CI.getCalledFunction(); + if (!Callee) + return; + if (!TLI.getLibFunc(*Callee, Func) || !TLI.has(Func)) + return; + + if (CI.getNumArgOperands() == 0) + return; + // TODO: Handle long double in other formats. + Type *ArgType = CI.getArgOperand(0)->getType(); + if (!(ArgType->isFloatTy() || ArgType->isDoubleTy() || + ArgType->isX86_FP80Ty())) + return; + + WorkList.push_back(&CI); +} + +// Generate the upper bound condition for RangeError. 
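// The upper bounds used by generateOneRangeCond and generateTwoRangeCond are
// slightly conservative approximations of log(FLT_MAX), log(DBL_MAX) and
// log(LDBL_MAX); a quick standalone way to reproduce them (illustrative only,
// long double assumed to be the x87 80-bit format):
#if 0
#include <cmath>
#include <cstdio>
#include <limits>

int main() {
  std::printf("%f\n", std::log(std::numeric_limits<float>::max()));   // ~88.72
  std::printf("%f\n", std::log(std::numeric_limits<double>::max()));  // ~709.78
  std::printf("%Lf\n",
              std::log(std::numeric_limits<long double>::max()));     // ~11356.52
  return 0;
}
#endif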
+Value *LibCallsShrinkWrap::generateOneRangeCond(CallInst *CI, + const LibFunc::Func &Func) { + float UpperBound; + switch (Func) { + case LibFunc::expm1: // RangeError: (709, inf) + UpperBound = 709.0f; + break; + case LibFunc::expm1f: // RangeError: (88, inf) + UpperBound = 88.0f; + break; + case LibFunc::expm1l: // RangeError: (11356, inf) + UpperBound = 11356.0f; + break; + default: + llvm_unreachable("Should be reach here"); + } + + ++NumWrappedOneCond; + return createCond(CI, CmpInst::FCMP_OGT, UpperBound); +} + +// Generate the lower and upper bound condition for RangeError. +Value *LibCallsShrinkWrap::generateTwoRangeCond(CallInst *CI, + const LibFunc::Func &Func) { + float UpperBound, LowerBound; + switch (Func) { + case LibFunc::cosh: // RangeError: (x < -710 || x > 710) + case LibFunc::sinh: // Same as cosh + LowerBound = -710.0f; + UpperBound = 710.0f; + break; + case LibFunc::coshf: // RangeError: (x < -89 || x > 89) + case LibFunc::sinhf: // Same as coshf + LowerBound = -89.0f; + UpperBound = 89.0f; + break; + case LibFunc::coshl: // RangeError: (x < -11357 || x > 11357) + case LibFunc::sinhl: // Same as coshl + LowerBound = -11357.0f; + UpperBound = 11357.0f; + break; + case LibFunc::exp: // RangeError: (x < -745 || x > 709) + LowerBound = -745.0f; + UpperBound = 709.0f; + break; + case LibFunc::expf: // RangeError: (x < -103 || x > 88) + LowerBound = -103.0f; + UpperBound = 88.0f; + break; + case LibFunc::expl: // RangeError: (x < -11399 || x > 11356) + LowerBound = -11399.0f; + UpperBound = 11356.0f; + break; + case LibFunc::exp10: // RangeError: (x < -323 || x > 308) + LowerBound = -323.0f; + UpperBound = 308.0f; + break; + case LibFunc::exp10f: // RangeError: (x < -45 || x > 38) + LowerBound = -45.0f; + UpperBound = 38.0f; + break; + case LibFunc::exp10l: // RangeError: (x < -4950 || x > 4932) + LowerBound = -4950.0f; + UpperBound = 4932.0f; + break; + case LibFunc::exp2: // RangeError: (x < -1074 || x > 1023) + LowerBound = -1074.0f; + UpperBound = 1023.0f; + break; + case LibFunc::exp2f: // RangeError: (x < -149 || x > 127) + LowerBound = -149.0f; + UpperBound = 127.0f; + break; + case LibFunc::exp2l: // RangeError: (x < -16445 || x > 11383) + LowerBound = -16445.0f; + UpperBound = 11383.0f; + break; + default: + llvm_unreachable("Should be reach here"); + } + + ++NumWrappedTwoCond; + return createOrCond(CI, CmpInst::FCMP_OGT, UpperBound, CmpInst::FCMP_OLT, + LowerBound); +} + +// For pow(x,y), We only handle the following cases: +// (1) x is a constant && (x >= 1) && (x < MaxUInt8) +// Cond is: (y > 127) +// (2) x is a value coming from an integer type. +// (2.1) if x's bit_size == 8 +// Cond: (x <= 0 || y > 128) +// (2.2) if x's bit_size is 16 +// Cond: (x <= 0 || y > 64) +// (2.3) if x's bit_size is 32 +// Cond: (x <= 0 || y > 32) +// Support for powl(x,y) and powf(x,y) are TBD. +// +// Note that condition can be more conservative than the actual condition +// (i.e. we might invoke the calls that will not set the errno.). +// +Value *LibCallsShrinkWrap::generateCondForPow(CallInst *CI, + const LibFunc::Func &Func) { + // FIXME: LibFunc::powf and powl TBD. + if (Func != LibFunc::pow) { + DEBUG(dbgs() << "Not handled powf() and powl()\n"); + return nullptr; + } + + Value *Base = CI->getArgOperand(0); + Value *Exp = CI->getArgOperand(1); + IRBuilder<> BBBuilder(CI); + + // Constant Base case. 
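  // (Worked check of the y > 127 guard used for the constant-base case below,
  // illustrative only and covering the overflow direction: pow(x, y) with
  // 1 <= x < 256 exceeds DBL_MAX ~= 2^1024 only when y * log2(x) > 1024,
  // i.e. y > 1024 / log2(255) ~= 128.1, so every exponent that can overflow
  // also satisfies y > 127; the guard is a conservative superset of the
  // actual range-error condition.)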
+ if (ConstantFP *CF = dyn_cast<ConstantFP>(Base)) { + double D = CF->getValueAPF().convertToDouble(); + if (D < 1.0f || D > APInt::getMaxValue(8).getZExtValue()) { + DEBUG(dbgs() << "Not handled pow(): constant base out of range\n"); + return nullptr; + } + + ++NumWrappedOneCond; + Constant *V = ConstantFP::get(CI->getContext(), APFloat(127.0f)); + if (!Exp->getType()->isFloatTy()) + V = ConstantExpr::getFPExtend(V, Exp->getType()); + return BBBuilder.CreateFCmp(CmpInst::FCMP_OGT, Exp, V); + } + + // If the Base value coming from an integer type. + Instruction *I = dyn_cast<Instruction>(Base); + if (!I) { + DEBUG(dbgs() << "Not handled pow(): FP type base\n"); + return nullptr; + } + unsigned Opcode = I->getOpcode(); + if (Opcode == Instruction::UIToFP || Opcode == Instruction::SIToFP) { + unsigned BW = I->getOperand(0)->getType()->getPrimitiveSizeInBits(); + float UpperV = 0.0f; + if (BW == 8) + UpperV = 128.0f; + else if (BW == 16) + UpperV = 64.0f; + else if (BW == 32) + UpperV = 32.0f; + else { + DEBUG(dbgs() << "Not handled pow(): type too wide\n"); + return nullptr; + } + + ++NumWrappedTwoCond; + Constant *V = ConstantFP::get(CI->getContext(), APFloat(UpperV)); + Constant *V0 = ConstantFP::get(CI->getContext(), APFloat(0.0f)); + if (!Exp->getType()->isFloatTy()) + V = ConstantExpr::getFPExtend(V, Exp->getType()); + if (!Base->getType()->isFloatTy()) + V0 = ConstantExpr::getFPExtend(V0, Exp->getType()); + + Value *Cond = BBBuilder.CreateFCmp(CmpInst::FCMP_OGT, Exp, V); + Value *Cond0 = BBBuilder.CreateFCmp(CmpInst::FCMP_OLE, Base, V0); + return BBBuilder.CreateOr(Cond0, Cond); + } + DEBUG(dbgs() << "Not handled pow(): base not from integer convert\n"); + return nullptr; +} + +// Wrap conditions that can potentially generate errno to the library call. +void LibCallsShrinkWrap::shrinkWrapCI(CallInst *CI, Value *Cond) { + assert(Cond != nullptr && "hrinkWrapCI is not expecting an empty call inst"); + MDNode *BranchWeights = + MDBuilder(CI->getContext()).createBranchWeights(1, 2000); + TerminatorInst *NewInst = + SplitBlockAndInsertIfThen(Cond, CI, false, BranchWeights); + BasicBlock *CallBB = NewInst->getParent(); + CallBB->setName("cdce.call"); + CallBB->getSingleSuccessor()->setName("cdce.end"); + CI->removeFromParent(); + CallBB->getInstList().insert(CallBB->getFirstInsertionPt(), CI); + DEBUG(dbgs() << "== Basic Block After =="); + DEBUG(dbgs() << *CallBB->getSinglePredecessor() << *CallBB + << *CallBB->getSingleSuccessor() << "\n"); +} + +// Perform the transformation to a single candidate. 
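// A note on the weights chosen in shrinkWrapCI above (illustrative): the
// 1:2000 branch weights tell later passes that the wrapped call is expected
// to execute on roughly 1 / (1 + 2000) ~= 0.05% of executions, so the
// "cdce.call" block is treated as cold code and the common path falls
// straight through to "cdce.end".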
+bool LibCallsShrinkWrap::perform(CallInst *CI) { + LibFunc::Func Func; + Function *Callee = CI->getCalledFunction(); + assert(Callee && "perform() should apply to a non-empty callee"); + TLI.getLibFunc(*Callee, Func); + assert(Func && "perform() is not expecting an empty function"); + + if (LibCallsShrinkWrapDoDomainError && performCallDomainErrorOnly(CI, Func)) + return true; + + if (LibCallsShrinkWrapDoRangeError && performCallRangeErrorOnly(CI, Func)) + return true; + + return performCallErrors(CI, Func); +} + +void LibCallsShrinkWrapLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved<GlobalsAAWrapperPass>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); +} + +static bool runImpl(Function &F, const TargetLibraryInfo &TLI) { + if (F.hasFnAttribute(Attribute::OptimizeForSize)) + return false; + LibCallsShrinkWrap CCDCE(TLI); + CCDCE.visit(F); + CCDCE.perform(); + return CCDCE.isChanged(); +} + +bool LibCallsShrinkWrapLegacyPass::runOnFunction(Function &F) { + auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + return runImpl(F, TLI); +} + +namespace llvm { +char &LibCallsShrinkWrapPassID = LibCallsShrinkWrapLegacyPass::ID; + +// Public interface to LibCallsShrinkWrap pass. +FunctionPass *createLibCallsShrinkWrapPass() { + return new LibCallsShrinkWrapLegacyPass(); +} + +PreservedAnalyses LibCallsShrinkWrapPass::run(Function &F, + FunctionAnalysisManager &FAM) { + auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F); + bool Changed = runImpl(F, TLI); + if (!Changed) + return PreservedAnalyses::all(); + auto PA = PreservedAnalyses(); + PA.preserve<GlobalsAA>(); + return PA; +} +} diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp new file mode 100644 index 000000000000..6e4174aa0cda --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp @@ -0,0 +1,2076 @@ +//===-- Local.cpp - Functions to perform local transformations ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This family of functions perform various local transformations to the +// program. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/EHPersonalities.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/LazyValueInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DIBuilder.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; +using namespace llvm::PatternMatch; + +#define DEBUG_TYPE "local" + +STATISTIC(NumRemoved, "Number of unreachable basic blocks removed"); + +//===----------------------------------------------------------------------===// +// Local constant propagation. +// + +/// ConstantFoldTerminator - If a terminator instruction is predicated on a +/// constant value, convert it into an unconditional branch to the constant +/// destination. This is a nontrivial operation because the successors of this +/// basic block must have their PHI nodes updated. +/// Also calls RecursivelyDeleteTriviallyDeadInstructions() on any branch/switch +/// conditions and indirectbr addresses this might make dead if +/// DeleteDeadConditions is true. +bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, + const TargetLibraryInfo *TLI) { + TerminatorInst *T = BB->getTerminator(); + IRBuilder<> Builder(T); + + // Branch - See if we are conditional jumping on constant + if (BranchInst *BI = dyn_cast<BranchInst>(T)) { + if (BI->isUnconditional()) return false; // Can't optimize uncond branch + BasicBlock *Dest1 = BI->getSuccessor(0); + BasicBlock *Dest2 = BI->getSuccessor(1); + + if (ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition())) { + // Are we branching on constant? + // YES. Change to unconditional branch... + BasicBlock *Destination = Cond->getZExtValue() ? Dest1 : Dest2; + BasicBlock *OldDest = Cond->getZExtValue() ? Dest2 : Dest1; + + //cerr << "Function: " << T->getParent()->getParent() + // << "\nRemoving branch from " << T->getParent() + // << "\n\nTo: " << OldDest << endl; + + // Let the basic block know that we are letting go of it. Based on this, + // it will adjust it's PHI nodes. + OldDest->removePredecessor(BB); + + // Replace the conditional branch with an unconditional one. + Builder.CreateBr(Destination); + BI->eraseFromParent(); + return true; + } + + if (Dest2 == Dest1) { // Conditional branch to same location? + // This branch matches something like this: + // br bool %cond, label %Dest, label %Dest + // and changes it into: br label %Dest + + // Let the basic block know that we are letting go of one copy of it. 
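    // (Illustrative example: with  br i1 %c, label %D, label %D  a PHI in %D
    // such as  %p = phi i32 [ %v, %BB ], [ %v, %BB ]  has two incoming
    // entries for %BB; the removePredecessor call below drops one of them
    // before the branch is rewritten to the unconditional  br label %D.)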
+ assert(BI->getParent() && "Terminator not inserted in block!"); + Dest1->removePredecessor(BI->getParent()); + + // Replace the conditional branch with an unconditional one. + Builder.CreateBr(Dest1); + Value *Cond = BI->getCondition(); + BI->eraseFromParent(); + if (DeleteDeadConditions) + RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI); + return true; + } + return false; + } + + if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) { + // If we are switching on a constant, we can convert the switch to an + // unconditional branch. + ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition()); + BasicBlock *DefaultDest = SI->getDefaultDest(); + BasicBlock *TheOnlyDest = DefaultDest; + + // If the default is unreachable, ignore it when searching for TheOnlyDest. + if (isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg()) && + SI->getNumCases() > 0) { + TheOnlyDest = SI->case_begin().getCaseSuccessor(); + } + + // Figure out which case it goes to. + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); + i != e; ++i) { + // Found case matching a constant operand? + if (i.getCaseValue() == CI) { + TheOnlyDest = i.getCaseSuccessor(); + break; + } + + // Check to see if this branch is going to the same place as the default + // dest. If so, eliminate it as an explicit compare. + if (i.getCaseSuccessor() == DefaultDest) { + MDNode *MD = SI->getMetadata(LLVMContext::MD_prof); + unsigned NCases = SI->getNumCases(); + // Fold the case metadata into the default if there will be any branches + // left, unless the metadata doesn't match the switch. + if (NCases > 1 && MD && MD->getNumOperands() == 2 + NCases) { + // Collect branch weights into a vector. + SmallVector<uint32_t, 8> Weights; + for (unsigned MD_i = 1, MD_e = MD->getNumOperands(); MD_i < MD_e; + ++MD_i) { + auto *CI = mdconst::extract<ConstantInt>(MD->getOperand(MD_i)); + Weights.push_back(CI->getValue().getZExtValue()); + } + // Merge weight of this case to the default weight. + unsigned idx = i.getCaseIndex(); + Weights[0] += Weights[idx+1]; + // Remove weight for this case. + std::swap(Weights[idx+1], Weights.back()); + Weights.pop_back(); + SI->setMetadata(LLVMContext::MD_prof, + MDBuilder(BB->getContext()). + createBranchWeights(Weights)); + } + // Remove this entry. + DefaultDest->removePredecessor(SI->getParent()); + SI->removeCase(i); + --i; --e; + continue; + } + + // Otherwise, check to see if the switch only branches to one destination. + // We do this by reseting "TheOnlyDest" to null when we find two non-equal + // destinations. + if (i.getCaseSuccessor() != TheOnlyDest) TheOnlyDest = nullptr; + } + + if (CI && !TheOnlyDest) { + // Branching on a constant, but not any of the cases, go to the default + // successor. + TheOnlyDest = SI->getDefaultDest(); + } + + // If we found a single destination that we can fold the switch into, do so + // now. + if (TheOnlyDest) { + // Insert the new branch. + Builder.CreateBr(TheOnlyDest); + BasicBlock *BB = SI->getParent(); + + // Remove entries from PHI nodes which we no longer branch to... + for (BasicBlock *Succ : SI->successors()) { + // Found case matching a constant operand? + if (Succ == TheOnlyDest) + TheOnlyDest = nullptr; // Don't modify the first branch to TheOnlyDest + else + Succ->removePredecessor(BB); + } + + // Delete the old switch. 
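      // (Worked example of the profile-weight update above, illustrative
      // only: with default weight 10 and case weights {20, 30, 40}, removing
      // the second case (weight 30) because it targets the default adds its
      // weight to Weights[0] and then swap-with-back/pop_back leaves
      // {40, 20, 40}; the removed case's weight is folded into the default.)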
+ Value *Cond = SI->getCondition(); + SI->eraseFromParent(); + if (DeleteDeadConditions) + RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI); + return true; + } + + if (SI->getNumCases() == 1) { + // Otherwise, we can fold this switch into a conditional branch + // instruction if it has only one non-default destination. + SwitchInst::CaseIt FirstCase = SI->case_begin(); + Value *Cond = Builder.CreateICmpEQ(SI->getCondition(), + FirstCase.getCaseValue(), "cond"); + + // Insert the new branch. + BranchInst *NewBr = Builder.CreateCondBr(Cond, + FirstCase.getCaseSuccessor(), + SI->getDefaultDest()); + MDNode *MD = SI->getMetadata(LLVMContext::MD_prof); + if (MD && MD->getNumOperands() == 3) { + ConstantInt *SICase = + mdconst::dyn_extract<ConstantInt>(MD->getOperand(2)); + ConstantInt *SIDef = + mdconst::dyn_extract<ConstantInt>(MD->getOperand(1)); + assert(SICase && SIDef); + // The TrueWeight should be the weight for the single case of SI. + NewBr->setMetadata(LLVMContext::MD_prof, + MDBuilder(BB->getContext()). + createBranchWeights(SICase->getValue().getZExtValue(), + SIDef->getValue().getZExtValue())); + } + + // Update make.implicit metadata to the newly-created conditional branch. + MDNode *MakeImplicitMD = SI->getMetadata(LLVMContext::MD_make_implicit); + if (MakeImplicitMD) + NewBr->setMetadata(LLVMContext::MD_make_implicit, MakeImplicitMD); + + // Delete the old switch. + SI->eraseFromParent(); + return true; + } + return false; + } + + if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(T)) { + // indirectbr blockaddress(@F, @BB) -> br label @BB + if (BlockAddress *BA = + dyn_cast<BlockAddress>(IBI->getAddress()->stripPointerCasts())) { + BasicBlock *TheOnlyDest = BA->getBasicBlock(); + // Insert the new branch. + Builder.CreateBr(TheOnlyDest); + + for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { + if (IBI->getDestination(i) == TheOnlyDest) + TheOnlyDest = nullptr; + else + IBI->getDestination(i)->removePredecessor(IBI->getParent()); + } + Value *Address = IBI->getAddress(); + IBI->eraseFromParent(); + if (DeleteDeadConditions) + RecursivelyDeleteTriviallyDeadInstructions(Address, TLI); + + // If we didn't find our destination in the IBI successor list, then we + // have undefined behavior. Replace the unconditional branch with an + // 'unreachable' instruction. + if (TheOnlyDest) { + BB->getTerminator()->eraseFromParent(); + new UnreachableInst(BB->getContext(), BB); + } + + return true; + } + } + + return false; +} + + +//===----------------------------------------------------------------------===// +// Local dead code elimination. +// + +/// isInstructionTriviallyDead - Return true if the result produced by the +/// instruction is not used, and the instruction has no side effects. +/// +bool llvm::isInstructionTriviallyDead(Instruction *I, + const TargetLibraryInfo *TLI) { + if (!I->use_empty() || isa<TerminatorInst>(I)) return false; + + // We don't want the landingpad-like instructions removed by anything this + // general. + if (I->isEHPad()) + return false; + + // We don't want debug info removed by anything this general, unless + // debug info is empty. + if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(I)) { + if (DDI->getAddress()) + return false; + return true; + } + if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(I)) { + if (DVI->getValue()) + return false; + return true; + } + + if (!I->mayHaveSideEffects()) return true; + + // Special case intrinsics that "may have side effects" but can be deleted + // when dead. 
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + // Safe to delete llvm.stacksave if dead. + if (II->getIntrinsicID() == Intrinsic::stacksave) + return true; + + // Lifetime intrinsics are dead when their right-hand is undef. + if (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end) + return isa<UndefValue>(II->getArgOperand(1)); + + // Assumptions are dead if their condition is trivially true. Guards on + // true are operationally no-ops. In the future we can consider more + // sophisticated tradeoffs for guards considering potential for check + // widening, but for now we keep things simple. + if (II->getIntrinsicID() == Intrinsic::assume || + II->getIntrinsicID() == Intrinsic::experimental_guard) { + if (ConstantInt *Cond = dyn_cast<ConstantInt>(II->getArgOperand(0))) + return !Cond->isZero(); + + return false; + } + } + + if (isAllocLikeFn(I, TLI)) return true; + + if (CallInst *CI = isFreeCall(I, TLI)) + if (Constant *C = dyn_cast<Constant>(CI->getArgOperand(0))) + return C->isNullValue() || isa<UndefValue>(C); + + if (CallSite CS = CallSite(I)) + if (isMathLibCallNoop(CS, TLI)) + return true; + + return false; +} + +/// RecursivelyDeleteTriviallyDeadInstructions - If the specified value is a +/// trivially dead instruction, delete it. If that makes any of its operands +/// trivially dead, delete them too, recursively. Return true if any +/// instructions were deleted. +bool +llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V, + const TargetLibraryInfo *TLI) { + Instruction *I = dyn_cast<Instruction>(V); + if (!I || !I->use_empty() || !isInstructionTriviallyDead(I, TLI)) + return false; + + SmallVector<Instruction*, 16> DeadInsts; + DeadInsts.push_back(I); + + do { + I = DeadInsts.pop_back_val(); + + // Null out all of the instruction's operands to see if any operand becomes + // dead as we go. + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + Value *OpV = I->getOperand(i); + I->setOperand(i, nullptr); + + if (!OpV->use_empty()) continue; + + // If the operand is an instruction that became dead as we nulled out the + // operand, and if it is 'trivially' dead, delete it in a future loop + // iteration. + if (Instruction *OpI = dyn_cast<Instruction>(OpV)) + if (isInstructionTriviallyDead(OpI, TLI)) + DeadInsts.push_back(OpI); + } + + I->eraseFromParent(); + } while (!DeadInsts.empty()); + + return true; +} + +/// areAllUsesEqual - Check whether the uses of a value are all the same. +/// This is similar to Instruction::hasOneUse() except this will also return +/// true when there are no uses or multiple uses that all refer to the same +/// value. +static bool areAllUsesEqual(Instruction *I) { + Value::user_iterator UI = I->user_begin(); + Value::user_iterator UE = I->user_end(); + if (UI == UE) + return true; + + User *TheUse = *UI; + for (++UI; UI != UE; ++UI) { + if (*UI != TheUse) + return false; + } + return true; +} + +/// RecursivelyDeleteDeadPHINode - If the specified value is an effectively +/// dead PHI node, due to being a def-use chain of single-use nodes that +/// either forms a cycle or is terminated by a trivially dead instruction, +/// delete it. If that makes any of its operands trivially dead, delete them +/// too, recursively. Return true if a change was made. 
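// A container-based toy model of the worklist pattern used by
// RecursivelyDeleteTriviallyDeadInstructions above (illustrative only; the
// types below are made up and are not LLVM API):
#if 0
#include <cassert>
#include <vector>

struct ToyNode {
  std::vector<ToyNode *> Operands; // values this node uses
  unsigned NumUses = 0;            // how many nodes use this one
  bool HasSideEffects = false;
  bool Deleted = false;
};

static bool isToyTriviallyDead(const ToyNode *N) {
  return N->NumUses == 0 && !N->HasSideEffects;
}

static bool recursivelyDeleteToyNode(ToyNode *N) {
  if (!isToyTriviallyDead(N))
    return false;
  std::vector<ToyNode *> Work{N};
  while (!Work.empty()) {
    ToyNode *Cur = Work.back();
    Work.pop_back();
    // Drop Cur's uses of its operands; anything that becomes dead is queued
    // for deletion on a later iteration, mirroring the "null out operands"
    // loop in the real implementation.
    for (ToyNode *Op : Cur->Operands)
      if (--Op->NumUses == 0 && isToyTriviallyDead(Op) && !Op->Deleted)
        Work.push_back(Op);
    Cur->Operands.clear();
    Cur->Deleted = true;
  }
  return true;
}

int main() {
  ToyNode A, B, C;                 // C uses B, B uses A; only C starts dead.
  B.Operands = {&A}; A.NumUses = 1;
  C.Operands = {&B}; B.NumUses = 1;
  recursivelyDeleteToyNode(&C);
  assert(A.Deleted && B.Deleted && C.Deleted);
  return 0;
}
#endif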
+bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN, + const TargetLibraryInfo *TLI) { + SmallPtrSet<Instruction*, 4> Visited; + for (Instruction *I = PN; areAllUsesEqual(I) && !I->mayHaveSideEffects(); + I = cast<Instruction>(*I->user_begin())) { + if (I->use_empty()) + return RecursivelyDeleteTriviallyDeadInstructions(I, TLI); + + // If we find an instruction more than once, we're on a cycle that + // won't prove fruitful. + if (!Visited.insert(I).second) { + // Break the cycle and delete the instruction and its operands. + I->replaceAllUsesWith(UndefValue::get(I->getType())); + (void)RecursivelyDeleteTriviallyDeadInstructions(I, TLI); + return true; + } + } + return false; +} + +static bool +simplifyAndDCEInstruction(Instruction *I, + SmallSetVector<Instruction *, 16> &WorkList, + const DataLayout &DL, + const TargetLibraryInfo *TLI) { + if (isInstructionTriviallyDead(I, TLI)) { + // Null out all of the instruction's operands to see if any operand becomes + // dead as we go. + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + Value *OpV = I->getOperand(i); + I->setOperand(i, nullptr); + + if (!OpV->use_empty() || I == OpV) + continue; + + // If the operand is an instruction that became dead as we nulled out the + // operand, and if it is 'trivially' dead, delete it in a future loop + // iteration. + if (Instruction *OpI = dyn_cast<Instruction>(OpV)) + if (isInstructionTriviallyDead(OpI, TLI)) + WorkList.insert(OpI); + } + + I->eraseFromParent(); + + return true; + } + + if (Value *SimpleV = SimplifyInstruction(I, DL)) { + // Add the users to the worklist. CAREFUL: an instruction can use itself, + // in the case of a phi node. + for (User *U : I->users()) { + if (U != I) { + WorkList.insert(cast<Instruction>(U)); + } + } + + // Replace the instruction with its simplified value. + bool Changed = false; + if (!I->use_empty()) { + I->replaceAllUsesWith(SimpleV); + Changed = true; + } + if (isInstructionTriviallyDead(I, TLI)) { + I->eraseFromParent(); + Changed = true; + } + return Changed; + } + return false; +} + +/// SimplifyInstructionsInBlock - Scan the specified basic block and try to +/// simplify any instructions in it and recursively delete dead instructions. +/// +/// This returns true if it changed the code, note that it can delete +/// instructions in other blocks as well in this block. +bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, + const TargetLibraryInfo *TLI) { + bool MadeChange = false; + const DataLayout &DL = BB->getModule()->getDataLayout(); + +#ifndef NDEBUG + // In debug builds, ensure that the terminator of the block is never replaced + // or deleted by these simplifications. The idea of simplification is that it + // cannot introduce new instructions, and there is no way to replace the + // terminator of a block without introducing a new instruction. + AssertingVH<Instruction> TerminatorVH(&BB->back()); +#endif + + SmallSetVector<Instruction *, 16> WorkList; + // Iterate over the original function, only adding insts to the worklist + // if they actually need to be revisited. This avoids having to pre-init + // the worklist with the entire function's worth of instructions. + for (BasicBlock::iterator BI = BB->begin(), E = std::prev(BB->end()); + BI != E;) { + assert(!BI->isTerminator()); + Instruction *I = &*BI; + ++BI; + + // We're visiting this instruction now, so make sure it's not in the + // worklist from an earlier visit. 
+ if (!WorkList.count(I)) + MadeChange |= simplifyAndDCEInstruction(I, WorkList, DL, TLI); + } + + while (!WorkList.empty()) { + Instruction *I = WorkList.pop_back_val(); + MadeChange |= simplifyAndDCEInstruction(I, WorkList, DL, TLI); + } + return MadeChange; +} + +//===----------------------------------------------------------------------===// +// Control Flow Graph Restructuring. +// + + +/// RemovePredecessorAndSimplify - Like BasicBlock::removePredecessor, this +/// method is called when we're about to delete Pred as a predecessor of BB. If +/// BB contains any PHI nodes, this drops the entries in the PHI nodes for Pred. +/// +/// Unlike the removePredecessor method, this attempts to simplify uses of PHI +/// nodes that collapse into identity values. For example, if we have: +/// x = phi(1, 0, 0, 0) +/// y = and x, z +/// +/// .. and delete the predecessor corresponding to the '1', this will attempt to +/// recursively fold the and to 0. +void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred) { + // This only adjusts blocks with PHI nodes. + if (!isa<PHINode>(BB->begin())) + return; + + // Remove the entries for Pred from the PHI nodes in BB, but do not simplify + // them down. This will leave us with single entry phi nodes and other phis + // that can be removed. + BB->removePredecessor(Pred, true); + + WeakVH PhiIt = &BB->front(); + while (PHINode *PN = dyn_cast<PHINode>(PhiIt)) { + PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt)); + Value *OldPhiIt = PhiIt; + + if (!recursivelySimplifyInstruction(PN)) + continue; + + // If recursive simplification ended up deleting the next PHI node we would + // iterate to, then our iterator is invalid, restart scanning from the top + // of the block. + if (PhiIt != OldPhiIt) PhiIt = &BB->front(); + } +} + + +/// MergeBasicBlockIntoOnlyPred - DestBB is a block with one predecessor and its +/// predecessor is known to have one successor (DestBB!). Eliminate the edge +/// between them, moving the instructions in the predecessor into DestBB and +/// deleting the predecessor block. +/// +void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT) { + // If BB has single-entry PHI nodes, fold them. + while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) { + Value *NewVal = PN->getIncomingValue(0); + // Replace self referencing PHI with undef, it must be dead. + if (NewVal == PN) NewVal = UndefValue::get(PN->getType()); + PN->replaceAllUsesWith(NewVal); + PN->eraseFromParent(); + } + + BasicBlock *PredBB = DestBB->getSinglePredecessor(); + assert(PredBB && "Block doesn't have a single predecessor!"); + + // Zap anything that took the address of DestBB. Not doing this will give the + // address an invalid value. + if (DestBB->hasAddressTaken()) { + BlockAddress *BA = BlockAddress::get(DestBB); + Constant *Replacement = + ConstantInt::get(llvm::Type::getInt32Ty(BA->getContext()), 1); + BA->replaceAllUsesWith(ConstantExpr::getIntToPtr(Replacement, + BA->getType())); + BA->destroyConstant(); + } + + // Anything that branched to PredBB now branches to DestBB. + PredBB->replaceAllUsesWith(DestBB); + + // Splice all the instructions from PredBB to DestBB. + PredBB->getTerminator()->eraseFromParent(); + DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList()); + + // If the PredBB is the entry block of the function, move DestBB up to + // become the entry block after we erase PredBB. 
+ if (PredBB == &DestBB->getParent()->getEntryBlock()) + DestBB->moveAfter(PredBB); + + if (DT) { + BasicBlock *PredBBIDom = DT->getNode(PredBB)->getIDom()->getBlock(); + DT->changeImmediateDominator(DestBB, PredBBIDom); + DT->eraseNode(PredBB); + } + // Nuke BB. + PredBB->eraseFromParent(); +} + +/// CanMergeValues - Return true if we can choose one of these values to use +/// in place of the other. Note that we will always choose the non-undef +/// value to keep. +static bool CanMergeValues(Value *First, Value *Second) { + return First == Second || isa<UndefValue>(First) || isa<UndefValue>(Second); +} + +/// CanPropagatePredecessorsForPHIs - Return true if we can fold BB, an +/// almost-empty BB ending in an unconditional branch to Succ, into Succ. +/// +/// Assumption: Succ is the single successor for BB. +/// +static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { + assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!"); + + DEBUG(dbgs() << "Looking to fold " << BB->getName() << " into " + << Succ->getName() << "\n"); + // Shortcut, if there is only a single predecessor it must be BB and merging + // is always safe + if (Succ->getSinglePredecessor()) return true; + + // Make a list of the predecessors of BB + SmallPtrSet<BasicBlock*, 16> BBPreds(pred_begin(BB), pred_end(BB)); + + // Look at all the phi nodes in Succ, to see if they present a conflict when + // merging these blocks + for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) { + PHINode *PN = cast<PHINode>(I); + + // If the incoming value from BB is again a PHINode in + // BB which has the same incoming value for *PI as PN does, we can + // merge the phi nodes and then the blocks can still be merged + PHINode *BBPN = dyn_cast<PHINode>(PN->getIncomingValueForBlock(BB)); + if (BBPN && BBPN->getParent() == BB) { + for (unsigned PI = 0, PE = PN->getNumIncomingValues(); PI != PE; ++PI) { + BasicBlock *IBB = PN->getIncomingBlock(PI); + if (BBPreds.count(IBB) && + !CanMergeValues(BBPN->getIncomingValueForBlock(IBB), + PN->getIncomingValue(PI))) { + DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in " + << Succ->getName() << " is conflicting with " + << BBPN->getName() << " with regard to common predecessor " + << IBB->getName() << "\n"); + return false; + } + } + } else { + Value* Val = PN->getIncomingValueForBlock(BB); + for (unsigned PI = 0, PE = PN->getNumIncomingValues(); PI != PE; ++PI) { + // See if the incoming value for the common predecessor is equal to the + // one for BB, in which case this phi node will not prevent the merging + // of the block. + BasicBlock *IBB = PN->getIncomingBlock(PI); + if (BBPreds.count(IBB) && + !CanMergeValues(Val, PN->getIncomingValue(PI))) { + DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in " + << Succ->getName() << " is conflicting with regard to common " + << "predecessor " << IBB->getName() << "\n"); + return false; + } + } + } + } + + return true; +} + +typedef SmallVector<BasicBlock *, 16> PredBlockVector; +typedef DenseMap<BasicBlock *, Value *> IncomingValueMap; + +/// \brief Determines the value to use as the phi node input for a block. +/// +/// Select between \p OldVal any value that we know flows from \p BB +/// to a particular phi on the basis of which one (if either) is not +/// undef. Update IncomingValues based on the selected value. +/// +/// \param OldVal The value we are considering selecting. +/// \param BB The block that the value flows in from. 
+/// \param IncomingValues A map from block-to-value for other phi inputs +/// that we have examined. +/// +/// \returns the selected value. +static Value *selectIncomingValueForBlock(Value *OldVal, BasicBlock *BB, + IncomingValueMap &IncomingValues) { + if (!isa<UndefValue>(OldVal)) { + assert((!IncomingValues.count(BB) || + IncomingValues.find(BB)->second == OldVal) && + "Expected OldVal to match incoming value from BB!"); + + IncomingValues.insert(std::make_pair(BB, OldVal)); + return OldVal; + } + + IncomingValueMap::const_iterator It = IncomingValues.find(BB); + if (It != IncomingValues.end()) return It->second; + + return OldVal; +} + +/// \brief Create a map from block to value for the operands of a +/// given phi. +/// +/// Create a map from block to value for each non-undef value flowing +/// into \p PN. +/// +/// \param PN The phi we are collecting the map for. +/// \param IncomingValues [out] The map from block to value for this phi. +static void gatherIncomingValuesToPhi(PHINode *PN, + IncomingValueMap &IncomingValues) { + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + BasicBlock *BB = PN->getIncomingBlock(i); + Value *V = PN->getIncomingValue(i); + + if (!isa<UndefValue>(V)) + IncomingValues.insert(std::make_pair(BB, V)); + } +} + +/// \brief Replace the incoming undef values to a phi with the values +/// from a block-to-value map. +/// +/// \param PN The phi we are replacing the undefs in. +/// \param IncomingValues A map from block to value. +static void replaceUndefValuesInPhi(PHINode *PN, + const IncomingValueMap &IncomingValues) { + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *V = PN->getIncomingValue(i); + + if (!isa<UndefValue>(V)) continue; + + BasicBlock *BB = PN->getIncomingBlock(i); + IncomingValueMap::const_iterator It = IncomingValues.find(BB); + if (It == IncomingValues.end()) continue; + + PN->setIncomingValue(i, It->second); + } +} + +/// \brief Replace a value flowing from a block to a phi with +/// potentially multiple instances of that value flowing from the +/// block's predecessors to the phi. +/// +/// \param BB The block with the value flowing into the phi. +/// \param BBPreds The predecessors of BB. +/// \param PN The phi that we are updating. +static void redirectValuesFromPredecessorsToPhi(BasicBlock *BB, + const PredBlockVector &BBPreds, + PHINode *PN) { + Value *OldVal = PN->removeIncomingValue(BB, false); + assert(OldVal && "No entry in PHI for Pred BB!"); + + IncomingValueMap IncomingValues; + + // We are merging two blocks - BB, and the block containing PN - and + // as a result we need to redirect edges from the predecessors of BB + // to go to the block containing PN, and update PN + // accordingly. Since we allow merging blocks in the case where the + // predecessor and successor blocks both share some predecessors, + // and where some of those common predecessors might have undef + // values flowing into PN, we want to rewrite those values to be + // consistent with the non-undef values. + + gatherIncomingValuesToPhi(PN, IncomingValues); + + // If this incoming value is one of the PHI nodes in BB, the new entries + // in the PHI node are the entries from the old PHI. 
+ if (isa<PHINode>(OldVal) && cast<PHINode>(OldVal)->getParent() == BB) { + PHINode *OldValPN = cast<PHINode>(OldVal); + for (unsigned i = 0, e = OldValPN->getNumIncomingValues(); i != e; ++i) { + // Note that, since we are merging phi nodes and BB and Succ might + // have common predecessors, we could end up with a phi node with + // identical incoming branches. This will be cleaned up later (and + // will trigger asserts if we try to clean it up now, without also + // simplifying the corresponding conditional branch). + BasicBlock *PredBB = OldValPN->getIncomingBlock(i); + Value *PredVal = OldValPN->getIncomingValue(i); + Value *Selected = selectIncomingValueForBlock(PredVal, PredBB, + IncomingValues); + + // And add a new incoming value for this predecessor for the + // newly retargeted branch. + PN->addIncoming(Selected, PredBB); + } + } else { + for (unsigned i = 0, e = BBPreds.size(); i != e; ++i) { + // Update existing incoming values in PN for this + // predecessor of BB. + BasicBlock *PredBB = BBPreds[i]; + Value *Selected = selectIncomingValueForBlock(OldVal, PredBB, + IncomingValues); + + // And add a new incoming value for this predecessor for the + // newly retargeted branch. + PN->addIncoming(Selected, PredBB); + } + } + + replaceUndefValuesInPhi(PN, IncomingValues); +} + +/// TryToSimplifyUncondBranchFromEmptyBlock - BB is known to contain an +/// unconditional branch, and contains no instructions other than PHI nodes, +/// potential side-effect free intrinsics and the branch. If possible, +/// eliminate BB by rewriting all the predecessors to branch to the successor +/// block and return true. If we can't transform, return false. +bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) { + assert(BB != &BB->getParent()->getEntryBlock() && + "TryToSimplifyUncondBranchFromEmptyBlock called on entry block!"); + + // We can't eliminate infinite loops. + BasicBlock *Succ = cast<BranchInst>(BB->getTerminator())->getSuccessor(0); + if (BB == Succ) return false; + + // Check to see if merging these blocks would cause conflicts for any of the + // phi nodes in BB or Succ. If not, we can safely merge. + if (!CanPropagatePredecessorsForPHIs(BB, Succ)) return false; + + // Check for cases where Succ has multiple predecessors and a PHI node in BB + // has uses which will not disappear when the PHI nodes are merged. It is + // possible to handle such cases, but difficult: it requires checking whether + // BB dominates Succ, which is non-trivial to calculate in the case where + // Succ has multiple predecessors. Also, it requires checking whether + // constructing the necessary self-referential PHI node doesn't introduce any + // conflicts; this isn't too difficult, but the previous code for doing this + // was incorrect. + // + // Note that if this check finds a live use, BB dominates Succ, so BB is + // something like a loop pre-header (or rarely, a part of an irreducible CFG); + // folding the branch isn't profitable in that case anyway. 
+ if (!Succ->getSinglePredecessor()) { + BasicBlock::iterator BBI = BB->begin(); + while (isa<PHINode>(*BBI)) { + for (Use &U : BBI->uses()) { + if (PHINode* PN = dyn_cast<PHINode>(U.getUser())) { + if (PN->getIncomingBlock(U) != BB) + return false; + } else { + return false; + } + } + ++BBI; + } + } + + DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB); + + if (isa<PHINode>(Succ->begin())) { + // If there is more than one pred of succ, and there are PHI nodes in + // the successor, then we need to add incoming edges for the PHI nodes + // + const PredBlockVector BBPreds(pred_begin(BB), pred_end(BB)); + + // Loop over all of the PHI nodes in the successor of BB. + for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) { + PHINode *PN = cast<PHINode>(I); + + redirectValuesFromPredecessorsToPhi(BB, BBPreds, PN); + } + } + + if (Succ->getSinglePredecessor()) { + // BB is the only predecessor of Succ, so Succ will end up with exactly + // the same predecessors BB had. + + // Copy over any phi, debug or lifetime instruction. + BB->getTerminator()->eraseFromParent(); + Succ->getInstList().splice(Succ->getFirstNonPHI()->getIterator(), + BB->getInstList()); + } else { + while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) { + // We explicitly check for such uses in CanPropagatePredecessorsForPHIs. + assert(PN->use_empty() && "There shouldn't be any uses here!"); + PN->eraseFromParent(); + } + } + + // If the unconditional branch we replaced contains llvm.loop metadata, we + // add the metadata to the branch instructions in the predecessors. + unsigned LoopMDKind = BB->getContext().getMDKindID("llvm.loop"); + Instruction *TI = BB->getTerminator(); + if (TI) + if (MDNode *LoopMD = TI->getMetadata(LoopMDKind)) + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + BasicBlock *Pred = *PI; + Pred->getTerminator()->setMetadata(LoopMDKind, LoopMD); + } + + // Everything that jumped to BB now goes to Succ. + BB->replaceAllUsesWith(Succ); + if (!Succ->hasName()) Succ->takeName(BB); + BB->eraseFromParent(); // Delete the old basic block. + return true; +} + +/// EliminateDuplicatePHINodes - Check for and eliminate duplicate PHI +/// nodes in this block. This doesn't try to be clever about PHI nodes +/// which differ only in the order of the incoming values, but instcombine +/// orders them so it usually won't matter. +/// +bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { + // This implementation doesn't currently consider undef operands + // specially. Theoretically, two phis which are identical except for + // one having an undef where the other doesn't could be collapsed. + + struct PHIDenseMapInfo { + static PHINode *getEmptyKey() { + return DenseMapInfo<PHINode *>::getEmptyKey(); + } + static PHINode *getTombstoneKey() { + return DenseMapInfo<PHINode *>::getTombstoneKey(); + } + static unsigned getHashValue(PHINode *PN) { + // Compute a hash value on the operands. Instcombine will likely have + // sorted them, which helps expose duplicates, but we have to check all + // the operands to be safe in case instcombine hasn't run. + return static_cast<unsigned>(hash_combine( + hash_combine_range(PN->value_op_begin(), PN->value_op_end()), + hash_combine_range(PN->block_begin(), PN->block_end()))); + } + static bool isEqual(PHINode *LHS, PHINode *RHS) { + if (LHS == getEmptyKey() || LHS == getTombstoneKey() || + RHS == getEmptyKey() || RHS == getTombstoneKey()) + return LHS == RHS; + return LHS->isIdenticalTo(RHS); + } + }; + + // Set of unique PHINodes. 
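  // (Illustrative example: two PHIs such as
  //    %a = phi i32 [ 1, %left ], [ 2, %right ]
  //    %b = phi i32 [ 1, %left ], [ 2, %right ]
  //  hash identically over their (value, block) operand lists, so the second
  //  one found is replaced by the first and erased below.)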
+ DenseSet<PHINode *, PHIDenseMapInfo> PHISet; + + // Examine each PHI. + bool Changed = false; + for (auto I = BB->begin(); PHINode *PN = dyn_cast<PHINode>(I++);) { + auto Inserted = PHISet.insert(PN); + if (!Inserted.second) { + // A duplicate. Replace this PHI with its duplicate. + PN->replaceAllUsesWith(*Inserted.first); + PN->eraseFromParent(); + Changed = true; + + // The RAUW can change PHIs that we already visited. Start over from the + // beginning. + PHISet.clear(); + I = BB->begin(); + } + } + + return Changed; +} + +/// enforceKnownAlignment - If the specified pointer points to an object that +/// we control, modify the object's alignment to PrefAlign. This isn't +/// often possible though. If alignment is important, a more reliable approach +/// is to simply align all global variables and allocation instructions to +/// their preferred alignment from the beginning. +/// +static unsigned enforceKnownAlignment(Value *V, unsigned Align, + unsigned PrefAlign, + const DataLayout &DL) { + assert(PrefAlign > Align); + + V = V->stripPointerCasts(); + + if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) { + // TODO: ideally, computeKnownBits ought to have used + // AllocaInst::getAlignment() in its computation already, making + // the below max redundant. But, as it turns out, + // stripPointerCasts recurses through infinite layers of bitcasts, + // while computeKnownBits is not allowed to traverse more than 6 + // levels. + Align = std::max(AI->getAlignment(), Align); + if (PrefAlign <= Align) + return Align; + + // If the preferred alignment is greater than the natural stack alignment + // then don't round up. This avoids dynamic stack realignment. + if (DL.exceedsNaturalStackAlignment(PrefAlign)) + return Align; + AI->setAlignment(PrefAlign); + return PrefAlign; + } + + if (auto *GO = dyn_cast<GlobalObject>(V)) { + // TODO: as above, this shouldn't be necessary. + Align = std::max(GO->getAlignment(), Align); + if (PrefAlign <= Align) + return Align; + + // If there is a large requested alignment and we can, bump up the alignment + // of the global. If the memory we set aside for the global may not be the + // memory used by the final program then it is impossible for us to reliably + // enforce the preferred alignment. + if (!GO->canIncreaseAlignment()) + return Align; + + GO->setAlignment(PrefAlign); + return PrefAlign; + } + + return Align; +} + +unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, + const DataLayout &DL, + const Instruction *CxtI, + AssumptionCache *AC, + const DominatorTree *DT) { + assert(V->getType()->isPointerTy() && + "getOrEnforceKnownAlignment expects a pointer!"); + unsigned BitWidth = DL.getPointerTypeSizeInBits(V->getType()); + + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + computeKnownBits(V, KnownZero, KnownOne, DL, 0, AC, CxtI, DT); + unsigned TrailZ = KnownZero.countTrailingOnes(); + + // Avoid trouble with ridiculously large TrailZ values, such as + // those computed from a null pointer. + TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1)); + + unsigned Align = 1u << std::min(BitWidth - 1, TrailZ); + + // LLVM doesn't support alignments larger than this currently. + Align = std::min(Align, +Value::MaximumAlignment); + + if (PrefAlign > Align) + Align = enforceKnownAlignment(V, Align, PrefAlign, DL); + + // We don't need to make any adjustment. 
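  // (Worked example, illustrative only: if computeKnownBits proves the low
  // four bits of the pointer are zero, TrailZ == 4 and the inferred alignment
  // is 1u << 4 == 16; a PrefAlign of 32 would then have been forwarded to
  // enforceKnownAlignment above, which only succeeds for allocas and global
  // objects it is allowed to over-align.)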
+ return Align; +} + +///===---------------------------------------------------------------------===// +/// Dbg Intrinsic utilities +/// + +/// See if there is a dbg.value intrinsic for DIVar before I. +static bool LdStHasDebugValue(DILocalVariable *DIVar, DIExpression *DIExpr, + Instruction *I) { + // Since we can't guarantee that the original dbg.declare instrinsic + // is removed by LowerDbgDeclare(), we need to make sure that we are + // not inserting the same dbg.value intrinsic over and over. + llvm::BasicBlock::InstListType::iterator PrevI(I); + if (PrevI != I->getParent()->getInstList().begin()) { + --PrevI; + if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(PrevI)) + if (DVI->getValue() == I->getOperand(0) && + DVI->getOffset() == 0 && + DVI->getVariable() == DIVar && + DVI->getExpression() == DIExpr) + return true; + } + return false; +} + +/// See if there is a dbg.value intrinsic for DIVar for the PHI node. +static bool PhiHasDebugValue(DILocalVariable *DIVar, + DIExpression *DIExpr, + PHINode *APN) { + // Since we can't guarantee that the original dbg.declare instrinsic + // is removed by LowerDbgDeclare(), we need to make sure that we are + // not inserting the same dbg.value intrinsic over and over. + DbgValueList DbgValues; + FindAllocaDbgValues(DbgValues, APN); + for (auto DVI : DbgValues) { + assert (DVI->getValue() == APN); + assert (DVI->getOffset() == 0); + if ((DVI->getVariable() == DIVar) && (DVI->getExpression() == DIExpr)) + return true; + } + return false; +} + +/// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value +/// that has an associated llvm.dbg.decl intrinsic. +void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, + StoreInst *SI, DIBuilder &Builder) { + auto *DIVar = DDI->getVariable(); + auto *DIExpr = DDI->getExpression(); + assert(DIVar && "Missing variable"); + + // If an argument is zero extended then use argument directly. The ZExt + // may be zapped by an optimization pass in future. + Argument *ExtendedArg = nullptr; + if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0))) + ExtendedArg = dyn_cast<Argument>(ZExt->getOperand(0)); + if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0))) + ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0)); + if (ExtendedArg) { + // We're now only describing a subset of the variable. The fragment we're + // describing will always be smaller than the variable size, because + // VariableSize == Size of Alloca described by DDI. Since SI stores + // to the alloca described by DDI, if it's first operand is an extend, + // we're guaranteed that before extension, the value was narrower than + // the size of the alloca, hence the size of the described variable. + SmallVector<uint64_t, 3> Ops; + unsigned FragmentOffset = 0; + // If this already is a bit fragment, we drop the bit fragment from the + // expression and record the offset. 
+ auto Fragment = DIExpr->getFragmentInfo(); + if (Fragment) { + Ops.append(DIExpr->elements_begin(), DIExpr->elements_end()-3); + FragmentOffset = Fragment->OffsetInBits; + } else { + Ops.append(DIExpr->elements_begin(), DIExpr->elements_end()); + } + Ops.push_back(dwarf::DW_OP_LLVM_fragment); + Ops.push_back(FragmentOffset); + const DataLayout &DL = DDI->getModule()->getDataLayout(); + Ops.push_back(DL.getTypeSizeInBits(ExtendedArg->getType())); + auto NewDIExpr = Builder.createExpression(Ops); + if (!LdStHasDebugValue(DIVar, NewDIExpr, SI)) + Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, NewDIExpr, + DDI->getDebugLoc(), SI); + } else if (!LdStHasDebugValue(DIVar, DIExpr, SI)) + Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, DIExpr, + DDI->getDebugLoc(), SI); +} + +/// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value +/// that has an associated llvm.dbg.decl intrinsic. +void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, + LoadInst *LI, DIBuilder &Builder) { + auto *DIVar = DDI->getVariable(); + auto *DIExpr = DDI->getExpression(); + assert(DIVar && "Missing variable"); + + if (LdStHasDebugValue(DIVar, DIExpr, LI)) + return; + + // We are now tracking the loaded value instead of the address. In the + // future if multi-location support is added to the IR, it might be + // preferable to keep tracking both the loaded value and the original + // address in case the alloca can not be elided. + Instruction *DbgValue = Builder.insertDbgValueIntrinsic( + LI, 0, DIVar, DIExpr, DDI->getDebugLoc(), (Instruction *)nullptr); + DbgValue->insertAfter(LI); +} + +/// Inserts a llvm.dbg.value intrinsic after a phi +/// that has an associated llvm.dbg.decl intrinsic. +void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, + PHINode *APN, DIBuilder &Builder) { + auto *DIVar = DDI->getVariable(); + auto *DIExpr = DDI->getExpression(); + assert(DIVar && "Missing variable"); + + if (PhiHasDebugValue(DIVar, DIExpr, APN)) + return; + + BasicBlock *BB = APN->getParent(); + auto InsertionPt = BB->getFirstInsertionPt(); + + // The block may be a catchswitch block, which does not have a valid + // insertion point. + // FIXME: Insert dbg.value markers in the successors when appropriate. + if (InsertionPt != BB->end()) + Builder.insertDbgValueIntrinsic(APN, 0, DIVar, DIExpr, DDI->getDebugLoc(), + &*InsertionPt); +} + +/// Determine whether this alloca is either a VLA or an array. +static bool isArray(AllocaInst *AI) { + return AI->isArrayAllocation() || + AI->getType()->getElementType()->isArrayTy(); +} + +/// LowerDbgDeclare - Lowers llvm.dbg.declare intrinsics into appropriate set +/// of llvm.dbg.value intrinsics. +bool llvm::LowerDbgDeclare(Function &F) { + DIBuilder DIB(*F.getParent(), /*AllowUnresolved*/ false); + SmallVector<DbgDeclareInst *, 4> Dbgs; + for (auto &FI : F) + for (Instruction &BI : FI) + if (auto DDI = dyn_cast<DbgDeclareInst>(&BI)) + Dbgs.push_back(DDI); + + if (Dbgs.empty()) + return false; + + for (auto &I : Dbgs) { + DbgDeclareInst *DDI = I; + AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress()); + // If this is an alloca for a scalar variable, insert a dbg.value + // at each load and store to the alloca and erase the dbg.declare. + // The dbg.values allow tracking a variable even if it is not + // stored on the stack, while the dbg.declare can only describe + // the stack slot (and at a lexical-scope granularity). Later + // passes will attempt to elide the stack slot. 
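    // (Illustrative example of the rewrite performed below: for a scalar
    //  local described by
    //    call void @llvm.dbg.declare(metadata i32* %x.addr, metadata !var,
    //                                metadata !expr)
    //  each  store i32 %v, i32* %x.addr  gains a companion
    //    call void @llvm.dbg.value(metadata i32 %v, i64 0, metadata !var,
    //                              metadata !expr)
    //  and the dbg.declare itself is erased.)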
+ if (AI && !isArray(AI)) { + for (auto &AIUse : AI->uses()) { + User *U = AIUse.getUser(); + if (StoreInst *SI = dyn_cast<StoreInst>(U)) { + if (AIUse.getOperandNo() == 1) + ConvertDebugDeclareToDebugValue(DDI, SI, DIB); + } else if (LoadInst *LI = dyn_cast<LoadInst>(U)) { + ConvertDebugDeclareToDebugValue(DDI, LI, DIB); + } else if (CallInst *CI = dyn_cast<CallInst>(U)) { + // This is a call by-value or some other instruction that + // takes a pointer to the variable. Insert a *value* + // intrinsic that describes the alloca. + SmallVector<uint64_t, 1> NewDIExpr; + auto *DIExpr = DDI->getExpression(); + NewDIExpr.push_back(dwarf::DW_OP_deref); + NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end()); + DIB.insertDbgValueIntrinsic(AI, 0, DDI->getVariable(), + DIB.createExpression(NewDIExpr), + DDI->getDebugLoc(), CI); + } + } + DDI->eraseFromParent(); + } + } + return true; +} + +/// FindAllocaDbgDeclare - Finds the llvm.dbg.declare intrinsic describing the +/// alloca 'V', if any. +DbgDeclareInst *llvm::FindAllocaDbgDeclare(Value *V) { + if (auto *L = LocalAsMetadata::getIfExists(V)) + if (auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L)) + for (User *U : MDV->users()) + if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U)) + return DDI; + + return nullptr; +} + +/// FindAllocaDbgValues - Finds the llvm.dbg.value intrinsics describing the +/// alloca 'V', if any. +void llvm::FindAllocaDbgValues(DbgValueList &DbgValues, Value *V) { + if (auto *L = LocalAsMetadata::getIfExists(V)) + if (auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L)) + for (User *U : MDV->users()) + if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(U)) + DbgValues.push_back(DVI); +} + +static void DIExprAddDeref(SmallVectorImpl<uint64_t> &Expr) { + Expr.push_back(dwarf::DW_OP_deref); +} + +static void DIExprAddOffset(SmallVectorImpl<uint64_t> &Expr, int Offset) { + if (Offset > 0) { + Expr.push_back(dwarf::DW_OP_plus); + Expr.push_back(Offset); + } else if (Offset < 0) { + Expr.push_back(dwarf::DW_OP_minus); + Expr.push_back(-Offset); + } +} + +static DIExpression *BuildReplacementDIExpr(DIBuilder &Builder, + DIExpression *DIExpr, bool Deref, + int Offset) { + if (!Deref && !Offset) + return DIExpr; + // Create a copy of the original DIDescriptor for user variable, prepending + // "deref" operation to a list of address elements, as new llvm.dbg.declare + // will take a value storing address of the memory for variable, not + // alloca itself. + SmallVector<uint64_t, 4> NewDIExpr; + if (Deref) + DIExprAddDeref(NewDIExpr); + DIExprAddOffset(NewDIExpr, Offset); + if (DIExpr) + NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end()); + return Builder.createExpression(NewDIExpr); +} + +bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress, + Instruction *InsertBefore, DIBuilder &Builder, + bool Deref, int Offset) { + DbgDeclareInst *DDI = FindAllocaDbgDeclare(Address); + if (!DDI) + return false; + DebugLoc Loc = DDI->getDebugLoc(); + auto *DIVar = DDI->getVariable(); + auto *DIExpr = DDI->getExpression(); + assert(DIVar && "Missing variable"); + + DIExpr = BuildReplacementDIExpr(Builder, DIExpr, Deref, Offset); + + // Insert llvm.dbg.declare immediately after the original alloca, and remove + // old llvm.dbg.declare. 
+ Builder.insertDeclare(NewAddress, DIVar, DIExpr, Loc, InsertBefore); + DDI->eraseFromParent(); + return true; +} + +bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress, + DIBuilder &Builder, bool Deref, int Offset) { + return replaceDbgDeclare(AI, NewAllocaAddress, AI->getNextNode(), Builder, + Deref, Offset); +} + +static void replaceOneDbgValueForAlloca(DbgValueInst *DVI, Value *NewAddress, + DIBuilder &Builder, int Offset) { + DebugLoc Loc = DVI->getDebugLoc(); + auto *DIVar = DVI->getVariable(); + auto *DIExpr = DVI->getExpression(); + assert(DIVar && "Missing variable"); + + // This is an alloca-based llvm.dbg.value. The first thing it should do with + // the alloca pointer is dereference it. Otherwise we don't know how to handle + // it and give up. + if (!DIExpr || DIExpr->getNumElements() < 1 || + DIExpr->getElement(0) != dwarf::DW_OP_deref) + return; + + // Insert the offset immediately after the first deref. + // We could just change the offset argument of dbg.value, but it's unsigned... + if (Offset) { + SmallVector<uint64_t, 4> NewDIExpr; + DIExprAddDeref(NewDIExpr); + DIExprAddOffset(NewDIExpr, Offset); + NewDIExpr.append(DIExpr->elements_begin() + 1, DIExpr->elements_end()); + DIExpr = Builder.createExpression(NewDIExpr); + } + + Builder.insertDbgValueIntrinsic(NewAddress, DVI->getOffset(), DIVar, DIExpr, + Loc, DVI); + DVI->eraseFromParent(); +} + +void llvm::replaceDbgValueForAlloca(AllocaInst *AI, Value *NewAllocaAddress, + DIBuilder &Builder, int Offset) { + if (auto *L = LocalAsMetadata::getIfExists(AI)) + if (auto *MDV = MetadataAsValue::getIfExists(AI->getContext(), L)) + for (auto UI = MDV->use_begin(), UE = MDV->use_end(); UI != UE;) { + Use &U = *UI++; + if (auto *DVI = dyn_cast<DbgValueInst>(U.getUser())) + replaceOneDbgValueForAlloca(DVI, NewAllocaAddress, Builder, Offset); + } +} + +unsigned llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) { + unsigned NumDeadInst = 0; + // Delete the instructions backwards, as it has a reduced likelihood of + // having to update as many def-use and use-def chains. + Instruction *EndInst = BB->getTerminator(); // Last not to be deleted. + while (EndInst != &BB->front()) { + // Delete the next to last instruction. + Instruction *Inst = &*--EndInst->getIterator(); + if (!Inst->use_empty() && !Inst->getType()->isTokenTy()) + Inst->replaceAllUsesWith(UndefValue::get(Inst->getType())); + if (Inst->isEHPad() || Inst->getType()->isTokenTy()) { + EndInst = Inst; + continue; + } + if (!isa<DbgInfoIntrinsic>(Inst)) + ++NumDeadInst; + Inst->eraseFromParent(); + } + return NumDeadInst; +} + +unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap, + bool PreserveLCSSA) { + BasicBlock *BB = I->getParent(); + // Loop over all of the successors, removing BB's entry from any PHI + // nodes. + for (BasicBlock *Successor : successors(BB)) + Successor->removePredecessor(BB, PreserveLCSSA); + + // Insert a call to llvm.trap right before this. This turns the undefined + // behavior into a hard fail instead of falling through into random code. + if (UseLLVMTrap) { + Function *TrapFn = + Intrinsic::getDeclaration(BB->getParent()->getParent(), Intrinsic::trap); + CallInst *CallTrap = CallInst::Create(TrapFn, "", I); + CallTrap->setDebugLoc(I->getDebugLoc()); + } + new UnreachableInst(I->getContext(), I); + + // All instructions after this are dead. 
+ unsigned NumInstrsRemoved = 0; + BasicBlock::iterator BBI = I->getIterator(), BBE = BB->end(); + while (BBI != BBE) { + if (!BBI->use_empty()) + BBI->replaceAllUsesWith(UndefValue::get(BBI->getType())); + BB->getInstList().erase(BBI++); + ++NumInstrsRemoved; + } + return NumInstrsRemoved; +} + +/// changeToCall - Convert the specified invoke into a normal call. +static void changeToCall(InvokeInst *II) { + SmallVector<Value*, 8> Args(II->arg_begin(), II->arg_end()); + SmallVector<OperandBundleDef, 1> OpBundles; + II->getOperandBundlesAsDefs(OpBundles); + CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, OpBundles, + "", II); + NewCall->takeName(II); + NewCall->setCallingConv(II->getCallingConv()); + NewCall->setAttributes(II->getAttributes()); + NewCall->setDebugLoc(II->getDebugLoc()); + II->replaceAllUsesWith(NewCall); + + // Follow the call by a branch to the normal destination. + BranchInst::Create(II->getNormalDest(), II); + + // Update PHI nodes in the unwind destination + II->getUnwindDest()->removePredecessor(II->getParent()); + II->eraseFromParent(); +} + +BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI, + BasicBlock *UnwindEdge) { + BasicBlock *BB = CI->getParent(); + + // Convert this function call into an invoke instruction. First, split the + // basic block. + BasicBlock *Split = + BB->splitBasicBlock(CI->getIterator(), CI->getName() + ".noexc"); + + // Delete the unconditional branch inserted by splitBasicBlock + BB->getInstList().pop_back(); + + // Create the new invoke instruction. + SmallVector<Value *, 8> InvokeArgs(CI->arg_begin(), CI->arg_end()); + SmallVector<OperandBundleDef, 1> OpBundles; + + CI->getOperandBundlesAsDefs(OpBundles); + + // Note: we're round tripping operand bundles through memory here, and that + // can potentially be avoided with a cleverer API design that we do not have + // as of this time. + + InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), Split, UnwindEdge, + InvokeArgs, OpBundles, CI->getName(), BB); + II->setDebugLoc(CI->getDebugLoc()); + II->setCallingConv(CI->getCallingConv()); + II->setAttributes(CI->getAttributes()); + + // Make sure that anything using the call now uses the invoke! This also + // updates the CallGraph if present, because it uses a WeakVH. + CI->replaceAllUsesWith(II); + + // Delete the original call + Split->getInstList().pop_front(); + return Split; +} + +static bool markAliveBlocks(Function &F, + SmallPtrSetImpl<BasicBlock*> &Reachable) { + + SmallVector<BasicBlock*, 128> Worklist; + BasicBlock *BB = &F.front(); + Worklist.push_back(BB); + Reachable.insert(BB); + bool Changed = false; + do { + BB = Worklist.pop_back_val(); + + // Do a quick scan of the basic block, turning any obviously unreachable + // instructions into LLVM unreachable insts. The instruction combining pass + // canonicalizes unreachable insts into stores to null or undef. + for (Instruction &I : *BB) { + // Assumptions that are known to be false are equivalent to unreachable. + // Also, if the condition is undefined, then we make the choice most + // beneficial to the optimizer, and choose that to also be unreachable. + if (auto *II = dyn_cast<IntrinsicInst>(&I)) { + if (II->getIntrinsicID() == Intrinsic::assume) { + if (match(II->getArgOperand(0), m_CombineOr(m_Zero(), m_Undef()))) { + // Don't insert a call to llvm.trap right before the unreachable. 
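+ // For example, `call void @llvm.assume(i1 false)` (or an assume on an
+ // undef condition) and everything after it in the block are replaced by
+ // an `unreachable` terminator here.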
+ changeToUnreachable(II, false); + Changed = true; + break; + } + } + + if (II->getIntrinsicID() == Intrinsic::experimental_guard) { + // A call to the guard intrinsic bails out of the current compilation + // unit if the predicate passed to it is false. If the predicate is a + // constant false, then we know the guard will bail out of the current + // compile unconditionally, so all code following it is dead. + // + // Note: unlike in llvm.assume, it is not "obviously profitable" for + // guards to treat `undef` as `false` since a guard on `undef` can + // still be useful for widening. + if (match(II->getArgOperand(0), m_Zero())) + if (!isa<UnreachableInst>(II->getNextNode())) { + changeToUnreachable(II->getNextNode(), /*UseLLVMTrap=*/ false); + Changed = true; + break; + } + } + } + + if (auto *CI = dyn_cast<CallInst>(&I)) { + Value *Callee = CI->getCalledValue(); + if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) { + changeToUnreachable(CI, /*UseLLVMTrap=*/false); + Changed = true; + break; + } + if (CI->doesNotReturn()) { + // If we found a call to a no-return function, insert an unreachable + // instruction after it. Make sure there isn't *already* one there + // though. + if (!isa<UnreachableInst>(CI->getNextNode())) { + // Don't insert a call to llvm.trap right before the unreachable. + changeToUnreachable(CI->getNextNode(), false); + Changed = true; + } + break; + } + } + + // Store to undef and store to null are undefined and used to signal that + // they should be changed to unreachable by passes that can't modify the + // CFG. + if (auto *SI = dyn_cast<StoreInst>(&I)) { + // Don't touch volatile stores. + if (SI->isVolatile()) continue; + + Value *Ptr = SI->getOperand(1); + + if (isa<UndefValue>(Ptr) || + (isa<ConstantPointerNull>(Ptr) && + SI->getPointerAddressSpace() == 0)) { + changeToUnreachable(SI, true); + Changed = true; + break; + } + } + } + + TerminatorInst *Terminator = BB->getTerminator(); + if (auto *II = dyn_cast<InvokeInst>(Terminator)) { + // Turn invokes that call 'nounwind' functions into ordinary calls. + Value *Callee = II->getCalledValue(); + if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) { + changeToUnreachable(II, true); + Changed = true; + } else if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(&F)) { + if (II->use_empty() && II->onlyReadsMemory()) { + // jump to the normal destination branch. + BranchInst::Create(II->getNormalDest(), II); + II->getUnwindDest()->removePredecessor(II->getParent()); + II->eraseFromParent(); + } else + changeToCall(II); + Changed = true; + } + } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Terminator)) { + // Remove catchpads which cannot be reached. + struct CatchPadDenseMapInfo { + static CatchPadInst *getEmptyKey() { + return DenseMapInfo<CatchPadInst *>::getEmptyKey(); + } + static CatchPadInst *getTombstoneKey() { + return DenseMapInfo<CatchPadInst *>::getTombstoneKey(); + } + static unsigned getHashValue(CatchPadInst *CatchPad) { + return static_cast<unsigned>(hash_combine_range( + CatchPad->value_op_begin(), CatchPad->value_op_end())); + } + static bool isEqual(CatchPadInst *LHS, CatchPadInst *RHS) { + if (LHS == getEmptyKey() || LHS == getTombstoneKey() || + RHS == getEmptyKey() || RHS == getTombstoneKey()) + return LHS == RHS; + return LHS->isIdenticalTo(RHS); + } + }; + + // Set of unique CatchPads. 
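+ // (Handlers count as duplicates when their catchpads compare equal via
+ // isIdenticalTo; any later duplicate handler is removed from the
+ // catchswitch in the loop below.)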
+ SmallDenseMap<CatchPadInst *, detail::DenseSetEmpty, 4, + CatchPadDenseMapInfo, detail::DenseSetPair<CatchPadInst *>> + HandlerSet; + detail::DenseSetEmpty Empty; + for (CatchSwitchInst::handler_iterator I = CatchSwitch->handler_begin(), + E = CatchSwitch->handler_end(); + I != E; ++I) { + BasicBlock *HandlerBB = *I; + auto *CatchPad = cast<CatchPadInst>(HandlerBB->getFirstNonPHI()); + if (!HandlerSet.insert({CatchPad, Empty}).second) { + CatchSwitch->removeHandler(I); + --I; + --E; + Changed = true; + } + } + } + + Changed |= ConstantFoldTerminator(BB, true); + for (BasicBlock *Successor : successors(BB)) + if (Reachable.insert(Successor).second) + Worklist.push_back(Successor); + } while (!Worklist.empty()); + return Changed; +} + +void llvm::removeUnwindEdge(BasicBlock *BB) { + TerminatorInst *TI = BB->getTerminator(); + + if (auto *II = dyn_cast<InvokeInst>(TI)) { + changeToCall(II); + return; + } + + TerminatorInst *NewTI; + BasicBlock *UnwindDest; + + if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) { + NewTI = CleanupReturnInst::Create(CRI->getCleanupPad(), nullptr, CRI); + UnwindDest = CRI->getUnwindDest(); + } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(TI)) { + auto *NewCatchSwitch = CatchSwitchInst::Create( + CatchSwitch->getParentPad(), nullptr, CatchSwitch->getNumHandlers(), + CatchSwitch->getName(), CatchSwitch); + for (BasicBlock *PadBB : CatchSwitch->handlers()) + NewCatchSwitch->addHandler(PadBB); + + NewTI = NewCatchSwitch; + UnwindDest = CatchSwitch->getUnwindDest(); + } else { + llvm_unreachable("Could not find unwind successor"); + } + + NewTI->takeName(TI); + NewTI->setDebugLoc(TI->getDebugLoc()); + UnwindDest->removePredecessor(BB); + TI->replaceAllUsesWith(NewTI); + TI->eraseFromParent(); +} + +/// removeUnreachableBlocksFromFn - Remove blocks that are not reachable, even +/// if they are in a dead cycle. Return true if a change was made, false +/// otherwise. +bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI) { + SmallPtrSet<BasicBlock*, 16> Reachable; + bool Changed = markAliveBlocks(F, Reachable); + + // If there are unreachable blocks in the CFG... + if (Reachable.size() == F.size()) + return Changed; + + assert(Reachable.size() < F.size()); + NumRemoved += F.size()-Reachable.size(); + + // Loop over all of the basic blocks that are not reachable, dropping all of + // their internal references... 
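+ // (References are dropped before the blocks are erased because unreachable
+ // blocks may form cycles that refer to one another; erasing them directly
+ // would leave dangling uses.)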
+ for (Function::iterator BB = ++F.begin(), E = F.end(); BB != E; ++BB) { + if (Reachable.count(&*BB)) + continue; + + for (BasicBlock *Successor : successors(&*BB)) + if (Reachable.count(Successor)) + Successor->removePredecessor(&*BB); + if (LVI) + LVI->eraseBlock(&*BB); + BB->dropAllReferences(); + } + + for (Function::iterator I = ++F.begin(); I != F.end();) + if (!Reachable.count(&*I)) + I = F.getBasicBlockList().erase(I); + else + ++I; + + return true; +} + +void llvm::combineMetadata(Instruction *K, const Instruction *J, + ArrayRef<unsigned> KnownIDs) { + SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata; + K->dropUnknownNonDebugMetadata(KnownIDs); + K->getAllMetadataOtherThanDebugLoc(Metadata); + for (const auto &MD : Metadata) { + unsigned Kind = MD.first; + MDNode *JMD = J->getMetadata(Kind); + MDNode *KMD = MD.second; + + switch (Kind) { + default: + K->setMetadata(Kind, nullptr); // Remove unknown metadata + break; + case LLVMContext::MD_dbg: + llvm_unreachable("getAllMetadataOtherThanDebugLoc returned a MD_dbg"); + case LLVMContext::MD_tbaa: + K->setMetadata(Kind, MDNode::getMostGenericTBAA(JMD, KMD)); + break; + case LLVMContext::MD_alias_scope: + K->setMetadata(Kind, MDNode::getMostGenericAliasScope(JMD, KMD)); + break; + case LLVMContext::MD_noalias: + case LLVMContext::MD_mem_parallel_loop_access: + K->setMetadata(Kind, MDNode::intersect(JMD, KMD)); + break; + case LLVMContext::MD_range: + K->setMetadata(Kind, MDNode::getMostGenericRange(JMD, KMD)); + break; + case LLVMContext::MD_fpmath: + K->setMetadata(Kind, MDNode::getMostGenericFPMath(JMD, KMD)); + break; + case LLVMContext::MD_invariant_load: + // Only set the !invariant.load if it is present in both instructions. + K->setMetadata(Kind, JMD); + break; + case LLVMContext::MD_nonnull: + // Only set the !nonnull if it is present in both instructions. + K->setMetadata(Kind, JMD); + break; + case LLVMContext::MD_invariant_group: + // Preserve !invariant.group in K. + break; + case LLVMContext::MD_align: + K->setMetadata(Kind, + MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD)); + break; + case LLVMContext::MD_dereferenceable: + case LLVMContext::MD_dereferenceable_or_null: + K->setMetadata(Kind, + MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD)); + break; + } + } + // Set !invariant.group from J if J has it. If both instructions have it + // then we will just pick it from J - even when they are different. + // Also make sure that K is load or store - f.e. combining bitcast with load + // could produce bitcast with invariant.group metadata, which is invalid. + // FIXME: we should try to preserve both invariant.group md if they are + // different, but right now instruction can only have one invariant.group. 
+ if (auto *JMD = J->getMetadata(LLVMContext::MD_invariant_group)) + if (isa<LoadInst>(K) || isa<StoreInst>(K)) + K->setMetadata(LLVMContext::MD_invariant_group, JMD); +} + +void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J) { + unsigned KnownIDs[] = { + LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, + LLVMContext::MD_noalias, LLVMContext::MD_range, + LLVMContext::MD_invariant_load, LLVMContext::MD_nonnull, + LLVMContext::MD_invariant_group, LLVMContext::MD_align, + LLVMContext::MD_dereferenceable, + LLVMContext::MD_dereferenceable_or_null}; + combineMetadata(K, J, KnownIDs); +} + +unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To, + DominatorTree &DT, + const BasicBlockEdge &Root) { + assert(From->getType() == To->getType()); + + unsigned Count = 0; + for (Value::use_iterator UI = From->use_begin(), UE = From->use_end(); + UI != UE; ) { + Use &U = *UI++; + if (DT.dominates(Root, U)) { + U.set(To); + DEBUG(dbgs() << "Replace dominated use of '" + << From->getName() << "' as " + << *To << " in " << *U << "\n"); + ++Count; + } + } + return Count; +} + +unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To, + DominatorTree &DT, + const BasicBlock *BB) { + assert(From->getType() == To->getType()); + + unsigned Count = 0; + for (Value::use_iterator UI = From->use_begin(), UE = From->use_end(); + UI != UE;) { + Use &U = *UI++; + auto *I = cast<Instruction>(U.getUser()); + if (DT.properlyDominates(BB, I->getParent())) { + U.set(To); + DEBUG(dbgs() << "Replace dominated use of '" << From->getName() << "' as " + << *To << " in " << *U << "\n"); + ++Count; + } + } + return Count; +} + +bool llvm::callsGCLeafFunction(ImmutableCallSite CS) { + // Check if the function is specifically marked as a gc leaf function. + if (CS.hasFnAttr("gc-leaf-function")) + return true; + if (const Function *F = CS.getCalledFunction()) { + if (F->hasFnAttribute("gc-leaf-function")) + return true; + + if (auto IID = F->getIntrinsicID()) + // Most LLVM intrinsics do not take safepoints. + return IID != Intrinsic::experimental_gc_statepoint && + IID != Intrinsic::experimental_deoptimize; + } + + return false; +} + +namespace { +/// A potential constituent of a bitreverse or bswap expression. See +/// collectBitParts for a fuller explanation. +struct BitPart { + BitPart(Value *P, unsigned BW) : Provider(P) { + Provenance.resize(BW); + } + + /// The Value that this is a bitreverse/bswap of. + Value *Provider; + /// The "provenance" of each bit. Provenance[A] = B means that bit A + /// in Provider becomes bit B in the result of this expression. + SmallVector<int8_t, 32> Provenance; // int8_t means max size is i128. + + enum { Unset = -1 }; +}; +} // end anonymous namespace + +/// Analyze the specified subexpression and see if it is capable of providing +/// pieces of a bswap or bitreverse. The subexpression provides a potential +/// piece of a bswap or bitreverse if it can be proven that each non-zero bit in +/// the output of the expression came from a corresponding bit in some other +/// value. This function is recursive, and the end result is a mapping of +/// bitnumber to bitnumber. It is the caller's responsibility to validate that +/// the bitnumber to bitnumber mapping is correct for a bswap or bitreverse. +/// +/// For example, if the current subexpression if "(shl i32 %X, 24)" then we know +/// that the expression deposits the low byte of %X into the high byte of the +/// result and that all other bits are zero. 
This expression is accepted and a +/// BitPart is returned with Provider set to %X and Provenance[24-31] set to +/// [0-7]. +/// +/// To avoid revisiting values, the BitPart results are memoized into the +/// provided map. To avoid unnecessary copying of BitParts, BitParts are +/// constructed in-place in the \c BPS map. Because of this \c BPS needs to +/// store BitParts objects, not pointers. As we need the concept of a nullptr +/// BitParts (Value has been analyzed and the analysis failed), we an Optional +/// type instead to provide the same functionality. +/// +/// Because we pass around references into \c BPS, we must use a container that +/// does not invalidate internal references (std::map instead of DenseMap). +/// +static const Optional<BitPart> & +collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, + std::map<Value *, Optional<BitPart>> &BPS) { + auto I = BPS.find(V); + if (I != BPS.end()) + return I->second; + + auto &Result = BPS[V] = None; + auto BitWidth = cast<IntegerType>(V->getType())->getBitWidth(); + + if (Instruction *I = dyn_cast<Instruction>(V)) { + // If this is an or instruction, it may be an inner node of the bswap. + if (I->getOpcode() == Instruction::Or) { + auto &A = collectBitParts(I->getOperand(0), MatchBSwaps, + MatchBitReversals, BPS); + auto &B = collectBitParts(I->getOperand(1), MatchBSwaps, + MatchBitReversals, BPS); + if (!A || !B) + return Result; + + // Try and merge the two together. + if (!A->Provider || A->Provider != B->Provider) + return Result; + + Result = BitPart(A->Provider, BitWidth); + for (unsigned i = 0; i < A->Provenance.size(); ++i) { + if (A->Provenance[i] != BitPart::Unset && + B->Provenance[i] != BitPart::Unset && + A->Provenance[i] != B->Provenance[i]) + return Result = None; + + if (A->Provenance[i] == BitPart::Unset) + Result->Provenance[i] = B->Provenance[i]; + else + Result->Provenance[i] = A->Provenance[i]; + } + + return Result; + } + + // If this is a logical shift by a constant, recurse then shift the result. + if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) { + unsigned BitShift = + cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U); + // Ensure the shift amount is defined. + if (BitShift > BitWidth) + return Result; + + auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps, + MatchBitReversals, BPS); + if (!Res) + return Result; + Result = Res; + + // Perform the "shift" on BitProvenance. + auto &P = Result->Provenance; + if (I->getOpcode() == Instruction::Shl) { + P.erase(std::prev(P.end(), BitShift), P.end()); + P.insert(P.begin(), BitShift, BitPart::Unset); + } else { + P.erase(P.begin(), std::next(P.begin(), BitShift)); + P.insert(P.end(), BitShift, BitPart::Unset); + } + + return Result; + } + + // If this is a logical 'and' with a mask that clears bits, recurse then + // unset the appropriate bits. + if (I->getOpcode() == Instruction::And && + isa<ConstantInt>(I->getOperand(1))) { + APInt Bit(I->getType()->getPrimitiveSizeInBits(), 1); + const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue(); + + // Check that the mask allows a multiple of 8 bits for a bswap, for an + // early exit. + unsigned NumMaskedBits = AndMask.countPopulation(); + if (!MatchBitReversals && NumMaskedBits % 8 != 0) + return Result; + + auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps, + MatchBitReversals, BPS); + if (!Res) + return Result; + Result = Res; + + for (unsigned i = 0; i < BitWidth; ++i, Bit <<= 1) + // If the AndMask is zero for this bit, clear the bit. 
+ if ((AndMask & Bit) == 0) + Result->Provenance[i] = BitPart::Unset; + return Result; + } + + // If this is a zext instruction zero extend the result. + if (I->getOpcode() == Instruction::ZExt) { + auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps, + MatchBitReversals, BPS); + if (!Res) + return Result; + + Result = BitPart(Res->Provider, BitWidth); + auto NarrowBitWidth = + cast<IntegerType>(cast<ZExtInst>(I)->getSrcTy())->getBitWidth(); + for (unsigned i = 0; i < NarrowBitWidth; ++i) + Result->Provenance[i] = Res->Provenance[i]; + for (unsigned i = NarrowBitWidth; i < BitWidth; ++i) + Result->Provenance[i] = BitPart::Unset; + return Result; + } + } + + // Okay, we got to something that isn't a shift, 'or' or 'and'. This must be + // the input value to the bswap/bitreverse. + Result = BitPart(V, BitWidth); + for (unsigned i = 0; i < BitWidth; ++i) + Result->Provenance[i] = i; + return Result; +} + +static bool bitTransformIsCorrectForBSwap(unsigned From, unsigned To, + unsigned BitWidth) { + if (From % 8 != To % 8) + return false; + // Convert from bit indices to byte indices and check for a byte reversal. + From >>= 3; + To >>= 3; + BitWidth >>= 3; + return From == BitWidth - To - 1; +} + +static bool bitTransformIsCorrectForBitReverse(unsigned From, unsigned To, + unsigned BitWidth) { + return From == BitWidth - To - 1; +} + +/// Given an OR instruction, check to see if this is a bitreverse +/// idiom. If so, insert the new intrinsic and return true. +bool llvm::recognizeBSwapOrBitReverseIdiom( + Instruction *I, bool MatchBSwaps, bool MatchBitReversals, + SmallVectorImpl<Instruction *> &InsertedInsts) { + if (Operator::getOpcode(I) != Instruction::Or) + return false; + if (!MatchBSwaps && !MatchBitReversals) + return false; + IntegerType *ITy = dyn_cast<IntegerType>(I->getType()); + if (!ITy || ITy->getBitWidth() > 128) + return false; // Can't do vectors or integers > 128 bits. + unsigned BW = ITy->getBitWidth(); + + unsigned DemandedBW = BW; + IntegerType *DemandedTy = ITy; + if (I->hasOneUse()) { + if (TruncInst *Trunc = dyn_cast<TruncInst>(I->user_back())) { + DemandedTy = cast<IntegerType>(Trunc->getType()); + DemandedBW = DemandedTy->getBitWidth(); + } + } + + // Try to find all the pieces corresponding to the bswap. + std::map<Value *, Optional<BitPart>> BPS; + auto Res = collectBitParts(I, MatchBSwaps, MatchBitReversals, BPS); + if (!Res) + return false; + auto &BitProvenance = Res->Provenance; + + // Now, is the bit permutation correct for a bswap or a bitreverse? We can + // only byteswap values with an even number of bytes. + bool OKForBSwap = DemandedBW % 16 == 0, OKForBitReverse = true; + for (unsigned i = 0; i < DemandedBW; ++i) { + OKForBSwap &= + bitTransformIsCorrectForBSwap(BitProvenance[i], i, DemandedBW); + OKForBitReverse &= + bitTransformIsCorrectForBitReverse(BitProvenance[i], i, DemandedBW); + } + + Intrinsic::ID Intrin; + if (OKForBSwap && MatchBSwaps) + Intrin = Intrinsic::bswap; + else if (OKForBitReverse && MatchBitReversals) + Intrin = Intrinsic::bitreverse; + else + return false; + + if (ITy != DemandedTy) { + Function *F = Intrinsic::getDeclaration(I->getModule(), Intrin, DemandedTy); + Value *Provider = Res->Provider; + IntegerType *ProviderTy = cast<IntegerType>(Provider->getType()); + // We may need to truncate the provider. 
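+ // For example (sketch, hypothetical names): an i32 expression whose only use
+ // is a trunc to i16 and whose low 16 bits form a byte swap is rewritten
+ // roughly as
+ //   %t = trunc i32 %provider to i16
+ //   %rev = call i16 @llvm.bswap.i16(i16 %t)
+ //   %res = zext i16 %rev to i32
+ // with the truncation skipped when the provider is already of the narrow
+ // type.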
+ if (DemandedTy != ProviderTy) { + auto *Trunc = CastInst::Create(Instruction::Trunc, Provider, DemandedTy, + "trunc", I); + InsertedInsts.push_back(Trunc); + Provider = Trunc; + } + auto *CI = CallInst::Create(F, Provider, "rev", I); + InsertedInsts.push_back(CI); + auto *ExtInst = CastInst::Create(Instruction::ZExt, CI, ITy, "zext", I); + InsertedInsts.push_back(ExtInst); + return true; + } + + Function *F = Intrinsic::getDeclaration(I->getModule(), Intrin, ITy); + InsertedInsts.push_back(CallInst::Create(F, Res->Provider, "rev", I)); + return true; +} + +// CodeGen has special handling for some string functions that may replace +// them with target-specific intrinsics. Since that'd skip our interceptors +// in ASan/MSan/TSan/DFSan, and thus make us miss some memory accesses, +// we mark affected calls as NoBuiltin, which will disable optimization +// in CodeGen. +void llvm::maybeMarkSanitizerLibraryCallNoBuiltin( + CallInst *CI, const TargetLibraryInfo *TLI) { + Function *F = CI->getCalledFunction(); + LibFunc::Func Func; + if (F && !F->hasLocalLinkage() && F->hasName() && + TLI->getLibFunc(F->getName(), Func) && TLI->hasOptimizedCodeGen(Func) && + !F->doesNotAccessMemory()) + CI->addAttribute(AttributeSet::FunctionIndex, Attribute::NoBuiltin); +} diff --git a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp new file mode 100644 index 000000000000..00cda2af00c6 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp @@ -0,0 +1,926 @@ +//===- LoopSimplify.cpp - Loop Canonicalization Pass ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass performs several transformations to transform natural loops into a +// simpler form, which makes subsequent analyses and transformations simpler and +// more effective. +// +// Loop pre-header insertion guarantees that there is a single, non-critical +// entry edge from outside of the loop to the loop header. This simplifies a +// number of analyses and transformations, such as LICM. +// +// Loop exit-block insertion guarantees that all exit blocks from the loop +// (blocks which are outside of the loop that have predecessors inside of the +// loop) only have predecessors from inside of the loop (and are thus dominated +// by the loop header). This simplifies transformations such as store-sinking +// that are built into LICM. +// +// This pass also guarantees that loops will have exactly one backedge. +// +// Indirectbr instructions introduce several complications. If the loop +// contains or is entered by an indirectbr instruction, it may not be possible +// to transform the loop and make these guarantees. Client code should check +// that these conditions are true before relying on them. +// +// Note that the simplifycfg pass will clean up blocks which are split out but +// end up being unnecessary, so usage of this pass should not pessimize +// generated code. +// +// This pass obviously modifies the CFG, but updates loop information and +// dominator information. 
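+//
+// The pass is registered under the name "loop-simplify" (see the
+// INITIALIZE_PASS macros near the end of this file), so the canonicalization
+// can be exercised in isolation with, e.g., `opt -loop-simplify -S in.ll`
+// (illustrative invocation).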
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/LoopSimplify.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/DependenceAnalysis.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/LoopUtils.h" +using namespace llvm; + +#define DEBUG_TYPE "loop-simplify" + +STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted"); +STATISTIC(NumNested , "Number of nested loops split out"); + +// If the block isn't already, move the new block to right after some 'outside +// block' block. This prevents the preheader from being placed inside the loop +// body, e.g. when the loop hasn't been rotated. +static void placeSplitBlockCarefully(BasicBlock *NewBB, + SmallVectorImpl<BasicBlock *> &SplitPreds, + Loop *L) { + // Check to see if NewBB is already well placed. + Function::iterator BBI = --NewBB->getIterator(); + for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) { + if (&*BBI == SplitPreds[i]) + return; + } + + // If it isn't already after an outside block, move it after one. This is + // always good as it makes the uncond branch from the outside block into a + // fall-through. + + // Figure out *which* outside block to put this after. Prefer an outside + // block that neighbors a BB actually in the loop. + BasicBlock *FoundBB = nullptr; + for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) { + Function::iterator BBI = SplitPreds[i]->getIterator(); + if (++BBI != NewBB->getParent()->end() && L->contains(&*BBI)) { + FoundBB = SplitPreds[i]; + break; + } + } + + // If our heuristic for a *good* bb to place this after doesn't find + // anything, just pick something. It's likely better than leaving it within + // the loop. + if (!FoundBB) + FoundBB = SplitPreds[0]; + NewBB->moveAfter(FoundBB); +} + +/// InsertPreheaderForLoop - Once we discover that a loop doesn't have a +/// preheader, this method is called to insert one. This method has two phases: +/// preheader insertion and analysis updating. +/// +BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT, + LoopInfo *LI, bool PreserveLCSSA) { + BasicBlock *Header = L->getHeader(); + + // Compute the set of predecessors of the loop that are not in the loop. + SmallVector<BasicBlock*, 8> OutsideBlocks; + for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header); + PI != PE; ++PI) { + BasicBlock *P = *PI; + if (!L->contains(P)) { // Coming in from outside the loop? 
+ // If the loop is branched to from an indirect branch, we won't + // be able to fully transform the loop, because it prohibits + // edge splitting. + if (isa<IndirectBrInst>(P->getTerminator())) return nullptr; + + // Keep track of it. + OutsideBlocks.push_back(P); + } + } + + // Split out the loop pre-header. + BasicBlock *PreheaderBB; + PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader", DT, + LI, PreserveLCSSA); + if (!PreheaderBB) + return nullptr; + + DEBUG(dbgs() << "LoopSimplify: Creating pre-header " + << PreheaderBB->getName() << "\n"); + + // Make sure that NewBB is put someplace intelligent, which doesn't mess up + // code layout too horribly. + placeSplitBlockCarefully(PreheaderBB, OutsideBlocks, L); + + return PreheaderBB; +} + +/// \brief Ensure that the loop preheader dominates all exit blocks. +/// +/// This method is used to split exit blocks that have predecessors outside of +/// the loop. +static BasicBlock *rewriteLoopExitBlock(Loop *L, BasicBlock *Exit, + DominatorTree *DT, LoopInfo *LI, + bool PreserveLCSSA) { + SmallVector<BasicBlock*, 8> LoopBlocks; + for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) { + BasicBlock *P = *I; + if (L->contains(P)) { + // Don't do this if the loop is exited via an indirect branch. + if (isa<IndirectBrInst>(P->getTerminator())) return nullptr; + + LoopBlocks.push_back(P); + } + } + + assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?"); + BasicBlock *NewExitBB = nullptr; + + NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", DT, LI, + PreserveLCSSA); + if (!NewExitBB) + return nullptr; + + DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block " + << NewExitBB->getName() << "\n"); + return NewExitBB; +} + +/// Add the specified block, and all of its predecessors, to the specified set, +/// if it's not already in there. Stop predecessor traversal when we reach +/// StopBlock. +static void addBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock, + std::set<BasicBlock*> &Blocks) { + SmallVector<BasicBlock *, 8> Worklist; + Worklist.push_back(InputBB); + do { + BasicBlock *BB = Worklist.pop_back_val(); + if (Blocks.insert(BB).second && BB != StopBlock) + // If BB is not already processed and it is not a stop block then + // insert its predecessor in the work list + for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) { + BasicBlock *WBB = *I; + Worklist.push_back(WBB); + } + } while (!Worklist.empty()); +} + +/// \brief The first part of loop-nestification is to find a PHI node that tells +/// us how to partition the loops. +static PHINode *findPHIToPartitionLoops(Loop *L, DominatorTree *DT, + AssumptionCache *AC) { + const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); + for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) { + PHINode *PN = cast<PHINode>(I); + ++I; + if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) { + // This is a degenerate PHI already, don't modify it! + PN->replaceAllUsesWith(V); + PN->eraseFromParent(); + continue; + } + + // Scan this PHI node looking for a use of the PHI node by itself. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingValue(i) == PN && + L->contains(PN->getIncomingBlock(i))) + // We found something tasty to remove. + return PN; + } + return nullptr; +} + +/// \brief If this loop has multiple backedges, try to pull one of them out into +/// a nested loop. 
+/// +/// This is important for code that looks like +/// this: +/// +/// Loop: +/// ... +/// br cond, Loop, Next +/// ... +/// br cond2, Loop, Out +/// +/// To identify this common case, we look at the PHI nodes in the header of the +/// loop. PHI nodes with unchanging values on one backedge correspond to values +/// that change in the "outer" loop, but not in the "inner" loop. +/// +/// If we are able to separate out a loop, return the new outer loop that was +/// created. +/// +static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader, + DominatorTree *DT, LoopInfo *LI, + ScalarEvolution *SE, bool PreserveLCSSA, + AssumptionCache *AC) { + // Don't try to separate loops without a preheader. + if (!Preheader) + return nullptr; + + // The header is not a landing pad; preheader insertion should ensure this. + BasicBlock *Header = L->getHeader(); + assert(!Header->isEHPad() && "Can't insert backedge to EH pad"); + + PHINode *PN = findPHIToPartitionLoops(L, DT, AC); + if (!PN) return nullptr; // No known way to partition. + + // Pull out all predecessors that have varying values in the loop. This + // handles the case when a PHI node has multiple instances of itself as + // arguments. + SmallVector<BasicBlock*, 8> OuterLoopPreds; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + if (PN->getIncomingValue(i) != PN || + !L->contains(PN->getIncomingBlock(i))) { + // We can't split indirectbr edges. + if (isa<IndirectBrInst>(PN->getIncomingBlock(i)->getTerminator())) + return nullptr; + OuterLoopPreds.push_back(PN->getIncomingBlock(i)); + } + } + DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n"); + + // If ScalarEvolution is around and knows anything about values in + // this loop, tell it to forget them, because we're about to + // substantially change it. + if (SE) + SE->forgetLoop(L); + + BasicBlock *NewBB = SplitBlockPredecessors(Header, OuterLoopPreds, ".outer", + DT, LI, PreserveLCSSA); + + // Make sure that NewBB is put someplace intelligent, which doesn't mess up + // code layout too horribly. + placeSplitBlockCarefully(NewBB, OuterLoopPreds, L); + + // Create the new outer loop. + Loop *NewOuter = new Loop(); + + // Change the parent loop to use the outer loop as its child now. + if (Loop *Parent = L->getParentLoop()) + Parent->replaceChildLoopWith(L, NewOuter); + else + LI->changeTopLevelLoop(L, NewOuter); + + // L is now a subloop of our outer loop. + NewOuter->addChildLoop(L); + + for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); + I != E; ++I) + NewOuter->addBlockEntry(*I); + + // Now reset the header in L, which had been moved by + // SplitBlockPredecessors for the outer loop. + L->moveToHeader(Header); + + // Determine which blocks should stay in L and which should be moved out to + // the Outer loop now. + std::set<BasicBlock*> BlocksInL; + for (pred_iterator PI=pred_begin(Header), E = pred_end(Header); PI!=E; ++PI) { + BasicBlock *P = *PI; + if (DT->dominates(Header, P)) + addBlockAndPredsToSet(P, Header, BlocksInL); + } + + // Scan all of the loop children of L, moving them to OuterLoop if they are + // not part of the inner loop. 
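+ // (A subloop stays attached to L exactly when its header was collected into
+ // BlocksInL above; otherwise the whole subloop is re-parented to NewOuter.)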
+ const std::vector<Loop*> &SubLoops = L->getSubLoops(); + for (size_t I = 0; I != SubLoops.size(); ) + if (BlocksInL.count(SubLoops[I]->getHeader())) + ++I; // Loop remains in L + else + NewOuter->addChildLoop(L->removeChildLoop(SubLoops.begin() + I)); + + SmallVector<BasicBlock *, 8> OuterLoopBlocks; + OuterLoopBlocks.push_back(NewBB); + // Now that we know which blocks are in L and which need to be moved to + // OuterLoop, move any blocks that need it. + for (unsigned i = 0; i != L->getBlocks().size(); ++i) { + BasicBlock *BB = L->getBlocks()[i]; + if (!BlocksInL.count(BB)) { + // Move this block to the parent, updating the exit blocks sets + L->removeBlockFromLoop(BB); + if ((*LI)[BB] == L) { + LI->changeLoopFor(BB, NewOuter); + OuterLoopBlocks.push_back(BB); + } + --i; + } + } + + // Split edges to exit blocks from the inner loop, if they emerged in the + // process of separating the outer one. + SmallVector<BasicBlock *, 8> ExitBlocks; + L->getExitBlocks(ExitBlocks); + SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(), + ExitBlocks.end()); + for (BasicBlock *ExitBlock : ExitBlockSet) { + if (any_of(predecessors(ExitBlock), + [L](BasicBlock *BB) { return !L->contains(BB); })) { + rewriteLoopExitBlock(L, ExitBlock, DT, LI, PreserveLCSSA); + } + } + + if (PreserveLCSSA) { + // Fix LCSSA form for L. Some values, which previously were only used inside + // L, can now be used in NewOuter loop. We need to insert phi-nodes for them + // in corresponding exit blocks. + // We don't need to form LCSSA recursively, because there cannot be uses + // inside a newly created loop of defs from inner loops as those would + // already be a use of an LCSSA phi node. + formLCSSA(*L, *DT, LI, SE); + + assert(NewOuter->isRecursivelyLCSSAForm(*DT, *LI) && + "LCSSA is broken after separating nested loops!"); + } + + return NewOuter; +} + +/// \brief This method is called when the specified loop has more than one +/// backedge in it. +/// +/// If this occurs, revector all of these backedges to target a new basic block +/// and have that block branch to the loop header. This ensures that loops +/// have exactly one backedge. +static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, + DominatorTree *DT, LoopInfo *LI) { + assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!"); + + // Get information about the loop + BasicBlock *Header = L->getHeader(); + Function *F = Header->getParent(); + + // Unique backedge insertion currently depends on having a preheader. + if (!Preheader) + return nullptr; + + // The header is not an EH pad; preheader insertion should ensure this. + assert(!Header->isEHPad() && "Can't insert backedge to EH pad"); + + // Figure out which basic blocks contain back-edges to the loop header. + std::vector<BasicBlock*> BackedgeBlocks; + for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){ + BasicBlock *P = *I; + + // Indirectbr edges cannot be split, so we must fail if we find one. + if (isa<IndirectBrInst>(P->getTerminator())) + return nullptr; + + if (P != Preheader) BackedgeBlocks.push_back(P); + } + + // Create and insert the new backedge block... 
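+ // For a header %H reached from latches %L1 and %L2, the result is roughly
+ // (sketch, hypothetical names):
+ //   H.backedge:                        ; the new block
+ //     %p.be = phi ... [ %v1, %L1 ], [ %v2, %L2 ]
+ //     br label %H
+ // with %L1 and %L2 retargeted to %H.backedge, leaving %H with exactly two
+ // predecessors: the preheader and %H.backedge.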
+ BasicBlock *BEBlock = BasicBlock::Create(Header->getContext(), + Header->getName() + ".backedge", F); + BranchInst *BETerminator = BranchInst::Create(Header, BEBlock); + BETerminator->setDebugLoc(Header->getFirstNonPHI()->getDebugLoc()); + + DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block " + << BEBlock->getName() << "\n"); + + // Move the new backedge block to right after the last backedge block. + Function::iterator InsertPos = ++BackedgeBlocks.back()->getIterator(); + F->getBasicBlockList().splice(InsertPos, F->getBasicBlockList(), BEBlock); + + // Now that the block has been inserted into the function, create PHI nodes in + // the backedge block which correspond to any PHI nodes in the header block. + for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { + PHINode *PN = cast<PHINode>(I); + PHINode *NewPN = PHINode::Create(PN->getType(), BackedgeBlocks.size(), + PN->getName()+".be", BETerminator); + + // Loop over the PHI node, moving all entries except the one for the + // preheader over to the new PHI node. + unsigned PreheaderIdx = ~0U; + bool HasUniqueIncomingValue = true; + Value *UniqueValue = nullptr; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + BasicBlock *IBB = PN->getIncomingBlock(i); + Value *IV = PN->getIncomingValue(i); + if (IBB == Preheader) { + PreheaderIdx = i; + } else { + NewPN->addIncoming(IV, IBB); + if (HasUniqueIncomingValue) { + if (!UniqueValue) + UniqueValue = IV; + else if (UniqueValue != IV) + HasUniqueIncomingValue = false; + } + } + } + + // Delete all of the incoming values from the old PN except the preheader's + assert(PreheaderIdx != ~0U && "PHI has no preheader entry??"); + if (PreheaderIdx != 0) { + PN->setIncomingValue(0, PN->getIncomingValue(PreheaderIdx)); + PN->setIncomingBlock(0, PN->getIncomingBlock(PreheaderIdx)); + } + // Nuke all entries except the zero'th. + for (unsigned i = 0, e = PN->getNumIncomingValues()-1; i != e; ++i) + PN->removeIncomingValue(e-i, false); + + // Finally, add the newly constructed PHI node as the entry for the BEBlock. + PN->addIncoming(NewPN, BEBlock); + + // As an optimization, if all incoming values in the new PhiNode (which is a + // subset of the incoming values of the old PHI node) have the same value, + // eliminate the PHI Node. + if (HasUniqueIncomingValue) { + NewPN->replaceAllUsesWith(UniqueValue); + BEBlock->getInstList().erase(NewPN); + } + } + + // Now that all of the PHI nodes have been inserted and adjusted, modify the + // backedge blocks to jump to the BEBlock instead of the header. + // If one of the backedges has llvm.loop metadata attached, we remove + // it from the backedge and add it to BEBlock. + unsigned LoopMDKind = BEBlock->getContext().getMDKindID("llvm.loop"); + MDNode *LoopMD = nullptr; + for (unsigned i = 0, e = BackedgeBlocks.size(); i != e; ++i) { + TerminatorInst *TI = BackedgeBlocks[i]->getTerminator(); + if (!LoopMD) + LoopMD = TI->getMetadata(LoopMDKind); + TI->setMetadata(LoopMDKind, nullptr); + for (unsigned Op = 0, e = TI->getNumSuccessors(); Op != e; ++Op) + if (TI->getSuccessor(Op) == Header) + TI->setSuccessor(Op, BEBlock); + } + BEBlock->getTerminator()->setMetadata(LoopMDKind, LoopMD); + + //===--- Update all analyses which we must preserve now -----------------===// + + // Update Loop Information - we know that this block is now in the current + // loop and all parent loops. 
+ L->addBasicBlockToLoop(BEBlock, *LI); + + // Update dominator information + DT->splitBlock(BEBlock); + + return BEBlock; +} + +/// \brief Simplify one loop and queue further loops for simplification. +static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist, + DominatorTree *DT, LoopInfo *LI, + ScalarEvolution *SE, AssumptionCache *AC, + bool PreserveLCSSA) { + bool Changed = false; +ReprocessLoop: + + // Check to see that no blocks (other than the header) in this loop have + // predecessors that are not in the loop. This is not valid for natural + // loops, but can occur if the blocks are unreachable. Since they are + // unreachable we can just shamelessly delete those CFG edges! + for (Loop::block_iterator BB = L->block_begin(), E = L->block_end(); + BB != E; ++BB) { + if (*BB == L->getHeader()) continue; + + SmallPtrSet<BasicBlock*, 4> BadPreds; + for (pred_iterator PI = pred_begin(*BB), + PE = pred_end(*BB); PI != PE; ++PI) { + BasicBlock *P = *PI; + if (!L->contains(P)) + BadPreds.insert(P); + } + + // Delete each unique out-of-loop (and thus dead) predecessor. + for (BasicBlock *P : BadPreds) { + + DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor " + << P->getName() << "\n"); + + // Zap the dead pred's terminator and replace it with unreachable. + TerminatorInst *TI = P->getTerminator(); + changeToUnreachable(TI, /*UseLLVMTrap=*/false, PreserveLCSSA); + Changed = true; + } + } + + // If there are exiting blocks with branches on undef, resolve the undef in + // the direction which will exit the loop. This will help simplify loop + // trip count computations. + SmallVector<BasicBlock*, 8> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + for (BasicBlock *ExitingBlock : ExitingBlocks) + if (BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator())) + if (BI->isConditional()) { + if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) { + + DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in " + << ExitingBlock->getName() << "\n"); + + BI->setCondition(ConstantInt::get(Cond->getType(), + !L->contains(BI->getSuccessor(0)))); + + // This may make the loop analyzable, force SCEV recomputation. + if (SE) + SE->forgetLoop(L); + + Changed = true; + } + } + + // Does the loop already have a preheader? If so, don't insert one. + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) { + Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA); + if (Preheader) { + ++NumInserted; + Changed = true; + } + } + + // Next, check to make sure that all exit nodes of the loop only have + // predecessors that are inside of the loop. This check guarantees that the + // loop preheader/header will dominate the exit blocks. If the exit block has + // predecessors from outside of the loop, split the edge now. + SmallVector<BasicBlock*, 8> ExitBlocks; + L->getExitBlocks(ExitBlocks); + + SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(), + ExitBlocks.end()); + for (BasicBlock *ExitBlock : ExitBlockSet) { + if (any_of(predecessors(ExitBlock), + [L](BasicBlock *BB) { return !L->contains(BB); })) { + rewriteLoopExitBlock(L, ExitBlock, DT, LI, PreserveLCSSA); + ++NumInserted; + Changed = true; + } + } + + // If the header has more than two predecessors at this point (from the + // preheader and from multiple backedges), we must adjust the loop. + BasicBlock *LoopLatch = L->getLoopLatch(); + if (!LoopLatch) { + // If this is really a nested loop, rip it out into a child loop. 
Don't do + // this for loops with a giant number of backedges, just factor them into a + // common backedge instead. + if (L->getNumBackEdges() < 8) { + if (Loop *OuterL = + separateNestedLoop(L, Preheader, DT, LI, SE, PreserveLCSSA, AC)) { + ++NumNested; + // Enqueue the outer loop as it should be processed next in our + // depth-first nest walk. + Worklist.push_back(OuterL); + + // This is a big restructuring change, reprocess the whole loop. + Changed = true; + // GCC doesn't tail recursion eliminate this. + // FIXME: It isn't clear we can't rely on LLVM to TRE this. + goto ReprocessLoop; + } + } + + // If we either couldn't, or didn't want to, identify nesting of the loops, + // insert a new block that all backedges target, then make it jump to the + // loop header. + LoopLatch = insertUniqueBackedgeBlock(L, Preheader, DT, LI); + if (LoopLatch) { + ++NumInserted; + Changed = true; + } + } + + const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); + + // Scan over the PHI nodes in the loop header. Since they now have only two + // incoming values (the loop is canonicalized), we may have simplified the PHI + // down to 'X = phi [X, Y]', which should be replaced with 'Y'. + PHINode *PN; + for (BasicBlock::iterator I = L->getHeader()->begin(); + (PN = dyn_cast<PHINode>(I++)); ) + if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) { + if (SE) SE->forgetValue(PN); + if (!PreserveLCSSA || LI->replacementPreservesLCSSAForm(PN, V)) { + PN->replaceAllUsesWith(V); + PN->eraseFromParent(); + } + } + + // If this loop has multiple exits and the exits all go to the same + // block, attempt to merge the exits. This helps several passes, such + // as LoopRotation, which do not support loops with multiple exits. + // SimplifyCFG also does this (and this code uses the same utility + // function), however this code is loop-aware, where SimplifyCFG is + // not. That gives it the advantage of being able to hoist + // loop-invariant instructions out of the way to open up more + // opportunities, and the disadvantage of having the responsibility + // to preserve dominator information. + bool UniqueExit = true; + if (!ExitBlocks.empty()) + for (unsigned i = 1, e = ExitBlocks.size(); i != e; ++i) + if (ExitBlocks[i] != ExitBlocks[0]) { + UniqueExit = false; + break; + } + if (UniqueExit) { + for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { + BasicBlock *ExitingBlock = ExitingBlocks[i]; + if (!ExitingBlock->getSinglePredecessor()) continue; + BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); + if (!BI || !BI->isConditional()) continue; + CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition()); + if (!CI || CI->getParent() != ExitingBlock) continue; + + // Attempt to hoist out all instructions except for the + // comparison and the branch. + bool AllInvariant = true; + bool AnyInvariant = false; + for (BasicBlock::iterator I = ExitingBlock->begin(); &*I != BI; ) { + Instruction *Inst = &*I++; + // Skip debug info intrinsics. + if (isa<DbgInfoIntrinsic>(Inst)) + continue; + if (Inst == CI) + continue; + if (!L->makeLoopInvariant(Inst, AnyInvariant, + Preheader ? Preheader->getTerminator() + : nullptr)) { + AllInvariant = false; + break; + } + } + if (AnyInvariant) { + Changed = true; + // The loop disposition of all SCEV expressions that depend on any + // hoisted values have also changed. 
+ if (SE) + SE->forgetLoopDispositions(L); + } + if (!AllInvariant) continue; + + // The block has now been cleared of all instructions except for + // a comparison and a conditional branch. SimplifyCFG may be able + // to fold it now. + if (!FoldBranchToCommonDest(BI)) + continue; + + // Success. The block is now dead, so remove it from the loop, + // update the dominator tree and delete it. + DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block " + << ExitingBlock->getName() << "\n"); + + // Notify ScalarEvolution before deleting this block. Currently assume the + // parent loop doesn't change (spliting edges doesn't count). If blocks, + // CFG edges, or other values in the parent loop change, then we need call + // to forgetLoop() for the parent instead. + if (SE) + SE->forgetLoop(L); + + assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock)); + Changed = true; + LI->removeBlock(ExitingBlock); + + DomTreeNode *Node = DT->getNode(ExitingBlock); + const std::vector<DomTreeNodeBase<BasicBlock> *> &Children = + Node->getChildren(); + while (!Children.empty()) { + DomTreeNode *Child = Children.front(); + DT->changeImmediateDominator(Child, Node->getIDom()); + } + DT->eraseNode(ExitingBlock); + + BI->getSuccessor(0)->removePredecessor( + ExitingBlock, /* DontDeleteUselessPHIs */ PreserveLCSSA); + BI->getSuccessor(1)->removePredecessor( + ExitingBlock, /* DontDeleteUselessPHIs */ PreserveLCSSA); + ExitingBlock->eraseFromParent(); + } + } + + return Changed; +} + +bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, + ScalarEvolution *SE, AssumptionCache *AC, + bool PreserveLCSSA) { + bool Changed = false; + + // Worklist maintains our depth-first queue of loops in this nest to process. + SmallVector<Loop *, 4> Worklist; + Worklist.push_back(L); + + // Walk the worklist from front to back, pushing newly found sub loops onto + // the back. This will let us process loops from back to front in depth-first + // order. We can use this simple process because loops form a tree. + for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) { + Loop *L2 = Worklist[Idx]; + Worklist.append(L2->begin(), L2->end()); + } + + while (!Worklist.empty()) + Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, DT, LI, SE, + AC, PreserveLCSSA); + + return Changed; +} + +namespace { + struct LoopSimplify : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + LoopSimplify() : FunctionPass(ID) { + initializeLoopSimplifyPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<AssumptionCacheTracker>(); + + // We need loop information to identify the loops... + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); + + AU.addRequired<LoopInfoWrapperPass>(); + AU.addPreserved<LoopInfoWrapperPass>(); + + AU.addPreserved<BasicAAWrapperPass>(); + AU.addPreserved<AAResultsWrapperPass>(); + AU.addPreserved<GlobalsAAWrapperPass>(); + AU.addPreserved<ScalarEvolutionWrapperPass>(); + AU.addPreserved<SCEVAAWrapperPass>(); + AU.addPreservedID(LCSSAID); + AU.addPreserved<DependenceAnalysisWrapperPass>(); + AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added. + } + + /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees. 
+ void verifyAnalysis() const override; + }; +} + +char LoopSimplify::ID = 0; +INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify", + "Canonicalize natural loops", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_END(LoopSimplify, "loop-simplify", + "Canonicalize natural loops", false, false) + +// Publicly exposed interface to pass... +char &llvm::LoopSimplifyID = LoopSimplify::ID; +Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); } + +/// runOnFunction - Run down all loops in the CFG (recursively, but we could do +/// it in any convenient order) inserting preheaders... +/// +bool LoopSimplify::runOnFunction(Function &F) { + bool Changed = false; + LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>(); + ScalarEvolution *SE = SEWP ? &SEWP->getSE() : nullptr; + AssumptionCache *AC = + &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); + + bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); +#ifndef NDEBUG + if (PreserveLCSSA) { + assert(DT && "DT not available."); + assert(LI && "LI not available."); + bool InLCSSA = all_of( + *LI, [&](Loop *L) { return L->isRecursivelyLCSSAForm(*DT, *LI); }); + assert(InLCSSA && "Requested to preserve LCSSA, but it's already broken."); + } +#endif + + // Simplify each loop nest in the function. + for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) + Changed |= simplifyLoop(*I, DT, LI, SE, AC, PreserveLCSSA); + +#ifndef NDEBUG + if (PreserveLCSSA) { + bool InLCSSA = all_of( + *LI, [&](Loop *L) { return L->isRecursivelyLCSSAForm(*DT, *LI); }); + assert(InLCSSA && "LCSSA is broken after loop-simplify."); + } +#endif + return Changed; +} + +PreservedAnalyses LoopSimplifyPass::run(Function &F, + FunctionAnalysisManager &AM) { + bool Changed = false; + LoopInfo *LI = &AM.getResult<LoopAnalysis>(F); + DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F); + ScalarEvolution *SE = AM.getCachedResult<ScalarEvolutionAnalysis>(F); + AssumptionCache *AC = &AM.getResult<AssumptionAnalysis>(F); + + // FIXME: This pass should verify that the loops on which it's operating + // are in canonical SSA form, and that the pass itself preserves this form. + for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) + Changed |= simplifyLoop(*I, DT, LI, SE, AC, true /* PreserveLCSSA */); + + // FIXME: We need to invalidate this to avoid PR28400. Is there a better + // solution? + AM.invalidate<ScalarEvolutionAnalysis>(F); + + if (!Changed) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserve<DominatorTreeAnalysis>(); + PA.preserve<LoopAnalysis>(); + PA.preserve<BasicAA>(); + PA.preserve<GlobalsAA>(); + PA.preserve<SCEVAA>(); + PA.preserve<ScalarEvolutionAnalysis>(); + PA.preserve<DependenceAnalysis>(); + return PA; +} + +// FIXME: Restore this code when we re-enable verification in verifyAnalysis +// below. +#if 0 +static void verifyLoop(Loop *L) { + // Verify subloops. + for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) + verifyLoop(*I); + + // It used to be possible to just assert L->isLoopSimplifyForm(), however + // with the introduction of indirectbr, there are now cases where it's + // not possible to transform a loop as necessary. 
We can at least check + // that there is an indirectbr near any time there's trouble. + + // Indirectbr can interfere with preheader and unique backedge insertion. + if (!L->getLoopPreheader() || !L->getLoopLatch()) { + bool HasIndBrPred = false; + for (pred_iterator PI = pred_begin(L->getHeader()), + PE = pred_end(L->getHeader()); PI != PE; ++PI) + if (isa<IndirectBrInst>((*PI)->getTerminator())) { + HasIndBrPred = true; + break; + } + assert(HasIndBrPred && + "LoopSimplify has no excuse for missing loop header info!"); + (void)HasIndBrPred; + } + + // Indirectbr can interfere with exit block canonicalization. + if (!L->hasDedicatedExits()) { + bool HasIndBrExiting = false; + SmallVector<BasicBlock*, 8> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { + if (isa<IndirectBrInst>((ExitingBlocks[i])->getTerminator())) { + HasIndBrExiting = true; + break; + } + } + + assert(HasIndBrExiting && + "LoopSimplify has no excuse for missing exit block info!"); + (void)HasIndBrExiting; + } +} +#endif + +void LoopSimplify::verifyAnalysis() const { + // FIXME: This routine is being called mid-way through the loop pass manager + // as loop passes destroy this analysis. That's actually fine, but we have no + // way of expressing that here. Once all of the passes that destroy this are + // hoisted out of the loop pass manager we can add back verification here. +#if 0 + for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) + verifyLoop(*I); +#endif +} diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp new file mode 100644 index 000000000000..e551e4b47ac1 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -0,0 +1,759 @@ +//===-- UnrollLoop.cpp - Loop unrolling utilities -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements some loop unrolling utilities. It does not define any +// actual pass or policy, but provides a single function to perform loop +// unrolling. +// +// The process of unrolling can produce extraneous basic blocks linked with +// unconditional branches. This will be corrected in the future. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/UnrollLoop.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LoopIterator.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/LoopSimplify.h" +#include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/SimplifyIndVar.h" +using namespace llvm; + +#define DEBUG_TYPE "loop-unroll" + +// TODO: Should these be here or in LoopUnroll? 
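The banner above notes that this file deliberately defines no pass or policy, only the llvm::UnrollLoop entry point. As a rough, non-authoritative sketch of how a client with the usual analyses in hand might drive it (the helper name unrollByFour and the fixed factor are invented for illustration; the parameter list follows the declaration further down in this file):

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
using namespace llvm;

// Illustrative only: unroll a loop 4x, allowing a runtime remainder when the
// trip count is not a compile-time constant. Assumes all analysis pointers,
// including the remark emitter, are valid.
static bool unrollByFour(Loop *L, LoopInfo *LI, ScalarEvolution *SE,
                         DominatorTree *DT, AssumptionCache *AC,
                         OptimizationRemarkEmitter *ORE) {
  unsigned TripCount = SE->getSmallConstantTripCount(L);       // 0 if unknown
  unsigned TripMultiple = SE->getSmallConstantTripMultiple(L); // 1 if unknown
  return UnrollLoop(L, /*Count*/ 4, TripCount, /*Force*/ false,
                    /*AllowRuntime*/ true, /*AllowExpensiveTripCount*/ false,
                    /*PreserveCondBr*/ false, /*PreserveOnlyFirst*/ false,
                    TripMultiple, /*PeelCount*/ 0, LI, SE, DT, AC, ORE,
                    /*PreserveLCSSA*/ true);
}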
+STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled"); +STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)"); + +static cl::opt<bool> +UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden, + cl::desc("Allow runtime unrolled loops to be unrolled " + "with epilog instead of prolog.")); + +/// Convert the instruction operands from referencing the current values into +/// those specified by VMap. +static inline void remapInstruction(Instruction *I, + ValueToValueMapTy &VMap) { + for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) { + Value *Op = I->getOperand(op); + ValueToValueMapTy::iterator It = VMap.find(Op); + if (It != VMap.end()) + I->setOperand(op, It->second); + } + + if (PHINode *PN = dyn_cast<PHINode>(I)) { + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + ValueToValueMapTy::iterator It = VMap.find(PN->getIncomingBlock(i)); + if (It != VMap.end()) + PN->setIncomingBlock(i, cast<BasicBlock>(It->second)); + } + } +} + +/// Folds a basic block into its predecessor if it only has one predecessor, and +/// that predecessor only has one successor. +/// The LoopInfo Analysis that is passed will be kept consistent. If folding is +/// successful references to the containing loop must be removed from +/// ScalarEvolution by calling ScalarEvolution::forgetLoop because SE may have +/// references to the eliminated BB. The argument ForgottenLoops contains a set +/// of loops that have already been forgotten to prevent redundant, expensive +/// calls to ScalarEvolution::forgetLoop. Returns the new combined block. +static BasicBlock * +foldBlockIntoPredecessor(BasicBlock *BB, LoopInfo *LI, ScalarEvolution *SE, + SmallPtrSetImpl<Loop *> &ForgottenLoops, + DominatorTree *DT) { + // Merge basic blocks into their predecessor if there is only one distinct + // pred, and if there is only one distinct successor of the predecessor, and + // if there are no PHI nodes. + BasicBlock *OnlyPred = BB->getSinglePredecessor(); + if (!OnlyPred) return nullptr; + + if (OnlyPred->getTerminator()->getNumSuccessors() != 1) + return nullptr; + + DEBUG(dbgs() << "Merging: " << *BB << "into: " << *OnlyPred); + + // Resolve any PHI nodes at the start of the block. They are all + // guaranteed to have exactly one entry if they exist, unless there are + // multiple duplicate (but guaranteed to be equal) entries for the + // incoming edges. This occurs when there are multiple edges from + // OnlyPred to OnlySucc. + FoldSingleEntryPHINodes(BB); + + // Delete the unconditional branch from the predecessor... + OnlyPred->getInstList().pop_back(); + + // Make all PHI nodes that referred to BB now refer to Pred as their + // source... + BB->replaceAllUsesWith(OnlyPred); + + // Move all definitions in the successor to the predecessor... + OnlyPred->getInstList().splice(OnlyPred->end(), BB->getInstList()); + + // OldName will be valid until erased. + StringRef OldName = BB->getName(); + + // Erase the old block and update dominator info. + if (DT) + if (DomTreeNode *DTN = DT->getNode(BB)) { + DomTreeNode *PredDTN = DT->getNode(OnlyPred); + SmallVector<DomTreeNode *, 8> Children(DTN->begin(), DTN->end()); + for (auto *DI : Children) + DT->changeImmediateDominator(DI, PredDTN); + + DT->eraseNode(BB); + } + + // ScalarEvolution holds references to loop exit blocks. 
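  // (Concretely: the cached backedge-taken counts record per-exit-block
  //  entries, so erasing BB without forgetting its loop would leave
  //  ScalarEvolution holding a pointer to a freed block.)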
+ if (SE) { + if (Loop *L = LI->getLoopFor(BB)) { + if (ForgottenLoops.insert(L).second) + SE->forgetLoop(L); + } + } + LI->removeBlock(BB); + + // Inherit predecessor's name if it exists... + if (!OldName.empty() && !OnlyPred->hasName()) + OnlyPred->setName(OldName); + + BB->eraseFromParent(); + + return OnlyPred; +} + +/// Check if unrolling created a situation where we need to insert phi nodes to +/// preserve LCSSA form. +/// \param Blocks is a vector of basic blocks representing unrolled loop. +/// \param L is the outer loop. +/// It's possible that some of the blocks are in L, and some are not. In this +/// case, if there is a use is outside L, and definition is inside L, we need to +/// insert a phi-node, otherwise LCSSA will be broken. +/// The function is just a helper function for llvm::UnrollLoop that returns +/// true if this situation occurs, indicating that LCSSA needs to be fixed. +static bool needToInsertPhisForLCSSA(Loop *L, std::vector<BasicBlock *> Blocks, + LoopInfo *LI) { + for (BasicBlock *BB : Blocks) { + if (LI->getLoopFor(BB) == L) + continue; + for (Instruction &I : *BB) { + for (Use &U : I.operands()) { + if (auto Def = dyn_cast<Instruction>(U)) { + Loop *DefLoop = LI->getLoopFor(Def->getParent()); + if (!DefLoop) + continue; + if (DefLoop->contains(L)) + return true; + } + } + } + } + return false; +} + +/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true +/// if unrolling was successful, or false if the loop was unmodified. Unrolling +/// can only fail when the loop's latch block is not terminated by a conditional +/// branch instruction. However, if the trip count (and multiple) are not known, +/// loop unrolling will mostly produce more code that is no faster. +/// +/// TripCount is the upper bound of the iteration on which control exits +/// LatchBlock. Control may exit the loop prior to TripCount iterations either +/// via an early branch in other loop block or via LatchBlock terminator. This +/// is relaxed from the general definition of trip count which is the number of +/// times the loop header executes. Note that UnrollLoop assumes that the loop +/// counter test is in LatchBlock in order to remove unnecesssary instances of +/// the test. If control can exit the loop from the LatchBlock's terminator +/// prior to TripCount iterations, flag PreserveCondBr needs to be set. +/// +/// PreserveCondBr indicates whether the conditional branch of the LatchBlock +/// needs to be preserved. It is needed when we use trip count upper bound to +/// fully unroll the loop. If PreserveOnlyFirst is also set then only the first +/// conditional branch needs to be preserved. +/// +/// Similarly, TripMultiple divides the number of times that the LatchBlock may +/// execute without exiting the loop. +/// +/// If AllowRuntime is true then UnrollLoop will consider unrolling loops that +/// have a runtime (i.e. not compile time constant) trip count. Unrolling these +/// loops require a unroll "prologue" that runs "RuntimeTripCount % Count" +/// iterations before branching into the unrolled loop. UnrollLoop will not +/// runtime-unroll the loop if computing RuntimeTripCount will be expensive and +/// AllowExpensiveTripCount is false. +/// +/// If we want to perform PGO-based loop peeling, PeelCount is set to the +/// number of iterations we want to peel off. +/// +/// The LoopInfo Analysis that is passed will be kept consistent. +/// +/// This utility preserves LoopInfo. 
It will also preserve ScalarEvolution and +/// DominatorTree if they are non-null. +bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, + bool AllowRuntime, bool AllowExpensiveTripCount, + bool PreserveCondBr, bool PreserveOnlyFirst, + unsigned TripMultiple, unsigned PeelCount, LoopInfo *LI, + ScalarEvolution *SE, DominatorTree *DT, + AssumptionCache *AC, OptimizationRemarkEmitter *ORE, + bool PreserveLCSSA) { + + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) { + DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n"); + return false; + } + + BasicBlock *LatchBlock = L->getLoopLatch(); + if (!LatchBlock) { + DEBUG(dbgs() << " Can't unroll; loop exit-block-insertion failed.\n"); + return false; + } + + // Loops with indirectbr cannot be cloned. + if (!L->isSafeToClone()) { + DEBUG(dbgs() << " Can't unroll; Loop body cannot be cloned.\n"); + return false; + } + + BasicBlock *Header = L->getHeader(); + BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator()); + + if (!BI || BI->isUnconditional()) { + // The loop-rotate pass can be helpful to avoid this in many cases. + DEBUG(dbgs() << + " Can't unroll; loop not terminated by a conditional branch.\n"); + return false; + } + + if (Header->hasAddressTaken()) { + // The loop-rotate pass can be helpful to avoid this in many cases. + DEBUG(dbgs() << + " Won't unroll loop: address of header block is taken.\n"); + return false; + } + + if (TripCount != 0) + DEBUG(dbgs() << " Trip Count = " << TripCount << "\n"); + if (TripMultiple != 1) + DEBUG(dbgs() << " Trip Multiple = " << TripMultiple << "\n"); + + // Effectively "DCE" unrolled iterations that are beyond the tripcount + // and will never be executed. + if (TripCount != 0 && Count > TripCount) + Count = TripCount; + + // Don't enter the unroll code if there is nothing to do. + if (TripCount == 0 && Count < 2 && PeelCount == 0) + return false; + + assert(Count > 0); + assert(TripMultiple > 0); + assert(TripCount == 0 || TripCount % TripMultiple == 0); + + // Are we eliminating the loop control altogether? + bool CompletelyUnroll = Count == TripCount; + SmallVector<BasicBlock *, 4> ExitBlocks; + L->getExitBlocks(ExitBlocks); + std::vector<BasicBlock*> OriginalLoopBlocks = L->getBlocks(); + + // Go through all exits of L and see if there are any phi-nodes there. We just + // conservatively assume that they're inserted to preserve LCSSA form, which + // means that complete unrolling might break this form. We need to either fix + // it in-place after the transformation, or entirely rebuild LCSSA. TODO: For + // now we just recompute LCSSA for the outer loop, but it should be possible + // to fix it in-place. + bool NeedToFixLCSSA = PreserveLCSSA && CompletelyUnroll && + any_of(ExitBlocks, [](const BasicBlock *BB) { + return isa<PHINode>(BB->begin()); + }); + + // We assume a run-time trip count if the compiler cannot + // figure out the loop trip count and the unroll-runtime + // flag is specified. + bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime); + + assert((!RuntimeTripCount || !PeelCount) && + "Did not expect runtime trip-count unrolling " + "and peeling for the same loop"); + + if (PeelCount) + peelLoop(L, PeelCount, LI, SE, DT, PreserveLCSSA); + + // Loops containing convergent instructions must have a count that divides + // their TripMultiple. 
+ DEBUG( + { + bool HasConvergent = false; + for (auto &BB : L->blocks()) + for (auto &I : *BB) + if (auto CS = CallSite(&I)) + HasConvergent |= CS.isConvergent(); + assert((!HasConvergent || TripMultiple % Count == 0) && + "Unroll count must divide trip multiple if loop contains a " + "convergent operation."); + }); + + if (RuntimeTripCount && TripMultiple % Count != 0 && + !UnrollRuntimeLoopRemainder(L, Count, AllowExpensiveTripCount, + UnrollRuntimeEpilog, LI, SE, DT, + PreserveLCSSA)) { + if (Force) + RuntimeTripCount = false; + else + return false; + } + + // Notify ScalarEvolution that the loop will be substantially changed, + // if not outright eliminated. + if (SE) + SE->forgetLoop(L); + + // If we know the trip count, we know the multiple... + unsigned BreakoutTrip = 0; + if (TripCount != 0) { + BreakoutTrip = TripCount % Count; + TripMultiple = 0; + } else { + // Figure out what multiple to use. + BreakoutTrip = TripMultiple = + (unsigned)GreatestCommonDivisor64(Count, TripMultiple); + } + + using namespace ore; + // Report the unrolling decision. + if (CompletelyUnroll) { + DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName() + << " with trip count " << TripCount << "!\n"); + ORE->emit(OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(), + L->getHeader()) + << "completely unrolled loop with " + << NV("UnrollCount", TripCount) << " iterations"); + } else if (PeelCount) { + DEBUG(dbgs() << "PEELING loop %" << Header->getName() + << " with iteration count " << PeelCount << "!\n"); + ORE->emit(OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(), + L->getHeader()) + << " peeled loop by " << NV("PeelCount", PeelCount) + << " iterations"); + } else { + OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(), + L->getHeader()); + Diag << "unrolled loop by a factor of " << NV("UnrollCount", Count); + + DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() + << " by " << Count); + if (TripMultiple == 0 || BreakoutTrip != TripMultiple) { + DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip); + ORE->emit(Diag << " with a breakout at trip " + << NV("BreakoutTrip", BreakoutTrip)); + } else if (TripMultiple != 1) { + DEBUG(dbgs() << " with " << TripMultiple << " trips per branch"); + ORE->emit(Diag << " with " << NV("TripMultiple", TripMultiple) + << " trips per branch"); + } else if (RuntimeTripCount) { + DEBUG(dbgs() << " with run-time trip count"); + ORE->emit(Diag << " with run-time trip count"); + } + DEBUG(dbgs() << "!\n"); + } + + bool ContinueOnTrue = L->contains(BI->getSuccessor(0)); + BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue); + + // For the first iteration of the loop, we should use the precloned values for + // PHI nodes. Insert associations now. + ValueToValueMapTy LastValueMap; + std::vector<PHINode*> OrigPHINode; + for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { + OrigPHINode.push_back(cast<PHINode>(I)); + } + + std::vector<BasicBlock*> Headers; + std::vector<BasicBlock*> Latches; + Headers.push_back(Header); + Latches.push_back(LatchBlock); + + // The current on-the-fly SSA update requires blocks to be processed in + // reverse postorder so that LastValueMap contains the correct value at each + // exit. + LoopBlocksDFS DFS(L); + DFS.perform(LI); + + // Stash the DFS iterators before adding blocks to the loop. 
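  // (As a concrete illustration of the breakout computation above, with
  //  numbers chosen purely for the example: TripCount = 10 and Count = 4 give
  //  BreakoutTrip = 10 % 4 = 2 and TripMultiple = 0, so in the branch fixup
  //  further down only the latch copy whose successor index is 2 keeps its
  //  conditional branch -- the exit can only be taken at the end of the
  //  second body, since 10 == 2*4 + 2. With an unknown trip count and
  //  TripMultiple = 6, both fields become gcd(4, 6) = 2 and every
  //  odd-indexed latch branch is rewritten as unconditional.)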
+ LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO(); + LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO(); + + std::vector<BasicBlock*> UnrolledLoopBlocks = L->getBlocks(); + + // Loop Unrolling might create new loops. While we do preserve LoopInfo, we + // might break loop-simplified form for these loops (as they, e.g., would + // share the same exit blocks). We'll keep track of loops for which we can + // break this so that later we can re-simplify them. + SmallSetVector<Loop *, 4> LoopsToSimplify; + for (Loop *SubLoop : *L) + LoopsToSimplify.insert(SubLoop); + + for (unsigned It = 1; It != Count; ++It) { + std::vector<BasicBlock*> NewBlocks; + SmallDenseMap<const Loop *, Loop *, 4> NewLoops; + NewLoops[L] = L; + + for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { + ValueToValueMapTy VMap; + BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It)); + Header->getParent()->getBasicBlockList().push_back(New); + + // Tell LI about New. + if (*BB == Header) { + assert(LI->getLoopFor(*BB) == L && "Header should not be in a sub-loop"); + L->addBasicBlockToLoop(New, *LI); + } else { + // Figure out which loop New is in. + const Loop *OldLoop = LI->getLoopFor(*BB); + assert(OldLoop && "Should (at least) be in the loop being unrolled!"); + + Loop *&NewLoop = NewLoops[OldLoop]; + if (!NewLoop) { + // Found a new sub-loop. + assert(*BB == OldLoop->getHeader() && + "Header should be first in RPO"); + + Loop *NewLoopParent = NewLoops.lookup(OldLoop->getParentLoop()); + assert(NewLoopParent && + "Expected parent loop before sub-loop in RPO"); + NewLoop = new Loop; + NewLoopParent->addChildLoop(NewLoop); + LoopsToSimplify.insert(NewLoop); + + // Forget the old loop, since its inputs may have changed. + if (SE) + SE->forgetLoop(OldLoop); + } + NewLoop->addBasicBlockToLoop(New, *LI); + } + + if (*BB == Header) + // Loop over all of the PHI nodes in the block, changing them to use + // the incoming values from the previous block. + for (PHINode *OrigPHI : OrigPHINode) { + PHINode *NewPHI = cast<PHINode>(VMap[OrigPHI]); + Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock); + if (Instruction *InValI = dyn_cast<Instruction>(InVal)) + if (It > 1 && L->contains(InValI)) + InVal = LastValueMap[InValI]; + VMap[OrigPHI] = InVal; + New->getInstList().erase(NewPHI); + } + + // Update our running map of newest clones + LastValueMap[*BB] = New; + for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end(); + VI != VE; ++VI) + LastValueMap[VI->first] = VI->second; + + // Add phi entries for newly created values to all exit blocks. + for (BasicBlock *Succ : successors(*BB)) { + if (L->contains(Succ)) + continue; + for (BasicBlock::iterator BBI = Succ->begin(); + PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) { + Value *Incoming = phi->getIncomingValueForBlock(*BB); + ValueToValueMapTy::iterator It = LastValueMap.find(Incoming); + if (It != LastValueMap.end()) + Incoming = It->second; + phi->addIncoming(Incoming, New); + } + } + // Keep track of new headers and latches as we create them, so that + // we can insert the proper branches later. + if (*BB == Header) + Headers.push_back(New); + if (*BB == LatchBlock) + Latches.push_back(New); + + NewBlocks.push_back(New); + UnrolledLoopBlocks.push_back(New); + + // Update DomTree: since we just copy the loop body, and each copy has a + // dedicated entry block (copy of the header block), this header's copy + // dominates all copied blocks. 
That means, dominance relations in the + // copied body are the same as in the original body. + if (DT) { + if (*BB == Header) + DT->addNewBlock(New, Latches[It - 1]); + else { + auto BBDomNode = DT->getNode(*BB); + auto BBIDom = BBDomNode->getIDom(); + BasicBlock *OriginalBBIDom = BBIDom->getBlock(); + DT->addNewBlock( + New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)])); + } + } + } + + // Remap all instructions in the most recent iteration + for (BasicBlock *NewBlock : NewBlocks) { + for (Instruction &I : *NewBlock) { + ::remapInstruction(&I, LastValueMap); + if (auto *II = dyn_cast<IntrinsicInst>(&I)) + if (II->getIntrinsicID() == Intrinsic::assume) + AC->registerAssumption(II); + } + } + } + + // Loop over the PHI nodes in the original block, setting incoming values. + for (PHINode *PN : OrigPHINode) { + if (CompletelyUnroll) { + PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader)); + Header->getInstList().erase(PN); + } + else if (Count > 1) { + Value *InVal = PN->removeIncomingValue(LatchBlock, false); + // If this value was defined in the loop, take the value defined by the + // last iteration of the loop. + if (Instruction *InValI = dyn_cast<Instruction>(InVal)) { + if (L->contains(InValI)) + InVal = LastValueMap[InVal]; + } + assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch"); + PN->addIncoming(InVal, Latches.back()); + } + } + + // Now that all the basic blocks for the unrolled iterations are in place, + // set up the branches to connect them. + for (unsigned i = 0, e = Latches.size(); i != e; ++i) { + // The original branch was replicated in each unrolled iteration. + BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator()); + + // The branch destination. + unsigned j = (i + 1) % e; + BasicBlock *Dest = Headers[j]; + bool NeedConditional = true; + + if (RuntimeTripCount && j != 0) { + NeedConditional = false; + } + + // For a complete unroll, make the last iteration end with a branch + // to the exit block. + if (CompletelyUnroll) { + if (j == 0) + Dest = LoopExit; + // If using trip count upper bound to completely unroll, we need to keep + // the conditional branch except the last one because the loop may exit + // after any iteration. + assert(NeedConditional && + "NeedCondition cannot be modified by both complete " + "unrolling and runtime unrolling"); + NeedConditional = (PreserveCondBr && j && !(PreserveOnlyFirst && i != 0)); + } else if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) { + // If we know the trip count or a multiple of it, we can safely use an + // unconditional branch for some iterations. + NeedConditional = false; + } + + if (NeedConditional) { + // Update the conditional branch's successor for the following + // iteration. + Term->setSuccessor(!ContinueOnTrue, Dest); + } else { + // Remove phi operands at this loop exit + if (Dest != LoopExit) { + BasicBlock *BB = Latches[i]; + for (BasicBlock *Succ: successors(BB)) { + if (Succ == Headers[i]) + continue; + for (BasicBlock::iterator BBI = Succ->begin(); + PHINode *Phi = dyn_cast<PHINode>(BBI); ++BBI) { + Phi->removeIncomingValue(BB, false); + } + } + } + // Replace the conditional branch with an unconditional one. + BranchInst::Create(Dest, Term); + Term->eraseFromParent(); + } + } + // Update dominators of blocks we might reach through exits. + // Immediate dominator of such block might change, because we add more + // routes which can lead to the exit: we can now reach it from the copied + // iterations too. 
Thus, the new idom of the block will be the nearest + // common dominator of the previous idom and common dominator of all copies of + // the previous idom. This is equivalent to the nearest common dominator of + // the previous idom and the first latch, which dominates all copies of the + // previous idom. + if (DT && Count > 1) { + for (auto *BB : OriginalLoopBlocks) { + auto *BBDomNode = DT->getNode(BB); + SmallVector<BasicBlock *, 16> ChildrenToUpdate; + for (auto *ChildDomNode : BBDomNode->getChildren()) { + auto *ChildBB = ChildDomNode->getBlock(); + if (!L->contains(ChildBB)) + ChildrenToUpdate.push_back(ChildBB); + } + BasicBlock *NewIDom = DT->findNearestCommonDominator(BB, Latches[0]); + for (auto *ChildBB : ChildrenToUpdate) + DT->changeImmediateDominator(ChildBB, NewIDom); + } + } + + // Merge adjacent basic blocks, if possible. + SmallPtrSet<Loop *, 4> ForgottenLoops; + for (BasicBlock *Latch : Latches) { + BranchInst *Term = cast<BranchInst>(Latch->getTerminator()); + if (Term->isUnconditional()) { + BasicBlock *Dest = Term->getSuccessor(0); + if (BasicBlock *Fold = + foldBlockIntoPredecessor(Dest, LI, SE, ForgottenLoops, DT)) { + // Dest has been folded into Fold. Update our worklists accordingly. + std::replace(Latches.begin(), Latches.end(), Dest, Fold); + UnrolledLoopBlocks.erase(std::remove(UnrolledLoopBlocks.begin(), + UnrolledLoopBlocks.end(), Dest), + UnrolledLoopBlocks.end()); + } + } + } + + // FIXME: We only preserve DT info for complete unrolling now. Incrementally + // updating domtree after partial loop unrolling should also be easy. + if (DT && !CompletelyUnroll) + DT->recalculate(*L->getHeader()->getParent()); + else if (DT) + DEBUG(DT->verifyDomTree()); + + // Simplify any new induction variables in the partially unrolled loop. + if (SE && !CompletelyUnroll && Count > 1) { + SmallVector<WeakVH, 16> DeadInsts; + simplifyLoopIVs(L, SE, DT, LI, DeadInsts); + + // Aggressively clean up dead instructions that simplifyLoopIVs already + // identified. Any remaining should be cleaned up below. + while (!DeadInsts.empty()) + if (Instruction *Inst = + dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val())) + RecursivelyDeleteTriviallyDeadInstructions(Inst); + } + + // At this point, the code is well formed. We now do a quick sweep over the + // inserted code, doing constant propagation and dead code elimination as we + // go. + const DataLayout &DL = Header->getModule()->getDataLayout(); + const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks(); + for (BasicBlock *BB : NewLoopBlocks) { + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { + Instruction *Inst = &*I++; + + if (Value *V = SimplifyInstruction(Inst, DL)) + if (LI->replacementPreservesLCSSAForm(Inst, V)) + Inst->replaceAllUsesWith(V); + if (isInstructionTriviallyDead(Inst)) + BB->getInstList().erase(Inst); + } + } + + // TODO: after peeling or unrolling, previously loop variant conditions are + // likely to fold to constants, eagerly propagating those here will require + // fewer cleanup passes to be run. Alternatively, a LoopEarlyCSE might be + // appropriate. + + NumCompletelyUnrolled += CompletelyUnroll; + ++NumUnrolled; + + Loop *OuterL = L->getParentLoop(); + // Update LoopInfo if the loop is completely removed. + if (CompletelyUnroll) + LI->markAsRemoved(L); + + // After complete unrolling most of the blocks should be contained in OuterL. + // However, some of them might happen to be out of OuterL (e.g. if they + // precede a loop exit). 
In this case we might need to insert PHI nodes in + // order to preserve LCSSA form. + // We don't need to check this if we already know that we need to fix LCSSA + // form. + // TODO: For now we just recompute LCSSA for the outer loop in this case, but + // it should be possible to fix it in-place. + if (PreserveLCSSA && OuterL && CompletelyUnroll && !NeedToFixLCSSA) + NeedToFixLCSSA |= ::needToInsertPhisForLCSSA(OuterL, UnrolledLoopBlocks, LI); + + // If we have a pass and a DominatorTree we should re-simplify impacted loops + // to ensure subsequent analyses can rely on this form. We want to simplify + // at least one layer outside of the loop that was unrolled so that any + // changes to the parent loop exposed by the unrolling are considered. + if (DT) { + if (!OuterL && !CompletelyUnroll) + OuterL = L; + if (OuterL) { + // OuterL includes all loops for which we can break loop-simplify, so + // it's sufficient to simplify only it (it'll recursively simplify inner + // loops too). + // TODO: That potentially might be compile-time expensive. We should try + // to fix the loop-simplified form incrementally. + simplifyLoop(OuterL, DT, LI, SE, AC, PreserveLCSSA); + + // LCSSA must be performed on the outermost affected loop. The unrolled + // loop's last loop latch is guaranteed to be in the outermost loop after + // LoopInfo's been updated by markAsRemoved. + Loop *LatchLoop = LI->getLoopFor(Latches.back()); + if (!OuterL->contains(LatchLoop)) + while (OuterL->getParentLoop() != LatchLoop) + OuterL = OuterL->getParentLoop(); + + if (NeedToFixLCSSA) + formLCSSARecursively(*OuterL, *DT, LI, SE); + else + assert(OuterL->isLCSSAForm(*DT) && + "Loops should be in LCSSA form after loop-unroll."); + } else { + // Simplify loops for which we might've broken loop-simplify form. + for (Loop *SubLoop : LoopsToSimplify) + simplifyLoop(SubLoop, DT, LI, SE, AC, PreserveLCSSA); + } + } + + return true; +} + +/// Given an llvm.loop loop id metadata node, returns the loop hint metadata +/// node with the given name (for example, "llvm.loop.unroll.count"). If no +/// such metadata node exists, then nullptr is returned. +MDNode *llvm::GetUnrollMetadata(MDNode *LoopID, StringRef Name) { + // First operand should refer to the loop id itself. + assert(LoopID->getNumOperands() > 0 && "requires at least one operand"); + assert(LoopID->getOperand(0) == LoopID && "invalid loop id"); + + for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) { + MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i)); + if (!MD) + continue; + + MDString *S = dyn_cast<MDString>(MD->getOperand(0)); + if (!S) + continue; + + if (Name.equals(S->getString())) + return MD; + } + return nullptr; +} diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp new file mode 100644 index 000000000000..842cf31f2e3d --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp @@ -0,0 +1,414 @@ +//===-- UnrollLoopPeel.cpp - Loop peeling utilities -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements some loop unrolling utilities for peeling loops +// with dynamically inferred (from PGO) trip counts. See LoopUnroll.cpp for +// unrolling loops with compile-time constant trip counts. 
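As an aside on the GetUnrollMetadata helper defined just above: a typical consumer reads the unroll-count pragma roughly as sketched below. The helper name getUnrollCountFromMetadata is invented for the example, and the mdconst::extract step assumes the count is stored as a ConstantInt operand, which is how "#pragma clang loop unroll_count(N)" is encoded.

// Illustrative only; requires llvm/IR/Metadata.h and UnrollLoop.h.
static unsigned getUnrollCountFromMetadata(const Loop *L) {
  MDNode *LoopID = L->getLoopID();
  if (!LoopID)
    return 0;
  MDNode *MD = GetUnrollMetadata(LoopID, "llvm.loop.unroll.count");
  if (!MD || MD->getNumOperands() < 2)
    return 0;
  return mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
}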
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/LoopIterator.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/UnrollLoop.h" +#include <algorithm> + +using namespace llvm; + +#define DEBUG_TYPE "loop-unroll" +STATISTIC(NumPeeled, "Number of loops peeled"); + +static cl::opt<unsigned> UnrollPeelMaxCount( + "unroll-peel-max-count", cl::init(7), cl::Hidden, + cl::desc("Max average trip count which will cause loop peeling.")); + +static cl::opt<unsigned> UnrollForcePeelCount( + "unroll-force-peel-count", cl::init(0), cl::Hidden, + cl::desc("Force a peel count regardless of profiling information.")); + +// Check whether we are capable of peeling this loop. +static bool canPeel(Loop *L) { + // Make sure the loop is in simplified form + if (!L->isLoopSimplifyForm()) + return false; + + // Only peel loops that contain a single exit + if (!L->getExitingBlock() || !L->getUniqueExitBlock()) + return false; + + return true; +} + +// Return the number of iterations we want to peel off. +void llvm::computePeelCount(Loop *L, unsigned LoopSize, + TargetTransformInfo::UnrollingPreferences &UP) { + UP.PeelCount = 0; + if (!canPeel(L)) + return; + + // Only try to peel innermost loops. + if (!L->empty()) + return; + + // If the user provided a peel count, use that. + bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0; + if (UserPeelCount) { + DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount + << " iterations.\n"); + UP.PeelCount = UnrollForcePeelCount; + return; + } + + // If we don't know the trip count, but have reason to believe the average + // trip count is low, peeling should be beneficial, since we will usually + // hit the peeled section. + // We only do this in the presence of profile information, since otherwise + // our estimates of the trip count are not reliable enough. + if (UP.AllowPeeling && L->getHeader()->getParent()->getEntryCount()) { + Optional<unsigned> PeelCount = getLoopEstimatedTripCount(L); + if (!PeelCount) + return; + + DEBUG(dbgs() << "Profile-based estimated trip count is " << *PeelCount + << "\n"); + + if (*PeelCount) { + if ((*PeelCount <= UnrollPeelMaxCount) && + (LoopSize * (*PeelCount + 1) <= UP.Threshold)) { + DEBUG(dbgs() << "Peeling first " << *PeelCount << " iterations.\n"); + UP.PeelCount = *PeelCount; + return; + } + DEBUG(dbgs() << "Requested peel count: " << *PeelCount << "\n"); + DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n"); + DEBUG(dbgs() << "Peel cost: " << LoopSize * (*PeelCount + 1) << "\n"); + DEBUG(dbgs() << "Max peel cost: " << UP.Threshold << "\n"); + } + } + + return; +} + +/// \brief Update the branch weights of the latch of a peeled-off loop +/// iteration. +/// This sets the branch weights for the latch of the recently peeled off loop +/// iteration correctly. +/// Our goal is to make sure that: +/// a) The total weight of all the copies of the loop body is preserved. 
+/// b) The total weight of the loop exit is preserved. +/// c) The body weight is reasonably distributed between the peeled iterations. +/// +/// \param Header The copy of the header block that belongs to next iteration. +/// \param LatchBR The copy of the latch branch that belongs to this iteration. +/// \param IterNumber The serial number of the iteration that was just +/// peeled off. +/// \param AvgIters The average number of iterations we expect the loop to have. +/// \param[in,out] PeeledHeaderWeight The total number of dynamic loop +/// iterations that are unaccounted for. As an input, it represents the number +/// of times we expect to enter the header of the iteration currently being +/// peeled off. The output is the number of times we expect to enter the +/// header of the next iteration. +static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR, + unsigned IterNumber, unsigned AvgIters, + uint64_t &PeeledHeaderWeight) { + + // FIXME: Pick a more realistic distribution. + // Currently the proportion of weight we assign to the fall-through + // side of the branch drops linearly with the iteration number, and we use + // a 0.9 fudge factor to make the drop-off less sharp... + if (PeeledHeaderWeight) { + uint64_t FallThruWeight = + PeeledHeaderWeight * ((float)(AvgIters - IterNumber) / AvgIters * 0.9); + uint64_t ExitWeight = PeeledHeaderWeight - FallThruWeight; + PeeledHeaderWeight -= ExitWeight; + + unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1); + MDBuilder MDB(LatchBR->getContext()); + MDNode *WeightNode = + HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThruWeight) + : MDB.createBranchWeights(FallThruWeight, ExitWeight); + LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode); + } +} + +/// \brief Clones the body of the loop L, putting it between \p InsertTop and \p +/// InsertBot. +/// \param IterNumber The serial number of the iteration currently being +/// peeled off. +/// \param Exit The exit block of the original loop. +/// \param[out] NewBlocks A list of the the blocks in the newly created clone +/// \param[out] VMap The value map between the loop and the new clone. +/// \param LoopBlocks A helper for DFS-traversal of the loop. +/// \param LVMap A value-map that maps instructions from the original loop to +/// instructions in the last peeled-off iteration. +static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop, + BasicBlock *InsertBot, BasicBlock *Exit, + SmallVectorImpl<BasicBlock *> &NewBlocks, + LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, + ValueToValueMapTy &LVMap, LoopInfo *LI) { + + BasicBlock *Header = L->getHeader(); + BasicBlock *Latch = L->getLoopLatch(); + BasicBlock *PreHeader = L->getLoopPreheader(); + + Function *F = Header->getParent(); + LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO(); + LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO(); + Loop *ParentLoop = L->getParentLoop(); + + // For each block in the original loop, create a new copy, + // and update the value map with the newly created values. + for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { + BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".peel", F); + NewBlocks.push_back(NewBB); + + if (ParentLoop) + ParentLoop->addBasicBlockToLoop(NewBB, *LI); + + VMap[*BB] = NewBB; + } + + // Hook-up the control flow for the newly inserted blocks. 
+ // The new header is hooked up directly to the "top", which is either + // the original loop preheader (for the first iteration) or the previous + // iteration's exiting block (for every other iteration) + InsertTop->getTerminator()->setSuccessor(0, cast<BasicBlock>(VMap[Header])); + + // Similarly, for the latch: + // The original exiting edge is still hooked up to the loop exit. + // The backedge now goes to the "bottom", which is either the loop's real + // header (for the last peeled iteration) or the copied header of the next + // iteration (for every other iteration) + BranchInst *LatchBR = + cast<BranchInst>(cast<BasicBlock>(VMap[Latch])->getTerminator()); + unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1); + LatchBR->setSuccessor(HeaderIdx, InsertBot); + LatchBR->setSuccessor(1 - HeaderIdx, Exit); + + // The new copy of the loop body starts with a bunch of PHI nodes + // that pick an incoming value from either the preheader, or the previous + // loop iteration. Since this copy is no longer part of the loop, we + // resolve this statically: + // For the first iteration, we use the value from the preheader directly. + // For any other iteration, we replace the phi with the value generated by + // the immediately preceding clone of the loop body (which represents + // the previous iteration). + for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { + PHINode *NewPHI = cast<PHINode>(VMap[&*I]); + if (IterNumber == 0) { + VMap[&*I] = NewPHI->getIncomingValueForBlock(PreHeader); + } else { + Value *LatchVal = NewPHI->getIncomingValueForBlock(Latch); + Instruction *LatchInst = dyn_cast<Instruction>(LatchVal); + if (LatchInst && L->contains(LatchInst)) + VMap[&*I] = LVMap[LatchInst]; + else + VMap[&*I] = LatchVal; + } + cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI); + } + + // Fix up the outgoing values - we need to add a value for the iteration + // we've just created. Note that this must happen *after* the incoming + // values are adjusted, since the value going out of the latch may also be + // a value coming into the header. + for (BasicBlock::iterator I = Exit->begin(); isa<PHINode>(I); ++I) { + PHINode *PHI = cast<PHINode>(I); + Value *LatchVal = PHI->getIncomingValueForBlock(Latch); + Instruction *LatchInst = dyn_cast<Instruction>(LatchVal); + if (LatchInst && L->contains(LatchInst)) + LatchVal = VMap[LatchVal]; + PHI->addIncoming(LatchVal, cast<BasicBlock>(VMap[Latch])); + } + + // LastValueMap is updated with the values for the current loop + // which are used the next time this function is called. + for (const auto &KV : VMap) + LVMap[KV.first] = KV.second; +} + +/// \brief Peel off the first \p PeelCount iterations of loop \p L. +/// +/// Note that this does not peel them off as a single straight-line block. +/// Rather, each iteration is peeled off separately, and needs to check the +/// exit condition. +/// For loops that dynamically execute \p PeelCount iterations or less +/// this provides a benefit, since the peeled off iterations, which account +/// for the bulk of dynamic execution, can be further simplified by scalar +/// optimizations. 
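/// As a concrete illustration: with PeelCount == 2, the loop body is emitted
/// twice in front of the loop, each peeled copy still ending in the original
/// exit test, so control can leave after the first or the second peeled
/// iteration; the header PHIs of the remaining loop are then rewired to take
/// their starting values from the second peeled copy instead of the
/// preheader.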
+bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, + ScalarEvolution *SE, DominatorTree *DT, + bool PreserveLCSSA) { + if (!canPeel(L)) + return false; + + LoopBlocksDFS LoopBlocks(L); + LoopBlocks.perform(LI); + + BasicBlock *Header = L->getHeader(); + BasicBlock *PreHeader = L->getLoopPreheader(); + BasicBlock *Latch = L->getLoopLatch(); + BasicBlock *Exit = L->getUniqueExitBlock(); + + Function *F = Header->getParent(); + + // Set up all the necessary basic blocks. It is convenient to split the + // preheader into 3 parts - two blocks to anchor the peeled copy of the loop + // body, and a new preheader for the "real" loop. + + // Peeling the first iteration transforms. + // + // PreHeader: + // ... + // Header: + // LoopBody + // If (cond) goto Header + // Exit: + // + // into + // + // InsertTop: + // LoopBody + // If (!cond) goto Exit + // InsertBot: + // NewPreHeader: + // ... + // Header: + // LoopBody + // If (cond) goto Header + // Exit: + // + // Each following iteration will split the current bottom anchor in two, + // and put the new copy of the loop body between these two blocks. That is, + // after peeling another iteration from the example above, we'll split + // InsertBot, and get: + // + // InsertTop: + // LoopBody + // If (!cond) goto Exit + // InsertBot: + // LoopBody + // If (!cond) goto Exit + // InsertBot.next: + // NewPreHeader: + // ... + // Header: + // LoopBody + // If (cond) goto Header + // Exit: + + BasicBlock *InsertTop = SplitEdge(PreHeader, Header, DT, LI); + BasicBlock *InsertBot = + SplitBlock(InsertTop, InsertTop->getTerminator(), DT, LI); + BasicBlock *NewPreHeader = + SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI); + + InsertTop->setName(Header->getName() + ".peel.begin"); + InsertBot->setName(Header->getName() + ".peel.next"); + NewPreHeader->setName(PreHeader->getName() + ".peel.newph"); + + ValueToValueMapTy LVMap; + + // If we have branch weight information, we'll want to update it for the + // newly created branches. + BranchInst *LatchBR = + cast<BranchInst>(cast<BasicBlock>(Latch)->getTerminator()); + unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1); + + uint64_t TrueWeight, FalseWeight; + uint64_t ExitWeight = 0, CurHeaderWeight = 0; + if (LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) { + ExitWeight = HeaderIdx ? TrueWeight : FalseWeight; + // The # of times the loop body executes is the sum of the exit block + // weight and the # of times the backedges are taken. + CurHeaderWeight = TrueWeight + FalseWeight; + } + + // For each peeled-off iteration, make a copy of the loop. + for (unsigned Iter = 0; Iter < PeelCount; ++Iter) { + SmallVector<BasicBlock *, 8> NewBlocks; + ValueToValueMapTy VMap; + + // Subtract the exit weight from the current header weight -- the exit + // weight is exactly the weight of the previous iteration's header. + // FIXME: due to the way the distribution is constructed, we need a + // guard here to make sure we don't end up with non-positive weights. 
+ if (ExitWeight < CurHeaderWeight) + CurHeaderWeight -= ExitWeight; + else + CurHeaderWeight = 1; + + cloneLoopBlocks(L, Iter, InsertTop, InsertBot, Exit, + NewBlocks, LoopBlocks, VMap, LVMap, LI); + updateBranchWeights(InsertBot, cast<BranchInst>(VMap[LatchBR]), Iter, + PeelCount, ExitWeight); + + InsertTop = InsertBot; + InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI); + InsertBot->setName(Header->getName() + ".peel.next"); + + F->getBasicBlockList().splice(InsertTop->getIterator(), + F->getBasicBlockList(), + NewBlocks[0]->getIterator(), F->end()); + + // Remap to use values from the current iteration instead of the + // previous one. + remapInstructionsInBlocks(NewBlocks, VMap); + } + + // Now adjust the phi nodes in the loop header to get their initial values + // from the last peeled-off iteration instead of the preheader. + for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { + PHINode *PHI = cast<PHINode>(I); + Value *NewVal = PHI->getIncomingValueForBlock(Latch); + Instruction *LatchInst = dyn_cast<Instruction>(NewVal); + if (LatchInst && L->contains(LatchInst)) + NewVal = LVMap[LatchInst]; + + PHI->setIncomingValue(PHI->getBasicBlockIndex(NewPreHeader), NewVal); + } + + // Adjust the branch weights on the loop exit. + if (ExitWeight) { + // The backedge count is the difference of current header weight and + // current loop exit weight. If the current header weight is smaller than + // the current loop exit weight, we mark the loop backedge weight as 1. + uint64_t BackEdgeWeight = 0; + if (ExitWeight < CurHeaderWeight) + BackEdgeWeight = CurHeaderWeight - ExitWeight; + else + BackEdgeWeight = 1; + MDBuilder MDB(LatchBR->getContext()); + MDNode *WeightNode = + HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight) + : MDB.createBranchWeights(BackEdgeWeight, ExitWeight); + LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode); + } + + // If the loop is nested, we changed the parent loop, update SE. + if (Loop *ParentLoop = L->getParentLoop()) + SE->forgetLoop(ParentLoop); + + NumPeeled++; + + return true; +} diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp new file mode 100644 index 000000000000..5758a415f12b --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -0,0 +1,691 @@ +//===-- UnrollLoopRuntime.cpp - Runtime Loop unrolling utilities ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements some loop unrolling utilities for loops with run-time +// trip counts. See LoopUnroll.cpp for unrolling loops with compile-time +// trip counts. +// +// The functions in this file are used to generate extra code when the +// run-time trip count modulo the unroll factor is not 0. When this is the +// case, we need to generate code to execute these 'left over' iterations. +// +// The current strategy generates an if-then-else sequence prior to the +// unrolled loop to execute the 'left over' iterations before or after the +// unrolled loop. 
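// As a concrete illustration: with an unroll factor of 4 and a run-time trip
// count of n, the left-over part executes n % 4 iterations and the unrolled
// body covers the remaining n - (n % 4); since the factor is assumed below to
// be a power of two, n % 4 can be computed cheaply as n & 3.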
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/UnrollLoop.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/LoopIterator.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include <algorithm> + +using namespace llvm; + +#define DEBUG_TYPE "loop-unroll" + +STATISTIC(NumRuntimeUnrolled, + "Number of loops unrolled with run-time trip counts"); + +/// Connect the unrolling prolog code to the original loop. +/// The unrolling prolog code contains code to execute the +/// 'extra' iterations if the run-time trip count modulo the +/// unroll count is non-zero. +/// +/// This function performs the following: +/// - Create PHI nodes at prolog end block to combine values +/// that exit the prolog code and jump around the prolog. +/// - Add a PHI operand to a PHI node at the loop exit block +/// for values that exit the prolog and go around the loop. +/// - Branch around the original loop if the trip count is less +/// than the unroll factor. +/// +static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, + BasicBlock *PrologExit, BasicBlock *PreHeader, + BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, + DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA) { + BasicBlock *Latch = L->getLoopLatch(); + assert(Latch && "Loop must have a latch"); + BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]); + + // Create a PHI node for each outgoing value from the original loop + // (which means it is an outgoing value from the prolog code too). + // The new PHI node is inserted in the prolog end basic block. + // The new PHI node value is added as an operand of a PHI node in either + // the loop header or the loop exit block. + for (BasicBlock *Succ : successors(Latch)) { + for (Instruction &BBI : *Succ) { + PHINode *PN = dyn_cast<PHINode>(&BBI); + // Exit when we passed all PHI nodes. + if (!PN) + break; + // Add a new PHI node to the prolog end block and add the + // appropriate incoming values. + PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName() + ".unr", + PrologExit->getFirstNonPHI()); + // Adding a value to the new PHI node from the original loop preheader. + // This is the value that skips all the prolog code. + if (L->contains(PN)) { + NewPN->addIncoming(PN->getIncomingValueForBlock(NewPreHeader), + PreHeader); + } else { + NewPN->addIncoming(UndefValue::get(PN->getType()), PreHeader); + } + + Value *V = PN->getIncomingValueForBlock(Latch); + if (Instruction *I = dyn_cast<Instruction>(V)) { + if (L->contains(I)) { + V = VMap.lookup(I); + } + } + // Adding a value to the new PHI node from the last prolog block + // that was created. + NewPN->addIncoming(V, PrologLatch); + + // Update the existing PHI node operand with the value from the + // new PHI node. How this is done depends on if the existing + // PHI node is in the original loop block, or the exit block. 
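      // For instance, for an induction-variable PHI in the loop header
      // (all names illustrative only):
      //   %iv = phi i64 [ 0, %entry.new ], [ %iv.next, %latch ]
      // the prolog exit receives
      //   %iv.unr = phi i64 [ 0, %entry ], [ %iv.next.prol, %latch.prol ]
      // and %iv's incoming value from the new preheader is replaced by
      // %iv.unr, so the main loop resumes wherever the prolog stopped.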
+ if (L->contains(PN)) { + PN->setIncomingValue(PN->getBasicBlockIndex(NewPreHeader), NewPN); + } else { + PN->addIncoming(NewPN, PrologExit); + } + } + } + + // Make sure that created prolog loop is in simplified form + SmallVector<BasicBlock *, 4> PrologExitPreds; + Loop *PrologLoop = LI->getLoopFor(PrologLatch); + if (PrologLoop) { + for (BasicBlock *PredBB : predecessors(PrologExit)) + if (PrologLoop->contains(PredBB)) + PrologExitPreds.push_back(PredBB); + + SplitBlockPredecessors(PrologExit, PrologExitPreds, ".unr-lcssa", DT, LI, + PreserveLCSSA); + } + + // Create a branch around the original loop, which is taken if there are no + // iterations remaining to be executed after running the prologue. + Instruction *InsertPt = PrologExit->getTerminator(); + IRBuilder<> B(InsertPt); + + assert(Count != 0 && "nonsensical Count!"); + + // If BECount <u (Count - 1) then (BECount + 1) % Count == (BECount + 1) + // This means %xtraiter is (BECount + 1) and all of the iterations of this + // loop were executed by the prologue. Note that if BECount <u (Count - 1) + // then (BECount + 1) cannot unsigned-overflow. + Value *BrLoopExit = + B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1)); + BasicBlock *Exit = L->getUniqueExitBlock(); + assert(Exit && "Loop must have a single exit block only"); + // Split the exit to maintain loop canonicalization guarantees + SmallVector<BasicBlock*, 4> Preds(predecessors(Exit)); + SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", DT, LI, + PreserveLCSSA); + // Add the branch to the exit block (around the unrolled loop) + B.CreateCondBr(BrLoopExit, Exit, NewPreHeader); + InsertPt->eraseFromParent(); +} + +/// Connect the unrolling epilog code to the original loop. +/// The unrolling epilog code contains code to execute the +/// 'extra' iterations if the run-time trip count modulo the +/// unroll count is non-zero. +/// +/// This function performs the following: +/// - Update PHI nodes at the unrolling loop exit and epilog loop exit +/// - Create PHI nodes at the unrolling loop exit to combine +/// values that exit the unrolling loop code and jump around it. +/// - Update PHI operands in the epilog loop by the new PHI nodes +/// - Branch around the epilog loop if extra iters (ModVal) is zero. +/// +static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, + BasicBlock *Exit, BasicBlock *PreHeader, + BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader, + ValueToValueMapTy &VMap, DominatorTree *DT, + LoopInfo *LI, bool PreserveLCSSA) { + BasicBlock *Latch = L->getLoopLatch(); + assert(Latch && "Loop must have a latch"); + BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]); + + // Loop structure should be the following: + // + // PreHeader + // NewPreHeader + // Header + // ... + // Latch + // NewExit (PN) + // EpilogPreHeader + // EpilogHeader + // ... + // EpilogLatch + // Exit (EpilogPN) + + // Update PHI nodes at NewExit and Exit. + for (Instruction &BBI : *NewExit) { + PHINode *PN = dyn_cast<PHINode>(&BBI); + // Exit when we passed all PHI nodes. + if (!PN) + break; + // PN should be used in another PHI located in Exit block as + // Exit was split by SplitBlockPredecessors into Exit and NewExit + // Basicaly it should look like: + // NewExit: + // PN = PHI [I, Latch] + // ... + // Exit: + // EpilogPN = PHI [PN, EpilogPreHeader] + // + // There is EpilogPreHeader incoming block instead of NewExit as + // NewExit was spilt 1 more time to get EpilogPreHeader. 
+ assert(PN->hasOneUse() && "The phi should have 1 use"); + PHINode *EpilogPN = cast<PHINode> (PN->use_begin()->getUser()); + assert(EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block"); + + // Add incoming PreHeader from branch around the Loop + PN->addIncoming(UndefValue::get(PN->getType()), PreHeader); + + Value *V = PN->getIncomingValueForBlock(Latch); + Instruction *I = dyn_cast<Instruction>(V); + if (I && L->contains(I)) + // If value comes from an instruction in the loop add VMap value. + V = VMap.lookup(I); + // For the instruction out of the loop, constant or undefined value + // insert value itself. + EpilogPN->addIncoming(V, EpilogLatch); + + assert(EpilogPN->getBasicBlockIndex(EpilogPreHeader) >= 0 && + "EpilogPN should have EpilogPreHeader incoming block"); + // Change EpilogPreHeader incoming block to NewExit. + EpilogPN->setIncomingBlock(EpilogPN->getBasicBlockIndex(EpilogPreHeader), + NewExit); + // Now PHIs should look like: + // NewExit: + // PN = PHI [I, Latch], [undef, PreHeader] + // ... + // Exit: + // EpilogPN = PHI [PN, NewExit], [VMap[I], EpilogLatch] + } + + // Create PHI nodes at NewExit (from the unrolling loop Latch and PreHeader). + // Update corresponding PHI nodes in epilog loop. + for (BasicBlock *Succ : successors(Latch)) { + // Skip this as we already updated phis in exit blocks. + if (!L->contains(Succ)) + continue; + for (Instruction &BBI : *Succ) { + PHINode *PN = dyn_cast<PHINode>(&BBI); + // Exit when we passed all PHI nodes. + if (!PN) + break; + // Add new PHI nodes to the loop exit block and update epilog + // PHIs with the new PHI values. + PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName() + ".unr", + NewExit->getFirstNonPHI()); + // Adding a value to the new PHI node from the unrolling loop preheader. + NewPN->addIncoming(PN->getIncomingValueForBlock(NewPreHeader), PreHeader); + // Adding a value to the new PHI node from the unrolling loop latch. + NewPN->addIncoming(PN->getIncomingValueForBlock(Latch), Latch); + + // Update the existing PHI node operand with the value from the new PHI + // node. Corresponding instruction in epilog loop should be PHI. + PHINode *VPN = cast<PHINode>(VMap[&BBI]); + VPN->setIncomingValue(VPN->getBasicBlockIndex(EpilogPreHeader), NewPN); + } + } + + Instruction *InsertPt = NewExit->getTerminator(); + IRBuilder<> B(InsertPt); + Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod"); + assert(Exit && "Loop must have a single exit block only"); + // Split the exit to maintain loop canonicalization guarantees + SmallVector<BasicBlock*, 4> Preds(predecessors(Exit)); + SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, + PreserveLCSSA); + // Add the branch to the exit block (around the unrolling loop) + B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit); + InsertPt->eraseFromParent(); +} + +/// Create a clone of the blocks in a loop and connect them together. +/// If CreateRemainderLoop is false, loop structure will not be cloned, +/// otherwise a new loop will be created including all cloned blocks, and the +/// iterator of it switches to count NewIter down to 0. +/// The cloned blocks should be inserted between InsertTop and InsertBot. +/// If loop structure is cloned InsertTop should be new preheader, InsertBot +/// new loop exit. 
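/// (In other words, the cloned remainder loop gets its own countdown counter:
/// a PHI in the cloned header starts at NewIter, is decremented by one in the
/// cloned latch, and the backedge is taken while the decremented value is
/// still non-zero; the original latch branch is erased from the clone and
/// replaced by this new test.)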
+/// +static void CloneLoopBlocks(Loop *L, Value *NewIter, + const bool CreateRemainderLoop, + const bool UseEpilogRemainder, + BasicBlock *InsertTop, BasicBlock *InsertBot, + BasicBlock *Preheader, + std::vector<BasicBlock *> &NewBlocks, + LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, + LoopInfo *LI) { + StringRef suffix = UseEpilogRemainder ? "epil" : "prol"; + BasicBlock *Header = L->getHeader(); + BasicBlock *Latch = L->getLoopLatch(); + Function *F = Header->getParent(); + LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO(); + LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO(); + Loop *NewLoop = nullptr; + Loop *ParentLoop = L->getParentLoop(); + if (CreateRemainderLoop) { + NewLoop = new Loop(); + if (ParentLoop) + ParentLoop->addChildLoop(NewLoop); + else + LI->addTopLevelLoop(NewLoop); + } + + // For each block in the original loop, create a new copy, + // and update the value map with the newly created values. + for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { + BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F); + NewBlocks.push_back(NewBB); + + if (NewLoop) + NewLoop->addBasicBlockToLoop(NewBB, *LI); + else if (ParentLoop) + ParentLoop->addBasicBlockToLoop(NewBB, *LI); + + VMap[*BB] = NewBB; + if (Header == *BB) { + // For the first block, add a CFG connection to this newly + // created block. + InsertTop->getTerminator()->setSuccessor(0, NewBB); + } + + if (Latch == *BB) { + // For the last block, if CreateRemainderLoop is false, create a direct + // jump to InsertBot. If not, create a loop back to cloned head. + VMap.erase((*BB)->getTerminator()); + BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]); + BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator()); + IRBuilder<> Builder(LatchBR); + if (!CreateRemainderLoop) { + Builder.CreateBr(InsertBot); + } else { + PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2, + suffix + ".iter", + FirstLoopBB->getFirstNonPHI()); + Value *IdxSub = + Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1), + NewIdx->getName() + ".sub"); + Value *IdxCmp = + Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp"); + Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot); + NewIdx->addIncoming(NewIter, InsertTop); + NewIdx->addIncoming(IdxSub, NewBB); + } + LatchBR->eraseFromParent(); + } + } + + // Change the incoming values to the ones defined in the preheader or + // cloned loop. + for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { + PHINode *NewPHI = cast<PHINode>(VMap[&*I]); + if (!CreateRemainderLoop) { + if (UseEpilogRemainder) { + unsigned idx = NewPHI->getBasicBlockIndex(Preheader); + NewPHI->setIncomingBlock(idx, InsertTop); + NewPHI->removeIncomingValue(Latch, false); + } else { + VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader); + cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI); + } + } else { + unsigned idx = NewPHI->getBasicBlockIndex(Preheader); + NewPHI->setIncomingBlock(idx, InsertTop); + BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]); + idx = NewPHI->getBasicBlockIndex(Latch); + Value *InVal = NewPHI->getIncomingValue(idx); + NewPHI->setIncomingBlock(idx, NewLatch); + if (Value *V = VMap.lookup(InVal)) + NewPHI->setIncomingValue(idx, V); + } + } + if (NewLoop) { + // Add unroll disable metadata to disable future unrolling for this loop. + SmallVector<Metadata *, 4> MDs; + // Reserve first location for self reference to the LoopID metadata node. 
+ MDs.push_back(nullptr); + MDNode *LoopID = NewLoop->getLoopID(); + if (LoopID) { + // First remove any existing loop unrolling metadata. + for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { + bool IsUnrollMetadata = false; + MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i)); + if (MD) { + const MDString *S = dyn_cast<MDString>(MD->getOperand(0)); + IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll."); + } + if (!IsUnrollMetadata) + MDs.push_back(LoopID->getOperand(i)); + } + } + + LLVMContext &Context = NewLoop->getHeader()->getContext(); + SmallVector<Metadata *, 1> DisableOperands; + DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable")); + MDNode *DisableNode = MDNode::get(Context, DisableOperands); + MDs.push_back(DisableNode); + + MDNode *NewLoopID = MDNode::get(Context, MDs); + // Set operand 0 to refer to the loop id itself. + NewLoopID->replaceOperandWith(0, NewLoopID); + NewLoop->setLoopID(NewLoopID); + } +} + +/// Insert code in the prolog/epilog code when unrolling a loop with a +/// run-time trip-count. +/// +/// This method assumes that the loop unroll factor is total number +/// of loop bodies in the loop after unrolling. (Some folks refer +/// to the unroll factor as the number of *extra* copies added). +/// We assume also that the loop unroll factor is a power-of-two. So, after +/// unrolling the loop, the number of loop bodies executed is 2, +/// 4, 8, etc. Note - LLVM converts the if-then-sequence to a switch +/// instruction in SimplifyCFG.cpp. Then, the backend decides how code for +/// the switch instruction is generated. +/// +/// ***Prolog case*** +/// extraiters = tripcount % loopfactor +/// if (extraiters == 0) jump Loop: +/// else jump Prol: +/// Prol: LoopBody; +/// extraiters -= 1 // Omitted if unroll factor is 2. +/// if (extraiters != 0) jump Prol: // Omitted if unroll factor is 2. +/// if (tripcount < loopfactor) jump End: +/// Loop: +/// ... +/// End: +/// +/// ***Epilog case*** +/// extraiters = tripcount % loopfactor +/// if (tripcount < loopfactor) jump LoopExit: +/// unroll_iters = tripcount - extraiters +/// Loop: LoopBody; (executes unroll_iter times); +/// unroll_iter -= 1 +/// if (unroll_iter != 0) jump Loop: +/// LoopExit: +/// if (extraiters == 0) jump EpilExit: +/// Epil: LoopBody; (executes extraiters times) +/// extraiters -= 1 // Omitted if unroll factor is 2. +/// if (extraiters != 0) jump Epil: // Omitted if unroll factor is 2. +/// EpilExit: + +bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, + bool AllowExpensiveTripCount, + bool UseEpilogRemainder, + LoopInfo *LI, ScalarEvolution *SE, + DominatorTree *DT, bool PreserveLCSSA) { + // for now, only unroll loops that contain a single exit + if (!L->getExitingBlock()) + return false; + + // Make sure the loop is in canonical form, and there is a single + // exit block only. + if (!L->isLoopSimplifyForm()) + return false; + BasicBlock *Exit = L->getUniqueExitBlock(); // successor out of loop + if (!Exit) + return false; + + // Use Scalar Evolution to compute the trip count. This allows more loops to + // be unrolled than relying on induction var simplification. + if (!SE) + return false; + + // Only unroll loops with a computable trip count, and the trip count needs + // to be an int value (allowing a pointer type is a TODO item). 
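The prolog/epilog layouts sketched in the comment above have a straightforward source-level analogue; the following hand-written C++ is an illustration, not output of this transform, assumes an unroll factor of 4, and folds the separate extraiters == 0 test into an epilog loop that simply runs zero times.

int sum_epilog_unrolled(const int *a, unsigned tripcount) {
  int s = 0;
  unsigned extraiters = tripcount % 4;        // xtraiter
  if (tripcount < 4)
    goto epilog;                              // branch around the unrolled loop
  {
    unsigned unroll_iter = tripcount - extraiters;
    unsigned i = 0;
    do {                                      // runs unroll_iter times, a multiple of 4
      s += a[i]; s += a[i + 1]; s += a[i + 2]; s += a[i + 3];
      i += 4;
    } while (i != unroll_iter);
  }
epilog:
  for (unsigned j = tripcount - extraiters; j < tripcount; ++j)
    s += a[j];                                // executes extraiters times
  return s;
}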
+ const SCEV *BECountSC = SE->getBackedgeTakenCount(L); + if (isa<SCEVCouldNotCompute>(BECountSC) || + !BECountSC->getType()->isIntegerTy()) + return false; + + unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth(); + + // Add 1 since the backedge count doesn't include the first loop iteration. + const SCEV *TripCountSC = + SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1)); + if (isa<SCEVCouldNotCompute>(TripCountSC)) + return false; + + BasicBlock *Header = L->getHeader(); + BasicBlock *PreHeader = L->getLoopPreheader(); + BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator()); + const DataLayout &DL = Header->getModule()->getDataLayout(); + SCEVExpander Expander(*SE, DL, "loop-unroll"); + if (!AllowExpensiveTripCount && + Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR)) + return false; + + // This constraint lets us deal with an overflowing trip count easily; see the + // comment on ModVal below. + if (Log2_32(Count) > BEWidth) + return false; + + BasicBlock *Latch = L->getLoopLatch(); + + // Loop structure is the following: + // + // PreHeader + // Header + // ... + // Latch + // Exit + + BasicBlock *NewPreHeader; + BasicBlock *NewExit = nullptr; + BasicBlock *PrologExit = nullptr; + BasicBlock *EpilogPreHeader = nullptr; + BasicBlock *PrologPreHeader = nullptr; + + if (UseEpilogRemainder) { + // If epilog remainder + // Split PreHeader to insert a branch around loop for unrolling. + NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI); + NewPreHeader->setName(PreHeader->getName() + ".new"); + // Split Exit to create phi nodes from branch above. + SmallVector<BasicBlock*, 4> Preds(predecessors(Exit)); + NewExit = SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", + DT, LI, PreserveLCSSA); + // Split NewExit to insert epilog remainder loop. + EpilogPreHeader = SplitBlock(NewExit, NewExit->getTerminator(), DT, LI); + EpilogPreHeader->setName(Header->getName() + ".epil.preheader"); + } else { + // If prolog remainder + // Split the original preheader twice to insert prolog remainder loop + PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI); + PrologPreHeader->setName(Header->getName() + ".prol.preheader"); + PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(), + DT, LI); + PrologExit->setName(Header->getName() + ".prol.loopexit"); + // Split PrologExit to get NewPreHeader. + NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI); + NewPreHeader->setName(PreHeader->getName() + ".new"); + } + // Loop structure should be the following: + // Epilog Prolog + // + // PreHeader PreHeader + // *NewPreHeader *PrologPreHeader + // Header *PrologExit + // ... *NewPreHeader + // Latch Header + // *NewExit ... + // *EpilogPreHeader Latch + // Exit Exit + + // Calculate conditions for branch around loop for unrolling + // in epilog case and around prolog remainder loop in prolog case. + // Compute the number of extra iterations required, which is: + // extra iterations = run-time trip count % loop unroll factor + PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator()); + Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(), + PreHeaderBR); + Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(), + PreHeaderBR); + IRBuilder<> B(PreHeaderBR); + Value *ModVal; + // Calculate ModVal = (BECount + 1) % Count. + // Note that TripCount is BECount + 1. 
+ if (isPowerOf2_32(Count)) { + // When Count is power of 2 we don't BECount for epilog case, however we'll + // need it for a branch around unrolling loop for prolog case. + ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter"); + // 1. There are no iterations to be run in the prolog/epilog loop. + // OR + // 2. The addition computing TripCount overflowed. + // + // If (2) is true, we know that TripCount really is (1 << BEWidth) and so + // the number of iterations that remain to be run in the original loop is a + // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we + // explicitly check this above). + } else { + // As (BECount + 1) can potentially unsigned overflow we count + // (BECount % Count) + 1 which is overflow safe as BECount % Count < Count. + Value *ModValTmp = B.CreateURem(BECount, + ConstantInt::get(BECount->getType(), + Count)); + Value *ModValAdd = B.CreateAdd(ModValTmp, + ConstantInt::get(ModValTmp->getType(), 1)); + // At that point (BECount % Count) + 1 could be equal to Count. + // To handle this case we need to take mod by Count one more time. + ModVal = B.CreateURem(ModValAdd, + ConstantInt::get(BECount->getType(), Count), + "xtraiter"); + } + Value *BranchVal = + UseEpilogRemainder ? B.CreateICmpULT(BECount, + ConstantInt::get(BECount->getType(), + Count - 1)) : + B.CreateIsNotNull(ModVal, "lcmp.mod"); + BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader; + BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit; + // Branch to either remainder (extra iterations) loop or unrolling loop. + B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop); + PreHeaderBR->eraseFromParent(); + Function *F = Header->getParent(); + // Get an ordered list of blocks in the loop to help with the ordering of the + // cloned blocks in the prolog/epilog code + LoopBlocksDFS LoopBlocks(L); + LoopBlocks.perform(LI); + + // + // For each extra loop iteration, create a copy of the loop's basic blocks + // and generate a condition that branches to the copy depending on the + // number of 'left over' iterations. + // + std::vector<BasicBlock *> NewBlocks; + ValueToValueMapTy VMap; + + // For unroll factor 2 remainder loop will have 1 iterations. + // Do not create 1 iteration loop. + bool CreateRemainderLoop = (Count != 2); + + // Clone all the basic blocks in the loop. If Count is 2, we don't clone + // the loop, otherwise we create a cloned loop to execute the extra + // iterations. This function adds the appropriate CFG connections. + BasicBlock *InsertBot = UseEpilogRemainder ? Exit : PrologExit; + BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader; + CloneLoopBlocks(L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop, + InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, LI); + + // Insert the cloned blocks into the function. + F->getBasicBlockList().splice(InsertBot->getIterator(), + F->getBasicBlockList(), + NewBlocks[0]->getIterator(), + F->end()); + + // Loop structure should be the following: + // Epilog Prolog + // + // PreHeader PreHeader + // NewPreHeader PrologPreHeader + // Header PrologHeader + // ... ... + // Latch PrologLatch + // NewExit PrologExit + // EpilogPreHeader NewPreHeader + // EpilogHeader Header + // ... ... + // EpilogLatch Latch + // Exit Exit + + // Rewrite the cloned instruction operands to use the values created when the + // clone is created. 
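Both branches above compute (BECount + 1) % Count without relying on the addition BECount + 1 being representable. A small self-contained check of that claim follows, with illustrative values and 8-bit types chosen only to make the wrap-around visible.

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t Count = 8;                   // power of two
  uint8_t BECount = 255;                     // BECount + 1 wraps to 0 in 8 bits
  uint8_t TripCount = BECount + 1;           // 0 after the wrap
  // The masked form and the double-urem form agree: zero extra iterations,
  // because the remaining trip count is a multiple of Count.
  assert((TripCount & (Count - 1)) == ((BECount % Count) + 1) % Count);

  const uint8_t Count2 = 6;                  // not a power of two
  uint8_t BECount2 = 11;                     // trip count 12, a multiple of 6
  uint8_t Inner = (BECount2 % Count2) + 1;   // 5 + 1 == Count2
  assert(Inner % Count2 == 0);               // the outer urem folds it back to 0
  return 0;
}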
+ for (BasicBlock *BB : NewBlocks) { + for (Instruction &I : *BB) { + RemapInstruction(&I, VMap, + RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); + } + } + + if (UseEpilogRemainder) { + // Connect the epilog code to the original loop and update the + // PHI functions. + ConnectEpilog(L, ModVal, NewExit, Exit, PreHeader, + EpilogPreHeader, NewPreHeader, VMap, DT, LI, + PreserveLCSSA); + + // Update counter in loop for unrolling. + // I should be multiply of Count. + IRBuilder<> B2(NewPreHeader->getTerminator()); + Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter"); + BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator()); + B2.SetInsertPoint(LatchBR); + PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter", + Header->getFirstNonPHI()); + Value *IdxSub = + B2.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1), + NewIdx->getName() + ".nsub"); + Value *IdxCmp; + if (LatchBR->getSuccessor(0) == Header) + IdxCmp = B2.CreateIsNotNull(IdxSub, NewIdx->getName() + ".ncmp"); + else + IdxCmp = B2.CreateIsNull(IdxSub, NewIdx->getName() + ".ncmp"); + NewIdx->addIncoming(TestVal, NewPreHeader); + NewIdx->addIncoming(IdxSub, Latch); + LatchBR->setCondition(IdxCmp); + } else { + // Connect the prolog code to the original loop and update the + // PHI functions. + ConnectProlog(L, BECount, Count, PrologExit, PreHeader, NewPreHeader, + VMap, DT, LI, PreserveLCSSA); + } + + // If this loop is nested, then the loop unroller changes the code in the + // parent loop, so the Scalar Evolution pass needs to be run again. + if (Loop *ParentLoop = L->getParentLoop()) + SE->forgetLoop(ParentLoop); + + NumRuntimeUnrolled++; + return true; +} diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp new file mode 100644 index 000000000000..09e9f1ddc7fe --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -0,0 +1,1102 @@ +//===-- LoopUtils.cpp - Loop Utility functions -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines common loop utility functions. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; +using namespace llvm::PatternMatch; + +#define DEBUG_TYPE "loop-utils" + +bool RecurrenceDescriptor::areAllUsesIn(Instruction *I, + SmallPtrSetImpl<Instruction *> &Set) { + for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use) + if (!Set.count(dyn_cast<Instruction>(*Use))) + return false; + return true; +} + +bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurrenceKind Kind) { + switch (Kind) { + default: + break; + case RK_IntegerAdd: + case RK_IntegerMult: + case RK_IntegerOr: + case RK_IntegerAnd: + case RK_IntegerXor: + case RK_IntegerMinMax: + return true; + } + return false; +} + +bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurrenceKind Kind) { + return (Kind != RK_NoRecurrence) && !isIntegerRecurrenceKind(Kind); +} + +bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurrenceKind Kind) { + switch (Kind) { + default: + break; + case RK_IntegerAdd: + case RK_IntegerMult: + case RK_FloatAdd: + case RK_FloatMult: + return true; + } + return false; +} + +Instruction * +RecurrenceDescriptor::lookThroughAnd(PHINode *Phi, Type *&RT, + SmallPtrSetImpl<Instruction *> &Visited, + SmallPtrSetImpl<Instruction *> &CI) { + if (!Phi->hasOneUse()) + return Phi; + + const APInt *M = nullptr; + Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser()); + + // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT + // with a new integer type of the corresponding bit width. + if (match(J, m_CombineOr(m_And(m_Instruction(I), m_APInt(M)), + m_And(m_APInt(M), m_Instruction(I))))) { + int32_t Bits = (*M + 1).exactLogBase2(); + if (Bits > 0) { + RT = IntegerType::get(Phi->getContext(), Bits); + Visited.insert(Phi); + CI.insert(J); + return J; + } + } + return Phi; +} + +bool RecurrenceDescriptor::getSourceExtensionKind( + Instruction *Start, Instruction *Exit, Type *RT, bool &IsSigned, + SmallPtrSetImpl<Instruction *> &Visited, + SmallPtrSetImpl<Instruction *> &CI) { + + SmallVector<Instruction *, 8> Worklist; + bool FoundOneOperand = false; + unsigned DstSize = RT->getPrimitiveSizeInBits(); + Worklist.push_back(Exit); + + // Traverse the instructions in the reduction expression, beginning with the + // exit value. + while (!Worklist.empty()) { + Instruction *I = Worklist.pop_back_val(); + for (Use &U : I->operands()) { + + // Terminate the traversal if the operand is not an instruction, or we + // reach the starting value. + Instruction *J = dyn_cast<Instruction>(U.get()); + if (!J || J == Start) + continue; + + // Otherwise, investigate the operation if it is also in the expression. 
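The lookThroughAnd() helper above targets reductions that InstCombine has widened from a narrow type. A loop of the following shape, an illustrative example not taken from this patch, typically leaves the widened accumulator masked with 2^n - 1, which is exactly the pattern being peeled off.

#include <cstddef>
#include <cstdint>

// A sum carried in uint8_t is commonly promoted to a 32-bit PHI by the
// mid-end; the wrap-around semantics survive as an 'and %acc, 255', so the
// reduction can later be costed and evaluated at the narrower width.
uint8_t sum_bytes(const uint8_t *p, size_t n) {
  uint8_t acc = 0;
  for (size_t i = 0; i < n; ++i)
    acc = static_cast<uint8_t>(acc + p[i]);
  return acc;
}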
+ if (Visited.count(J)) { + Worklist.push_back(J); + continue; + } + + // If the operand is not in Visited, it is not a reduction operation, but + // it does feed into one. Make sure it is either a single-use sign- or + // zero-extend instruction. + CastInst *Cast = dyn_cast<CastInst>(J); + bool IsSExtInst = isa<SExtInst>(J); + if (!Cast || !Cast->hasOneUse() || !(isa<ZExtInst>(J) || IsSExtInst)) + return false; + + // Ensure the source type of the extend is no larger than the reduction + // type. It is not necessary for the types to be identical. + unsigned SrcSize = Cast->getSrcTy()->getPrimitiveSizeInBits(); + if (SrcSize > DstSize) + return false; + + // Furthermore, ensure that all such extends are of the same kind. + if (FoundOneOperand) { + if (IsSigned != IsSExtInst) + return false; + } else { + FoundOneOperand = true; + IsSigned = IsSExtInst; + } + + // Lastly, if the source type of the extend matches the reduction type, + // add the extend to CI so that we can avoid accounting for it in the + // cost model. + if (SrcSize == DstSize) + CI.insert(Cast); + } + } + return true; +} + +bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, + Loop *TheLoop, bool HasFunNoNaNAttr, + RecurrenceDescriptor &RedDes) { + if (Phi->getNumIncomingValues() != 2) + return false; + + // Reduction variables are only found in the loop header block. + if (Phi->getParent() != TheLoop->getHeader()) + return false; + + // Obtain the reduction start value from the value that comes from the loop + // preheader. + Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader()); + + // ExitInstruction is the single value which is used outside the loop. + // We only allow for a single reduction value to be used outside the loop. + // This includes users of the reduction, variables (which form a cycle + // which ends in the phi node). + Instruction *ExitInstruction = nullptr; + // Indicates that we found a reduction operation in our scan. + bool FoundReduxOp = false; + + // We start with the PHI node and scan for all of the users of this + // instruction. All users must be instructions that can be used as reduction + // variables (such as ADD). We must have a single out-of-block user. The cycle + // must include the original PHI. + bool FoundStartPHI = false; + + // To recognize min/max patterns formed by a icmp select sequence, we store + // the number of instruction we saw from the recognized min/max pattern, + // to make sure we only see exactly the two instructions. + unsigned NumCmpSelectPatternInst = 0; + InstDesc ReduxDesc(false, nullptr); + + // Data used for determining if the recurrence has been type-promoted. + Type *RecurrenceType = Phi->getType(); + SmallPtrSet<Instruction *, 4> CastInsts; + Instruction *Start = Phi; + bool IsSigned = false; + + SmallPtrSet<Instruction *, 8> VisitedInsts; + SmallVector<Instruction *, 8> Worklist; + + // Return early if the recurrence kind does not match the type of Phi. If the + // recurrence kind is arithmetic, we attempt to look through AND operations + // resulting from the type promotion performed by InstCombine. Vector + // operations are not limited to the legal integer widths, so we may be able + // to evaluate the reduction in the narrower width. 
+ if (RecurrenceType->isFloatingPointTy()) { + if (!isFloatingPointRecurrenceKind(Kind)) + return false; + } else { + if (!isIntegerRecurrenceKind(Kind)) + return false; + if (isArithmeticRecurrenceKind(Kind)) + Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts); + } + + Worklist.push_back(Start); + VisitedInsts.insert(Start); + + // A value in the reduction can be used: + // - By the reduction: + // - Reduction operation: + // - One use of reduction value (safe). + // - Multiple use of reduction value (not safe). + // - PHI: + // - All uses of the PHI must be the reduction (safe). + // - Otherwise, not safe. + // - By one instruction outside of the loop (safe). + // - By further instructions outside of the loop (not safe). + // - By an instruction that is not part of the reduction (not safe). + // This is either: + // * An instruction type other than PHI or the reduction operation. + // * A PHI in the header other than the initial PHI. + while (!Worklist.empty()) { + Instruction *Cur = Worklist.back(); + Worklist.pop_back(); + + // No Users. + // If the instruction has no users then this is a broken chain and can't be + // a reduction variable. + if (Cur->use_empty()) + return false; + + bool IsAPhi = isa<PHINode>(Cur); + + // A header PHI use other than the original PHI. + if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent()) + return false; + + // Reductions of instructions such as Div, and Sub is only possible if the + // LHS is the reduction variable. + if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) && + !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) && + !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0)))) + return false; + + // Any reduction instruction must be of one of the allowed kinds. We ignore + // the starting value (the Phi or an AND instruction if the Phi has been + // type-promoted). + if (Cur != Start) { + ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr); + if (!ReduxDesc.isRecurrence()) + return false; + } + + // A reduction operation must only have one use of the reduction value. + if (!IsAPhi && Kind != RK_IntegerMinMax && Kind != RK_FloatMinMax && + hasMultipleUsesOf(Cur, VisitedInsts)) + return false; + + // All inputs to a PHI node must be a reduction value. + if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts)) + return false; + + if (Kind == RK_IntegerMinMax && + (isa<ICmpInst>(Cur) || isa<SelectInst>(Cur))) + ++NumCmpSelectPatternInst; + if (Kind == RK_FloatMinMax && (isa<FCmpInst>(Cur) || isa<SelectInst>(Cur))) + ++NumCmpSelectPatternInst; + + // Check whether we found a reduction operator. + FoundReduxOp |= !IsAPhi && Cur != Start; + + // Process users of current instruction. Push non-PHI nodes after PHI nodes + // onto the stack. This way we are going to have seen all inputs to PHI + // nodes once we get to them. + SmallVector<Instruction *, 8> NonPHIs; + SmallVector<Instruction *, 8> PHIs; + for (User *U : Cur->users()) { + Instruction *UI = cast<Instruction>(U); + + // Check if we found the exit user. + BasicBlock *Parent = UI->getParent(); + if (!TheLoop->contains(Parent)) { + // Exit if you find multiple outside users or if the header phi node is + // being used. In this case the user uses the value of the previous + // iteration, in which case we would loose "VF-1" iterations of the + // reduction operation if we vectorize. 
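The usage rules listed above are the ones that most often decide whether a candidate is accepted. The two loops below, illustrative examples not from this patch, contrast a reduction whose value has a single in-loop use and a single out-of-loop user with one whose running value escapes every iteration and is therefore not treated as a simple reduction by this analysis.

#include <cstddef>

// Accepted shape: the add result feeds only the header PHI and the value
// used after the loop.
int sum_ok(const int *a, size_t n) {
  int s = 0;
  for (size_t i = 0; i < n; ++i)
    s = s + a[i];
  return s;
}

// Rejected shape: the running value has a second in-loop user (the store),
// so the chain contains an instruction that is not part of the reduction.
int sum_escapes(const int *a, int *trace, size_t n) {
  int s = 0;
  for (size_t i = 0; i < n; ++i) {
    s = s + a[i];
    trace[i] = s;
  }
  return s;
}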
+ if (ExitInstruction != nullptr || Cur == Phi) + return false; + + // The instruction used by an outside user must be the last instruction + // before we feed back to the reduction phi. Otherwise, we loose VF-1 + // operations on the value. + if (!is_contained(Phi->operands(), Cur)) + return false; + + ExitInstruction = Cur; + continue; + } + + // Process instructions only once (termination). Each reduction cycle + // value must only be used once, except by phi nodes and min/max + // reductions which are represented as a cmp followed by a select. + InstDesc IgnoredVal(false, nullptr); + if (VisitedInsts.insert(UI).second) { + if (isa<PHINode>(UI)) + PHIs.push_back(UI); + else + NonPHIs.push_back(UI); + } else if (!isa<PHINode>(UI) && + ((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) && + !isa<SelectInst>(UI)) || + !isMinMaxSelectCmpPattern(UI, IgnoredVal).isRecurrence())) + return false; + + // Remember that we completed the cycle. + if (UI == Phi) + FoundStartPHI = true; + } + Worklist.append(PHIs.begin(), PHIs.end()); + Worklist.append(NonPHIs.begin(), NonPHIs.end()); + } + + // This means we have seen one but not the other instruction of the + // pattern or more than just a select and cmp. + if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) && + NumCmpSelectPatternInst != 2) + return false; + + if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction) + return false; + + // If we think Phi may have been type-promoted, we also need to ensure that + // all source operands of the reduction are either SExtInsts or ZEstInsts. If + // so, we will be able to evaluate the reduction in the narrower bit width. + if (Start != Phi) + if (!getSourceExtensionKind(Start, ExitInstruction, RecurrenceType, + IsSigned, VisitedInsts, CastInsts)) + return false; + + // We found a reduction var if we have reached the original phi node and we + // only have a single instruction with out-of-loop users. + + // The ExitInstruction(Instruction which is allowed to have out-of-loop users) + // is saved as part of the RecurrenceDescriptor. + + // Save the description of this reduction variable. + RecurrenceDescriptor RD( + RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(), + ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts); + RedDes = RD; + + return true; +} + +/// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction +/// pattern corresponding to a min(X, Y) or max(X, Y). +RecurrenceDescriptor::InstDesc +RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev) { + + assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) && + "Expect a select instruction"); + Instruction *Cmp = nullptr; + SelectInst *Select = nullptr; + + // We must handle the select(cmp()) as a single instruction. Advance to the + // select. + if ((Cmp = dyn_cast<ICmpInst>(I)) || (Cmp = dyn_cast<FCmpInst>(I))) { + if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->user_begin()))) + return InstDesc(false, I); + return InstDesc(Select, Prev.getMinMaxKind()); + } + + // Only handle single use cases for now. + if (!(Select = dyn_cast<SelectInst>(I))) + return InstDesc(false, I); + if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))) && + !(Cmp = dyn_cast<FCmpInst>(I->getOperand(0)))) + return InstDesc(false, I); + if (!Cmp->hasOneUse()) + return InstDesc(false, I); + + Value *CmpLeft; + Value *CmpRight; + + // Look for a min/max pattern. 
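For the select(cmp()) handling here, the canonical source shape is a conditional select of the running extremum; the illustrative example below is not from this patch, but frontends and InstCombine usually lower it to an icmp or fcmp feeding a select, which is the two-instruction pattern counted by NumCmpSelectPatternInst.

#include <cstddef>

// A signed-max reduction; per iteration this typically becomes
//   %cmp    = icmp sgt i32 %x, %m
//   %m.next = select i1 %cmp, i32 %x, i32 %m
// i.e. exactly the cmp-plus-select pair recognized as one recurrence step.
int max_element(const int *a, size_t n) {
  int m = a[0];
  for (size_t i = 1; i < n; ++i)
    m = (a[i] > m) ? a[i] : m;
  return m;
}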
+ if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return InstDesc(Select, MRK_UIntMin); + else if (m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return InstDesc(Select, MRK_UIntMax); + else if (m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return InstDesc(Select, MRK_SIntMax); + else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return InstDesc(Select, MRK_SIntMin); + else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return InstDesc(Select, MRK_FloatMin); + else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return InstDesc(Select, MRK_FloatMax); + else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return InstDesc(Select, MRK_FloatMin); + else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return InstDesc(Select, MRK_FloatMax); + + return InstDesc(false, I); +} + +RecurrenceDescriptor::InstDesc +RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind, + InstDesc &Prev, bool HasFunNoNaNAttr) { + bool FP = I->getType()->isFloatingPointTy(); + Instruction *UAI = Prev.getUnsafeAlgebraInst(); + if (!UAI && FP && !I->hasUnsafeAlgebra()) + UAI = I; // Found an unsafe (unvectorizable) algebra instruction. + + switch (I->getOpcode()) { + default: + return InstDesc(false, I); + case Instruction::PHI: + return InstDesc(I, Prev.getMinMaxKind(), Prev.getUnsafeAlgebraInst()); + case Instruction::Sub: + case Instruction::Add: + return InstDesc(Kind == RK_IntegerAdd, I); + case Instruction::Mul: + return InstDesc(Kind == RK_IntegerMult, I); + case Instruction::And: + return InstDesc(Kind == RK_IntegerAnd, I); + case Instruction::Or: + return InstDesc(Kind == RK_IntegerOr, I); + case Instruction::Xor: + return InstDesc(Kind == RK_IntegerXor, I); + case Instruction::FMul: + return InstDesc(Kind == RK_FloatMult, I, UAI); + case Instruction::FSub: + case Instruction::FAdd: + return InstDesc(Kind == RK_FloatAdd, I, UAI); + case Instruction::FCmp: + case Instruction::ICmp: + case Instruction::Select: + if (Kind != RK_IntegerMinMax && + (!HasFunNoNaNAttr || Kind != RK_FloatMinMax)) + return InstDesc(false, I); + return isMinMaxSelectCmpPattern(I, Prev); + } +} + +bool RecurrenceDescriptor::hasMultipleUsesOf( + Instruction *I, SmallPtrSetImpl<Instruction *> &Insts) { + unsigned NumUses = 0; + for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; + ++Use) { + if (Insts.count(dyn_cast<Instruction>(*Use))) + ++NumUses; + if (NumUses > 1) + return true; + } + + return false; +} +bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop, + RecurrenceDescriptor &RedDes) { + + BasicBlock *Header = TheLoop->getHeader(); + Function &F = *Header->getParent(); + bool HasFunNoNaNAttr = + F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true"; + + if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes)) { + DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n"); + return true; + } + if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes)) { + DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n"); + return true; + } + if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes)) { + DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n"); + return true; + } + if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes)) { + DEBUG(dbgs() << "Found an AND reduction PHI." 
<< *Phi << "\n"); + return true; + } + if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes)) { + DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n"); + return true; + } + if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr, + RedDes)) { + DEBUG(dbgs() << "Found a MINMAX reduction PHI." << *Phi << "\n"); + return true; + } + if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes)) { + DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n"); + return true; + } + if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes)) { + DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n"); + return true; + } + if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes)) { + DEBUG(dbgs() << "Found an float MINMAX reduction PHI." << *Phi << "\n"); + return true; + } + // Not a reduction of known type. + return false; +} + +bool RecurrenceDescriptor::isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop, + DominatorTree *DT) { + + // Ensure the phi node is in the loop header and has two incoming values. + if (Phi->getParent() != TheLoop->getHeader() || + Phi->getNumIncomingValues() != 2) + return false; + + // Ensure the loop has a preheader and a single latch block. The loop + // vectorizer will need the latch to set up the next iteration of the loop. + auto *Preheader = TheLoop->getLoopPreheader(); + auto *Latch = TheLoop->getLoopLatch(); + if (!Preheader || !Latch) + return false; + + // Ensure the phi node's incoming blocks are the loop preheader and latch. + if (Phi->getBasicBlockIndex(Preheader) < 0 || + Phi->getBasicBlockIndex(Latch) < 0) + return false; + + // Get the previous value. The previous value comes from the latch edge while + // the initial value comes form the preheader edge. + auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch)); + if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous)) + return false; + + // Ensure every user of the phi node is dominated by the previous value. The + // dominance requirement ensures the loop vectorizer will not need to + // vectorize the initial value prior to the first iteration of the loop. + for (User *U : Phi->users()) + if (auto *I = dyn_cast<Instruction>(U)) + if (!DT->dominates(Previous, I)) + return false; + + return true; +} + +/// This function returns the identity element (or neutral element) for +/// the operation K. +Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurrenceKind K, + Type *Tp) { + switch (K) { + case RK_IntegerXor: + case RK_IntegerAdd: + case RK_IntegerOr: + // Adding, Xoring, Oring zero to a number does not change it. + return ConstantInt::get(Tp, 0); + case RK_IntegerMult: + // Multiplying a number by 1 does not change it. + return ConstantInt::get(Tp, 1); + case RK_IntegerAnd: + // AND-ing a number with an all-1 value does not change it. + return ConstantInt::get(Tp, -1, true); + case RK_FloatMult: + // Multiplying a number by 1 does not change it. + return ConstantFP::get(Tp, 1.0L); + case RK_FloatAdd: + // Adding zero to a number does not change it. + return ConstantFP::get(Tp, 0.0L); + default: + llvm_unreachable("Unknown recurrence kind"); + } +} + +/// This function translates the recurrence kind to an LLVM binary operator. 
+unsigned RecurrenceDescriptor::getRecurrenceBinOp(RecurrenceKind Kind) { + switch (Kind) { + case RK_IntegerAdd: + return Instruction::Add; + case RK_IntegerMult: + return Instruction::Mul; + case RK_IntegerOr: + return Instruction::Or; + case RK_IntegerAnd: + return Instruction::And; + case RK_IntegerXor: + return Instruction::Xor; + case RK_FloatMult: + return Instruction::FMul; + case RK_FloatAdd: + return Instruction::FAdd; + case RK_IntegerMinMax: + return Instruction::ICmp; + case RK_FloatMinMax: + return Instruction::FCmp; + default: + llvm_unreachable("Unknown recurrence operation"); + } +} + +Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder, + MinMaxRecurrenceKind RK, + Value *Left, Value *Right) { + CmpInst::Predicate P = CmpInst::ICMP_NE; + switch (RK) { + default: + llvm_unreachable("Unknown min/max recurrence kind"); + case MRK_UIntMin: + P = CmpInst::ICMP_ULT; + break; + case MRK_UIntMax: + P = CmpInst::ICMP_UGT; + break; + case MRK_SIntMin: + P = CmpInst::ICMP_SLT; + break; + case MRK_SIntMax: + P = CmpInst::ICMP_SGT; + break; + case MRK_FloatMin: + P = CmpInst::FCMP_OLT; + break; + case MRK_FloatMax: + P = CmpInst::FCMP_OGT; + break; + } + + // We only match FP sequences with unsafe algebra, so we can unconditionally + // set it on any generated instructions. + IRBuilder<>::FastMathFlagGuard FMFG(Builder); + FastMathFlags FMF; + FMF.setUnsafeAlgebra(); + Builder.setFastMathFlags(FMF); + + Value *Cmp; + if (RK == MRK_FloatMin || RK == MRK_FloatMax) + Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp"); + else + Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp"); + + Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select"); + return Select; +} + +InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K, + const SCEV *Step, BinaryOperator *BOp) + : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) { + assert(IK != IK_NoInduction && "Not an induction"); + + // Start value type should match the induction kind and the value + // itself should not be null. + assert(StartValue && "StartValue is null"); + assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) && + "StartValue is not a pointer for pointer induction"); + assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) && + "StartValue is not an integer for integer induction"); + + // Check the Step Value. It should be non-zero integer value. 
+ assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) && + "Step value is zero"); + + assert((IK != IK_PtrInduction || getConstIntStepValue()) && + "Step value should be constant for pointer induction"); + assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) && + "StepValue is not an integer"); + + assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) && + "StepValue is not FP for FpInduction"); + assert((IK != IK_FpInduction || (InductionBinOp && + (InductionBinOp->getOpcode() == Instruction::FAdd || + InductionBinOp->getOpcode() == Instruction::FSub))) && + "Binary opcode should be specified for FP induction"); +} + +int InductionDescriptor::getConsecutiveDirection() const { + ConstantInt *ConstStep = getConstIntStepValue(); + if (ConstStep && (ConstStep->isOne() || ConstStep->isMinusOne())) + return ConstStep->getSExtValue(); + return 0; +} + +ConstantInt *InductionDescriptor::getConstIntStepValue() const { + if (isa<SCEVConstant>(Step)) + return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue()); + return nullptr; +} + +Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index, + ScalarEvolution *SE, + const DataLayout& DL) const { + + SCEVExpander Exp(*SE, DL, "induction"); + assert(Index->getType() == Step->getType() && + "Index type does not match StepValue type"); + switch (IK) { + case IK_IntInduction: { + assert(Index->getType() == StartValue->getType() && + "Index type does not match StartValue type"); + + // FIXME: Theoretically, we can call getAddExpr() of ScalarEvolution + // and calculate (Start + Index * Step) for all cases, without + // special handling for "isOne" and "isMinusOne". + // But in the real life the result code getting worse. We mix SCEV + // expressions and ADD/SUB operations and receive redundant + // intermediate values being calculated in different ways and + // Instcombine is unable to reduce them all. + + if (getConstIntStepValue() && + getConstIntStepValue()->isMinusOne()) + return B.CreateSub(StartValue, Index); + if (getConstIntStepValue() && + getConstIntStepValue()->isOne()) + return B.CreateAdd(StartValue, Index); + const SCEV *S = SE->getAddExpr(SE->getSCEV(StartValue), + SE->getMulExpr(Step, SE->getSCEV(Index))); + return Exp.expandCodeFor(S, StartValue->getType(), &*B.GetInsertPoint()); + } + case IK_PtrInduction: { + assert(isa<SCEVConstant>(Step) && + "Expected constant step for pointer induction"); + const SCEV *S = SE->getMulExpr(SE->getSCEV(Index), Step); + Index = Exp.expandCodeFor(S, Index->getType(), &*B.GetInsertPoint()); + return B.CreateGEP(nullptr, StartValue, Index); + } + case IK_FpInduction: { + assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value"); + assert(InductionBinOp && + (InductionBinOp->getOpcode() == Instruction::FAdd || + InductionBinOp->getOpcode() == Instruction::FSub) && + "Original bin op should be defined for FP induction"); + + Value *StepValue = cast<SCEVUnknown>(Step)->getValue(); + + // Floating point operations had to be 'fast' to enable the induction. + FastMathFlags Flags; + Flags.setUnsafeAlgebra(); + + Value *MulExp = B.CreateFMul(StepValue, Index); + if (isa<Instruction>(MulExp)) + // We have to check, the MulExp may be a constant. 
+ cast<Instruction>(MulExp)->setFastMathFlags(Flags); + + Value *BOp = B.CreateBinOp(InductionBinOp->getOpcode() , StartValue, + MulExp, "induction"); + if (isa<Instruction>(BOp)) + cast<Instruction>(BOp)->setFastMathFlags(Flags); + + return BOp; + } + case IK_NoInduction: + return nullptr; + } + llvm_unreachable("invalid enum"); +} + +bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop, + ScalarEvolution *SE, + InductionDescriptor &D) { + + // Here we only handle FP induction variables. + assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type"); + + if (TheLoop->getHeader() != Phi->getParent()) + return false; + + // The loop may have multiple entrances or multiple exits; we can analyze + // this phi if it has a unique entry value and a unique backedge value. + if (Phi->getNumIncomingValues() != 2) + return false; + Value *BEValue = nullptr, *StartValue = nullptr; + if (TheLoop->contains(Phi->getIncomingBlock(0))) { + BEValue = Phi->getIncomingValue(0); + StartValue = Phi->getIncomingValue(1); + } else { + assert(TheLoop->contains(Phi->getIncomingBlock(1)) && + "Unexpected Phi node in the loop"); + BEValue = Phi->getIncomingValue(1); + StartValue = Phi->getIncomingValue(0); + } + + BinaryOperator *BOp = dyn_cast<BinaryOperator>(BEValue); + if (!BOp) + return false; + + Value *Addend = nullptr; + if (BOp->getOpcode() == Instruction::FAdd) { + if (BOp->getOperand(0) == Phi) + Addend = BOp->getOperand(1); + else if (BOp->getOperand(1) == Phi) + Addend = BOp->getOperand(0); + } else if (BOp->getOpcode() == Instruction::FSub) + if (BOp->getOperand(0) == Phi) + Addend = BOp->getOperand(1); + + if (!Addend) + return false; + + // The addend should be loop invariant + if (auto *I = dyn_cast<Instruction>(Addend)) + if (TheLoop->contains(I)) + return false; + + // FP Step has unknown SCEV + const SCEV *Step = SE->getUnknown(Addend); + D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp); + return true; +} + +bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop, + PredicatedScalarEvolution &PSE, + InductionDescriptor &D, + bool Assume) { + Type *PhiTy = Phi->getType(); + + // Handle integer and pointer inductions variables. + // Now we handle also FP induction but not trying to make a + // recurrent expression from the PHI node in-place. + + if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() && + !PhiTy->isFloatTy() && !PhiTy->isDoubleTy() && !PhiTy->isHalfTy()) + return false; + + if (PhiTy->isFloatingPointTy()) + return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D); + + const SCEV *PhiScev = PSE.getSCEV(Phi); + const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev); + + // We need this expression to be an AddRecExpr. + if (Assume && !AR) + AR = PSE.getAsAddRec(Phi); + + if (!AR) { + DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); + return false; + } + + return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR); +} + +bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop, + ScalarEvolution *SE, + InductionDescriptor &D, + const SCEV *Expr) { + Type *PhiTy = Phi->getType(); + // We only handle integer and pointer inductions variables. + if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy()) + return false; + + // Check that the PHI is consecutive. + const SCEV *PhiScev = Expr ? 
Expr : SE->getSCEV(Phi); + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev); + + if (!AR) { + DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); + return false; + } + + assert(TheLoop->getHeader() == Phi->getParent() && + "PHI is an AddRec for a different loop?!"); + Value *StartValue = + Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader()); + const SCEV *Step = AR->getStepRecurrence(*SE); + // Calculate the pointer stride and check if it is consecutive. + // The stride may be a constant or a loop invariant integer value. + const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step); + if (!ConstStep && !SE->isLoopInvariant(Step, TheLoop)) + return false; + + if (PhiTy->isIntegerTy()) { + D = InductionDescriptor(StartValue, IK_IntInduction, Step); + return true; + } + + assert(PhiTy->isPointerTy() && "The PHI must be a pointer"); + // Pointer induction should be a constant. + if (!ConstStep) + return false; + + ConstantInt *CV = ConstStep->getValue(); + Type *PointerElementType = PhiTy->getPointerElementType(); + // The pointer stride cannot be determined if the pointer element type is not + // sized. + if (!PointerElementType->isSized()) + return false; + + const DataLayout &DL = Phi->getModule()->getDataLayout(); + int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType)); + if (!Size) + return false; + + int64_t CVSize = CV->getSExtValue(); + if (CVSize % Size) + return false; + auto *StepValue = SE->getConstant(CV->getType(), CVSize / Size, + true /* signed */); + D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue); + return true; +} + +/// \brief Returns the instructions that use values defined in the loop. +SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) { + SmallVector<Instruction *, 8> UsedOutside; + + for (auto *Block : L->getBlocks()) + // FIXME: I believe that this could use copy_if if the Inst reference could + // be adapted into a pointer. + for (auto &Inst : *Block) { + auto Users = Inst.users(); + if (any_of(Users, [&](User *U) { + auto *Use = cast<Instruction>(U); + return !L->contains(Use->getParent()); + })) + UsedOutside.push_back(&Inst); + } + + return UsedOutside; +} + +void llvm::getLoopAnalysisUsage(AnalysisUsage &AU) { + // By definition, all loop passes need the LoopInfo analysis and the + // Dominator tree it depends on. Because they all participate in the loop + // pass manager, they must also preserve these. + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); + AU.addRequired<LoopInfoWrapperPass>(); + AU.addPreserved<LoopInfoWrapperPass>(); + + // We must also preserve LoopSimplify and LCSSA. We locally access their IDs + // here because users shouldn't directly get them from this header. + extern char &LoopSimplifyID; + extern char &LCSSAID; + AU.addRequiredID(LoopSimplifyID); + AU.addPreservedID(LoopSimplifyID); + AU.addRequiredID(LCSSAID); + AU.addPreservedID(LCSSAID); + // This is used in the LPPassManager to perform LCSSA verification on passes + // which preserve lcssa form + AU.addRequired<LCSSAVerificationPass>(); + AU.addPreserved<LCSSAVerificationPass>(); + + // Loop passes are designed to run inside of a loop pass manager which means + // that any function analyses they require must be required by the first loop + // pass in the manager (so that it is computed before the loop pass manager + // runs) and preserved by all loop pasess in the manager. 
To make this + // reasonably robust, the set needed for most loop passes is maintained here. + // If your loop pass requires an analysis not listed here, you will need to + // carefully audit the loop pass manager nesting structure that results. + AU.addRequired<AAResultsWrapperPass>(); + AU.addPreserved<AAResultsWrapperPass>(); + AU.addPreserved<BasicAAWrapperPass>(); + AU.addPreserved<GlobalsAAWrapperPass>(); + AU.addPreserved<SCEVAAWrapperPass>(); + AU.addRequired<ScalarEvolutionWrapperPass>(); + AU.addPreserved<ScalarEvolutionWrapperPass>(); +} + +/// Manually defined generic "LoopPass" dependency initialization. This is used +/// to initialize the exact set of passes from above in \c +/// getLoopAnalysisUsage. It can be used within a loop pass's initialization +/// with: +/// +/// INITIALIZE_PASS_DEPENDENCY(LoopPass) +/// +/// As-if "LoopPass" were a pass. +void llvm::initializeLoopPassPass(PassRegistry &Registry) { + INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) + INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) + INITIALIZE_PASS_DEPENDENCY(LoopSimplify) + INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass) + INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) + INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass) + INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) + INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass) + INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) +} + +/// \brief Find string metadata for loop +/// +/// If it has a value (e.g. {"llvm.distribute", 1} return the value as an +/// operand or null otherwise. If the string metadata is not found return +/// Optional's not-a-value. +Optional<const MDOperand *> llvm::findStringMetadataForLoop(Loop *TheLoop, + StringRef Name) { + MDNode *LoopID = TheLoop->getLoopID(); + // Return none if LoopID is false. + if (!LoopID) + return None; + + // First operand should refer to the loop id itself. + assert(LoopID->getNumOperands() > 0 && "requires at least one operand"); + assert(LoopID->getOperand(0) == LoopID && "invalid loop id"); + + // Iterate over LoopID operands and look for MDString Metadata + for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) { + MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i)); + if (!MD) + continue; + MDString *S = dyn_cast<MDString>(MD->getOperand(0)); + if (!S) + continue; + // Return true if MDString holds expected MetaData. + if (Name.equals(S->getString())) + switch (MD->getNumOperands()) { + case 1: + return nullptr; + case 2: + return &MD->getOperand(1); + default: + llvm_unreachable("loop metadata has 0 or 1 operand"); + } + } + return None; +} + +/// Returns true if the instruction in a loop is guaranteed to execute at least +/// once. +bool llvm::isGuaranteedToExecute(const Instruction &Inst, + const DominatorTree *DT, const Loop *CurLoop, + const LoopSafetyInfo *SafetyInfo) { + // We have to check to make sure that the instruction dominates all + // of the exit blocks. If it doesn't, then there is a path out of the loop + // which does not execute this instruction, so we can't hoist it. + + // If the instruction is in the header block for the loop (which is very + // common), it is always guaranteed to dominate the exit blocks. Since this + // is a common case, and can save some work, check it now. + if (Inst.getParent() == CurLoop->getHeader()) + // If there's a throw in the header block, we can't guarantee we'll reach + // Inst. + return !SafetyInfo->HeaderMayThrow; + + // Somewhere in this loop there is an instruction which may throw and make us + // exit the loop. 
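A short caller for findStringMetadataForLoop(), reusing the {"llvm.distribute", 1} example from its comment; the helper name is hypothetical and only illustrates the three possible outcomes.

// Hypothetical helper: Present is false when the string metadata is absent;
// otherwise the return value is null for a bare flag, or points at the value
// operand (e.g. the 1 in {"llvm.distribute", 1}).
static const MDOperand *getDistributeHint(Loop *TheLoop, bool &Present) {
  Optional<const MDOperand *> Op =
      findStringMetadataForLoop(TheLoop, "llvm.distribute");
  Present = Op.hasValue();
  return Present ? *Op : nullptr;
}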
+ if (SafetyInfo->MayThrow) + return false; + + // Get the exit blocks for the current loop. + SmallVector<BasicBlock *, 8> ExitBlocks; + CurLoop->getExitBlocks(ExitBlocks); + + // Verify that the block dominates each of the exit blocks of the loop. + for (BasicBlock *ExitBlock : ExitBlocks) + if (!DT->dominates(Inst.getParent(), ExitBlock)) + return false; + + // As a degenerate case, if the loop is statically infinite then we haven't + // proven anything since there are no exit blocks. + if (ExitBlocks.empty()) + return false; + + // FIXME: In general, we have to prove that the loop isn't an infinite loop. + // See http::llvm.org/PR24078 . (The "ExitBlocks.empty()" check above is + // just a special case of this.) + return true; +} + +Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) { + // Only support loops with a unique exiting block, and a latch. + if (!L->getExitingBlock()) + return None; + + // Get the branch weights for the the loop's backedge. + BranchInst *LatchBR = + dyn_cast<BranchInst>(L->getLoopLatch()->getTerminator()); + if (!LatchBR || LatchBR->getNumSuccessors() != 2) + return None; + + assert((LatchBR->getSuccessor(0) == L->getHeader() || + LatchBR->getSuccessor(1) == L->getHeader()) && + "At least one edge out of the latch must go to the header"); + + // To estimate the number of times the loop body was executed, we want to + // know the number of times the backedge was taken, vs. the number of times + // we exited the loop. + uint64_t TrueVal, FalseVal; + if (!LatchBR->extractProfMetadata(TrueVal, FalseVal)) + return None; + + if (!TrueVal || !FalseVal) + return 0; + + // Divide the count of the backedge by the count of the edge exiting the loop, + // rounding to nearest. + if (LatchBR->getSuccessor(0) == L->getHeader()) + return (TrueVal + (FalseVal / 2)) / FalseVal; + else + return (FalseVal + (TrueVal / 2)) / TrueVal; +} diff --git a/contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp b/contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp new file mode 100644 index 000000000000..29756d9dab7f --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp @@ -0,0 +1,323 @@ +//===- LoopVersioning.cpp - Utility to version a loop ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a utility class to perform loop versioning. The versioned +// loop speculates that otherwise may-aliasing memory accesses don't overlap and +// emits checks to prove this. 
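To make the trip-count estimate above concrete: with !prof branch_weights of 300 on the latch edge back to the header and 100 on the exit edge, and the header as successor 0, the function returns (300 + 100/2) / 100 = 3, i.e. the backedge is taken roughly three times per entry into the loop, rounded to the nearest integer.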
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/LoopVersioning.h" +#include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" + +using namespace llvm; + +static cl::opt<bool> + AnnotateNoAlias("loop-version-annotate-no-alias", cl::init(true), + cl::Hidden, + cl::desc("Add no-alias annotation for instructions that " + "are disambiguated by memchecks")); + +LoopVersioning::LoopVersioning(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI, + DominatorTree *DT, ScalarEvolution *SE, + bool UseLAIChecks) + : VersionedLoop(L), NonVersionedLoop(nullptr), LAI(LAI), LI(LI), DT(DT), + SE(SE) { + assert(L->getExitBlock() && "No single exit block"); + assert(L->isLoopSimplifyForm() && "Loop is not in loop-simplify form"); + if (UseLAIChecks) { + setAliasChecks(LAI.getRuntimePointerChecking()->getChecks()); + setSCEVChecks(LAI.getPSE().getUnionPredicate()); + } +} + +void LoopVersioning::setAliasChecks( + SmallVector<RuntimePointerChecking::PointerCheck, 4> Checks) { + AliasChecks = std::move(Checks); +} + +void LoopVersioning::setSCEVChecks(SCEVUnionPredicate Check) { + Preds = std::move(Check); +} + +void LoopVersioning::versionLoop( + const SmallVectorImpl<Instruction *> &DefsUsedOutside) { + Instruction *FirstCheckInst; + Instruction *MemRuntimeCheck; + Value *SCEVRuntimeCheck; + Value *RuntimeCheck = nullptr; + + // Add the memcheck in the original preheader (this is empty initially). + BasicBlock *RuntimeCheckBB = VersionedLoop->getLoopPreheader(); + std::tie(FirstCheckInst, MemRuntimeCheck) = + LAI.addRuntimeChecks(RuntimeCheckBB->getTerminator(), AliasChecks); + + const SCEVUnionPredicate &Pred = LAI.getPSE().getUnionPredicate(); + SCEVExpander Exp(*SE, RuntimeCheckBB->getModule()->getDataLayout(), + "scev.check"); + SCEVRuntimeCheck = + Exp.expandCodeForPredicate(&Pred, RuntimeCheckBB->getTerminator()); + auto *CI = dyn_cast<ConstantInt>(SCEVRuntimeCheck); + + // Discard the SCEV runtime check if it is always true. + if (CI && CI->isZero()) + SCEVRuntimeCheck = nullptr; + + if (MemRuntimeCheck && SCEVRuntimeCheck) { + RuntimeCheck = BinaryOperator::Create(Instruction::Or, MemRuntimeCheck, + SCEVRuntimeCheck, "lver.safe"); + if (auto *I = dyn_cast<Instruction>(RuntimeCheck)) + I->insertBefore(RuntimeCheckBB->getTerminator()); + } else + RuntimeCheck = MemRuntimeCheck ? MemRuntimeCheck : SCEVRuntimeCheck; + + assert(RuntimeCheck && "called even though we don't need " + "any runtime checks"); + + // Rename the block to make the IR more readable. + RuntimeCheckBB->setName(VersionedLoop->getHeader()->getName() + + ".lver.check"); + + // Create empty preheader for the loop (and after cloning for the + // non-versioned loop). + BasicBlock *PH = + SplitBlock(RuntimeCheckBB, RuntimeCheckBB->getTerminator(), DT, LI); + PH->setName(VersionedLoop->getHeader()->getName() + ".ph"); + + // Clone the loop including the preheader. + // + // FIXME: This does not currently preserve SimplifyLoop because the exit + // block is a join between the two loops. 
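Before the cloning below, it may help to see what the finished transformation computes in source terms. The following sketch uses hypothetical stand-ins (ranges_may_overlap, scev_predicates_hold) for the emitted checks and is an illustration rather than the pass's output: when any check fails, control reaches the unchanged ".lver.orig" copy; otherwise the copy that may be optimized under the no-alias assumption runs.

// Hypothetical stand-ins for the runtime checks that versionLoop() emits.
static bool ranges_may_overlap(const int *dst, const int *src, unsigned n) {
  return src < dst + n && dst < src + n;     // conservative overlap test
}
static bool scev_predicates_hold() { return true; }

void copy_versioned(int *dst, const int *src, unsigned n) {
  if (ranges_may_overlap(dst, src, n) || !scev_predicates_hold()) {
    for (unsigned i = 0; i < n; ++i)         // non-versioned fallback, kept verbatim
      dst[i] = src[i] + 1;
  } else {
    for (unsigned i = 0; i < n; ++i)         // versioned loop: no aliasing assumed
      dst[i] = src[i] + 1;
  }
}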
+ SmallVector<BasicBlock *, 8> NonVersionedLoopBlocks; + NonVersionedLoop = + cloneLoopWithPreheader(PH, RuntimeCheckBB, VersionedLoop, VMap, + ".lver.orig", LI, DT, NonVersionedLoopBlocks); + remapInstructionsInBlocks(NonVersionedLoopBlocks, VMap); + + // Insert the conditional branch based on the result of the memchecks. + Instruction *OrigTerm = RuntimeCheckBB->getTerminator(); + BranchInst::Create(NonVersionedLoop->getLoopPreheader(), + VersionedLoop->getLoopPreheader(), RuntimeCheck, OrigTerm); + OrigTerm->eraseFromParent(); + + // The loops merge in the original exit block. This is now dominated by the + // memchecking block. + DT->changeImmediateDominator(VersionedLoop->getExitBlock(), RuntimeCheckBB); + + // Adds the necessary PHI nodes for the versioned loops based on the + // loop-defined values used outside of the loop. + addPHINodes(DefsUsedOutside); +} + +void LoopVersioning::addPHINodes( + const SmallVectorImpl<Instruction *> &DefsUsedOutside) { + BasicBlock *PHIBlock = VersionedLoop->getExitBlock(); + assert(PHIBlock && "No single successor to loop exit block"); + PHINode *PN; + + // First add a single-operand PHI for each DefsUsedOutside if one does not + // exists yet. + for (auto *Inst : DefsUsedOutside) { + // See if we have a single-operand PHI with the value defined by the + // original loop. + for (auto I = PHIBlock->begin(); (PN = dyn_cast<PHINode>(I)); ++I) { + if (PN->getIncomingValue(0) == Inst) + break; + } + // If not create it. + if (!PN) { + PN = PHINode::Create(Inst->getType(), 2, Inst->getName() + ".lver", + &PHIBlock->front()); + for (auto *User : Inst->users()) + if (!VersionedLoop->contains(cast<Instruction>(User)->getParent())) + User->replaceUsesOfWith(Inst, PN); + PN->addIncoming(Inst, VersionedLoop->getExitingBlock()); + } + } + + // Then for each PHI add the operand for the edge from the cloned loop. + for (auto I = PHIBlock->begin(); (PN = dyn_cast<PHINode>(I)); ++I) { + assert(PN->getNumOperands() == 1 && + "Exit block should only have on predecessor"); + + // If the definition was cloned used that otherwise use the same value. + Value *ClonedValue = PN->getIncomingValue(0); + auto Mapped = VMap.find(ClonedValue); + if (Mapped != VMap.end()) + ClonedValue = Mapped->second; + + PN->addIncoming(ClonedValue, NonVersionedLoop->getExitingBlock()); + } +} + +void LoopVersioning::prepareNoAliasMetadata() { + // We need to turn the no-alias relation between pointer checking groups into + // no-aliasing annotations between instructions. + // + // We accomplish this by mapping each pointer checking group (a set of + // pointers memchecked together) to an alias scope and then also mapping each + // group to the list of scopes it can't alias. + + const RuntimePointerChecking *RtPtrChecking = LAI.getRuntimePointerChecking(); + LLVMContext &Context = VersionedLoop->getHeader()->getContext(); + + // First allocate an aliasing scope for each pointer checking group. + // + // While traversing through the checking groups in the loop, also create a + // reverse map from pointers to the pointer checking group they were assigned + // to. 
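  // (Rough source-level analogy, not from the patch, of what versionLoop()
  //  above produces and what the metadata built below is for; mayOverlap is a
  //  hypothetical stand-in for the emitted memchecks.)
  //
  //   void analogy(int *A, int *B, int N) {
  //     if (mayOverlap(A, B, N)) {          // lver.check block
  //       for (int i = 0; i < N; ++i)       // non-versioned (original) loop
  //         A[i] += B[i];
  //     } else {
  //       for (int i = 0; i < N; ++i)       // versioned loop; its accesses
  //         A[i] += B[i];                   // get the scope/noalias metadata
  //     }
  //   }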
+ MDBuilder MDB(Context); + MDNode *Domain = MDB.createAnonymousAliasScopeDomain("LVerDomain"); + + for (const auto &Group : RtPtrChecking->CheckingGroups) { + GroupToScope[&Group] = MDB.createAnonymousAliasScope(Domain); + + for (unsigned PtrIdx : Group.Members) + PtrToGroup[RtPtrChecking->getPointerInfo(PtrIdx).PointerValue] = &Group; + } + + // Go through the checks and for each pointer group, collect the scopes for + // each non-aliasing pointer group. + DenseMap<const RuntimePointerChecking::CheckingPtrGroup *, + SmallVector<Metadata *, 4>> + GroupToNonAliasingScopes; + + for (const auto &Check : AliasChecks) + GroupToNonAliasingScopes[Check.first].push_back(GroupToScope[Check.second]); + + // Finally, transform the above to actually map to scope list which is what + // the metadata uses. + + for (auto Pair : GroupToNonAliasingScopes) + GroupToNonAliasingScopeList[Pair.first] = MDNode::get(Context, Pair.second); +} + +void LoopVersioning::annotateLoopWithNoAlias() { + if (!AnnotateNoAlias) + return; + + // First prepare the maps. + prepareNoAliasMetadata(); + + // Add the scope and no-alias metadata to the instructions. + for (Instruction *I : LAI.getDepChecker().getMemoryInstructions()) { + annotateInstWithNoAlias(I); + } +} + +void LoopVersioning::annotateInstWithNoAlias(Instruction *VersionedInst, + const Instruction *OrigInst) { + if (!AnnotateNoAlias) + return; + + LLVMContext &Context = VersionedLoop->getHeader()->getContext(); + const Value *Ptr = isa<LoadInst>(OrigInst) + ? cast<LoadInst>(OrigInst)->getPointerOperand() + : cast<StoreInst>(OrigInst)->getPointerOperand(); + + // Find the group for the pointer and then add the scope metadata. + auto Group = PtrToGroup.find(Ptr); + if (Group != PtrToGroup.end()) { + VersionedInst->setMetadata( + LLVMContext::MD_alias_scope, + MDNode::concatenate( + VersionedInst->getMetadata(LLVMContext::MD_alias_scope), + MDNode::get(Context, GroupToScope[Group->second]))); + + // Add the no-alias metadata. + auto NonAliasingScopeList = GroupToNonAliasingScopeList.find(Group->second); + if (NonAliasingScopeList != GroupToNonAliasingScopeList.end()) + VersionedInst->setMetadata( + LLVMContext::MD_noalias, + MDNode::concatenate( + VersionedInst->getMetadata(LLVMContext::MD_noalias), + NonAliasingScopeList->second)); + } +} + +namespace { +/// \brief Also expose this is a pass. Currently this is only used for +/// unit-testing. It adds all memchecks necessary to remove all may-aliasing +/// array accesses from the loop. +class LoopVersioningPass : public FunctionPass { +public: + LoopVersioningPass() : FunctionPass(ID) { + initializeLoopVersioningPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { + auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>(); + auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); + + // Build up a worklist of inner-loops to version. This is necessary as the + // act of versioning a loop creates new loops and can invalidate iterators + // across the loops. + SmallVector<Loop *, 8> Worklist; + + for (Loop *TopLevelLoop : *LI) + for (Loop *L : depth_first(TopLevelLoop)) + // We only handle inner-most loops. + if (L->empty()) + Worklist.push_back(L); + + // Now walk the identified inner loops. 
+ bool Changed = false; + for (Loop *L : Worklist) { + const LoopAccessInfo &LAI = LAA->getInfo(L); + if (L->isLoopSimplifyForm() && (LAI.getNumRuntimePointerChecks() || + !LAI.getPSE().getUnionPredicate().isAlwaysTrue())) { + LoopVersioning LVer(LAI, L, LI, DT, SE); + LVer.versionLoop(); + LVer.annotateLoopWithNoAlias(); + Changed = true; + } + } + + return Changed; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<LoopInfoWrapperPass>(); + AU.addPreserved<LoopInfoWrapperPass>(); + AU.addRequired<LoopAccessLegacyAnalysis>(); + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); + AU.addRequired<ScalarEvolutionWrapperPass>(); + } + + static char ID; +}; +} + +#define LVER_OPTION "loop-versioning" +#define DEBUG_TYPE LVER_OPTION + +char LoopVersioningPass::ID; +static const char LVer_name[] = "Loop Versioning"; + +INITIALIZE_PASS_BEGIN(LoopVersioningPass, LVER_OPTION, LVer_name, false, false) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) +INITIALIZE_PASS_END(LoopVersioningPass, LVER_OPTION, LVer_name, false, false) + +namespace llvm { +FunctionPass *createLoopVersioningPass() { + return new LoopVersioningPass(); +} +} diff --git a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp new file mode 100644 index 000000000000..ee84541e526d --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp @@ -0,0 +1,94 @@ +//===- LowerInvoke.cpp - Eliminate Invoke instructions --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This transformation is designed for use by code generators which do not yet +// support stack unwinding. This pass converts 'invoke' instructions to 'call' +// instructions, so that any exception-handling 'landingpad' blocks become dead +// code (which can be removed by running the '-simplifycfg' pass afterwards). +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/LowerInvoke.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/Scalar.h" +using namespace llvm; + +#define DEBUG_TYPE "lowerinvoke" + +STATISTIC(NumInvokes, "Number of invokes replaced"); + +namespace { + class LowerInvokeLegacyPass : public FunctionPass { + public: + static char ID; // Pass identification, replacement for typeid + explicit LowerInvokeLegacyPass() : FunctionPass(ID) { + initializeLowerInvokeLegacyPassPass(*PassRegistry::getPassRegistry()); + } + bool runOnFunction(Function &F) override; + }; +} + +char LowerInvokeLegacyPass::ID = 0; +INITIALIZE_PASS(LowerInvokeLegacyPass, "lowerinvoke", + "Lower invoke and unwind, for unwindless code generators", + false, false) + +static bool runImpl(Function &F) { + bool Changed = false; + for (BasicBlock &BB : F) + if (InvokeInst *II = dyn_cast<InvokeInst>(BB.getTerminator())) { + SmallVector<Value *, 16> CallArgs(II->op_begin(), II->op_end() - 3); + // Insert a normal call instruction... 
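      // (Illustrative sketch, not from the patch: the rewrite below turns
      //    invoke void @f() to label %cont unwind label %lpad
      //  into
      //    call void @f()
      //    br label %cont
      //  leaving the %lpad landingpad block unreachable so that a later
      //  -simplifycfg run can remove it.)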
+ CallInst *NewCall = + CallInst::Create(II->getCalledValue(), CallArgs, "", II); + NewCall->takeName(II); + NewCall->setCallingConv(II->getCallingConv()); + NewCall->setAttributes(II->getAttributes()); + NewCall->setDebugLoc(II->getDebugLoc()); + II->replaceAllUsesWith(NewCall); + + // Insert an unconditional branch to the normal destination. + BranchInst::Create(II->getNormalDest(), II); + + // Remove any PHI node entries from the exception destination. + II->getUnwindDest()->removePredecessor(&BB); + + // Remove the invoke instruction now. + BB.getInstList().erase(II); + + ++NumInvokes; + Changed = true; + } + return Changed; +} + +bool LowerInvokeLegacyPass::runOnFunction(Function &F) { + return runImpl(F); +} + +namespace llvm { +char &LowerInvokePassID = LowerInvokeLegacyPass::ID; + +// Public Interface To the LowerInvoke pass. +FunctionPass *createLowerInvokePass() { return new LowerInvokeLegacyPass(); } + +PreservedAnalyses LowerInvokePass::run(Function &F, + FunctionAnalysisManager &AM) { + bool Changed = runImpl(F); + if (!Changed) + return PreservedAnalyses::all(); + + return PreservedAnalyses::none(); +} +} diff --git a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp new file mode 100644 index 000000000000..75cd3bc8b2bf --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp @@ -0,0 +1,523 @@ +//===- LowerSwitch.cpp - Eliminate Switch instructions --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The LowerSwitch transformation rewrites switch instructions with a sequence +// of branches, which allows targets to get away with not implementing the +// switch instruction until it is convenient. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Pass.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" +#include <algorithm> +using namespace llvm; + +#define DEBUG_TYPE "lower-switch" + +namespace { + struct IntRange { + int64_t Low, High; + }; + // Return true iff R is covered by Ranges. + static bool IsInRanges(const IntRange &R, + const std::vector<IntRange> &Ranges) { + // Note: Ranges must be sorted, non-overlapping and non-adjacent. + + // Find the first range whose High field is >= R.High, + // then check if the Low field is <= R.Low. If so, we + // have a Range that covers R. + auto I = std::lower_bound( + Ranges.begin(), Ranges.end(), R, + [](const IntRange &A, const IntRange &B) { return A.High < B.High; }); + return I != Ranges.end() && I->Low <= R.Low; + } + + /// Replace all SwitchInst instructions with chained branch instructions. 
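  // (Illustrative sketch, not from the patch, of the shape of that rewrite:
  //    switch i32 %val, label %default [ i32 0, label %a
  //                                      i32 1, label %a
  //                                      i32 5, label %b ]
  //  is clusterified into the case ranges [0,1] -> %a and [5,5] -> %b, which
  //  are then emitted as a small balanced tree of NodeBlock compares with
  //  LeafBlock range tests at the leaves, each branching to the default
  //  block when the value does not match.)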
+ class LowerSwitch : public FunctionPass { + public: + static char ID; // Pass identification, replacement for typeid + LowerSwitch() : FunctionPass(ID) { + initializeLowerSwitchPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + + struct CaseRange { + ConstantInt* Low; + ConstantInt* High; + BasicBlock* BB; + + CaseRange(ConstantInt *low, ConstantInt *high, BasicBlock *bb) + : Low(low), High(high), BB(bb) {} + }; + + typedef std::vector<CaseRange> CaseVector; + typedef std::vector<CaseRange>::iterator CaseItr; + private: + void processSwitchInst(SwitchInst *SI, SmallPtrSetImpl<BasicBlock*> &DeleteList); + + BasicBlock *switchConvert(CaseItr Begin, CaseItr End, + ConstantInt *LowerBound, ConstantInt *UpperBound, + Value *Val, BasicBlock *Predecessor, + BasicBlock *OrigBlock, BasicBlock *Default, + const std::vector<IntRange> &UnreachableRanges); + BasicBlock *newLeafBlock(CaseRange &Leaf, Value *Val, BasicBlock *OrigBlock, + BasicBlock *Default); + unsigned Clusterify(CaseVector &Cases, SwitchInst *SI); + }; + + /// The comparison function for sorting the switch case values in the vector. + /// WARNING: Case ranges should be disjoint! + struct CaseCmp { + bool operator () (const LowerSwitch::CaseRange& C1, + const LowerSwitch::CaseRange& C2) { + + const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low); + const ConstantInt* CI2 = cast<const ConstantInt>(C2.High); + return CI1->getValue().slt(CI2->getValue()); + } + }; +} + +char LowerSwitch::ID = 0; +INITIALIZE_PASS(LowerSwitch, "lowerswitch", + "Lower SwitchInst's to branches", false, false) + +// Publicly exposed interface to pass... +char &llvm::LowerSwitchID = LowerSwitch::ID; +// createLowerSwitchPass - Interface to this file... +FunctionPass *llvm::createLowerSwitchPass() { + return new LowerSwitch(); +} + +bool LowerSwitch::runOnFunction(Function &F) { + bool Changed = false; + SmallPtrSet<BasicBlock*, 8> DeleteList; + + for (Function::iterator I = F.begin(), E = F.end(); I != E; ) { + BasicBlock *Cur = &*I++; // Advance over block so we don't traverse new blocks + + // If the block is a dead Default block that will be deleted later, don't + // waste time processing it. + if (DeleteList.count(Cur)) + continue; + + if (SwitchInst *SI = dyn_cast<SwitchInst>(Cur->getTerminator())) { + Changed = true; + processSwitchInst(SI, DeleteList); + } + } + + for (BasicBlock* BB: DeleteList) { + DeleteDeadBlock(BB); + } + + return Changed; +} + +/// Used for debugging purposes. +static raw_ostream& operator<<(raw_ostream &O, + const LowerSwitch::CaseVector &C) + LLVM_ATTRIBUTE_USED; +static raw_ostream& operator<<(raw_ostream &O, + const LowerSwitch::CaseVector &C) { + O << "["; + + for (LowerSwitch::CaseVector::const_iterator B = C.begin(), + E = C.end(); B != E; ) { + O << *B->Low << " -" << *B->High; + if (++B != E) O << ", "; + } + + return O << "]"; +} + +/// \brief Update the first occurrence of the "switch statement" BB in the PHI +/// node with the "new" BB. The other occurrences will: +/// +/// 1) Be updated by subsequent calls to this function. Switch statements may +/// have more than one outcoming edge into the same BB if they all have the same +/// value. When the switch statement is converted these incoming edges are now +/// coming from multiple BBs. +/// 2) Removed if subsequent incoming values now share the same case, i.e., +/// multiple outcome edges are condensed into one. This is necessary to keep the +/// number of phi values equal to the number of branches to SuccBB. 
+static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB, + unsigned NumMergedCases) { + for (BasicBlock::iterator I = SuccBB->begin(), + IE = SuccBB->getFirstNonPHI()->getIterator(); + I != IE; ++I) { + PHINode *PN = cast<PHINode>(I); + + // Only update the first occurrence. + unsigned Idx = 0, E = PN->getNumIncomingValues(); + unsigned LocalNumMergedCases = NumMergedCases; + for (; Idx != E; ++Idx) { + if (PN->getIncomingBlock(Idx) == OrigBB) { + PN->setIncomingBlock(Idx, NewBB); + break; + } + } + + // Remove additional occurrences coming from condensed cases and keep the + // number of incoming values equal to the number of branches to SuccBB. + SmallVector<unsigned, 8> Indices; + for (++Idx; LocalNumMergedCases > 0 && Idx < E; ++Idx) + if (PN->getIncomingBlock(Idx) == OrigBB) { + Indices.push_back(Idx); + LocalNumMergedCases--; + } + // Remove incoming values in the reverse order to prevent invalidating + // *successive* index. + for (unsigned III : reverse(Indices)) + PN->removeIncomingValue(III); + } +} + +/// Convert the switch statement into a binary lookup of the case values. +/// The function recursively builds this tree. LowerBound and UpperBound are +/// used to keep track of the bounds for Val that have already been checked by +/// a block emitted by one of the previous calls to switchConvert in the call +/// stack. +BasicBlock * +LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound, + ConstantInt *UpperBound, Value *Val, + BasicBlock *Predecessor, BasicBlock *OrigBlock, + BasicBlock *Default, + const std::vector<IntRange> &UnreachableRanges) { + unsigned Size = End - Begin; + + if (Size == 1) { + // Check if the Case Range is perfectly squeezed in between + // already checked Upper and Lower bounds. If it is then we can avoid + // emitting the code that checks if the value actually falls in the range + // because the bounds already tell us so. + if (Begin->Low == LowerBound && Begin->High == UpperBound) { + unsigned NumMergedCases = 0; + if (LowerBound && UpperBound) + NumMergedCases = + UpperBound->getSExtValue() - LowerBound->getSExtValue(); + fixPhis(Begin->BB, OrigBlock, Predecessor, NumMergedCases); + return Begin->BB; + } + return newLeafBlock(*Begin, Val, OrigBlock, Default); + } + + unsigned Mid = Size / 2; + std::vector<CaseRange> LHS(Begin, Begin + Mid); + DEBUG(dbgs() << "LHS: " << LHS << "\n"); + std::vector<CaseRange> RHS(Begin + Mid, End); + DEBUG(dbgs() << "RHS: " << RHS << "\n"); + + CaseRange &Pivot = *(Begin + Mid); + DEBUG(dbgs() << "Pivot ==> " + << Pivot.Low->getValue() + << " -" << Pivot.High->getValue() << "\n"); + + // NewLowerBound here should never be the integer minimal value. + // This is because it is computed from a case range that is never + // the smallest, so there is always a case range that has at least + // a smaller value. + ConstantInt *NewLowerBound = Pivot.Low; + + // Because NewLowerBound is never the smallest representable integer + // it is safe here to subtract one. + ConstantInt *NewUpperBound = ConstantInt::get(NewLowerBound->getContext(), + NewLowerBound->getValue() - 1); + + if (!UnreachableRanges.empty()) { + // Check if the gap between LHS's highest and NewLowerBound is unreachable. 
+ int64_t GapLow = LHS.back().High->getSExtValue() + 1; + int64_t GapHigh = NewLowerBound->getSExtValue() - 1; + IntRange Gap = { GapLow, GapHigh }; + if (GapHigh >= GapLow && IsInRanges(Gap, UnreachableRanges)) + NewUpperBound = LHS.back().High; + } + + DEBUG(dbgs() << "LHS Bounds ==> "; + if (LowerBound) { + dbgs() << LowerBound->getSExtValue(); + } else { + dbgs() << "NONE"; + } + dbgs() << " - " << NewUpperBound->getSExtValue() << "\n"; + dbgs() << "RHS Bounds ==> "; + dbgs() << NewLowerBound->getSExtValue() << " - "; + if (UpperBound) { + dbgs() << UpperBound->getSExtValue() << "\n"; + } else { + dbgs() << "NONE\n"; + }); + + // Create a new node that checks if the value is < pivot. Go to the + // left branch if it is and right branch if not. + Function* F = OrigBlock->getParent(); + BasicBlock* NewNode = BasicBlock::Create(Val->getContext(), "NodeBlock"); + + ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_SLT, + Val, Pivot.Low, "Pivot"); + + BasicBlock *LBranch = switchConvert(LHS.begin(), LHS.end(), LowerBound, + NewUpperBound, Val, NewNode, OrigBlock, + Default, UnreachableRanges); + BasicBlock *RBranch = switchConvert(RHS.begin(), RHS.end(), NewLowerBound, + UpperBound, Val, NewNode, OrigBlock, + Default, UnreachableRanges); + + F->getBasicBlockList().insert(++OrigBlock->getIterator(), NewNode); + NewNode->getInstList().push_back(Comp); + + BranchInst::Create(LBranch, RBranch, Comp, NewNode); + return NewNode; +} + +/// Create a new leaf block for the binary lookup tree. It checks if the +/// switch's value == the case's value. If not, then it jumps to the default +/// branch. At this point in the tree, the value can't be another valid case +/// value, so the jump to the "default" branch is warranted. +BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val, + BasicBlock* OrigBlock, + BasicBlock* Default) +{ + Function* F = OrigBlock->getParent(); + BasicBlock* NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock"); + F->getBasicBlockList().insert(++OrigBlock->getIterator(), NewLeaf); + + // Emit comparison + ICmpInst* Comp = nullptr; + if (Leaf.Low == Leaf.High) { + // Make the seteq instruction... + Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_EQ, Val, + Leaf.Low, "SwitchLeaf"); + } else { + // Make range comparison + if (Leaf.Low->isMinValue(true /*isSigned*/)) { + // Val >= Min && Val <= Hi --> Val <= Hi + Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SLE, Val, Leaf.High, + "SwitchLeaf"); + } else if (Leaf.Low->isZero()) { + // Val >= 0 && Val <= Hi --> Val <=u Hi + Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High, + "SwitchLeaf"); + } else { + // Emit V-Lo <=u Hi-Lo + Constant* NegLo = ConstantExpr::getNeg(Leaf.Low); + Instruction* Add = BinaryOperator::CreateAdd(Val, NegLo, + Val->getName()+".off", + NewLeaf); + Constant *UpperBound = ConstantExpr::getAdd(NegLo, Leaf.High); + Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Add, UpperBound, + "SwitchLeaf"); + } + } + + // Make the conditional branch... + BasicBlock* Succ = Leaf.BB; + BranchInst::Create(Succ, Default, Comp, NewLeaf); + + // If there were any PHI nodes in this successor, rewrite one entry + // from OrigBlock to come from NewLeaf. 
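  // (Worked example, not from the patch, of the unsigned range test emitted
  //  above for a case range [Lo, Hi] = [10, 13]:
  //    %off = add i32 %val, -10      ; %val - Lo
  //    %cmp = icmp ule i32 %off, 3   ; Hi - Lo
  //  values 10..13 map to 0..3 and pass the compare; anything else wraps
  //  around to a large unsigned value and fails, so control reaches the
  //  default block.)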
+ for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) { + PHINode* PN = cast<PHINode>(I); + // Remove all but one incoming entries from the cluster + uint64_t Range = Leaf.High->getSExtValue() - + Leaf.Low->getSExtValue(); + for (uint64_t j = 0; j < Range; ++j) { + PN->removeIncomingValue(OrigBlock); + } + + int BlockIdx = PN->getBasicBlockIndex(OrigBlock); + assert(BlockIdx != -1 && "Switch didn't go to this successor??"); + PN->setIncomingBlock((unsigned)BlockIdx, NewLeaf); + } + + return NewLeaf; +} + +/// Transform simple list of Cases into list of CaseRange's. +unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) { + unsigned numCmps = 0; + + // Start with "simple" cases + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) + Cases.push_back(CaseRange(i.getCaseValue(), i.getCaseValue(), + i.getCaseSuccessor())); + + std::sort(Cases.begin(), Cases.end(), CaseCmp()); + + // Merge case into clusters + if (Cases.size() >= 2) { + CaseItr I = Cases.begin(); + for (CaseItr J = std::next(I), E = Cases.end(); J != E; ++J) { + int64_t nextValue = J->Low->getSExtValue(); + int64_t currentValue = I->High->getSExtValue(); + BasicBlock* nextBB = J->BB; + BasicBlock* currentBB = I->BB; + + // If the two neighboring cases go to the same destination, merge them + // into a single case. + assert(nextValue > currentValue && "Cases should be strictly ascending"); + if ((nextValue == currentValue + 1) && (currentBB == nextBB)) { + I->High = J->High; + // FIXME: Combine branch weights. + } else if (++I != J) { + *I = *J; + } + } + Cases.erase(std::next(I), Cases.end()); + } + + for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) { + if (I->Low != I->High) + // A range counts double, since it requires two compares. + ++numCmps; + } + + return numCmps; +} + +/// Replace the specified switch instruction with a sequence of chained if-then +/// insts in a balanced binary search. +void LowerSwitch::processSwitchInst(SwitchInst *SI, + SmallPtrSetImpl<BasicBlock*> &DeleteList) { + BasicBlock *CurBlock = SI->getParent(); + BasicBlock *OrigBlock = CurBlock; + Function *F = CurBlock->getParent(); + Value *Val = SI->getCondition(); // The value we are switching on... + BasicBlock* Default = SI->getDefaultDest(); + + // If there is only the default destination, just branch. + if (!SI->getNumCases()) { + BranchInst::Create(Default, CurBlock); + SI->eraseFromParent(); + return; + } + + // Prepare cases vector. + CaseVector Cases; + unsigned numCmps = Clusterify(Cases, SI); + DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size() + << ". Total compares: " << numCmps << "\n"); + DEBUG(dbgs() << "Cases: " << Cases << "\n"); + (void)numCmps; + + ConstantInt *LowerBound = nullptr; + ConstantInt *UpperBound = nullptr; + std::vector<IntRange> UnreachableRanges; + + if (isa<UnreachableInst>(Default->getFirstNonPHIOrDbg())) { + // Make the bounds tightly fitted around the case value range, because we + // know that the value passed to the switch must be exactly one of the case + // values. 
+ assert(!Cases.empty()); + LowerBound = Cases.front().Low; + UpperBound = Cases.back().High; + + DenseMap<BasicBlock *, unsigned> Popularity; + unsigned MaxPop = 0; + BasicBlock *PopSucc = nullptr; + + IntRange R = { INT64_MIN, INT64_MAX }; + UnreachableRanges.push_back(R); + for (const auto &I : Cases) { + int64_t Low = I.Low->getSExtValue(); + int64_t High = I.High->getSExtValue(); + + IntRange &LastRange = UnreachableRanges.back(); + if (LastRange.Low == Low) { + // There is nothing left of the previous range. + UnreachableRanges.pop_back(); + } else { + // Terminate the previous range. + assert(Low > LastRange.Low); + LastRange.High = Low - 1; + } + if (High != INT64_MAX) { + IntRange R = { High + 1, INT64_MAX }; + UnreachableRanges.push_back(R); + } + + // Count popularity. + int64_t N = High - Low + 1; + unsigned &Pop = Popularity[I.BB]; + if ((Pop += N) > MaxPop) { + MaxPop = Pop; + PopSucc = I.BB; + } + } +#ifndef NDEBUG + /* UnreachableRanges should be sorted and the ranges non-adjacent. */ + for (auto I = UnreachableRanges.begin(), E = UnreachableRanges.end(); + I != E; ++I) { + assert(I->Low <= I->High); + auto Next = I + 1; + if (Next != E) { + assert(Next->Low > I->High); + } + } +#endif + + // Use the most popular block as the new default, reducing the number of + // cases. + assert(MaxPop > 0 && PopSucc); + Default = PopSucc; + Cases.erase( + remove_if(Cases, + [PopSucc](const CaseRange &R) { return R.BB == PopSucc; }), + Cases.end()); + + // If there are no cases left, just branch. + if (Cases.empty()) { + BranchInst::Create(Default, CurBlock); + SI->eraseFromParent(); + return; + } + } + + // Create a new, empty default block so that the new hierarchy of + // if-then statements go to this and the PHI nodes are happy. + BasicBlock *NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault"); + F->getBasicBlockList().insert(Default->getIterator(), NewDefault); + BranchInst::Create(Default, NewDefault); + + // If there is an entry in any PHI nodes for the default edge, make sure + // to update them as well. + for (BasicBlock::iterator I = Default->begin(); isa<PHINode>(I); ++I) { + PHINode *PN = cast<PHINode>(I); + int BlockIdx = PN->getBasicBlockIndex(OrigBlock); + assert(BlockIdx != -1 && "Switch didn't go to this successor??"); + PN->setIncomingBlock((unsigned)BlockIdx, NewDefault); + } + + BasicBlock *SwitchBlock = + switchConvert(Cases.begin(), Cases.end(), LowerBound, UpperBound, Val, + OrigBlock, OrigBlock, NewDefault, UnreachableRanges); + + // Branch to our shiny new if-then stuff... + BranchInst::Create(SwitchBlock, OrigBlock); + + // We are now done with the switch instruction, delete it. + BasicBlock *OldDefault = SI->getDefaultDest(); + CurBlock->getInstList().erase(SI); + + // If the Default block has no more predecessors just add it to DeleteList. + if (pred_begin(OldDefault) == pred_end(OldDefault)) + DeleteList.insert(OldDefault); +} diff --git a/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp new file mode 100644 index 000000000000..24b3b12930ac --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp @@ -0,0 +1,107 @@ +//===- Mem2Reg.cpp - The -mem2reg pass, a wrapper around the Utils lib ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This pass is a simple pass wrapper around the PromoteMemToReg function call +// exposed by the Utils library. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/Mem2Reg.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/PromoteMemToReg.h" +#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" +using namespace llvm; + +#define DEBUG_TYPE "mem2reg" + +STATISTIC(NumPromoted, "Number of alloca's promoted"); + +static bool promoteMemoryToRegister(Function &F, DominatorTree &DT, + AssumptionCache &AC) { + std::vector<AllocaInst *> Allocas; + BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function + bool Changed = false; + + while (1) { + Allocas.clear(); + + // Find allocas that are safe to promote, by looking at all instructions in + // the entry node + for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I) + if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca? + if (isAllocaPromotable(AI)) + Allocas.push_back(AI); + + if (Allocas.empty()) + break; + + PromoteMemToReg(Allocas, DT, nullptr, &AC); + NumPromoted += Allocas.size(); + Changed = true; + } + return Changed; +} + +PreservedAnalyses PromotePass::run(Function &F, FunctionAnalysisManager &AM) { + auto &DT = AM.getResult<DominatorTreeAnalysis>(F); + auto &AC = AM.getResult<AssumptionAnalysis>(F); + if (!promoteMemoryToRegister(F, DT, AC)) + return PreservedAnalyses::all(); + + // FIXME: This should also 'preserve the CFG'. + return PreservedAnalyses::none(); +} + +namespace { +struct PromoteLegacyPass : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + PromoteLegacyPass() : FunctionPass(ID) { + initializePromoteLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + // runOnFunction - To run this pass, first we calculate the alloca + // instructions that are safe for promotion, then we promote each one. + // + bool runOnFunction(Function &F) override { + if (skipFunction(F)) + return false; + + DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + AssumptionCache &AC = + getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); + return promoteMemoryToRegister(F, DT, AC); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<AssumptionCacheTracker>(); + AU.addRequired<DominatorTreeWrapperPass>(); + AU.setPreservesCFG(); + } + }; +} // end of anonymous namespace + +char PromoteLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(PromoteLegacyPass, "mem2reg", "Promote Memory to " + "Register", + false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END(PromoteLegacyPass, "mem2reg", "Promote Memory to Register", + false, false) + +// createPromoteMemoryToRegister - Provide an entry point to create this pass. 
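// (Small illustration, not from the patch, of the promotion this wrapper
//  drives: an entry-block alloca such as
//    %x = alloca i32
//    store i32 %a, i32* %x
//    %v = load i32, i32* %x
//  is rewritten so that uses of %v become direct uses of %a, with phi nodes
//  inserted where control flow merges, and the alloca, store and load are
//  erased.)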
+// +FunctionPass *llvm::createPromoteMemoryToRegisterPass() { + return new PromoteLegacyPass(); +} diff --git a/contrib/llvm/lib/Transforms/Utils/MemorySSA.cpp b/contrib/llvm/lib/Transforms/Utils/MemorySSA.cpp new file mode 100644 index 000000000000..1ce4225f09cc --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/MemorySSA.cpp @@ -0,0 +1,2305 @@ +//===-- MemorySSA.cpp - Memory SSA Builder---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------===// +// +// This file implements the MemorySSA class. +// +//===----------------------------------------------------------------===// +#include "llvm/Transforms/Utils/MemorySSA.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallBitVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/IteratedDominanceFrontier.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/PHITransAddr.h" +#include "llvm/IR/AssemblyAnnotationWriter.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Transforms/Scalar.h" +#include <algorithm> + +#define DEBUG_TYPE "memoryssa" +using namespace llvm; +STATISTIC(NumClobberCacheLookups, "Number of Memory SSA version cache lookups"); +STATISTIC(NumClobberCacheHits, "Number of Memory SSA version cache hits"); +STATISTIC(NumClobberCacheInserts, "Number of MemorySSA version cache inserts"); + +INITIALIZE_PASS_BEGIN(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false, + true) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_END(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false, + true) + +INITIALIZE_PASS_BEGIN(MemorySSAPrinterLegacyPass, "print-memoryssa", + "Memory SSA Printer", false, false) +INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) +INITIALIZE_PASS_END(MemorySSAPrinterLegacyPass, "print-memoryssa", + "Memory SSA Printer", false, false) + +static cl::opt<unsigned> MaxCheckLimit( + "memssa-check-limit", cl::Hidden, cl::init(100), + cl::desc("The maximum number of stores/phis MemorySSA" + "will consider trying to walk past (default = 100)")); + +static cl::opt<bool> + VerifyMemorySSA("verify-memoryssa", cl::init(false), cl::Hidden, + cl::desc("Verify MemorySSA in legacy printer pass.")); + +namespace llvm { +/// \brief An assembly annotator class to print Memory SSA information in +/// comments. 
+class MemorySSAAnnotatedWriter : public AssemblyAnnotationWriter { + friend class MemorySSA; + const MemorySSA *MSSA; + +public: + MemorySSAAnnotatedWriter(const MemorySSA *M) : MSSA(M) {} + + virtual void emitBasicBlockStartAnnot(const BasicBlock *BB, + formatted_raw_ostream &OS) { + if (MemoryAccess *MA = MSSA->getMemoryAccess(BB)) + OS << "; " << *MA << "\n"; + } + + virtual void emitInstructionAnnot(const Instruction *I, + formatted_raw_ostream &OS) { + if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) + OS << "; " << *MA << "\n"; + } +}; +} + +namespace { +/// Our current alias analysis API differentiates heavily between calls and +/// non-calls, and functions called on one usually assert on the other. +/// This class encapsulates the distinction to simplify other code that wants +/// "Memory affecting instructions and related data" to use as a key. +/// For example, this class is used as a densemap key in the use optimizer. +class MemoryLocOrCall { +public: + MemoryLocOrCall() : IsCall(false) {} + MemoryLocOrCall(MemoryUseOrDef *MUD) + : MemoryLocOrCall(MUD->getMemoryInst()) {} + MemoryLocOrCall(const MemoryUseOrDef *MUD) + : MemoryLocOrCall(MUD->getMemoryInst()) {} + + MemoryLocOrCall(Instruction *Inst) { + if (ImmutableCallSite(Inst)) { + IsCall = true; + CS = ImmutableCallSite(Inst); + } else { + IsCall = false; + // There is no such thing as a memorylocation for a fence inst, and it is + // unique in that regard. + if (!isa<FenceInst>(Inst)) + Loc = MemoryLocation::get(Inst); + } + } + + explicit MemoryLocOrCall(const MemoryLocation &Loc) + : IsCall(false), Loc(Loc) {} + + bool IsCall; + ImmutableCallSite getCS() const { + assert(IsCall); + return CS; + } + MemoryLocation getLoc() const { + assert(!IsCall); + return Loc; + } + + bool operator==(const MemoryLocOrCall &Other) const { + if (IsCall != Other.IsCall) + return false; + + if (IsCall) + return CS.getCalledValue() == Other.CS.getCalledValue(); + return Loc == Other.Loc; + } + +private: + union { + ImmutableCallSite CS; + MemoryLocation Loc; + }; +}; +} + +namespace llvm { +template <> struct DenseMapInfo<MemoryLocOrCall> { + static inline MemoryLocOrCall getEmptyKey() { + return MemoryLocOrCall(DenseMapInfo<MemoryLocation>::getEmptyKey()); + } + static inline MemoryLocOrCall getTombstoneKey() { + return MemoryLocOrCall(DenseMapInfo<MemoryLocation>::getTombstoneKey()); + } + static unsigned getHashValue(const MemoryLocOrCall &MLOC) { + if (MLOC.IsCall) + return hash_combine(MLOC.IsCall, + DenseMapInfo<const Value *>::getHashValue( + MLOC.getCS().getCalledValue())); + return hash_combine( + MLOC.IsCall, DenseMapInfo<MemoryLocation>::getHashValue(MLOC.getLoc())); + } + static bool isEqual(const MemoryLocOrCall &LHS, const MemoryLocOrCall &RHS) { + return LHS == RHS; + } +}; + +enum class Reorderability { Always, IfNoAlias, Never }; + +/// This does one-way checks to see if Use could theoretically be hoisted above +/// MayClobber. This will not check the other way around. +/// +/// This assumes that, for the purposes of MemorySSA, Use comes directly after +/// MayClobber, with no potentially clobbering operations in between them. +/// (Where potentially clobbering ops are memory barriers, aliased stores, etc.) +static Reorderability getLoadReorderability(const LoadInst *Use, + const LoadInst *MayClobber) { + bool VolatileUse = Use->isVolatile(); + bool VolatileClobber = MayClobber->isVolatile(); + // Volatile operations may never be reordered with other volatile operations. 
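  // (Descriptive summary, not from the patch, of the cases checked below, in
  //  order of precedence:
  //    - both loads volatile, Use is seq_cst, or MayClobber is acquire or
  //      stronger                                 -> Reorderability::Never
  //    - otherwise, exactly one load is volatile  -> Reorderability::IfNoAlias
  //    - otherwise                                -> Reorderability::Always)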
+ if (VolatileUse && VolatileClobber) + return Reorderability::Never; + + // The lang ref allows reordering of volatile and non-volatile operations. + // Whether an aliasing nonvolatile load and volatile load can be reordered, + // though, is ambiguous. Because it may not be best to exploit this ambiguity, + // we only allow volatile/non-volatile reordering if the volatile and + // non-volatile operations don't alias. + Reorderability Result = VolatileUse || VolatileClobber + ? Reorderability::IfNoAlias + : Reorderability::Always; + + // If a load is seq_cst, it cannot be moved above other loads. If its ordering + // is weaker, it can be moved above other loads. We just need to be sure that + // MayClobber isn't an acquire load, because loads can't be moved above + // acquire loads. + // + // Note that this explicitly *does* allow the free reordering of monotonic (or + // weaker) loads of the same address. + bool SeqCstUse = Use->getOrdering() == AtomicOrdering::SequentiallyConsistent; + bool MayClobberIsAcquire = isAtLeastOrStrongerThan(MayClobber->getOrdering(), + AtomicOrdering::Acquire); + if (SeqCstUse || MayClobberIsAcquire) + return Reorderability::Never; + return Result; +} + +static bool instructionClobbersQuery(MemoryDef *MD, + const MemoryLocation &UseLoc, + const Instruction *UseInst, + AliasAnalysis &AA) { + Instruction *DefInst = MD->getMemoryInst(); + assert(DefInst && "Defining instruction not actually an instruction"); + + if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) { + // These intrinsics will show up as affecting memory, but they are just + // markers. + switch (II->getIntrinsicID()) { + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::invariant_start: + case Intrinsic::invariant_end: + case Intrinsic::assume: + return false; + default: + break; + } + } + + ImmutableCallSite UseCS(UseInst); + if (UseCS) { + ModRefInfo I = AA.getModRefInfo(DefInst, UseCS); + return I != MRI_NoModRef; + } + + if (auto *DefLoad = dyn_cast<LoadInst>(DefInst)) { + if (auto *UseLoad = dyn_cast<LoadInst>(UseInst)) { + switch (getLoadReorderability(UseLoad, DefLoad)) { + case Reorderability::Always: + return false; + case Reorderability::Never: + return true; + case Reorderability::IfNoAlias: + return !AA.isNoAlias(UseLoc, MemoryLocation::get(DefLoad)); + } + } + } + + return AA.getModRefInfo(DefInst, UseLoc) & MRI_Mod; +} + +static bool instructionClobbersQuery(MemoryDef *MD, const MemoryUseOrDef *MU, + const MemoryLocOrCall &UseMLOC, + AliasAnalysis &AA) { + // FIXME: This is a temporary hack to allow a single instructionClobbersQuery + // to exist while MemoryLocOrCall is pushed through places. + if (UseMLOC.IsCall) + return instructionClobbersQuery(MD, MemoryLocation(), MU->getMemoryInst(), + AA); + return instructionClobbersQuery(MD, UseMLOC.getLoc(), MU->getMemoryInst(), + AA); +} + +// Return true when MD may alias MU, return false otherwise. +bool defClobbersUseOrDef(MemoryDef *MD, const MemoryUseOrDef *MU, + AliasAnalysis &AA) { + return instructionClobbersQuery(MD, MU, MemoryLocOrCall(MU), AA); +} +} + +namespace { +struct UpwardsMemoryQuery { + // True if our original query started off as a call + bool IsCall; + // The pointer location we started the query with. This will be empty if + // IsCall is true. + MemoryLocation StartingLoc; + // This is the instruction we were querying about. 
+ const Instruction *Inst; + // The MemoryAccess we actually got called with, used to test local domination + const MemoryAccess *OriginalAccess; + + UpwardsMemoryQuery() + : IsCall(false), Inst(nullptr), OriginalAccess(nullptr) {} + + UpwardsMemoryQuery(const Instruction *Inst, const MemoryAccess *Access) + : IsCall(ImmutableCallSite(Inst)), Inst(Inst), OriginalAccess(Access) { + if (!IsCall) + StartingLoc = MemoryLocation::get(Inst); + } +}; + +static bool lifetimeEndsAt(MemoryDef *MD, const MemoryLocation &Loc, + AliasAnalysis &AA) { + Instruction *Inst = MD->getMemoryInst(); + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { + switch (II->getIntrinsicID()) { + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + return AA.isMustAlias(MemoryLocation(II->getArgOperand(1)), Loc); + default: + return false; + } + } + return false; +} + +static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysis &AA, + const Instruction *I) { + // If the memory can't be changed, then loads of the memory can't be + // clobbered. + // + // FIXME: We should handle invariant groups, as well. It's a bit harder, + // because we need to pay close attention to invariant group barriers. + return isa<LoadInst>(I) && (I->getMetadata(LLVMContext::MD_invariant_load) || + AA.pointsToConstantMemory(I)); +} + +/// Cache for our caching MemorySSA walker. +class WalkerCache { + DenseMap<ConstMemoryAccessPair, MemoryAccess *> Accesses; + DenseMap<const MemoryAccess *, MemoryAccess *> Calls; + +public: + MemoryAccess *lookup(const MemoryAccess *MA, const MemoryLocation &Loc, + bool IsCall) const { + ++NumClobberCacheLookups; + MemoryAccess *R = IsCall ? Calls.lookup(MA) : Accesses.lookup({MA, Loc}); + if (R) + ++NumClobberCacheHits; + return R; + } + + bool insert(const MemoryAccess *MA, MemoryAccess *To, + const MemoryLocation &Loc, bool IsCall) { + // This is fine for Phis, since there are times where we can't optimize + // them. Making a def its own clobber is never correct, though. + assert((MA != To || isa<MemoryPhi>(MA)) && + "Something can't clobber itself!"); + + ++NumClobberCacheInserts; + bool Inserted; + if (IsCall) + Inserted = Calls.insert({MA, To}).second; + else + Inserted = Accesses.insert({{MA, Loc}, To}).second; + + return Inserted; + } + + bool remove(const MemoryAccess *MA, const MemoryLocation &Loc, bool IsCall) { + return IsCall ? Calls.erase(MA) : Accesses.erase({MA, Loc}); + } + + void clear() { + Accesses.clear(); + Calls.clear(); + } + + bool contains(const MemoryAccess *MA) const { + for (auto &P : Accesses) + if (P.first.first == MA || P.second == MA) + return true; + for (auto &P : Calls) + if (P.first == MA || P.second == MA) + return true; + return false; + } +}; + +/// Walks the defining uses of MemoryDefs. Stops after we hit something that has +/// no defining use (e.g. a MemoryPhi or liveOnEntry). Note that, when comparing +/// against a null def_chain_iterator, this will compare equal only after +/// walking said Phi/liveOnEntry. +struct def_chain_iterator + : public iterator_facade_base<def_chain_iterator, std::forward_iterator_tag, + MemoryAccess *> { + def_chain_iterator() : MA(nullptr) {} + def_chain_iterator(MemoryAccess *MA) : MA(MA) {} + + MemoryAccess *operator*() const { return MA; } + + def_chain_iterator &operator++() { + // N.B. liveOnEntry has a null defining access. 
+ if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA)) + MA = MUD->getDefiningAccess(); + else + MA = nullptr; + return *this; + } + + bool operator==(const def_chain_iterator &O) const { return MA == O.MA; } + +private: + MemoryAccess *MA; +}; + +static iterator_range<def_chain_iterator> +def_chain(MemoryAccess *MA, MemoryAccess *UpTo = nullptr) { +#ifdef EXPENSIVE_CHECKS + assert((!UpTo || find(def_chain(MA), UpTo) != def_chain_iterator()) && + "UpTo isn't in the def chain!"); +#endif + return make_range(def_chain_iterator(MA), def_chain_iterator(UpTo)); +} + +/// Verifies that `Start` is clobbered by `ClobberAt`, and that nothing +/// inbetween `Start` and `ClobberAt` can clobbers `Start`. +/// +/// This is meant to be as simple and self-contained as possible. Because it +/// uses no cache, etc., it can be relatively expensive. +/// +/// \param Start The MemoryAccess that we want to walk from. +/// \param ClobberAt A clobber for Start. +/// \param StartLoc The MemoryLocation for Start. +/// \param MSSA The MemorySSA isntance that Start and ClobberAt belong to. +/// \param Query The UpwardsMemoryQuery we used for our search. +/// \param AA The AliasAnalysis we used for our search. +static void LLVM_ATTRIBUTE_UNUSED +checkClobberSanity(MemoryAccess *Start, MemoryAccess *ClobberAt, + const MemoryLocation &StartLoc, const MemorySSA &MSSA, + const UpwardsMemoryQuery &Query, AliasAnalysis &AA) { + assert(MSSA.dominates(ClobberAt, Start) && "Clobber doesn't dominate start?"); + + if (MSSA.isLiveOnEntryDef(Start)) { + assert(MSSA.isLiveOnEntryDef(ClobberAt) && + "liveOnEntry must clobber itself"); + return; + } + + bool FoundClobber = false; + DenseSet<MemoryAccessPair> VisitedPhis; + SmallVector<MemoryAccessPair, 8> Worklist; + Worklist.emplace_back(Start, StartLoc); + // Walk all paths from Start to ClobberAt, while looking for clobbers. If one + // is found, complain. + while (!Worklist.empty()) { + MemoryAccessPair MAP = Worklist.pop_back_val(); + // All we care about is that nothing from Start to ClobberAt clobbers Start. + // We learn nothing from revisiting nodes. + if (!VisitedPhis.insert(MAP).second) + continue; + + for (MemoryAccess *MA : def_chain(MAP.first)) { + if (MA == ClobberAt) { + if (auto *MD = dyn_cast<MemoryDef>(MA)) { + // instructionClobbersQuery isn't essentially free, so don't use `|=`, + // since it won't let us short-circuit. + // + // Also, note that this can't be hoisted out of the `Worklist` loop, + // since MD may only act as a clobber for 1 of N MemoryLocations. + FoundClobber = + FoundClobber || MSSA.isLiveOnEntryDef(MD) || + instructionClobbersQuery(MD, MAP.second, Query.Inst, AA); + } + break; + } + + // We should never hit liveOnEntry, unless it's the clobber. + assert(!MSSA.isLiveOnEntryDef(MA) && "Hit liveOnEntry before clobber?"); + + if (auto *MD = dyn_cast<MemoryDef>(MA)) { + (void)MD; + assert(!instructionClobbersQuery(MD, MAP.second, Query.Inst, AA) && + "Found clobber before reaching ClobberAt!"); + continue; + } + + assert(isa<MemoryPhi>(MA)); + Worklist.append(upward_defs_begin({MA, MAP.second}), upward_defs_end()); + } + } + + // If ClobberAt is a MemoryPhi, we can assume something above it acted as a + // clobber. Otherwise, `ClobberAt` should've acted as a clobber at some point. + assert((isa<MemoryPhi>(ClobberAt) || FoundClobber) && + "ClobberAt never acted as a clobber"); +} + +/// Our algorithm for walking (and trying to optimize) clobbers, all wrapped up +/// in one class. 
+class ClobberWalker { + /// Save a few bytes by using unsigned instead of size_t. + using ListIndex = unsigned; + + /// Represents a span of contiguous MemoryDefs, potentially ending in a + /// MemoryPhi. + struct DefPath { + MemoryLocation Loc; + // Note that, because we always walk in reverse, Last will always dominate + // First. Also note that First and Last are inclusive. + MemoryAccess *First; + MemoryAccess *Last; + Optional<ListIndex> Previous; + + DefPath(const MemoryLocation &Loc, MemoryAccess *First, MemoryAccess *Last, + Optional<ListIndex> Previous) + : Loc(Loc), First(First), Last(Last), Previous(Previous) {} + + DefPath(const MemoryLocation &Loc, MemoryAccess *Init, + Optional<ListIndex> Previous) + : DefPath(Loc, Init, Init, Previous) {} + }; + + const MemorySSA &MSSA; + AliasAnalysis &AA; + DominatorTree &DT; + WalkerCache &WC; + UpwardsMemoryQuery *Query; + bool UseCache; + + // Phi optimization bookkeeping + SmallVector<DefPath, 32> Paths; + DenseSet<ConstMemoryAccessPair> VisitedPhis; + DenseMap<const BasicBlock *, MemoryAccess *> WalkTargetCache; + + void setUseCache(bool Use) { UseCache = Use; } + bool shouldIgnoreCache() const { + // UseCache will only be false when we're debugging, or when expensive + // checks are enabled. In either case, we don't care deeply about speed. + return LLVM_UNLIKELY(!UseCache); + } + + void addCacheEntry(const MemoryAccess *What, MemoryAccess *To, + const MemoryLocation &Loc) const { +// EXPENSIVE_CHECKS because most of these queries are redundant. +#ifdef EXPENSIVE_CHECKS + assert(MSSA.dominates(To, What)); +#endif + if (shouldIgnoreCache()) + return; + WC.insert(What, To, Loc, Query->IsCall); + } + + MemoryAccess *lookupCache(const MemoryAccess *MA, const MemoryLocation &Loc) { + return shouldIgnoreCache() ? nullptr : WC.lookup(MA, Loc, Query->IsCall); + } + + void cacheDefPath(const DefPath &DN, MemoryAccess *Target) const { + if (shouldIgnoreCache()) + return; + + for (MemoryAccess *MA : def_chain(DN.First, DN.Last)) + addCacheEntry(MA, Target, DN.Loc); + + // DefPaths only express the path we walked. So, DN.Last could either be a + // thing we want to cache, or not. + if (DN.Last != Target) + addCacheEntry(DN.Last, Target, DN.Loc); + } + + /// Find the nearest def or phi that `From` can legally be optimized to. + /// + /// FIXME: Deduplicate this with MSSA::findDominatingDef. Ideally, MSSA should + /// keep track of this information for us, and allow us O(1) lookups of this + /// info. + MemoryAccess *getWalkTarget(const MemoryPhi *From) { + assert(From->getNumOperands() && "Phi with no operands?"); + + BasicBlock *BB = From->getBlock(); + auto At = WalkTargetCache.find(BB); + if (At != WalkTargetCache.end()) + return At->second; + + SmallVector<const BasicBlock *, 8> ToCache; + ToCache.push_back(BB); + + MemoryAccess *Result = MSSA.getLiveOnEntryDef(); + DomTreeNode *Node = DT.getNode(BB); + while ((Node = Node->getIDom())) { + auto At = WalkTargetCache.find(BB); + if (At != WalkTargetCache.end()) { + Result = At->second; + break; + } + + auto *Accesses = MSSA.getBlockAccesses(Node->getBlock()); + if (Accesses) { + auto Iter = find_if(reverse(*Accesses), [](const MemoryAccess &MA) { + return !isa<MemoryUse>(MA); + }); + if (Iter != Accesses->rend()) { + Result = const_cast<MemoryAccess *>(&*Iter); + break; + } + } + + ToCache.push_back(Node->getBlock()); + } + + for (const BasicBlock *BB : ToCache) + WalkTargetCache.insert({BB, Result}); + return Result; + } + + /// Result of calling walkToPhiOrClobber. 
+ struct UpwardsWalkResult { + /// The "Result" of the walk. Either a clobber, the last thing we walked, or + /// both. + MemoryAccess *Result; + bool IsKnownClobber; + bool FromCache; + }; + + /// Walk to the next Phi or Clobber in the def chain starting at Desc.Last. + /// This will update Desc.Last as it walks. It will (optionally) also stop at + /// StopAt. + /// + /// This does not test for whether StopAt is a clobber + UpwardsWalkResult walkToPhiOrClobber(DefPath &Desc, + MemoryAccess *StopAt = nullptr) { + assert(!isa<MemoryUse>(Desc.Last) && "Uses don't exist in my world"); + + for (MemoryAccess *Current : def_chain(Desc.Last)) { + Desc.Last = Current; + if (Current == StopAt) + return {Current, false, false}; + + if (auto *MD = dyn_cast<MemoryDef>(Current)) + if (MSSA.isLiveOnEntryDef(MD) || + instructionClobbersQuery(MD, Desc.Loc, Query->Inst, AA)) + return {MD, true, false}; + + // Cache checks must be done last, because if Current is a clobber, the + // cache will contain the clobber for Current. + if (MemoryAccess *MA = lookupCache(Current, Desc.Loc)) + return {MA, true, true}; + } + + assert(isa<MemoryPhi>(Desc.Last) && + "Ended at a non-clobber that's not a phi?"); + return {Desc.Last, false, false}; + } + + void addSearches(MemoryPhi *Phi, SmallVectorImpl<ListIndex> &PausedSearches, + ListIndex PriorNode) { + auto UpwardDefs = make_range(upward_defs_begin({Phi, Paths[PriorNode].Loc}), + upward_defs_end()); + for (const MemoryAccessPair &P : UpwardDefs) { + PausedSearches.push_back(Paths.size()); + Paths.emplace_back(P.second, P.first, PriorNode); + } + } + + /// Represents a search that terminated after finding a clobber. This clobber + /// may or may not be present in the path of defs from LastNode..SearchStart, + /// since it may have been retrieved from cache. + struct TerminatedPath { + MemoryAccess *Clobber; + ListIndex LastNode; + }; + + /// Get an access that keeps us from optimizing to the given phi. + /// + /// PausedSearches is an array of indices into the Paths array. Its incoming + /// value is the indices of searches that stopped at the last phi optimization + /// target. It's left in an unspecified state. + /// + /// If this returns None, NewPaused is a vector of searches that terminated + /// at StopWhere. Otherwise, NewPaused is left in an unspecified state. + Optional<TerminatedPath> + getBlockingAccess(MemoryAccess *StopWhere, + SmallVectorImpl<ListIndex> &PausedSearches, + SmallVectorImpl<ListIndex> &NewPaused, + SmallVectorImpl<TerminatedPath> &Terminated) { + assert(!PausedSearches.empty() && "No searches to continue?"); + + // BFS vs DFS really doesn't make a difference here, so just do a DFS with + // PausedSearches as our stack. + while (!PausedSearches.empty()) { + ListIndex PathIndex = PausedSearches.pop_back_val(); + DefPath &Node = Paths[PathIndex]; + + // If we've already visited this path with this MemoryLocation, we don't + // need to do so again. + // + // NOTE: That we just drop these paths on the ground makes caching + // behavior sporadic. e.g. given a diamond: + // A + // B C + // D + // + // ...If we walk D, B, A, C, we'll only cache the result of phi + // optimization for A, B, and D; C will be skipped because it dies here. + // This arguably isn't the worst thing ever, since: + // - We generally query things in a top-down order, so if we got below D + // without needing cache entries for {C, MemLoc}, then chances are + // that those cache entries would end up ultimately unused. 
+ // - We still cache things for A, so C only needs to walk up a bit. + // If this behavior becomes problematic, we can fix without a ton of extra + // work. + if (!VisitedPhis.insert({Node.Last, Node.Loc}).second) + continue; + + UpwardsWalkResult Res = walkToPhiOrClobber(Node, /*StopAt=*/StopWhere); + if (Res.IsKnownClobber) { + assert(Res.Result != StopWhere || Res.FromCache); + // If this wasn't a cache hit, we hit a clobber when walking. That's a + // failure. + TerminatedPath Term{Res.Result, PathIndex}; + if (!Res.FromCache || !MSSA.dominates(Res.Result, StopWhere)) + return Term; + + // Otherwise, it's a valid thing to potentially optimize to. + Terminated.push_back(Term); + continue; + } + + if (Res.Result == StopWhere) { + // We've hit our target. Save this path off for if we want to continue + // walking. + NewPaused.push_back(PathIndex); + continue; + } + + assert(!MSSA.isLiveOnEntryDef(Res.Result) && "liveOnEntry is a clobber"); + addSearches(cast<MemoryPhi>(Res.Result), PausedSearches, PathIndex); + } + + return None; + } + + template <typename T, typename Walker> + struct generic_def_path_iterator + : public iterator_facade_base<generic_def_path_iterator<T, Walker>, + std::forward_iterator_tag, T *> { + generic_def_path_iterator() : W(nullptr), N(None) {} + generic_def_path_iterator(Walker *W, ListIndex N) : W(W), N(N) {} + + T &operator*() const { return curNode(); } + + generic_def_path_iterator &operator++() { + N = curNode().Previous; + return *this; + } + + bool operator==(const generic_def_path_iterator &O) const { + if (N.hasValue() != O.N.hasValue()) + return false; + return !N.hasValue() || *N == *O.N; + } + + private: + T &curNode() const { return W->Paths[*N]; } + + Walker *W; + Optional<ListIndex> N; + }; + + using def_path_iterator = generic_def_path_iterator<DefPath, ClobberWalker>; + using const_def_path_iterator = + generic_def_path_iterator<const DefPath, const ClobberWalker>; + + iterator_range<def_path_iterator> def_path(ListIndex From) { + return make_range(def_path_iterator(this, From), def_path_iterator()); + } + + iterator_range<const_def_path_iterator> const_def_path(ListIndex From) const { + return make_range(const_def_path_iterator(this, From), + const_def_path_iterator()); + } + + struct OptznResult { + /// The path that contains our result. + TerminatedPath PrimaryClobber; + /// The paths that we can legally cache back from, but that aren't + /// necessarily the result of the Phi optimization. + SmallVector<TerminatedPath, 4> OtherClobbers; + }; + + ListIndex defPathIndex(const DefPath &N) const { + // The assert looks nicer if we don't need to do &N + const DefPath *NP = &N; + assert(!Paths.empty() && NP >= &Paths.front() && NP <= &Paths.back() && + "Out of bounds DefPath!"); + return NP - &Paths.front(); + } + + /// Try to optimize a phi as best as we can. Returns a SmallVector of Paths + /// that act as legal clobbers. Note that this won't return *all* clobbers. + /// + /// Phi optimization algorithm tl;dr: + /// - Find the earliest def/phi, A, we can optimize to + /// - Find if all paths from the starting memory access ultimately reach A + /// - If not, optimization isn't possible. + /// - Otherwise, walk from A to another clobber or phi, A'. + /// - If A' is a def, we're done. + /// - If A' is a phi, try to optimize it. + /// + /// A path is a series of {MemoryAccess, MemoryLocation} pairs. A path + /// terminates when a MemoryAccess that clobbers said MemoryLocation is found. 
+ OptznResult tryOptimizePhi(MemoryPhi *Phi, MemoryAccess *Start, + const MemoryLocation &Loc) { + assert(Paths.empty() && VisitedPhis.empty() && + "Reset the optimization state."); + + Paths.emplace_back(Loc, Start, Phi, None); + // Stores how many "valid" optimization nodes we had prior to calling + // addSearches/getBlockingAccess. Necessary for caching if we had a blocker. + auto PriorPathsSize = Paths.size(); + + SmallVector<ListIndex, 16> PausedSearches; + SmallVector<ListIndex, 8> NewPaused; + SmallVector<TerminatedPath, 4> TerminatedPaths; + + addSearches(Phi, PausedSearches, 0); + + // Moves the TerminatedPath with the "most dominated" Clobber to the end of + // Paths. + auto MoveDominatedPathToEnd = [&](SmallVectorImpl<TerminatedPath> &Paths) { + assert(!Paths.empty() && "Need a path to move"); + auto Dom = Paths.begin(); + for (auto I = std::next(Dom), E = Paths.end(); I != E; ++I) + if (!MSSA.dominates(I->Clobber, Dom->Clobber)) + Dom = I; + auto Last = Paths.end() - 1; + if (Last != Dom) + std::iter_swap(Last, Dom); + }; + + MemoryPhi *Current = Phi; + while (1) { + assert(!MSSA.isLiveOnEntryDef(Current) && + "liveOnEntry wasn't treated as a clobber?"); + + MemoryAccess *Target = getWalkTarget(Current); + // If a TerminatedPath doesn't dominate Target, then it wasn't a legal + // optimization for the prior phi. + assert(all_of(TerminatedPaths, [&](const TerminatedPath &P) { + return MSSA.dominates(P.Clobber, Target); + })); + + // FIXME: This is broken, because the Blocker may be reported to be + // liveOnEntry, and we'll happily wait for that to disappear (read: never) + // For the moment, this is fine, since we do nothing with blocker info. + if (Optional<TerminatedPath> Blocker = getBlockingAccess( + Target, PausedSearches, NewPaused, TerminatedPaths)) { + // Cache our work on the blocking node, since we know that's correct. + cacheDefPath(Paths[Blocker->LastNode], Blocker->Clobber); + + // Find the node we started at. We can't search based on N->Last, since + // we may have gone around a loop with a different MemoryLocation. + auto Iter = find_if(def_path(Blocker->LastNode), [&](const DefPath &N) { + return defPathIndex(N) < PriorPathsSize; + }); + assert(Iter != def_path_iterator()); + + DefPath &CurNode = *Iter; + assert(CurNode.Last == Current); + + // Two things: + // A. We can't reliably cache all of NewPaused back. Consider a case + // where we have two paths in NewPaused; one of which can't optimize + // above this phi, whereas the other can. If we cache the second path + // back, we'll end up with suboptimal cache entries. We can handle + // cases like this a bit better when we either try to find all + // clobbers that block phi optimization, or when our cache starts + // supporting unfinished searches. + // B. We can't reliably cache TerminatedPaths back here without doing + // extra checks; consider a case like: + // T + // / \ + // D C + // \ / + // S + // Where T is our target, C is a node with a clobber on it, D is a + // diamond (with a clobber *only* on the left or right node, N), and + // S is our start. Say we walk to D, through the node opposite N + // (read: ignoring the clobber), and see a cache entry in the top + // node of D. That cache entry gets put into TerminatedPaths. We then + // walk up to C (N is later in our worklist), find the clobber, and + // quit. If we append TerminatedPaths to OtherClobbers, we'll cache + // the bottom part of D to the cached clobber, ignoring the clobber + // in N. 
Again, this problem goes away if we start tracking all + // blockers for a given phi optimization. + TerminatedPath Result{CurNode.Last, defPathIndex(CurNode)}; + return {Result, {}}; + } + + // If there's nothing left to search, then all paths led to valid clobbers + // that we got from our cache; pick the nearest to the start, and allow + // the rest to be cached back. + if (NewPaused.empty()) { + MoveDominatedPathToEnd(TerminatedPaths); + TerminatedPath Result = TerminatedPaths.pop_back_val(); + return {Result, std::move(TerminatedPaths)}; + } + + MemoryAccess *DefChainEnd = nullptr; + SmallVector<TerminatedPath, 4> Clobbers; + for (ListIndex Paused : NewPaused) { + UpwardsWalkResult WR = walkToPhiOrClobber(Paths[Paused]); + if (WR.IsKnownClobber) + Clobbers.push_back({WR.Result, Paused}); + else + // Micro-opt: If we hit the end of the chain, save it. + DefChainEnd = WR.Result; + } + + if (!TerminatedPaths.empty()) { + // If we couldn't find the dominating phi/liveOnEntry in the above loop, + // do it now. + if (!DefChainEnd) + for (MemoryAccess *MA : def_chain(Target)) + DefChainEnd = MA; + + // If any of the terminated paths don't dominate the phi we'll try to + // optimize, we need to figure out what they are and quit. + const BasicBlock *ChainBB = DefChainEnd->getBlock(); + for (const TerminatedPath &TP : TerminatedPaths) { + // Because we know that DefChainEnd is as "high" as we can go, we + // don't need local dominance checks; BB dominance is sufficient. + if (DT.dominates(ChainBB, TP.Clobber->getBlock())) + Clobbers.push_back(TP); + } + } + + // If we have clobbers in the def chain, find the one closest to Current + // and quit. + if (!Clobbers.empty()) { + MoveDominatedPathToEnd(Clobbers); + TerminatedPath Result = Clobbers.pop_back_val(); + return {Result, std::move(Clobbers)}; + } + + assert(all_of(NewPaused, + [&](ListIndex I) { return Paths[I].Last == DefChainEnd; })); + + // Because liveOnEntry is a clobber, this must be a phi. + auto *DefChainPhi = cast<MemoryPhi>(DefChainEnd); + + PriorPathsSize = Paths.size(); + PausedSearches.clear(); + for (ListIndex I : NewPaused) + addSearches(DefChainPhi, PausedSearches, I); + NewPaused.clear(); + + Current = DefChainPhi; + } + } + + /// Caches everything in an OptznResult. + void cacheOptResult(const OptznResult &R) { + if (R.OtherClobbers.empty()) { + // If we're not going to be caching OtherClobbers, don't bother with + // marking visited/etc. + for (const DefPath &N : const_def_path(R.PrimaryClobber.LastNode)) + cacheDefPath(N, R.PrimaryClobber.Clobber); + return; + } + + // PrimaryClobber is our answer. If we can cache anything back, we need to + // stop caching when we visit PrimaryClobber. 
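// --- Illustrative aside (editor's sketch, not part of this change) ----------
// def_path()/const_def_path() above treat the Paths vector plus each DefPath's
// Previous index as many singly linked lists sharing one backing store, so
// caching a result below just means walking one such list back toward its
// start. A standalone analogue with plain ints (Node and chainLength are
// hypothetical names, not LLVM API; assumes <vector>):
struct Node {
  int Value;
  int Previous; // index of the prior node in the chain, -1 when none
};
static int chainLength(const std::vector<Node> &Nodes, int From) {
  int Len = 0;
  for (int I = From; I != -1; I = Nodes[I].Previous)
    ++Len; // visiting Nodes[I] here is where cacheDefPath caches each entry
  return Len;
}
// --- end of aside ------------------------------------------------------------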
+ SmallBitVector Visited(Paths.size()); + for (const DefPath &N : const_def_path(R.PrimaryClobber.LastNode)) { + Visited[defPathIndex(N)] = true; + cacheDefPath(N, R.PrimaryClobber.Clobber); + } + + for (const TerminatedPath &P : R.OtherClobbers) { + for (const DefPath &N : const_def_path(P.LastNode)) { + ListIndex NIndex = defPathIndex(N); + if (Visited[NIndex]) + break; + Visited[NIndex] = true; + cacheDefPath(N, P.Clobber); + } + } + } + + void verifyOptResult(const OptznResult &R) const { + assert(all_of(R.OtherClobbers, [&](const TerminatedPath &P) { + return MSSA.dominates(P.Clobber, R.PrimaryClobber.Clobber); + })); + } + + void resetPhiOptznState() { + Paths.clear(); + VisitedPhis.clear(); + } + +public: + ClobberWalker(const MemorySSA &MSSA, AliasAnalysis &AA, DominatorTree &DT, + WalkerCache &WC) + : MSSA(MSSA), AA(AA), DT(DT), WC(WC), UseCache(true) {} + + void reset() { WalkTargetCache.clear(); } + + /// Finds the nearest clobber for the given query, optimizing phis if + /// possible. + MemoryAccess *findClobber(MemoryAccess *Start, UpwardsMemoryQuery &Q, + bool UseWalkerCache = true) { + setUseCache(UseWalkerCache); + Query = &Q; + + MemoryAccess *Current = Start; + // This walker pretends uses don't exist. If we're handed one, silently grab + // its def. (This has the nice side-effect of ensuring we never cache uses) + if (auto *MU = dyn_cast<MemoryUse>(Start)) + Current = MU->getDefiningAccess(); + + DefPath FirstDesc(Q.StartingLoc, Current, Current, None); + // Fast path for the overly-common case (no crazy phi optimization + // necessary) + UpwardsWalkResult WalkResult = walkToPhiOrClobber(FirstDesc); + MemoryAccess *Result; + if (WalkResult.IsKnownClobber) { + cacheDefPath(FirstDesc, WalkResult.Result); + Result = WalkResult.Result; + } else { + OptznResult OptRes = tryOptimizePhi(cast<MemoryPhi>(FirstDesc.Last), + Current, Q.StartingLoc); + verifyOptResult(OptRes); + cacheOptResult(OptRes); + resetPhiOptznState(); + Result = OptRes.PrimaryClobber.Clobber; + } + +#ifdef EXPENSIVE_CHECKS + checkClobberSanity(Current, Result, Q.StartingLoc, MSSA, Q, AA); +#endif + return Result; + } + + void verify(const MemorySSA *MSSA) { assert(MSSA == &this->MSSA); } +}; + +struct RenamePassData { + DomTreeNode *DTN; + DomTreeNode::const_iterator ChildIt; + MemoryAccess *IncomingVal; + + RenamePassData(DomTreeNode *D, DomTreeNode::const_iterator It, + MemoryAccess *M) + : DTN(D), ChildIt(It), IncomingVal(M) {} + void swap(RenamePassData &RHS) { + std::swap(DTN, RHS.DTN); + std::swap(ChildIt, RHS.ChildIt); + std::swap(IncomingVal, RHS.IncomingVal); + } +}; +} // anonymous namespace + +namespace llvm { +/// \brief A MemorySSAWalker that does AA walks and caching of lookups to +/// disambiguate accesses. +/// +/// FIXME: The current implementation of this can take quadratic space in rare +/// cases. This can be fixed, but it is something to note until it is fixed. +/// +/// In order to trigger this behavior, you need to store to N distinct locations +/// (that AA can prove don't alias), perform M stores to other memory +/// locations that AA can prove don't alias any of the initial N locations, and +/// then load from all of the N locations. In this case, we insert M cache +/// entries for each of the N loads. 
+/// +/// For example: +/// define i32 @foo() { +/// %a = alloca i32, align 4 +/// %b = alloca i32, align 4 +/// store i32 0, i32* %a, align 4 +/// store i32 0, i32* %b, align 4 +/// +/// ; Insert M stores to other memory that doesn't alias %a or %b here +/// +/// %c = load i32, i32* %a, align 4 ; Caches M entries in +/// ; CachedUpwardsClobberingAccess for the +/// ; MemoryLocation %a +/// %d = load i32, i32* %b, align 4 ; Caches M entries in +/// ; CachedUpwardsClobberingAccess for the +/// ; MemoryLocation %b +/// +/// ; For completeness' sake, loading %a or %b again would not cache *another* +/// ; M entries. +/// %r = add i32 %c, %d +/// ret i32 %r +/// } +class MemorySSA::CachingWalker final : public MemorySSAWalker { + WalkerCache Cache; + ClobberWalker Walker; + bool AutoResetWalker; + + MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, UpwardsMemoryQuery &); + void verifyRemoved(MemoryAccess *); + +public: + CachingWalker(MemorySSA *, AliasAnalysis *, DominatorTree *); + ~CachingWalker() override; + + using MemorySSAWalker::getClobberingMemoryAccess; + MemoryAccess *getClobberingMemoryAccess(MemoryAccess *) override; + MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, + const MemoryLocation &) override; + void invalidateInfo(MemoryAccess *) override; + + /// Whether we call resetClobberWalker() after each time we *actually* walk to + /// answer a clobber query. + void setAutoResetWalker(bool AutoReset) { AutoResetWalker = AutoReset; } + + /// Drop the walker's persistent data structures. At the moment, this means + /// "drop the walker's cache of BasicBlocks -> + /// earliest-MemoryAccess-we-can-optimize-to". This is necessary if we're + /// going to have DT updates, if we remove MemoryAccesses, etc. + void resetClobberWalker() { Walker.reset(); } + + void verify(const MemorySSA *MSSA) override { + MemorySSAWalker::verify(MSSA); + Walker.verify(MSSA); + } +}; + +/// \brief Rename a single basic block into MemorySSA form. +/// Uses the standard SSA renaming algorithm. +/// \returns The new incoming value. +MemoryAccess *MemorySSA::renameBlock(BasicBlock *BB, + MemoryAccess *IncomingVal) { + auto It = PerBlockAccesses.find(BB); + // Skip most processing if the list is empty. + if (It != PerBlockAccesses.end()) { + AccessList *Accesses = It->second.get(); + for (MemoryAccess &L : *Accesses) { + if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(&L)) { + if (MUD->getDefiningAccess() == nullptr) + MUD->setDefiningAccess(IncomingVal); + if (isa<MemoryDef>(&L)) + IncomingVal = &L; + } else { + IncomingVal = &L; + } + } + } + + // Pass through values to our successors + for (const BasicBlock *S : successors(BB)) { + auto It = PerBlockAccesses.find(S); + // Rename the phi nodes in our successor block + if (It == PerBlockAccesses.end() || !isa<MemoryPhi>(It->second->front())) + continue; + AccessList *Accesses = It->second.get(); + auto *Phi = cast<MemoryPhi>(&Accesses->front()); + Phi->addIncoming(IncomingVal, BB); + } + + return IncomingVal; +} + +/// \brief This is the standard SSA renaming algorithm. +/// +/// We walk the dominator tree in preorder, renaming accesses, and then filling +/// in phi nodes in our successors. 
+void MemorySSA::renamePass(DomTreeNode *Root, MemoryAccess *IncomingVal, + SmallPtrSet<BasicBlock *, 16> &Visited) { + SmallVector<RenamePassData, 32> WorkStack; + IncomingVal = renameBlock(Root->getBlock(), IncomingVal); + WorkStack.push_back({Root, Root->begin(), IncomingVal}); + Visited.insert(Root->getBlock()); + + while (!WorkStack.empty()) { + DomTreeNode *Node = WorkStack.back().DTN; + DomTreeNode::const_iterator ChildIt = WorkStack.back().ChildIt; + IncomingVal = WorkStack.back().IncomingVal; + + if (ChildIt == Node->end()) { + WorkStack.pop_back(); + } else { + DomTreeNode *Child = *ChildIt; + ++WorkStack.back().ChildIt; + BasicBlock *BB = Child->getBlock(); + Visited.insert(BB); + IncomingVal = renameBlock(BB, IncomingVal); + WorkStack.push_back({Child, Child->begin(), IncomingVal}); + } + } +} + +/// \brief Compute dominator levels, used by the phi insertion algorithm above. +void MemorySSA::computeDomLevels(DenseMap<DomTreeNode *, unsigned> &DomLevels) { + for (auto DFI = df_begin(DT->getRootNode()), DFE = df_end(DT->getRootNode()); + DFI != DFE; ++DFI) + DomLevels[*DFI] = DFI.getPathLength() - 1; +} + +/// \brief This handles unreachable block accesses by deleting phi nodes in +/// unreachable blocks, and marking all other unreachable MemoryAccess's as +/// being uses of the live on entry definition. +void MemorySSA::markUnreachableAsLiveOnEntry(BasicBlock *BB) { + assert(!DT->isReachableFromEntry(BB) && + "Reachable block found while handling unreachable blocks"); + + // Make sure phi nodes in our reachable successors end up with a + // LiveOnEntryDef for our incoming edge, even though our block is forward + // unreachable. We could just disconnect these blocks from the CFG fully, + // but we do not right now. + for (const BasicBlock *S : successors(BB)) { + if (!DT->isReachableFromEntry(S)) + continue; + auto It = PerBlockAccesses.find(S); + // Rename the phi nodes in our successor block + if (It == PerBlockAccesses.end() || !isa<MemoryPhi>(It->second->front())) + continue; + AccessList *Accesses = It->second.get(); + auto *Phi = cast<MemoryPhi>(&Accesses->front()); + Phi->addIncoming(LiveOnEntryDef.get(), BB); + } + + auto It = PerBlockAccesses.find(BB); + if (It == PerBlockAccesses.end()) + return; + + auto &Accesses = It->second; + for (auto AI = Accesses->begin(), AE = Accesses->end(); AI != AE;) { + auto Next = std::next(AI); + // If we have a phi, just remove it. We are going to replace all + // users with live on entry. + if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(AI)) + UseOrDef->setDefiningAccess(LiveOnEntryDef.get()); + else + Accesses->erase(AI); + AI = Next; + } +} + +MemorySSA::MemorySSA(Function &Func, AliasAnalysis *AA, DominatorTree *DT) + : AA(AA), DT(DT), F(Func), LiveOnEntryDef(nullptr), Walker(nullptr), + NextID(INVALID_MEMORYACCESS_ID) { + buildMemorySSA(); +} + +MemorySSA::~MemorySSA() { + // Drop all our references + for (const auto &Pair : PerBlockAccesses) + for (MemoryAccess &MA : *Pair.second) + MA.dropAllReferences(); +} + +MemorySSA::AccessList *MemorySSA::getOrCreateAccessList(const BasicBlock *BB) { + auto Res = PerBlockAccesses.insert(std::make_pair(BB, nullptr)); + + if (Res.second) + Res.first->second = make_unique<AccessList>(); + return Res.first->second.get(); +} + +/// This class is a batch walker of all MemoryUse's in the program, and points +/// their defining access at the thing that actually clobbers them. Because it +/// is a batch walker that touches everything, it does not operate like the +/// other walkers. 
This walker is basically performing a top-down SSA renaming +/// pass, where the version stack is used as the cache. This enables it to be +/// significantly more time and memory efficient than using the regular walker, +/// which is walking bottom-up. +class MemorySSA::OptimizeUses { +public: + OptimizeUses(MemorySSA *MSSA, MemorySSAWalker *Walker, AliasAnalysis *AA, + DominatorTree *DT) + : MSSA(MSSA), Walker(Walker), AA(AA), DT(DT) { + Walker = MSSA->getWalker(); + } + + void optimizeUses(); + +private: + /// This represents where a given memorylocation is in the stack. + struct MemlocStackInfo { + // This essentially is keeping track of versions of the stack. Whenever + // the stack changes due to pushes or pops, these versions increase. + unsigned long StackEpoch; + unsigned long PopEpoch; + // This is the lower bound of places on the stack to check. It is equal to + // the place the last stack walk ended. + // Note: Correctness depends on this being initialized to 0, which densemap + // does + unsigned long LowerBound; + const BasicBlock *LowerBoundBlock; + // This is where the last walk for this memory location ended. + unsigned long LastKill; + bool LastKillValid; + }; + void optimizeUsesInBlock(const BasicBlock *, unsigned long &, unsigned long &, + SmallVectorImpl<MemoryAccess *> &, + DenseMap<MemoryLocOrCall, MemlocStackInfo> &); + MemorySSA *MSSA; + MemorySSAWalker *Walker; + AliasAnalysis *AA; + DominatorTree *DT; +}; + +/// Optimize the uses in a given block This is basically the SSA renaming +/// algorithm, with one caveat: We are able to use a single stack for all +/// MemoryUses. This is because the set of *possible* reaching MemoryDefs is +/// the same for every MemoryUse. The *actual* clobbering MemoryDef is just +/// going to be some position in that stack of possible ones. +/// +/// We track the stack positions that each MemoryLocation needs +/// to check, and last ended at. This is because we only want to check the +/// things that changed since last time. The same MemoryLocation should +/// get clobbered by the same store (getModRefInfo does not use invariantness or +/// things like this, and if they start, we can modify MemoryLocOrCall to +/// include relevant data) +void MemorySSA::OptimizeUses::optimizeUsesInBlock( + const BasicBlock *BB, unsigned long &StackEpoch, unsigned long &PopEpoch, + SmallVectorImpl<MemoryAccess *> &VersionStack, + DenseMap<MemoryLocOrCall, MemlocStackInfo> &LocStackInfo) { + + /// If no accesses, nothing to do. + MemorySSA::AccessList *Accesses = MSSA->getWritableBlockAccesses(BB); + if (Accesses == nullptr) + return; + + // Pop everything that doesn't dominate the current block off the stack, + // increment the PopEpoch to account for this. + while (!VersionStack.empty()) { + BasicBlock *BackBlock = VersionStack.back()->getBlock(); + if (DT->dominates(BackBlock, BB)) + break; + while (VersionStack.back()->getBlock() == BackBlock) + VersionStack.pop_back(); + ++PopEpoch; + } + for (MemoryAccess &MA : *Accesses) { + auto *MU = dyn_cast<MemoryUse>(&MA); + if (!MU) { + VersionStack.push_back(&MA); + ++StackEpoch; + continue; + } + + if (isUseTriviallyOptimizableToLiveOnEntry(*AA, MU->getMemoryInst())) { + MU->setDefiningAccess(MSSA->getLiveOnEntryDef(), true); + continue; + } + + MemoryLocOrCall UseMLOC(MU); + auto &LocInfo = LocStackInfo[UseMLOC]; + // If the pop epoch changed, it means we've removed stuff from top of + // stack due to changing blocks. We may have to reset the lower bound or + // last kill info. 
+ if (LocInfo.PopEpoch != PopEpoch) { + LocInfo.PopEpoch = PopEpoch; + LocInfo.StackEpoch = StackEpoch; + // If the lower bound was in something that no longer dominates us, we + // have to reset it. + // We can't simply track stack size, because the stack may have had + // pushes/pops in the meantime. + // XXX: This is non-optimal, but only is slower cases with heavily + // branching dominator trees. To get the optimal number of queries would + // be to make lowerbound and lastkill a per-loc stack, and pop it until + // the top of that stack dominates us. This does not seem worth it ATM. + // A much cheaper optimization would be to always explore the deepest + // branch of the dominator tree first. This will guarantee this resets on + // the smallest set of blocks. + if (LocInfo.LowerBoundBlock && LocInfo.LowerBoundBlock != BB && + !DT->dominates(LocInfo.LowerBoundBlock, BB)) { + // Reset the lower bound of things to check. + // TODO: Some day we should be able to reset to last kill, rather than + // 0. + LocInfo.LowerBound = 0; + LocInfo.LowerBoundBlock = VersionStack[0]->getBlock(); + LocInfo.LastKillValid = false; + } + } else if (LocInfo.StackEpoch != StackEpoch) { + // If all that has changed is the StackEpoch, we only have to check the + // new things on the stack, because we've checked everything before. In + // this case, the lower bound of things to check remains the same. + LocInfo.PopEpoch = PopEpoch; + LocInfo.StackEpoch = StackEpoch; + } + if (!LocInfo.LastKillValid) { + LocInfo.LastKill = VersionStack.size() - 1; + LocInfo.LastKillValid = true; + } + + // At this point, we should have corrected last kill and LowerBound to be + // in bounds. + assert(LocInfo.LowerBound < VersionStack.size() && + "Lower bound out of range"); + assert(LocInfo.LastKill < VersionStack.size() && + "Last kill info out of range"); + // In any case, the new upper bound is the top of the stack. + unsigned long UpperBound = VersionStack.size() - 1; + + if (UpperBound - LocInfo.LowerBound > MaxCheckLimit) { + DEBUG(dbgs() << "MemorySSA skipping optimization of " << *MU << " (" + << *(MU->getMemoryInst()) << ")" + << " because there are " << UpperBound - LocInfo.LowerBound + << " stores to disambiguate\n"); + // Because we did not walk, LastKill is no longer valid, as this may + // have been a kill. + LocInfo.LastKillValid = false; + continue; + } + bool FoundClobberResult = false; + while (UpperBound > LocInfo.LowerBound) { + if (isa<MemoryPhi>(VersionStack[UpperBound])) { + // For phis, use the walker, see where we ended up, go there + Instruction *UseInst = MU->getMemoryInst(); + MemoryAccess *Result = Walker->getClobberingMemoryAccess(UseInst); + // We are guaranteed to find it or something is wrong + while (VersionStack[UpperBound] != Result) { + assert(UpperBound != 0); + --UpperBound; + } + FoundClobberResult = true; + break; + } + + MemoryDef *MD = cast<MemoryDef>(VersionStack[UpperBound]); + // If the lifetime of the pointer ends at this instruction, it's live on + // entry. + if (!UseMLOC.IsCall && lifetimeEndsAt(MD, UseMLOC.getLoc(), *AA)) { + // Reset UpperBound to liveOnEntryDef's place in the stack + UpperBound = 0; + FoundClobberResult = true; + break; + } + if (instructionClobbersQuery(MD, MU, UseMLOC, *AA)) { + FoundClobberResult = true; + break; + } + --UpperBound; + } + // At the end of this loop, UpperBound is either a clobber, or lower bound + // PHI walking may cause it to be < LowerBound, and in fact, < LastKill. 
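// --- Illustrative aside (editor's sketch, not part of this change) ----------
// The LowerBound/LastKill bookkeeping above is the usual "only rescan what was
// pushed since the previous query" trick. A standalone sketch with plain ints
// standing in for MemoryDefs (findNewestClobber and Threshold are hypothetical
// names, not LLVM API; assumes <vector>):
static size_t findNewestClobber(const std::vector<int> &VersionStack,
                                size_t LowerBound, size_t LastKill,
                                int Threshold) {
  // Scan only the entries pushed since the previous walk for this location...
  for (size_t I = VersionStack.size(); I > LowerBound; --I)
    if (VersionStack[I - 1] > Threshold) // stand-in for "clobbers the loc"
      return I - 1;
  // ...otherwise the previously recorded kill is still the best answer.
  return LastKill;
}
// --- end of aside ------------------------------------------------------------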
+ if (FoundClobberResult || UpperBound < LocInfo.LastKill) { + MU->setDefiningAccess(VersionStack[UpperBound], true); + // We were last killed now by where we got to + LocInfo.LastKill = UpperBound; + } else { + // Otherwise, we checked all the new ones, and now we know we can get to + // LastKill. + MU->setDefiningAccess(VersionStack[LocInfo.LastKill], true); + } + LocInfo.LowerBound = VersionStack.size() - 1; + LocInfo.LowerBoundBlock = BB; + } +} + +/// Optimize uses to point to their actual clobbering definitions. +void MemorySSA::OptimizeUses::optimizeUses() { + + // We perform a non-recursive top-down dominator tree walk + struct StackInfo { + const DomTreeNode *Node; + DomTreeNode::const_iterator Iter; + }; + + SmallVector<MemoryAccess *, 16> VersionStack; + SmallVector<StackInfo, 16> DomTreeWorklist; + DenseMap<MemoryLocOrCall, MemlocStackInfo> LocStackInfo; + VersionStack.push_back(MSSA->getLiveOnEntryDef()); + + unsigned long StackEpoch = 1; + unsigned long PopEpoch = 1; + for (const auto *DomNode : depth_first(DT->getRootNode())) + optimizeUsesInBlock(DomNode->getBlock(), StackEpoch, PopEpoch, VersionStack, + LocStackInfo); +} + +void MemorySSA::placePHINodes( + const SmallPtrSetImpl<BasicBlock *> &DefiningBlocks, + const DenseMap<const BasicBlock *, unsigned int> &BBNumbers) { + // Determine where our MemoryPhi's should go + ForwardIDFCalculator IDFs(*DT); + IDFs.setDefiningBlocks(DefiningBlocks); + SmallVector<BasicBlock *, 32> IDFBlocks; + IDFs.calculate(IDFBlocks); + + std::sort(IDFBlocks.begin(), IDFBlocks.end(), + [&BBNumbers](const BasicBlock *A, const BasicBlock *B) { + return BBNumbers.lookup(A) < BBNumbers.lookup(B); + }); + + // Now place MemoryPhi nodes. + for (auto &BB : IDFBlocks) { + // Insert phi node + AccessList *Accesses = getOrCreateAccessList(BB); + MemoryPhi *Phi = new MemoryPhi(BB->getContext(), BB, NextID++); + ValueToMemoryAccess[BB] = Phi; + // Phi's always are placed at the front of the block. + Accesses->push_front(Phi); + } +} + +void MemorySSA::buildMemorySSA() { + // We create an access to represent "live on entry", for things like + // arguments or users of globals, where the memory they use is defined before + // the beginning of the function. We do not actually insert it into the IR. + // We do not define a live on exit for the immediate uses, and thus our + // semantics do *not* imply that something with no immediate uses can simply + // be removed. + BasicBlock &StartingPoint = F.getEntryBlock(); + LiveOnEntryDef = make_unique<MemoryDef>(F.getContext(), nullptr, nullptr, + &StartingPoint, NextID++); + DenseMap<const BasicBlock *, unsigned int> BBNumbers; + unsigned NextBBNum = 0; + + // We maintain lists of memory accesses per-block, trading memory for time. We + // could just look up the memory access for every possible instruction in the + // stream. + SmallPtrSet<BasicBlock *, 32> DefiningBlocks; + SmallPtrSet<BasicBlock *, 32> DefUseBlocks; + // Go through each block, figure out where defs occur, and chain together all + // the accesses. 
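// --- Illustrative aside (editor's sketch, not part of this change) ----------
// For orientation before the construction loop below: once buildMemorySSA
// finishes, a typical client drives the analysis roughly as follows. Only
// entry points defined in this file are used; printClobbers itself and its
// loop are hypothetical.
static void printClobbers(Function &F, AliasAnalysis &AA, DominatorTree &DT) {
  MemorySSA MSSA(F, &AA, &DT);              // runs buildMemorySSA()
  MemorySSAWalker *Walker = MSSA.getWalker();
  for (BasicBlock &BB : F)
    for (Instruction &I : BB)
      if (MemoryUseOrDef *MA = MSSA.getMemoryAccess(&I)) {
        MemoryAccess *Clobber = Walker->getClobberingMemoryAccess(MA);
        if (!MSSA.isLiveOnEntryDef(Clobber))
          Clobber->dump(); // print the clobbering def or phi
      }
}
// --- end of aside ------------------------------------------------------------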
+ for (BasicBlock &B : F) { + BBNumbers[&B] = NextBBNum++; + bool InsertIntoDef = false; + AccessList *Accesses = nullptr; + for (Instruction &I : B) { + MemoryUseOrDef *MUD = createNewAccess(&I); + if (!MUD) + continue; + InsertIntoDef |= isa<MemoryDef>(MUD); + + if (!Accesses) + Accesses = getOrCreateAccessList(&B); + Accesses->push_back(MUD); + } + if (InsertIntoDef) + DefiningBlocks.insert(&B); + if (Accesses) + DefUseBlocks.insert(&B); + } + placePHINodes(DefiningBlocks, BBNumbers); + + // Now do regular SSA renaming on the MemoryDef/MemoryUse. Visited will get + // filled in with all blocks. + SmallPtrSet<BasicBlock *, 16> Visited; + renamePass(DT->getRootNode(), LiveOnEntryDef.get(), Visited); + + CachingWalker *Walker = getWalkerImpl(); + + // We're doing a batch of updates; don't drop useful caches between them. + Walker->setAutoResetWalker(false); + OptimizeUses(this, Walker, AA, DT).optimizeUses(); + Walker->setAutoResetWalker(true); + Walker->resetClobberWalker(); + + // Mark the uses in unreachable blocks as live on entry, so that they go + // somewhere. + for (auto &BB : F) + if (!Visited.count(&BB)) + markUnreachableAsLiveOnEntry(&BB); +} + +MemorySSAWalker *MemorySSA::getWalker() { return getWalkerImpl(); } + +MemorySSA::CachingWalker *MemorySSA::getWalkerImpl() { + if (Walker) + return Walker.get(); + + Walker = make_unique<CachingWalker>(this, AA, DT); + return Walker.get(); +} + +MemoryPhi *MemorySSA::createMemoryPhi(BasicBlock *BB) { + assert(!getMemoryAccess(BB) && "MemoryPhi already exists for this BB"); + AccessList *Accesses = getOrCreateAccessList(BB); + MemoryPhi *Phi = new MemoryPhi(BB->getContext(), BB, NextID++); + ValueToMemoryAccess[BB] = Phi; + // Phi's always are placed at the front of the block. + Accesses->push_front(Phi); + BlockNumberingValid.erase(BB); + return Phi; +} + +MemoryUseOrDef *MemorySSA::createDefinedAccess(Instruction *I, + MemoryAccess *Definition) { + assert(!isa<PHINode>(I) && "Cannot create a defined access for a PHI"); + MemoryUseOrDef *NewAccess = createNewAccess(I); + assert( + NewAccess != nullptr && + "Tried to create a memory access for a non-memory touching instruction"); + NewAccess->setDefiningAccess(Definition); + return NewAccess; +} + +MemoryAccess *MemorySSA::createMemoryAccessInBB(Instruction *I, + MemoryAccess *Definition, + const BasicBlock *BB, + InsertionPlace Point) { + MemoryUseOrDef *NewAccess = createDefinedAccess(I, Definition); + auto *Accesses = getOrCreateAccessList(BB); + if (Point == Beginning) { + // It goes after any phi nodes + auto AI = find_if( + *Accesses, [](const MemoryAccess &MA) { return !isa<MemoryPhi>(MA); }); + + Accesses->insert(AI, NewAccess); + } else { + Accesses->push_back(NewAccess); + } + BlockNumberingValid.erase(BB); + return NewAccess; +} + +MemoryUseOrDef *MemorySSA::createMemoryAccessBefore(Instruction *I, + MemoryAccess *Definition, + MemoryUseOrDef *InsertPt) { + assert(I->getParent() == InsertPt->getBlock() && + "New and old access must be in the same block"); + MemoryUseOrDef *NewAccess = createDefinedAccess(I, Definition); + auto *Accesses = getOrCreateAccessList(InsertPt->getBlock()); + Accesses->insert(AccessList::iterator(InsertPt), NewAccess); + BlockNumberingValid.erase(InsertPt->getBlock()); + return NewAccess; +} + +MemoryUseOrDef *MemorySSA::createMemoryAccessAfter(Instruction *I, + MemoryAccess *Definition, + MemoryAccess *InsertPt) { + assert(I->getParent() == InsertPt->getBlock() && + "New and old access must be in the same block"); + MemoryUseOrDef *NewAccess = 
createDefinedAccess(I, Definition); + auto *Accesses = getOrCreateAccessList(InsertPt->getBlock()); + Accesses->insertAfter(AccessList::iterator(InsertPt), NewAccess); + BlockNumberingValid.erase(InsertPt->getBlock()); + return NewAccess; +} + +void MemorySSA::spliceMemoryAccessAbove(MemoryDef *Where, + MemoryUseOrDef *What) { + assert(What != getLiveOnEntryDef() && + Where != getLiveOnEntryDef() && "Can't splice (above) LOE."); + assert(dominates(Where, What) && "Only upwards splices are permitted."); + + if (Where == What) + return; + if (isa<MemoryDef>(What)) { + // TODO: possibly use removeMemoryAccess' more efficient RAUW + What->replaceAllUsesWith(What->getDefiningAccess()); + What->setDefiningAccess(Where->getDefiningAccess()); + Where->setDefiningAccess(What); + } + AccessList *Src = getWritableBlockAccesses(What->getBlock()); + AccessList *Dest = getWritableBlockAccesses(Where->getBlock()); + Dest->splice(AccessList::iterator(Where), *Src, What); + + BlockNumberingValid.erase(What->getBlock()); + if (What->getBlock() != Where->getBlock()) + BlockNumberingValid.erase(Where->getBlock()); +} + +/// \brief Helper function to create new memory accesses +MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I) { + // The assume intrinsic has a control dependency which we model by claiming + // that it writes arbitrarily. Ignore that fake memory dependency here. + // FIXME: Replace this special casing with a more accurate modelling of + // assume's control dependency. + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) + if (II->getIntrinsicID() == Intrinsic::assume) + return nullptr; + + // Find out what affect this instruction has on memory. + ModRefInfo ModRef = AA->getModRefInfo(I); + bool Def = bool(ModRef & MRI_Mod); + bool Use = bool(ModRef & MRI_Ref); + + // It's possible for an instruction to not modify memory at all. During + // construction, we ignore them. + if (!Def && !Use) + return nullptr; + + assert((Def || Use) && + "Trying to create a memory access with a non-memory instruction"); + + MemoryUseOrDef *MUD; + if (Def) + MUD = new MemoryDef(I->getContext(), nullptr, I, I->getParent(), NextID++); + else + MUD = new MemoryUse(I->getContext(), nullptr, I, I->getParent()); + ValueToMemoryAccess[I] = MUD; + return MUD; +} + +MemoryAccess *MemorySSA::findDominatingDef(BasicBlock *UseBlock, + enum InsertionPlace Where) { + // Handle the initial case + if (Where == Beginning) + // The only thing that could define us at the beginning is a phi node + if (MemoryPhi *Phi = getMemoryAccess(UseBlock)) + return Phi; + + DomTreeNode *CurrNode = DT->getNode(UseBlock); + // Need to be defined by our dominator + if (Where == Beginning) + CurrNode = CurrNode->getIDom(); + Where = End; + while (CurrNode) { + auto It = PerBlockAccesses.find(CurrNode->getBlock()); + if (It != PerBlockAccesses.end()) { + auto &Accesses = It->second; + for (MemoryAccess &RA : reverse(*Accesses)) { + if (isa<MemoryDef>(RA) || isa<MemoryPhi>(RA)) + return &RA; + } + } + CurrNode = CurrNode->getIDom(); + } + return LiveOnEntryDef.get(); +} + +/// \brief Returns true if \p Replacer dominates \p Replacee . +bool MemorySSA::dominatesUse(const MemoryAccess *Replacer, + const MemoryAccess *Replacee) const { + if (isa<MemoryUseOrDef>(Replacee)) + return DT->dominates(Replacer->getBlock(), Replacee->getBlock()); + const auto *MP = cast<MemoryPhi>(Replacee); + // For a phi node, the use occurs in the predecessor block of the phi node. 
+ // Since we may occur multiple times in the phi node, we have to check each + // operand to ensure Replacer dominates each operand where Replacee occurs. + for (const Use &Arg : MP->operands()) { + if (Arg.get() != Replacee && + !DT->dominates(Replacer->getBlock(), MP->getIncomingBlock(Arg))) + return false; + } + return true; +} + +/// \brief If all arguments of a MemoryPHI are defined by the same incoming +/// argument, return that argument. +static MemoryAccess *onlySingleValue(MemoryPhi *MP) { + MemoryAccess *MA = nullptr; + + for (auto &Arg : MP->operands()) { + if (!MA) + MA = cast<MemoryAccess>(Arg); + else if (MA != Arg) + return nullptr; + } + return MA; +} + +/// \brief Properly remove \p MA from all of MemorySSA's lookup tables. +/// +/// Because of the way the intrusive list and use lists work, it is important to +/// do removal in the right order. +void MemorySSA::removeFromLookups(MemoryAccess *MA) { + assert(MA->use_empty() && + "Trying to remove memory access that still has uses"); + BlockNumbering.erase(MA); + if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(MA)) + MUD->setDefiningAccess(nullptr); + // Invalidate our walker's cache if necessary + if (!isa<MemoryUse>(MA)) + Walker->invalidateInfo(MA); + // The call below to erase will destroy MA, so we can't change the order we + // are doing things here + Value *MemoryInst; + if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(MA)) { + MemoryInst = MUD->getMemoryInst(); + } else { + MemoryInst = MA->getBlock(); + } + auto VMA = ValueToMemoryAccess.find(MemoryInst); + if (VMA->second == MA) + ValueToMemoryAccess.erase(VMA); + + auto AccessIt = PerBlockAccesses.find(MA->getBlock()); + std::unique_ptr<AccessList> &Accesses = AccessIt->second; + Accesses->erase(MA); + if (Accesses->empty()) + PerBlockAccesses.erase(AccessIt); +} + +void MemorySSA::removeMemoryAccess(MemoryAccess *MA) { + assert(!isLiveOnEntryDef(MA) && "Trying to remove the live on entry def"); + // We can only delete phi nodes if they have no uses, or we can replace all + // uses with a single definition. + MemoryAccess *NewDefTarget = nullptr; + if (MemoryPhi *MP = dyn_cast<MemoryPhi>(MA)) { + // Note that it is sufficient to know that all edges of the phi node have + // the same argument. If they do, by the definition of dominance frontiers + // (which we used to place this phi), that argument must dominate this phi, + // and thus, must dominate the phi's uses, and so we will not hit the assert + // below. + NewDefTarget = onlySingleValue(MP); + assert((NewDefTarget || MP->use_empty()) && + "We can't delete this memory phi"); + } else { + NewDefTarget = cast<MemoryUseOrDef>(MA)->getDefiningAccess(); + } + + // Re-point the uses at our defining access + if (!MA->use_empty()) { + // Reset optimized on users of this store, and reset the uses. + // A few notes: + // 1. This is a slightly modified version of RAUW to avoid walking the + // uses twice here. + // 2. If we wanted to be complete, we would have to reset the optimized + // flags on users of phi nodes if doing the below makes a phi node have all + // the same arguments. Instead, we prefer users to removeMemoryAccess those + // phi nodes, because doing it here would be N^3. + if (MA->hasValueHandle()) + ValueHandleBase::ValueIsRAUWd(MA, NewDefTarget); + // Note: We assume MemorySSA is not used in metadata since it's not really + // part of the IR. 
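// --- Illustrative aside (editor's sketch, not part of this change) ----------
// From a client's point of view the contract is: detach the access from
// MemorySSA first, then erase the IR instruction. A hypothetical helper for a
// transform deleting a dead store (removeDeadStore is not LLVM API):
static void removeDeadStore(MemorySSA &MSSA, StoreInst *SI) {
  if (MemoryAccess *MA = MSSA.getMemoryAccess(SI))
    MSSA.removeMemoryAccess(MA); // re-points all uses at the defining access
  SI->eraseFromParent();         // only now is it safe to delete the IR
}
// --- end of aside ------------------------------------------------------------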
+ + while (!MA->use_empty()) { + Use &U = *MA->use_begin(); + if (MemoryUse *MU = dyn_cast<MemoryUse>(U.getUser())) + MU->resetOptimized(); + U.set(NewDefTarget); + } + } + + // The call below to erase will destroy MA, so we can't change the order we + // are doing things here + removeFromLookups(MA); +} + +void MemorySSA::print(raw_ostream &OS) const { + MemorySSAAnnotatedWriter Writer(this); + F.print(OS, &Writer); +} + +void MemorySSA::dump() const { + MemorySSAAnnotatedWriter Writer(this); + F.print(dbgs(), &Writer); +} + +void MemorySSA::verifyMemorySSA() const { + verifyDefUses(F); + verifyDomination(F); + verifyOrdering(F); + Walker->verify(this); +} + +/// \brief Verify that the order and existence of MemoryAccesses matches the +/// order and existence of memory affecting instructions. +void MemorySSA::verifyOrdering(Function &F) const { + // Walk all the blocks, comparing what the lookups think and what the access + // lists think, as well as the order in the blocks vs the order in the access + // lists. + SmallVector<MemoryAccess *, 32> ActualAccesses; + for (BasicBlock &B : F) { + const AccessList *AL = getBlockAccesses(&B); + MemoryAccess *Phi = getMemoryAccess(&B); + if (Phi) + ActualAccesses.push_back(Phi); + for (Instruction &I : B) { + MemoryAccess *MA = getMemoryAccess(&I); + assert((!MA || AL) && "We have memory affecting instructions " + "in this block but they are not in the " + "access list"); + if (MA) + ActualAccesses.push_back(MA); + } + // Either we hit the assert, really have no accesses, or we have both + // accesses and an access list + if (!AL) + continue; + assert(AL->size() == ActualAccesses.size() && + "We don't have the same number of accesses in the block as on the " + "access list"); + auto ALI = AL->begin(); + auto AAI = ActualAccesses.begin(); + while (ALI != AL->end() && AAI != ActualAccesses.end()) { + assert(&*ALI == *AAI && "Not the same accesses in the same order"); + ++ALI; + ++AAI; + } + ActualAccesses.clear(); + } +} + +/// \brief Verify the domination properties of MemorySSA by checking that each +/// definition dominates all of its uses. +void MemorySSA::verifyDomination(Function &F) const { +#ifndef NDEBUG + for (BasicBlock &B : F) { + // Phi nodes are attached to basic blocks + if (MemoryPhi *MP = getMemoryAccess(&B)) + for (const Use &U : MP->uses()) + assert(dominates(MP, U) && "Memory PHI does not dominate it's uses"); + + for (Instruction &I : B) { + MemoryAccess *MD = dyn_cast_or_null<MemoryDef>(getMemoryAccess(&I)); + if (!MD) + continue; + + for (const Use &U : MD->uses()) + assert(dominates(MD, U) && "Memory Def does not dominate it's uses"); + } + } +#endif +} + +/// \brief Verify the def-use lists in MemorySSA, by verifying that \p Use +/// appears in the use list of \p Def. 
+ +void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) const { +#ifndef NDEBUG + // The live on entry use may cause us to get a NULL def here + if (!Def) + assert(isLiveOnEntryDef(Use) && + "Null def but use not point to live on entry def"); + else + assert(is_contained(Def->users(), Use) && + "Did not find use in def's use list"); +#endif +} + +/// \brief Verify the immediate use information, by walking all the memory +/// accesses and verifying that, for each use, it appears in the +/// appropriate def's use list +void MemorySSA::verifyDefUses(Function &F) const { + for (BasicBlock &B : F) { + // Phi nodes are attached to basic blocks + if (MemoryPhi *Phi = getMemoryAccess(&B)) { + assert(Phi->getNumOperands() == static_cast<unsigned>(std::distance( + pred_begin(&B), pred_end(&B))) && + "Incomplete MemoryPhi Node"); + for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) + verifyUseInDefs(Phi->getIncomingValue(I), Phi); + } + + for (Instruction &I : B) { + if (MemoryUseOrDef *MA = getMemoryAccess(&I)) { + verifyUseInDefs(MA->getDefiningAccess(), MA); + } + } + } +} + +MemoryUseOrDef *MemorySSA::getMemoryAccess(const Instruction *I) const { + return cast_or_null<MemoryUseOrDef>(ValueToMemoryAccess.lookup(I)); +} + +MemoryPhi *MemorySSA::getMemoryAccess(const BasicBlock *BB) const { + return cast_or_null<MemoryPhi>(ValueToMemoryAccess.lookup(cast<Value>(BB))); +} + +/// Perform a local numbering on blocks so that instruction ordering can be +/// determined in constant time. +/// TODO: We currently just number in order. If we numbered by N, we could +/// allow at least N-1 sequences of insertBefore or insertAfter (and at least +/// log2(N) sequences of mixed before and after) without needing to invalidate +/// the numbering. +void MemorySSA::renumberBlock(const BasicBlock *B) const { + // The pre-increment ensures the numbers really start at 1. + unsigned long CurrentNumber = 0; + const AccessList *AL = getBlockAccesses(B); + assert(AL != nullptr && "Asking to renumber an empty block"); + for (const auto &I : *AL) + BlockNumbering[&I] = ++CurrentNumber; + BlockNumberingValid.insert(B); +} + +/// \brief Determine, for two memory accesses in the same block, +/// whether \p Dominator dominates \p Dominatee. +/// \returns True if \p Dominator dominates \p Dominatee. +bool MemorySSA::locallyDominates(const MemoryAccess *Dominator, + const MemoryAccess *Dominatee) const { + + const BasicBlock *DominatorBlock = Dominator->getBlock(); + + assert((DominatorBlock == Dominatee->getBlock()) && + "Asking for local domination when accesses are in different blocks!"); + // A node dominates itself. + if (Dominatee == Dominator) + return true; + + // When Dominatee is defined on function entry, it is not dominated by another + // memory access. + if (isLiveOnEntryDef(Dominatee)) + return false; + + // When Dominator is defined on function entry, it dominates the other memory + // access. 
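// --- Illustrative aside (editor's sketch, not part of this change) ----------
// After one more liveOnEntry special case, this function reduces local
// dominance to comparing ordinals handed out lazily by renumberBlock(). A
// standalone sketch of the same idea over a toy access list
// (toyLocallyDominates is hypothetical, not LLVM API; assumes <map>, <vector>):
static bool toyLocallyDominates(const std::vector<const void *> &AccessList,
                                std::map<const void *, unsigned long> &Numbers,
                                const void *Dominator, const void *Dominatee) {
  if (Numbers.empty()) {          // number lazily, on the first query...
    unsigned long N = 0;
    for (const void *MA : AccessList)
      Numbers[MA] = ++N;          // ...starting at 1, as renumberBlock does
  }
  return Numbers[Dominator] < Numbers[Dominatee]; // earlier in the list wins
}
// --- end of aside ------------------------------------------------------------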
+ if (isLiveOnEntryDef(Dominator)) + return true; + + if (!BlockNumberingValid.count(DominatorBlock)) + renumberBlock(DominatorBlock); + + unsigned long DominatorNum = BlockNumbering.lookup(Dominator); + // All numbers start with 1 + assert(DominatorNum != 0 && "Block was not numbered properly"); + unsigned long DominateeNum = BlockNumbering.lookup(Dominatee); + assert(DominateeNum != 0 && "Block was not numbered properly"); + return DominatorNum < DominateeNum; +} + +bool MemorySSA::dominates(const MemoryAccess *Dominator, + const MemoryAccess *Dominatee) const { + if (Dominator == Dominatee) + return true; + + if (isLiveOnEntryDef(Dominatee)) + return false; + + if (Dominator->getBlock() != Dominatee->getBlock()) + return DT->dominates(Dominator->getBlock(), Dominatee->getBlock()); + return locallyDominates(Dominator, Dominatee); +} + +bool MemorySSA::dominates(const MemoryAccess *Dominator, + const Use &Dominatee) const { + if (MemoryPhi *MP = dyn_cast<MemoryPhi>(Dominatee.getUser())) { + BasicBlock *UseBB = MP->getIncomingBlock(Dominatee); + // The def must dominate the incoming block of the phi. + if (UseBB != Dominator->getBlock()) + return DT->dominates(Dominator->getBlock(), UseBB); + // If the UseBB and the DefBB are the same, compare locally. + return locallyDominates(Dominator, cast<MemoryAccess>(Dominatee)); + } + // If it's not a PHI node use, the normal dominates can already handle it. + return dominates(Dominator, cast<MemoryAccess>(Dominatee.getUser())); +} + +const static char LiveOnEntryStr[] = "liveOnEntry"; + +void MemoryDef::print(raw_ostream &OS) const { + MemoryAccess *UO = getDefiningAccess(); + + OS << getID() << " = MemoryDef("; + if (UO && UO->getID()) + OS << UO->getID(); + else + OS << LiveOnEntryStr; + OS << ')'; +} + +void MemoryPhi::print(raw_ostream &OS) const { + bool First = true; + OS << getID() << " = MemoryPhi("; + for (const auto &Op : operands()) { + BasicBlock *BB = getIncomingBlock(Op); + MemoryAccess *MA = cast<MemoryAccess>(Op); + if (!First) + OS << ','; + else + First = false; + + OS << '{'; + if (BB->hasName()) + OS << BB->getName(); + else + BB->printAsOperand(OS, false); + OS << ','; + if (unsigned ID = MA->getID()) + OS << ID; + else + OS << LiveOnEntryStr; + OS << '}'; + } + OS << ')'; +} + +MemoryAccess::~MemoryAccess() {} + +void MemoryUse::print(raw_ostream &OS) const { + MemoryAccess *UO = getDefiningAccess(); + OS << "MemoryUse("; + if (UO && UO->getID()) + OS << UO->getID(); + else + OS << LiveOnEntryStr; + OS << ')'; +} + +void MemoryAccess::dump() const { + print(dbgs()); + dbgs() << "\n"; +} + +char MemorySSAPrinterLegacyPass::ID = 0; + +MemorySSAPrinterLegacyPass::MemorySSAPrinterLegacyPass() : FunctionPass(ID) { + initializeMemorySSAPrinterLegacyPassPass(*PassRegistry::getPassRegistry()); +} + +void MemorySSAPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<MemorySSAWrapperPass>(); + AU.addPreserved<MemorySSAWrapperPass>(); +} + +bool MemorySSAPrinterLegacyPass::runOnFunction(Function &F) { + auto &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA(); + MSSA.print(dbgs()); + if (VerifyMemorySSA) + MSSA.verifyMemorySSA(); + return false; +} + +AnalysisKey MemorySSAAnalysis::Key; + +MemorySSAAnalysis::Result MemorySSAAnalysis::run(Function &F, + FunctionAnalysisManager &AM) { + auto &DT = AM.getResult<DominatorTreeAnalysis>(F); + auto &AA = AM.getResult<AAManager>(F); + return MemorySSAAnalysis::Result(make_unique<MemorySSA>(F, &AA, &DT)); +} + +PreservedAnalyses 
MemorySSAPrinterPass::run(Function &F, + FunctionAnalysisManager &AM) { + OS << "MemorySSA for function: " << F.getName() << "\n"; + AM.getResult<MemorySSAAnalysis>(F).getMSSA().print(OS); + + return PreservedAnalyses::all(); +} + +PreservedAnalyses MemorySSAVerifierPass::run(Function &F, + FunctionAnalysisManager &AM) { + AM.getResult<MemorySSAAnalysis>(F).getMSSA().verifyMemorySSA(); + + return PreservedAnalyses::all(); +} + +char MemorySSAWrapperPass::ID = 0; + +MemorySSAWrapperPass::MemorySSAWrapperPass() : FunctionPass(ID) { + initializeMemorySSAWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +void MemorySSAWrapperPass::releaseMemory() { MSSA.reset(); } + +void MemorySSAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequiredTransitive<DominatorTreeWrapperPass>(); + AU.addRequiredTransitive<AAResultsWrapperPass>(); +} + +bool MemorySSAWrapperPass::runOnFunction(Function &F) { + auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); + MSSA.reset(new MemorySSA(F, &AA, &DT)); + return false; +} + +void MemorySSAWrapperPass::verifyAnalysis() const { MSSA->verifyMemorySSA(); } + +void MemorySSAWrapperPass::print(raw_ostream &OS, const Module *M) const { + MSSA->print(OS); +} + +MemorySSAWalker::MemorySSAWalker(MemorySSA *M) : MSSA(M) {} + +MemorySSA::CachingWalker::CachingWalker(MemorySSA *M, AliasAnalysis *A, + DominatorTree *D) + : MemorySSAWalker(M), Walker(*M, *A, *D, Cache), AutoResetWalker(true) {} + +MemorySSA::CachingWalker::~CachingWalker() {} + +void MemorySSA::CachingWalker::invalidateInfo(MemoryAccess *MA) { + // TODO: We can do much better cache invalidation with differently stored + // caches. For now, for MemoryUses, we simply remove them + // from the cache, and kill the entire call/non-call cache for everything + // else. The problem is for phis or defs, currently we'd need to follow use + // chains down and invalidate anything below us in the chain that currently + // terminates at this access. + + // See if this is a MemoryUse, if so, just remove the cached info. MemoryUse + // is by definition never a barrier, so nothing in the cache could point to + // this use. In that case, we only need invalidate the info for the use + // itself. + + if (MemoryUse *MU = dyn_cast<MemoryUse>(MA)) { + UpwardsMemoryQuery Q(MU->getMemoryInst(), MU); + Cache.remove(MU, Q.StartingLoc, Q.IsCall); + MU->resetOptimized(); + } else { + // If it is not a use, the best we can do right now is destroy the cache. + Cache.clear(); + } + +#ifdef EXPENSIVE_CHECKS + verifyRemoved(MA); +#endif +} + +/// \brief Walk the use-def chains starting at \p MA and find +/// the MemoryAccess that actually clobbers Loc. 
+/// +/// \returns our clobbering memory access +MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess( + MemoryAccess *StartingAccess, UpwardsMemoryQuery &Q) { + MemoryAccess *New = Walker.findClobber(StartingAccess, Q); +#ifdef EXPENSIVE_CHECKS + MemoryAccess *NewNoCache = + Walker.findClobber(StartingAccess, Q, /*UseWalkerCache=*/false); + assert(NewNoCache == New && "Cache made us hand back a different result?"); +#endif + if (AutoResetWalker) + resetClobberWalker(); + return New; +} + +MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess( + MemoryAccess *StartingAccess, const MemoryLocation &Loc) { + if (isa<MemoryPhi>(StartingAccess)) + return StartingAccess; + + auto *StartingUseOrDef = cast<MemoryUseOrDef>(StartingAccess); + if (MSSA->isLiveOnEntryDef(StartingUseOrDef)) + return StartingUseOrDef; + + Instruction *I = StartingUseOrDef->getMemoryInst(); + + // Conservatively, fences are always clobbers, so don't perform the walk if we + // hit a fence. + if (!ImmutableCallSite(I) && I->isFenceLike()) + return StartingUseOrDef; + + UpwardsMemoryQuery Q; + Q.OriginalAccess = StartingUseOrDef; + Q.StartingLoc = Loc; + Q.Inst = I; + Q.IsCall = false; + + if (auto *CacheResult = Cache.lookup(StartingUseOrDef, Loc, Q.IsCall)) + return CacheResult; + + // Unlike the other function, do not walk to the def of a def, because we are + // handed something we already believe is the clobbering access. + MemoryAccess *DefiningAccess = isa<MemoryUse>(StartingUseOrDef) + ? StartingUseOrDef->getDefiningAccess() + : StartingUseOrDef; + + MemoryAccess *Clobber = getClobberingMemoryAccess(DefiningAccess, Q); + DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is "); + DEBUG(dbgs() << *StartingUseOrDef << "\n"); + DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is "); + DEBUG(dbgs() << *Clobber << "\n"); + return Clobber; +} + +MemoryAccess * +MemorySSA::CachingWalker::getClobberingMemoryAccess(MemoryAccess *MA) { + auto *StartingAccess = dyn_cast<MemoryUseOrDef>(MA); + // If this is a MemoryPhi, we can't do anything. + if (!StartingAccess) + return MA; + + // If this is an already optimized use or def, return the optimized result. + // Note: Currently, we do not store the optimized def result because we'd need + // a separate field, since we can't use it as the defining access. + if (MemoryUse *MU = dyn_cast<MemoryUse>(StartingAccess)) + if (MU->isOptimized()) + return MU->getDefiningAccess(); + + const Instruction *I = StartingAccess->getMemoryInst(); + UpwardsMemoryQuery Q(I, StartingAccess); + // We can't sanely do anything with a fences, they conservatively + // clobber all memory, and have no locations to get pointers from to + // try to disambiguate. + if (!Q.IsCall && I->isFenceLike()) + return StartingAccess; + + if (auto *CacheResult = Cache.lookup(StartingAccess, Q.StartingLoc, Q.IsCall)) + return CacheResult; + + if (isUseTriviallyOptimizableToLiveOnEntry(*MSSA->AA, I)) { + MemoryAccess *LiveOnEntry = MSSA->getLiveOnEntryDef(); + Cache.insert(StartingAccess, LiveOnEntry, Q.StartingLoc, Q.IsCall); + if (MemoryUse *MU = dyn_cast<MemoryUse>(StartingAccess)) + MU->setDefiningAccess(LiveOnEntry, true); + return LiveOnEntry; + } + + // Start with the thing we already think clobbers this location + MemoryAccess *DefiningAccess = StartingAccess->getDefiningAccess(); + + // At this point, DefiningAccess may be the live on entry def. + // If it is, we will not get a better result. 
+ if (MSSA->isLiveOnEntryDef(DefiningAccess)) + return DefiningAccess; + + MemoryAccess *Result = getClobberingMemoryAccess(DefiningAccess, Q); + DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is "); + DEBUG(dbgs() << *DefiningAccess << "\n"); + DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is "); + DEBUG(dbgs() << *Result << "\n"); + if (MemoryUse *MU = dyn_cast<MemoryUse>(StartingAccess)) + MU->setDefiningAccess(Result, true); + + return Result; +} + +// Verify that MA doesn't exist in any of the caches. +void MemorySSA::CachingWalker::verifyRemoved(MemoryAccess *MA) { + assert(!Cache.contains(MA) && "Found removed MemoryAccess in cache."); +} + +MemoryAccess * +DoNothingMemorySSAWalker::getClobberingMemoryAccess(MemoryAccess *MA) { + if (auto *Use = dyn_cast<MemoryUseOrDef>(MA)) + return Use->getDefiningAccess(); + return MA; +} + +MemoryAccess *DoNothingMemorySSAWalker::getClobberingMemoryAccess( + MemoryAccess *StartingAccess, const MemoryLocation &) { + if (auto *Use = dyn_cast<MemoryUseOrDef>(StartingAccess)) + return Use->getDefiningAccess(); + return StartingAccess; +} +} // namespace llvm diff --git a/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp b/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp new file mode 100644 index 000000000000..c999bd008fef --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp @@ -0,0 +1,150 @@ +//===- MetaRenamer.cpp - Rename everything with metasyntatic names --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass renames everything with metasyntatic names. The intent is to use +// this pass after bugpoint reduction to conceal the nature of the original +// program. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/TypeFinder.h" +#include "llvm/Pass.h" +using namespace llvm; + +namespace { + + // This PRNG is from the ISO C spec. It is intentionally simple and + // unsuitable for cryptographic use. We're just looking for enough + // variety to surprise and delight users. + struct PRNG { + unsigned long next; + + void srand(unsigned int seed) { + next = seed; + } + + int rand() { + next = next * 1103515245 + 12345; + return (unsigned int)(next / 65536) % 32768; + } + }; + + static const char *const metaNames[] = { + // See http://en.wikipedia.org/wiki/Metasyntactic_variable + "foo", "bar", "baz", "quux", "barney", "snork", "zot", "blam", "hoge", + "wibble", "wobble", "widget", "wombat", "ham", "eggs", "pluto", "spam" + }; + + struct Renamer { + Renamer(unsigned int seed) { + prng.srand(seed); + } + + const char *newName() { + return metaNames[prng.rand() % array_lengthof(metaNames)]; + } + + PRNG prng; + }; + + struct MetaRenamer : public ModulePass { + static char ID; // Pass identification, replacement for typeid + MetaRenamer() : ModulePass(ID) { + initializeMetaRenamerPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + + bool runOnModule(Module &M) override { + // Seed our PRNG with simple additive sum of ModuleID. 
We're looking to + // simply avoid always having the same function names, and we need to + // remain deterministic. + unsigned int randSeed = 0; + for (auto C : M.getModuleIdentifier()) + randSeed += C; + + Renamer renamer(randSeed); + + // Rename all aliases + for (auto AI = M.alias_begin(), AE = M.alias_end(); AI != AE; ++AI) { + StringRef Name = AI->getName(); + if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1)) + continue; + + AI->setName("alias"); + } + + // Rename all global variables + for (auto GI = M.global_begin(), GE = M.global_end(); GI != GE; ++GI) { + StringRef Name = GI->getName(); + if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1)) + continue; + + GI->setName("global"); + } + + // Rename all struct types + TypeFinder StructTypes; + StructTypes.run(M, true); + for (StructType *STy : StructTypes) { + if (STy->isLiteral() || STy->getName().empty()) continue; + + SmallString<128> NameStorage; + STy->setName((Twine("struct.") + + renamer.newName()).toStringRef(NameStorage)); + } + + // Rename all functions + for (auto &F : M) { + StringRef Name = F.getName(); + if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1)) + continue; + + F.setName(renamer.newName()); + runOnFunction(F); + } + return true; + } + + bool runOnFunction(Function &F) { + for (auto AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI) + if (!AI->getType()->isVoidTy()) + AI->setName("arg"); + + for (auto &BB : F) { + BB.setName("bb"); + + for (auto &I : BB) + if (!I.getType()->isVoidTy()) + I.setName("tmp"); + } + return true; + } + }; +} + +char MetaRenamer::ID = 0; +INITIALIZE_PASS(MetaRenamer, "metarenamer", + "Assign new names to everything", false, false) +//===----------------------------------------------------------------------===// +// +// MetaRenamer - Rename everything with metasyntactic names. +// +ModulePass *llvm::createMetaRenamerPass() { + return new MetaRenamer(); +} diff --git a/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp new file mode 100644 index 000000000000..0d623df77a67 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp @@ -0,0 +1,230 @@ +//===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This family of functions perform manipulations on Modules. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/ModuleUtils.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +static void appendToGlobalArray(const char *Array, Module &M, Function *F, + int Priority, Constant *Data) { + IRBuilder<> IRB(M.getContext()); + FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false); + + // Get the current set of static global constructors and add the new ctor + // to the list. + SmallVector<Constant *, 16> CurrentCtors; + StructType *EltTy; + if (GlobalVariable *GVCtor = M.getNamedGlobal(Array)) { + ArrayType *ATy = cast<ArrayType>(GVCtor->getValueType()); + StructType *OldEltTy = cast<StructType>(ATy->getElementType()); + // Upgrade a 2-field global array type to the new 3-field format if needed. 
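+    // For reference, an illustrative sketch of the two llvm.global_ctors entry
+    // layouts this branch chooses between (example IR, not from this module):
+    //   2-field: { i32, void ()* }       e.g. { i32 65535, void ()* @ctor }
+    //   3-field: { i32, void ()*, i8* }  e.g. { i32 65535, void ()* @ctor,
+    //                                           i8* null }
+    // The extra i8* slot is the associated-data pointer that the Data argument
+    // fills in (or null when absent), as done further below.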
+ if (Data && OldEltTy->getNumElements() < 3) + EltTy = StructType::get(IRB.getInt32Ty(), PointerType::getUnqual(FnTy), + IRB.getInt8PtrTy(), nullptr); + else + EltTy = OldEltTy; + if (Constant *Init = GVCtor->getInitializer()) { + unsigned n = Init->getNumOperands(); + CurrentCtors.reserve(n + 1); + for (unsigned i = 0; i != n; ++i) { + auto Ctor = cast<Constant>(Init->getOperand(i)); + if (EltTy != OldEltTy) + Ctor = ConstantStruct::get( + EltTy, Ctor->getAggregateElement((unsigned)0), + Ctor->getAggregateElement(1), + Constant::getNullValue(IRB.getInt8PtrTy()), nullptr); + CurrentCtors.push_back(Ctor); + } + } + GVCtor->eraseFromParent(); + } else { + // Use the new three-field struct if there isn't one already. + EltTy = StructType::get(IRB.getInt32Ty(), PointerType::getUnqual(FnTy), + IRB.getInt8PtrTy(), nullptr); + } + + // Build a 2 or 3 field global_ctor entry. We don't take a comdat key. + Constant *CSVals[3]; + CSVals[0] = IRB.getInt32(Priority); + CSVals[1] = F; + // FIXME: Drop support for the two element form in LLVM 4.0. + if (EltTy->getNumElements() >= 3) + CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getInt8PtrTy()) + : Constant::getNullValue(IRB.getInt8PtrTy()); + Constant *RuntimeCtorInit = + ConstantStruct::get(EltTy, makeArrayRef(CSVals, EltTy->getNumElements())); + + CurrentCtors.push_back(RuntimeCtorInit); + + // Create a new initializer. + ArrayType *AT = ArrayType::get(EltTy, CurrentCtors.size()); + Constant *NewInit = ConstantArray::get(AT, CurrentCtors); + + // Create the new global variable and replace all uses of + // the old global variable with the new one. + (void)new GlobalVariable(M, NewInit->getType(), false, + GlobalValue::AppendingLinkage, NewInit, Array); +} + +void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority, Constant *Data) { + appendToGlobalArray("llvm.global_ctors", M, F, Priority, Data); +} + +void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority, Constant *Data) { + appendToGlobalArray("llvm.global_dtors", M, F, Priority, Data); +} + +static void appendToUsedList(Module &M, StringRef Name, ArrayRef<GlobalValue *> Values) { + GlobalVariable *GV = M.getGlobalVariable(Name); + SmallPtrSet<Constant *, 16> InitAsSet; + SmallVector<Constant *, 16> Init; + if (GV) { + ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer()); + for (auto &Op : CA->operands()) { + Constant *C = cast_or_null<Constant>(Op); + if (InitAsSet.insert(C).second) + Init.push_back(C); + } + GV->eraseFromParent(); + } + + Type *Int8PtrTy = llvm::Type::getInt8PtrTy(M.getContext()); + for (auto *V : Values) { + Constant *C = ConstantExpr::getBitCast(V, Int8PtrTy); + if (InitAsSet.insert(C).second) + Init.push_back(C); + } + + if (Init.empty()) + return; + + ArrayType *ATy = ArrayType::get(Int8PtrTy, Init.size()); + GV = new llvm::GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage, + ConstantArray::get(ATy, Init), Name); + GV->setSection("llvm.metadata"); +} + +void llvm::appendToUsed(Module &M, ArrayRef<GlobalValue *> Values) { + appendToUsedList(M, "llvm.used", Values); +} + +void llvm::appendToCompilerUsed(Module &M, ArrayRef<GlobalValue *> Values) { + appendToUsedList(M, "llvm.compiler.used", Values); +} + +Function *llvm::checkSanitizerInterfaceFunction(Constant *FuncOrBitcast) { + if (isa<Function>(FuncOrBitcast)) + return cast<Function>(FuncOrBitcast); + FuncOrBitcast->dump(); + std::string Err; + raw_string_ostream Stream(Err); + Stream << "Sanitizer interface function redefined: " << *FuncOrBitcast; + 
report_fatal_error(Err); +} + +std::pair<Function *, Function *> llvm::createSanitizerCtorAndInitFunctions( + Module &M, StringRef CtorName, StringRef InitName, + ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs, + StringRef VersionCheckName) { + assert(!InitName.empty() && "Expected init function name"); + assert(InitArgs.size() == InitArgTypes.size() && + "Sanitizer's init function expects different number of arguments"); + Function *Ctor = Function::Create( + FunctionType::get(Type::getVoidTy(M.getContext()), false), + GlobalValue::InternalLinkage, CtorName, &M); + BasicBlock *CtorBB = BasicBlock::Create(M.getContext(), "", Ctor); + IRBuilder<> IRB(ReturnInst::Create(M.getContext(), CtorBB)); + Function *InitFunction = + checkSanitizerInterfaceFunction(M.getOrInsertFunction( + InitName, FunctionType::get(IRB.getVoidTy(), InitArgTypes, false), + AttributeSet())); + InitFunction->setLinkage(Function::ExternalLinkage); + IRB.CreateCall(InitFunction, InitArgs); + if (!VersionCheckName.empty()) { + Function *VersionCheckFunction = + checkSanitizerInterfaceFunction(M.getOrInsertFunction( + VersionCheckName, FunctionType::get(IRB.getVoidTy(), {}, false), + AttributeSet())); + IRB.CreateCall(VersionCheckFunction, {}); + } + return std::make_pair(Ctor, InitFunction); +} + +void llvm::filterDeadComdatFunctions( + Module &M, SmallVectorImpl<Function *> &DeadComdatFunctions) { + // Build a map from the comdat to the number of entries in that comdat we + // think are dead. If this fully covers the comdat group, then the entire + // group is dead. If we find another entry in the comdat group though, we'll + // have to preserve the whole group. + SmallDenseMap<Comdat *, int, 16> ComdatEntriesCovered; + for (Function *F : DeadComdatFunctions) { + Comdat *C = F->getComdat(); + assert(C && "Expected all input GVs to be in a comdat!"); + ComdatEntriesCovered[C] += 1; + } + + auto CheckComdat = [&](Comdat &C) { + auto CI = ComdatEntriesCovered.find(&C); + if (CI == ComdatEntriesCovered.end()) + return; + + // If this could have been covered by a dead entry, just subtract one to + // account for it. + if (CI->second > 0) { + CI->second -= 1; + return; + } + + // If we've already accounted for all the entries that were dead, the + // entire comdat is alive so remove it from the map. + ComdatEntriesCovered.erase(CI); + }; + + auto CheckAllComdats = [&] { + for (Function &F : M.functions()) + if (Comdat *C = F.getComdat()) { + CheckComdat(*C); + if (ComdatEntriesCovered.empty()) + return; + } + for (GlobalVariable &GV : M.globals()) + if (Comdat *C = GV.getComdat()) { + CheckComdat(*C); + if (ComdatEntriesCovered.empty()) + return; + } + for (GlobalAlias &GA : M.aliases()) + if (Comdat *C = GA.getComdat()) { + CheckComdat(*C); + if (ComdatEntriesCovered.empty()) + return; + } + }; + CheckAllComdats(); + + if (ComdatEntriesCovered.empty()) { + DeadComdatFunctions.clear(); + return; + } + + // Remove the entries that were not covering. 
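+  // (llvm::erase_if below is the usual remove_if/erase idiom; an equivalent
+  //  hand-written sketch, with Pred standing for the lambda that follows:
+  //    auto NewEnd = std::remove_if(DeadComdatFunctions.begin(),
+  //                                 DeadComdatFunctions.end(), Pred);
+  //    DeadComdatFunctions.erase(NewEnd, DeadComdatFunctions.end());)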
+ erase_if(DeadComdatFunctions, [&](GlobalValue *GV) { + return ComdatEntriesCovered.find(GV->getComdat()) == + ComdatEntriesCovered.end(); + }); +} diff --git a/contrib/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp b/contrib/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp new file mode 100644 index 000000000000..34dc1cccdd5b --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp @@ -0,0 +1,121 @@ +//===- NameAnonGlobals.cpp - ThinLTO Support: Name Unnamed Globals --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements naming anonymous globals to make sure they can be +// referred to by ThinLTO. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/NameAnonGlobals.h" + +#include "llvm/ADT/SmallString.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/MD5.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" + +using namespace llvm; + +namespace { +// Compute a "unique" hash for the module based on the name of the public +// globals. +class ModuleHasher { + Module &TheModule; + std::string TheHash; + +public: + ModuleHasher(Module &M) : TheModule(M) {} + + /// Return the lazily computed hash. + std::string &get() { + if (!TheHash.empty()) + // Cache hit :) + return TheHash; + + MD5 Hasher; + for (auto &F : TheModule) { + if (F.isDeclaration() || F.hasLocalLinkage() || !F.hasName()) + continue; + auto Name = F.getName(); + Hasher.update(Name); + } + for (auto &GV : TheModule.globals()) { + if (GV.isDeclaration() || GV.hasLocalLinkage() || !GV.hasName()) + continue; + auto Name = GV.getName(); + Hasher.update(Name); + } + + // Now return the result. + MD5::MD5Result Hash; + Hasher.final(Hash); + SmallString<32> Result; + MD5::stringifyResult(Hash, Result); + TheHash = Result.str(); + return TheHash; + } +}; +} // end anonymous namespace + +// Rename all the anon globals in the module +bool llvm::nameUnamedGlobals(Module &M) { + bool Changed = false; + ModuleHasher ModuleHash(M); + int count = 0; + auto RenameIfNeed = [&](GlobalValue &GV) { + if (GV.hasName()) + return; + GV.setName(Twine("anon.") + ModuleHash.get() + "." + Twine(count++)); + Changed = true; + }; + for (auto &GO : M.global_objects()) + RenameIfNeed(GO); + for (auto &GA : M.aliases()) + RenameIfNeed(GA); + + return Changed; +} + +namespace { + +// Legacy pass that provides a name to every anon globals. 
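+// Illustrative usage (hypothetical call sites, not from this file): a driver
+// can invoke the utility directly or create the legacy pass defined below:
+//   bool Changed = nameUnamedGlobals(M);
+//   ModulePass *P = createNameAnonGlobalPass();
+// Unnamed globals end up with names of the form "anon.<module hash>.<counter>",
+// e.g. "anon.3f2c....0" for the first one (hash value made up).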
+class NameAnonGlobalLegacyPass : public ModulePass { + +public: + /// Pass identification, replacement for typeid + static char ID; + + /// Specify pass name for debug output + StringRef getPassName() const override { return "Name Anon Globals"; } + + explicit NameAnonGlobalLegacyPass() : ModulePass(ID) {} + + bool runOnModule(Module &M) override { return nameUnamedGlobals(M); } +}; +char NameAnonGlobalLegacyPass::ID = 0; + +} // anonymous namespace + +PreservedAnalyses NameAnonGlobalPass::run(Module &M, + ModuleAnalysisManager &AM) { + if (!nameUnamedGlobals(M)) + return PreservedAnalyses::all(); + + return PreservedAnalyses::none(); +} + +INITIALIZE_PASS_BEGIN(NameAnonGlobalLegacyPass, "name-anon-globals", + "Provide a name to nameless globals", false, false) +INITIALIZE_PASS_END(NameAnonGlobalLegacyPass, "name-anon-globals", + "Provide a name to nameless globals", false, false) + +namespace llvm { +ModulePass *createNameAnonGlobalPass() { + return new NameAnonGlobalLegacyPass(); +} +} diff --git a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp new file mode 100644 index 000000000000..35faa6f65efd --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -0,0 +1,996 @@ +//===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file promotes memory references to be register references. It promotes +// alloca instructions which only have loads and stores as uses. An alloca is +// transformed by using iterated dominator frontiers to place PHI nodes, then +// traversing the function in depth-first order to rewrite loads and stores as +// appropriate. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/PromoteMemToReg.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/IteratedDominanceFrontier.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DIBuilder.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/Transforms/Utils/Local.h" +#include <algorithm> +using namespace llvm; + +#define DEBUG_TYPE "mem2reg" + +STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block"); +STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store"); +STATISTIC(NumDeadAlloca, "Number of dead alloca's removed"); +STATISTIC(NumPHIInsert, "Number of PHI nodes inserted"); + +bool llvm::isAllocaPromotable(const AllocaInst *AI) { + // FIXME: If the memory unit is of pointer or integer type, we can permit + // assignments to subsections of the memory unit. + unsigned AS = AI->getType()->getAddressSpace(); + + // Only allow direct and non-volatile loads and stores... 
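+  // Illustrative IR (example only, not from this function's input): an alloca
+  // like
+  //   %x = alloca i32
+  //   store i32 1, i32* %x
+  //   %v = load i32, i32* %x
+  // passes the checks below, while taking its address (e.g.
+  // "store i32* %x, i32** %p") or accessing it volatilely does not.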
+ for (const User *U : AI->users()) { + if (const LoadInst *LI = dyn_cast<LoadInst>(U)) { + // Note that atomic loads can be transformed; atomic semantics do + // not have any meaning for a local alloca. + if (LI->isVolatile()) + return false; + } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { + if (SI->getOperand(0) == AI) + return false; // Don't allow a store OF the AI, only INTO the AI. + // Note that atomic stores can be transformed; atomic semantics do + // not have any meaning for a local alloca. + if (SI->isVolatile()) + return false; + } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) { + if (II->getIntrinsicID() != Intrinsic::lifetime_start && + II->getIntrinsicID() != Intrinsic::lifetime_end) + return false; + } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { + if (BCI->getType() != Type::getInt8PtrTy(U->getContext(), AS)) + return false; + if (!onlyUsedByLifetimeMarkers(BCI)) + return false; + } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) { + if (GEPI->getType() != Type::getInt8PtrTy(U->getContext(), AS)) + return false; + if (!GEPI->hasAllZeroIndices()) + return false; + if (!onlyUsedByLifetimeMarkers(GEPI)) + return false; + } else { + return false; + } + } + + return true; +} + +namespace { + +struct AllocaInfo { + SmallVector<BasicBlock *, 32> DefiningBlocks; + SmallVector<BasicBlock *, 32> UsingBlocks; + + StoreInst *OnlyStore; + BasicBlock *OnlyBlock; + bool OnlyUsedInOneBlock; + + Value *AllocaPointerVal; + DbgDeclareInst *DbgDeclare; + + void clear() { + DefiningBlocks.clear(); + UsingBlocks.clear(); + OnlyStore = nullptr; + OnlyBlock = nullptr; + OnlyUsedInOneBlock = true; + AllocaPointerVal = nullptr; + DbgDeclare = nullptr; + } + + /// Scan the uses of the specified alloca, filling in the AllocaInfo used + /// by the rest of the pass to reason about the uses of this alloca. + void AnalyzeAlloca(AllocaInst *AI) { + clear(); + + // As we scan the uses of the alloca instruction, keep track of stores, + // and decide whether all of the loads and stores to the alloca are within + // the same basic block. + for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) { + Instruction *User = cast<Instruction>(*UI++); + + if (StoreInst *SI = dyn_cast<StoreInst>(User)) { + // Remember the basic blocks which define new values for the alloca + DefiningBlocks.push_back(SI->getParent()); + AllocaPointerVal = SI->getOperand(0); + OnlyStore = SI; + } else { + LoadInst *LI = cast<LoadInst>(User); + // Otherwise it must be a load instruction, keep track of variable + // reads. + UsingBlocks.push_back(LI->getParent()); + AllocaPointerVal = LI; + } + + if (OnlyUsedInOneBlock) { + if (!OnlyBlock) + OnlyBlock = User->getParent(); + else if (OnlyBlock != User->getParent()) + OnlyUsedInOneBlock = false; + } + } + + DbgDeclare = FindAllocaDbgDeclare(AI); + } +}; + +// Data package used by RenamePass() +class RenamePassData { +public: + typedef std::vector<Value *> ValVector; + + RenamePassData() : BB(nullptr), Pred(nullptr), Values() {} + RenamePassData(BasicBlock *B, BasicBlock *P, const ValVector &V) + : BB(B), Pred(P), Values(V) {} + BasicBlock *BB; + BasicBlock *Pred; + ValVector Values; + + void swap(RenamePassData &RHS) { + std::swap(BB, RHS.BB); + std::swap(Pred, RHS.Pred); + Values.swap(RHS.Values); + } +}; + +/// \brief This assigns and keeps a per-bb relative ordering of load/store +/// instructions in the block that directly load or store an alloca. 
+/// +/// This functionality is important because it avoids scanning large basic +/// blocks multiple times when promoting many allocas in the same block. +class LargeBlockInfo { + /// \brief For each instruction that we track, keep the index of the + /// instruction. + /// + /// The index starts out as the number of the instruction from the start of + /// the block. + DenseMap<const Instruction *, unsigned> InstNumbers; + +public: + + /// This code only looks at accesses to allocas. + static bool isInterestingInstruction(const Instruction *I) { + return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) || + (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1))); + } + + /// Get or calculate the index of the specified instruction. + unsigned getInstructionIndex(const Instruction *I) { + assert(isInterestingInstruction(I) && + "Not a load/store to/from an alloca?"); + + // If we already have this instruction number, return it. + DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I); + if (It != InstNumbers.end()) + return It->second; + + // Scan the whole block to get the instruction. This accumulates + // information for every interesting instruction in the block, in order to + // avoid gratuitus rescans. + const BasicBlock *BB = I->getParent(); + unsigned InstNo = 0; + for (const Instruction &BBI : *BB) + if (isInterestingInstruction(&BBI)) + InstNumbers[&BBI] = InstNo++; + It = InstNumbers.find(I); + + assert(It != InstNumbers.end() && "Didn't insert instruction?"); + return It->second; + } + + void deleteValue(const Instruction *I) { InstNumbers.erase(I); } + + void clear() { InstNumbers.clear(); } +}; + +struct PromoteMem2Reg { + /// The alloca instructions being promoted. + std::vector<AllocaInst *> Allocas; + DominatorTree &DT; + DIBuilder DIB; + + /// An AliasSetTracker object to update. If null, don't update it. + AliasSetTracker *AST; + + /// A cache of @llvm.assume intrinsics used by SimplifyInstruction. + AssumptionCache *AC; + + /// Reverse mapping of Allocas. + DenseMap<AllocaInst *, unsigned> AllocaLookup; + + /// \brief The PhiNodes we're adding. + /// + /// That map is used to simplify some Phi nodes as we iterate over it, so + /// it should have deterministic iterators. We could use a MapVector, but + /// since we already maintain a map from BasicBlock* to a stable numbering + /// (BBNumbers), the DenseMap is more efficient (also supports removal). + DenseMap<std::pair<unsigned, unsigned>, PHINode *> NewPhiNodes; + + /// For each PHI node, keep track of which entry in Allocas it corresponds + /// to. + DenseMap<PHINode *, unsigned> PhiToAllocaMap; + + /// If we are updating an AliasSetTracker, then for each alloca that is of + /// pointer type, we keep track of what to copyValue to the inserted PHI + /// nodes here. + std::vector<Value *> PointerAllocaValues; + + /// For each alloca, we keep track of the dbg.declare intrinsic that + /// describes it, if any, so that we can convert it to a dbg.value + /// intrinsic if the alloca gets promoted. + SmallVector<DbgDeclareInst *, 8> AllocaDbgDeclares; + + /// The set of basic blocks the renamer has already visited. + /// + SmallPtrSet<BasicBlock *, 16> Visited; + + /// Contains a stable numbering of basic blocks to avoid non-determinstic + /// behavior. + DenseMap<BasicBlock *, unsigned> BBNumbers; + + /// Lazily compute the number of predecessors a block has. 
+ DenseMap<const BasicBlock *, unsigned> BBNumPreds; + +public: + PromoteMem2Reg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT, + AliasSetTracker *AST, AssumptionCache *AC) + : Allocas(Allocas.begin(), Allocas.end()), DT(DT), + DIB(*DT.getRoot()->getParent()->getParent(), /*AllowUnresolved*/ false), + AST(AST), AC(AC) {} + + void run(); + +private: + void RemoveFromAllocasList(unsigned &AllocaIdx) { + Allocas[AllocaIdx] = Allocas.back(); + Allocas.pop_back(); + --AllocaIdx; + } + + unsigned getNumPreds(const BasicBlock *BB) { + unsigned &NP = BBNumPreds[BB]; + if (NP == 0) + NP = std::distance(pred_begin(BB), pred_end(BB)) + 1; + return NP - 1; + } + + void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info, + const SmallPtrSetImpl<BasicBlock *> &DefBlocks, + SmallPtrSetImpl<BasicBlock *> &LiveInBlocks); + void RenamePass(BasicBlock *BB, BasicBlock *Pred, + RenamePassData::ValVector &IncVals, + std::vector<RenamePassData> &Worklist); + bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version); +}; + +} // end of anonymous namespace + +static void removeLifetimeIntrinsicUsers(AllocaInst *AI) { + // Knowing that this alloca is promotable, we know that it's safe to kill all + // instructions except for load and store. + + for (auto UI = AI->user_begin(), UE = AI->user_end(); UI != UE;) { + Instruction *I = cast<Instruction>(*UI); + ++UI; + if (isa<LoadInst>(I) || isa<StoreInst>(I)) + continue; + + if (!I->getType()->isVoidTy()) { + // The only users of this bitcast/GEP instruction are lifetime intrinsics. + // Follow the use/def chain to erase them now instead of leaving it for + // dead code elimination later. + for (auto UUI = I->user_begin(), UUE = I->user_end(); UUI != UUE;) { + Instruction *Inst = cast<Instruction>(*UUI); + ++UUI; + Inst->eraseFromParent(); + } + } + I->eraseFromParent(); + } +} + +/// \brief Rewrite as many loads as possible given a single store. +/// +/// When there is only a single store, we can use the domtree to trivially +/// replace all of the dominated loads with the stored value. Do so, and return +/// true if this has successfully promoted the alloca entirely. If this returns +/// false there were some loads which were not dominated by the single store +/// and thus must be phi-ed with undef. We fall back to the standard alloca +/// promotion algorithm in that case. +static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, + LargeBlockInfo &LBI, + DominatorTree &DT, + AliasSetTracker *AST) { + StoreInst *OnlyStore = Info.OnlyStore; + bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0)); + BasicBlock *StoreBB = OnlyStore->getParent(); + int StoreIndex = -1; + + // Clear out UsingBlocks. We will reconstruct it here if needed. + Info.UsingBlocks.clear(); + + for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) { + Instruction *UserInst = cast<Instruction>(*UI++); + if (!isa<LoadInst>(UserInst)) { + assert(UserInst == OnlyStore && "Should only have load/stores"); + continue; + } + LoadInst *LI = cast<LoadInst>(UserInst); + + // Okay, if we have a load from the alloca, we want to replace it with the + // only value stored to the alloca. We can do this if the value is + // dominated by the store. If not, we use the rest of the mem2reg machinery + // to insert the phi nodes as needed. + if (!StoringGlobalVal) { // Non-instructions are always dominated. 
+ if (LI->getParent() == StoreBB) { + // If we have a use that is in the same block as the store, compare the + // indices of the two instructions to see which one came first. If the + // load came before the store, we can't handle it. + if (StoreIndex == -1) + StoreIndex = LBI.getInstructionIndex(OnlyStore); + + if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) { + // Can't handle this load, bail out. + Info.UsingBlocks.push_back(StoreBB); + continue; + } + + } else if (LI->getParent() != StoreBB && + !DT.dominates(StoreBB, LI->getParent())) { + // If the load and store are in different blocks, use BB dominance to + // check their relationships. If the store doesn't dom the use, bail + // out. + Info.UsingBlocks.push_back(LI->getParent()); + continue; + } + } + + // Otherwise, we *can* safely rewrite this load. + Value *ReplVal = OnlyStore->getOperand(0); + // If the replacement value is the load, this must occur in unreachable + // code. + if (ReplVal == LI) + ReplVal = UndefValue::get(LI->getType()); + LI->replaceAllUsesWith(ReplVal); + if (AST && LI->getType()->isPointerTy()) + AST->deleteValue(LI); + LI->eraseFromParent(); + LBI.deleteValue(LI); + } + + // Finally, after the scan, check to see if the store is all that is left. + if (!Info.UsingBlocks.empty()) + return false; // If not, we'll have to fall back for the remainder. + + // Record debuginfo for the store and remove the declaration's + // debuginfo. + if (DbgDeclareInst *DDI = Info.DbgDeclare) { + DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false); + ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, DIB); + DDI->eraseFromParent(); + LBI.deleteValue(DDI); + } + // Remove the (now dead) store and alloca. + Info.OnlyStore->eraseFromParent(); + LBI.deleteValue(Info.OnlyStore); + + if (AST) + AST->deleteValue(AI); + AI->eraseFromParent(); + LBI.deleteValue(AI); + return true; +} + +/// Many allocas are only used within a single basic block. If this is the +/// case, avoid traversing the CFG and inserting a lot of potentially useless +/// PHI nodes by just performing a single linear pass over the basic block +/// using the Alloca. +/// +/// If we cannot promote this alloca (because it is read before it is written), +/// return false. This is necessary in cases where, due to control flow, the +/// alloca is undefined only on some control flow paths. e.g. code like +/// this is correct in LLVM IR: +/// // A is an alloca with no stores so far +/// for (...) { +/// int t = *A; +/// if (!first_iteration) +/// use(t); +/// *A = 42; +/// } +static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, + LargeBlockInfo &LBI, + AliasSetTracker *AST) { + // The trickiest case to handle is when we have large blocks. Because of this, + // this code is optimized assuming that large blocks happen. This does not + // significantly pessimize the small block case. This uses LargeBlockInfo to + // make it efficient to get the index of various operations in the block. + + // Walk the use-def list of the alloca, getting the locations of all stores. + typedef SmallVector<std::pair<unsigned, StoreInst *>, 64> StoresByIndexTy; + StoresByIndexTy StoresByIndex; + + for (User *U : AI->users()) + if (StoreInst *SI = dyn_cast<StoreInst>(U)) + StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI)); + + // Sort the stores by their index, making it efficient to do a lookup with a + // binary search. 
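+  // (less_first() orders the pairs by .first only, so the std::lower_bound on
+  //  {LoadIdx, nullptr} further down finds the first store whose index is
+  //  >= LoadIdx. Illustrative values: with stores at indices {2, 7} and a load
+  //  at index 5, lower_bound lands on index 7 and std::prev(...) yields the
+  //  store at index 2, i.e. the nearest store above the load.)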
+ std::sort(StoresByIndex.begin(), StoresByIndex.end(), less_first()); + + // Walk all of the loads from this alloca, replacing them with the nearest + // store above them, if any. + for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) { + LoadInst *LI = dyn_cast<LoadInst>(*UI++); + if (!LI) + continue; + + unsigned LoadIdx = LBI.getInstructionIndex(LI); + + // Find the nearest store that has a lower index than this load. + StoresByIndexTy::iterator I = + std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(), + std::make_pair(LoadIdx, + static_cast<StoreInst *>(nullptr)), + less_first()); + if (I == StoresByIndex.begin()) { + if (StoresByIndex.empty()) + // If there are no stores, the load takes the undef value. + LI->replaceAllUsesWith(UndefValue::get(LI->getType())); + else + // There is no store before this load, bail out (load may be affected + // by the following stores - see main comment). + return false; + } + else + // Otherwise, there was a store before this load, the load takes its value. + LI->replaceAllUsesWith(std::prev(I)->second->getOperand(0)); + + if (AST && LI->getType()->isPointerTy()) + AST->deleteValue(LI); + LI->eraseFromParent(); + LBI.deleteValue(LI); + } + + // Remove the (now dead) stores and alloca. + while (!AI->use_empty()) { + StoreInst *SI = cast<StoreInst>(AI->user_back()); + // Record debuginfo for the store before removing it. + if (DbgDeclareInst *DDI = Info.DbgDeclare) { + DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false); + ConvertDebugDeclareToDebugValue(DDI, SI, DIB); + } + SI->eraseFromParent(); + LBI.deleteValue(SI); + } + + if (AST) + AST->deleteValue(AI); + AI->eraseFromParent(); + LBI.deleteValue(AI); + + // The alloca's debuginfo can be removed as well. + if (DbgDeclareInst *DDI = Info.DbgDeclare) { + DDI->eraseFromParent(); + LBI.deleteValue(DDI); + } + + ++NumLocalPromoted; + return true; +} + +void PromoteMem2Reg::run() { + Function &F = *DT.getRoot()->getParent(); + + if (AST) + PointerAllocaValues.resize(Allocas.size()); + AllocaDbgDeclares.resize(Allocas.size()); + + AllocaInfo Info; + LargeBlockInfo LBI; + ForwardIDFCalculator IDF(DT); + + for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) { + AllocaInst *AI = Allocas[AllocaNum]; + + assert(isAllocaPromotable(AI) && "Cannot promote non-promotable alloca!"); + assert(AI->getParent()->getParent() == &F && + "All allocas should be in the same function, which is same as DF!"); + + removeLifetimeIntrinsicUsers(AI); + + if (AI->use_empty()) { + // If there are no uses of the alloca, just delete it now. + if (AST) + AST->deleteValue(AI); + AI->eraseFromParent(); + + // Remove the alloca from the Allocas list, since it has been processed + RemoveFromAllocasList(AllocaNum); + ++NumDeadAlloca; + continue; + } + + // Calculate the set of read and write-locations for each alloca. This is + // analogous to finding the 'uses' and 'definitions' of each variable. + Info.AnalyzeAlloca(AI); + + // If there is only a single store to this value, replace any loads of + // it that are directly dominated by the definition with the value stored. + if (Info.DefiningBlocks.size() == 1) { + if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AST)) { + // The alloca has been processed, move on. + RemoveFromAllocasList(AllocaNum); + ++NumSingleStore; + continue; + } + } + + // If the alloca is only read and written in one basic block, just perform a + // linear sweep over the block to eliminate it. 
+ if (Info.OnlyUsedInOneBlock && + promoteSingleBlockAlloca(AI, Info, LBI, AST)) { + // The alloca has been processed, move on. + RemoveFromAllocasList(AllocaNum); + continue; + } + + // If we haven't computed a numbering for the BB's in the function, do so + // now. + if (BBNumbers.empty()) { + unsigned ID = 0; + for (auto &BB : F) + BBNumbers[&BB] = ID++; + } + + // If we have an AST to keep updated, remember some pointer value that is + // stored into the alloca. + if (AST) + PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal; + + // Remember the dbg.declare intrinsic describing this alloca, if any. + if (Info.DbgDeclare) + AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare; + + // Keep the reverse mapping of the 'Allocas' array for the rename pass. + AllocaLookup[Allocas[AllocaNum]] = AllocaNum; + + // At this point, we're committed to promoting the alloca using IDF's, and + // the standard SSA construction algorithm. Determine which blocks need PHI + // nodes and see if we can optimize out some work by avoiding insertion of + // dead phi nodes. + + + // Unique the set of defining blocks for efficient lookup. + SmallPtrSet<BasicBlock *, 32> DefBlocks; + DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end()); + + // Determine which blocks the value is live in. These are blocks which lead + // to uses. + SmallPtrSet<BasicBlock *, 32> LiveInBlocks; + ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks); + + // At this point, we're committed to promoting the alloca using IDF's, and + // the standard SSA construction algorithm. Determine which blocks need phi + // nodes and see if we can optimize out some work by avoiding insertion of + // dead phi nodes. + IDF.setLiveInBlocks(LiveInBlocks); + IDF.setDefiningBlocks(DefBlocks); + SmallVector<BasicBlock *, 32> PHIBlocks; + IDF.calculate(PHIBlocks); + if (PHIBlocks.size() > 1) + std::sort(PHIBlocks.begin(), PHIBlocks.end(), + [this](BasicBlock *A, BasicBlock *B) { + return BBNumbers.lookup(A) < BBNumbers.lookup(B); + }); + + unsigned CurrentVersion = 0; + for (unsigned i = 0, e = PHIBlocks.size(); i != e; ++i) + QueuePhiNode(PHIBlocks[i], AllocaNum, CurrentVersion); + } + + if (Allocas.empty()) + return; // All of the allocas must have been trivial! + + LBI.clear(); + + // Set the incoming values for the basic block to be null values for all of + // the alloca's. We do this in case there is a load of a value that has not + // been stored yet. In this case, it will get this null value. + // + RenamePassData::ValVector Values(Allocas.size()); + for (unsigned i = 0, e = Allocas.size(); i != e; ++i) + Values[i] = UndefValue::get(Allocas[i]->getAllocatedType()); + + // Walks all basic blocks in the function performing the SSA rename algorithm + // and inserting the phi nodes we marked as necessary + // + std::vector<RenamePassData> RenamePassWorkList; + RenamePassWorkList.emplace_back(&F.front(), nullptr, std::move(Values)); + do { + RenamePassData RPD; + RPD.swap(RenamePassWorkList.back()); + RenamePassWorkList.pop_back(); + // RenamePass may add new worklist entries. + RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList); + } while (!RenamePassWorkList.empty()); + + // The renamer uses the Visited set to avoid infinite loops. Clear it now. + Visited.clear(); + + // Remove the allocas themselves from the function. 
+ for (unsigned i = 0, e = Allocas.size(); i != e; ++i) { + Instruction *A = Allocas[i]; + + // If there are any uses of the alloca instructions left, they must be in + // unreachable basic blocks that were not processed by walking the dominator + // tree. Just delete the users now. + if (!A->use_empty()) + A->replaceAllUsesWith(UndefValue::get(A->getType())); + if (AST) + AST->deleteValue(A); + A->eraseFromParent(); + } + + const DataLayout &DL = F.getParent()->getDataLayout(); + + // Remove alloca's dbg.declare instrinsics from the function. + for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i) + if (DbgDeclareInst *DDI = AllocaDbgDeclares[i]) + DDI->eraseFromParent(); + + // Loop over all of the PHI nodes and see if there are any that we can get + // rid of because they merge all of the same incoming values. This can + // happen due to undef values coming into the PHI nodes. This process is + // iterative, because eliminating one PHI node can cause others to be removed. + bool EliminatedAPHI = true; + while (EliminatedAPHI) { + EliminatedAPHI = false; + + // Iterating over NewPhiNodes is deterministic, so it is safe to try to + // simplify and RAUW them as we go. If it was not, we could add uses to + // the values we replace with in a non-deterministic order, thus creating + // non-deterministic def->use chains. + for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator + I = NewPhiNodes.begin(), + E = NewPhiNodes.end(); + I != E;) { + PHINode *PN = I->second; + + // If this PHI node merges one value and/or undefs, get the value. + if (Value *V = SimplifyInstruction(PN, DL, nullptr, &DT, AC)) { + if (AST && PN->getType()->isPointerTy()) + AST->deleteValue(PN); + PN->replaceAllUsesWith(V); + PN->eraseFromParent(); + NewPhiNodes.erase(I++); + EliminatedAPHI = true; + continue; + } + ++I; + } + } + + // At this point, the renamer has added entries to PHI nodes for all reachable + // code. Unfortunately, there may be unreachable blocks which the renamer + // hasn't traversed. If this is the case, the PHI nodes may not + // have incoming values for all predecessors. Loop over all PHI nodes we have + // created, inserting undef values if they are missing any incoming values. + // + for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator + I = NewPhiNodes.begin(), + E = NewPhiNodes.end(); + I != E; ++I) { + // We want to do this once per basic block. As such, only process a block + // when we find the PHI that is the first entry in the block. + PHINode *SomePHI = I->second; + BasicBlock *BB = SomePHI->getParent(); + if (&BB->front() != SomePHI) + continue; + + // Only do work here if there the PHI nodes are missing incoming values. We + // know that all PHI nodes that were inserted in a block will have the same + // number of incoming values, so we can just check any of them. + if (SomePHI->getNumIncomingValues() == getNumPreds(BB)) + continue; + + // Get the preds for BB. + SmallVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB)); + + // Ok, now we know that all of the PHI nodes are missing entries for some + // basic blocks. Start by sorting the incoming predecessors for efficient + // access. + std::sort(Preds.begin(), Preds.end()); + + // Now we loop through all BB's which have entries in SomePHI and remove + // them from the Preds list. + for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) { + // Do a log(n) search of the Preds list for the entry we want. 
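+      // (Preds was sorted by pointer value with std::sort above, so
+      //  std::lower_bound is valid here. Illustrative example: with sorted
+      //  Preds = {A, B, C} and incoming block B, EntIt points at B and the
+      //  erase below shrinks Preds to {A, C}. Block names are made up.)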
+ SmallVectorImpl<BasicBlock *>::iterator EntIt = std::lower_bound( + Preds.begin(), Preds.end(), SomePHI->getIncomingBlock(i)); + assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i) && + "PHI node has entry for a block which is not a predecessor!"); + + // Remove the entry + Preds.erase(EntIt); + } + + // At this point, the blocks left in the preds list must have dummy + // entries inserted into every PHI nodes for the block. Update all the phi + // nodes in this block that we are inserting (there could be phis before + // mem2reg runs). + unsigned NumBadPreds = SomePHI->getNumIncomingValues(); + BasicBlock::iterator BBI = BB->begin(); + while ((SomePHI = dyn_cast<PHINode>(BBI++)) && + SomePHI->getNumIncomingValues() == NumBadPreds) { + Value *UndefVal = UndefValue::get(SomePHI->getType()); + for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred) + SomePHI->addIncoming(UndefVal, Preds[pred]); + } + } + + NewPhiNodes.clear(); +} + +/// \brief Determine which blocks the value is live in. +/// +/// These are blocks which lead to uses. Knowing this allows us to avoid +/// inserting PHI nodes into blocks which don't lead to uses (thus, the +/// inserted phi nodes would be dead). +void PromoteMem2Reg::ComputeLiveInBlocks( + AllocaInst *AI, AllocaInfo &Info, + const SmallPtrSetImpl<BasicBlock *> &DefBlocks, + SmallPtrSetImpl<BasicBlock *> &LiveInBlocks) { + + // To determine liveness, we must iterate through the predecessors of blocks + // where the def is live. Blocks are added to the worklist if we need to + // check their predecessors. Start with all the using blocks. + SmallVector<BasicBlock *, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(), + Info.UsingBlocks.end()); + + // If any of the using blocks is also a definition block, check to see if the + // definition occurs before or after the use. If it happens before the use, + // the value isn't really live-in. + for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) { + BasicBlock *BB = LiveInBlockWorklist[i]; + if (!DefBlocks.count(BB)) + continue; + + // Okay, this is a block that both uses and defines the value. If the first + // reference to the alloca is a def (store), then we know it isn't live-in. + for (BasicBlock::iterator I = BB->begin();; ++I) { + if (StoreInst *SI = dyn_cast<StoreInst>(I)) { + if (SI->getOperand(1) != AI) + continue; + + // We found a store to the alloca before a load. The alloca is not + // actually live-in here. + LiveInBlockWorklist[i] = LiveInBlockWorklist.back(); + LiveInBlockWorklist.pop_back(); + --i; + --e; + break; + } + + if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + if (LI->getOperand(0) != AI) + continue; + + // Okay, we found a load before a store to the alloca. It is actually + // live into this block. + break; + } + } + } + + // Now that we have a set of blocks where the phi is live-in, recursively add + // their predecessors until we find the full region the value is live. + while (!LiveInBlockWorklist.empty()) { + BasicBlock *BB = LiveInBlockWorklist.pop_back_val(); + + // The block really is live in here, insert it into the set. If already in + // the set, then it has already been processed. + if (!LiveInBlocks.insert(BB).second) + continue; + + // Since the value is live into BB, it is either defined in a predecessor or + // live into it to. Add the preds to the worklist unless they are a + // defining block. 
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + BasicBlock *P = *PI; + + // The value is not live into a predecessor if it defines the value. + if (DefBlocks.count(P)) + continue; + + // Otherwise it is, add to the worklist. + LiveInBlockWorklist.push_back(P); + } + } +} + +/// \brief Queue a phi-node to be added to a basic-block for a specific Alloca. +/// +/// Returns true if there wasn't already a phi-node for that variable +bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, + unsigned &Version) { + // Look up the basic-block in question. + PHINode *&PN = NewPhiNodes[std::make_pair(BBNumbers[BB], AllocaNo)]; + + // If the BB already has a phi node added for the i'th alloca then we're done! + if (PN) + return false; + + // Create a PhiNode using the dereferenced type... and add the phi-node to the + // BasicBlock. + PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB), + Allocas[AllocaNo]->getName() + "." + Twine(Version++), + &BB->front()); + ++NumPHIInsert; + PhiToAllocaMap[PN] = AllocaNo; + + if (AST && PN->getType()->isPointerTy()) + AST->copyValue(PointerAllocaValues[AllocaNo], PN); + + return true; +} + +/// \brief Recursively traverse the CFG of the function, renaming loads and +/// stores to the allocas which we are promoting. +/// +/// IncomingVals indicates what value each Alloca contains on exit from the +/// predecessor block Pred. +void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred, + RenamePassData::ValVector &IncomingVals, + std::vector<RenamePassData> &Worklist) { +NextIteration: + // If we are inserting any phi nodes into this BB, they will already be in the + // block. + if (PHINode *APN = dyn_cast<PHINode>(BB->begin())) { + // If we have PHI nodes to update, compute the number of edges from Pred to + // BB. + if (PhiToAllocaMap.count(APN)) { + // We want to be able to distinguish between PHI nodes being inserted by + // this invocation of mem2reg from those phi nodes that already existed in + // the IR before mem2reg was run. We determine that APN is being inserted + // because it is missing incoming edges. All other PHI nodes being + // inserted by this pass of mem2reg will have the same number of incoming + // operands so far. Remember this count. + unsigned NewPHINumOperands = APN->getNumOperands(); + + unsigned NumEdges = std::count(succ_begin(Pred), succ_end(Pred), BB); + assert(NumEdges && "Must be at least one edge from Pred to BB!"); + + // Add entries for all the phis. + BasicBlock::iterator PNI = BB->begin(); + do { + unsigned AllocaNo = PhiToAllocaMap[APN]; + + // Add N incoming values to the PHI node. + for (unsigned i = 0; i != NumEdges; ++i) + APN->addIncoming(IncomingVals[AllocaNo], Pred); + + // The currently active variable for this block is now the PHI. + IncomingVals[AllocaNo] = APN; + if (DbgDeclareInst *DDI = AllocaDbgDeclares[AllocaNo]) + ConvertDebugDeclareToDebugValue(DDI, APN, DIB); + + // Get the next phi node. + ++PNI; + APN = dyn_cast<PHINode>(PNI); + if (!APN) + break; + + // Verify that it is missing entries. If not, it is not being inserted + // by this mem2reg invocation so we want to ignore it. + } while (APN->getNumOperands() == NewPHINumOperands); + } + } + + // Don't revisit blocks. 
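+  // (SmallPtrSet::insert returns {iterator, WasInserted}; .second is false if
+  //  BB was already in Visited, which is how each block gets renamed exactly
+  //  once. An equivalent, more explicit sketch:
+  //    auto Res = Visited.insert(BB);
+  //    if (!Res.second)
+  //      return; // already visited
+  //  )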
+ if (!Visited.insert(BB).second) + return; + + for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II);) { + Instruction *I = &*II++; // get the instruction, increment iterator + + if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand()); + if (!Src) + continue; + + DenseMap<AllocaInst *, unsigned>::iterator AI = AllocaLookup.find(Src); + if (AI == AllocaLookup.end()) + continue; + + Value *V = IncomingVals[AI->second]; + + // Anything using the load now uses the current value. + LI->replaceAllUsesWith(V); + if (AST && LI->getType()->isPointerTy()) + AST->deleteValue(LI); + BB->getInstList().erase(LI); + } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { + // Delete this instruction and mark the name as the current holder of the + // value + AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand()); + if (!Dest) + continue; + + DenseMap<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest); + if (ai == AllocaLookup.end()) + continue; + + // what value were we writing? + IncomingVals[ai->second] = SI->getOperand(0); + // Record debuginfo for the store before removing it. + if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second]) + ConvertDebugDeclareToDebugValue(DDI, SI, DIB); + BB->getInstList().erase(SI); + } + } + + // 'Recurse' to our successors. + succ_iterator I = succ_begin(BB), E = succ_end(BB); + if (I == E) + return; + + // Keep track of the successors so we don't visit the same successor twice + SmallPtrSet<BasicBlock *, 8> VisitedSuccs; + + // Handle the first successor without using the worklist. + VisitedSuccs.insert(*I); + Pred = BB; + BB = *I; + ++I; + + for (; I != E; ++I) + if (VisitedSuccs.insert(*I).second) + Worklist.emplace_back(*I, Pred, IncomingVals); + + goto NextIteration; +} + +void llvm::PromoteMemToReg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT, + AliasSetTracker *AST, AssumptionCache *AC) { + // If there is nothing to do, bail out... + if (Allocas.empty()) + return; + + PromoteMem2Reg(Allocas, DT, AST, AC).run(); +} diff --git a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp new file mode 100644 index 000000000000..8e93ee757a15 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp @@ -0,0 +1,486 @@ +//===- SSAUpdater.cpp - Unstructured SSA Update Tool ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SSAUpdater class. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/SSAUpdater.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/TinyPtrVector.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/SSAUpdaterImpl.h" + +using namespace llvm; + +#define DEBUG_TYPE "ssaupdater" + +typedef DenseMap<BasicBlock*, Value*> AvailableValsTy; +static AvailableValsTy &getAvailableVals(void *AV) { + return *static_cast<AvailableValsTy*>(AV); +} + +SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI) + : AV(nullptr), ProtoType(nullptr), ProtoName(), InsertedPHIs(NewPHI) {} + +SSAUpdater::~SSAUpdater() { + delete static_cast<AvailableValsTy*>(AV); +} + +void SSAUpdater::Initialize(Type *Ty, StringRef Name) { + if (!AV) + AV = new AvailableValsTy(); + else + getAvailableVals(AV).clear(); + ProtoType = Ty; + ProtoName = Name; +} + +bool SSAUpdater::HasValueForBlock(BasicBlock *BB) const { + return getAvailableVals(AV).count(BB); +} + +void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) { + assert(ProtoType && "Need to initialize SSAUpdater"); + assert(ProtoType == V->getType() && + "All rewritten values must have the same type"); + getAvailableVals(AV)[BB] = V; +} + +static bool IsEquivalentPHI(PHINode *PHI, + SmallDenseMap<BasicBlock*, Value*, 8> &ValueMapping) { + unsigned PHINumValues = PHI->getNumIncomingValues(); + if (PHINumValues != ValueMapping.size()) + return false; + + // Scan the phi to see if it matches. + for (unsigned i = 0, e = PHINumValues; i != e; ++i) + if (ValueMapping[PHI->getIncomingBlock(i)] != + PHI->getIncomingValue(i)) { + return false; + } + + return true; +} + +Value *SSAUpdater::GetValueAtEndOfBlock(BasicBlock *BB) { + Value *Res = GetValueAtEndOfBlockInternal(BB); + return Res; +} + +Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { + // If there is no definition of the renamed variable in this block, just use + // GetValueAtEndOfBlock to do our work. + if (!HasValueForBlock(BB)) + return GetValueAtEndOfBlock(BB); + + // Otherwise, we have the hard case. Get the live-in values for each + // predecessor. + SmallVector<std::pair<BasicBlock*, Value*>, 8> PredValues; + Value *SingularValue = nullptr; + + // We can get our predecessor info by walking the pred_iterator list, but it + // is relatively slow. If we already have PHI nodes in this block, walk one + // of them to get the predecessor list instead. + if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) { + for (unsigned i = 0, e = SomePhi->getNumIncomingValues(); i != e; ++i) { + BasicBlock *PredBB = SomePhi->getIncomingBlock(i); + Value *PredVal = GetValueAtEndOfBlock(PredBB); + PredValues.push_back(std::make_pair(PredBB, PredVal)); + + // Compute SingularValue. + if (i == 0) + SingularValue = PredVal; + else if (PredVal != SingularValue) + SingularValue = nullptr; + } + } else { + bool isFirstPred = true; + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + BasicBlock *PredBB = *PI; + Value *PredVal = GetValueAtEndOfBlock(PredBB); + PredValues.push_back(std::make_pair(PredBB, PredVal)); + + // Compute SingularValue. 
+ if (isFirstPred) { + SingularValue = PredVal; + isFirstPred = false; + } else if (PredVal != SingularValue) + SingularValue = nullptr; + } + } + + // If there are no predecessors, just return undef. + if (PredValues.empty()) + return UndefValue::get(ProtoType); + + // Otherwise, if all the merged values are the same, just use it. + if (SingularValue) + return SingularValue; + + // Otherwise, we do need a PHI: check to see if we already have one available + // in this block that produces the right value. + if (isa<PHINode>(BB->begin())) { + SmallDenseMap<BasicBlock*, Value*, 8> ValueMapping(PredValues.begin(), + PredValues.end()); + PHINode *SomePHI; + for (BasicBlock::iterator It = BB->begin(); + (SomePHI = dyn_cast<PHINode>(It)); ++It) { + if (IsEquivalentPHI(SomePHI, ValueMapping)) + return SomePHI; + } + } + + // Ok, we have no way out, insert a new one now. + PHINode *InsertedPHI = PHINode::Create(ProtoType, PredValues.size(), + ProtoName, &BB->front()); + + // Fill in all the predecessors of the PHI. + for (const auto &PredValue : PredValues) + InsertedPHI->addIncoming(PredValue.second, PredValue.first); + + // See if the PHI node can be merged to a single value. This can happen in + // loop cases when we get a PHI of itself and one other value. + if (Value *V = + SimplifyInstruction(InsertedPHI, BB->getModule()->getDataLayout())) { + InsertedPHI->eraseFromParent(); + return V; + } + + // Set the DebugLoc of the inserted PHI, if available. + DebugLoc DL; + if (const Instruction *I = BB->getFirstNonPHI()) + DL = I->getDebugLoc(); + InsertedPHI->setDebugLoc(DL); + + // If the client wants to know about all new instructions, tell it. + if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); + + DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n"); + return InsertedPHI; +} + +void SSAUpdater::RewriteUse(Use &U) { + Instruction *User = cast<Instruction>(U.getUser()); + + Value *V; + if (PHINode *UserPN = dyn_cast<PHINode>(User)) + V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U)); + else + V = GetValueInMiddleOfBlock(User->getParent()); + + // Notify that users of the existing value that it is being replaced. 
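+  // Illustrative client pattern (names are hypothetical, not from this file):
+  // after seeding the updater with each block's reaching definition, stale
+  // uses are funneled through this routine:
+  //   SSAUpdater SSA;
+  //   SSA.Initialize(OrigV->getType(), OrigV->getName());
+  //   SSA.AddAvailableValue(DefBB1, NewV1);
+  //   SSA.AddAvailableValue(DefBB2, NewV2);
+  //   SmallVector<Use *, 8> Uses;
+  //   for (Use &U : OrigV->uses()) Uses.push_back(&U);
+  //   for (Use *U : Uses) SSA.RewriteUse(*U);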
+ Value *OldVal = U.get(); + if (OldVal != V && OldVal->hasValueHandle()) + ValueHandleBase::ValueIsRAUWd(OldVal, V); + + U.set(V); +} + +void SSAUpdater::RewriteUseAfterInsertions(Use &U) { + Instruction *User = cast<Instruction>(U.getUser()); + + Value *V; + if (PHINode *UserPN = dyn_cast<PHINode>(User)) + V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U)); + else + V = GetValueAtEndOfBlock(User->getParent()); + + U.set(V); +} + +namespace llvm { +template<> +class SSAUpdaterTraits<SSAUpdater> { +public: + typedef BasicBlock BlkT; + typedef Value *ValT; + typedef PHINode PhiT; + + typedef succ_iterator BlkSucc_iterator; + static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return succ_begin(BB); } + static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return succ_end(BB); } + + class PHI_iterator { + private: + PHINode *PHI; + unsigned idx; + + public: + explicit PHI_iterator(PHINode *P) // begin iterator + : PHI(P), idx(0) {} + PHI_iterator(PHINode *P, bool) // end iterator + : PHI(P), idx(PHI->getNumIncomingValues()) {} + + PHI_iterator &operator++() { ++idx; return *this; } + bool operator==(const PHI_iterator& x) const { return idx == x.idx; } + bool operator!=(const PHI_iterator& x) const { return !operator==(x); } + Value *getIncomingValue() { return PHI->getIncomingValue(idx); } + BasicBlock *getIncomingBlock() { return PHI->getIncomingBlock(idx); } + }; + + static PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); } + static PHI_iterator PHI_end(PhiT *PHI) { + return PHI_iterator(PHI, true); + } + + /// FindPredecessorBlocks - Put the predecessors of Info->BB into the Preds + /// vector, set Info->NumPreds, and allocate space in Info->Preds. + static void FindPredecessorBlocks(BasicBlock *BB, + SmallVectorImpl<BasicBlock*> *Preds) { + // We can get our predecessor info by walking the pred_iterator list, + // but it is relatively slow. If we already have PHI nodes in this + // block, walk one of them to get the predecessor list instead. + if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) { + Preds->append(SomePhi->block_begin(), SomePhi->block_end()); + } else { + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + Preds->push_back(*PI); + } + } + + /// GetUndefVal - Get an undefined value of the same type as the value + /// being handled. + static Value *GetUndefVal(BasicBlock *BB, SSAUpdater *Updater) { + return UndefValue::get(Updater->ProtoType); + } + + /// CreateEmptyPHI - Create a new PHI instruction in the specified block. + /// Reserve space for the operands but do not fill them in yet. + static Value *CreateEmptyPHI(BasicBlock *BB, unsigned NumPreds, + SSAUpdater *Updater) { + PHINode *PHI = PHINode::Create(Updater->ProtoType, NumPreds, + Updater->ProtoName, &BB->front()); + return PHI; + } + + /// AddPHIOperand - Add the specified value as an operand of the PHI for + /// the specified predecessor block. + static void AddPHIOperand(PHINode *PHI, Value *Val, BasicBlock *Pred) { + PHI->addIncoming(Val, Pred); + } + + /// InstrIsPHI - Check if an instruction is a PHI. + /// + static PHINode *InstrIsPHI(Instruction *I) { + return dyn_cast<PHINode>(I); + } + + /// ValueIsPHI - Check if a value is a PHI. + /// + static PHINode *ValueIsPHI(Value *Val, SSAUpdater *Updater) { + return dyn_cast<PHINode>(Val); + } + + /// ValueIsNewPHI - Like ValueIsPHI but also check if the PHI has no source + /// operands, i.e., it was just added. 
+ static PHINode *ValueIsNewPHI(Value *Val, SSAUpdater *Updater) { + PHINode *PHI = ValueIsPHI(Val, Updater); + if (PHI && PHI->getNumIncomingValues() == 0) + return PHI; + return nullptr; + } + + /// GetPHIValue - For the specified PHI instruction, return the value + /// that it defines. + static Value *GetPHIValue(PHINode *PHI) { + return PHI; + } +}; + +} // End llvm namespace + +/// Check to see if AvailableVals has an entry for the specified BB and if so, +/// return it. If not, construct SSA form by first calculating the required +/// placement of PHIs and then inserting new PHIs where needed. +Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { + AvailableValsTy &AvailableVals = getAvailableVals(AV); + if (Value *V = AvailableVals[BB]) + return V; + + SSAUpdaterImpl<SSAUpdater> Impl(this, &AvailableVals, InsertedPHIs); + return Impl.GetValue(BB); +} + +//===----------------------------------------------------------------------===// +// LoadAndStorePromoter Implementation +//===----------------------------------------------------------------------===// + +LoadAndStorePromoter:: +LoadAndStorePromoter(ArrayRef<const Instruction*> Insts, + SSAUpdater &S, StringRef BaseName) : SSA(S) { + if (Insts.empty()) return; + + const Value *SomeVal; + if (const LoadInst *LI = dyn_cast<LoadInst>(Insts[0])) + SomeVal = LI; + else + SomeVal = cast<StoreInst>(Insts[0])->getOperand(0); + + if (BaseName.empty()) + BaseName = SomeVal->getName(); + SSA.Initialize(SomeVal->getType(), BaseName); +} + + +void LoadAndStorePromoter:: +run(const SmallVectorImpl<Instruction*> &Insts) const { + + // First step: bucket up uses of the alloca by the block they occur in. + // This is important because we have to handle multiple defs/uses in a block + // ourselves: SSAUpdater is purely for cross-block references. + DenseMap<BasicBlock*, TinyPtrVector<Instruction*> > UsesByBlock; + + for (Instruction *User : Insts) + UsesByBlock[User->getParent()].push_back(User); + + // Okay, now we can iterate over all the blocks in the function with uses, + // processing them. Keep track of which loads are loading a live-in value. + // Walk the uses in the use-list order to be determinstic. + SmallVector<LoadInst*, 32> LiveInLoads; + DenseMap<Value*, Value*> ReplacedLoads; + + for (Instruction *User : Insts) { + BasicBlock *BB = User->getParent(); + TinyPtrVector<Instruction*> &BlockUses = UsesByBlock[BB]; + + // If this block has already been processed, ignore this repeat use. + if (BlockUses.empty()) continue; + + // Okay, this is the first use in the block. If this block just has a + // single user in it, we can rewrite it trivially. + if (BlockUses.size() == 1) { + // If it is a store, it is a trivial def of the value in the block. + if (StoreInst *SI = dyn_cast<StoreInst>(User)) { + updateDebugInfo(SI); + SSA.AddAvailableValue(BB, SI->getOperand(0)); + } else + // Otherwise it is a load, queue it to rewrite as a live-in load. + LiveInLoads.push_back(cast<LoadInst>(User)); + BlockUses.clear(); + continue; + } + + // Otherwise, check to see if this block is all loads. + bool HasStore = false; + for (Instruction *I : BlockUses) { + if (isa<StoreInst>(I)) { + HasStore = true; + break; + } + } + + // If so, we can queue them all as live in loads. We don't have an + // efficient way to tell which on is first in the block and don't want to + // scan large blocks, so just add all loads as live ins. 
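// For context, a hedged sketch of how a caller typically drives this class:
// gather every load and store of one memory location, hand them to run(), and
// let the SSAUpdater machinery above stitch the values together. `Insts` is
// assumed to be populated by the caller.
/*
    SmallVector<Instruction *, 16> Insts;   // all loads/stores of the location
    SmallVector<PHINode *, 8> NewPHIs;
    SSAUpdater SSA(&NewPHIs);
    LoadAndStorePromoter(Insts, SSA, "promoted").run(Insts);
    // Subclasses commonly override isInstInList / replaceLoadWithValue to hook
    // into the rewriting (e.g. to update debug info or alias metadata).
*/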
+ if (!HasStore) { + for (Instruction *I : BlockUses) + LiveInLoads.push_back(cast<LoadInst>(I)); + BlockUses.clear(); + continue; + } + + // Otherwise, we have mixed loads and stores (or just a bunch of stores). + // Since SSAUpdater is purely for cross-block values, we need to determine + // the order of these instructions in the block. If the first use in the + // block is a load, then it uses the live in value. The last store defines + // the live out value. We handle this by doing a linear scan of the block. + Value *StoredValue = nullptr; + for (Instruction &I : *BB) { + if (LoadInst *L = dyn_cast<LoadInst>(&I)) { + // If this is a load from an unrelated pointer, ignore it. + if (!isInstInList(L, Insts)) continue; + + // If we haven't seen a store yet, this is a live in use, otherwise + // use the stored value. + if (StoredValue) { + replaceLoadWithValue(L, StoredValue); + L->replaceAllUsesWith(StoredValue); + ReplacedLoads[L] = StoredValue; + } else { + LiveInLoads.push_back(L); + } + continue; + } + + if (StoreInst *SI = dyn_cast<StoreInst>(&I)) { + // If this is a store to an unrelated pointer, ignore it. + if (!isInstInList(SI, Insts)) continue; + updateDebugInfo(SI); + + // Remember that this is the active value in the block. + StoredValue = SI->getOperand(0); + } + } + + // The last stored value that happened is the live-out for the block. + assert(StoredValue && "Already checked that there is a store in block"); + SSA.AddAvailableValue(BB, StoredValue); + BlockUses.clear(); + } + + // Okay, now we rewrite all loads that use live-in values in the loop, + // inserting PHI nodes as necessary. + for (LoadInst *ALoad : LiveInLoads) { + Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent()); + replaceLoadWithValue(ALoad, NewVal); + + // Avoid assertions in unreachable code. + if (NewVal == ALoad) NewVal = UndefValue::get(NewVal->getType()); + ALoad->replaceAllUsesWith(NewVal); + ReplacedLoads[ALoad] = NewVal; + } + + // Allow the client to do stuff before we start nuking things. + doExtraRewritesBeforeFinalDeletion(); + + // Now that everything is rewritten, delete the old instructions from the + // function. They should all be dead now. + for (Instruction *User : Insts) { + // If this is a load that still has uses, then the load must have been added + // as a live value in the SSAUpdate data structure for a block (e.g. because + // the loaded value was stored later). In this case, we need to recursively + // propagate the updates until we get to the real value. + if (!User->use_empty()) { + Value *NewVal = ReplacedLoads[User]; + assert(NewVal && "not a replaced load?"); + + // Propagate down to the ultimate replacee. The intermediately loads + // could theoretically already have been deleted, so we don't want to + // dereference the Value*'s. 
+ DenseMap<Value*, Value*>::iterator RLI = ReplacedLoads.find(NewVal); + while (RLI != ReplacedLoads.end()) { + NewVal = RLI->second; + RLI = ReplacedLoads.find(NewVal); + } + + replaceLoadWithValue(cast<LoadInst>(User), NewVal); + User->replaceAllUsesWith(NewVal); + } + + instructionDeleted(User); + User->eraseFromParent(); + } +} + +bool +LoadAndStorePromoter::isInstInList(Instruction *I, + const SmallVectorImpl<Instruction*> &Insts) + const { + return is_contained(Insts, I); +} diff --git a/contrib/llvm/lib/Transforms/Utils/SanitizerStats.cpp b/contrib/llvm/lib/Transforms/Utils/SanitizerStats.cpp new file mode 100644 index 000000000000..9afd175c10ed --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/SanitizerStats.cpp @@ -0,0 +1,108 @@ +//===- SanitizerStats.cpp - Sanitizer statistics gathering ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implements code generation for sanitizer statistics gathering. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/SanitizerStats.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" +#include "llvm/ADT/Triple.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Module.h" + +using namespace llvm; + +SanitizerStatReport::SanitizerStatReport(Module *M) : M(M) { + StatTy = ArrayType::get(Type::getInt8PtrTy(M->getContext()), 2); + EmptyModuleStatsTy = makeModuleStatsTy(); + + ModuleStatsGV = new GlobalVariable(*M, EmptyModuleStatsTy, false, + GlobalValue::InternalLinkage, nullptr); +} + +ArrayType *SanitizerStatReport::makeModuleStatsArrayTy() { + return ArrayType::get(StatTy, Inits.size()); +} + +StructType *SanitizerStatReport::makeModuleStatsTy() { + return StructType::get(M->getContext(), {Type::getInt8PtrTy(M->getContext()), + Type::getInt32Ty(M->getContext()), + makeModuleStatsArrayTy()}); +} + +void SanitizerStatReport::create(IRBuilder<> &B, SanitizerStatKind SK) { + Function *F = B.GetInsertBlock()->getParent(); + Module *M = F->getParent(); + PointerType *Int8PtrTy = B.getInt8PtrTy(); + IntegerType *IntPtrTy = B.getIntPtrTy(M->getDataLayout()); + ArrayType *StatTy = ArrayType::get(Int8PtrTy, 2); + + Inits.push_back(ConstantArray::get( + StatTy, + {Constant::getNullValue(Int8PtrTy), + ConstantExpr::getIntToPtr( + ConstantInt::get(IntPtrTy, uint64_t(SK) << (IntPtrTy->getBitWidth() - + kSanitizerStatKindBits)), + Int8PtrTy)})); + + FunctionType *StatReportTy = + FunctionType::get(B.getVoidTy(), Int8PtrTy, false); + Constant *StatReport = M->getOrInsertFunction( + "__sanitizer_stat_report", StatReportTy); + + auto InitAddr = ConstantExpr::getGetElementPtr( + EmptyModuleStatsTy, ModuleStatsGV, + ArrayRef<Constant *>{ + ConstantInt::get(IntPtrTy, 0), ConstantInt::get(B.getInt32Ty(), 2), + ConstantInt::get(IntPtrTy, Inits.size() - 1), + }); + B.CreateCall(StatReport, ConstantExpr::getBitCast(InitAddr, Int8PtrTy)); +} + +void SanitizerStatReport::finish() { + if (Inits.empty()) { + ModuleStatsGV->eraseFromParent(); + return; + } + + PointerType *Int8PtrTy = Type::getInt8PtrTy(M->getContext()); + IntegerType *Int32Ty = Type::getInt32Ty(M->getContext()); + Type *VoidTy = Type::getVoidTy(M->getContext()); + + // Create a new ModuleStatsGV to replace the old 
one. We can't just set the + // old one's initializer because its type is different. + auto NewModuleStatsGV = new GlobalVariable( + *M, makeModuleStatsTy(), false, GlobalValue::InternalLinkage, + ConstantStruct::getAnon( + {Constant::getNullValue(Int8PtrTy), + ConstantInt::get(Int32Ty, Inits.size()), + ConstantArray::get(makeModuleStatsArrayTy(), Inits)})); + ModuleStatsGV->replaceAllUsesWith( + ConstantExpr::getBitCast(NewModuleStatsGV, ModuleStatsGV->getType())); + ModuleStatsGV->eraseFromParent(); + + // Create a global constructor to register NewModuleStatsGV. + auto F = Function::Create(FunctionType::get(VoidTy, false), + GlobalValue::InternalLinkage, "", M); + auto BB = BasicBlock::Create(M->getContext(), "", F); + IRBuilder<> B(BB); + + FunctionType *StatInitTy = FunctionType::get(VoidTy, Int8PtrTy, false); + Constant *StatInit = M->getOrInsertFunction( + "__sanitizer_stat_init", StatInitTy); + + B.CreateCall(StatInit, ConstantExpr::getBitCast(NewModuleStatsGV, Int8PtrTy)); + B.CreateRetVoid(); + + appendToGlobalCtors(*M, F, 0); +} diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp new file mode 100644 index 000000000000..54390e77bb1f --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -0,0 +1,6012 @@ +//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Peephole optimize the CFG. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/EHPersonalities.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/ConstantRange.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/NoFolder.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include 
"llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/ValueMapper.h" +#include <algorithm> +#include <cassert> +#include <climits> +#include <cstddef> +#include <cstdint> +#include <iterator> +#include <map> +#include <set> +#include <utility> +#include <vector> + +using namespace llvm; +using namespace PatternMatch; + +#define DEBUG_TYPE "simplifycfg" + +// Chosen as 2 so as to be cheap, but still to have enough power to fold +// a select, so the "clamp" idiom (of a min followed by a max) will be caught. +// To catch this, we need to fold a compare and a select, hence '2' being the +// minimum reasonable default. +static cl::opt<unsigned> PHINodeFoldingThreshold( + "phi-node-folding-threshold", cl::Hidden, cl::init(2), + cl::desc( + "Control the amount of phi node folding to perform (default = 2)")); + +static cl::opt<bool> DupRet( + "simplifycfg-dup-ret", cl::Hidden, cl::init(false), + cl::desc("Duplicate return instructions into unconditional branches")); + +static cl::opt<bool> + SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), + cl::desc("Sink common instructions down to the end block")); + +static cl::opt<bool> HoistCondStores( + "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), + cl::desc("Hoist conditional stores if an unconditional store precedes")); + +static cl::opt<bool> MergeCondStores( + "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true), + cl::desc("Hoist conditional stores even if an unconditional store does not " + "precede - hoist multiple conditional stores into a single " + "predicated store")); + +static cl::opt<bool> MergeCondStoresAggressively( + "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false), + cl::desc("When merging conditional stores, do so even if the resultant " + "basic blocks are unlikely to be if-converted as a result")); + +static cl::opt<bool> SpeculateOneExpensiveInst( + "speculate-one-expensive-inst", cl::Hidden, cl::init(true), + cl::desc("Allow exactly one expensive instruction to be speculatively " + "executed")); + +static cl::opt<unsigned> MaxSpeculationDepth( + "max-speculation-depth", cl::Hidden, cl::init(10), + cl::desc("Limit maximum recursion depth when calculating costs of " + "speculatively executed instructions")); + +STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); +STATISTIC(NumLinearMaps, + "Number of switch instructions turned into linear mapping"); +STATISTIC(NumLookupTables, + "Number of switch instructions turned into lookup tables"); +STATISTIC( + NumLookupTablesHoles, + "Number of switch instructions turned into lookup tables (holes checked)"); +STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares"); +STATISTIC(NumSinkCommons, + "Number of common instructions sunk down to the end block"); +STATISTIC(NumSpeculations, "Number of speculative executed instructions"); + +namespace { + +// The first field contains the value that the switch produces when a certain +// case group is selected, and the second field is a vector containing the +// cases composing the case group. +typedef SmallVector<std::pair<Constant *, SmallVector<ConstantInt *, 4>>, 2> + SwitchCaseResultVectorTy; +// The first field contains the phi node that generates a result of the switch +// and the second field contains the value generated for a certain case in the +// switch for that PHI. +typedef SmallVector<std::pair<PHINode *, Constant *>, 4> SwitchCaseResultsTy; + +/// ValueEqualityComparisonCase - Represents a case of a switch. 
+struct ValueEqualityComparisonCase { + ConstantInt *Value; + BasicBlock *Dest; + + ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest) + : Value(Value), Dest(Dest) {} + + bool operator<(ValueEqualityComparisonCase RHS) const { + // Comparing pointers is ok as we only rely on the order for uniquing. + return Value < RHS.Value; + } + + bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; } +}; + +class SimplifyCFGOpt { + const TargetTransformInfo &TTI; + const DataLayout &DL; + unsigned BonusInstThreshold; + AssumptionCache *AC; + SmallPtrSetImpl<BasicBlock *> *LoopHeaders; + Value *isValueEqualityComparison(TerminatorInst *TI); + BasicBlock *GetValueEqualityComparisonCases( + TerminatorInst *TI, std::vector<ValueEqualityComparisonCase> &Cases); + bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, + BasicBlock *Pred, + IRBuilder<> &Builder); + bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI, + IRBuilder<> &Builder); + + bool SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder); + bool SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder); + bool SimplifySingleResume(ResumeInst *RI); + bool SimplifyCommonResume(ResumeInst *RI); + bool SimplifyCleanupReturn(CleanupReturnInst *RI); + bool SimplifyUnreachable(UnreachableInst *UI); + bool SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder); + bool SimplifyIndirectBr(IndirectBrInst *IBI); + bool SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder); + bool SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder); + +public: + SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout &DL, + unsigned BonusInstThreshold, AssumptionCache *AC, + SmallPtrSetImpl<BasicBlock *> *LoopHeaders) + : TTI(TTI), DL(DL), BonusInstThreshold(BonusInstThreshold), AC(AC), + LoopHeaders(LoopHeaders) {} + + bool run(BasicBlock *BB); +}; + +} // end anonymous namespace + +/// Return true if it is safe to merge these two +/// terminator instructions together. +static bool +SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2, + SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) { + if (SI1 == SI2) + return false; // Can't merge with self! + + // It is not safe to merge these two switch instructions if they have a common + // successor, and if that successor has a PHI node, and if *that* PHI node has + // conflicting incoming values from the two switch blocks. + BasicBlock *SI1BB = SI1->getParent(); + BasicBlock *SI2BB = SI2->getParent(); + + SmallPtrSet<BasicBlock *, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB)); + bool Fail = false; + for (BasicBlock *Succ : successors(SI2BB)) + if (SI1Succs.count(Succ)) + for (BasicBlock::iterator BBI = Succ->begin(); isa<PHINode>(BBI); ++BBI) { + PHINode *PN = cast<PHINode>(BBI); + if (PN->getIncomingValueForBlock(SI1BB) != + PN->getIncomingValueForBlock(SI2BB)) { + if (FailBlocks) + FailBlocks->insert(Succ); + Fail = true; + } + } + + return !Fail; +} + +/// Return true if it is safe and profitable to merge these two terminator +/// instructions together, where SI1 is an unconditional branch. PhiNodes will +/// store all PHI nodes in common successors. +static bool +isProfitableToFoldUnconditional(BranchInst *SI1, BranchInst *SI2, + Instruction *Cond, + SmallVectorImpl<PHINode *> &PhiNodes) { + if (SI1 == SI2) + return false; // Can't merge with self! 
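// For orientation, the SimplifyCFGOpt class above is a per-block helper; a
// driver in the spirit of the llvm::SimplifyCFG entry point (a sketch only,
// details hedged) constructs it once per basic block:
/*
    bool SimplifyOneBlock(BasicBlock *BB, const TargetTransformInfo &TTI,
                          unsigned BonusInstThreshold, AssumptionCache *AC,
                          SmallPtrSetImpl<BasicBlock *> *LoopHeaders) {
      const DataLayout &DL = BB->getModule()->getDataLayout();
      return SimplifyCFGOpt(TTI, DL, BonusInstThreshold, AC, LoopHeaders)
          .run(BB);
    }
*/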
+ assert(SI1->isUnconditional() && SI2->isConditional()); + + // We fold the unconditional branch if we can easily update all PHI nodes in + // common successors: + // 1> We have a constant incoming value for the conditional branch; + // 2> We have "Cond" as the incoming value for the unconditional branch; + // 3> SI2->getCondition() and Cond have same operands. + CmpInst *Ci2 = dyn_cast<CmpInst>(SI2->getCondition()); + if (!Ci2) + return false; + if (!(Cond->getOperand(0) == Ci2->getOperand(0) && + Cond->getOperand(1) == Ci2->getOperand(1)) && + !(Cond->getOperand(0) == Ci2->getOperand(1) && + Cond->getOperand(1) == Ci2->getOperand(0))) + return false; + + BasicBlock *SI1BB = SI1->getParent(); + BasicBlock *SI2BB = SI2->getParent(); + SmallPtrSet<BasicBlock *, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB)); + for (BasicBlock *Succ : successors(SI2BB)) + if (SI1Succs.count(Succ)) + for (BasicBlock::iterator BBI = Succ->begin(); isa<PHINode>(BBI); ++BBI) { + PHINode *PN = cast<PHINode>(BBI); + if (PN->getIncomingValueForBlock(SI1BB) != Cond || + !isa<ConstantInt>(PN->getIncomingValueForBlock(SI2BB))) + return false; + PhiNodes.push_back(PN); + } + return true; +} + +/// Update PHI nodes in Succ to indicate that there will now be entries in it +/// from the 'NewPred' block. The values that will be flowing into the PHI nodes +/// will be the same as those coming in from ExistPred, an existing predecessor +/// of Succ. +static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, + BasicBlock *ExistPred) { + if (!isa<PHINode>(Succ->begin())) + return; // Quick exit if nothing to do + + PHINode *PN; + for (BasicBlock::iterator I = Succ->begin(); (PN = dyn_cast<PHINode>(I)); ++I) + PN->addIncoming(PN->getIncomingValueForBlock(ExistPred), NewPred); +} + +/// Compute an abstract "cost" of speculating the given instruction, +/// which is assumed to be safe to speculate. TCC_Free means cheap, +/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively +/// expensive. +static unsigned ComputeSpeculationCost(const User *I, + const TargetTransformInfo &TTI) { + assert(isSafeToSpeculativelyExecute(I) && + "Instruction is not safe to speculatively execute!"); + return TTI.getUserCost(I); +} + +/// If we have a merge point of an "if condition" as accepted above, +/// return true if the specified value dominates the block. We +/// don't handle the true generality of domination here, just a special case +/// which works well enough for us. +/// +/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to +/// see if V (which must be an instruction) and its recursive operands +/// that do not dominate BB have a combined cost lower than CostRemaining and +/// are non-trapping. If both are true, the instruction is inserted into the +/// set and true is returned. +/// +/// The cost for most non-trapping instructions is defined as 1 except for +/// Select whose cost is 2. +/// +/// After this function returns, CostRemaining is decreased by the cost of +/// V plus its non-dominating operands. If that cost is greater than +/// CostRemaining, false is returned and CostRemaining is undefined. +static bool DominatesMergePoint(Value *V, BasicBlock *BB, + SmallPtrSetImpl<Instruction *> *AggressiveInsts, + unsigned &CostRemaining, + const TargetTransformInfo &TTI, + unsigned Depth = 0) { + // It is possible to hit a zero-cost cycle (phi/gep instructions for example), + // so limit the recursion depth. 
+ // TODO: While this recursion limit does prevent pathological behavior, it + // would be better to track visited instructions to avoid cycles. + if (Depth == MaxSpeculationDepth) + return false; + + Instruction *I = dyn_cast<Instruction>(V); + if (!I) { + // Non-instructions all dominate instructions, but not all constantexprs + // can be executed unconditionally. + if (ConstantExpr *C = dyn_cast<ConstantExpr>(V)) + if (C->canTrap()) + return false; + return true; + } + BasicBlock *PBB = I->getParent(); + + // We don't want to allow weird loops that might have the "if condition" in + // the bottom of this block. + if (PBB == BB) + return false; + + // If this instruction is defined in a block that contains an unconditional + // branch to BB, then it must be in the 'conditional' part of the "if + // statement". If not, it definitely dominates the region. + BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator()); + if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB) + return true; + + // If we aren't allowing aggressive promotion anymore, then don't consider + // instructions in the 'if region'. + if (!AggressiveInsts) + return false; + + // If we have seen this instruction before, don't count it again. + if (AggressiveInsts->count(I)) + return true; + + // Okay, it looks like the instruction IS in the "condition". Check to + // see if it's a cheap instruction to unconditionally compute, and if it + // only uses stuff defined outside of the condition. If so, hoist it out. + if (!isSafeToSpeculativelyExecute(I)) + return false; + + unsigned Cost = ComputeSpeculationCost(I, TTI); + + // Allow exactly one instruction to be speculated regardless of its cost + // (as long as it is safe to do so). + // This is intended to flatten the CFG even if the instruction is a division + // or other expensive operation. The speculation of an expensive instruction + // is expected to be undone in CodeGenPrepare if the speculation has not + // enabled further IR optimizations. + if (Cost > CostRemaining && + (!SpeculateOneExpensiveInst || !AggressiveInsts->empty() || Depth > 0)) + return false; + + // Avoid unsigned wrap. + CostRemaining = (Cost > CostRemaining) ? 0 : CostRemaining - Cost; + + // Okay, we can only really hoist these out if their operands do + // not take us over the cost threshold. + for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) + if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, TTI, + Depth + 1)) + return false; + // Okay, it's safe to do this! Remember this instruction. + AggressiveInsts->insert(I); + return true; +} + +/// Extract ConstantInt from value, looking through IntToPtr +/// and PointerNullValue. Return NULL if value is not a constant int. +static ConstantInt *GetConstantInt(Value *V, const DataLayout &DL) { + // Normal constant int. + ConstantInt *CI = dyn_cast<ConstantInt>(V); + if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy()) + return CI; + + // This is some kind of pointer constant. Turn it into a pointer-sized + // ConstantInt if possible. + IntegerType *PtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType())); + + // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*). + if (isa<ConstantPointerNull>(V)) + return ConstantInt::get(PtrTy, 0); + + // IntToPtr const int. 
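// For reference, the three constant shapes GetConstantInt accepts
// (illustrative IR operands):
/*
    i32 42                        -->  the ConstantInt 42, returned directly
    i8* null                      -->  ConstantInt 0 of the pointer-sized type
    i8* inttoptr (i64 7 to i8*)   -->  ConstantInt 7 of the pointer-sized type
                                       (peeled by the IntToPtr case just below)
*/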
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) + if (CE->getOpcode() == Instruction::IntToPtr) + if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) { + // The constant is very likely to have the right type already. + if (CI->getType() == PtrTy) + return CI; + else + return cast<ConstantInt>( + ConstantExpr::getIntegerCast(CI, PtrTy, /*isSigned=*/false)); + } + return nullptr; +} + +namespace { + +/// Given a chain of or (||) or and (&&) comparison of a value against a +/// constant, this will try to recover the information required for a switch +/// structure. +/// It will depth-first traverse the chain of comparison, seeking for patterns +/// like %a == 12 or %a < 4 and combine them to produce a set of integer +/// representing the different cases for the switch. +/// Note that if the chain is composed of '||' it will build the set of elements +/// that matches the comparisons (i.e. any of this value validate the chain) +/// while for a chain of '&&' it will build the set elements that make the test +/// fail. +struct ConstantComparesGatherer { + const DataLayout &DL; + Value *CompValue; /// Value found for the switch comparison + Value *Extra; /// Extra clause to be checked before the switch + SmallVector<ConstantInt *, 8> Vals; /// Set of integers to match in switch + unsigned UsedICmps; /// Number of comparisons matched in the and/or chain + + /// Construct and compute the result for the comparison instruction Cond + ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) + : DL(DL), CompValue(nullptr), Extra(nullptr), UsedICmps(0) { + gather(Cond); + } + + /// Prevent copy + ConstantComparesGatherer(const ConstantComparesGatherer &) = delete; + ConstantComparesGatherer & + operator=(const ConstantComparesGatherer &) = delete; + +private: + /// Try to set the current value used for the comparison, it succeeds only if + /// it wasn't set before or if the new value is the same as the old one + bool setValueOnce(Value *NewVal) { + if (CompValue && CompValue != NewVal) + return false; + CompValue = NewVal; + return (CompValue != nullptr); + } + + /// Try to match Instruction "I" as a comparison against a constant and + /// populates the array Vals with the set of values that match (or do not + /// match depending on isEQ). + /// Return false on failure. On success, the Value the comparison matched + /// against is placed in CompValue. + /// If CompValue is already set, the function is expected to fail if a match + /// is found but the value compared to is different. + bool matchInstruction(Instruction *I, bool isEQ) { + // If this is an icmp against a constant, handle this as one of the cases. + ICmpInst *ICI; + ConstantInt *C; + if (!((ICI = dyn_cast<ICmpInst>(I)) && + (C = GetConstantInt(I->getOperand(1), DL)))) { + return false; + } + + Value *RHSVal; + const APInt *RHSC; + + // Pattern match a special case + // (x & ~2^z) == y --> x == y || x == y|2^z + // This undoes a transformation done by instcombine to fuse 2 compares. + if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) { + + // It's a little bit hard to see why the following transformations are + // correct. 
Here is a CVC3 program to verify them for 64-bit values: + + /* + ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63); + x : BITVECTOR(64); + y : BITVECTOR(64); + z : BITVECTOR(64); + mask : BITVECTOR(64) = BVSHL(ONE, z); + QUERY( (y & ~mask = y) => + ((x & ~mask = y) <=> (x = y OR x = (y | mask))) + ); + QUERY( (y | mask = y) => + ((x | mask = y) <=> (x = y OR x = (y & ~mask))) + ); + */ + + // Please note that each pattern must be a dual implication (<--> or + // iff). One directional implication can create spurious matches. If the + // implication is only one-way, an unsatisfiable condition on the left + // side can imply a satisfiable condition on the right side. Dual + // implication ensures that satisfiable conditions are transformed to + // other satisfiable conditions and unsatisfiable conditions are + // transformed to other unsatisfiable conditions. + + // Here is a concrete example of a unsatisfiable condition on the left + // implying a satisfiable condition on the right: + // + // mask = (1 << z) + // (x & ~mask) == y --> (x == y || x == (y | mask)) + // + // Substituting y = 3, z = 0 yields: + // (x & -2) == 3 --> (x == 3 || x == 2) + + // Pattern match a special case: + /* + QUERY( (y & ~mask = y) => + ((x & ~mask = y) <=> (x = y OR x = (y | mask))) + ); + */ + if (match(ICI->getOperand(0), + m_And(m_Value(RHSVal), m_APInt(RHSC)))) { + APInt Mask = ~*RHSC; + if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) { + // If we already have a value for the switch, it has to match! + if (!setValueOnce(RHSVal)) + return false; + + Vals.push_back(C); + Vals.push_back( + ConstantInt::get(C->getContext(), + C->getValue() | Mask)); + UsedICmps++; + return true; + } + } + + // Pattern match a special case: + /* + QUERY( (y | mask = y) => + ((x | mask = y) <=> (x = y OR x = (y & ~mask))) + ); + */ + if (match(ICI->getOperand(0), + m_Or(m_Value(RHSVal), m_APInt(RHSC)))) { + APInt Mask = *RHSC; + if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) { + // If we already have a value for the switch, it has to match! + if (!setValueOnce(RHSVal)) + return false; + + Vals.push_back(C); + Vals.push_back(ConstantInt::get(C->getContext(), + C->getValue() & ~Mask)); + UsedICmps++; + return true; + } + } + + // If we already have a value for the switch, it has to match! + if (!setValueOnce(ICI->getOperand(0))) + return false; + + UsedICmps++; + Vals.push_back(C); + return ICI->getOperand(0); + } + + // If we have "x ult 3", for example, then we can add 0,1,2 to the set. + ConstantRange Span = ConstantRange::makeAllowedICmpRegion( + ICI->getPredicate(), C->getValue()); + + // Shift the range if the compare is fed by an add. This is the range + // compare idiom as emitted by instcombine. + Value *CandidateVal = I->getOperand(0); + if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) { + Span = Span.subtract(*RHSC); + CandidateVal = RHSVal; + } + + // If this is an and/!= check, then we are looking to build the set of + // value that *don't* pass the and chain. I.e. to turn "x ugt 2" into + // x != 0 && x != 1. + if (!isEQ) + Span = Span.inverse(); + + // If there are a ton of values, we don't want to make a ginormous switch. + if (Span.getSetSize().ugt(8) || Span.isEmptySet()) { + return false; + } + + // If we already have a value for the switch, it has to match! 
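// Worked examples for the range path above, before the final recording step
// below (constants are illustrative):
/*
    icmp ult i32 %x, 3            Span = [0, 3)              Vals = {0, 1, 2}

    %t = add i32 %x, -5           Span = [0, 3) - (-5)
    icmp ult i32 %t, 3                 = [5, 8)               Vals = {5, 6, 7}
                                  (instcombine's encoding of 5 <= %x < 8)

    For an '&&' chain (isEQ == false) the span is inverted first, so
    icmp ne i32 %x, 7             contributes Vals = {7}, the one value that
                                  makes the chain fail.
*/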
+ if (!setValueOnce(CandidateVal)) + return false; + + // Add all values from the range to the set + for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp) + Vals.push_back(ConstantInt::get(I->getContext(), Tmp)); + + UsedICmps++; + return true; + } + + /// Given a potentially 'or'd or 'and'd together collection of icmp + /// eq/ne/lt/gt instructions that compare a value against a constant, extract + /// the value being compared, and stick the list constants into the Vals + /// vector. + /// One "Extra" case is allowed to differ from the other. + void gather(Value *V) { + Instruction *I = dyn_cast<Instruction>(V); + bool isEQ = (I->getOpcode() == Instruction::Or); + + // Keep a stack (SmallVector for efficiency) for depth-first traversal + SmallVector<Value *, 8> DFT; + SmallPtrSet<Value *, 8> Visited; + + // Initialize + Visited.insert(V); + DFT.push_back(V); + + while (!DFT.empty()) { + V = DFT.pop_back_val(); + + if (Instruction *I = dyn_cast<Instruction>(V)) { + // If it is a || (or && depending on isEQ), process the operands. + if (I->getOpcode() == (isEQ ? Instruction::Or : Instruction::And)) { + if (Visited.insert(I->getOperand(1)).second) + DFT.push_back(I->getOperand(1)); + if (Visited.insert(I->getOperand(0)).second) + DFT.push_back(I->getOperand(0)); + continue; + } + + // Try to match the current instruction + if (matchInstruction(I, isEQ)) + // Match succeed, continue the loop + continue; + } + + // One element of the sequence of || (or &&) could not be match as a + // comparison against the same value as the others. + // We allow only one "Extra" case to be checked before the switch + if (!Extra) { + Extra = V; + continue; + } + // Failed to parse a proper sequence, abort now + CompValue = nullptr; + break; + } + } +}; + +} // end anonymous namespace + +static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) { + Instruction *Cond = nullptr; + if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { + Cond = dyn_cast<Instruction>(SI->getCondition()); + } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { + if (BI->isConditional()) + Cond = dyn_cast<Instruction>(BI->getCondition()); + } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) { + Cond = dyn_cast<Instruction>(IBI->getAddress()); + } + + TI->eraseFromParent(); + if (Cond) + RecursivelyDeleteTriviallyDeadInstructions(Cond); +} + +/// Return true if the specified terminator checks +/// to see if a value is equal to constant integer value. +Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) { + Value *CV = nullptr; + if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { + // Do not permit merging of large switch instructions into their + // predecessors unless there is only one predecessor. + if (SI->getNumSuccessors() * std::distance(pred_begin(SI->getParent()), + pred_end(SI->getParent())) <= + 128) + CV = SI->getCondition(); + } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) + if (BI->isConditional() && BI->getCondition()->hasOneUse()) + if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) { + if (ICI->isEquality() && GetConstantInt(ICI->getOperand(1), DL)) + CV = ICI->getOperand(0); + } + + // Unwrap any lossless ptrtoint cast. + if (CV) { + if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) { + Value *Ptr = PTII->getPointerOperand(); + if (PTII->getType() == DL.getIntPtrType(Ptr->getType())) + CV = Ptr; + } + } + return CV; +} + +/// Given a value comparison instruction, +/// decode all of the 'cases' that it represents and return the 'default' block. 
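// How the ConstantComparesGatherer above is consumed by its caller, roughly
// (a sketch in the spirit of the branch-on-icmp-chain simplification; the
// surrounding profitability checks are elided):
/*
    ConstantComparesGatherer ConstantCompare(Cond, DL);
    Value *CompVal = ConstantCompare.CompValue;    // common compared value
    SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;  // switch cases
    Value *ExtraCase = ConstantCompare.Extra;      // at most one odd clause out
    if (!CompVal || ConstantCompare.UsedICmps <= 1)
      return false;   // not enough of a chain to be worth a switch
*/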
+BasicBlock *SimplifyCFGOpt::GetValueEqualityComparisonCases( + TerminatorInst *TI, std::vector<ValueEqualityComparisonCase> &Cases) { + if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { + Cases.reserve(SI->getNumCases()); + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; + ++i) + Cases.push_back( + ValueEqualityComparisonCase(i.getCaseValue(), i.getCaseSuccessor())); + return SI->getDefaultDest(); + } + + BranchInst *BI = cast<BranchInst>(TI); + ICmpInst *ICI = cast<ICmpInst>(BI->getCondition()); + BasicBlock *Succ = BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_NE); + Cases.push_back(ValueEqualityComparisonCase( + GetConstantInt(ICI->getOperand(1), DL), Succ)); + return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ); +} + +/// Given a vector of bb/value pairs, remove any entries +/// in the list that match the specified block. +static void +EliminateBlockCases(BasicBlock *BB, + std::vector<ValueEqualityComparisonCase> &Cases) { + Cases.erase(std::remove(Cases.begin(), Cases.end(), BB), Cases.end()); +} + +/// Return true if there are any keys in C1 that exist in C2 as well. +static bool ValuesOverlap(std::vector<ValueEqualityComparisonCase> &C1, + std::vector<ValueEqualityComparisonCase> &C2) { + std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2; + + // Make V1 be smaller than V2. + if (V1->size() > V2->size()) + std::swap(V1, V2); + + if (V1->empty()) + return false; + if (V1->size() == 1) { + // Just scan V2. + ConstantInt *TheVal = (*V1)[0].Value; + for (unsigned i = 0, e = V2->size(); i != e; ++i) + if (TheVal == (*V2)[i].Value) + return true; + } + + // Otherwise, just sort both lists and compare element by element. + array_pod_sort(V1->begin(), V1->end()); + array_pod_sort(V2->begin(), V2->end()); + unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size(); + while (i1 != e1 && i2 != e2) { + if ((*V1)[i1].Value == (*V2)[i2].Value) + return true; + if ((*V1)[i1].Value < (*V2)[i2].Value) + ++i1; + else + ++i2; + } + return false; +} + +/// If TI is known to be a terminator instruction and its block is known to +/// only have a single predecessor block, check to see if that predecessor is +/// also a value comparison with the same value, and if that comparison +/// determines the outcome of this comparison. If so, simplify TI. This does a +/// very limited form of jump threading. +bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor( + TerminatorInst *TI, BasicBlock *Pred, IRBuilder<> &Builder) { + Value *PredVal = isValueEqualityComparison(Pred->getTerminator()); + if (!PredVal) + return false; // Not a value comparison in predecessor. + + Value *ThisVal = isValueEqualityComparison(TI); + assert(ThisVal && "This isn't a value comparison!!"); + if (ThisVal != PredVal) + return false; // Different predicates. + + // TODO: Preserve branch weight metadata, similarly to how + // FoldValueComparisonIntoPredecessors preserves it. + + // Find out information about when control will move from Pred to TI's block. + std::vector<ValueEqualityComparisonCase> PredCases; + BasicBlock *PredDef = + GetValueEqualityComparisonCases(Pred->getTerminator(), PredCases); + EliminateBlockCases(PredDef, PredCases); // Remove default from cases. + + // Find information about how control leaves this block. + std::vector<ValueEqualityComparisonCase> ThisCases; + BasicBlock *ThisDef = GetValueEqualityComparisonCases(TI, ThisCases); + EliminateBlockCases(ThisDef, ThisCases); // Remove default from cases. 
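// As the TODO above notes, carrying profile data across this rewrite would
// amount to re-attaching scaled "branch_weights" metadata to the rewritten
// terminator, along these lines (NewTI and the weights are placeholders):
/*
    SmallVector<uint32_t, 8> MDWeights = {90, 10};
    NewTI->setMetadata(
        LLVMContext::MD_prof,
        MDBuilder(BB->getContext()).createBranchWeights(MDWeights));
*/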
+ + // If TI's block is the default block from Pred's comparison, potentially + // simplify TI based on this knowledge. + if (PredDef == TI->getParent()) { + // If we are here, we know that the value is none of those cases listed in + // PredCases. If there are any cases in ThisCases that are in PredCases, we + // can simplify TI. + if (!ValuesOverlap(PredCases, ThisCases)) + return false; + + if (isa<BranchInst>(TI)) { + // Okay, one of the successors of this condbr is dead. Convert it to a + // uncond br. + assert(ThisCases.size() == 1 && "Branch can only have one case!"); + // Insert the new branch. + Instruction *NI = Builder.CreateBr(ThisDef); + (void)NI; + + // Remove PHI node entries for the dead edge. + ThisCases[0].Dest->removePredecessor(TI->getParent()); + + DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() + << "Through successor TI: " << *TI << "Leaving: " << *NI + << "\n"); + + EraseTerminatorInstAndDCECond(TI); + return true; + } + + SwitchInst *SI = cast<SwitchInst>(TI); + // Okay, TI has cases that are statically dead, prune them away. + SmallPtrSet<Constant *, 16> DeadCases; + for (unsigned i = 0, e = PredCases.size(); i != e; ++i) + DeadCases.insert(PredCases[i].Value); + + DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() + << "Through successor TI: " << *TI); + + // Collect branch weights into a vector. + SmallVector<uint32_t, 8> Weights; + MDNode *MD = SI->getMetadata(LLVMContext::MD_prof); + bool HasWeight = MD && (MD->getNumOperands() == 2 + SI->getNumCases()); + if (HasWeight) + for (unsigned MD_i = 1, MD_e = MD->getNumOperands(); MD_i < MD_e; + ++MD_i) { + ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(MD_i)); + Weights.push_back(CI->getValue().getZExtValue()); + } + for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) { + --i; + if (DeadCases.count(i.getCaseValue())) { + if (HasWeight) { + std::swap(Weights[i.getCaseIndex() + 1], Weights.back()); + Weights.pop_back(); + } + i.getCaseSuccessor()->removePredecessor(TI->getParent()); + SI->removeCase(i); + } + } + if (HasWeight && Weights.size() >= 2) + SI->setMetadata(LLVMContext::MD_prof, + MDBuilder(SI->getParent()->getContext()) + .createBranchWeights(Weights)); + + DEBUG(dbgs() << "Leaving: " << *TI << "\n"); + return true; + } + + // Otherwise, TI's block must correspond to some matched value. Find out + // which value (or set of values) this is. + ConstantInt *TIV = nullptr; + BasicBlock *TIBB = TI->getParent(); + for (unsigned i = 0, e = PredCases.size(); i != e; ++i) + if (PredCases[i].Dest == TIBB) { + if (TIV) + return false; // Cannot handle multiple values coming to this block. + TIV = PredCases[i].Value; + } + assert(TIV && "No edge from pred to succ?"); + + // Okay, we found the one constant that our value can be if we get into TI's + // BB. Find out which successor will unconditionally be branched to. + BasicBlock *TheRealDest = nullptr; + for (unsigned i = 0, e = ThisCases.size(); i != e; ++i) + if (ThisCases[i].Value == TIV) { + TheRealDest = ThisCases[i].Dest; + break; + } + + // If not handled by any explicit cases, it is handled by the default case. + if (!TheRealDest) + TheRealDest = ThisDef; + + // Remove PHI node entries for dead edges. + BasicBlock *CheckEdge = TheRealDest; + for (BasicBlock *Succ : successors(TIBB)) + if (Succ != CheckEdge) + Succ->removePredecessor(TIBB); + else + CheckEdge = nullptr; + + // Insert the new branch. 
+ Instruction *NI = Builder.CreateBr(TheRealDest); + (void)NI; + + DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() + << "Through successor TI: " << *TI << "Leaving: " << *NI + << "\n"); + + EraseTerminatorInstAndDCECond(TI); + return true; +} + +namespace { + +/// This class implements a stable ordering of constant +/// integers that does not depend on their address. This is important for +/// applications that sort ConstantInt's to ensure uniqueness. +struct ConstantIntOrdering { + bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const { + return LHS->getValue().ult(RHS->getValue()); + } +}; + +} // end anonymous namespace + +static int ConstantIntSortPredicate(ConstantInt *const *P1, + ConstantInt *const *P2) { + const ConstantInt *LHS = *P1; + const ConstantInt *RHS = *P2; + if (LHS == RHS) + return 0; + return LHS->getValue().ult(RHS->getValue()) ? 1 : -1; +} + +static inline bool HasBranchWeights(const Instruction *I) { + MDNode *ProfMD = I->getMetadata(LLVMContext::MD_prof); + if (ProfMD && ProfMD->getOperand(0)) + if (MDString *MDS = dyn_cast<MDString>(ProfMD->getOperand(0))) + return MDS->getString().equals("branch_weights"); + + return false; +} + +/// Get Weights of a given TerminatorInst, the default weight is at the front +/// of the vector. If TI is a conditional eq, we need to swap the branch-weight +/// metadata. +static void GetBranchWeights(TerminatorInst *TI, + SmallVectorImpl<uint64_t> &Weights) { + MDNode *MD = TI->getMetadata(LLVMContext::MD_prof); + assert(MD); + for (unsigned i = 1, e = MD->getNumOperands(); i < e; ++i) { + ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(i)); + Weights.push_back(CI->getValue().getZExtValue()); + } + + // If TI is a conditional eq, the default case is the false case, + // and the corresponding branch-weight data is at index 2. We swap the + // default weight to be the first entry. + if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { + assert(Weights.size() == 2); + ICmpInst *ICI = cast<ICmpInst>(BI->getCondition()); + if (ICI->getPredicate() == ICmpInst::ICMP_EQ) + std::swap(Weights.front(), Weights.back()); + } +} + +/// Keep halving the weights until all can fit in uint32_t. +static void FitWeights(MutableArrayRef<uint64_t> Weights) { + uint64_t Max = *std::max_element(Weights.begin(), Weights.end()); + if (Max > UINT_MAX) { + unsigned Offset = 32 - countLeadingZeros(Max); + for (uint64_t &I : Weights) + I >>= Offset; + } +} + +/// The specified terminator is a value equality comparison instruction +/// (either a switch or a branch on "X == c"). +/// See if any of the predecessors of the terminator block are value comparisons +/// on the same value. If so, and if safe to do so, fold them together. +bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, + IRBuilder<> &Builder) { + BasicBlock *BB = TI->getParent(); + Value *CV = isValueEqualityComparison(TI); // CondVal + assert(CV && "Not a comparison?"); + bool Changed = false; + + SmallVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB)); + while (!Preds.empty()) { + BasicBlock *Pred = Preds.pop_back_val(); + + // See if the predecessor is a comparison with the same value. 
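// FitWeights above, worked through once: with {1 << 40, 8} the largest entry
// needs 41 bits, so Offset = 32 - countLeadingZeros(1 << 40) = 32 - 23 = 9 and
// every weight is shifted right by 9:
/*
    SmallVector<uint64_t, 2> Weights = {uint64_t(1) << 40, 8};
    FitWeights(Weights);   // Weights is now {1u << 31, 0}; the ratio is kept
                           // approximately, and everything fits a uint32_t.
*/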
+ TerminatorInst *PTI = Pred->getTerminator(); + Value *PCV = isValueEqualityComparison(PTI); // PredCondVal + + if (PCV == CV && TI != PTI) { + SmallSetVector<BasicBlock*, 4> FailBlocks; + if (!SafeToMergeTerminators(TI, PTI, &FailBlocks)) { + for (auto *Succ : FailBlocks) { + std::vector<BasicBlock*> Blocks = { TI->getParent() }; + if (!SplitBlockPredecessors(Succ, Blocks, ".fold.split")) + return false; + } + } + + // Figure out which 'cases' to copy from SI to PSI. + std::vector<ValueEqualityComparisonCase> BBCases; + BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases); + + std::vector<ValueEqualityComparisonCase> PredCases; + BasicBlock *PredDefault = GetValueEqualityComparisonCases(PTI, PredCases); + + // Based on whether the default edge from PTI goes to BB or not, fill in + // PredCases and PredDefault with the new switch cases we would like to + // build. + SmallVector<BasicBlock *, 8> NewSuccessors; + + // Update the branch weight metadata along the way + SmallVector<uint64_t, 8> Weights; + bool PredHasWeights = HasBranchWeights(PTI); + bool SuccHasWeights = HasBranchWeights(TI); + + if (PredHasWeights) { + GetBranchWeights(PTI, Weights); + // branch-weight metadata is inconsistent here. + if (Weights.size() != 1 + PredCases.size()) + PredHasWeights = SuccHasWeights = false; + } else if (SuccHasWeights) + // If there are no predecessor weights but there are successor weights, + // populate Weights with 1, which will later be scaled to the sum of + // successor's weights + Weights.assign(1 + PredCases.size(), 1); + + SmallVector<uint64_t, 8> SuccWeights; + if (SuccHasWeights) { + GetBranchWeights(TI, SuccWeights); + // branch-weight metadata is inconsistent here. + if (SuccWeights.size() != 1 + BBCases.size()) + PredHasWeights = SuccHasWeights = false; + } else if (PredHasWeights) + SuccWeights.assign(1 + BBCases.size(), 1); + + if (PredDefault == BB) { + // If this is the default destination from PTI, only the edges in TI + // that don't occur in PTI, or that branch to BB will be activated. + std::set<ConstantInt *, ConstantIntOrdering> PTIHandled; + for (unsigned i = 0, e = PredCases.size(); i != e; ++i) + if (PredCases[i].Dest != BB) + PTIHandled.insert(PredCases[i].Value); + else { + // The default destination is BB, we don't need explicit targets. + std::swap(PredCases[i], PredCases.back()); + + if (PredHasWeights || SuccHasWeights) { + // Increase weight for the default case. + Weights[0] += Weights[i + 1]; + std::swap(Weights[i + 1], Weights.back()); + Weights.pop_back(); + } + + PredCases.pop_back(); + --i; + --e; + } + + // Reconstruct the new switch statement we will be building. + if (PredDefault != BBDefault) { + PredDefault->removePredecessor(Pred); + PredDefault = BBDefault; + NewSuccessors.push_back(BBDefault); + } + + unsigned CasesFromPred = Weights.size(); + uint64_t ValidTotalSuccWeight = 0; + for (unsigned i = 0, e = BBCases.size(); i != e; ++i) + if (!PTIHandled.count(BBCases[i].Value) && + BBCases[i].Dest != BBDefault) { + PredCases.push_back(BBCases[i]); + NewSuccessors.push_back(BBCases[i].Dest); + if (SuccHasWeights || PredHasWeights) { + // The default weight is at index 0, so weight for the ith case + // should be at index i+1. Scale the cases from successor by + // PredDefaultWeight (Weights[0]). 
+ Weights.push_back(Weights[0] * SuccWeights[i + 1]); + ValidTotalSuccWeight += SuccWeights[i + 1]; + } + } + + if (SuccHasWeights || PredHasWeights) { + ValidTotalSuccWeight += SuccWeights[0]; + // Scale the cases from predecessor by ValidTotalSuccWeight. + for (unsigned i = 1; i < CasesFromPred; ++i) + Weights[i] *= ValidTotalSuccWeight; + // Scale the default weight by SuccDefaultWeight (SuccWeights[0]). + Weights[0] *= SuccWeights[0]; + } + } else { + // If this is not the default destination from PSI, only the edges + // in SI that occur in PSI with a destination of BB will be + // activated. + std::set<ConstantInt *, ConstantIntOrdering> PTIHandled; + std::map<ConstantInt *, uint64_t> WeightsForHandled; + for (unsigned i = 0, e = PredCases.size(); i != e; ++i) + if (PredCases[i].Dest == BB) { + PTIHandled.insert(PredCases[i].Value); + + if (PredHasWeights || SuccHasWeights) { + WeightsForHandled[PredCases[i].Value] = Weights[i + 1]; + std::swap(Weights[i + 1], Weights.back()); + Weights.pop_back(); + } + + std::swap(PredCases[i], PredCases.back()); + PredCases.pop_back(); + --i; + --e; + } + + // Okay, now we know which constants were sent to BB from the + // predecessor. Figure out where they will all go now. + for (unsigned i = 0, e = BBCases.size(); i != e; ++i) + if (PTIHandled.count(BBCases[i].Value)) { + // If this is one we are capable of getting... + if (PredHasWeights || SuccHasWeights) + Weights.push_back(WeightsForHandled[BBCases[i].Value]); + PredCases.push_back(BBCases[i]); + NewSuccessors.push_back(BBCases[i].Dest); + PTIHandled.erase( + BBCases[i].Value); // This constant is taken care of + } + + // If there are any constants vectored to BB that TI doesn't handle, + // they must go to the default destination of TI. + for (ConstantInt *I : PTIHandled) { + if (PredHasWeights || SuccHasWeights) + Weights.push_back(WeightsForHandled[I]); + PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault)); + NewSuccessors.push_back(BBDefault); + } + } + + // Okay, at this point, we know which new successor Pred will get. Make + // sure we update the number of entries in the PHI nodes for these + // successors. + for (BasicBlock *NewSuccessor : NewSuccessors) + AddPredecessorToBlock(NewSuccessor, Pred, BB); + + Builder.SetInsertPoint(PTI); + // Convert pointer to int before we switch. + if (CV->getType()->isPointerTy()) { + CV = Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), + "magicptr"); + } + + // Now that the successors are updated, create the new Switch instruction. + SwitchInst *NewSI = + Builder.CreateSwitch(CV, PredDefault, PredCases.size()); + NewSI->setDebugLoc(PTI->getDebugLoc()); + for (ValueEqualityComparisonCase &V : PredCases) + NewSI->addCase(V.Value, V.Dest); + + if (PredHasWeights || SuccHasWeights) { + // Halve the weights if any of them cannot fit in an uint32_t + FitWeights(Weights); + + SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end()); + + NewSI->setMetadata( + LLVMContext::MD_prof, + MDBuilder(BB->getContext()).createBranchWeights(MDWeights)); + } + + EraseTerminatorInstAndDCECond(PTI); + + // Okay, last check. If BB is still a successor of PSI, then we must + // have an infinite loop case. If so, add an infinitely looping block + // to handle the case to preserve the behavior of the code. 
+ BasicBlock *InfLoopBlock = nullptr; + for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i) + if (NewSI->getSuccessor(i) == BB) { + if (!InfLoopBlock) { + // Insert it at the end of the function, because it's either code, + // or it won't matter if it's hot. :) + InfLoopBlock = BasicBlock::Create(BB->getContext(), "infloop", + BB->getParent()); + BranchInst::Create(InfLoopBlock, InfLoopBlock); + } + NewSI->setSuccessor(i, InfLoopBlock); + } + + Changed = true; + } + } + return Changed; +} + +// If we would need to insert a select that uses the value of this invoke +// (comments in HoistThenElseCodeToIf explain why we would need to do this), we +// can't hoist the invoke, as there is nowhere to put the select in this case. +static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, + Instruction *I1, Instruction *I2) { + for (BasicBlock *Succ : successors(BB1)) { + PHINode *PN; + for (BasicBlock::iterator BBI = Succ->begin(); + (PN = dyn_cast<PHINode>(BBI)); ++BBI) { + Value *BB1V = PN->getIncomingValueForBlock(BB1); + Value *BB2V = PN->getIncomingValueForBlock(BB2); + if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) { + return false; + } + } + } + return true; +} + +static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I); + +/// Given a conditional branch that goes to BB1 and BB2, hoist any common code +/// in the two blocks up into the branch block. The caller of this function +/// guarantees that BI's block dominates BB1 and BB2. +static bool HoistThenElseCodeToIf(BranchInst *BI, + const TargetTransformInfo &TTI) { + // This does very trivial matching, with limited scanning, to find identical + // instructions in the two blocks. In particular, we don't want to get into + // O(M*N) situations here where M and N are the sizes of BB1 and BB2. As + // such, we currently just scan for obviously identical instructions in an + // identical order. + BasicBlock *BB1 = BI->getSuccessor(0); // The true destination. + BasicBlock *BB2 = BI->getSuccessor(1); // The false destination + + BasicBlock::iterator BB1_Itr = BB1->begin(); + BasicBlock::iterator BB2_Itr = BB2->begin(); + + Instruction *I1 = &*BB1_Itr++, *I2 = &*BB2_Itr++; + // Skip debug info if it is not identical. + DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1); + DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2); + if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) { + while (isa<DbgInfoIntrinsic>(I1)) + I1 = &*BB1_Itr++; + while (isa<DbgInfoIntrinsic>(I2)) + I2 = &*BB2_Itr++; + } + if (isa<PHINode>(I1) || !I1->isIdenticalToWhenDefined(I2) || + (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))) + return false; + + BasicBlock *BIParent = BI->getParent(); + + bool Changed = false; + do { + // If we are hoisting the terminator instruction, don't move one (making a + // broken BB), instead clone it, and remove BI. + if (isa<TerminatorInst>(I1)) + goto HoistTerminator; + + if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2)) + return Changed; + + // For a normal instruction, we just move one to right before the branch, + // then replace all uses of the other with the first. Finally, we remove + // the now redundant second instruction. 
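// The net effect on a simple diamond, as illustrative IR:
/*
    ; before:
      br i1 %c, label %t, label %f
    t:
      %v1 = add i32 %a, %b
      br label %m
    f:
      %v2 = add i32 %a, %b
      br label %m
    m:
      %p = phi i32 [ %v1, %t ], [ %v2, %f ]

    ; after HoistThenElseCodeToIf:
      %v = add i32 %a, %b
      br i1 %c, label %t, label %f
    m:
      %p = phi i32 [ %v, %t ], [ %v, %f ]   ; now trivially %v
*/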
+ BIParent->getInstList().splice(BI->getIterator(), BB1->getInstList(), I1); + if (!I2->use_empty()) + I2->replaceAllUsesWith(I1); + I1->andIRFlags(I2); + unsigned KnownIDs[] = {LLVMContext::MD_tbaa, + LLVMContext::MD_range, + LLVMContext::MD_fpmath, + LLVMContext::MD_invariant_load, + LLVMContext::MD_nonnull, + LLVMContext::MD_invariant_group, + LLVMContext::MD_align, + LLVMContext::MD_dereferenceable, + LLVMContext::MD_dereferenceable_or_null, + LLVMContext::MD_mem_parallel_loop_access}; + combineMetadata(I1, I2, KnownIDs); + + // If the debug loc for I1 and I2 are different, as we are combining them + // into one instruction, we do not want to select debug loc randomly from + // I1 or I2. + if (!isa<CallInst>(I1) && I1->getDebugLoc() != I2->getDebugLoc()) + I1->setDebugLoc( + DILocation::getMergedLocation(I1->getDebugLoc(), I2->getDebugLoc())); + + I2->eraseFromParent(); + Changed = true; + + I1 = &*BB1_Itr++; + I2 = &*BB2_Itr++; + // Skip debug info if it is not identical. + DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1); + DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2); + if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) { + while (isa<DbgInfoIntrinsic>(I1)) + I1 = &*BB1_Itr++; + while (isa<DbgInfoIntrinsic>(I2)) + I2 = &*BB2_Itr++; + } + } while (I1->isIdenticalToWhenDefined(I2)); + + return true; + +HoistTerminator: + // It may not be possible to hoist an invoke. + if (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)) + return Changed; + + for (BasicBlock *Succ : successors(BB1)) { + PHINode *PN; + for (BasicBlock::iterator BBI = Succ->begin(); + (PN = dyn_cast<PHINode>(BBI)); ++BBI) { + Value *BB1V = PN->getIncomingValueForBlock(BB1); + Value *BB2V = PN->getIncomingValueForBlock(BB2); + if (BB1V == BB2V) + continue; + + // Check for passingValueIsAlwaysUndefined here because we would rather + // eliminate undefined control flow then converting it to a select. + if (passingValueIsAlwaysUndefined(BB1V, PN) || + passingValueIsAlwaysUndefined(BB2V, PN)) + return Changed; + + if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V)) + return Changed; + if (isa<ConstantExpr>(BB2V) && !isSafeToSpeculativelyExecute(BB2V)) + return Changed; + } + } + + // Okay, it is safe to hoist the terminator. + Instruction *NT = I1->clone(); + BIParent->getInstList().insert(BI->getIterator(), NT); + if (!NT->getType()->isVoidTy()) { + I1->replaceAllUsesWith(NT); + I2->replaceAllUsesWith(NT); + NT->takeName(I1); + } + + IRBuilder<NoFolder> Builder(NT); + // Hoisting one of the terminators from our successor is a great thing. + // Unfortunately, the successors of the if/else blocks may have PHI nodes in + // them. If they do, all PHI entries for BB1/BB2 must agree for all PHI + // nodes, so we insert select instruction to compute the final result. + std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects; + for (BasicBlock *Succ : successors(BB1)) { + PHINode *PN; + for (BasicBlock::iterator BBI = Succ->begin(); + (PN = dyn_cast<PHINode>(BBI)); ++BBI) { + Value *BB1V = PN->getIncomingValueForBlock(BB1); + Value *BB2V = PN->getIncomingValueForBlock(BB2); + if (BB1V == BB2V) + continue; + + // These values do not agree. Insert a select instruction before NT + // that determines the right value. + SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)]; + if (!SI) + SI = cast<SelectInst>( + Builder.CreateSelect(BI->getCondition(), BB1V, BB2V, + BB1V->getName() + "." 
+ BB2V->getName(), BI)); + + // Make the PHI node use the select for all incoming values for BB1/BB2 + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingBlock(i) == BB1 || PN->getIncomingBlock(i) == BB2) + PN->setIncomingValue(i, SI); + } + } + + // Update any PHI nodes in our new successors. + for (BasicBlock *Succ : successors(BB1)) + AddPredecessorToBlock(Succ, BIParent, BB1); + + EraseTerminatorInstAndDCECond(BI); + return true; +} + +// Is it legal to place a variable in operand \c OpIdx of \c I? +// FIXME: This should be promoted to Instruction. +static bool canReplaceOperandWithVariable(const Instruction *I, + unsigned OpIdx) { + // We can't have a PHI with a metadata type. + if (I->getOperand(OpIdx)->getType()->isMetadataTy()) + return false; + + // Early exit. + if (!isa<Constant>(I->getOperand(OpIdx))) + return true; + + switch (I->getOpcode()) { + default: + return true; + case Instruction::Call: + case Instruction::Invoke: + // FIXME: many arithmetic intrinsics have no issue taking a + // variable, however it's hard to distingish these from + // specials such as @llvm.frameaddress that require a constant. + if (isa<IntrinsicInst>(I)) + return false; + + // Constant bundle operands may need to retain their constant-ness for + // correctness. + if (ImmutableCallSite(I).isBundleOperand(OpIdx)) + return false; + + return true; + + case Instruction::ShuffleVector: + // Shufflevector masks are constant. + return OpIdx != 2; + case Instruction::ExtractValue: + case Instruction::InsertValue: + // All operands apart from the first are constant. + return OpIdx == 0; + case Instruction::Alloca: + return false; + case Instruction::GetElementPtr: + if (OpIdx == 0) + return true; + gep_type_iterator It = std::next(gep_type_begin(I), OpIdx - 1); + return It.isSequential(); + } +} + +// All instructions in Insts belong to different blocks that all unconditionally +// branch to a common successor. Analyze each instruction and return true if it +// would be possible to sink them into their successor, creating one common +// instruction instead. For every value that would be required to be provided by +// PHI node (because an operand varies in each input block), add to PHIOperands. +static bool canSinkInstructions( + ArrayRef<Instruction *> Insts, + DenseMap<Instruction *, SmallVector<Value *, 4>> &PHIOperands) { + // Prune out obviously bad instructions to move. Any non-store instruction + // must have exactly one use, and we check later that use is by a single, + // common PHI instruction in the successor. + for (auto *I : Insts) { + // These instructions may change or break semantics if moved. + if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) || + I->getType()->isTokenTy()) + return false; + // Everything must have only one use too, apart from stores which + // have no uses. + if (!isa<StoreInst>(I) && !I->hasOneUse()) + return false; + } + + const Instruction *I0 = Insts.front(); + for (auto *I : Insts) + if (!I->isSameOperationAs(I0)) + return false; + + // All instructions in Insts are known to be the same opcode. If they aren't + // stores, check the only user of each is a PHI or in the same block as the + // instruction, because if a user is in the same block as an instruction + // we're contemplating sinking, it must already be determined to be sinkable. 
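+  // As a concrete (hypothetical) example: sinking is fine when each block's
+  // value feeds one PHI in the shared successor, such as
+  //   %r = phi i32 [ %a, %BB1 ], [ %b, %BB2 ]
+  // but not when the value's single use is an unrelated instruction in some
+  // other block.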
+ if (!isa<StoreInst>(I0)) { + auto *PNUse = dyn_cast<PHINode>(*I0->user_begin()); + auto *Succ = I0->getParent()->getTerminator()->getSuccessor(0); + if (!all_of(Insts, [&PNUse,&Succ](const Instruction *I) -> bool { + auto *U = cast<Instruction>(*I->user_begin()); + return (PNUse && + PNUse->getParent() == Succ && + PNUse->getIncomingValueForBlock(I->getParent()) == I) || + U->getParent() == I->getParent(); + })) + return false; + } + + for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) { + if (I0->getOperand(OI)->getType()->isTokenTy()) + // Don't touch any operand of token type. + return false; + + // Because SROA can't handle speculating stores of selects, try not + // to sink loads or stores of allocas when we'd have to create a PHI for + // the address operand. Also, because it is likely that loads or stores + // of allocas will disappear when Mem2Reg/SROA is run, don't sink them. + // This can cause code churn which can have unintended consequences down + // the line - see https://llvm.org/bugs/show_bug.cgi?id=30244. + // FIXME: This is a workaround for a deficiency in SROA - see + // https://llvm.org/bugs/show_bug.cgi?id=30188 + if (OI == 1 && isa<StoreInst>(I0) && + any_of(Insts, [](const Instruction *I) { + return isa<AllocaInst>(I->getOperand(1)); + })) + return false; + if (OI == 0 && isa<LoadInst>(I0) && any_of(Insts, [](const Instruction *I) { + return isa<AllocaInst>(I->getOperand(0)); + })) + return false; + + auto SameAsI0 = [&I0, OI](const Instruction *I) { + assert(I->getNumOperands() == I0->getNumOperands()); + return I->getOperand(OI) == I0->getOperand(OI); + }; + if (!all_of(Insts, SameAsI0)) { + if (!canReplaceOperandWithVariable(I0, OI)) + // We can't create a PHI from this GEP. + return false; + // Don't create indirect calls! The called value is the final operand. + if ((isa<CallInst>(I0) || isa<InvokeInst>(I0)) && OI == OE - 1) { + // FIXME: if the call was *already* indirect, we should do this. + return false; + } + for (auto *I : Insts) + PHIOperands[I].push_back(I->getOperand(OI)); + } + } + return true; +} + +// Assuming canSinkLastInstruction(Blocks) has returned true, sink the last +// instruction of every block in Blocks to their common successor, commoning +// into one instruction. +static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) { + auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0); + + // canSinkLastInstruction returning true guarantees that every block has at + // least one non-terminator instruction. + SmallVector<Instruction*,4> Insts; + for (auto *BB : Blocks) { + Instruction *I = BB->getTerminator(); + do { + I = I->getPrevNode(); + } while (isa<DbgInfoIntrinsic>(I) && I != &BB->front()); + if (!isa<DbgInfoIntrinsic>(I)) + Insts.push_back(I); + } + + // The only checking we need to do now is that all users of all instructions + // are the same PHI node. canSinkLastInstruction should have checked this but + // it is slightly over-aggressive - it gets confused by commutative instructions + // so double-check it here. + Instruction *I0 = Insts.front(); + if (!isa<StoreInst>(I0)) { + auto *PNUse = dyn_cast<PHINode>(*I0->user_begin()); + if (!all_of(Insts, [&PNUse](const Instruction *I) -> bool { + auto *U = cast<Instruction>(*I->user_begin()); + return U == PNUse; + })) + return false; + } + + // We don't need to do any more checking here; canSinkLastInstruction should + // have done it all for us. 
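+  // For example (hypothetical IR): commoning '%a = add i32 %x, 1' from BB1
+  // with '%b = add i32 %y, 1' from BB2 keeps the constant operand as-is but
+  // merges the differing operand through a new PHI in the successor:
+  //   %x.sink = phi i32 [ %x, %BB1 ], [ %y, %BB2 ]
+  //   %a = add i32 %x.sink, 1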
+ SmallVector<Value*, 4> NewOperands; + for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) { + // This check is different to that in canSinkLastInstruction. There, we + // cared about the global view once simplifycfg (and instcombine) have + // completed - it takes into account PHIs that become trivially + // simplifiable. However here we need a more local view; if an operand + // differs we create a PHI and rely on instcombine to clean up the very + // small mess we may make. + bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) { + return I->getOperand(O) != I0->getOperand(O); + }); + if (!NeedPHI) { + NewOperands.push_back(I0->getOperand(O)); + continue; + } + + // Create a new PHI in the successor block and populate it. + auto *Op = I0->getOperand(O); + assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!"); + auto *PN = PHINode::Create(Op->getType(), Insts.size(), + Op->getName() + ".sink", &BBEnd->front()); + for (auto *I : Insts) + PN->addIncoming(I->getOperand(O), I->getParent()); + NewOperands.push_back(PN); + } + + // Arbitrarily use I0 as the new "common" instruction; remap its operands + // and move it to the start of the successor block. + for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) + I0->getOperandUse(O).set(NewOperands[O]); + I0->moveBefore(&*BBEnd->getFirstInsertionPt()); + + // The debug location for the "common" instruction is the merged locations of + // all the commoned instructions. We start with the original location of the + // "common" instruction and iteratively merge each location in the loop below. + DILocation *Loc = I0->getDebugLoc(); + + // Update metadata and IR flags, and merge debug locations. + for (auto *I : Insts) + if (I != I0) { + Loc = DILocation::getMergedLocation(Loc, I->getDebugLoc()); + combineMetadataForCSE(I0, I); + I0->andIRFlags(I); + } + if (!isa<CallInst>(I0)) + I0->setDebugLoc(Loc); + + if (!isa<StoreInst>(I0)) { + // canSinkLastInstruction checked that all instructions were used by + // one and only one PHI node. Find that now, RAUW it to our common + // instruction and nuke it. + assert(I0->hasOneUse()); + auto *PN = cast<PHINode>(*I0->user_begin()); + PN->replaceAllUsesWith(I0); + PN->eraseFromParent(); + } + + // Finally nuke all instructions apart from the common instruction. + for (auto *I : Insts) + if (I != I0) + I->eraseFromParent(); + + return true; +} + +namespace { + + // LockstepReverseIterator - Iterates through instructions + // in a set of blocks in reverse order from the first non-terminator. + // For example (assume all blocks have size n): + // LockstepReverseIterator I([B1, B2, B3]); + // *I-- = [B1[n], B2[n], B3[n]]; + // *I-- = [B1[n-1], B2[n-1], B3[n-1]]; + // *I-- = [B1[n-2], B2[n-2], B3[n-2]]; + // ... + class LockstepReverseIterator { + ArrayRef<BasicBlock*> Blocks; + SmallVector<Instruction*,4> Insts; + bool Fail; + public: + LockstepReverseIterator(ArrayRef<BasicBlock*> Blocks) : + Blocks(Blocks) { + reset(); + } + + void reset() { + Fail = false; + Insts.clear(); + for (auto *BB : Blocks) { + Instruction *Inst = BB->getTerminator(); + for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);) + Inst = Inst->getPrevNode(); + if (!Inst) { + // Block wasn't big enough. 
+ Fail = true; + return; + } + Insts.push_back(Inst); + } + } + + bool isValid() const { + return !Fail; + } + + void operator -- () { + if (Fail) + return; + for (auto *&Inst : Insts) { + for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);) + Inst = Inst->getPrevNode(); + // Already at beginning of block. + if (!Inst) { + Fail = true; + return; + } + } + } + + ArrayRef<Instruction*> operator * () const { + return Insts; + } + }; + +} // end anonymous namespace + +/// Given an unconditional branch that goes to BBEnd, +/// check whether BBEnd has only two predecessors and the other predecessor +/// ends with an unconditional branch. If it is true, sink any common code +/// in the two predecessors to BBEnd. +static bool SinkThenElseCodeToEnd(BranchInst *BI1) { + assert(BI1->isUnconditional()); + BasicBlock *BBEnd = BI1->getSuccessor(0); + + // We support two situations: + // (1) all incoming arcs are unconditional + // (2) one incoming arc is conditional + // + // (2) is very common in switch defaults and + // else-if patterns; + // + // if (a) f(1); + // else if (b) f(2); + // + // produces: + // + // [if] + // / \ + // [f(1)] [if] + // | | \ + // | | \ + // | [f(2)]| + // \ | / + // [ end ] + // + // [end] has two unconditional predecessor arcs and one conditional. The + // conditional refers to the implicit empty 'else' arc. This conditional + // arc can also be caused by an empty default block in a switch. + // + // In this case, we attempt to sink code from all *unconditional* arcs. + // If we can sink instructions from these arcs (determined during the scan + // phase below) we insert a common successor for all unconditional arcs and + // connect that to [end], to enable sinking: + // + // [if] + // / \ + // [x(1)] [if] + // | | \ + // | | \ + // | [x(2)] | + // \ / | + // [sink.split] | + // \ / + // [ end ] + // + SmallVector<BasicBlock*,4> UnconditionalPreds; + Instruction *Cond = nullptr; + for (auto *B : predecessors(BBEnd)) { + auto *T = B->getTerminator(); + if (isa<BranchInst>(T) && cast<BranchInst>(T)->isUnconditional()) + UnconditionalPreds.push_back(B); + else if ((isa<BranchInst>(T) || isa<SwitchInst>(T)) && !Cond) + Cond = T; + else + return false; + } + if (UnconditionalPreds.size() < 2) + return false; + + bool Changed = false; + // We take a two-step approach to tail sinking. First we scan from the end of + // each block upwards in lockstep. If the n'th instruction from the end of each + // block can be sunk, those instructions are added to ValuesToSink and we + // carry on. If we can sink an instruction but need to PHI-merge some operands + // (because they're not identical in each instruction) we add these to + // PHIOperands. 
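+  // For example, if the last non-terminator instruction of every unconditional
+  // predecessor is the same store, and the instruction above it is an
+  // identical add in each block, the lockstep scan below accepts both rows and
+  // leaves ScanIdx == 2; it stops at the first row canSinkInstructions rejects.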
+ unsigned ScanIdx = 0; + SmallPtrSet<Value*,4> InstructionsToSink; + DenseMap<Instruction*, SmallVector<Value*,4>> PHIOperands; + LockstepReverseIterator LRI(UnconditionalPreds); + while (LRI.isValid() && + canSinkInstructions(*LRI, PHIOperands)) { + DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0] << "\n"); + InstructionsToSink.insert((*LRI).begin(), (*LRI).end()); + ++ScanIdx; + --LRI; + } + + auto ProfitableToSinkInstruction = [&](LockstepReverseIterator &LRI) { + unsigned NumPHIdValues = 0; + for (auto *I : *LRI) + for (auto *V : PHIOperands[I]) + if (InstructionsToSink.count(V) == 0) + ++NumPHIdValues; + DEBUG(dbgs() << "SINK: #phid values: " << NumPHIdValues << "\n"); + unsigned NumPHIInsts = NumPHIdValues / UnconditionalPreds.size(); + if ((NumPHIdValues % UnconditionalPreds.size()) != 0) + NumPHIInsts++; + + return NumPHIInsts <= 1; + }; + + if (ScanIdx > 0 && Cond) { + // Check if we would actually sink anything first! This mutates the CFG and + // adds an extra block. The goal in doing this is to allow instructions that + // couldn't be sunk before to be sunk - obviously, speculatable instructions + // (such as trunc, add) can be sunk and predicated already. So we check that + // we're going to sink at least one non-speculatable instruction. + LRI.reset(); + unsigned Idx = 0; + bool Profitable = false; + while (ProfitableToSinkInstruction(LRI) && Idx < ScanIdx) { + if (!isSafeToSpeculativelyExecute((*LRI)[0])) { + Profitable = true; + break; + } + --LRI; + ++Idx; + } + if (!Profitable) + return false; + + DEBUG(dbgs() << "SINK: Splitting edge\n"); + // We have a conditional edge and we're going to sink some instructions. + // Insert a new block postdominating all blocks we're going to sink from. + if (!SplitBlockPredecessors(BI1->getSuccessor(0), UnconditionalPreds, + ".sink.split")) + // Edges couldn't be split. + return false; + Changed = true; + } + + // Now that we've analyzed all potential sinking candidates, perform the + // actual sink. We iteratively sink the last non-terminator of the source + // blocks into their common successor unless doing so would require too + // many PHI instructions to be generated (currently only one PHI is allowed + // per sunk instruction). + // + // We can use InstructionsToSink to discount values needing PHI-merging that will + // actually be sunk in a later iteration. This allows us to be more + // aggressive in what we sink. This does allow a false positive where we + // sink presuming a later value will also be sunk, but stop half way through + // and never actually sink it which means we produce more PHIs than intended. + // This is unlikely in practice though. + for (unsigned SinkIdx = 0; SinkIdx != ScanIdx; ++SinkIdx) { + DEBUG(dbgs() << "SINK: Sink: " + << *UnconditionalPreds[0]->getTerminator()->getPrevNode() + << "\n"); + + // Because we've sunk every instruction in turn, the current instruction to + // sink is always at index 0. + LRI.reset(); + if (!ProfitableToSinkInstruction(LRI)) { + // Too many PHIs would be created. + DEBUG(dbgs() << "SINK: stopping here, too many PHIs would be created!\n"); + break; + } + + if (!sinkLastInstruction(UnconditionalPreds)) + return Changed; + NumSinkCommons++; + Changed = true; + } + return Changed; +} + +/// \brief Determine if we can hoist sink a sole store instruction out of a +/// conditional block. +/// +/// We are looking for code like the following: +/// BrBB: +/// store i32 %add, i32* %arrayidx2 +/// ... 
// No other stores or function calls (we could be calling a memory
+/// ...      // function).
+///   %cmp = icmp ult %x, %y
+///   br i1 %cmp, label %EndBB, label %ThenBB
+/// ThenBB:
+///   store i32 %add5, i32* %arrayidx2
+///   br label EndBB
+/// EndBB:
+///   ...
+/// We are going to transform this into:
+/// BrBB:
+///   store i32 %add, i32* %arrayidx2
+///   ...      //
+///   %cmp = icmp ult %x, %y
+///   %add.add5 = select i1 %cmp, i32 %add, %add5
+///   store i32 %add.add5, i32* %arrayidx2
+///   ...
+///
+/// \return The pointer to the value of the previous store if the store can be
+/// hoisted into the predecessor block. nullptr otherwise.
+static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
+                                     BasicBlock *StoreBB, BasicBlock *EndBB) {
+  StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
+  if (!StoreToHoist)
+    return nullptr;
+
+  // Volatile or atomic.
+  if (!StoreToHoist->isSimple())
+    return nullptr;
+
+  Value *StorePtr = StoreToHoist->getPointerOperand();
+
+  // Look for a store to the same pointer in BrBB.
+  unsigned MaxNumInstToLookAt = 9;
+  for (Instruction &CurI : reverse(*BrBB)) {
+    if (!MaxNumInstToLookAt)
+      break;
+    // Skip debug info.
+    if (isa<DbgInfoIntrinsic>(CurI))
+      continue;
+    --MaxNumInstToLookAt;
+
+    // Could be calling an instruction that affects memory like free().
+    if (CurI.mayHaveSideEffects() && !isa<StoreInst>(CurI))
+      return nullptr;
+
+    if (auto *SI = dyn_cast<StoreInst>(&CurI)) {
+      // Found the previous store; make sure it stores to the same location.
+      if (SI->getPointerOperand() == StorePtr)
+        // Found the previous store, return its value operand.
+        return SI->getValueOperand();
+      return nullptr; // Unknown store.
+    }
+  }
+
+  return nullptr;
+}
+
+/// \brief Speculate a conditional basic block flattening the CFG.
+///
+/// Note that this is a very risky transform currently. Speculating
+/// instructions like this is most often not desirable. Instead, there is an MI
+/// pass which can do it with full awareness of the resource constraints.
+/// However, some cases are "obvious" and we should do so directly. An example
+/// of this is speculating a single, reasonably cheap instruction.
+///
+/// There is only one distinct advantage to flattening the CFG at the IR level:
+/// it makes very common but simplistic optimizations, such as those in
+/// instcombine and the DAG combiner, more powerful by removing CFG edges and
+/// modeling their effects with easier to reason about SSA value graphs.
+///
+/// An illustration of this transform is turning this IR:
+/// \code
+/// BB:
+///   %cmp = icmp ult %x, %y
+///   br i1 %cmp, label %EndBB, label %ThenBB
+/// ThenBB:
+///   %sub = sub %x, %y
+///   br label EndBB
+/// EndBB:
+///   %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
+///   ...
+/// \endcode
+///
+/// Into this IR:
+/// \code
+/// BB:
+///   %cmp = icmp ult %x, %y
+///   %sub = sub %x, %y
+///   %phi = select i1 %cmp, 0, %sub
+///   ...
+/// \endcode
+///
+/// \returns true if the conditional block is removed.
+static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
+                                   const TargetTransformInfo &TTI) {
+  // Be conservative for now. FP select instructions can often be expensive.
+  Value *BrCond = BI->getCondition();
+  if (isa<FCmpInst>(BrCond))
+    return false;
+
+  BasicBlock *BB = BI->getParent();
+  BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
+
+  // If ThenBB is actually on the false edge of the conditional branch, remember
+  // to swap the select operands later.
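+  // For example (hypothetical IR): given 'br i1 %cmp, label %EndBB, label %ThenBB',
+  // ThenBB sits on the false edge, so Invert is set below and the selects built
+  // later put the speculated value in their false operand, e.g.
+  //   select i1 %cmp, i32 %orig, i32 %spec
+  // where %orig flows in from BB and %spec from ThenBB (names invented).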
+ bool Invert = false; + if (ThenBB != BI->getSuccessor(0)) { + assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?"); + Invert = true; + } + assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block"); + + // Keep a count of how many times instructions are used within CondBB when + // they are candidates for sinking into CondBB. Specifically: + // - They are defined in BB, and + // - They have no side effects, and + // - All of their uses are in CondBB. + SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts; + + unsigned SpeculationCost = 0; + Value *SpeculatedStoreValue = nullptr; + StoreInst *SpeculatedStore = nullptr; + for (BasicBlock::iterator BBI = ThenBB->begin(), + BBE = std::prev(ThenBB->end()); + BBI != BBE; ++BBI) { + Instruction *I = &*BBI; + // Skip debug info. + if (isa<DbgInfoIntrinsic>(I)) + continue; + + // Only speculatively execute a single instruction (not counting the + // terminator) for now. + ++SpeculationCost; + if (SpeculationCost > 1) + return false; + + // Don't hoist the instruction if it's unsafe or expensive. + if (!isSafeToSpeculativelyExecute(I) && + !(HoistCondStores && (SpeculatedStoreValue = isSafeToSpeculateStore( + I, BB, ThenBB, EndBB)))) + return false; + if (!SpeculatedStoreValue && + ComputeSpeculationCost(I, TTI) > + PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic) + return false; + + // Store the store speculation candidate. + if (SpeculatedStoreValue) + SpeculatedStore = cast<StoreInst>(I); + + // Do not hoist the instruction if any of its operands are defined but not + // used in BB. The transformation will prevent the operand from + // being sunk into the use block. + for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) { + Instruction *OpI = dyn_cast<Instruction>(*i); + if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects()) + continue; // Not a candidate for sinking. + + ++SinkCandidateUseCounts[OpI]; + } + } + + // Consider any sink candidates which are only used in CondBB as costs for + // speculation. Note, while we iterate over a DenseMap here, we are summing + // and so iteration order isn't significant. + for (SmallDenseMap<Instruction *, unsigned, 4>::iterator + I = SinkCandidateUseCounts.begin(), + E = SinkCandidateUseCounts.end(); + I != E; ++I) + if (I->first->getNumUses() == I->second) { + ++SpeculationCost; + if (SpeculationCost > 1) + return false; + } + + // Check that the PHI nodes can be converted to selects. + bool HaveRewritablePHIs = false; + for (BasicBlock::iterator I = EndBB->begin(); + PHINode *PN = dyn_cast<PHINode>(I); ++I) { + Value *OrigV = PN->getIncomingValueForBlock(BB); + Value *ThenV = PN->getIncomingValueForBlock(ThenBB); + + // FIXME: Try to remove some of the duplication with HoistThenElseCodeToIf. + // Skip PHIs which are trivial. + if (ThenV == OrigV) + continue; + + // Don't convert to selects if we could remove undefined behavior instead. + if (passingValueIsAlwaysUndefined(OrigV, PN) || + passingValueIsAlwaysUndefined(ThenV, PN)) + return false; + + HaveRewritablePHIs = true; + ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV); + ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV); + if (!OrigCE && !ThenCE) + continue; // Known safe and cheap. + + if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) || + (OrigCE && !isSafeToSpeculativelyExecute(OrigCE))) + return false; + unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, TTI) : 0; + unsigned ThenCost = ThenCE ? 
ComputeSpeculationCost(ThenCE, TTI) : 0; + unsigned MaxCost = + 2 * PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; + if (OrigCost + ThenCost > MaxCost) + return false; + + // Account for the cost of an unfolded ConstantExpr which could end up + // getting expanded into Instructions. + // FIXME: This doesn't account for how many operations are combined in the + // constant expression. + ++SpeculationCost; + if (SpeculationCost > 1) + return false; + } + + // If there are no PHIs to process, bail early. This helps ensure idempotence + // as well. + if (!HaveRewritablePHIs && !(HoistCondStores && SpeculatedStoreValue)) + return false; + + // If we get here, we can hoist the instruction and if-convert. + DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";); + + // Insert a select of the value of the speculated store. + if (SpeculatedStoreValue) { + IRBuilder<NoFolder> Builder(BI); + Value *TrueV = SpeculatedStore->getValueOperand(); + Value *FalseV = SpeculatedStoreValue; + if (Invert) + std::swap(TrueV, FalseV); + Value *S = Builder.CreateSelect( + BrCond, TrueV, FalseV, TrueV->getName() + "." + FalseV->getName(), BI); + SpeculatedStore->setOperand(0, S); + } + + // Metadata can be dependent on the condition we are hoisting above. + // Conservatively strip all metadata on the instruction. + for (auto &I : *ThenBB) + I.dropUnknownNonDebugMetadata(); + + // Hoist the instructions. + BB->getInstList().splice(BI->getIterator(), ThenBB->getInstList(), + ThenBB->begin(), std::prev(ThenBB->end())); + + // Insert selects and rewrite the PHI operands. + IRBuilder<NoFolder> Builder(BI); + for (BasicBlock::iterator I = EndBB->begin(); + PHINode *PN = dyn_cast<PHINode>(I); ++I) { + unsigned OrigI = PN->getBasicBlockIndex(BB); + unsigned ThenI = PN->getBasicBlockIndex(ThenBB); + Value *OrigV = PN->getIncomingValue(OrigI); + Value *ThenV = PN->getIncomingValue(ThenI); + + // Skip PHIs which are trivial. + if (OrigV == ThenV) + continue; + + // Create a select whose true value is the speculatively executed value and + // false value is the preexisting value. Swap them if the branch + // destinations were inverted. + Value *TrueV = ThenV, *FalseV = OrigV; + if (Invert) + std::swap(TrueV, FalseV); + Value *V = Builder.CreateSelect( + BrCond, TrueV, FalseV, TrueV->getName() + "." + FalseV->getName(), BI); + PN->setIncomingValue(OrigI, V); + PN->setIncomingValue(ThenI, V); + } + + ++NumSpeculations; + return true; +} + +/// Return true if we can thread a branch across this block. +static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { + BranchInst *BI = cast<BranchInst>(BB->getTerminator()); + unsigned Size = 0; + + for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) { + if (isa<DbgInfoIntrinsic>(BBI)) + continue; + if (Size > 10) + return false; // Don't clone large BB's. + ++Size; + + // We can only support instructions that do not define values that are + // live outside of the current basic block. + for (User *U : BBI->users()) { + Instruction *UI = cast<Instruction>(U); + if (UI->getParent() != BB || isa<PHINode>(UI)) + return false; + } + + // Looks ok, continue checking. + } + + return true; +} + +/// If we have a conditional branch on a PHI node value that is defined in the +/// same block as the branch and if any PHI entries are constants, thread edges +/// corresponding to that entry to be branches to their ultimate destination. 
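+///
+/// For example (hypothetical IR), given
+///   %p = phi i1 [ true, %PredBB ], [ %c, %Other ]
+///   br i1 %p, label %TrueDest, label %FalseDest
+/// the edge from PredBB must take the TrueDest side, so it is rerouted through
+/// a new '<dest>.critedge' block that branches straight to TrueDest, carrying
+/// clones of whatever non-PHI instructions from this block the edge still needs.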
+static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL) { + BasicBlock *BB = BI->getParent(); + PHINode *PN = dyn_cast<PHINode>(BI->getCondition()); + // NOTE: we currently cannot transform this case if the PHI node is used + // outside of the block. + if (!PN || PN->getParent() != BB || !PN->hasOneUse()) + return false; + + // Degenerate case of a single entry PHI. + if (PN->getNumIncomingValues() == 1) { + FoldSingleEntryPHINodes(PN->getParent()); + return true; + } + + // Now we know that this block has multiple preds and two succs. + if (!BlockIsSimpleEnoughToThreadThrough(BB)) + return false; + + // Can't fold blocks that contain noduplicate or convergent calls. + if (any_of(*BB, [](const Instruction &I) { + const CallInst *CI = dyn_cast<CallInst>(&I); + return CI && (CI->cannotDuplicate() || CI->isConvergent()); + })) + return false; + + // Okay, this is a simple enough basic block. See if any phi values are + // constants. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + ConstantInt *CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i)); + if (!CB || !CB->getType()->isIntegerTy(1)) + continue; + + // Okay, we now know that all edges from PredBB should be revectored to + // branch to RealDest. + BasicBlock *PredBB = PN->getIncomingBlock(i); + BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue()); + + if (RealDest == BB) + continue; // Skip self loops. + // Skip if the predecessor's terminator is an indirect branch. + if (isa<IndirectBrInst>(PredBB->getTerminator())) + continue; + + // The dest block might have PHI nodes, other predecessors and other + // difficult cases. Instead of being smart about this, just insert a new + // block that jumps to the destination block, effectively splitting + // the edge we are about to create. + BasicBlock *EdgeBB = + BasicBlock::Create(BB->getContext(), RealDest->getName() + ".critedge", + RealDest->getParent(), RealDest); + BranchInst::Create(RealDest, EdgeBB); + + // Update PHI nodes. + AddPredecessorToBlock(RealDest, EdgeBB, BB); + + // BB may have instructions that are being threaded over. Clone these + // instructions into EdgeBB. We know that there will be no uses of the + // cloned instructions outside of EdgeBB. + BasicBlock::iterator InsertPt = EdgeBB->begin(); + DenseMap<Value *, Value *> TranslateMap; // Track translated values. + for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) { + if (PHINode *PN = dyn_cast<PHINode>(BBI)) { + TranslateMap[PN] = PN->getIncomingValueForBlock(PredBB); + continue; + } + // Clone the instruction. + Instruction *N = BBI->clone(); + if (BBI->hasName()) + N->setName(BBI->getName() + ".c"); + + // Update operands due to translation. + for (User::op_iterator i = N->op_begin(), e = N->op_end(); i != e; ++i) { + DenseMap<Value *, Value *>::iterator PI = TranslateMap.find(*i); + if (PI != TranslateMap.end()) + *i = PI->second; + } + + // Check for trivial simplification. + if (Value *V = SimplifyInstruction(N, DL)) { + if (!BBI->use_empty()) + TranslateMap[&*BBI] = V; + if (!N->mayHaveSideEffects()) { + delete N; // Instruction folded away, don't need actual inst + N = nullptr; + } + } else { + if (!BBI->use_empty()) + TranslateMap[&*BBI] = N; + } + // Insert the new instruction into its new home. + if (N) + EdgeBB->getInstList().insert(InsertPt, N); + } + + // Loop over all of the edges from PredBB to BB, changing them to branch + // to EdgeBB instead. 
+ TerminatorInst *PredBBTI = PredBB->getTerminator(); + for (unsigned i = 0, e = PredBBTI->getNumSuccessors(); i != e; ++i) + if (PredBBTI->getSuccessor(i) == BB) { + BB->removePredecessor(PredBB); + PredBBTI->setSuccessor(i, EdgeBB); + } + + // Recurse, simplifying any other constants. + return FoldCondBranchOnPHI(BI, DL) | true; + } + + return false; +} + +/// Given a BB that starts with the specified two-entry PHI node, +/// see if we can eliminate it. +static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, + const DataLayout &DL) { + // Ok, this is a two entry PHI node. Check to see if this is a simple "if + // statement", which has a very simple dominance structure. Basically, we + // are trying to find the condition that is being branched on, which + // subsequently causes this merge to happen. We really want control + // dependence information for this check, but simplifycfg can't keep it up + // to date, and this catches most of the cases we care about anyway. + BasicBlock *BB = PN->getParent(); + BasicBlock *IfTrue, *IfFalse; + Value *IfCond = GetIfCondition(BB, IfTrue, IfFalse); + if (!IfCond || + // Don't bother if the branch will be constant folded trivially. + isa<ConstantInt>(IfCond)) + return false; + + // Okay, we found that we can merge this two-entry phi node into a select. + // Doing so would require us to fold *all* two entry phi nodes in this block. + // At some point this becomes non-profitable (particularly if the target + // doesn't support cmov's). Only do this transformation if there are two or + // fewer PHI nodes in this block. + unsigned NumPhis = 0; + for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I) + if (NumPhis > 2) + return false; + + // Loop over the PHI's seeing if we can promote them all to select + // instructions. While we are at it, keep track of the instructions + // that need to be moved to the dominating block. + SmallPtrSet<Instruction *, 4> AggressiveInsts; + unsigned MaxCostVal0 = PHINodeFoldingThreshold, + MaxCostVal1 = PHINodeFoldingThreshold; + MaxCostVal0 *= TargetTransformInfo::TCC_Basic; + MaxCostVal1 *= TargetTransformInfo::TCC_Basic; + + for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) { + PHINode *PN = cast<PHINode>(II++); + if (Value *V = SimplifyInstruction(PN, DL)) { + PN->replaceAllUsesWith(V); + PN->eraseFromParent(); + continue; + } + + if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts, + MaxCostVal0, TTI) || + !DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts, + MaxCostVal1, TTI)) + return false; + } + + // If we folded the first phi, PN dangles at this point. Refresh it. If + // we ran out of PHIs then we simplified them all. + PN = dyn_cast<PHINode>(BB->begin()); + if (!PN) + return true; + + // Don't fold i1 branches on PHIs which contain binary operators. These can + // often be turned into switches and other things. + if (PN->getType()->isIntegerTy(1) && + (isa<BinaryOperator>(PN->getIncomingValue(0)) || + isa<BinaryOperator>(PN->getIncomingValue(1)) || + isa<BinaryOperator>(IfCond))) + return false; + + // If all PHI nodes are promotable, check to make sure that all instructions + // in the predecessor blocks can be promoted as well. If not, we won't be able + // to get rid of the control flow, so it's not worth promoting to select + // instructions. 
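+  // For example (hypothetical IR), a diamond feeding
+  //   %r = phi i32 [ %a, %if.then ], [ %b, %if.else ]
+  // is only flattened to a select if everything inside %if.then and %if.else
+  // is either an instruction already accepted into AggressiveInsts above or a
+  // debug intrinsic; anything else has to stay, so the branch cannot go away.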
+ BasicBlock *DomBlock = nullptr; + BasicBlock *IfBlock1 = PN->getIncomingBlock(0); + BasicBlock *IfBlock2 = PN->getIncomingBlock(1); + if (cast<BranchInst>(IfBlock1->getTerminator())->isConditional()) { + IfBlock1 = nullptr; + } else { + DomBlock = *pred_begin(IfBlock1); + for (BasicBlock::iterator I = IfBlock1->begin(); !isa<TerminatorInst>(I); + ++I) + if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I)) { + // This is not an aggressive instruction that we can promote. + // Because of this, we won't be able to get rid of the control flow, so + // the xform is not worth it. + return false; + } + } + + if (cast<BranchInst>(IfBlock2->getTerminator())->isConditional()) { + IfBlock2 = nullptr; + } else { + DomBlock = *pred_begin(IfBlock2); + for (BasicBlock::iterator I = IfBlock2->begin(); !isa<TerminatorInst>(I); + ++I) + if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I)) { + // This is not an aggressive instruction that we can promote. + // Because of this, we won't be able to get rid of the control flow, so + // the xform is not worth it. + return false; + } + } + + DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond << " T: " + << IfTrue->getName() << " F: " << IfFalse->getName() << "\n"); + + // If we can still promote the PHI nodes after this gauntlet of tests, + // do all of the PHI's now. + Instruction *InsertPt = DomBlock->getTerminator(); + IRBuilder<NoFolder> Builder(InsertPt); + + // Move all 'aggressive' instructions, which are defined in the + // conditional parts of the if's up to the dominating block. + if (IfBlock1) { + for (auto &I : *IfBlock1) + I.dropUnknownNonDebugMetadata(); + DomBlock->getInstList().splice(InsertPt->getIterator(), + IfBlock1->getInstList(), IfBlock1->begin(), + IfBlock1->getTerminator()->getIterator()); + } + if (IfBlock2) { + for (auto &I : *IfBlock2) + I.dropUnknownNonDebugMetadata(); + DomBlock->getInstList().splice(InsertPt->getIterator(), + IfBlock2->getInstList(), IfBlock2->begin(), + IfBlock2->getTerminator()->getIterator()); + } + + while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) { + // Change the PHI node into a select instruction. + Value *TrueVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse); + Value *FalseVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue); + + Value *Sel = Builder.CreateSelect(IfCond, TrueVal, FalseVal, "", InsertPt); + PN->replaceAllUsesWith(Sel); + Sel->takeName(PN); + PN->eraseFromParent(); + } + + // At this point, IfBlock1 and IfBlock2 are both empty, so our if statement + // has been flattened. Change DomBlock to jump directly to our new block to + // avoid other simplifycfg's kicking in on the diamond. + TerminatorInst *OldTI = DomBlock->getTerminator(); + Builder.SetInsertPoint(OldTI); + Builder.CreateBr(BB); + OldTI->eraseFromParent(); + return true; +} + +/// If we found a conditional branch that goes to two returning blocks, +/// try to merge them together into one return, +/// introducing a select if the return values disagree. +static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, + IRBuilder<> &Builder) { + assert(BI->isConditional() && "Must be a conditional branch"); + BasicBlock *TrueSucc = BI->getSuccessor(0); + BasicBlock *FalseSucc = BI->getSuccessor(1); + ReturnInst *TrueRet = cast<ReturnInst>(TrueSucc->getTerminator()); + ReturnInst *FalseRet = cast<ReturnInst>(FalseSucc->getTerminator()); + + // Check to ensure both blocks are empty (just a return) or optionally empty + // with PHI nodes. 
If there are other instructions, merging would cause extra + // computation on one path or the other. + if (!TrueSucc->getFirstNonPHIOrDbg()->isTerminator()) + return false; + if (!FalseSucc->getFirstNonPHIOrDbg()->isTerminator()) + return false; + + Builder.SetInsertPoint(BI); + // Okay, we found a branch that is going to two return nodes. If + // there is no return value for this function, just change the + // branch into a return. + if (FalseRet->getNumOperands() == 0) { + TrueSucc->removePredecessor(BI->getParent()); + FalseSucc->removePredecessor(BI->getParent()); + Builder.CreateRetVoid(); + EraseTerminatorInstAndDCECond(BI); + return true; + } + + // Otherwise, figure out what the true and false return values are + // so we can insert a new select instruction. + Value *TrueValue = TrueRet->getReturnValue(); + Value *FalseValue = FalseRet->getReturnValue(); + + // Unwrap any PHI nodes in the return blocks. + if (PHINode *TVPN = dyn_cast_or_null<PHINode>(TrueValue)) + if (TVPN->getParent() == TrueSucc) + TrueValue = TVPN->getIncomingValueForBlock(BI->getParent()); + if (PHINode *FVPN = dyn_cast_or_null<PHINode>(FalseValue)) + if (FVPN->getParent() == FalseSucc) + FalseValue = FVPN->getIncomingValueForBlock(BI->getParent()); + + // In order for this transformation to be safe, we must be able to + // unconditionally execute both operands to the return. This is + // normally the case, but we could have a potentially-trapping + // constant expression that prevents this transformation from being + // safe. + if (ConstantExpr *TCV = dyn_cast_or_null<ConstantExpr>(TrueValue)) + if (TCV->canTrap()) + return false; + if (ConstantExpr *FCV = dyn_cast_or_null<ConstantExpr>(FalseValue)) + if (FCV->canTrap()) + return false; + + // Okay, we collected all the mapped values and checked them for sanity, and + // defined to really do this transformation. First, update the CFG. + TrueSucc->removePredecessor(BI->getParent()); + FalseSucc->removePredecessor(BI->getParent()); + + // Insert select instructions where needed. + Value *BrCond = BI->getCondition(); + if (TrueValue) { + // Insert a select if the results differ. + if (TrueValue == FalseValue || isa<UndefValue>(FalseValue)) { + } else if (isa<UndefValue>(TrueValue)) { + TrueValue = FalseValue; + } else { + TrueValue = + Builder.CreateSelect(BrCond, TrueValue, FalseValue, "retval", BI); + } + } + + Value *RI = + !TrueValue ? Builder.CreateRetVoid() : Builder.CreateRet(TrueValue); + + (void)RI; + + DEBUG(dbgs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:" + << "\n " << *BI << "NewRet = " << *RI + << "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: " << *FalseSucc); + + EraseTerminatorInstAndDCECond(BI); + + return true; +} + +/// Return true if the given instruction is available +/// in its predecessor block. If yes, the instruction will be removed. +static bool checkCSEInPredecessor(Instruction *Inst, BasicBlock *PB) { + if (!isa<BinaryOperator>(Inst) && !isa<CmpInst>(Inst)) + return false; + for (Instruction &I : *PB) { + Instruction *PBI = &I; + // Check whether Inst and PBI generate the same value. + if (Inst->isIdenticalTo(PBI)) { + Inst->replaceAllUsesWith(PBI); + Inst->eraseFromParent(); + return true; + } + } + return false; +} + +/// Return true if either PBI or BI has branch weight available, and store +/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does +/// not have branch weight, use 1:1 as its weight. 
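+///
+/// For example, if PBI carries branch_weights profile metadata of 8 and 2 and
+/// BI carries none, this returns true with PredTrueWeight = 8,
+/// PredFalseWeight = 2 and SuccTrueWeight = SuccFalseWeight = 1.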
+static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI,
+                                   uint64_t &PredTrueWeight,
+                                   uint64_t &PredFalseWeight,
+                                   uint64_t &SuccTrueWeight,
+                                   uint64_t &SuccFalseWeight) {
+  bool PredHasWeights =
+      PBI->extractProfMetadata(PredTrueWeight, PredFalseWeight);
+  bool SuccHasWeights =
+      BI->extractProfMetadata(SuccTrueWeight, SuccFalseWeight);
+  if (PredHasWeights || SuccHasWeights) {
+    if (!PredHasWeights)
+      PredTrueWeight = PredFalseWeight = 1;
+    if (!SuccHasWeights)
+      SuccTrueWeight = SuccFalseWeight = 1;
+    return true;
+  } else {
+    return false;
+  }
+}
+
+/// If this basic block is simple enough, and if a predecessor branches to us
+/// and one of our successors, fold the block into the predecessor and use
+/// logical operations to pick the right destination.
+bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
+  BasicBlock *BB = BI->getParent();
+
+  Instruction *Cond = nullptr;
+  if (BI->isConditional())
+    Cond = dyn_cast<Instruction>(BI->getCondition());
+  else {
+    // For an unconditional branch, check for a simple CFG pattern, where
+    // BB has a single predecessor and BB's successor is also its predecessor's
+    // successor. If such a pattern exists, check for CSE between BB and its
+    // predecessor.
+    if (BasicBlock *PB = BB->getSinglePredecessor())
+      if (BranchInst *PBI = dyn_cast<BranchInst>(PB->getTerminator()))
+        if (PBI->isConditional() &&
+            (BI->getSuccessor(0) == PBI->getSuccessor(0) ||
+             BI->getSuccessor(0) == PBI->getSuccessor(1))) {
+          for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
+            Instruction *Curr = &*I++;
+            if (isa<CmpInst>(Curr)) {
+              Cond = Curr;
+              break;
+            }
+            // Quit if we can't remove this instruction.
+            if (!checkCSEInPredecessor(Curr, PB))
+              return false;
+          }
+        }
+
+    if (!Cond)
+      return false;
+  }
+
+  if (!Cond || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) ||
+      Cond->getParent() != BB || !Cond->hasOneUse())
+    return false;
+
+  // Make sure the instruction after the condition is the cond branch.
+  BasicBlock::iterator CondIt = ++Cond->getIterator();
+
+  // Ignore dbg intrinsics.
+  while (isa<DbgInfoIntrinsic>(CondIt))
+    ++CondIt;
+
+  if (&*CondIt != BI)
+    return false;
+
+  // Only allow this transformation if computing the condition doesn't involve
+  // too many instructions and these involved instructions can be executed
+  // unconditionally. We denote all involved instructions except the condition
+  // as "bonus instructions", and only allow this transformation when the
+  // number of the bonus instructions does not exceed a certain threshold.
+  unsigned NumBonusInsts = 0;
+  for (auto I = BB->begin(); Cond != &*I; ++I) {
+    // Ignore dbg intrinsics.
+    if (isa<DbgInfoIntrinsic>(I))
+      continue;
+    if (!I->hasOneUse() || !isSafeToSpeculativelyExecute(&*I))
+      return false;
+    // I has only one use and can be executed unconditionally.
+    Instruction *User = dyn_cast<Instruction>(I->user_back());
+    if (User == nullptr || User->getParent() != BB)
+      return false;
+    // I is used in the same BB. Since BI uses Cond and has no spare operand
+    // slots for any other instruction, User must be an instruction between
+    // next(I) and Cond.
+    ++NumBonusInsts;
+    // Exit early once we reach the limit.
+    if (NumBonusInsts > BonusInstThreshold)
+      return false;
+  }
+
+  // Cond is known to be a compare or binary operator. Check to make sure that
+  // neither operand is a potentially-trapping constant expression.
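+  // For example (hypothetical IR), an operand such as
+  //   udiv (i32 1, i32 ptrtoint (i32* @g to i32))
+  // may divide by zero at run time, so a branch condition using it cannot
+  // safely be evaluated unconditionally in the predecessor.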
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(0))) + if (CE->canTrap()) + return false; + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(1))) + if (CE->canTrap()) + return false; + + // Finally, don't infinitely unroll conditional loops. + BasicBlock *TrueDest = BI->getSuccessor(0); + BasicBlock *FalseDest = (BI->isConditional()) ? BI->getSuccessor(1) : nullptr; + if (TrueDest == BB || FalseDest == BB) + return false; + + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + BasicBlock *PredBlock = *PI; + BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator()); + + // Check that we have two conditional branches. If there is a PHI node in + // the common successor, verify that the same value flows in from both + // blocks. + SmallVector<PHINode *, 4> PHIs; + if (!PBI || PBI->isUnconditional() || + (BI->isConditional() && !SafeToMergeTerminators(BI, PBI)) || + (!BI->isConditional() && + !isProfitableToFoldUnconditional(BI, PBI, Cond, PHIs))) + continue; + + // Determine if the two branches share a common destination. + Instruction::BinaryOps Opc = Instruction::BinaryOpsEnd; + bool InvertPredCond = false; + + if (BI->isConditional()) { + if (PBI->getSuccessor(0) == TrueDest) { + Opc = Instruction::Or; + } else if (PBI->getSuccessor(1) == FalseDest) { + Opc = Instruction::And; + } else if (PBI->getSuccessor(0) == FalseDest) { + Opc = Instruction::And; + InvertPredCond = true; + } else if (PBI->getSuccessor(1) == TrueDest) { + Opc = Instruction::Or; + InvertPredCond = true; + } else { + continue; + } + } else { + if (PBI->getSuccessor(0) != TrueDest && PBI->getSuccessor(1) != TrueDest) + continue; + } + + DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB); + IRBuilder<> Builder(PBI); + + // If we need to invert the condition in the pred block to match, do so now. + if (InvertPredCond) { + Value *NewCond = PBI->getCondition(); + + if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) { + CmpInst *CI = cast<CmpInst>(NewCond); + CI->setPredicate(CI->getInversePredicate()); + } else { + NewCond = + Builder.CreateNot(NewCond, PBI->getCondition()->getName() + ".not"); + } + + PBI->setCondition(NewCond); + PBI->swapSuccessors(); + } + + // If we have bonus instructions, clone them into the predecessor block. + // Note that there may be multiple predecessor blocks, so we cannot move + // bonus instructions to a predecessor block. + ValueToValueMapTy VMap; // maps original values to cloned values + // We already make sure Cond is the last instruction before BI. Therefore, + // all instructions before Cond other than DbgInfoIntrinsic are bonus + // instructions. + for (auto BonusInst = BB->begin(); Cond != &*BonusInst; ++BonusInst) { + if (isa<DbgInfoIntrinsic>(BonusInst)) + continue; + Instruction *NewBonusInst = BonusInst->clone(); + RemapInstruction(NewBonusInst, VMap, + RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); + VMap[&*BonusInst] = NewBonusInst; + + // If we moved a load, we cannot any longer claim any knowledge about + // its potential value. The previous information might have been valid + // only given the branch precondition. + // For an analogous reason, we must also drop all the metadata whose + // semantics we don't understand. 
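+      // For example, a cloned 'load ... !range' whose range claim only held
+      // under the original branch condition must lose that annotation once the
+      // clone executes unconditionally in the predecessor; the call below
+      // strips it.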
+ NewBonusInst->dropUnknownNonDebugMetadata(); + + PredBlock->getInstList().insert(PBI->getIterator(), NewBonusInst); + NewBonusInst->takeName(&*BonusInst); + BonusInst->setName(BonusInst->getName() + ".old"); + } + + // Clone Cond into the predecessor basic block, and or/and the + // two conditions together. + Instruction *New = Cond->clone(); + RemapInstruction(New, VMap, + RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); + PredBlock->getInstList().insert(PBI->getIterator(), New); + New->takeName(Cond); + Cond->setName(New->getName() + ".old"); + + if (BI->isConditional()) { + Instruction *NewCond = cast<Instruction>( + Builder.CreateBinOp(Opc, PBI->getCondition(), New, "or.cond")); + PBI->setCondition(NewCond); + + uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight; + bool HasWeights = + extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight, + SuccTrueWeight, SuccFalseWeight); + SmallVector<uint64_t, 8> NewWeights; + + if (PBI->getSuccessor(0) == BB) { + if (HasWeights) { + // PBI: br i1 %x, BB, FalseDest + // BI: br i1 %y, TrueDest, FalseDest + // TrueWeight is TrueWeight for PBI * TrueWeight for BI. + NewWeights.push_back(PredTrueWeight * SuccTrueWeight); + // FalseWeight is FalseWeight for PBI * TotalWeight for BI + + // TrueWeight for PBI * FalseWeight for BI. + // We assume that total weights of a BranchInst can fit into 32 bits. + // Therefore, we will not have overflow using 64-bit arithmetic. + NewWeights.push_back(PredFalseWeight * + (SuccFalseWeight + SuccTrueWeight) + + PredTrueWeight * SuccFalseWeight); + } + AddPredecessorToBlock(TrueDest, PredBlock, BB); + PBI->setSuccessor(0, TrueDest); + } + if (PBI->getSuccessor(1) == BB) { + if (HasWeights) { + // PBI: br i1 %x, TrueDest, BB + // BI: br i1 %y, TrueDest, FalseDest + // TrueWeight is TrueWeight for PBI * TotalWeight for BI + + // FalseWeight for PBI * TrueWeight for BI. + NewWeights.push_back(PredTrueWeight * + (SuccFalseWeight + SuccTrueWeight) + + PredFalseWeight * SuccTrueWeight); + // FalseWeight is FalseWeight for PBI * FalseWeight for BI. + NewWeights.push_back(PredFalseWeight * SuccFalseWeight); + } + AddPredecessorToBlock(FalseDest, PredBlock, BB); + PBI->setSuccessor(1, FalseDest); + } + if (NewWeights.size() == 2) { + // Halve the weights if any of them cannot fit in an uint32_t + FitWeights(NewWeights); + + SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(), + NewWeights.end()); + PBI->setMetadata( + LLVMContext::MD_prof, + MDBuilder(BI->getContext()).createBranchWeights(MDWeights)); + } else + PBI->setMetadata(LLVMContext::MD_prof, nullptr); + } else { + // Update PHI nodes in the common successors. 
+ for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { + ConstantInt *PBI_C = cast<ConstantInt>( + PHIs[i]->getIncomingValueForBlock(PBI->getParent())); + assert(PBI_C->getType()->isIntegerTy(1)); + Instruction *MergedCond = nullptr; + if (PBI->getSuccessor(0) == TrueDest) { + // Create (PBI_Cond and PBI_C) or (!PBI_Cond and BI_Value) + // PBI_C is true: PBI_Cond or (!PBI_Cond and BI_Value) + // is false: !PBI_Cond and BI_Value + Instruction *NotCond = cast<Instruction>( + Builder.CreateNot(PBI->getCondition(), "not.cond")); + MergedCond = cast<Instruction>( + Builder.CreateBinOp(Instruction::And, NotCond, New, "and.cond")); + if (PBI_C->isOne()) + MergedCond = cast<Instruction>(Builder.CreateBinOp( + Instruction::Or, PBI->getCondition(), MergedCond, "or.cond")); + } else { + // Create (PBI_Cond and BI_Value) or (!PBI_Cond and PBI_C) + // PBI_C is true: (PBI_Cond and BI_Value) or (!PBI_Cond) + // is false: PBI_Cond and BI_Value + MergedCond = cast<Instruction>(Builder.CreateBinOp( + Instruction::And, PBI->getCondition(), New, "and.cond")); + if (PBI_C->isOne()) { + Instruction *NotCond = cast<Instruction>( + Builder.CreateNot(PBI->getCondition(), "not.cond")); + MergedCond = cast<Instruction>(Builder.CreateBinOp( + Instruction::Or, NotCond, MergedCond, "or.cond")); + } + } + // Update PHI Node. + PHIs[i]->setIncomingValue(PHIs[i]->getBasicBlockIndex(PBI->getParent()), + MergedCond); + } + // Change PBI from Conditional to Unconditional. + BranchInst *New_PBI = BranchInst::Create(TrueDest, PBI); + EraseTerminatorInstAndDCECond(PBI); + PBI = New_PBI; + } + + // If BI was a loop latch, it may have had associated loop metadata. + // We need to copy it to the new latch, that is, PBI. + if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop)) + PBI->setMetadata(LLVMContext::MD_loop, LoopMD); + + // TODO: If BB is reachable from all paths through PredBlock, then we + // could replace PBI's branch probabilities with BI's. + + // Copy any debug value intrinsics into the end of PredBlock. + for (Instruction &I : *BB) + if (isa<DbgInfoIntrinsic>(I)) + I.clone()->insertBefore(PBI); + + return true; + } + return false; +} + +// If there is only one store in BB1 and BB2, return it, otherwise return +// nullptr. +static StoreInst *findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2) { + StoreInst *S = nullptr; + for (auto *BB : {BB1, BB2}) { + if (!BB) + continue; + for (auto &I : *BB) + if (auto *SI = dyn_cast<StoreInst>(&I)) { + if (S) + // Multiple stores seen. + return nullptr; + else + S = SI; + } + } + return S; +} + +static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, + Value *AlternativeV = nullptr) { + // PHI is going to be a PHI node that allows the value V that is defined in + // BB to be referenced in BB's only successor. + // + // If AlternativeV is nullptr, the only value we care about in PHI is V. It + // doesn't matter to us what the other operand is (it'll never get used). We + // could just create a new PHI with an undef incoming value, but that could + // increase register pressure if EarlyCSE/InstCombine can't fold it with some + // other PHI. So here we directly look for some PHI in BB's successor with V + // as an incoming operand. If we find one, we use it, else we create a new + // one. + // + // If AlternativeV is not nullptr, we care about both incoming values in PHI. + // PHI must be exactly: phi <ty> [ %BB, %V ], [ %OtherBB, %AlternativeV] + // where OtherBB is the single other predecessor of BB's only successor. 
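+  // For example (hypothetical IR), with AlternativeV set this looks for
+  //   %m = phi i32 [ %V, %BB ], [ %AlternativeV, %OtherBB ]
+  // in the successor and reuses it; otherwise a fresh 'simplifycfg.merge' PHI
+  // with exactly those two incoming values is created below.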
+ PHINode *PHI = nullptr; + BasicBlock *Succ = BB->getSingleSuccessor(); + + for (auto I = Succ->begin(); isa<PHINode>(I); ++I) + if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) { + PHI = cast<PHINode>(I); + if (!AlternativeV) + break; + + assert(std::distance(pred_begin(Succ), pred_end(Succ)) == 2); + auto PredI = pred_begin(Succ); + BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI; + if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV) + break; + PHI = nullptr; + } + if (PHI) + return PHI; + + // If V is not an instruction defined in BB, just return it. + if (!AlternativeV && + (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB)) + return V; + + PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge", &Succ->front()); + PHI->addIncoming(V, BB); + for (BasicBlock *PredBB : predecessors(Succ)) + if (PredBB != BB) + PHI->addIncoming( + AlternativeV ? AlternativeV : UndefValue::get(V->getType()), PredBB); + return PHI; +} + +static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, + BasicBlock *QTB, BasicBlock *QFB, + BasicBlock *PostBB, Value *Address, + bool InvertPCond, bool InvertQCond) { + auto IsaBitcastOfPointerType = [](const Instruction &I) { + return Operator::getOpcode(&I) == Instruction::BitCast && + I.getType()->isPointerTy(); + }; + + // If we're not in aggressive mode, we only optimize if we have some + // confidence that by optimizing we'll allow P and/or Q to be if-converted. + auto IsWorthwhile = [&](BasicBlock *BB) { + if (!BB) + return true; + // Heuristic: if the block can be if-converted/phi-folded and the + // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to + // thread this store. + unsigned N = 0; + for (auto &I : *BB) { + // Cheap instructions viable for folding. + if (isa<BinaryOperator>(I) || isa<GetElementPtrInst>(I) || + isa<StoreInst>(I)) + ++N; + // Free instructions. + else if (isa<TerminatorInst>(I) || isa<DbgInfoIntrinsic>(I) || + IsaBitcastOfPointerType(I)) + continue; + else + return false; + } + return N <= PHINodeFoldingThreshold; + }; + + if (!MergeCondStoresAggressively && + (!IsWorthwhile(PTB) || !IsWorthwhile(PFB) || !IsWorthwhile(QTB) || + !IsWorthwhile(QFB))) + return false; + + // For every pointer, there must be exactly two stores, one coming from + // PTB or PFB, and the other from QTB or QFB. We don't support more than one + // store (to any address) in PTB,PFB or QTB,QFB. + // FIXME: We could relax this restriction with a bit more work and performance + // testing. + StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB); + StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB); + if (!PStore || !QStore) + return false; + + // Now check the stores are compatible. + if (!QStore->isUnordered() || !PStore->isUnordered()) + return false; + + // Check that sinking the store won't cause program behavior changes. Sinking + // the store out of the Q blocks won't change any behavior as we're sinking + // from a block to its unconditional successor. But we're moving a store from + // the P blocks down through the middle block (QBI) and past both QFB and QTB. + // So we need to check that there are no aliasing loads or stores in + // QBI, QTB and QFB. We also need to check there are no conflicting memory + // operations between PStore and the end of its parent block. + // + // The ideal way to do this is to query AliasAnalysis, but we don't + // preserve AA currently so that is dangerous. Be super safe and just + // check there are no other memory operations at all. 
+ for (auto &I : *QFB->getSinglePredecessor()) + if (I.mayReadOrWriteMemory()) + return false; + for (auto &I : *QFB) + if (&I != QStore && I.mayReadOrWriteMemory()) + return false; + if (QTB) + for (auto &I : *QTB) + if (&I != QStore && I.mayReadOrWriteMemory()) + return false; + for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end(); + I != E; ++I) + if (&*I != PStore && I->mayReadOrWriteMemory()) + return false; + + // OK, we're going to sink the stores to PostBB. The store has to be + // conditional though, so first create the predicate. + Value *PCond = cast<BranchInst>(PFB->getSinglePredecessor()->getTerminator()) + ->getCondition(); + Value *QCond = cast<BranchInst>(QFB->getSinglePredecessor()->getTerminator()) + ->getCondition(); + + Value *PPHI = ensureValueAvailableInSuccessor(PStore->getValueOperand(), + PStore->getParent()); + Value *QPHI = ensureValueAvailableInSuccessor(QStore->getValueOperand(), + QStore->getParent(), PPHI); + + IRBuilder<> QB(&*PostBB->getFirstInsertionPt()); + + Value *PPred = PStore->getParent() == PTB ? PCond : QB.CreateNot(PCond); + Value *QPred = QStore->getParent() == QTB ? QCond : QB.CreateNot(QCond); + + if (InvertPCond) + PPred = QB.CreateNot(PPred); + if (InvertQCond) + QPred = QB.CreateNot(QPred); + Value *CombinedPred = QB.CreateOr(PPred, QPred); + + auto *T = + SplitBlockAndInsertIfThen(CombinedPred, &*QB.GetInsertPoint(), false); + QB.SetInsertPoint(T); + StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address)); + AAMDNodes AAMD; + PStore->getAAMetadata(AAMD, /*Merge=*/false); + PStore->getAAMetadata(AAMD, /*Merge=*/true); + SI->setAAMetadata(AAMD); + + QStore->eraseFromParent(); + PStore->eraseFromParent(); + + return true; +} + +static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) { + // The intention here is to find diamonds or triangles (see below) where each + // conditional block contains a store to the same address. Both of these + // stores are conditional, so they can't be unconditionally sunk. But it may + // be profitable to speculatively sink the stores into one merged store at the + // end, and predicate the merged store on the union of the two conditions of + // PBI and QBI. + // + // This can reduce the number of stores executed if both of the conditions are + // true, and can allow the blocks to become small enough to be if-converted. + // This optimization will also chain, so that ladders of test-and-set + // sequences can be if-converted away. + // + // We only deal with simple diamonds or triangles: + // + // PBI or PBI or a combination of the two + // / \ | \ + // PTB PFB | PFB + // \ / | / + // QBI QBI + // / \ | \ + // QTB QFB | QFB + // \ / | / + // PostBB PostBB + // + // We model triangles as a type of diamond with a nullptr "true" block. + // Triangles are canonicalized so that the fallthrough edge is represented by + // a true condition, as in the diagram above. + // + BasicBlock *PTB = PBI->getSuccessor(0); + BasicBlock *PFB = PBI->getSuccessor(1); + BasicBlock *QTB = QBI->getSuccessor(0); + BasicBlock *QFB = QBI->getSuccessor(1); + BasicBlock *PostBB = QFB->getSingleSuccessor(); + + bool InvertPCond = false, InvertQCond = false; + // Canonicalize fallthroughs to the true branches. + if (PFB == QBI->getParent()) { + std::swap(PFB, PTB); + InvertPCond = true; + } + if (QFB == PostBB) { + std::swap(QFB, QTB); + InvertQCond = true; + } + + // From this point on we can assume PTB or QTB may be fallthroughs but PFB + // and QFB may not. Model fallthroughs as a nullptr block. 
+ if (PTB == QBI->getParent()) + PTB = nullptr; + if (QTB == PostBB) + QTB = nullptr; + + // Legality bailouts. We must have at least the non-fallthrough blocks and + // the post-dominating block, and the non-fallthroughs must only have one + // predecessor. + auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) { + return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S; + }; + if (!PostBB || + !HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) || + !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB)) + return false; + if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) || + (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB))) + return false; + if (PostBB->getNumUses() != 2 || QBI->getParent()->getNumUses() != 2) + return false; + + // OK, this is a sequence of two diamonds or triangles. + // Check if there are stores in PTB or PFB that are repeated in QTB or QFB. + SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses; + for (auto *BB : {PTB, PFB}) { + if (!BB) + continue; + for (auto &I : *BB) + if (StoreInst *SI = dyn_cast<StoreInst>(&I)) + PStoreAddresses.insert(SI->getPointerOperand()); + } + for (auto *BB : {QTB, QFB}) { + if (!BB) + continue; + for (auto &I : *BB) + if (StoreInst *SI = dyn_cast<StoreInst>(&I)) + QStoreAddresses.insert(SI->getPointerOperand()); + } + + set_intersect(PStoreAddresses, QStoreAddresses); + // set_intersect mutates PStoreAddresses in place. Rename it here to make it + // clear what it contains. + auto &CommonAddresses = PStoreAddresses; + + bool Changed = false; + for (auto *Address : CommonAddresses) + Changed |= mergeConditionalStoreToAddress( + PTB, PFB, QTB, QFB, PostBB, Address, InvertPCond, InvertQCond); + return Changed; +} + +/// If we have a conditional branch as a predecessor of another block, +/// this function tries to simplify it. We know +/// that PBI and BI are both conditional branches, and BI is in one of the +/// successor blocks of PBI - PBI branches to BI. +static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, + const DataLayout &DL) { + assert(PBI->isConditional() && BI->isConditional()); + BasicBlock *BB = BI->getParent(); + + // If this block ends with a branch instruction, and if there is a + // predecessor that ends on a branch of the same condition, make + // this conditional branch redundant. + if (PBI->getCondition() == BI->getCondition() && + PBI->getSuccessor(0) != PBI->getSuccessor(1)) { + // Okay, the outcome of this conditional branch is statically + // knowable. If this block had a single pred, handle specially. + if (BB->getSinglePredecessor()) { + // Turn this into a branch on constant. + bool CondIsTrue = PBI->getSuccessor(0) == BB; + BI->setCondition( + ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue)); + return true; // Nuke the branch on constant. + } + + // Otherwise, if there are multiple predecessors, insert a PHI that merges + // in the constant and simplify the block result. Subsequent passes of + // simplifycfg will thread the block. + if (BlockIsSimpleEnoughToThreadThrough(BB)) { + pred_iterator PB = pred_begin(BB), PE = pred_end(BB); + PHINode *NewPN = PHINode::Create( + Type::getInt1Ty(BB->getContext()), std::distance(PB, PE), + BI->getCondition()->getName() + ".pr", &BB->front()); + // Okay, we're going to insert the PHI node. Since PBI is not the only + // predecessor, compute the PHI'd conditional value for all of the preds. 
+ // Any predecessor where the condition is not computable we keep symbolic. + for (pred_iterator PI = PB; PI != PE; ++PI) { + BasicBlock *P = *PI; + if ((PBI = dyn_cast<BranchInst>(P->getTerminator())) && PBI != BI && + PBI->isConditional() && PBI->getCondition() == BI->getCondition() && + PBI->getSuccessor(0) != PBI->getSuccessor(1)) { + bool CondIsTrue = PBI->getSuccessor(0) == BB; + NewPN->addIncoming( + ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue), + P); + } else { + NewPN->addIncoming(BI->getCondition(), P); + } + } + + BI->setCondition(NewPN); + return true; + } + } + + if (auto *CE = dyn_cast<ConstantExpr>(BI->getCondition())) + if (CE->canTrap()) + return false; + + // If both branches are conditional and both contain stores to the same + // address, remove the stores from the conditionals and create a conditional + // merged store at the end. + if (MergeCondStores && mergeConditionalStores(PBI, BI)) + return true; + + // If this is a conditional branch in an empty block, and if any + // predecessors are a conditional branch to one of our destinations, + // fold the conditions into logical ops and one cond br. + BasicBlock::iterator BBI = BB->begin(); + // Ignore dbg intrinsics. + while (isa<DbgInfoIntrinsic>(BBI)) + ++BBI; + if (&*BBI != BI) + return false; + + int PBIOp, BIOp; + if (PBI->getSuccessor(0) == BI->getSuccessor(0)) { + PBIOp = 0; + BIOp = 0; + } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) { + PBIOp = 0; + BIOp = 1; + } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) { + PBIOp = 1; + BIOp = 0; + } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) { + PBIOp = 1; + BIOp = 1; + } else { + return false; + } + + // Check to make sure that the other destination of this branch + // isn't BB itself. If so, this is an infinite loop that will + // keep getting unwound. + if (PBI->getSuccessor(PBIOp) == BB) + return false; + + // Do not perform this transformation if it would require + // insertion of a large number of select instructions. For targets + // without predication/cmovs, this is a big pessimization. + + // Also do not perform this transformation if any phi node in the common + // destination block can trap when reached by BB or PBB (PR17073). In that + // case, it would be unsafe to hoist the operation into a select instruction. + + BasicBlock *CommonDest = PBI->getSuccessor(PBIOp); + unsigned NumPhis = 0; + for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II); + ++II, ++NumPhis) { + if (NumPhis > 2) // Disable this xform. + return false; + + PHINode *PN = cast<PHINode>(II); + Value *BIV = PN->getIncomingValueForBlock(BB); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BIV)) + if (CE->canTrap()) + return false; + + unsigned PBBIdx = PN->getBasicBlockIndex(PBI->getParent()); + Value *PBIV = PN->getIncomingValue(PBBIdx); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(PBIV)) + if (CE->canTrap()) + return false; + } + + // Finally, if everything is ok, fold the branches to logical ops. + BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1); + + DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent() + << "AND: " << *BI->getParent()); + + // If OtherDest *is* BB, then BB is a basic block with a single conditional + // branch in it, where one edge (OtherDest) goes back to itself but the other + // exits. We don't *know* that the program avoids the infinite loop + // (even though that seems likely). If we do this xform naively, we'll end up + // recursively unpeeling the loop. 
Since we know that (after the xform is + // done) that the block *is* infinite if reached, we just make it an obviously + // infinite loop with no cond branch. + if (OtherDest == BB) { + // Insert it at the end of the function, because it's either code, + // or it won't matter if it's hot. :) + BasicBlock *InfLoopBlock = + BasicBlock::Create(BB->getContext(), "infloop", BB->getParent()); + BranchInst::Create(InfLoopBlock, InfLoopBlock); + OtherDest = InfLoopBlock; + } + + DEBUG(dbgs() << *PBI->getParent()->getParent()); + + // BI may have other predecessors. Because of this, we leave + // it alone, but modify PBI. + + // Make sure we get to CommonDest on True&True directions. + Value *PBICond = PBI->getCondition(); + IRBuilder<NoFolder> Builder(PBI); + if (PBIOp) + PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not"); + + Value *BICond = BI->getCondition(); + if (BIOp) + BICond = Builder.CreateNot(BICond, BICond->getName() + ".not"); + + // Merge the conditions. + Value *Cond = Builder.CreateOr(PBICond, BICond, "brmerge"); + + // Modify PBI to branch on the new condition to the new dests. + PBI->setCondition(Cond); + PBI->setSuccessor(0, CommonDest); + PBI->setSuccessor(1, OtherDest); + + // Update branch weight for PBI. + uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight; + uint64_t PredCommon, PredOther, SuccCommon, SuccOther; + bool HasWeights = + extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight, + SuccTrueWeight, SuccFalseWeight); + if (HasWeights) { + PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight; + PredOther = PBIOp ? PredTrueWeight : PredFalseWeight; + SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight; + SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight; + // The weight to CommonDest should be PredCommon * SuccTotal + + // PredOther * SuccCommon. + // The weight to OtherDest should be PredOther * SuccOther. + uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) + + PredOther * SuccCommon, + PredOther * SuccOther}; + // Halve the weights if any of them cannot fit in an uint32_t + FitWeights(NewWeights); + + PBI->setMetadata(LLVMContext::MD_prof, + MDBuilder(BI->getContext()) + .createBranchWeights(NewWeights[0], NewWeights[1])); + } + + // OtherDest may have phi nodes. If so, add an entry from PBI's + // block that are identical to the entries for BI's block. + AddPredecessorToBlock(OtherDest, PBI->getParent(), BB); + + // We know that the CommonDest already had an edge from PBI to + // it. If it has PHIs though, the PHIs may have different + // entries for BB and PBI's BB. If so, insert a select to make + // them agree. + PHINode *PN; + for (BasicBlock::iterator II = CommonDest->begin(); + (PN = dyn_cast<PHINode>(II)); ++II) { + Value *BIV = PN->getIncomingValueForBlock(BB); + unsigned PBBIdx = PN->getBasicBlockIndex(PBI->getParent()); + Value *PBIV = PN->getIncomingValue(PBBIdx); + if (BIV != PBIV) { + // Insert a select in PBI to pick the right value. + SelectInst *NV = cast<SelectInst>( + Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux")); + PN->setIncomingValue(PBBIdx, NV); + // Although the select has the same condition as PBI, the original branch + // weights for PBI do not apply to the new select because the select's + // 'logical' edges are incoming edges of the phi that is eliminated, not + // the outgoing edges of PBI. + if (HasWeights) { + uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight; + uint64_t PredOther = PBIOp ? 
PredTrueWeight : PredFalseWeight; + uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight; + uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight; + // The weight to PredCommonDest should be PredCommon * SuccTotal. + // The weight to PredOtherDest should be PredOther * SuccCommon. + uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther), + PredOther * SuccCommon}; + + FitWeights(NewWeights); + + NV->setMetadata(LLVMContext::MD_prof, + MDBuilder(BI->getContext()) + .createBranchWeights(NewWeights[0], NewWeights[1])); + } + } + } + + DEBUG(dbgs() << "INTO: " << *PBI->getParent()); + DEBUG(dbgs() << *PBI->getParent()->getParent()); + + // This basic block is probably dead. We know it has at least + // one fewer predecessor. + return true; +} + +// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is +// true or to FalseBB if Cond is false. +// Takes care of updating the successors and removing the old terminator. +// Also makes sure not to introduce new successors by assuming that edges to +// non-successor TrueBBs and FalseBBs aren't reachable. +static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond, + BasicBlock *TrueBB, BasicBlock *FalseBB, + uint32_t TrueWeight, + uint32_t FalseWeight) { + // Remove any superfluous successor edges from the CFG. + // First, figure out which successors to preserve. + // If TrueBB and FalseBB are equal, only try to preserve one copy of that + // successor. + BasicBlock *KeepEdge1 = TrueBB; + BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr; + + // Then remove the rest. + for (BasicBlock *Succ : OldTerm->successors()) { + // Make sure only to keep exactly one copy of each edge. + if (Succ == KeepEdge1) + KeepEdge1 = nullptr; + else if (Succ == KeepEdge2) + KeepEdge2 = nullptr; + else + Succ->removePredecessor(OldTerm->getParent(), + /*DontDeleteUselessPHIs=*/true); + } + + IRBuilder<> Builder(OldTerm); + Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc()); + + // Insert an appropriate new terminator. + if (!KeepEdge1 && !KeepEdge2) { + if (TrueBB == FalseBB) + // We were only looking for one successor, and it was present. + // Create an unconditional branch to it. + Builder.CreateBr(TrueBB); + else { + // We found both of the successors we were looking for. + // Create a conditional branch sharing the condition of the select. + BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB); + if (TrueWeight != FalseWeight) + NewBI->setMetadata(LLVMContext::MD_prof, + MDBuilder(OldTerm->getContext()) + .createBranchWeights(TrueWeight, FalseWeight)); + } + } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) { + // Neither of the selected blocks were successors, so this + // terminator must be unreachable. + new UnreachableInst(OldTerm->getContext(), OldTerm); + } else { + // One of the selected values was a successor, but the other wasn't. + // Insert an unconditional branch to the one that was found; + // the edge to the one that wasn't must be unreachable. + if (!KeepEdge1) + // Only TrueBB was found. + Builder.CreateBr(TrueBB); + else + // Only FalseBB was found. + Builder.CreateBr(FalseBB); + } + + EraseTerminatorInstAndDCECond(OldTerm); + return true; +} + +// Replaces +// (switch (select cond, X, Y)) on constant X, Y +// with a branch - conditional if X and Y lead to distinct BBs, +// unconditional otherwise. +static bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) { + // Check for constant integer values in the select. 
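Roughly, this routine rewrites a switch whose condition is a select between two constants; a hedged source-level sketch with made-up values:

  // Before: switch on (select cond, 1, 2).
  int before(bool cond) {
    switch (cond ? 1 : 2) {
    case 1: return 10;
    case 2: return 20;
    default: return 0;   // unreachable for this condition
    }
  }

  // After: branch directly on the select's condition to the two case
  // destinations (or unconditionally, if both constants reach the same block).
  int after(bool cond) {
    return cond ? 10 : 20;
  }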
+ ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue()); + ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue()); + if (!TrueVal || !FalseVal) + return false; + + // Find the relevant condition and destinations. + Value *Condition = Select->getCondition(); + BasicBlock *TrueBB = SI->findCaseValue(TrueVal).getCaseSuccessor(); + BasicBlock *FalseBB = SI->findCaseValue(FalseVal).getCaseSuccessor(); + + // Get weight for TrueBB and FalseBB. + uint32_t TrueWeight = 0, FalseWeight = 0; + SmallVector<uint64_t, 8> Weights; + bool HasWeights = HasBranchWeights(SI); + if (HasWeights) { + GetBranchWeights(SI, Weights); + if (Weights.size() == 1 + SI->getNumCases()) { + TrueWeight = + (uint32_t)Weights[SI->findCaseValue(TrueVal).getSuccessorIndex()]; + FalseWeight = + (uint32_t)Weights[SI->findCaseValue(FalseVal).getSuccessorIndex()]; + } + } + + // Perform the actual simplification. + return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight, + FalseWeight); +} + +// Replaces +// (indirectbr (select cond, blockaddress(@fn, BlockA), +// blockaddress(@fn, BlockB))) +// with +// (br cond, BlockA, BlockB). +static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) { + // Check that both operands of the select are block addresses. + BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue()); + BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue()); + if (!TBA || !FBA) + return false; + + // Extract the actual blocks. + BasicBlock *TrueBB = TBA->getBasicBlock(); + BasicBlock *FalseBB = FBA->getBasicBlock(); + + // Perform the actual simplification. + return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB, 0, + 0); +} + +/// This is called when we find an icmp instruction +/// (a seteq/setne with a constant) as the only instruction in a +/// block that ends with an uncond branch. We are looking for a very specific +/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In +/// this case, we merge the first two "or's of icmp" into a switch, but then the +/// default value goes to an uncond block with a seteq in it, we get something +/// like: +/// +/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ] +/// DEFAULT: +/// %tmp = icmp eq i8 %A, 92 +/// br label %end +/// end: +/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ] +/// +/// We prefer to split the edge to 'end' so that there is a true/false entry to +/// the PHI, merging the third icmp into the switch. +static bool TryToSimplifyUncondBranchWithICmpInIt( + ICmpInst *ICI, IRBuilder<> &Builder, const DataLayout &DL, + const TargetTransformInfo &TTI, unsigned BonusInstThreshold, + AssumptionCache *AC) { + BasicBlock *BB = ICI->getParent(); + + // If the block has any PHIs in it or the icmp has multiple uses, it is too + // complex. + if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse()) + return false; + + Value *V = ICI->getOperand(0); + ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1)); + + // The pattern we're looking for is where our only predecessor is a switch on + // 'V' and this block is the default case for the switch. In this case we can + // fold the compared value into the switch to simplify things. 
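Continuing the example from the comment above, a rough source-level view of the net effect (the constant 92 and the function names are illustrative only):

  // Before: the first two compares were already turned into a switch, and the
  // third compare sits alone in the switch's default block.
  bool before(unsigned a) {
    return a == 1 || a == 2 || a == 92;
  }

  // After: the edge to the merge point is split and the compared value 92
  // becomes one more case of the switch, so the trailing icmp disappears.
  bool after(unsigned a) {
    switch (a) {
    case 1:
    case 2:
    case 92:
      return true;
    default:
      return false;
    }
  }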
+ BasicBlock *Pred = BB->getSinglePredecessor(); + if (!Pred || !isa<SwitchInst>(Pred->getTerminator())) + return false; + + SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator()); + if (SI->getCondition() != V) + return false; + + // If BB is reachable on a non-default case, then we simply know the value of + // V in this block. Substitute it and constant fold the icmp instruction + // away. + if (SI->getDefaultDest() != BB) { + ConstantInt *VVal = SI->findCaseDest(BB); + assert(VVal && "Should have a unique destination value"); + ICI->setOperand(0, VVal); + + if (Value *V = SimplifyInstruction(ICI, DL)) { + ICI->replaceAllUsesWith(V); + ICI->eraseFromParent(); + } + // BB is now empty, so it is likely to simplify away. + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + } + + // Ok, the block is reachable from the default dest. If the constant we're + // comparing exists in one of the other edges, then we can constant fold ICI + // and zap it. + if (SI->findCaseValue(Cst) != SI->case_default()) { + Value *V; + if (ICI->getPredicate() == ICmpInst::ICMP_EQ) + V = ConstantInt::getFalse(BB->getContext()); + else + V = ConstantInt::getTrue(BB->getContext()); + + ICI->replaceAllUsesWith(V); + ICI->eraseFromParent(); + // BB is now empty, so it is likely to simplify away. + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + } + + // The use of the icmp has to be in the 'end' block, by the only PHI node in + // the block. + BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0); + PHINode *PHIUse = dyn_cast<PHINode>(ICI->user_back()); + if (PHIUse == nullptr || PHIUse != &SuccBlock->front() || + isa<PHINode>(++BasicBlock::iterator(PHIUse))) + return false; + + // If the icmp is a SETEQ, then the default dest gets false, the new edge gets + // true in the PHI. + Constant *DefaultCst = ConstantInt::getTrue(BB->getContext()); + Constant *NewCst = ConstantInt::getFalse(BB->getContext()); + + if (ICI->getPredicate() == ICmpInst::ICMP_EQ) + std::swap(DefaultCst, NewCst); + + // Replace ICI (which is used by the PHI for the default value) with true or + // false depending on if it is EQ or NE. + ICI->replaceAllUsesWith(DefaultCst); + ICI->eraseFromParent(); + + // Okay, the switch goes to this block on a default value. Add an edge from + // the switch to the merge point on the compared value. + BasicBlock *NewBB = + BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB); + SmallVector<uint64_t, 8> Weights; + bool HasWeights = HasBranchWeights(SI); + if (HasWeights) { + GetBranchWeights(SI, Weights); + if (Weights.size() == 1 + SI->getNumCases()) { + // Split weight for default case to case for "Cst". + Weights[0] = (Weights[0] + 1) >> 1; + Weights.push_back(Weights[0]); + + SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end()); + SI->setMetadata( + LLVMContext::MD_prof, + MDBuilder(SI->getContext()).createBranchWeights(MDWeights)); + } + } + SI->addCase(Cst, NewBB); + + // NewBB branches to the phi block, add the uncond branch and the phi entry. + Builder.SetInsertPoint(NewBB); + Builder.SetCurrentDebugLocation(SI->getDebugLoc()); + Builder.CreateBr(SuccBlock); + PHIUse->addIncoming(NewCst, NewBB); + return true; +} + +/// The specified branch is a conditional branch. +/// Check to see if it is branching on an or/and chain of icmp instructions, and +/// fold it into a switch instruction if so. 
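A minimal source-level sketch of the chain-to-switch rewrite (made-up values; x == y stands in for the non-constant ExtraCase that gets tested on its own edge first):

  // Before: a conditional branch on a chain of or'd equality compares.
  bool before(unsigned x, unsigned y) {
    return x == 0 || x == 17 || x == y;
  }

  // After: the non-constant compare is split out into an early test
  // ("switch.early.test"), and the constant compares become switch cases.
  bool after(unsigned x, unsigned y) {
    if (x == y)
      return true;
    switch (x) {
    case 0:
    case 17:
      return true;
    default:
      return false;
    }
  }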
+static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder, + const DataLayout &DL) { + Instruction *Cond = dyn_cast<Instruction>(BI->getCondition()); + if (!Cond) + return false; + + // Change br (X == 0 | X == 1), T, F into a switch instruction. + // If this is a bunch of seteq's or'd together, or if it's a bunch of + // 'setne's and'ed together, collect them. + + // Try to gather values from a chain of and/or to be turned into a switch + ConstantComparesGatherer ConstantCompare(Cond, DL); + // Unpack the result + SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals; + Value *CompVal = ConstantCompare.CompValue; + unsigned UsedICmps = ConstantCompare.UsedICmps; + Value *ExtraCase = ConstantCompare.Extra; + + // If we didn't have a multiply compared value, fail. + if (!CompVal) + return false; + + // Avoid turning single icmps into a switch. + if (UsedICmps <= 1) + return false; + + bool TrueWhenEqual = (Cond->getOpcode() == Instruction::Or); + + // There might be duplicate constants in the list, which the switch + // instruction can't handle, remove them now. + array_pod_sort(Values.begin(), Values.end(), ConstantIntSortPredicate); + Values.erase(std::unique(Values.begin(), Values.end()), Values.end()); + + // If Extra was used, we require at least two switch values to do the + // transformation. A switch with one value is just a conditional branch. + if (ExtraCase && Values.size() < 2) + return false; + + // TODO: Preserve branch weight metadata, similarly to how + // FoldValueComparisonIntoPredecessors preserves it. + + // Figure out which block is which destination. + BasicBlock *DefaultBB = BI->getSuccessor(1); + BasicBlock *EdgeBB = BI->getSuccessor(0); + if (!TrueWhenEqual) + std::swap(DefaultBB, EdgeBB); + + BasicBlock *BB = BI->getParent(); + + DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size() + << " cases into SWITCH. BB is:\n" + << *BB); + + // If there are any extra values that couldn't be folded into the switch + // then we evaluate them with an explicit branch first. Split the block + // right before the condbr to handle it. + if (ExtraCase) { + BasicBlock *NewBB = + BB->splitBasicBlock(BI->getIterator(), "switch.early.test"); + // Remove the uncond branch added to the old block. + TerminatorInst *OldTI = BB->getTerminator(); + Builder.SetInsertPoint(OldTI); + + if (TrueWhenEqual) + Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB); + else + Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB); + + OldTI->eraseFromParent(); + + // If there are PHI nodes in EdgeBB, then we need to add a new entry to them + // for the edge we just added. + AddPredecessorToBlock(EdgeBB, BB, NewBB); + + DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase + << "\nEXTRABB = " << *BB); + BB = NewBB; + } + + Builder.SetInsertPoint(BI); + // Convert pointer to int before we switch. + if (CompVal->getType()->isPointerTy()) { + CompVal = Builder.CreatePtrToInt( + CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr"); + } + + // Create the new switch instruction now. + SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size()); + + // Add all of the 'cases' to the switch instruction. + for (unsigned i = 0, e = Values.size(); i != e; ++i) + New->addCase(Values[i], EdgeBB); + + // We added edges from PI to the EdgeBB. As such, if there were any + // PHI nodes in EdgeBB, they need entries to be added corresponding to + // the number of edges added. 
+ for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) { + PHINode *PN = cast<PHINode>(BBI); + Value *InVal = PN->getIncomingValueForBlock(BB); + for (unsigned i = 0, e = Values.size() - 1; i != e; ++i) + PN->addIncoming(InVal, BB); + } + + // Erase the old branch instruction. + EraseTerminatorInstAndDCECond(BI); + + DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n'); + return true; +} + +bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) { + if (isa<PHINode>(RI->getValue())) + return SimplifyCommonResume(RI); + else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHI()) && + RI->getValue() == RI->getParent()->getFirstNonPHI()) + // The resume must unwind the exception that caused control to branch here. + return SimplifySingleResume(RI); + + return false; +} + +// Simplify resume that is shared by several landing pads (phi of landing pad). +bool SimplifyCFGOpt::SimplifyCommonResume(ResumeInst *RI) { + BasicBlock *BB = RI->getParent(); + + // Check that there are no other instructions except for debug intrinsics + // between the phi of landing pads (RI->getValue()) and resume instruction. + BasicBlock::iterator I = cast<Instruction>(RI->getValue())->getIterator(), + E = RI->getIterator(); + while (++I != E) + if (!isa<DbgInfoIntrinsic>(I)) + return false; + + SmallSet<BasicBlock *, 4> TrivialUnwindBlocks; + auto *PhiLPInst = cast<PHINode>(RI->getValue()); + + // Check incoming blocks to see if any of them are trivial. + for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End; + Idx++) { + auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx); + auto *IncomingValue = PhiLPInst->getIncomingValue(Idx); + + // If the block has other successors, we can not delete it because + // it has other dependents. + if (IncomingBB->getUniqueSuccessor() != BB) + continue; + + auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHI()); + // Not the landing pad that caused the control to branch here. + if (IncomingValue != LandingPad) + continue; + + bool isTrivial = true; + + I = IncomingBB->getFirstNonPHI()->getIterator(); + E = IncomingBB->getTerminator()->getIterator(); + while (++I != E) + if (!isa<DbgInfoIntrinsic>(I)) { + isTrivial = false; + break; + } + + if (isTrivial) + TrivialUnwindBlocks.insert(IncomingBB); + } + + // If no trivial unwind blocks, don't do any simplifications. + if (TrivialUnwindBlocks.empty()) + return false; + + // Turn all invokes that unwind here into calls. + for (auto *TrivialBB : TrivialUnwindBlocks) { + // Blocks that will be simplified should be removed from the phi node. + // Note there could be multiple edges to the resume block, and we need + // to remove them all. + while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1) + BB->removePredecessor(TrivialBB, true); + + for (pred_iterator PI = pred_begin(TrivialBB), PE = pred_end(TrivialBB); + PI != PE;) { + BasicBlock *Pred = *PI++; + removeUnwindEdge(Pred); + } + + // In each SimplifyCFG run, only the current processed block can be erased. + // Otherwise, it will break the iteration of SimplifyCFG pass. So instead + // of erasing TrivialBB, we only remove the branch to the common resume + // block so that we can later erase the resume block since it has no + // predecessors. + TrivialBB->getTerminator()->eraseFromParent(); + new UnreachableInst(RI->getContext(), TrivialBB); + } + + // Delete the resume block if all its predecessors have been removed. 
+ if (pred_empty(BB)) + BB->eraseFromParent(); + + return !TrivialUnwindBlocks.empty(); +} + +// Simplify resume that is only used by a single (non-phi) landing pad. +bool SimplifyCFGOpt::SimplifySingleResume(ResumeInst *RI) { + BasicBlock *BB = RI->getParent(); + LandingPadInst *LPInst = dyn_cast<LandingPadInst>(BB->getFirstNonPHI()); + assert(RI->getValue() == LPInst && + "Resume must unwind the exception that caused control to here"); + + // Check that there are no other instructions except for debug intrinsics. + BasicBlock::iterator I = LPInst->getIterator(), E = RI->getIterator(); + while (++I != E) + if (!isa<DbgInfoIntrinsic>(I)) + return false; + + // Turn all invokes that unwind here into calls and delete the basic block. + for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) { + BasicBlock *Pred = *PI++; + removeUnwindEdge(Pred); + } + + // The landingpad is now unreachable. Zap it. + BB->eraseFromParent(); + if (LoopHeaders) + LoopHeaders->erase(BB); + return true; +} + +static bool removeEmptyCleanup(CleanupReturnInst *RI) { + // If this is a trivial cleanup pad that executes no instructions, it can be + // eliminated. If the cleanup pad continues to the caller, any predecessor + // that is an EH pad will be updated to continue to the caller and any + // predecessor that terminates with an invoke instruction will have its invoke + // instruction converted to a call instruction. If the cleanup pad being + // simplified does not continue to the caller, each predecessor will be + // updated to continue to the unwind destination of the cleanup pad being + // simplified. + BasicBlock *BB = RI->getParent(); + CleanupPadInst *CPInst = RI->getCleanupPad(); + if (CPInst->getParent() != BB) + // This isn't an empty cleanup. + return false; + + // We cannot kill the pad if it has multiple uses. This typically arises + // from unreachable basic blocks. + if (!CPInst->hasOneUse()) + return false; + + // Check that there are no other instructions except for benign intrinsics. + BasicBlock::iterator I = CPInst->getIterator(), E = RI->getIterator(); + while (++I != E) { + auto *II = dyn_cast<IntrinsicInst>(I); + if (!II) + return false; + + Intrinsic::ID IntrinsicID = II->getIntrinsicID(); + switch (IntrinsicID) { + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + case Intrinsic::lifetime_end: + break; + default: + return false; + } + } + + // If the cleanup return we are simplifying unwinds to the caller, this will + // set UnwindDest to nullptr. + BasicBlock *UnwindDest = RI->getUnwindDest(); + Instruction *DestEHPad = UnwindDest ? UnwindDest->getFirstNonPHI() : nullptr; + + // We're about to remove BB from the control flow. Before we do, sink any + // PHINodes into the unwind destination. Doing this before changing the + // control flow avoids some potentially slow checks, since we can currently + // be certain that UnwindDest and BB have no common predecessors (since they + // are both EH pads). + if (UnwindDest) { + // First, go through the PHI nodes in UnwindDest and update any nodes that + // reference the block we are removing + for (BasicBlock::iterator I = UnwindDest->begin(), + IE = DestEHPad->getIterator(); + I != IE; ++I) { + PHINode *DestPN = cast<PHINode>(I); + + int Idx = DestPN->getBasicBlockIndex(BB); + // Since BB unwinds to UnwindDest, it has to be in the PHI node. + assert(Idx != -1); + // This PHI node has an incoming value that corresponds to a control + // path through the cleanup pad we are removing. 
If the incoming + // value is in the cleanup pad, it must be a PHINode (because we + // verified above that the block is otherwise empty). Otherwise, the + // value is either a constant or a value that dominates the cleanup + // pad being removed. + // + // Because BB and UnwindDest are both EH pads, all of their + // predecessors must unwind to these blocks, and since no instruction + // can have multiple unwind destinations, there will be no overlap in + // incoming blocks between SrcPN and DestPN. + Value *SrcVal = DestPN->getIncomingValue(Idx); + PHINode *SrcPN = dyn_cast<PHINode>(SrcVal); + + // Remove the entry for the block we are deleting. + DestPN->removeIncomingValue(Idx, false); + + if (SrcPN && SrcPN->getParent() == BB) { + // If the incoming value was a PHI node in the cleanup pad we are + // removing, we need to merge that PHI node's incoming values into + // DestPN. + for (unsigned SrcIdx = 0, SrcE = SrcPN->getNumIncomingValues(); + SrcIdx != SrcE; ++SrcIdx) { + DestPN->addIncoming(SrcPN->getIncomingValue(SrcIdx), + SrcPN->getIncomingBlock(SrcIdx)); + } + } else { + // Otherwise, the incoming value came from above BB and + // so we can just reuse it. We must associate all of BB's + // predecessors with this value. + for (auto *pred : predecessors(BB)) { + DestPN->addIncoming(SrcVal, pred); + } + } + } + + // Sink any remaining PHI nodes directly into UnwindDest. + Instruction *InsertPt = DestEHPad; + for (BasicBlock::iterator I = BB->begin(), + IE = BB->getFirstNonPHI()->getIterator(); + I != IE;) { + // The iterator must be incremented here because the instructions are + // being moved to another block. + PHINode *PN = cast<PHINode>(I++); + if (PN->use_empty()) + // If the PHI node has no uses, just leave it. It will be erased + // when we erase BB below. + continue; + + // Otherwise, sink this PHI node into UnwindDest. + // Any predecessors to UnwindDest which are not already represented + // must be back edges which inherit the value from the path through + // BB. In this case, the PHI value must reference itself. + for (auto *pred : predecessors(UnwindDest)) + if (pred != BB) + PN->addIncoming(PN, pred); + PN->moveBefore(InsertPt); + } + } + + for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) { + // The iterator must be updated here because we are removing this pred. + BasicBlock *PredBB = *PI++; + if (UnwindDest == nullptr) { + removeUnwindEdge(PredBB); + } else { + TerminatorInst *TI = PredBB->getTerminator(); + TI->replaceUsesOfWith(BB, UnwindDest); + } + } + + // The cleanup pad is now unreachable. Zap it. + BB->eraseFromParent(); + return true; +} + +// Try to merge two cleanuppads together. +static bool mergeCleanupPad(CleanupReturnInst *RI) { + // Skip any cleanuprets which unwind to caller, there is nothing to merge + // with. + BasicBlock *UnwindDest = RI->getUnwindDest(); + if (!UnwindDest) + return false; + + // This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't + // be safe to merge without code duplication. + if (UnwindDest->getSinglePredecessor() != RI->getParent()) + return false; + + // Verify that our cleanuppad's unwind destination is another cleanuppad. 
+ auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front()); + if (!SuccessorCleanupPad) + return false; + + CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad(); + // Replace any uses of the successor cleanupad with the predecessor pad + // The only cleanuppad uses should be this cleanupret, it's cleanupret and + // funclet bundle operands. + SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad); + // Remove the old cleanuppad. + SuccessorCleanupPad->eraseFromParent(); + // Now, we simply replace the cleanupret with a branch to the unwind + // destination. + BranchInst::Create(UnwindDest, RI->getParent()); + RI->eraseFromParent(); + + return true; +} + +bool SimplifyCFGOpt::SimplifyCleanupReturn(CleanupReturnInst *RI) { + // It is possible to transiantly have an undef cleanuppad operand because we + // have deleted some, but not all, dead blocks. + // Eventually, this block will be deleted. + if (isa<UndefValue>(RI->getOperand(0))) + return false; + + if (mergeCleanupPad(RI)) + return true; + + if (removeEmptyCleanup(RI)) + return true; + + return false; +} + +bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { + BasicBlock *BB = RI->getParent(); + if (!BB->getFirstNonPHIOrDbg()->isTerminator()) + return false; + + // Find predecessors that end with branches. + SmallVector<BasicBlock *, 8> UncondBranchPreds; + SmallVector<BranchInst *, 8> CondBranchPreds; + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + BasicBlock *P = *PI; + TerminatorInst *PTI = P->getTerminator(); + if (BranchInst *BI = dyn_cast<BranchInst>(PTI)) { + if (BI->isUnconditional()) + UncondBranchPreds.push_back(P); + else + CondBranchPreds.push_back(BI); + } + } + + // If we found some, do the transformation! + if (!UncondBranchPreds.empty() && DupRet) { + while (!UncondBranchPreds.empty()) { + BasicBlock *Pred = UncondBranchPreds.pop_back_val(); + DEBUG(dbgs() << "FOLDING: " << *BB + << "INTO UNCOND BRANCH PRED: " << *Pred); + (void)FoldReturnIntoUncondBranch(RI, BB, Pred); + } + + // If we eliminated all predecessors of the block, delete the block now. + if (pred_empty(BB)) { + // We know there are no successors, so just nuke the block. + BB->eraseFromParent(); + if (LoopHeaders) + LoopHeaders->erase(BB); + } + + return true; + } + + // Check out all of the conditional branches going to this return + // instruction. If any of them just select between returns, change the + // branch itself into a select/return pair. + while (!CondBranchPreds.empty()) { + BranchInst *BI = CondBranchPreds.pop_back_val(); + + // Check to see if the non-BB successor is also a return block. + if (isa<ReturnInst>(BI->getSuccessor(0)->getTerminator()) && + isa<ReturnInst>(BI->getSuccessor(1)->getTerminator()) && + SimplifyCondBranchToTwoReturns(BI, Builder)) + return true; + } + return false; +} + +bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { + BasicBlock *BB = UI->getParent(); + + bool Changed = false; + + // If there are any instructions immediately before the unreachable that can + // be removed, do so. + while (UI->getIterator() != BB->begin()) { + BasicBlock::iterator BBI = UI->getIterator(); + --BBI; + // Do not delete instructions that can have side effects which might cause + // the unreachable to not be reachable; specifically, calls and volatile + // operations may have this effect. 
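For orientation, the net effect of SimplifyUnreachable on hypothetical source, where work() is a made-up helper and __builtin_unreachable() stands in for an IR-level unreachable:

  void work();

  // Before: one arm of the branch ends in unreachable after only
  // side-effect-free instructions.
  void before(bool c, int a, int b) {
    if (c) {
      int dead = a + b;        // deletable: no side effects
      (void)dead;
      __builtin_unreachable();
    }
    work();
  }

  // After: the dead computation is erased and the branch into the unreachable
  // block is folded away, leaving a straight-line call.
  void after(bool c, int a, int b) {
    (void)c; (void)a; (void)b;
    work();
  }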
+ if (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI)) + break; + + if (BBI->mayHaveSideEffects()) { + if (auto *SI = dyn_cast<StoreInst>(BBI)) { + if (SI->isVolatile()) + break; + } else if (auto *LI = dyn_cast<LoadInst>(BBI)) { + if (LI->isVolatile()) + break; + } else if (auto *RMWI = dyn_cast<AtomicRMWInst>(BBI)) { + if (RMWI->isVolatile()) + break; + } else if (auto *CXI = dyn_cast<AtomicCmpXchgInst>(BBI)) { + if (CXI->isVolatile()) + break; + } else if (isa<CatchPadInst>(BBI)) { + // A catchpad may invoke exception object constructors and such, which + // in some languages can be arbitrary code, so be conservative by + // default. + // For CoreCLR, it just involves a type test, so can be removed. + if (classifyEHPersonality(BB->getParent()->getPersonalityFn()) != + EHPersonality::CoreCLR) + break; + } else if (!isa<FenceInst>(BBI) && !isa<VAArgInst>(BBI) && + !isa<LandingPadInst>(BBI)) { + break; + } + // Note that deleting LandingPad's here is in fact okay, although it + // involves a bit of subtle reasoning. If this inst is a LandingPad, + // all the predecessors of this block will be the unwind edges of Invokes, + // and we can therefore guarantee this block will be erased. + } + + // Delete this instruction (any uses are guaranteed to be dead) + if (!BBI->use_empty()) + BBI->replaceAllUsesWith(UndefValue::get(BBI->getType())); + BBI->eraseFromParent(); + Changed = true; + } + + // If the unreachable instruction is the first in the block, take a gander + // at all of the predecessors of this instruction, and simplify them. + if (&BB->front() != UI) + return Changed; + + SmallVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB)); + for (unsigned i = 0, e = Preds.size(); i != e; ++i) { + TerminatorInst *TI = Preds[i]->getTerminator(); + IRBuilder<> Builder(TI); + if (auto *BI = dyn_cast<BranchInst>(TI)) { + if (BI->isUnconditional()) { + if (BI->getSuccessor(0) == BB) { + new UnreachableInst(TI->getContext(), TI); + TI->eraseFromParent(); + Changed = true; + } + } else { + if (BI->getSuccessor(0) == BB) { + Builder.CreateBr(BI->getSuccessor(1)); + EraseTerminatorInstAndDCECond(BI); + } else if (BI->getSuccessor(1) == BB) { + Builder.CreateBr(BI->getSuccessor(0)); + EraseTerminatorInstAndDCECond(BI); + Changed = true; + } + } + } else if (auto *SI = dyn_cast<SwitchInst>(TI)) { + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; + ++i) + if (i.getCaseSuccessor() == BB) { + BB->removePredecessor(SI->getParent()); + SI->removeCase(i); + --i; + --e; + Changed = true; + } + } else if (auto *II = dyn_cast<InvokeInst>(TI)) { + if (II->getUnwindDest() == BB) { + removeUnwindEdge(TI->getParent()); + Changed = true; + } + } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) { + if (CSI->getUnwindDest() == BB) { + removeUnwindEdge(TI->getParent()); + Changed = true; + continue; + } + + for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(), + E = CSI->handler_end(); + I != E; ++I) { + if (*I == BB) { + CSI->removeHandler(I); + --I; + --E; + Changed = true; + } + } + if (CSI->getNumHandlers() == 0) { + BasicBlock *CatchSwitchBB = CSI->getParent(); + if (CSI->hasUnwindDest()) { + // Redirect preds to the unwind dest + CatchSwitchBB->replaceAllUsesWith(CSI->getUnwindDest()); + } else { + // Rewrite all preds to unwind to caller (or from invoke to call). + SmallVector<BasicBlock *, 8> EHPreds(predecessors(CatchSwitchBB)); + for (BasicBlock *EHPred : EHPreds) + removeUnwindEdge(EHPred); + } + // The catchswitch is no longer reachable. 
+ new UnreachableInst(CSI->getContext(), CSI); + CSI->eraseFromParent(); + Changed = true; + } + } else if (isa<CleanupReturnInst>(TI)) { + new UnreachableInst(TI->getContext(), TI); + TI->eraseFromParent(); + Changed = true; + } + } + + // If this block is now dead, remove it. + if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) { + // We know there are no successors, so just nuke the block. + BB->eraseFromParent(); + if (LoopHeaders) + LoopHeaders->erase(BB); + return true; + } + + return Changed; +} + +static bool CasesAreContiguous(SmallVectorImpl<ConstantInt *> &Cases) { + assert(Cases.size() >= 1); + + array_pod_sort(Cases.begin(), Cases.end(), ConstantIntSortPredicate); + for (size_t I = 1, E = Cases.size(); I != E; ++I) { + if (Cases[I - 1]->getValue() != Cases[I]->getValue() + 1) + return false; + } + return true; +} + +/// Turn a switch with two reachable destinations into an integer range +/// comparison and branch. +static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { + assert(SI->getNumCases() > 1 && "Degenerate switch?"); + + bool HasDefault = + !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg()); + + // Partition the cases into two sets with different destinations. + BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr; + BasicBlock *DestB = nullptr; + SmallVector<ConstantInt *, 16> CasesA; + SmallVector<ConstantInt *, 16> CasesB; + + for (SwitchInst::CaseIt I : SI->cases()) { + BasicBlock *Dest = I.getCaseSuccessor(); + if (!DestA) + DestA = Dest; + if (Dest == DestA) { + CasesA.push_back(I.getCaseValue()); + continue; + } + if (!DestB) + DestB = Dest; + if (Dest == DestB) { + CasesB.push_back(I.getCaseValue()); + continue; + } + return false; // More than two destinations. + } + + assert(DestA && DestB && + "Single-destination switch should have been folded."); + assert(DestA != DestB); + assert(DestB != SI->getDefaultDest()); + assert(!CasesB.empty() && "There must be non-default cases."); + assert(!CasesA.empty() || HasDefault); + + // Figure out if one of the sets of cases form a contiguous range. + SmallVectorImpl<ConstantInt *> *ContiguousCases = nullptr; + BasicBlock *ContiguousDest = nullptr; + BasicBlock *OtherDest = nullptr; + if (!CasesA.empty() && CasesAreContiguous(CasesA)) { + ContiguousCases = &CasesA; + ContiguousDest = DestA; + OtherDest = DestB; + } else if (CasesAreContiguous(CasesB)) { + ContiguousCases = &CasesB; + ContiguousDest = DestB; + OtherDest = DestA; + } else + return false; + + // Start building the compare and branch. + + Constant *Offset = ConstantExpr::getNeg(ContiguousCases->back()); + Constant *NumCases = + ConstantInt::get(Offset->getType(), ContiguousCases->size()); + + Value *Sub = SI->getCondition(); + if (!Offset->isNullValue()) + Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off"); + + Value *Cmp; + // If NumCases overflowed, then all possible values jump to the successor. + if (NumCases->isNullValue() && !ContiguousCases->empty()) + Cmp = ConstantInt::getTrue(SI->getContext()); + else + Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch"); + BranchInst *NewBI = Builder.CreateCondBr(Cmp, ContiguousDest, OtherDest); + + // Update weight for the newly-created conditional branch. 
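The compare created above is an unsigned range test; a hedged source-level sketch of the whole rewrite, with illustrative case values:

  // Before: a two-destination switch whose cases 3..6 are contiguous.
  bool before(unsigned x) {
    switch (x) {
    case 3: case 4: case 5: case 6:
      return true;
    default:
      return false;
    }
  }

  // After: offset the condition and do one unsigned range compare.
  bool after(unsigned x) {
    return x - 3u < 4u;   // Sub = condition + Offset, Cmp = icmp ult Sub, NumCases
  }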
+ if (HasBranchWeights(SI)) { + SmallVector<uint64_t, 8> Weights; + GetBranchWeights(SI, Weights); + if (Weights.size() == 1 + SI->getNumCases()) { + uint64_t TrueWeight = 0; + uint64_t FalseWeight = 0; + for (size_t I = 0, E = Weights.size(); I != E; ++I) { + if (SI->getSuccessor(I) == ContiguousDest) + TrueWeight += Weights[I]; + else + FalseWeight += Weights[I]; + } + while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) { + TrueWeight /= 2; + FalseWeight /= 2; + } + NewBI->setMetadata(LLVMContext::MD_prof, + MDBuilder(SI->getContext()) + .createBranchWeights((uint32_t)TrueWeight, + (uint32_t)FalseWeight)); + } + } + + // Prune obsolete incoming values off the successors' PHI nodes. + for (auto BBI = ContiguousDest->begin(); isa<PHINode>(BBI); ++BBI) { + unsigned PreviousEdges = ContiguousCases->size(); + if (ContiguousDest == SI->getDefaultDest()) + ++PreviousEdges; + for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I) + cast<PHINode>(BBI)->removeIncomingValue(SI->getParent()); + } + for (auto BBI = OtherDest->begin(); isa<PHINode>(BBI); ++BBI) { + unsigned PreviousEdges = SI->getNumCases() - ContiguousCases->size(); + if (OtherDest == SI->getDefaultDest()) + ++PreviousEdges; + for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I) + cast<PHINode>(BBI)->removeIncomingValue(SI->getParent()); + } + + // Drop the switch. + SI->eraseFromParent(); + + return true; +} + +/// Compute masked bits for the condition of a switch +/// and use it to remove dead cases. +static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, + const DataLayout &DL) { + Value *Cond = SI->getCondition(); + unsigned Bits = Cond->getType()->getIntegerBitWidth(); + APInt KnownZero(Bits, 0), KnownOne(Bits, 0); + computeKnownBits(Cond, KnownZero, KnownOne, DL, 0, AC, SI); + + // We can also eliminate cases by determining that their values are outside of + // the limited range of the condition based on how many significant (non-sign) + // bits are in the condition value. + unsigned ExtraSignBits = ComputeNumSignBits(Cond, DL, 0, AC, SI) - 1; + unsigned MaxSignificantBitsInCond = Bits - ExtraSignBits; + + // Gather dead cases. + SmallVector<ConstantInt *, 8> DeadCases; + for (auto &Case : SI->cases()) { + APInt CaseVal = Case.getCaseValue()->getValue(); + if ((CaseVal & KnownZero) != 0 || (CaseVal & KnownOne) != KnownOne || + (CaseVal.getMinSignedBits() > MaxSignificantBitsInCond)) { + DeadCases.push_back(Case.getCaseValue()); + DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal << " is dead.\n"); + } + } + + // If we can prove that the cases must cover all possible values, the + // default destination becomes dead and we can remove it. If we know some + // of the bits in the value, we can use that to more precisely compute the + // number of possible unique case values. 
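A worked example of that reasoning (the masking is only there so that the known-bits analysis would see two unknown bits; everything else is hypothetical):

  int example(unsigned x) {
    x &= 3;                 // only the low two bits remain unknown
    switch (x) {            // 1 << 2 == 4 possible values, and 4 cases cover them
    case 0: return 10;
    case 1: return 11;
    case 2: return 12;
    case 3: return 13;
    default: return -1;     // provably dead: becomes unreachable
    }
  }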
+ bool HasDefault = + !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg()); + const unsigned NumUnknownBits = + Bits - (KnownZero.Or(KnownOne)).countPopulation(); + assert(NumUnknownBits <= Bits); + if (HasDefault && DeadCases.empty() && + NumUnknownBits < 64 /* avoid overflow */ && + SI->getNumCases() == (1ULL << NumUnknownBits)) { + DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n"); + BasicBlock *NewDefault = + SplitBlockPredecessors(SI->getDefaultDest(), SI->getParent(), ""); + SI->setDefaultDest(&*NewDefault); + SplitBlock(&*NewDefault, &NewDefault->front()); + auto *OldTI = NewDefault->getTerminator(); + new UnreachableInst(SI->getContext(), OldTI); + EraseTerminatorInstAndDCECond(OldTI); + return true; + } + + SmallVector<uint64_t, 8> Weights; + bool HasWeight = HasBranchWeights(SI); + if (HasWeight) { + GetBranchWeights(SI, Weights); + HasWeight = (Weights.size() == 1 + SI->getNumCases()); + } + + // Remove dead cases from the switch. + for (ConstantInt *DeadCase : DeadCases) { + SwitchInst::CaseIt Case = SI->findCaseValue(DeadCase); + assert(Case != SI->case_default() && + "Case was not found. Probably mistake in DeadCases forming."); + if (HasWeight) { + std::swap(Weights[Case.getCaseIndex() + 1], Weights.back()); + Weights.pop_back(); + } + + // Prune unused values from PHI nodes. + Case.getCaseSuccessor()->removePredecessor(SI->getParent()); + SI->removeCase(Case); + } + if (HasWeight && Weights.size() >= 2) { + SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end()); + SI->setMetadata(LLVMContext::MD_prof, + MDBuilder(SI->getParent()->getContext()) + .createBranchWeights(MDWeights)); + } + + return !DeadCases.empty(); +} + +/// If BB would be eligible for simplification by +/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated +/// by an unconditional branch), look at the phi node for BB in the successor +/// block and see if the incoming value is equal to CaseValue. If so, return +/// the phi node, and set PhiIndex to BB's index in the phi node. +static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue, + BasicBlock *BB, int *PhiIndex) { + if (BB->getFirstNonPHIOrDbg() != BB->getTerminator()) + return nullptr; // BB must be empty to be a candidate for simplification. + if (!BB->getSinglePredecessor()) + return nullptr; // BB must be dominated by the switch. + + BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator()); + if (!Branch || !Branch->isUnconditional()) + return nullptr; // Terminator must be unconditional branch. + + BasicBlock *Succ = Branch->getSuccessor(0); + + BasicBlock::iterator I = Succ->begin(); + while (PHINode *PHI = dyn_cast<PHINode>(I++)) { + int Idx = PHI->getBasicBlockIndex(BB); + assert(Idx >= 0 && "PHI has no entry for predecessor?"); + + Value *InValue = PHI->getIncomingValue(Idx); + if (InValue != CaseValue) + continue; + + *PhiIndex = Idx; + return PHI; + } + + return nullptr; +} + +/// Try to forward the condition of a switch instruction to a phi node +/// dominated by the switch, if that would mean that some of the destination +/// blocks of the switch can be folded away. +/// Returns true if a change is made. 
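A rough sketch of the forwarding on hypothetical source:

  // Before: each case block only routes its own case value into the phi.
  int before(int x) {
    int y;
    switch (x) {
    case 1:  y = 1;  break;   // phi entry equals the case value
    case 7:  y = 7;  break;   // phi entry equals the case value
    default: y = 0;  break;
    }
    return y;
  }

  // After forwarding, both phi entries become the switch condition itself,
  // which later lets the now-trivial case blocks fold away:
  int after(int x) {
    return (x == 1 || x == 7) ? x : 0;
  }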
+static bool ForwardSwitchConditionToPHI(SwitchInst *SI) { + typedef DenseMap<PHINode *, SmallVector<int, 4>> ForwardingNodesMap; + ForwardingNodesMap ForwardingNodes; + + for (SwitchInst::CaseIt I = SI->case_begin(), E = SI->case_end(); I != E; + ++I) { + ConstantInt *CaseValue = I.getCaseValue(); + BasicBlock *CaseDest = I.getCaseSuccessor(); + + int PhiIndex; + PHINode *PHI = + FindPHIForConditionForwarding(CaseValue, CaseDest, &PhiIndex); + if (!PHI) + continue; + + ForwardingNodes[PHI].push_back(PhiIndex); + } + + bool Changed = false; + + for (ForwardingNodesMap::iterator I = ForwardingNodes.begin(), + E = ForwardingNodes.end(); + I != E; ++I) { + PHINode *Phi = I->first; + SmallVectorImpl<int> &Indexes = I->second; + + if (Indexes.size() < 2) + continue; + + for (size_t I = 0, E = Indexes.size(); I != E; ++I) + Phi->setIncomingValue(Indexes[I], SI->getCondition()); + Changed = true; + } + + return Changed; +} + +/// Return true if the backend will be able to handle +/// initializing an array of constants like C. +static bool ValidLookupTableConstant(Constant *C, const TargetTransformInfo &TTI) { + if (C->isThreadDependent()) + return false; + if (C->isDLLImportDependent()) + return false; + + if (!isa<ConstantFP>(C) && !isa<ConstantInt>(C) && + !isa<ConstantPointerNull>(C) && !isa<GlobalValue>(C) && + !isa<UndefValue>(C) && !isa<ConstantExpr>(C)) + return false; + + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { + if (!CE->isGEPWithNoNotionalOverIndexing()) + return false; + if (!ValidLookupTableConstant(CE->getOperand(0), TTI)) + return false; + } + + if (!TTI.shouldBuildLookupTablesForConstant(C)) + return false; + + return true; +} + +/// If V is a Constant, return it. Otherwise, try to look up +/// its constant value in ConstantPool, returning 0 if it's not there. +static Constant * +LookupConstant(Value *V, + const SmallDenseMap<Value *, Constant *> &ConstantPool) { + if (Constant *C = dyn_cast<Constant>(V)) + return C; + return ConstantPool.lookup(V); +} + +/// Try to fold instruction I into a constant. This works for +/// simple instructions such as binary operations where both operands are +/// constant or can be replaced by constants from the ConstantPool. Returns the +/// resulting constant on success, 0 otherwise. +static Constant * +ConstantFold(Instruction *I, const DataLayout &DL, + const SmallDenseMap<Value *, Constant *> &ConstantPool) { + if (SelectInst *Select = dyn_cast<SelectInst>(I)) { + Constant *A = LookupConstant(Select->getCondition(), ConstantPool); + if (!A) + return nullptr; + if (A->isAllOnesValue()) + return LookupConstant(Select->getTrueValue(), ConstantPool); + if (A->isNullValue()) + return LookupConstant(Select->getFalseValue(), ConstantPool); + return nullptr; + } + + SmallVector<Constant *, 4> COps; + for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) { + if (Constant *A = LookupConstant(I->getOperand(N), ConstantPool)) + COps.push_back(A); + else + return nullptr; + } + + if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) { + return ConstantFoldCompareInstOperands(Cmp->getPredicate(), COps[0], + COps[1], DL); + } + + return ConstantFoldInstOperands(I, COps, DL); +} + +/// Try to determine the resulting constant values in phi nodes +/// at the common destination basic block, *CommonDest, for one of the case +/// destionations CaseDest corresponding to value CaseVal (0 for the default +/// case), of a switch instruction SI. 
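For a single case, the constant propagation performed here looks roughly like this hypothetical example:

  int example(int x) {
    int y;
    switch (x) {
    case 5: {
      int t = x * 2;    // ConstantPool maps x -> 5, so t folds to 10
      y = t + 1;        // the value recorded for the phi on case 5 is 11
      break;
    }
    default:
      y = 0;
      break;
    }
    return y;
  }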
+static bool +GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, + BasicBlock **CommonDest, + SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res, + const DataLayout &DL, const TargetTransformInfo &TTI) { + // The block from which we enter the common destination. + BasicBlock *Pred = SI->getParent(); + + // If CaseDest is empty except for some side-effect free instructions through + // which we can constant-propagate the CaseVal, continue to its successor. + SmallDenseMap<Value *, Constant *> ConstantPool; + ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal)); + for (BasicBlock::iterator I = CaseDest->begin(), E = CaseDest->end(); I != E; + ++I) { + if (TerminatorInst *T = dyn_cast<TerminatorInst>(I)) { + // If the terminator is a simple branch, continue to the next block. + if (T->getNumSuccessors() != 1 || T->isExceptional()) + return false; + Pred = CaseDest; + CaseDest = T->getSuccessor(0); + } else if (isa<DbgInfoIntrinsic>(I)) { + // Skip debug intrinsic. + continue; + } else if (Constant *C = ConstantFold(&*I, DL, ConstantPool)) { + // Instruction is side-effect free and constant. + + // If the instruction has uses outside this block or a phi node slot for + // the block, it is not safe to bypass the instruction since it would then + // no longer dominate all its uses. + for (auto &Use : I->uses()) { + User *User = Use.getUser(); + if (Instruction *I = dyn_cast<Instruction>(User)) + if (I->getParent() == CaseDest) + continue; + if (PHINode *Phi = dyn_cast<PHINode>(User)) + if (Phi->getIncomingBlock(Use) == CaseDest) + continue; + return false; + } + + ConstantPool.insert(std::make_pair(&*I, C)); + } else { + break; + } + } + + // If we did not have a CommonDest before, use the current one. + if (!*CommonDest) + *CommonDest = CaseDest; + // If the destination isn't the common one, abort. + if (CaseDest != *CommonDest) + return false; + + // Get the values for this case from phi nodes in the destination block. + BasicBlock::iterator I = (*CommonDest)->begin(); + while (PHINode *PHI = dyn_cast<PHINode>(I++)) { + int Idx = PHI->getBasicBlockIndex(Pred); + if (Idx == -1) + continue; + + Constant *ConstVal = + LookupConstant(PHI->getIncomingValue(Idx), ConstantPool); + if (!ConstVal) + return false; + + // Be conservative about which kinds of constants we support. + if (!ValidLookupTableConstant(ConstVal, TTI)) + return false; + + Res.push_back(std::make_pair(PHI, ConstVal)); + } + + return Res.size() > 0; +} + +// Helper function used to add CaseVal to the list of cases that generate +// Result. +static void MapCaseToResult(ConstantInt *CaseVal, + SwitchCaseResultVectorTy &UniqueResults, + Constant *Result) { + for (auto &I : UniqueResults) { + if (I.first == Result) { + I.second.push_back(CaseVal); + return; + } + } + UniqueResults.push_back( + std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal))); +} + +// Helper function that initializes a map containing +// results for the PHI node of the common destination block for a switch +// instruction. Returns false if multiple PHI nodes have been found or if +// there is not a common destination block for the switch. +static bool InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI, + BasicBlock *&CommonDest, + SwitchCaseResultVectorTy &UniqueResults, + Constant *&DefaultResult, + const DataLayout &DL, + const TargetTransformInfo &TTI) { + for (auto &I : SI->cases()) { + ConstantInt *CaseVal = I.getCaseValue(); + + // Resulting value at phi nodes for this case value. 
+ SwitchCaseResultsTy Results; + if (!GetCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results, + DL, TTI)) + return false; + + // Only one value per case is permitted + if (Results.size() > 1) + return false; + MapCaseToResult(CaseVal, UniqueResults, Results.begin()->second); + + // Check the PHI consistency. + if (!PHI) + PHI = Results[0].first; + else if (PHI != Results[0].first) + return false; + } + // Find the default result value. + SmallVector<std::pair<PHINode *, Constant *>, 1> DefaultResults; + BasicBlock *DefaultDest = SI->getDefaultDest(); + GetCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults, + DL, TTI); + // If the default value is not found abort unless the default destination + // is unreachable. + DefaultResult = + DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr; + if ((!DefaultResult && + !isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg()))) + return false; + + return true; +} + +// Helper function that checks if it is possible to transform a switch with only +// two cases (or two cases + default) that produces a result into a select. +// Example: +// switch (a) { +// case 10: %0 = icmp eq i32 %a, 10 +// return 10; %1 = select i1 %0, i32 10, i32 4 +// case 20: ----> %2 = icmp eq i32 %a, 20 +// return 2; %3 = select i1 %2, i32 2, i32 %1 +// default: +// return 4; +// } +static Value *ConvertTwoCaseSwitch(const SwitchCaseResultVectorTy &ResultVector, + Constant *DefaultResult, Value *Condition, + IRBuilder<> &Builder) { + assert(ResultVector.size() == 2 && + "We should have exactly two unique results at this point"); + // If we are selecting between only two cases transform into a simple + // select or a two-way select if default is possible. + if (ResultVector[0].second.size() == 1 && + ResultVector[1].second.size() == 1) { + ConstantInt *const FirstCase = ResultVector[0].second[0]; + ConstantInt *const SecondCase = ResultVector[1].second[0]; + + bool DefaultCanTrigger = DefaultResult; + Value *SelectValue = ResultVector[1].first; + if (DefaultCanTrigger) { + Value *const ValueCompare = + Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp"); + SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first, + DefaultResult, "switch.select"); + } + Value *const ValueCompare = + Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp"); + return Builder.CreateSelect(ValueCompare, ResultVector[0].first, + SelectValue, "switch.select"); + } + + return nullptr; +} + +// Helper function to cleanup a switch instruction that has been converted into +// a select, fixing up PHI nodes and basic blocks. +static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI, + Value *SelectValue, + IRBuilder<> &Builder) { + BasicBlock *SelectBB = SI->getParent(); + while (PHI->getBasicBlockIndex(SelectBB) >= 0) + PHI->removeIncomingValue(SelectBB); + PHI->addIncoming(SelectValue, SelectBB); + + Builder.CreateBr(PHI->getParent()); + + // Remove the switch. + for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) { + BasicBlock *Succ = SI->getSuccessor(i); + + if (Succ == PHI->getParent()) + continue; + Succ->removePredecessor(SelectBB); + } + SI->eraseFromParent(); +} + +/// If the switch is only used to initialize one or more +/// phi nodes in a common successor block with only two different +/// constant values, replace the switch with select. 
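+///
+/// A minimal sketch (hypothetical IR), here with an unreachable default:
+/// \code
+///   switch i32 %x, label %unreachable [ i32 1, label %a    ; %p becomes 10
+///                                       i32 2, label %b ]  ; %p becomes 20
+/// \endcode
+/// is replaced by
+/// \code
+///   %cmp = icmp eq i32 %x, 1
+///   %p   = select i1 %cmp, i32 10, i32 20
+/// \endcode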
+static bool SwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, + AssumptionCache *AC, const DataLayout &DL, + const TargetTransformInfo &TTI) { + Value *const Cond = SI->getCondition(); + PHINode *PHI = nullptr; + BasicBlock *CommonDest = nullptr; + Constant *DefaultResult; + SwitchCaseResultVectorTy UniqueResults; + // Collect all the cases that will deliver the same value from the switch. + if (!InitializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult, + DL, TTI)) + return false; + // Selects choose between maximum two values. + if (UniqueResults.size() != 2) + return false; + assert(PHI != nullptr && "PHI for value select not found"); + + Builder.SetInsertPoint(SI); + Value *SelectValue = + ConvertTwoCaseSwitch(UniqueResults, DefaultResult, Cond, Builder); + if (SelectValue) { + RemoveSwitchAfterSelectConversion(SI, PHI, SelectValue, Builder); + return true; + } + // The switch couldn't be converted into a select. + return false; +} + +namespace { + +/// This class represents a lookup table that can be used to replace a switch. +class SwitchLookupTable { +public: + /// Create a lookup table to use as a switch replacement with the contents + /// of Values, using DefaultValue to fill any holes in the table. + SwitchLookupTable( + Module &M, uint64_t TableSize, ConstantInt *Offset, + const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values, + Constant *DefaultValue, const DataLayout &DL); + + /// Build instructions with Builder to retrieve the value at + /// the position given by Index in the lookup table. + Value *BuildLookup(Value *Index, IRBuilder<> &Builder); + + /// Return true if a table with TableSize elements of + /// type ElementType would fit in a target-legal register. + static bool WouldFitInRegister(const DataLayout &DL, uint64_t TableSize, + Type *ElementType); + +private: + // Depending on the contents of the table, it can be represented in + // different ways. + enum { + // For tables where each element contains the same value, we just have to + // store that single value and return it for each lookup. + SingleValueKind, + + // For tables where there is a linear relationship between table index + // and values. We calculate the result with a simple multiplication + // and addition instead of a table lookup. + LinearMapKind, + + // For small tables with integer elements, we can pack them into a bitmap + // that fits into a target-legal register. Values are retrieved by + // shift and mask operations. + BitMapKind, + + // The table is stored as an array of values. Values are retrieved by load + // instructions from the table. + ArrayKind + } Kind; + + // For SingleValueKind, this is the single value. + Constant *SingleValue; + + // For BitMapKind, this is the bitmap. + ConstantInt *BitMap; + IntegerType *BitMapElementTy; + + // For LinearMapKind, these are the constants used to derive the value. + ConstantInt *LinearOffset; + ConstantInt *LinearMultiplier; + + // For ArrayKind, this is the array. 
+ GlobalVariable *Array; +}; + +} // end anonymous namespace + +SwitchLookupTable::SwitchLookupTable( + Module &M, uint64_t TableSize, ConstantInt *Offset, + const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values, + Constant *DefaultValue, const DataLayout &DL) + : SingleValue(nullptr), BitMap(nullptr), BitMapElementTy(nullptr), + LinearOffset(nullptr), LinearMultiplier(nullptr), Array(nullptr) { + assert(Values.size() && "Can't build lookup table without values!"); + assert(TableSize >= Values.size() && "Can't fit values in table!"); + + // If all values in the table are equal, this is that value. + SingleValue = Values.begin()->second; + + Type *ValueType = Values.begin()->second->getType(); + + // Build up the table contents. + SmallVector<Constant *, 64> TableContents(TableSize); + for (size_t I = 0, E = Values.size(); I != E; ++I) { + ConstantInt *CaseVal = Values[I].first; + Constant *CaseRes = Values[I].second; + assert(CaseRes->getType() == ValueType); + + uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue(); + TableContents[Idx] = CaseRes; + + if (CaseRes != SingleValue) + SingleValue = nullptr; + } + + // Fill in any holes in the table with the default result. + if (Values.size() < TableSize) { + assert(DefaultValue && + "Need a default value to fill the lookup table holes."); + assert(DefaultValue->getType() == ValueType); + for (uint64_t I = 0; I < TableSize; ++I) { + if (!TableContents[I]) + TableContents[I] = DefaultValue; + } + + if (DefaultValue != SingleValue) + SingleValue = nullptr; + } + + // If each element in the table contains the same value, we only need to store + // that single value. + if (SingleValue) { + Kind = SingleValueKind; + return; + } + + // Check if we can derive the value with a linear transformation from the + // table index. + if (isa<IntegerType>(ValueType)) { + bool LinearMappingPossible = true; + APInt PrevVal; + APInt DistToPrev; + assert(TableSize >= 2 && "Should be a SingleValue table."); + // Check if there is the same distance between two consecutive values. + for (uint64_t I = 0; I < TableSize; ++I) { + ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[I]); + if (!ConstVal) { + // This is an undef. We could deal with it, but undefs in lookup tables + // are very seldom. It's probably not worth the additional complexity. + LinearMappingPossible = false; + break; + } + APInt Val = ConstVal->getValue(); + if (I != 0) { + APInt Dist = Val - PrevVal; + if (I == 1) { + DistToPrev = Dist; + } else if (Dist != DistToPrev) { + LinearMappingPossible = false; + break; + } + } + PrevVal = Val; + } + if (LinearMappingPossible) { + LinearOffset = cast<ConstantInt>(TableContents[0]); + LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev); + Kind = LinearMapKind; + ++NumLinearMaps; + return; + } + } + + // If the type is integer and the table fits in a register, build a bitmap. + if (WouldFitInRegister(DL, TableSize, ValueType)) { + IntegerType *IT = cast<IntegerType>(ValueType); + APInt TableInt(TableSize * IT->getBitWidth(), 0); + for (uint64_t I = TableSize; I > 0; --I) { + TableInt <<= IT->getBitWidth(); + // Insert values into the bitmap. Undef values are set to zero. 
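+ // (As a hypothetical sketch of the encoding: four i8 results
+ // {0x11, 0x22, 0x33, 0x44} for indices 0..3 pack into the i32 constant
+ // 0x44332211, i.e. element 0 ends up in the low bits, and BuildLookup
+ // recovers element N with a lshr by 8*N followed by a trunc to i8.)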
+ if (!isa<UndefValue>(TableContents[I - 1])) { + ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]); + TableInt |= Val->getValue().zext(TableInt.getBitWidth()); + } + } + BitMap = ConstantInt::get(M.getContext(), TableInt); + BitMapElementTy = IT; + Kind = BitMapKind; + ++NumBitMaps; + return; + } + + // Store the table in an array. + ArrayType *ArrayTy = ArrayType::get(ValueType, TableSize); + Constant *Initializer = ConstantArray::get(ArrayTy, TableContents); + + Array = new GlobalVariable(M, ArrayTy, /*constant=*/true, + GlobalVariable::PrivateLinkage, Initializer, + "switch.table"); + Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + Kind = ArrayKind; +} + +Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) { + switch (Kind) { + case SingleValueKind: + return SingleValue; + case LinearMapKind: { + // Derive the result value from the input value. + Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(), + false, "switch.idx.cast"); + if (!LinearMultiplier->isOne()) + Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult"); + if (!LinearOffset->isZero()) + Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset"); + return Result; + } + case BitMapKind: { + // Type of the bitmap (e.g. i59). + IntegerType *MapTy = BitMap->getType(); + + // Cast Index to the same type as the bitmap. + // Note: The Index is <= the number of elements in the table, so + // truncating it to the width of the bitmask is safe. + Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast"); + + // Multiply the shift amount by the element width. + ShiftAmt = Builder.CreateMul( + ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()), + "switch.shiftamt"); + + // Shift down. + Value *DownShifted = + Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift"); + // Mask off. + return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked"); + } + case ArrayKind: { + // Make sure the table index will not overflow when treated as signed. + IntegerType *IT = cast<IntegerType>(Index->getType()); + uint64_t TableSize = + Array->getInitializer()->getType()->getArrayNumElements(); + if (TableSize > (1ULL << (IT->getBitWidth() - 1))) + Index = Builder.CreateZExt( + Index, IntegerType::get(IT->getContext(), IT->getBitWidth() + 1), + "switch.tableidx.zext"); + + Value *GEPIndices[] = {Builder.getInt32(0), Index}; + Value *GEP = Builder.CreateInBoundsGEP(Array->getValueType(), Array, + GEPIndices, "switch.gep"); + return Builder.CreateLoad(GEP, "switch.load"); + } + } + llvm_unreachable("Unknown lookup table kind!"); +} + +bool SwitchLookupTable::WouldFitInRegister(const DataLayout &DL, + uint64_t TableSize, + Type *ElementType) { + auto *IT = dyn_cast<IntegerType>(ElementType); + if (!IT) + return false; + // FIXME: If the type is wider than it needs to be, e.g. i8 but all values + // are <= 15, we could try to narrow the type. + + // Avoid overflow, fitsInLegalInteger uses unsigned int for the width. + if (TableSize >= UINT_MAX / IT->getBitWidth()) + return false; + return DL.fitsInLegalInteger(TableSize * IT->getBitWidth()); +} + +/// Determine whether a lookup table should be built for this switch, based on +/// the number of cases, size of the table, and the types of the results. 
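+///
+/// For instance (hypothetical numbers): five cases spanning the values 0..10
+/// give TableSize = 11, and 5 * 10 >= 11 * 4, so the ~40% density check below
+/// passes even though six entries are default-filled holes; the same five
+/// cases spread over 0..63 fail it unless every result type fits in a legal
+/// register.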
+static bool +ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, + const TargetTransformInfo &TTI, const DataLayout &DL, + const SmallDenseMap<PHINode *, Type *> &ResultTypes) { + if (SI->getNumCases() > TableSize || TableSize >= UINT64_MAX / 10) + return false; // TableSize overflowed, or mul below might overflow. + + bool AllTablesFitInRegister = true; + bool HasIllegalType = false; + for (const auto &I : ResultTypes) { + Type *Ty = I.second; + + // Saturate this flag to true. + HasIllegalType = HasIllegalType || !TTI.isTypeLegal(Ty); + + // Saturate this flag to false. + AllTablesFitInRegister = + AllTablesFitInRegister && + SwitchLookupTable::WouldFitInRegister(DL, TableSize, Ty); + + // If both flags saturate, we're done. NOTE: This *only* works with + // saturating flags, and all flags have to saturate first due to the + // non-deterministic behavior of iterating over a dense map. + if (HasIllegalType && !AllTablesFitInRegister) + break; + } + + // If each table would fit in a register, we should build it anyway. + if (AllTablesFitInRegister) + return true; + + // Don't build a table that doesn't fit in-register if it has illegal types. + if (HasIllegalType) + return false; + + // The table density should be at least 40%. This is the same criterion as for + // jump tables, see SelectionDAGBuilder::handleJTSwitchCase. + // FIXME: Find the best cut-off. + return SI->getNumCases() * 10 >= TableSize * 4; +} + +/// Try to reuse the switch table index compare. Following pattern: +/// \code +/// if (idx < tablesize) +/// r = table[idx]; // table does not contain default_value +/// else +/// r = default_value; +/// if (r != default_value) +/// ... +/// \endcode +/// Is optimized to: +/// \code +/// cond = idx < tablesize; +/// if (cond) +/// r = table[idx]; +/// else +/// r = default_value; +/// if (cond) +/// ... +/// \endcode +/// Jump threading will then eliminate the second if(cond). +static void reuseTableCompare( + User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch, + Constant *DefaultValue, + const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) { + + ICmpInst *CmpInst = dyn_cast<ICmpInst>(PhiUser); + if (!CmpInst) + return; + + // We require that the compare is in the same block as the phi so that jump + // threading can do its work afterwards. + if (CmpInst->getParent() != PhiBlock) + return; + + Constant *CmpOp1 = dyn_cast<Constant>(CmpInst->getOperand(1)); + if (!CmpOp1) + return; + + Value *RangeCmp = RangeCheckBranch->getCondition(); + Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType()); + Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType()); + + // Check if the compare with the default value is constant true or false. + Constant *DefaultConst = ConstantExpr::getICmp(CmpInst->getPredicate(), + DefaultValue, CmpOp1, true); + if (DefaultConst != TrueConst && DefaultConst != FalseConst) + return; + + // Check if the compare with the case values is distinct from the default + // compare result. + for (auto ValuePair : Values) { + Constant *CaseConst = ConstantExpr::getICmp(CmpInst->getPredicate(), + ValuePair.second, CmpOp1, true); + if (!CaseConst || CaseConst == DefaultConst) + return; + assert((CaseConst == TrueConst || CaseConst == FalseConst) && + "Expect true or false as compare result."); + } + + // Check if the branch instruction dominates the phi node. It's a simple + // dominance check, but sufficient for our needs. 
+ // Although this check is invariant in the calling loops, it's better to do it + // at this late stage. Practically we do it at most once for a switch. + BasicBlock *BranchBlock = RangeCheckBranch->getParent(); + for (auto PI = pred_begin(PhiBlock), E = pred_end(PhiBlock); PI != E; ++PI) { + BasicBlock *Pred = *PI; + if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock) + return; + } + + if (DefaultConst == FalseConst) { + // The compare yields the same result. We can replace it. + CmpInst->replaceAllUsesWith(RangeCmp); + ++NumTableCmpReuses; + } else { + // The compare yields the same result, just inverted. We can replace it. + Value *InvertedTableCmp = BinaryOperator::CreateXor( + RangeCmp, ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp", + RangeCheckBranch); + CmpInst->replaceAllUsesWith(InvertedTableCmp); + ++NumTableCmpReuses; + } +} + +/// If the switch is only used to initialize one or more phi nodes in a common +/// successor block with different constant values, replace the switch with +/// lookup tables. +static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, + const DataLayout &DL, + const TargetTransformInfo &TTI) { + assert(SI->getNumCases() > 1 && "Degenerate switch?"); + + // Only build lookup table when we have a target that supports it. + if (!TTI.shouldBuildLookupTables()) + return false; + + // FIXME: If the switch is too sparse for a lookup table, perhaps we could + // split off a dense part and build a lookup table for that. + + // FIXME: This creates arrays of GEPs to constant strings, which means each + // GEP needs a runtime relocation in PIC code. We should just build one big + // string and lookup indices into that. + + // Ignore switches with less than three cases. Lookup tables will not make + // them + // faster, so we don't analyze them. + if (SI->getNumCases() < 3) + return false; + + // Figure out the corresponding result for each case value and phi node in the + // common destination, as well as the min and max case values. + assert(SI->case_begin() != SI->case_end()); + SwitchInst::CaseIt CI = SI->case_begin(); + ConstantInt *MinCaseVal = CI.getCaseValue(); + ConstantInt *MaxCaseVal = CI.getCaseValue(); + + BasicBlock *CommonDest = nullptr; + typedef SmallVector<std::pair<ConstantInt *, Constant *>, 4> ResultListTy; + SmallDenseMap<PHINode *, ResultListTy> ResultLists; + SmallDenseMap<PHINode *, Constant *> DefaultResults; + SmallDenseMap<PHINode *, Type *> ResultTypes; + SmallVector<PHINode *, 4> PHIs; + + for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) { + ConstantInt *CaseVal = CI.getCaseValue(); + if (CaseVal->getValue().slt(MinCaseVal->getValue())) + MinCaseVal = CaseVal; + if (CaseVal->getValue().sgt(MaxCaseVal->getValue())) + MaxCaseVal = CaseVal; + + // Resulting value at phi nodes for this case value. + typedef SmallVector<std::pair<PHINode *, Constant *>, 4> ResultsTy; + ResultsTy Results; + if (!GetCaseResults(SI, CaseVal, CI.getCaseSuccessor(), &CommonDest, + Results, DL, TTI)) + return false; + + // Append the result from this case to the list for each phi. + for (const auto &I : Results) { + PHINode *PHI = I.first; + Constant *Value = I.second; + if (!ResultLists.count(PHI)) + PHIs.push_back(PHI); + ResultLists[PHI].push_back(std::make_pair(CaseVal, Value)); + } + } + + // Keep track of the result types. 
+ for (PHINode *PHI : PHIs) { + ResultTypes[PHI] = ResultLists[PHI][0].second->getType(); + } + + uint64_t NumResults = ResultLists[PHIs[0]].size(); + APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue(); + uint64_t TableSize = RangeSpread.getLimitedValue() + 1; + bool TableHasHoles = (NumResults < TableSize); + + // If the table has holes, we need a constant result for the default case + // or a bitmask that fits in a register. + SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList; + bool HasDefaultResults = + GetCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, + DefaultResultsList, DL, TTI); + + bool NeedMask = (TableHasHoles && !HasDefaultResults); + if (NeedMask) { + // As an extra penalty for the validity test we require more cases. + if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark). + return false; + if (!DL.fitsInLegalInteger(TableSize)) + return false; + } + + for (const auto &I : DefaultResultsList) { + PHINode *PHI = I.first; + Constant *Result = I.second; + DefaultResults[PHI] = Result; + } + + if (!ShouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes)) + return false; + + // Create the BB that does the lookups. + Module &Mod = *CommonDest->getParent()->getParent(); + BasicBlock *LookupBB = BasicBlock::Create( + Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest); + + // Compute the table index value. + Builder.SetInsertPoint(SI); + Value *TableIndex = + Builder.CreateSub(SI->getCondition(), MinCaseVal, "switch.tableidx"); + + // Compute the maximum table size representable by the integer type we are + // switching upon. + unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits(); + uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize; + assert(MaxTableSize >= TableSize && + "It is impossible for a switch to have more entries than the max " + "representable value of its input integer type's size."); + + // If the default destination is unreachable, or if the lookup table covers + // all values of the conditional variable, branch directly to the lookup table + // BB. Otherwise, check that the condition is within the case range. + const bool DefaultIsReachable = + !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg()); + const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize); + BranchInst *RangeCheckBranch = nullptr; + + if (!DefaultIsReachable || GeneratingCoveredLookupTable) { + Builder.CreateBr(LookupBB); + // Note: We call removeProdecessor later since we need to be able to get the + // PHI value for the default case in case we're using a bit mask. + } else { + Value *Cmp = Builder.CreateICmpULT( + TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize)); + RangeCheckBranch = + Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest()); + } + + // Populate the BB that does the lookups. + Builder.SetInsertPoint(LookupBB); + + if (NeedMask) { + // Before doing the lookup we do the hole check. + // The LookupBB is therefore re-purposed to do the hole check + // and we create a new LookupBB. + BasicBlock *MaskBB = LookupBB; + MaskBB->setName("switch.hole_check"); + LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup", + CommonDest->getParent(), CommonDest); + + // Make the mask's bitwidth at least 8bit and a power-of-2 to avoid + // unnecessary illegal types. 
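+ // (For example, a hypothetical TableSize of 5 gives NextPowerOf2(7) == 8 and
+ // a TableSize of 9 gives NextPowerOf2(8) == 16, so the mask width is always
+ // a power of two, at least 8 bits, and wide enough for every table index.)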
+ uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL)); + APInt MaskInt(TableSizePowOf2, 0); + APInt One(TableSizePowOf2, 1); + // Build bitmask; fill in a 1 bit for every case. + const ResultListTy &ResultList = ResultLists[PHIs[0]]; + for (size_t I = 0, E = ResultList.size(); I != E; ++I) { + uint64_t Idx = (ResultList[I].first->getValue() - MinCaseVal->getValue()) + .getLimitedValue(); + MaskInt |= One << Idx; + } + ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt); + + // Get the TableIndex'th bit of the bitmask. + // If this bit is 0 (meaning hole) jump to the default destination, + // else continue with table lookup. + IntegerType *MapTy = TableMask->getType(); + Value *MaskIndex = + Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex"); + Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted"); + Value *LoBit = Builder.CreateTrunc( + Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit"); + Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest()); + + Builder.SetInsertPoint(LookupBB); + AddPredecessorToBlock(SI->getDefaultDest(), MaskBB, SI->getParent()); + } + + if (!DefaultIsReachable || GeneratingCoveredLookupTable) { + // We cached PHINodes in PHIs, to avoid accessing deleted PHINodes later, + // do not delete PHINodes here. + SI->getDefaultDest()->removePredecessor(SI->getParent(), + /*DontDeleteUselessPHIs=*/true); + } + + bool ReturnedEarly = false; + for (size_t I = 0, E = PHIs.size(); I != E; ++I) { + PHINode *PHI = PHIs[I]; + const ResultListTy &ResultList = ResultLists[PHI]; + + // If using a bitmask, use any value to fill the lookup table holes. + Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI]; + SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultList, DV, DL); + + Value *Result = Table.BuildLookup(TableIndex, Builder); + + // If the result is used to return immediately from the function, we want to + // do that right here. + if (PHI->hasOneUse() && isa<ReturnInst>(*PHI->user_begin()) && + PHI->user_back() == CommonDest->getFirstNonPHIOrDbg()) { + Builder.CreateRet(Result); + ReturnedEarly = true; + break; + } + + // Do a small peephole optimization: re-use the switch table compare if + // possible. + if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) { + BasicBlock *PhiBlock = PHI->getParent(); + // Search for compare instructions which use the phi. + for (auto *User : PHI->users()) { + reuseTableCompare(User, PhiBlock, RangeCheckBranch, DV, ResultList); + } + } + + PHI->addIncoming(Result, LookupBB); + } + + if (!ReturnedEarly) + Builder.CreateBr(CommonDest); + + // Remove the switch. + for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) { + BasicBlock *Succ = SI->getSuccessor(i); + + if (Succ == SI->getDefaultDest()) + continue; + Succ->removePredecessor(SI->getParent()); + } + SI->eraseFromParent(); + + ++NumLookupTables; + if (NeedMask) + ++NumLookupTablesHoles; + return true; +} + +static bool isSwitchDense(ArrayRef<int64_t> Values) { + // See also SelectionDAGBuilder::isDense(), which this function was based on. + uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front(); + uint64_t Range = Diff + 1; + uint64_t NumCases = Values.size(); + // 40% is the default density for building a jump table in optsize/minsize mode. + uint64_t MinDensity = 40; + + return NumCases * 100 >= Range * MinDensity; +} + +// Try and transform a switch that has "holes" in it to a contiguous sequence +// of cases. 
+// +// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be +// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}. +// +// This converts a sparse switch into a dense switch which allows better +// lowering and could also allow transforming into a lookup table. +static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, + const DataLayout &DL, + const TargetTransformInfo &TTI) { + auto *CondTy = cast<IntegerType>(SI->getCondition()->getType()); + if (CondTy->getIntegerBitWidth() > 64 || + !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth())) + return false; + // Only bother with this optimization if there are more than 3 switch cases; + // SDAG will only bother creating jump tables for 4 or more cases. + if (SI->getNumCases() < 4) + return false; + + // This transform is agnostic to the signedness of the input or case values. We + // can treat the case values as signed or unsigned. We can optimize more common + // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values + // as signed. + SmallVector<int64_t,4> Values; + for (auto &C : SI->cases()) + Values.push_back(C.getCaseValue()->getValue().getSExtValue()); + std::sort(Values.begin(), Values.end()); + + // If the switch is already dense, there's nothing useful to do here. + if (isSwitchDense(Values)) + return false; + + // First, transform the values such that they start at zero and ascend. + int64_t Base = Values[0]; + for (auto &V : Values) + V -= Base; + + // Now we have signed numbers that have been shifted so that, given enough + // precision, there are no negative values. Since the rest of the transform + // is bitwise only, we switch now to an unsigned representation. + uint64_t GCD = 0; + for (auto &V : Values) + GCD = GreatestCommonDivisor64(GCD, (uint64_t)V); + + // This transform can be done speculatively because it is so cheap - it results + // in a single rotate operation being inserted. This can only happen if the + // factor extracted is a power of 2. + // FIXME: If the GCD is an odd number we can multiply by the multiplicative + // inverse of GCD and then perform this transform. + // FIXME: It's possible that optimizing a switch on powers of two might also + // be beneficial - flag values are often powers of two and we could use a CLZ + // as the key function. + if (GCD <= 1 || !isPowerOf2_64(GCD)) + // No common divisor found or too expensive to compute key function. + return false; + + unsigned Shift = Log2_64(GCD); + for (auto &V : Values) + V = (int64_t)((uint64_t)V >> Shift); + + if (!isSwitchDense(Values)) + // Transform didn't create a dense switch. + return false; + + // The obvious transform is to shift the switch condition right and emit a + // check that the condition actually cleanly divided by GCD, i.e. + // C & (1 << Shift - 1) == 0 + // inserting a new CFG edge to handle the case where it didn't divide cleanly. + // + // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the + // shift and puts the shifted-off bits in the uppermost bits. If any of these + // are nonzero then the switch condition will be very large and will hit the + // default case. 
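+ //
+ // A hypothetical worked example with i32 cases {2, 4, 6, 8}: Base is 2, the
+ // rebased values are {0, 2, 4, 6}, the GCD is 2, so Shift is 1 and the cases
+ // become {0, 1, 2, 3}. A condition of 5 rebases to 3, and ROTR(3, 1) is
+ // 0x80000001, which matches no case and falls through to the default.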
+ + auto *Ty = cast<IntegerType>(SI->getCondition()->getType()); + Builder.SetInsertPoint(SI); + auto *ShiftC = ConstantInt::get(Ty, Shift); + auto *Sub = Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base)); + auto *LShr = Builder.CreateLShr(Sub, ShiftC); + auto *Shl = Builder.CreateShl(Sub, Ty->getBitWidth() - Shift); + auto *Rot = Builder.CreateOr(LShr, Shl); + SI->replaceUsesOfWith(SI->getCondition(), Rot); + + for (SwitchInst::CaseIt C = SI->case_begin(), E = SI->case_end(); C != E; + ++C) { + auto *Orig = C.getCaseValue(); + auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base); + C.setValue( + cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(ShiftC->getValue())))); + } + return true; +} + +bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { + BasicBlock *BB = SI->getParent(); + + if (isValueEqualityComparison(SI)) { + // If we only have one predecessor, and if it is a branch on this value, + // see if that predecessor totally determines the outcome of this switch. + if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) + if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + + Value *Cond = SI->getCondition(); + if (SelectInst *Select = dyn_cast<SelectInst>(Cond)) + if (SimplifySwitchOnSelect(SI, Select)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + + // If the block only contains the switch, see if we can fold the block + // away into any preds. + BasicBlock::iterator BBI = BB->begin(); + // Ignore dbg intrinsics. + while (isa<DbgInfoIntrinsic>(BBI)) + ++BBI; + if (SI == &*BBI) + if (FoldValueComparisonIntoPredecessors(SI, Builder)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + } + + // Try to transform the switch into an icmp and a branch. + if (TurnSwitchRangeIntoICmp(SI, Builder)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + + // Remove unreachable cases. + if (EliminateDeadSwitchCases(SI, AC, DL)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + + if (SwitchToSelect(SI, Builder, AC, DL, TTI)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + + if (ForwardSwitchConditionToPHI(SI)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + + if (SwitchToLookupTable(SI, Builder, DL, TTI)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + + if (ReduceSwitchRange(SI, Builder, DL, TTI)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + + return false; +} + +bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { + BasicBlock *BB = IBI->getParent(); + bool Changed = false; + + // Eliminate redundant destinations. + SmallPtrSet<Value *, 8> Succs; + for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { + BasicBlock *Dest = IBI->getDestination(i); + if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) { + Dest->removePredecessor(BB); + IBI->removeDestination(i); + --i; + --e; + Changed = true; + } + } + + if (IBI->getNumDestinations() == 0) { + // If the indirectbr has no successors, change it to unreachable. + new UnreachableInst(IBI->getContext(), IBI); + EraseTerminatorInstAndDCECond(IBI); + return true; + } + + if (IBI->getNumDestinations() == 1) { + // If the indirectbr has one successor, change it to a direct branch. 
+ BranchInst::Create(IBI->getDestination(0), IBI);
+ EraseTerminatorInstAndDCECond(IBI);
+ return true;
+ }
+
+ if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
+ if (SimplifyIndirectBrOnSelect(IBI, SI))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+ }
+ return Changed;
+}
+
+/// Given a block with only a single landing pad and an unconditional branch,
+/// try to find another basic block which this one can be merged with. This
+/// handles cases where we have multiple invokes with unique landing pads, but
+/// a shared handler.
+///
+/// We specifically choose to not worry about merging non-empty blocks
+/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
+/// practice, the optimizer produces empty landing pad blocks quite frequently
+/// when dealing with exception-dense code. (see: instcombine, gvn, if-else
+/// sinking in this file)
+///
+/// This is primarily a code size optimization. We need to avoid performing
+/// any transform which might inhibit optimization (such as our ability to
+/// specialize a particular handler via tail commoning). We do this by not
+/// merging any blocks which require us to introduce a phi. Since the same
+/// values are flowing through both blocks, we don't lose any ability to
+/// specialize. If anything, we make such specialization more likely.
+///
+/// TODO - This transformation could remove entries from a phi in the target
+/// block when the inputs in the phi are the same for the two blocks being
+/// merged. In some cases, this could result in removal of the PHI entirely.
+static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
+ BasicBlock *BB) {
+ auto Succ = BB->getUniqueSuccessor();
+ assert(Succ);
+ // If there's a phi in the successor block, we'd likely have to introduce
+ // a phi into the merged landing pad block.
+ if (isa<PHINode>(*Succ->begin()))
+ return false;
+
+ for (BasicBlock *OtherPred : predecessors(Succ)) {
+ if (BB == OtherPred)
+ continue;
+ BasicBlock::iterator I = OtherPred->begin();
+ LandingPadInst *LPad2 = dyn_cast<LandingPadInst>(I);
+ if (!LPad2 || !LPad2->isIdenticalTo(LPad))
+ continue;
+ for (++I; isa<DbgInfoIntrinsic>(I); ++I) {
+ }
+ BranchInst *BI2 = dyn_cast<BranchInst>(I);
+ if (!BI2 || !BI2->isIdenticalTo(BI))
+ continue;
+
+ // We've found an identical block. Update our predecessors to take that
+ // path instead and make ourselves dead.
+ SmallSet<BasicBlock *, 16> Preds;
+ Preds.insert(pred_begin(BB), pred_end(BB));
+ for (BasicBlock *Pred : Preds) {
+ InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
+ assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
+ "unexpected successor");
+ II->setUnwindDest(OtherPred);
+ }
+
+ // The debug info in OtherPred doesn't cover the merged control flow that
+ // used to go through BB. We need to delete it or update it.
+ for (auto I = OtherPred->begin(), E = OtherPred->end(); I != E;) { + Instruction &Inst = *I; + I++; + if (isa<DbgInfoIntrinsic>(Inst)) + Inst.eraseFromParent(); + } + + SmallSet<BasicBlock *, 16> Succs; + Succs.insert(succ_begin(BB), succ_end(BB)); + for (BasicBlock *Succ : Succs) { + Succ->removePredecessor(BB); + } + + IRBuilder<> Builder(BI); + Builder.CreateUnreachable(); + BI->eraseFromParent(); + return true; + } + return false; +} + +bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, + IRBuilder<> &Builder) { + BasicBlock *BB = BI->getParent(); + + if (SinkCommon && SinkThenElseCodeToEnd(BI)) + return true; + + // If the Terminator is the only non-phi instruction, simplify the block. + // if LoopHeader is provided, check if the block is a loop header + // (This is for early invocations before loop simplify and vectorization + // to keep canonical loop forms for nested loops. + // These blocks can be eliminated when the pass is invoked later + // in the back-end.) + BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator(); + if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() && + (!LoopHeaders || !LoopHeaders->count(BB)) && + TryToSimplifyUncondBranchFromEmptyBlock(BB)) + return true; + + // If the only instruction in the block is a seteq/setne comparison + // against a constant, try to simplify the block. + if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) + if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) { + for (++I; isa<DbgInfoIntrinsic>(I); ++I) + ; + if (I->isTerminator() && + TryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, DL, TTI, + BonusInstThreshold, AC)) + return true; + } + + // See if we can merge an empty landing pad block with another which is + // equivalent. + if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) { + for (++I; isa<DbgInfoIntrinsic>(I); ++I) { + } + if (I->isTerminator() && TryToMergeLandingPad(LPad, BI, BB)) + return true; + } + + // If this basic block is ONLY a compare and a branch, and if a predecessor + // branches to us and our successor, fold the comparison into the + // predecessor and use logical operations to update the incoming value + // for PHI nodes in common successor. + if (FoldBranchToCommonDest(BI, BonusInstThreshold)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + return false; +} + +static BasicBlock *allPredecessorsComeFromSameSource(BasicBlock *BB) { + BasicBlock *PredPred = nullptr; + for (auto *P : predecessors(BB)) { + BasicBlock *PPred = P->getSinglePredecessor(); + if (!PPred || (PredPred && PredPred != PPred)) + return nullptr; + PredPred = PPred; + } + return PredPred; +} + +bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { + BasicBlock *BB = BI->getParent(); + + // Conditional branch + if (isValueEqualityComparison(BI)) { + // If we only have one predecessor, and if it is a branch on this value, + // see if that predecessor totally determines the outcome of this + // switch. + if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) + if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + + // This block must be empty, except for the setcond inst, if it exists. + // Ignore dbg intrinsics. + BasicBlock::iterator I = BB->begin(); + // Ignore dbg intrinsics. 
+ while (isa<DbgInfoIntrinsic>(I)) + ++I; + if (&*I == BI) { + if (FoldValueComparisonIntoPredecessors(BI, Builder)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + } else if (&*I == cast<Instruction>(BI->getCondition())) { + ++I; + // Ignore dbg intrinsics. + while (isa<DbgInfoIntrinsic>(I)) + ++I; + if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + } + } + + // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction. + if (SimplifyBranchOnICmpChain(BI, Builder, DL)) + return true; + + // If this basic block has a single dominating predecessor block and the + // dominating block's condition implies BI's condition, we know the direction + // of the BI branch. + if (BasicBlock *Dom = BB->getSinglePredecessor()) { + auto *PBI = dyn_cast_or_null<BranchInst>(Dom->getTerminator()); + if (PBI && PBI->isConditional() && + PBI->getSuccessor(0) != PBI->getSuccessor(1) && + (PBI->getSuccessor(0) == BB || PBI->getSuccessor(1) == BB)) { + bool CondIsFalse = PBI->getSuccessor(1) == BB; + Optional<bool> Implication = isImpliedCondition( + PBI->getCondition(), BI->getCondition(), DL, CondIsFalse); + if (Implication) { + // Turn this into a branch on constant. + auto *OldCond = BI->getCondition(); + ConstantInt *CI = *Implication + ? ConstantInt::getTrue(BB->getContext()) + : ConstantInt::getFalse(BB->getContext()); + BI->setCondition(CI); + RecursivelyDeleteTriviallyDeadInstructions(OldCond); + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + } + } + } + + // If this basic block is ONLY a compare and a branch, and if a predecessor + // branches to us and one of our successors, fold the comparison into the + // predecessor and use logical operations to pick the right destination. + if (FoldBranchToCommonDest(BI, BonusInstThreshold)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + + // We have a conditional branch to two blocks that are only reachable + // from BI. We know that the condbr dominates the two blocks, so see if + // there is any identical code in the "then" and "else" blocks. If so, we + // can hoist it up to the branching block. + if (BI->getSuccessor(0)->getSinglePredecessor()) { + if (BI->getSuccessor(1)->getSinglePredecessor()) { + if (HoistThenElseCodeToIf(BI, TTI)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + } else { + // If Successor #1 has multiple preds, we may be able to conditionally + // execute Successor #0 if it branches to Successor #1. + TerminatorInst *Succ0TI = BI->getSuccessor(0)->getTerminator(); + if (Succ0TI->getNumSuccessors() == 1 && + Succ0TI->getSuccessor(0) == BI->getSuccessor(1)) + if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), TTI)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + } + } else if (BI->getSuccessor(1)->getSinglePredecessor()) { + // If Successor #0 has multiple preds, we may be able to conditionally + // execute Successor #1 if it branches to Successor #0. + TerminatorInst *Succ1TI = BI->getSuccessor(1)->getTerminator(); + if (Succ1TI->getNumSuccessors() == 1 && + Succ1TI->getSuccessor(0) == BI->getSuccessor(0)) + if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), TTI)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + } + + // If this is a branch on a phi node in the current block, thread control + // through this block if any PHI node entries are constants. 
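+ // (A minimal sketch with hypothetical IR:
+ //    %c = phi i1 [ true, %a ], [ %x, %b ]
+ //    br i1 %c, label %t, label %f
+ // the edge coming from %a always ends up in %t, so %a can be retargeted to
+ // branch there directly instead of passing through this block.)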
+ if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition())) + if (PN->getParent() == BI->getParent()) + if (FoldCondBranchOnPHI(BI, DL)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + + // Scan predecessor blocks for conditional branches. + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) + if (PBI != BI && PBI->isConditional()) + if (SimplifyCondBranchToCondBranch(PBI, BI, DL)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + + // Look for diamond patterns. + if (MergeCondStores) + if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB)) + if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator())) + if (PBI != BI && PBI->isConditional()) + if (mergeConditionalStores(PBI, BI)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + + return false; +} + +/// Check if passing a value to an instruction will cause undefined behavior. +static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) { + Constant *C = dyn_cast<Constant>(V); + if (!C) + return false; + + if (I->use_empty()) + return false; + + if (C->isNullValue() || isa<UndefValue>(C)) { + // Only look at the first use, avoid hurting compile time with long uselists + User *Use = *I->user_begin(); + + // Now make sure that there are no instructions in between that can alter + // control flow (eg. calls) + for (BasicBlock::iterator + i = ++BasicBlock::iterator(I), + UI = BasicBlock::iterator(dyn_cast<Instruction>(Use)); + i != UI; ++i) + if (i == I->getParent()->end() || i->mayHaveSideEffects()) + return false; + + // Look through GEPs. A load from a GEP derived from NULL is still undefined + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Use)) + if (GEP->getPointerOperand() == I) + return passingValueIsAlwaysUndefined(V, GEP); + + // Look through bitcasts. + if (BitCastInst *BC = dyn_cast<BitCastInst>(Use)) + return passingValueIsAlwaysUndefined(V, BC); + + // Load from null is undefined. + if (LoadInst *LI = dyn_cast<LoadInst>(Use)) + if (!LI->isVolatile()) + return LI->getPointerAddressSpace() == 0; + + // Store to null is undefined. + if (StoreInst *SI = dyn_cast<StoreInst>(Use)) + if (!SI->isVolatile()) + return SI->getPointerAddressSpace() == 0 && + SI->getPointerOperand() == I; + + // A call to null is undefined. + if (auto CS = CallSite(Use)) + return CS.getCalledValue() == I; + } + return false; +} + +/// If BB has an incoming value that will always trigger undefined behavior +/// (eg. null pointer dereference), remove the branch leading here. +static bool removeUndefIntroducingPredecessor(BasicBlock *BB) { + for (BasicBlock::iterator i = BB->begin(); + PHINode *PHI = dyn_cast<PHINode>(i); ++i) + for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) + if (passingValueIsAlwaysUndefined(PHI->getIncomingValue(i), PHI)) { + TerminatorInst *T = PHI->getIncomingBlock(i)->getTerminator(); + IRBuilder<> Builder(T); + if (BranchInst *BI = dyn_cast<BranchInst>(T)) { + BB->removePredecessor(PHI->getIncomingBlock(i)); + // Turn uncoditional branches into unreachables and remove the dead + // destination from conditional branches. + if (BI->isUnconditional()) + Builder.CreateUnreachable(); + else + Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1) + : BI->getSuccessor(0)); + BI->eraseFromParent(); + return true; + } + // TODO: SwitchInst. 
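+ // (A minimal sketch with hypothetical IR: for
+ //    %p = phi i8* [ null, %a ], [ %q, %b ]
+ //    store i8 0, i8* %p
+ // the value coming from %a guarantees a store to null, so %a's terminator
+ // is turned into unreachable, or loses its edge into this block if it was
+ // a conditional branch.)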
+ } + + return false; +} + +bool SimplifyCFGOpt::run(BasicBlock *BB) { + bool Changed = false; + + assert(BB && BB->getParent() && "Block not embedded in function!"); + assert(BB->getTerminator() && "Degenerate basic block encountered!"); + + // Remove basic blocks that have no predecessors (except the entry block)... + // or that just have themself as a predecessor. These are unreachable. + if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) || + BB->getSinglePredecessor() == BB) { + DEBUG(dbgs() << "Removing BB: \n" << *BB); + DeleteDeadBlock(BB); + return true; + } + + // Check to see if we can constant propagate this terminator instruction + // away... + Changed |= ConstantFoldTerminator(BB, true); + + // Check for and eliminate duplicate PHI nodes in this block. + Changed |= EliminateDuplicatePHINodes(BB); + + // Check for and remove branches that will always cause undefined behavior. + Changed |= removeUndefIntroducingPredecessor(BB); + + // Merge basic blocks into their predecessor if there is only one distinct + // pred, and if there is only one distinct successor of the predecessor, and + // if there are no PHI nodes. + // + if (MergeBlockIntoPredecessor(BB)) + return true; + + IRBuilder<> Builder(BB); + + // If there is a trivial two-entry PHI node in this basic block, and we can + // eliminate it, do so now. + if (PHINode *PN = dyn_cast<PHINode>(BB->begin())) + if (PN->getNumIncomingValues() == 2) + Changed |= FoldTwoEntryPHINode(PN, TTI, DL); + + Builder.SetInsertPoint(BB->getTerminator()); + if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) { + if (BI->isUnconditional()) { + if (SimplifyUncondBranch(BI, Builder)) + return true; + } else { + if (SimplifyCondBranch(BI, Builder)) + return true; + } + } else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { + if (SimplifyReturn(RI, Builder)) + return true; + } else if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) { + if (SimplifyResume(RI, Builder)) + return true; + } else if (CleanupReturnInst *RI = + dyn_cast<CleanupReturnInst>(BB->getTerminator())) { + if (SimplifyCleanupReturn(RI)) + return true; + } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) { + if (SimplifySwitch(SI, Builder)) + return true; + } else if (UnreachableInst *UI = + dyn_cast<UnreachableInst>(BB->getTerminator())) { + if (SimplifyUnreachable(UI)) + return true; + } else if (IndirectBrInst *IBI = + dyn_cast<IndirectBrInst>(BB->getTerminator())) { + if (SimplifyIndirectBr(IBI)) + return true; + } + + return Changed; +} + +/// This function is used to do simplification of a CFG. +/// For example, it adjusts branches to branches to eliminate the extra hop, +/// eliminates unreachable basic blocks, and does other "peephole" optimization +/// of the CFG. It returns true if a modification was made. 
+/// +bool llvm::SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, + unsigned BonusInstThreshold, AssumptionCache *AC, + SmallPtrSetImpl<BasicBlock *> *LoopHeaders) { + return SimplifyCFGOpt(TTI, BB->getModule()->getDataLayout(), + BonusInstThreshold, AC, LoopHeaders) + .run(BB); +} diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp new file mode 100644 index 000000000000..6b1d3dc41330 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -0,0 +1,688 @@ +//===-- SimplifyIndVar.cpp - Induction variable simplification ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements induction variable simplification. It does +// not define any actual pass or policy, but provides a single function to +// simplify a loop's induction variables based on ScalarEvolution. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/SimplifyIndVar.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "indvars" + +STATISTIC(NumElimIdentity, "Number of IV identities eliminated"); +STATISTIC(NumElimOperand, "Number of IV operands folded into a use"); +STATISTIC(NumElimRem , "Number of IV remainder operations eliminated"); +STATISTIC(NumElimCmp , "Number of IV comparisons eliminated"); + +namespace { + /// This is a utility for simplifying induction variables + /// based on ScalarEvolution. It is the primary instrument of the + /// IndvarSimplify pass, but it may also be directly invoked to cleanup after + /// other loop passes that preserve SCEV. + class SimplifyIndvar { + Loop *L; + LoopInfo *LI; + ScalarEvolution *SE; + DominatorTree *DT; + + SmallVectorImpl<WeakVH> &DeadInsts; + + bool Changed; + + public: + SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, DominatorTree *DT, + LoopInfo *LI,SmallVectorImpl<WeakVH> &Dead) + : L(Loop), LI(LI), SE(SE), DT(DT), DeadInsts(Dead), Changed(false) { + assert(LI && "IV simplification requires LoopInfo"); + } + + bool hasChanged() const { return Changed; } + + /// Iteratively perform simplification on a worklist of users of the + /// specified induction variable. This is the top-level driver that applies + /// all simplifications to users of an IV. 
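+ /// For instance (a hypothetical case): in a loop "for (i = 0; i != n; ++i)",
+ /// an in-loop guard "i < n" may be proven true by ScalarEvolution and
+ /// replaced with a constant, and "i % n" may collapse to plain "i", with the
+ /// dead instructions queued on DeadInsts.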
+ void simplifyUsers(PHINode *CurrIV, IVVisitor *V = nullptr); + + Value *foldIVUser(Instruction *UseInst, Instruction *IVOperand); + + bool eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand); + + bool eliminateOverflowIntrinsic(CallInst *CI); + bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand); + void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand); + void eliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand, + bool IsSigned); + bool strengthenOverflowingOperation(BinaryOperator *OBO, Value *IVOperand); + }; +} + +/// Fold an IV operand into its use. This removes increments of an +/// aligned IV when used by a instruction that ignores the low bits. +/// +/// IVOperand is guaranteed SCEVable, but UseInst may not be. +/// +/// Return the operand of IVOperand for this induction variable if IVOperand can +/// be folded (in case more folding opportunities have been exposed). +/// Otherwise return null. +Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) { + Value *IVSrc = nullptr; + unsigned OperIdx = 0; + const SCEV *FoldedExpr = nullptr; + switch (UseInst->getOpcode()) { + default: + return nullptr; + case Instruction::UDiv: + case Instruction::LShr: + // We're only interested in the case where we know something about + // the numerator and have a constant denominator. + if (IVOperand != UseInst->getOperand(OperIdx) || + !isa<ConstantInt>(UseInst->getOperand(1))) + return nullptr; + + // Attempt to fold a binary operator with constant operand. + // e.g. ((I + 1) >> 2) => I >> 2 + if (!isa<BinaryOperator>(IVOperand) + || !isa<ConstantInt>(IVOperand->getOperand(1))) + return nullptr; + + IVSrc = IVOperand->getOperand(0); + // IVSrc must be the (SCEVable) IV, since the other operand is const. + assert(SE->isSCEVable(IVSrc->getType()) && "Expect SCEVable IV operand"); + + ConstantInt *D = cast<ConstantInt>(UseInst->getOperand(1)); + if (UseInst->getOpcode() == Instruction::LShr) { + // Get a constant for the divisor. See createSCEV. + uint32_t BitWidth = cast<IntegerType>(UseInst->getType())->getBitWidth(); + if (D->getValue().uge(BitWidth)) + return nullptr; + + D = ConstantInt::get(UseInst->getContext(), + APInt::getOneBitSet(BitWidth, D->getZExtValue())); + } + FoldedExpr = SE->getUDivExpr(SE->getSCEV(IVSrc), SE->getSCEV(D)); + } + // We have something that might fold it's operand. Compare SCEVs. + if (!SE->isSCEVable(UseInst->getType())) + return nullptr; + + // Bypass the operand if SCEV can prove it has no effect. + if (SE->getSCEV(UseInst) != FoldedExpr) + return nullptr; + + DEBUG(dbgs() << "INDVARS: Eliminated IV operand: " << *IVOperand + << " -> " << *UseInst << '\n'); + + UseInst->setOperand(OperIdx, IVSrc); + assert(SE->getSCEV(UseInst) == FoldedExpr && "bad SCEV with folded oper"); + + ++NumElimOperand; + Changed = true; + if (IVOperand->use_empty()) + DeadInsts.emplace_back(IVOperand); + return IVSrc; +} + +/// SimplifyIVUsers helper for eliminating useless +/// comparisons against an induction variable. +void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) { + unsigned IVOperIdx = 0; + ICmpInst::Predicate Pred = ICmp->getPredicate(); + if (IVOperand != ICmp->getOperand(0)) { + // Swapped + assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand"); + IVOperIdx = 1; + Pred = ICmpInst::getSwappedPredicate(Pred); + } + + // Get the SCEVs for the ICmp operands. 
+ const SCEV *S = SE->getSCEV(ICmp->getOperand(IVOperIdx)); + const SCEV *X = SE->getSCEV(ICmp->getOperand(1 - IVOperIdx)); + + // Simplify unnecessary loops away. + const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent()); + S = SE->getSCEVAtScope(S, ICmpLoop); + X = SE->getSCEVAtScope(X, ICmpLoop); + + ICmpInst::Predicate InvariantPredicate; + const SCEV *InvariantLHS, *InvariantRHS; + + // If the condition is always true or always false, replace it with + // a constant value. + if (SE->isKnownPredicate(Pred, S, X)) { + ICmp->replaceAllUsesWith(ConstantInt::getTrue(ICmp->getContext())); + DeadInsts.emplace_back(ICmp); + DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); + } else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X)) { + ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext())); + DeadInsts.emplace_back(ICmp); + DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); + } else if (isa<PHINode>(IVOperand) && + SE->isLoopInvariantPredicate(Pred, S, X, L, InvariantPredicate, + InvariantLHS, InvariantRHS)) { + + // Rewrite the comparison to a loop invariant comparison if it can be done + // cheaply, where cheaply means "we don't need to emit any new + // instructions". + + Value *NewLHS = nullptr, *NewRHS = nullptr; + + if (S == InvariantLHS || X == InvariantLHS) + NewLHS = + ICmp->getOperand(S == InvariantLHS ? IVOperIdx : (1 - IVOperIdx)); + + if (S == InvariantRHS || X == InvariantRHS) + NewRHS = + ICmp->getOperand(S == InvariantRHS ? IVOperIdx : (1 - IVOperIdx)); + + auto *PN = cast<PHINode>(IVOperand); + for (unsigned i = 0, e = PN->getNumIncomingValues(); + i != e && (!NewLHS || !NewRHS); + ++i) { + + // If this is a value incoming from the backedge, then it cannot be a loop + // invariant value (since we know that IVOperand is an induction variable). + if (L->contains(PN->getIncomingBlock(i))) + continue; + + // NB! This following assert does not fundamentally have to be true, but + // it is true today given how SCEV analyzes induction variables. + // Specifically, today SCEV will *not* recognize %iv as an induction + // variable in the following case: + // + // define void @f(i32 %k) { + // entry: + // br i1 undef, label %r, label %l + // + // l: + // %k.inc.l = add i32 %k, 1 + // br label %loop + // + // r: + // %k.inc.r = add i32 %k, 1 + // br label %loop + // + // loop: + // %iv = phi i32 [ %k.inc.l, %l ], [ %k.inc.r, %r ], [ %iv.inc, %loop ] + // %iv.inc = add i32 %iv, 1 + // br label %loop + // } + // + // but if it starts to, at some point, then the assertion below will have + // to be changed to a runtime check. + + Value *Incoming = PN->getIncomingValue(i); + +#ifndef NDEBUG + if (auto *I = dyn_cast<Instruction>(Incoming)) + assert(DT->dominates(I, ICmp) && "Should be a unique loop dominating value!"); +#endif + + const SCEV *IncomingS = SE->getSCEV(Incoming); + + if (!NewLHS && IncomingS == InvariantLHS) + NewLHS = Incoming; + if (!NewRHS && IncomingS == InvariantRHS) + NewRHS = Incoming; + } + + if (!NewLHS || !NewRHS) + // We could not find an existing value to replace either LHS or RHS. + // Generating new instructions has subtler tradeoffs, so avoid doing that + // for now. 
+ return; + + DEBUG(dbgs() << "INDVARS: Simplified comparison: " << *ICmp << '\n'); + ICmp->setPredicate(InvariantPredicate); + ICmp->setOperand(0, NewLHS); + ICmp->setOperand(1, NewRHS); + } else + return; + + ++NumElimCmp; + Changed = true; +} + +/// SimplifyIVUsers helper for eliminating useless +/// remainder operations operating on an induction variable. +void SimplifyIndvar::eliminateIVRemainder(BinaryOperator *Rem, + Value *IVOperand, + bool IsSigned) { + // We're only interested in the case where we know something about + // the numerator. + if (IVOperand != Rem->getOperand(0)) + return; + + // Get the SCEVs for the ICmp operands. + const SCEV *S = SE->getSCEV(Rem->getOperand(0)); + const SCEV *X = SE->getSCEV(Rem->getOperand(1)); + + // Simplify unnecessary loops away. + const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent()); + S = SE->getSCEVAtScope(S, ICmpLoop); + X = SE->getSCEVAtScope(X, ICmpLoop); + + // i % n --> i if i is in [0,n). + if ((!IsSigned || SE->isKnownNonNegative(S)) && + SE->isKnownPredicate(IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, + S, X)) + Rem->replaceAllUsesWith(Rem->getOperand(0)); + else { + // (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n). + const SCEV *LessOne = SE->getMinusSCEV(S, SE->getOne(S->getType())); + if (IsSigned && !SE->isKnownNonNegative(LessOne)) + return; + + if (!SE->isKnownPredicate(IsSigned ? + ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, + LessOne, X)) + return; + + ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ, + Rem->getOperand(0), Rem->getOperand(1)); + SelectInst *Sel = + SelectInst::Create(ICmp, + ConstantInt::get(Rem->getType(), 0), + Rem->getOperand(0), "tmp", Rem); + Rem->replaceAllUsesWith(Sel); + } + + DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n'); + ++NumElimRem; + Changed = true; + DeadInsts.emplace_back(Rem); +} + +bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) { + auto *F = CI->getCalledFunction(); + if (!F) + return false; + + typedef const SCEV *(ScalarEvolution::*OperationFunctionTy)( + const SCEV *, const SCEV *, SCEV::NoWrapFlags); + typedef const SCEV *(ScalarEvolution::*ExtensionFunctionTy)( + const SCEV *, Type *); + + OperationFunctionTy Operation; + ExtensionFunctionTy Extension; + + Instruction::BinaryOps RawOp; + + // We always have exactly one of nsw or nuw. If NoSignedOverflow is false, we + // have nuw. 
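// Illustrative sketch (not part of this file): the two remainder rewrites from
// eliminateIVRemainder above, written at the source level for an index already
// known to lie in [0, n). Function names are only for illustration.
#include <cassert>
static unsigned remBefore(unsigned i, unsigned n) { return i % n; }
static unsigned remAfter(unsigned i, unsigned n)  { (void)n; return i; }      // i % n -> i
static unsigned incRemBefore(unsigned i, unsigned n) { return (i + 1) % n; }
static unsigned incRemAfter(unsigned i, unsigned n)  { return (i + 1) == n ? 0 : i + 1; }
int main() {
  for (unsigned n = 1; n < 64; ++n)
    for (unsigned i = 0; i < n; ++i) {
      assert(remBefore(i, n) == remAfter(i, n));
      assert(incRemBefore(i, n) == incRemAfter(i, n));
    }
  return 0;
}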
+ bool NoSignedOverflow; + + switch (F->getIntrinsicID()) { + default: + return false; + + case Intrinsic::sadd_with_overflow: + Operation = &ScalarEvolution::getAddExpr; + Extension = &ScalarEvolution::getSignExtendExpr; + RawOp = Instruction::Add; + NoSignedOverflow = true; + break; + + case Intrinsic::uadd_with_overflow: + Operation = &ScalarEvolution::getAddExpr; + Extension = &ScalarEvolution::getZeroExtendExpr; + RawOp = Instruction::Add; + NoSignedOverflow = false; + break; + + case Intrinsic::ssub_with_overflow: + Operation = &ScalarEvolution::getMinusSCEV; + Extension = &ScalarEvolution::getSignExtendExpr; + RawOp = Instruction::Sub; + NoSignedOverflow = true; + break; + + case Intrinsic::usub_with_overflow: + Operation = &ScalarEvolution::getMinusSCEV; + Extension = &ScalarEvolution::getZeroExtendExpr; + RawOp = Instruction::Sub; + NoSignedOverflow = false; + break; + } + + const SCEV *LHS = SE->getSCEV(CI->getArgOperand(0)); + const SCEV *RHS = SE->getSCEV(CI->getArgOperand(1)); + + auto *NarrowTy = cast<IntegerType>(LHS->getType()); + auto *WideTy = + IntegerType::get(NarrowTy->getContext(), NarrowTy->getBitWidth() * 2); + + const SCEV *A = + (SE->*Extension)((SE->*Operation)(LHS, RHS, SCEV::FlagAnyWrap), WideTy); + const SCEV *B = + (SE->*Operation)((SE->*Extension)(LHS, WideTy), + (SE->*Extension)(RHS, WideTy), SCEV::FlagAnyWrap); + + if (A != B) + return false; + + // Proved no overflow, nuke the overflow check and, if possible, the overflow + // intrinsic as well. + + BinaryOperator *NewResult = BinaryOperator::Create( + RawOp, CI->getArgOperand(0), CI->getArgOperand(1), "", CI); + + if (NoSignedOverflow) + NewResult->setHasNoSignedWrap(true); + else + NewResult->setHasNoUnsignedWrap(true); + + SmallVector<ExtractValueInst *, 4> ToDelete; + + for (auto *U : CI->users()) { + if (auto *EVI = dyn_cast<ExtractValueInst>(U)) { + if (EVI->getIndices()[0] == 1) + EVI->replaceAllUsesWith(ConstantInt::getFalse(CI->getContext())); + else { + assert(EVI->getIndices()[0] == 0 && "Only two possibilities!"); + EVI->replaceAllUsesWith(NewResult); + } + ToDelete.push_back(EVI); + } + } + + for (auto *EVI : ToDelete) + EVI->eraseFromParent(); + + if (CI->use_empty()) + CI->eraseFromParent(); + + return true; +} + +/// Eliminate an operation that consumes a simple IV and has no observable +/// side-effect given the range of IV values. IVOperand is guaranteed SCEVable, +/// but UseInst may not be. +bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst, + Instruction *IVOperand) { + if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) { + eliminateIVComparison(ICmp, IVOperand); + return true; + } + if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) { + bool IsSigned = Rem->getOpcode() == Instruction::SRem; + if (IsSigned || Rem->getOpcode() == Instruction::URem) { + eliminateIVRemainder(Rem, IVOperand, IsSigned); + return true; + } + } + + if (auto *CI = dyn_cast<CallInst>(UseInst)) + if (eliminateOverflowIntrinsic(CI)) + return true; + + if (eliminateIdentitySCEV(UseInst, IVOperand)) + return true; + + return false; +} + +/// Eliminate any operation that SCEV can prove is an identity function. +bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst, + Instruction *IVOperand) { + if (!SE->isSCEVable(UseInst->getType()) || + (UseInst->getType() != IVOperand->getType()) || + (SE->getSCEV(UseInst) != SE->getSCEV(IVOperand))) + return false; + + // getSCEV(X) == getSCEV(Y) does not guarantee that X and Y are related in the + // dominator tree, even if X is an operand to Y. 
For instance, in + // + // %iv = phi i32 {0,+,1} + // br %cond, label %left, label %merge + // + // left: + // %X = add i32 %iv, 0 + // br label %merge + // + // merge: + // %M = phi (%X, %iv) + // + // getSCEV(%M) == getSCEV(%X) == {0,+,1}, but %X does not dominate %M, and + // %M.replaceAllUsesWith(%X) would be incorrect. + + if (isa<PHINode>(UseInst)) + // If UseInst is not a PHI node then we know that IVOperand dominates + // UseInst directly from the legality of SSA. + if (!DT || !DT->dominates(IVOperand, UseInst)) + return false; + + if (!LI->replacementPreservesLCSSAForm(UseInst, IVOperand)) + return false; + + DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n'); + + UseInst->replaceAllUsesWith(IVOperand); + ++NumElimIdentity; + Changed = true; + DeadInsts.emplace_back(UseInst); + return true; +} + +/// Annotate BO with nsw / nuw if it provably does not signed-overflow / +/// unsigned-overflow. Returns true if anything changed, false otherwise. +bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO, + Value *IVOperand) { + + // Fastpath: we don't have any work to do if `BO` is `nuw` and `nsw`. + if (BO->hasNoUnsignedWrap() && BO->hasNoSignedWrap()) + return false; + + const SCEV *(ScalarEvolution::*GetExprForBO)(const SCEV *, const SCEV *, + SCEV::NoWrapFlags); + + switch (BO->getOpcode()) { + default: + return false; + + case Instruction::Add: + GetExprForBO = &ScalarEvolution::getAddExpr; + break; + + case Instruction::Sub: + GetExprForBO = &ScalarEvolution::getMinusSCEV; + break; + + case Instruction::Mul: + GetExprForBO = &ScalarEvolution::getMulExpr; + break; + } + + unsigned BitWidth = cast<IntegerType>(BO->getType())->getBitWidth(); + Type *WideTy = IntegerType::get(BO->getContext(), BitWidth * 2); + const SCEV *LHS = SE->getSCEV(BO->getOperand(0)); + const SCEV *RHS = SE->getSCEV(BO->getOperand(1)); + + bool Changed = false; + + if (!BO->hasNoUnsignedWrap()) { + const SCEV *ExtendAfterOp = SE->getZeroExtendExpr(SE->getSCEV(BO), WideTy); + const SCEV *OpAfterExtend = (SE->*GetExprForBO)( + SE->getZeroExtendExpr(LHS, WideTy), SE->getZeroExtendExpr(RHS, WideTy), + SCEV::FlagAnyWrap); + if (ExtendAfterOp == OpAfterExtend) { + BO->setHasNoUnsignedWrap(); + SE->forgetValue(BO); + Changed = true; + } + } + + if (!BO->hasNoSignedWrap()) { + const SCEV *ExtendAfterOp = SE->getSignExtendExpr(SE->getSCEV(BO), WideTy); + const SCEV *OpAfterExtend = (SE->*GetExprForBO)( + SE->getSignExtendExpr(LHS, WideTy), SE->getSignExtendExpr(RHS, WideTy), + SCEV::FlagAnyWrap); + if (ExtendAfterOp == OpAfterExtend) { + BO->setHasNoSignedWrap(); + SE->forgetValue(BO); + Changed = true; + } + } + + return Changed; +} + +/// Add all uses of Def to the current IV's worklist. +static void pushIVUsers( + Instruction *Def, + SmallPtrSet<Instruction*,16> &Simplified, + SmallVectorImpl< std::pair<Instruction*,Instruction*> > &SimpleIVUsers) { + + for (User *U : Def->users()) { + Instruction *UI = cast<Instruction>(U); + + // Avoid infinite or exponential worklist processing. + // Also ensure unique worklist users. + // If Def is a LoopPhi, it may not be in the Simplified set, so check for + // self edges first. + if (UI != Def && Simplified.insert(UI).second) + SimpleIVUsers.push_back(std::make_pair(UI, Def)); + } +} + +/// Return true if this instruction generates a simple SCEV +/// expression in terms of that IV. +/// +/// This is similar to IVUsers' isInteresting() but processes each instruction +/// non-recursively when the operand is already known to be a simpleIVUser. 
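// Illustrative sketch (not part of this file): the widening argument used by
// eliminateOverflowIntrinsic and strengthenOverflowingOperation above, checked
// exhaustively for 8-bit signed addition. The narrow operation cannot wrap
// exactly when sign-extending its narrow result agrees with performing the
// operation in the twice-as-wide type ("ExtendAfterOp == OpAfterExtend").
#include <cassert>
int main() {
  for (int a = -128; a <= 127; ++a)
    for (int b = -128; b <= 127; ++b) {
      int wideOp = a + b;                               // the add done in the wide type
      int extNarrow = ((wideOp & 0xFF) ^ 0x80) - 0x80;  // sign-extend the low 8 bits (the narrow result)
      bool provedNoWrap = (wideOp == extNarrow);
      bool wraps = (wideOp < -128 || wideOp > 127);
      assert(provedNoWrap == !wraps);
    }
  return 0;
}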
+/// +static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) { + if (!SE->isSCEVable(I->getType())) + return false; + + // Get the symbolic expression for this instruction. + const SCEV *S = SE->getSCEV(I); + + // Only consider affine recurrences. + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S); + if (AR && AR->getLoop() == L) + return true; + + return false; +} + +/// Iteratively perform simplification on a worklist of users +/// of the specified induction variable. Each successive simplification may push +/// more users which may themselves be candidates for simplification. +/// +/// This algorithm does not require IVUsers analysis. Instead, it simplifies +/// instructions in-place during analysis. Rather than rewriting induction +/// variables bottom-up from their users, it transforms a chain of IVUsers +/// top-down, updating the IR only when it encounters a clear optimization +/// opportunity. +/// +/// Once DisableIVRewrite is default, LSR will be the only client of IVUsers. +/// +void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) { + if (!SE->isSCEVable(CurrIV->getType())) + return; + + // Instructions processed by SimplifyIndvar for CurrIV. + SmallPtrSet<Instruction*,16> Simplified; + + // Use-def pairs if IV users waiting to be processed for CurrIV. + SmallVector<std::pair<Instruction*, Instruction*>, 8> SimpleIVUsers; + + // Push users of the current LoopPhi. In rare cases, pushIVUsers may be + // called multiple times for the same LoopPhi. This is the proper thing to + // do for loop header phis that use each other. + pushIVUsers(CurrIV, Simplified, SimpleIVUsers); + + while (!SimpleIVUsers.empty()) { + std::pair<Instruction*, Instruction*> UseOper = + SimpleIVUsers.pop_back_val(); + Instruction *UseInst = UseOper.first; + + // Bypass back edges to avoid extra work. + if (UseInst == CurrIV) continue; + + Instruction *IVOperand = UseOper.second; + for (unsigned N = 0; IVOperand; ++N) { + assert(N <= Simplified.size() && "runaway iteration"); + + Value *NewOper = foldIVUser(UseOper.first, IVOperand); + if (!NewOper) + break; // done folding + IVOperand = dyn_cast<Instruction>(NewOper); + } + if (!IVOperand) + continue; + + if (eliminateIVUser(UseOper.first, IVOperand)) { + pushIVUsers(IVOperand, Simplified, SimpleIVUsers); + continue; + } + + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(UseOper.first)) { + if (isa<OverflowingBinaryOperator>(BO) && + strengthenOverflowingOperation(BO, IVOperand)) { + // re-queue uses of the now modified binary operator and fall + // through to the checks that remain. + pushIVUsers(IVOperand, Simplified, SimpleIVUsers); + } + } + + CastInst *Cast = dyn_cast<CastInst>(UseOper.first); + if (V && Cast) { + V->visitCast(Cast); + continue; + } + if (isSimpleIVUser(UseOper.first, L, SE)) { + pushIVUsers(UseOper.first, Simplified, SimpleIVUsers); + } + } +} + +namespace llvm { + +void IVVisitor::anchor() { } + +/// Simplify instructions that use this induction variable +/// by using ScalarEvolution to analyze the IV's recurrence. +bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, + LoopInfo *LI, SmallVectorImpl<WeakVH> &Dead, + IVVisitor *V) { + SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, Dead); + SIV.simplifyUsers(CurrIV, V); + return SIV.hasChanged(); +} + +/// Simplify users of induction variables within this +/// loop. This does not actually change or add IVs. 
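// Illustrative usage sketch (not part of this file): how a SCEV-preserving
// loop pass typically drives simplifyLoopIVs (defined just below) and then
// erases the users it queued on the dead list. This mirrors what the
// IndVarSimplify pass does; the wrapper name is only for illustration.
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"

static bool simplifyIVsAndSweep(llvm::Loop *L, llvm::ScalarEvolution *SE,
                                llvm::DominatorTree *DT, llvm::LoopInfo *LI,
                                const llvm::TargetLibraryInfo *TLI) {
  llvm::SmallVector<llvm::WeakVH, 16> DeadInsts;
  bool Changed = llvm::simplifyLoopIVs(L, SE, DT, LI, DeadInsts);
  // simplifyLoopIVs only records dead IV users; actually delete them here.
  while (!DeadInsts.empty()) {
    llvm::Value *V = DeadInsts.pop_back_val();
    if (auto *I = llvm::dyn_cast_or_null<llvm::Instruction>(V))
      Changed |= llvm::RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
  }
  return Changed;
}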
+bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT, + LoopInfo *LI, SmallVectorImpl<WeakVH> &Dead) { + bool Changed = false; + for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { + Changed |= simplifyUsersOfIV(cast<PHINode>(I), SE, DT, LI, Dead); + } + return Changed; +} + +} // namespace llvm diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp new file mode 100644 index 000000000000..1220490123ce --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp @@ -0,0 +1,141 @@ +//===------ SimplifyInstructions.cpp - Remove redundant instructions ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is a utility pass used for testing the InstructionSimplify analysis. +// The analysis is applied to every instruction, and if it simplifies then the +// instruction is replaced by the simplification. If you are looking for a pass +// that performs serious instruction folding, use the instcombine pass instead. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/SimplifyInstructions.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Type.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Scalar.h" +using namespace llvm; + +#define DEBUG_TYPE "instsimplify" + +STATISTIC(NumSimplified, "Number of redundant instructions removed"); + +static bool runImpl(Function &F, const DominatorTree *DT, + const TargetLibraryInfo *TLI, AssumptionCache *AC) { + const DataLayout &DL = F.getParent()->getDataLayout(); + SmallPtrSet<const Instruction *, 8> S1, S2, *ToSimplify = &S1, *Next = &S2; + bool Changed = false; + + do { + for (BasicBlock *BB : depth_first(&F.getEntryBlock())) { + // Here be subtlety: the iterator must be incremented before the loop + // body (not sure why), so a range-for loop won't work here. + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { + Instruction *I = &*BI++; + // The first time through the loop ToSimplify is empty and we try to + // simplify all instructions. On later iterations ToSimplify is not + // empty and we only bother simplifying instructions that are in it. + if (!ToSimplify->empty() && !ToSimplify->count(I)) + continue; + + // Don't waste time simplifying unused instructions. + if (!I->use_empty()) { + if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AC)) { + // Mark all uses for resimplification next time round the loop. + for (User *U : I->users()) + Next->insert(cast<Instruction>(U)); + I->replaceAllUsesWith(V); + ++NumSimplified; + Changed = true; + } + } + if (RecursivelyDeleteTriviallyDeadInstructions(I, TLI)) { + // RecursivelyDeleteTriviallyDeadInstruction can remove more than one + // instruction, so simply incrementing the iterator does not work. + // When instructions get deleted re-iterate instead. 
+ BI = BB->begin(); + BE = BB->end(); + Changed = true; + } + } + } + + // Place the list of instructions to simplify on the next loop iteration + // into ToSimplify. + std::swap(ToSimplify, Next); + Next->clear(); + } while (!ToSimplify->empty()); + + return Changed; +} + +namespace { + struct InstSimplifier : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + InstSimplifier() : FunctionPass(ID) { + initializeInstSimplifierPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<AssumptionCacheTracker>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); + } + + /// runOnFunction - Remove instructions that simplify. + bool runOnFunction(Function &F) override { + if (skipFunction(F)) + return false; + + const DominatorTree *DT = + &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + const TargetLibraryInfo *TLI = + &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + AssumptionCache *AC = + &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); + return runImpl(F, DT, TLI, AC); + } + }; +} + +char InstSimplifier::ID = 0; +INITIALIZE_PASS_BEGIN(InstSimplifier, "instsimplify", + "Remove redundant instructions", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(InstSimplifier, "instsimplify", + "Remove redundant instructions", false, false) +char &llvm::InstructionSimplifierID = InstSimplifier::ID; + +// Public interface to the simplify instructions pass. +FunctionPass *llvm::createInstructionSimplifierPass() { + return new InstSimplifier(); +} + +PreservedAnalyses InstSimplifierPass::run(Function &F, + FunctionAnalysisManager &AM) { + auto &DT = AM.getResult<DominatorTreeAnalysis>(F); + auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); + auto &AC = AM.getResult<AssumptionAnalysis>(F); + bool Changed = runImpl(F, &DT, &TLI, &AC); + if (!Changed) + return PreservedAnalyses::all(); + // FIXME: This should also 'preserve the CFG'. + return PreservedAnalyses::none(); +} diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp new file mode 100644 index 000000000000..11d54bcf4f89 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -0,0 +1,2386 @@ +//===------ SimplifyLibCalls.cpp - Library calls simplifier ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is a utility pass used for testing the InstructionSimplify analysis. +// The analysis is applied to every instruction, and if it simplifies then the +// instruction is replaced by the simplification. If you are looking for a pass +// that performs serious instruction folding, use the instcombine pass instead. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/SimplifyLibCalls.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Utils/BuildLibCalls.h" +#include "llvm/Transforms/Utils/Local.h" + +using namespace llvm; +using namespace PatternMatch; + +static cl::opt<bool> + ColdErrorCalls("error-reporting-is-cold", cl::init(true), cl::Hidden, + cl::desc("Treat error-reporting calls as cold")); + +static cl::opt<bool> + EnableUnsafeFPShrink("enable-double-float-shrink", cl::Hidden, + cl::init(false), + cl::desc("Enable unsafe double to float " + "shrinking for math lib calls")); + + +//===----------------------------------------------------------------------===// +// Helper Functions +//===----------------------------------------------------------------------===// + +static bool ignoreCallingConv(LibFunc::Func Func) { + return Func == LibFunc::abs || Func == LibFunc::labs || + Func == LibFunc::llabs || Func == LibFunc::strlen; +} + +static bool isCallingConvCCompatible(CallInst *CI) { + switch(CI->getCallingConv()) { + default: + return false; + case llvm::CallingConv::C: + return true; + case llvm::CallingConv::ARM_APCS: + case llvm::CallingConv::ARM_AAPCS: + case llvm::CallingConv::ARM_AAPCS_VFP: { + + // The iOS ABI diverges from the standard in some cases, so for now don't + // try to simplify those calls. + if (Triple(CI->getModule()->getTargetTriple()).isiOS()) + return false; + + auto *FuncTy = CI->getFunctionType(); + + if (!FuncTy->getReturnType()->isPointerTy() && + !FuncTy->getReturnType()->isIntegerTy() && + !FuncTy->getReturnType()->isVoidTy()) + return false; + + for (auto Param : FuncTy->params()) { + if (!Param->isPointerTy() && !Param->isIntegerTy()) + return false; + } + return true; + } + } + return false; +} + +/// Return true if it only matters that the value is equal or not-equal to zero. +static bool isOnlyUsedInZeroEqualityComparison(Value *V) { + for (User *U : V->users()) { + if (ICmpInst *IC = dyn_cast<ICmpInst>(U)) + if (IC->isEquality()) + if (Constant *C = dyn_cast<Constant>(IC->getOperand(1))) + if (C->isNullValue()) + continue; + // Unknown instruction. + return false; + } + return true; +} + +/// Return true if it is only used in equality comparisons with With. +static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) { + for (User *U : V->users()) { + if (ICmpInst *IC = dyn_cast<ICmpInst>(U)) + if (IC->isEquality() && IC->getOperand(1) == With) + continue; + // Unknown instruction. + return false; + } + return true; +} + +static bool callHasFloatingPointArgument(const CallInst *CI) { + return any_of(CI->operands(), [](const Use &OI) { + return OI->getType()->isFloatingPointTy(); + }); +} + +/// \brief Check whether the overloaded unary floating point function +/// corresponding to \a Ty is available. 
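// Illustrative sketch (not part of this file): what the zero-equality-use
// predicate above enables. When a call result is only ever compared against
// zero, the simplifier may compute just its (non)zero-ness, e.g. the
// strlen(x) == 0 --> *x == 0 rewrite performed later in this file.
// Function names are only for illustration.
#include <cstring>
static bool isEmptyBefore(const char *s) { return strlen(s) == 0; }
static bool isEmptyAfter(const char *s)  { return *s == '\0'; }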
+static bool hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty, + LibFunc::Func DoubleFn, LibFunc::Func FloatFn, + LibFunc::Func LongDoubleFn) { + switch (Ty->getTypeID()) { + case Type::FloatTyID: + return TLI->has(FloatFn); + case Type::DoubleTyID: + return TLI->has(DoubleFn); + default: + return TLI->has(LongDoubleFn); + } +} + +//===----------------------------------------------------------------------===// +// String and Memory Library Call Optimizations +//===----------------------------------------------------------------------===// + +Value *LibCallSimplifier::optimizeStrCat(CallInst *CI, IRBuilder<> &B) { + // Extract some information from the instruction + Value *Dst = CI->getArgOperand(0); + Value *Src = CI->getArgOperand(1); + + // See if we can get the length of the input string. + uint64_t Len = GetStringLength(Src); + if (Len == 0) + return nullptr; + --Len; // Unbias length. + + // Handle the simple, do-nothing case: strcat(x, "") -> x + if (Len == 0) + return Dst; + + return emitStrLenMemCpy(Src, Dst, Len, B); +} + +Value *LibCallSimplifier::emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, + IRBuilder<> &B) { + // We need to find the end of the destination string. That's where the + // memory is to be moved to. We just generate a call to strlen. + Value *DstLen = emitStrLen(Dst, B, DL, TLI); + if (!DstLen) + return nullptr; + + // Now that we have the destination's length, we must index into the + // destination's pointer to get the actual memcpy destination (end of + // the string .. we're concatenating). + Value *CpyDst = B.CreateGEP(B.getInt8Ty(), Dst, DstLen, "endptr"); + + // We have enough information to now generate the memcpy call to do the + // concatenation for us. Make a memcpy to copy the nul byte with align = 1. + B.CreateMemCpy(CpyDst, Src, + ConstantInt::get(DL.getIntPtrType(Src->getContext()), Len + 1), + 1); + return Dst; +} + +Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilder<> &B) { + // Extract some information from the instruction. + Value *Dst = CI->getArgOperand(0); + Value *Src = CI->getArgOperand(1); + uint64_t Len; + + // We don't do anything if length is not constant. + if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2))) + Len = LengthArg->getZExtValue(); + else + return nullptr; + + // See if we can get the length of the input string. + uint64_t SrcLen = GetStringLength(Src); + if (SrcLen == 0) + return nullptr; + --SrcLen; // Unbias length. + + // Handle the simple, do-nothing cases: + // strncat(x, "", c) -> x + // strncat(x, c, 0) -> x + if (SrcLen == 0 || Len == 0) + return Dst; + + // We don't optimize this case. + if (Len < SrcLen) + return nullptr; + + // strncat(x, s, c) -> strcat(x, s) + // s is constant so the strcat can be optimized further. + return emitStrLenMemCpy(Src, Dst, SrcLen, B); +} + +Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) { + Function *Callee = CI->getCalledFunction(); + FunctionType *FT = Callee->getFunctionType(); + Value *SrcStr = CI->getArgOperand(0); + + // If the second operand is non-constant, see if we can compute the length + // of the input string and turn this into memchr. + ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); + if (!CharC) { + uint64_t Len = GetStringLength(SrcStr); + if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32)) // memchr needs i32. + return nullptr; + + return emitMemChr(SrcStr, CI->getArgOperand(1), // include nul. 
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len), + B, DL, TLI); + } + + // Otherwise, the character is a constant, see if the first argument is + // a string literal. If so, we can constant fold. + StringRef Str; + if (!getConstantStringInfo(SrcStr, Str)) { + if (CharC->isZero()) // strchr(p, 0) -> p + strlen(p) + return B.CreateGEP(B.getInt8Ty(), SrcStr, emitStrLen(SrcStr, B, DL, TLI), + "strchr"); + return nullptr; + } + + // Compute the offset, make sure to handle the case when we're searching for + // zero (a weird way to spell strlen). + size_t I = (0xFF & CharC->getSExtValue()) == 0 + ? Str.size() + : Str.find(CharC->getSExtValue()); + if (I == StringRef::npos) // Didn't find the char. strchr returns null. + return Constant::getNullValue(CI->getType()); + + // strchr(s+n,c) -> gep(s+n+i,c) + return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "strchr"); +} + +Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilder<> &B) { + Value *SrcStr = CI->getArgOperand(0); + ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); + + // Cannot fold anything if we're not looking for a constant. + if (!CharC) + return nullptr; + + StringRef Str; + if (!getConstantStringInfo(SrcStr, Str)) { + // strrchr(s, 0) -> strchr(s, 0) + if (CharC->isZero()) + return emitStrChr(SrcStr, '\0', B, TLI); + return nullptr; + } + + // Compute the offset. + size_t I = (0xFF & CharC->getSExtValue()) == 0 + ? Str.size() + : Str.rfind(CharC->getSExtValue()); + if (I == StringRef::npos) // Didn't find the char. Return null. + return Constant::getNullValue(CI->getType()); + + // strrchr(s+n,c) -> gep(s+n+i,c) + return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "strrchr"); +} + +Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) { + Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1); + if (Str1P == Str2P) // strcmp(x,x) -> 0 + return ConstantInt::get(CI->getType(), 0); + + StringRef Str1, Str2; + bool HasStr1 = getConstantStringInfo(Str1P, Str1); + bool HasStr2 = getConstantStringInfo(Str2P, Str2); + + // strcmp(x, y) -> cnst (if both x and y are constant strings) + if (HasStr1 && HasStr2) + return ConstantInt::get(CI->getType(), Str1.compare(Str2)); + + if (HasStr1 && Str1.empty()) // strcmp("", x) -> -*x + return B.CreateNeg( + B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType())); + + if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x + return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType()); + + // strcmp(P, "x") -> memcmp(P, "x", 2) + uint64_t Len1 = GetStringLength(Str1P); + uint64_t Len2 = GetStringLength(Str2P); + if (Len1 && Len2) { + return emitMemCmp(Str1P, Str2P, + ConstantInt::get(DL.getIntPtrType(CI->getContext()), + std::min(Len1, Len2)), + B, DL, TLI); + } + + return nullptr; +} + +Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) { + Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1); + if (Str1P == Str2P) // strncmp(x,x,n) -> 0 + return ConstantInt::get(CI->getType(), 0); + + // Get the length argument if it is constant. 
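// Illustrative sketch (not part of this file): one of the strcmp rewrites
// above, written at the source level. The replacement is equivalent only up to
// the sign of the result, which is all strcmp guarantees. Function names are
// illustrative.
#include <cstring>
static int cmpEmptyBefore(const char *x) { return strcmp("", x); }
static int cmpEmptyAfter(const char *x)  { return -(int)(unsigned char)*x; } // strcmp("", x) -> -*x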
+ uint64_t Length; + if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2))) + Length = LengthArg->getZExtValue(); + else + return nullptr; + + if (Length == 0) // strncmp(x,y,0) -> 0 + return ConstantInt::get(CI->getType(), 0); + + if (Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1) + return emitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, DL, TLI); + + StringRef Str1, Str2; + bool HasStr1 = getConstantStringInfo(Str1P, Str1); + bool HasStr2 = getConstantStringInfo(Str2P, Str2); + + // strncmp(x, y) -> cnst (if both x and y are constant strings) + if (HasStr1 && HasStr2) { + StringRef SubStr1 = Str1.substr(0, Length); + StringRef SubStr2 = Str2.substr(0, Length); + return ConstantInt::get(CI->getType(), SubStr1.compare(SubStr2)); + } + + if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> -*x + return B.CreateNeg( + B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType())); + + if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x + return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType()); + + return nullptr; +} + +Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilder<> &B) { + Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); + if (Dst == Src) // strcpy(x,x) -> x + return Src; + + // See if we can get the length of the input string. + uint64_t Len = GetStringLength(Src); + if (Len == 0) + return nullptr; + + // We have enough information to now generate the memcpy call to do the + // copy for us. Make a memcpy to copy the nul byte with align = 1. + B.CreateMemCpy(Dst, Src, + ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len), 1); + return Dst; +} + +Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) { + Function *Callee = CI->getCalledFunction(); + Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); + if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x) + Value *StrLen = emitStrLen(Src, B, DL, TLI); + return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr; + } + + // See if we can get the length of the input string. + uint64_t Len = GetStringLength(Src); + if (Len == 0) + return nullptr; + + Type *PT = Callee->getFunctionType()->getParamType(0); + Value *LenV = ConstantInt::get(DL.getIntPtrType(PT), Len); + Value *DstEnd = B.CreateGEP(B.getInt8Ty(), Dst, + ConstantInt::get(DL.getIntPtrType(PT), Len - 1)); + + // We have enough information to now generate the memcpy call to do the + // copy for us. Make a memcpy to copy the nul byte with align = 1. + B.CreateMemCpy(Dst, Src, LenV, 1); + return DstEnd; +} + +Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) { + Function *Callee = CI->getCalledFunction(); + Value *Dst = CI->getArgOperand(0); + Value *Src = CI->getArgOperand(1); + Value *LenOp = CI->getArgOperand(2); + + // See if we can get the length of the input string. 
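// Illustrative sketch (not part of this file): the strcpy rewrite above when
// the source has a known constant length; the copy becomes a fixed-size memcpy
// that includes the terminating NUL. stpcpy is handled the same way, except
// that it returns a pointer just past the copied characters. Function names
// are illustrative.
#include <cstring>
static char *copyBefore(char *dst) { return strcpy(dst, "hello"); }
static char *copyAfter(char *dst)  { memcpy(dst, "hello", 6); return dst; } // 5 chars + NUL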
+ uint64_t SrcLen = GetStringLength(Src); + if (SrcLen == 0) + return nullptr; + --SrcLen; + + if (SrcLen == 0) { + // strncpy(x, "", y) -> memset(x, '\0', y, 1) + B.CreateMemSet(Dst, B.getInt8('\0'), LenOp, 1); + return Dst; + } + + uint64_t Len; + if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(LenOp)) + Len = LengthArg->getZExtValue(); + else + return nullptr; + + if (Len == 0) + return Dst; // strncpy(x, y, 0) -> x + + // Let strncpy handle the zero padding + if (Len > SrcLen + 1) + return nullptr; + + Type *PT = Callee->getFunctionType()->getParamType(0); + // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant] + B.CreateMemCpy(Dst, Src, ConstantInt::get(DL.getIntPtrType(PT), Len), 1); + + return Dst; +} + +Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) { + Value *Src = CI->getArgOperand(0); + + // Constant folding: strlen("xyz") -> 3 + if (uint64_t Len = GetStringLength(Src)) + return ConstantInt::get(CI->getType(), Len - 1); + + // If s is a constant pointer pointing to a string literal, we can fold + // strlen(s + x) to strlen(s) - x, when x is known to be in the range + // [0, strlen(s)] or the string has a single null terminator '\0' at the end. + // We only try to simplify strlen when the pointer s points to an array + // of i8. Otherwise, we would need to scale the offset x before doing the + // subtraction. This will make the optimization more complex, and it's not + // very useful because calling strlen for a pointer of other types is + // very uncommon. + if (GEPOperator *GEP = dyn_cast<GEPOperator>(Src)) { + if (!isGEPBasedOnPointerToString(GEP)) + return nullptr; + + StringRef Str; + if (getConstantStringInfo(GEP->getOperand(0), Str, 0, false)) { + size_t NullTermIdx = Str.find('\0'); + + // If the string does not have '\0', leave it to strlen to compute + // its length. + if (NullTermIdx == StringRef::npos) + return nullptr; + + Value *Offset = GEP->getOperand(2); + unsigned BitWidth = Offset->getType()->getIntegerBitWidth(); + APInt KnownZero(BitWidth, 0); + APInt KnownOne(BitWidth, 0); + computeKnownBits(Offset, KnownZero, KnownOne, DL, 0, nullptr, CI, + nullptr); + KnownZero.flipAllBits(); + size_t ArrSize = + cast<ArrayType>(GEP->getSourceElementType())->getNumElements(); + + // KnownZero's bits are flipped, so zeros in KnownZero now represent + // bits known to be zeros in Offset, and ones in KnowZero represent + // bits unknown in Offset. Therefore, Offset is known to be in range + // [0, NullTermIdx] when the flipped KnownZero is non-negative and + // unsigned-less-than NullTermIdx. + // + // If Offset is not provably in the range [0, NullTermIdx], we can still + // optimize if we can prove that the program has undefined behavior when + // Offset is outside that range. That is the case when GEP->getOperand(0) + // is a pointer to an object whose memory extent is NullTermIdx+1. + if ((KnownZero.isNonNegative() && KnownZero.ule(NullTermIdx)) || + (GEP->isInBounds() && isa<GlobalVariable>(GEP->getOperand(0)) && + NullTermIdx == ArrSize - 1)) + return B.CreateSub(ConstantInt::get(CI->getType(), NullTermIdx), + Offset); + } + + return nullptr; + } + + // strlen(x?"foo":"bars") --> x ? 
3 : 4 + if (SelectInst *SI = dyn_cast<SelectInst>(Src)) { + uint64_t LenTrue = GetStringLength(SI->getTrueValue()); + uint64_t LenFalse = GetStringLength(SI->getFalseValue()); + if (LenTrue && LenFalse) { + Function *Caller = CI->getParent()->getParent(); + emitOptimizationRemark(CI->getContext(), "simplify-libcalls", *Caller, + SI->getDebugLoc(), + "folded strlen(select) to select of constants"); + return B.CreateSelect(SI->getCondition(), + ConstantInt::get(CI->getType(), LenTrue - 1), + ConstantInt::get(CI->getType(), LenFalse - 1)); + } + } + + // strlen(x) != 0 --> *x != 0 + // strlen(x) == 0 --> *x == 0 + if (isOnlyUsedInZeroEqualityComparison(CI)) + return B.CreateZExt(B.CreateLoad(Src, "strlenfirst"), CI->getType()); + + return nullptr; +} + +Value *LibCallSimplifier::optimizeStrPBrk(CallInst *CI, IRBuilder<> &B) { + StringRef S1, S2; + bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); + bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2); + + // strpbrk(s, "") -> nullptr + // strpbrk("", s) -> nullptr + if ((HasS1 && S1.empty()) || (HasS2 && S2.empty())) + return Constant::getNullValue(CI->getType()); + + // Constant folding. + if (HasS1 && HasS2) { + size_t I = S1.find_first_of(S2); + if (I == StringRef::npos) // No match. + return Constant::getNullValue(CI->getType()); + + return B.CreateGEP(B.getInt8Ty(), CI->getArgOperand(0), B.getInt64(I), + "strpbrk"); + } + + // strpbrk(s, "a") -> strchr(s, 'a') + if (HasS2 && S2.size() == 1) + return emitStrChr(CI->getArgOperand(0), S2[0], B, TLI); + + return nullptr; +} + +Value *LibCallSimplifier::optimizeStrTo(CallInst *CI, IRBuilder<> &B) { + Value *EndPtr = CI->getArgOperand(1); + if (isa<ConstantPointerNull>(EndPtr)) { + // With a null EndPtr, this function won't capture the main argument. + // It would be readonly too, except that it still may write to errno. + CI->addAttribute(1, Attribute::NoCapture); + } + + return nullptr; +} + +Value *LibCallSimplifier::optimizeStrSpn(CallInst *CI, IRBuilder<> &B) { + StringRef S1, S2; + bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); + bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2); + + // strspn(s, "") -> 0 + // strspn("", s) -> 0 + if ((HasS1 && S1.empty()) || (HasS2 && S2.empty())) + return Constant::getNullValue(CI->getType()); + + // Constant folding. + if (HasS1 && HasS2) { + size_t Pos = S1.find_first_not_of(S2); + if (Pos == StringRef::npos) + Pos = S1.size(); + return ConstantInt::get(CI->getType(), Pos); + } + + return nullptr; +} + +Value *LibCallSimplifier::optimizeStrCSpn(CallInst *CI, IRBuilder<> &B) { + StringRef S1, S2; + bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); + bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2); + + // strcspn("", s) -> 0 + if (HasS1 && S1.empty()) + return Constant::getNullValue(CI->getType()); + + // Constant folding. + if (HasS1 && HasS2) { + size_t Pos = S1.find_first_of(S2); + if (Pos == StringRef::npos) + Pos = S1.size(); + return ConstantInt::get(CI->getType(), Pos); + } + + // strcspn(s, "") -> strlen(s) + if (HasS2 && S2.empty()) + return emitStrLen(CI->getArgOperand(0), B, DL, TLI); + + return nullptr; +} + +Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilder<> &B) { + // fold strstr(x, x) -> x. 
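// Illustrative sketch (not part of this file): the strlen-of-select fold above
// at the source level; a select of two constant strings becomes a select of
// their compile-time lengths. Function names are illustrative.
#include <cstddef>
#include <cstring>
static size_t lenBefore(bool x) { return strlen(x ? "foo" : "bars"); }
static size_t lenAfter(bool x)  { return x ? 3 : 4; }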
+ if (CI->getArgOperand(0) == CI->getArgOperand(1)) + return B.CreateBitCast(CI->getArgOperand(0), CI->getType()); + + // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0 + if (isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) { + Value *StrLen = emitStrLen(CI->getArgOperand(1), B, DL, TLI); + if (!StrLen) + return nullptr; + Value *StrNCmp = emitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1), + StrLen, B, DL, TLI); + if (!StrNCmp) + return nullptr; + for (auto UI = CI->user_begin(), UE = CI->user_end(); UI != UE;) { + ICmpInst *Old = cast<ICmpInst>(*UI++); + Value *Cmp = + B.CreateICmp(Old->getPredicate(), StrNCmp, + ConstantInt::getNullValue(StrNCmp->getType()), "cmp"); + replaceAllUsesWith(Old, Cmp); + } + return CI; + } + + // See if either input string is a constant string. + StringRef SearchStr, ToFindStr; + bool HasStr1 = getConstantStringInfo(CI->getArgOperand(0), SearchStr); + bool HasStr2 = getConstantStringInfo(CI->getArgOperand(1), ToFindStr); + + // fold strstr(x, "") -> x. + if (HasStr2 && ToFindStr.empty()) + return B.CreateBitCast(CI->getArgOperand(0), CI->getType()); + + // If both strings are known, constant fold it. + if (HasStr1 && HasStr2) { + size_t Offset = SearchStr.find(ToFindStr); + + if (Offset == StringRef::npos) // strstr("foo", "bar") -> null + return Constant::getNullValue(CI->getType()); + + // strstr("abcd", "bc") -> gep((char*)"abcd", 1) + Value *Result = castToCStr(CI->getArgOperand(0), B); + Result = B.CreateConstInBoundsGEP1_64(Result, Offset, "strstr"); + return B.CreateBitCast(Result, CI->getType()); + } + + // fold strstr(x, "y") -> strchr(x, 'y'). + if (HasStr2 && ToFindStr.size() == 1) { + Value *StrChr = emitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TLI); + return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : nullptr; + } + return nullptr; +} + +Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) { + Value *SrcStr = CI->getArgOperand(0); + ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); + ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); + + // memchr(x, y, 0) -> null + if (LenC && LenC->isNullValue()) + return Constant::getNullValue(CI->getType()); + + // From now on we need at least constant length and string. + StringRef Str; + if (!LenC || !getConstantStringInfo(SrcStr, Str, 0, /*TrimAtNul=*/false)) + return nullptr; + + // Truncate the string to LenC. If Str is smaller than LenC we will still only + // scan the string, as reading past the end of it is undefined and we can just + // return null if we don't find the char. + Str = Str.substr(0, LenC->getZExtValue()); + + // If the char is variable but the input str and length are not we can turn + // this memchr call into a simple bit field test. Of course this only works + // when the return value is only checked against null. + // + // It would be really nice to reuse switch lowering here but we can't change + // the CFG at this point. + // + // memchr("\r\n", C, 2) != nullptr -> (C & ((1 << '\r') | (1 << '\n'))) != 0 + // after bounds check. + if (!CharC && !Str.empty() && isOnlyUsedInZeroEqualityComparison(CI)) { + unsigned char Max = + *std::max_element(reinterpret_cast<const unsigned char *>(Str.begin()), + reinterpret_cast<const unsigned char *>(Str.end())); + + // Make sure the bit field we're about to create fits in a register on the + // target. + // FIXME: On a 64 bit architecture this prevents us from using the + // interesting range of alpha ascii chars. 
We could do better by emitting + // two bitfields or shifting the range by 64 if no lower chars are used. + if (!DL.fitsInLegalInteger(Max + 1)) + return nullptr; + + // For the bit field use a power-of-2 type with at least 8 bits to avoid + // creating unnecessary illegal types. + unsigned char Width = NextPowerOf2(std::max((unsigned char)7, Max)); + + // Now build the bit field. + APInt Bitfield(Width, 0); + for (char C : Str) + Bitfield.setBit((unsigned char)C); + Value *BitfieldC = B.getInt(Bitfield); + + // First check that the bit field access is within bounds. + Value *C = B.CreateZExtOrTrunc(CI->getArgOperand(1), BitfieldC->getType()); + Value *Bounds = B.CreateICmp(ICmpInst::ICMP_ULT, C, B.getIntN(Width, Width), + "memchr.bounds"); + + // Create code that checks if the given bit is set in the field. + Value *Shl = B.CreateShl(B.getIntN(Width, 1ULL), C); + Value *Bits = B.CreateIsNotNull(B.CreateAnd(Shl, BitfieldC), "memchr.bits"); + + // Finally merge both checks and cast to pointer type. The inttoptr + // implicitly zexts the i1 to intptr type. + return B.CreateIntToPtr(B.CreateAnd(Bounds, Bits, "memchr"), CI->getType()); + } + + // Check if all arguments are constants. If so, we can constant fold. + if (!CharC) + return nullptr; + + // Compute the offset. + size_t I = Str.find(CharC->getSExtValue() & 0xFF); + if (I == StringRef::npos) // Didn't find the char. memchr returns null. + return Constant::getNullValue(CI->getType()); + + // memchr(s+n,c,l) -> gep(s+n+i,c) + return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "memchr"); +} + +Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) { + Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1); + + if (LHS == RHS) // memcmp(s,s,x) -> 0 + return Constant::getNullValue(CI->getType()); + + // Make sure we have a constant length. + ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); + if (!LenC) + return nullptr; + uint64_t Len = LenC->getZExtValue(); + + if (Len == 0) // memcmp(s1,s2,0) -> 0 + return Constant::getNullValue(CI->getType()); + + // memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS + if (Len == 1) { + Value *LHSV = B.CreateZExt(B.CreateLoad(castToCStr(LHS, B), "lhsc"), + CI->getType(), "lhsv"); + Value *RHSV = B.CreateZExt(B.CreateLoad(castToCStr(RHS, B), "rhsc"), + CI->getType(), "rhsv"); + return B.CreateSub(LHSV, RHSV, "chardiff"); + } + + // memcmp(S1,S2,N/8)==0 -> (*(intN_t*)S1 != *(intN_t*)S2)==0 + if (DL.isLegalInteger(Len * 8) && isOnlyUsedInZeroEqualityComparison(CI)) { + + IntegerType *IntType = IntegerType::get(CI->getContext(), Len * 8); + unsigned PrefAlignment = DL.getPrefTypeAlignment(IntType); + + if (getKnownAlignment(LHS, DL, CI) >= PrefAlignment && + getKnownAlignment(RHS, DL, CI) >= PrefAlignment) { + + Type *LHSPtrTy = + IntType->getPointerTo(LHS->getType()->getPointerAddressSpace()); + Type *RHSPtrTy = + IntType->getPointerTo(RHS->getType()->getPointerAddressSpace()); + + Value *LHSV = + B.CreateLoad(B.CreateBitCast(LHS, LHSPtrTy, "lhsc"), "lhsv"); + Value *RHSV = + B.CreateLoad(B.CreateBitCast(RHS, RHSPtrTy, "rhsc"), "rhsv"); + + return B.CreateZExt(B.CreateICmpNE(LHSV, RHSV), CI->getType(), "memcmp"); + } + } + + // Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant) + StringRef LHSStr, RHSStr; + if (getConstantStringInfo(LHS, LHSStr) && + getConstantStringInfo(RHS, RHSStr)) { + // Make sure we're not reading out-of-bounds memory. 
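// Illustrative sketch (not part of this file): the memchr-to-bit-test rewrite
// above for memchr("\r\n", c, 2) whose result is only compared against null.
// '\r' is 13 and '\n' is 10, so the bit field is 16 bits wide and the bounds
// check uses that width. Function names are illustrative.
#include <cstring>
static bool crlfBefore(unsigned char c) { return memchr("\r\n", c, 2) != nullptr; }
static bool crlfAfter(unsigned char c) {
  const unsigned Width = 16;                          // NextPowerOf2(max(7, '\r'))
  const unsigned Bits = (1u << '\r') | (1u << '\n');  // one bit per byte of the string
  return c < Width && ((1u << c) & Bits) != 0;        // bounds check, then bit test
}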
+ if (Len > LHSStr.size() || Len > RHSStr.size()) + return nullptr; + // Fold the memcmp and normalize the result. This way we get consistent + // results across multiple platforms. + uint64_t Ret = 0; + int Cmp = memcmp(LHSStr.data(), RHSStr.data(), Len); + if (Cmp < 0) + Ret = -1; + else if (Cmp > 0) + Ret = 1; + return ConstantInt::get(CI->getType(), Ret); + } + + return nullptr; +} + +Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) { + // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1) + B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1); + return CI->getArgOperand(0); +} + +Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B) { + // memmove(x, y, n) -> llvm.memmove(x, y, n, 1) + B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1); + return CI->getArgOperand(0); +} + +// TODO: Does this belong in BuildLibCalls or should all of those similar +// functions be moved here? +static Value *emitCalloc(Value *Num, Value *Size, const AttributeSet &Attrs, + IRBuilder<> &B, const TargetLibraryInfo &TLI) { + LibFunc::Func Func; + if (!TLI.getLibFunc("calloc", Func) || !TLI.has(Func)) + return nullptr; + + Module *M = B.GetInsertBlock()->getModule(); + const DataLayout &DL = M->getDataLayout(); + IntegerType *PtrType = DL.getIntPtrType((B.GetInsertBlock()->getContext())); + Value *Calloc = M->getOrInsertFunction("calloc", Attrs, B.getInt8PtrTy(), + PtrType, PtrType, nullptr); + CallInst *CI = B.CreateCall(Calloc, { Num, Size }, "calloc"); + + if (const auto *F = dyn_cast<Function>(Calloc->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + +/// Fold memset[_chk](malloc(n), 0, n) --> calloc(1, n). +static Value *foldMallocMemset(CallInst *Memset, IRBuilder<> &B, + const TargetLibraryInfo &TLI) { + // This has to be a memset of zeros (bzero). + auto *FillValue = dyn_cast<ConstantInt>(Memset->getArgOperand(1)); + if (!FillValue || FillValue->getZExtValue() != 0) + return nullptr; + + // TODO: We should handle the case where the malloc has more than one use. + // This is necessary to optimize common patterns such as when the result of + // the malloc is checked against null or when a memset intrinsic is used in + // place of a memset library call. + auto *Malloc = dyn_cast<CallInst>(Memset->getArgOperand(0)); + if (!Malloc || !Malloc->hasOneUse()) + return nullptr; + + // Is the inner call really malloc()? + Function *InnerCallee = Malloc->getCalledFunction(); + LibFunc::Func Func; + if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) || + Func != LibFunc::malloc) + return nullptr; + + // The memset must cover the same number of bytes that are malloc'd. + if (Memset->getArgOperand(2) != Malloc->getArgOperand(0)) + return nullptr; + + // Replace the malloc with a calloc. We need the data layout to know what the + // actual size of a 'size_t' parameter is. 
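// Illustrative sketch (not part of this file): the source-level effect of the
// foldMallocMemset fold described above. It fires only when the malloc
// result's single use is a memset of zeros covering the whole allocation.
// Function names are illustrative.
#include <cstdlib>
#include <cstring>
static void *zallocBefore(size_t n) { return memset(malloc(n), 0, n); }
static void *zallocAfter(size_t n)  { return calloc(1, n); }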
+ B.SetInsertPoint(Malloc->getParent(), ++Malloc->getIterator()); + const DataLayout &DL = Malloc->getModule()->getDataLayout(); + IntegerType *SizeType = DL.getIntPtrType(B.GetInsertBlock()->getContext()); + Value *Calloc = emitCalloc(ConstantInt::get(SizeType, 1), + Malloc->getArgOperand(0), Malloc->getAttributes(), + B, TLI); + if (!Calloc) + return nullptr; + + Malloc->replaceAllUsesWith(Calloc); + Malloc->eraseFromParent(); + + return Calloc; +} + +Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilder<> &B) { + if (auto *Calloc = foldMallocMemset(CI, B, *TLI)) + return Calloc; + + // memset(p, v, n) -> llvm.memset(p, v, n, 1) + Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false); + B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1); + return CI->getArgOperand(0); +} + +//===----------------------------------------------------------------------===// +// Math Library Optimizations +//===----------------------------------------------------------------------===// + +/// Return a variant of Val with float type. +/// Currently this works in two cases: If Val is an FPExtension of a float +/// value to something bigger, simply return the operand. +/// If Val is a ConstantFP but can be converted to a float ConstantFP without +/// loss of precision do so. +static Value *valueHasFloatPrecision(Value *Val) { + if (FPExtInst *Cast = dyn_cast<FPExtInst>(Val)) { + Value *Op = Cast->getOperand(0); + if (Op->getType()->isFloatTy()) + return Op; + } + if (ConstantFP *Const = dyn_cast<ConstantFP>(Val)) { + APFloat F = Const->getValueAPF(); + bool losesInfo; + (void)F.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, + &losesInfo); + if (!losesInfo) + return ConstantFP::get(Const->getContext(), F); + } + return nullptr; +} + +/// Shrink double -> float for unary functions like 'floor'. +static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, + bool CheckRetType) { + Function *Callee = CI->getCalledFunction(); + // We know this libcall has a valid prototype, but we don't know which. + if (!CI->getType()->isDoubleTy()) + return nullptr; + + if (CheckRetType) { + // Check if all the uses for function like 'sin' are converted to float. + for (User *U : CI->users()) { + FPTruncInst *Cast = dyn_cast<FPTruncInst>(U); + if (!Cast || !Cast->getType()->isFloatTy()) + return nullptr; + } + } + + // If this is something like 'floor((double)floatval)', convert to floorf. + Value *V = valueHasFloatPrecision(CI->getArgOperand(0)); + if (V == nullptr) + return nullptr; + + // Propagate fast-math flags from the existing call to the new call. + IRBuilder<>::FastMathFlagGuard Guard(B); + B.setFastMathFlags(CI->getFastMathFlags()); + + // floor((double)floatval) -> (double)floorf(floatval) + if (Callee->isIntrinsic()) { + Module *M = CI->getModule(); + Intrinsic::ID IID = Callee->getIntrinsicID(); + Function *F = Intrinsic::getDeclaration(M, IID, B.getFloatTy()); + V = B.CreateCall(F, V); + } else { + // The call is a library call rather than an intrinsic. + V = emitUnaryFloatFnCall(V, Callee->getName(), B, Callee->getAttributes()); + } + + return B.CreateFPExt(V, B.getDoubleTy()); +} + +/// Shrink double -> float for binary functions like 'fmin/fmax'. +static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) { + Function *Callee = CI->getCalledFunction(); + // We know this libcall has a valid prototype, but we don't know which. 
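// Illustrative sketch (not part of this file): the double-to-float shrinking
// performed by optimizeUnaryDoubleFP above, when the only value widened to
// double is a float and every use truncates the result back to float.
// Function names are illustrative.
#include <math.h>
static float floorBefore(float f) { return (float)floor((double)f); }
static float floorAfter(float f)  { return floorf(f); } // floor((double)x) -> (double)floorf(x)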
+ if (!CI->getType()->isDoubleTy()) + return nullptr; + + // If this is something like 'fmin((double)floatval1, (double)floatval2)', + // or fmin(1.0, (double)floatval), then we convert it to fminf. + Value *V1 = valueHasFloatPrecision(CI->getArgOperand(0)); + if (V1 == nullptr) + return nullptr; + Value *V2 = valueHasFloatPrecision(CI->getArgOperand(1)); + if (V2 == nullptr) + return nullptr; + + // Propagate fast-math flags from the existing call to the new call. + IRBuilder<>::FastMathFlagGuard Guard(B); + B.setFastMathFlags(CI->getFastMathFlags()); + + // fmin((double)floatval1, (double)floatval2) + // -> (double)fminf(floatval1, floatval2) + // TODO: Handle intrinsics in the same way as in optimizeUnaryDoubleFP(). + Value *V = emitBinaryFloatFnCall(V1, V2, Callee->getName(), B, + Callee->getAttributes()); + return B.CreateFPExt(V, B.getDoubleTy()); +} + +Value *LibCallSimplifier::optimizeCos(CallInst *CI, IRBuilder<> &B) { + Function *Callee = CI->getCalledFunction(); + Value *Ret = nullptr; + StringRef Name = Callee->getName(); + if (UnsafeFPShrink && Name == "cos" && hasFloatVersion(Name)) + Ret = optimizeUnaryDoubleFP(CI, B, true); + + // cos(-x) -> cos(x) + Value *Op1 = CI->getArgOperand(0); + if (BinaryOperator::isFNeg(Op1)) { + BinaryOperator *BinExpr = cast<BinaryOperator>(Op1); + return B.CreateCall(Callee, BinExpr->getOperand(1), "cos"); + } + return Ret; +} + +static Value *getPow(Value *InnerChain[33], unsigned Exp, IRBuilder<> &B) { + // Multiplications calculated using Addition Chains. + // Refer: http://wwwhomes.uni-bielefeld.de/achim/addition_chain.html + + assert(Exp != 0 && "Incorrect exponent 0 not handled"); + + if (InnerChain[Exp]) + return InnerChain[Exp]; + + static const unsigned AddChain[33][2] = { + {0, 0}, // Unused. + {0, 0}, // Unused (base case = pow1). + {1, 1}, // Unused (pre-computed). + {1, 2}, {2, 2}, {2, 3}, {3, 3}, {2, 5}, {4, 4}, + {1, 8}, {5, 5}, {1, 10}, {6, 6}, {4, 9}, {7, 7}, + {3, 12}, {8, 8}, {8, 9}, {2, 16}, {1, 18}, {10, 10}, + {6, 15}, {11, 11}, {3, 20}, {12, 12}, {8, 17}, {13, 13}, + {3, 24}, {14, 14}, {4, 25}, {15, 15}, {3, 28}, {16, 16}, + }; + + InnerChain[Exp] = B.CreateFMul(getPow(InnerChain, AddChain[Exp][0], B), + getPow(InnerChain, AddChain[Exp][1], B)); + return InnerChain[Exp]; +} + +Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { + Function *Callee = CI->getCalledFunction(); + Value *Ret = nullptr; + StringRef Name = Callee->getName(); + if (UnsafeFPShrink && Name == "pow" && hasFloatVersion(Name)) + Ret = optimizeUnaryDoubleFP(CI, B, true); + + Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1); + + // pow(1.0, x) -> 1.0 + if (match(Op1, m_SpecificFP(1.0))) + return Op1; + // pow(2.0, x) -> llvm.exp2(x) + if (match(Op1, m_SpecificFP(2.0))) { + Value *Exp2 = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::exp2, + CI->getType()); + return B.CreateCall(Exp2, Op2, "exp2"); + } + + // There's no llvm.exp10 intrinsic yet, but, maybe, some day there will + // be one. + if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) { + // pow(10.0, x) -> exp10(x) + if (Op1C->isExactlyValue(10.0) && + hasUnaryFloatFn(TLI, Op1->getType(), LibFunc::exp10, LibFunc::exp10f, + LibFunc::exp10l)) + return emitUnaryFloatFnCall(Op2, TLI->getName(LibFunc::exp10), B, + Callee->getAttributes()); + } + + // pow(exp(x), y) -> exp(x * y) + // pow(exp2(x), y) -> exp2(x * y) + // We enable these only with fast-math. 
Besides rounding differences, the + // transformation changes overflow and underflow behavior quite dramatically. + // Example: x = 1000, y = 0.001. + // pow(exp(x), y) = pow(inf, 0.001) = inf, whereas exp(x*y) = exp(1). + auto *OpC = dyn_cast<CallInst>(Op1); + if (OpC && OpC->hasUnsafeAlgebra() && CI->hasUnsafeAlgebra()) { + LibFunc::Func Func; + Function *OpCCallee = OpC->getCalledFunction(); + if (OpCCallee && TLI->getLibFunc(OpCCallee->getName(), Func) && + TLI->has(Func) && (Func == LibFunc::exp || Func == LibFunc::exp2)) { + IRBuilder<>::FastMathFlagGuard Guard(B); + B.setFastMathFlags(CI->getFastMathFlags()); + Value *FMul = B.CreateFMul(OpC->getArgOperand(0), Op2, "mul"); + return emitUnaryFloatFnCall(FMul, OpCCallee->getName(), B, + OpCCallee->getAttributes()); + } + } + + ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2); + if (!Op2C) + return Ret; + + if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0 + return ConstantFP::get(CI->getType(), 1.0); + + if (Op2C->isExactlyValue(0.5) && + hasUnaryFloatFn(TLI, Op2->getType(), LibFunc::sqrt, LibFunc::sqrtf, + LibFunc::sqrtl) && + hasUnaryFloatFn(TLI, Op2->getType(), LibFunc::fabs, LibFunc::fabsf, + LibFunc::fabsl)) { + + // In -ffast-math, pow(x, 0.5) -> sqrt(x). + if (CI->hasUnsafeAlgebra()) { + IRBuilder<>::FastMathFlagGuard Guard(B); + B.setFastMathFlags(CI->getFastMathFlags()); + + // Unlike other math intrinsics, sqrt has differerent semantics + // from the libc function. See LangRef for details. + return emitUnaryFloatFnCall(Op1, TLI->getName(LibFunc::sqrt), B, + Callee->getAttributes()); + } + + // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))). + // This is faster than calling pow, and still handles negative zero + // and negative infinity correctly. + // TODO: In finite-only mode, this could be just fabs(sqrt(x)). + Value *Inf = ConstantFP::getInfinity(CI->getType()); + Value *NegInf = ConstantFP::getInfinity(CI->getType(), true); + Value *Sqrt = emitUnaryFloatFnCall(Op1, "sqrt", B, Callee->getAttributes()); + Value *FAbs = + emitUnaryFloatFnCall(Sqrt, "fabs", B, Callee->getAttributes()); + Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf); + Value *Sel = B.CreateSelect(FCmp, Inf, FAbs); + return Sel; + } + + if (Op2C->isExactlyValue(1.0)) // pow(x, 1.0) -> x + return Op1; + if (Op2C->isExactlyValue(2.0)) // pow(x, 2.0) -> x*x + return B.CreateFMul(Op1, Op1, "pow2"); + if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x + return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Op1, "powrecip"); + + // In -ffast-math, generate repeated fmul instead of generating pow(x, n). + if (CI->hasUnsafeAlgebra()) { + APFloat V = abs(Op2C->getValueAPF()); + // We limit to a max of 7 fmul(s). Thus max exponent is 32. + // This transformation applies to integer exponents only. + if (V.compare(APFloat(V.getSemantics(), 32.0)) == APFloat::cmpGreaterThan || + !V.isInteger()) + return nullptr; + + // We will memoize intermediate products of the Addition Chain. + Value *InnerChain[33] = {nullptr}; + InnerChain[1] = Op1; + InnerChain[2] = B.CreateFMul(Op1, Op1); + + // We cannot readily convert a non-double type (like float) to a double. + // So we first convert V to something which could be converted to double. + bool ignored; + V.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &ignored); + + // TODO: Should the new instructions propagate the 'fast' flag of the pow()? + Value *FMul = getPow(InnerChain, V.convertToDouble(), B); + // For negative exponents simply compute the reciprocal. 
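+ // (e.g. pow(x, 13) is built with 5 fmuls from the chain above: x^2, then
+ // x^4 = x^2*x^2, x^8 = x^4*x^4, x^9 = x*x^8, x^13 = x^4*x^9; a negative
+ // exponent such as pow(x, -13) costs one extra fdiv for the reciprocal)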
+ if (Op2C->isNegative())
+ FMul = B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), FMul);
+ return FMul;
+ }
+
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ Value *Ret = nullptr;
+ StringRef Name = Callee->getName();
+ if (UnsafeFPShrink && Name == "exp2" && hasFloatVersion(Name))
+ Ret = optimizeUnaryDoubleFP(CI, B, true);
+
+ Value *Op = CI->getArgOperand(0);
+ // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32
+ // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32
+ LibFunc::Func LdExp = LibFunc::ldexpl;
+ if (Op->getType()->isFloatTy())
+ LdExp = LibFunc::ldexpf;
+ else if (Op->getType()->isDoubleTy())
+ LdExp = LibFunc::ldexp;
+
+ if (TLI->has(LdExp)) {
+ Value *LdExpArg = nullptr;
+ if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) {
+ if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32)
+ LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty());
+ } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) {
+ if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32)
+ LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty());
+ }
+
+ if (LdExpArg) {
+ Constant *One = ConstantFP::get(CI->getContext(), APFloat(1.0f));
+ if (!Op->getType()->isFloatTy())
+ One = ConstantExpr::getFPExtend(One, Op->getType());
+
+ Module *M = CI->getModule();
+ Value *NewCallee =
+ M->getOrInsertFunction(TLI->getName(LdExp), Op->getType(),
+ Op->getType(), B.getInt32Ty(), nullptr);
+ CallInst *CI = B.CreateCall(NewCallee, {One, LdExpArg});
+ if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+ }
+ }
+ return Ret;
+}
+
+Value *LibCallSimplifier::optimizeFabs(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ StringRef Name = Callee->getName();
+ if (Name == "fabs" && hasFloatVersion(Name))
+ return optimizeUnaryDoubleFP(CI, B, false);
+
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ // If we can shrink the call to a float function rather than a double
+ // function, do that first.
+ StringRef Name = Callee->getName();
+ if ((Name == "fmin" || Name == "fmax") && hasFloatVersion(Name))
+ if (Value *Ret = optimizeBinaryDoubleFP(CI, B))
+ return Ret;
+
+ IRBuilder<>::FastMathFlagGuard Guard(B);
+ FastMathFlags FMF;
+ if (CI->hasUnsafeAlgebra()) {
+ // Unsafe algebra sets all fast-math-flags to true.
+ FMF.setUnsafeAlgebra();
+ } else {
+ // At a minimum, no-nans-fp-math must be true.
+ if (!CI->hasNoNaNs())
+ return nullptr;
+ // No-signed-zeros is implied by the definitions of fmax/fmin themselves:
+ // "Ideally, fmax would be sensitive to the sign of zero, for example
+ // fmax(-0.0, +0.0) would return +0; however, implementation in software
+ // might be impractical."
+ FMF.setNoSignedZeros();
+ FMF.setNoNaNs();
+ }
+ B.setFastMathFlags(FMF);
+
+ // We have a relaxed floating-point environment. We can ignore NaN-handling
+ // and transform to a compare and select. We do not have to consider errno or
+ // exceptions, because fmin/fmax do not have those.
+ Value *Op0 = CI->getArgOperand(0);
+ Value *Op1 = CI->getArgOperand(1);
+ Value *Cmp = Callee->getName().startswith("fmin") ?
+ B.CreateFCmpOLT(Op0, Op1) : B.CreateFCmpOGT(Op0, Op1); + return B.CreateSelect(Cmp, Op0, Op1); +} + +Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) { + Function *Callee = CI->getCalledFunction(); + Value *Ret = nullptr; + StringRef Name = Callee->getName(); + if (UnsafeFPShrink && hasFloatVersion(Name)) + Ret = optimizeUnaryDoubleFP(CI, B, true); + + if (!CI->hasUnsafeAlgebra()) + return Ret; + Value *Op1 = CI->getArgOperand(0); + auto *OpC = dyn_cast<CallInst>(Op1); + + // The earlier call must also be unsafe in order to do these transforms. + if (!OpC || !OpC->hasUnsafeAlgebra()) + return Ret; + + // log(pow(x,y)) -> y*log(x) + // This is only applicable to log, log2, log10. + if (Name != "log" && Name != "log2" && Name != "log10") + return Ret; + + IRBuilder<>::FastMathFlagGuard Guard(B); + FastMathFlags FMF; + FMF.setUnsafeAlgebra(); + B.setFastMathFlags(FMF); + + LibFunc::Func Func; + Function *F = OpC->getCalledFunction(); + if (F && ((TLI->getLibFunc(F->getName(), Func) && TLI->has(Func) && + Func == LibFunc::pow) || F->getIntrinsicID() == Intrinsic::pow)) + return B.CreateFMul(OpC->getArgOperand(1), + emitUnaryFloatFnCall(OpC->getOperand(0), Callee->getName(), B, + Callee->getAttributes()), "mul"); + + // log(exp2(y)) -> y*log(2) + if (F && Name == "log" && TLI->getLibFunc(F->getName(), Func) && + TLI->has(Func) && Func == LibFunc::exp2) + return B.CreateFMul( + OpC->getArgOperand(0), + emitUnaryFloatFnCall(ConstantFP::get(CI->getType(), 2.0), + Callee->getName(), B, Callee->getAttributes()), + "logmul"); + return Ret; +} + +Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) { + Function *Callee = CI->getCalledFunction(); + Value *Ret = nullptr; + if (TLI->has(LibFunc::sqrtf) && (Callee->getName() == "sqrt" || + Callee->getIntrinsicID() == Intrinsic::sqrt)) + Ret = optimizeUnaryDoubleFP(CI, B, true); + + if (!CI->hasUnsafeAlgebra()) + return Ret; + + Instruction *I = dyn_cast<Instruction>(CI->getArgOperand(0)); + if (!I || I->getOpcode() != Instruction::FMul || !I->hasUnsafeAlgebra()) + return Ret; + + // We're looking for a repeated factor in a multiplication tree, + // so we can do this fold: sqrt(x * x) -> fabs(x); + // or this fold: sqrt((x * x) * y) -> fabs(x) * sqrt(y). + Value *Op0 = I->getOperand(0); + Value *Op1 = I->getOperand(1); + Value *RepeatOp = nullptr; + Value *OtherOp = nullptr; + if (Op0 == Op1) { + // Simple match: the operands of the multiply are identical. + RepeatOp = Op0; + } else { + // Look for a more complicated pattern: one of the operands is itself + // a multiply, so search for a common factor in that multiply. + // Note: We don't bother looking any deeper than this first level or for + // variations of this pattern because instcombine's visitFMUL and/or the + // reassociation pass should give us this form. + Value *OtherMul0, *OtherMul1; + if (match(Op0, m_FMul(m_Value(OtherMul0), m_Value(OtherMul1)))) { + // Pattern: sqrt((x * y) * z) + if (OtherMul0 == OtherMul1 && + cast<Instruction>(Op0)->hasUnsafeAlgebra()) { + // Matched: sqrt((x * x) * z) + RepeatOp = OtherMul0; + OtherOp = Op1; + } + } + } + if (!RepeatOp) + return Ret; + + // Fast math flags for any created instructions should match the sqrt + // and multiply. + IRBuilder<>::FastMathFlagGuard Guard(B); + B.setFastMathFlags(I->getFastMathFlags()); + + // If we found a repeated factor, hoist it out of the square root and + // replace it with the fabs of that factor. 
+ Module *M = Callee->getParent(); + Type *ArgType = I->getType(); + Value *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, ArgType); + Value *FabsCall = B.CreateCall(Fabs, RepeatOp, "fabs"); + if (OtherOp) { + // If we found a non-repeated factor, we still need to get its square + // root. We then multiply that by the value that was simplified out + // of the square root calculation. + Value *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, ArgType); + Value *SqrtCall = B.CreateCall(Sqrt, OtherOp, "sqrt"); + return B.CreateFMul(FabsCall, SqrtCall); + } + return FabsCall; +} + +// TODO: Generalize to handle any trig function and its inverse. +Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilder<> &B) { + Function *Callee = CI->getCalledFunction(); + Value *Ret = nullptr; + StringRef Name = Callee->getName(); + if (UnsafeFPShrink && Name == "tan" && hasFloatVersion(Name)) + Ret = optimizeUnaryDoubleFP(CI, B, true); + + Value *Op1 = CI->getArgOperand(0); + auto *OpC = dyn_cast<CallInst>(Op1); + if (!OpC) + return Ret; + + // Both calls must allow unsafe optimizations in order to remove them. + if (!CI->hasUnsafeAlgebra() || !OpC->hasUnsafeAlgebra()) + return Ret; + + // tan(atan(x)) -> x + // tanf(atanf(x)) -> x + // tanl(atanl(x)) -> x + LibFunc::Func Func; + Function *F = OpC->getCalledFunction(); + if (F && TLI->getLibFunc(F->getName(), Func) && TLI->has(Func) && + ((Func == LibFunc::atan && Callee->getName() == "tan") || + (Func == LibFunc::atanf && Callee->getName() == "tanf") || + (Func == LibFunc::atanl && Callee->getName() == "tanl"))) + Ret = OpC->getArgOperand(0); + return Ret; +} + +static bool isTrigLibCall(CallInst *CI) { + // We can only hope to do anything useful if we can ignore things like errno + // and floating-point exceptions. + // We already checked the prototype. + return CI->hasFnAttr(Attribute::NoUnwind) && + CI->hasFnAttr(Attribute::ReadNone); +} + +static void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg, + bool UseFloat, Value *&Sin, Value *&Cos, + Value *&SinCos) { + Type *ArgTy = Arg->getType(); + Type *ResTy; + StringRef Name; + + Triple T(OrigCallee->getParent()->getTargetTriple()); + if (UseFloat) { + Name = "__sincospif_stret"; + + assert(T.getArch() != Triple::x86 && "x86 messy and unsupported for now"); + // x86_64 can't use {float, float} since that would be returned in both + // xmm0 and xmm1, which isn't what a real struct would do. + ResTy = T.getArch() == Triple::x86_64 + ? static_cast<Type *>(VectorType::get(ArgTy, 2)) + : static_cast<Type *>(StructType::get(ArgTy, ArgTy, nullptr)); + } else { + Name = "__sincospi_stret"; + ResTy = StructType::get(ArgTy, ArgTy, nullptr); + } + + Module *M = OrigCallee->getParent(); + Value *Callee = M->getOrInsertFunction(Name, OrigCallee->getAttributes(), + ResTy, ArgTy, nullptr); + + if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) { + // If the argument is an instruction, it must dominate all uses so put our + // sincos call there. + B.SetInsertPoint(ArgInst->getParent(), ++ArgInst->getIterator()); + } else { + // Otherwise (e.g. for a constant) the beginning of the function is as + // good a place as any. 
+ BasicBlock &EntryBB = B.GetInsertBlock()->getParent()->getEntryBlock(); + B.SetInsertPoint(&EntryBB, EntryBB.begin()); + } + + SinCos = B.CreateCall(Callee, Arg, "sincospi"); + + if (SinCos->getType()->isStructTy()) { + Sin = B.CreateExtractValue(SinCos, 0, "sinpi"); + Cos = B.CreateExtractValue(SinCos, 1, "cospi"); + } else { + Sin = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 0), + "sinpi"); + Cos = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 1), + "cospi"); + } +} + +Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, IRBuilder<> &B) { + // Make sure the prototype is as expected, otherwise the rest of the + // function is probably invalid and likely to abort. + if (!isTrigLibCall(CI)) + return nullptr; + + Value *Arg = CI->getArgOperand(0); + SmallVector<CallInst *, 1> SinCalls; + SmallVector<CallInst *, 1> CosCalls; + SmallVector<CallInst *, 1> SinCosCalls; + + bool IsFloat = Arg->getType()->isFloatTy(); + + // Look for all compatible sinpi, cospi and sincospi calls with the same + // argument. If there are enough (in some sense) we can make the + // substitution. + Function *F = CI->getFunction(); + for (User *U : Arg->users()) + classifyArgUse(U, F, IsFloat, SinCalls, CosCalls, SinCosCalls); + + // It's only worthwhile if both sinpi and cospi are actually used. + if (SinCosCalls.empty() && (SinCalls.empty() || CosCalls.empty())) + return nullptr; + + Value *Sin, *Cos, *SinCos; + insertSinCosCall(B, CI->getCalledFunction(), Arg, IsFloat, Sin, Cos, SinCos); + + auto replaceTrigInsts = [this](SmallVectorImpl<CallInst *> &Calls, + Value *Res) { + for (CallInst *C : Calls) + replaceAllUsesWith(C, Res); + }; + + replaceTrigInsts(SinCalls, Sin); + replaceTrigInsts(CosCalls, Cos); + replaceTrigInsts(SinCosCalls, SinCos); + + return nullptr; +} + +void LibCallSimplifier::classifyArgUse( + Value *Val, Function *F, bool IsFloat, + SmallVectorImpl<CallInst *> &SinCalls, + SmallVectorImpl<CallInst *> &CosCalls, + SmallVectorImpl<CallInst *> &SinCosCalls) { + CallInst *CI = dyn_cast<CallInst>(Val); + + if (!CI) + return; + + // Don't consider calls in other functions. + if (CI->getFunction() != F) + return; + + Function *Callee = CI->getCalledFunction(); + LibFunc::Func Func; + if (!Callee || !TLI->getLibFunc(*Callee, Func) || !TLI->has(Func) || + !isTrigLibCall(CI)) + return; + + if (IsFloat) { + if (Func == LibFunc::sinpif) + SinCalls.push_back(CI); + else if (Func == LibFunc::cospif) + CosCalls.push_back(CI); + else if (Func == LibFunc::sincospif_stret) + SinCosCalls.push_back(CI); + } else { + if (Func == LibFunc::sinpi) + SinCalls.push_back(CI); + else if (Func == LibFunc::cospi) + CosCalls.push_back(CI); + else if (Func == LibFunc::sincospi_stret) + SinCosCalls.push_back(CI); + } +} + +//===----------------------------------------------------------------------===// +// Integer Library Call Optimizations +//===----------------------------------------------------------------------===// + +Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilder<> &B) { + // ffs(x) -> x != 0 ? 
(i32)llvm.cttz(x)+1 : 0 + Value *Op = CI->getArgOperand(0); + Type *ArgType = Op->getType(); + Value *F = Intrinsic::getDeclaration(CI->getCalledFunction()->getParent(), + Intrinsic::cttz, ArgType); + Value *V = B.CreateCall(F, {Op, B.getTrue()}, "cttz"); + V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1)); + V = B.CreateIntCast(V, B.getInt32Ty(), false); + + Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType)); + return B.CreateSelect(Cond, V, B.getInt32(0)); +} + +Value *LibCallSimplifier::optimizeFls(CallInst *CI, IRBuilder<> &B) { + // fls(x) -> (i32)(sizeInBits(x) - llvm.ctlz(x, false)) + Value *Op = CI->getArgOperand(0); + Type *ArgType = Op->getType(); + Value *F = Intrinsic::getDeclaration(CI->getCalledFunction()->getParent(), + Intrinsic::ctlz, ArgType); + Value *V = B.CreateCall(F, {Op, B.getFalse()}, "ctlz"); + V = B.CreateSub(ConstantInt::get(V->getType(), ArgType->getIntegerBitWidth()), + V); + return B.CreateIntCast(V, CI->getType(), false); +} + +Value *LibCallSimplifier::optimizeAbs(CallInst *CI, IRBuilder<> &B) { + // abs(x) -> x >s -1 ? x : -x + Value *Op = CI->getArgOperand(0); + Value *Pos = + B.CreateICmpSGT(Op, Constant::getAllOnesValue(Op->getType()), "ispos"); + Value *Neg = B.CreateNeg(Op, "neg"); + return B.CreateSelect(Pos, Op, Neg); +} + +Value *LibCallSimplifier::optimizeIsDigit(CallInst *CI, IRBuilder<> &B) { + // isdigit(c) -> (c-'0') <u 10 + Value *Op = CI->getArgOperand(0); + Op = B.CreateSub(Op, B.getInt32('0'), "isdigittmp"); + Op = B.CreateICmpULT(Op, B.getInt32(10), "isdigit"); + return B.CreateZExt(Op, CI->getType()); +} + +Value *LibCallSimplifier::optimizeIsAscii(CallInst *CI, IRBuilder<> &B) { + // isascii(c) -> c <u 128 + Value *Op = CI->getArgOperand(0); + Op = B.CreateICmpULT(Op, B.getInt32(128), "isascii"); + return B.CreateZExt(Op, CI->getType()); +} + +Value *LibCallSimplifier::optimizeToAscii(CallInst *CI, IRBuilder<> &B) { + // toascii(c) -> c & 0x7f + return B.CreateAnd(CI->getArgOperand(0), + ConstantInt::get(CI->getType(), 0x7F)); +} + +//===----------------------------------------------------------------------===// +// Formatting and IO Library Call Optimizations +//===----------------------------------------------------------------------===// + +static bool isReportingError(Function *Callee, CallInst *CI, int StreamArg); + +Value *LibCallSimplifier::optimizeErrorReporting(CallInst *CI, IRBuilder<> &B, + int StreamArg) { + Function *Callee = CI->getCalledFunction(); + // Error reporting calls should be cold, mark them as such. + // This applies even to non-builtin calls: it is only a hint and applies to + // functions that the frontend might not understand as builtins. + + // This heuristic was suggested in: + // Improving Static Branch Prediction in a Compiler + // Brian L. Deitrich, Ben-Chung Cheng, Wen-mei W. Hwu + // Proceedings of PACT'98, Oct. 1998, IEEE + if (!CI->hasFnAttr(Attribute::Cold) && + isReportingError(Callee, CI, StreamArg)) { + CI->addAttribute(AttributeSet::FunctionIndex, Attribute::Cold); + } + + return nullptr; +} + +static bool isReportingError(Function *Callee, CallInst *CI, int StreamArg) { + if (!ColdErrorCalls || !Callee || !Callee->isDeclaration()) + return false; + + if (StreamArg < 0) + return true; + + // These functions might be considered cold, but only if their stream + // argument is stderr. 
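+ // (the stream is recognized below as a direct load of the global "stderr"
+ // declaration)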
+ + if (StreamArg >= (int)CI->getNumArgOperands()) + return false; + LoadInst *LI = dyn_cast<LoadInst>(CI->getArgOperand(StreamArg)); + if (!LI) + return false; + GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getPointerOperand()); + if (!GV || !GV->isDeclaration()) + return false; + return GV->getName() == "stderr"; +} + +Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilder<> &B) { + // Check for a fixed format string. + StringRef FormatStr; + if (!getConstantStringInfo(CI->getArgOperand(0), FormatStr)) + return nullptr; + + // Empty format string -> noop. + if (FormatStr.empty()) // Tolerate printf's declared void. + return CI->use_empty() ? (Value *)CI : ConstantInt::get(CI->getType(), 0); + + // Do not do any of the following transformations if the printf return value + // is used, in general the printf return value is not compatible with either + // putchar() or puts(). + if (!CI->use_empty()) + return nullptr; + + // printf("x") -> putchar('x'), even for "%" and "%%". + if (FormatStr.size() == 1 || FormatStr == "%%") + return emitPutChar(B.getInt32(FormatStr[0]), B, TLI); + + // printf("%s", "a") --> putchar('a') + if (FormatStr == "%s" && CI->getNumArgOperands() > 1) { + StringRef ChrStr; + if (!getConstantStringInfo(CI->getOperand(1), ChrStr)) + return nullptr; + if (ChrStr.size() != 1) + return nullptr; + return emitPutChar(B.getInt32(ChrStr[0]), B, TLI); + } + + // printf("foo\n") --> puts("foo") + if (FormatStr[FormatStr.size() - 1] == '\n' && + FormatStr.find('%') == StringRef::npos) { // No format characters. + // Create a string literal with no \n on it. We expect the constant merge + // pass to be run after this pass, to merge duplicate strings. + FormatStr = FormatStr.drop_back(); + Value *GV = B.CreateGlobalString(FormatStr, "str"); + return emitPutS(GV, B, TLI); + } + + // Optimize specific format strings. + // printf("%c", chr) --> putchar(chr) + if (FormatStr == "%c" && CI->getNumArgOperands() > 1 && + CI->getArgOperand(1)->getType()->isIntegerTy()) + return emitPutChar(CI->getArgOperand(1), B, TLI); + + // printf("%s\n", str) --> puts(str) + if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 && + CI->getArgOperand(1)->getType()->isPointerTy()) + return emitPutS(CI->getArgOperand(1), B, TLI); + return nullptr; +} + +Value *LibCallSimplifier::optimizePrintF(CallInst *CI, IRBuilder<> &B) { + + Function *Callee = CI->getCalledFunction(); + FunctionType *FT = Callee->getFunctionType(); + if (Value *V = optimizePrintFString(CI, B)) { + return V; + } + + // printf(format, ...) -> iprintf(format, ...) if no floating point + // arguments. + if (TLI->has(LibFunc::iprintf) && !callHasFloatingPointArgument(CI)) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + Constant *IPrintFFn = + M->getOrInsertFunction("iprintf", FT, Callee->getAttributes()); + CallInst *New = cast<CallInst>(CI->clone()); + New->setCalledFunction(IPrintFFn); + B.Insert(New); + return New; + } + return nullptr; +} + +Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) { + // Check for a fixed format string. + StringRef FormatStr; + if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr)) + return nullptr; + + // If we just have a format string (nothing else crazy) transform it. + if (CI->getNumArgOperands() == 2) { + // Make sure there's no % in the constant array. We could try to handle + // %% -> % in the future if we cared. 
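+ // (when the string contains no '%' at all, e.g. sprintf(buf, "hi") becomes a
+ // 3-byte memcpy, nul included, and the call evaluates to 2)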
+ for (unsigned i = 0, e = FormatStr.size(); i != e; ++i) + if (FormatStr[i] == '%') + return nullptr; // we found a format specifier, bail out. + + // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1) + B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), + ConstantInt::get(DL.getIntPtrType(CI->getContext()), + FormatStr.size() + 1), + 1); // Copy the null byte. + return ConstantInt::get(CI->getType(), FormatStr.size()); + } + + // The remaining optimizations require the format string to be "%s" or "%c" + // and have an extra operand. + if (FormatStr.size() != 2 || FormatStr[0] != '%' || + CI->getNumArgOperands() < 3) + return nullptr; + + // Decode the second character of the format string. + if (FormatStr[1] == 'c') { + // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0 + if (!CI->getArgOperand(2)->getType()->isIntegerTy()) + return nullptr; + Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char"); + Value *Ptr = castToCStr(CI->getArgOperand(0), B); + B.CreateStore(V, Ptr); + Ptr = B.CreateGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul"); + B.CreateStore(B.getInt8(0), Ptr); + + return ConstantInt::get(CI->getType(), 1); + } + + if (FormatStr[1] == 's') { + // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1) + if (!CI->getArgOperand(2)->getType()->isPointerTy()) + return nullptr; + + Value *Len = emitStrLen(CI->getArgOperand(2), B, DL, TLI); + if (!Len) + return nullptr; + Value *IncLen = + B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc"); + B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1); + + // The sprintf result is the unincremented number of bytes in the string. + return B.CreateIntCast(Len, CI->getType(), false); + } + return nullptr; +} + +Value *LibCallSimplifier::optimizeSPrintF(CallInst *CI, IRBuilder<> &B) { + Function *Callee = CI->getCalledFunction(); + FunctionType *FT = Callee->getFunctionType(); + if (Value *V = optimizeSPrintFString(CI, B)) { + return V; + } + + // sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating + // point arguments. + if (TLI->has(LibFunc::siprintf) && !callHasFloatingPointArgument(CI)) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + Constant *SIPrintFFn = + M->getOrInsertFunction("siprintf", FT, Callee->getAttributes()); + CallInst *New = cast<CallInst>(CI->clone()); + New->setCalledFunction(SIPrintFFn); + B.Insert(New); + return New; + } + return nullptr; +} + +Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI, IRBuilder<> &B) { + optimizeErrorReporting(CI, B, 0); + + // All the optimizations depend on the format string. + StringRef FormatStr; + if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr)) + return nullptr; + + // Do not do any of the following transformations if the fprintf return + // value is used, in general the fprintf return value is not compatible + // with fwrite(), fputc() or fputs(). + if (!CI->use_empty()) + return nullptr; + + // fprintf(F, "foo") --> fwrite("foo", 3, 1, F) + if (CI->getNumArgOperands() == 2) { + for (unsigned i = 0, e = FormatStr.size(); i != e; ++i) + if (FormatStr[i] == '%') // Could handle %% -> % if we cared. + return nullptr; // We found a format specifier. + + return emitFWrite( + CI->getArgOperand(1), + ConstantInt::get(DL.getIntPtrType(CI->getContext()), FormatStr.size()), + CI->getArgOperand(0), B, DL, TLI); + } + + // The remaining optimizations require the format string to be "%s" or "%c" + // and have an extra operand. 
+ if (FormatStr.size() != 2 || FormatStr[0] != '%' || + CI->getNumArgOperands() < 3) + return nullptr; + + // Decode the second character of the format string. + if (FormatStr[1] == 'c') { + // fprintf(F, "%c", chr) --> fputc(chr, F) + if (!CI->getArgOperand(2)->getType()->isIntegerTy()) + return nullptr; + return emitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI); + } + + if (FormatStr[1] == 's') { + // fprintf(F, "%s", str) --> fputs(str, F) + if (!CI->getArgOperand(2)->getType()->isPointerTy()) + return nullptr; + return emitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI); + } + return nullptr; +} + +Value *LibCallSimplifier::optimizeFPrintF(CallInst *CI, IRBuilder<> &B) { + Function *Callee = CI->getCalledFunction(); + FunctionType *FT = Callee->getFunctionType(); + if (Value *V = optimizeFPrintFString(CI, B)) { + return V; + } + + // fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no + // floating point arguments. + if (TLI->has(LibFunc::fiprintf) && !callHasFloatingPointArgument(CI)) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + Constant *FIPrintFFn = + M->getOrInsertFunction("fiprintf", FT, Callee->getAttributes()); + CallInst *New = cast<CallInst>(CI->clone()); + New->setCalledFunction(FIPrintFFn); + B.Insert(New); + return New; + } + return nullptr; +} + +Value *LibCallSimplifier::optimizeFWrite(CallInst *CI, IRBuilder<> &B) { + optimizeErrorReporting(CI, B, 3); + + // Get the element size and count. + ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); + ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); + if (!SizeC || !CountC) + return nullptr; + uint64_t Bytes = SizeC->getZExtValue() * CountC->getZExtValue(); + + // If this is writing zero records, remove the call (it's a noop). + if (Bytes == 0) + return ConstantInt::get(CI->getType(), 0); + + // If this is writing one byte, turn it into fputc. + // This optimisation is only valid, if the return value is unused. + if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F) + Value *Char = B.CreateLoad(castToCStr(CI->getArgOperand(0), B), "char"); + Value *NewCI = emitFPutC(Char, CI->getArgOperand(3), B, TLI); + return NewCI ? ConstantInt::get(CI->getType(), 1) : nullptr; + } + + return nullptr; +} + +Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) { + optimizeErrorReporting(CI, B, 1); + + // Don't rewrite fputs to fwrite when optimising for size because fwrite + // requires more arguments and thus extra MOVs are required. + if (CI->getParent()->getParent()->optForSize()) + return nullptr; + + // We can't optimize if return value is used. + if (!CI->use_empty()) + return nullptr; + + // fputs(s,F) --> fwrite(s,1,strlen(s),F) + uint64_t Len = GetStringLength(CI->getArgOperand(0)); + if (!Len) + return nullptr; + + // Known to have no uses (see above). + return emitFWrite( + CI->getArgOperand(0), + ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len - 1), + CI->getArgOperand(1), B, DL, TLI); +} + +Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilder<> &B) { + // Check for a constant string. 
+ StringRef Str; + if (!getConstantStringInfo(CI->getArgOperand(0), Str)) + return nullptr; + + if (Str.empty() && CI->use_empty()) { + // puts("") -> putchar('\n') + Value *Res = emitPutChar(B.getInt32('\n'), B, TLI); + if (CI->use_empty() || !Res) + return Res; + return B.CreateIntCast(Res, CI->getType(), true); + } + + return nullptr; +} + +bool LibCallSimplifier::hasFloatVersion(StringRef FuncName) { + LibFunc::Func Func; + SmallString<20> FloatFuncName = FuncName; + FloatFuncName += 'f'; + if (TLI->getLibFunc(FloatFuncName, Func)) + return TLI->has(Func); + return false; +} + +Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI, + IRBuilder<> &Builder) { + LibFunc::Func Func; + Function *Callee = CI->getCalledFunction(); + // Check for string/memory library functions. + if (TLI->getLibFunc(*Callee, Func) && TLI->has(Func)) { + // Make sure we never change the calling convention. + assert((ignoreCallingConv(Func) || + isCallingConvCCompatible(CI)) && + "Optimizing string/memory libcall would change the calling convention"); + switch (Func) { + case LibFunc::strcat: + return optimizeStrCat(CI, Builder); + case LibFunc::strncat: + return optimizeStrNCat(CI, Builder); + case LibFunc::strchr: + return optimizeStrChr(CI, Builder); + case LibFunc::strrchr: + return optimizeStrRChr(CI, Builder); + case LibFunc::strcmp: + return optimizeStrCmp(CI, Builder); + case LibFunc::strncmp: + return optimizeStrNCmp(CI, Builder); + case LibFunc::strcpy: + return optimizeStrCpy(CI, Builder); + case LibFunc::stpcpy: + return optimizeStpCpy(CI, Builder); + case LibFunc::strncpy: + return optimizeStrNCpy(CI, Builder); + case LibFunc::strlen: + return optimizeStrLen(CI, Builder); + case LibFunc::strpbrk: + return optimizeStrPBrk(CI, Builder); + case LibFunc::strtol: + case LibFunc::strtod: + case LibFunc::strtof: + case LibFunc::strtoul: + case LibFunc::strtoll: + case LibFunc::strtold: + case LibFunc::strtoull: + return optimizeStrTo(CI, Builder); + case LibFunc::strspn: + return optimizeStrSpn(CI, Builder); + case LibFunc::strcspn: + return optimizeStrCSpn(CI, Builder); + case LibFunc::strstr: + return optimizeStrStr(CI, Builder); + case LibFunc::memchr: + return optimizeMemChr(CI, Builder); + case LibFunc::memcmp: + return optimizeMemCmp(CI, Builder); + case LibFunc::memcpy: + return optimizeMemCpy(CI, Builder); + case LibFunc::memmove: + return optimizeMemMove(CI, Builder); + case LibFunc::memset: + return optimizeMemSet(CI, Builder); + default: + break; + } + } + return nullptr; +} + +Value *LibCallSimplifier::optimizeCall(CallInst *CI) { + if (CI->isNoBuiltin()) + return nullptr; + + LibFunc::Func Func; + Function *Callee = CI->getCalledFunction(); + StringRef FuncName = Callee->getName(); + + SmallVector<OperandBundleDef, 2> OpBundles; + CI->getOperandBundlesAsDefs(OpBundles); + IRBuilder<> Builder(CI, /*FPMathTag=*/nullptr, OpBundles); + bool isCallingConvC = isCallingConvCCompatible(CI); + + // Command-line parameter overrides instruction attribute. + if (EnableUnsafeFPShrink.getNumOccurrences() > 0) + UnsafeFPShrink = EnableUnsafeFPShrink; + else if (isa<FPMathOperator>(CI) && CI->hasUnsafeAlgebra()) + UnsafeFPShrink = true; + + // First, check for intrinsics. 
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) { + if (!isCallingConvC) + return nullptr; + switch (II->getIntrinsicID()) { + case Intrinsic::pow: + return optimizePow(CI, Builder); + case Intrinsic::exp2: + return optimizeExp2(CI, Builder); + case Intrinsic::fabs: + return optimizeFabs(CI, Builder); + case Intrinsic::log: + return optimizeLog(CI, Builder); + case Intrinsic::sqrt: + return optimizeSqrt(CI, Builder); + // TODO: Use foldMallocMemset() with memset intrinsic. + default: + return nullptr; + } + } + + // Also try to simplify calls to fortified library functions. + if (Value *SimplifiedFortifiedCI = FortifiedSimplifier.optimizeCall(CI)) { + // Try to further simplify the result. + CallInst *SimplifiedCI = dyn_cast<CallInst>(SimplifiedFortifiedCI); + if (SimplifiedCI && SimplifiedCI->getCalledFunction()) { + // Use an IR Builder from SimplifiedCI if available instead of CI + // to guarantee we reach all uses we might replace later on. + IRBuilder<> TmpBuilder(SimplifiedCI); + if (Value *V = optimizeStringMemoryLibCall(SimplifiedCI, TmpBuilder)) { + // If we were able to further simplify, remove the now redundant call. + SimplifiedCI->replaceAllUsesWith(V); + SimplifiedCI->eraseFromParent(); + return V; + } + } + return SimplifiedFortifiedCI; + } + + // Then check for known library functions. + if (TLI->getLibFunc(*Callee, Func) && TLI->has(Func)) { + // We never change the calling convention. + if (!ignoreCallingConv(Func) && !isCallingConvC) + return nullptr; + if (Value *V = optimizeStringMemoryLibCall(CI, Builder)) + return V; + switch (Func) { + case LibFunc::cosf: + case LibFunc::cos: + case LibFunc::cosl: + return optimizeCos(CI, Builder); + case LibFunc::sinpif: + case LibFunc::sinpi: + case LibFunc::cospif: + case LibFunc::cospi: + return optimizeSinCosPi(CI, Builder); + case LibFunc::powf: + case LibFunc::pow: + case LibFunc::powl: + return optimizePow(CI, Builder); + case LibFunc::exp2l: + case LibFunc::exp2: + case LibFunc::exp2f: + return optimizeExp2(CI, Builder); + case LibFunc::fabsf: + case LibFunc::fabs: + case LibFunc::fabsl: + return optimizeFabs(CI, Builder); + case LibFunc::sqrtf: + case LibFunc::sqrt: + case LibFunc::sqrtl: + return optimizeSqrt(CI, Builder); + case LibFunc::ffs: + case LibFunc::ffsl: + case LibFunc::ffsll: + return optimizeFFS(CI, Builder); + case LibFunc::fls: + case LibFunc::flsl: + case LibFunc::flsll: + return optimizeFls(CI, Builder); + case LibFunc::abs: + case LibFunc::labs: + case LibFunc::llabs: + return optimizeAbs(CI, Builder); + case LibFunc::isdigit: + return optimizeIsDigit(CI, Builder); + case LibFunc::isascii: + return optimizeIsAscii(CI, Builder); + case LibFunc::toascii: + return optimizeToAscii(CI, Builder); + case LibFunc::printf: + return optimizePrintF(CI, Builder); + case LibFunc::sprintf: + return optimizeSPrintF(CI, Builder); + case LibFunc::fprintf: + return optimizeFPrintF(CI, Builder); + case LibFunc::fwrite: + return optimizeFWrite(CI, Builder); + case LibFunc::fputs: + return optimizeFPuts(CI, Builder); + case LibFunc::log: + case LibFunc::log10: + case LibFunc::log1p: + case LibFunc::log2: + case LibFunc::logb: + return optimizeLog(CI, Builder); + case LibFunc::puts: + return optimizePuts(CI, Builder); + case LibFunc::tan: + case LibFunc::tanf: + case LibFunc::tanl: + return optimizeTan(CI, Builder); + case LibFunc::perror: + return optimizeErrorReporting(CI, Builder); + case LibFunc::vfprintf: + case LibFunc::fiprintf: + return optimizeErrorReporting(CI, Builder, 0); + case LibFunc::fputc: + return 
optimizeErrorReporting(CI, Builder, 1); + case LibFunc::ceil: + case LibFunc::floor: + case LibFunc::rint: + case LibFunc::round: + case LibFunc::nearbyint: + case LibFunc::trunc: + if (hasFloatVersion(FuncName)) + return optimizeUnaryDoubleFP(CI, Builder, false); + return nullptr; + case LibFunc::acos: + case LibFunc::acosh: + case LibFunc::asin: + case LibFunc::asinh: + case LibFunc::atan: + case LibFunc::atanh: + case LibFunc::cbrt: + case LibFunc::cosh: + case LibFunc::exp: + case LibFunc::exp10: + case LibFunc::expm1: + case LibFunc::sin: + case LibFunc::sinh: + case LibFunc::tanh: + if (UnsafeFPShrink && hasFloatVersion(FuncName)) + return optimizeUnaryDoubleFP(CI, Builder, true); + return nullptr; + case LibFunc::copysign: + if (hasFloatVersion(FuncName)) + return optimizeBinaryDoubleFP(CI, Builder); + return nullptr; + case LibFunc::fminf: + case LibFunc::fmin: + case LibFunc::fminl: + case LibFunc::fmaxf: + case LibFunc::fmax: + case LibFunc::fmaxl: + return optimizeFMinFMax(CI, Builder); + default: + return nullptr; + } + } + return nullptr; +} + +LibCallSimplifier::LibCallSimplifier( + const DataLayout &DL, const TargetLibraryInfo *TLI, + function_ref<void(Instruction *, Value *)> Replacer) + : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), UnsafeFPShrink(false), + Replacer(Replacer) {} + +void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) { + // Indirect through the replacer used in this instance. + Replacer(I, With); +} + +// TODO: +// Additional cases that we need to add to this file: +// +// cbrt: +// * cbrt(expN(X)) -> expN(x/3) +// * cbrt(sqrt(x)) -> pow(x,1/6) +// * cbrt(cbrt(x)) -> pow(x,1/9) +// +// exp, expf, expl: +// * exp(log(x)) -> x +// +// log, logf, logl: +// * log(exp(x)) -> x +// * log(exp(y)) -> y*log(e) +// * log(exp10(y)) -> y*log(10) +// * log(sqrt(x)) -> 0.5*log(x) +// +// lround, lroundf, lroundl: +// * lround(cnst) -> cnst' +// +// pow, powf, powl: +// * pow(sqrt(x),y) -> pow(x,y*0.5) +// * pow(pow(x,y),z)-> pow(x,y*z) +// +// round, roundf, roundl: +// * round(cnst) -> cnst' +// +// signbit: +// * signbit(cnst) -> cnst' +// * signbit(nncst) -> 0 (if pstv is a non-negative constant) +// +// sqrt, sqrtf, sqrtl: +// * sqrt(expN(x)) -> expN(x*0.5) +// * sqrt(Nroot(x)) -> pow(x,1/(2*N)) +// * sqrt(pow(x,y)) -> pow(|x|,y*0.5) +// +// trunc, truncf, truncl: +// * trunc(cnst) -> cnst' +// +// + +//===----------------------------------------------------------------------===// +// Fortified Library Call Optimizations +//===----------------------------------------------------------------------===// + +bool FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI, + unsigned ObjSizeOp, + unsigned SizeOp, + bool isString) { + if (CI->getArgOperand(ObjSizeOp) == CI->getArgOperand(SizeOp)) + return true; + if (ConstantInt *ObjSizeCI = + dyn_cast<ConstantInt>(CI->getArgOperand(ObjSizeOp))) { + if (ObjSizeCI->isAllOnesValue()) + return true; + // If the object size wasn't -1 (unknown), bail out if we were asked to. + if (OnlyLowerUnknownSize) + return false; + if (isString) { + uint64_t Len = GetStringLength(CI->getArgOperand(SizeOp)); + // If the length is 0 we don't know how long it is and so we can't + // remove the check. 
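+ // (GetStringLength returns 0 when the length cannot be determined; a known
+ // length includes the terminating nul)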
+ if (Len == 0) + return false; + return ObjSizeCI->getZExtValue() >= Len; + } + if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getArgOperand(SizeOp))) + return ObjSizeCI->getZExtValue() >= SizeCI->getZExtValue(); + } + return false; +} + +Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI, + IRBuilder<> &B) { + if (isFortifiedCallFoldable(CI, 3, 2, false)) { + B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1); + return CI->getArgOperand(0); + } + return nullptr; +} + +Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI, + IRBuilder<> &B) { + if (isFortifiedCallFoldable(CI, 3, 2, false)) { + B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1); + return CI->getArgOperand(0); + } + return nullptr; +} + +Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI, + IRBuilder<> &B) { + // TODO: Try foldMallocMemset() here. + + if (isFortifiedCallFoldable(CI, 3, 2, false)) { + Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false); + B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1); + return CI->getArgOperand(0); + } + return nullptr; +} + +Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI, + IRBuilder<> &B, + LibFunc::Func Func) { + Function *Callee = CI->getCalledFunction(); + StringRef Name = Callee->getName(); + const DataLayout &DL = CI->getModule()->getDataLayout(); + Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1), + *ObjSize = CI->getArgOperand(2); + + // __stpcpy_chk(x,x,...) -> x+strlen(x) + if (Func == LibFunc::stpcpy_chk && !OnlyLowerUnknownSize && Dst == Src) { + Value *StrLen = emitStrLen(Src, B, DL, TLI); + return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr; + } + + // If a) we don't have any length information, or b) we know this will + // fit then just lower to a plain st[rp]cpy. Otherwise we'll keep our + // st[rp]cpy_chk call which may fail at runtime if the size is too long. + // TODO: It might be nice to get a maximum length out of the possible + // string lengths for varying. + if (isFortifiedCallFoldable(CI, 2, 1, true)) + return emitStrCpy(Dst, Src, B, TLI, Name.substr(2, 6)); + + if (OnlyLowerUnknownSize) + return nullptr; + + // Maybe we can stil fold __st[rp]cpy_chk to __memcpy_chk. + uint64_t Len = GetStringLength(Src); + if (Len == 0) + return nullptr; + + Type *SizeTTy = DL.getIntPtrType(CI->getContext()); + Value *LenV = ConstantInt::get(SizeTTy, Len); + Value *Ret = emitMemCpyChk(Dst, Src, LenV, ObjSize, B, DL, TLI); + // If the function was an __stpcpy_chk, and we were able to fold it into + // a __memcpy_chk, we still need to return the correct end pointer. + if (Ret && Func == LibFunc::stpcpy_chk) + return B.CreateGEP(B.getInt8Ty(), Dst, ConstantInt::get(SizeTTy, Len - 1)); + return Ret; +} + +Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI, + IRBuilder<> &B, + LibFunc::Func Func) { + Function *Callee = CI->getCalledFunction(); + StringRef Name = Callee->getName(); + if (isFortifiedCallFoldable(CI, 3, 2, false)) { + Value *Ret = emitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), B, TLI, Name.substr(2, 7)); + return Ret; + } + return nullptr; +} + +Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) { + // FIXME: We shouldn't be changing "nobuiltin" or TLI unavailable calls here. 
+ // Some clang users checked for _chk libcall availability using: + // __has_builtin(__builtin___memcpy_chk) + // When compiling with -fno-builtin, this is always true. + // When passing -ffreestanding/-mkernel, which both imply -fno-builtin, we + // end up with fortified libcalls, which isn't acceptable in a freestanding + // environment which only provides their non-fortified counterparts. + // + // Until we change clang and/or teach external users to check for availability + // differently, disregard the "nobuiltin" attribute and TLI::has. + // + // PR23093. + + LibFunc::Func Func; + Function *Callee = CI->getCalledFunction(); + + SmallVector<OperandBundleDef, 2> OpBundles; + CI->getOperandBundlesAsDefs(OpBundles); + IRBuilder<> Builder(CI, /*FPMathTag=*/nullptr, OpBundles); + bool isCallingConvC = isCallingConvCCompatible(CI); + + // First, check that this is a known library functions and that the prototype + // is correct. + if (!TLI->getLibFunc(*Callee, Func)) + return nullptr; + + // We never change the calling convention. + if (!ignoreCallingConv(Func) && !isCallingConvC) + return nullptr; + + switch (Func) { + case LibFunc::memcpy_chk: + return optimizeMemCpyChk(CI, Builder); + case LibFunc::memmove_chk: + return optimizeMemMoveChk(CI, Builder); + case LibFunc::memset_chk: + return optimizeMemSetChk(CI, Builder); + case LibFunc::stpcpy_chk: + case LibFunc::strcpy_chk: + return optimizeStrpCpyChk(CI, Builder, Func); + case LibFunc::stpncpy_chk: + case LibFunc::strncpy_chk: + return optimizeStrpNCpyChk(CI, Builder, Func); + default: + break; + } + return nullptr; +} + +FortifiedLibCallSimplifier::FortifiedLibCallSimplifier( + const TargetLibraryInfo *TLI, bool OnlyLowerUnknownSize) + : TLI(TLI), OnlyLowerUnknownSize(OnlyLowerUnknownSize) {} diff --git a/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp b/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp new file mode 100644 index 000000000000..e9a368f4faa4 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp @@ -0,0 +1,263 @@ +//===- SplitModule.cpp - Split a module into partitions -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the function llvm::SplitModule, which splits a module +// into multiple linkable partitions. It can be used to implement parallel code +// generation for link-time optimization. 
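+//
+// When local symbols are preserved, each local is kept in the same partition
+// as its users (see findPartitions below), so nothing has to be renamed or
+// externalized; the remaining globals are distributed across the N partitions
+// by an MD5 hash of their names (see isInPartition below).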
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "split-module" + +#include "llvm/Transforms/Utils/SplitModule.h" +#include "llvm/ADT/EquivalenceClasses.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalObject.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MD5.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include <queue> + +using namespace llvm; + +namespace { +typedef EquivalenceClasses<const GlobalValue *> ClusterMapType; +typedef DenseMap<const Comdat *, const GlobalValue *> ComdatMembersType; +typedef DenseMap<const GlobalValue *, unsigned> ClusterIDMapType; +} + +static void addNonConstUser(ClusterMapType &GVtoClusterMap, + const GlobalValue *GV, const User *U) { + assert((!isa<Constant>(U) || isa<GlobalValue>(U)) && "Bad user"); + + if (const Instruction *I = dyn_cast<Instruction>(U)) { + const GlobalValue *F = I->getParent()->getParent(); + GVtoClusterMap.unionSets(GV, F); + } else if (isa<GlobalIndirectSymbol>(U) || isa<Function>(U) || + isa<GlobalVariable>(U)) { + GVtoClusterMap.unionSets(GV, cast<GlobalValue>(U)); + } else { + llvm_unreachable("Underimplemented use case"); + } +} + +// Adds all GlobalValue users of V to the same cluster as GV. +static void addAllGlobalValueUsers(ClusterMapType &GVtoClusterMap, + const GlobalValue *GV, const Value *V) { + for (auto *U : V->users()) { + SmallVector<const User *, 4> Worklist; + Worklist.push_back(U); + while (!Worklist.empty()) { + const User *UU = Worklist.pop_back_val(); + // For each constant that is not a GV (a pure const) recurse. + if (isa<Constant>(UU) && !isa<GlobalValue>(UU)) { + Worklist.append(UU->user_begin(), UU->user_end()); + continue; + } + addNonConstUser(GVtoClusterMap, GV, UU); + } + } +} + +// Find partitions for module in the way that no locals need to be +// globalized. +// Try to balance pack those partitions into N files since this roughly equals +// thread balancing for the backend codegen step. +static void findPartitions(Module *M, ClusterIDMapType &ClusterIDMap, + unsigned N) { + // At this point module should have the proper mix of globals and locals. + // As we attempt to partition this module, we must not change any + // locals to globals. + DEBUG(dbgs() << "Partition module with (" << M->size() << ")functions\n"); + ClusterMapType GVtoClusterMap; + ComdatMembersType ComdatMembers; + + auto recordGVSet = [&GVtoClusterMap, &ComdatMembers](GlobalValue &GV) { + if (GV.isDeclaration()) + return; + + if (!GV.hasName()) + GV.setName("__llvmsplit_unnamed"); + + // Comdat groups must not be partitioned. For comdat groups that contain + // locals, record all their members here so we can keep them together. + // Comdat groups that only contain external globals are already handled by + // the MD5-based partitioning. + if (const Comdat *C = GV.getComdat()) { + auto &Member = ComdatMembers[C]; + if (Member) + GVtoClusterMap.unionSets(Member, &GV); + else + Member = &GV; + } + + // For aliases we should not separate them from their aliasees regardless + // of linkage. 
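+ // (an alias must point at a definition in its own module, so it and its
+ // aliasee are forced into the same cluster)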
+ if (auto *GIS = dyn_cast<GlobalIndirectSymbol>(&GV)) { + if (const GlobalObject *Base = GIS->getBaseObject()) + GVtoClusterMap.unionSets(&GV, Base); + } + + if (const Function *F = dyn_cast<Function>(&GV)) { + for (const BasicBlock &BB : *F) { + BlockAddress *BA = BlockAddress::lookup(&BB); + if (!BA || !BA->isConstantUsed()) + continue; + addAllGlobalValueUsers(GVtoClusterMap, F, BA); + } + } + + if (GV.hasLocalLinkage()) + addAllGlobalValueUsers(GVtoClusterMap, &GV, &GV); + }; + + std::for_each(M->begin(), M->end(), recordGVSet); + std::for_each(M->global_begin(), M->global_end(), recordGVSet); + std::for_each(M->alias_begin(), M->alias_end(), recordGVSet); + + // Assigned all GVs to merged clusters while balancing number of objects in + // each. + auto CompareClusters = [](const std::pair<unsigned, unsigned> &a, + const std::pair<unsigned, unsigned> &b) { + if (a.second || b.second) + return a.second > b.second; + else + return a.first > b.first; + }; + + std::priority_queue<std::pair<unsigned, unsigned>, + std::vector<std::pair<unsigned, unsigned>>, + decltype(CompareClusters)> + BalancinQueue(CompareClusters); + // Pre-populate priority queue with N slot blanks. + for (unsigned i = 0; i < N; ++i) + BalancinQueue.push(std::make_pair(i, 0)); + + typedef std::pair<unsigned, ClusterMapType::iterator> SortType; + SmallVector<SortType, 64> Sets; + SmallPtrSet<const GlobalValue *, 32> Visited; + + // To guarantee determinism, we have to sort SCC according to size. + // When size is the same, use leader's name. + for (ClusterMapType::iterator I = GVtoClusterMap.begin(), + E = GVtoClusterMap.end(); I != E; ++I) + if (I->isLeader()) + Sets.push_back( + std::make_pair(std::distance(GVtoClusterMap.member_begin(I), + GVtoClusterMap.member_end()), I)); + + std::sort(Sets.begin(), Sets.end(), [](const SortType &a, const SortType &b) { + if (a.first == b.first) + return a.second->getData()->getName() > b.second->getData()->getName(); + else + return a.first > b.first; + }); + + for (auto &I : Sets) { + unsigned CurrentClusterID = BalancinQueue.top().first; + unsigned CurrentClusterSize = BalancinQueue.top().second; + BalancinQueue.pop(); + + DEBUG(dbgs() << "Root[" << CurrentClusterID << "] cluster_size(" << I.first + << ") ----> " << I.second->getData()->getName() << "\n"); + + for (ClusterMapType::member_iterator MI = + GVtoClusterMap.findLeader(I.second); + MI != GVtoClusterMap.member_end(); ++MI) { + if (!Visited.insert(*MI).second) + continue; + DEBUG(dbgs() << "----> " << (*MI)->getName() + << ((*MI)->hasLocalLinkage() ? " l " : " e ") << "\n"); + Visited.insert(*MI); + ClusterIDMap[*MI] = CurrentClusterID; + CurrentClusterSize++; + } + // Add this set size to the number of entries in this cluster. + BalancinQueue.push(std::make_pair(CurrentClusterID, CurrentClusterSize)); + } +} + +static void externalize(GlobalValue *GV) { + if (GV->hasLocalLinkage()) { + GV->setLinkage(GlobalValue::ExternalLinkage); + GV->setVisibility(GlobalValue::HiddenVisibility); + } + + // Unnamed entities must be named consistently between modules. setName will + // give a distinct name to each such entity. + if (!GV->hasName()) + GV->setName("__llvmsplit_unnamed"); +} + +// Returns whether GV should be in partition (0-based) I of N. 
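+// Aliases are mapped to their base object and comdat members hash the name of
+// their comdat, so all members of such a group land in the same partition.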
+static bool isInPartition(const GlobalValue *GV, unsigned I, unsigned N) { + if (auto *GIS = dyn_cast<GlobalIndirectSymbol>(GV)) + if (const GlobalObject *Base = GIS->getBaseObject()) + GV = Base; + + StringRef Name; + if (const Comdat *C = GV->getComdat()) + Name = C->getName(); + else + Name = GV->getName(); + + // Partition by MD5 hash. We only need a few bits for evenness as the number + // of partitions will generally be in the 1-2 figure range; the low 16 bits + // are enough. + MD5 H; + MD5::MD5Result R; + H.update(Name); + H.final(R); + return (R[0] | (R[1] << 8)) % N == I; +} + +void llvm::SplitModule( + std::unique_ptr<Module> M, unsigned N, + function_ref<void(std::unique_ptr<Module> MPart)> ModuleCallback, + bool PreserveLocals) { + if (!PreserveLocals) { + for (Function &F : *M) + externalize(&F); + for (GlobalVariable &GV : M->globals()) + externalize(&GV); + for (GlobalAlias &GA : M->aliases()) + externalize(&GA); + for (GlobalIFunc &GIF : M->ifuncs()) + externalize(&GIF); + } + + // This performs splitting without a need for externalization, which might not + // always be possible. + ClusterIDMapType ClusterIDMap; + findPartitions(M.get(), ClusterIDMap, N); + + // FIXME: We should be able to reuse M as the last partition instead of + // cloning it. + for (unsigned I = 0; I < N; ++I) { + ValueToValueMapTy VMap; + std::unique_ptr<Module> MPart( + CloneModule(M.get(), VMap, [&](const GlobalValue *GV) { + if (ClusterIDMap.count(GV)) + return (ClusterIDMap[GV] == I); + else + return isInPartition(GV, I, N); + })); + if (I != 0) + MPart->setModuleInlineAsm(""); + ModuleCallback(std::move(MPart)); + } +} diff --git a/contrib/llvm/lib/Transforms/Utils/StripGCRelocates.cpp b/contrib/llvm/lib/Transforms/Utils/StripGCRelocates.cpp new file mode 100644 index 000000000000..f3d3fadb51e9 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/StripGCRelocates.cpp @@ -0,0 +1,80 @@ +//===- StripGCRelocates.cpp - Remove gc.relocates inserted by RewriteStatePoints===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is a little utility pass that removes the gc.relocates inserted by +// RewriteStatepointsForGC. Note that the generated IR is incorrect, +// but this is useful as a single pass in itself, for analysis of IR, without +// the GC.relocates. The statepoint and gc.result instrinsics would still be +// present. +//===----------------------------------------------------------------------===// + +#include "llvm/IR/Function.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Statepoint.h" +#include "llvm/IR/Type.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { +struct StripGCRelocates : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + StripGCRelocates() : FunctionPass(ID) { + initializeStripGCRelocatesPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &Info) const override {} + + bool runOnFunction(Function &F) override; + +}; +char StripGCRelocates::ID = 0; +} + +bool StripGCRelocates::runOnFunction(Function &F) { + // Nothing to do for declarations. 
+ if (F.isDeclaration())
+ return false;
+ SmallVector<GCRelocateInst *, 20> GCRelocates;
+ // TODO: We currently do not handle gc.relocates that are in landing pads,
+ // i.e. not bound to a single statepoint token.
+ for (Instruction &I : instructions(F)) {
+ if (auto *GCR = dyn_cast<GCRelocateInst>(&I))
+ if (isStatepoint(GCR->getOperand(0)))
+ GCRelocates.push_back(GCR);
+ }
+ // All gc.relocates are bound to a single statepoint token. The order of
+ // visiting gc.relocates for deletion does not matter.
+ for (GCRelocateInst *GCRel : GCRelocates) {
+ Value *OrigPtr = GCRel->getDerivedPtr();
+ Value *ReplaceGCRel = OrigPtr;
+
+ // All gc_relocates are i8 addrspace(1)* typed, we need a bitcast from i8
+ // addrspace(1)* to the type of the OrigPtr, if they are not the same.
+ if (GCRel->getType() != OrigPtr->getType())
+ ReplaceGCRel = new BitCastInst(OrigPtr, GCRel->getType(), "cast", GCRel);
+
+ // Replace all uses of the gc.relocate and delete it.
+ // There may be unnecessary bitcasts back to the OrigPtr type; an instcombine
+ // pass would clean these up.
+ GCRel->replaceAllUsesWith(ReplaceGCRel);
+ GCRel->eraseFromParent();
+ }
+ return !GCRelocates.empty();
+}
+
+INITIALIZE_PASS(StripGCRelocates, "strip-gc-relocates",
+ "Strip gc.relocates inserted through RewriteStatepointsForGC",
+ true, false)
+FunctionPass *llvm::createStripGCRelocatesPass() {
+ return new StripGCRelocates();
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp b/contrib/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp
new file mode 100644
index 000000000000..66dbf335cb95
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp
@@ -0,0 +1,42 @@
+//===- StripNonLineTableDebugInfo.cpp -- Strip parts of Debug Info --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+namespace {
+
+/// This pass strips all debug info that is not related to line tables.
+/// The result will be the same as if the program were compiled with
+/// -gline-tables-only.
+struct StripNonLineTableDebugInfo : public ModulePass { + static char ID; // Pass identification, replacement for typeid + StripNonLineTableDebugInfo() : ModulePass(ID) { + initializeStripNonLineTableDebugInfoPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + + bool runOnModule(Module &M) override { + return llvm::stripNonLineTableDebugInfo(M); + } +}; +} + +char StripNonLineTableDebugInfo::ID = 0; +INITIALIZE_PASS(StripNonLineTableDebugInfo, "strip-nonlinetable-debuginfo", + "Strip all debug info except linetables", false, false) + +ModulePass *llvm::createStripNonLineTableDebugInfoPass() { + return new StripNonLineTableDebugInfo(); +} diff --git a/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp b/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp new file mode 100644 index 000000000000..6d136636ce70 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp @@ -0,0 +1,565 @@ +//===- SymbolRewriter.cpp - Symbol Rewriter ---------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// SymbolRewriter is a LLVM pass which can rewrite symbols transparently within +// existing code. It is implemented as a compiler pass and is configured via a +// YAML configuration file. +// +// The YAML configuration file format is as follows: +// +// RewriteMapFile := RewriteDescriptors +// RewriteDescriptors := RewriteDescriptor | RewriteDescriptors +// RewriteDescriptor := RewriteDescriptorType ':' '{' RewriteDescriptorFields '}' +// RewriteDescriptorFields := RewriteDescriptorField | RewriteDescriptorFields +// RewriteDescriptorField := FieldIdentifier ':' FieldValue ',' +// RewriteDescriptorType := Identifier +// FieldIdentifier := Identifier +// FieldValue := Identifier +// Identifier := [0-9a-zA-Z]+ +// +// Currently, the following descriptor types are supported: +// +// - function: (function rewriting) +// + Source (original name of the function) +// + Target (explicit transformation) +// + Transform (pattern transformation) +// + Naked (boolean, whether the function is undecorated) +// - global variable: (external linkage global variable rewriting) +// + Source (original name of externally visible variable) +// + Target (explicit transformation) +// + Transform (pattern transformation) +// - global alias: (global alias rewriting) +// + Source (original name of the aliased name) +// + Target (explicit transformation) +// + Transform (pattern transformation) +// +// Note that source and exactly one of [Target, Transform] must be provided +// +// New rewrite descriptors can be created. Addding a new rewrite descriptor +// involves: +// +// a) extended the rewrite descriptor kind enumeration +// (<anonymous>::RewriteDescriptor::RewriteDescriptorType) +// b) implementing the new descriptor +// (c.f. <anonymous>::ExplicitRewriteFunctionDescriptor) +// c) extending the rewrite map parser +// (<anonymous>::RewriteMapParser::parseEntry) +// +// Specify to rewrite the symbols using the `-rewrite-symbols` option, and +// specify the map file to use for the rewriting via the `-rewrite-map-file` +// option. 
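//
// As an illustration (not taken from the upstream sources), a rewrite map that
// matches the grammar above could look like:
//
//   function:        { source: memcpy,       target: internal_memcpy }
//   global variable: { source: counter_(.*), transform: prof_counter_\1 }
//
// The first entry renames the function memcpy to internal_memcpy; the second
// rewrites every external global whose name matches the regex counter_(.*)
// using the given transform, so counter_hits would become prof_counter_hits.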
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "symbol-rewriter" +#include "llvm/Transforms/Utils/SymbolRewriter.h" +#include "llvm/Pass.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Regex.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/YAMLParser.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using namespace SymbolRewriter; + +static cl::list<std::string> RewriteMapFiles("rewrite-map-file", + cl::desc("Symbol Rewrite Map"), + cl::value_desc("filename")); + +static void rewriteComdat(Module &M, GlobalObject *GO, + const std::string &Source, + const std::string &Target) { + if (Comdat *CD = GO->getComdat()) { + auto &Comdats = M.getComdatSymbolTable(); + + Comdat *C = M.getOrInsertComdat(Target); + C->setSelectionKind(CD->getSelectionKind()); + GO->setComdat(C); + + Comdats.erase(Comdats.find(Source)); + } +} + +namespace { +template <RewriteDescriptor::Type DT, typename ValueType, + ValueType *(llvm::Module::*Get)(StringRef) const> +class ExplicitRewriteDescriptor : public RewriteDescriptor { +public: + const std::string Source; + const std::string Target; + + ExplicitRewriteDescriptor(StringRef S, StringRef T, const bool Naked) + : RewriteDescriptor(DT), Source(Naked ? StringRef("\01" + S.str()) : S), + Target(T) {} + + bool performOnModule(Module &M) override; + + static bool classof(const RewriteDescriptor *RD) { + return RD->getType() == DT; + } +}; + +template <RewriteDescriptor::Type DT, typename ValueType, + ValueType *(llvm::Module::*Get)(StringRef) const> +bool ExplicitRewriteDescriptor<DT, ValueType, Get>::performOnModule(Module &M) { + bool Changed = false; + if (ValueType *S = (M.*Get)(Source)) { + if (GlobalObject *GO = dyn_cast<GlobalObject>(S)) + rewriteComdat(M, GO, Source, Target); + + if (Value *T = (M.*Get)(Target)) + S->setValueName(T->getValueName()); + else + S->setName(Target); + + Changed = true; + } + return Changed; +} + +template <RewriteDescriptor::Type DT, typename ValueType, + ValueType *(llvm::Module::*Get)(StringRef) const, + iterator_range<typename iplist<ValueType>::iterator> + (llvm::Module::*Iterator)()> +class PatternRewriteDescriptor : public RewriteDescriptor { +public: + const std::string Pattern; + const std::string Transform; + + PatternRewriteDescriptor(StringRef P, StringRef T) + : RewriteDescriptor(DT), Pattern(P), Transform(T) { } + + bool performOnModule(Module &M) override; + + static bool classof(const RewriteDescriptor *RD) { + return RD->getType() == DT; + } +}; + +template <RewriteDescriptor::Type DT, typename ValueType, + ValueType *(llvm::Module::*Get)(StringRef) const, + iterator_range<typename iplist<ValueType>::iterator> + (llvm::Module::*Iterator)()> +bool PatternRewriteDescriptor<DT, ValueType, Get, Iterator>:: +performOnModule(Module &M) { + bool Changed = false; + for (auto &C : (M.*Iterator)()) { + std::string Error; + + std::string Name = Regex(Pattern).sub(Transform, C.getName(), &Error); + if (!Error.empty()) + report_fatal_error("unable to transforn " + C.getName() + " in " + + M.getModuleIdentifier() + ": " + Error); + + if (C.getName() == Name) + continue; + + if (GlobalObject *GO = dyn_cast<GlobalObject>(&C)) + rewriteComdat(M, GO, C.getName(), Name); + + if (Value *V = (M.*Get)(Name)) + C.setValueName(V->getValueName()); + else + C.setName(Name); + + Changed = 
true; + } + return Changed; +} + +/// Represents a rewrite for an explicitly named (function) symbol. Both the +/// source function name and target function name of the transformation are +/// explicitly spelt out. +typedef ExplicitRewriteDescriptor<RewriteDescriptor::Type::Function, + llvm::Function, &llvm::Module::getFunction> + ExplicitRewriteFunctionDescriptor; + +/// Represents a rewrite for an explicitly named (global variable) symbol. Both +/// the source variable name and target variable name are spelt out. This +/// applies only to module level variables. +typedef ExplicitRewriteDescriptor<RewriteDescriptor::Type::GlobalVariable, + llvm::GlobalVariable, + &llvm::Module::getGlobalVariable> + ExplicitRewriteGlobalVariableDescriptor; + +/// Represents a rewrite for an explicitly named global alias. Both the source +/// and target name are explicitly spelt out. +typedef ExplicitRewriteDescriptor<RewriteDescriptor::Type::NamedAlias, + llvm::GlobalAlias, + &llvm::Module::getNamedAlias> + ExplicitRewriteNamedAliasDescriptor; + +/// Represents a rewrite for a regular expression based pattern for functions. +/// A pattern for the function name is provided and a transformation for that +/// pattern to determine the target function name create the rewrite rule. +typedef PatternRewriteDescriptor<RewriteDescriptor::Type::Function, + llvm::Function, &llvm::Module::getFunction, + &llvm::Module::functions> + PatternRewriteFunctionDescriptor; + +/// Represents a rewrite for a global variable based upon a matching pattern. +/// Each global variable matching the provided pattern will be transformed as +/// described in the transformation pattern for the target. Applies only to +/// module level variables. +typedef PatternRewriteDescriptor<RewriteDescriptor::Type::GlobalVariable, + llvm::GlobalVariable, + &llvm::Module::getGlobalVariable, + &llvm::Module::globals> + PatternRewriteGlobalVariableDescriptor; + +/// PatternRewriteNamedAliasDescriptor - represents a rewrite for global +/// aliases which match a given pattern. The provided transformation will be +/// applied to each of the matching names. 
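// A small sketch of the Pattern/Transform mechanics used by performOnModule()
// above, expressed through llvm::Regex directly. The names are hypothetical
// and the snippet is not part of the upstream file.
#if 0
#include <string>
#include "llvm/Support/Regex.h"

  std::string Error;
  std::string NewName = llvm::Regex("counter_(.*)")
                            .sub("prof_counter_\\1", "counter_hits", &Error);
  // On success Error stays empty and NewName == "prof_counter_hits"; the
  // pattern descriptor then renames the matching global accordingly.
#endif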
+typedef PatternRewriteDescriptor<RewriteDescriptor::Type::NamedAlias, + llvm::GlobalAlias, + &llvm::Module::getNamedAlias, + &llvm::Module::aliases> + PatternRewriteNamedAliasDescriptor; +} // namespace + +bool RewriteMapParser::parse(const std::string &MapFile, + RewriteDescriptorList *DL) { + ErrorOr<std::unique_ptr<MemoryBuffer>> Mapping = + MemoryBuffer::getFile(MapFile); + + if (!Mapping) + report_fatal_error("unable to read rewrite map '" + MapFile + "': " + + Mapping.getError().message()); + + if (!parse(*Mapping, DL)) + report_fatal_error("unable to parse rewrite map '" + MapFile + "'"); + + return true; +} + +bool RewriteMapParser::parse(std::unique_ptr<MemoryBuffer> &MapFile, + RewriteDescriptorList *DL) { + SourceMgr SM; + yaml::Stream YS(MapFile->getBuffer(), SM); + + for (auto &Document : YS) { + yaml::MappingNode *DescriptorList; + + // ignore empty documents + if (isa<yaml::NullNode>(Document.getRoot())) + continue; + + DescriptorList = dyn_cast<yaml::MappingNode>(Document.getRoot()); + if (!DescriptorList) { + YS.printError(Document.getRoot(), "DescriptorList node must be a map"); + return false; + } + + for (auto &Descriptor : *DescriptorList) + if (!parseEntry(YS, Descriptor, DL)) + return false; + } + + return true; +} + +bool RewriteMapParser::parseEntry(yaml::Stream &YS, yaml::KeyValueNode &Entry, + RewriteDescriptorList *DL) { + yaml::ScalarNode *Key; + yaml::MappingNode *Value; + SmallString<32> KeyStorage; + StringRef RewriteType; + + Key = dyn_cast<yaml::ScalarNode>(Entry.getKey()); + if (!Key) { + YS.printError(Entry.getKey(), "rewrite type must be a scalar"); + return false; + } + + Value = dyn_cast<yaml::MappingNode>(Entry.getValue()); + if (!Value) { + YS.printError(Entry.getValue(), "rewrite descriptor must be a map"); + return false; + } + + RewriteType = Key->getValue(KeyStorage); + if (RewriteType.equals("function")) + return parseRewriteFunctionDescriptor(YS, Key, Value, DL); + else if (RewriteType.equals("global variable")) + return parseRewriteGlobalVariableDescriptor(YS, Key, Value, DL); + else if (RewriteType.equals("global alias")) + return parseRewriteGlobalAliasDescriptor(YS, Key, Value, DL); + + YS.printError(Entry.getKey(), "unknown rewrite type"); + return false; +} + +bool RewriteMapParser:: +parseRewriteFunctionDescriptor(yaml::Stream &YS, yaml::ScalarNode *K, + yaml::MappingNode *Descriptor, + RewriteDescriptorList *DL) { + bool Naked = false; + std::string Source; + std::string Target; + std::string Transform; + + for (auto &Field : *Descriptor) { + yaml::ScalarNode *Key; + yaml::ScalarNode *Value; + SmallString<32> KeyStorage; + SmallString<32> ValueStorage; + StringRef KeyValue; + + Key = dyn_cast<yaml::ScalarNode>(Field.getKey()); + if (!Key) { + YS.printError(Field.getKey(), "descriptor key must be a scalar"); + return false; + } + + Value = dyn_cast<yaml::ScalarNode>(Field.getValue()); + if (!Value) { + YS.printError(Field.getValue(), "descriptor value must be a scalar"); + return false; + } + + KeyValue = Key->getValue(KeyStorage); + if (KeyValue.equals("source")) { + std::string Error; + + Source = Value->getValue(ValueStorage); + if (!Regex(Source).isValid(Error)) { + YS.printError(Field.getKey(), "invalid regex: " + Error); + return false; + } + } else if (KeyValue.equals("target")) { + Target = Value->getValue(ValueStorage); + } else if (KeyValue.equals("transform")) { + Transform = Value->getValue(ValueStorage); + } else if (KeyValue.equals("naked")) { + std::string Undecorated; + + Undecorated = Value->getValue(ValueStorage); + 
Naked = StringRef(Undecorated).lower() == "true" || Undecorated == "1"; + } else { + YS.printError(Field.getKey(), "unknown key for function"); + return false; + } + } + + if (Transform.empty() == Target.empty()) { + YS.printError(Descriptor, + "exactly one of transform or target must be specified"); + return false; + } + + // TODO see if there is a more elegant solution to selecting the rewrite + // descriptor type + if (!Target.empty()) + DL->push_back(llvm::make_unique<ExplicitRewriteFunctionDescriptor>( + Source, Target, Naked)); + else + DL->push_back( + llvm::make_unique<PatternRewriteFunctionDescriptor>(Source, Transform)); + + return true; +} + +bool RewriteMapParser:: +parseRewriteGlobalVariableDescriptor(yaml::Stream &YS, yaml::ScalarNode *K, + yaml::MappingNode *Descriptor, + RewriteDescriptorList *DL) { + std::string Source; + std::string Target; + std::string Transform; + + for (auto &Field : *Descriptor) { + yaml::ScalarNode *Key; + yaml::ScalarNode *Value; + SmallString<32> KeyStorage; + SmallString<32> ValueStorage; + StringRef KeyValue; + + Key = dyn_cast<yaml::ScalarNode>(Field.getKey()); + if (!Key) { + YS.printError(Field.getKey(), "descriptor Key must be a scalar"); + return false; + } + + Value = dyn_cast<yaml::ScalarNode>(Field.getValue()); + if (!Value) { + YS.printError(Field.getValue(), "descriptor value must be a scalar"); + return false; + } + + KeyValue = Key->getValue(KeyStorage); + if (KeyValue.equals("source")) { + std::string Error; + + Source = Value->getValue(ValueStorage); + if (!Regex(Source).isValid(Error)) { + YS.printError(Field.getKey(), "invalid regex: " + Error); + return false; + } + } else if (KeyValue.equals("target")) { + Target = Value->getValue(ValueStorage); + } else if (KeyValue.equals("transform")) { + Transform = Value->getValue(ValueStorage); + } else { + YS.printError(Field.getKey(), "unknown Key for Global Variable"); + return false; + } + } + + if (Transform.empty() == Target.empty()) { + YS.printError(Descriptor, + "exactly one of transform or target must be specified"); + return false; + } + + if (!Target.empty()) + DL->push_back(llvm::make_unique<ExplicitRewriteGlobalVariableDescriptor>( + Source, Target, + /*Naked*/ false)); + else + DL->push_back(llvm::make_unique<PatternRewriteGlobalVariableDescriptor>( + Source, Transform)); + + return true; +} + +bool RewriteMapParser:: +parseRewriteGlobalAliasDescriptor(yaml::Stream &YS, yaml::ScalarNode *K, + yaml::MappingNode *Descriptor, + RewriteDescriptorList *DL) { + std::string Source; + std::string Target; + std::string Transform; + + for (auto &Field : *Descriptor) { + yaml::ScalarNode *Key; + yaml::ScalarNode *Value; + SmallString<32> KeyStorage; + SmallString<32> ValueStorage; + StringRef KeyValue; + + Key = dyn_cast<yaml::ScalarNode>(Field.getKey()); + if (!Key) { + YS.printError(Field.getKey(), "descriptor key must be a scalar"); + return false; + } + + Value = dyn_cast<yaml::ScalarNode>(Field.getValue()); + if (!Value) { + YS.printError(Field.getValue(), "descriptor value must be a scalar"); + return false; + } + + KeyValue = Key->getValue(KeyStorage); + if (KeyValue.equals("source")) { + std::string Error; + + Source = Value->getValue(ValueStorage); + if (!Regex(Source).isValid(Error)) { + YS.printError(Field.getKey(), "invalid regex: " + Error); + return false; + } + } else if (KeyValue.equals("target")) { + Target = Value->getValue(ValueStorage); + } else if (KeyValue.equals("transform")) { + Transform = Value->getValue(ValueStorage); + } else { + 
YS.printError(Field.getKey(), "unknown key for Global Alias"); + return false; + } + } + + if (Transform.empty() == Target.empty()) { + YS.printError(Descriptor, + "exactly one of transform or target must be specified"); + return false; + } + + if (!Target.empty()) + DL->push_back(llvm::make_unique<ExplicitRewriteNamedAliasDescriptor>( + Source, Target, + /*Naked*/ false)); + else + DL->push_back(llvm::make_unique<PatternRewriteNamedAliasDescriptor>( + Source, Transform)); + + return true; +} + +namespace { +class RewriteSymbolsLegacyPass : public ModulePass { +public: + static char ID; // Pass identification, replacement for typeid + + RewriteSymbolsLegacyPass(); + RewriteSymbolsLegacyPass(SymbolRewriter::RewriteDescriptorList &DL); + + bool runOnModule(Module &M) override; + +private: + RewriteSymbolPass Impl; +}; + +char RewriteSymbolsLegacyPass::ID = 0; + +RewriteSymbolsLegacyPass::RewriteSymbolsLegacyPass() : ModulePass(ID), Impl() { + initializeRewriteSymbolsLegacyPassPass(*PassRegistry::getPassRegistry()); +} + +RewriteSymbolsLegacyPass::RewriteSymbolsLegacyPass( + SymbolRewriter::RewriteDescriptorList &DL) + : ModulePass(ID), Impl(DL) {} + +bool RewriteSymbolsLegacyPass::runOnModule(Module &M) { + return Impl.runImpl(M); +} +} + +namespace llvm { +PreservedAnalyses RewriteSymbolPass::run(Module &M, ModuleAnalysisManager &AM) { + if (!runImpl(M)) + return PreservedAnalyses::all(); + + return PreservedAnalyses::none(); +} + +bool RewriteSymbolPass::runImpl(Module &M) { + bool Changed; + + Changed = false; + for (auto &Descriptor : Descriptors) + Changed |= Descriptor->performOnModule(M); + + return Changed; +} + +void RewriteSymbolPass::loadAndParseMapFiles() { + const std::vector<std::string> MapFiles(RewriteMapFiles); + SymbolRewriter::RewriteMapParser Parser; + + for (const auto &MapFile : MapFiles) + Parser.parse(MapFile, &Descriptors); +} +} + +INITIALIZE_PASS(RewriteSymbolsLegacyPass, "rewrite-symbols", "Rewrite Symbols", + false, false) + +ModulePass *llvm::createRewriteSymbolsPass() { + return new RewriteSymbolsLegacyPass(); +} + +ModulePass * +llvm::createRewriteSymbolsPass(SymbolRewriter::RewriteDescriptorList &DL) { + return new RewriteSymbolsLegacyPass(DL); +} diff --git a/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp new file mode 100644 index 000000000000..9385f825523c --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp @@ -0,0 +1,116 @@ +//===- UnifyFunctionExitNodes.cpp - Make all functions have a single exit -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass is used to ensure that functions have at most one return +// instruction in them. Additionally, it keeps track of which node is the new +// exit node of the CFG. If there are no exit nodes in the CFG, the getExitNode +// method will return a null pointer. 
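// A minimal sketch of a client pass that depends on the unified exit block;
// "MyLatePass" is hypothetical, and getReturnBlock() is assumed to be the
// accessor exposed by UnifyFunctionExitNodes.h. Not part of the upstream file.
#if 0
  void MyLatePass::getAnalysisUsage(AnalysisUsage &AU) const {
    AU.addRequired<UnifyFunctionExitNodes>();
  }

  bool MyLatePass::runOnFunction(Function &F) {
    BasicBlock *Ret =
        getAnalysis<UnifyFunctionExitNodes>().getReturnBlock();
    if (!Ret)
      return false; // the function never returns normally
    // ... insert instrumentation just before Ret's terminator ...
    return true;
  }
#endif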
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Type.h" +#include "llvm/Transforms/Scalar.h" +using namespace llvm; + +char UnifyFunctionExitNodes::ID = 0; +INITIALIZE_PASS(UnifyFunctionExitNodes, "mergereturn", + "Unify function exit nodes", false, false) + +Pass *llvm::createUnifyFunctionExitNodesPass() { + return new UnifyFunctionExitNodes(); +} + +void UnifyFunctionExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{ + // We preserve the non-critical-edgeness property + AU.addPreservedID(BreakCriticalEdgesID); + // This is a cluster of orthogonal Transforms + AU.addPreservedID(LowerSwitchID); +} + +// UnifyAllExitNodes - Unify all exit nodes of the CFG by creating a new +// BasicBlock, and converting all returns to unconditional branches to this +// new basic block. The singular exit node is returned. +// +// If there are no return stmts in the Function, a null pointer is returned. +// +bool UnifyFunctionExitNodes::runOnFunction(Function &F) { + // Loop over all of the blocks in a function, tracking all of the blocks that + // return. + // + std::vector<BasicBlock*> ReturningBlocks; + std::vector<BasicBlock*> UnreachableBlocks; + for (BasicBlock &I : F) + if (isa<ReturnInst>(I.getTerminator())) + ReturningBlocks.push_back(&I); + else if (isa<UnreachableInst>(I.getTerminator())) + UnreachableBlocks.push_back(&I); + + // Then unreachable blocks. + if (UnreachableBlocks.empty()) { + UnreachableBlock = nullptr; + } else if (UnreachableBlocks.size() == 1) { + UnreachableBlock = UnreachableBlocks.front(); + } else { + UnreachableBlock = BasicBlock::Create(F.getContext(), + "UnifiedUnreachableBlock", &F); + new UnreachableInst(F.getContext(), UnreachableBlock); + + for (BasicBlock *BB : UnreachableBlocks) { + BB->getInstList().pop_back(); // Remove the unreachable inst. + BranchInst::Create(UnreachableBlock, BB); + } + } + + // Now handle return blocks. + if (ReturningBlocks.empty()) { + ReturnBlock = nullptr; + return false; // No blocks return + } else if (ReturningBlocks.size() == 1) { + ReturnBlock = ReturningBlocks.front(); // Already has a single return block + return false; + } + + // Otherwise, we need to insert a new basic block into the function, add a PHI + // nodes (if the function returns values), and convert all of the return + // instructions into unconditional branches. + // + BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(), + "UnifiedReturnBlock", &F); + + PHINode *PN = nullptr; + if (F.getReturnType()->isVoidTy()) { + ReturnInst::Create(F.getContext(), nullptr, NewRetBlock); + } else { + // If the function doesn't return void... add a PHI node to the block... + PN = PHINode::Create(F.getReturnType(), ReturningBlocks.size(), + "UnifiedRetVal"); + NewRetBlock->getInstList().push_back(PN); + ReturnInst::Create(F.getContext(), PN, NewRetBlock); + } + + // Loop over all of the blocks, replacing the return instruction with an + // unconditional branch. + // + for (BasicBlock *BB : ReturningBlocks) { + // Add an incoming element to the PHI node for every return instruction that + // is merging into this new block... 
+ if (PN) + PN->addIncoming(BB->getTerminator()->getOperand(0), BB); + + BB->getInstList().pop_back(); // Remove the return insn + BranchInst::Create(NewRetBlock, BB); + } + ReturnBlock = NewRetBlock; + return true; +} diff --git a/contrib/llvm/lib/Transforms/Utils/Utils.cpp b/contrib/llvm/lib/Transforms/Utils/Utils.cpp new file mode 100644 index 000000000000..7b9de2eadc61 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/Utils.cpp @@ -0,0 +1,46 @@ +//===-- Utils.cpp - TransformUtils Infrastructure -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the common initialization infrastructure for the +// TransformUtils library. +// +//===----------------------------------------------------------------------===// + +#include "llvm/InitializePasses.h" +#include "llvm-c/Initialization.h" +#include "llvm/PassRegistry.h" + +using namespace llvm; + +/// initializeTransformUtils - Initialize all passes in the TransformUtils +/// library. +void llvm::initializeTransformUtils(PassRegistry &Registry) { + initializeAddDiscriminatorsLegacyPassPass(Registry); + initializeBreakCriticalEdgesPass(Registry); + initializeInstNamerPass(Registry); + initializeLCSSAWrapperPassPass(Registry); + initializeLibCallsShrinkWrapLegacyPassPass(Registry); + initializeLoopSimplifyPass(Registry); + initializeLowerInvokeLegacyPassPass(Registry); + initializeLowerSwitchPass(Registry); + initializeNameAnonGlobalLegacyPassPass(Registry); + initializePromoteLegacyPassPass(Registry); + initializeStripNonLineTableDebugInfoPass(Registry); + initializeUnifyFunctionExitNodesPass(Registry); + initializeInstSimplifierPass(Registry); + initializeMetaRenamerPass(Registry); + initializeMemorySSAWrapperPassPass(Registry); + initializeMemorySSAPrinterLegacyPassPass(Registry); + initializeStripGCRelocatesPass(Registry); +} + +/// LLVMInitializeTransformUtils - C binding for initializeTransformUtilsPasses. +void LLVMInitializeTransformUtils(LLVMPassRegistryRef R) { + initializeTransformUtils(*unwrap(R)); +} diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp new file mode 100644 index 000000000000..0e9baaf8649d --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp @@ -0,0 +1,1102 @@ +//===- ValueMapper.cpp - Interface shared by lib/Transforms/Utils ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the MapValue function, which is shared by various parts of +// the lib/Transforms/Utils library. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/ValueMapper.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Operator.h" +using namespace llvm; + +// Out of line method to get vtable etc for class. 
+void ValueMapTypeRemapper::anchor() {} +void ValueMaterializer::anchor() {} + +namespace { + +/// A basic block used in a BlockAddress whose function body is not yet +/// materialized. +struct DelayedBasicBlock { + BasicBlock *OldBB; + std::unique_ptr<BasicBlock> TempBB; + + DelayedBasicBlock(const BlockAddress &Old) + : OldBB(Old.getBasicBlock()), + TempBB(BasicBlock::Create(Old.getContext())) {} +}; + +struct WorklistEntry { + enum EntryKind { + MapGlobalInit, + MapAppendingVar, + MapGlobalAliasee, + RemapFunction + }; + struct GVInitTy { + GlobalVariable *GV; + Constant *Init; + }; + struct AppendingGVTy { + GlobalVariable *GV; + Constant *InitPrefix; + }; + struct GlobalAliaseeTy { + GlobalAlias *GA; + Constant *Aliasee; + }; + + unsigned Kind : 2; + unsigned MCID : 29; + unsigned AppendingGVIsOldCtorDtor : 1; + unsigned AppendingGVNumNewMembers; + union { + GVInitTy GVInit; + AppendingGVTy AppendingGV; + GlobalAliaseeTy GlobalAliasee; + Function *RemapF; + } Data; +}; + +struct MappingContext { + ValueToValueMapTy *VM; + ValueMaterializer *Materializer = nullptr; + + /// Construct a MappingContext with a value map and materializer. + explicit MappingContext(ValueToValueMapTy &VM, + ValueMaterializer *Materializer = nullptr) + : VM(&VM), Materializer(Materializer) {} +}; + +class MDNodeMapper; +class Mapper { + friend class MDNodeMapper; + +#ifndef NDEBUG + DenseSet<GlobalValue *> AlreadyScheduled; +#endif + + RemapFlags Flags; + ValueMapTypeRemapper *TypeMapper; + unsigned CurrentMCID = 0; + SmallVector<MappingContext, 2> MCs; + SmallVector<WorklistEntry, 4> Worklist; + SmallVector<DelayedBasicBlock, 1> DelayedBBs; + SmallVector<Constant *, 16> AppendingInits; + +public: + Mapper(ValueToValueMapTy &VM, RemapFlags Flags, + ValueMapTypeRemapper *TypeMapper, ValueMaterializer *Materializer) + : Flags(Flags), TypeMapper(TypeMapper), + MCs(1, MappingContext(VM, Materializer)) {} + + /// ValueMapper should explicitly call \a flush() before destruction. + ~Mapper() { assert(!hasWorkToDo() && "Expected to be flushed"); } + + bool hasWorkToDo() const { return !Worklist.empty(); } + + unsigned + registerAlternateMappingContext(ValueToValueMapTy &VM, + ValueMaterializer *Materializer = nullptr) { + MCs.push_back(MappingContext(VM, Materializer)); + return MCs.size() - 1; + } + + void addFlags(RemapFlags Flags); + + Value *mapValue(const Value *V); + void remapInstruction(Instruction *I); + void remapFunction(Function &F); + + Constant *mapConstant(const Constant *C) { + return cast_or_null<Constant>(mapValue(C)); + } + + /// Map metadata. + /// + /// Find the mapping for MD. Guarantees that the return will be resolved + /// (not an MDNode, or MDNode::isResolved() returns true). 
+ Metadata *mapMetadata(const Metadata *MD); + + void scheduleMapGlobalInitializer(GlobalVariable &GV, Constant &Init, + unsigned MCID); + void scheduleMapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix, + bool IsOldCtorDtor, + ArrayRef<Constant *> NewMembers, + unsigned MCID); + void scheduleMapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee, + unsigned MCID); + void scheduleRemapFunction(Function &F, unsigned MCID); + + void flush(); + +private: + void mapGlobalInitializer(GlobalVariable &GV, Constant &Init); + void mapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix, + bool IsOldCtorDtor, + ArrayRef<Constant *> NewMembers); + void mapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee); + void remapFunction(Function &F, ValueToValueMapTy &VM); + + ValueToValueMapTy &getVM() { return *MCs[CurrentMCID].VM; } + ValueMaterializer *getMaterializer() { return MCs[CurrentMCID].Materializer; } + + Value *mapBlockAddress(const BlockAddress &BA); + + /// Map metadata that doesn't require visiting operands. + Optional<Metadata *> mapSimpleMetadata(const Metadata *MD); + + Metadata *mapToMetadata(const Metadata *Key, Metadata *Val); + Metadata *mapToSelf(const Metadata *MD); +}; + +class MDNodeMapper { + Mapper &M; + + /// Data about a node in \a UniquedGraph. + struct Data { + bool HasChanged = false; + unsigned ID = ~0u; + TempMDNode Placeholder; + }; + + /// A graph of uniqued nodes. + struct UniquedGraph { + SmallDenseMap<const Metadata *, Data, 32> Info; // Node properties. + SmallVector<MDNode *, 16> POT; // Post-order traversal. + + /// Propagate changed operands through the post-order traversal. + /// + /// Iteratively update \a Data::HasChanged for each node based on \a + /// Data::HasChanged of its operands, until fixed point. + void propagateChanges(); + + /// Get a forward reference to a node to use as an operand. + Metadata &getFwdReference(MDNode &Op); + }; + + /// Worklist of distinct nodes whose operands need to be remapped. + SmallVector<MDNode *, 16> DistinctWorklist; + + // Storage for a UniquedGraph. + SmallDenseMap<const Metadata *, Data, 32> InfoStorage; + SmallVector<MDNode *, 16> POTStorage; + +public: + MDNodeMapper(Mapper &M) : M(M) {} + + /// Map a metadata node (and its transitive operands). + /// + /// Map all the (unmapped) nodes in the subgraph under \c N. The iterative + /// algorithm handles distinct nodes and uniqued node subgraphs using + /// different strategies. + /// + /// Distinct nodes are immediately mapped and added to \a DistinctWorklist + /// using \a mapDistinctNode(). Their mapping can always be computed + /// immediately without visiting operands, even if their operands change. + /// + /// The mapping for uniqued nodes depends on whether their operands change. + /// \a mapTopLevelUniquedNode() traverses the transitive uniqued subgraph of + /// a node to calculate uniqued node mappings in bulk. Distinct leafs are + /// added to \a DistinctWorklist with \a mapDistinctNode(). + /// + /// After mapping \c N itself, this function remaps the operands of the + /// distinct nodes in \a DistinctWorklist until the entire subgraph under \c + /// N has been mapped. + Metadata *map(const MDNode &N); + +private: + /// Map a top-level uniqued node and the uniqued subgraph underneath it. + /// + /// This builds up a post-order traversal of the (unmapped) uniqued subgraph + /// underneath \c FirstN and calculates the nodes' mapping. 
Each node uses + /// the identity mapping (\a Mapper::mapToSelf()) as long as all of its + /// operands uses the identity mapping. + /// + /// The algorithm works as follows: + /// + /// 1. \a createPOT(): traverse the uniqued subgraph under \c FirstN and + /// save the post-order traversal in the given \a UniquedGraph, tracking + /// nodes' operands change. + /// + /// 2. \a UniquedGraph::propagateChanges(): propagate changed operands + /// through the \a UniquedGraph until fixed point, following the rule + /// that if a node changes, any node that references must also change. + /// + /// 3. \a mapNodesInPOT(): map the uniqued nodes, creating new uniqued nodes + /// (referencing new operands) where necessary. + Metadata *mapTopLevelUniquedNode(const MDNode &FirstN); + + /// Try to map the operand of an \a MDNode. + /// + /// If \c Op is already mapped, return the mapping. If it's not an \a + /// MDNode, compute and return the mapping. If it's a distinct \a MDNode, + /// return the result of \a mapDistinctNode(). + /// + /// \return None if \c Op is an unmapped uniqued \a MDNode. + /// \post getMappedOp(Op) only returns None if this returns None. + Optional<Metadata *> tryToMapOperand(const Metadata *Op); + + /// Map a distinct node. + /// + /// Return the mapping for the distinct node \c N, saving the result in \a + /// DistinctWorklist for later remapping. + /// + /// \pre \c N is not yet mapped. + /// \pre \c N.isDistinct(). + MDNode *mapDistinctNode(const MDNode &N); + + /// Get a previously mapped node. + Optional<Metadata *> getMappedOp(const Metadata *Op) const; + + /// Create a post-order traversal of an unmapped uniqued node subgraph. + /// + /// This traverses the metadata graph deeply enough to map \c FirstN. It + /// uses \a tryToMapOperand() (via \a Mapper::mapSimplifiedNode()), so any + /// metadata that has already been mapped will not be part of the POT. + /// + /// Each node that has a changed operand from outside the graph (e.g., a + /// distinct node, an already-mapped uniqued node, or \a ConstantAsMetadata) + /// is marked with \a Data::HasChanged. + /// + /// \return \c true if any nodes in \c G have \a Data::HasChanged. + /// \post \c G.POT is a post-order traversal ending with \c FirstN. + /// \post \a Data::hasChanged in \c G.Info indicates whether any node needs + /// to change because of operands outside the graph. + bool createPOT(UniquedGraph &G, const MDNode &FirstN); + + /// Visit the operands of a uniqued node in the POT. + /// + /// Visit the operands in the range from \c I to \c E, returning the first + /// uniqued node we find that isn't yet in \c G. \c I is always advanced to + /// where to continue the loop through the operands. + /// + /// This sets \c HasChanged if any of the visited operands change. + MDNode *visitOperands(UniquedGraph &G, MDNode::op_iterator &I, + MDNode::op_iterator E, bool &HasChanged); + + /// Map all the nodes in the given uniqued graph. + /// + /// This visits all the nodes in \c G in post-order, using the identity + /// mapping or creating a new node depending on \a Data::HasChanged. + /// + /// \pre \a getMappedOp() returns None for nodes in \c G, but not for any of + /// their operands outside of \c G. + /// \pre \a Data::HasChanged is true for a node in \c G iff any of its + /// operands have changed. + /// \post \a getMappedOp() returns the mapped node for every node in \c G. + void mapNodesInPOT(UniquedGraph &G); + + /// Remap a node's operands using the given functor. 
+ /// + /// Iterate through the operands of \c N and update them in place using \c + /// mapOperand. + /// + /// \pre N.isDistinct() or N.isTemporary(). + template <class OperandMapper> + void remapOperands(MDNode &N, OperandMapper mapOperand); +}; + +} // end namespace + +Value *Mapper::mapValue(const Value *V) { + ValueToValueMapTy::iterator I = getVM().find(V); + + // If the value already exists in the map, use it. + if (I != getVM().end()) { + assert(I->second && "Unexpected null mapping"); + return I->second; + } + + // If we have a materializer and it can materialize a value, use that. + if (auto *Materializer = getMaterializer()) { + if (Value *NewV = Materializer->materialize(const_cast<Value *>(V))) { + getVM()[V] = NewV; + return NewV; + } + } + + // Global values do not need to be seeded into the VM if they + // are using the identity mapping. + if (isa<GlobalValue>(V)) { + if (Flags & RF_NullMapMissingGlobalValues) + return nullptr; + return getVM()[V] = const_cast<Value *>(V); + } + + if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) { + // Inline asm may need *type* remapping. + FunctionType *NewTy = IA->getFunctionType(); + if (TypeMapper) { + NewTy = cast<FunctionType>(TypeMapper->remapType(NewTy)); + + if (NewTy != IA->getFunctionType()) + V = InlineAsm::get(NewTy, IA->getAsmString(), IA->getConstraintString(), + IA->hasSideEffects(), IA->isAlignStack()); + } + + return getVM()[V] = const_cast<Value *>(V); + } + + if (const auto *MDV = dyn_cast<MetadataAsValue>(V)) { + const Metadata *MD = MDV->getMetadata(); + + if (auto *LAM = dyn_cast<LocalAsMetadata>(MD)) { + // Look through to grab the local value. + if (Value *LV = mapValue(LAM->getValue())) { + if (V == LAM->getValue()) + return const_cast<Value *>(V); + return MetadataAsValue::get(V->getContext(), ValueAsMetadata::get(LV)); + } + + // FIXME: always return nullptr once Verifier::verifyDominatesUse() + // ensures metadata operands only reference defined SSA values. + return (Flags & RF_IgnoreMissingLocals) + ? nullptr + : MetadataAsValue::get(V->getContext(), + MDTuple::get(V->getContext(), None)); + } + + // If this is a module-level metadata and we know that nothing at the module + // level is changing, then use an identity mapping. + if (Flags & RF_NoModuleLevelChanges) + return getVM()[V] = const_cast<Value *>(V); + + // Map the metadata and turn it into a value. + auto *MappedMD = mapMetadata(MD); + if (MD == MappedMD) + return getVM()[V] = const_cast<Value *>(V); + return getVM()[V] = MetadataAsValue::get(V->getContext(), MappedMD); + } + + // Okay, this either must be a constant (which may or may not be mappable) or + // is something that is not in the mapping table. + Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V)); + if (!C) + return nullptr; + + if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) + return mapBlockAddress(*BA); + + auto mapValueOrNull = [this](Value *V) { + auto Mapped = mapValue(V); + assert((Mapped || (Flags & RF_NullMapMissingGlobalValues)) && + "Unexpected null mapping for constant operand without " + "NullMapMissingGlobalValues flag"); + return Mapped; + }; + + // Otherwise, we have some other constant to remap. Start by checking to see + // if all operands have an identity remapping. 
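// A sketch of the effect of the constant-remapping logic that follows, seen
// through the public MapValue() entry point. "OldGV", "NewGV" and "Int8PtrTy"
// are hypothetical (two globals of identical type and an i8* type); the
// snippet is not part of the upstream file.
#if 0
  ValueToValueMapTy VM;
  VM[OldGV] = NewGV; // the global itself has already been remapped

  Constant *OldC = ConstantExpr::getBitCast(OldGV, Int8PtrTy);
  Constant *NewC = cast<Constant>(MapValue(OldC, VM));
  // NewC is an equivalent bitcast built around NewGV; OldC is left untouched,
  // and the result is memoized in VM for later queries.
#endif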
+ unsigned OpNo = 0, NumOperands = C->getNumOperands(); + Value *Mapped = nullptr; + for (; OpNo != NumOperands; ++OpNo) { + Value *Op = C->getOperand(OpNo); + Mapped = mapValueOrNull(Op); + if (!Mapped) + return nullptr; + if (Mapped != Op) + break; + } + + // See if the type mapper wants to remap the type as well. + Type *NewTy = C->getType(); + if (TypeMapper) + NewTy = TypeMapper->remapType(NewTy); + + // If the result type and all operands match up, then just insert an identity + // mapping. + if (OpNo == NumOperands && NewTy == C->getType()) + return getVM()[V] = C; + + // Okay, we need to create a new constant. We've already processed some or + // all of the operands, set them all up now. + SmallVector<Constant*, 8> Ops; + Ops.reserve(NumOperands); + for (unsigned j = 0; j != OpNo; ++j) + Ops.push_back(cast<Constant>(C->getOperand(j))); + + // If one of the operands mismatch, push it and the other mapped operands. + if (OpNo != NumOperands) { + Ops.push_back(cast<Constant>(Mapped)); + + // Map the rest of the operands that aren't processed yet. + for (++OpNo; OpNo != NumOperands; ++OpNo) { + Mapped = mapValueOrNull(C->getOperand(OpNo)); + if (!Mapped) + return nullptr; + Ops.push_back(cast<Constant>(Mapped)); + } + } + Type *NewSrcTy = nullptr; + if (TypeMapper) + if (auto *GEPO = dyn_cast<GEPOperator>(C)) + NewSrcTy = TypeMapper->remapType(GEPO->getSourceElementType()); + + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) + return getVM()[V] = CE->getWithOperands(Ops, NewTy, false, NewSrcTy); + if (isa<ConstantArray>(C)) + return getVM()[V] = ConstantArray::get(cast<ArrayType>(NewTy), Ops); + if (isa<ConstantStruct>(C)) + return getVM()[V] = ConstantStruct::get(cast<StructType>(NewTy), Ops); + if (isa<ConstantVector>(C)) + return getVM()[V] = ConstantVector::get(Ops); + // If this is a no-operand constant, it must be because the type was remapped. + if (isa<UndefValue>(C)) + return getVM()[V] = UndefValue::get(NewTy); + if (isa<ConstantAggregateZero>(C)) + return getVM()[V] = ConstantAggregateZero::get(NewTy); + assert(isa<ConstantPointerNull>(C)); + return getVM()[V] = ConstantPointerNull::get(cast<PointerType>(NewTy)); +} + +Value *Mapper::mapBlockAddress(const BlockAddress &BA) { + Function *F = cast<Function>(mapValue(BA.getFunction())); + + // F may not have materialized its initializer. In that case, create a + // dummy basic block for now, and replace it once we've materialized all + // the initializers. + BasicBlock *BB; + if (F->empty()) { + DelayedBBs.push_back(DelayedBasicBlock(BA)); + BB = DelayedBBs.back().TempBB.get(); + } else { + BB = cast_or_null<BasicBlock>(mapValue(BA.getBasicBlock())); + } + + return getVM()[&BA] = BlockAddress::get(F, BB ? 
BB : BA.getBasicBlock()); +} + +Metadata *Mapper::mapToMetadata(const Metadata *Key, Metadata *Val) { + getVM().MD()[Key].reset(Val); + return Val; +} + +Metadata *Mapper::mapToSelf(const Metadata *MD) { + return mapToMetadata(MD, const_cast<Metadata *>(MD)); +} + +Optional<Metadata *> MDNodeMapper::tryToMapOperand(const Metadata *Op) { + if (!Op) + return nullptr; + + if (Optional<Metadata *> MappedOp = M.mapSimpleMetadata(Op)) { +#ifndef NDEBUG + if (auto *CMD = dyn_cast<ConstantAsMetadata>(Op)) + assert((!*MappedOp || M.getVM().count(CMD->getValue()) || + M.getVM().getMappedMD(Op)) && + "Expected Value to be memoized"); + else + assert((isa<MDString>(Op) || M.getVM().getMappedMD(Op)) && + "Expected result to be memoized"); +#endif + return *MappedOp; + } + + const MDNode &N = *cast<MDNode>(Op); + if (N.isDistinct()) + return mapDistinctNode(N); + return None; +} + +MDNode *MDNodeMapper::mapDistinctNode(const MDNode &N) { + assert(N.isDistinct() && "Expected a distinct node"); + assert(!M.getVM().getMappedMD(&N) && "Expected an unmapped node"); + DistinctWorklist.push_back(cast<MDNode>( + (M.Flags & RF_MoveDistinctMDs) + ? M.mapToSelf(&N) + : M.mapToMetadata(&N, MDNode::replaceWithDistinct(N.clone())))); + return DistinctWorklist.back(); +} + +static ConstantAsMetadata *wrapConstantAsMetadata(const ConstantAsMetadata &CMD, + Value *MappedV) { + if (CMD.getValue() == MappedV) + return const_cast<ConstantAsMetadata *>(&CMD); + return MappedV ? ConstantAsMetadata::getConstant(MappedV) : nullptr; +} + +Optional<Metadata *> MDNodeMapper::getMappedOp(const Metadata *Op) const { + if (!Op) + return nullptr; + + if (Optional<Metadata *> MappedOp = M.getVM().getMappedMD(Op)) + return *MappedOp; + + if (isa<MDString>(Op)) + return const_cast<Metadata *>(Op); + + if (auto *CMD = dyn_cast<ConstantAsMetadata>(Op)) + return wrapConstantAsMetadata(*CMD, M.getVM().lookup(CMD->getValue())); + + return None; +} + +Metadata &MDNodeMapper::UniquedGraph::getFwdReference(MDNode &Op) { + auto Where = Info.find(&Op); + assert(Where != Info.end() && "Expected a valid reference"); + + auto &OpD = Where->second; + if (!OpD.HasChanged) + return Op; + + // Lazily construct a temporary node. + if (!OpD.Placeholder) + OpD.Placeholder = Op.clone(); + + return *OpD.Placeholder; +} + +template <class OperandMapper> +void MDNodeMapper::remapOperands(MDNode &N, OperandMapper mapOperand) { + assert(!N.isUniqued() && "Expected distinct or temporary nodes"); + for (unsigned I = 0, E = N.getNumOperands(); I != E; ++I) { + Metadata *Old = N.getOperand(I); + Metadata *New = mapOperand(Old); + + if (Old != New) + N.replaceOperandWith(I, New); + } +} + +namespace { +/// An entry in the worklist for the post-order traversal. +struct POTWorklistEntry { + MDNode *N; ///< Current node. + MDNode::op_iterator Op; ///< Current operand of \c N. + + /// Keep a flag of whether operands have changed in the worklist to avoid + /// hitting the map in \a UniquedGraph. + bool HasChanged = false; + + POTWorklistEntry(MDNode &N) : N(&N), Op(N.op_begin()) {} +}; +} // end namespace + +bool MDNodeMapper::createPOT(UniquedGraph &G, const MDNode &FirstN) { + assert(G.Info.empty() && "Expected a fresh traversal"); + assert(FirstN.isUniqued() && "Expected uniqued node in POT"); + + // Construct a post-order traversal of the uniqued subgraph under FirstN. 
+ bool AnyChanges = false; + SmallVector<POTWorklistEntry, 16> Worklist; + Worklist.push_back(POTWorklistEntry(const_cast<MDNode &>(FirstN))); + (void)G.Info[&FirstN]; + while (!Worklist.empty()) { + // Start or continue the traversal through the this node's operands. + auto &WE = Worklist.back(); + if (MDNode *N = visitOperands(G, WE.Op, WE.N->op_end(), WE.HasChanged)) { + // Push a new node to traverse first. + Worklist.push_back(POTWorklistEntry(*N)); + continue; + } + + // Push the node onto the POT. + assert(WE.N->isUniqued() && "Expected only uniqued nodes"); + assert(WE.Op == WE.N->op_end() && "Expected to visit all operands"); + auto &D = G.Info[WE.N]; + AnyChanges |= D.HasChanged = WE.HasChanged; + D.ID = G.POT.size(); + G.POT.push_back(WE.N); + + // Pop the node off the worklist. + Worklist.pop_back(); + } + return AnyChanges; +} + +MDNode *MDNodeMapper::visitOperands(UniquedGraph &G, MDNode::op_iterator &I, + MDNode::op_iterator E, bool &HasChanged) { + while (I != E) { + Metadata *Op = *I++; // Increment even on early return. + if (Optional<Metadata *> MappedOp = tryToMapOperand(Op)) { + // Check if the operand changes. + HasChanged |= Op != *MappedOp; + continue; + } + + // A uniqued metadata node. + MDNode &OpN = *cast<MDNode>(Op); + assert(OpN.isUniqued() && + "Only uniqued operands cannot be mapped immediately"); + if (G.Info.insert(std::make_pair(&OpN, Data())).second) + return &OpN; // This is a new one. Return it. + } + return nullptr; +} + +void MDNodeMapper::UniquedGraph::propagateChanges() { + bool AnyChanges; + do { + AnyChanges = false; + for (MDNode *N : POT) { + auto &D = Info[N]; + if (D.HasChanged) + continue; + + if (none_of(N->operands(), [&](const Metadata *Op) { + auto Where = Info.find(Op); + return Where != Info.end() && Where->second.HasChanged; + })) + continue; + + AnyChanges = D.HasChanged = true; + } + } while (AnyChanges); +} + +void MDNodeMapper::mapNodesInPOT(UniquedGraph &G) { + // Construct uniqued nodes, building forward references as necessary. + SmallVector<MDNode *, 16> CyclicNodes; + for (auto *N : G.POT) { + auto &D = G.Info[N]; + if (!D.HasChanged) { + // The node hasn't changed. + M.mapToSelf(N); + continue; + } + + // Remember whether this node had a placeholder. + bool HadPlaceholder(D.Placeholder); + + // Clone the uniqued node and remap the operands. + TempMDNode ClonedN = D.Placeholder ? std::move(D.Placeholder) : N->clone(); + remapOperands(*ClonedN, [this, &D, &G](Metadata *Old) { + if (Optional<Metadata *> MappedOp = getMappedOp(Old)) + return *MappedOp; + assert(G.Info[Old].ID > D.ID && "Expected a forward reference"); + return &G.getFwdReference(*cast<MDNode>(Old)); + }); + + auto *NewN = MDNode::replaceWithUniqued(std::move(ClonedN)); + M.mapToMetadata(N, NewN); + + // Nodes that were referenced out of order in the POT are involved in a + // uniquing cycle. + if (HadPlaceholder) + CyclicNodes.push_back(NewN); + } + + // Resolve cycles. + for (auto *N : CyclicNodes) + if (!N->isResolved()) + N->resolveCycles(); +} + +Metadata *MDNodeMapper::map(const MDNode &N) { + assert(DistinctWorklist.empty() && "MDNodeMapper::map is not recursive"); + assert(!(M.Flags & RF_NoModuleLevelChanges) && + "MDNodeMapper::map assumes module-level changes"); + + // Require resolved nodes whenever metadata might be remapped. + assert(N.isResolved() && "Unexpected unresolved node"); + + Metadata *MappedN = + N.isUniqued() ? 
mapTopLevelUniquedNode(N) : mapDistinctNode(N); + while (!DistinctWorklist.empty()) + remapOperands(*DistinctWorklist.pop_back_val(), [this](Metadata *Old) { + if (Optional<Metadata *> MappedOp = tryToMapOperand(Old)) + return *MappedOp; + return mapTopLevelUniquedNode(*cast<MDNode>(Old)); + }); + return MappedN; +} + +Metadata *MDNodeMapper::mapTopLevelUniquedNode(const MDNode &FirstN) { + assert(FirstN.isUniqued() && "Expected uniqued node"); + + // Create a post-order traversal of uniqued nodes under FirstN. + UniquedGraph G; + if (!createPOT(G, FirstN)) { + // Return early if no nodes have changed. + for (const MDNode *N : G.POT) + M.mapToSelf(N); + return &const_cast<MDNode &>(FirstN); + } + + // Update graph with all nodes that have changed. + G.propagateChanges(); + + // Map all the nodes in the graph. + mapNodesInPOT(G); + + // Return the original node, remapped. + return *getMappedOp(&FirstN); +} + +namespace { + +struct MapMetadataDisabler { + ValueToValueMapTy &VM; + + MapMetadataDisabler(ValueToValueMapTy &VM) : VM(VM) { + VM.disableMapMetadata(); + } + ~MapMetadataDisabler() { VM.enableMapMetadata(); } +}; + +} // end namespace + +Optional<Metadata *> Mapper::mapSimpleMetadata(const Metadata *MD) { + // If the value already exists in the map, use it. + if (Optional<Metadata *> NewMD = getVM().getMappedMD(MD)) + return *NewMD; + + if (isa<MDString>(MD)) + return const_cast<Metadata *>(MD); + + // This is a module-level metadata. If nothing at the module level is + // changing, use an identity mapping. + if ((Flags & RF_NoModuleLevelChanges)) + return const_cast<Metadata *>(MD); + + if (auto *CMD = dyn_cast<ConstantAsMetadata>(MD)) { + // Disallow recursion into metadata mapping through mapValue. + MapMetadataDisabler MMD(getVM()); + + // Don't memoize ConstantAsMetadata. Instead of lasting until the + // LLVMContext is destroyed, they can be deleted when the GlobalValue they + // reference is destructed. These aren't super common, so the extra + // indirection isn't that expensive. + return wrapConstantAsMetadata(*CMD, mapValue(CMD->getValue())); + } + + assert(isa<MDNode>(MD) && "Expected a metadata node"); + + return None; +} + +Metadata *Mapper::mapMetadata(const Metadata *MD) { + assert(MD && "Expected valid metadata"); + assert(!isa<LocalAsMetadata>(MD) && "Unexpected local metadata"); + + if (Optional<Metadata *> NewMD = mapSimpleMetadata(MD)) + return *NewMD; + + return MDNodeMapper(*this).map(*cast<MDNode>(MD)); +} + +void Mapper::flush() { + // Flush out the worklist of global values. + while (!Worklist.empty()) { + WorklistEntry E = Worklist.pop_back_val(); + CurrentMCID = E.MCID; + switch (E.Kind) { + case WorklistEntry::MapGlobalInit: + E.Data.GVInit.GV->setInitializer(mapConstant(E.Data.GVInit.Init)); + break; + case WorklistEntry::MapAppendingVar: { + unsigned PrefixSize = AppendingInits.size() - E.AppendingGVNumNewMembers; + mapAppendingVariable(*E.Data.AppendingGV.GV, + E.Data.AppendingGV.InitPrefix, + E.AppendingGVIsOldCtorDtor, + makeArrayRef(AppendingInits).slice(PrefixSize)); + AppendingInits.resize(PrefixSize); + break; + } + case WorklistEntry::MapGlobalAliasee: + E.Data.GlobalAliasee.GA->setAliasee( + mapConstant(E.Data.GlobalAliasee.Aliasee)); + break; + case WorklistEntry::RemapFunction: + remapFunction(*E.Data.RemapF); + break; + } + } + CurrentMCID = 0; + + // Finish logic for block addresses now that all global values have been + // handled. 
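// A sketch of the deferred-work pattern that flush() completes, as an
// IRMover-style client might drive it. "NewGV", "OldInit", "NewF" and
// "SomeValue" are hypothetical; the snippet is not upstream code.
#if 0
  ValueToValueMapTy VM;
  ValueMapper VMapper(VM);

  // Heavy work is only recorded here...
  VMapper.scheduleMapGlobalInitializer(*NewGV, *OldInit, /*MCID=*/0);
  VMapper.scheduleRemapFunction(*NewF, /*MCID=*/0);

  // ...and is performed when the next public entry point flushes the
  // worklist (FlushingMapper calls flush() as it goes out of scope).
  VMapper.mapValue(*SomeValue);
#endif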
+ while (!DelayedBBs.empty()) { + DelayedBasicBlock DBB = DelayedBBs.pop_back_val(); + BasicBlock *BB = cast_or_null<BasicBlock>(mapValue(DBB.OldBB)); + DBB.TempBB->replaceAllUsesWith(BB ? BB : DBB.OldBB); + } +} + +void Mapper::remapInstruction(Instruction *I) { + // Remap operands. + for (Use &Op : I->operands()) { + Value *V = mapValue(Op); + // If we aren't ignoring missing entries, assert that something happened. + if (V) + Op = V; + else + assert((Flags & RF_IgnoreMissingLocals) && + "Referenced value not in value map!"); + } + + // Remap phi nodes' incoming blocks. + if (PHINode *PN = dyn_cast<PHINode>(I)) { + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *V = mapValue(PN->getIncomingBlock(i)); + // If we aren't ignoring missing entries, assert that something happened. + if (V) + PN->setIncomingBlock(i, cast<BasicBlock>(V)); + else + assert((Flags & RF_IgnoreMissingLocals) && + "Referenced block not in value map!"); + } + } + + // Remap attached metadata. + SmallVector<std::pair<unsigned, MDNode *>, 4> MDs; + I->getAllMetadata(MDs); + for (const auto &MI : MDs) { + MDNode *Old = MI.second; + MDNode *New = cast_or_null<MDNode>(mapMetadata(Old)); + if (New != Old) + I->setMetadata(MI.first, New); + } + + if (!TypeMapper) + return; + + // If the instruction's type is being remapped, do so now. + if (auto CS = CallSite(I)) { + SmallVector<Type *, 3> Tys; + FunctionType *FTy = CS.getFunctionType(); + Tys.reserve(FTy->getNumParams()); + for (Type *Ty : FTy->params()) + Tys.push_back(TypeMapper->remapType(Ty)); + CS.mutateFunctionType(FunctionType::get( + TypeMapper->remapType(I->getType()), Tys, FTy->isVarArg())); + return; + } + if (auto *AI = dyn_cast<AllocaInst>(I)) + AI->setAllocatedType(TypeMapper->remapType(AI->getAllocatedType())); + if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) { + GEP->setSourceElementType( + TypeMapper->remapType(GEP->getSourceElementType())); + GEP->setResultElementType( + TypeMapper->remapType(GEP->getResultElementType())); + } + I->mutateType(TypeMapper->remapType(I->getType())); +} + +void Mapper::remapFunction(Function &F) { + // Remap the operands. + for (Use &Op : F.operands()) + if (Op) + Op = mapValue(Op); + + // Remap the metadata attachments. + SmallVector<std::pair<unsigned, MDNode *>, 8> MDs; + F.getAllMetadata(MDs); + F.clearMetadata(); + for (const auto &I : MDs) + F.addMetadata(I.first, *cast<MDNode>(mapMetadata(I.second))); + + // Remap the argument types. + if (TypeMapper) + for (Argument &A : F.args()) + A.mutateType(TypeMapper->remapType(A.getType())); + + // Remap the instructions. + for (BasicBlock &BB : F) + for (Instruction &I : BB) + remapInstruction(&I); +} + +void Mapper::mapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix, + bool IsOldCtorDtor, + ArrayRef<Constant *> NewMembers) { + SmallVector<Constant *, 16> Elements; + if (InitPrefix) { + unsigned NumElements = + cast<ArrayType>(InitPrefix->getType())->getNumElements(); + for (unsigned I = 0; I != NumElements; ++I) + Elements.push_back(InitPrefix->getAggregateElement(I)); + } + + PointerType *VoidPtrTy; + Type *EltTy; + if (IsOldCtorDtor) { + // FIXME: This upgrade is done during linking to support the C API. See + // also IRLinker::linkAppendingVarProto() in IRMover.cpp. 
+ VoidPtrTy = Type::getInt8Ty(GV.getContext())->getPointerTo(); + auto &ST = *cast<StructType>(NewMembers.front()->getType()); + Type *Tys[3] = {ST.getElementType(0), ST.getElementType(1), VoidPtrTy}; + EltTy = StructType::get(GV.getContext(), Tys, false); + } + + for (auto *V : NewMembers) { + Constant *NewV; + if (IsOldCtorDtor) { + auto *S = cast<ConstantStruct>(V); + auto *E1 = mapValue(S->getOperand(0)); + auto *E2 = mapValue(S->getOperand(1)); + Value *Null = Constant::getNullValue(VoidPtrTy); + NewV = + ConstantStruct::get(cast<StructType>(EltTy), E1, E2, Null, nullptr); + } else { + NewV = cast_or_null<Constant>(mapValue(V)); + } + Elements.push_back(NewV); + } + + GV.setInitializer(ConstantArray::get( + cast<ArrayType>(GV.getType()->getElementType()), Elements)); +} + +void Mapper::scheduleMapGlobalInitializer(GlobalVariable &GV, Constant &Init, + unsigned MCID) { + assert(AlreadyScheduled.insert(&GV).second && "Should not reschedule"); + assert(MCID < MCs.size() && "Invalid mapping context"); + + WorklistEntry WE; + WE.Kind = WorklistEntry::MapGlobalInit; + WE.MCID = MCID; + WE.Data.GVInit.GV = &GV; + WE.Data.GVInit.Init = &Init; + Worklist.push_back(WE); +} + +void Mapper::scheduleMapAppendingVariable(GlobalVariable &GV, + Constant *InitPrefix, + bool IsOldCtorDtor, + ArrayRef<Constant *> NewMembers, + unsigned MCID) { + assert(AlreadyScheduled.insert(&GV).second && "Should not reschedule"); + assert(MCID < MCs.size() && "Invalid mapping context"); + + WorklistEntry WE; + WE.Kind = WorklistEntry::MapAppendingVar; + WE.MCID = MCID; + WE.Data.AppendingGV.GV = &GV; + WE.Data.AppendingGV.InitPrefix = InitPrefix; + WE.AppendingGVIsOldCtorDtor = IsOldCtorDtor; + WE.AppendingGVNumNewMembers = NewMembers.size(); + Worklist.push_back(WE); + AppendingInits.append(NewMembers.begin(), NewMembers.end()); +} + +void Mapper::scheduleMapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee, + unsigned MCID) { + assert(AlreadyScheduled.insert(&GA).second && "Should not reschedule"); + assert(MCID < MCs.size() && "Invalid mapping context"); + + WorklistEntry WE; + WE.Kind = WorklistEntry::MapGlobalAliasee; + WE.MCID = MCID; + WE.Data.GlobalAliasee.GA = &GA; + WE.Data.GlobalAliasee.Aliasee = &Aliasee; + Worklist.push_back(WE); +} + +void Mapper::scheduleRemapFunction(Function &F, unsigned MCID) { + assert(AlreadyScheduled.insert(&F).second && "Should not reschedule"); + assert(MCID < MCs.size() && "Invalid mapping context"); + + WorklistEntry WE; + WE.Kind = WorklistEntry::RemapFunction; + WE.MCID = MCID; + WE.Data.RemapF = &F; + Worklist.push_back(WE); +} + +void Mapper::addFlags(RemapFlags Flags) { + assert(!hasWorkToDo() && "Expected to have flushed the worklist"); + this->Flags = this->Flags | Flags; +} + +static Mapper *getAsMapper(void *pImpl) { + return reinterpret_cast<Mapper *>(pImpl); +} + +namespace { + +class FlushingMapper { + Mapper &M; + +public: + explicit FlushingMapper(void *pImpl) : M(*getAsMapper(pImpl)) { + assert(!M.hasWorkToDo() && "Expected to be flushed"); + } + ~FlushingMapper() { M.flush(); } + Mapper *operator->() const { return &M; } +}; + +} // end namespace + +ValueMapper::ValueMapper(ValueToValueMapTy &VM, RemapFlags Flags, + ValueMapTypeRemapper *TypeMapper, + ValueMaterializer *Materializer) + : pImpl(new Mapper(VM, Flags, TypeMapper, Materializer)) {} + +ValueMapper::~ValueMapper() { delete getAsMapper(pImpl); } + +unsigned +ValueMapper::registerAlternateMappingContext(ValueToValueMapTy &VM, + ValueMaterializer *Materializer) { + return 
getAsMapper(pImpl)->registerAlternateMappingContext(VM, Materializer); +} + +void ValueMapper::addFlags(RemapFlags Flags) { + FlushingMapper(pImpl)->addFlags(Flags); +} + +Value *ValueMapper::mapValue(const Value &V) { + return FlushingMapper(pImpl)->mapValue(&V); +} + +Constant *ValueMapper::mapConstant(const Constant &C) { + return cast_or_null<Constant>(mapValue(C)); +} + +Metadata *ValueMapper::mapMetadata(const Metadata &MD) { + return FlushingMapper(pImpl)->mapMetadata(&MD); +} + +MDNode *ValueMapper::mapMDNode(const MDNode &N) { + return cast_or_null<MDNode>(mapMetadata(N)); +} + +void ValueMapper::remapInstruction(Instruction &I) { + FlushingMapper(pImpl)->remapInstruction(&I); +} + +void ValueMapper::remapFunction(Function &F) { + FlushingMapper(pImpl)->remapFunction(F); +} + +void ValueMapper::scheduleMapGlobalInitializer(GlobalVariable &GV, + Constant &Init, + unsigned MCID) { + getAsMapper(pImpl)->scheduleMapGlobalInitializer(GV, Init, MCID); +} + +void ValueMapper::scheduleMapAppendingVariable(GlobalVariable &GV, + Constant *InitPrefix, + bool IsOldCtorDtor, + ArrayRef<Constant *> NewMembers, + unsigned MCID) { + getAsMapper(pImpl)->scheduleMapAppendingVariable( + GV, InitPrefix, IsOldCtorDtor, NewMembers, MCID); +} + +void ValueMapper::scheduleMapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee, + unsigned MCID) { + getAsMapper(pImpl)->scheduleMapGlobalAliasee(GA, Aliasee, MCID); +} + +void ValueMapper::scheduleRemapFunction(Function &F, unsigned MCID) { + getAsMapper(pImpl)->scheduleRemapFunction(F, MCID); +} |
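// A closing sketch of how this interface is typically consumed by cloning
// code (CloneFunctionInto and friends). "OldArg", "NewArg" and "ClonedF" are
// hypothetical; the snippet is not part of the upstream file.
#if 0
#include "llvm/Transforms/Utils/ValueMapper.h"

  ValueToValueMapTy VM;
  VM[OldArg] = NewArg; // seed the map with everything known to change

  ValueMapper Mapper(VM, RF_IgnoreMissingLocals);
  for (BasicBlock &BB : *ClonedF)
    for (Instruction &I : BB)
      Mapper.remapInstruction(I); // rewrites operands, PHI blocks and metadata
#endif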